├── store-http.h ├── ui.h ├── store-local.h ├── sha.h ├── chunk.c ├── test ├── xorshift32.h ├── xorshift32.c ├── CMakeLists.txt ├── e2e │ ├── casync_test.py │ └── csn_test.py ├── sha_test.c ├── encrypt_test.c └── chunker_test.c ├── caibx.h ├── chunk.h ├── target.h ├── index.h ├── encrypt.h ├── ui.c ├── log.h ├── .github └── workflows │ ├── coverity.yml │ └── tests.yml ├── store.h ├── chunker.h ├── README.md ├── doc ├── csn-tool.1.scd ├── csn-tool.1 ├── chunk-encryption.md ├── csn.1.scd └── csn.1 ├── log.c ├── index.c ├── sha.c ├── CMakeLists.txt ├── patchdd.c ├── store.c ├── utils.h ├── target.c ├── csn-bench.c ├── csn-tool.c ├── encrypt.c ├── utils.c ├── store-local.c ├── caibx.c ├── main.c ├── chunker.c └── store-http.c /store-http.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "store.h" 4 | 5 | struct store *store_http_new(const char *url); 6 | -------------------------------------------------------------------------------- /ui.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #define PROGRESS_STATUS_INIT ((progess_status_t)0) 6 | typedef uint32_t progess_status_t; 7 | void show_progress(uint8_t percent, progess_status_t *status); 8 | -------------------------------------------------------------------------------- /store-local.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "store.h" 6 | #include "chunker.h" 7 | 8 | struct store *store_local_new(const char *path, const char *index, struct chunker_params *params); 9 | -------------------------------------------------------------------------------- /sha.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #define SHA_LEN 32 7 | 8 | int sha_kcapi_init(const char *driver); 9 | void 
sha_kcapi_deinit(void); 10 | int sha_once(const uint8_t *data, size_t len, uint8_t *out); 11 | -------------------------------------------------------------------------------- /chunk.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "utils.h" 6 | 7 | #include "chunk.h" 8 | 9 | void chunk_format_id(char *id_str, const uint8_t *id) 10 | { 11 | for (size_t i = 0; i < CHUNK_ID_LEN; i++) 12 | u_assert_se(sprintf(&id_str[2*i], "%02x", id[i]) == 2); 13 | } 14 | -------------------------------------------------------------------------------- /test/xorshift32.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | struct xorshift32_state { 7 | uint32_t a; 8 | }; 9 | 10 | #define XORSHIFT32_INIT(seed) (struct xorshift32_state) { \ 11 | .a = (seed) \ 12 | } 13 | 14 | uint32_t xorshift32(struct xorshift32_state *state); 15 | void xorshift32_fill(struct xorshift32_state *state, void *buf, size_t len); 16 | -------------------------------------------------------------------------------- /caibx.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "chunker.h" 7 | 8 | int caibx_load_header(int fd, struct chunker_params *params_out, size_t *n_entries_out); 9 | 10 | int caibx_iterate_entries(int fd, struct chunker_params *params, size_t n_entries, 11 | int (*cb)(uint64_t offset, uint32_t len, uint8_t *id, void *arg), void *arg); 12 | -------------------------------------------------------------------------------- /chunk.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "sha.h" 7 | #include "utils.h" 8 | 9 | #define CHUNK_ID_LEN 32 10 | #define CHUNK_ID_STRLEN (2 * (CHUNK_ID_LEN) + 1) 11 | 12 | void chunk_format_id(char 
/*
 * Fill buf with len bytes of output from the xorshift32 generator.
 *
 * state - generator state, advanced once per 32-bit word written
 * buf   - destination buffer; may have any alignment
 * len   - number of bytes to write, must be a multiple of 4
 *
 * Each generated word is stored in native byte order, so the byte stream
 * is the same as a direct uint32_t store would produce on an aligned buffer.
 */
void xorshift32_fill(struct xorshift32_state *state, void *buf, size_t len)
{
	assert(len % 4 == 0);

	unsigned char *dst = buf;
	size_t off = 0;

	/* Copy byte-wise instead of storing through a uint32_t pointer:
	 * callers hand in arbitrary byte buffers, and a misaligned 32-bit
	 * store is undefined behaviour (and trips -fsanitize=undefined). */
	while (len - off >= sizeof(uint32_t)) {
		uint32_t word = xorshift32(state);
		const unsigned char *src = (const unsigned char *)&word;

		for (size_t k = 0; k < sizeof(word); k++)
			dst[off + k] = src[k];

		off += sizeof(word);
	}
}
uint32_t len; 11 | uint32_t flags; 12 | uint8_t id[CHUNK_ID_LEN]; 13 | }; 14 | 15 | struct index { 16 | bool sorted; 17 | 18 | size_t n_entries, space_avail; 19 | struct index_entry *entries; 20 | }; 21 | 22 | int index_init(struct index *idx, size_t expected_size); 23 | void index_cleanup(struct index *idx); 24 | 25 | struct index_entry *index_insert(struct index *idx, uint64_t offset, uint32_t len, const uint8_t *id); 26 | struct index_entry *index_query(struct index *idx, const uint8_t *id); 27 | -------------------------------------------------------------------------------- /encrypt.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | struct encrypt_ctx { 7 | uint8_t key[32]; 8 | 9 | void *cipher; 10 | }; 11 | 12 | int encrypt_init(struct encrypt_ctx *ctx, const uint8_t key[static 32]); 13 | void encrypt_close(struct encrypt_ctx *ctx); 14 | 15 | int encrypt_restart(struct encrypt_ctx *ctx, const uint8_t nonce[static 24]); 16 | int encrypt_do(struct encrypt_ctx *ctx, uint8_t *out, const uint8_t *in, size_t len); 17 | 18 | int encrypt_parse_keyspec(uint8_t key_out[static 32], const char *keyspec); 19 | 20 | #ifdef TESTING 21 | void hchacha20(uint8_t out[32], const uint8_t key[32], const uint8_t in[16]); 22 | #endif 23 | -------------------------------------------------------------------------------- /ui.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "utils.h" 6 | 7 | #include "ui.h" 8 | 9 | #define PROGRESS_BAR_LEN 60 10 | 11 | void show_progress(uint8_t percent, progess_status_t *status) 12 | { 13 | static const char spinner[] = "-\\|//"; 14 | char bar[PROGRESS_BAR_LEN + 1]; 15 | 16 | u_assert(status); 17 | 18 | size_t tip = percent * PROGRESS_BAR_LEN / 100; 19 | for (size_t i = 0; i < PROGRESS_BAR_LEN; i++) 20 | bar[i] = (i < tip) ? 
'=' : ' '; 21 | 22 | bar[PROGRESS_BAR_LEN] = 0; 23 | 24 | fprintf(stderr, "\033[0G[%c] |%s| %3d%%", spinner[(*status)++ % strlen(spinner)], bar, percent); 25 | 26 | if (percent == 100) 27 | fputc('\n', stderr); 28 | 29 | fflush(stderr); 30 | } 31 | -------------------------------------------------------------------------------- /log.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | enum u_loglevel { 7 | U_LOG_ERR, 8 | U_LOG_WARN, 9 | U_LOG_INFO, 10 | U_LOG_DEBUG, 11 | }; 12 | 13 | void u_log_init(void); 14 | 15 | __attribute__((format(printf, 5, 6))) 16 | void _u_log(enum u_loglevel level, const char *path, const char *func, int line, const char *format, ...); 17 | 18 | #define u_log(level, format, ...) _u_log(U_LOG_ ## level, __FILE__, __func__, __LINE__, format, ##__VA_ARGS__) 19 | #define u_log_errno(format, ...) do { \ 20 | if (format) \ 21 | u_log(ERR, format ": %s", ##__VA_ARGS__, strerror(errno)); \ 22 | else \ 23 | u_log(ERR, "%s", strerror(errno)); \ 24 | } while (0) 25 | 26 | bool check_loglevel(enum u_loglevel level); 27 | -------------------------------------------------------------------------------- /.github/workflows/coverity.yml: -------------------------------------------------------------------------------- 1 | name: Coverity Scan 2 | on: 3 | workflow_dispatch: 4 | push: 5 | branches: 6 | - master 7 | 8 | jobs: 9 | coverity: 10 | runs-on: ubuntu-24.04 11 | steps: 12 | - run: sudo apt-get update -qq 13 | 14 | - run: sudo apt-get install -y 15 | libssl-dev 16 | libcurl4-openssl-dev 17 | 18 | - uses: actions/checkout@v4 19 | 20 | - run: cmake 21 | -DBUILD_TESTING=Off 22 | -B ${{github.workspace}}/build 23 | 24 | - uses: vapier/coverity-scan-action@v1 25 | with: 26 | project: casync-nano 27 | email: ${{ secrets.COVERITY_SCAN_EMAIL }} 28 | token: ${{ secrets.COVERITY_SCAN_TOKEN }} 29 | command: make -C ${{github.workspace}}/build 30 | 
/*
 * Common interface of a chunk store: a name plus two callbacks.
 * The inline helpers below (store_get_chunk, store_free, store_get_name)
 * are thin wrappers around these members.
 */
struct store {
	/* Name of the store; store_get_name() returns a pointer to this buffer. */
	char name[STORE_NAME_MAX];

	/* Optional destructor: store_free() only calls it when it is non-NULL. */
	void (*free)(struct store *s);
	/* Chunk lookup; store_get_chunk() forwards its arguments here unchanged
	 * and returns whatever this callback returns. */
	ssize_t (*get_chunk)(struct store *s, uint8_t *id, uint8_t *out, size_t out_max);
};
# Unit test executables.
#
# Include paths: pkg_search_module(CMOCKA ...) exports CMOCKA_INCLUDE_DIRS
# (there is no CMOCKA_INCLUDES), and the directory of this CMakeLists.txt is
# CMAKE_CURRENT_SOURCE_DIR (there is no CURRENT_SOURCE_DIR). The previous
# spellings silently expanded to nothing.

add_executable(chunker_test chunker_test.c xorshift32.c ../chunker.c ../chunk.c ../sha.c ../log.c ../utils.c)
target_include_directories(chunker_test PRIVATE ${PROJECT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${CMOCKA_INCLUDE_DIRS})
target_link_libraries(chunker_test PRIVATE ${CMOCKA_LIBRARIES} ${CRYPTO_LIBRARIES})
add_test(chunker_test chunker_test)

add_executable(sha_test sha_test.c xorshift32.c ../sha.c ../log.c ../utils.c)
target_include_directories(sha_test PRIVATE ${PROJECT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${CMOCKA_INCLUDE_DIRS})
target_link_libraries(sha_test PRIVATE ${CMOCKA_LIBRARIES} ${CRYPTO_LIBRARIES})
add_test(sha_test sha_test)

add_executable(encrypt_test encrypt_test.c ../encrypt.c ../log.c ../utils.c)
target_include_directories(encrypt_test PRIVATE ${PROJECT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${CMOCKA_INCLUDE_DIRS})
target_link_libraries(encrypt_test PRIVATE ${CMOCKA_LIBRARIES} ${CRYPTO_LIBRARIES})
add_test(encrypt_test encrypt_test)
26 | """ 27 | 28 | source_image = tempfile.NamedTemporaryFile() 29 | write_reproducible_random(source_image, 1024*1024, b'dummy image') 30 | 31 | casync_dir = tempfile.TemporaryDirectory() 32 | subprocess.run(['casync', 'make', '--digest=sha256', 'img.caibx', source_image.name], 33 | cwd=casync_dir.name, 34 | stdout=subprocess.DEVNULL 35 | ).check_returncode() 36 | 37 | class Server(http.server.HTTPServer): 38 | def finish_request(self, request, client_address): 39 | chunk_dir = Path(casync_dir.name) / 'default.castr' 40 | self.RequestHandlerClass(request, client_address, self, 41 | directory=str(chunk_dir)) 42 | 43 | httpd = Server(("", 8080), http.server.SimpleHTTPRequestHandler) 44 | server_thread = Thread(target=httpd.serve_forever, daemon=True) 45 | server_thread.start() 46 | 47 | t = Target(1024*1024) 48 | try: 49 | run_csn(Path(casync_dir.name) / 'img.caibx', t, http_store=True) 50 | finally: 51 | httpd.shutdown() 52 | 53 | assert filecmp.cmp(t.path(), source_image.name) 54 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # casync-nano 2 | 3 | casync-nano is a feature-reduced replacement for casync aimed at 4 | embedded targets. It only supports `extract` operations, the `.caibx` 5 | file format and limits hashing to SHA256 hashes, but in turn tries to be 6 | light on dependencies and system resources in operation. 7 | 8 | It implements enough of the regular casync CLI to work as a drop-in 9 | replacement in the casync mode of [RAUC](https://rauc.io/). 10 | 11 | See the [man page](doc/csn.1.scd) for more details. 12 | 13 | ## Scope 14 | 15 | casync-nano is meant to be used as part of an image-based differential 16 | update mechanism for embedded devices. As such, it leaves out features 17 | such as filesystem images (`.caidx`) and image generation/management. 
*csn-tool* crypt _KEYSPEC_ [_INPUT_PATH_ [_OUTPUT_PATH_]]
Since the chunk encryption 22 | algorithm in casync-nano is self-inverse, both operations are executed using 23 | the same command. To avoid repetition, only the term "encryption" is used in 24 | this section. 25 | 26 | _KEYSPEC_ describes how the encryption key is obtained. In each case, the 32 27 | byte key is represented in hexadecimal. Possible values for _KEYSPEC_ are: 28 | 29 | - _key:HEXSTRING_ - supplies the key literally through the command line. The 30 | usual caveats about this making it visible to other processes on the system 31 | apply. 32 | - _env:NAME_ - read the key from the environment variable _NAME_ 33 | - _file:PATH_ - read the key from the file at _PATH_ 34 | 35 | The file name at _INPUT_PATH_ must follow the casync naming convention (namely, 36 | its 32 byte chunk ID in hexadecimal followed by .cacnk or .cacnk.enc). 37 | 38 | If both _INPUT_PATH_ and _OUTPUT_PATH_ are supplied, the input file is 39 | encrypted and the output is stored at the supplied output path. If 40 | _OUTPUT_PATH_ is *-*, the output will be written to standard out. 41 | 42 | If only _INPUT_PATH_ is supplied, _OUTPUT_PATH_ is determined based on the 43 | input path. Namely, if the input file is a .cacnk file, the output file is the 44 | corresponding .cacnk.enc file in the same directory and vice versa. 45 | 46 | If neither _INPUT_PATH_ nor _OUTPUT_PATH_ are supplied, csn-tool reads a list 47 | of files from standard input and encrypts each one in turn, automatically 48 | deriving the output path for each one. This is useful when encrypting an entire 49 | chunk store, for example: 50 | 51 | find ./default.castr/ -name \*.cacnk | csn-tool crypt file:./key 52 | 53 | In each case, if the output file already exists, it is not modified. 54 | 55 | # BUG REPORTS 56 | 57 | Please report bugs in casync-nano or errors in this manual page via GitHub 58 | (https://github.com/florolf/casync-nano/issues) or email (_fl@n621.de_). 
59 | 60 | # SEE ALSO 61 | _csn_(1)++ 62 | -------------------------------------------------------------------------------- /log.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "utils.h" 9 | #include "log.h" 10 | 11 | static enum u_loglevel loglevel = U_LOG_INFO; 12 | static bool is_tty; 13 | 14 | void u_log_init(void) 15 | { 16 | const char *s; 17 | 18 | is_tty = isatty(STDERR_FILENO); 19 | 20 | s = getenv("LOGLEVEL"); 21 | if (!s) 22 | return; 23 | 24 | if (streq(s, "debug")) 25 | loglevel = U_LOG_DEBUG; 26 | else if (streq(s, "info")) 27 | loglevel = U_LOG_INFO; 28 | else if (streq(s, "warn") || streq(s, "warning")) 29 | loglevel = U_LOG_WARN; 30 | else if (streq(s, "err") || streq(s, "error")) 31 | loglevel = U_LOG_ERR; 32 | else 33 | u_log(WARN, "unknown loglevel '%s'", s); 34 | } 35 | 36 | #define COLOR_START "\x1b[38;5;%dm" 37 | #define COLOR_END "\x1b[0m" 38 | 39 | #define COLOR_GREEN 40 40 | #define COLOR_BLUE 75 41 | #define COLOR_RED 196 42 | #define COLOR_ORANGE 202 43 | 44 | bool check_loglevel(enum u_loglevel level) 45 | { 46 | if (u_likely(level > loglevel)) 47 | return false; 48 | 49 | return true; 50 | } 51 | 52 | void _u_log(enum u_loglevel level, const char *path, const char *func, int line, const char *format, ...) 
53 | { 54 | int _; 55 | 56 | if (check_loglevel(level) == false) 57 | return; 58 | 59 | const char *file = strrchr(path, '/'); 60 | if (file) 61 | file = file + 1; 62 | else 63 | file = path; 64 | 65 | char buf[128]; 66 | if (is_tty) { 67 | static const int color_map[] = { 68 | [U_LOG_ERR] = COLOR_RED, 69 | [U_LOG_WARN] = COLOR_ORANGE, 70 | [U_LOG_INFO] = COLOR_GREEN, 71 | [U_LOG_DEBUG] = COLOR_BLUE 72 | }; 73 | 74 | _ = snprintf(buf, sizeof(buf), COLOR_START "%s:%s:%d\t%s" COLOR_END "\n", 75 | color_map[level], 76 | file, func, line, 77 | format); 78 | } else { 79 | static const char *level_map[] = { 80 | [U_LOG_ERR] = "ERR", 81 | [U_LOG_WARN] = "WRN", 82 | [U_LOG_INFO] = "INF", 83 | [U_LOG_DEBUG] = "DBG" 84 | }; 85 | 86 | _ = snprintf(buf, sizeof(buf), "%s %s:%s:%d %s\n", 87 | level_map[level], 88 | file, func, line, 89 | format); 90 | } 91 | 92 | if (_ < 0) { 93 | fputs("error formatting log message", stderr); 94 | return; 95 | } 96 | 97 | va_list args; 98 | va_start(args, format); 99 | vfprintf(stderr, buf, args); 100 | va_end(args); 101 | } 102 | -------------------------------------------------------------------------------- /index.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "utils.h" 5 | #include "index.h" 6 | 7 | int index_init(struct index *idx, size_t expected_size) 8 | { 9 | u_assert(idx); 10 | u_assert(expected_size > 0); 11 | 12 | idx->entries = calloc(expected_size, sizeof(*idx->entries)); 13 | u_notnull(idx->entries, return -1); 14 | 15 | idx->sorted = false; 16 | idx->n_entries = 0; 17 | idx->space_avail = expected_size; 18 | 19 | return 0; 20 | } 21 | 22 | void index_cleanup(struct index *idx) 23 | { 24 | u_assert(idx); 25 | 26 | free(idx->entries); 27 | } 28 | 29 | struct index_entry *index_insert(struct index *idx, uint64_t offset, uint32_t len, const uint8_t *id) 30 | { 31 | u_assert(idx); 32 | u_assert(len != 0); 33 | u_assert(id); 34 | 35 | 
u_assert(idx->entries); 36 | 37 | if (idx->n_entries >= idx->space_avail) { 38 | size_t new_size; 39 | void *new_entries; 40 | 41 | u_log(DEBUG, "index %p exceeded designated space of %zu entries", 42 | idx, idx->space_avail); 43 | 44 | new_size = (idx->space_avail * 12ull) / 10 + 1; 45 | new_entries = realloc(idx->entries, new_size * sizeof(*idx->entries)); 46 | if (!new_entries) { 47 | u_log_errno("increasing size of entries array of index %p to %zu entries failed", 48 | idx, idx->space_avail); 49 | 50 | return NULL; 51 | } 52 | 53 | idx->space_avail = new_size; 54 | idx->entries = new_entries; 55 | } 56 | 57 | struct index_entry *e = &idx->entries[idx->n_entries]; 58 | e->start = offset; 59 | e->len = len; 60 | e->flags = 0; 61 | memcpy(e->id, id, CHUNK_ID_LEN); 62 | 63 | idx->sorted = false; 64 | idx->n_entries++; 65 | 66 | return e; 67 | } 68 | 69 | static int index_entry_compare(const void *a, const void *b) 70 | { 71 | const struct index_entry *ie_a = (const struct index_entry*)a; 72 | const struct index_entry *ie_b = (const struct index_entry*)b; 73 | 74 | return memcmp(ie_a->id, ie_b->id, CHUNK_ID_LEN); 75 | } 76 | 77 | static void index_sort(struct index *idx) 78 | { 79 | u_assert(idx); 80 | 81 | qsort(idx->entries, idx->n_entries, sizeof(*idx->entries), 82 | index_entry_compare); 83 | 84 | idx->sorted = true; 85 | } 86 | 87 | struct index_entry *index_query(struct index *idx, const uint8_t *id) 88 | { 89 | u_assert(idx); 90 | u_assert(id); 91 | 92 | if (!idx->sorted) 93 | index_sort(idx); 94 | 95 | struct index_entry key; 96 | memcpy(key.id, id, CHUNK_ID_LEN); 97 | 98 | return bsearch(&key, idx->entries, idx->n_entries, sizeof(*idx->entries), 99 | index_entry_compare); 100 | } 101 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | on: 3 | push: 4 | 5 | jobs: 6 | unit-tests: 7 | name: Unit Tests 
8 | runs-on: ubuntu-24.04 9 | steps: 10 | - run: sudo apt-get update -qq 11 | 12 | - run: sudo apt-get install -y 13 | libssl-dev 14 | libcurl4-openssl-dev 15 | libcmocka-dev 16 | 17 | - uses: actions/checkout@v4 18 | 19 | - name: Configure 20 | run: cmake 21 | -B ${{github.workspace}}/build 22 | 23 | - name: Build 24 | run: cmake --build ${{github.workspace}}/build 25 | 26 | - name: Run tests 27 | run: CMOCKA_XML_FILE=cm_%g.xml CMOCKA_MESSAGE_OUTPUT=xml ctest --test-dir ${{github.workspace}}/build 28 | 29 | - uses: actions/upload-artifact@v4 30 | if: always() 31 | with: 32 | name: unit-test-results 33 | path: '${{github.workspace}}/build/test/cm_*.xml' 34 | 35 | end-to-end-tests: 36 | name: End-to-End Tests 37 | runs-on: ubuntu-24.04 38 | steps: 39 | - run: sudo apt-get update -qq 40 | 41 | - run: sudo apt-get install -y 42 | libssl-dev 43 | libcurl4-openssl-dev 44 | casync 45 | 46 | - uses: actions/setup-python@v5 47 | with: 48 | python-version: '3.13' 49 | 50 | - run: pip install pyzstd pycryptodome pytest 51 | 52 | - uses: actions/checkout@v4 53 | 54 | - name: Configure 55 | run: cmake 56 | -D BUILD_TESTING=Off 57 | -D CSN_SANITIZE=On 58 | -B ${{github.workspace}}/build 59 | 60 | - name: Build 61 | run: cmake --build ${{github.workspace}}/build 62 | 63 | - name: Run tests 64 | run: PATH="$PATH:${{github.workspace}}/build" pytest -rP --junitxml=junit.xml 65 | working-directory: "${{github.workspace}}/test/e2e" 66 | 67 | - uses: actions/upload-artifact@v4 68 | if: always() 69 | with: 70 | name: e2e-test-results 71 | path: "${{github.workspace}}/test/e2e/junit.xml" 72 | 73 | 74 | results-summary: 75 | name: Summarize test results 76 | runs-on: ubuntu-24.04 77 | needs: [unit-tests, end-to-end-tests] 78 | if: ${{ always() }} 79 | steps: 80 | - uses: actions/checkout@v4 81 | 82 | - uses: actions/download-artifact@v4 83 | with: 84 | name: unit-test-results 85 | path: ./unit-test-results 86 | 87 | - uses: actions/download-artifact@v4 88 | with: 89 | name: 
e2e-test-results 90 | path: ./e2e-test-results 91 | 92 | - name: Test summary 93 | uses: mikepenz/action-junit-report@v4 94 | with: 95 | report_paths: './*/*.xml' 96 | -------------------------------------------------------------------------------- /sha.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include "utils.h" 12 | 13 | #include "sha.h" 14 | 15 | #ifndef AF_ALG 16 | #define AF_ALG 38 17 | #endif 18 | #ifndef SOL_ALG 19 | #define SOL_ALG 279 20 | #endif 21 | 22 | static int kcapi_fd = -1; 23 | 24 | int sha_kcapi_init(const char *driver) 25 | { 26 | int alg_fd; 27 | 28 | alg_fd = socket(AF_ALG, SOCK_SEQPACKET, 0); 29 | if (alg_fd < 0) { 30 | u_log_errno("creating AF_ALG socket failed"); 31 | return -1; 32 | } 33 | 34 | if (!driver) 35 | driver = "sha256"; 36 | 37 | struct sockaddr_alg sa = { 38 | .salg_family = AF_ALG, 39 | .salg_type = "hash", 40 | }; 41 | 42 | if (strlen(driver) >= sizeof(sa.salg_name)) { 43 | u_log(ERR, "driver name too long"); 44 | goto err_algfd; 45 | } 46 | 47 | // Use strncpy to silence warnings. We've already checked that the 48 | // string will fit. 
49 | strncpy((char*)sa.salg_name, driver, sizeof(sa.salg_name)); 50 | 51 | if (bind(alg_fd, (struct sockaddr*)&sa, sizeof(sa)) < 0) { 52 | u_log_errno("binding to AF_ALG socket failed"); 53 | goto err_algfd; 54 | } 55 | 56 | int fd; 57 | fd = accept(alg_fd, NULL, 0); 58 | if (fd < 0) { 59 | u_log_errno("accepting hash instance socket failed"); 60 | goto err_algfd; 61 | } 62 | 63 | kcapi_fd = fd; 64 | close(alg_fd); 65 | 66 | return 0; 67 | 68 | err_algfd: 69 | close(alg_fd); 70 | return -1; 71 | } 72 | 73 | void sha_kcapi_deinit(void) 74 | { 75 | if (kcapi_fd < 0) 76 | return; 77 | 78 | close(kcapi_fd); 79 | kcapi_fd = -1; 80 | } 81 | 82 | int sha_once(const uint8_t *data, size_t len, uint8_t *out) 83 | { 84 | if (kcapi_fd < 0) 85 | return (SHA256(data, len, out) != NULL) ? 0 : -1; 86 | 87 | while (len) { 88 | ssize_t ret; 89 | 90 | ret = send(kcapi_fd, data, len, MSG_MORE); 91 | if (ret < 0) { 92 | if (errno == EINTR) 93 | continue; 94 | 95 | u_log_errno("send to kcapi failed"); 96 | goto disable; 97 | } 98 | 99 | data += ret; 100 | len -= ret; 101 | } 102 | 103 | ssize_t ret; 104 | retry: 105 | ret = recv(kcapi_fd, out, SHA_LEN, 0); 106 | if (ret < 0) { 107 | if (errno == EINTR) 108 | goto retry; 109 | 110 | u_log_errno("recv from kcapi failed"); 111 | goto disable; 112 | } else if (ret != SHA_LEN) { 113 | u_log(ERR, "received unexpected number of bytes: %zd", ret); 114 | goto disable; 115 | } 116 | 117 | return 0; 118 | 119 | disable: 120 | u_log(ERR, "kcapi operation failed, disabling"); 121 | sha_kcapi_deinit(); 122 | 123 | // fall back to openssl 124 | return (SHA256(data, len, out) != NULL) ? 
0 : -1; 125 | } 126 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.1.0...3.10 FATAL_ERROR) 2 | project(csn LANGUAGES C) 3 | 4 | option(BUILD_PATCHDD "build patchdd utility" OFF) 5 | option(BUILD_CSN_BENCH "build casync-nano benchmarking utility" OFF) 6 | option(BUILD_CSN "build main casync tool" ON) 7 | option(BUILD_CSN_TOOL "build csn helper tool" OFF) 8 | 9 | set(CMAKE_C_VISIBILITY_PRESET hidden) 10 | if(POLICY CMP0063) 11 | cmake_policy(SET CMP0063 NEW) 12 | endif() 13 | 14 | add_compile_options( 15 | -Wall 16 | -Wextra 17 | -Wshadow 18 | -Wno-unused-parameter 19 | -Wmissing-noreturn 20 | -Wmissing-prototypes 21 | -Wstrict-prototypes 22 | ) 23 | 24 | if(CSN_SANITIZE) 25 | add_compile_options(-fsanitize=undefined -fsanitize=address -fsanitize=leak) 26 | link_libraries(-fsanitize=address -fsanitize=undefined -fsanitize=leak) 27 | endif() 28 | 29 | find_package(PkgConfig REQUIRED) 30 | 31 | if(BUILD_CSN) 32 | include(CTest) 33 | 34 | pkg_search_module(ZSTD REQUIRED libzstd) 35 | pkg_search_module(CURL REQUIRED libcurl) 36 | pkg_search_module(CRYPTO REQUIRED libcrypto) 37 | 38 | add_executable(csn 39 | log.c 40 | utils.c 41 | chunk.c 42 | chunker.c 43 | caibx.c 44 | sha.c 45 | store.c 46 | store-local.c 47 | store-http.c 48 | index.c 49 | target.c 50 | ui.c 51 | main.c 52 | encrypt.c 53 | ) 54 | target_link_libraries(csn ${ZSTD_LIBRARIES} ${CURL_LIBRARIES} ${CRYPTO_LIBRARIES}) 55 | target_include_directories(csn PRIVATE ${ZSTD_INCLUDE_DIRS} ${CURL_INCLUDE_DIRS} ${CRYPTO_INCLUDE_DIRS}) 56 | 57 | install(TARGETS csn RUNTIME DESTINATION bin) 58 | install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink csn \$ENV{DESTDIR}/\${CMAKE_INSTALL_PREFIX}/bin/casync)") 59 | 60 | if(BUILD_TESTING) 61 | add_subdirectory(test) 62 | endif() 63 | endif() 64 | 65 | if(BUILD_PATCHDD) 66 | 
add_executable(patchdd 67 | log.c 68 | utils.c 69 | ui.c 70 | patchdd.c 71 | ) 72 | 73 | install(TARGETS patchdd RUNTIME DESTINATION bin) 74 | endif() 75 | 76 | if(BUILD_CSN_BENCH) 77 | pkg_search_module(CRYPTO REQUIRED libcrypto) 78 | 79 | add_executable(csn-bench 80 | log.c 81 | utils.c 82 | sha.c 83 | chunker.c 84 | test/xorshift32.c 85 | csn-bench.c 86 | ) 87 | 88 | target_link_libraries(csn-bench ${CRYPTO_LIBRARIES}) 89 | target_include_directories(csn-bench PRIVATE ${CRYPTO_INCLUDE_DIRS}) 90 | 91 | install(TARGETS csn-bench RUNTIME DESTINATION bin) 92 | endif() 93 | 94 | if(BUILD_CSN_TOOL) 95 | pkg_search_module(CRYPTO REQUIRED libcrypto) 96 | 97 | add_executable(csn-tool 98 | log.c 99 | utils.c 100 | csn-tool.c 101 | encrypt.c 102 | ) 103 | 104 | target_link_libraries(csn-tool ${CRYPTO_LIBRARIES}) 105 | target_include_directories(csn-tool PRIVATE ${CRYPTO_INCLUDE_DIRS}) 106 | 107 | install(TARGETS csn-tool RUNTIME DESTINATION bin) 108 | endif() 109 | -------------------------------------------------------------------------------- /doc/csn-tool.1: -------------------------------------------------------------------------------- 1 | .\" Generated by scdoc 1.11.3 2 | .\" Complete documentation for this program is not available as a GNU info page 3 | .ie \n(.g .ds Aq \(aq 4 | .el .ds Aq ' 5 | .nh 6 | .ad l 7 | .\" Begin generated content: 8 | .TH "CSN-TOOL" "1" "2024-11-01" 9 | .PP 10 | .SH NAME 11 | .PP 12 | csn-tool - casync-nano helper tool 13 | .PP 14 | .SH SYNOPSIS 15 | .PP 16 | \fBcsn-tool\fR crypt \fIKEYSPEC\fR [\fIINPUT_PATH\fR] [\fIOUTPUT_PATH\fR] 17 | .PP 18 | .SH DESCRIPTION 19 | .PP 20 | csn-tool is a helper tool that contains functionality that is useful for 21 | managing casync-nano based infrastructure, but is not needed on the target 22 | device itself.\& 23 | .PP 24 | .SH COMMANDS 25 | .PP 26 | \fBcsn-tool\fR crypt \fIKEYSPEC\fR [\fIINPUT_PATH\fR] [\fIOUTPUT_PATH\fR] 27 | .PP 28 | Performs encryption and decryption of chunks.\& Since the chunk 
encryption 29 | algorithm in casync-nano is self-inverse, both operations are executed using 30 | the same command.\& To avoid repetition, only the term "encryption" is used in 31 | this section.\& 32 | .PP 33 | \fIKEYSPEC\fR describes how the encryption key is obtained.\& In each case, the 32 34 | byte key is represented in hexadecimal.\& Possible values for \fIKEYSPEC\fR are: 35 | .PP 36 | .PD 0 37 | .IP \(bu 4 38 | \fIkey:HEXSTRING\fR - supplies the key literally through the command line.\& The 39 | usual caveats about this making it visible to other processes on the system 40 | apply.\& 41 | .IP \(bu 4 42 | \fIenv:NAME\fR - read the key from the environment variable \fINAME\fR 43 | .IP \(bu 4 44 | \fIfile:PATH\fR - read the key from the file at \fIPATH\fR 45 | .PD 46 | .PP 47 | The file name at \fIINPUT_PATH\fR must follow the casync naming convention (namely, 48 | its 32 byte chunk ID in hexadecimal followed by .\&cacnk or .\&cacnk.\&enc).\& 49 | .PP 50 | If both \fIINPUT_PATH\fR and \fIOUTPUT_PATH\fR are supplied, the input file is 51 | encrypted and the output is stored at the supplied output path.\& If 52 | \fIOUTPUT_PATH\fR is \fB-\fR, the output will be written to standard out.\& 53 | .PP 54 | If only \fIINPUT_PATH\fR is supplied, \fIOUTPUT_PATH\fR is determined based on the 55 | input path.\& Namely, if the input file is a .\&cacnk file, the output file is the 56 | corresponding .\&cacnk.\&enc file in the same directory and vice versa.\& 57 | .PP 58 | If neither \fIINPUT_PATH\fR nor \fIOUTPUT_PATH\fR are supplied, csn-tool reads a list 59 | of files from standard input and encrypts each one in turn, automatically 60 | deriving the output path for each one.\& This is useful when encrypting an entire 61 | chunk store, for example: 62 | .PP 63 | .RS 4 64 | find .\&/default.\&castr/ -name *.\&cacnk | csn-tool crypt file:.\&/key 65 | .PP 66 | .RE 67 | In each case, if the output file already exists, it is not modified.\& 68 | .PP 69 | .SH BUG REPORTS 70 | .PP 
71 | Please report bugs in casync-nano or errors in this manual page via GitHub 72 | (https://github.\&com/florolf/casync-nano/issues) or email (\fIfl@n621.\&de\fR).\& 73 | .PP 74 | .SH SEE ALSO 75 | \fIcsn\fR(1) 76 | .br 77 | -------------------------------------------------------------------------------- /patchdd.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "utils.h" 11 | #include "ui.h" 12 | 13 | #define TRANSFER_SIZE (1024 * 1024) 14 | 15 | static size_t memcmp_offset(uint8_t *a, uint8_t *b, size_t len) 16 | { 17 | size_t offset; 18 | 19 | for (offset = 0; offset < len; offset++) 20 | if (a[offset] != b[offset]) 21 | break; 22 | 23 | return offset; 24 | } 25 | 26 | int main(int argc, char **argv) 27 | { 28 | u_log_init(); 29 | 30 | if (argc < 3 || 31 | (argc >= 2 && (streq(argv[1], "-h") || streq(argv[1], "--help")))) { 32 | fprintf(stderr, "usage: %s src dst\n", argv[0]); 33 | return EXIT_FAILURE; 34 | } 35 | 36 | int src, dst; 37 | src = open(argv[1], O_RDONLY); 38 | if (src < 0) { 39 | u_log_errno("opening source '%s' failed", argv[1]); 40 | return EXIT_FAILURE; 41 | } 42 | 43 | dst = open(argv[2], O_RDWR); 44 | if (dst < 0) { 45 | u_log_errno("opening destination '%s' failed", argv[2]); 46 | return EXIT_FAILURE; 47 | } 48 | 49 | off_t src_size, dst_size; 50 | checked(fd_size(src, &src_size), return EXIT_FAILURE); 51 | checked(fd_size(dst, &dst_size), return EXIT_FAILURE); 52 | 53 | if (dst_size < src_size) { 54 | u_log(ERR, "target size (%llu) is smaller than source size (%llu)", 55 | (unsigned long long)dst_size, (unsigned long long)src_size); 56 | return EXIT_FAILURE; 57 | } 58 | 59 | if (src_size < dst_size) 60 | u_log(INFO, "target size (%llu) is larger than source size (%llu)", 61 | (unsigned long long)dst_size, (unsigned long long)src_size); 62 | 63 | time_t now = time_monotonic(); 64 | time_t 
start = now; 65 | 66 | uint8_t src_buf[TRANSFER_SIZE]; 67 | uint8_t dst_buf[TRANSFER_SIZE]; 68 | 69 | bool print_progress = isatty(STDERR_FILENO); 70 | progess_status_t progess_status = PROGRESS_STATUS_INIT; 71 | 72 | off_t offset = 0; 73 | off_t total_skipped = 0; 74 | while (offset < src_size) { 75 | size_t to_read = MIN(src_size - offset, TRANSFER_SIZE); 76 | 77 | if (preadall(src, src_buf, to_read, offset) < 0) 78 | return EXIT_FAILURE; 79 | 80 | if (preadall(dst, dst_buf, to_read, offset) < 0) 81 | return EXIT_FAILURE; 82 | 83 | size_t first_diff = memcmp_offset(src_buf, dst_buf, to_read); 84 | total_skipped += first_diff; 85 | 86 | if (pwriteall(dst, &src_buf[first_diff], to_read - first_diff, offset + first_diff) < 0) 87 | return EXIT_FAILURE; 88 | 89 | offset += to_read; 90 | 91 | if (print_progress) 92 | show_progress((100ull * offset) / src_size, &progess_status); 93 | } 94 | 95 | if (fsync(dst) < 0) { 96 | u_log_errno("fsync on target failed"); 97 | return EXIT_FAILURE; 98 | } 99 | 100 | now = time_monotonic(); 101 | printf("synchronization finished after %u seconds\n", (unsigned int)(now - start)); 102 | printf("skipped a total of %llu bytes (%.2f%% of the input)\n", 103 | (unsigned long long)total_skipped, 104 | (double)((100.0 * total_skipped)/src_size)); 105 | 106 | return EXIT_SUCCESS; 107 | } 108 | -------------------------------------------------------------------------------- /store.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "utils.h" 7 | #include "chunk.h" 8 | 9 | #include "store.h" 10 | 11 | struct substore { 12 | struct store *s; 13 | bool active; 14 | uint32_t hits; 15 | }; 16 | 17 | struct store_chain { 18 | struct store s; 19 | 20 | struct substore *stores; 21 | size_t n_stores, stores_space; 22 | 23 | uint32_t queries; 24 | }; 25 | 26 | static ssize_t store_chain_get_chunk(struct store *s, uint8_t *id, uint8_t *out, size_t out_max) 
27 | { 28 | struct store_chain *sc = (struct store_chain*) s; 29 | 30 | char chunk_name[CHUNK_ID_STRLEN]; 31 | chunk_format_id(chunk_name, id); 32 | 33 | sc->queries++; 34 | 35 | for (size_t i = 0; i < sc->n_stores; i++) { 36 | struct substore *substore = &sc->stores[i]; 37 | 38 | if (!substore->active) 39 | continue; 40 | 41 | ssize_t ret; 42 | ret = store_get_chunk(substore->s, id, out, out_max); 43 | if (ret < 0) { 44 | u_log(DEBUG, "store '%s' has failed, deactivating it", 45 | store_get_name(substore->s)); 46 | 47 | substore->active = false; 48 | continue; 49 | } else if (ret > 0) { 50 | substore->hits++; 51 | 52 | u_log(DEBUG, "chunk %s found in store '%s'", 53 | chunk_name, store_get_name(substore->s)); 54 | 55 | return ret; 56 | } 57 | } 58 | 59 | u_log(WARN, "chunk %s not found in any store", chunk_name); 60 | 61 | return 0; 62 | } 63 | 64 | static void store_chain_free(struct store *s) 65 | { 66 | struct store_chain *sc = (struct store_chain*) s; 67 | 68 | u_log(INFO, "%"PRIu32" queries received by %s", 69 | sc->queries, store_get_name(s)); 70 | for (size_t i = 0; i < sc->n_stores; i++) 71 | u_log(INFO, " %"PRIu32" answered by store %s", 72 | sc->stores[i].hits, 73 | store_get_name(sc->stores[i].s)); 74 | 75 | 76 | for (size_t i = 0; i < sc->n_stores; i++) 77 | store_free(sc->stores[i].s); 78 | 79 | free(sc->stores); 80 | free(sc); 81 | } 82 | 83 | struct store_chain *store_chain_new(size_t size_hint) 84 | { 85 | static int chain_ctr = 0; 86 | 87 | struct store_chain *sc; 88 | sc = calloc(1, sizeof(*sc)); 89 | u_notnull(sc, return NULL); 90 | 91 | sc->n_stores = 0; 92 | if (size_hint) 93 | sc->stores_space = size_hint; 94 | else 95 | sc->stores_space = 1; 96 | 97 | sc->stores = calloc(sc->stores_space, sizeof(*sc->stores)); 98 | u_notnull(sc->stores, goto err_sc); 99 | 100 | snprintf(sc->s.name, sizeof(sc->s.name), "chain%d", chain_ctr); 101 | sc->s.get_chunk = store_chain_get_chunk; 102 | sc->s.free = store_chain_free; 103 | 104 | chain_ctr++; 105 | 
return sc; 106 | 107 | err_sc: 108 | free(sc); 109 | 110 | return NULL; 111 | } 112 | 113 | int store_chain_append(struct store_chain *sc, struct store *s) 114 | { 115 | if (sc->n_stores >= sc->stores_space) { 116 | struct substore *new_stores; 117 | size_t new_space; 118 | 119 | new_space = sc->stores_space * 2; 120 | new_stores = realloc(sc->stores, new_space * sizeof(*sc->stores)); 121 | u_notnull(new_stores, return -1); 122 | 123 | sc->stores = new_stores; 124 | sc->stores_space = new_space; 125 | } 126 | 127 | struct substore *substore = &sc->stores[sc->n_stores]; 128 | substore->s = s; 129 | substore->active = true; 130 | 131 | sc->n_stores++; 132 | 133 | return 0; 134 | } 135 | -------------------------------------------------------------------------------- /utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "log.h" 10 | 11 | #define u_nop() do {} while (0) 12 | 13 | #ifdef __GNUC__ 14 | #define u_likely(expr) (__builtin_expect(!!(expr), 1)) 15 | #define u_unlikely(expr) (__builtin_expect(!!(expr), 0)) 16 | #else 17 | #define u_likely(expr) (expr) 18 | #define u_unlikely(expr) (expr) 19 | #endif 20 | 21 | #define u_assert_se(expr) do { \ 22 | if (u_unlikely(!(expr))) { \ 23 | u_log(ERR, "assertion failed: %s", #expr); \ 24 | abort(); \ 25 | } \ 26 | } while (0) 27 | 28 | #ifdef NDEBUG 29 | #define u_assert(expr) u_nop() 30 | #else 31 | #define u_assert(expr) u_assert_se(expr) 32 | #endif 33 | 34 | #define u_assert_not_reached() do { \ 35 | u_log(ERR, "reached location marked as unreachable"); \ 36 | abort(); \ 37 | } while (0) 38 | 39 | #define u_build_assert(expr) ((void)sizeof(char[1 - 2*!(expr)])) 40 | 41 | #define u_notnull(expr, action) do { \ 42 | if (u_unlikely((expr) == NULL)) { \ 43 | u_log(ERR, "%s is NULL", #expr); \ 44 | action; \ 45 | } \ 46 | } while (0) 47 | 48 | #define streq(a, b) (strcmp((a), (b)) == 
0) 49 | #define strneq(a, b, n) (strncmp((a), (b), (n)) == 0) 50 | #define startswith(a, b) (strneq((a), (b), strlen((b)))) 51 | 52 | static inline uint32_t rol32(uint32_t x, size_t n) 53 | { 54 | n = n % 32; 55 | if (!n) 56 | return x; 57 | 58 | return (x << n) | (x >> (32 - n)); 59 | } 60 | 61 | #define MIN(a, b) (((a) < (b)) ? (a) : (b)) 62 | #define MAX(a, b) (((a) > (b)) ? (a) : (b)) 63 | 64 | #define ARRAY_SIZE(x) (sizeof((x))/sizeof(*(x))) 65 | 66 | #ifndef TESTING 67 | #define static_testexpose static 68 | #else 69 | #define static_testexpose 70 | #endif 71 | 72 | #define must_check __attribute__ ((warn_unused_result)) 73 | 74 | static inline uint64_t unp64le(const uint8_t *data) { 75 | return ((uint64_t)data[7] << 56) | 76 | ((uint64_t)data[6] << 48) | 77 | ((uint64_t)data[5] << 40) | 78 | ((uint64_t)data[4] << 32) | 79 | ((uint64_t)data[3] << 24) | 80 | ((uint64_t)data[2] << 16) | 81 | ((uint64_t)data[1] << 8) | 82 | ((uint64_t)data[0] << 0); 83 | } 84 | 85 | static inline uint32_t unp32le(const uint8_t *data) 86 | { 87 | return ((uint32_t)data[3] << 24) | 88 | ((uint32_t)data[2] << 16) | 89 | ((uint32_t)data[1] << 8) | 90 | ((uint32_t)data[0] << 0); 91 | } 92 | 93 | static inline void p32le(uint8_t *data, uint32_t v) 94 | { 95 | data[0] = (v >> 0) & 0xff; 96 | data[1] = (v >> 8) & 0xff; 97 | data[2] = (v >> 16) & 0xff; 98 | data[3] = (v >> 24) & 0xff; 99 | } 100 | 101 | must_check int readall(int fd, uint8_t *buf, size_t len); 102 | must_check int preadall(int fd, uint8_t *buf, size_t len, off_t offset); 103 | must_check int writeall(int fd, const uint8_t *buf, size_t len); 104 | must_check int pwriteall(int fd, const uint8_t *buf, size_t len, off_t offset); 105 | void *slurp_file(const char *path, bool text, off_t *size_out); 106 | 107 | #define checked(expr, action) do { \ 108 | if ((expr) < 0) { \ 109 | u_log(DEBUG, "checked statement failed"); \ 110 | action; \ 111 | } \ 112 | } while (0) 113 | 114 | int fd_size(int fd, off_t *size_out); 115 | time_t 
time_monotonic(void); 116 | 117 | int parse_hex(uint8_t *out, const char *in); 118 | void chomp(char *s); 119 | -------------------------------------------------------------------------------- /target.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "utils.h" 10 | #include "chunker.h" 11 | #include "index.h" 12 | 13 | #include "target.h" 14 | 15 | static void target_add_chunk(struct target *t, size_t len, off_t offset, const uint8_t *id) 16 | { 17 | if (!t->queryable) 18 | return; 19 | 20 | struct index_entry *e; 21 | e = index_query(&t->idx, id); 22 | if (!e) { 23 | e = index_insert(&t->idx, offset, len, id); 24 | if (!e) 25 | u_log(WARN, "inserting chunk into index failed"); 26 | } 27 | } 28 | 29 | int target_check_chunk(struct target *t, uint8_t *tmp, size_t len, off_t offset, const uint8_t *id) 30 | { 31 | uint8_t calculated_id[CHUNK_ID_LEN]; 32 | 33 | if (preadall(t->fd, tmp, len, offset) < 0) { 34 | u_log(WARN, "reading chunk failed"); 35 | return -1; 36 | } 37 | 38 | chunk_calculate_id(tmp, len, calculated_id); 39 | if (memcmp(calculated_id, id, CHUNK_ID_LEN)) 40 | return 0; 41 | 42 | target_add_chunk(t, len, offset, id); 43 | 44 | return 1; 45 | } 46 | 47 | static ssize_t target_get_chunk(struct store *s, uint8_t *id, uint8_t *out, size_t out_max) 48 | { 49 | struct target *t = (struct target*) s; 50 | 51 | if (!t->queryable) 52 | return 0; 53 | 54 | struct index_entry *e; 55 | e = index_query(&t->idx, id); 56 | 57 | if (!e) 58 | return 0; 59 | 60 | if (e->len > out_max) { 61 | u_log(ERR, "output buffer to small (%zu) to contain cunk of size %"PRIu32, 62 | out_max, e->len); 63 | return -1; 64 | } 65 | 66 | if (preadall(t->fd, out, e->len, e->start) < 0) { 67 | u_log(ERR, "reading chunk failed"); 68 | return -1; 69 | } 70 | 71 | return e->len; 72 | } 73 | 74 | int target_write(struct target *t, const uint8_t *data, 
size_t len, off_t offset, const uint8_t *id) 75 | { 76 | u_assert(t); 77 | u_assert(id); 78 | 79 | int ret; 80 | ret = pwriteall(t->fd, data, len, offset); 81 | if (ret < 0) { 82 | u_log_errno("writing %zu bytes to target failed", len); 83 | return -1; 84 | } 85 | 86 | target_add_chunk(t, len, offset, id); 87 | 88 | return 0; 89 | } 90 | 91 | struct target *target_new(const char *path) 92 | { 93 | u_assert(path); 94 | 95 | struct target *t; 96 | t = calloc(1, sizeof(*t)); 97 | u_notnull(t, return NULL); 98 | 99 | t->fd = open(path, O_RDWR); 100 | if (t->fd < 0) { 101 | u_log_errno("opening target '%s' failed", path); 102 | goto err_target; 103 | } 104 | 105 | snprintf(t->s.name, sizeof(t->s.name), "target:%s", path); 106 | t->s.get_chunk = target_get_chunk; 107 | 108 | /* Targets have a lifetime that is decoupled from the store they 109 | * represent. Don't free them when the store falls out of use. 110 | * 111 | * Conversely, this means that a target must not be destroyed while the 112 | * store is still in use. 
113 | */ 114 | t->s.free = NULL; 115 | 116 | return t; 117 | 118 | err_target: 119 | free(t); 120 | 121 | return NULL; 122 | } 123 | 124 | struct store *target_as_store(struct target *t, size_t chunk_estimate) 125 | { 126 | if (t->queryable) 127 | return &t->s; 128 | 129 | u_log(DEBUG, "initializing index with an estimated %zu chunks", chunk_estimate); 130 | if (index_init(&t->idx, chunk_estimate) < 0) { 131 | u_log(ERR, "initializing index failed"); 132 | return NULL; 133 | } 134 | 135 | t->queryable = true; 136 | return &t->s; 137 | } 138 | -------------------------------------------------------------------------------- /test/sha_test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include "sha.h" 10 | #include "utils.h" 11 | #include "xorshift32.h" 12 | 13 | static int kcapi_create(void **state) 14 | { 15 | (void) state; 16 | 17 | assert_int_equal(sha_kcapi_init(NULL), 0); 18 | 19 | return 0; 20 | } 21 | 22 | static int kcapi_destroy(void **state) 23 | { 24 | (void) state; 25 | 26 | sha_kcapi_deinit(); 27 | 28 | return 0; 29 | } 30 | 31 | static void test_empty(void **state) 32 | { 33 | (void) state; 34 | 35 | uint8_t md[SHA_LEN]; 36 | assert_int_equal(sha_once(md /*dummy*/, 0, md), 0); 37 | 38 | uint8_t md_target[SHA_LEN] = { 39 | 0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 40 | 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24, 41 | 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 42 | 0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55 43 | }; 44 | 45 | assert_memory_equal(md, md_target, SHA_LEN); 46 | } 47 | 48 | static void test_short(void **state) 49 | { 50 | (void) state; 51 | 52 | const char *input = "hello, world"; 53 | 54 | uint8_t md[SHA_LEN]; 55 | assert_int_equal(sha_once((const uint8_t*)input, strlen(input), md), 0); 56 | 57 | uint8_t md_target[SHA_LEN] = { 58 | 0x09, 0xca, 0x7e, 0x4e, 0xaa, 0x6e, 0x8a, 0xe9, 59 | 0xc7, 0xd2, 
0x61, 0x16, 0x71, 0x29, 0x18, 0x48, 60 | 0x83, 0x64, 0x4d, 0x07, 0xdf, 0xba, 0x7c, 0xbf, 61 | 0xbc, 0x4c, 0x8a, 0x2e, 0x08, 0x36, 0x0d, 0x5b, 62 | }; 63 | 64 | assert_memory_equal(md, md_target, SHA_LEN); 65 | } 66 | 67 | static void test_multi(void **state) 68 | { 69 | (void) state; 70 | 71 | const char *input = "hello, world"; 72 | 73 | uint8_t md[SHA_LEN]; 74 | assert_int_equal(sha_once((const uint8_t*)input, strlen(input), md), 0); 75 | 76 | uint8_t md_target[SHA_LEN] = { 77 | 0x09, 0xca, 0x7e, 0x4e, 0xaa, 0x6e, 0x8a, 0xe9, 78 | 0xc7, 0xd2, 0x61, 0x16, 0x71, 0x29, 0x18, 0x48, 79 | 0x83, 0x64, 0x4d, 0x07, 0xdf, 0xba, 0x7c, 0xbf, 80 | 0xbc, 0x4c, 0x8a, 0x2e, 0x08, 0x36, 0x0d, 0x5b, 81 | }; 82 | 83 | assert_memory_equal(md, md_target, SHA_LEN); 84 | 85 | const char *input2 = "another test string with the same kcapi context"; 86 | assert_int_equal(sha_once((const uint8_t*)input2, strlen(input2), md), 0); 87 | 88 | uint8_t md_target2[SHA_LEN] = { 89 | 0xa7, 0x75, 0xb2, 0x86, 0x71, 0xcc, 0x31, 0x6a, 90 | 0xa8, 0xf2, 0x11, 0x58, 0xcb, 0x4d, 0x5e, 0xec, 91 | 0xdc, 0x49, 0xe4, 0x5f, 0xf3, 0xd3, 0xd7, 0x15, 92 | 0x2e, 0xf1, 0x95, 0x0c, 0xef, 0xde, 0xae, 0x80, 93 | }; 94 | 95 | assert_memory_equal(md, md_target2, SHA_LEN); 96 | } 97 | 98 | static void test_long(void **state) 99 | { 100 | (void) state; 101 | 102 | static uint8_t input[128 * 1024]; 103 | struct xorshift32_state rng = XORSHIFT32_INIT(0xaffed00f); 104 | xorshift32_fill(&rng, input, sizeof(input)); 105 | 106 | uint8_t md[SHA_LEN]; 107 | assert_int_equal(sha_once((const uint8_t*)input, sizeof(input), md), 0); 108 | 109 | uint8_t md_target[SHA_LEN] = { 110 | 0xb3, 0x6a, 0x30, 0xca, 0x5b, 0x22, 0xf4, 0x30, 111 | 0x1a, 0x0e, 0x95, 0xf9, 0xe4, 0x13, 0x10, 0x98, 112 | 0x7e, 0xa3, 0xb0, 0xa6, 0xd2, 0x66, 0xee, 0x61, 113 | 0x13, 0x80, 0x19, 0xd0, 0x31, 0xff, 0x2b, 0xd3 114 | }; 115 | 116 | assert_memory_equal(md, md_target, SHA_LEN); 117 | } 118 | 119 | int main(void) 120 | { 121 | const struct CMUnitTest 
openssl_tests[] = { 122 | // test userspace crypto mode by not initializing kcapi 123 | cmocka_unit_test(test_empty), 124 | }; 125 | 126 | const struct CMUnitTest kcapi_tests[] = { 127 | cmocka_unit_test_setup_teardown(test_empty, kcapi_create, kcapi_destroy), 128 | cmocka_unit_test_setup_teardown(test_short, kcapi_create, kcapi_destroy), 129 | cmocka_unit_test_setup_teardown(test_multi, kcapi_create, kcapi_destroy), 130 | cmocka_unit_test_setup_teardown(test_long, kcapi_create, kcapi_destroy), 131 | }; 132 | 133 | u_log_init(); 134 | 135 | int ret; 136 | ret = cmocka_run_group_tests_name("sha-openssl", openssl_tests, NULL, NULL); 137 | if (ret) 138 | return ret; 139 | 140 | ret = cmocka_run_group_tests_name("sha-kcapi", kcapi_tests, NULL, NULL); 141 | if (ret) 142 | return ret; 143 | } 144 | -------------------------------------------------------------------------------- /csn-bench.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "chunker.h" 8 | #include "utils.h" 9 | #include "test/xorshift32.h" 10 | 11 | static uint64_t time_delta_ms(struct timespec *start, struct timespec *end) 12 | { 13 | uint64_t ret = end->tv_sec - start->tv_sec; 14 | ret *= 1000ull; 15 | ret += ((int32_t)end->tv_nsec - (int32_t)start->tv_nsec) / 1000000; 16 | 17 | return ret; 18 | } 19 | 20 | static int bench_hash(int argc, char **argv) 21 | { 22 | const char *env; 23 | 24 | if ((env = getenv("CSN_KCAPI_DRIVER")) != NULL) { 25 | if (sha_kcapi_init(env) < 0) { 26 | u_log(ERR, "kcapi init failed"); 27 | return EXIT_FAILURE; 28 | } 29 | 30 | u_log(INFO, "kcapi enabled with driver '%s'", env); 31 | } 32 | 33 | size_t buf_sz = CHUNKER_SIZE_AVG_DEFAULT * 4; 34 | uint8_t *buf; 35 | 36 | if (posix_memalign((void**)&buf, 4096, buf_sz)) { 37 | u_log(ERR, "failed to allocate buffer"); 38 | return EXIT_FAILURE; 39 | } 40 | 41 | // touch all memory to ensure we don't fault later 42 | 
memset(buf, 0, buf_sz); 43 | 44 | struct xorshift32_state rng = XORSHIFT32_INIT(0xdeadbeef); 45 | 46 | struct timespec start, end; 47 | for (size_t i = 0; i < 3; i++) { 48 | uint8_t hash_out[CHUNK_ID_LEN]; 49 | 50 | size_t count = 0; 51 | printf("hashing %zu byte blocks for about 10 seconds... ", buf_sz); 52 | fflush(stdout); 53 | 54 | u_assert_se(clock_gettime(CLOCK_MONOTONIC, &start) == 0); 55 | while (1) { 56 | for (size_t inner = 0; inner < 100; inner++) { 57 | xorshift32_fill(&rng, buf, buf_sz); 58 | sha_once(buf, buf_sz, hash_out); 59 | } 60 | count += 100; 61 | 62 | u_assert_se(clock_gettime(CLOCK_MONOTONIC, &end) == 0); 63 | if (end.tv_sec - start.tv_sec >= 10) 64 | break; 65 | } 66 | 67 | uint64_t delta = time_delta_ms(&start, &end); 68 | double rate = (double)count * buf_sz / (delta / 1000.0) / 1024.0 / 1024.0; 69 | printf("%zu iterations in %"PRIu64" ms -> %.2f MiB/s\n", count, delta, rate); 70 | 71 | buf_sz /= 4; 72 | } 73 | 74 | return EXIT_SUCCESS; 75 | } 76 | 77 | static int bench_chunker(int argc, char **argv) 78 | { 79 | size_t buf_sz = 1024*1024; 80 | uint8_t *buf = malloc(buf_sz); 81 | 82 | // touch all memory to ensure we don't fault later 83 | memset(buf, 0, buf_sz); 84 | 85 | struct xorshift32_state rng = XORSHIFT32_INIT(0xdeadbeef); 86 | xorshift32_fill(&rng, buf, buf_sz); 87 | 88 | struct chunker_params cp; 89 | chunker_params_set(&cp, 90 | CHUNKER_SIZE_AVG_DEFAULT / 4, 91 | CHUNKER_SIZE_AVG_DEFAULT, 92 | CHUNKER_SIZE_AVG_DEFAULT * 4); 93 | 94 | struct chunker c; 95 | chunker_init(&c, &cp); 96 | 97 | struct timespec start, end; 98 | printf("chunking %zu bytes for about 10 seconds... 
", buf_sz); 99 | fflush(stdout); 100 | 101 | size_t count = 0; 102 | u_assert_se(clock_gettime(CLOCK_MONOTONIC, &start) == 0); 103 | while (1) { 104 | xorshift32_fill(&rng, buf, buf_sz); 105 | 106 | size_t buf_fill = buf_sz; 107 | while (buf_fill) { 108 | size_t chunker_ret; 109 | 110 | chunker_ret = chunker_scan(&c, buf, buf_fill); 111 | if (chunker_ret == (size_t)-1) 112 | break; 113 | 114 | uint8_t chunk_id[CHUNK_ID_LEN]; 115 | chunker_get_id(&c, chunk_id); 116 | 117 | chunker_reset(&c); 118 | memmove(buf, &buf[chunker_ret], buf_fill - chunker_ret); 119 | buf_fill -= chunker_ret; 120 | } 121 | 122 | count++; 123 | 124 | u_assert_se(clock_gettime(CLOCK_MONOTONIC, &end) == 0); 125 | if (end.tv_sec - start.tv_sec >= 10) 126 | break; 127 | } 128 | 129 | uint64_t delta = time_delta_ms(&start, &end); 130 | double rate = (double)count * buf_sz / (delta / 1000.0) / 1024.0 / 1024.0; 131 | printf("%zu iterations in %"PRIu64" ms -> %.2f MiB/s\n", count, delta, rate); 132 | 133 | return EXIT_SUCCESS; 134 | } 135 | 136 | static struct { 137 | const char *name, *desc; 138 | int (*fn)(int argc, char **argv); 139 | } benchmarks[] = { 140 | {"hash", "Benchmark chunk hashing operations", bench_hash}, 141 | {"chunker", "Benchmark chunking operations", bench_chunker}, 142 | {0} 143 | }; 144 | 145 | __attribute__((noreturn)) static void usage(const char *prog, bool error) 146 | { 147 | FILE *f = error ? stderr : stdout; 148 | 149 | fprintf(f, "usage: %s benchmark\n", prog); 150 | fprintf(f, "available benchmarks:\n"); 151 | 152 | for (size_t i = 0; benchmarks[i].name; i++) 153 | fprintf(f, " - %s: %s\n", benchmarks[i].name, benchmarks[i].desc); 154 | 155 | exit(error ? 
EXIT_FAILURE : EXIT_SUCCESS); 156 | } 157 | 158 | int main(int argc, char **argv) 159 | { 160 | u_log_init(); 161 | 162 | if (argc < 2) 163 | usage(argv[0], true); 164 | 165 | if (argc >= 2 && (streq(argv[1], "-h") || streq(argv[1], "--help"))) 166 | usage(argv[0], false); 167 | 168 | for (size_t i = 0; benchmarks[i].name; i++) 169 | if (streq(benchmarks[i].name, argv[1])) 170 | return benchmarks[i].fn(argc - 1, argv + 1); 171 | 172 | usage(argv[0], false); 173 | } 174 | -------------------------------------------------------------------------------- /csn-tool.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "encrypt.h" 10 | #include "chunk.h" 11 | #include "utils.h" 12 | 13 | static int chunk_id_from_path(uint8_t id[CHUNK_ID_LEN], const char *path) 14 | { 15 | const char *basename = path; 16 | 17 | char *p = strrchr(basename, '/'); 18 | if (p) 19 | basename = p+1; 20 | 21 | size_t name_len = strlen(basename); 22 | if (name_len != 70 && name_len != 74) { 23 | u_log(ERR, "chunk basename '%s' has wrong length, expected 70 or 74, got %zu", basename, name_len); 24 | return -1; 25 | } 26 | 27 | if (!streq(&basename[name_len-6], ".cacnk") && !streq(&basename[name_len-10], ".cacnk.enc")) { 28 | u_log(ERR, "chunk basename '%s' does not end with '.cacnk' or '.cacnk.enc'", basename); 29 | return -1; 30 | } 31 | 32 | char id_buf[65]; 33 | memcpy(id_buf, basename, 64); 34 | id_buf[64] = 0; 35 | 36 | return parse_hex(id, id_buf); 37 | } 38 | 39 | static int encrypt_one(struct encrypt_ctx *ec, const char *input_path, const char *output_path) 40 | { 41 | int ret = -1; 42 | 43 | uint8_t chunk_id[CHUNK_ID_LEN]; 44 | if (chunk_id_from_path(chunk_id, input_path) < 0) { 45 | u_log(ERR, "failed to derive chunk id from path"); 46 | return -1; 47 | } 48 | 49 | char path_buf[PATH_MAX]; 50 | if (!output_path){ 51 | size_t len = strlen(input_path); 
52 | if (len >= 6 && streq(&input_path[len-6], ".cacnk")) { 53 | if (len > PATH_MAX - 5) { 54 | u_log(ERR, "input path too long"); 55 | return -1; 56 | } 57 | 58 | strcpy(path_buf, input_path); 59 | strcpy(&path_buf[len], ".enc"); 60 | output_path = path_buf; 61 | } else if (len >= 10 && streq(&input_path[len-10], ".cacnk.enc")) { 62 | if (len > PATH_MAX - 1) { 63 | u_log(ERR, "input path too long"); 64 | return -1; 65 | } 66 | 67 | strcpy(path_buf, input_path); 68 | path_buf[len-4] = 0; 69 | output_path = path_buf; 70 | } else { 71 | u_log(ERR, "unsupported input path format"); 72 | return -1; 73 | } 74 | } 75 | 76 | if (access(output_path, F_OK) == 0) 77 | return 0; 78 | 79 | // this uses the first 24 bytes of chunk_id 80 | if (encrypt_restart(ec, chunk_id) < 0) { 81 | u_log(ERR, "encrypt_restart failed"); 82 | return -1; 83 | } 84 | 85 | off_t chunk_size; 86 | uint8_t *buf = slurp_file(input_path, false, &chunk_size); 87 | if (!buf) { 88 | u_log(ERR, "failed to load source chunk"); 89 | return -1; 90 | } 91 | 92 | if (encrypt_do(ec, buf, buf, chunk_size)) { 93 | u_log(ERR, "encrypting chunk failed"); 94 | goto out_buf; 95 | } 96 | 97 | int fd; 98 | if (streq(output_path, "-")) { 99 | fd = STDOUT_FILENO; 100 | } else { 101 | fd = open(output_path, O_WRONLY | O_TRUNC | O_CREAT, 0444); 102 | 103 | if (fd < 0) { 104 | u_log_errno("failed to create output file"); 105 | goto out_buf; 106 | } 107 | } 108 | 109 | if (writeall(fd, buf, chunk_size) < 0) { 110 | u_log(ERR, "failed to write to output file"); 111 | goto out_fd; 112 | } 113 | 114 | ret = 0; 115 | 116 | out_fd: 117 | if (fd != STDOUT_FILENO) 118 | close(fd); 119 | out_buf: 120 | free(buf); 121 | 122 | return ret; 123 | } 124 | 125 | static int do_crypt(int argc, char **argv) 126 | { 127 | int ret = EXIT_FAILURE; 128 | 129 | if (argc < 2 || argc > 4 || 130 | streq(argv[1], "-h") || streq(argv[1], "--help")) { 131 | fprintf(stderr, "usage: crypt keyspec [/path/to/input] [/path/to/output]\n"); 132 | return 
EXIT_FAILURE; 133 | } 134 | 135 | uint8_t key[32]; 136 | if (encrypt_parse_keyspec(key, argv[1]) < 0) { 137 | u_log(ERR, "failed to parse keyspec"); 138 | return EXIT_FAILURE; 139 | } 140 | 141 | struct encrypt_ctx ec; 142 | if (encrypt_init(&ec, key) < 0) { 143 | u_log(ERR, "encrypt_init failed"); 144 | return EXIT_FAILURE; 145 | } 146 | 147 | if (argc == 2) { 148 | char line[PATH_MAX]; 149 | while ((fgets(line, sizeof(line), stdin))) { 150 | chomp(line); 151 | if (encrypt_one(&ec, line, NULL) < 0) 152 | goto out; 153 | } 154 | 155 | ret = EXIT_SUCCESS; 156 | } else { 157 | char *input_path = argv[2]; 158 | char *output_path = NULL; 159 | if (argc > 3) 160 | output_path = argv[3]; 161 | 162 | if (encrypt_one(&ec, input_path, output_path) == 0) 163 | ret = EXIT_SUCCESS; 164 | } 165 | 166 | out: 167 | encrypt_close(&ec); 168 | 169 | return ret; 170 | } 171 | 172 | static struct { 173 | const char *name, *desc; 174 | int (*fn)(int argc, char **argv); 175 | } cmds[] = { 176 | {"crypt", "encrypt/decrypt a single chunk", do_crypt}, 177 | {0} 178 | }; 179 | 180 | __attribute__((noreturn)) static void usage(const char *prog, bool error) 181 | { 182 | FILE *f = error ? stderr : stdout; 183 | 184 | fprintf(f, "usage: %s command\n", prog); 185 | fprintf(f, "available commands:\n"); 186 | 187 | for (size_t i = 0; cmds[i].name; i++) 188 | fprintf(f, " - %s: %s\n", cmds[i].name, cmds[i].desc); 189 | 190 | exit(error ? 
EXIT_FAILURE : EXIT_SUCCESS); 191 | } 192 | 193 | int main(int argc, char **argv) 194 | { 195 | u_log_init(); 196 | 197 | if (argc < 2) 198 | usage(argv[0], true); 199 | 200 | if (argc >= 2 && (streq(argv[1], "-h") || streq(argv[1], "--help"))) 201 | usage(argv[0], false); 202 | 203 | for (size_t i = 0; cmds[i].name; i++) 204 | if (streq(cmds[i].name, argv[1])) 205 | return cmds[i].fn(argc - 1, argv + 1); 206 | 207 | usage(argv[0], false); 208 | } 209 | -------------------------------------------------------------------------------- /encrypt.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include "utils.h" 10 | 11 | #include "encrypt.h" 12 | 13 | /* HChaCha20 implementation adapted from Monocypher v4.0.1 14 | * (https://monocypher.org), licensed CC0-1.0 / BSD-2-Clause. */ 15 | 16 | #define WIPE_BUFFER(buffer) crypto_wipe(buffer, sizeof(buffer)) 17 | 18 | static void unp32le_buf(uint32_t *dst, const uint8_t *src, size_t size) 19 | { 20 | for (size_t i = 0; i < size; i++) 21 | dst[i] = unp32le(src + i*4); 22 | } 23 | 24 | static void p32le_buf(uint8_t *dst, const uint32_t *src, size_t size) 25 | { 26 | for (size_t i = 0; i < size; i++) 27 | p32le(dst + i*4, src[i]); 28 | } 29 | 30 | static void crypto_wipe(void *secret, size_t size) 31 | { 32 | volatile uint8_t *v_secret = (uint8_t*)secret; 33 | 34 | for (size_t i = 0; i < size; i++) 35 | v_secret[i] = 0; 36 | } 37 | 38 | #define QUARTERROUND(a, b, c, d) \ 39 | a += b; d = rol32(d ^ a, 16); \ 40 | c += d; b = rol32(b ^ c, 12); \ 41 | a += b; d = rol32(d ^ a, 8); \ 42 | c += d; b = rol32(b ^ c, 7) 43 | 44 | static void chacha20_rounds(uint32_t out[16], const uint32_t in[16]) 45 | { 46 | // The temporary variables make Chacha20 10% faster. 
47 | uint32_t t0 = in[ 0]; uint32_t t1 = in[ 1]; uint32_t t2 = in[ 2]; uint32_t t3 = in[ 3]; 48 | uint32_t t4 = in[ 4]; uint32_t t5 = in[ 5]; uint32_t t6 = in[ 6]; uint32_t t7 = in[ 7]; 49 | uint32_t t8 = in[ 8]; uint32_t t9 = in[ 9]; uint32_t t10 = in[10]; uint32_t t11 = in[11]; 50 | uint32_t t12 = in[12]; uint32_t t13 = in[13]; uint32_t t14 = in[14]; uint32_t t15 = in[15]; 51 | 52 | for (size_t i = 0; i < 10; i++) { // 20 rounds, 2 rounds per loop. 53 | QUARTERROUND(t0, t4, t8 , t12); // column 0 54 | QUARTERROUND(t1, t5, t9 , t13); // column 1 55 | QUARTERROUND(t2, t6, t10, t14); // column 2 56 | QUARTERROUND(t3, t7, t11, t15); // column 3 57 | QUARTERROUND(t0, t5, t10, t15); // diagonal 0 58 | QUARTERROUND(t1, t6, t11, t12); // diagonal 1 59 | QUARTERROUND(t2, t7, t8 , t13); // diagonal 2 60 | QUARTERROUND(t3, t4, t9 , t14); // diagonal 3 61 | } 62 | out[ 0] = t0; out[ 1] = t1; out[ 2] = t2; out[ 3] = t3; 63 | out[ 4] = t4; out[ 5] = t5; out[ 6] = t6; out[ 7] = t7; 64 | out[ 8] = t8; out[ 9] = t9; out[10] = t10; out[11] = t11; 65 | out[12] = t12; out[13] = t13; out[14] = t14; out[15] = t15; 66 | } 67 | 68 | static_testexpose void hchacha20(uint8_t out[32], const uint8_t key[32], const uint8_t in[16]) 69 | { 70 | static const uint8_t *chacha20_constant = (const uint8_t*)"expand 32-byte k"; // 16 bytes 71 | 72 | uint32_t block[16]; 73 | unp32le_buf(block , chacha20_constant, 4); 74 | unp32le_buf(block + 4, key , 8); 75 | unp32le_buf(block + 12, in , 4); 76 | 77 | chacha20_rounds(block, block); 78 | 79 | // prevent reversal of the rounds by revealing only half of the buffer. 80 | p32le_buf(out , block , 4); // constant 81 | p32le_buf(out+16, block+12, 4); // counter and nonce 82 | WIPE_BUFFER(block); 83 | } 84 | 85 | /* End of Monocypher-based code. 
*/ 86 | 87 | int encrypt_init(struct encrypt_ctx *ctx, const uint8_t key[static 32]) 88 | { 89 | ctx->cipher = EVP_CIPHER_CTX_new(); 90 | if (!ctx->cipher) { 91 | u_log(ERR, "failed to create EVP_CIPHER_CTX"); 92 | return -1; 93 | } 94 | 95 | memcpy(ctx->key, key, 32); 96 | 97 | return 0; 98 | } 99 | 100 | int encrypt_restart(struct encrypt_ctx *ctx, const uint8_t nonce[static 24]) 101 | { 102 | int ret = -1; 103 | 104 | uint8_t subkey[32]; 105 | hchacha20(subkey, ctx->key, &nonce[0]); 106 | 107 | uint8_t ctr_nonce[16]; 108 | memset(&ctr_nonce[0], 0, 8); 109 | memcpy(&ctr_nonce[8], &nonce[16], 8); 110 | 111 | if (!EVP_CIPHER_CTX_reset((EVP_CIPHER_CTX*)ctx->cipher)) { 112 | u_log(ERR, "EVP_CIPHER_CTX_reset failed"); 113 | goto out; 114 | } 115 | 116 | if (!EVP_CipherInit((EVP_CIPHER_CTX*)ctx->cipher, EVP_chacha20(), subkey, ctr_nonce, 1)) { 117 | u_log(ERR, "EVP_CipherInit failed"); 118 | goto out; 119 | } 120 | 121 | ret = 0; 122 | 123 | out: 124 | WIPE_BUFFER(subkey); 125 | WIPE_BUFFER(ctr_nonce); 126 | 127 | return ret; 128 | } 129 | 130 | void encrypt_close(struct encrypt_ctx *ctx) 131 | { 132 | EVP_CIPHER_CTX_free((EVP_CIPHER_CTX*)ctx->cipher); 133 | 134 | crypto_wipe(ctx, sizeof(*ctx)); 135 | } 136 | 137 | int encrypt_do(struct encrypt_ctx *ctx, uint8_t *out, const uint8_t *in, size_t len) 138 | { 139 | if (!EVP_Cipher((EVP_CIPHER_CTX*)ctx->cipher, out, in, len)) 140 | return -1; 141 | 142 | return 0; 143 | } 144 | 145 | int encrypt_parse_keyspec(uint8_t key_out[static 32], const char *keyspec) 146 | { 147 | int ret = -1; 148 | 149 | const char *key_str; 150 | bool do_free = false; 151 | 152 | if (startswith(keyspec, "key:")) { 153 | key_str = &keyspec[4]; 154 | } else if (startswith(keyspec, "env:")) { 155 | key_str = getenv(&keyspec[4]); 156 | if (!key_str) { 157 | u_log(ERR, "could not retrieve environment variable '%s'", &keyspec[4]); 158 | return -1; 159 | } 160 | } else if(startswith(keyspec, "file:")) { 161 | char *slurped = slurp_file(&keyspec[5], 
true, NULL); 162 | if (!slurped) 163 | return -1; 164 | 165 | chomp(slurped); 166 | 167 | key_str = slurped; 168 | do_free = true; 169 | } else { 170 | u_log(ERR, "unknown keyspec used in '%s'", keyspec); 171 | return -1; 172 | } 173 | 174 | if (strlen(key_str) != 64) { 175 | u_log(ERR, "encryption key must be exactly 64 hex characters, but is %zu long", strlen(key_str)); 176 | goto out; 177 | } 178 | 179 | if (parse_hex(key_out, key_str) < 0) { 180 | u_log(ERR, "decoding encryption key failed"); 181 | 182 | crypto_wipe(key_out, 32); 183 | goto out; 184 | } 185 | 186 | ret = 0; 187 | 188 | out: 189 | if (do_free) { 190 | crypto_wipe((void*)key_str, strlen(key_str)); 191 | free((void*)key_str); 192 | } 193 | 194 | return ret; 195 | } 196 | -------------------------------------------------------------------------------- /utils.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #ifdef __APPLE__ 12 | #include 13 | #endif 14 | 15 | #ifdef __linux__ 16 | #include 17 | #endif 18 | 19 | #include "utils.h" 20 | 21 | static int blockdevice_size(int fd, off_t *size_ptr) 22 | #if defined(__APPLE__) 23 | { 24 | uint32_t size; 25 | uint64_t count; 26 | 27 | if (ioctl(fd, DKIOCGETBLOCKSIZE, &size) < 0) { 28 | u_log_errno("getting device block size failed"); 29 | return -1; 30 | } 31 | 32 | if (ioctl(fd, DKIOCGETBLOCKCOUNT, &count) < 0) { 33 | u_log_errno("getting device block count failed"); 34 | return -1; 35 | } 36 | 37 | *size_ptr = (off_t)(size * count); 38 | return 0; 39 | } 40 | #elif defined(__linux__) 41 | { 42 | uint64_t tmp; 43 | 44 | if (ioctl(fd, BLKGETSIZE64, &tmp) < 0) { 45 | u_log_errno("getting block device size failed"); 46 | return -1; 47 | } 48 | 49 | *size_ptr = (off_t)tmp; 50 | return 0; 51 | } 52 | #else 53 | #error "blockdevice_size not implemented for target" 54 | #endif 55 | 56 | must_check int 
readall(int fd, uint8_t *buf, size_t len) 57 | { 58 | size_t read_bytes = 0; 59 | 60 | while (read_bytes < len) { 61 | ssize_t ret; 62 | 63 | ret = read(fd, &buf[read_bytes], len - read_bytes); 64 | if (ret < 0) { 65 | if (errno == EINTR) 66 | continue; 67 | 68 | u_log_errno("read failed"); 69 | return -1; 70 | } else if (ret == 0) { 71 | u_log(ERR, "short read, expected %zu, got %zu", len, read_bytes); 72 | return -1; 73 | } 74 | 75 | read_bytes += ret; 76 | } 77 | 78 | return 0; 79 | } 80 | 81 | must_check int preadall(int fd, uint8_t *buf, size_t len, off_t offset) 82 | { 83 | size_t read_bytes = 0; 84 | 85 | while (read_bytes < len) { 86 | ssize_t ret; 87 | 88 | ret = pread(fd, &buf[read_bytes], len - read_bytes, offset); 89 | if (ret < 0) { 90 | if (errno == EINTR) 91 | continue; 92 | 93 | u_log_errno("read failed"); 94 | return -1; 95 | } else if (ret == 0) { 96 | u_log(ERR, "short read, expected %zu, got %zu", len, read_bytes); 97 | return -1; 98 | } 99 | 100 | read_bytes += ret; 101 | offset += ret; 102 | } 103 | 104 | return 0; 105 | } 106 | 107 | must_check int writeall(int fd, const uint8_t *buf, size_t len) 108 | { 109 | size_t written_bytes = 0; 110 | 111 | while (written_bytes < len) { 112 | ssize_t ret; 113 | 114 | ret = write(fd, &buf[written_bytes], len - written_bytes); 115 | if (ret < 0) { 116 | if (errno == EINTR) 117 | continue; 118 | 119 | u_log_errno("write failed"); 120 | return -1; 121 | } 122 | 123 | u_assert(ret != 0); 124 | 125 | written_bytes += ret; 126 | } 127 | 128 | return 0; 129 | } 130 | 131 | must_check int pwriteall(int fd, const uint8_t *buf, size_t len, off_t offset) 132 | { 133 | size_t written_bytes = 0; 134 | 135 | while (written_bytes < len) { 136 | ssize_t ret; 137 | 138 | ret = pwrite(fd, &buf[written_bytes], len - written_bytes, offset); 139 | if (ret < 0) { 140 | if (errno == EINTR) 141 | continue; 142 | 143 | u_log_errno("write failed"); 144 | return -1; 145 | } 146 | 147 | u_assert(ret != 0); 148 | 149 | 
written_bytes += ret; 150 | offset += ret; 151 | } 152 | 153 | return 0; 154 | } 155 | 156 | int fd_size(int fd, off_t *size_out) 157 | { 158 | u_assert(fd >= 0); 159 | u_assert(size_out); 160 | 161 | struct stat s; 162 | 163 | if (fstat(fd, &s) < 0) { 164 | u_log_errno("stat failed"); 165 | return -1; 166 | } 167 | 168 | if (S_ISREG(s.st_mode)) { 169 | *size_out = s.st_size; 170 | } else if (S_ISBLK(s.st_mode)) { 171 | return blockdevice_size(fd, size_out); 172 | } else { 173 | u_log(ERR, "unsupported file type: 0%o", s.st_mode & S_IFMT); 174 | return -1; 175 | } 176 | 177 | return 0; 178 | } 179 | 180 | void *slurp_file(const char *path, bool text, off_t *size_out) 181 | { 182 | int fd; 183 | void *ret = NULL; 184 | 185 | fd = open(path, O_RDONLY); 186 | if (fd < 0) { 187 | u_log_errno("opening '%s' failed", path); 188 | return NULL; 189 | } 190 | 191 | off_t size; 192 | if (fd_size(fd, &size) < 0) { 193 | u_log(ERR, "getting file size of '%s' failed", path); 194 | goto out_fd; 195 | } 196 | 197 | void *buf; 198 | buf = malloc(text ? 
/*
 * Removes all trailing '\r' and '\n' characters from the NUL-terminated
 * string `s`, in place.
 *
 * Only a newline run that extends to the end of the string is removed;
 * newlines that are followed by other characters are left untouched, so
 * content after an embedded line break is never cut off.
 */
void chomp(char *s)
{
	// start of the CR/LF run currently being scanned, NULL while outside one
	char *run_start = NULL;

	for (char *p = s; *p; p++) {
		if (*p == '\r' || *p == '\n') {
			if (!run_start)
				run_start = p;
		} else {
			// the run was followed by payload, so it is not trailing
			run_start = NULL;
		}
	}

	if (run_start)
		*run_start = 0;
}
"chunk.h" 14 | #include "chunker.h" 15 | #include "index.h" 16 | 17 | struct store_local { 18 | struct store s; 19 | 20 | int fd; 21 | struct index idx; 22 | 23 | struct chunker_params chunker_params; 24 | }; 25 | 26 | #define INDEX_FLAG_VALIDATED (1<<0) 27 | 28 | struct insert_ctx { 29 | struct index *idx; 30 | uint32_t flags; 31 | }; 32 | 33 | static int store_insert_index(uint64_t offset, uint32_t len, uint8_t *id, void *arg) 34 | { 35 | struct insert_ctx *ctx = (struct insert_ctx*) arg; 36 | 37 | struct index_entry *e; 38 | e = index_insert(ctx->idx, offset, len, id); 39 | if (!e) { 40 | u_log(ERR, "inserting chunk at offset %"PRIu64" failed", offset); 41 | return -1; 42 | } 43 | 44 | e->flags |= ctx->flags; 45 | 46 | return 0; 47 | } 48 | 49 | static int store_sideload_index(struct store_local *ls, const char *index) 50 | { 51 | int index_fd; 52 | int ret = -1; 53 | 54 | index_fd = open(index, O_RDONLY); 55 | if (index_fd < 0) { 56 | u_log_errno("opening index for sideloading failed"); 57 | return -1; 58 | } 59 | 60 | size_t n_entries; 61 | struct chunker_params caibx_params; 62 | if (caibx_load_header(index_fd, &caibx_params, &n_entries) < 0) { 63 | u_log(ERR, "parsing caibx header failed"); 64 | goto out_fd; 65 | } 66 | 67 | if (caibx_params.min_size != ls->chunker_params.min_size || 68 | caibx_params.avg_size != ls->chunker_params.avg_size || 69 | caibx_params.max_size != ls->chunker_params.max_size) { 70 | u_log(ERR, "chunker parameter mismatch between sideload index (%"PRIu32" / %"PRIu32" / %"PRIu32")" 71 | "and chosen parameters (%"PRIu32" / %"PRIu32" / %"PRIu32")", 72 | caibx_params.min_size, caibx_params.avg_size, caibx_params.max_size, 73 | ls->chunker_params.min_size, ls->chunker_params.avg_size, ls->chunker_params.max_size); 74 | 75 | goto out_fd; 76 | } 77 | 78 | if (index_init(&ls->idx, n_entries) < 0) { 79 | u_log(ERR, "initializing index failed"); 80 | goto out_fd; 81 | } 82 | 83 | struct insert_ctx ctx = { 84 | .idx = &ls->idx, 85 | .flags = 0 
86 | }; 87 | 88 | if (caibx_iterate_entries(index_fd, &caibx_params, n_entries, store_insert_index, &ctx) < 0) { 89 | u_log(ERR, "sideload iteration aborted"); 90 | 91 | index_cleanup(&ls->idx); 92 | goto out_fd; 93 | } 94 | 95 | ret = 0; 96 | 97 | out_fd: 98 | close(index_fd); 99 | 100 | return ret; 101 | } 102 | 103 | static int store_scan(struct store_local *ls) 104 | { 105 | off_t size; 106 | 107 | checked(fd_size(ls->fd, &size), return -1); 108 | size_t chunk_estimate = (12ull * size/ls->chunker_params.avg_size) / 10; 109 | 110 | u_log(DEBUG, "initializing index with an estimated %zu chunks", chunk_estimate); 111 | if (index_init(&ls->idx, chunk_estimate) < 0) { 112 | u_log(ERR, "initializing index failed"); 113 | return -1; 114 | } 115 | 116 | struct insert_ctx ctx = { 117 | .idx = &ls->idx, 118 | 119 | // as this entry is the result of scanning the store ourselves, we 120 | // consider it in sync with the data within the store 121 | .flags = INDEX_FLAG_VALIDATED 122 | }; 123 | 124 | if (chunker_scan_fd(ls->fd, &ls->chunker_params, store_insert_index, &ctx) < 0) 125 | return -1; 126 | 127 | return 0; 128 | } 129 | 130 | static ssize_t store_local_get_chunk(struct store *s, uint8_t *id, uint8_t *out, size_t out_max) 131 | { 132 | struct store_local *ls = (struct store_local*) s; 133 | 134 | struct index_entry *e; 135 | restart: 136 | e = index_query(&ls->idx, id); 137 | 138 | if (!e) 139 | return 0; 140 | 141 | if (e->len > out_max) { 142 | u_log(ERR, "output buffer to small (%zu) to contain cunk of size %"PRIu32, 143 | out_max, e->len); 144 | return -1; 145 | } 146 | 147 | if (preadall(ls->fd, out, e->len, e->start) < 0) { 148 | u_log(ERR, "reading chunk failed"); 149 | return -1; 150 | } 151 | 152 | if (e->flags & INDEX_FLAG_VALIDATED) 153 | return e->len; 154 | 155 | // if the entry has not been validated, check its contents 156 | uint8_t actual_chunk_id[CHUNK_ID_LEN]; 157 | chunk_calculate_id(out, e->len, actual_chunk_id); 158 | 159 | if (memcmp(id, 
actual_chunk_id, CHUNK_ID_LEN) == 0) { 160 | e->flags |= INDEX_FLAG_VALIDATED; 161 | return e->len; 162 | } 163 | 164 | char chunk_id_str[CHUNK_ID_STRLEN]; 165 | u_log(ERR, "chunk validation failure at offset %"PRIu64" (chunk length %"PRIu32")", 166 | e->start, e->len); 167 | 168 | chunk_format_id(chunk_id_str, id); 169 | u_log(ERR, " expected chunk id: %s", chunk_id_str); 170 | 171 | chunk_format_id(chunk_id_str, actual_chunk_id); 172 | u_log(ERR, " got chunk id: %s", chunk_id_str); 173 | 174 | u_log(ERR, "rescanning store '%s'", s->name); 175 | 176 | index_cleanup(&ls->idx); 177 | if (store_scan(ls) < 0) { 178 | u_log(ERR, "rescanning store failed"); 179 | return -1; 180 | } 181 | 182 | goto restart; 183 | } 184 | 185 | static void store_local_free(struct store *s) 186 | { 187 | struct store_local *ls = (struct store_local*) s; 188 | 189 | index_cleanup(&ls->idx); 190 | free(ls); 191 | } 192 | 193 | struct store *store_local_new(const char *path, const char *index, 194 | struct chunker_params *chunker_params) 195 | { 196 | u_assert(path); 197 | 198 | struct store_local *ls; 199 | ls = calloc(1, sizeof(*ls)); 200 | u_notnull(ls, return NULL); 201 | 202 | ls->fd = open(path, O_RDONLY); 203 | if (ls->fd < 0) { 204 | u_log_errno("opening store at '%s' failed", path); 205 | goto err_ls; 206 | } 207 | 208 | memcpy(&ls->chunker_params, chunker_params, sizeof(*chunker_params)); 209 | 210 | snprintf(ls->s.name, sizeof(ls->s.name), "%s", path); 211 | ls->s.get_chunk = store_local_get_chunk; 212 | ls->s.free = store_local_free; 213 | 214 | if (index) { 215 | u_log(INFO, "trying to sideload index '%s' for store '%s'", index, path); 216 | 217 | if (store_sideload_index(ls, index) == 0) { 218 | u_log(INFO, "sideloading succeeded"); 219 | return (struct store*)ls; 220 | } 221 | 222 | u_log(WARN, "sideloading index failed, falling back to scanning"); 223 | } 224 | 225 | u_log(INFO, "scanning store '%s'", path); 226 | if (store_scan(ls) < 0) { 227 | u_log(ERR, "scanning store 
'%s' failed", path); 228 | goto err_ls; 229 | } 230 | u_log(INFO, "scanning store finished"); 231 | 232 | return (struct store*)ls; 233 | 234 | err_ls: 235 | free(ls); 236 | return NULL; 237 | } 238 | -------------------------------------------------------------------------------- /caibx.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "utils.h" 11 | #include "chunker.h" 12 | #include "caibx.h" 13 | 14 | enum { 15 | CA_FORMAT_INDEX = UINT64_C(0x96824d9c7b129ff9), 16 | CA_FORMAT_TABLE = UINT64_C(0xe75b9e112f17417d), 17 | CA_FORMAT_TABLE_TAIL_MARKER = UINT64_C(0x4b4f050e5549ecd1), 18 | }; 19 | 20 | #define CA_HEADER_LEN 0x30 21 | #define CA_TABLE_HEADER_LEN 16 22 | #define CA_TABLE_ENTRY_LEN 40 23 | #define CA_TABLE_MIN_LEN (CA_TABLE_HEADER_LEN + CA_TABLE_ENTRY_LEN) 24 | 25 | static int load_section_header(int fd, uint64_t *magic_out, uint64_t *len_out) 26 | { 27 | u_assert(fd >= 0); 28 | u_assert(magic_out); 29 | u_assert(len_out); 30 | 31 | uint8_t buf[40]; 32 | checked(readall(fd, buf, 16), return -1); 33 | 34 | *len_out = unp64le(&buf[0]); 35 | *magic_out = unp64le(&buf[8]); 36 | 37 | return 0; 38 | } 39 | 40 | static int load_index_header(int fd, struct chunker_params *params_out) 41 | { 42 | u_assert(fd >= 0); 43 | 44 | uint64_t header_magic, header_len; 45 | checked(load_section_header(fd, &header_magic, &header_len), return -1); 46 | 47 | if (header_magic != CA_FORMAT_INDEX) { 48 | u_log(ERR, "unexpected magic: %"PRIx64, header_magic); 49 | return -1; 50 | } 51 | 52 | if (header_len > CA_HEADER_LEN) { 53 | u_log(WARN, "unexpected index header length %"PRIu64, header_len); 54 | } else if (header_len < CA_HEADER_LEN) { 55 | u_log(ERR, "index header length %"PRIu64 "is too short", header_len); 56 | return -1; 57 | } 58 | 59 | uint8_t buf[40]; 60 | checked(readall(fd, buf, 32), return -1); 61 | uint64_t 
flags; 62 | 63 | flags = unp64le(&buf[0]); 64 | if (flags & ~UINT64_C(0xd000000000000000)) { 65 | u_log(ERR, "unsupported feature set: 0x%016"PRIx64, flags); 66 | return -1; 67 | } 68 | 69 | uint64_t min_size = unp64le(&buf[8]); 70 | uint64_t avg_size = unp64le(&buf[16]); 71 | uint64_t max_size = unp64le(&buf[24]); 72 | 73 | if (params_out) { 74 | if (chunker_params_set(params_out, min_size, avg_size, max_size) < 0) { 75 | u_log(ERR, "incompatible chunker params given in file"); 76 | return -1; 77 | } 78 | } 79 | 80 | return 0; 81 | } 82 | 83 | static int validate_table_header(int fd) 84 | { 85 | u_assert(fd >= 0); 86 | 87 | uint64_t table_len, table_magic; 88 | 89 | checked(load_section_header(fd, &table_magic, &table_len), return -1); 90 | 91 | if (table_magic != CA_FORMAT_TABLE) { 92 | u_log(ERR, "invalid table magic, expected %016"PRIx64", got %016"PRIx64, 93 | CA_FORMAT_TABLE, table_magic); 94 | return -1; 95 | } 96 | 97 | if (table_len != ~(uint64_t)0) 98 | u_log(WARN, "unexpected table size: %"PRIu64, table_len); 99 | 100 | return 0; 101 | } 102 | 103 | static int validate_table_footer(int fd, size_t n_entries) 104 | { 105 | u_assert(fd >= 0); 106 | 107 | if (lseek(fd, CA_HEADER_LEN + CA_TABLE_HEADER_LEN + 108 | n_entries * CA_TABLE_ENTRY_LEN, SEEK_SET) < 0) { 109 | u_log_errno("failed to seek to end of chunk table"); 110 | return -1; 111 | } 112 | 113 | uint8_t buf[CA_TABLE_ENTRY_LEN]; 114 | checked(readall(fd, buf, CA_TABLE_ENTRY_LEN), return -1); 115 | 116 | uint64_t tmp64; 117 | if ((tmp64 = unp64le(&buf[0])) != 0) { 118 | u_log(ERR, "chunk table footer has invalid zero fill: 0x%"PRIx64, tmp64); 119 | return -1; 120 | } 121 | 122 | if ((tmp64 = unp64le(&buf[8])) != 0) { 123 | u_log(ERR, "chunk table footer has invalid zero fill: 0x%"PRIx64, tmp64); 124 | return -1; 125 | } 126 | 127 | if ((tmp64 = unp64le(&buf[16])) != CA_HEADER_LEN) { 128 | u_log(ERR, "invalid index offset in footer, expected 0x%x, got 0x%"PRIx64, 129 | CA_HEADER_LEN, tmp64); 130 | 
return -1; 131 | } 132 | 133 | tmp64 = unp64le(&buf[24]); 134 | uint64_t expected_len = CA_TABLE_HEADER_LEN + (n_entries+1) * CA_TABLE_ENTRY_LEN; 135 | if (tmp64 != expected_len) { 136 | u_log(ERR, "invalid size in footer, expected %"PRIu64", got %"PRIu64, 137 | expected_len, tmp64); 138 | return -1; 139 | } 140 | 141 | if ((tmp64 = unp64le(&buf[32])) != CA_FORMAT_TABLE_TAIL_MARKER) { 142 | u_log(ERR, "invalid magic in footer, expected 0x%"PRIx64", got 0x%"PRIx64, 143 | CA_FORMAT_TABLE_TAIL_MARKER, tmp64); 144 | return -1; 145 | } 146 | 147 | return 0; 148 | } 149 | 150 | int caibx_load_header(int fd, struct chunker_params *params_out, size_t *n_entries_out) 151 | { 152 | u_assert(fd >= 0); 153 | u_assert(params_out); 154 | u_assert(n_entries_out); 155 | 156 | off_t file_size; 157 | checked(fd_size(fd, &file_size), return -1); 158 | 159 | if (file_size < CA_HEADER_LEN + CA_TABLE_MIN_LEN) { 160 | u_log(ERR, "input index is too short (%jd bytes)", (intmax_t)file_size); 161 | return -1; 162 | } 163 | 164 | if ((file_size - CA_HEADER_LEN - CA_TABLE_MIN_LEN) % CA_TABLE_ENTRY_LEN) { 165 | u_log(ERR, "got a fractional number of table entries"); 166 | return -1; 167 | } 168 | 169 | *n_entries_out = (file_size - CA_HEADER_LEN - CA_TABLE_MIN_LEN) / CA_TABLE_ENTRY_LEN; 170 | 171 | checked(lseek(fd, 0, SEEK_SET), return -1); 172 | 173 | checked(load_index_header(fd, params_out), return -1); 174 | checked(validate_table_header(fd), return -1); 175 | checked(validate_table_footer(fd, *n_entries_out), return -1); 176 | 177 | u_log(DEBUG, "index has sizes %"PRIu32" / %"PRIu32" / %"PRIu32" and %zu entries", 178 | params_out->min_size, params_out->avg_size, params_out->max_size, 179 | *n_entries_out); 180 | 181 | return 0; 182 | } 183 | 184 | int caibx_iterate_entries(int fd, struct chunker_params *params, size_t n_entries, 185 | int (*cb)(uint64_t offset, uint32_t len, uint8_t *id, void *arg), void *arg) 186 | { 187 | u_assert(fd >= 0); 188 | u_assert(params); 189 | u_assert(cb); 
190 | 191 | uint64_t last_offset = 0; 192 | 193 | // seek to the start of table entries 194 | checked(lseek(fd, CA_HEADER_LEN + CA_TABLE_HEADER_LEN, SEEK_SET), return -1); 195 | 196 | for (size_t i = 0; i < n_entries; i++) { 197 | uint8_t buf[CA_TABLE_ENTRY_LEN]; 198 | 199 | if (readall(fd, buf, CA_TABLE_ENTRY_LEN) < 0) { 200 | u_log(ERR, "reading entry %zu failed", i); 201 | return -1; 202 | } 203 | 204 | uint64_t offset = unp64le(&buf[0]); 205 | uint64_t len = offset - last_offset; 206 | 207 | char chunk_id[CHUNK_ID_STRLEN]; 208 | 209 | // chunks other than the last one should not be shorter than 210 | // the minimum size 211 | if (len < params->min_size && (i+1) < n_entries) { 212 | chunk_format_id(chunk_id, &buf[8]); 213 | u_log(WARN, "chunk %s is too short: %"PRIu64" < %"PRIu32, 214 | chunk_id, len, params->min_size); 215 | } else if (len > params->max_size) { 216 | chunk_format_id(chunk_id, &buf[8]); 217 | u_log(WARN, "chunk %s is too big : %"PRIu64" > %"PRIu32, 218 | chunk_id, len, params->max_size); 219 | 220 | /* From a correctness perspecitve, we should skip all 221 | * of the chunks that do not fit within the declared 222 | * size bounds, as such chunks should not be produced 223 | * by a conforming encoder. But if the chunk ID still 224 | * matches in the end we should be fine. Only skip 225 | * chunks that we cannot represent at all. 
226 | */ 227 | if (len > UINT32_MAX) { 228 | u_log(WARN, "skipping chunk %s", chunk_id); 229 | continue; 230 | } 231 | } 232 | 233 | if (cb(last_offset, (uint32_t)len, &buf[8], arg) < 0) 234 | return -1; 235 | 236 | last_offset = offset; 237 | } 238 | 239 | return 0; 240 | } 241 | -------------------------------------------------------------------------------- /test/encrypt_test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include "encrypt.h" 10 | #include "utils.h" 11 | 12 | // draft-irtf-cfrg-xchacha-03 Section 2.2.1 13 | static void test_hchacha20_draft_irtf_cfrg_xchacha_03(void **state) 14 | { 15 | (void) state; 16 | 17 | static const uint8_t key[32] = { 18 | 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 19 | 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f 20 | }; 21 | 22 | static const uint8_t nonce[16] = { 23 | 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x00, 0x31, 0x41, 0x59, 0x27 24 | }; 25 | 26 | uint8_t subkey[32]; 27 | static const uint8_t subkey_should[32] = { 28 | 0x82, 0x41, 0x3b, 0x42, 0x27, 0xb2, 0x7b, 0xfe, 0xd3, 0x0e, 0x42, 0x50, 0x8a, 0x87, 0x7d, 0x73, 29 | 0xa0, 0xf9, 0xe4, 0xd5, 0x8a, 0x74, 0xa8, 0x53, 0xc1, 0x2e, 0xc4, 0x13, 0x26, 0xd3, 0xec, 0xdc 30 | }; 31 | 32 | hchacha20(subkey, key, nonce); 33 | 34 | assert_memory_equal(subkey, subkey_should, sizeof(subkey)); 35 | } 36 | 37 | // draft-irtf-cfrg-xchacha-03 Section A.2.1 38 | static void test_xchacha20_draft_irtf_cfrg_xchacha_03(void **state) 39 | { 40 | (void) state; 41 | 42 | static const uint8_t plaintext[] = 43 | "The dhole (pronounced \"dole\") is also known as the Asiatic wild dog, red dog, and whistling " 44 | "dog. It is about the size of a German shepherd but looks more like a long-legged fox. 
This " 45 | "highly elusive and skilled jumper is classified with wolves, coyotes, jackals, and foxes in " 46 | "the taxonomic family Canidae."; 47 | 48 | static const uint8_t key[] = { 49 | 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 50 | 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f 51 | }; 52 | 53 | static const uint8_t nonce[] = { 54 | 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 55 | 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x58 56 | }; 57 | 58 | uint8_t out[sizeof(plaintext)-1]; 59 | static const uint8_t out_should[] = { 60 | 0x45, 0x59, 0xab, 0xba, 0x4e, 0x48, 0xc1, 0x61, 0x02, 0xe8, 0xbb, 0x2c, 0x05, 0xe6, 0x94, 0x7f, 61 | 0x50, 0xa7, 0x86, 0xde, 0x16, 0x2f, 0x9b, 0x0b, 0x7e, 0x59, 0x2a, 0x9b, 0x53, 0xd0, 0xd4, 0xe9, 62 | 0x8d, 0x8d, 0x64, 0x10, 0xd5, 0x40, 0xa1, 0xa6, 0x37, 0x5b, 0x26, 0xd8, 0x0d, 0xac, 0xe4, 0xfa, 63 | 0xb5, 0x23, 0x84, 0xc7, 0x31, 0xac, 0xbf, 0x16, 0xa5, 0x92, 0x3c, 0x0c, 0x48, 0xd3, 0x57, 0x5d, 64 | 0x4d, 0x0d, 0x2c, 0x67, 0x3b, 0x66, 0x6f, 0xaa, 0x73, 0x10, 0x61, 0x27, 0x77, 0x01, 0x09, 0x3a, 65 | 0x6b, 0xf7, 0xa1, 0x58, 0xa8, 0x86, 0x42, 0x92, 0xa4, 0x1c, 0x48, 0xe3, 0xa9, 0xb4, 0xc0, 0xda, 66 | 0xec, 0xe0, 0xf8, 0xd9, 0x8d, 0x0d, 0x7e, 0x05, 0xb3, 0x7a, 0x30, 0x7b, 0xbb, 0x66, 0x33, 0x31, 67 | 0x64, 0xec, 0x9e, 0x1b, 0x24, 0xea, 0x0d, 0x6c, 0x3f, 0xfd, 0xdc, 0xec, 0x4f, 0x68, 0xe7, 0x44, 68 | 0x30, 0x56, 0x19, 0x3a, 0x03, 0xc8, 0x10, 0xe1, 0x13, 0x44, 0xca, 0x06, 0xd8, 0xed, 0x8a, 0x2b, 69 | 0xfb, 0x1e, 0x8d, 0x48, 0xcf, 0xa6, 0xbc, 0x0e, 0xb4, 0xe2, 0x46, 0x4b, 0x74, 0x81, 0x42, 0x40, 70 | 0x7c, 0x9f, 0x43, 0x1a, 0xee, 0x76, 0x99, 0x60, 0xe1, 0x5b, 0xa8, 0xb9, 0x68, 0x90, 0x46, 0x6e, 71 | 0xf2, 0x45, 0x75, 0x99, 0x85, 0x23, 0x85, 0xc6, 0x61, 0xf7, 0x52, 0xce, 0x20, 0xf9, 0xda, 0x0c, 72 | 0x09, 0xab, 0x6b, 0x19, 0xdf, 0x74, 0xe7, 0x6a, 0x95, 0x96, 0x74, 0x46, 0xf8, 0xd0, 0xfd, 0x41, 
73 | 0x5e, 0x7b, 0xee, 0x2a, 0x12, 0xa1, 0x14, 0xc2, 0x0e, 0xb5, 0x29, 0x2a, 0xe7, 0xa3, 0x49, 0xae, 74 | 0x57, 0x78, 0x20, 0xd5, 0x52, 0x0a, 0x1f, 0x3f, 0xb6, 0x2a, 0x17, 0xce, 0x6a, 0x7e, 0x68, 0xfa, 75 | 0x7c, 0x79, 0x11, 0x1d, 0x88, 0x60, 0x92, 0x0b, 0xc0, 0x48, 0xef, 0x43, 0xfe, 0x84, 0x48, 0x6c, 76 | 0xcb, 0x87, 0xc2, 0x5f, 0x0a, 0xe0, 0x45, 0xf0, 0xcc, 0xe1, 0xe7, 0x98, 0x9a, 0x9a, 0xa2, 0x20, 77 | 0xa2, 0x8b, 0xdd, 0x48, 0x27, 0xe7, 0x51, 0xa2, 0x4a, 0x6d, 0x5c, 0x62, 0xd7, 0x90, 0xa6, 0x63, 78 | 0x93, 0xb9, 0x31, 0x11, 0xc1, 0xa5, 0x5d, 0xd7, 0x42, 0x1a, 0x10, 0x18, 0x49, 0x74, 0xc7, 0xc5 79 | }; 80 | 81 | u_build_assert(sizeof(out) == sizeof(out_should)); 82 | 83 | struct encrypt_ctx ec; 84 | assert_true(encrypt_init(&ec, key) == 0); 85 | 86 | for (size_t i = 0; i < 2; i++) { 87 | assert_true(encrypt_restart(&ec, nonce) == 0); 88 | 89 | memset(out, 0, sizeof(out)); 90 | assert_true(encrypt_do(&ec, out, plaintext, sizeof(plaintext)-1) == 0); 91 | assert_memory_equal(out, out_should, sizeof(out_should)); 92 | 93 | // subsequent encrypt_do calls should not reuse counter values 94 | memset(out, 0, sizeof(out)); 95 | assert_true(encrypt_do(&ec, out, plaintext, sizeof(plaintext)-1) == 0); 96 | assert_memory_not_equal(out, out_should, sizeof(out_should)); 97 | } 98 | 99 | // partial calls should also work 100 | assert_true(encrypt_restart(&ec, nonce) == 0); 101 | memset(out, 0, sizeof(out)); 102 | 103 | for (size_t offset = 0; offset < sizeof(out); ) { 104 | // use something that ends up block-misaligned 105 | size_t len = MIN(7, sizeof(out) - offset); 106 | assert_true(encrypt_do(&ec, 107 | &out[offset], 108 | &plaintext[offset], 109 | len) == 0); 110 | 111 | offset += len; 112 | } 113 | assert_memory_equal(out, out_should, sizeof(out_should)); 114 | 115 | encrypt_close(&ec); 116 | } 117 | 118 | static void test_design_doc_test_vector(void **state) 119 | { 120 | (void) state; 121 | 122 | static const uint8_t key[] = { 123 | 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 
0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 124 | 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f 125 | }; 126 | 127 | struct encrypt_ctx ec; 128 | assert_true(encrypt_init(&ec, key) == 0); 129 | 130 | static const uint8_t chunk_id[] = { 131 | 0x8a, 0x39, 0xd2, 0xab, 0xd3, 0x99, 0x9a, 0xb7, 0x3c, 0x34, 0xdb, 0x24, 0x76, 0x84, 0x9c, 0xdd, 132 | 0xf3, 0x03, 0xce, 0x38, 0x9b, 0x35, 0x82, 0x68, 0x50, 0xf9, 0xa7, 0x00, 0x58, 0x9b, 0x4a, 0x90 133 | }; 134 | 135 | // uses the first 24 bytes 136 | assert_true(encrypt_restart(&ec, chunk_id) == 0); 137 | 138 | uint8_t buf[128] = { 139 | 0x28, 0xb5, 0x2f, 0xfd, 0x00, 0x58, 0x54, 0x00, 0x00, 0x10, 0x00, 0x00, 0x01, 0x00, 0xfb, 0xff, 140 | 0x39, 0xc0, 0x02, 0x02, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00 141 | }; 142 | 143 | assert_true(encrypt_do(&ec, buf, buf, 26) == 0); 144 | 145 | static const uint8_t encrypted_ref[] = { 146 | 0xe8, 0xda, 0x60, 0x0a, 0x95, 0x61, 0x93, 0xc3, 0x4f, 0xd4, 0x9a, 0x77, 0xbf, 0x48, 0xda, 0x84, 147 | 0x8f, 0x5f, 0xff, 0xc1, 0x78, 0x66, 0x61, 0xcb, 0x7a, 0xe4 148 | }; 149 | 150 | assert_memory_equal(buf, encrypted_ref, sizeof(encrypted_ref)); 151 | 152 | // test keystream-only output 153 | memset(buf, 0, sizeof(buf)); 154 | assert_true(encrypt_restart(&ec, chunk_id) == 0); 155 | assert_true(encrypt_do(&ec, buf, buf, sizeof(buf)) == 0); 156 | 157 | static const uint8_t keystream_ref[] = { 158 | 0xc0, 0x6f, 0x4f, 0xf7, 0x95, 0x39, 0xc7, 0xc3, 0x4f, 0xc4, 0x9a, 0x77, 0xbe, 0x48, 0x21, 0x7b, 159 | 0xb6, 0x9f, 0xfd, 0xc3, 0x78, 0x76, 0x61, 0xca, 0x7a, 0xe4, 0x88, 0x12, 0xc9, 0xe0, 0x28, 0x3e, 160 | 0xf3, 0x9d, 0x9b, 0x3d, 0x51, 0xf4, 0xf7, 0xfd, 0xcf, 0xa9, 0x9e, 0xea, 0xd7, 0xa3, 0x80, 0x12, 161 | 0x9a, 0xcb, 0x33, 0x1f, 0x5b, 0x6d, 0x39, 0xe8, 0x4b, 0x09, 0x0b, 0x57, 0x39, 0x01, 0x08, 0x29, 162 | 0x81, 0x7c, 0x56, 0x33, 0x01, 0x5b, 0x44, 0x41, 0xe2, 0x29, 0x80, 0x93, 0x24, 0xcd, 0xea, 0x57, 163 | 0x39, 0xdf, 0xf8, 0xa5, 0x5d, 0xcc, 0xd7, 0x33, 
0xa2, 0xe7, 0x41, 0x36, 0xb9, 0x26, 0xbe, 0x36, 164 | 0xf0, 0x4a, 0xf4, 0x22, 0x58, 0x77, 0x9e, 0x01, 0xc1, 0x20, 0x5d, 0x8b, 0x00, 0xa5, 0xcb, 0x9b, 165 | 0x20, 0x2d, 0xf3, 0x13, 0xeb, 0xc4, 0x73, 0xf7, 0xa5, 0xfc, 0x28, 0xef, 0xc3, 0xc6, 0xb6, 0x91 166 | }; 167 | 168 | assert_memory_equal(buf, keystream_ref, sizeof(keystream_ref)); 169 | 170 | encrypt_close(&ec); 171 | } 172 | 173 | int main(void) 174 | { 175 | const struct CMUnitTest tests[] = { 176 | cmocka_unit_test(test_hchacha20_draft_irtf_cfrg_xchacha_03), 177 | cmocka_unit_test(test_xchacha20_draft_irtf_cfrg_xchacha_03), 178 | cmocka_unit_test(test_design_doc_test_vector), 179 | }; 180 | 181 | u_log_init(); 182 | 183 | int ret; 184 | ret = cmocka_run_group_tests_name("encrypt", tests, NULL, NULL); 185 | if (ret) 186 | return ret; 187 | } 188 | -------------------------------------------------------------------------------- /main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "utils.h" 14 | #include "caibx.h" 15 | #include "chunker.h" 16 | #include "sha.h" 17 | #include "store-local.h" 18 | #include "store-http.h" 19 | #include "target.h" 20 | #include "ui.h" 21 | 22 | static struct store *store; 23 | const char *target_path; 24 | static struct target *target; 25 | static struct chunker_params chunker_params; 26 | static size_t n_entries; 27 | static size_t skipped; 28 | static int index_fd; 29 | 30 | static bool interactive = false; 31 | 32 | static int entry_cb(uint64_t offset, uint32_t len, uint8_t *id, void *arg) 33 | { 34 | static bool have_last_id = false; 35 | static bool resuming = true; 36 | static uint8_t last_id[CHUNK_ID_LEN]; 37 | static uint8_t buf[256*1024] __attribute__((aligned(4096))); 38 | 39 | if (resuming) { 40 | if (len > sizeof(buf)) { 41 | u_log(ERR, "chunk length (%"PRIu32") at offset
%"PRIu64" exceeds buffer size (%zu)", 42 | len, offset, sizeof(buf)); 43 | 44 | // this means the index file is incompatible and store_get_chunk 45 | // would bail out anyway, abort. 46 | return -1; 47 | } 48 | 49 | if (target_check_chunk(target, buf, len, offset, id) == 1) { 50 | skipped++; 51 | goto progress; 52 | } 53 | 54 | resuming = false; 55 | } 56 | 57 | if (!have_last_id || memcmp(last_id, id, CHUNK_ID_LEN) != 0) { 58 | ssize_t ret = store_get_chunk(store, id, buf, sizeof(buf)); 59 | if (ret < 0 || len != (size_t)ret) { 60 | u_log(ERR, "result size %zd does not match expected length %"PRIu32, ret, len); 61 | return -1; 62 | } 63 | 64 | memcpy(last_id, id, CHUNK_ID_LEN); 65 | have_last_id = true; 66 | } 67 | 68 | if (target_write(target, buf, len, offset, id) < 0) { 69 | u_log(ERR, "failed to store chunk to target"); 70 | return -1; 71 | } 72 | 73 | static uint32_t handled_entries = 0; 74 | 75 | progress: 76 | handled_entries++; 77 | uint8_t progress = (uint8_t)((uint64_t)100 * handled_entries / n_entries); /* widen before multiplying: 100 * handled_entries wraps in 32 bits beyond ~42.9 million entries */ 78 | 79 | // only show progress bar when running interactively and not spamming 80 | // debug information anyway 81 | if (interactive && !check_loglevel(U_LOG_DEBUG)) { 82 | static progess_status_t progress_status = PROGRESS_STATUS_INIT; 83 | 84 | show_progress(progress, &progress_status); 85 | } else { 86 | static time_t last_progress = 0; 87 | 88 | time_t now = time_monotonic(); 89 | if (now >= last_progress + 5 || progress == 100) { 90 | u_log(INFO, "synchronization progress: %"PRIu8"%%", progress); 91 | last_progress = now; 92 | } 93 | } 94 | 95 | return 0; 96 | } 97 | 98 | static int append_store_from_arg(struct store_chain *sc, char *arg) 99 | { 100 | struct store *s; 101 | 102 | if (streq(arg, target_path)) { 103 | s = target_as_store(target, n_entries); 104 | if (!s) { 105 | u_log(ERR, "creating store from target failed"); 106 | return -1; 107 | } 108 | } else if (startswith(arg, "http")) { 109 | s = store_http_new(arg); 110 | if (!s) { 111 | u_log(ERR, "creating
HTTP store from '%s' failed", arg); 112 | return -1; 113 | } 114 | } else { 115 | char *p = strchr(arg, ':'); 116 | if (p) { 117 | *p = 0; 118 | p++; 119 | } 120 | 121 | s = store_local_new(arg, p, &chunker_params); 122 | if (!s) { 123 | u_log(ERR, "creating local store from '%s' failed", arg); 124 | return -1; 125 | } 126 | } 127 | 128 | if (store_chain_append(sc, s) < 0) { 129 | u_log(ERR, "appending store '%s' to store chain failed", arg); 130 | return -1; 131 | } 132 | 133 | return 0; 134 | } 135 | 136 | static int csn(int argc, char **argv) 137 | { 138 | if (argc < 4) { 139 | fprintf(stderr, "usage: %s input.caibx target store [store ...]\n", argv[0]); 140 | return -1; 141 | } 142 | 143 | const char *env; 144 | 145 | if ((env = getenv("CSN_KCAPI_DRIVER")) != NULL) { 146 | if (sha_kcapi_init(env) < 0) { 147 | u_log(ERR, "kcapi init failed"); 148 | return -1; 149 | } 150 | 151 | u_log(DEBUG, "kcapi enabled with driver '%s'", env); 152 | } 153 | 154 | index_fd = open(argv[1], O_RDONLY); 155 | if (index_fd < 0) { 156 | u_log_errno("opening index '%s' failed", argv[1]); 157 | return -1; 158 | } 159 | 160 | if (caibx_load_header(index_fd, &chunker_params, &n_entries) < 0) { 161 | u_log(ERR, "loading caibx header failed"); 162 | return -1; 163 | } 164 | 165 | target_path = argv[2]; 166 | target = target_new(target_path); 167 | if (!target) { 168 | u_log(ERR, "creating target failed"); 169 | return -1; 170 | } 171 | 172 | struct store_chain *sc = store_chain_new(argc - 3 + 1); 173 | if (!sc) { 174 | u_log(ERR, "creating storechain failed"); 175 | return -1; 176 | } 177 | 178 | for (int i = 3; i < argc; i++) { 179 | if (append_store_from_arg(sc, argv[i]) < 0) 180 | return -1; 181 | } 182 | 183 | store = store_chain_to_store(sc); 184 | 185 | return 0; 186 | } 187 | 188 | static void casync_help(void) 189 | { 190 | fprintf(stderr, 191 | "casync [OPTIONS...] 
extract BLOB_INDEX PATH\n" 192 | "\n" 193 | "note: This is casync-nano, which only supports extracting blob indices\n" 194 | " and just implements enough command line parsing to work with RAUC.\n" 195 | "\n" 196 | "supported options:\n" 197 | " --store add a chunk store (HTTP(S) only)\n" 198 | " --seed add seed (block device or file)\n"); 199 | } 200 | 201 | static int casync(int argc, char **argv) 202 | { 203 | enum { 204 | ARG_STORE = 0x100, 205 | ARG_SEED, 206 | ARG_SEED_OUTPUT 207 | }; 208 | 209 | static const struct option options[] = { 210 | { "help", no_argument, NULL, 'h' }, 211 | { "store", required_argument, NULL, ARG_STORE }, 212 | { "seed", required_argument, NULL, ARG_SEED }, 213 | { "seed-output", required_argument, NULL, ARG_SEED_OUTPUT }, 214 | {} 215 | }; 216 | 217 | if (argc < 3) { 218 | casync_help(); 219 | exit(EXIT_FAILURE); 220 | } 221 | 222 | index_fd = open(argv[argc-2], O_RDONLY); 223 | if (index_fd < 0) { 224 | u_log_errno("opening index '%s' failed", argv[argc-2]); 225 | return -1; 226 | } 227 | 228 | if (caibx_load_header(index_fd, &chunker_params, &n_entries) < 0) { 229 | u_log(ERR, "loading caibx header failed"); 230 | return -1; 231 | } 232 | 233 | target_path = argv[argc-1]; 234 | target = target_new(target_path); 235 | if (!target) { 236 | u_log(ERR, "creating target failed"); 237 | return -1; 238 | } 239 | 240 | // allocate (more than) enough store chain slots; NOTE(review): sized for "--opt ARG" pairs, "--opt=ARG" spellings may need more - confirm 241 | struct store_chain *sc = store_chain_new((argc - 4) / 2 + 1); 242 | if (!sc) { 243 | u_log(ERR, "creating storechain failed"); 244 | return -1; 245 | } 246 | 247 | int c; 248 | while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0) { 249 | switch (c) { 250 | case 'h': 251 | casync_help(); 252 | exit(EXIT_SUCCESS); /* help was requested: returning 0 here would tell main() that init succeeded and start a sync without a store */ 253 | case ARG_STORE: 254 | case ARG_SEED: 255 | if (append_store_from_arg(sc, optarg) < 0) 256 | return -1; 257 | 258 | break; 259 | case ARG_SEED_OUTPUT: 260 | // ignored to silence unknown option warning 261 | // with rauc 262 | break; 263 | default: 264 |
casync_help(); 265 | exit(EXIT_FAILURE); 266 | } 267 | } 268 | 269 | argc -= optind; 270 | argv += optind; 271 | 272 | if (argc < 3 || !streq(argv[0], "extract")) { 273 | casync_help(); 274 | exit(EXIT_FAILURE); 275 | } 276 | 277 | store = store_chain_to_store(sc); 278 | 279 | return 0; 280 | } 281 | 282 | int main(int argc, char **argv) 283 | { 284 | u_log_init(); 285 | 286 | int ret; 287 | 288 | if (isatty(STDERR_FILENO)) 289 | interactive = true; 290 | 291 | time_t now; 292 | 293 | now = time_monotonic(); 294 | time_t start = now; 295 | 296 | const char *appname = basename(argv[0]); 297 | if (streq(appname, "csn")) { 298 | ret = csn(argc, argv); 299 | } else if (streq(appname, "casync")) { 300 | ret = casync(argc, argv); 301 | } else { 302 | u_log(ERR, "unimplemented app variant '%s'", appname); 303 | return EXIT_FAILURE; 304 | } 305 | 306 | if (ret) { 307 | u_log(ERR, "initializing synchronization process failed"); 308 | return EXIT_FAILURE; 309 | } 310 | 311 | now = time_monotonic(); 312 | u_log(INFO, "init finished after %u seconds", (unsigned int)(now - start)); 313 | start = now; 314 | 315 | u_log(INFO, "starting synchronization"); 316 | 317 | if (caibx_iterate_entries(index_fd, &chunker_params, n_entries, entry_cb, NULL) < 0) { 318 | u_log(ERR, "iterating entries failed"); 319 | return EXIT_FAILURE; 320 | } 321 | 322 | now = time_monotonic(); 323 | u_log(INFO, "synchronization finished after %u seconds", (unsigned int)(now - start)); 324 | if (skipped) 325 | u_log(INFO, "skipped prefix of %zu chunks", skipped); 326 | 327 | store_free(store); 328 | 329 | return EXIT_SUCCESS; 330 | } 331 | -------------------------------------------------------------------------------- /chunker.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "utils.h" 9 | 10 | #include "chunker.h" 11 | 12 | static const uint32_t buzhash_table[] = { 13 |
0x458be752, 0xc10748cc, 0xfbbcdbb8, 0x6ded5b68, 14 | 0xb10a82b5, 0x20d75648, 0xdfc5665f, 0xa8428801, 15 | 0x7ebf5191, 0x841135c7, 0x65cc53b3, 0x280a597c, 16 | 0x16f60255, 0xc78cbc3e, 0x294415f5, 0xb938d494, 17 | 0xec85c4e6, 0xb7d33edc, 0xe549b544, 0xfdeda5aa, 18 | 0x882bf287, 0x3116737c, 0x05569956, 0xe8cc1f68, 19 | 0x0806ac5e, 0x22a14443, 0x15297e10, 0x50d090e7, 20 | 0x4ba60f6f, 0xefd9f1a7, 0x5c5c885c, 0x82482f93, 21 | 0x9bfd7c64, 0x0b3e7276, 0xf2688e77, 0x8fad8abc, 22 | 0xb0509568, 0xf1ada29f, 0xa53efdfe, 0xcb2b1d00, 23 | 0xf2a9e986, 0x6463432b, 0x95094051, 0x5a223ad2, 24 | 0x9be8401b, 0x61e579cb, 0x1a556a14, 0x5840fdc2, 25 | 0x9261ddf6, 0xcde002bb, 0x52432bb0, 0xbf17373e, 26 | 0x7b7c222f, 0x2955ed16, 0x9f10ca59, 0xe840c4c9, 27 | 0xccabd806, 0x14543f34, 0x1462417a, 0x0d4a1f9c, 28 | 0x087ed925, 0xd7f8f24c, 0x7338c425, 0xcf86c8f5, 29 | 0xb19165cd, 0x9891c393, 0x325384ac, 0x0308459d, 30 | 0x86141d7e, 0xc922116a, 0xe2ffa6b6, 0x53f52aed, 31 | 0x2cd86197, 0xf5b9f498, 0xbf319c8f, 0xe0411fae, 32 | 0x977eb18c, 0xd8770976, 0x9833466a, 0xc674df7f, 33 | 0x8c297d45, 0x8ca48d26, 0xc49ed8e2, 0x7344f874, 34 | 0x556f79c7, 0x6b25eaed, 0xa03e2b42, 0xf68f66a4, 35 | 0x8e8b09a2, 0xf2e0e62a, 0x0d3a9806, 0x9729e493, 36 | 0x8c72b0fc, 0x160b94f6, 0x450e4d3d, 0x7a320e85, 37 | 0xbef8f0e1, 0x21d73653, 0x4e3d977a, 0x1e7b3929, 38 | 0x1cc6c719, 0xbe478d53, 0x8d752809, 0xe6d8c2c6, 39 | 0x275f0892, 0xc8acc273, 0x4cc21580, 0xecc4a617, 40 | 0xf5f7be70, 0xe795248a, 0x375a2fe9, 0x425570b6, 41 | 0x8898dcf8, 0xdc2d97c4, 0x0106114b, 0x364dc22f, 42 | 0x1e0cad1f, 0xbe63803c, 0x5f69fac2, 0x4d5afa6f, 43 | 0x1bc0dfb5, 0xfb273589, 0x0ea47f7b, 0x3c1c2b50, 44 | 0x21b2a932, 0x6b1223fd, 0x2fe706a8, 0xf9bd6ce2, 45 | 0xa268e64e, 0xe987f486, 0x3eacf563, 0x1ca2018c, 46 | 0x65e18228, 0x2207360a, 0x57cf1715, 0x34c37d2b, 47 | 0x1f8f3cde, 0x93b657cf, 0x31a019fd, 0xe69eb729, 48 | 0x8bca7b9b, 0x4c9d5bed, 0x277ebeaf, 0xe0d8f8ae, 49 | 0xd150821c, 0x31381871, 0xafc3f1b0, 0x927db328, 50 | 0xe95effac, 0x305a47bd, 0x426ba35b, 
0x1233af3f, 51 | 0x686a5b83, 0x50e072e5, 0xd9d3bb2a, 0x8befc475, 52 | 0x487f0de6, 0xc88dff89, 0xbd664d5e, 0x971b5d18, 53 | 0x63b14847, 0xd7d3c1ce, 0x7f583cf3, 0x72cbcb09, 54 | 0xc0d0a81c, 0x7fa3429b, 0xe9158a1b, 0x225ea19a, 55 | 0xd8ca9ea3, 0xc763b282, 0xbb0c6341, 0x020b8293, 56 | 0xd4cd299d, 0x58cfa7f8, 0x91b4ee53, 0x37e4d140, 57 | 0x95ec764c, 0x30f76b06, 0x5ee68d24, 0x679c8661, 58 | 0xa41979c2, 0xf2b61284, 0x4fac1475, 0x0adb49f9, 59 | 0x19727a23, 0x15a7e374, 0xc43a18d5, 0x3fb1aa73, 60 | 0x342fc615, 0x924c0793, 0xbee2d7f0, 0x8a279de9, 61 | 0x4aa2d70c, 0xe24dd37f, 0xbe862c0b, 0x177c22c2, 62 | 0x5388e5ee, 0xcd8a7510, 0xf901b4fd, 0xdbc13dbc, 63 | 0x6c0bae5b, 0x64efe8c7, 0x48b02079, 0x80331a49, 64 | 0xca3d8ae6, 0xf3546190, 0xfed7108b, 0xc49b941b, 65 | 0x32baf4a9, 0xeb833a4a, 0x88a3f1a5, 0x3a91ce0a, 66 | 0x3cc27da1, 0x7112e684, 0x4a3096b1, 0x3794574c, 67 | 0xa3c8b6f3, 0x1d213941, 0x6e0a2e00, 0x233479f1, 68 | 0x0f4cd82f, 0x6093edd2, 0x5d7d209e, 0x464fe319, 69 | 0xd4dcac9e, 0x0db845cb, 0xfb5e4bc3, 0xe0256ce1, 70 | 0x09fb4ed1, 0x0914be1e, 0xa5bdb2c3, 0xc6eb57bb, 71 | 0x30320350, 0x3f397e91, 0xa67791bc, 0x86bc0e2c, 72 | 0xefa0a7e2, 0xe9ff7543, 0xe733612c, 0xd185897b, 73 | 0x329e5388, 0x91dd236b, 0x2ecb0d93, 0xf4d82a3d, 74 | 0x35b5c03f, 0xe4e606f0, 0x05b21843, 0x37b45964, 75 | 0x5eff22f4, 0x6027f4cc, 0x77178b3c, 0xae507131, 76 | 0x7bf7cabc, 0xf9c18d66, 0x593ade65, 0xd95ddf11, 77 | }; 78 | 79 | int chunker_params_set(struct chunker_params *params, uint64_t min_size, uint64_t avg_size, uint64_t max_size) 80 | { 81 | // FIXME: support fixed-size chunks 82 | if (!(min_size < avg_size && avg_size < max_size)) { 83 | u_log(ERR, "non-monotonic chunk size parameters: %"PRIu64" / %"PRIu64" / %"PRIu64, 84 | min_size, avg_size, max_size); 85 | return -1; 86 | } 87 | 88 | if ((min_size > UINT32_MAX) || 89 | (avg_size > UINT32_MAX) || 90 | (max_size > UINT32_MAX)) { 91 | u_log(ERR, "unreasonably large chunk size parameters: %"PRIu64" / %"PRIu64" / %"PRIu64, 92 | min_size, avg_size, 
max_size); 93 | return -1; 94 | } 95 | 96 | params->min_size = (uint32_t) min_size; 97 | params->avg_size = (uint32_t) avg_size; 98 | params->max_size = (uint32_t) max_size; 99 | return 0; 100 | } 101 | 102 | void chunker_reset(struct chunker *c) 103 | { 104 | c->h = 0; 105 | c->chunk_size = 0; 106 | c->window_fill = 0; 107 | 108 | SHA256_Init(&c->sha_ctx); 109 | } 110 | 111 | #define CA_CHUNKER_DISCRIMINATOR_FROM_AVG(avg) ((size_t) (avg / (-1.42888852e-7 * avg + 1.33237515))) 112 | int chunker_init(struct chunker *c, struct chunker_params *params) 113 | { 114 | u_assert(c); 115 | 116 | memcpy(&c->params, params, sizeof(*params)); 117 | 118 | // make sure we match casync's results here 119 | u_build_assert(CA_CHUNKER_DISCRIMINATOR_FROM_AVG(64 * 1024) == 0xc17f); 120 | c->discriminator = CA_CHUNKER_DISCRIMINATOR_FROM_AVG(c->params.avg_size); 121 | 122 | chunker_reset(c); 123 | 124 | return 0; 125 | } 126 | 127 | static bool chunker_shall_break(struct chunker *c) 128 | { 129 | if (c->chunk_size >= c->params.max_size) 130 | return true; 131 | 132 | if (c->chunk_size < c->params.min_size) 133 | return false; 134 | 135 | // special case the common case to allow for compile-time optimization 136 | if (c->params.avg_size == CHUNKER_SIZE_AVG_DEFAULT) { 137 | size_t default_discriminator; 138 | 139 | default_discriminator = CA_CHUNKER_DISCRIMINATOR_FROM_AVG(CHUNKER_SIZE_AVG_DEFAULT); 140 | return (c->h % default_discriminator) == (default_discriminator - 1); 141 | } else { 142 | return (c->h % c->discriminator) == (c->discriminator - 1); 143 | } 144 | } 145 | 146 | size_t chunker_scan(struct chunker *c, uint8_t *buf, size_t len) 147 | { 148 | u_assert(c); 149 | u_assert(buf); 150 | 151 | size_t consumed = 0; 152 | 153 | if (c->window_fill < CHUNKER_WINDOW_SIZE) { 154 | size_t to_copy; 155 | 156 | to_copy = MIN(len, CHUNKER_WINDOW_SIZE - c->window_fill); 157 | memcpy(&c->window[c->window_fill], buf, to_copy); 158 | SHA256_Update(&c->sha_ctx, buf, to_copy); 159 | 160 | 
c->window_fill += to_copy; 161 | c->chunk_size += to_copy; 162 | 163 | buf += to_copy; 164 | consumed = to_copy; 165 | len -= to_copy; 166 | 167 | if (c->window_fill < CHUNKER_WINDOW_SIZE) 168 | return (size_t)-1; 169 | 170 | // window is full now, do initial hashing 171 | for (size_t i = 0; i < CHUNKER_WINDOW_SIZE; i++) 172 | c->h ^= rol32(buzhash_table[c->window[i]], CHUNKER_WINDOW_SIZE-i-1); 173 | } 174 | 175 | if (chunker_shall_break(c)) 176 | return consumed; 177 | 178 | size_t window_leave_idx = c->chunk_size % CHUNKER_WINDOW_SIZE; 179 | uint8_t *p = buf; 180 | while (len) { 181 | c->h = rol32(c->h, 1) ^ 182 | rol32(buzhash_table[c->window[window_leave_idx]], CHUNKER_WINDOW_SIZE) ^ 183 | buzhash_table[*p]; 184 | 185 | c->chunk_size++; 186 | consumed++; 187 | 188 | c->window[window_leave_idx] = *p; 189 | 190 | window_leave_idx++; 191 | if (window_leave_idx == CHUNKER_WINDOW_SIZE) 192 | window_leave_idx = 0; 193 | 194 | p++; 195 | len--; 196 | 197 | if (chunker_shall_break(c)) { 198 | SHA256_Update(&c->sha_ctx, buf, (size_t)(p - buf)); 199 | return consumed; 200 | } 201 | } 202 | 203 | SHA256_Update(&c->sha_ctx, buf, (size_t)(p - buf)); 204 | 205 | return (size_t)-1; 206 | } 207 | 208 | void chunker_get_id(struct chunker *c, uint8_t *id_out) 209 | { 210 | u_assert(c); 211 | u_assert(id_out); 212 | 213 | SHA256_Final(id_out, &c->sha_ctx); 214 | } 215 | 216 | int chunker_scan_fd(int fd, struct chunker_params *params, 217 | int (*cb)(uint64_t offset, uint32_t len, uint8_t *id, void *arg), void *arg) 218 | { 219 | u_assert(fd >= 0); 220 | u_assert(cb); 221 | 222 | struct chunker c; 223 | uint8_t buf[BUFSIZ]; 224 | 225 | if (chunker_init(&c, params) < 0) { 226 | u_log(ERR, "initializing chunker failed"); 227 | return -1; 228 | } 229 | 230 | uint64_t offset = 0; 231 | uint64_t last_boundary = 0; 232 | 233 | checked(lseek(fd, 0, SEEK_SET), return -1); 234 | 235 | uint8_t chunk_id[CHUNK_ID_LEN]; 236 | while (1) { 237 | ssize_t bytes_read; 238 | 239 | bytes_read = 
read(fd, buf, sizeof(buf)); 240 | if (bytes_read < 0) { 241 | if (errno == EINTR) 242 | continue; 243 | 244 | u_log_errno("error at offset %"PRIu64" while chunking", offset); 245 | return -1; 246 | } else if (bytes_read == 0) 247 | break; 248 | 249 | while (bytes_read) { 250 | size_t chunker_ret; 251 | 252 | chunker_ret = chunker_scan(&c, buf, bytes_read); 253 | if (chunker_ret == (size_t)-1) { 254 | // no chunk boundary here 255 | 256 | offset += bytes_read; 257 | break; 258 | } 259 | 260 | offset += chunker_ret; 261 | 262 | chunker_get_id(&c, chunk_id); 263 | if (cb(last_boundary, offset - last_boundary, chunk_id, arg) < 0) 264 | return -1; 265 | 266 | last_boundary = offset; 267 | 268 | chunker_reset(&c); 269 | memmove(buf, &buf[chunker_ret], bytes_read - chunker_ret); 270 | bytes_read -= chunker_ret; 271 | } 272 | } 273 | 274 | // do we have a leftover chunk? 275 | if (offset != last_boundary) { 276 | chunker_get_id(&c, chunk_id); 277 | if (cb(last_boundary, offset - last_boundary, chunk_id, arg) < 0) 278 | return -1; 279 | } 280 | 281 | return 0; 282 | } 283 | -------------------------------------------------------------------------------- /doc/chunk-encryption.md: -------------------------------------------------------------------------------- 1 | # Chunk encryption design document 2 | 3 | This document describes the design and implementation of chunk encryption in 4 | casync-nano. In this document, "casync" refers to the general family of casync 5 | tools (like casync, desync and casync-nano). "casync-nano" refers to 6 | casync-nano specifically. 7 | 8 | ## Rationale 9 | 10 | casync-nano is primarily intended for distributing system images. System images 11 | can contain secrets or other proprietary information, thus protecting them from 12 | an attacker is important. 
13 | 14 | casync-based systems lend themselves to an architecture where the small index 15 | file is distributed over a channel that is expensive (in terms of capacity), 16 | but trustworthy (e.g. a mutually authenticated connection to some kind of 17 | update server which supplies the device with a specific index file based on its 18 | identity) and the large but immutable chunks are stored on a less-trusted (both 19 | in terms of integrity and confidentiality) and possibly third-party system such 20 | as a CDN. 21 | 22 | In the following, we thus focus on the protection of the chunk data. 23 | 24 | The integrity of individual chunks is already ensured relative to the integrity 25 | of the index file since the checksum of each chunk is verified against the 26 | index file after downloading it. Therefore, the integrity of the entire system 27 | image is guaranteed if the integrity of the index file is ensured by some kind 28 | of external mechanism that is out of scope for casync. 29 | 30 | The "confidentiality" aspect however is not addressed by any casync 31 | implementation at this time - chunks are stored unencrypted. 32 | 33 | Even without access to the casync index file that specifies the selection/order 34 | of chunks that comprise a given image, just being able to retrieve the bare 35 | chunks can yield valuable information to an attacker since the minimum chunk 36 | size is usually large (16 KiB) compared to the length of a secret, for example. 37 | Furthermore, reasonable guesses as to which chunks were consecutive in the 38 | source image can usually be made. 39 | 40 | Access to the chunks themselves can of course be limited using the usual HTTP 41 | authentication mechanisms at the chunk store. The hash-based addressing of a 42 | chunk store also makes it more difficult to guess the URL of a chunk (if one 43 | does not have access to the index file and directory listings are turned off, 44 | of course). Nonetheless, this is brittle. 
Modern OTA mechanisms usually apply 45 | end-to-end encryption to the images themselves to avoid having to trust the 46 | storage or transmission layers. 47 | 48 | Content-defined chunking allows the implementation of delta updates by 49 | leveraging similarities between different revisions of the same system image. 50 | This approach only works when the similarities are visible to the chunking 51 | process, which precludes potentially desirable transformations of the source 52 | image such as compressing it or encrypting it. 53 | 54 | While casync resolves the compression issue by simply compressing the chunks 55 | after they have been carved from the source image, there is no equivalent 56 | mechanism for encrypting them. The rest of this document explores how to fill 57 | this gap. 58 | 59 | ## Design considerations 60 | 61 | The proposed scheme will encrypt chunks individually to preserve as much of the 62 | original casync architecture as possible. Since the chunks in the chunk store 63 | will normally be shared between different system image versions, running the 64 | chunk generation process for different source images should not produce chunk 65 | files that share the same ID but aren't usable for other images that also 66 | nominally include the same chunk. In other words, each chunk must be encrypted 67 | (and decryptable) independently from all the others. 68 | 69 | Using authenticated encryption is considered the norm nowadays. Luckily, while 70 | the devices casync-nano runs on are generally resource-constrained, modern, 71 | fast AEAD constructions are [available][rfc8439] that don't require specific 72 | hardware for speed or side-channel resistance.
However, authentication 73 | necessarily adds a (very minor) data overhead for the authentication tag and 74 | complicates streaming processing of the data, since it either must be buffered 75 | in full, verified and then decrypted before it can be processed further, or 76 | subdivided into smaller chunks that are processed individually (see the 77 | [age][age-spec] design for an implementation of this). Decrypting and passing 78 | on the data *before* verifying the authentication tag violates the 79 | [Cryptographic Doom Principle][moxie-crypto-doom] and is generally considered a 80 | bad idea. 81 | 82 | [rfc8439]: https://www.rfc-editor.org/rfc/rfc8439.html 83 | [age-spec]: https://age-encryption.org/v1 84 | [moxie-crypto-doom]: https://moxie.org/2011/12/13/the-cryptographic-doom-principle.html 85 | 86 | In the following, we will, however, do just that, since it simplifies the 87 | implementation a lot for the aforementioned reasons. There are mitigating 88 | factors that limit the downside enough to make this a viable option, though: 89 | 90 | - The security model of casync-nano has always been that the index file is the 91 | source of truth for what the restored system image is supposed to look like. 92 | If the user performs out-of-band verification of the index file (via a 93 | cryptographic signature, for example), casync-nano guarantees that the 94 | resulting image corresponds to the index file. As described above, it does 95 | this by not trusting the chunk store in the first place and verifying that 96 | the hash of each downloaded chunk corresponds to the chunk ID before writing 97 | it to disk. In effect, this authenticates the data after all.
98 | 99 | - Thus, the only step that processes untrusted data is the zstd decompression 100 | that happens before this hashing step (since zstd compression is not 101 | necessarily reproducible, which would change the chunk ID and since it would 102 | require a superfluous compression step for determining the ID of locally 103 | sourced chunks on the target device even if it were). 104 | 105 | While a data compression library certainly is a potential exploit target, 106 | libzstd is already processing huge amounts of untrusted data on the web 107 | these days. It stands to reason that the remaining risk for vulnerabilities 108 | is small compared to the trouble of significantly complicating the 109 | encryption scheme (which carries its own implementation risks). 110 | 111 | Most notably however, all of this is already the case in the status quo 112 | design and adding a non-AEAD encryption step does not make it worse, it 113 | only neglects the opportunity to add a mitigation for it. 114 | 115 | Taking all this into account, we are going to use XChaCha20 (the extension of 116 | the XSalsa20 extended nonce construction to ChaCha20, see the expired I-D 117 | [`draft-irtf-cfrg-xchacha-03`][xchacha20] or libsodium for a description) to 118 | encrypt each chunk. The key is a global fixed parameter and the nonce is 119 | derived from the chunk ID. We cannot use the chunk ID verbatim since SHA256 120 | hashes are 256 bit long, but XChaCha20 nonces are only 192 bits long. Thus, the 121 | 192 bit prefix of the chunk ID is used as the nonce. This does not meaningfully 122 | affect the security properties since XChaCha20 is meant to be used with random 123 | nonces and practically, SHA256 hashes are random bit strings, so their prefixes 124 | are random bit strings too. 
125 | 126 | [xchacha20]: https://datatracker.ietf.org/doc/draft-irtf-cfrg-xchacha/03/ 127 | 128 | Each key/nonce combination can encrypt up to 256 GiB (64 bytes per block with a 129 | 32 bit counter) of data, which is plenty for a single chunk. 130 | 131 | ## Encryption scheme 132 | 133 | An XChaCha20 instance is initialized using the encryption key set by the user 134 | and by using the first 24 bytes of the chunk ID as the nonce. The block counter 135 | starts at 0. 136 | 137 | A compressed chunk is encrypted by performing a byte-wise XOR with the 138 | generated keystream. Decryption is performed analogously. 139 | 140 | An encrypted chunk uses the file extension `.cacnk.enc`, so a full path looks 141 | like this: `/8a39/8a39d2abd3999ab73c34db2476849cddf303ce389b35826850f9a700589b4a90.cacnk.enc`. 142 | 143 | ## Security considerations 144 | 145 | As discussed above, this is not an authenticated construction. XChaCha20 is a 146 | stream cipher so the encrypted blocks are trivially malleable. However, 147 | integrity of the restored system image is indirectly assured through the 148 | cryptographic hashes present in the index files. 149 | 150 | Confidentiality of the contents of individual chunks is protected and while this 151 | is a necessary precondition for protecting the confidentiality of the *entire* 152 | system image, it is not sufficient to achieve it: The chunk file name leaks the 153 | hash of its unencrypted contents, which can give an attacker information about 154 | the contents for certain well-known data. An attacker can also prove the 155 | presence of data larger than a single chunk in the image by experimentally 156 | chunking it and checking if a chunk with that hash exists in the chunk store or 157 | the index file. 158 | 159 | These issues can be somewhat mitigated for example by also tying the chunk ID 160 | to a key (for example using a construction such as HMAC or a hash function with 161 | a keyed mode such as BLAKE2).
However: 162 | 163 | - Changing the way chunk IDs are computed presents a significant break from 164 | the existing casync ecosystem as it requires changes to the caibx file 165 | format. 166 | 167 | - The information leak introduced here is limited. The likelihood that an 168 | attacker is able to derive more precise information than what was described 169 | above is relatively slim and this is sufficient to make extracting secrets 170 | or reverse-engineering the system much harder. 171 | 172 | - Obfuscation of the chunk IDs as described above can be implemented 173 | independently of chunk encryption. 174 | 175 | Thus, we postpone this topic for a future extension. 176 | 177 | Furthermore, even after encryption, an attacker can still learn the compressed 178 | size and (if they have the index file) the sequence of chunks, which also 179 | reveals some information about the plain text. 180 | 181 | ## Test vectors 182 | 183 | See the test vectors in [`draft-irtf-cfrg-xchacha-03`][xchacha20] for the 184 | XChaCha20 implementation. 185 | 186 | For a concrete example of the casync-nano usage, consider the following chunk: 187 | 188 | ``` 189 | 00000000 28 b5 2f fd 00 58 54 00 00 10 00 00 01 00 fb ff 190 | 00000010 39 c0 02 02 00 10 00 01 00 00 191 | ``` 192 | 193 | This is the compressed version of the 256 KiB all-zero chunk. It has the ID 194 | `8a39d2abd3999ab73c34db2476849cddf303ce389b35826850f9a700589b4a90`. Thus, the 195 | nonce is: 196 | 197 | ``` 198 | 00000000 8a 39 d2 ab d3 99 9a b7 3c 34 db 24 76 84 9c dd 199 | 00000010 f3 03 ce 38 9b 35 82 68 200 | ``` 201 | 202 | Encrypting this chunk with the key `000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f` 203 | yields: 204 | 205 | ``` 206 | 00000000 e8 da 60 0a 95 61 93 c3 4f d4 9a 77 bf 48 da 84 207 | 00000010 8f 5f ff c1 78 66 61 cb 7a e4 208 | ``` 209 | 210 | Since this chunk is shorter than a single ChaCha20 block, it is more 211 | instructive to look at the corresponding keystream. 
The first 128 bytes (of 212 | which 26 are used in the example above) of the keystream are: 213 | 214 | ``` 215 | 00000000 c0 6f 4f f7 95 39 c7 c3 4f c4 9a 77 be 48 21 7b 216 | 00000010 b6 9f fd c3 78 76 61 ca 7a e4 88 12 c9 e0 28 3e 217 | 00000020 f3 9d 9b 3d 51 f4 f7 fd cf a9 9e ea d7 a3 80 12 218 | 00000030 9a cb 33 1f 5b 6d 39 e8 4b 09 0b 57 39 01 08 29 219 | 00000040 81 7c 56 33 01 5b 44 41 e2 29 80 93 24 cd ea 57 220 | 00000050 39 df f8 a5 5d cc d7 33 a2 e7 41 36 b9 26 be 36 221 | 00000060 f0 4a f4 22 58 77 9e 01 c1 20 5d 8b 00 a5 cb 9b 222 | 00000070 20 2d f3 13 eb c4 73 f7 a5 fc 28 ef c3 c6 b6 91 223 | ``` 224 | -------------------------------------------------------------------------------- /doc/csn.1.scd: -------------------------------------------------------------------------------- 1 | CSN(1) 2 | 3 | # NAME 4 | 5 | csn - casync-nano 6 | 7 | # SYNOPSIS 8 | 9 | *csn* _INDEX_ _TARGET_ _STORE_...++ 10 | *casync* [--store _HTTP_URL_...] [--seed _PATH_] [--seed-output _YESNO_] *extract* _INDEX_ _TARGET_ 11 | 12 | # DESCRIPTION 13 | 14 | casync-nano is a feature-reduced replacement for casync intended as a building 15 | block for implementing image-based differential updates on embedded systems. To 16 | that end, the only operation it supports is the extraction of .caibx files that 17 | utilize SHA256 hashes. On the other hand, it has some features that help in its 18 | intended application compared to the original casync tool: 19 | 20 | - Side-loading of index files corresponding to a local source of chunks. 21 | This is useful to avoid slow reindexing of a partition in an A/B update 22 | scheme (see OPERATION and EXAMPLES below). 23 | - Support for the kernel crypto API to accelerate hashing operations using 24 | hardware mechanisms that are present on many modern SoCs. 25 | - Support for encrypting chunks so that they can be stored on untrusted 26 | third-party HTTP infrastructure. 27 | - Vastly reduced memory usage. 
28 | 29 | casync-nano is a multi-call binary and provides a compatibility layer that 30 | implements a subset of the _casync_(1) *extract* command to the extent necessary 31 | to support tools such as RAUC when called as casync and presents its own 32 | interface when called as *csn*. 33 | 34 | # OPTIONS 35 | 36 | _INDEX_ 37 | Specifies the .caibx file that describes the image to be extracted. 38 | 39 | _TARGET_ 40 | Specifies where the extracted image will be written to. This will normally 41 | be a block device. casync-nano currently does not support writing to MTD 42 | devices. 43 | 44 | As a safety mechanism, casync-nano will refuse to create _TARGET_ when it 45 | does not exist. To extract an image to a regular file, create an empty file 46 | first. 47 | 48 | _STORE_ 49 | A generic source of chunks. This can either be a HTTP(S) URL pointing 50 | towards a casync-style chunk store or a path to a local file or block device 51 | that will be broken apart into chunks. (See OPERATION below for more details 52 | on how the synchronization process works). 53 | 54 | Additional attributes can be set for a HTTP chunk store using URI 55 | fragments, see HTTP CHUNK STORE ATTRIBUTES below for more details. 56 | 57 | A local store specification may optionally be followed by a colon and the 58 | path to a .caibx file representing the contents of that local store. If 59 | supplied, this index will be used instead of running the chunking algorithm 60 | on the local store, which is computationally expensive. Such a side-loaded 61 | index will be verified such that if it is incorrect for whatever reason 62 | (e.g. if the local store has bit-rotted or the .caibx simply doesn't match 63 | the contents), it will be discarded and the synchronization process 64 | continues as if it hadn't been specified at all. 65 | 66 | Normally, when a chunk appears multiple times in _INDEX_, casync-nano will 67 | query its stores for it multiple times. 
Thus, if it is not contained in a 68 | local store, it will be downloaded over the network multiple times. While 69 | this sounds problematic, it empirically does not really happen in practice 70 | in the cases which casync-nano is intended for (since such a redundant 71 | chunk will usually already have been redundant in the previous version of 72 | the image and will thus be present in a local store). Still, if you are 73 | concerned about this possibility, you can also supply _TARGET_ as an 74 | additional _STORE_. This triggers some special logic that will prevent this 75 | problem by reusing chunks that have been written to _TARGET_ already. Note 76 | however that because of the data structures involved, this currently 77 | carries a moderate performance penalty. 78 | 79 | During synchronization, the specified stores are queried in the order given 80 | on the command line. Thus, local stores should appear first. 81 | 82 | The casync compatibility layer accepts getopt-style arguments that map to the 83 | concepts above: Both --store and --seed are equivalent to specifying a _STORE_. 84 | The --seed-output option is ignored to avoid incurring a warning when used with 85 | RAUC. 86 | 87 | When the _CSN_KCAPI_DRIVER_ environment variable is set, that particular 88 | algorithm specifier/driver of the Linux kernel crypto API is used for 89 | calculating SHA256 checksums (via the _AF_ALG_ mechanism). Note that this might 90 | actually be slower than using the default user space software implementation 91 | depending on your hardware. Please run a benchmark before using this in your 92 | application. The source distribution includes the csn-bench tool that can be 93 | used for this purpose. 94 | 95 | # OPERATION 96 | 97 | casync and casync-nano use Content-Defined Chunking (CDC) to split large files 98 | such as disk images into smaller chunks in a way that (statistically) produces the 99 | same chunks even when content moves within the file.
This affords a way to 100 | transfer different versions of the same file efficiently by only transferring a 101 | list of the chunks in the new version and their order to the target. If the 102 | target has an older version of the same file available locally, it stands to 103 | reason that a large percentage of the chunks will have remained the same. Thus, 104 | only the missing ones need to be downloaded, saving a significant amount of 105 | bandwidth. 106 | 107 | The advantage of this approach compared to other binary delta systems is that it 108 | is generic and very simple to reason about, does not require generation of 109 | deltas between each pair of possible versions and allows archiving of old 110 | versions with little marginal storage cost. The downside is that the effective 111 | deltas (i.e. the amount of bytes a client has to download) will likely be larger 112 | than the ones produced by more specialized binary delta systems. 113 | 114 | casync-nano implements the exact same chunking algorithm as casync. The only 115 | parameters of that algorithm are the minimum, target average and maximum chunk 116 | sizes (in practice, these are always set to the default values of 16, 64 and 256 117 | KiB, respectively). Given the same set of parameters, all implementations of 118 | this algorithm will always produce the exact same set of chunks for a given 119 | input file. 120 | 121 | Chunks are identified by their SHA256 checksum, which provides protection 122 | against corruption and also cryptographically binds the output of a 123 | synchronization process to the input index that produced it. For each chunk that 124 | it retrieves, casync-nano verifies that the contents actually hash to the chunk 125 | id. This means that authenticating the .caibx file (for example through some 126 | kind of signature) implicitly authenticates the output image. This check is also 127 | performed for local stores to prevent bit-rot. 
128 | 129 | When starting up, casync-nano runs the chunking algorithm using the parameters 130 | specified in the .caibx file on all local stores that were supplied on the 131 | command line. Since this process is computationally expensive, it also allows 132 | the user to supply the (supposed) result of this process externally by passing 133 | an auxiliary .caibx file for each store (casync-nano calls this "side-loading"). 134 | 135 | The idea here is that the most common application of casync-nano will be an OTA 136 | update system that uses an A/B partition scheme where one partition is the one 137 | the system will currently be executing from and the other one is the one that 138 | will contain the new system image at the end of the synchronization process. 139 | Crucially, the current system partition usually is read-only for resilience 140 | reasons and is thus unmodified from when it was originally created. This means 141 | that since the _current_ system image usually also is the result of a previous 142 | OTA update, we could have saved the .caibx file that produced it on mutable 143 | storage somewhere and supply it for the current synchronization run, avoiding 144 | recomputing the chunk boundaries. 145 | 146 | HTTP chunk stores are expected to follow the casync store structure, that is: 147 | - Individual chunks correspond to individual files 148 | - Each file is compressed using the zstd algorithm and named "_HASH_.cacnk" 149 | where _HASH_ is the lower-case hexadecimal representation of the SHA256 hash 150 | of the _uncompressed_ chunk. 151 | - Each file is placed in a directory below the root that corresponds to the 152 | first four characters of its file name. 153 | 154 | E.g. the path corresponding to a 256 KiB long all-zero-byte chunk would be 155 | /8a39/8a39d2abd3999ab73c34db2476849cddf303ce389b35826850f9a700589b4a90.cacnk.
156 | 157 | casync-nano aims to be resilient in the face of the various issues that can occur on 158 | systems that have unstable or intermittent connectivity: 159 | - HTTP chunk downloads are retried a number of times using exponential backoff 160 | if they or the network fail transiently. 161 | - If a given HTTP store has incurred a number of transient failures that 162 | couldn't be recovered from, it is disabled to avoid hammering it excessively. 163 | - casync-nano checks the content that is already present on the target and skips 164 | any prefix that has already been synchronized, which allows resuming a 165 | synchronization process that has been interrupted, ensuring forward progress. 166 | 167 | In general, casync-nano does not cache individual chunks in memory or elsewhere 168 | to avoid unpredictable memory usage. Chunks are always retrieved from the 169 | specified stores on demand. The only exception is that the previously retrieved 170 | chunk is reused if it repeats in the .caibx file. This happens during long runs 171 | of null bytes, for example. Benchmarking has shown that this is generally 172 | sufficient for the intended applications of casync-nano. 173 | 174 | # HTTP CHUNK STORE ATTRIBUTES 175 | 176 | To specify additional configuration data for a given HTTP chunk store, URI 177 | fragments can be used: 178 | 179 | https://example.com#key1=value1,key2=value2 180 | 181 | Currently, the only valid key is _encrypt_, the value is a string describing 182 | how to retrieve the encryption key. In each case, the 32 byte key is 183 | represented in hexadecimal. 184 | 185 | - _key:HEXSTRING_ - supplies the key literally through the command line. The 186 | usual caveats about this making it visible to other processes on the system 187 | apply.
188 | - _env:NAME_ - read the key from the environment variable _NAME_ 189 | - _file:PATH_ - read the key from the file at _PATH_ 190 | 191 | If specified, casync-nano expects to find .cacnk.enc files encrypted with the 192 | specified key in the respective store instead of regular .cacnk files. 193 | _csn-tool_(1) can be used to convert between .cacnk and .cacnk.enc files. See 194 | the chunk encryption design document in the source tree 195 | (doc/chunk-encryption.md) for more details. 196 | 197 | # EXAMPLES 198 | 199 | Extract image.caibx to /dev/mmcblk0p3, downloading all chunks from example.com: 200 | 201 | csn image.caibx /dev/mmcblk0p3 https://example.com 202 | 203 | Same as before, but avoid downloading chunks multiple times by using the 204 | partially-written-to target as a cache: 205 | 206 | csn image.caibx /dev/mmcblk0p3 /dev/mmcblk0p3 https://example.com 207 | 208 | Extract image.caibx to /dev/mmcblk0p3, using both example.com and 209 | /dev/mmcblk0p2 as sources for chunks, preferring to use /dev/mmcblk0p2 if 210 | possible. Furthermore, assume old.caibx was used to create /dev/mmcblk0p2 211 | previously: 212 | 213 | csn image.caibx /dev/mmcblk0p3 /dev/mmcblk0p2:old.caibx https://example.com 214 | 215 | The latter is the most common application for casync-nano. 216 | 217 | Since casync-nano does not provide a mechanism to generate .caibx files or the 218 | corresponding chunk stores, the original casync tool has to be used for that 219 | purpose: 220 | 221 | casync make --digest=sha256 image.caibx image.img 222 | 223 | # LIMITATIONS 224 | 225 | Indices are internally implemented using sorted arrays and binary search. This 226 | is fine for static indices, but when using the target as a store (which is 227 | continuously updated during synchronization), it causes a certain amount of 228 | overhead. 
However, since this is more of a niche use case, as of now, it does 229 | not really justify adding a more complex/expensive data structure for the other 230 | cases as well. 231 | 232 | # BUG REPORTS 233 | 234 | Please report bugs in casync-nano or errors in this manual page via GitHub 235 | (https://github.com/florolf/casync-nano/issues) or email (_fl@n621.de_). 236 | 237 | The casync compatibility layer only targets RAUC right now. Any incompatibility 238 | is considered a bug. If you encounter any problems or use another update 239 | orchestration system that requires broader casync emulation, please report a 240 | bug. 241 | 242 | # SEE ALSO 243 | _casync_(1)++ 244 | _csn-tool_(1)++ 245 | _RAUC_ (https://rauc.io/) 246 | -------------------------------------------------------------------------------- /doc/csn.1: -------------------------------------------------------------------------------- 1 | .\" Generated by scdoc 1.11.3 2 | .\" Complete documentation for this program is not available as a GNU info page 3 | .ie \n(.g .ds Aq \(aq 4 | .el .ds Aq ' 5 | .nh 6 | .ad l 7 | .\" Begin generated content: 8 | .TH "CSN" "1" "2024-10-24" 9 | .PP 10 | .SH NAME 11 | .PP 12 | csn - casync-nano 13 | .PP 14 | .SH SYNOPSIS 15 | .PP 16 | \fBcsn\fR \fIINDEX\fR \fITARGET\fR \fISTORE\fR.\&.\&.\& 17 | .br 18 | \fBcasync\fR [--store \fIHTTP_URL\fR.\&.\&.\&] [--seed \fIPATH\fR] [--seed-output \fIYESNO\fR] \fBextract\fR \fIINDEX\fR \fITARGET\fR 19 | .PP 20 | .SH DESCRIPTION 21 | .PP 22 | casync-nano is a feature-reduced replacement for casync intended as a building 23 | block for implementing image-based differential updates on embedded systems.\& To 24 | that end, the only operation it supports is the extraction of .\&caibx files that 25 | utilize SHA256 hashes.\& On the other hand, it has some features that help in its 26 | intended application compared to the original casync tool: 27 | .PP 28 | .PD 0 29 | .IP \(bu 4 30 | Side-loading of index files corresponding to a local source 
of chunks.\& 31 | This is useful to avoid slow reindexing of a partition in an A/B update 32 | scheme (see OPERATION and EXAMPLES below).\& 33 | .IP \(bu 4 34 | Support for the kernel crypto API to accelerate hashing operations using 35 | hardware mechanisms that are present on many modern SoCs.\& 36 | .IP \(bu 4 37 | Support for encrypting chunks so that they can be stored on untrusted 38 | third-party HTTP infrastructure.\& 39 | .IP \(bu 4 40 | Vastly reduced memory usage.\& 41 | .PD 42 | .PP 43 | casync-nano is a multi-call binary and provides a compatibility layer that 44 | implements a subset of the \fIcasync\fR(1) \fBextract\fR command to the extent necessary 45 | to support tools such as RAUC when called as casync and presents its own 46 | interface when called as \fBcsn\fR.\& 47 | .PP 48 | .SH OPTIONS 49 | .PP 50 | \fIINDEX\fR 51 | .RS 4 52 | Specifies the .\&caibx file that describes the image to be extracted.\& 53 | .PP 54 | .RE 55 | \fITARGET\fR 56 | .RS 4 57 | Specifies where the extracted image will be written to.\& This will normally 58 | be a block device.\& casync-nano currently does not support writing to MTD 59 | devices.\& 60 | .PP 61 | As a safety mechanism, casync-nano will refuse to create \fITARGET\fR when it 62 | does not exist.\& To extract an image to a regular file, create an empty file 63 | first.\& 64 | .PP 65 | .RE 66 | \fISTORE\fR 67 | .RS 4 68 | A generic source of chunks.\& This can either be a HTTP(S) URL pointing 69 | towards a casync-style chunk store or a path to a local file or block device 70 | that will be broken apart into chunks.\& (See OPERATION below for more details 71 | on how the synchronization process works).\& 72 | .PP 73 | Additional attributes can be set for a HTTP chunk store using URI 74 | fragments, see HTTP CHUNK STORE ATTRIBUTES below for more details.\& 75 | .PP 76 | A local store specification may optionally be followed by a colon and the 77 | path to a .\&caibx file representing the contents of that local 
store.\& If 78 | supplied, this index will be used instead of running the chunking algorithm 79 | on the local store, which is computationally expensive.\& Such a side-loaded 80 | index will be verified such that if it is incorrect for whatever reason 81 | (e.\&g.\& if the local store has bit-rotted or the .\&caibx simply doesn'\&t match 82 | the contents), it will be discarded and the synchronization process 83 | continues as if it hadn'\&t been specified at all.\& 84 | .PP 85 | Normally, when a chunk appears multiple times in \fIINDEX\fR, casync-nano will 86 | query its stores for it multiple times.\& Thus, if it is not contained in a 87 | local store, it will be downloaded over the network multiple times.\& While 88 | this sounds problematic, it empirically does not really happen in practice 89 | in the cases which casync-nano is intended for (since such a redundant 90 | chunk will usually already have been redundant in the previous version of 91 | the image and will thus be present in a local store).\& Still, if you are 92 | concerned about this possibility, you can also supply \fITARGET\fR as an 93 | additional \fISTORE\fR.\& This triggers some special logic that will prevent this 94 | problem by reusing chunks that have been written to \fITARGET\fR already.\& Note 95 | however that because of the data structures involved, this currently 96 | carries a moderate performance penalty.\& 97 | .PP 98 | During synchronization, the specified stores are queried in the order given 99 | on the command line.\& Thus, local stores should appear first.\& 100 | .PP 101 | .RE 102 | The casync compatibility layer accepts getopt-style arguments that map to the 103 | concepts above: Both --store and --seed are equivalent to specifying a \fISTORE\fR.\& 104 | The --seed-output option is ignored to avoid incurring a warning when used with 105 | RAUC.\& 106 | .PP 107 | When the \fICSN_KCAPI_DRIVER\fR environment variable is set, that particular 108 | algorithm specifier/driver of 
the Linux kernel crypto API is used for 109 | calculating SHA256 checksums (via the \fIAF_ALG\fR mechanism).\& Note that this might 110 | actually be slower than using the default user space software implementation 111 | depending on your hardware.\& Please run a benchmark before using this in your 112 | application.\& The source distribution includes the csn-bench tool that can be 113 | used for this purpose.\& 114 | .PP 115 | .SH OPERATION 116 | .PP 117 | casync and casync-nano use Content-Defined Chunking (CDC) to split large files 118 | such as disk images into smaller chunks in a way that (statistically) produces the 119 | same chunks even when content moves within the file.\& This affords a way to 120 | transfer different versions of the same file efficiently by only transferring a 121 | list of the chunks in the new version and their order to the target.\& If the 122 | target has an older version of the same file available locally, it stands to 123 | reason that a large percentage of the chunks will have remained the same.\& Thus, 124 | only the missing ones need to be downloaded, saving a significant amount of 125 | bandwidth.\& 126 | .PP 127 | The advantage of this approach compared to other binary delta systems is that it 128 | is generic and very simple to reason about, does not require generation of 129 | deltas between each pair of possible versions and allows archiving of old 130 | versions with little marginal storage cost.\& The downside is that the effective 131 | deltas (i.\&e.\& the amount of bytes a client has to download) will likely be larger 132 | than the ones produced by more specialized binary delta systems.\& 133 | .PP 134 | casync-nano implements the exact same chunking algorithm as casync.\& The only 135 | parameters of that algorithm are the minimum, target average and maximum chunk 136 | sizes (in practice, these are always set to the default values of 16, 64 and 256 137 | KiB, respectively).\& Given the same set of parameters, all
implementations of 138 | this algorithm will always produce the exact same set of chunks for a given 139 | input file.\& 140 | .PP 141 | Chunks are identified by their SHA256 checksum, which provides protection 142 | against corruption and also cryptographically binds the output of a 143 | synchronization process to the input index that produced it.\& For each chunk that 144 | it retrieves, casync-nano verifies that the contents actually hash to the chunk 145 | id.\& This means that authenticating the .\&caibx file (for example through some 146 | kind of signature) implicitly authenticates the output image.\& This check is also 147 | performed for local stores to prevent bit-rot.\& 148 | .PP 149 | When starting up, casync-nano runs the chunking algorithm using the parameters 150 | specified in the .\&caibx file on all local stores that were supplied on the 151 | command line.\& Since this process is computationally expensive, it also allows 152 | the user to supply the (supposed) result of this process externally by passing 153 | an auxiliary .\&caibx file for each store (casync-nano calls this "side-loading").\& 154 | .PP 155 | The idea here is that the most common application of casync-nano will be an OTA 156 | update system that uses an A/B partition scheme where one partition is the one 157 | the system will currently be executing from and the other one is the one that 158 | will contain the new system image at the end of the synchronization process.\& 159 | Crucially, the current system partition usually is read-only for resilience 160 | reasons and is thus unmodified from when it was originally created.\& This means 161 | that since the \fIcurrent\fR system image usually also is the result of a previous 162 | OTA update, we could have saved the .\&caibx file that produced it on mutable 163 | storage somewhere and supply it for the current synchronization run, avoiding 164 | recomputing the chunk boundaries.\& 165 | .PP 166 | HTTP chunk stores are expected to 
follow the casync store structure, that is: 167 | .PD 0 168 | .IP \(bu 4 169 | Individual chunks correspond to individual files 170 | .IP \(bu 4 171 | Each file is compressed using the zstd algorithm and named "\fIHASH\fR.\&cacnk" 172 | where \fIHASH\fR is the lower-case hexadecimal representation of the SHA256 hash 173 | of the \fIuncompressed\fR chunk.\& 174 | .IP \(bu 4 175 | Each file is placed in a directory below the root that corresponds to the 176 | first four characters of its file name.\& 177 | .PD 178 | .PP 179 | E.\&g.\& the path corresponding to a 256 KiB long all-zero-byte chunk would be 180 | /8a39/8a39d2abd3999ab73c34db2476849cddf303ce389b35826850f9a700589b4a90.\&cacnk.\& 181 | .PP 182 | casync-nano aims to be resilient in the face of the various issues that can occur on 183 | systems that have unstable or intermittent connectivity: 184 | .PD 0 185 | .IP \(bu 4 186 | HTTP chunk downloads are retried a number of times using exponential backoff 187 | if they or the network fail transiently.\& 188 | .IP \(bu 4 189 | If a given HTTP store has incurred a number of transient failures that 190 | couldn'\&t be recovered from, it is disabled to avoid hammering it excessively.\& 191 | .IP \(bu 4 192 | casync-nano checks the content that is already present on the target and skips 193 | any prefix that has already been synchronized, which allows resuming a 194 | synchronization process that has been interrupted, ensuring forward progress.\& 195 | .PD 196 | .PP 197 | In general, casync-nano does not cache individual chunks in memory or elsewhere 198 | to avoid unpredictable memory usage.\& Chunks are always retrieved from the 199 | specified stores on demand.\& The only exception is that the previously retrieved 200 | chunk is reused if it repeats in the .\&caibx file.\& This happens during long runs 201 | of null bytes, for example.\& Benchmarking has shown that this is generally 202 | sufficient for the intended applications of casync-nano.\& 203 | .PP 204 | .SH
HTTP CHUNK STORE ATTRIBUTES 205 | .PP 206 | To specify additional configuration data for a given HTTP chunk store, URI 207 | fragments can be used: 208 | .PP 209 | .RS 4 210 | https://example.\&com#key1=value1,key2=value2 211 | .PP 212 | .RE 213 | Currently, the only valid key is \fIencrypt\fR, the value is a string describing 214 | how to retrieve the encryption key.\& In each case, the 32 byte key is 215 | represented in hexadecimal.\& 216 | .PP 217 | .PD 0 218 | .IP \(bu 4 219 | \fIkey:HEXSTRING\fR - supplies the key literally through the command line.\& The 220 | usual caveats about this making it visible to other processes on the system 221 | apply.\& 222 | .IP \(bu 4 223 | \fIenv:NAME\fR - read the key from the environment variable \fINAME\fR 224 | .IP \(bu 4 225 | \fIfile:PATH\fR - read the key from the file at \fIPATH\fR 226 | .PD 227 | .PP 228 | If specified, casync-nano expects to find .\&cacnk.\&enc files encrypted with the 229 | specified key in the respective store instead of regular .\&cacnk files.\& 230 | \fIcsn-tool\fR(1) can be used to convert between .\&cacnk and .\&cacnk.\&enc files.\& See 231 | the chunk encryption design document in the source tree 232 | (doc/chunk-encryption.\&md) for more details.\& 233 | .PP 234 | .SH EXAMPLES 235 | .PP 236 | Extract image.\&caibx to /dev/mmcblk0p3, downloading all chunks from example.\&com: 237 | .PP 238 | .RS 4 239 | csn image.\&caibx /dev/mmcblk0p3 https://example.\&com 240 | .PP 241 | .RE 242 | Same as before, but avoid downloading chunks multiple times by using the 243 | partially-written-to target as a cache: 244 | .PP 245 | .RS 4 246 | csn image.\&caibx /dev/mmcblk0p3 /dev/mmcblk0p3 https://example.\&com 247 | .PP 248 | .RE 249 | Extract image.\&caibx to /dev/mmcblk0p3, using both example.\&com and 250 | /dev/mmcblk0p2 as sources for chunks, preferring to use /dev/mmcblk0p2 if 251 | possible.\& Furthermore, assume old.\&caibx was used to create /dev/mmcblk0p2 252 | previously: 253 | .PP 254 | .RS 4 
255 | csn image.\&caibx /dev/mmcblk0p3 /dev/mmcblk0p2:old.\&caibx https://example.\&com 256 | .PP 257 | .RE 258 | The latter is the most common application for casync-nano.\& 259 | .PP 260 | Since casync-nano does not provide a mechanism to generate .\&caibx files or the 261 | corresponding chunk stores, the original casync tool has to be used for that 262 | purpose: 263 | .PP 264 | .RS 4 265 | casync make --digest=sha256 image.\&caibx image.\&img 266 | .PP 267 | .RE 268 | .SH LIMITATIONS 269 | .PP 270 | Indices are internally implemented using sorted arrays and binary search.\& This 271 | is fine for static indices, but when using the target as a store (which is 272 | continuously updated during synchronization), it causes a certain amount of 273 | overhead.\& However, since this is more of a niche use case, as of now, it does 274 | not really justify adding a more complex/expensive data structure for the other 275 | cases as well.\& 276 | .PP 277 | .SH BUG REPORTS 278 | .PP 279 | Please report bugs in casync-nano or errors in this manual page via GitHub 280 | (https://github.\&com/florolf/casync-nano/issues) or email (\fIfl@n621.\&de\fR).\& 281 | .PP 282 | The casync compatibility layer only targets RAUC right now.\& Any incompatibility 283 | is considered a bug.\& If you encounter any problems or use another update 284 | orchestration system that requires broader casync emulation, please report a 285 | bug.\& 286 | .PP 287 | .SH SEE ALSO 288 | \fIcasync\fR(1) 289 | .br 290 | \fIcsn-tool\fR(1) 291 | .br 292 | \fIRAUC\fR (https://rauc.\&io/) 293 | -------------------------------------------------------------------------------- /store-http.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include "utils.h" 9 | #include "chunker.h" 10 | #include "chunk.h" 11 | #include "encrypt.h" 12 | 13 | #include "store-http.h" 14 | 15 | #define ERROR_MAX 10u 16 | 
#define CONNECT_TRIES 10 17 | 18 | struct store_http { 19 | struct store s; 20 | 21 | const char *baseurl; 22 | char *url_buf; 23 | 24 | CURL *curl; 25 | char curl_err_buf[CURL_ERROR_SIZE]; 26 | 27 | ZSTD_DStream *zstd; 28 | uint8_t *decode_buffer; 29 | size_t decode_buffer_size, decode_buffer_fill; 30 | 31 | unsigned int error_count; 32 | 33 | bool encrypted; 34 | struct encrypt_ctx encrypt_ctx; 35 | }; 36 | 37 | #define curl_checked_setopt(curl, opt, val, err) do { \ 38 | CURLcode curl_ret; \ 39 | \ 40 | curl_ret = curl_easy_setopt((curl), (opt), (val)); \ 41 | if (curl_ret != CURLE_OK) { \ 42 | u_log(ERR, "setting up curl option %d failed: %s", (opt), curl_easy_strerror(curl_ret)); \ 43 | err; \ 44 | } \ 45 | } while (0) 46 | 47 | enum http_request_state { 48 | HTTP_REQUEST_STARTED, 49 | HTTP_REQUEST_STREAMING, 50 | HTTP_REQUEST_FAILED 51 | }; 52 | 53 | struct http_request_ctx { 54 | struct store_http *hs; 55 | 56 | uint8_t *outbuf; 57 | size_t outbuf_size; 58 | size_t outbuf_written; 59 | 60 | enum http_request_state state; 61 | }; 62 | 63 | static int decode_buffer_absorb(struct store_http *hs, uint8_t *data, size_t len) 64 | { 65 | if (hs->decode_buffer_size - hs->decode_buffer_fill < len) { 66 | void *new_buf; 67 | size_t new_size; 68 | 69 | new_size = hs->decode_buffer_size + len; 70 | if (new_size < hs->decode_buffer_size) { 71 | u_log(ERR, "incrementing decode buffer size %zu by %zu overflows", 72 | hs->decode_buffer_size, len); 73 | return -1; 74 | } 75 | 76 | new_buf = realloc(hs->decode_buffer, new_size); 77 | if (!new_buf) { 78 | u_log(ERR, "increasing decode buffer to %zu bytes failed", new_size); 79 | return -1; 80 | } else { 81 | u_log(DEBUG, "increased decode buffer to %zu bytes", new_size); 82 | } 83 | 84 | hs->decode_buffer = new_buf; 85 | hs->decode_buffer_size = new_size; 86 | } 87 | 88 | if (hs->encrypted) { 89 | if (encrypt_do(&hs->encrypt_ctx, 90 | &hs->decode_buffer[hs->decode_buffer_fill], 91 | data, len) < 0) { 92 | u_log(ERR, 
"decryption failed"); 93 | return -1; 94 | } 95 | } else { 96 | memcpy(&hs->decode_buffer[hs->decode_buffer_fill], data, len); 97 | } 98 | 99 | hs->decode_buffer_fill += len; 100 | 101 | return 0; 102 | } 103 | 104 | #define ZSTD_MAGIC "\x28\xb5\x2f\xfd" 105 | 106 | static ssize_t handle_data(struct http_request_ctx *ctx, uint8_t *data, size_t len) 107 | { 108 | if (ctx->state == HTTP_REQUEST_STARTED) { 109 | if (len < 4) 110 | return 0; 111 | 112 | if (memcmp(data, ZSTD_MAGIC, 4) == 0) { 113 | ctx->state = HTTP_REQUEST_STREAMING; 114 | } else { 115 | u_log(WARN, "data does not have zstd magic number"); 116 | return -1; 117 | } 118 | } 119 | 120 | u_assert(ctx->state == HTTP_REQUEST_STREAMING); 121 | 122 | ZSTD_inBuffer zstd_in = { 123 | .src = data, 124 | .size = len, 125 | .pos = 0 126 | }; 127 | 128 | ZSTD_outBuffer zstd_out = { 129 | .dst = ctx->outbuf, 130 | .size = ctx->outbuf_size, 131 | .pos = ctx->outbuf_written 132 | }; 133 | 134 | size_t ret; 135 | ret = ZSTD_decompressStream(ctx->hs->zstd, &zstd_out, &zstd_in); 136 | if (ZSTD_isError(ret)) { 137 | u_log(ERR, "zstd decompression failed with error: %s", ZSTD_getErrorName(ret)); 138 | return -1; 139 | } 140 | 141 | u_log(DEBUG, "decompression consumed %zu out of %zu available bytes and produced %zu", 142 | zstd_in.pos, zstd_in.size, zstd_out.pos); 143 | 144 | ctx->outbuf_written = zstd_out.pos; 145 | 146 | return zstd_in.pos; 147 | } 148 | 149 | static long get_response_code(CURL *handle) 150 | { 151 | long http_code; 152 | 153 | CURLcode ret = curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &http_code); 154 | if (ret != CURLE_OK) { 155 | u_log(ERR, "getting response code failed: %s", curl_easy_strerror(ret)); 156 | return -1; 157 | } 158 | 159 | return http_code; 160 | } 161 | 162 | static size_t store_http_data_cb(char *ptr, size_t size, size_t nmemb, void *userdata) 163 | { 164 | u_assert(size == 1); 165 | 166 | struct http_request_ctx *ctx = (struct http_request_ctx*) userdata; 167 | 168 | if 
(ctx->state == HTTP_REQUEST_STARTED) { 169 | long http_code = get_response_code(ctx->hs->curl); 170 | if (http_code != 200) { 171 | u_log(DEBUG, "got response code %ld, failing request", http_code); 172 | goto fail; 173 | } 174 | } 175 | 176 | if (ctx->state == HTTP_REQUEST_FAILED) 177 | return 0; 178 | 179 | ssize_t ret; 180 | if (ctx->hs->decode_buffer_fill || ctx->hs->encrypted) { 181 | if (decode_buffer_absorb(ctx->hs, (uint8_t*)ptr, nmemb) < 0) 182 | goto fail; 183 | 184 | ret = handle_data(ctx, ctx->hs->decode_buffer, ctx->hs->decode_buffer_fill); 185 | if (ret < 0) 186 | goto fail; 187 | 188 | // XXX: clang-analyzer flags this, why? 189 | memmove(ctx->hs->decode_buffer, &ctx->hs->decode_buffer[ret], ctx->hs->decode_buffer_fill - ret); 190 | ctx->hs->decode_buffer_fill -= ret; 191 | } else { 192 | ret = handle_data(ctx, (uint8_t*)ptr, nmemb); 193 | if (ret < 0) { 194 | goto fail; 195 | } else if ((size_t)ret < nmemb) { 196 | // if we haven't used up all the data we were given, push it 197 | // into the buffer for the next time around 198 | if (decode_buffer_absorb(ctx->hs, (uint8_t*)&ptr[ret], nmemb - ret) < 0) 199 | goto fail; 200 | } 201 | } 202 | 203 | return nmemb; 204 | 205 | fail: 206 | ctx->state = HTTP_REQUEST_FAILED; 207 | return 0; 208 | } 209 | 210 | static int set_url(struct store_http *hs, uint8_t *id) 211 | { 212 | int ret; 213 | 214 | ret = sprintf(hs->url_buf, "%s/%02x%02x/", hs->baseurl, id[0], id[1]); 215 | u_assert(ret > 0); 216 | 217 | chunk_format_id(&hs->url_buf[ret], id); 218 | strncat(hs->url_buf, ".cacnk", 7); 219 | 220 | if (hs->encrypted) 221 | strncat(hs->url_buf, ".enc", 5); 222 | 223 | curl_checked_setopt(hs->curl, CURLOPT_URL, hs->url_buf, return -1); 224 | 225 | return 0; 226 | } 227 | 228 | static bool curl_is_transient_error(CURLcode code) 229 | { 230 | return code == CURLE_COULDNT_CONNECT || 231 | code == CURLE_COULDNT_RESOLVE_HOST || 232 | code == CURLE_OPERATION_TIMEDOUT; 233 | } 234 | 235 | static CURLcode 
retried_curl_easy_perform(struct store_http *hs) 236 | { 237 | CURLcode ret; 238 | int remaining = CONNECT_TRIES; 239 | useconds_t wait_us = 200 * 1000; 240 | 241 | do { 242 | ret = curl_easy_perform(hs->curl); 243 | 244 | if (!curl_is_transient_error(ret)) 245 | break; 246 | 247 | remaining--; 248 | u_log(WARN, "connection failed (%s%s%s), retrying %d more times", 249 | curl_easy_strerror(ret), 250 | hs->curl_err_buf[0] ? ": " : "", hs->curl_err_buf, 251 | remaining); 252 | 253 | usleep(wait_us); 254 | wait_us = MIN(2*wait_us, 10ul*1000*1000); 255 | } while (remaining); 256 | 257 | return ret; 258 | } 259 | 260 | static ssize_t increase_error_counter(struct store_http *hs) 261 | { 262 | hs->error_count++; 263 | u_log(WARN, "increasing error counter to %d", hs->error_count); 264 | 265 | if (hs->error_count > ERROR_MAX) { 266 | u_log(ERR, "error counter exceeded %d, killing store", ERROR_MAX); 267 | return -1; 268 | } 269 | 270 | return 0; 271 | } 272 | 273 | static ssize_t store_http_get_chunk(struct store *s, uint8_t *id, uint8_t *out, size_t out_max) 274 | { 275 | struct store_http *hs = (struct store_http*) s; 276 | 277 | hs->decode_buffer_fill = 0; 278 | 279 | ZSTD_initDStream(hs->zstd); 280 | 281 | if (set_url(hs, id) < 0) { 282 | u_log(ERR, "setting request URL failed"); 283 | return -1; 284 | } 285 | 286 | if (hs->encrypted) { 287 | // uses the first 24 bytes of the id as the nonce 288 | if (encrypt_restart(&hs->encrypt_ctx, id) < 0) { 289 | u_log(ERR, "failed to re-initialize encryption"); 290 | return -1; 291 | } 292 | } 293 | 294 | struct http_request_ctx ctx = { 295 | .hs = hs, 296 | .outbuf = out, 297 | .outbuf_size = out_max, 298 | .outbuf_written = 0, 299 | .state = HTTP_REQUEST_STARTED 300 | }; 301 | curl_checked_setopt(hs->curl, CURLOPT_WRITEDATA, &ctx, return -1); 302 | 303 | u_log(DEBUG, "trying to fetch chunk from '%s'", hs->url_buf); 304 | 305 | CURLcode curl_ret; 306 | curl_ret = retried_curl_easy_perform(hs); 307 | 308 | if 
(curl_is_transient_error(curl_ret)) { 309 | u_log(WARN, "transient transfer failure, increasing error counter"); 310 | return increase_error_counter(hs); 311 | } 312 | 313 | /* Write errors are fine, they are used by the write function to abort 314 | * the request if it decides that it would never be happy with it. 315 | * 316 | * We'll handle this below. 317 | */ 318 | if (curl_ret != CURLE_OK && curl_ret != CURLE_WRITE_ERROR) { 319 | u_log(ERR, "curl_easy_perform failed: %s%s%s", 320 | curl_easy_strerror(curl_ret), 321 | hs->curl_err_buf[0] ? ": " : "", hs->curl_err_buf); 322 | return -1; 323 | } 324 | 325 | long http_code; 326 | http_code = get_response_code(hs->curl); 327 | if (http_code < 0) { 328 | u_log(ERR, "getting response code failed"); 329 | return -1; 330 | } 331 | 332 | if (http_code == 404 || http_code == 410) { 333 | u_log(DEBUG, "got %ld from server", http_code); 334 | return 0; 335 | } 336 | 337 | if (http_code >= 400) { 338 | u_log(WARN, "got unexpected HTTP response code %ld, increasing error counter", http_code); 339 | return increase_error_counter(hs); 340 | } 341 | 342 | /* If we get here and have not started processing data or failed to do 343 | * so, the HTTP request went through fine, but something went wrong 344 | * when decompressing the data. 345 | * 346 | * Not good, but could be a fluke, return 0. 
347 | */ 348 | if (ctx.state == HTTP_REQUEST_STARTED || ctx.state == HTTP_REQUEST_FAILED) { 349 | u_log(WARN, "request for '%s' was successful, but response data was invalid", 350 | hs->url_buf); 351 | 352 | return 0; 353 | } 354 | 355 | if (hs->decode_buffer_fill) { 356 | u_log(WARN, "decode buffer still contains %zu bytes at end of request", 357 | hs->decode_buffer_fill); 358 | } 359 | 360 | // everything went fine: reset the error counter 361 | hs->error_count = 0; 362 | 363 | uint8_t actual_id[CHUNK_ID_LEN]; 364 | chunk_calculate_id(out, ctx.outbuf_written, actual_id); 365 | 366 | if (memcmp(actual_id, id, CHUNK_ID_LEN) != 0) { 367 | char id_str[CHUNK_ID_STRLEN], actual_id_str[CHUNK_ID_STRLEN]; 368 | 369 | chunk_format_id(id_str, id); 370 | chunk_format_id(actual_id_str, actual_id); 371 | 372 | u_log(WARN, "chunk id mismatch (expected %s, got %s)", 373 | id_str, actual_id_str); 374 | 375 | return 0; 376 | } 377 | 378 | return ctx.outbuf_written; 379 | } 380 | 381 | static void store_http_free(struct store *s) 382 | { 383 | struct store_http *hs = (struct store_http*) s; 384 | 385 | free((void*)hs->baseurl); 386 | free(hs->url_buf); 387 | free(hs->decode_buffer); 388 | 389 | ZSTD_freeDStream(hs->zstd); 390 | curl_easy_cleanup(hs->curl); 391 | 392 | if (hs->encrypted) 393 | encrypt_close(&hs->encrypt_ctx); 394 | 395 | free(hs); 396 | } 397 | 398 | static int store_handle_params(struct store_http *hs, char *args) 399 | { 400 | char *p = args; 401 | 402 | while (*p) { 403 | char *next; 404 | next = strchr(p, ','); 405 | 406 | if (next) 407 | *next = 0; 408 | 409 | if (startswith(p, "encrypt=")) { 410 | uint8_t key[32]; 411 | if (encrypt_parse_keyspec(key, p+strlen("encrypt=")) < 0) { 412 | u_log(ERR, "parsing encryption config failed"); 413 | return -1; 414 | } 415 | 416 | if (encrypt_init(&hs->encrypt_ctx, key) < 0) { 417 | u_log(ERR, "failed to initialize encryption context"); 418 | return -1; 419 | } 420 | 421 | hs->encrypted = true; 422 | } else { 423 | 
u_log(ERR, "unhandled store parameters: %s", p); 424 | return -1; 425 | } 426 | 427 | if (next) 428 | p = next+1; 429 | else 430 | break; 431 | } 432 | 433 | return 0; 434 | } 435 | 436 | #define CHUNK_SUFFIX_MAX_LEN (strlen("/8a39/8a39d2abd3999ab73c34db2476849cddf303ce389b35826850f9a700589b4a90.cacnk.enc")) 437 | 438 | struct store *store_http_new(const char *baseurl) 439 | { 440 | u_assert(baseurl); 441 | 442 | if (curl_global_init(CURL_GLOBAL_ALL) != 0) { 443 | u_log(ERR, "initializing curl failed"); 444 | return NULL; 445 | } 446 | 447 | struct store_http *hs; 448 | hs = calloc(1, sizeof(*hs)); 449 | u_notnull(hs, return NULL); 450 | 451 | char *tmp = strdup(baseurl); 452 | u_notnull(tmp, goto err_hs); 453 | 454 | char *p; 455 | p = strrchr(tmp, '#'); 456 | if (p) { 457 | if (store_handle_params(hs, p+1) < 0) { 458 | u_log(ERR, "failed to parse store parameters"); 459 | free(tmp); 460 | 461 | goto err_hs; 462 | } 463 | 464 | *p = 0; 465 | } 466 | 467 | if ((p = strrchr(tmp, '/')) && *(p+1) == 0) 468 | *p = 0; 469 | 470 | hs->baseurl = tmp; 471 | 472 | hs->url_buf = malloc(strlen(hs->baseurl) + CHUNK_SUFFIX_MAX_LEN + 1); 473 | u_notnull(hs->url_buf, goto err_baseurl); 474 | 475 | snprintf(hs->s.name, sizeof(hs->s.name), "%s", baseurl); 476 | hs->s.free = store_http_free; 477 | hs->s.get_chunk = store_http_get_chunk; 478 | 479 | hs->curl = curl_easy_init(); 480 | u_notnull(hs->curl, goto err_url_buf); 481 | 482 | curl_checked_setopt(hs->curl, CURLOPT_WRITEFUNCTION, 483 | store_http_data_cb, goto err_curl); 484 | 485 | curl_checked_setopt(hs->curl, CURLOPT_ERRORBUFFER, 486 | hs->curl_err_buf, goto err_curl); 487 | 488 | curl_checked_setopt(hs->curl, CURLOPT_FOLLOWLOCATION, 489 | (long)1, goto err_curl); 490 | 491 | curl_checked_setopt(hs->curl, CURLOPT_LOW_SPEED_TIME, 492 | (long)15, goto err_curl); 493 | 494 | curl_checked_setopt(hs->curl, CURLOPT_LOW_SPEED_LIMIT, 495 | (long)10, goto err_curl); 496 | 497 | hs->zstd = ZSTD_createDStream(); 498 | 
u_notnull(hs->zstd, goto err_curl); 499 | 500 | return (struct store*)hs; 501 | 502 | err_curl: 503 | curl_easy_cleanup(hs->curl); 504 | 505 | err_url_buf: 506 | free((void*)hs->url_buf); 507 | 508 | err_baseurl: 509 | free((void*)hs->baseurl); 510 | 511 | err_hs: 512 | free(hs); 513 | 514 | return NULL; 515 | } 516 | -------------------------------------------------------------------------------- /test/e2e/csn_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import hashlib 3 | import pyzstd 4 | import re 5 | import subprocess 6 | import tempfile 7 | import logging 8 | 9 | from http.server import HTTPServer, BaseHTTPRequestHandler 10 | from threading import Thread 11 | from pathlib import Path 12 | from typing import ClassVar, Iterable, List, Self, Dict, Union, Optional 13 | from collections import defaultdict 14 | 15 | from Crypto.Cipher import ChaCha20 16 | 17 | class Chunk: 18 | def __init__(self, data: bytes): 19 | self.data = data 20 | self.digest = hashlib.sha256(data).digest() 21 | 22 | @property 23 | def hexdigest(self) -> str: 24 | return self.digest.hex() 25 | 26 | @classmethod 27 | def make(cls, size: int, start_byte: bytes = b"\x00") -> Self: 28 | """ 29 | Create a bytestring that when chunked with the default parameters ends 30 | up as exactly one chunk. 31 | 32 | This works because the 48-byte sequence (i.e. one buzhash window) "XX 33 | 00 00 00 ..." never produces a break and "00 00 00 ... 05 8b" always 34 | does. 35 | """ 36 | 37 | assert 16*1024 <= size <= 256*1024 38 | assert len(start_byte) == 1 39 | 40 | # Special case: This is a max-sized chunk so we'll always get a 41 | # break-point here. We could of course just use the break pattern as 42 | # for all other sizes, but let's exercise this code path as well. 
43 | if size == 256*1024: 44 | return cls(start_byte + b"\x00" * (size-1)) 45 | 46 | break_pattern = b"\x00" * 46 + b"\x05\x8b" 47 | 48 | return cls(start_byte + 49 | b"\x00" * (size-1-48) + 50 | break_pattern) 51 | 52 | class ChunkRequestHandler(BaseHTTPRequestHandler): 53 | request_count: ClassVar[defaultdict[str, int]] = defaultdict(int) 54 | misses: ClassVar[Dict[str, int]] = defaultdict(int) 55 | 56 | data: ClassVar[Dict[str, Chunk]] = {} 57 | encryption_key: ClassVar[Optional[bytes]] = None 58 | 59 | CHUNK_PATH_PATTERN = re.compile(r"/([0-9a-f]{4})/(\1[0-9a-f]{60})\.(.*)") 60 | 61 | def do_GET(self): 62 | m = self.CHUNK_PATH_PATTERN.fullmatch(self.path) 63 | if not m: 64 | self.send_error(404) 65 | return 66 | 67 | expected_ext = "cacnk" 68 | if self.__class__.encryption_key is not None: 69 | expected_ext = "cacnk.enc" 70 | 71 | _, cid, ext = m.groups() 72 | if cid not in self.__class__.data or ext != expected_ext: 73 | self.__class__.misses[cid] +=1 74 | 75 | self.send_error(404) 76 | return 77 | 78 | self.__class__.request_count[cid] += 1 79 | 80 | self.send_response(200) 81 | self.end_headers() 82 | 83 | chunk = self.__class__.data[cid] 84 | 85 | data = pyzstd.compress(chunk.data) 86 | if self.__class__.encryption_key is not None: 87 | cipher = ChaCha20.new( 88 | key=self.__class__.encryption_key, 89 | nonce=chunk.digest[0:24] 90 | ) 91 | 92 | data = cipher.encrypt(data) 93 | 94 | self.wfile.write(data) 95 | 96 | @classmethod 97 | def reset(cls): 98 | cls.misses.clear() 99 | cls.request_count.clear() 100 | cls.data = {} 101 | cls.encryption_key = None 102 | 103 | @classmethod 104 | def set_data(cls, chunks: Iterable[Chunk]): 105 | cls.data = { 106 | chunk.hexdigest: chunk for chunk in chunks 107 | } 108 | 109 | @classmethod 110 | def set_encryption(cls, key: bytes): 111 | cls.encryption_key = key 112 | 113 | class Target: 114 | def __init__(self, size: int): 115 | self.f = tempfile.NamedTemporaryFile(mode='wb+', suffix='.img') 116 | self.f.truncate(size) 
117 | self.size = size 118 | 119 | def write(self, data: bytes, pos: int = 0): 120 | assert pos + len(data) <= self.size 121 | 122 | self.f.seek(pos) 123 | self.f.write(data) 124 | self.f.flush() 125 | 126 | def path(self) -> Path: 127 | return Path(self.f.name) 128 | 129 | def write_chunks(self, chunks: List[Chunk]): 130 | self.f.seek(0) 131 | for chunk in chunks: 132 | self.f.write(chunk.data) 133 | 134 | self.f.flush() 135 | 136 | def check_chunks(self, chunks: List[Chunk]) -> bool: 137 | self.f.seek(0) 138 | offset = 0 139 | 140 | for chunk in chunks: 141 | l = len(chunk.data) 142 | 143 | td = self.f.read(l) 144 | if td != chunk.data: 145 | logging.error('chunk mismatch at offset %d', offset) 146 | return False 147 | 148 | offset += l 149 | 150 | return True 151 | 152 | def make_caibx(chunks: List[Chunk]) -> tempfile.NamedTemporaryFile: 153 | f = tempfile.NamedTemporaryFile(mode='wb+', suffix='.caibx') 154 | 155 | words: List[Union[int, bytes]] = [] 156 | 157 | # index header 158 | words.extend([ 159 | 0x30, # length 160 | 0x96824d9c7b129ff9, # magic 161 | 0x9000000000000000, # flags 162 | 16*1024, 64*1024, 256*1024 # min/avg/max chunk size 163 | ]) 164 | 165 | # table 166 | words.extend([ 167 | 0xffffffffffffffff, # indefinite length 168 | 0xe75b9e112f17417d, # magic 169 | ]) 170 | 171 | offset = 0 172 | for chunk in chunks: 173 | offset += len(chunk.data) 174 | words.append(offset) 175 | words.append(chunk.digest) 176 | 177 | # footer 178 | words.extend([ 179 | 0, 0, 180 | 0x30, # table offset 181 | 16 + (len(chunks)+1) * 40, 182 | 0x4b4f050e5549ecd1 # end marker 183 | ]) 184 | 185 | for word in words: 186 | if type(word) is int: 187 | f.write(word.to_bytes(length=8, byteorder='little')) 188 | elif type(word) is bytes: 189 | f.write(word) 190 | 191 | f.flush() 192 | 193 | return f 194 | 195 | @pytest.fixture(scope='module') 196 | def chunk_server(): 197 | httpd = HTTPServer(("", 8080), ChunkRequestHandler) 198 | 199 | server_thread = 
Thread(target=httpd.serve_forever, daemon=True) 200 | server_thread.start() 201 | 202 | yield httpd 203 | 204 | httpd.shutdown() 205 | 206 | def run_csn(caibx: Path, target: Target, 207 | local_store: Optional[Target] = None, local_index: Optional[Path] = None, 208 | http_store: bool = False, http_store_attr: Optional[Dict[str, str]] = None, 209 | should_fail: bool = False): 210 | 211 | cmd = ['csn', str(caibx), str(target.path())] 212 | if local_store: 213 | if local_index: 214 | cmd.append('%s:%s' % (str(local_store.path()), str(local_index))) 215 | else: 216 | cmd.append(str(local_store.path())) 217 | 218 | if http_store: 219 | url = 'http://127.0.0.1:8080' 220 | if http_store_attr: 221 | url += '#' 222 | for k, v in sorted(http_store_attr.items()): 223 | url += '%s=%s' % (k, v) 224 | 225 | cmd.append(url) 226 | 227 | result = subprocess.run(cmd) 228 | if should_fail: 229 | assert result.returncode != 0 230 | else: 231 | assert result.returncode == 0 232 | 233 | def test_regular(chunk_server): 234 | """ 235 | Baseline test just using a single HTTP source 236 | """ 237 | 238 | del chunk_server 239 | 240 | chunks = [ 241 | Chunk.make(32*1024, start_byte = b'\x01'), 242 | Chunk.make(32*1024+1, start_byte = b'\x02'), 243 | Chunk.make(256*1024), 244 | Chunk.make(32*1024, start_byte = b'\x03'), 245 | ] 246 | 247 | ChunkRequestHandler.reset() 248 | ChunkRequestHandler.set_data(chunks) 249 | 250 | caibx = make_caibx(chunks) 251 | 252 | t = Target(1024*1024) 253 | run_csn(Path(caibx.name), t, http_store=True) 254 | assert t.check_chunks(chunks) 255 | 256 | def test_regular_encrypted(chunk_server): 257 | """ 258 | test_regular, but with chunk encryption 259 | """ 260 | 261 | del chunk_server 262 | 263 | chunks = [ 264 | Chunk.make(32*1024, start_byte = b'\x01'), 265 | Chunk.make(32*1024+1, start_byte = b'\x02'), 266 | Chunk.make(256*1024), 267 | Chunk.make(32*1024, start_byte = b'\x03'), 268 | ] 269 | 270 | key = bytearray() 271 | key.extend(range(0, 32)) 272 | key = 
bytes(key) 273 | 274 | ChunkRequestHandler.reset() 275 | ChunkRequestHandler.set_data(chunks) 276 | ChunkRequestHandler.set_encryption(key) 277 | 278 | caibx = make_caibx(chunks) 279 | 280 | t = Target(1024*1024) 281 | run_csn(Path(caibx.name), t, http_store=True, http_store_attr={ 282 | 'encrypt': 'key:' + key.hex() 283 | }) 284 | assert t.check_chunks(chunks) 285 | 286 | def test_resumption(chunk_server): 287 | """ 288 | Make sure that resuming a synchronization process does not redundantly 289 | fetch data 290 | """ 291 | 292 | del chunk_server 293 | 294 | chunks = [ 295 | Chunk.make(32*1024, start_byte = b'\x01'), 296 | Chunk.make(32*1024, start_byte = b'\x02'), 297 | Chunk.make(32*1024, start_byte = b'\x03'), 298 | ] 299 | 300 | ChunkRequestHandler.reset() 301 | ChunkRequestHandler.set_data(chunks[2:]) 302 | 303 | caibx = make_caibx(chunks) 304 | 305 | t = Target(1024*1024) 306 | 307 | # Simulate a partial update where the first two chunks have already been 308 | # written. 309 | t.write_chunks(chunks[0:2]) 310 | 311 | run_csn(Path(caibx.name), t, http_store=True) 312 | assert t.check_chunks(chunks) 313 | 314 | def test_no_repeated_fetch(chunk_server): 315 | """ 316 | Chunks that are repeated consecutively should not incur HTTP fetches 317 | """ 318 | 319 | del chunk_server 320 | 321 | duplicate = Chunk.make(32*1024, start_byte = b'\x03') 322 | chunks = [ 323 | Chunk.make(32*1024, start_byte = b'\x01'), 324 | Chunk.make(32*1024, start_byte = b'\x02'), 325 | duplicate, 326 | duplicate 327 | ] 328 | 329 | ChunkRequestHandler.reset() 330 | ChunkRequestHandler.set_data(chunks) 331 | 332 | caibx = make_caibx(chunks) 333 | 334 | t = Target(1024*1024) 335 | run_csn(Path(caibx.name), t, http_store=True) 336 | assert t.check_chunks(chunks) 337 | 338 | assert ChunkRequestHandler.request_count[duplicate.hexdigest] == 1 339 | 340 | def test_target_as_source(chunk_server): 341 | """ 342 | When using the target as a source too, even non-consecutive chunk 343 | repetitions 
should not incur HTTP fetches 344 | """ 345 | 346 | del chunk_server 347 | 348 | duplicate = Chunk.make(32*1024, start_byte = b'\x03') 349 | chunks = [ 350 | Chunk.make(32*1024, start_byte = b'\x01'), 351 | duplicate, 352 | Chunk.make(32*1024, start_byte = b'\x02'), 353 | duplicate 354 | ] 355 | 356 | ChunkRequestHandler.reset() 357 | ChunkRequestHandler.set_data(chunks) 358 | 359 | caibx = make_caibx(chunks) 360 | 361 | t = Target(1024*1024) 362 | run_csn(Path(caibx.name), t, http_store=True, local_store=t) 363 | assert t.check_chunks(chunks) 364 | 365 | assert ChunkRequestHandler.request_count[duplicate.hexdigest] == 1 366 | 367 | def test_chunk_corruption(chunk_server): 368 | """ 369 | Make sure casync-nano catches server-side chunk corruption 370 | """ 371 | 372 | del chunk_server 373 | 374 | chunks = [ 375 | Chunk.make(32*1024, start_byte = b'\x01'), 376 | Chunk.make(32*1024, start_byte = b'\x02'), 377 | Chunk.make(32*1024, start_byte = b'\x03'), 378 | ] 379 | 380 | ChunkRequestHandler.reset() 381 | ChunkRequestHandler.set_data(chunks) 382 | 383 | caibx = make_caibx(chunks) 384 | 385 | # Corrupt the chunk data so that the server provides data that is 386 | # compressed correctly and of the correct length, but doesn't match its 387 | # checksum. 
388 | chunks[0].data = b'\xaa' * 32 * 1024 389 | 390 | t = Target(1024*1024) 391 | run_csn(Path(caibx.name), t, http_store=True, should_fail=True) 392 | 393 | def test_ab_regular(chunk_server): 394 | """ 395 | Test that the common A/B update path only fetches changed chunks 396 | """ 397 | 398 | del chunk_server 399 | 400 | chunks_a = [ 401 | Chunk.make(256*1024, start_byte = b'\x01'), 402 | Chunk.make(32*1024, start_byte = b'\x02'), 403 | 404 | Chunk.make(32*1024, start_byte = b'\x03'), 405 | ] 406 | 407 | chunks_b = [ 408 | Chunk.make(256*1024, start_byte = b'\x01'), 409 | Chunk.make(32*1024, start_byte = b'\x02'), 410 | 411 | Chunk.make(32*1024, start_byte = b'\x04'), 412 | ] 413 | 414 | ChunkRequestHandler.reset() 415 | ChunkRequestHandler.set_data(chunks_b[-1:]) 416 | 417 | local = Target(1024*1024) 418 | local.write_chunks(chunks_a) 419 | 420 | caibx = make_caibx(chunks_b) 421 | 422 | t = Target(1024*1024) 423 | run_csn(Path(caibx.name), t, local_store=local, http_store=True) 424 | assert t.check_chunks(chunks_b) 425 | 426 | def test_ab_sideload(chunk_server): 427 | """ 428 | Same as test_ab_regular but with sideloading the index 429 | """ 430 | 431 | del chunk_server 432 | 433 | chunks_a = [ 434 | Chunk.make(256*1024, start_byte = b'\x01'), 435 | Chunk.make(32*1024, start_byte = b'\x02'), 436 | 437 | Chunk.make(32*1024, start_byte = b'\x03'), 438 | ] 439 | 440 | chunks_b = [ 441 | Chunk.make(256*1024, start_byte = b'\x01'), 442 | Chunk.make(32*1024, start_byte = b'\x02'), 443 | 444 | Chunk.make(32*1024, start_byte = b'\x04'), 445 | ] 446 | 447 | ChunkRequestHandler.reset() 448 | ChunkRequestHandler.set_data(chunks_b[-1:]) 449 | 450 | local = Target(1024*1024) 451 | local.write_chunks(chunks_a) 452 | 453 | caibx_a = make_caibx(chunks_a) 454 | caibx_b = make_caibx(chunks_b) 455 | 456 | t = Target(1024*1024) 457 | run_csn(Path(caibx_b.name), t, local_store=local, 458 | local_index=Path(caibx_a.name), http_store=True) 459 | assert t.check_chunks(chunks_b) 460 | 
461 | def test_ab_sideload_bitrot(chunk_server): 462 | """ 463 | Same as test_ab_sideload but simulate bitrot in the local chunk store 464 | """ 465 | 466 | del chunk_server 467 | 468 | chunks_a = [ 469 | Chunk.make(32*1024, start_byte = b'\x01'), 470 | Chunk.make(32*1024, start_byte = b'\x02'), 471 | 472 | Chunk.make(32*1024, start_byte = b'\x03'), 473 | ] 474 | 475 | chunks_b = [ 476 | Chunk.make(32*1024, start_byte = b'\x01'), 477 | Chunk.make(32*1024, start_byte = b'\x02'), 478 | 479 | Chunk.make(32*1024, start_byte = b'\x04'), 480 | ] 481 | 482 | ChunkRequestHandler.reset() 483 | ChunkRequestHandler.set_data([chunks_b[0], chunks_b[2]]) 484 | 485 | local = Target(1024*1024) 486 | local.write_chunks(chunks_a) 487 | 488 | # corrupt the first chunk 489 | local.write(b'\xde\xad\xbe\xef', 100) 490 | 491 | caibx_a = make_caibx(chunks_a) 492 | caibx_b = make_caibx(chunks_b) 493 | 494 | t = Target(1024*1024) 495 | run_csn(Path(caibx_b.name), t, local_store=local, 496 | local_index=Path(caibx_a.name), http_store=True) 497 | assert t.check_chunks(chunks_b) 498 | -------------------------------------------------------------------------------- /test/chunker_test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include "utils.h" 10 | #include "chunker.h" 11 | #include "xorshift32.h" 12 | 13 | static int create_chunker(void **state) 14 | { 15 | *state = test_malloc(sizeof(struct chunker)); 16 | 17 | struct chunker *c = (struct chunker*)*state; 18 | 19 | struct chunker_params params = { 20 | .min_size = CHUNKER_SIZE_AVG_DEFAULT / 4, 21 | .avg_size = CHUNKER_SIZE_AVG_DEFAULT, 22 | .max_size = CHUNKER_SIZE_AVG_DEFAULT * 4, 23 | }; 24 | 25 | chunker_init(c, ¶ms); 26 | 27 | return 0; 28 | } 29 | 30 | static int destroy_chunker(void **state) 31 | { 32 | test_free(*state); 33 | return 0; 34 | } 35 | 36 | #define RANDOM_SEED 0xAFFED00F 37 | #define 
RANDOM_SIZE (100ull * 1024 * 1024) 38 | static const size_t random_chunk_boundaries[] = { 39 | 243768, 265304, 309942, 409375, 486572, 525729, 575730, 615354, 658735, 707616, 40 | 745028, 807648, 857443, 955152, 974702, 1071516, 1184150, 1290448, 1401104, 1455201, 41 | 1665844, 1705922, 1767765, 1822814, 1880834, 1900815, 1922155, 2095042, 2155294, 42 | 2226503, 2246791, 2348781, 2390659, 2409422, 2429771, 2449277, 2494712, 2555741, 43 | 2584908, 2704206, 2746401, 2977696, 3019812, 3046126, 3098768, 3251838, 3279507, 44 | 3444595, 3470370, 3607304, 3644965, 3814531, 3883513, 3982410, 4023592, 4054169, 45 | 4085987, 4113606, 4161195, 4259960, 4300217, 4399028, 4431796, 4523952, 4674653, 46 | 4716637, 4759628, 4856482, 4927556, 5082318, 5225836, 5331006, 5375730, 5401630, 47 | 5439736, 5599662, 5645047, 5685765, 5739270, 5833978, 5899732, 6020501, 6099506, 48 | 6118819, 6239946, 6266464, 6305618, 6369443, 6393199, 6465630, 6521644, 6598242, 49 | 6619420, 6696468, 6716821, 6736233, 6825801, 6849690, 6894552, 6974605, 6991691, 50 | 7045137, 7116482, 7153120, 7235076, 7267491, 7368424, 7406173, 7443627, 7492706, 51 | 7621063, 7638968, 7792691, 7873114, 7896945, 7951860, 8044247, 8096475, 8183954, 52 | 8221961, 8311131, 8382668, 8475430, 8520060, 8566341, 8592529, 8621358, 8694402, 53 | 8857333, 8897308, 8944492, 8967926, 9072418, 9141548, 9217692, 9327557, 9364954, 54 | 9417768, 9519814, 9569310, 9607327, 9849112, 9869469, 9931277, 9998368, 10121062, 55 | 10255285, 10303758, 10362165, 10409836, 10440730, 10466709, 10499032, 10526969, 56 | 10555272, 10627323, 10713888, 10736652, 10774032, 10855338, 10888397, 11150541, 57 | 11242616, 11446226, 11581253, 11644237, 11699361, 11764855, 11817548, 11945705, 58 | 11971076, 12004845, 12062326, 12093020, 12166696, 12198804, 12336949, 12384838, 59 | 12498123, 12536640, 12572614, 12682177, 12737709, 12766166, 12816298, 12853496, 60 | 13068918, 13161792, 13221396, 13249143, 13372783, 13416217, 13456461, 13511241, 61 | 13563172, 
13585286, 13629176, 13668571, 13724459, 13783672, 13877082, 13923609, 62 | 13955979, 14019485, 14074221, 14134242, 14396386, 14453307, 14529905, 14574010, 63 | 14599164, 14647273, 14740570, 14844179, 14905350, 14930987, 15040612, 15071209, 64 | 15149667, 15197274, 15255264, 15339865, 15357450, 15382048, 15448371, 15469299, 65 | 15596273, 15629434, 15649984, 15739632, 15795833, 15921200, 15953032, 15976164, 66 | 16151885, 16203306, 16307391, 16381675, 16457806, 16524860, 16576350, 16647125, 67 | 16758274, 16889736, 16985058, 17070626, 17143871, 17163573, 17222498, 17306765, 68 | 17409286, 17427501, 17497000, 17517257, 17592298, 17669328, 17715288, 17775523, 69 | 17862737, 17934524, 18027022, 18052388, 18069639, 18086553, 18111260, 18172046, 70 | 18379690, 18413756, 18555453, 18589359, 18636214, 18774356, 18845687, 18893834, 71 | 18917458, 18969184, 18999255, 19038030, 19093245, 19144426, 19193088, 19238654, 72 | 19255345, 19294746, 19394058, 19432047, 19585657, 19613158, 19651734, 19751521, 73 | 19772203, 19963203, 19981852, 20069286, 20104136, 20146228, 20262172, 20333558, 74 | 20387114, 20414446, 20546533, 20573193, 20647245, 20733229, 20771238, 20826356, 75 | 20859702, 20877916, 20907144, 20969843, 20992394, 21024470, 21208153, 21267688, 76 | 21308992, 21374920, 21490836, 21515948, 21591484, 21745465, 21878127, 22070496, 77 | 22236221, 22303383, 22339484, 22468278, 22515707, 22559964, 22581082, 22718709, 78 | 22888531, 22996783, 23087481, 23131527, 23222630, 23257383, 23278203, 23374366, 79 | 23395197, 23425987, 23474287, 23571919, 23694401, 23848884, 23932389, 24027110, 80 | 24209458, 24290225, 24323912, 24368313, 24423619, 24593826, 24645867, 24908011, 81 | 24925971, 24961003, 25043152, 25104116, 25204626, 25307135, 25341499, 25404935, 82 | 25450687, 25471353, 25571747, 25617181, 25734464, 25756065, 25849378, 25908511, 83 | 25949386, 26091001, 26353145, 26381441, 26428817, 26501675, 26571182, 26654007, 84 | 26704713, 26744618, 26778898, 26801453, 26873277, 
26932974, 26969780, 27030424, 85 | 27063624, 27126899, 27293370, 27319024, 27376052, 27442473, 27477806, 27552991, 86 | 27575240, 27607480, 27703332, 27736458, 27757743, 27889149, 27955043, 28100090, 87 | 28160649, 28205428, 28318694, 28345786, 28443731, 28614131, 28836070, 28973255, 88 | 28995654, 29067144, 29209776, 29351528, 29472986, 29521373, 29556158, 29595752, 89 | 29741977, 29763573, 29818574, 29855482, 29887933, 29923891, 30013763, 30063166, 90 | 30192308, 30229199, 30246414, 30289529, 30319618, 30342361, 30420496, 30481742, 91 | 30539649, 30585582, 30625666, 30677795, 30713298, 30794811, 30853217, 30961366, 92 | 30998971, 31056939, 31114573, 31193007, 31226206, 31244178, 31296797, 31322492, 93 | 31381249, 31429824, 31488811, 31515414, 31564159, 31655237, 31699055, 31828648, 94 | 31910341, 31975355, 32063870, 32129676, 32170387, 32191984, 32211867, 32468631, 95 | 32517099, 32539731, 32627014, 32659671, 32830078, 32866503, 32962267, 32984299, 96 | 33090266, 33125390, 33166894, 33187236, 33219489, 33302357, 33336742, 33429169, 97 | 33448251, 33577905, 33618850, 33787342, 33903799, 33923987, 34011139, 34056597, 98 | 34108327, 34204653, 34234971, 34268018, 34375784, 34488868, 34514917, 34532027, 99 | 34549572, 34649350, 34684814, 34768968, 34809571, 34869115, 34996067, 35110119, 100 | 35130721, 35147717, 35218656, 35243013, 35260324, 35280691, 35418008, 35438043, 101 | 35470559, 35542580, 35677826, 35698815, 35741664, 35819490, 35838061, 35898709, 102 | 35927080, 35949745, 36004707, 36089140, 36215353, 36347748, 36449273, 36468993, 103 | 36492809, 36514695, 36564475, 36608980, 36661494, 36679437, 36718410, 36753978, 104 | 36808871, 36852823, 36871874, 36915370, 36932439, 36995049, 37018166, 37078819, 105 | 37097223, 37117968, 37211443, 37284534, 37487648, 37610572, 37634876, 37685746, 106 | 37742972, 37762524, 37798348, 37832517, 37857395, 37892740, 38081819, 38152312, 107 | 38185356, 38354507, 38413806, 38445314, 38491454, 38621908, 38808299, 38857717, 108 | 
38965742, 39017716, 39036242, 39101774, 39183648, 39218299, 39264409, 39449261, 109 | 39533363, 39571078, 39591177, 39612448, 39676727, 39783773, 39868971, 39910199, 110 | 39950993, 40048366, 40070068, 40199841, 40223146, 40242974, 40308980, 40365138, 111 | 40407025, 40485112, 40515256, 40550427, 40572145, 40610742, 40675732, 40781928, 112 | 40890587, 40908309, 40931951, 40980879, 41139911, 41164705, 41295177, 41400590, 113 | 41437807, 41565401, 41664049, 41703705, 41829906, 41874983, 41934789, 42026620, 114 | 42091389, 42109727, 42167188, 42189983, 42225997, 42396542, 42507696, 42537605, 115 | 42561089, 42770618, 42812840, 42880605, 42933039, 42968918, 42992344, 43136802, 116 | 43177275, 43217000, 43322048, 43346772, 43453905, 43471553, 43528683, 43556385, 117 | 43613172, 43632438, 43688423, 43717690, 43780853, 43815396, 43863389, 43913444, 118 | 43949370, 43967961, 44099703, 44323096, 44416790, 44487042, 44504415, 44694666, 119 | 44910908, 45123988, 45179785, 45336256, 45360139, 45386173, 45434444, 45543334, 120 | 45566384, 45622715, 45694535, 45735849, 45790789, 45811309, 45878581, 45926014, 121 | 46047467, 46195852, 46286950, 46331527, 46378283, 46397343, 46423775, 46559707, 122 | 46609522, 46633193, 46700646, 46726551, 46807508, 46854999, 46959920, 46977074, 123 | 47010335, 47058459, 47138972, 47225249, 47258529, 47348441, 47385348, 47419197, 124 | 47475678, 47529096, 47698504, 47802880, 47873780, 48005712, 48034733, 48111179, 125 | 48180636, 48206706, 48268354, 48433800, 48552938, 48579567, 48705251, 48743635, 126 | 48762767, 48825122, 49021838, 49066145, 49086392, 49108768, 49226991, 49271400, 127 | 49305893, 49356173, 49374895, 49401675, 49418815, 49543190, 49560723, 49577763, 128 | 49615104, 49694969, 49795966, 49837389, 49885841, 50006136, 50033296, 50068935, 129 | 50108306, 50330510, 50400973, 50450687, 50483527, 50553024, 50633401, 50662978, 130 | 50763965, 50785963, 50844364, 50949402, 50978961, 51007043, 51052741, 51072276, 131 | 51113337, 51293469, 
51314824, 51392638, 51410827, 51448023, 51471158, 51583028, 132 | 51627402, 51674496, 51789353, 51874050, 51926864, 51993607, 52032676, 52076919, 133 | 52135269, 52195006, 52236178, 52364215, 52383437, 52465338, 52571327, 52598638, 134 | 52690610, 52719467, 52878873, 52896331, 52928924, 52972869, 53019465, 53112335, 135 | 53138152, 53190841, 53271347, 53339884, 53357892, 53391083, 53410456, 53442844, 136 | 53461698, 53516682, 53544701, 53611529, 53745871, 54008015, 54072868, 54110210, 137 | 54169610, 54188107, 54243814, 54283095, 54342688, 54427241, 54452663, 54474270, 138 | 54493776, 54523004, 54719363, 54758981, 54858018, 54880585, 54932573, 55015415, 139 | 55213253, 55232405, 55353761, 55374422, 55420003, 55567139, 55683107, 55755959, 140 | 56018103, 56097389, 56173554, 56351881, 56372871, 56520005, 56557817, 56629959, 141 | 56718331, 56805604, 56838544, 56913613, 57071115, 57163387, 57235490, 57368511, 142 | 57397080, 57441010, 57481989, 57507934, 57535941, 57581293, 57609109, 57720657, 143 | 57737646, 57754757, 57781580, 57807801, 57849811, 57941029, 58002452, 58029803, 144 | 58075207, 58200637, 58262406, 58311676, 58410704, 58478120, 58536247, 58642535, 145 | 58676287, 58710845, 58739577, 58854985, 59015604, 59084440, 59183226, 59265159, 146 | 59283757, 59307628, 59336561, 59380325, 59490491, 59512104, 59545531, 59609618, 147 | 59627799, 59676091, 59705675, 59789816, 59875586, 59917614, 59936304, 59964495, 148 | 60033884, 60151825, 60186018, 60218864, 60350030, 60431713, 60450859, 60489915, 149 | 60550004, 60584576, 60650106, 60745805, 60906245, 60941007, 60976258, 60997320, 150 | 61018493, 61047996, 61072887, 61114397, 61192706, 61371238, 61388604, 61461503, 151 | 61477974, 61516424, 61546010, 61630792, 61704942, 61765683, 61797702, 61816994, 152 | 61861467, 61915660, 61960822, 62104108, 62136805, 62175022, 62191417, 62230020, 153 | 62267428, 62310480, 62429066, 62493067, 62630136, 62656327, 62831308, 62909051, 154 | 62961155, 62988445, 63035023, 63100708, 
63123263, 63197122, 63229073, 63279182, 155 | 63296712, 63345000, 63439112, 63506072, 63563336, 63699363, 63728798, 63769047, 156 | 63793597, 63864866, 63934040, 63974093, 64036919, 64088774, 64185766, 64216023, 157 | 64248960, 64305124, 64377605, 64410434, 64436103, 64469686, 64632190, 64894334, 158 | 65030655, 65065613, 65112034, 65336229, 65385971, 65483419, 65501773, 65525788, 159 | 65543663, 65664918, 65710537, 65824893, 65966312, 65996355, 66045068, 66173237, 160 | 66214649, 66239467, 66304885, 66348533, 66515500, 66549701, 66636005, 66748888, 161 | 66766198, 66826491, 66901593, 66968368, 67118298, 67144889, 67216352, 67314735, 162 | 67423279, 67464425, 67554353, 67661233, 67684065, 67789810, 67851470, 67871056, 163 | 67947347, 67991400, 68040154, 68093103, 68184189, 68220695, 68316937, 68360313, 164 | 68473350, 68496425, 68540442, 68560932, 68626862, 68644078, 68792398, 68827170, 165 | 68888890, 68918206, 68975600, 69127681, 69169631, 69213767, 69251924, 69268881, 166 | 69294872, 69333681, 69595825, 69662218, 69780215, 69809905, 69832630, 69858061, 167 | 70018173, 70050268, 70226675, 70257826, 70279637, 70311029, 70546289, 70699789, 168 | 70793355, 70812249, 70867141, 70929853, 70949019, 71111031, 71190131, 71218664, 169 | 71262061, 71326665, 71359776, 71382846, 71412891, 71467179, 71501472, 71665289, 170 | 71732508, 71774006, 71814720, 71917360, 71998823, 72057933, 72077982, 72120518, 171 | 72141118, 72189998, 72208834, 72241047, 72320619, 72379380, 72480075, 72516680, 172 | 72542935, 72637020, 72705824, 72753730, 72777218, 73039362, 73169216, 73306433, 173 | 73354611, 73401730, 73420340, 73465113, 73495920, 73557651, 73693897, 73735519, 174 | 73798352, 73839315, 73903177, 73986080, 74075482, 74106713, 74213050, 74231675, 175 | 74289595, 74341930, 74391522, 74420264, 74457822, 74494342, 74554155, 74696332, 176 | 74724172, 74770346, 74812163, 74846180, 74872831, 74937792, 74986869, 75130796, 177 | 75158399, 75209312, 75298641, 75379382, 75497080, 75550494, 
75587266, 75671584, 178 | 75710215, 75785736, 75929527, 75974898, 76058956, 76321100, 76394554, 76418271, 179 | 76537864, 76604848, 76651032, 76709968, 76794244, 77051786, 77154916, 77185223, 180 | 77302562, 77363139, 77525132, 77587367, 77661105, 77691089, 77713844, 77778031, 181 | 77839782, 77959914, 77984147, 78019215, 78058962, 78103129, 78173006, 78223638, 182 | 78263804, 78333884, 78391246, 78519583, 78550312, 78589911, 78634821, 78727597, 183 | 78818915, 78838052, 78898246, 78920767, 78939958, 79157167, 79183071, 79374385, 184 | 79423807, 79448753, 79534923, 79565229, 79667259, 79759868, 79943755, 79966440, 185 | 79988089, 80022736, 80049451, 80079494, 80096052, 80140066, 80174767, 80200707, 186 | 80282851, 80394785, 80462283, 80494896, 80521918, 80544504, 80590689, 80671735, 187 | 80933879, 81082359, 81206527, 81232473, 81280888, 81339924, 81505297, 81577229, 188 | 81596851, 81650173, 81667410, 81698846, 81721839, 81777782, 81830567, 81897032, 189 | 81940474, 82025390, 82131203, 82151975, 82235241, 82268069, 82413074, 82464704, 190 | 82600117, 82699326, 82720927, 82773118, 82830330, 82961205, 83043818, 83305962, 191 | 83327924, 83462443, 83504264, 83565947, 83603373, 83644237, 83673351, 83710268, 192 | 83798183, 83877121, 83941969, 83967914, 83994044, 84012958, 84046204, 84067471, 193 | 84098614, 84116502, 84133968, 84396112, 84551984, 84568387, 84590227, 84670572, 194 | 84721238, 84791562, 84933338, 84961876, 84995961, 85012577, 85063106, 85136609, 195 | 85154804, 85183544, 85302831, 85350697, 85384895, 85424312, 85461953, 85541199, 196 | 85610213, 85629088, 85676115, 85780789, 85938122, 86046405, 86091417, 86134899, 197 | 86171172, 86190893, 86224095, 86351465, 86476878, 86532067, 86584408, 86639993, 198 | 86675652, 86778143, 86800312, 86834512, 86851814, 86924590, 86977726, 87008229, 199 | 87148414, 87172154, 87204696, 87347804, 87563189, 87677946, 87702489, 87726967, 200 | 87848148, 87895316, 87969615, 88000317, 88066914, 88314340, 88346651, 88480272, 
201 | 88540740, 88604355, 88866499, 88988073, 89004486, 89023410, 89057642, 89150028, 202 | 89174564, 89206731, 89232093, 89272556, 89316380, 89338882, 89359288, 89508252, 203 | 89539902, 89595604, 89677851, 89722676, 89818027, 90038675, 90061555, 90130374, 204 | 90189874, 90237195, 90293140, 90310944, 90357535, 90453624, 90533506, 90557971, 205 | 90579335, 90608392, 90661366, 90715319, 90741214, 90772937, 90863809, 90903970, 206 | 90992650, 91043911, 91144060, 91161233, 91266191, 91286808, 91443239, 91478899, 207 | 91504712, 91562022, 91615262, 91664702, 91703729, 91758544, 91854625, 91960427, 208 | 91988977, 92021523, 92092413, 92225544, 92285585, 92339976, 92358249, 92405885, 209 | 92476885, 92532594, 92572380, 92589278, 92638828, 92668130, 92765436, 92784615, 210 | 92812917, 92876803, 92899306, 93108223, 93172389, 93290988, 93379684, 93421940, 211 | 93439674, 93517407, 93538223, 93589846, 93739198, 93829164, 93915919, 94059124, 212 | 94078766, 94143431, 94267612, 94329802, 94412782, 94502384, 94534660, 94624372, 213 | 94648822, 94675613, 94700980, 94756065, 94792892, 94881587, 94968956, 94988908, 214 | 95037583, 95085274, 95184444, 95224724, 95252103, 95307003, 95393612, 95430038, 215 | 95480614, 95512017, 95581473, 95617515, 95734758, 95843676, 95861657, 95909106, 216 | 96023266, 96165582, 96361994, 96392485, 96416205, 96483954, 96503140, 96522059, 217 | 96562279, 96579217, 96607553, 96814063, 96885373, 96965146, 97043022, 97101196, 218 | 97155521, 97186129, 97249636, 97285970, 97313768, 97412608, 97432881, 97656423, 219 | 97839676, 97876197, 97974491, 98236635, 98267546, 98345548, 98366257, 98485018, 220 | 98513197, 98531886, 98672120, 98716799, 98849044, 98875698, 98920192, 98960464, 221 | 99058776, 99203324, 99225122, 99278342, 99340912, 99392712, 99474951, 99550054, 222 | 99618707, 99667688, 99722262, 99807398, 99904654, 99968201, 100034455, 100075897, 223 | 100096216, 100203280, 100261342, 100434041, 100478276, 100565262, 100620551, 224 | 100715606, 
100735365, 100893047, 100921632, 100953547, 100979933, 101066306, 225 | 101135513, 101207970, 101227748, 101393798, 101428271, 101584341, 101625105, 226 | 101727433, 101753597, 101781789, 101839805, 101901027, 102084417, 102106963, 227 | 102205320, 102226555, 102315711, 102392229, 102459090, 102533845, 102565931, 228 | 102594126, 102654465, 102682127, 102751117, 102790827, 102834545, 102942563, 229 | 103018495, 103051028, 103102577, 103148551, 103187170, 103220498, 103250280, 230 | 103358188, 103455347, 103679477, 103696462, 103825978, 103843772, 103894750, 231 | 103977485, 104011964, 104043373, 104195503, 104235611, 104323736, 104378093, 232 | 104476988, 104494831, 104519439, 104546306, 104620983, 104714776, 104735252 233 | }; 234 | 235 | static const uint8_t random_chunk_ids[] = { 236 | 0xda, 0x39, 0x94, 0xf1, 0x3b, 0x17, 0xbc, 0xea, 0x35, 0x43, 0x52, 0x05, 0x50, 0x5f, 0x59, 0xf4, 237 | 0x10, 0x2e, 0xa9, 0xe5, 0x5f, 0x9c, 0x82, 0x83, 0x13, 0xd3, 0xd9, 0xb5, 0xcc, 0xc9, 0xf9, 0x55, 238 | 239 | 0x34, 0xae, 0x40, 0xbf, 0x5b, 0xbe, 0xe5, 0x88, 0x44, 0x4e, 0x9d, 0x5c, 0xf6, 0x28, 0xad, 0x5f, 240 | 0xc3, 0x98, 0x32, 0xee, 0xca, 0xf9, 0x9e, 0x0d, 0x70, 0xd8, 0x4d, 0xf3, 0x22, 0x88, 0x29, 0x69, 241 | 242 | 0xf0, 0x46, 0x65, 0xd8, 0xf8, 0x88, 0x1a, 0x00, 0x8e, 0x72, 0x4e, 0xaf, 0x17, 0x7b, 0x91, 0x35, 243 | 0xb8, 0x12, 0xc2, 0xd0, 0x1a, 0x6e, 0x9b, 0x49, 0xd6, 0xbb, 0xa2, 0x5f, 0xcd, 0x42, 0xf3, 0x99, 244 | 245 | 0x69, 0xea, 0xc8, 0x79, 0xba, 0xc3, 0x61, 0xe0, 0x79, 0x33, 0xea, 0x8e, 0xe6, 0xc2, 0x49, 0x8f, 246 | 0x10, 0xbd, 0x3d, 0x20, 0xbc, 0x7d, 0xa2, 0x15, 0x4e, 0xd4, 0x2e, 0x72, 0x6d, 0xa4, 0xd5, 0x36, 247 | 248 | 0x77, 0xb6, 0x99, 0x03, 0x2f, 0x19, 0xcb, 0x7f, 0xd4, 0xd9, 0xd7, 0xaf, 0x69, 0x19, 0x56, 0x86, 249 | 0x24, 0xca, 0xad, 0x4a, 0xab, 0x51, 0x88, 0x97, 0x1e, 0x82, 0x06, 0x26, 0xf3, 0x6a, 0xf5, 0x7f, 250 | 251 | 0xbb, 0x16, 0x97, 0x5f, 0xf7, 0xc0, 0x31, 0x59, 0x76, 0x8e, 0xa2, 0xb0, 0xd3, 0x76, 0x6f, 0x47, 252 | 0xcb, 0xc9, 0xa7, 0x08, 0x3c, 0x83, 0xcf, 0xfb, 
0x5c, 0xc4, 0x14, 0x6f, 0xb2, 0xa7, 0x2c, 0xdc, 253 | 254 | 0xb7, 0xde, 0x32, 0xe7, 0x09, 0x1d, 0xb6, 0x70, 0x5e, 0xac, 0x0a, 0x53, 0x59, 0xf1, 0x61, 0x41, 255 | 0x76, 0x53, 0xdd, 0x96, 0xb4, 0x86, 0x43, 0x60, 0x5a, 0x0c, 0x61, 0x94, 0x72, 0xa3, 0xcb, 0xbe, 256 | 257 | 0x79, 0x30, 0x0f, 0x8d, 0x44, 0x57, 0xe8, 0xff, 0x85, 0x1a, 0xd3, 0x86, 0xf9, 0x21, 0x09, 0x8e, 258 | 0x29, 0x0d, 0xd4, 0xc2, 0x46, 0x4e, 0x0e, 0xde, 0xe7, 0x8d, 0xb8, 0xdf, 0x9b, 0x19, 0xc1, 0xdb, 259 | 260 | 0x36, 0x79, 0x26, 0xc5, 0xf0, 0xc0, 0x9f, 0x05, 0x6a, 0x20, 0x17, 0x0a, 0xec, 0x2d, 0xed, 0x8a, 261 | 0xc9, 0xac, 0x43, 0x66, 0x3e, 0xf2, 0x18, 0x45, 0x54, 0xf3, 0x9e, 0xcd, 0xe0, 0x6f, 0xa2, 0xb0, 262 | 263 | 0x87, 0xec, 0x0a, 0xae, 0x64, 0x3e, 0x6d, 0x04, 0x62, 0xba, 0xb1, 0x64, 0x19, 0xa8, 0x37, 0xf4, 264 | 0x98, 0xc3, 0x14, 0x37, 0xeb, 0xc4, 0x8a, 0x69, 0x40, 0x57, 0xe1, 0x5b, 0xcd, 0x89, 0xa2, 0xb5, 265 | 266 | 0x0f, 0x73, 0xe6, 0x02, 0x2f, 0x67, 0xda, 0xed, 0xe8, 0x71, 0x45, 0x9b, 0x1c, 0xa5, 0x07, 0xb1, 267 | 0x82, 0xf2, 0xf3, 0x00, 0x60, 0xc8, 0xc9, 0x49, 0x1c, 0x44, 0xe9, 0xd2, 0x6d, 0xaa, 0xfd, 0xed, 268 | 269 | 0xc0, 0x0b, 0x8f, 0x6e, 0xdc, 0xd5, 0x16, 0x4a, 0x59, 0xc1, 0x4a, 0xc8, 0xbf, 0xf1, 0xe9, 0xbf, 270 | 0x93, 0x35, 0x07, 0x6a, 0x69, 0x82, 0xb8, 0xe0, 0x48, 0x5b, 0xeb, 0xd4, 0xab, 0xea, 0x2f, 0x94, 271 | 272 | 0x84, 0x5d, 0x16, 0x6f, 0x2d, 0x3a, 0xa6, 0x65, 0x09, 0x57, 0x06, 0x41, 0xa7, 0x98, 0xd1, 0xd9, 273 | 0x71, 0x3a, 0x5c, 0x64, 0xd6, 0xa2, 0xee, 0xda, 0xf8, 0xde, 0xf3, 0x31, 0x6f, 0x6e, 0x5b, 0xd1, 274 | 275 | 0x55, 0xf4, 0x21, 0xdf, 0xe1, 0xe8, 0x9e, 0x57, 0x8d, 0xbd, 0x54, 0x2b, 0x11, 0x9f, 0x1f, 0x51, 276 | 0x26, 0x36, 0x8f, 0x20, 0xc9, 0x5b, 0x48, 0x0b, 0x3e, 0xf1, 0x99, 0x18, 0xa7, 0x13, 0xf1, 0x73, 277 | 278 | 0xba, 0x6e, 0x4a, 0x13, 0xf4, 0x4e, 0xfc, 0xa2, 0x8f, 0x9f, 0x25, 0x93, 0x4e, 0x71, 0x70, 0x89, 279 | 0x7f, 0xd8, 0xcf, 0x02, 0x2b, 0x1c, 0x6a, 0x8f, 0x01, 0x3a, 0x0a, 0x60, 0xc7, 0x75, 0x84, 0xfe, 280 | 281 | 0xc5, 0x3e, 0x08, 0x6a, 0x65, 0xc9, 0xda, 0xb5, 
0x02, 0x78, 0xb6, 0x96, 0x1b, 0x17, 0xa1, 0xdb, 282 | 0x48, 0x53, 0xa1, 0x89, 0x41, 0x56, 0xa7, 0x1e, 0x01, 0x5c, 0x7a, 0xcc, 0x71, 0x3b, 0x7f, 0xcb, 283 | 284 | 0xdb, 0xb6, 0x39, 0x36, 0xc7, 0x54, 0xb6, 0xe9, 0x4d, 0x5d, 0x57, 0x4d, 0x69, 0xae, 0xa6, 0xe0, 285 | 0xfb, 0xad, 0xd6, 0x06, 0x00, 0xe8, 0x7c, 0xc6, 0xbe, 0x17, 0xb3, 0xeb, 0xa1, 0x13, 0xc5, 0xe6, 286 | }; 287 | 288 | /* Verify the calculated chunk boundaries and IDs against what the regular 289 | * casync tool calculates for some random data. 290 | */ 291 | static void random_chunking(void **state) 292 | { 293 | struct chunker *c = (struct chunker*)*state; 294 | 295 | struct xorshift32_state rng = XORSHIFT32_INIT(RANDOM_SEED); 296 | size_t offset = 0; 297 | 298 | size_t boundary_idx = 0; 299 | size_t id_idx = 0; 300 | 301 | uint8_t buf[512]; 302 | size_t buf_fill = 0; 303 | 304 | uint8_t id[CHUNK_ID_LEN]; 305 | u_build_assert(sizeof(random_chunk_ids) % CHUNK_ID_LEN == 0); 306 | 307 | while (offset < RANDOM_SIZE) { 308 | if (buf_fill == 0) { 309 | xorshift32_fill(&rng, buf, sizeof(buf)); 310 | buf_fill = sizeof(buf); 311 | } 312 | 313 | size_t ret; 314 | ret = chunker_scan(c, buf, buf_fill); 315 | if (ret == (size_t)-1) { 316 | offset += buf_fill; 317 | buf_fill = 0; 318 | 319 | continue; 320 | } 321 | 322 | assert_true(boundary_idx < ARRAY_SIZE(random_chunk_boundaries)); 323 | 324 | offset += ret; 325 | if (random_chunk_boundaries[boundary_idx] != offset) 326 | fail_msg("chunk boundary mismatch at index %zu: Expected %zu, got %zu.", 327 | boundary_idx, random_chunk_boundaries[boundary_idx], offset); 328 | 329 | boundary_idx++; 330 | 331 | if (id_idx < sizeof(random_chunk_ids)) 332 | { 333 | chunker_get_id(c, id); 334 | assert_memory_equal(id, &random_chunk_ids[id_idx], CHUNK_ID_LEN); 335 | 336 | id_idx += CHUNK_ID_LEN; 337 | } 338 | 339 | chunker_reset(c); 340 | memmove(buf, &buf[ret], buf_fill-ret); 341 | buf_fill -= ret; 342 | } 343 | 344 | uint8_t final_chunk_id[] = { 345 | 0x88, 0x2a, 0xb0, 0x7f, 
0x13, 0xe3, 0x4f, 0x07, 0x10, 0x91, 0xf0, 0xa3, 0x54, 0xf7, 0x16, 0x4a, 346 | 0xb8, 0x34, 0xaa, 0x50, 0x0e, 0xe1, 0x2b, 0x2f, 0x2f, 0xb1, 0x83, 0x93, 0x8e, 0xf5, 0x3d, 0xef 347 | }; 348 | 349 | chunker_get_id(c, id); 350 | assert_memory_equal(id, final_chunk_id, CHUNK_ID_LEN); 351 | 352 | assert_int_equal(boundary_idx, ARRAY_SIZE(random_chunk_boundaries)); 353 | } 354 | 355 | /* After encountering a chunk boundary, we should not consume any more data until we are reset 356 | */ 357 | static void stuck_after_chunk(void **state) 358 | { 359 | struct chunker *c = (struct chunker*)*state; 360 | 361 | uint8_t buf[] = "Hello, world!"; 362 | size_t ret; 363 | 364 | do { 365 | ret = chunker_scan(c, buf, sizeof(buf)); 366 | } while (ret == (size_t)-1); 367 | 368 | assert_int_equal(chunker_scan(c, buf, sizeof(buf)), 0); 369 | 370 | chunker_reset(c); 371 | assert_int_not_equal(chunker_scan(c, buf, sizeof(buf)), 0); 372 | } 373 | 374 | static void discriminator(void **state) 375 | { 376 | struct chunker *c = (struct chunker*)*state; 377 | 378 | /* We have a build-time check for the discriminator calculation, but 379 | * let's also do this at runtime to make sure this also works on the 380 | * target arch, should the tests be run there. 381 | */ 382 | 383 | assert_int_equal(c->discriminator, 0xc17f); 384 | } 385 | 386 | /* Check that we don't mess anything up if we don't fill up the window on the 387 | * first call after a reset. 
388 | */ 389 | static void partial_window_feed(void **state) 390 | { 391 | struct chunker *c = (struct chunker*)*state; 392 | 393 | struct xorshift32_state rng = XORSHIFT32_INIT(RANDOM_SEED); 394 | uint8_t buf[128]; 395 | size_t offset = 0; 396 | xorshift32_fill(&rng, buf, sizeof(buf)); 397 | 398 | const uint32_t good_hash = 0xec7de6d0; 399 | 400 | u_build_assert(sizeof(buf) > CHUNKER_WINDOW_SIZE); 401 | 402 | #define RESET() do { chunker_reset(c); offset = 0; } while (0) 403 | #define FEED(amount) do { assert_int_equal(chunker_scan(c, &buf[offset], amount), (size_t)-1); offset += amount;} while (0) 404 | #define FEED_REST() FEED(sizeof(buf) - offset) 405 | #define CHECK() assert_int_equal(c->h, good_hash) 406 | 407 | RESET(); 408 | FEED_REST(); 409 | CHECK(); 410 | 411 | RESET(); 412 | FEED(0); FEED_REST(); FEED(0); 413 | CHECK(); 414 | 415 | RESET(); 416 | FEED(1); FEED_REST(); 417 | CHECK(); 418 | 419 | RESET(); 420 | FEED(CHUNKER_WINDOW_SIZE - 1); FEED_REST(); 421 | CHECK(); 422 | 423 | RESET(); 424 | FEED(CHUNKER_WINDOW_SIZE - 1); FEED(1); FEED_REST(); 425 | CHECK(); 426 | 427 | RESET(); 428 | FEED(CHUNKER_WINDOW_SIZE - 1); FEED(1); FEED(1); FEED_REST(); 429 | CHECK(); 430 | 431 | RESET(); 432 | FEED(CHUNKER_WINDOW_SIZE); FEED(1); FEED_REST(); 433 | CHECK(); 434 | 435 | RESET(); 436 | FEED(CHUNKER_WINDOW_SIZE); FEED_REST(); 437 | CHECK(); 438 | #undef RESET 439 | #undef FEED 440 | #undef FEED_REST 441 | #undef CHECK 442 | } 443 | 444 | int main(void) 445 | { 446 | const struct CMUnitTest tests[] = { 447 | cmocka_unit_test_setup_teardown(random_chunking, create_chunker, destroy_chunker), 448 | cmocka_unit_test_setup_teardown(stuck_after_chunk, create_chunker, destroy_chunker), 449 | cmocka_unit_test_setup_teardown(discriminator, create_chunker, destroy_chunker), 450 | cmocka_unit_test_setup_teardown(partial_window_feed, create_chunker, destroy_chunker), 451 | }; 452 | 453 | u_log_init(); 454 | 455 | return cmocka_run_group_tests_name("chunker", tests, NULL, 
NULL); 456 | } 457 | --------------------------------------------------------------------------------