├── .github
│   └── workflows
│       └── rust.yml
├── .gitignore
├── CHANGELOG.md
├── Cargo.lock
├── Cargo.toml
├── LICENSE-APACHE
├── LICENSE-MIT
├── README.md
├── examples
│   ├── cache.rs
│   ├── import.rs
│   ├── restore.rs
│   └── stress.rs
├── src
│   ├── cache
│   │   ├── async_tracker.rs
│   │   ├── mod.rs
│   │   ├── sqlite_tracker.rs
│   │   └── tests.rs
│   ├── cidbytes.rs
│   ├── db.rs
│   ├── error.rs
│   ├── lib.rs
│   ├── tests.rs
│   └── transaction.rs
├── tables.sql
├── test-data
│   ├── broken.sqlite
│   └── mini.sqlite
└── tests
    └── stress.rs

/.github/workflows/rust.yml:
--------------------------------------------------------------------------------
 1 | name: Rust
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ master ]
 6 |   pull_request:
 7 |     branches: [ master ]
 8 | 
 9 | env:
10 |   CARGO_TERM_COLOR: always
11 | 
12 | jobs:
13 |   build:
14 | 
15 |     runs-on: ubuntu-latest
16 | 
17 |     steps:
18 |     - uses: actions/checkout@v2
19 |     - name: Build
20 |       run: cargo build --verbose
21 |     - name: Run tests
22 |       run: cargo test --verbose
23 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | *.sqlite
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # Changelog
 2 | 
 3 | This changelog was started sometime after the 0.7 release.
 4 | 
 5 | ## Release 0.13
 6 | 
 7 | - update to libipld 0.14 and multihash 0.16
 8 | 
 9 | ## Release 0.12
10 | 
11 | - fix some lifetimes and ownership issues to ensure that transaction retries cannot yield corrupt results
12 | 
13 | ## Release 0.11
14 | 
15 | - automatically upgrade DB schema to new foreign key definitions
16 | 
17 |   **This makes downgrades to 0.10 impossible.**
18 | 
19 | 0.11.1: permit more schema differences when upgrading (case insensitive, unique vs. primary key, autoincrement)
20 | 
21 | 0.11.3: split up transaction for cleaning up CIDs to avoid blocking the DB for long times
22 | 
23 | 0.11.4: improve logging of startup latencies and only do integrity check after schema change
24 | 
25 | 0.11.5: truncate WAL before and after GC and give more information about DB stats
26 | 
27 | ## Release 0.10
28 | 
29 | - update to `rusqlite` version 0.26
30 | 
31 | 0.10.1: BROKEN
32 | 
33 | 0.10.2: reinstate behaviour of cleaning up unreferenced CIDs
34 | 
35 | 0.10.3: remove some exponential runtime traps for pathologically linked DAGs
36 | 
37 | 0.10.4: make it possible again to use standalone `temp_pin()` (i.e. remove broken foreign key constraint)
38 | 
39 | 0.10.5: make GC logging less obnoxious
40 | 
41 | 0.10.6: fix CID cleanup to never remove temp_pins
42 | 
43 | 0.10.7: fix GC to actually remove blocks from the refs table
44 | 
45 | ## Release 0.9
46 | 
47 | - use the `unlock_notify` feature of `rusqlite` to allow concurrent transactions to the same DB
48 | - add `additional_connection` function to obtain connections for concurrent use within the same process
49 | - make initial recomputation of storage size stats concurrent, since it needs to read all the blocks
50 | - always provide context in error return values
51 | - make GC fully concurrent, with locks taken only for brief periods — it is now reasonable to always run a full GC
52 | 
53 | ## Release 0.8
54 | 
55 | - no changes from 0.7 except for updating to `rusqlite` version 0.26 (which was reverted in 0.9; the proper update was only done in 0.10)
56 | 
--------------------------------------------------------------------------------
/Cargo.lock:
--------------------------------------------------------------------------------
1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "addr2line" 7 | version = "0.17.0" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "b9ecd88a8c8378ca913a680cd98f0f13ac67383d35993f86c90a70e3f137816b" 10 | dependencies = [ 11 | "gimli", 12 | ] 13 | 14 | [[package]] 15 | name = "adler" 16 | version = "1.0.2" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" 19 | 20 | [[package]] 21 | name = "ahash" 22 | version = "0.7.6" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" 25 | dependencies = [ 26 | "getrandom", 27 | "once_cell", 28 | "version_check", 29 | ] 30 | 31 | [[package]] 32 | name = "ansi_term" 33 | version = "0.12.1" 34 | source = "registry+https://github.com/rust-lang/crates.io-index" 35 | checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" 36 | dependencies = [ 37 | "winapi", 38 | ] 39 | 40 | [[package]] 41 | name = "anyhow" 42 | version = "1.0.52" 43 | source = "registry+https://github.com/rust-lang/crates.io-index" 44 | checksum = "84450d0b4a8bd1ba4144ce8ce718fbc5d071358b1e5384bace6536b3d1f2d5b3" 45 | dependencies = [ 46 | "backtrace", 47 | ] 48 | 49 | [[package]] 50 | name = "async-trait" 51 | version = "0.1.52" 52 | source = "registry+https://github.com/rust-lang/crates.io-index" 53 | checksum = "061a7acccaa286c011ddc30970520b98fa40e00c9d644633fb26b5fc63a265e3" 54 | dependencies = [ 55 | "proc-macro2", 56 | "quote", 57 | "syn", 58 | ] 59 | 60 | [[package]] 61 | name = "autocfg" 62 | version = "1.1.0" 63 | source = "registry+https://github.com/rust-lang/crates.io-index" 64 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 65 | 66 | [[package]] 67 | name = "backtrace" 68 | version = "0.3.64" 69 | source = "registry+https://github.com/rust-lang/crates.io-index" 70 | checksum = "5e121dee8023ce33ab248d9ce1493df03c3b38a659b240096fcbd7048ff9c31f" 71 | dependencies = [ 72 | "addr2line", 73 | "cc", 74 | "cfg-if", 75 | "libc", 76 | "miniz_oxide", 77 | "object", 78 | "rustc-demangle", 79 | ] 80 | 81 | [[package]] 82 | name = "base-x" 83 | version = "0.2.8" 84 | source = "registry+https://github.com/rust-lang/crates.io-index" 85 | checksum
= "a4521f3e3d031370679b3b140beb36dfe4801b09ac77e30c61941f97df3ef28b" 86 | 87 | [[package]] 88 | name = "bitflags" 89 | version = "1.3.2" 90 | source = "registry+https://github.com/rust-lang/crates.io-index" 91 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 92 | 93 | [[package]] 94 | name = "block-buffer" 95 | version = "0.10.3" 96 | source = "registry+https://github.com/rust-lang/crates.io-index" 97 | checksum = "69cce20737498f97b993470a6e536b8523f0af7892a4f928cceb1ac5e52ebe7e" 98 | dependencies = [ 99 | "generic-array", 100 | ] 101 | 102 | [[package]] 103 | name = "byteorder" 104 | version = "1.4.3" 105 | source = "registry+https://github.com/rust-lang/crates.io-index" 106 | checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" 107 | 108 | [[package]] 109 | name = "bytes" 110 | version = "1.1.0" 111 | source = "registry+https://github.com/rust-lang/crates.io-index" 112 | checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8" 113 | 114 | [[package]] 115 | name = "cached" 116 | version = "0.30.0" 117 | source = "registry+https://github.com/rust-lang/crates.io-index" 118 | checksum = "af4dfac631a8e77b2f327f7852bb6172771f5279c4512efe79fad6067b37be3d" 119 | dependencies = [ 120 | "hashbrown", 121 | "once_cell", 122 | ] 123 | 124 | [[package]] 125 | name = "cc" 126 | version = "1.0.72" 127 | source = "registry+https://github.com/rust-lang/crates.io-index" 128 | checksum = "22a9137b95ea06864e018375b72adfb7db6e6f68cfc8df5a04d00288050485ee" 129 | 130 | [[package]] 131 | name = "cfg-if" 132 | version = "1.0.0" 133 | source = "registry+https://github.com/rust-lang/crates.io-index" 134 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 135 | 136 | [[package]] 137 | name = "cid" 138 | version = "0.8.6" 139 | source = "registry+https://github.com/rust-lang/crates.io-index" 140 | checksum = "f6ed9c8b2d17acb8110c46f1da5bf4a696d745e1474a16db0cd2b49cd0249bf2" 141 | dependencies = [ 142 | "core2", 143 | "multibase", 144 | "multihash", 145 | "serde", 146 | "unsigned-varint", 147 | ] 148 | 149 | [[package]] 150 | name = "convert_case" 151 | version = "0.4.0" 152 | source = "registry+https://github.com/rust-lang/crates.io-index" 153 | checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" 154 | 155 | [[package]] 156 | name = "core2" 157 | version = "0.4.0" 158 | source = "registry+https://github.com/rust-lang/crates.io-index" 159 | checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505" 160 | dependencies = [ 161 | "memchr", 162 | ] 163 | 164 | [[package]] 165 | name = "cpufeatures" 166 | version = "0.2.1" 167 | source = "registry+https://github.com/rust-lang/crates.io-index" 168 | checksum = "95059428f66df56b63431fdb4e1947ed2190586af5c5a8a8b71122bdf5a7f469" 169 | dependencies = [ 170 | "libc", 171 | ] 172 | 173 | [[package]] 174 | name = "crypto-common" 175 | version = "0.1.6" 176 | source = "registry+https://github.com/rust-lang/crates.io-index" 177 | checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" 178 | dependencies = [ 179 | "generic-array", 180 | "typenum", 181 | ] 182 | 183 | [[package]] 184 | name = "data-encoding" 185 | version = "2.3.2" 186 | source = "registry+https://github.com/rust-lang/crates.io-index" 187 | checksum = "3ee2393c4a91429dffb4bedf19f4d6abf27d8a732c8ce4980305d782e5426d57" 188 | 189 | [[package]] 190 | name = "data-encoding-macro" 191 | version = "0.1.12" 192 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 193 | checksum = "86927b7cd2fe88fa698b87404b287ab98d1a0063a34071d92e575b72d3029aca" 194 | dependencies = [ 195 | "data-encoding", 196 | "data-encoding-macro-internal", 197 | ] 198 | 199 | [[package]] 200 | name = "data-encoding-macro-internal" 201 | version = "0.1.10" 202 | source = "registry+https://github.com/rust-lang/crates.io-index" 203 | checksum = "a5bbed42daaa95e780b60a50546aa345b8413a1e46f9a40a12907d3598f038db" 204 | dependencies = [ 205 | "data-encoding", 206 | "syn", 207 | ] 208 | 209 | [[package]] 210 | name = "derive_more" 211 | version = "0.99.17" 212 | source = "registry+https://github.com/rust-lang/crates.io-index" 213 | checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" 214 | dependencies = [ 215 | "convert_case", 216 | "proc-macro2", 217 | "quote", 218 | "rustc_version", 219 | "syn", 220 | ] 221 | 222 | [[package]] 223 | name = "digest" 224 | version = "0.10.5" 225 | source = "registry+https://github.com/rust-lang/crates.io-index" 226 | checksum = "adfbc57365a37acbd2ebf2b64d7e69bb766e2fea813521ed536f5d0520dcf86c" 227 | dependencies = [ 228 | "block-buffer", 229 | "crypto-common", 230 | ] 231 | 232 | [[package]] 233 | name = "either" 234 | version = "1.6.1" 235 | source = "registry+https://github.com/rust-lang/crates.io-index" 236 | checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" 237 | 238 | [[package]] 239 | name = "fallible-iterator" 240 | version = "0.2.0" 241 | source = "registry+https://github.com/rust-lang/crates.io-index" 242 | checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" 243 | 244 | [[package]] 245 | name = "fallible-streaming-iterator" 246 | version = "0.1.9" 247 | source = "registry+https://github.com/rust-lang/crates.io-index" 248 | checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" 249 | 250 | [[package]] 251 | name = "fastrand" 252 | version = "1.6.0" 253 | source = "registry+https://github.com/rust-lang/crates.io-index" 254 | checksum = "779d043b6a0b90cc4c0ed7ee380a6504394cee7efd7db050e3774eee387324b2" 255 | dependencies = [ 256 | "instant", 257 | ] 258 | 259 | [[package]] 260 | name = "fnv" 261 | version = "1.0.7" 262 | source = "registry+https://github.com/rust-lang/crates.io-index" 263 | checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" 264 | 265 | [[package]] 266 | name = "fuchsia-cprng" 267 | version = "0.1.1" 268 | source = "registry+https://github.com/rust-lang/crates.io-index" 269 | checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" 270 | 271 | [[package]] 272 | name = "futures" 273 | version = "0.3.19" 274 | source = "registry+https://github.com/rust-lang/crates.io-index" 275 | checksum = "28560757fe2bb34e79f907794bb6b22ae8b0e5c669b638a1132f2592b19035b4" 276 | dependencies = [ 277 | "futures-channel", 278 | "futures-core", 279 | "futures-executor", 280 | "futures-io", 281 | "futures-sink", 282 | "futures-task", 283 | "futures-util", 284 | ] 285 | 286 | [[package]] 287 | name = "futures-channel" 288 | version = "0.3.19" 289 | source = "registry+https://github.com/rust-lang/crates.io-index" 290 | checksum = "ba3dda0b6588335f360afc675d0564c17a77a2bda81ca178a4b6081bd86c7f0b" 291 | dependencies = [ 292 | "futures-core", 293 | "futures-sink", 294 | ] 295 | 296 | [[package]] 297 | name = "futures-core" 298 | version = "0.3.19" 299 | source = "registry+https://github.com/rust-lang/crates.io-index" 300 | checksum = 
"d0c8ff0461b82559810cdccfde3215c3f373807f5e5232b71479bff7bb2583d7" 301 | 302 | [[package]] 303 | name = "futures-executor" 304 | version = "0.3.19" 305 | source = "registry+https://github.com/rust-lang/crates.io-index" 306 | checksum = "29d6d2ff5bb10fb95c85b8ce46538a2e5f5e7fdc755623a7d4529ab8a4ed9d2a" 307 | dependencies = [ 308 | "futures-core", 309 | "futures-task", 310 | "futures-util", 311 | ] 312 | 313 | [[package]] 314 | name = "futures-io" 315 | version = "0.3.19" 316 | source = "registry+https://github.com/rust-lang/crates.io-index" 317 | checksum = "b1f9d34af5a1aac6fb380f735fe510746c38067c5bf16c7fd250280503c971b2" 318 | 319 | [[package]] 320 | name = "futures-macro" 321 | version = "0.3.19" 322 | source = "registry+https://github.com/rust-lang/crates.io-index" 323 | checksum = "6dbd947adfffb0efc70599b3ddcf7b5597bb5fa9e245eb99f62b3a5f7bb8bd3c" 324 | dependencies = [ 325 | "proc-macro2", 326 | "quote", 327 | "syn", 328 | ] 329 | 330 | [[package]] 331 | name = "futures-sink" 332 | version = "0.3.19" 333 | source = "registry+https://github.com/rust-lang/crates.io-index" 334 | checksum = "e3055baccb68d74ff6480350f8d6eb8fcfa3aa11bdc1a1ae3afdd0514617d508" 335 | 336 | [[package]] 337 | name = "futures-task" 338 | version = "0.3.19" 339 | source = "registry+https://github.com/rust-lang/crates.io-index" 340 | checksum = "6ee7c6485c30167ce4dfb83ac568a849fe53274c831081476ee13e0dce1aad72" 341 | 342 | [[package]] 343 | name = "futures-util" 344 | version = "0.3.19" 345 | source = "registry+https://github.com/rust-lang/crates.io-index" 346 | checksum = "d9b5cf40b47a271f77a8b1bec03ca09044d99d2372c0de244e66430761127164" 347 | dependencies = [ 348 | "futures-channel", 349 | "futures-core", 350 | "futures-io", 351 | "futures-macro", 352 | "futures-sink", 353 | "futures-task", 354 | "memchr", 355 | "pin-project-lite", 356 | "pin-utils", 357 | "slab", 358 | ] 359 | 360 | [[package]] 361 | name = "generic-array" 362 | version = "0.14.5" 363 | source = "registry+https://github.com/rust-lang/crates.io-index" 364 | checksum = "fd48d33ec7f05fbfa152300fdad764757cbded343c1aa1cff2fbaf4134851803" 365 | dependencies = [ 366 | "typenum", 367 | "version_check", 368 | ] 369 | 370 | [[package]] 371 | name = "getrandom" 372 | version = "0.2.4" 373 | source = "registry+https://github.com/rust-lang/crates.io-index" 374 | checksum = "418d37c8b1d42553c93648be529cb70f920d3baf8ef469b74b9638df426e0b4c" 375 | dependencies = [ 376 | "cfg-if", 377 | "libc", 378 | "wasi", 379 | ] 380 | 381 | [[package]] 382 | name = "gimli" 383 | version = "0.26.1" 384 | source = "registry+https://github.com/rust-lang/crates.io-index" 385 | checksum = "78cc372d058dcf6d5ecd98510e7fbc9e5aec4d21de70f65fea8fecebcd881bd4" 386 | 387 | [[package]] 388 | name = "hashbrown" 389 | version = "0.11.2" 390 | source = "registry+https://github.com/rust-lang/crates.io-index" 391 | checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" 392 | dependencies = [ 393 | "ahash", 394 | ] 395 | 396 | [[package]] 397 | name = "hashlink" 398 | version = "0.7.0" 399 | source = "registry+https://github.com/rust-lang/crates.io-index" 400 | checksum = "7249a3129cbc1ffccd74857f81464a323a152173cdb134e0fd81bc803b29facf" 401 | dependencies = [ 402 | "hashbrown", 403 | ] 404 | 405 | [[package]] 406 | name = "hermit-abi" 407 | version = "0.1.19" 408 | source = "registry+https://github.com/rust-lang/crates.io-index" 409 | checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" 410 | dependencies = [ 411 | "libc", 412 | ] 413 | 414 | 
[[package]] 415 | name = "instant" 416 | version = "0.1.12" 417 | source = "registry+https://github.com/rust-lang/crates.io-index" 418 | checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" 419 | dependencies = [ 420 | "cfg-if", 421 | ] 422 | 423 | [[package]] 424 | name = "ipfs-sqlite-block-store" 425 | version = "0.13.0" 426 | dependencies = [ 427 | "anyhow", 428 | "derive_more", 429 | "fnv", 430 | "futures", 431 | "itertools", 432 | "libipld", 433 | "maplit", 434 | "multihash", 435 | "parking_lot 0.11.2", 436 | "rusqlite", 437 | "tempdir", 438 | "tempfile", 439 | "tokio", 440 | "tracing", 441 | "tracing-subscriber", 442 | ] 443 | 444 | [[package]] 445 | name = "itertools" 446 | version = "0.10.3" 447 | source = "registry+https://github.com/rust-lang/crates.io-index" 448 | checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3" 449 | dependencies = [ 450 | "either", 451 | ] 452 | 453 | [[package]] 454 | name = "lazy_static" 455 | version = "1.4.0" 456 | source = "registry+https://github.com/rust-lang/crates.io-index" 457 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 458 | 459 | [[package]] 460 | name = "libc" 461 | version = "0.2.112" 462 | source = "registry+https://github.com/rust-lang/crates.io-index" 463 | checksum = "1b03d17f364a3a042d5e5d46b053bbbf82c92c9430c592dd4c064dc6ee997125" 464 | 465 | [[package]] 466 | name = "libipld" 467 | version = "0.14.0" 468 | source = "registry+https://github.com/rust-lang/crates.io-index" 469 | checksum = "ac9c3aa309c260aa2f174bac968901eddc546e9d85950c28eae6a7bec402f926" 470 | dependencies = [ 471 | "async-trait", 472 | "cached", 473 | "fnv", 474 | "libipld-cbor", 475 | "libipld-cbor-derive", 476 | "libipld-core", 477 | "libipld-macro", 478 | "log", 479 | "multihash", 480 | "parking_lot 0.12.1", 481 | "thiserror", 482 | ] 483 | 484 | [[package]] 485 | name = "libipld-cbor" 486 | version = "0.14.0" 487 | source = "registry+https://github.com/rust-lang/crates.io-index" 488 | checksum = "8dd1ab68c9d26f20c7d0dfea6eecbae8c00359875210001b33ca27d4a02f3d09" 489 | dependencies = [ 490 | "byteorder", 491 | "libipld-core", 492 | "thiserror", 493 | ] 494 | 495 | [[package]] 496 | name = "libipld-cbor-derive" 497 | version = "0.14.0" 498 | source = "registry+https://github.com/rust-lang/crates.io-index" 499 | checksum = "69ec2f49393a1347a2d95ebcb248ff75d0d47235919b678036c010a8cd927375" 500 | dependencies = [ 501 | "proc-macro-crate", 502 | "proc-macro2", 503 | "quote", 504 | "syn", 505 | "synstructure", 506 | ] 507 | 508 | [[package]] 509 | name = "libipld-core" 510 | version = "0.14.0" 511 | source = "registry+https://github.com/rust-lang/crates.io-index" 512 | checksum = "d44790246ec6b7314cba745992c23d479d018073e66d49ae40ae1b64e5dd8eb5" 513 | dependencies = [ 514 | "anyhow", 515 | "cid", 516 | "core2", 517 | "multibase", 518 | "multihash", 519 | "thiserror", 520 | ] 521 | 522 | [[package]] 523 | name = "libipld-macro" 524 | version = "0.14.0" 525 | source = "registry+https://github.com/rust-lang/crates.io-index" 526 | checksum = "852c011562ae5059b67c3a917f9f5945af5a68df8e39ede4444fff33274d25e2" 527 | dependencies = [ 528 | "libipld-core", 529 | ] 530 | 531 | [[package]] 532 | name = "libsqlite3-sys" 533 | version = "0.23.2" 534 | source = "registry+https://github.com/rust-lang/crates.io-index" 535 | checksum = "d2cafc7c74096c336d9d27145f7ebd4f4b6f95ba16aa5a282387267e6925cb58" 536 | dependencies = [ 537 | "cc", 538 | "pkg-config", 539 | "vcpkg", 540 | ] 541 | 542 | [[package]] 543 
| name = "lock_api" 544 | version = "0.4.9" 545 | source = "registry+https://github.com/rust-lang/crates.io-index" 546 | checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df" 547 | dependencies = [ 548 | "autocfg", 549 | "scopeguard", 550 | ] 551 | 552 | [[package]] 553 | name = "log" 554 | version = "0.4.14" 555 | source = "registry+https://github.com/rust-lang/crates.io-index" 556 | checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" 557 | dependencies = [ 558 | "cfg-if", 559 | ] 560 | 561 | [[package]] 562 | name = "maplit" 563 | version = "1.0.2" 564 | source = "registry+https://github.com/rust-lang/crates.io-index" 565 | checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" 566 | 567 | [[package]] 568 | name = "matchers" 569 | version = "0.1.0" 570 | source = "registry+https://github.com/rust-lang/crates.io-index" 571 | checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" 572 | dependencies = [ 573 | "regex-automata", 574 | ] 575 | 576 | [[package]] 577 | name = "memchr" 578 | version = "2.4.1" 579 | source = "registry+https://github.com/rust-lang/crates.io-index" 580 | checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" 581 | 582 | [[package]] 583 | name = "miniz_oxide" 584 | version = "0.4.4" 585 | source = "registry+https://github.com/rust-lang/crates.io-index" 586 | checksum = "a92518e98c078586bc6c934028adcca4c92a53d6a958196de835170a01d84e4b" 587 | dependencies = [ 588 | "adler", 589 | "autocfg", 590 | ] 591 | 592 | [[package]] 593 | name = "mio" 594 | version = "0.7.14" 595 | source = "registry+https://github.com/rust-lang/crates.io-index" 596 | checksum = "8067b404fe97c70829f082dec8bcf4f71225d7eaea1d8645349cb76fa06205cc" 597 | dependencies = [ 598 | "libc", 599 | "log", 600 | "miow", 601 | "ntapi", 602 | "winapi", 603 | ] 604 | 605 | [[package]] 606 | name = "miow" 607 | version = "0.3.7" 608 | source = "registry+https://github.com/rust-lang/crates.io-index" 609 | checksum = "b9f1c5b025cda876f66ef43a113f91ebc9f4ccef34843000e0adf6ebbab84e21" 610 | dependencies = [ 611 | "winapi", 612 | ] 613 | 614 | [[package]] 615 | name = "multibase" 616 | version = "0.9.1" 617 | source = "registry+https://github.com/rust-lang/crates.io-index" 618 | checksum = "9b3539ec3c1f04ac9748a260728e855f261b4977f5c3406612c884564f329404" 619 | dependencies = [ 620 | "base-x", 621 | "data-encoding", 622 | "data-encoding-macro", 623 | ] 624 | 625 | [[package]] 626 | name = "multihash" 627 | version = "0.16.3" 628 | source = "registry+https://github.com/rust-lang/crates.io-index" 629 | checksum = "1c346cf9999c631f002d8f977c4eaeaa0e6386f16007202308d0b3757522c2cc" 630 | dependencies = [ 631 | "core2", 632 | "digest", 633 | "multihash-derive", 634 | "sha2", 635 | "unsigned-varint", 636 | ] 637 | 638 | [[package]] 639 | name = "multihash-derive" 640 | version = "0.8.0" 641 | source = "registry+https://github.com/rust-lang/crates.io-index" 642 | checksum = "fc076939022111618a5026d3be019fd8b366e76314538ff9a1b59ffbcbf98bcd" 643 | dependencies = [ 644 | "proc-macro-crate", 645 | "proc-macro-error", 646 | "proc-macro2", 647 | "quote", 648 | "syn", 649 | "synstructure", 650 | ] 651 | 652 | [[package]] 653 | name = "ntapi" 654 | version = "0.3.6" 655 | source = "registry+https://github.com/rust-lang/crates.io-index" 656 | checksum = "3f6bb902e437b6d86e03cce10a7e2af662292c5dfef23b65899ea3ac9354ad44" 657 | dependencies = [ 658 | "winapi", 659 | ] 660 | 661 | [[package]] 662 | name = "num_cpus" 663 | 
version = "1.13.1" 664 | source = "registry+https://github.com/rust-lang/crates.io-index" 665 | checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" 666 | dependencies = [ 667 | "hermit-abi", 668 | "libc", 669 | ] 670 | 671 | [[package]] 672 | name = "object" 673 | version = "0.27.1" 674 | source = "registry+https://github.com/rust-lang/crates.io-index" 675 | checksum = "67ac1d3f9a1d3616fd9a60c8d74296f22406a238b6a72f5cc1e6f314df4ffbf9" 676 | dependencies = [ 677 | "memchr", 678 | ] 679 | 680 | [[package]] 681 | name = "once_cell" 682 | version = "1.9.0" 683 | source = "registry+https://github.com/rust-lang/crates.io-index" 684 | checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5" 685 | 686 | [[package]] 687 | name = "parking_lot" 688 | version = "0.11.2" 689 | source = "registry+https://github.com/rust-lang/crates.io-index" 690 | checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" 691 | dependencies = [ 692 | "instant", 693 | "lock_api", 694 | "parking_lot_core 0.8.5", 695 | ] 696 | 697 | [[package]] 698 | name = "parking_lot" 699 | version = "0.12.1" 700 | source = "registry+https://github.com/rust-lang/crates.io-index" 701 | checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" 702 | dependencies = [ 703 | "lock_api", 704 | "parking_lot_core 0.9.3", 705 | ] 706 | 707 | [[package]] 708 | name = "parking_lot_core" 709 | version = "0.8.5" 710 | source = "registry+https://github.com/rust-lang/crates.io-index" 711 | checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216" 712 | dependencies = [ 713 | "cfg-if", 714 | "instant", 715 | "libc", 716 | "redox_syscall", 717 | "smallvec", 718 | "winapi", 719 | ] 720 | 721 | [[package]] 722 | name = "parking_lot_core" 723 | version = "0.9.3" 724 | source = "registry+https://github.com/rust-lang/crates.io-index" 725 | checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929" 726 | dependencies = [ 727 | "cfg-if", 728 | "libc", 729 | "redox_syscall", 730 | "smallvec", 731 | "windows-sys", 732 | ] 733 | 734 | [[package]] 735 | name = "pin-project-lite" 736 | version = "0.2.8" 737 | source = "registry+https://github.com/rust-lang/crates.io-index" 738 | checksum = "e280fbe77cc62c91527259e9442153f4688736748d24660126286329742b4c6c" 739 | 740 | [[package]] 741 | name = "pin-utils" 742 | version = "0.1.0" 743 | source = "registry+https://github.com/rust-lang/crates.io-index" 744 | checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" 745 | 746 | [[package]] 747 | name = "pkg-config" 748 | version = "0.3.24" 749 | source = "registry+https://github.com/rust-lang/crates.io-index" 750 | checksum = "58893f751c9b0412871a09abd62ecd2a00298c6c83befa223ef98c52aef40cbe" 751 | 752 | [[package]] 753 | name = "proc-macro-crate" 754 | version = "1.1.0" 755 | source = "registry+https://github.com/rust-lang/crates.io-index" 756 | checksum = "1ebace6889caf889b4d3f76becee12e90353f2b8c7d875534a71e5742f8f6f83" 757 | dependencies = [ 758 | "thiserror", 759 | "toml", 760 | ] 761 | 762 | [[package]] 763 | name = "proc-macro-error" 764 | version = "1.0.4" 765 | source = "registry+https://github.com/rust-lang/crates.io-index" 766 | checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" 767 | dependencies = [ 768 | "proc-macro-error-attr", 769 | "proc-macro2", 770 | "quote", 771 | "syn", 772 | "version_check", 773 | ] 774 | 775 | [[package]] 776 | name = "proc-macro-error-attr" 777 | version = "1.0.4" 
778 | source = "registry+https://github.com/rust-lang/crates.io-index" 779 | checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" 780 | dependencies = [ 781 | "proc-macro2", 782 | "quote", 783 | "version_check", 784 | ] 785 | 786 | [[package]] 787 | name = "proc-macro2" 788 | version = "1.0.36" 789 | source = "registry+https://github.com/rust-lang/crates.io-index" 790 | checksum = "c7342d5883fbccae1cc37a2353b09c87c9b0f3afd73f5fb9bba687a1f733b029" 791 | dependencies = [ 792 | "unicode-xid", 793 | ] 794 | 795 | [[package]] 796 | name = "quote" 797 | version = "1.0.14" 798 | source = "registry+https://github.com/rust-lang/crates.io-index" 799 | checksum = "47aa80447ce4daf1717500037052af176af5d38cc3e571d9ec1c7353fc10c87d" 800 | dependencies = [ 801 | "proc-macro2", 802 | ] 803 | 804 | [[package]] 805 | name = "rand" 806 | version = "0.4.6" 807 | source = "registry+https://github.com/rust-lang/crates.io-index" 808 | checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" 809 | dependencies = [ 810 | "fuchsia-cprng", 811 | "libc", 812 | "rand_core 0.3.1", 813 | "rdrand", 814 | "winapi", 815 | ] 816 | 817 | [[package]] 818 | name = "rand_core" 819 | version = "0.3.1" 820 | source = "registry+https://github.com/rust-lang/crates.io-index" 821 | checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" 822 | dependencies = [ 823 | "rand_core 0.4.2", 824 | ] 825 | 826 | [[package]] 827 | name = "rand_core" 828 | version = "0.4.2" 829 | source = "registry+https://github.com/rust-lang/crates.io-index" 830 | checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" 831 | 832 | [[package]] 833 | name = "rdrand" 834 | version = "0.4.0" 835 | source = "registry+https://github.com/rust-lang/crates.io-index" 836 | checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" 837 | dependencies = [ 838 | "rand_core 0.3.1", 839 | ] 840 | 841 | [[package]] 842 | name = "redox_syscall" 843 | version = "0.2.10" 844 | source = "registry+https://github.com/rust-lang/crates.io-index" 845 | checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff" 846 | dependencies = [ 847 | "bitflags", 848 | ] 849 | 850 | [[package]] 851 | name = "regex" 852 | version = "1.5.5" 853 | source = "registry+https://github.com/rust-lang/crates.io-index" 854 | checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286" 855 | dependencies = [ 856 | "regex-syntax", 857 | ] 858 | 859 | [[package]] 860 | name = "regex-automata" 861 | version = "0.1.10" 862 | source = "registry+https://github.com/rust-lang/crates.io-index" 863 | checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" 864 | dependencies = [ 865 | "regex-syntax", 866 | ] 867 | 868 | [[package]] 869 | name = "regex-syntax" 870 | version = "0.6.25" 871 | source = "registry+https://github.com/rust-lang/crates.io-index" 872 | checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" 873 | 874 | [[package]] 875 | name = "remove_dir_all" 876 | version = "0.5.3" 877 | source = "registry+https://github.com/rust-lang/crates.io-index" 878 | checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" 879 | dependencies = [ 880 | "winapi", 881 | ] 882 | 883 | [[package]] 884 | name = "rusqlite" 885 | version = "0.26.3" 886 | source = "registry+https://github.com/rust-lang/crates.io-index" 887 | checksum = "4ba4d3462c8b2e4d7f4fcfcf2b296dc6b65404fbbc7b63daa37fd485c149daf7" 888 | dependencies = [ 
889 | "bitflags", 890 | "fallible-iterator", 891 | "fallible-streaming-iterator", 892 | "hashlink", 893 | "libsqlite3-sys", 894 | "memchr", 895 | "smallvec", 896 | ] 897 | 898 | [[package]] 899 | name = "rustc-demangle" 900 | version = "0.1.21" 901 | source = "registry+https://github.com/rust-lang/crates.io-index" 902 | checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" 903 | 904 | [[package]] 905 | name = "rustc_version" 906 | version = "0.4.0" 907 | source = "registry+https://github.com/rust-lang/crates.io-index" 908 | checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" 909 | dependencies = [ 910 | "semver", 911 | ] 912 | 913 | [[package]] 914 | name = "scopeguard" 915 | version = "1.1.0" 916 | source = "registry+https://github.com/rust-lang/crates.io-index" 917 | checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" 918 | 919 | [[package]] 920 | name = "semver" 921 | version = "1.0.4" 922 | source = "registry+https://github.com/rust-lang/crates.io-index" 923 | checksum = "568a8e6258aa33c13358f81fd834adb854c6f7c9468520910a9b1e8fac068012" 924 | 925 | [[package]] 926 | name = "serde" 927 | version = "1.0.133" 928 | source = "registry+https://github.com/rust-lang/crates.io-index" 929 | checksum = "97565067517b60e2d1ea8b268e59ce036de907ac523ad83a0475da04e818989a" 930 | 931 | [[package]] 932 | name = "sha2" 933 | version = "0.10.6" 934 | source = "registry+https://github.com/rust-lang/crates.io-index" 935 | checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0" 936 | dependencies = [ 937 | "cfg-if", 938 | "cpufeatures", 939 | "digest", 940 | ] 941 | 942 | [[package]] 943 | name = "sharded-slab" 944 | version = "0.1.4" 945 | source = "registry+https://github.com/rust-lang/crates.io-index" 946 | checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31" 947 | dependencies = [ 948 | "lazy_static", 949 | ] 950 | 951 | [[package]] 952 | name = "signal-hook-registry" 953 | version = "1.4.0" 954 | source = "registry+https://github.com/rust-lang/crates.io-index" 955 | checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" 956 | dependencies = [ 957 | "libc", 958 | ] 959 | 960 | [[package]] 961 | name = "slab" 962 | version = "0.4.5" 963 | source = "registry+https://github.com/rust-lang/crates.io-index" 964 | checksum = "9def91fd1e018fe007022791f865d0ccc9b3a0d5001e01aabb8b40e46000afb5" 965 | 966 | [[package]] 967 | name = "smallvec" 968 | version = "1.8.0" 969 | source = "registry+https://github.com/rust-lang/crates.io-index" 970 | checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" 971 | 972 | [[package]] 973 | name = "syn" 974 | version = "1.0.85" 975 | source = "registry+https://github.com/rust-lang/crates.io-index" 976 | checksum = "a684ac3dcd8913827e18cd09a68384ee66c1de24157e3c556c9ab16d85695fb7" 977 | dependencies = [ 978 | "proc-macro2", 979 | "quote", 980 | "unicode-xid", 981 | ] 982 | 983 | [[package]] 984 | name = "synstructure" 985 | version = "0.12.6" 986 | source = "registry+https://github.com/rust-lang/crates.io-index" 987 | checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" 988 | dependencies = [ 989 | "proc-macro2", 990 | "quote", 991 | "syn", 992 | "unicode-xid", 993 | ] 994 | 995 | [[package]] 996 | name = "tempdir" 997 | version = "0.3.7" 998 | source = "registry+https://github.com/rust-lang/crates.io-index" 999 | checksum = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8" 
1000 | dependencies = [ 1001 | "rand", 1002 | "remove_dir_all", 1003 | ] 1004 | 1005 | [[package]] 1006 | name = "tempfile" 1007 | version = "3.3.0" 1008 | source = "registry+https://github.com/rust-lang/crates.io-index" 1009 | checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" 1010 | dependencies = [ 1011 | "cfg-if", 1012 | "fastrand", 1013 | "libc", 1014 | "redox_syscall", 1015 | "remove_dir_all", 1016 | "winapi", 1017 | ] 1018 | 1019 | [[package]] 1020 | name = "thiserror" 1021 | version = "1.0.30" 1022 | source = "registry+https://github.com/rust-lang/crates.io-index" 1023 | checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417" 1024 | dependencies = [ 1025 | "thiserror-impl", 1026 | ] 1027 | 1028 | [[package]] 1029 | name = "thiserror-impl" 1030 | version = "1.0.30" 1031 | source = "registry+https://github.com/rust-lang/crates.io-index" 1032 | checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" 1033 | dependencies = [ 1034 | "proc-macro2", 1035 | "quote", 1036 | "syn", 1037 | ] 1038 | 1039 | [[package]] 1040 | name = "thread_local" 1041 | version = "1.1.4" 1042 | source = "registry+https://github.com/rust-lang/crates.io-index" 1043 | checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180" 1044 | dependencies = [ 1045 | "once_cell", 1046 | ] 1047 | 1048 | [[package]] 1049 | name = "tokio" 1050 | version = "1.15.0" 1051 | source = "registry+https://github.com/rust-lang/crates.io-index" 1052 | checksum = "fbbf1c778ec206785635ce8ad57fe52b3009ae9e0c9f574a728f3049d3e55838" 1053 | dependencies = [ 1054 | "bytes", 1055 | "libc", 1056 | "memchr", 1057 | "mio", 1058 | "num_cpus", 1059 | "once_cell", 1060 | "parking_lot 0.11.2", 1061 | "pin-project-lite", 1062 | "signal-hook-registry", 1063 | "tokio-macros", 1064 | "winapi", 1065 | ] 1066 | 1067 | [[package]] 1068 | name = "tokio-macros" 1069 | version = "1.7.0" 1070 | source = "registry+https://github.com/rust-lang/crates.io-index" 1071 | checksum = "b557f72f448c511a979e2564e55d74e6c4432fc96ff4f6241bc6bded342643b7" 1072 | dependencies = [ 1073 | "proc-macro2", 1074 | "quote", 1075 | "syn", 1076 | ] 1077 | 1078 | [[package]] 1079 | name = "toml" 1080 | version = "0.5.8" 1081 | source = "registry+https://github.com/rust-lang/crates.io-index" 1082 | checksum = "a31142970826733df8241ef35dc040ef98c679ab14d7c3e54d827099b3acecaa" 1083 | dependencies = [ 1084 | "serde", 1085 | ] 1086 | 1087 | [[package]] 1088 | name = "tracing" 1089 | version = "0.1.29" 1090 | source = "registry+https://github.com/rust-lang/crates.io-index" 1091 | checksum = "375a639232caf30edfc78e8d89b2d4c375515393e7af7e16f01cd96917fb2105" 1092 | dependencies = [ 1093 | "cfg-if", 1094 | "pin-project-lite", 1095 | "tracing-attributes", 1096 | "tracing-core", 1097 | ] 1098 | 1099 | [[package]] 1100 | name = "tracing-attributes" 1101 | version = "0.1.18" 1102 | source = "registry+https://github.com/rust-lang/crates.io-index" 1103 | checksum = "f4f480b8f81512e825f337ad51e94c1eb5d3bbdf2b363dcd01e2b19a9ffe3f8e" 1104 | dependencies = [ 1105 | "proc-macro2", 1106 | "quote", 1107 | "syn", 1108 | ] 1109 | 1110 | [[package]] 1111 | name = "tracing-core" 1112 | version = "0.1.21" 1113 | source = "registry+https://github.com/rust-lang/crates.io-index" 1114 | checksum = "1f4ed65637b8390770814083d20756f87bfa2c21bf2f110babdc5438351746e4" 1115 | dependencies = [ 1116 | "lazy_static", 1117 | ] 1118 | 1119 | [[package]] 1120 | name = "tracing-log" 1121 | version = "0.1.2" 1122 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1123 | checksum = "a6923477a48e41c1951f1999ef8bb5a3023eb723ceadafe78ffb65dc366761e3" 1124 | dependencies = [ 1125 | "lazy_static", 1126 | "log", 1127 | "tracing-core", 1128 | ] 1129 | 1130 | [[package]] 1131 | name = "tracing-subscriber" 1132 | version = "0.3.5" 1133 | source = "registry+https://github.com/rust-lang/crates.io-index" 1134 | checksum = "5d81bfa81424cc98cb034b837c985b7a290f592e5b4322f353f94a0ab0f9f594" 1135 | dependencies = [ 1136 | "ansi_term", 1137 | "lazy_static", 1138 | "matchers", 1139 | "regex", 1140 | "sharded-slab", 1141 | "smallvec", 1142 | "thread_local", 1143 | "tracing", 1144 | "tracing-core", 1145 | "tracing-log", 1146 | ] 1147 | 1148 | [[package]] 1149 | name = "typenum" 1150 | version = "1.15.0" 1151 | source = "registry+https://github.com/rust-lang/crates.io-index" 1152 | checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" 1153 | 1154 | [[package]] 1155 | name = "unicode-xid" 1156 | version = "0.2.2" 1157 | source = "registry+https://github.com/rust-lang/crates.io-index" 1158 | checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" 1159 | 1160 | [[package]] 1161 | name = "unsigned-varint" 1162 | version = "0.7.1" 1163 | source = "registry+https://github.com/rust-lang/crates.io-index" 1164 | checksum = "d86a8dc7f45e4c1b0d30e43038c38f274e77af056aa5f74b93c2cf9eb3c1c836" 1165 | 1166 | [[package]] 1167 | name = "vcpkg" 1168 | version = "0.2.15" 1169 | source = "registry+https://github.com/rust-lang/crates.io-index" 1170 | checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" 1171 | 1172 | [[package]] 1173 | name = "version_check" 1174 | version = "0.9.4" 1175 | source = "registry+https://github.com/rust-lang/crates.io-index" 1176 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" 1177 | 1178 | [[package]] 1179 | name = "wasi" 1180 | version = "0.10.2+wasi-snapshot-preview1" 1181 | source = "registry+https://github.com/rust-lang/crates.io-index" 1182 | checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" 1183 | 1184 | [[package]] 1185 | name = "winapi" 1186 | version = "0.3.9" 1187 | source = "registry+https://github.com/rust-lang/crates.io-index" 1188 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 1189 | dependencies = [ 1190 | "winapi-i686-pc-windows-gnu", 1191 | "winapi-x86_64-pc-windows-gnu", 1192 | ] 1193 | 1194 | [[package]] 1195 | name = "winapi-i686-pc-windows-gnu" 1196 | version = "0.4.0" 1197 | source = "registry+https://github.com/rust-lang/crates.io-index" 1198 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 1199 | 1200 | [[package]] 1201 | name = "winapi-x86_64-pc-windows-gnu" 1202 | version = "0.4.0" 1203 | source = "registry+https://github.com/rust-lang/crates.io-index" 1204 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 1205 | 1206 | [[package]] 1207 | name = "windows-sys" 1208 | version = "0.36.1" 1209 | source = "registry+https://github.com/rust-lang/crates.io-index" 1210 | checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" 1211 | dependencies = [ 1212 | "windows_aarch64_msvc", 1213 | "windows_i686_gnu", 1214 | "windows_i686_msvc", 1215 | "windows_x86_64_gnu", 1216 | "windows_x86_64_msvc", 1217 | ] 1218 | 1219 | [[package]] 1220 | name = "windows_aarch64_msvc" 1221 | version = "0.36.1" 1222 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1223 | checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" 1224 | 1225 | [[package]] 1226 | name = "windows_i686_gnu" 1227 | version = "0.36.1" 1228 | source = "registry+https://github.com/rust-lang/crates.io-index" 1229 | checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" 1230 | 1231 | [[package]] 1232 | name = "windows_i686_msvc" 1233 | version = "0.36.1" 1234 | source = "registry+https://github.com/rust-lang/crates.io-index" 1235 | checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" 1236 | 1237 | [[package]] 1238 | name = "windows_x86_64_gnu" 1239 | version = "0.36.1" 1240 | source = "registry+https://github.com/rust-lang/crates.io-index" 1241 | checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" 1242 | 1243 | [[package]] 1244 | name = "windows_x86_64_msvc" 1245 | version = "0.36.1" 1246 | source = "registry+https://github.com/rust-lang/crates.io-index" 1247 | checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" 1248 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "ipfs-sqlite-block-store" 3 | version = "0.13.0" 4 | repository = "https://github.com/actyx/ipfs-sqlite-block-store" 5 | authors = ["Rüdiger Klaehn ", "David Craven ", "Actyx AG", "Roland Kuhn "] 6 | edition = "2018" 7 | license = "MIT OR Apache-2.0" 8 | keywords = ["ipfs", "dag"] 9 | description = "block store for ipfs, using sqlite" 10 | 11 | [dependencies] 12 | anyhow = "1.0.52" 13 | derive_more = "0.99.17" 14 | fnv = "1.0.7" 15 | futures = "0.3.19" 16 | itertools = "0.10.3" 17 | libipld = { version = "0.14.0", default-features = false } 18 | parking_lot = "0.11.2" 19 | rusqlite = { version = "0.26.3", features = ["backup", "bundled", "unlock_notify"] } 20 | tracing = "0.1.29" 21 | 22 | [dev-dependencies] 23 | anyhow = { version = "1.0.52", features = ["backtrace"] } 24 | libipld = { version = "0.14.0", default-features = false, features = ["derive", "dag-cbor"] } 25 | maplit = "1.0.2" 26 | multihash = { version = "0.16.3", default-features = false, features = ["sha2"] } 27 | tempdir = "0.3.7" 28 | tempfile = "3.2.0" 29 | tokio = { version = "1.15.0", features = ["full"] } 30 | tracing-subscriber = { version = "0.3.5", features = ["env-filter"] } 31 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at
194 | 
195 |     http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 | 
--------------------------------------------------------------------------------
/LICENSE-MIT:
--------------------------------------------------------------------------------
 1 | Permission is hereby granted, free of charge, to any
 2 | person obtaining a copy of this software and associated
 3 | documentation files (the "Software"), to deal in the
 4 | Software without restriction, including without
 5 | limitation the rights to use, copy, modify, merge,
 6 | publish, distribute, sublicense, and/or sell copies of
 7 | the Software, and to permit persons to whom the Software
 8 | is furnished to do so, subject to the following
 9 | conditions:
10 | 
11 | The above copyright notice and this permission notice
12 | shall be included in all copies or substantial portions
13 | of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
16 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
17 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
18 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
19 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
22 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 | DEALINGS IN THE SOFTWARE.
24 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # IPFS sqlite block store   [![Latest Version]][crates.io] [![Docs Badge]][docs.rs]
 2 | 
 3 | [Latest Version]: https://img.shields.io/crates/v/ipfs-sqlite-block-store.svg
 4 | [crates.io]: https://crates.io/crates/ipfs-sqlite-block-store
 5 | [Docs Badge]: https://img.shields.io/badge/docs-docs.rs-green
 6 | [docs.rs]: https://docs.rs/ipfs-sqlite-block-store
 7 | 
 8 | SQLite-based block store for content-addressed data like IPLD.
 9 | 
10 | See the [module level docs](https://docs.rs/ipfs-sqlite-block-store) for more details.
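11 | 
12 | ## Example
13 | 
14 | A minimal usage sketch, stitched together from `examples/cache.rs` and `examples/import.rs` in this repository; it is an illustration, not a canonical recipe. The store path `blocks.sqlite` is made up, and the block-creation part assumes the `libipld` `dag-cbor` feature plus the `multihash` crate with its `sha2` feature:
15 | 
16 | ```rust
17 | use ipfs_sqlite_block_store::{BlockStore, Config};
18 | use libipld::{cbor::DagCborCodec, codec::Codec, store::DefaultParams, Cid};
19 | use multihash::{Code, MultihashDigest};
20 | 
21 | fn main() -> anyhow::Result<()> {
22 |     // open (or create) the store with the default configuration
23 |     let mut store = BlockStore::<DefaultParams>::open("blocks.sqlite", Config::default())?;
24 |     // encode a value as dag-cbor and derive its CID (0x71 is the dag-cbor multicodec)
25 |     let bytes = DagCborCodec.encode(&"hello world".to_string())?;
26 |     let cid = Cid::new_v1(0x71, Code::Sha2_256.digest(&bytes));
27 |     let block = libipld::Block::<DefaultParams>::new_unchecked(cid, bytes);
28 |     // store the block; the second argument is an optional temp pin
29 |     store.put_blocks(vec![block], None)?;
30 |     // read it back; note that unpinned blocks may be removed by a later gc()
31 |     assert!(store.get_block(&cid)?.is_some());
32 |     Ok(())
33 | }
34 | ```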
--------------------------------------------------------------------------------
/examples/cache.rs:
--------------------------------------------------------------------------------
use ipfs_sqlite_block_store::{
    cache::{AsyncCacheTracker, Spawner, SqliteCacheTracker},
    BlockStore, Config,
};
use itertools::*;
use libipld::{cbor::DagCborCodec, codec::Codec, Cid, DagCbor};
use multihash::{Code, MultihashDigest};
use std::time::Instant;
use tracing::*;
use tracing_subscriber::{fmt::format::FmtSpan, EnvFilter};

type Block = libipld::Block<libipld::DefaultParams>;

#[derive(Debug, DagCbor)]
struct Node {
    links: Vec<Cid>,
    text: String,
}

impl Node {
    pub fn leaf(text: &str) -> Self {
        Self {
            links: Vec::new(),
            text: text.into(),
        }
    }
}

/// creates a block with a min size
fn sized(name: &str, min_size: usize) -> Block {
    let mut text = name.to_string();
    while text.len() < min_size {
        text += " ";
    }
    let ipld = Node::leaf(&text);
    let bytes = DagCborCodec.encode(&ipld).unwrap();
    let hash = Code::Sha2_256.digest(&bytes);
    // https://github.com/multiformats/multicodec/blob/master/table.csv
    Block::new_unchecked(Cid::new_v1(0x71, hash), bytes)
}

/// creates an unpinned block with a size of ~10000 bytes
fn unpinned(i: usize) -> Block {
    sized(&format!("{}", i), 10000 - 16)
}

struct TokioSpawner;

impl Spawner for TokioSpawner {
    fn spawn_blocking(&self, f: impl FnOnce() + Send + 'static) {
        tokio::task::spawn_blocking(f);
    }
}

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    tracing_subscriber::fmt()
        .with_span_events(FmtSpan::CLOSE)
        .with_env_filter(EnvFilter::from_default_env())
        .init();
    // a tracker that only cares about access time
    let tracker = SqliteCacheTracker::open("cache-test-access.sqlite", |access, _| Some(access))?;
    let tracker = AsyncCacheTracker::new(TokioSpawner, tracker);
    // let tracker = InMemCacheTracker::new(|access, _| Some(access));
    // let tracker = NoopCacheTracker;
    let mut store = BlockStore::<libipld::DefaultParams>::open(
        "cache-test.sqlite",
        Config::default()
            .with_size_targets(1000, 1000000)
            .with_cache_tracker(tracker),
    )?;
    let n = 100000;
    let mut cids = Vec::new();
    for is in &(0..n).chunks(1000) {
        info!("adding 1000 blocks");
        let blocks = is.map(unpinned).collect::<Vec<_>>();
        for block in &blocks {
            cids.push(*block.cid());
        }
        store.put_blocks(blocks, None)?;
    }
    let mut sum = 0usize;
    let mut count = 0usize;
    let t0 = Instant::now();
    for j in 0..2 {
        info!("Accessing all blocks, round {}", j);
        for cid in &cids {
            sum += store.get_block(cid)?.map(|x| x.len()).unwrap_or_default();
            count += 1;
        }
    }
    let dt = t0.elapsed();
    info!(
        "total accessed {} bytes, {} blocks, in {}s",
        sum,
        count,
        dt.as_secs_f64()
    );
    store.gc()?;
    Ok(())
}
--------------------------------------------------------------------------------
/examples/import.rs:
--------------------------------------------------------------------------------
use ipfs_sqlite_block_store::{BlockStore, Config};
use itertools::*;
use libipld::cid::Cid;
use libipld::store::DefaultParams;
use rusqlite::{Connection, OpenFlags};
use std::convert::TryFrom;
use std::path::Path;
use tracing::*;
use tracing_subscriber::{fmt::format::FmtSpan, EnvFilter};
type Block = libipld::Block<DefaultParams>;

pub fn query_roots(path: &Path) -> anyhow::Result<Vec<(String, Cid)>> {
    let conn = Connection::open_with_flags(path, OpenFlags::SQLITE_OPEN_READ_ONLY)?;
    let len: u32 = conn.query_row("SELECT COUNT(1) FROM roots", [], |row| row.get(0))?;
    let mut stmt = conn.prepare("SELECT * FROM roots")?;
    let roots_iter = stmt.query_map([], |row| {
        Ok((row.get::<_, String>(0)?, row.get::<_, String>(2)?))
    })?;
    let mut roots = Vec::with_capacity(len as usize);
    for res in roots_iter {
        let (key, cid) = res?;
        let cid = Cid::try_from(cid)?;
        roots.push((key, cid));
    }
    Ok(roots)
}

pub struct OldBlock {
    key: Vec<u8>,
    cid: Vec<u8>,
    data: Vec<u8>,
}

fn main() -> anyhow::Result<()> {
    tracing_subscriber::fmt()
        .with_span_events(FmtSpan::CLOSE)
        .with_env_filter(EnvFilter::from_default_env())
        .init();
    let args = std::env::args().collect::<Vec<String>>();
    if args.len() < 3 {
        println!("Usage: import <roots> <blocks>");
        std::process::exit(1);
    }
    info!("opening roots db {}", args[1]);
    info!("opening blocks db {}", args[2]);
    let roots = Path::new(&args[1]);
    let blocks = Path::new(&args[2]);
    let output = Path::new("out.sqlite");
    let mut store = BlockStore::<DefaultParams>::open(output, Config::default())?;

    let blocks = Connection::open_with_flags(blocks, OpenFlags::SQLITE_OPEN_READ_ONLY)?;
    let len: u32 = blocks.query_row("SELECT COUNT(1) FROM blocks", [], |row| row.get(0))?;
    info!("importing {} blocks", len);

    let mut stmt = blocks.prepare("SELECT * FROM blocks")?;
    let block_iter = stmt.query_map([], |row| {
        Ok(OldBlock {
            key: row.get(0)?,
            //pinned: row.get(1)?,
            cid: row.get(2)?,
            data: row.get(3)?,
        })
    })?;

    let block_iter = block_iter.map(|block| {
        block.map_err(anyhow::Error::from).and_then(|block| {
            let key = Cid::try_from(String::from_utf8(block.key)?)?;
            let cid = Cid::try_from(block.cid)?;
            assert_eq!(key.hash(), cid.hash());
            //println!("{} {} {}", cid, block.pinned, block.data.len());
            let block = libipld::Block::<DefaultParams>::new(cid, block.data)?;
            let (cid, data) = block.into_inner();
            Block::new(cid, data)
        })
    });

    for block in &block_iter.chunks(1000) {
        info!("adding 1000 block chunk");
        let blocks = block.collect::<anyhow::Result<Vec<_>>>()?;
        store.put_blocks(blocks, None)?;
    }

    for (alias, cid) in query_roots(roots)?.into_iter() {
        info!("aliasing {} to {}", alias, cid);
        let now = std::time::Instant::now();
        store.alias(alias.as_bytes(), Some(&cid))?;
        info!("{}ms", now.elapsed().as_millis());
        let missing = store.get_missing_blocks::<Vec<_>>(&cid)?;
        info!("{} blocks missing", missing.len());
    }

    store.gc()?;

    let now = std::time::Instant::now();
    let mut len = 0usize;
    for (i, cid) in store.get_block_cids::<Vec<_>>()?.iter().enumerate() {
        if i % 1000 == 0 {
            info!("iterating {} {}", cid, i);
        }
        len += store.get_block(cid)?.map(|x| x.len()).unwrap_or(0)
    }
    let dt = now.elapsed().as_secs_f64();
    info!("iterating over all blocks: {}s", dt);
    info!("len = {}", len);
    info!("rate = {} bytes/s", (len as f64) / dt);

    let now = std::time::Instant::now();
    let mut len = 0usize;
    for (i, cid) in store.get_block_cids::<Vec<_>>()?.iter().enumerate() {
        if i % 1000 == 0 {
            info!("iterating {} {}", cid, i);
        }
        len += store.get_block(cid)?.map(|x| x.len()).unwrap_or(0)
    }
    let dt = now.elapsed().as_secs_f64();
    info!("iterating over all blocks: {}s", dt);
    info!("len = {}", len);
    info!("rate = {} bytes/s", (len as f64) / dt);
    Ok(())
}
--------------------------------------------------------------------------------
/examples/restore.rs:
--------------------------------------------------------------------------------
use ipfs_sqlite_block_store::{BlockStore, Config};
use tracing_subscriber::EnvFilter;

fn main() -> anyhow::Result<()> {
    tracing_subscriber::fmt()
        .with_env_filter(EnvFilter::from_default_env())
        .init();
    let _ = BlockStore::<libipld::DefaultParams>::open_test(
        "test-data/mini.sqlite",
        Config::default(),
    )?;
    Ok(())
}
--------------------------------------------------------------------------------
/examples/stress.rs:
--------------------------------------------------------------------------------
use std::time::Instant;

use ipfs_sqlite_block_store::{BlockStore, Config};
use libipld::{cbor::DagCborCodec, cid::Cid, codec::Codec, DagCbor};
use multihash::{Code, MultihashDigest};
use tracing_subscriber::{fmt::format::FmtSpan, EnvFilter};

type Block = libipld::Block<libipld::DefaultParams>;

#[derive(Debug, DagCbor)]
struct Node {
    links: Vec<Cid>,
    text: String,
}

impl Node {
    pub fn branch(text: &str, links: impl IntoIterator<Item = Cid>) -> Self {
        Self {
            links: links.into_iter().collect(),
            text: text.into(),
        }
    }
}

/// creates a block
/// leaf blocks will be larger than branch blocks
fn block(name: &str, links: impl IntoIterator<Item = Cid>) -> Block {
    let links = links.into_iter().collect::<Vec<_>>();
    let data_size = if links.is_empty() {
        1024 * 16 - 16
    } else {
        512
    };
    let mut name = name.to_string();
    while name.len() < data_size {
        name += " ";
    }
    let ipld = Node::branch(&name, links);
    let bytes = DagCborCodec.encode(&ipld).unwrap();
    let hash = Code::Sha2_256.digest(&bytes);
    // https://github.com/multiformats/multicodec/blob/master/table.csv
    Block::new_unchecked(Cid::new_v1(0x71, hash), bytes)
}

fn fmt_cid(cid: Cid) -> String {
    cid.to_string()[8..12].to_string()
}

fn fmt_cids(x: impl IntoIterator<Item = Cid>) -> String {
    x.into_iter().map(fmt_cid).collect::<Vec<_>>().join(",")
}

fn build_tree_0(
    prefix: &str,
    branch: u64,
    depth: u64,
    blocks: &mut Vec<Block>,
) -> anyhow::Result<Cid> {
    let children = if depth == 0 {
        Vec::new()
    } else {
        let mut children = Vec::new();
        for i in 0..branch {
            let cid = build_tree_0(&format!("{}-{}", prefix, i), branch, depth - 1, blocks)?;
            children.push(cid);
        }
        children
    };
    let block = block(prefix, children);
    let cid = *block.cid();
    blocks.push(block);
    Ok(cid)
}

fn build_tree(prefix: &str, branch: u64, depth: u64) -> anyhow::Result<(Cid, Vec<Block>)> {
    let mut tmp = Vec::new();
    let res = build_tree_0(prefix, branch, depth, &mut tmp)?;
    Ok((res, tmp))
}

fn build_chain(prefix: &str, n: usize) -> anyhow::Result<(Cid, Vec<Block>)> {
    anyhow::ensure!(n > 0);
    let mut blocks = Vec::with_capacity(n);
    let mk_node = |i: usize, links| block(&format!("{}-{}", prefix, i), links);
    let mut prev: Option<Cid> = None;
    for i in 0..n {
        let node = mk_node(i, prev);
        prev = Some(*node.cid());
        blocks.push(node);
    }
    Ok((prev.unwrap(), blocks))
}

fn main() -> anyhow::Result<()> {
    tracing_subscriber::fmt()
        .with_span_events(FmtSpan::CLOSE)
        .with_env_filter(EnvFilter::from_default_env())
        .init();
    let mut store = BlockStore::<libipld::DefaultParams>::open("test.sqlite", Config::default())?;
    for i in 0..10 {
        println!("Adding filler tree {}", i);
        let (tree_root, tree_blocks) = build_tree(&format!("tree-{}", i), 10, 4)?;
        store.put_blocks(tree_blocks, None)?;
        if i % 2 == 0 {
            store.alias(format!("tree-alias-{}", i).as_bytes(), Some(&tree_root))?;
        }
    }
    let (tree_root, tree_blocks) = build_tree("test-tree", 10, 4)?;
    let (list_root, list_blocks) = build_chain("chain", 10000)?;
    store.put_blocks(tree_blocks, None)?;
    store.put_blocks(list_blocks, None)?;
    let t0 = Instant::now();
    store.get_missing_blocks::<Vec<_>>(&list_root)?;
    store.alias("list-alias-1".as_bytes(), Some(&list_root))?;
    println!("get_missing_blocks {}", t0.elapsed().as_secs_f64());
    println!(
        "descendants of {:?} {}",
        tree_root,
        store.get_descendants::<Vec<_>>(&tree_root)?.len(),
    );
    store.put_block(block("a", None), None)?;
    println!(
        "{:?}",
        fmt_cids(store.get_missing_blocks::<Vec<_>>(block("a", None).cid())?)
    );
    store.put_block(block("b", None), None)?;
    store.put_block(block("c", None), None)?;
    println!(
        "{:?}",
        fmt_cids(store.get_descendants::<Vec<_>>(block("a", None).cid())?)
    );
    store.put_block(block("d", None), None)?;

    store.alias(b"source1".as_ref(), Some(block("a", None).cid()))?;
    store.alias(b"source2".as_ref(), Some(block("d", None).cid()))?;
    println!("starting gc");
    let t0 = Instant::now();
    store.gc()?;
    let dt = Instant::now() - t0;
    println!("{}", dt.as_secs_f64());
    Ok(())
}
--------------------------------------------------------------------------------
/src/cache/async_tracker.rs:
--------------------------------------------------------------------------------
use super::{BlockInfo, CacheTracker, WriteInfo};
use parking_lot::Mutex;
use std::{fmt::Debug, sync::Arc};

/// Wrapper around a spawn function
pub trait Spawner: Send + Sync {
    /// Called by the cache tracker to spawn a small, blocking, io bound task
    fn spawn_blocking(&self, f: impl FnOnce() + Send + 'static);
}
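// Illustrative sketch (not part of this crate): any runtime that can run a
// blocking closure on a side thread can implement Spawner. Assuming the
// async-std crate (whose spawn_blocking is behind its unstable feature), it
// would look roughly like this; the Tokio equivalent lives in examples/cache.rs.
//
// struct AsyncStdSpawner;
//
// impl Spawner for AsyncStdSpawner {
//     fn spawn_blocking(&self, f: impl FnOnce() + Send + 'static) {
//         async_std::task::spawn_blocking(f);
//     }
// }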
/// A wrapping cache tracker that performs write operations on another thread
pub struct AsyncCacheTracker<S, T> {
    spawner: S,
    inner: Arc<Mutex<T>>,
}

impl<S, T> Debug for AsyncCacheTracker<S, T> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("AsyncCacheTracker").finish()
    }
}

impl<S, T> AsyncCacheTracker<S, T> {
    pub fn new(spawner: S, inner: T) -> Self {
        Self {
            spawner,
            inner: Arc::new(Mutex::new(inner)),
        }
    }
}

impl<S, T> CacheTracker for AsyncCacheTracker<S, T>
where
    S: Spawner,
    T: CacheTracker + 'static,
{
    fn blocks_accessed(&self, blocks: Vec<BlockInfo>) {
        let inner = self.inner.clone();
        self.spawner.spawn_blocking(move || {
            inner.lock().blocks_accessed(blocks);
        });
    }

    fn blocks_written(&self, blocks: Vec<WriteInfo>) {
        let inner = self.inner.clone();
        self.spawner.spawn_blocking(move || {
            inner.lock().blocks_written(blocks);
        });
    }

    fn blocks_deleted(&self, blocks: Vec<BlockInfo>) {
        self.inner.lock().blocks_deleted(blocks);
    }

    fn retain_ids(&self, ids: &[i64]) {
        self.inner.lock().retain_ids(ids);
    }

    fn sort_ids(&self, ids: &mut [i64]) {
        self.inner.lock().sort_ids(ids);
    }

    fn has_persistent_state(&self) -> bool {
        self.inner.lock().has_persistent_state()
    }
}
--------------------------------------------------------------------------------
/src/cache/mod.rs:
--------------------------------------------------------------------------------
use fnv::{FnvHashMap, FnvHashSet};
use libipld::Cid;
use std::{
    fmt::Debug,
    ops::{Deref, DerefMut},
    sync::Arc,
    time::{Duration, Instant},
};
mod async_tracker;
mod sqlite_tracker;
pub use async_tracker::{AsyncCacheTracker, Spawner};
use parking_lot::Mutex;
pub use sqlite_tracker::SqliteCacheTracker;

#[cfg(test)]
mod tests;

/// Information about a block that is quick to gather
///
/// This is what is available for making decisions about whether to cache a block
#[derive(Debug, Clone, Copy)]
pub struct BlockInfo {
    /// id of the block in the block store
    id: i64,
    /// cid
    cid: Cid,
    /// size of the block
    len: usize,
}

impl BlockInfo {
    pub fn new(id: i64, cid: &Cid, len: usize) -> Self {
        Self { id, cid: *cid, len }
    }
    pub fn id(&self) -> i64 {
        self.id
    }
    pub fn cid(&self) -> &Cid {
        &self.cid
    }
    pub fn block_len(&self) -> usize {
        self.len
    }
}

/// Information about a write operation that is cheap to gather
#[derive(Debug, Clone, Copy)]
pub struct WriteInfo {
    block: BlockInfo,
    block_exists: bool,
}

impl WriteInfo {
    pub fn new(block: BlockInfo, block_exists: bool) -> Self {
        Self {
            block,
            block_exists,
        }
    }
    /// true if we had the block already.
    pub fn block_exists(&self) -> bool {
        self.block_exists
    }
}

impl Deref for WriteInfo {
    type Target = BlockInfo;

    fn deref(&self) -> &Self::Target {
        &self.block
    }
}

/// tracks block reads and writes to provide info about which blocks to evict from the LRU cache
#[allow(unused_variables)]
pub trait CacheTracker: Debug + Send + Sync {
    /// called whenever blocks were accessed
    ///
    /// note that this method will be called very frequently, on every block access.
    /// it is fire and forget, so it is perfectly ok to offload the writing to another thread.
    fn blocks_accessed(&self, blocks: Vec<BlockInfo>) {}

    /// called whenever blocks were written
    ///
    /// note that this method will be called frequently, on every block write.
    /// it is fire and forget, so it is perfectly ok to offload the writing to another thread.
    fn blocks_written(&self, blocks: Vec<WriteInfo>) {}

    /// called whenever blocks have been deleted by gc.
    fn blocks_deleted(&self, blocks: Vec<BlockInfo>) {}

    /// sort ids by importance. More important ids should go to the end.
    ///
    /// this will be called from inside gc
    fn sort_ids(&self, ids: &mut [i64]) {}

    /// indicate whether `retain_ids` should be called on startup
    fn has_persistent_state(&self) -> bool;

    /// notification that only these ids should be retained
    ///
    /// this will be called once during startup
    fn retain_ids(&self, ids: &[i64]) {}
}
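// A minimal sketch of a custom tracker (illustration only, not part of the
// crate): count accesses per id and let gc evict the least-accessed blocks
// first. It needs nothing beyond the trait above and the Mutex/FnvHashMap
// already imported in this module.
//
// #[derive(Debug, Default)]
// struct CountingTracker {
//     counts: Mutex<FnvHashMap<i64, u64>>,
// }
//
// impl CacheTracker for CountingTracker {
//     fn blocks_accessed(&self, blocks: Vec<BlockInfo>) {
//         let mut counts = self.counts.lock();
//         for block in blocks {
//             *counts.entry(block.id()).or_default() += 1;
//         }
//     }
//     fn sort_ids(&self, ids: &mut [i64]) {
//         // least accessed first, so gc deletes those before hot blocks
//         let counts = self.counts.lock();
//         ids.sort_unstable_by_key(|id| counts.get(id).copied().unwrap_or(0));
//     }
//     fn has_persistent_state(&self) -> bool {
//         false
//     }
// }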
impl<T: CacheTracker> CacheTracker for Arc<T> {
    fn blocks_accessed(&self, blocks: Vec<BlockInfo>) {
        self.as_ref().blocks_accessed(blocks)
    }

    fn blocks_written(&self, blocks: Vec<WriteInfo>) {
        self.as_ref().blocks_written(blocks)
    }

    fn sort_ids(&self, ids: &mut [i64]) {
        self.as_ref().sort_ids(ids)
    }

    fn blocks_deleted(&self, blocks: Vec<BlockInfo>) {
        self.as_ref().blocks_deleted(blocks)
    }

    fn has_persistent_state(&self) -> bool {
        self.as_ref().has_persistent_state()
    }

    fn retain_ids(&self, ids: &[i64]) {
        self.as_ref().retain_ids(ids)
    }
}

/// a cache tracker that does nothing whatsoever, but is extremely fast
#[derive(Debug)]
pub struct NoopCacheTracker;

impl CacheTracker for NoopCacheTracker {
    fn has_persistent_state(&self) -> bool {
        false
    }
}

/// a cache tracker that just sorts by id, which is the time of first addition of a block
#[derive(Debug)]
pub struct SortByIdCacheTracker;

impl CacheTracker for SortByIdCacheTracker {
    fn sort_ids(&self, ids: &mut [i64]) {
        // a bit faster than stable sort, and obviously for ids it does not matter
        ids.sort_unstable();
    }
    fn has_persistent_state(&self) -> bool {
        false
    }
}

/// keep track of block accesses in memory
pub struct InMemCacheTracker<T, F> {
    cache: Arc<Mutex<FnvHashMap<i64, T>>>,
    mk_cache_entry: F,
    created: Instant,
}

impl<T, F> InMemCacheTracker<T, F>
where
    T: Ord + Clone + Debug,
    F: Fn(Duration, BlockInfo) -> Option<T>,
{
    /// mk_cache_entry will be called on each block access to create or update a cache entry.
    /// It allows to customize whether we are interested in an entry at all, and what
    /// entries we want to be preserved.
    ///
    /// E.g. to just sort entries by their access time, use `|access, _| Some(access)`.
    /// this will keep entries in the cache based on last access time.
    ///
    /// It is also possible to use more sophisticated strategies like only caching certain cid types
    /// or caching based on the data size.
    pub fn new(mk_cache_entry: F) -> Self {
        Self {
            cache: Arc::new(Mutex::new(FnvHashMap::default())),
            mk_cache_entry,
            created: Instant::now(),
        }
    }
}

#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
struct SortKey<T: Ord> {
    time: Option<T>,
    id: i64,
}

impl<T: Ord> SortKey<T> {
    pub fn new(time: Option<T>, id: i64) -> Self {
        Self { time, id }
    }
}

fn get_key<T: Ord + Clone>(
    cache: &mut impl DerefMut<Target = FnvHashMap<i64, T>>,
    id: i64,
) -> SortKey<T> {
    SortKey::new(cache.get(&id).cloned(), id)
}

impl<T, F> CacheTracker for InMemCacheTracker<T, F>
where
    T: Ord + Clone + Debug + Send + Sync,
    F: Fn(Duration, BlockInfo) -> Option<T> + Send + Sync,
{
    /// called whenever blocks were accessed
    fn blocks_accessed(&self, blocks: Vec<BlockInfo>) {
        let now = Instant::now().checked_duration_since(self.created).unwrap();
        let mut cache = self.cache.lock();
        for block in blocks {
            if let Some(value) = (self.mk_cache_entry)(now, block) {
                cache.insert(block.id, value);
            } else {
                cache.remove(&block.id);
            }
        }
    }

    /// notification that these ids no longer have to be tracked
    fn blocks_deleted(&self, blocks: Vec<BlockInfo>) {
        let mut cache = self.cache.lock();
        for block in blocks {
            cache.remove(&block.id);
        }
    }

    /// notification that only these ids should be retained
    fn retain_ids(&self, ids: &[i64]) {
        let ids = ids.iter().cloned().collect::<FnvHashSet<_>>();
        let mut cache = self.cache.lock();
        cache.retain(|id, _| ids.contains(id));
    }

    /// sort ids by importance. More important ids should go to the end.
    fn sort_ids(&self, ids: &mut [i64]) {
        let mut cache = self.cache.lock();
        ids.sort_unstable_by_key(move |id| get_key(&mut cache, *id));
    }

    fn has_persistent_state(&self) -> bool {
        false
    }
}

impl<T: Debug, F> std::fmt::Debug for InMemCacheTracker<T, F> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("InMemLruCacheTracker")
            .field("cache", &self.cache.lock())
            .finish()
    }
}

#[cfg(test)]
#[test]
fn sort_key_sort_order() {
    assert!(
        SortKey::new(None, i64::max_value())
            < SortKey::new(Some(Duration::default()), i64::min_value())
    );
}
--------------------------------------------------------------------------------
/src/cache/sqlite_tracker.rs:
--------------------------------------------------------------------------------
use super::{BlockInfo, CacheTracker};
use crate::error::Context;
use fnv::{FnvHashMap, FnvHashSet};
use parking_lot::Mutex;
use rusqlite::{Connection, Transaction};
use std::{
    fmt::Debug,
    ops::DerefMut,
    path::Path,
    sync::Arc,
    time::{Instant, SystemTime},
};
use tracing::*;

/// A cache tracker that uses a sqlite database as persistent storage
pub struct SqliteCacheTracker<F> {
    conn: Arc<Mutex<Connection>>,
    mk_cache_entry: F,
}

impl<F> Debug for SqliteCacheTracker<F> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("SqliteCacheTracker").finish()
    }
}

const INIT: &str = r#"
PRAGMA journal_mode = WAL;
PRAGMA synchronous = OFF;
CREATE TABLE IF NOT EXISTS accessed (
    id INTEGER PRIMARY KEY,
    time INTEGER
);
"#;

fn init_db(conn: &mut Connection) -> crate::Result<()> {
    conn.execute_batch(INIT).ctx("initialising CT DB")?;
    Ok(())
}

/// execute a statement in a readonly transaction
/// nested transactions are not allowed here.
pub(crate) fn in_ro_txn<T>(
    conn: &mut Connection,
    f: impl FnOnce(&Transaction) -> crate::Result<T>,
) -> crate::Result<T> {
    let txn = conn.transaction().ctx("beginning CT ro transaction")?;
    f(&txn)
}

fn attempt_txn<T>(
    mut conn: impl DerefMut<Target = Connection>,
    f: impl FnOnce(&Transaction) -> crate::Result<T>,
) {
    let result = conn
        .transaction()
        .ctx("beginning CT transaction")
        .and_then(|txn| {
            f(&txn)?;
            Ok(txn)
        })
        .and_then(|txn| txn.commit().ctx("committing CT transaction"));
    if let Err(cause) = result {
        tracing::warn!("Unable to execute transaction: {}", cause);
    }
}

fn attempt_ro_txn<T>(
    mut conn: impl DerefMut<Target = Connection>,
    f: impl FnOnce(&Transaction) -> crate::Result<T>,
) {
    let result = in_ro_txn(&mut conn, f);
    if let Err(cause) = result {
        tracing::warn!("Unable to execute readonly transaction {}", cause);
    }
}

fn set_accessed(txn: &Transaction, id: i64, accessed: i64) -> crate::Result<()> {
    txn.prepare_cached("REPLACE INTO accessed (id, time) VALUES (?, ?)")
        .ctx("setting accessed (prep)")?
        .execute([id, accessed])
        .ctx("setting accessed")?;
    Ok(())
}

fn get_accessed_bulk(
    txn: &Transaction,
    result: &mut FnvHashMap<i64, Option<i64>>,
) -> crate::Result<()> {
    let mut stmt = txn
        .prepare_cached("SELECT id, time FROM accessed")
        .ctx("getting accessed (prep)")?;
    let accessed = stmt
        .query_map([], |row| {
            let id: i64 = row.get(0)?;
            let time: i64 = row.get(1)?;
            Ok((id, time))
        })
        .ctx("getting accessed")?;
    // we have no choice but to run through all values in accessed.
    for row in accessed.flatten() {
        // only add if a row already exists
        let (id, time) = row;
        if let Some(value) = result.get_mut(&id) {
            *value = Some(time);
        }
    }
    Ok(())
}

fn delete_id(txn: &Transaction, id: i64) -> crate::Result<()> {
    txn.prepare_cached("DELETE FROM accessed WHERE id = ?")
        .ctx("deleting from CT (prep)")?
        .execute([id])
        .ctx("deleting from CT")?;
    Ok(())
}

fn get_ids(txn: &Transaction) -> crate::Result<Vec<i64>> {
    let ids = txn
        .prepare_cached("SELECT id FROM accessed")
        .ctx("getting IDs (prep)")?
        .query_map([], |row| row.get(0))
        .ctx("getting IDs")?
        .collect::<rusqlite::Result<Vec<i64>>>()
        .ctx("getting IDs (transform)")?;
    Ok(ids)
}

impl<F> SqliteCacheTracker<F>
where
    F: Fn(i64, BlockInfo) -> Option<i64>,
{
    pub fn memory(mk_cache_entry: F) -> crate::Result<Self> {
        let mut conn = Connection::open_in_memory().ctx("opening in-memory CT DB")?;
        init_db(&mut conn)?;
        Ok(Self {
            conn: Arc::new(Mutex::new(conn)),
            mk_cache_entry,
        })
    }

    pub fn open(path: impl AsRef<Path>, mk_cache_entry: F) -> crate::Result<Self> {
        let mut conn = Connection::open(path).ctx("opening CT DB")?;
        init_db(&mut conn)?;
        Ok(Self {
            conn: Arc::new(Mutex::new(conn)),
            mk_cache_entry,
        })
    }
}

#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
struct SortKey {
    time: Option<i64>,
    id: i64,
}

impl SortKey {
    fn new(time: Option<i64>, id: i64) -> Self {
        Self { time, id }
    }
}

impl<F> CacheTracker for SqliteCacheTracker<F>
where
    F: Fn(i64, BlockInfo) -> Option<i64> + Send + Sync,
{
    #[allow(clippy::needless_collect)]
    fn blocks_accessed(&self, blocks: Vec<BlockInfo>) {
        let accessed = SystemTime::now()
            .duration_since(SystemTime::UNIX_EPOCH)
            .unwrap_or_default();
        let nanos = accessed.as_nanos() as i64;
        let items = blocks
            .iter()
            .filter_map(|block| (self.mk_cache_entry)(nanos, *block).map(|nanos| (block.id, nanos)))
            .collect::<Vec<_>>();
        if items.is_empty() {
            return;
        }
        attempt_txn(self.conn.lock(), |txn| {
            for (id, accessed) in items {
                set_accessed(txn, id, accessed as i64)?;
            }
            Ok(())
        });
    }

    fn blocks_deleted(&self, blocks: Vec<BlockInfo>) {
        attempt_txn(self.conn.lock(), |txn| {
            for block in blocks {
                delete_id(txn, block.id)?;
            }
            Ok(())
        });
    }

    fn retain_ids(&self, ids: &[i64]) {
        let ids = ids.iter().cloned().collect::<FnvHashSet<_>>();
        attempt_txn(self.conn.lock(), move |txn| {
            for id in get_ids(txn)? {
                if !ids.contains(&id) {
                    delete_id(txn, id)?;
                }
            }
            Ok(())
        });
    }

    fn sort_ids(&self, ids: &mut [i64]) {
        attempt_ro_txn(self.conn.lock(), |txn| {
            let t0 = Instant::now();
            let mut accessed = ids
                .iter()
                .map(|id| (*id, None))
                .collect::<FnvHashMap<i64, Option<i64>>>();
            get_accessed_bulk(txn, &mut accessed)?;
            debug!("getting access times took {}", t0.elapsed().as_micros());
            let t0 = Instant::now();
            ids.sort_by_cached_key(|id| SortKey::new(accessed.get(id).cloned().flatten(), *id));
            debug!("sorting ids took {}", t0.elapsed().as_micros());
            Ok(())
        });
    }

    fn has_persistent_state(&self) -> bool {
        true
    }
}

#[test]
fn sort_key_sort_order() {
    assert!(
        SortKey::new(None, i64::max_value())
            < SortKey::new(Some(i64::min_value()), i64::min_value())
    );
}
--------------------------------------------------------------------------------
/src/cache/tests.rs:
--------------------------------------------------------------------------------
use super::SortKey;
use std::time::Duration;

#[cfg(test)]
#[test]
fn sort_key_sort_order() {
    assert!(
        SortKey::new(None, i64::max_value())
            < SortKey::new(Some(Duration::default()), i64::min_value())
    );
}
--------------------------------------------------------------------------------
/src/cidbytes.rs:
--------------------------------------------------------------------------------
use libipld::cid::{self, Cid};
use rusqlite::{
    types::ToSqlOutput,
    types::{FromSql, FromSqlError, ValueRef},
    ToSql,
};
use std::{convert::TryFrom, io::Cursor};

/// This is sufficient for 32 byte hashes like sha2-256, but not for exotic hashes.
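/// (For a CIDv1 with sha2-256 that is 1 version byte + 1 codec byte +
/// 34 multihash bytes = 36, so 39 presumably leaves headroom for multi-byte
/// varint codec and hash codes.)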
const MAX_SIZE: usize = 39;

/// a representation of a cid that implements AsRef<[u8]>
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct CidBytes {
    size: u8,
    data: [u8; MAX_SIZE],
}

impl CidBytes {
    fn len(&self) -> usize {
        self.size as usize
    }
}

impl AsRef<[u8]> for CidBytes {
    fn as_ref(&self) -> &[u8] {
        &self.data[0..self.len()]
    }
}

impl Default for CidBytes {
    fn default() -> Self {
        Self {
            size: 0,
            data: [0; MAX_SIZE],
        }
    }
}

impl TryFrom<&Cid> for CidBytes {
    type Error = cid::Error;

    fn try_from(value: &Cid) -> Result<Self, Self::Error> {
        let mut res = Self::default();
        value.write_bytes(&mut res)?;
        Ok(res)
    }
}

impl TryFrom<&CidBytes> for Cid {
    type Error = cid::Error;

    fn try_from(value: &CidBytes) -> Result<Self, Self::Error> {
        Cid::read_bytes(Cursor::new(value.as_ref()))
    }
}

impl TryFrom<&[u8]> for CidBytes {
    type Error = cid::Error;

    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
        let mut res = CidBytes::default();
        if value.len() <= MAX_SIZE {
            res.size = value.len() as u8;
            res.data[0..value.len()].copy_from_slice(value);
            Ok(res)
        } else {
            Err(cid::Error::ParsingError)
        }
    }
}

impl ToSql for CidBytes {
    fn to_sql(&self) -> rusqlite::Result<ToSqlOutput<'_>> {
        Ok(ToSqlOutput::Borrowed(ValueRef::Blob(self.as_ref())))
    }
}

impl FromSql for CidBytes {
    fn column_result(value: ValueRef<'_>) -> rusqlite::types::FromSqlResult<Self> {
        CidBytes::try_from(value.as_blob()?).map_err(|_| FromSqlError::InvalidType)
    }
}

impl std::io::Write for CidBytes {
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let len = self.len();
        let cap: usize = MAX_SIZE - len;
        let n = cap.min(buf.len());
        self.data[len..len + n].copy_from_slice(&buf[0..n]);
        self.size += n as u8;
        Ok(n)
    }

    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}
--------------------------------------------------------------------------------
/src/db.rs:
--------------------------------------------------------------------------------
//! A sqlite based block store for content-addressed data that tries to do as much as possible
//! in the database.
//!
//! This module is for all interactions with the database, so all SQL statements go in here.
//!
//! Tables:
//! cids: mapping from cid (blob < 64 bytes) to id (u64)
//! refs: m:n mapping from block ids to their children
//! blocks: the actual data for blocks, keyed by block id
//!   cids can exist in the system without having data associated with them!
//! aliases: table that contains named pins for roots of graphs that should not be deleted by gc
//!   you can alias incomplete or in fact non-existing data. It is not necessary for a pinned dag
//!   to be complete.
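// Illustration (hypothetical data, not part of the schema): after
// `alias(b"root", Some(&cid_a))` for a dag A -> {B, C}, the tables hold
//   cids:    (1, cid_a), (2, cid_b), (3, cid_c)
//   refs:    (1, 2), (1, 3)
//   blocks:  (1, bytes_a), (2, bytes_b), (3, bytes_c)
//   aliases: ("root", 1)
// gc retains exactly what is reachable from aliases and temp_pins via refs.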
use libipld::{Cid, DefaultParams};
use rusqlite::{
    config::DbConfig,
    params, params_from_iter,
    types::FromSql,
    Connection,
    Error::{QueryReturnedNoRows, SqliteFailure},
    ErrorCode::DatabaseBusy,
    OptionalExtension, ToSql, Transaction, TransactionBehavior,
};
use std::{
    collections::{BTreeSet, HashSet},
    convert::TryFrom,
    time::Duration,
    time::Instant,
};

use crate::{
    cache::{BlockInfo, CacheTracker},
    cidbytes::CidBytes,
    error::Context,
    BlockStoreError, SizeTargets, StoreStats, Synchronous,
};
use anyhow::Context as _;
use itertools::Itertools;

const PRAGMAS: &str = r#"
-- this must be done before creating the first table, otherwise it has no effect
PRAGMA auto_vacuum = 2;
-- this must be done before changing the database via the CLI!
PRAGMA foreign_keys = ON;
PRAGMA journal_mode = WAL;
PRAGMA wal_checkpoint(TRUNCATE);
-- I tried different even larger values for this. Did not make a difference.
PRAGMA page_size = 4096;
"#;

const TABLES: &[(&str, &str)] = &[
    (
        "cids",
        "CREATE TABLE cids ( \
            id INTEGER PRIMARY KEY AUTOINCREMENT, \
            cid BLOB UNIQUE NOT NULL \
        )",
    ),
    (
        "refs",
        "CREATE TABLE refs ( \
            parent_id INTEGER NOT NULL, \
            child_id INTEGER NOT NULL, \
            PRIMARY KEY(parent_id,child_id) \
            CONSTRAINT fk_parent_block \
                FOREIGN KEY (parent_id) \
                REFERENCES blocks(block_id) \
                ON DELETE CASCADE \
            CONSTRAINT fk_child_id \
                FOREIGN KEY (child_id) \
                REFERENCES cids(id) \
                ON DELETE RESTRICT \
        )",
    ),
    (
        "blocks",
        "CREATE TABLE blocks ( \
            block_id INTEGER PRIMARY KEY, \
            block BLOB NOT NULL, \
            CONSTRAINT fk_block_cid \
                FOREIGN KEY (block_id) \
                REFERENCES cids(id) \
                ON DELETE CASCADE \
        )",
    ),
    (
        "aliases",
        "CREATE TABLE aliases ( \
            name blob NOT NULL PRIMARY KEY, \
            block_id INTEGER NOT NULL, \
            CONSTRAINT fk_block_id \
                FOREIGN KEY (block_id) \
                REFERENCES cids(id) \
                ON DELETE CASCADE \
        )",
    ),
    (
        "temp_pins",
        "CREATE TABLE temp_pins ( \
            id INTEGER NOT NULL, \
            block_id INTEGER NOT NULL, \
            PRIMARY KEY(id,block_id) \
            CONSTRAINT fk_block_id \
                FOREIGN KEY (block_id) \
                REFERENCES cids(id) \
                ON DELETE RESTRICT \
        )",
    ),
    (
        "stats",
        "CREATE TABLE stats ( \
            count INTEGER NOT NULL, \
            size INTEGER NOT NULL \
        )",
    ),
];

const INIT: &str = r#"
PRAGMA user_version = 2;

CREATE INDEX IF NOT EXISTS idx_refs_child_id
    ON refs (child_id);

CREATE INDEX IF NOT EXISTS idx_aliases_block_id
    ON aliases (block_id);

CREATE INDEX IF NOT EXISTS idx_temp_pins_block_id
    ON temp_pins (block_id);
"#;

const CLEANUP_TEMP_PINS: &str = r#"
-- delete temp aliases that were not dropped because of crash
DELETE FROM temp_pins;
"#;

fn user_version(txn: &Transaction) -> rusqlite::Result<u32> {
    Ok(txn
        .pragma_query_value(None, "user_version", |row| row.get(0))
        .optional()?
        .unwrap_or_default())
}

fn table_exists(txn: &Transaction, table: &str) -> rusqlite::Result<bool> {
    let num: u32 = txn
        .prepare_cached("SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name=?1;")?
        .query_row([table], |row| row.get(0))?;
    Ok(num > 0)
}

macro_rules! c {
    (DEBUG $t:literal => $e:expr) => {{
        tracing::debug!($t);
        $e.ctx(concat!($t, " (line ", line!(), ")"))?
    }};
    ($t:literal => $e:expr) => {
        $e.ctx(concat!($t, " (line ", line!(), ")"))?
    };
}

fn get_id(txn: &Transaction, cid: impl ToSql) -> rusqlite::Result<Option<i64>> {
    txn.prepare_cached("SELECT id FROM cids WHERE cid=?")?
        .query_row([cid], |row| row.get(0))
        .optional()
}

#[derive(Debug, PartialEq)]
pub(crate) struct BlockStats {
    count: u64,
    size: u64,
}

impl From<StoreStats> for BlockStats {
    fn from(s: StoreStats) -> Self {
        Self {
            count: s.count,
            size: s.size,
        }
    }
}

/// returns the number and size of blocks, excluding orphaned blocks, computed from scratch
pub(crate) fn compute_store_stats(txn: &Transaction) -> crate::Result<BlockStats> {
    let (count, size): (i64, i64) = txn
        .prepare(
            "SELECT COUNT(id), COALESCE(SUM(LENGTH(block)), 0) \
             FROM cids, blocks ON id = block_id",
        )
        .ctx("computing store stats (prep)")?
        .query_row([], |row| Ok((row.get(0)?, row.get(1)?)))
        .ctx("computing store stats")?;
    Ok(BlockStats {
        count: u64::try_from(count).ctx("computed count")?,
        size: u64::try_from(size).ctx("computed size")?,
    })
}

/// recomputes the store stats (should be done at startup to prevent unbounded drift)
pub(crate) fn recompute_store_stats(conn: &mut Connection) -> crate::Result<()> {
    let _span = tracing::debug_span!("check stats").entered();
    // first a read-only transaction to determine the true base
    let (stats, truth) = in_txn(conn, None, false, |txn| {
        let stats = get_store_stats(txn)?;
        let truth = compute_store_stats(txn)?;
        Ok((stats, truth))
    })?;

    tracing::debug!("applying findings");
    // now compute the correction based on what the above snapshot has calculated
    in_txn(conn, None, true, move |txn| {
        let stats2 = BlockStats::from(get_store_stats(txn)?);
        let new_stats = BlockStats {
            count: stats2.count - stats.count + truth.count,
            size: stats2.size - stats.size + truth.size,
        };
        if new_stats != stats2 {
            tracing::info!(
                "correcting usage stats from {:?} to {:?}",
                stats2,
                new_stats
            );
            txn.prepare_cached("UPDATE stats SET count = ?, size = ?")
                .ctx("updating stats (prep)")?
                .execute([new_stats.count, new_stats.size])
                .ctx("updating stats")?;
        } else {
            tracing::debug!("usage stats were correct");
        }
        Ok(())
    })?;

    Ok(())
}
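// Worked example for the correction above (hypothetical numbers): suppose the
// snapshot read stats = 10 blocks while the true count was 8. If concurrent
// writers bumped stats to 12 by the time the write transaction runs, the
// corrected value is 12 - 10 + 8 = 10, i.e. the drift of 2 is removed while
// the 2 concurrent additions are kept.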
/// returns the number and size of blocks, excluding orphaned blocks, from the stats table
pub(crate) fn get_store_stats(txn: &Transaction) -> crate::Result<StoreStats> {
    let (count, size): (i64, i64) = txn
        .prepare_cached("SELECT count, size FROM stats LIMIT 1")
        .ctx("getting store stats (prep)")?
        .query_row([], |row| Ok((row.get(0)?, row.get(1)?)))
        .ctx("getting store stats")?;
    let page_size = txn
        .pragma_query_value(None, "page_size", |r| r.get::<_, i64>(0))
        .ctx("getting page_size")?;
    let used_pages = txn
        .pragma_query_value(None, "page_count", |r| r.get::<_, i64>(0))
        .ctx("getting page_count")?;
    let free_pages = txn
        .pragma_query_value(None, "freelist_count", |r| r.get::<_, i64>(0))
        .ctx("getting freelist_count")?;
    let result = StoreStats {
        count: u64::try_from(count).ctx("getting count")?,
        size: u64::try_from(size).ctx("getting size")?,
        page_size: u64::try_from(page_size).ctx("getting page_size")?,
        used_pages: u64::try_from(used_pages).ctx("getting used_pages")?,
        free_pages: u64::try_from(free_pages).ctx("getting free_pages")?,
    };
    Ok(result)
}

fn get_or_create_id(txn: &Transaction, cid: impl ToSql) -> rusqlite::Result<i64> {
    txn.prepare_cached(
        "INSERT INTO cids (cid) VALUES (?) ON CONFLICT DO UPDATE SET cid=cid RETURNING id",
    )?
    .query_row([cid], |row| row.get(0))
}

// This is the plan:
//
// First figure out in a read transaction which blocks are not referenced; ideally get an iterator
// for these and stop iterating when enough sufficiently low-prio blocks have been found as seen by
// the CacheTracker. In a second step delete from least important upwards, block by block, in a
// write transaction that first checks whether that particular block is still unreferenced. Then
// at the end perform an incremental or full vacuum, depending on config or fragmentation state.
pub(crate) fn incremental_gc(
    conn: &mut Connection,
    min_blocks: usize,
    max_duration: Duration,
    size_targets: SizeTargets,
    cache_tracker: &impl CacheTracker,
) -> crate::Result<bool> {
    let _span = tracing::debug_span!("GC", %min_blocks, ?max_duration).entered();

    // get the store stats from the stats table:
    // if we don't exceed any of the size targets, there is nothing to do
    let mut stats = in_txn(conn, None, false, get_store_stats)?;
    if !size_targets.exceeded(&stats) {
        tracing::info!(
            blocks = display(stats.count),
            size = display(stats.size),
            "nothing to do"
        );
        return Ok(true);
    }

    let t0 = Instant::now();

    let mut ids = in_txn(
        conn,
        Some(("getting unreferenced CIDs", Duration::from_secs(3))),
        false,
        |txn| {
            // find all ids that are not pinned (directly or indirectly)
            let mut id_query = txn
                .prepare_cached(
                    r#"
WITH RECURSIVE
    descendant_of(id) AS
    (
        SELECT block_id FROM aliases UNION SELECT block_id FROM temp_pins
        UNION
        SELECT child_id FROM refs, descendant_of ON id = parent_id
    )
SELECT id FROM cids
WHERE id NOT IN descendant_of;
"#,
                )
                .ctx("finding GC blocks (prep)")?;
            // log execution time of the non-interruptible query that computes the set of ids to delete
            let ret = id_query
                .query_map([], |row| row.get(0))
                .ctx("finding GC blocks")?
                .collect::<rusqlite::Result<Vec<i64>>>()
                .ctx("reading GC block ID")?;
            Ok(ret)
        },
    )?;
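    // How the recursive CTE above works (illustration): the seed row set is
    // every directly pinned block_id (aliases UNION temp_pins); the recursive
    // step joins refs to add the children of everything found so far, until a
    // fixed point is reached. descendant_of is then the transitive closure of
    // the pins, and every cid outside it is eligible for deletion. Using
    // UNION rather than UNION ALL deduplicates rows, which keeps diamonds and
    // other pathologically linked dags from blowing up the recursion.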
    // give the cache tracker the opportunity to sort the non-pinned ids by value
    let span = tracing::debug_span!("sorting CIDs").entered();
    cache_tracker.sort_ids(&mut ids);
    drop(span);

    let mut n = 0;
    let mut ret_val = true;
    for id in ids.into_iter() {
        if n >= min_blocks && t0.elapsed() > max_duration {
            tracing::info!(removed = n, "stopping due to time constraint");
            ret_val = false;
            break;
        }
        if !size_targets.exceeded(&stats) {
            tracing::info!(removed = n, "finished, target reached");
            break;
        }
        let res = in_txn(
            conn,
            Some(("", Duration::from_millis(100))),
            true,
            move |txn| {
                // get block size and check whether now referenced
                let mut block_size_stmt = c!("getting GC block (prep)" => txn.prepare_cached(
                    r#"
WITH RECURSIVE
    ancestor(id) AS (
        SELECT ?
        UNION -- must not use UNION ALL in case of pathologically linked dags
        SELECT parent_id FROM refs, ancestor ON id = child_id
    ),
    names AS (SELECT name FROM ancestor, aliases ON id = block_id)
SELECT LENGTH(block), cid, (SELECT count(*) FROM names)
FROM cids, blocks ON id = block_id WHERE id = ?;
"#,
                ));
                let mut update_stats_stmt = c!("updating GC stats (prep)" =>
                    txn.prepare_cached("UPDATE stats SET count = count - 1, size = size - ?"));
                let mut delete_stmt = c!("deleting GC block (prep)" => txn.prepare_cached("DELETE FROM blocks WHERE block_id = ?"));

                tracing::trace!("deleting id {}", id);

                let block_size: Option<(i64, CidBytes, i64)> = block_size_stmt
                    .query_row([id, id], |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)))
                    .optional()
                    .ctx("getting GC block")?;
                tracing::trace!(block_size = ?&block_size);
                if let Some((block_size, cid, names)) = block_size {
                    if names != 0 {
                        // block is referenced again
                        return Ok(None);
                    }
                    let cid = Cid::try_from(&cid)?;
                    let len = c!("getting GC block size" => usize::try_from(block_size));
                    c!("updating GC stats" => update_stats_stmt.execute([block_size]));
                    tracing::trace!("stats updated");
                    c!("deleting GC block" => delete_stmt.execute(params![id]));
                    Ok(Some((block_size, cid, len)))
                } else {
                    Ok(None)
                }
            },
        )?;
        if let Some((size, cid, len)) = res {
            stats.count -= 1;
            stats.size -= size as u64;
            cache_tracker.blocks_deleted(vec![BlockInfo::new(id, &cid, len)]);
            n += 1;
        }
    }

    if n > 0 {
        // the above only removed the blocks, now we need to clean up those cids that we don’t
        // need anymore

        // doing this in one transaction may block the DB for too long, so get the IDs first and then
        // remove them in batches
        let ids = in_txn(
            conn,
            Some(("getting IDs to clean up", Duration::from_secs(5))),
            false,
            |txn| {
                let mut stmt = c!("getting IDs (prep)" => txn.prepare_cached(
                    // refs.parent_id is not a blocker because if we delete this it means that
                    // the block is gone
                    "SELECT id FROM cids WHERE \
                     id NOT IN (SELECT block_id FROM blocks) AND \
                     id NOT IN (SELECT block_id FROM aliases) AND \
                     id NOT IN (SELECT child_id FROM refs) AND \
                     id NOT IN (SELECT block_id FROM temp_pins)",
                ));
                let ids = c!("getting IDs" => stmt.query_map([], |row| row.get(0)));
                ids.collect::<Result<Vec<i64>, _>>().ctx("ids")
            },
        )?;

        tracing::debug!("cleaning up {} IDs", ids.len());

        // this number is linked to the prepared query below!
        const BATCH_SIZE: usize = 10;
        for ids in &ids.into_iter().chunks(BATCH_SIZE) {
            let mut v = Vec::with_capacity(BATCH_SIZE);
            v.extend(ids);
            if v.len() == BATCH_SIZE {
                in_txn(
                    conn,
                    Some(("cleaning up CIDs", Duration::from_millis(100))),
                    false,
                    move |txn| {
                        let mut del_cid = c!("deleting CIDs (prep)" => txn.prepare_cached(
                            "DELETE FROM cids WHERE \
                             id in (VALUES (?), (?), (?), (?), (?), (?), (?), (?), (?), (?)) AND \
                             id NOT IN (SELECT block_id FROM blocks) AND \
                             id NOT IN (SELECT block_id FROM aliases) AND \
                             id NOT IN (SELECT child_id FROM refs) AND \
                             id NOT IN (SELECT block_id FROM temp_pins)"
                        ));
                        c!("deleting CIDs" => del_cid.execute(params_from_iter(v.iter())));
                        Ok(())
                    },
                )?;
            } else {
                in_txn(conn, None, false, move |txn| {
                    let mut stmt = c!("deleting CIDs (prep)" => txn.prepare_cached(
                        "DELETE FROM cids WHERE \
                         id = ? AND \
                         id NOT IN (SELECT block_id FROM blocks) AND \
                         id NOT IN (SELECT block_id FROM aliases) AND \
                         id NOT IN (SELECT child_id FROM refs) AND \
                         id NOT IN (SELECT block_id FROM temp_pins)"
                    ));
                    for id in v.iter() {
                        c!("deleting CIDs" => stmt.execute([id]));
                    }
                    Ok(())
                })?;
            }
        }
    }

    Ok(ret_val)
}

pub(crate) fn delete_temp_pin(txn: &Transaction, pin: i64) -> crate::Result<()> {
    let mut stmt =
        c!("deleting temp_pin (prep)" => txn.prepare_cached("DELETE FROM temp_pins WHERE id = ?"));
    c!("deleting temp_pin" => stmt.execute([pin]));
    Ok(())
}

pub(crate) fn extend_temp_pin<C: ToSql>(
    txn: &Transaction,
    mut id: i64,
    links: Vec<C>,
) -> crate::Result<i64> {
    for link in links {
        let block_id = c!("getting ID for temp pinning" => get_or_create_id(txn, link));
        // it is important that the above is a write action, because otherwise a rollback may
        // invalidate the id stored in the TempPin in the below
        id = add_temp_pin(txn, block_id, id).context("extending temp_pin")?;
    }
    Ok(id)
}

fn add_temp_pin(txn: &Transaction, block_id: i64, pin: i64) -> crate::Result<i64> {
    if pin > 0 {
        txn.prepare_cached("INSERT OR IGNORE INTO temp_pins (id, block_id) VALUES (?, ?)")
            .ctx("extending existing temp_pin (prep)")?
            .execute([pin, block_id])
            .ctx("extending existing temp_pin")?;
        Ok(pin)
    } else {
        // we must not reuse IDs, but sqlite takes care of transactionality here
        Ok(txn
            .prepare_cached(
                "INSERT INTO temp_pins (id, block_id) VALUES \
                 ((SELECT coalesce(max(id), 0) FROM temp_pins) + 1, ?) RETURNING id",
            )
            .ctx("creating new temp_pin (prep)")?
            .query_row([block_id], |row| row.get(0))
            .ctx("creating new temp_pin")?)
    }
}

pub(crate) struct PutBlockResult {
    /// id for the cid
    pub(crate) id: i64,
    /// true if the block already existed
    pub(crate) block_exists: bool,
}

pub(crate) fn put_block<C: ToSql>(
    txn: &Transaction,
    key: &C,
    data: &[u8],
    links: impl IntoIterator<Item = C>,
    mut pin: Option<i64>,
) -> crate::Result<(Option<i64>, PutBlockResult)> {
    // this is important: we need write lock on the table so that add_temp_pin is never rolled back
    let block_id = c!("getting put_block ID" => get_or_create_id(txn, key));
    let block_exists = txn
        .prepare_cached("SELECT COUNT(*) FROM blocks WHERE block_id = ?")
        .ctx("checking put_block (prep)")?
        .query_row([block_id], |row| Ok(row.get::<_, i64>(0)? == 1))
        .ctx("checking put_block")?;
    if !block_exists {
        // add the block itself
        txn.prepare_cached("INSERT INTO blocks (block_id, block) VALUES (?, ?)")
            .ctx("adding put_block (prep)")?
            .execute(params![block_id, &data])
            .ctx("adding put_block")?;

        // update the stats
        txn.prepare_cached("UPDATE stats SET count = count + 1, size = size + ?")
            .ctx("updating put_block stats (prep)")?
            .execute([data.len() as i64])
            .ctx("updating put_block stats")?;

        // insert the links
        let mut insert_ref = txn
            .prepare_cached("INSERT INTO refs (parent_id, child_id) VALUES (?,?)")
            .ctx("adding put_block link (prep)")?;
        for link in links {
            let child_id: i64 = c!("getting put_block link ID" => get_or_create_id(txn, link));
            insert_ref
                .execute([block_id, child_id])
                .ctx("adding put_block link")?;
        }
    }
    if let Some(pin) = pin.as_mut() {
        // create a temporary alias for the block, even if it already exists
        // this is only safe because get_or_create_id ensured that we have write lock on the table
        *pin = add_temp_pin(txn, block_id, *pin).context("adding put_block temp_pin")?;
    }
    Ok((
        pin,
        PutBlockResult {
            id: block_id,
            block_exists,
        },
    ))
}

/// Get a block
pub(crate) fn get_block(
    txn: &Transaction,
    cid: impl ToSql,
) -> crate::Result<Option<(i64, Vec<u8>)>> {
    let id = c!("getting get_block ID" => get_id(txn, cid));
    Ok(if let Some(id) = id {
        txn.prepare_cached("SELECT block FROM blocks WHERE block_id = ?")
            .ctx("getting get_block (prep)")?
            .query_row([id], |row| row.get(0))
            .optional()
            .ctx("getting get_block")?
            .map(|b| (id, b))
    } else {
        None
    })
}

/// Check if we have a block
pub(crate) fn has_block(txn: &Transaction, cid: impl ToSql) -> crate::Result<bool> {
    Ok(txn
        .prepare_cached("SELECT 1 FROM blocks, cids ON block_id = id WHERE cid = ?")
        .ctx("getting has_block (prep)")?
        .query_row([cid], |_| Ok(()))
        .optional()
        .ctx("getting has_block")?
        .is_some())
}

/// Check if we have a cid
pub(crate) fn has_cid(txn: &Transaction, cid: impl ToSql) -> crate::Result<bool> {
    Ok(txn
        .prepare_cached("SELECT 1 FROM cids WHERE cids.cid = ?")
        .ctx("getting has_cid (prep)")?
        .query_row([cid], |_| Ok(()))
        .optional()
        .ctx("getting has_cid")?
        .is_some())
}

/// get the descendants of a cid.
/// This just uses the refs table, so it does not ensure that we actually have data for each cid.
/// The value itself is included.
pub(crate) fn get_descendants<C: ToSql + FromSql>(
    txn: &Transaction,
    cid: C,
) -> crate::Result<Vec<C>> {
    let res = txn
        .prepare_cached(
            r#"
WITH RECURSIVE
    descendant_of(id) AS
    (
        SELECT id FROM cids WHERE cid = ?
        UNION
        SELECT child_id FROM refs, descendant_of ON id = parent_id
    )
-- retrieve corresponding cids - this is a set because of select distinct
SELECT cid from cids, descendant_of USING (id);
"#,
        )
        .ctx("getting descendants (prep)")?
        .query_map([cid], |row| row.get(0))
        .ctx("getting descendants")?
        .collect::<rusqlite::Result<Vec<C>>>()
        .ctx("parsing descendants")?;
    Ok(res)
}

/// get the set of descendants of an id for which we do not have the data yet.
/// The value itself is included.
/// It is safe to call this method for a cid we don't have yet.
pub(crate) fn get_missing_blocks<C: ToSql + FromSql>(
    txn: &Transaction,
    cid: C,
) -> crate::Result<Vec<C>> {
    let id = c!("getting missing_blocks ID" => get_or_create_id(txn, cid));
    let res = txn
        .prepare_cached(
            r#"
WITH RECURSIVE
    -- find descendants of cid, including the id of the cid itself
    desc(id) AS (
        SELECT ?
        UNION
        SELECT child_id FROM refs, desc ON id = parent_id
    ),
    -- find orphaned ids
    orphaned_ids as (
        SELECT id FROM desc LEFT JOIN blocks ON id = block_id WHERE block_id IS NULL
    )
-- retrieve corresponding cids - this is a set because of select distinct
SELECT cid FROM cids, orphaned_ids USING (id)
"#,
        )
        .ctx("finding missing_blocks (prep)")?
        .query_map([id], |row| row.get(0))
        .ctx("finding missing_blocks")?
        .collect::<rusqlite::Result<Vec<C>>>()
        .ctx("parsing missing_blocks")?;
    Ok(res)
}

pub(crate) fn alias<C: ToSql>(
    txn: &Transaction,
    name: &[u8],
    key: Option<&C>,
) -> crate::Result<()> {
    if let Some(key) = key {
        let id = c!("getting alias ID" => get_or_create_id(txn, key));
        txn.prepare_cached("REPLACE INTO aliases (name, block_id) VALUES (?, ?)")
            .ctx("setting alias (prep)")?
            .execute(params![name, id])
            .ctx("setting alias")?;
    } else {
        txn.prepare_cached("DELETE FROM aliases WHERE name = ?")
            .ctx("removing alias (prep)")?
            .execute([name])
            .ctx("removing alias")?;
    }
    Ok(())
}

pub(crate) fn resolve<C: FromSql>(txn: &Transaction, name: &[u8]) -> crate::Result<Option<C>> {
    txn.prepare_cached("SELECT cid FROM aliases, cids ON block_id = id WHERE name = ?")
        .ctx("resolving alias (prep)")?
        .query_row([name], |row| row.get(0))
        .optional()
        .ctx("resolving alias")
}

pub(crate) fn reverse_alias(
    txn: &Transaction,
    cid: impl ToSql,
) -> crate::Result<Option<Vec<Vec<u8>>>> {
    if let Some(id) = c!("getting reverse_alias ID" => get_id(txn, cid)) {
        Ok(Some(
            txn.prepare_cached(
                r#"
WITH RECURSIVE
    ancestor_of(id) AS
    (
        SELECT ?
        UNION
        SELECT parent_id FROM refs, ancestor_of ON id = child_id
    )
SELECT name FROM ancestor_of, aliases ON id = block_id;
"#,
            )
            .ctx("getting reverse_alias (prep)")?
            .query_map([id], |row| row.get::<_, Vec<u8>>(0))
            .ctx("getting reverse_alias")?
            .collect::<rusqlite::Result<_>>()
            .ctx("parsing reverse_alias")?,
        ))
    } else {
        Ok(None)
    }
}

/// get all ids corresponding to cids that we have a block for
pub(crate) fn get_ids(txn: &Transaction) -> crate::Result<Vec<i64>> {
    txn.prepare_cached("SELECT id FROM cids JOIN blocks ON id = block_id")
        .ctx("getting IDs (prep)")?
        .query_map([], |row| row.get(0))
        .ctx("getting IDs")?
        .collect::<rusqlite::Result<Vec<i64>>>()
        .ctx("parsing IDs")
}

/// get all cids of blocks in the store
pub(crate) fn get_block_cids<C: FromSql>(txn: &Transaction) -> crate::Result<Vec<C>> {
    txn.prepare_cached("SELECT cid FROM cids JOIN blocks ON id = block_id")
        .ctx("getting all CIDs (prep)")?
        .query_map([], |row| row.get(0))
        .ctx("getting all CIDs")?
        .collect::<rusqlite::Result<Vec<C>>>()
        .ctx("parsing all CIDs")
}

/// get all cids that we know about, even ones that we don't have a block for
pub(crate) fn get_known_cids<C: FromSql>(txn: &Transaction) -> crate::Result<Vec<C>> {
    txn.prepare_cached("SELECT cid FROM cids")
        .ctx("getting known CIDs (prep)")?
        .query_map([], |row| row.get(0))
        .ctx("getting known CIDs")?
        .collect::<rusqlite::Result<Vec<C>>>()
        .ctx("parsing known CIDs")
}

pub(crate) fn aliases<C: FromSql>(txn: &Transaction) -> crate::Result<Vec<(Vec<u8>, C)>> {
    txn.prepare_cached("SELECT name, cid FROM aliases JOIN cids ON id = block_id")
        .ctx("getting aliases (prep)")?
        .query_map([], |row| Ok((row.get(0)?, row.get(1)?)))
        .ctx("getting aliases")?
        .collect::<rusqlite::Result<Vec<(Vec<u8>, C)>>>()
        .ctx("parsing aliases")
}

pub(crate) fn vacuum(conn: &mut Connection) -> crate::Result<()> {
    let _span = tracing::debug_span!("vacuuming the db").entered();
    conn.execute("VACUUM;", []).ctx("running VACUUM")?;
    Ok(())
}

pub(crate) fn init_pragmas(
    conn: &mut Connection,
    is_memory: bool,
    cache_pages: i64,
) -> crate::Result<()> {
    c!("running pragmas" => conn.execute_batch(PRAGMAS));
    c!("setting cache_pages" => conn.pragma_update(None, "cache_pages", &cache_pages));

    let foreign_keys: i64 = c!("getting foreign_keys" => conn.pragma_query_value(None, "foreign_keys", |row| row.get(0)));
    let journal_mode: String = c!("getting journal_mode" => conn.pragma_query_value(None, "journal_mode", |row| row.get(0)));
    let expected_journal_mode = if is_memory { "memory" } else { "wal" };
    assert_eq!(foreign_keys, 1);
    assert_eq!(journal_mode, expected_journal_mode);

    conn.set_prepared_statement_cache_capacity(100);

    if !c!("checking foreign keys" => conn.db_config(DbConfig::SQLITE_DBCONFIG_ENABLE_FKEY)) {
        Err(BlockStoreError::Other(anyhow::anyhow!(
            "foreign keys not enabled in SQLITE library"
        )))
    } else {
        Ok(())
    }
}

fn ws(s: impl AsRef<str>) -> String {
    let mut r = String::new();
    for (i, t) in s.as_ref().split_whitespace().enumerate() {
        if i > 0 {
            r.push(' ');
        }
        r.push_str(t);
    }
    r.to_lowercase()
        .replace("primary_key", "primary key")
        // adding AUTOINCREMENT doesn’t really work, but it doesn’t hurt here either
        // (only CREATE TABLE switched on AUTOINCREMENT special behaviour, but our CIDs
        // don’t need monotonically incrementing IDs, unique is enough)
        .replace("primary key autoincrement", "primary key")
        .replace("unique", "primary key")
}
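// ws() lets ensure_table below compare a stored table definition with the
// expected one modulo cosmetics: whitespace, case, UNIQUE vs PRIMARY KEY and
// AUTOINCREMENT are all normalised away, so older stores with slightly
// different but equivalent table definitions are accepted. E.g. (illustration):
//   ws("CREATE  TABLE t (x INTEGER UNIQUE)") == "create table t (x integer primary key)"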
name: &str, sql: &str) -> crate::Result<bool> {
819 |     let mut in_db = c!("getting table (prep)" => txn
820 |         .prepare("SELECT sql FROM sqlite_master WHERE type = 'table' and name = ?"));
821 |     let in_db = c!("getting table" => in_db
822 |         .query_row([name], |row| row.get::<_, String>(0))
823 |         .optional());
824 |
825 |     if let Some(existing) = in_db {
826 |         let ex_ws = ws(existing);
827 |         let sql_ws = ws(sql);
828 |         if ex_ws == sql_ws {
829 |             // all good, it has the right definition already
830 |             tracing::debug!("table {} is up-to-date", name);
831 |             return Ok(false);
832 |         }
833 |         if let Some(prefix) = ex_ws.find("constraint") {
834 |             // definitions must be equal up to the first constraint
835 |             if ex_ws[..prefix] != sql_ws[..prefix] {
836 |                 return Err(BlockStoreError::Other(anyhow::anyhow!(
837 |                     "cannot update table `{}` due to incompatible data content",
838 |                     name
839 |                 )));
840 |             }
841 |         } else {
842 |             // it is only okay to add constraints before the closing paren
843 |             let ex_trim = ex_ws.trim_end_matches(|c| " )".contains(c));
844 |             if sql_ws[..ex_trim.len()] != *ex_trim
845 |                 || !sql_ws[ex_trim.len()..]
846 |                     .trim_start_matches(|c| ", ".contains(c))
847 |                     .starts_with("constraint")
848 |             {
849 |                 return Err(BlockStoreError::Other(anyhow::anyhow!(
850 |                     "cannot update table `{}` due to incompatible data content",
851 |                     name
852 |                 )));
853 |             }
854 |         }
855 |         // okay, let’s try the update (knock wood)
856 |         tracing::debug!("updating table {}", name);
857 |         c!("change table" => txn.execute(
858 |             "UPDATE sqlite_master SET sql = ? WHERE type = 'table' and name = ?",
859 |             [sql, name]
860 |         ));
861 |         Ok(true)
862 |     } else {
863 |         tracing::debug!("creating table {}", name);
864 |         c!("creating table" => txn.execute_batch(sql));
865 |         Ok(false)
866 |     }
867 | }
868 |
869 | fn ensure_tables(txn: &Transaction, tables: &[(&str, &str)]) -> crate::Result<()> {
870 |     let version = c!("schema version" =>
871 |         txn.pragma_query_value(None, "schema_version", |r| r.get::<_, i64>(0)));
872 |     let mut changed = false;
873 |
874 |     c!("writable schema" => txn.pragma_update(None, "writable_schema", true));
875 |     tracing::debug!("schema now read-write");
876 |     for (name, sql) in tables {
877 |         changed |=
878 |             ensure_table(txn, *name, *sql).with_context(|| format!("ensuring table {}", name))?;
879 |     }
880 |     tracing::debug!("schemas checked");
881 |
882 |     if changed {
883 |         let version = version + 1;
884 |         c!("increment schema version" => txn.pragma_update(None, "schema_version", version));
885 |         tracing::debug!("schema version updated to {}", version);
886 |     }
887 |     c!("writable schema" => txn.pragma_update(None, "writable_schema", false));
888 |     tracing::debug!("schema read-only again");
889 |
890 |     if changed {
891 |         c!("integrity check" => txn.execute_batch("PRAGMA integrity_check"));
892 |         tracing::debug!("integrity check done");
893 |     }
894 |     Ok(())
895 | }
896 |
897 | fn migrate_v0_v1(txn: &Transaction) -> crate::Result<()> {
898 |     let num_blocks: i64 = c!("getting block count" => txn.query_row("SELECT COUNT(*) FROM blocks_v0", [], |r| r.get(0)));
899 |     let mut stmt = c!("getting old blocks (prep)" => txn.prepare("SELECT * FROM blocks_v0"));
900 |     let block_iter = c!("getting old blocks" =>
901 |         stmt.query_map([], |row| { Ok((row.get::<_, Vec<u8>>(2)?, row.get::<_, Vec<u8>>(3)?)) }));
902 |     for (i, block) in block_iter.enumerate() {
903 |         if num_blocks != 0 && i % 1000 == 0 {
904 |             tracing::info!(
905 |                 "converting to new blocks, block {} of {} ({}%)",
906 |                 i,
907 |                 num_blocks,
908 |                 100 * i / (num_blocks as usize)
909 |             );
910 |         }
911 |         let (cid, data) = c!("reading blobs" => block);
912 |         let cid = Cid::try_from(cid).context("parsing CID")?;
913 |         let block = libipld::Block::<libipld::DefaultParams>::new(cid, data).context("creating block")?;
914 |         let mut set = BTreeSet::new();
915 |         block
916 |             .references(&mut set)
917 |             .context("extracting references")?;
918 |         put_block(
919 |             txn,
920 |             &block.cid().to_bytes(),
921 |             block.data(),
922 |             set.into_iter()
923 |                 .map(|cid| cid.to_bytes())
924 |                 .collect::<Vec<_>>(),
925 |             None,
926 |         )?;
927 |     }
928 |     tracing::info!("dropping table blocks_v0");
929 |     c!("dropping old blocks" => txn.execute_batch("DROP TABLE blocks_v0"));
930 |     drop(stmt);
931 |     tracing::info!("migration from v0 to v1 done!");
932 |     Ok(())
933 | }
934 |
935 | pub(crate) fn init_db(
936 |     conn: &mut Connection,
937 |     is_memory: bool,
938 |     cache_pages: i64,
939 |     synchronous: Synchronous,
940 | ) -> crate::Result<()> {
941 |     let _span = tracing::debug_span!("initializing db").entered();
942 |
943 |     // can’t be done inside a transaction
944 |     init_pragmas(conn, is_memory, cache_pages)?;
945 |     conn.pragma_update(None, "synchronous", &synchronous.to_string())
946 |         .ctx("setting Synchronous mode")?;
947 |
948 |     c!("foreign keys off" => conn.pragma_update(None, "foreign_keys", false));
949 |
950 |     in_txn(conn, Some(("init", Duration::from_secs(1))), true, |txn| {
951 |         let user_version = c!("getting user_version" => user_version(txn));
952 |         if user_version > 2 {
953 |             return Err(anyhow::anyhow!(
954 |                 "found future DB version {} (downgrades are not supported)",
955 |                 user_version
956 |             )
957 |             .into());
958 |         }
959 |
960 |         let migrate =
961 |             user_version == 0 && c!("checking table `blocks`" => table_exists(txn, "blocks"));
962 |         if migrate {
963 |             tracing::info!("executing migration from v0 to v1");
964 |             c!("renaming blocks to v0" => txn.execute_batch("ALTER TABLE blocks RENAME TO blocks_v0"));
965 |             // drop the old refs table, since the content can be extracted from blocks_v0
966 |             c!("dropping refs table" => txn.execute_batch("DROP TABLE IF EXISTS refs"));
967 |         }
968 |
969 |         ensure_tables(txn, TABLES)?;
970 |         c!(DEBUG "creating indexes" => txn.execute_batch(INIT));
971 |         c!(DEBUG "cleaning up temp pins" => txn.execute_batch(CLEANUP_TEMP_PINS));
972 |         if let Err(BlockStoreError::SqliteError(QueryReturnedNoRows, _)) = get_store_stats(txn) {
973 |             c!("faking store stats" => txn.execute_batch("INSERT INTO stats VALUES (0, 0);"));
974 |         }
975 |
976 |         if migrate {
977 |             migrate_v0_v1(txn).context("migrating v0 -> v1")?;
978 |         }
979 |
980 |         Ok(())
981 |     })?;
982 |
983 |     c!("foreign keys on" => conn.pragma_update(None, "foreign_keys", true));
984 |     Ok(())
985 | }
986 |
987 | pub(crate) fn integrity_check(conn: &mut Connection) -> crate::Result<Vec<String>> {
988 |     let _span = tracing::debug_span!("db integrity check").entered();
989 |     in_txn(conn, None, false, |txn| {
990 |         let mut stmt = c!("checking sqlite integrity (prep)" => txn.prepare("SELECT integrity_check FROM pragma_integrity_check"));
991 |         let result = c!("checking sqlite integrity" => stmt.query_map([], |row| row.get(0)))
992 |             .collect::<rusqlite::Result<Vec<String>>>()
993 |             .ctx("parsing sqlite integrity_check results")?;
994 |         Ok(result)
995 |     })
996 | }
997 |
998 | /// helper to log execution time of a block of code that returns a result
999 | ///
1000 | /// will log at info level if `expected_duration` is exceeded,
1001 | /// at warn level if the result is a failure, and
1002 | /// just at debug level if the operation is quick and successful.
1003 | ///
1004 | /// this is an attempt to avoid spamming the log with lots of irrelevant info.
1005 | /// executes a closure in a transaction, retrying while the database is busy
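/// A usage sketch (hypothetical one-off read; `conn`, the label and the time
/// budget are assumptions chosen for illustration):
/// ```ignore
/// let n: i64 = in_txn(conn, Some(("count cids", Duration::from_millis(50))), false, |txn| {
///     txn.query_row("SELECT COUNT(*) FROM cids", [], |r| r.get(0))
///         .ctx("counting cids")
/// })?;
/// ```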
1006 | pub(crate) fn in_txn<T, F>(
1007 |     conn: &mut Connection,
1008 |     name: Option<(&str, Duration)>,
1009 |     immediate: bool,
1010 |     f: F,
1011 | ) -> crate::Result<T>
1012 | where
1013 |     // since this function can be retried many times, it must not remember the Transaction nor reference other state
1014 |     F: for<'a> Fn(&'a Transaction) -> crate::Result<T> + 'static,
1015 | {
1016 |     let _span = if let Some(name) = name.map(|x| x.0).filter(|x| !x.is_empty()) {
1017 |         tracing::debug_span!("txn", "{}", name).entered()
1018 |     } else {
1019 |         tracing::trace_span!("txn").entered()
1020 |     };
1021 |     let started = Instant::now();
1022 |     let mut attempts = 0;
1023 |     loop {
1024 |         let txn = c!("beginning transaction" =>
1025 |             if immediate { conn.transaction_with_behavior(TransactionBehavior::Immediate) } else { conn.transaction() }
1026 |         );
1027 |         let result = f(&txn);
1028 |         let result = result.and_then(|t| {
1029 |             c!("committing transaction" => txn.commit());
1030 |             Ok(t)
1031 |         });
1032 |         attempts += 1;
1033 |         match result {
1034 |             Ok(value) => {
1035 |                 if let Some((name, expected)) = name {
1036 |                     let dt = started.elapsed();
1037 |                     if dt > expected {
1038 |                         tracing::info!("{} took {}ms", name, dt.as_millis());
1039 |                     }
1040 |                 }
1041 |                 break Ok(value);
1042 |             }
1043 |             Err(BlockStoreError::SqliteError(SqliteFailure(e, _), _)) if e.code == DatabaseBusy => {
1044 |                 if attempts > 3 && started.elapsed().as_millis() > 100 {
1045 |                     tracing::warn!(
1046 |                         "getting starved ({} attempts so far, {}ms)",
1047 |                         attempts,
1048 |                         started.elapsed().as_millis()
1049 |                     );
1050 |                 } else {
1051 |                     tracing::debug!("retrying transaction {:?}", name);
1052 |                 }
1053 |             }
1054 |             Err(cause) => {
1055 |                 tracing::error!("transaction rolled back! {:#}", cause);
1056 |                 break Err(cause);
1057 |             }
1058 |         }
1059 |     }
1060 | }
1061 |
1062 | #[cfg(test)]
1063 | #[allow(unused)]
1064 | fn p(c: &Transaction, s: &str) {
1065 |     let mut stmt = c.prepare(s).unwrap();
1066 |     let columns = stmt.column_count();
1067 |     let x = stmt
1068 |         .query([])
1069 |         .unwrap()
1070 |         .mapped(|row| {
1071 |             Ok((0..columns)
1072 |                 .map(|idx| format!("{:?}", row.get_ref_unwrap(idx)))
1073 |                 .join(", "))
1074 |         })
1075 |         .map(|x| x.unwrap())
1076 |         .join("\n");
1077 |     println!("query: {}\nresults:\n{}", s, x);
1078 | }
1079 |
--------------------------------------------------------------------------------
/src/error.rs:
--------------------------------------------------------------------------------
1 | use derive_more::Display;
2 | use std::error::Error;
3 |
4 | #[derive(Debug, Display)]
5 | pub enum BlockStoreError {
6 |     /// Error when interacting with the sqlite database
7 |     #[display(
8 |         fmt = "sqlite error while {}: {} caused by {:?}",
9 |         _1,
10 |         _0,
11 |         "_0.source().map(std::string::ToString::to_string)"
12 |     )]
13 |     SqliteError(rusqlite::Error, &'static str),
14 |     /// Error converting from a cid to a fixed sized representation.
15 |     /// This can be caused by hashes with more than 32 bytes of size
16 |     #[display(fmt = "error packing a CID into fixed size: {}", _0)]
17 |     CidError(libipld::cid::Error),
18 |     /// Error when converting i64 from sqlite to u64.
19 |     /// This is unlikely to ever happen.
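    /// (SQLite integers are signed 64-bit, so the `u64` conversion can only fail
    /// for a negative value, which would indicate a corrupted row.)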
20 |     #[display(
21 |         fmt = "DB corrupted, got unsuitable integer value while {}: {}",
22 |         _1,
23 |         _0
24 |     )]
25 |     TryFromIntError(std::num::TryFromIntError, &'static str),
26 |     #[display(fmt = "cannot open additional connection for in-memory DB")]
27 |     NoAdditionalInMemory,
28 |     /// Other error
29 |     Other(anyhow::Error),
30 | }
31 |
32 | impl From<anyhow::Error> for BlockStoreError {
33 |     fn from(v: anyhow::Error) -> Self {
34 |         Self::Other(v)
35 |     }
36 | }
37 |
38 | impl From<libipld::cid::Error> for BlockStoreError {
39 |     fn from(v: libipld::cid::Error) -> Self {
40 |         Self::CidError(v)
41 |     }
42 | }
43 |
44 | impl std::error::Error for BlockStoreError {
45 |     fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
46 |         match self {
47 |             BlockStoreError::SqliteError(_e, _) => None,
48 |             BlockStoreError::CidError(e) => Some(e),
49 |             BlockStoreError::TryFromIntError(e, _) => Some(e),
50 |             BlockStoreError::Other(e) => AsRef::<dyn Error>::as_ref(e).source(),
51 |             BlockStoreError::NoAdditionalInMemory => None,
52 |         }
53 |     }
54 | }
55 |
56 | pub type Result<T> = std::result::Result<T, BlockStoreError>;
57 |
58 | pub(crate) trait Context {
59 |     type Output;
60 |     fn ctx(self, s: &'static str) -> Self::Output;
61 | }
62 |
63 | impl<T> Context for std::result::Result<T, rusqlite::Error> {
64 |     type Output = crate::Result<T>;
65 |
66 |     fn ctx(self, s: &'static str) -> Self::Output {
67 |         self.map_err(|e| BlockStoreError::SqliteError(e, s))
68 |     }
69 | }
70 |
71 | impl<T> Context for std::result::Result<T, std::num::TryFromIntError> {
72 |     type Output = crate::Result<T>;
73 |
74 |     fn ctx(self, s: &'static str) -> Self::Output {
75 |         self.map_err(|e| BlockStoreError::TryFromIntError(e, s))
76 |     }
77 | }
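// Usage sketch: `.ctx(...)` tags a rusqlite failure with a static description
// of what was being attempted, yielding the "sqlite error while ..." message
// exercised in the tests below (hypothetical call):
//
//     conn.execute_batch("PRAGMA user_version = 1")
//         .ctx("setting user_version")?;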
78 |
79 | #[cfg(test)]
80 | mod tests {
81 |     use super::Context;
82 |     use crate::BlockStoreError;
83 |     use anyhow::Context as _;
84 |
85 |     #[test]
86 |     fn show() {
87 |         let sqlite = std::result::Result::<(), _>::Err(rusqlite::Error::SqliteFailure(
88 |             rusqlite::ffi::Error::new(517),
89 |             Some("sql string".to_owned()),
90 |         ));
91 |
92 |         assert_eq!(format!("x {:?}", sqlite), "x Err(SqliteFailure(Error { code: DatabaseBusy, extended_code: 517 }, Some(\"sql string\")))");
93 |         if let Err(ref sqlite) = sqlite {
94 |             assert_eq!(format!("x {}", sqlite), "x sql string");
95 |             assert_eq!(format!("x {:#}", sqlite), "x sql string");
96 |         }
97 |
98 |         let db = sqlite.ctx("first");
99 |
100 |         assert_eq!(format!("x {:?}", db), "x Err(SqliteError(SqliteFailure(Error { code: DatabaseBusy, extended_code: 517 }, Some(\"sql string\")), \"first\"))");
101 |         if let Err(ref db) = db {
102 |             assert_eq!(
103 |                 format!("x {}", db),
104 |                 "x sqlite error while first: sql string caused by \
105 |                 Some(\"Error code 517: \
106 |                 Cannot promote read transaction to write transaction because of writes by another connection\")"
107 |             );
108 |             assert_eq!(
109 |                 format!("x {:#}", db),
110 |                 "x sqlite error while first: sql string caused by \
111 |                 Some(\"Error code 517: \
112 |                 Cannot promote read transaction to write transaction because of writes by another connection\")"
113 |             );
114 |         }
115 |
116 |         let app = db.context("second").map_err(BlockStoreError::from);
117 |
118 |         assert_eq!(
119 |             format!("x {:?}", app),
120 |             r#"x Err(Other(second
121 |
122 | Caused by:
123 |     sqlite error while first: sql string caused by Some("Error code 517: Cannot promote read transaction to write transaction because of writes by another connection")))"#
124 |         );
125 |         if let Err(ref app) = app {
126 |             assert_eq!(format!("x {}", app), "x second");
127 |             assert_eq!(format!("x {:#}", app), "x second: \
128 |                 sqlite error while first: \
129 |                 sql string caused by Some(\"Error code 517: \
130 |                 Cannot promote read transaction to write transaction because of writes by another connection\")");
131 |         }
132 |     }
133 |
134 |     #[test]
135 |     fn double() {
136 |         let e = BlockStoreError::Other(anyhow::anyhow!("hello"));
137 |         assert_eq!(format!("{}", e), "hello");
138 |         assert_eq!(format!("{:#}", e), "hello");
139 |
140 |         let e = Result::<(), _>::Err(e).context("world").unwrap_err();
141 |         assert_eq!(format!("{:#}", e), "world: hello");
142 |
143 |         assert_eq!(e.to_string(), "world");
144 |         let e = e.source().unwrap();
145 |         assert_eq!(e.to_string(), "hello");
146 |         assert!(e.source().is_none());
147 |     }
148 | }
149 |
--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
1 | //! # IPFS sqlite block store
2 | //!
3 | //! A block store for a rust implementation of [ipfs](https://ipfs.io/).
4 | //!
5 | //! # Concepts
6 | //!
7 | //! ## Aliases
8 | //!
9 | //! An alias is a named pin of a root. When a root is aliased, none of the leaves of the dag pointed
10 | //! to by the root will be collected by gc. However, a root being aliased does not mean that the dag
11 | //! must be complete.
12 | //!
13 | //! ## Temporary aliases
14 | //!
15 | //! A temporary alias is an unnamed alias that is just for the purpose of protecting blocks from gc
16 | //! while a large tree is being constructed. While an alias maps a single name to a single root, a
17 | //! temporary alias can be assigned to an arbitrary number of blocks before the dag is finished.
18 | //!
19 | //! A temporary alias will be deleted as soon as the handle goes out of scope.
20 | //!
21 | //! ## Garbage Collection (GC)
22 | //!
23 | //! GC refers to the process of removing unpinned blocks. It runs only when the configured size
24 | //! targets are exceeded. [Size targets](SizeTargets) contain both the total size of the store
25 | //! and the number of blocks.
26 | //!
27 | //! GC will run incrementally, deleting blocks until the size targets are no longer exceeded. The
28 | //! order in which unpinned blocks will be deleted can be customized.
29 | //!
30 | //! ## Caching
31 | //!
32 | //! For unpinned blocks, it is possible to customize which blocks have the highest value using a
33 | //! [CacheTracker](cache::CacheTracker). The default is to [do nothing](cache::NoopCacheTracker)
34 | //! and has no performance overhead.
35 | //!
36 | //! The most elaborate implemented strategy is to keep track of access times in a separate database,
37 | //! via the [SqliteCacheTracker](cache::SqliteCacheTracker), which has a slight performance overhead.
38 | //!
39 | //! The performance overhead of writing to an access tracking database on each block read can be
40 | //! mitigated by using the [AsyncCacheTracker](cache::AsyncCacheTracker) wrapper to perform the database
41 | //! writes on a different thread.
42 | //!
43 | //! # Usage
44 | //!
45 | //! ## Blocking
46 | //!
47 | //! For blocking usage, use [BlockStore](BlockStore). This is the lowest-level interface.
48 | //!
49 | //! # Major differences to the go-ipfs pinning concept
50 | //!
51 | //! - Pinning/aliasing a root does not require that the dag is complete
52 | //! - Aliases/named pins as opposed to unnamed and non-reference-counted pins
53 | //!
- Temporary pins as a mechanism to keep blocks safe from gc while a tree is being constructed 54 | pub mod cache; 55 | mod cidbytes; 56 | mod db; 57 | mod error; 58 | #[cfg(test)] 59 | mod tests; 60 | mod transaction; 61 | 62 | use cache::{CacheTracker, NoopCacheTracker}; 63 | use db::*; 64 | use error::Context; 65 | pub use error::{BlockStoreError, Result}; 66 | use libipld::{codec::References, store::StoreParams, Block, Cid, Ipld}; 67 | use parking_lot::Mutex; 68 | use rusqlite::{Connection, DatabaseName, OpenFlags}; 69 | use std::{ 70 | borrow::Cow, 71 | collections::HashSet, 72 | fmt, 73 | iter::FromIterator, 74 | marker::PhantomData, 75 | mem, 76 | ops::DerefMut, 77 | path::{Path, PathBuf}, 78 | sync::{ 79 | atomic::{AtomicBool, Ordering}, 80 | Arc, 81 | }, 82 | time::Duration, 83 | }; 84 | use tracing::*; 85 | pub use transaction::Transaction; 86 | 87 | #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] 88 | pub enum DbPath { 89 | File(PathBuf), 90 | Memory, 91 | } 92 | 93 | impl DbPath { 94 | fn is_memory(&self) -> bool { 95 | !matches!(self, DbPath::File(_)) 96 | } 97 | } 98 | 99 | /// Size targets for a store. Gc of non-pinned blocks will start once one of the size targets is exceeded. 100 | /// 101 | /// There are targets for both block count and block size. The reason for this is that a store that has 102 | /// a very large number of tiny blocks will become sluggish despite not having a large total size. 103 | /// 104 | /// Size targets only apply to non-pinned blocks. Pinned blocks will never be gced even if exceeding one of the 105 | /// size targets. 106 | #[derive(Debug, Clone, Copy, Default)] 107 | pub struct SizeTargets { 108 | /// target number of blocks. 109 | /// 110 | /// Up to this number, the store will retain everything even if not pinned. 111 | /// Once this number is exceeded, the store will run garbage collection of all 112 | /// unpinned blocks until the block criterion is met again. 113 | /// 114 | /// To completely disable storing of non-pinned blocks, set this to 0. 115 | /// Even then, the store will never delete pinned blocks. 116 | pub count: u64, 117 | 118 | /// target store size. 119 | /// 120 | /// Up to this size, the store will retain everything even if not pinned. 121 | /// Once this size is exceeded, the store will run garbage collection of all 122 | /// unpinned blocks until the size criterion is met again. 123 | /// 124 | /// The store will never delete pinned blocks. 125 | pub size: u64, 126 | } 127 | 128 | impl SizeTargets { 129 | pub fn new(count: u64, size: u64) -> Self { 130 | Self { count, size } 131 | } 132 | 133 | pub fn exceeded(&self, stats: &StoreStats) -> bool { 134 | stats.count > self.count || stats.size > self.size 135 | } 136 | 137 | /// Size targets that can not be reached. This can be used to disable gc. 138 | pub fn max_value() -> Self { 139 | Self { 140 | count: u64::max_value(), 141 | size: u64::max_value(), 142 | } 143 | } 144 | } 145 | 146 | #[derive(Debug, Clone, Copy)] 147 | pub enum Synchronous { 148 | // this is the most conservative mode. 
This only works if we have few, large transactions 149 | Full, 150 | Normal, 151 | Off, 152 | } 153 | 154 | impl fmt::Display for Synchronous { 155 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 156 | f.write_str(match self { 157 | Synchronous::Full => "FULL", 158 | Synchronous::Normal => "NORMAL", 159 | Synchronous::Off => "OFF", 160 | }) 161 | } 162 | } 163 | 164 | #[derive(Debug, Clone)] 165 | pub struct Config { 166 | size_targets: SizeTargets, 167 | cache_tracker: Arc, 168 | pragma_synchronous: Synchronous, 169 | pragma_cache_pages: u64, 170 | // open in readonly mode 171 | read_only: bool, 172 | // create if it does not yet exist 173 | create: bool, 174 | } 175 | 176 | impl Default for Config { 177 | fn default() -> Self { 178 | Self { 179 | size_targets: Default::default(), 180 | cache_tracker: Arc::new(NoopCacheTracker), 181 | pragma_synchronous: Synchronous::Full, // most conservative setting 182 | pragma_cache_pages: 8192, // 32 megabytes with the default page size of 4096 183 | read_only: false, 184 | create: true, 185 | } 186 | } 187 | } 188 | 189 | impl Config { 190 | pub fn with_read_only(mut self, value: bool) -> Self { 191 | self.read_only = value; 192 | self 193 | } 194 | /// Set size targets for the store 195 | pub fn with_size_targets(mut self, count: u64, size: u64) -> Self { 196 | self.size_targets = SizeTargets { count, size }; 197 | self 198 | } 199 | /// Set strategy for which non-pinned blocks to keep in case one of the size targets is exceeded. 200 | pub fn with_cache_tracker(mut self, cache_tracker: T) -> Self { 201 | self.cache_tracker = Arc::new(cache_tracker); 202 | self 203 | } 204 | pub fn with_pragma_synchronous(mut self, value: Synchronous) -> Self { 205 | self.pragma_synchronous = value; 206 | self 207 | } 208 | pub fn with_pragma_cache_pages(mut self, value: u64) -> Self { 209 | self.pragma_cache_pages = value; 210 | self 211 | } 212 | } 213 | 214 | pub struct BlockStore { 215 | conn: Connection, 216 | expired_temp_pins: Arc>>, 217 | config: Config, 218 | db_path: DbPath, 219 | recompute_done: Arc, 220 | _s: PhantomData, 221 | } 222 | 223 | #[derive(Debug, Clone, Default, PartialEq, Eq)] 224 | pub struct StoreStats { 225 | count: u64, 226 | size: u64, 227 | page_size: u64, 228 | used_pages: u64, 229 | free_pages: u64, 230 | } 231 | 232 | impl StoreStats { 233 | /// Total number of blocks in the store 234 | pub fn count(&self) -> u64 { 235 | self.count 236 | } 237 | 238 | /// Total size of blocks in the store 239 | pub fn size(&self) -> u64 { 240 | self.size 241 | } 242 | 243 | /// Page size used by the SQLite DB 244 | pub fn page_size(&self) -> u64 { 245 | self.page_size 246 | } 247 | 248 | /// Number of used pages in the SQLite DB 249 | /// 250 | /// Multiply this with [`page_size`](#method.page_size) to obtain an upper bound 251 | /// on how much space is actually used. The value returned by [`size`](#method.size) 252 | /// will always be smaller than this, since it only counts net block data, without 253 | /// overhead. A large difference suggests the need for calling `vacuum`. 254 | pub fn used_pages(&self) -> u64 { 255 | self.used_pages 256 | } 257 | 258 | /// Number of unused pages in the SQLite DB 259 | /// 260 | /// The DB file can be shrunk by at least this page count by calling `vacuum`, which often is 261 | /// a long-running procedure. 
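    /// As a rough sketch, the number of bytes reclaimable by `vacuum` can be
    /// estimated as `stats.free_pages() * stats.page_size()`.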
262 | pub fn free_pages(&self) -> u64 { 263 | self.free_pages 264 | } 265 | } 266 | 267 | /// a handle that contains a temporary pin 268 | /// 269 | /// Dropping this handle enqueues the pin for dropping before the next gc. 270 | // do not implement Clone for this! 271 | pub struct TempPin { 272 | id: i64, 273 | expired_temp_pins: Arc>>, 274 | } 275 | 276 | impl TempPin { 277 | fn new(expired_temp_pins: Arc>>) -> Self { 278 | Self { 279 | id: 0, 280 | expired_temp_pins, 281 | } 282 | } 283 | } 284 | 285 | /// dump the temp alias id so you can find it in the database 286 | impl fmt::Debug for TempPin { 287 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 288 | let mut builder = f.debug_struct("TempAlias"); 289 | if self.id > 0 { 290 | builder.field("id", &self.id); 291 | } else { 292 | builder.field("unused", &true); 293 | } 294 | builder.finish() 295 | } 296 | } 297 | 298 | impl Drop for TempPin { 299 | fn drop(&mut self) { 300 | if self.id > 0 { 301 | self.expired_temp_pins.lock().push(self.id); 302 | } 303 | } 304 | } 305 | 306 | impl BlockStore 307 | where 308 | S: StoreParams, 309 | Ipld: References, 310 | { 311 | fn create_connection(db_path: DbPath, config: &Config) -> crate::Result { 312 | let mut flags = OpenFlags::SQLITE_OPEN_NO_MUTEX | OpenFlags::SQLITE_OPEN_URI; 313 | flags |= if config.read_only { 314 | OpenFlags::SQLITE_OPEN_READ_ONLY 315 | } else { 316 | OpenFlags::SQLITE_OPEN_READ_WRITE 317 | }; 318 | if config.create && !config.read_only { 319 | flags |= OpenFlags::SQLITE_OPEN_CREATE 320 | } 321 | let conn = match db_path { 322 | DbPath::Memory => Connection::open_in_memory().ctx("opening in-memory DB")?, 323 | DbPath::File(path) => Connection::open_with_flags(path, flags).ctx("opening DB")?, 324 | }; 325 | Ok(conn) 326 | } 327 | 328 | pub fn open_path(db_path: DbPath, config: Config) -> crate::Result { 329 | let is_memory = db_path.is_memory(); 330 | let mut conn = Self::create_connection(db_path.clone(), &config)?; 331 | // this needs to be done only once, and before the first transaction 332 | conn.execute_batch("PRAGMA journal_mode = WAL") 333 | .ctx("setting WAL mode")?; 334 | init_db( 335 | &mut conn, 336 | is_memory, 337 | config.pragma_cache_pages as i64, 338 | config.pragma_synchronous, 339 | )?; 340 | let mut this = Self { 341 | conn, 342 | expired_temp_pins: Arc::new(Mutex::new(Vec::new())), 343 | config, 344 | db_path, 345 | recompute_done: Arc::new(AtomicBool::new(false)), 346 | _s: PhantomData, 347 | }; 348 | if !is_memory { 349 | let mut conn = this.additional_connection()?; 350 | std::thread::spawn(move || { 351 | if let Err(e) = recompute_store_stats(&mut conn.conn) { 352 | tracing::error!("cannot recompute store stats: {}", e); 353 | } 354 | // This is done to avoid GC doing a wal_checkpoint(RESTART) while the above 355 | // long-running query is ongoing, since that would block all writers during 356 | // that period. 357 | conn.recompute_done.store(true, Ordering::SeqCst); 358 | }); 359 | } else { 360 | this.recompute_done.store(true, Ordering::SeqCst); 361 | } 362 | if this.config.cache_tracker.has_persistent_state() { 363 | let ids = in_txn( 364 | &mut this.conn, 365 | Some(("get IDs", Duration::from_secs(1))), 366 | false, 367 | get_ids, 368 | )?; 369 | this.config.cache_tracker.retain_ids(&ids); 370 | } 371 | Ok(this) 372 | } 373 | 374 | /// Create another connection to the underlying database 375 | /// 376 | /// This allows you to perform operations in parallel. 
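    /// A sketch (assuming a file-backed `store`; in-memory stores return
    /// `NoAdditionalInMemory`):
    /// ```ignore
    /// let mut reader = store.additional_connection()?;
    /// std::thread::spawn(move || {
    ///     // reads run concurrently with writes on the original connection
    ///     let _stats = reader.get_store_stats();
    /// });
    /// ```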
377 | pub fn additional_connection(&self) -> crate::Result { 378 | if self.db_path.is_memory() { 379 | return Err(BlockStoreError::NoAdditionalInMemory); 380 | } 381 | let mut conn = Self::create_connection(self.db_path.clone(), &self.config)?; 382 | init_pragmas( 383 | &mut conn, 384 | self.db_path.is_memory(), 385 | self.config.pragma_cache_pages as i64, 386 | )?; 387 | conn.pragma_update( 388 | None, 389 | "synchronous", 390 | &self.config.pragma_synchronous.to_string(), 391 | ) 392 | .ctx("setting synchronous mode")?; 393 | Ok(Self { 394 | conn, 395 | expired_temp_pins: self.expired_temp_pins.clone(), 396 | config: self.config.clone(), 397 | db_path: self.db_path.clone(), 398 | recompute_done: self.recompute_done.clone(), 399 | _s: PhantomData, 400 | }) 401 | } 402 | 403 | /// Create an in memory block store with the given config 404 | pub fn memory(config: Config) -> crate::Result { 405 | Self::open_path(DbPath::Memory, config) 406 | } 407 | 408 | /// Create a persistent block store with the given config 409 | pub fn open(path: impl AsRef, config: Config) -> crate::Result { 410 | let mut pb: PathBuf = PathBuf::new(); 411 | pb.push(path); 412 | Self::open_path(DbPath::File(pb), config) 413 | } 414 | 415 | /// Open the file at the given path for testing. 416 | /// 417 | /// This will create a writeable in-memory database that is initialized with the content 418 | /// of the file at the given path. 419 | pub fn open_test(path: impl AsRef, config: Config) -> crate::Result { 420 | let mut conn = Self::create_connection(DbPath::Memory, &config)?; 421 | debug!( 422 | "Restoring in memory database from {}", 423 | path.as_ref().display() 424 | ); 425 | conn.restore( 426 | DatabaseName::Main, 427 | path, 428 | Some(|p: rusqlite::backup::Progress| { 429 | let percent = if p.pagecount == 0 { 430 | 100 431 | } else { 432 | (p.pagecount - p.remaining) * 100 / p.pagecount 433 | }; 434 | if percent % 10 == 0 { 435 | debug!("Restoring: {} %", percent); 436 | } 437 | }), 438 | ) 439 | .ctx("restoring test DB from backup")?; 440 | let ids = in_txn( 441 | &mut conn, 442 | Some(("get ids", Duration::from_secs(1))), 443 | false, 444 | get_ids, 445 | )?; 446 | config.cache_tracker.retain_ids(&ids); 447 | Ok(Self { 448 | conn, 449 | expired_temp_pins: Arc::new(Mutex::new(Vec::new())), 450 | config, 451 | db_path: DbPath::Memory, 452 | recompute_done: Arc::new(AtomicBool::new(true)), 453 | _s: PhantomData, 454 | }) 455 | } 456 | 457 | pub fn backup(&mut self, path: PathBuf) -> Result<()> { 458 | in_txn(&mut self.conn, None, false, move |txn| { 459 | txn.backup(DatabaseName::Main, path.as_path(), None) 460 | .ctx("backing up DB") 461 | }) 462 | } 463 | 464 | pub fn flush(&mut self) -> crate::Result<()> { 465 | in_txn(&mut self.conn, None, false, |txn| { 466 | txn.pragma_update(None, "wal_checkpoint", &"TRUNCATE") 467 | .ctx("flushing WAL") 468 | }) 469 | } 470 | 471 | pub fn integrity_check(&mut self) -> crate::Result<()> { 472 | let result = integrity_check(&mut self.conn)?; 473 | if result == vec!["ok".to_owned()] { 474 | Ok(()) 475 | } else { 476 | let error_text = result.join(";"); 477 | Err(crate::error::BlockStoreError::SqliteError( 478 | rusqlite::Error::SqliteFailure(rusqlite::ffi::Error::new(11), Some(error_text)), 479 | "checking integrity", 480 | )) 481 | } 482 | // FIXME add actual integrity check on the stored blocks 483 | } 484 | 485 | pub fn transaction(&mut self) -> Transaction<'_, S> { 486 | Transaction::new(self) 487 | } 488 | 489 | /// Get a temporary alias for safely adding blocks to the 
store 490 | pub fn temp_pin(&self) -> TempPin { 491 | TempPin::new(self.expired_temp_pins.clone()) 492 | } 493 | 494 | /// Run a full VACUUM on the SQLITE database 495 | /// 496 | /// This may take a while, blocking all other writes to the store. 497 | pub fn vacuum(&mut self) -> Result<()> { 498 | vacuum(&mut self.conn) 499 | } 500 | 501 | /// Perform maintenance on the TempPins 502 | /// 503 | /// This is done automatically upon every (incremental) GC, so you normally don’t need to call this. 504 | pub fn cleanup_temp_pins(&mut self) -> Result<()> { 505 | // atomically grab the expired_temp_pins until now 506 | let expired_temp_pins = mem::take(self.expired_temp_pins.lock().deref_mut()); 507 | in_txn( 508 | &mut self.conn, 509 | Some(("dropping expired temp_pins", Duration::from_millis(100))), 510 | true, 511 | move |txn| { 512 | // get rid of dropped temp aliases, this should be fast 513 | for id in expired_temp_pins.iter() { 514 | delete_temp_pin(txn, *id)?; 515 | } 516 | Ok(()) 517 | }, 518 | ) 519 | } 520 | 521 | /// Perform full GC 522 | /// 523 | /// This is the same as running incremental GC without limits, plus a full SQLITE VACUUM. 524 | pub fn gc(&mut self) -> Result<()> { 525 | self.cleanup_temp_pins()?; 526 | self.flush()?; 527 | incremental_gc( 528 | &mut self.conn, 529 | usize::MAX, 530 | Duration::from_secs(u32::MAX.into()), 531 | self.config.size_targets, 532 | &self.config.cache_tracker, 533 | )?; 534 | self.vacuum()?; 535 | Ok(()) 536 | } 537 | 538 | fn maybe_checkpoint(&mut self) -> Result<()> { 539 | if self.recompute_done.load(Ordering::SeqCst) { 540 | self.conn 541 | .pragma_update(None, "journal_size_limit", 10_000_000i64) 542 | .ctx("setting journal_size_limit")?; 543 | self.conn 544 | .pragma_update(None, "wal_checkpoint", &"RESTART") 545 | .ctx("running wal_checkpoint(RESTART)")?; 546 | } 547 | Ok(()) 548 | } 549 | 550 | /// Perform an incremental garbage collection. 551 | /// 552 | /// Will collect unpinned blocks until either the size targets are met again, or at minimum 553 | /// `min_blocks` blocks are collected. Then it will continue collecting blocks until `max_duration` 554 | /// is elapsed. 555 | /// 556 | /// Note that this might significantly exceed `max_duration` for various reasons. 557 | /// 558 | /// Returns true if either size targets are met or there are no unpinned blocks left. 559 | pub fn incremental_gc(&mut self, min_blocks: usize, max_duration: Duration) -> Result { 560 | let stats = self.get_store_stats()?; 561 | let _span = tracing::debug_span!("incGC", stats = ?&stats).entered(); 562 | self.cleanup_temp_pins()?; 563 | self.maybe_checkpoint()?; 564 | let ret = incremental_gc( 565 | &mut self.conn, 566 | min_blocks, 567 | max_duration, 568 | self.config.size_targets, 569 | &self.config.cache_tracker, 570 | )?; 571 | self.maybe_checkpoint()?; 572 | in_txn( 573 | &mut self.conn, 574 | Some(("incremental_vacuum", Duration::from_millis(500))), 575 | false, 576 | |txn| { 577 | txn.execute_batch("PRAGMA incremental_vacuum") 578 | .ctx("incremental vacuum") 579 | }, 580 | )?; 581 | Ok(ret) 582 | } 583 | } 584 | 585 | macro_rules! 
delegate { 586 | ($($(#[$attr:meta])*$n:ident$(<$v:ident : $vt:path>)?($($arg:ident : $typ:ty),*) -> $ret:ty;)+) => { 587 | $( 588 | $(#[$attr])* 589 | pub fn $n$(<$v: $vt>)?(&mut self, $($arg: $typ),*) -> $ret { 590 | let mut txn = self.transaction(); 591 | let ret = txn.$n($($arg),*)?; 592 | txn.commit()?; 593 | Ok(ret) 594 | } 595 | )+ 596 | }; 597 | } 598 | 599 | impl BlockStore 600 | where 601 | S: StoreParams, 602 | Ipld: References, 603 | { 604 | /// Set or delete an alias 605 | pub fn alias<'b>( 606 | &mut self, 607 | name: impl Into>, 608 | link: Option<&'b Cid>, 609 | ) -> Result<()> { 610 | self.transaction().alias(name, link) 611 | } 612 | 613 | /// Resolves an alias to a cid 614 | pub fn resolve<'b>(&mut self, name: impl Into>) -> Result> { 615 | self.transaction().resolve(name) 616 | } 617 | 618 | delegate! { 619 | /// Returns the aliases referencing a cid 620 | reverse_alias(cid: &Cid) -> Result>>>; 621 | 622 | /// Extend temp pin with an additional cid 623 | extend_temp_pin(pin: &mut TempPin, link: &Cid) -> Result<()>; 624 | 625 | /// Checks if the store knows about the cid. 626 | /// 627 | /// Note that this does not necessarily mean that the store has the data for the cid. 628 | has_cid(cid: &Cid) -> Result; 629 | 630 | /// Checks if the store has the data for a cid 631 | has_block(cid: &Cid) -> Result; 632 | 633 | /// Get all cids that the store knows about 634 | get_known_cids>() -> Result; 635 | 636 | /// Get all cids for which the store has blocks 637 | get_block_cids>() -> Result; 638 | 639 | /// Get descendants of a cid 640 | get_descendants>(cid: &Cid) -> Result; 641 | 642 | /// Given a root of a dag, gives all cids which we do not have data for. 643 | get_missing_blocks>(cid: &Cid) -> Result; 644 | 645 | /// list all aliases 646 | aliases, Cid)>>() -> Result; 647 | 648 | /// Put a block 649 | /// 650 | /// This will only be completed once the transaction is successfully committed. 651 | put_block(block: Block, pin: Option<&mut TempPin>) -> Result<()>; 652 | 653 | /// Get a block 654 | get_block(cid: &Cid) -> Result>>; 655 | 656 | /// Get the stats for the store 657 | /// 658 | /// The stats are kept up to date, so this is fast. 659 | get_store_stats() -> Result; 660 | } 661 | 662 | pub fn put_blocks(&mut self, blocks: I, mut pin: Option<&mut TempPin>) -> Result<()> 663 | where 664 | I: IntoIterator>, 665 | { 666 | let mut txn = self.transaction(); 667 | for block in blocks { 668 | #[allow(clippy::needless_option_as_deref)] 669 | txn.put_block(block, pin.as_deref_mut())?; 670 | } 671 | txn.commit() 672 | } 673 | } 674 | -------------------------------------------------------------------------------- /src/tests.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::many_single_char_names)] 2 | use crate::{ 3 | cache::CacheTracker, 4 | cache::InMemCacheTracker, 5 | cache::{SortByIdCacheTracker, SqliteCacheTracker}, 6 | BlockStoreError, Config, DbPath, Result, StoreStats, TempPin, 7 | }; 8 | use anyhow::Context; 9 | use fnv::FnvHashSet; 10 | use libipld::{ 11 | cbor::DagCborCodec, 12 | cid::Cid, 13 | multihash::{Code, MultihashDigest}, 14 | }; 15 | use libipld::{prelude::*, DagCbor}; 16 | use maplit::hashset; 17 | use rusqlite::{params, Connection}; 18 | use std::{ 19 | borrow::Cow, 20 | collections::HashSet, 21 | iter::FromIterator, 22 | path::{Path, PathBuf}, 23 | time::Duration, 24 | }; 25 | use tempdir::TempDir; 26 | 27 | type Block = libipld::Block; 28 | 29 | macro_rules! 
delegate { 30 | ($($n:ident$(<$v:ident : $vt:path>)?($($arg:ident : $typ:ty),*) -> $ret:ty;)+) => { 31 | $( 32 | pub fn $n$(<$v: $vt>)?(&mut self, $($arg: $typ),*) -> $ret { 33 | let ret = self.0.$n($($arg),*); 34 | if ret.is_err() { 35 | match self.backup() { 36 | Ok(p) => eprintln!("wrote backup to {}", p.display()), 37 | Err(e) => eprintln!("couldn’t write backup: {:#}", e), 38 | } 39 | } 40 | ret 41 | } 42 | )+ 43 | }; 44 | } 45 | struct BlockStore(crate::BlockStore); 46 | 47 | #[allow(unused)] 48 | impl BlockStore { 49 | pub fn memory(config: Config) -> Result { 50 | Ok(Self(crate::BlockStore::memory(config)?)) 51 | } 52 | pub fn open(path: impl AsRef, config: Config) -> Result { 53 | Ok(Self(crate::BlockStore::open(path, config)?)) 54 | } 55 | pub fn open_path(path: DbPath, config: Config) -> Result { 56 | Ok(Self(crate::BlockStore::open_path(path, config)?)) 57 | } 58 | 59 | fn backup(&mut self) -> Result { 60 | let file = tempfile::tempdir() 61 | .map_err(|e| BlockStoreError::Other(e.into()))? 62 | .into_path() 63 | .join("db"); 64 | self.0.backup(file.clone())?; 65 | Ok(file) 66 | } 67 | 68 | pub fn temp_pin(&self) -> TempPin { 69 | self.0.temp_pin() 70 | } 71 | 72 | pub fn alias<'b>( 73 | &mut self, 74 | name: impl Into>, 75 | link: Option<&'b Cid>, 76 | ) -> Result<()> { 77 | let ret = self.0.alias(name, link); 78 | if ret.is_err() { 79 | match self.backup() { 80 | Ok(p) => eprintln!("wrote backup to {}", p.display()), 81 | Err(e) => eprintln!("couldn’t write backup: {:#}", e), 82 | } 83 | } 84 | ret 85 | } 86 | pub fn resolve<'b>(&mut self, name: impl Into>) -> Result> { 87 | let ret = self.0.resolve(name); 88 | if ret.is_err() { 89 | match self.backup() { 90 | Ok(p) => eprintln!("wrote backup to {}", p.display()), 91 | Err(e) => eprintln!("couldn’t write backup: {:#}", e), 92 | } 93 | } 94 | ret 95 | } 96 | delegate! 
{ 97 | reverse_alias(cid: &Cid) -> Result>>>; 98 | extend_temp_pin(pin: &mut TempPin, link: &Cid) -> Result<()>; 99 | has_cid(cid: &Cid) -> Result; 100 | has_block(cid: &Cid) -> Result; 101 | get_known_cids>() -> Result; 102 | get_block_cids>() -> Result; 103 | get_descendants>(cid: &Cid) -> Result; 104 | get_missing_blocks>(cid: &Cid) -> Result; 105 | aliases, Cid)>>() -> Result; 106 | put_block(block: Block, pin: Option<&mut TempPin>) -> Result<()>; 107 | get_block(cid: &Cid) -> Result>>; 108 | get_store_stats() -> Result; 109 | gc() -> Result<()>; 110 | incremental_gc(blocks: usize, duration: Duration) -> Result; 111 | vacuum() -> Result<()>; 112 | integrity_check() -> Result<()>; 113 | } 114 | } 115 | 116 | #[derive(Debug, DagCbor)] 117 | struct Node { 118 | links: Vec, 119 | text: String, 120 | } 121 | 122 | impl Node { 123 | pub fn leaf(text: &str) -> Self { 124 | Self { 125 | links: Vec::new(), 126 | text: text.into(), 127 | } 128 | } 129 | 130 | pub fn branch(text: &str, links: impl IntoIterator) -> Self { 131 | Self { 132 | links: links.into_iter().collect(), 133 | text: text.into(), 134 | } 135 | } 136 | } 137 | 138 | enum SizeOrLinks { 139 | Size(usize), 140 | Links(Vec), 141 | } 142 | 143 | impl From> for SizeOrLinks { 144 | fn from(value: Vec) -> Self { 145 | Self::Links(value) 146 | } 147 | } 148 | 149 | impl From for SizeOrLinks { 150 | fn from(value: usize) -> Self { 151 | Self::Size(value) 152 | } 153 | } 154 | 155 | /// creates a simple leaf block 156 | fn block(name: &str) -> Block { 157 | let ipld = Node::leaf(name); 158 | let bytes = DagCborCodec.encode(&ipld).unwrap(); 159 | let hash = Code::Sha2_256.digest(&bytes); 160 | // https://github.com/multiformats/multicodec/blob/master/table.csv 161 | Block::new_unchecked(Cid::new_v1(0x71, hash), bytes) 162 | } 163 | 164 | /// creates a block with some links 165 | fn links(name: &str, children: Vec<&Block>) -> Block { 166 | let ipld = Node::branch(name, children.iter().map(|b| *b.cid()).collect::>()); 167 | let bytes = DagCborCodec.encode(&ipld).unwrap(); 168 | let hash = Code::Sha2_256.digest(&bytes); 169 | // https://github.com/multiformats/multicodec/blob/master/table.csv 170 | Block::new_unchecked(Cid::new_v1(0x71, hash), bytes) 171 | } 172 | 173 | /// creates a block with a min size 174 | fn sized(name: &str, min_size: usize) -> Block { 175 | let mut text = name.to_string(); 176 | while text.len() < min_size { 177 | text += " "; 178 | } 179 | let ipld = Node::leaf(&text); 180 | let bytes = DagCborCodec.encode(&ipld).unwrap(); 181 | let hash = Code::Sha2_256.digest(&bytes); 182 | // https://github.com/multiformats/multicodec/blob/master/table.csv 183 | Block::new_unchecked(Cid::new_v1(0x71, hash), bytes) 184 | } 185 | 186 | /*fn pb(name: &str) -> Cid { 187 | // https://github.com/multiformats/multicodec/blob/master/table.csv 188 | let hash = Code::Sha2_256.digest(name.as_bytes()); 189 | Cid::new_v1(0x70, hash) 190 | }*/ 191 | 192 | /// creates a block with the name "unpinned-" and a size of 1000 193 | fn unpinned(i: usize) -> Block { 194 | sized(&format!("{}", i), 1000 - 16) 195 | } 196 | 197 | /// creates a block with the name "pinned-" and a size of 1000 198 | fn pinned(i: usize) -> Block { 199 | sized(&format!("pinned-{}", i), 1000 - 16) 200 | } 201 | 202 | #[test] 203 | fn insert_get() { 204 | let mut store = BlockStore::memory(Config::default()).unwrap(); 205 | let b = block("b"); 206 | let c = block("c"); 207 | let a = links("a", vec![&b, &c, &c]); 208 | store.put_block(a.clone(), None).unwrap(); 209 | // we should 
have all three cids
210 |     assert!(store.has_cid(a.cid()).unwrap());
211 |     assert!(store.has_cid(b.cid()).unwrap());
212 |     assert!(store.has_cid(c.cid()).unwrap());
213 |     // but only the first block
214 |     assert!(store.has_block(a.cid()).unwrap());
215 |     assert!(!store.has_block(b.cid()).unwrap());
216 |     assert!(!store.has_block(c.cid()).unwrap());
217 |     // check the data
218 |     assert_eq!(store.get_block(a.cid()).unwrap(), Some(a.data().to_vec()));
219 |     // check descendants
220 |     assert_eq!(
221 |         store.get_descendants::<HashSet<_>>(a.cid()).unwrap(),
222 |         hashset![*a.cid(), *b.cid(), *c.cid()]
223 |     );
224 |     // check missing blocks - should be b and c
225 |     assert_eq!(
226 |         store.get_missing_blocks::<HashSet<_>>(a.cid()).unwrap(),
227 |         hashset![*b.cid(), *c.cid()]
228 |     );
229 |     // alias the root
230 |     store.alias(b"alias1".as_ref(), Some(a.cid())).unwrap();
231 |     store.gc().unwrap();
232 |     // after gc, we should still have the block
233 |     assert!(store.has_block(a.cid()).unwrap());
234 |     store.alias(b"alias1".as_ref(), None).unwrap();
235 |     store.gc().unwrap();
236 |     // after gc, we should no longer have the block
237 |     assert!(!store.has_block(a.cid()).unwrap());
238 | }
239 |
240 | #[test]
241 | fn incremental_insert() -> anyhow::Result<()> {
242 |     let mut store = BlockStore::memory(Config::default())?;
243 |     let b = block("b");
244 |     let d = block("d");
245 |     let e = block("e");
246 |     let c = links("c", vec![&d, &e]);
247 |     let a = links("a", vec![&b, &c]);
248 |     // alias before even adding the block
249 |     store.alias(b"alias1".as_ref(), Some(a.cid()))?;
250 |     assert!(store.has_cid(a.cid())?);
251 |     store.put_block(a.clone(), None)?;
252 |     store.gc()?;
253 |     store.put_block(c.clone(), None)?;
254 |     store.gc()?;
255 |     // we should have all five cids
256 |     assert!(store.has_cid(a.cid())?);
257 |     assert!(store.has_cid(b.cid())?);
258 |     assert!(store.has_cid(c.cid())?);
259 |     assert!(store.has_cid(d.cid())?);
260 |     assert!(store.has_cid(e.cid())?);
261 |     // but only blocks a and c
262 |     assert!(store.has_block(a.cid())?);
263 |     assert!(!store.has_block(b.cid())?);
264 |     assert!(store.has_block(c.cid())?);
265 |     assert!(!store.has_block(d.cid())?);
266 |     assert!(!store.has_block(e.cid())?);
267 |     // check the data
268 |     assert_eq!(store.get_block(a.cid())?, Some(a.data().to_vec()));
269 |     // check descendants
270 |     assert_eq!(
271 |         store.get_descendants::<FnvHashSet<_>>(a.cid())?,
272 |         [a.cid(), b.cid(), c.cid(), d.cid(), e.cid()]
273 |             .iter()
274 |             .copied()
275 |             .copied()
276 |             .collect::<FnvHashSet<_>>()
277 |     );
278 |     // check missing blocks - should be b, d and e
279 |     assert_eq!(
280 |         store.get_missing_blocks::<FnvHashSet<_>>(a.cid())?,
281 |         [b.cid(), d.cid(), e.cid()]
282 |             .iter()
283 |             .copied()
284 |             .copied()
285 |             .collect::<FnvHashSet<_>>()
286 |     );
287 |     // alias the root
288 |     store.alias(b"alias1".as_ref(), Some(a.cid()))?;
289 |     store.gc()?;
290 |     // after gc, we should still have the block
291 |     assert!(store.has_block(a.cid())?);
292 |     store.alias(b"alias1".as_ref(), Some(c.cid()))?;
293 |     store.gc()?;
294 |     assert!(!store.has_block(a.cid())?);
295 |     assert!(!store.has_cid(a.cid())?);
296 |     assert!(store.has_block(c.cid())?);
297 |     Ok(())
298 | }
299 |
300 | #[test]
301 | fn size_targets() -> anyhow::Result<()> {
302 |     // create a store with a non-empty size target to enable keeping non-pinned stuff around
303 |     let mut store = BlockStore::memory(
304 |         Config::default()
305 |             .with_size_targets(10, 10000)
306 |             .with_cache_tracker(SortByIdCacheTracker),
307 |     )?;
308 |
309 |     // add some pinned stuff at the very beginning
310 |     for i in 0..2 {
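        // each pinned block is aliased by its own CID bytes below, so gc never collects it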
311 | let block = pinned(i); 312 | store.put_block(block.clone(), None)?; 313 | store.alias(block.cid().to_bytes(), Some(block.cid()))?; 314 | } 315 | 316 | // add data that is within the size targets 317 | for i in 0..8 { 318 | let block = unpinned(i); 319 | store.put_block(block.clone(), None)?; 320 | } 321 | 322 | // check that gc does nothing 323 | assert_eq!(store.get_store_stats()?.count, 10); 324 | assert_eq!(store.get_store_stats()?.size, 10000); 325 | store.incremental_gc(5, Duration::from_secs(100000))?; 326 | assert_eq!(store.get_store_stats()?.count, 10); 327 | assert_eq!(store.get_store_stats()?.size, 10000); 328 | 329 | // add some more stuff to exceed the size targets 330 | for i in 8..13 { 331 | let block = unpinned(i); 332 | store.put_block(block.clone(), None)?; 333 | } 334 | 335 | // check that gc gets triggered and removes min_blocks 336 | store.incremental_gc(10, Duration::from_secs(100000))?; 337 | assert_eq!(store.get_store_stats()?.count, 10); 338 | assert_eq!(store.get_store_stats()?.size, 10000); 339 | 340 | let cids = store.get_block_cids::>()?; 341 | // check that the 2 pinned ones are still there despite being added first 342 | // and that only the 8 latest unpinned ones to be added remain 343 | let expected_cids = (0..2) 344 | .map(pinned) 345 | .chain((5..13).map(unpinned)) 346 | .map(|block| *block.cid()) 347 | .collect::>(); 348 | assert_eq!(cids, expected_cids); 349 | Ok(()) 350 | } 351 | 352 | #[test] 353 | fn in_mem_cache_tracker() -> anyhow::Result<()> { 354 | cache_test(InMemCacheTracker::new(|access, _| Some(access))) 355 | } 356 | 357 | #[test] 358 | fn sqlite_cache_tracker() -> anyhow::Result<()> { 359 | cache_test(SqliteCacheTracker::memory(|access, _| Some(access))?) 360 | } 361 | 362 | fn cache_test(tracker: impl CacheTracker + 'static) -> anyhow::Result<()> { 363 | // let tracker = ; 364 | 365 | // create a store with a non-empty size target to enable keeping non-pinned stuff around 366 | let mut store = BlockStore::memory( 367 | Config::default() 368 | .with_size_targets(10, 10000) 369 | .with_cache_tracker(tracker), 370 | )?; 371 | 372 | // add some pinned stuff at the very beginning 373 | for i in 0..2 { 374 | let block = pinned(i); 375 | store.put_block(block.clone(), None)?; 376 | store.alias(block.cid().to_bytes(), Some(block.cid()))?; 377 | } 378 | 379 | // add data that is within the size targets 380 | for i in 0..8 { 381 | let block = unpinned(i); 382 | store.put_block(block.clone(), None)?; 383 | } 384 | 385 | // check that gc does nothing 386 | assert_eq!(store.get_store_stats()?.count, 10); 387 | assert_eq!(store.get_store_stats()?.size, 10000); 388 | store.incremental_gc(5, Duration::from_secs(100000))?; 389 | assert_eq!(store.get_store_stats()?.count, 10); 390 | assert_eq!(store.get_store_stats()?.size, 10000); 391 | 392 | // add some more stuff to exceed the size targets 393 | for i in 8..13 { 394 | let block = unpinned(i); 395 | store.put_block(block.clone(), None)?; 396 | } 397 | 398 | // access one of the existing unpinned blocks to move it to the front 399 | assert_eq!( 400 | store.get_block(unpinned(0).cid())?, 401 | Some(unpinned(0).data().to_vec()) 402 | ); 403 | 404 | // check that gc gets triggered and removes min_blocks 405 | store.incremental_gc(10, Duration::from_secs(100000))?; 406 | assert_eq!(store.get_store_stats()?.count, 10); 407 | assert_eq!(store.get_store_stats()?.size, 10000); 408 | 409 | let cids = store.get_block_cids::>()?; 410 | // check that the 2 pinned ones are still there despite being added first 411 
| // and that the recently accessed block is still there 412 | let expected_cids = (0..2) 413 | .map(pinned) 414 | .chain(Some(unpinned(0))) 415 | .chain((6..13).map(unpinned)) 416 | .map(|block| *block.cid()) 417 | .collect::>(); 418 | assert_eq!(cids, expected_cids); 419 | Ok(()) 420 | } 421 | 422 | const OLD_INIT: &str = r#" 423 | CREATE TABLE IF NOT EXISTS blocks ( 424 | key BLOB PRIMARY KEY, 425 | pinned INTEGER DEFAULT 0, 426 | cid BLOB, 427 | data BLOB 428 | ) WITHOUT ROWID; 429 | "#; 430 | 431 | #[test] 432 | fn test_migration() -> anyhow::Result<()> { 433 | let tmp = TempDir::new("test_migration")?; 434 | let path = tmp.path().join("db"); 435 | let conn = Connection::open(&path)?; 436 | conn.execute_batch(OLD_INIT)?; 437 | let mut blocks = Vec::with_capacity(5); 438 | for i in 0..blocks.capacity() { 439 | let data = (i as u64).to_be_bytes().to_vec(); 440 | let cid = Cid::new_v1(0x55, Code::Sha2_256.digest(&data)); 441 | conn.prepare_cached("INSERT INTO blocks (key, pinned, cid, data) VALUES (?1, 1, ?2, ?3)")? 442 | .execute(params![cid.to_string(), cid.to_bytes(), data])?; 443 | blocks.push((cid, data)); 444 | } 445 | let mut store = BlockStore::open(path, Config::default())?; 446 | for (cid, data) in blocks { 447 | assert_eq!(store.get_block(&cid)?, Some(data)); 448 | } 449 | Ok(()) 450 | } 451 | 452 | #[test] 453 | fn test_resolve() -> anyhow::Result<()> { 454 | let mut store = BlockStore::memory(Config::default())?; 455 | let block = pinned(0); 456 | store.put_block(block.clone(), None)?; 457 | store.alias(&b"leaf"[..], Some(block.cid()))?; 458 | let cid2 = store.resolve(&b"leaf"[..])?; 459 | assert_eq!(Some(*block.cid()), cid2); 460 | Ok(()) 461 | } 462 | 463 | #[test] 464 | fn test_reverse_alias() -> anyhow::Result<()> { 465 | let mut store = BlockStore::memory(Config::default())?; 466 | let block = pinned(0); 467 | assert_eq!(store.reverse_alias(block.cid())?, None); 468 | store.put_block(block.clone(), None)?; 469 | assert_eq!(store.reverse_alias(block.cid())?, Some(hashset! {})); 470 | store.alias(&b"leaf"[..], Some(block.cid()))?; 471 | assert_eq!( 472 | store.reverse_alias(block.cid())?, 473 | Some(hashset! {b"leaf".to_vec()}) 474 | ); 475 | let block2 = links("1", vec![&block]); // needs link to cid 476 | store.put_block(block2.clone(), None)?; 477 | store.alias(&b"root"[..], Some(block2.cid()))?; 478 | assert_eq!( 479 | store.reverse_alias(block.cid())?, 480 | Some(hashset! 
{b"leaf".to_vec(), b"root".to_vec()}) 481 | ); 482 | Ok(()) 483 | } 484 | 485 | #[test] 486 | fn test_vacuum() -> anyhow::Result<()> { 487 | let mut store = BlockStore::memory(Config::default())?; 488 | store.vacuum()?; 489 | Ok(()) 490 | } 491 | 492 | #[test] 493 | fn test_aliases() -> anyhow::Result<()> { 494 | let mut store = BlockStore::memory(Config::default())?; 495 | let block = pinned(0); 496 | let cid = block.cid(); 497 | store.put_block(block.clone(), None)?; 498 | store.alias(b"a".as_ref(), Some(cid))?; 499 | store.alias(b"b".as_ref(), Some(cid))?; 500 | store.alias(b"c".as_ref(), Some(cid))?; 501 | let mut aliases: Vec<(Vec, Cid)> = store.aliases()?; 502 | aliases.sort_by_key(|x| x.0.clone()); 503 | assert_eq!( 504 | aliases, 505 | vec![ 506 | (b"a".to_vec(), *cid), 507 | (b"b".to_vec(), *cid), 508 | (b"c".to_vec(), *cid), 509 | ] 510 | ); 511 | Ok(()) 512 | } 513 | 514 | #[test] 515 | fn temp_pin() -> anyhow::Result<()> { 516 | let mut store = BlockStore::memory(Config::default())?; 517 | let a = block("a"); 518 | let b = block("b"); 519 | let mut alias = store.temp_pin(); 520 | 521 | store.put_block(a.clone(), Some(&mut alias))?; 522 | store.gc()?; 523 | assert!(store.has_block(a.cid())?); 524 | 525 | store.put_block(b.clone(), Some(&mut alias))?; 526 | store.gc()?; 527 | assert!(store.has_block(b.cid())?); 528 | 529 | drop(alias); 530 | store.gc()?; 531 | assert!(!store.has_block(a.cid())?); 532 | assert!(!store.has_block(b.cid())?); 533 | 534 | Ok(()) 535 | } 536 | 537 | #[test] 538 | fn broken_db() -> anyhow::Result<()> { 539 | let tmp = TempDir::new("broken_db")?; 540 | let path = tmp.path().join("mini.sqlite"); 541 | std::fs::copy("test-data/mini.sqlite", &path)?; 542 | let mut store = BlockStore::open_path(DbPath::File(path), Config::default()).context("mini")?; 543 | assert!(store.integrity_check().is_ok()); 544 | 545 | let path = tmp.path().join("broken.sqlite"); 546 | std::fs::copy("test-data/broken.sqlite", &path)?; 547 | // don’t use the wrapper — we expect it to fail and don’t need a backup 548 | assert!(crate::BlockStore::::open_path( 549 | DbPath::File(path), 550 | Config::default() 551 | ) 552 | .is_err()); 553 | 554 | Ok(()) 555 | } 556 | 557 | #[test] 558 | fn shared_file() { 559 | let tmp = TempDir::new("shared_file").unwrap(); 560 | let path = tmp.path().join("test.sqlite"); 561 | let mut db1 = BlockStore::open_path(DbPath::File(path.clone()), Config::default()).unwrap(); 562 | let mut db2 = BlockStore::open_path(DbPath::File(path), Config::default()).unwrap(); 563 | 564 | for i in 0..10 { 565 | let block = block(&format!("block-{}", i)); 566 | db1.put_block(block.clone(), None).unwrap(); 567 | assert_eq!( 568 | db2.get_block(block.cid()).unwrap(), 569 | Some(block.data().to_vec()) 570 | ); 571 | } 572 | } 573 | 574 | #[test] 575 | fn large_dag_gc() -> anyhow::Result<()> { 576 | let mut store = BlockStore::memory(Config::default())?; 577 | let mut l = Vec::new(); 578 | for i in 0..100 { 579 | let block = links(&format!("node-{}", i), l.iter().collect()); 580 | store.put_block(block.clone(), None)?; 581 | l.push(block); 582 | } 583 | // pin the root 584 | let cid = *l.last().as_ref().unwrap().cid(); 585 | store.alias((&cid).to_bytes(), Some(&cid))?; 586 | // this takes forever 587 | store.gc()?; 588 | Ok(()) 589 | } 590 | -------------------------------------------------------------------------------- /src/transaction.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | cache::{BlockInfo, CacheTracker, 
WriteInfo},
3 |     cidbytes::CidBytes,
4 |     db::*,
5 |     Block, BlockStore, Result, StoreStats, TempPin,
6 | };
7 | use fnv::FnvHashSet;
8 | use libipld::{cid, codec::References, store::StoreParams, Cid, Ipld};
9 | use parking_lot::Mutex;
10 | use std::{
11 |     borrow::Cow, collections::HashSet, convert::TryFrom, iter::FromIterator, marker::PhantomData,
12 |     mem, sync::Arc,
13 | };
14 |
15 | pub struct Transaction<'a, S> {
16 |     inner: &'a mut rusqlite::Connection,
17 |     info: TransactionInfo,
18 |     expired_temp_pins: Arc<Mutex<Vec<i64>>>,
19 |     _s: PhantomData<S>,
20 | }
21 |
22 | struct TransactionInfo {
23 |     written: Vec<WriteInfo>,
24 |     accessed: Vec<BlockInfo>,
25 |     committed: bool,
26 |     tracker: Arc<dyn CacheTracker>,
27 | }
28 |
29 | impl Drop for TransactionInfo {
30 |     fn drop(&mut self) {
31 |         if !self.accessed.is_empty() {
32 |             let blocks = mem::take(&mut self.accessed);
33 |             self.tracker.blocks_accessed(blocks);
34 |         }
35 |         // if the transaction was not committed, we don't report blocks written!
36 |         if self.committed && !self.written.is_empty() {
37 |             let blocks = mem::take(&mut self.written);
38 |             self.tracker.blocks_written(blocks);
39 |         }
40 |     }
41 | }
42 |
43 | impl<'a, S> Transaction<'a, S>
44 | where
45 |     S: StoreParams,
46 |     Ipld: References<S::Codecs>,
47 | {
48 |     pub(crate) fn new(owner: &'a mut BlockStore<S>) -> Self {
49 |         Self {
50 |             inner: &mut owner.conn,
51 |             info: TransactionInfo {
52 |                 written: Vec::new(),
53 |                 accessed: Vec::new(),
54 |                 committed: false,
55 |                 tracker: owner.config.cache_tracker.clone(),
56 |             },
57 |             expired_temp_pins: owner.expired_temp_pins.clone(),
58 |             _s: PhantomData,
59 |         }
60 |     }
61 |
62 |     /// Set or delete an alias
63 |     pub fn alias<'b>(
64 |         &mut self,
65 |         name: impl Into<Cow<'b, [u8]>>,
66 |         link: Option<&'b Cid>,
67 |     ) -> Result<()> {
68 |         let link: Option<CidBytes> = link.map(CidBytes::try_from).transpose()?;
69 |         let name = name.into().into_owned();
70 |         in_txn(self.inner, None, true, move |txn| {
71 |             alias(txn, name.as_ref(), link.as_ref())
72 |         })?;
73 |         Ok(())
74 |     }
75 |
76 |     /// Returns the aliases referencing a cid.
77 |     pub fn reverse_alias(&mut self, cid: &Cid) -> Result<Option<HashSet<Vec<u8>>>> {
78 |         let cid = CidBytes::try_from(cid)?;
79 |         in_txn(self.inner, None, true, move |txn| {
80 |             reverse_alias(txn, cid.as_ref())
81 |         })
82 |     }
83 |
84 |     /// Resolves an alias to a cid.
85 |     pub fn resolve<'b>(&mut self, name: impl Into<Cow<'b, [u8]>>) -> Result<Option<Cid>> {
86 |         let name = name.into().into_owned();
87 |         in_txn(self.inner, None, true, move |txn| {
88 |             resolve::<CidBytes>(txn, name.as_ref())?
89 |                 .map(|c| Cid::try_from(&c))
90 |                 .transpose()
91 |                 .map_err(Into::into)
92 |         })
93 |     }
94 |
95 |     /// Get a temporary pin for safely adding blocks to the store
96 |     pub fn temp_pin(&mut self) -> TempPin {
97 |         TempPin::new(self.expired_temp_pins.clone())
98 |     }
99 |
100 |     /// Extend temp pin with an additional cid
101 |     pub fn extend_temp_pin(&mut self, pin: &mut TempPin, link: &Cid) -> Result<()> {
102 |         let link = CidBytes::try_from(link)?;
103 |         let id = pin.id;
104 |         pin.id = in_txn(self.inner, None, true, move |txn| {
105 |             extend_temp_pin(txn, id, vec![link])
106 |         })?;
107 |         Ok(())
108 |     }
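    // A usage sketch of the typical write path (hypothetical `store` and `block`;
    // aliasing may precede the put, as the incremental_insert test demonstrates):
    //
    //     let mut txn = store.transaction();
    //     txn.alias(&b"root"[..], Some(block.cid()))?;
    //     txn.put_block(block, None)?;
    //     txn.commit()?; // cache trackers only learn about writes after commit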
110 |     /// Checks if the store knows about the cid.
111 |     ///
112 |     /// Note that this does not necessarily mean that the store has the data for the cid.
113 |     pub fn has_cid(&mut self, cid: &Cid) -> Result<bool> {
114 |         let cid = CidBytes::try_from(cid)?;
115 |         in_txn(self.inner, None, false, move |txn| has_cid(txn, cid))
116 |     }
117 | 
118 |     /// Checks if the store has the data for a cid
119 |     pub fn has_block(&mut self, cid: &Cid) -> Result<bool> {
120 |         let cid = CidBytes::try_from(cid)?;
121 |         in_txn(self.inner, None, false, move |txn| has_block(txn, cid))
122 |     }
123 | 
124 |     /// Get all cids that the store knows about
125 |     pub fn get_known_cids<C: FromIterator<Cid>>(&mut self) -> Result<C> {
126 |         let res = in_txn(self.inner, None, false, move |txn| {
127 |             get_known_cids::<CidBytes>(txn)
128 |         })?;
129 |         let res = res.iter().map(Cid::try_from).collect::<std::result::Result<C, cid::Error>>()?;
130 |         Ok(res)
131 |     }
132 | 
133 |     /// Get all cids for which the store has blocks
134 |     pub fn get_block_cids<C: FromIterator<Cid>>(&mut self) -> Result<C> {
135 |         let res = in_txn(self.inner, None, false, move |txn| {
136 |             get_block_cids::<CidBytes>(txn)
137 |         })?;
138 |         let res = res.iter().map(Cid::try_from).collect::<std::result::Result<C, cid::Error>>()?;
139 |         Ok(res)
140 |     }
141 | 
142 |     /// Get descendants of a cid
143 |     pub fn get_descendants<C: FromIterator<Cid>>(&mut self, cid: &Cid) -> Result<C> {
144 |         let cid = CidBytes::try_from(cid)?;
145 |         let res = in_txn(self.inner, None, false, move |txn| {
146 |             get_descendants(txn, cid)
147 |         })?;
148 |         let res = res.iter().map(Cid::try_from).collect::<std::result::Result<C, cid::Error>>()?;
149 |         Ok(res)
150 |     }
151 | 
152 |     /// Given a root of a dag, gives all cids which we do not have data for.
153 |     pub fn get_missing_blocks<C: FromIterator<Cid>>(&mut self, cid: &Cid) -> Result<C> {
154 |         let cid = CidBytes::try_from(cid)?;
155 |         let result = in_txn(self.inner, None, false, move |txn| {
156 |             get_missing_blocks(txn, cid)
157 |         })?;
158 |         let res = result
159 |             .iter()
160 |             .map(Cid::try_from)
161 |             .collect::<std::result::Result<C, cid::Error>>()?;
162 |         Ok(res)
163 |     }
164 | 
165 |     /// list all aliases
166 |     pub fn aliases<C: FromIterator<(Vec<u8>, Cid)>>(&mut self) -> Result<C> {
167 |         let result: Vec<(Vec<u8>, CidBytes)> =
168 |             in_txn(self.inner, None, false, move |txn| aliases(txn))?;
169 |         let res = result
170 |             .into_iter()
171 |             .map(|(alias, cid)| {
172 |                 let cid = Cid::try_from(&cid)?;
173 |                 Ok((alias, cid))
174 |             })
175 |             .collect::<std::result::Result<C, cid::Error>>()?;
176 |         Ok(res)
177 |     }
178 | 
179 |     /// Put a block. This will only be completed once the transaction is successfully committed
180 |     pub fn put_block(&mut self, block: Block<S>, pin: Option<&mut TempPin>) -> Result<()> {
181 |         let cid_bytes = CidBytes::try_from(block.cid())?;
182 |         let mut links = Vec::new();
183 |         block.references(&mut links)?;
184 |         let links = links
185 |             .iter()
186 |             .map(CidBytes::try_from)
187 |             .collect::<std::result::Result<FnvHashSet<_>, cid::Error>>()?;
188 |         let id = pin.as_ref().map(|p| p.id);
189 |         let cid = *block.cid();
190 |         let len = block.data().len();
191 |         let (opt_id, res) = in_txn(self.inner, None, true, move |txn| {
192 |             put_block(txn, &cid_bytes, block.data(), links.iter().copied(), id)
193 |         })?;
194 |         if let (Some(id), Some(pin)) = (opt_id, pin) {
195 |             pin.id = id;
196 |         }
197 |         let write_info = WriteInfo::new(BlockInfo::new(res.id, &cid, len), res.block_exists);
198 |         self.info.written.push(write_info);
199 |         Ok(())
200 |     }
201 | 
202 |     /// Get a block
203 |     pub fn get_block(&mut self, cid: &Cid) -> Result<Option<Vec<u8>>> {
204 |         let cid1 = *cid;
205 |         let response = in_txn(self.inner, None, false, move |txn| {
206 |             get_block(txn, &CidBytes::try_from(&cid1)?)
207 |         })?;
208 |         if let Some(info) = response
209 |             .as_ref()
210 |             .map(|(id, data)| BlockInfo::new(*id, cid, data.len()))
211 |         {
212 |             self.info.accessed.push(info);
213 |         }
214 |         Ok(response.map(|(_id, data)| data))
215 |     }
216 | 
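    // Editor's sketch (not part of the original source): `get_missing_blocks`
    // drives a typical sync loop. `fetch_from_network` is a hypothetical helper;
    // everything else uses the methods defined above.
    //
    //     let mut missing: Vec<Cid> = txn.get_missing_blocks(&root)?;
    //     while !missing.is_empty() {
    //         for cid in &missing {
    //             let block = fetch_from_network(cid)?;
    //             txn.put_block(block, Some(&mut pin))?;
    //         }
    //         missing = txn.get_missing_blocks(&root)?;
    //     }
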
218 | /// 219 | /// The stats are kept up to date, so this is fast. 220 | pub fn get_store_stats(&mut self) -> Result { 221 | in_txn(self.inner, None, false, get_store_stats) 222 | } 223 | 224 | /// Commit and consume the transaction. Default is to not commit. 225 | pub fn commit(mut self) -> Result<()> { 226 | self.info.committed = true; 227 | Ok(()) 228 | } 229 | } 230 | -------------------------------------------------------------------------------- /tables.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE cids; 2 | DROP TABLE refs; 3 | DROP TABLE blocks; 4 | DROP TABLE atime; 5 | PRAGMA foreign_keys = ON; 6 | 7 | CREATE TABLE IF NOT EXISTS cids ( 8 | id INTEGER PRIMARY KEY AUTOINCREMENT, 9 | cid BLOB UNIQUE 10 | ); 11 | 12 | CREATE TABLE IF NOT EXISTS refs ( 13 | parent_id INTEGER, 14 | child_id INTEGER, 15 | UNIQUE(parent_id,child_id) 16 | CONSTRAINT fk_parent_id 17 | FOREIGN KEY (parent_id) 18 | REFERENCES cids(id) 19 | ON DELETE CASCADE 20 | CONSTRAINT fk_child_id 21 | FOREIGN KEY (child_id) 22 | REFERENCES cids(id) 23 | ON DELETE CASCADE 24 | ); 25 | 26 | CREATE INDEX idx_refs_parent_id 27 | ON refs (parent_id); 28 | 29 | CREATE INDEX idx_refs_child_id 30 | ON refs (child_id); 31 | 32 | CREATE TABLE IF NOT EXISTS blocks ( 33 | block_id INTEGER PRIMARY_KEY, 34 | block BLOB UNIQUE, 35 | ); 36 | 37 | CREATE TABLE IF NOT EXISTS atime ( 38 | atime INTEGER PRIMARY KEY AUTOINCREMENT, 39 | block_id INTEGER UNIQUE, 40 | CONSTRAINT fk_block_id 41 | FOREIGN KEY (block_id) 42 | REFERENCES cids(id) 43 | ON DELETE CASCADE 44 | ); 45 | 46 | CREATE TABLE IF NOT EXISTS aliases ( 47 | name blob UNIQUE, 48 | block_id INTEGER, 49 | CONSTRAINT fk_block_id 50 | FOREIGN KEY (block_id) 51 | REFERENCES cids(id) 52 | ON DELETE CASCADE 53 | ); 54 | 55 | BEGIN TRANSACTION; 56 | -- note that we would have to use INSERT OR IGNORE here in a real database 57 | INSERT INTO cids (cid) VALUES ("cid_a"); 58 | INSERT INTO blocks (block_id, block) VALUES (last_insert_rowid(), "value_a"); 59 | COMMIT; 60 | 61 | BEGIN TRANSACTION; 62 | -- note that we would have to use INSERT OR IGNORE here in a real database 63 | INSERT INTO cids (cid) VALUES ("cid_b"); 64 | INSERT INTO blocks (block_id, block) VALUES (last_insert_rowid(), "value_b"); 65 | COMMIT; 66 | 67 | BEGIN TRANSACTION; 68 | -- note that we would have to use INSERT OR IGNORE here in a real database 69 | INSERT INTO cids (cid) VALUES ("cid_c"); 70 | INSERT INTO blocks (block_id, block) VALUES (last_insert_rowid(), "value_c"); 71 | COMMIT; 72 | 73 | BEGIN TRANSACTION; 74 | -- note that we would have to use INSERT OR IGNORE here in a real database 75 | INSERT INTO cids (cid) VALUES ("cid_d"); 76 | INSERT INTO blocks (block_id, block) VALUES (last_insert_rowid(), "value_d"); 77 | COMMIT; 78 | 79 | INSERT INTO atime (block_id) VALUES (1); 80 | INSERT INTO atime (block_id) VALUES (2); 81 | INSERT INTO atime (block_id) VALUES (3); 82 | INSERT INTO atime (block_id) VALUES (4); 83 | 84 | -- a is parent of b and c 85 | INSERT INTO refs (parent_id, child_id) VALUES (1,2); 86 | INSERT INTO refs (parent_id, child_id) VALUES (1,3); 87 | 88 | -- d is parent of b and c 89 | INSERT INTO refs (parent_id, child_id) VALUES (4,2); 90 | INSERT INTO refs (parent_id, child_id) VALUES (4,3); 91 | 92 | SELECT 93 | (SELECT COUNT(parent_id) FROM refs WHERE child_id = 1) + 94 | (SELECT COUNT(name) FROM aliases WHERE block_id = 1); 95 | 96 | DELETE FROM 97 | cids 98 | WHERE 99 | (NOT EXISTS(SELECT 1 FROM refs WHERE child_id = id)) AND 100 | 
96 | DELETE FROM
97 |     cids
98 | WHERE
99 |     (NOT EXISTS(SELECT 1 FROM refs WHERE child_id = id)) AND
100 |     (NOT EXISTS(SELECT 1 FROM aliases WHERE block_id = id));
101 | 
102 | DELETE FROM
103 |     cids
104 | WHERE
105 |     (NOT EXISTS(SELECT 1 FROM refs WHERE child_id = id));
106 | 
107 | INSERT INTO aliases (name, block_id) VALUES ('alias2', 4);
108 | INSERT INTO aliases (name, block_id) VALUES ('alias1', 1);
109 | 
110 | WITH RECURSIVE
111 |     ancestor_of(child_id) AS
112 |     (SELECT parent_id FROM refs WHERE child_id=1
113 |      UNION ALL
114 |      SELECT parent_id FROM refs JOIN ancestor_of USING(child_id))
115 | -- first attempt: the cross join below returns every parent_id; refined in the next query
116 | SELECT DISTINCT refs.parent_id FROM ancestor_of, refs;
117 | 
118 | WITH RECURSIVE
119 |     ancestor_of(id) AS
120 |     (
121 |         SELECT parent_id FROM refs WHERE child_id=2
122 |         UNION ALL
123 |         SELECT DISTINCT parent_id FROM refs JOIN ancestor_of WHERE ancestor_of.id=refs.child_id
124 |     )
125 | SELECT id FROM ancestor_of;
126 | 
127 | WITH RECURSIVE
128 |     descendant_of(id) AS
129 |     (
130 |         -- non recursive part - simply look up the immediate children
131 |         SELECT child_id FROM refs WHERE parent_id=11121
132 |         UNION ALL
133 |         -- recursive part - look up children of all returned ids
134 |         SELECT DISTINCT child_id FROM refs JOIN descendant_of WHERE descendant_of.id=refs.parent_id
135 |     )
136 | SELECT id FROM descendant_of;
137 | 
138 | WITH RECURSIVE
139 |     descendant_of(id) AS
140 |     (
141 |         -- non recursive part - start from all aliased (pinned) ids
142 |         SELECT block_id FROM aliases
143 |         UNION ALL
144 |         -- recursive part - look up children of all returned ids
145 |         SELECT DISTINCT child_id FROM refs JOIN descendant_of WHERE descendant_of.id=refs.parent_id
146 |     )
147 | SELECT id FROM descendant_of;
148 | 
149 | WITH RECURSIVE
150 |     descendant_of(id) AS
151 |     (
152 |         -- non recursive part - start from all aliased (pinned) ids
153 |         SELECT block_id FROM aliases
154 |         UNION ALL
155 |         -- recursive part - look up children of all returned ids
156 |         SELECT DISTINCT child_id FROM refs JOIN descendant_of WHERE descendant_of.id=refs.parent_id
157 |     )
158 | -- DELETE ... LIMIT requires SQLite built with SQLITE_ENABLE_UPDATE_DELETE_LIMIT
159 | DELETE FROM cids WHERE id NOT IN (SELECT id FROM descendant_of) LIMIT 1000;
160 | 
161 | WITH RECURSIVE
162 |     descendant_of(id) AS
163 |     (
164 |         -- non recursive part - simply look up the immediate children
165 |         SELECT child_id FROM refs WHERE parent_id=1121101
166 |         UNION ALL
167 |         -- recursive part - look up children of all returned ids
168 |         SELECT DISTINCT child_id FROM refs JOIN descendant_of WHERE descendant_of.id=refs.parent_id
169 |     )
170 | SELECT id FROM descendant_of;
171 | 
172 | PRAGMA foreign_keys = ON;
173 | 
174 | SELECT * FROM
175 |     cids
176 | WHERE
177 |     (NOT EXISTS(SELECT 1 FROM refs WHERE child_id = id)) AND
178 |     (NOT EXISTS(SELECT 1 FROM aliases WHERE block_id = id));
179 | 
180 | DELETE FROM
181 |     cids
182 | WHERE
183 |     (NOT EXISTS(SELECT 1 FROM refs WHERE child_id = id)) AND
184 |     (NOT EXISTS(SELECT 1 FROM aliases WHERE block_id = id))
185 | LIMIT 1;
186 | 
187 | WITH RECURSIVE
188 |     descendant_of(id) AS (
189 |         -- non recursive part - start from the given root id
190 |         SELECT 1099989
191 |         UNION ALL
192 |         -- recursive part - look up children of all returned ids
193 |         SELECT DISTINCT child_id FROM refs JOIN descendant_of WHERE descendant_of.id=refs.parent_id
194 |     ),
195 |     orphaned_ids as (
196 |         SELECT id FROM descendant_of LEFT JOIN blocks ON descendant_of.id = blocks.block_id WHERE blocks.block_id IS NULL
197 |     )
198 | SELECT cid from cids,orphaned_ids WHERE cids.id = orphaned_ids.id;
199 | 
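-- Summary (editor's addition, not in the original scratch): the gc pattern the
-- queries above converge on is to seed the recursive descendant_of CTE with the
-- aliased (pinned) ids, treat everything reachable as live, and then delete or
-- inspect the remaining cids, as the next query does with an extra atime filter.
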
200 | WITH RECURSIVE
201 |     descendant_of(id) AS
202 |     (
203 |         -- non recursive part - start from all aliased (pinned) ids
204 |         SELECT block_id FROM aliases
205 |         UNION ALL
206 |         -- recursive part - look up children of all returned ids
207 |         SELECT DISTINCT child_id FROM refs JOIN descendant_of WHERE descendant_of.id=refs.parent_id
208 |     )
209 | SELECT id FROM
210 |     cids
211 | WHERE
212 |     id NOT IN (SELECT id FROM descendant_of) AND
213 |     (SELECT atime FROM atime WHERE atime.block_id = id) < 1000000;
214 | 
215 | 
216 | EXPLAIN QUERY PLAN WITH RECURSIVE
217 |     -- find descendants of cid, including the id of the cid itself
218 |     descendant_of(id) AS (
219 |         SELECT 11111
220 |         UNION ALL
221 |         SELECT DISTINCT child_id FROM refs JOIN descendant_of ON descendant_of.id=refs.parent_id
222 |     ),
223 |     -- find orphaned ids
224 |     orphaned_ids as (
225 |         SELECT DISTINCT id FROM descendant_of LEFT JOIN blocks ON descendant_of.id = blocks.block_id WHERE blocks.block_id IS NULL
226 |     )
227 | -- retrieve corresponding cids - this is a set because of select distinct
228 | SELECT cid from cids JOIN orphaned_ids ON cids.id = orphaned_ids.id;
229 | 
230 | EXPLAIN QUERY PLAN WITH RECURSIVE
231 |     descendant_of(id) AS
232 |     (
233 |         SELECT 11111
234 |         UNION ALL
235 |         SELECT DISTINCT child_id FROM refs JOIN descendant_of ON descendant_of.id=refs.parent_id
236 |     ),
237 |     descendant_ids as (
238 |         SELECT DISTINCT id FROM descendant_of
239 |     )
240 | SELECT * FROM descendant_ids;
--------------------------------------------------------------------------------
/test-data/broken.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Actyx/ipfs-sqlite-block-store/8b54ee3c34d41bd790309229bdb70ba271540d32/test-data/broken.sqlite
--------------------------------------------------------------------------------
/test-data/mini.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Actyx/ipfs-sqlite-block-store/8b54ee3c34d41bd790309229bdb70ba271540d32/test-data/mini.sqlite
--------------------------------------------------------------------------------
/tests/stress.rs:
--------------------------------------------------------------------------------
1 | use ipfs_sqlite_block_store::{Config, DbPath};
2 | use itertools::Itertools;
3 | use libipld::{cbor::DagCborCodec, codec::Codec, Cid, DagCbor};
4 | use maplit::hashset;
5 | use multihash::{Code, MultihashDigest};
6 | use std::{
7 |     collections::HashSet,
8 |     sync::{
9 |         atomic::{AtomicBool, Ordering},
10 |         Arc,
11 |     },
12 |     time::Duration,
13 | };
14 | use tracing_subscriber::{fmt::format::FmtSpan, EnvFilter};
15 | 
16 | type Block = libipld::Block<libipld::DefaultParams>;
17 | type BlockStore = ipfs_sqlite_block_store::BlockStore<libipld::DefaultParams>;
18 | 
19 | #[test]
20 | fn main() -> anyhow::Result<()> {
21 |     let subscriber = tracing_subscriber::FmtSubscriber::builder()
22 |         .with_span_events(FmtSpan::ENTER | FmtSpan::CLOSE)
23 |         .with_env_filter(EnvFilter::from_default_env())
24 |         .with_writer(std::io::stderr)
25 |         .finish();
26 |     tracing::subscriber::set_global_default(subscriber).unwrap();
27 | 
28 |     const STREAMS: usize = 5;
29 |     const ROUNDS: usize = 200;
30 | 
31 |     const TARGET_SIZE: u64 = 10_000;
32 |     const TARGET_COUNT: u64 = 1_000;
33 | 
34 |     let dir = tempfile::tempdir().unwrap();
35 |     let db_path = dir.path().join("db");
36 | 
37 |     let mut store = BlockStore::open_path(
38 |         DbPath::File(db_path),
39 |         Config::default().with_size_targets(TARGET_COUNT, TARGET_SIZE),
40 |     )
41 |     .unwrap();
42 | 
43 |     let stopped = Arc::new(AtomicBool::new(false));
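    // Editor's note: a rusqlite `Connection` is not `Sync`, so every thread
    // needs its own handle; `additional_connection()` opens one more connection
    // to the same underlying database file.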
44 |     let handle = std::thread::spawn({
45 |         let stopped = stopped.clone();
46 |         let mut store = store.additional_connection().unwrap();
47 |         // this thread is responsible for continuously GC’ing to disturb things
48 |         move || {
49 |             while !stopped.load(Ordering::Acquire) {
50 |                 eprintln!("gc");
51 |                 store.gc().unwrap();
52 |                 std::thread::sleep(Duration::from_millis(5));
53 |             }
54 |         }
55 |     });
56 | 
57 |     let mut trees = (0..STREAMS)
58 |         .map(|r| {
59 |             let mut store = store.additional_connection().unwrap();
60 |             (0..ROUNDS)
61 |                 .map(move |i| block(format!("block-{}-{}", r, i).as_str()))
62 |                 .tuple_windows()
63 |                 .enumerate()
64 |                 .map(move |(i, (a, b, c, d, e))| {
65 |                     let mut pin = store.temp_pin();
66 |                     let root = links("root", vec![&a, &b, &c, &d, &e]);
67 |                     store
68 |                         .put_blocks(vec![a, b, c, d, e], Some(&mut pin))
69 |                         .unwrap();
70 |                     let cid = *root.cid();
71 |                     store.put_block(root, Some(&mut pin)).unwrap();
72 |                     store
73 |                         .alias(format!("theRoot-{}", r).as_bytes(), Some(&cid))
74 |                         .unwrap();
75 |                     i
76 |                 })
77 |         })
78 |         .collect::<Vec<_>>();
79 | 
80 |     'a: loop {
81 |         for (count, x) in trees.iter_mut().enumerate() {
82 |             std::thread::sleep(Duration::from_millis(1));
83 |             if let Some(i) = x.next() {
84 |                 println!("loop {}", i);
85 |                 for r in 0..STREAMS {
86 |                     if count < r {
87 |                         continue;
88 |                     }
89 |                     let cid = store
90 |                         .resolve(format!("theRoot-{}", r).as_bytes())
91 |                         .unwrap()
92 |                         .unwrap();
93 |                     let root: Node = DagCborCodec
94 |                         .decode(store.get_block(&cid).unwrap().unwrap().as_slice())
95 |                         .unwrap();
96 |                     let mut cids = hashset! {cid};
97 |                     cids.extend(root.links.iter().copied());
98 |                     assert_eq!(store.get_descendants::<HashSet<_>>(&cid).unwrap(), cids);
99 |                     for (idx, cid) in root.links.iter().enumerate() {
100 |                         let b: Node = DagCborCodec
101 |                             .decode(store.get_block(cid).unwrap().unwrap().as_slice())
102 |                             .unwrap();
103 |                         assert_eq!(Node::leaf(&*format!("block-{}-{}", r, i + idx)), b);
104 |                     }
105 |                 }
106 |             } else {
107 |                 break 'a;
108 |             }
109 |         }
110 |     }
111 |     stopped.store(true, Ordering::Release);
112 |     handle.join().unwrap();
113 | 
114 |     let stats = store.get_store_stats().unwrap();
115 |     println!("stats {:?}", stats);
116 |     assert!(stats.count() < 2 * TARGET_COUNT);
117 |     assert!(stats.size() < 2 * TARGET_SIZE);
118 | 
119 |     store.gc().unwrap();
120 | 
121 |     let stats = store.get_store_stats().unwrap();
122 |     println!("stats {:?}", stats);
123 |     assert!(stats.count() < TARGET_COUNT);
124 |     assert!(stats.size() < TARGET_SIZE);
125 | 
126 |     Ok(())
127 | }
128 | 
129 | #[derive(Debug, DagCbor, PartialEq)]
130 | struct Node {
131 |     links: Vec<Cid>,
132 |     text: String,
133 | }
134 | 
135 | impl Node {
136 |     pub fn leaf(text: &str) -> Self {
137 |         Self {
138 |             links: Vec::new(),
139 |             text: text.into(),
140 |         }
141 |     }
142 | 
143 |     pub fn branch(text: &str, links: impl IntoIterator<Item = Cid>) -> Self {
144 |         Self {
145 |             links: links.into_iter().collect(),
146 |             text: text.into(),
147 |         }
148 |     }
149 | }
150 | 
151 | /// creates a simple leaf block
152 | fn block(name: &str) -> Block {
153 |     let ipld = Node::leaf(name);
154 |     let bytes = DagCborCodec.encode(&ipld).unwrap();
155 |     let hash = Code::Sha2_256.digest(&bytes);
156 |     // 0x71 = dag-cbor, see https://github.com/multiformats/multicodec/blob/master/table.csv
157 |     Block::new_unchecked(Cid::new_v1(0x71, hash), bytes)
158 | }
159 | 
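// Editor's sketch (not part of the original source): how `block` (above) and
// `links` (below) compose into a tiny two-level DAG.
//
//     let a = block("a");
//     let b = block("b");
//     let root = links("root", vec![&a, &b]); // root links to a and b
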
160 | /// creates a block with some links
161 | fn links(name: &str, children: Vec<&Block>) -> Block {
162 |     let ipld = Node::branch(name, children.iter().map(|b| *b.cid()).collect::<Vec<_>>());
163 |     let bytes = DagCborCodec.encode(&ipld).unwrap();
164 |     let hash = Code::Sha2_256.digest(&bytes);
165 |     // 0x71 = dag-cbor, see https://github.com/multiformats/multicodec/blob/master/table.csv
166 |     Block::new_unchecked(Cid::new_v1(0x71, hash), bytes)
167 | }
--------------------------------------------------------------------------------