├── .github
│   └── workflows
│       └── rust.yml
├── .gitignore
├── .travis.yml
├── CHANGELOG.md
├── Cargo.lock
├── Cargo.toml
├── LICENSE
├── README.md
├── command-not-found.nu
├── command-not-found.sh
├── default.nix
├── examples
│   └── nix-index-debug.rs
├── flake.lock
├── flake.nix
├── recover.py
├── rustfmt.toml
└── src
    ├── bin
    │   ├── nix-channel-index.rs
    │   ├── nix-index.rs
    │   └── nix-locate.rs
    ├── database.rs
    ├── errors.rs
    ├── files.rs
    ├── frcode.rs
    ├── hydra.rs
    ├── lib.rs
    ├── listings.rs
    ├── nixpkgs.rs
    ├── package.rs
    ├── util.rs
    └── workset.rs

/.github/workflows/rust.yml:
--------------------------------------------------------------------------------
1 | name: Rust
2 | 
3 | on:
4 |   push:
5 |     branches: [master]
6 |   pull_request:
7 |     branches: [master]
8 | 
9 | env:
10 |   CARGO_TERM_COLOR: always
11 | 
12 | jobs:
13 |   build:
14 |     runs-on: ${{ matrix.os }}
15 |     strategy:
16 |       matrix:
17 |         os: [macos-latest, ubuntu-latest]
18 | 
19 |     steps:
20 |       - uses: actions/checkout@v3
21 |       - name: Build
22 |         run: cargo build --verbose
23 |       - name: Run tests
24 |         run: cargo test --verbose
25 | 
26 |   format:
27 |     runs-on: ubuntu-latest
28 |     steps:
29 |       - uses: actions/checkout@v3
30 |       - name: Cargo fmt
31 |         run: |
32 |           rustup toolchain install nightly --profile minimal -c rustfmt
33 |           cargo +nightly fmt --check
34 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | /data
3 | /result
4 | /target-*
5 | .direnv/
6 | .envrc
7 | *.sqlite
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: rust
2 | rust:
3 |   - stable
4 |   - beta
5 |   - nightly
6 | matrix:
7 |   allow_failures:
8 |     - rust: nightly
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | ## 0.1.9 - [Unreleased]
2 | ### Added
3 | ### Fixed
4 | ### Changed
5 | 
6 | ## 0.1.8
7 | ### Added
8 | 
9 | * use HTTPS for cache.nixos.org (#246)
10 | * client now uses reqwest to obtain HTTPS, content decoding, timeouts and more for free (#247)
11 | 
12 | ### Fixed
13 | 
14 | * nix-community buildbot CI builds all attributes out of the box (#240)
15 | 
16 | ### Changed
17 | 
18 | ### Removed
19 | 
20 | ## 0.1.7
21 | ### Added
22 | * improve help message for `nix-locate --type` (issue #204)
23 | * improve error message when `nix-env` is killed by a signal
24 | ### Fixed
25 | * disable nixpkgs overlays (issue #161)
26 | * fix Nix command suggestions for command-not-found.sh (issue #185)
27 | ### Changed
28 | * update dependencies
29 | ### Removed
30 | 
31 | ## 0.1.6
32 | ### Added
33 | * support setting `NIX_INDEX_DATABASE` environment variable to set database location (#213), thanks @mmarx
34 | * this version and future versions will be published to https://crates.io, simplifying library usage in other projects
35 | ### Fixed
36 | ### Changed
37 | ### Removed
38 | 
39 | ## 0.1.5
40 | ### Added
41 | ### Fixed
42 | * fix crash when using wildcard pattern with nix-locate (issue #205)
43 | ### Changed
44 | ### Removed
45 | 
46 | ## 0.1.4 - 2023-01-13
47 | ### Added
48 | ### Fixed
49 | * fix RUSTSEC-2021-0131 (integer overflow in brotli) by migrating away from `brotli2` crate
50 | * fix RUSTSEC-2022-0006 (data race in `thread_local`) by updating `thread_local`
51 | * fix panic when using the `--type`
CLI (issue #202)
52 | ### Changed
53 | * update all dependencies in Cargo.lock
54 | 
55 | ## 0.1.3 - 2023-01-10
56 | ### Added
57 | * flake.nix added to repository, allows directly running nix-index from git (#162), thanks @matthewbauer
58 | * support for proxies (#132), thanks @whizsid
59 | * command-not-found.sh suggests new `nix profile` command if manifest.json exists (#135), thanks @matthewbauer
60 | * support building project via Nix on Darwin (#175), thanks @BrianHicks
61 | * indexer supports prefix filtering (#177), thanks @virchau13
62 | * command-line option to specify system for which to build the index (#183), thanks @usertam
63 | * nix-channel-index: new command to build a programs.sqlite as currently distributed with nix channels (#192), thanks @K900
64 | ### Fixed
65 | * command-not-found.sh never accesses undefined variables anymore (allows set -u) (#123), thanks @matthewbauer
66 | * support xlibs renamed to xorg in recent nixpkgs (#179), thanks @cole-h
67 | ### Changed
68 | * rust dependencies updated to latest versions, thanks @elude03, @berbiche, @Sciecentistguy, @Mic92
69 | * nix-env is now invoked in parallel to query paths (improves performance)
70 | * performance improvement: multithread compression (#152), thanks @enolan
71 | * performance improvement: reduce compression level from 22 to 19 (#152), thanks @enolan
72 | * performance improvement: get store paths from nix-env in parallel (#152), thanks @enolan
73 | 
74 | ## 0.1.2 - 2018-09-18
75 | ### Added
76 | ### Fixed
77 | * don't stop when a single request fails (thanks @jameysharp)
78 | ### Changed
79 | ### Removed
80 | 
81 | ## 0.1.1 - 2018-01-26
82 | ### Added
83 | * `--show-trace` command line option
84 | ### Fixed
85 | ### Changed
86 | ### Removed
87 | 
88 | ## 0.1.0 - 2017-07-22
89 | ### Added
90 | * Initial release
91 | 
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | description = "Nix (package manager) indexing primitives"
3 | authors = ["Benno Fünfstück "]
4 | edition = "2021"
5 | name = "nix-index"
6 | version = "0.1.9"
7 | license = "BSD-3-Clause"
8 | homepage = "https://github.com/nix-community/nix-index"
9 | repository = "https://github.com/nix-community/nix-index"
10 | readme = "README.md"
11 | keywords = [ "nixpkgs", "nixos", "nix", "indexing" ]
12 | include = [
13 |     "examples/",
14 |     "src/*.rs",
15 |     "src/bin/*.rs",
16 |     "Cargo.toml",
17 |     "README.md",
18 |     "LICENSE"
19 | ]
20 | 
21 | [[bin]]
22 | doc = false
23 | name = "nix-index"
24 | 
25 | [[bin]]
26 | name = "nix-locate"
27 | 
28 | [dependencies]
29 | bincode = "1.3.3"
30 | byteorder = "1.5.0"
31 | error-chain = "0.12.4"
32 | futures = "0.3.30"
33 | grep = "0.3.1"
34 | atty = "0.2.14"
35 | memchr = "2.7.2"
36 | num_cpus = "1.16.0"
37 | indexmap = "2.2.6"
38 | owo-colors = { version = "4.0.0", features = ["supports-colors"] }
39 | rayon = "1.10.0"
40 | regex = "1.10.4"
41 | regex-syntax = "0.7.4"
42 | reqwest = { version = "0.12.3", features = [ "brotli" ] }
43 | separator = "0.4.1"
44 | serde = { version = "1.0.198", features = [ "derive" ] }
45 | serde_bytes = "0.11.14"
46 | serde_json = "1.0.116"
47 | tokio-retry = "0.3.0"
48 | xdg = "2.5.2"
49 | xml-rs = "0.8.20"
50 | xz2 = "0.1.7"
51 | zstd = { version = "0.12.4", features = [ "zstdmt" ] }
52 | 
53 | [dependencies.hyper]
54 | features = ["client", "http1", "http2", "runtime", "stream"]
55 | version = "0.14.27"
56 | 
57 | 
58 | [dependencies.tokio]
59 | 
features = ["full"] 60 | version = "1.32.0" 61 | 62 | [dependencies.clap] 63 | version = "4.3.24" 64 | features = ["derive", "env"] 65 | 66 | [dependencies.rusqlite] 67 | features = ["backup"] 68 | version = "0.31.0" 69 | 70 | [[example]] 71 | name = "nix-index-debug" 72 | 73 | [profile] 74 | [profile.release] 75 | debug = true 76 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017 Benno Fünfstück 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the author nor the names of his contributors 17 | may be used to endorse or promote products derived from this software 18 | without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 21 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 22 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR 24 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 28 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 | POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # nix-index 2 | ## A files database for nixpkgs 3 | **nix-index** is a tool to quickly locate the package providing a certain file in [`nixpkgs`](https://github.com/NixOS/nixpkgs). It indexes built derivations found in binary caches. 4 | 5 | ###### Demo 6 | 7 | ``` 8 | $ nix-locate 'bin/hello' 9 | hello.out 29,488 x /nix/store/bdjyhh70npndlq3rzmggh4f2dzdsj4xy-hello-2.10/bin/hello 10 | linuxPackages_4_4.dpdk.examples 2,022,224 x /nix/store/jlnk3d38zsk0bp02rp9skpqk4vjfijnn-dpdk-16.07.2-4.4.52-examples/bin/helloworld 11 | linuxPackages.dpdk.examples 2,022,224 x /nix/store/rzx4k0pb58gd1dr9kzwam3vk9r8bfyv1-dpdk-16.07.2-4.9.13-examples/bin/helloworld 12 | linuxPackages_4_10.dpdk.examples 2,022,224 x /nix/store/wya1b0910qidfc9v3i6r9rnbnc9ykkwq-dpdk-16.07.2-4.10.1-examples/bin/helloworld 13 | linuxPackages_grsec_nixos.dpdk.examples 2,022,224 x /nix/store/2wqv94290pa38aclld7sc548a7hnz35k-dpdk-16.07.2-4.9.13-examples/bin/helloworld 14 | camlistore.out 7,938,952 x /nix/store/xn5ivjdyslxldhm5cb4x0lfz48zf21rl-camlistore-0.9/bin/hello 15 | ``` 16 | ## Installation 17 | 18 | ### Flakes 19 | 20 | 1. create the database: 21 | 22 | ``` 23 | $ nix run github:nix-community/nix-index#nix-index 24 | ``` 25 | 26 | 2. 
query for a file:
27 | 
28 | ```
29 | $ nix run github:nix-community/nix-index#nix-locate -- bin/hello
30 | ```
31 | 
32 | ### Latest Git version
33 | 
34 | To install the latest development version of nix-index, simply clone the repo and run `nix-env -if.`:
35 | 
36 | ```
37 | $ git clone https://github.com/nix-community/nix-index
38 | $ cd nix-index
39 | $ nix-env -if.
40 | ```
41 | 
42 | ### Stable
43 | 
44 | For the stable version, you can either [checkout](https://git-scm.com/docs/git-checkout) the latest [tag](https://git-scm.com/docs/git-tag) (see the list [here](https://github.com/nix-community/nix-index/tags)) or use the Nixpkgs repositories and install it with:
45 | 
46 | ```
47 | $ nix-env -iA nixos.nix-index
48 | ```
49 | 
50 | ## Usage
51 | First, you need to generate an index by running `nix-index` (it takes around 5 minutes). Then, you can use `nix-locate pattern`. For more information, see `nix-locate --help` and `nix-index --help`.
52 | 
53 | ### Use pre-generated database
54 | 
55 | [nix-index-database](https://github.com/Mic92/nix-index-database) provides pre-generated databases if you don't want to generate a database locally.
56 | It also comes with nixos/home-manager modules to use those databases.
57 | 
58 | ### Usage as a command-not-found replacement
59 | 
60 | Nix-index provides a "command-not-found" script that can print the attribute path of commands that are not found in your shell. You can either source `${pkgs.nix-index}/etc/profile.d/command-not-found.sh` in your own shell init files (works for Zsh and Bash, as far as we know) or you can use the following in home-manager / `/etc/nixos/configuration.nix`:
61 | 
62 | ```nix
63 | programs.command-not-found.enable = false;
64 | # for home-manager, use programs.bash.initExtra instead
65 | programs.bash.interactiveShellInit = ''
66 |   source ${pkgs.nix-index}/etc/profile.d/command-not-found.sh
67 | '';
68 | ```
69 | 
70 | Replace `bash` with `zsh` if you use `zsh`.
71 | 
72 | Example output:
73 | 
74 | ```
75 | $ blender
76 | The program 'blender' is currently not installed. You can install it
77 | by typing:
78 |   nix-env -iA nixpkgs.blender.out
79 | 
80 | Or run it once with:
81 |   nix-shell -p blender.out --run ...
82 | ```
83 | 
84 | A [`home-manager` module](https://nix-community.github.io/home-manager/options.html#opt-programs.nix-index.enable) is now available to integrate `nix-index` with `bash`, `zsh`, and `fish` using this script.
85 | 
86 | You can also use `command-not-found.nu` as a Nushell hook by adding the
87 | following to your Nushell config:
88 | 
89 | ```nix
90 | programs.nushell = {
91 |   enable = true;
92 |   extraConfig = ''
93 |     $env.config.hooks.command_not_found = source ${pkgs.nix-index}/etc/profile.d/command-not-found.nu
94 |   '';
95 | };
96 | ```
97 | 
98 | ## Contributing
99 | If you find any missing features that you would like to implement, I'm very happy about any PRs! You can also create an issue first if the feature is more complex so we can discuss possible implementations.
100 | 
101 | Here is a quick description of all relevant files:
102 | 
103 | * `src/bin/{nix-index, nix-locate}.rs`: Implementation of the nix-index / nix-locate command line tools
104 | * `src/database.rs`: High-level functions for working with the database format
105 | * `src/files.rs`: The data types for working with file listings
106 | * `src/frcode.rs`: Low-level implementation of an encoder to efficiently store many file paths (see comments in the file for more details). Used by `database.rs`. A toy sketch of the idea follows this list.
107 | * `src/hydra.rs`: Deals with everything that has to do with downloading from the binary cache (fetching file listings and references)
108 | * `src/nixpkgs.rs`: Implements the gathering of the packages (store paths and attributes) using `nix-env`
109 | * `src/package.rs`: High-level data types for representing store paths (sometimes also referred to as a package)
110 | * `src/workset.rs`: A queue used by `nix-index` to implement the recursive fetching (fetching references of everything)
111 | 
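The sketch below illustrates front coding, the classic `locate(1)` idea that `frcode.rs` is named after. It is a deliberately simplified toy, not the actual on-disk format (which also carries per-file metadata, package entries, and zstd compression on top); the function name and example paths here are made up for illustration:

```rust
/// Toy front coding: store each path as (shared_prefix_len, suffix)
/// relative to the previous path. Sorted listings share long prefixes,
/// so most of each path collapses into the shared-length counter.
fn front_encode(paths: &[&str]) -> Vec<(usize, String)> {
    let mut encoded = Vec::new();
    let mut prev = "";
    for &path in paths {
        // Length of the common prefix with the previous path (byte-wise;
        // fine here because the example paths are ASCII).
        let shared = prev
            .bytes()
            .zip(path.bytes())
            .take_while(|(a, b)| a == b)
            .count();
        encoded.push((shared, path[shared..].to_string()));
        prev = path;
    }
    encoded
}

fn main() {
    let paths = ["/bin/hello", "/bin/hexdump", "/share/man/man1/hello.1"];
    for (shared, suffix) in front_encode(&paths) {
        println!("keep {:2} bytes, append {:?}", shared, suffix);
    }
}
```

Because consecutive sorted paths share long prefixes, most of each entry shrinks to a small counter. A consequence of this encoding is that paths can only be reconstructed by walking entries in order, which matches how the database is scanned as a stream during queries.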
--------------------------------------------------------------------------------
/command-not-found.nu:
--------------------------------------------------------------------------------
1 | { |cmd_name|
2 |     let install = { |pkgs|
3 |         $pkgs | each {|pkg| $" nix shell nixpkgs#($pkg)" }
4 |     }
5 |     let run_once = { |pkgs|
6 |         $pkgs | each {|pkg| $" nix shell nixpkgs#($pkg) --command '($cmd_name) ...'" }
7 |     }
8 |     let single_pkg = { |pkg|
9 |         let lines = [
10 |             $"The program '($cmd_name)' is currently not installed."
11 |             ""
12 |             "You can install it by typing:"
13 |             (do $install [$pkg] | get 0)
14 |             ""
15 |             "Or run it once with:"
16 |             (do $run_once [$pkg] | get 0)
17 |         ]
18 |         $lines | str join "\n"
19 |     }
20 |     let multiple_pkgs = { |pkgs|
21 |         let lines = [
22 |             $"The program '($cmd_name)' is currently not installed. It is provided by several packages."
23 |             ""
24 |             "You can install it by typing one of the following:"
25 |             (do $install $pkgs | str join "\n")
26 |             ""
27 |             "Or run it once with:"
28 |             (do $run_once $pkgs | str join "\n")
29 |         ]
30 |         $lines | str join "\n"
31 |     }
32 |     let pkgs = (@out@/bin/nix-locate --minimal --no-group --type x --type s --top-level --whole-name --at-root $"/bin/($cmd_name)" | lines)
33 |     let len = ($pkgs | length)
34 |     let ret = match $len {
35 |         0 => null,
36 |         1 => (do $single_pkg ($pkgs | get 0)),
37 |         _ => (do $multiple_pkgs $pkgs),
38 |     }
39 |     return $ret
40 | }
--------------------------------------------------------------------------------
/command-not-found.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | # for bash 4
4 | # this will be called when a command is entered
5 | # but not found in the user’s path + environment
6 | command_not_found_handle () {
7 | 
8 |     # TODO: use "command not found" gettext translations
9 | 
10 |     # taken from http://www.linuxjournal.com/content/bash-command-not-found
11 |     # - do not run when inside Midnight Commander or within a Pipe
12 |     if [ -n "${MC_SID-}" ] || ! [ -t 1 ]; then
13 |         >&2 echo "$1: command not found"
14 |         return 127
15 |     fi
16 | 
17 |     toplevel=nixpkgs # nixpkgs should always be available even in NixOS
18 |     cmd=$1
19 |     attrs=$(@out@/bin/nix-locate --minimal --no-group --type x --type s --top-level --whole-name --at-root "/bin/$cmd")
20 |     len=$(echo -n "$attrs" | grep -c "^")
21 | 
22 |     case $len in
23 |         0)
24 |             >&2 echo "$cmd: command not found"
25 |             ;;
26 |         1)
27 |             # if only 1 package provides this, then we can invoke it
28 |             # without asking the users if they have opted in with one
29 |             # of 2 environment variables
30 | 
31 |             # they are based on the ones found in
32 |             # command-not-found.sh:
33 | 
34 |             # NIX_AUTO_INSTALL : install the missing command into the
35 |             #                    user’s environment
36 |             # NIX_AUTO_RUN     : run the command transparently inside of
37 |             #                    nix shell
38 | 
39 |             # these will not return 127 if they worked correctly
40 | 
41 |             if ! [ -z "${NIX_AUTO_INSTALL-}" ]; then
42 |                 >&2 cat <&2 cat <"
62 |                 if [ "$?" -eq 0 ]; then
63 |                     # how nix-shell handles commands is weird
64 |                     # $(echo $@) is needed to handle this
65 |                     nix-shell -p $attrs --run "$(echo $@)"
66 |                     return $?
67 |                 else
68 |                     >&2 cat <&2 cat <&2 cat <&2 cat <&2 echo " nix profile install $toplevel#$attr"
106 |                 else
107 |                     >&2 echo " nix-env -iA $toplevel.$attr"
108 |                 fi
109 |             done <<< "$attrs"
110 | 
111 |             >&2 cat <&2 echo " nix shell $toplevel#$attr -c $cmd ..."
119 |                 else
120 |                     >&2 echo " nix-shell -p $attr --run '$cmd ...'"
121 |                 fi
122 |             done <<< "$attrs"
123 |             ;;
124 |     esac
125 | 
126 |     return 127 # command not found should always exit with 127
127 | }
128 | 
129 | # for zsh...
130 | # we just pass it to the bash handler above
131 | # apparently they work identically
132 | command_not_found_handler () {
133 |     command_not_found_handle $@
134 |     return $?
135 | }
136 | 
--------------------------------------------------------------------------------
/default.nix:
--------------------------------------------------------------------------------
1 | # This file is the compat layer of flakes: https://github.com/edolstra/flake-compat
2 | # See flake.nix for details
3 | (import (
4 |   let
5 |     lock = builtins.fromJSON (builtins.readFile ./flake.lock);
6 |   in fetchTarball {
7 |     url = "https://github.com/edolstra/flake-compat/archive/${lock.nodes.flake-compat.locked.rev}.tar.gz";
8 |     sha256 = lock.nodes.flake-compat.locked.narHash; }
9 | ) {
10 |   src = ./.;
11 | }).defaultNix
--------------------------------------------------------------------------------
/examples/nix-index-debug.rs:
--------------------------------------------------------------------------------
1 | extern crate nix_index;
2 | 
3 | use nix_index::database::Reader;
4 | 
5 | fn main() {
6 |     let f = std::env::args().nth(1).expect("file name given as 1st arg");
7 |     let mut db = Reader::open(f).unwrap();
8 |     db.dump().unwrap();
9 | }
10 | 
--------------------------------------------------------------------------------
/flake.lock:
--------------------------------------------------------------------------------
1 | {
2 |   "nodes": {
3 |     "flake-compat": {
4 |       "flake": false,
5 |       "locked": {
6 |         "lastModified": 1696426674,
7 |         "narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=",
8 |         "owner": "edolstra",
9 |         "repo": "flake-compat",
10 |         "rev": "0f9255e01c2351cc7d116c072cb317785dd33b33",
11 |         "type": "github"
12 |       },
13 |       "original": {
14 |         "owner": "edolstra",
15 |         "repo": "flake-compat",
16 |         "type": "github"
17 |       }
18 |     },
19 |     "nixpkgs": {
20 |       "locked": {
21 |         "lastModified": 1713248628,
22 |         "narHash": "sha256-NLznXB5AOnniUtZsyy/aPWOk8ussTuePp2acb9U+ISA=",
23 |         "owner": "NixOS",
24 |         "repo": "nixpkgs",
25 |         "rev": "5672bc9dbf9d88246ddab5ac454e82318d094bb8",
26 |         "type": "github"
27 |       },
28 |       "original": {
29 |         "id": "nixpkgs",
30 |         "ref": "nixos-unstable",
31 |         "type": "indirect"
32 |       }
33 |     },
34 |     "root": {
35 |       "inputs": {
36 |         "flake-compat": "flake-compat",
37 |         "nixpkgs": "nixpkgs"
38 |       }
39 |     }
40 |   },
41 |   "root": "root",
42 |   "version": 7
43 | }
--------------------------------------------------------------------------------
/flake.nix:
--------------------------------------------------------------------------------
1 | {
2 |   description = "A files database for nixpkgs";
3 | 
4 |   inputs = {
5 |     nixpkgs.url = "nixpkgs/nixos-unstable";
6 |     flake-compat = {
7 |       url = "github:edolstra/flake-compat";
8 |       flake = false;
9 |     };
10 |   };
11 | 
12 |   outputs = { self, nixpkgs, flake-compat }:
13 |     let
14 |       inherit (nixpkgs) lib;
15 |       systems = [ "x86_64-linux"
"x86_64-darwin" "aarch64-darwin" "aarch64-linux" ]; 16 | forAllSystems = lib.genAttrs systems; 17 | nixpkgsFor = nixpkgs.legacyPackages; 18 | in 19 | { 20 | packages = forAllSystems (system: { 21 | default = with nixpkgsFor.${system}; rustPlatform.buildRustPackage { 22 | pname = "nix-index"; 23 | inherit ((lib.importTOML ./Cargo.toml).package) version; 24 | 25 | src = lib.sourceByRegex self [ 26 | "(examples|src)(/.*)?" 27 | ''Cargo\.(toml|lock)'' 28 | ''command-not-found\.sh'' 29 | ''command-not-found\.nu'' 30 | ]; 31 | 32 | cargoLock = { 33 | lockFile = ./Cargo.lock; 34 | }; 35 | 36 | nativeBuildInputs = [ pkg-config ]; 37 | buildInputs = [ openssl curl sqlite ] 38 | ++ lib.optionals stdenv.isDarwin [ darwin.apple_sdk.frameworks.Security ]; 39 | 40 | postInstall = '' 41 | substituteInPlace command-not-found.sh \ 42 | --subst-var out 43 | install -Dm555 command-not-found.sh -t $out/etc/profile.d 44 | substituteInPlace command-not-found.nu \ 45 | --subst-var out 46 | install -Dm555 command-not-found.nu -t $out/etc/profile.d 47 | ''; 48 | 49 | meta = with lib; { 50 | description = "A files database for nixpkgs"; 51 | homepage = "https://github.com/nix-community/nix-index"; 52 | license = with licenses; [ bsd3 ]; 53 | maintainers = [ maintainers.bennofs ]; 54 | }; 55 | }; 56 | }); 57 | 58 | checks = forAllSystems (system: 59 | let 60 | packages = lib.mapAttrs' (n: lib.nameValuePair "package-${n}") self.packages.${system}; 61 | devShells = lib.mapAttrs' (n: lib.nameValuePair "devShell-${n}") self.devShells.${system}; 62 | in packages // devShells 63 | ); 64 | 65 | devShells = forAllSystems (system: { 66 | minimal = with nixpkgsFor.${system}; mkShell { 67 | name = "nix-index"; 68 | 69 | nativeBuildInputs = [ 70 | pkg-config 71 | ]; 72 | 73 | buildInputs = [ 74 | openssl 75 | sqlite 76 | ] ++ lib.optionals stdenv.isDarwin [ 77 | darwin.apple_sdk.frameworks.Security 78 | ]; 79 | 80 | env.LD_LIBRARY_PATH = lib.makeLibraryPath [ openssl ]; 81 | }; 82 | 83 | default = with nixpkgsFor.${system}; mkShell { 84 | name = "nix-index"; 85 | 86 | inputsFrom = [ self.devShells.${system}.minimal ]; 87 | 88 | nativeBuildInputs = [ rustc cargo clippy rustfmt ]; 89 | 90 | env = { 91 | LD_LIBRARY_PATH = lib.makeLibraryPath [ openssl ]; 92 | RUST_SRC_PATH = rustPlatform.rustLibSrc; 93 | }; 94 | }; 95 | }); 96 | 97 | apps = forAllSystems (system: { 98 | nix-index = { 99 | type = "app"; 100 | program = "${self.packages.${system}.default}/bin/nix-index"; 101 | }; 102 | nix-locate = { 103 | type = "app"; 104 | program = "${self.packages.${system}.default}/bin/nix-locate"; 105 | }; 106 | default = self.apps.${system}.nix-locate; 107 | }); 108 | }; 109 | } 110 | -------------------------------------------------------------------------------- /recover.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | import json 4 | 5 | 6 | CHUNK_SIZE = 4*32*1024 7 | 8 | 9 | def wrong_written_size(x): 10 | out = 0 11 | while x >= 0: 12 | out += x 13 | x -= CHUNK_SIZE 14 | return out 15 | 16 | 17 | if __name__ == '__main__': 18 | with open(sys.argv[1], 'rb') as f: 19 | data = f.read() 20 | 21 | print(sys.argv[1]) 22 | try: 23 | json.loads(data) 24 | except json.JSONDecodeError as e: 25 | exc = e 26 | for margin in range(10): 27 | if len(data) == wrong_written_size(e.pos + margin): 28 | print(margin, exc, len(data), e.pos, data[e.pos:][:10], data[:10]) 29 | sys.exit(0) 30 | 31 | sys.exit(1) 32 | 
--------------------------------------------------------------------------------
/rustfmt.toml:
--------------------------------------------------------------------------------
1 | unstable_features = true
2 | 
3 | group_imports = "StdExternalCrate"
4 | newline_style = "Unix"
5 | reorder_impl_items = true
6 | use_field_init_shorthand = true
7 | use_try_shorthand = true
--------------------------------------------------------------------------------
/src/bin/nix-channel-index.rs:
--------------------------------------------------------------------------------
1 | //! Tool for generating a nix-index database.
2 | use std::ffi::OsString;
3 | use std::io::{self, Write};
4 | use std::os::unix::ffi::OsStringExt;
5 | use std::path::PathBuf;
6 | use std::process;
7 | 
8 | use clap::Parser;
9 | use error_chain::ChainedError;
10 | use futures::{future, StreamExt};
11 | use nix_index::files::FileNode;
12 | use nix_index::hydra::Fetcher;
13 | use nix_index::listings::fetch_listings;
14 | use nix_index::{errors::*, CACHE_URL};
15 | use rusqlite::{Connection, DatabaseName};
16 | 
17 | /// The main function of this module: creates a new command-not-found database.
18 | async fn update_index(args: &Args) -> Result<()> {
19 |     let fetcher = Fetcher::new(CACHE_URL.to_string()).map_err(ErrorKind::ParseProxy)?;
20 |     let connection =
21 |         Connection::open_in_memory().map_err(|_| ErrorKind::CreateDatabase(args.output.clone()))?;
22 | 
23 |     connection
24 |         .execute(
25 |             r#"
26 |             create table Programs (
27 |                 name text not null,
28 |                 system text not null,
29 |                 package text not null,
30 |                 primary key (name, system, package)
31 |             );
32 |             "#,
33 |             (),
34 |         )
35 |         .map_err(|_| ErrorKind::CreateDatabase(args.output.clone()))?;
36 | 
37 |     let debug_connection = Connection::open_in_memory()
38 |         .map_err(|_| ErrorKind::CreateDatabase(args.debug_output.clone()))?;
39 |     debug_connection
40 |         .execute(
41 |             r#"
42 |             create table DebugInfo (
43 |                 build_id text unique not null,
44 |                 url text not null,
45 |                 filename text not null,
46 |                 primary key (build_id)
47 |             );
48 |             "#,
49 |             (),
50 |         )
51 |         .map_err(|_| ErrorKind::CreateDatabase(args.debug_output.clone()))?;
52 | 
53 |     let systems = match &args.systems {
54 |         Some(systems) => systems.iter().map(|x| Some(x.as_str())).collect(),
55 |         None => vec![None],
56 |     };
57 | 
58 |     eprint!("+ querying available packages");
59 |     let (files, watch) =
60 |         fetch_listings(&fetcher, args.jobs, &args.nixpkgs, systems, args.show_trace)?;
61 | 
62 |     // Treat request errors as if the file list were missing
63 |     let files = files.map(|r| {
64 |         r.unwrap_or_else(|e| {
65 |             eprint!("\n{}", e.display_chain());
66 |             None
67 |         })
68 |     });
69 | 
70 |     // Add progress output
71 |     let (mut indexed, mut missing) = (0, 0);
72 |     let files = files.inspect(|entry| {
73 |         if entry.is_some() {
74 |             indexed += 1;
75 |         } else {
76 |             missing += 1;
77 |         };
78 | 
79 |         eprint!("+ generating index: {:05} paths found :: {:05} paths not in binary cache :: {:05} paths in queue \r",
80 |                 indexed, missing, watch.queue_len());
81 |         io::stderr().flush().expect("flushing stderr failed");
82 |     });
83 | 
84 |     let mut files = files.filter_map(future::ready);
85 | 
86 |     eprint!("+ generating index");
87 |     eprint!("\r");
88 | 
89 |     while let Some((path, nar, files)) = files.next().await {
90 |         let origin = path.origin();
91 | 
92 |         if !origin.toplevel {
93 |             // skip dependencies
94 |             continue;
95 |         }
96 | 
97 |         for item in files.to_list(&[]) {
98 |             if let FileNode::Symlink { target: _ } // FIXME: should probably check if the target is
executable...
99 |                 | FileNode::Regular {
100 |                     size: _,
101 |                     executable: true,
102 |                 } = item.node
103 |             {
104 |                 let path = PathBuf::from(OsString::from_vec(item.path));
105 | 
106 |                 if let Ok(binary) = path.strip_prefix("/bin") {
107 |                     let attr = origin.attr.clone();
108 |                     let system = origin.system.clone();
109 |                     let binary: String = binary.to_string_lossy().into();
110 | 
111 |                     if binary.starts_with('.') || binary.contains('/') || binary.is_empty() {
112 |                         continue;
113 |                     }
114 | 
115 |                     connection
116 |                         .execute(
117 |                             "insert or replace into Programs(name, system, package) values (?, ?, ?)",
118 |                             (binary, system, attr),
119 |                         )
120 |                         .map_err(|_| ErrorKind::CreateDatabase(args.output.clone()))?;
121 |                 }
122 | 
123 |                 if let Ok(debuginfo) = path.strip_prefix("/lib/debug/.build-id") {
124 |                     let build_id: String = debuginfo
125 |                         .to_string_lossy()
126 |                         .replace('/', "")
127 |                         .strip_suffix(".debug")
128 |                         .expect("Debug info files must end with .debug")
129 |                         .into();
130 | 
131 |                     debug_connection
132 |                         .execute(
133 |                             "insert or replace into DebugInfo(build_id, url, filename) values (?, ?, ?)",
134 |                             (build_id, format!("../{}", nar), path.to_string_lossy().strip_prefix('/')),
135 |                         )
136 |                         .map_err(|_| ErrorKind::CreateDatabase(args.debug_output.clone()))?;
137 |                 }
138 |             }
139 |         }
140 |     }
141 |     eprintln!();
142 | 
143 |     eprint!("+ dumping index");
144 | 
145 |     connection
146 |         .backup(DatabaseName::Main, &args.output, None)
147 |         .map_err(|_| ErrorKind::CreateDatabase(args.output.clone()))?;
148 | 
149 |     debug_connection
150 |         .backup(DatabaseName::Main, &args.debug_output, None)
151 |         .map_err(|_| ErrorKind::CreateDatabase(args.debug_output.clone()))?;
152 | 
153 |     Ok(())
154 | }
155 | 
156 | #[derive(Debug, Parser)]
157 | #[clap(author, about, version)]
158 | struct Args {
159 |     /// Make REQUESTS http requests in parallel
160 |     #[clap(short = 'r', long = "requests", default_value = "500")]
161 |     jobs: usize,
162 | 
163 |     /// Path to nixpkgs for which to build the index, as accepted by nix-env -f
164 |     #[clap(short = 'f', long, default_value = "")]
165 |     nixpkgs: String,
166 | 
167 |     /// Path for resulting database file
168 |     #[clap(short, long, default_value = "programs.sqlite")]
169 |     output: PathBuf,
170 | 
171 |     /// Path for debuginfo database file
172 |     #[clap(short, long, default_value = "debug.sqlite")]
173 |     debug_output: PathBuf,
174 | 
175 |     /// Systems to include in generated database
176 |     #[clap(short = 's', long = "platform")]
177 |     systems: Option<Vec<String>>,
178 | 
179 |     /// Show a stack trace in the case of a Nix evaluation error
180 |     #[clap(long)]
181 |     show_trace: bool,
182 | }
183 | 
184 | #[tokio::main]
185 | async fn main() {
186 |     let args = Args::parse();
187 | 
188 |     if let Err(e) = update_index(&args).await {
189 |         eprintln!("error: {}", e);
190 | 
191 |         for e in e.iter().skip(1) {
192 |             eprintln!("caused by: {}", e);
193 |         }
194 | 
195 |         if let Some(backtrace) = e.backtrace() {
196 |             eprintln!("backtrace: {:?}", backtrace);
197 |         }
198 |         process::exit(2);
199 |     }
200 | }
201 | 
--------------------------------------------------------------------------------
/src/bin/nix-index.rs:
--------------------------------------------------------------------------------
1 | //! Tool for generating a nix-index database.
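//!
//! Rough flow of `update_index` below: query the available packages from
//! nixpkgs (or load a previously saved `paths.cache`), fetch the file
//! listings for those store paths from the binary cache, and stream every
//! entry into the compressed database that `nix-locate` later searches.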
2 | use std::ffi::OsStr; 3 | use std::fs::{self, File}; 4 | use std::io::{self, Write}; 5 | use std::path::PathBuf; 6 | use std::process; 7 | 8 | use clap::Parser; 9 | use error_chain::ChainedError; 10 | use futures::future::Either; 11 | use futures::{future, StreamExt}; 12 | use nix_index::database::Writer; 13 | use nix_index::errors::*; 14 | use nix_index::files::FileTree; 15 | use nix_index::hydra::Fetcher; 16 | use nix_index::listings::{fetch_listings, try_load_paths_cache}; 17 | use nix_index::package::StorePath; 18 | use nix_index::CACHE_URL; 19 | use separator::Separatable; 20 | 21 | /// The main function of this module: creates a new nix-index database. 22 | async fn update_index(args: &Args) -> Result<()> { 23 | // first try to load the paths.cache if requested, otherwise query 24 | // the packages normally. Also fall back to normal querying if the paths.cache 25 | // fails to load. 26 | let cached = if args.path_cache { 27 | eprintln!("+ loading paths from cache"); 28 | try_load_paths_cache()? 29 | } else { 30 | None 31 | }; 32 | 33 | eprintln!("+ querying available packages"); 34 | let fetcher = Fetcher::new(CACHE_URL.to_string()).map_err(ErrorKind::ParseProxy)?; 35 | let (files, watch) = match cached { 36 | Some((f, w)) => (Either::Left(f), w), 37 | None => { 38 | let (f, w) = fetch_listings( 39 | &fetcher, 40 | args.jobs, 41 | &args.nixpkgs, 42 | vec![args.system.as_deref()], 43 | args.show_trace, 44 | )?; 45 | (Either::Right(f), w) 46 | } 47 | }; 48 | 49 | // Treat request errors as if the file list were missing 50 | let files = files.map(|r| { 51 | r.unwrap_or_else(|e| { 52 | eprint!("\n{}", e.display_chain()); 53 | None 54 | }) 55 | }); 56 | 57 | // Add progress output 58 | let (mut indexed, mut missing) = (0, 0); 59 | let files = files.inspect(|entry| { 60 | if entry.is_some() { 61 | indexed += 1; 62 | } else { 63 | missing += 1; 64 | }; 65 | 66 | eprint!("+ generating index: {:05} paths found :: {:05} paths not in binary cache :: {:05} paths in queue \r", 67 | indexed, missing, watch.queue_len()); 68 | io::stderr().flush().expect("flushing stderr failed"); 69 | }); 70 | 71 | // Filter packages with no file listings available 72 | let mut files = files.filter_map(future::ready); 73 | 74 | eprint!("+ generating index"); 75 | if !args.filter_prefix.is_empty() { 76 | eprint!(" (filtering by `{}`)", args.filter_prefix); 77 | } 78 | eprint!("\r"); 79 | fs::create_dir_all(&args.database) 80 | .chain_err(|| ErrorKind::CreateDatabaseDir(args.database.clone()))?; 81 | let mut db = Writer::create(args.database.join("files"), args.compression_level) 82 | .chain_err(|| ErrorKind::CreateDatabase(args.database.clone()))?; 83 | 84 | let mut results: Vec<(StorePath, String, FileTree)> = Vec::new(); 85 | while let Some(entry) = files.next().await { 86 | if args.path_cache { 87 | results.push(entry.clone()); 88 | } 89 | let (path, _, files) = entry; 90 | db.add(path, files, args.filter_prefix.as_bytes()) 91 | .chain_err(|| ErrorKind::WriteDatabase(args.database.clone()))?; 92 | } 93 | eprintln!(); 94 | 95 | if args.path_cache { 96 | eprintln!("+ writing path cache"); 97 | let mut output = io::BufWriter::new( 98 | File::create("paths.cache").chain_err(|| ErrorKind::WritePathsCache)?, 99 | ); 100 | bincode::serialize_into(&mut output, &results).chain_err(|| ErrorKind::WritePathsCache)?; 101 | } 102 | 103 | let index_size = db 104 | .finish() 105 | .chain_err(|| ErrorKind::WriteDatabase(args.database.clone()))?; 106 | eprintln!("+ wrote index of {} bytes", index_size.separated_string()); 107 | 
108 |     Ok(())
109 | }
110 | 
111 | fn cache_dir() -> &'static OsStr {
112 |     let base = xdg::BaseDirectories::with_prefix("nix-index").unwrap();
113 |     let cache_dir = Box::new(base.get_cache_home());
114 |     let cache_dir = Box::leak(cache_dir);
115 |     cache_dir.as_os_str()
116 | }
117 | 
118 | /// Builds an index for nix-locate
119 | #[derive(Debug, Parser)]
120 | #[clap(author, about, version)]
121 | struct Args {
122 |     /// Make REQUESTS http requests in parallel
123 |     #[clap(short = 'r', long = "requests", default_value = "100")]
124 |     jobs: usize,
125 | 
126 |     /// Directory where the index is stored
127 |     #[clap(short, long = "db", default_value_os = cache_dir(), env = "NIX_INDEX_DATABASE")]
128 |     database: PathBuf,
129 | 
130 |     /// Path to nixpkgs for which to build the index, as accepted by nix-env -f
131 |     #[clap(short = 'f', long, default_value = "")]
132 |     nixpkgs: String,
133 | 
134 |     /// Specify system platform for which to build the index, accepted by nix-env --argstr system
135 |     #[clap(short = 's', long, value_name = "platform")]
136 |     system: Option<String>,
137 | 
138 |     /// Zstandard compression level
139 |     #[clap(short, long = "compression", default_value = "22")]
140 |     compression_level: i32,
141 | 
142 |     /// Show a stack trace in the case of a Nix evaluation error
143 |     #[clap(long)]
144 |     show_trace: bool,
145 | 
146 |     /// Only add paths starting with PREFIX (e.g. `/bin/`)
147 |     #[clap(long, default_value = "")]
148 |     filter_prefix: String,
149 | 
150 |     /// Store and load results of fetch phase in a file called paths.cache. This speeds up testing
151 |     /// different database formats / compression.
152 |     ///
153 |     /// Note: does not check if the cached data is up to date! Use only for development.
154 |     #[clap(long)]
155 |     path_cache: bool,
156 | }
157 | 
158 | #[tokio::main]
159 | async fn main() {
160 |     let args = Args::parse();
161 | 
162 |     if let Err(e) = update_index(&args).await {
163 |         eprintln!("error: {}", e);
164 | 
165 |         for e in e.iter().skip(1) {
166 |             eprintln!("caused by: {}", e);
167 |         }
168 | 
169 |         if let Some(backtrace) = e.backtrace() {
170 |             eprintln!("backtrace: {:?}", backtrace);
171 |         }
172 |         process::exit(2);
173 |     }
174 | }
175 | 
--------------------------------------------------------------------------------
/src/bin/nix-locate.rs:
--------------------------------------------------------------------------------
1 | //! Tool for searching for files in nixpkgs packages
2 | use std::collections::HashSet;
3 | use std::ffi::OsStr;
4 | use std::path::PathBuf;
5 | use std::process;
6 | use std::result;
7 | use std::str;
8 | use std::str::FromStr;
9 | 
10 | use clap::{value_parser, Parser};
11 | use error_chain::error_chain;
12 | use nix_index::database;
13 | use nix_index::files::{self, FileTreeEntry, FileType};
14 | use owo_colors::{OwoColorize, Stream};
15 | use regex::bytes::Regex;
16 | use separator::Separatable;
17 | 
18 | error_chain! {
19 |     errors {
20 |         ReadDatabase(database: PathBuf) {
21 |             description("database read error")
22 |             display("reading from the database at '{}' failed.\n\
23 |                      This may be caused by a corrupt or missing database, try (re)running `nix-index` to generate the database.\n\
24 |                      If the error persists please file a bug report at https://github.com/nix-community/nix-index.", database.to_string_lossy())
25 |         }
26 |         Grep(pattern: String) {
27 |             description("grep builder error")
28 |             display("constructing the regular expression from the pattern '{}' failed.", pattern)
29 |         }
30 |     }
31 | }
32 | 
33 | /// The struct holding the parsed arguments for searching
34 | struct Args {
35 |     /// Path of the nix-index database.
36 |     database: PathBuf,
37 |     /// The pattern to search for. This is always in regex syntax.
38 |     pattern: String,
39 |     group: bool,
40 |     hash: Option<String>,
41 |     package_pattern: Option<String>,
42 |     file_type: Vec<FileType>,
43 |     only_toplevel: bool,
44 |     color: bool,
45 |     minimal: bool,
46 | }
47 | 
48 | /// The main function of this module: searches with the given options in the database.
49 | fn locate(args: &Args) -> Result<()> {
50 |     // Build the regular expression matcher
51 |     let pattern = Regex::new(&args.pattern).chain_err(|| ErrorKind::Grep(args.pattern.clone()))?;
52 |     let package_pattern = if let Some(ref pat) = args.package_pattern {
53 |         Some(Regex::new(pat).chain_err(|| ErrorKind::Grep(pat.clone()))?)
54 |     } else {
55 |         None
56 |     };
57 | 
58 |     // Open the database
59 |     let index_file = args.database.join("files");
60 |     let db = database::Reader::open(&index_file)
61 |         .chain_err(|| ErrorKind::ReadDatabase(index_file.clone()))?;
62 | 
63 |     let results = db
64 |         .query(&pattern)
65 |         .package_pattern(package_pattern.as_ref())
66 |         .hash(args.hash.clone())
67 |         .run()
68 |         .chain_err(|| ErrorKind::Grep(args.pattern.clone()))?
69 |         .filter(|v| {
70 |             v.as_ref().ok().map_or(true, |v| {
71 |                 let &(ref store_path, FileTreeEntry { ref path, ref node }) = v;
72 |                 let m = pattern
73 |                     .find_iter(path)
74 |                     .last()
75 |                     .expect("path should match the pattern");
76 | 
77 |                 let conditions = [
78 |                     !args.group || !path[m.end()..].contains(&b'/'),
79 |                     !args.only_toplevel || store_path.origin().toplevel,
80 |                     args.file_type.iter().any(|t| &node.get_type() == t),
81 |                 ];
82 | 
83 |                 conditions.iter().all(|c| *c)
84 |             })
85 |         });
86 | 
87 |     let mut printed_attrs = HashSet::new();
88 |     for v in results {
89 |         let (store_path, FileTreeEntry { path, node }) =
90 |             v.chain_err(|| ErrorKind::ReadDatabase(index_file.clone()))?;
91 | 
92 |         use crate::files::FileNode::*;
93 |         let (typ, size) = match node {
94 |             Regular { executable, size } => (if executable { "x" } else { "r" }, size),
95 |             Directory { size, contents: () } => ("d", size),
96 |             Symlink { ..
} => ("s", 0), 97 | }; 98 | 99 | let mut attr = format!( 100 | "{}.{}", 101 | store_path.origin().attr, 102 | store_path.origin().output 103 | ); 104 | 105 | if !store_path.origin().toplevel { 106 | attr = format!("({})", attr); 107 | } 108 | 109 | if args.minimal { 110 | // only print each package once, even if there are multiple matches 111 | if printed_attrs.insert(attr.clone()) { 112 | println!("{}", attr); 113 | } 114 | } else { 115 | print!( 116 | "{:<40} {:>14} {:>1} {}", 117 | attr, 118 | size.separated_string(), 119 | typ, 120 | store_path.as_str() 121 | ); 122 | 123 | let path = String::from_utf8_lossy(&path); 124 | 125 | if args.color { 126 | let mut prev = 0; 127 | for mat in pattern.find_iter(path.as_bytes()) { 128 | // if the match is empty, we need to make sure we don't use string 129 | // indexing because the match may be "inside" a single multibyte character 130 | // in that case (for example, the pattern may match the second byte of a multibyte character) 131 | if mat.start() == mat.end() { 132 | continue; 133 | } 134 | print!( 135 | "{}{}", 136 | &path[prev..mat.start()], 137 | (&path[mat.start()..mat.end()]) 138 | .if_supports_color(Stream::Stdout, |txt| txt.red()), 139 | ); 140 | prev = mat.end(); 141 | } 142 | println!("{}", &path[prev..]); 143 | } else { 144 | println!("{}", path); 145 | } 146 | } 147 | } 148 | 149 | Ok(()) 150 | } 151 | 152 | /// Extract the parsed arguments for clap's arg matches. 153 | /// 154 | /// Handles parsing the values of more complex arguments. 155 | fn process_args(matches: Opts) -> result::Result { 156 | let pattern_arg = matches.pattern; 157 | let package_arg = matches.package; 158 | 159 | let start_anchor = if matches.at_root { "^" } else { "" }; 160 | let end_anchor = if matches.whole_name { "$" } else { "" }; 161 | 162 | let make_pattern = |s: &str, wrap: bool| { 163 | let regex = if matches.regex { 164 | s.to_string() 165 | } else { 166 | regex::escape(s) 167 | }; 168 | if wrap { 169 | format!("{}{}{}", start_anchor, regex, end_anchor) 170 | } else { 171 | regex 172 | } 173 | }; 174 | 175 | let color = match matches.color { 176 | Color::Auto => atty::is(atty::Stream::Stdout), 177 | Color::Always => true, 178 | Color::Never => false, 179 | }; 180 | 181 | let args = Args { 182 | database: matches.database, 183 | group: !matches.no_group, 184 | pattern: make_pattern(&pattern_arg, true), 185 | package_pattern: package_arg.as_deref().map(|p| make_pattern(p, false)), 186 | hash: matches.hash, 187 | file_type: matches 188 | .r#type 189 | .unwrap_or_else(|| files::ALL_FILE_TYPES.to_vec()), 190 | only_toplevel: matches.top_level, 191 | color, 192 | minimal: matches.minimal, 193 | }; 194 | Ok(args) 195 | } 196 | 197 | const LONG_USAGE: &str = r#" 198 | How to use 199 | ========== 200 | 201 | In the simplest case, just run `nix-locate part/of/file/path` to search for all packages that contain 202 | a file matching that path: 203 | 204 | $ nix-locate 'bin/firefox' 205 | ...all packages containing a file named 'bin/firefox' 206 | 207 | Before using this tool, you first need to generate a nix-index database. 208 | Use the `nix-index` tool to do that. 
209 | 
210 | Limitations
211 | ===========
212 | 
213 | * this tool can only find packages which are built by Hydra, because only those packages
214 |   will have file listings that are indexed by nix-index
215 | 
216 | * we can't know the precise attribute path for every package, so if you see the syntax `(attr)`
217 |   in the output, that means that `attr` is not the target package but that it
218 |   depends (perhaps indirectly) on the package that contains the searched file. Example:
219 | 
220 |   $ nix-locate 'bin/xmonad'
221 |   (xmonad-with-packages.out)    0 s /nix/store/nl581g5kv3m2xnmmfgb678n91d7ll4vv-ghc-8.0.2-with-packages/bin/xmonad
222 | 
223 |   This means that we don't know what nixpkgs attribute produces /nix/store/nl581g5kv3m2xnmmfgb678n91d7ll4vv-ghc-8.0.2-with-packages,
224 |   but we know that `xmonad-with-packages.out` requires it.
225 | "#;
226 | 
227 | fn cache_dir() -> &'static OsStr {
228 |     let base = xdg::BaseDirectories::with_prefix("nix-index").unwrap();
229 |     let cache_dir = Box::new(base.get_cache_home());
230 |     let cache_dir = Box::leak(cache_dir);
231 |     cache_dir.as_os_str()
232 | }
233 | 
234 | /// Quickly finds the derivation providing a certain file
235 | #[derive(Debug, Parser)]
236 | #[clap(author, about, version, after_help = LONG_USAGE)]
237 | struct Opts {
238 |     /// Pattern for which to search
239 |     // #[clap(name = "PATTERN")]
240 |     pattern: String,
241 | 
242 |     /// Directory where the index is stored
243 |     #[clap(short, long = "db", default_value_os = cache_dir(), env = "NIX_INDEX_DATABASE")]
244 |     database: PathBuf,
245 | 
246 |     /// Treat PATTERN as regex instead of literal text. Also applies to NAME.
247 |     #[clap(short, long)]
248 |     regex: bool,
249 | 
250 |     /// Only print matches from packages whose name matches PACKAGE.
251 |     #[clap(short, long)]
252 |     package: Option<String>,
253 | 
254 |     /// Only print matches from the package that has the given HASH.
255 |     #[clap(long, name = "HASH")]
256 |     hash: Option<String>,
257 | 
258 |     /// Only print matches from packages that show up in `nix-env -qa`.
259 |     #[clap(long)]
260 |     top_level: bool,
261 | 
262 |     /// Only print matches for files that have this type. If the option is given multiple times,
263 |     /// a file will be printed if it has any of the given types.
264 |     /// [options: (r)egular file, e(x)ecutable, (d)irectory, (s)ymlink]
265 |     #[clap(short, long, value_parser=value_parser!(FileType))]
266 |     r#type: Option<Vec<FileType>>,
267 | 
268 |     /// Disables grouping of paths with the same matching part. By default, a path will only be
269 |     /// printed if the pattern matches some part of the last component of the path. For example,
270 |     /// the pattern `a/foo` would match all of `a/foo`, `a/foo/some_file` and `a/foo/another_file`,
271 |     /// but only the first match will be printed. This option disables that behavior and prints
272 |     /// all matches.
273 |     #[clap(long)]
274 |     no_group: bool,
275 | 
276 |     /// Whether to use colors in output. If auto, only use colors if outputting to a terminal.
277 |     #[clap(long, value_enum, default_value = "auto")]
278 |     color: Color,
279 | 
280 |     /// Only print matches for files or directories whose basename matches PATTERN exactly.
281 |     /// This means that the pattern `bin/foo` will only match a file called `bin/foo` or
282 |     /// `xx/bin/foo` but not `bin/foobar`.
283 |     #[clap(short, long)]
284 |     whole_name: bool,
285 | 
286 |     /// Treat PATTERN as an absolute file path, so it only matches starting from the root of a
287 |     /// package. This means that the pattern `/bin/foo` only matches a file called `/bin/foo` or
288 |     /// `/bin/foobar` but not `/libexec/bin/foo`.
289 |     #[clap(long)]
290 |     at_root: bool,
291 | 
292 |     /// Only print attribute names of found files or directories. Other details such as size or
293 |     /// store path are omitted. This is useful for scripts that use the output of nix-locate.
294 |     #[clap(long)]
295 |     minimal: bool,
296 | }
297 | 
298 | #[derive(clap::ValueEnum, Clone, Copy, Debug)]
299 | enum Color {
300 |     Always,
301 |     Never,
302 |     Auto,
303 | }
304 | 
305 | impl FromStr for Color {
306 |     type Err = &'static str;
307 | 
308 |     fn from_str(s: &str) -> core::result::Result<Self, Self::Err> {
309 |         match s {
310 |             "always" => Ok(Color::Always),
311 |             "never" => Ok(Color::Never),
312 |             "auto" => Ok(Color::Auto),
313 |             _ => Err(""),
314 |         }
315 |     }
316 | }
317 | 
318 | fn main() {
319 |     let args = Opts::parse();
320 | 
321 |     let args = process_args(args).unwrap_or_else(|e| e.exit());
322 | 
323 |     if let Err(e) = locate(&args) {
324 |         eprintln!("error: {}", e);
325 | 
326 |         for e in e.iter().skip(1) {
327 |             eprintln!("caused by: {}", e);
328 |         }
329 | 
330 |         if let Some(backtrace) = e.backtrace() {
331 |             eprintln!("backtrace: {:?}", backtrace);
332 |         }
333 |         process::exit(2);
334 |     }
335 | }
336 | 
--------------------------------------------------------------------------------
/src/database.rs:
--------------------------------------------------------------------------------
1 | use std::fs::File;
2 | /// Creating and searching file databases.
3 | ///
4 | /// This module implements an abstraction for creating an index of files with meta information
5 | /// and searching that index for paths matching a specific pattern.
6 | use std::io::{self, BufReader, BufWriter, Read, Seek, Write};
7 | use std::path::Path;
8 | 
9 | use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
10 | use error_chain::error_chain;
11 | use grep;
12 | use grep::matcher::{LineMatchKind, Match, Matcher, NoError};
13 | use memchr::{memchr, memrchr};
14 | use regex::bytes::Regex;
15 | use regex_syntax::ast::{
16 |     Alternation, Assertion, AssertionKind, Ast, Concat, Group, Literal, Repetition,
17 | };
18 | use serde_json;
19 | use zstd;
20 | 
21 | use crate::files::{FileTree, FileTreeEntry};
22 | use crate::frcode;
23 | use crate::package::StorePath;
24 | 
25 | /// The version of the database format supported by this nix-index version.
26 | ///
27 | /// This should be updated whenever you make an incompatible change to the database format.
28 | const FORMAT_VERSION: u64 = 1;
29 | 
30 | /// The magic for nix-index database files, used to ensure that the file we're passed is
31 | /// actually a file generated by nix-index.
32 | const FILE_MAGIC: &[u8] = b"NIXI";
33 | 
34 | /// A writer for creating a new file database.
35 | pub struct Writer {
36 |     /// The encoder used to compress the database. Will be set to `None` when the value
37 |     /// is dropped.
38 |     writer: Option<BufWriter<zstd::Encoder<'static, File>>>,
39 | }
40 | 
41 | // We need to make sure that the encoder is `finish`ed in all cases, so we need
42 | // a custom Drop.
43 | impl Drop for Writer {
44 |     fn drop(&mut self) {
45 |         if self.writer.is_some() {
46 |             self.finish_encoder().unwrap();
47 |         }
48 |     }
49 | }
50 | 
51 | impl Writer {
52 |     /// Creates a new database at the given path with the specified zstd compression level
53 |     /// (currently, supported values range from 0 to 22).
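    ///
    /// A minimal usage sketch (the file name and level below are made up;
    /// real callers obtain `StorePath`/`FileTree` values from the fetch phase):
    ///
    /// ```no_run
    /// # use nix_index::database::Writer;
    /// # fn demo() -> std::io::Result<()> {
    /// let mut db = Writer::create("files", 19)?;
    /// // for every indexed package: db.add(store_path, file_tree, b"")?;
    /// let compressed_bytes = db.finish()?;
    /// # let _ = compressed_bytes;
    /// # Ok(())
    /// # }
    /// ```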
54 |     pub fn create<P: AsRef<Path>>(path: P, level: i32) -> io::Result<Writer> {
55 |         let mut file = File::create(path)?;
56 |         file.write_all(FILE_MAGIC)?;
57 |         file.write_u64::<LittleEndian>(FORMAT_VERSION)?;
58 |         let mut encoder = zstd::Encoder::new(file, level)?;
59 |         encoder.multithread(num_cpus::get() as u32)?;
60 | 
61 |         Ok(Writer {
62 |             writer: Some(BufWriter::new(encoder)),
63 |         })
64 |     }
65 | 
66 |     /// Add a new package to the database for the given store path with its corresponding
67 |     /// file tree. Entries are only added if they match `filter_prefix`.
68 |     pub fn add(
69 |         &mut self,
70 |         path: StorePath,
71 |         files: FileTree,
72 |         filter_prefix: &[u8],
73 |     ) -> io::Result<()> {
74 |         let entries = files.to_list(filter_prefix);
75 | 
76 |         // Don't add packages with no file entries to the database.
77 |         if entries.is_empty() {
78 |             return Ok(());
79 |         }
80 |         let writer = self.writer.as_mut().expect("not dropped yet");
81 |         let mut encoder =
82 |             frcode::Encoder::new(writer, b"p".to_vec(), serde_json::to_vec(&path).unwrap());
83 |         for entry in entries {
84 |             entry.encode(&mut encoder)?;
85 |         }
86 |         Ok(())
87 |     }
88 | 
89 |     /// Finishes encoding. After calling this function, `add` may no longer be called, since this function
90 |     /// closes the stream.
91 |     ///
92 |     /// The return value is the underlying File.
93 |     fn finish_encoder(&mut self) -> io::Result<File> {
94 |         let writer = self.writer.take().expect("not dropped yet");
95 |         let encoder = writer.into_inner()?;
96 |         encoder.finish()
97 |     }
98 | 
99 |     /// Finish the encoding and return the size in bytes of the compressed file that was created.
100 |     pub fn finish(mut self) -> io::Result<u64> {
101 |         let mut file = self.finish_encoder()?;
102 |         file.stream_position()
103 |     }
104 | }
105 | 
106 | error_chain! {
107 |     errors {
108 |         UnsupportedFileType(found: Vec<u8>) {
109 |             description("unsupported file type")
110 |             display("expected file to start with nix-index file magic 'NIXI', but found '{}' (is this a valid nix-index database file?)", String::from_utf8_lossy(found))
111 |         }
112 |         UnsupportedVersion(found: u64) {
113 |             description("unsupported file version")
114 |             display("this executable only supports the nix-index database version {}, but found a database with version {}", FORMAT_VERSION, found)
115 |         }
116 |         MissingPackageEntry {
117 |             description("missing package entry for path")
118 |             display("database corrupt, found a file entry without a matching package entry")
119 |         }
120 |         Frcode(err: frcode::Error) {
121 |             description("frcode error")
122 |             display("database corrupt, frcode error: {}", err)
123 |         }
124 |         EntryParse(entry: Vec<u8>) {
125 |             description("entry parse failure")
126 |             display("database corrupt, could not parse entry: {:?}", String::from_utf8_lossy(entry))
127 |         }
128 |         StorePathParse(path: Vec<u8>) {
129 |             description("store path parse failure")
130 |             display("database corrupt, could not parse store path: {:?}", String::from_utf8_lossy(path))
131 |         }
132 |     }
133 | 
134 |     foreign_links {
135 |         Io(io::Error);
136 |         Grep(grep::regex::Error);
137 |     }
138 | }
139 | 
140 | impl From<frcode::Error> for Error {
141 |     fn from(err: frcode::Error) -> Error {
142 |         ErrorKind::Frcode(err).into()
143 |     }
144 | }
145 | 
146 | /// A Reader allows fast querying of a nix-index database.
147 | pub struct Reader {
148 |     decoder: frcode::Decoder<BufReader<zstd::Decoder<'static, BufReader<File>>>>,
149 | }
150 | 
151 | impl Reader {
152 |     /// Opens a nix-index database located at the given path.
153 |     ///
154 |     /// If the path does not exist or is not a valid database, an error is returned.
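    ///
    /// Sketch of a typical query against an opened database (file name and
    /// pattern are made up; `nix-locate` is the real consumer of this API):
    ///
    /// ```no_run
    /// # use nix_index::database::{Reader, Result};
    /// # use regex::bytes::Regex;
    /// # fn demo() -> Result<()> {
    /// let db = Reader::open("files")?;
    /// let pattern = Regex::new("bin/hello$").unwrap();
    /// for entry in db.query(&pattern).run()? {
    ///     let (store_path, file_entry) = entry?;
    ///     // inspect store_path and file_entry here
    /// }
    /// # Ok(())
    /// # }
    /// ```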
155 |     pub fn open<P: AsRef<Path>>(path: P) -> Result<Reader> {
156 |         let mut file = File::open(path)?;
157 |         let mut magic = [0u8; 4];
158 |         file.read_exact(&mut magic)?;
159 | 
160 |         if magic != FILE_MAGIC {
161 |             return Err(ErrorKind::UnsupportedFileType(magic.to_vec()).into());
162 |         }
163 | 
164 |         let version = file.read_u64::<LittleEndian>()?;
165 |         if version != FORMAT_VERSION {
166 |             return Err(ErrorKind::UnsupportedVersion(version).into());
167 |         }
168 | 
169 |         let decoder = zstd::Decoder::new(file)?;
170 |         Ok(Reader {
171 |             decoder: frcode::Decoder::new(BufReader::new(decoder)),
172 |         })
173 |     }
174 | 
175 |     /// Builds a query to find all entries in the database that have a filename matching the given pattern.
176 |     ///
177 |     /// Afterwards, use `Query::into_iter` to iterate over the items.
178 |     pub fn query(self, exact_regex: &Regex) -> Query {
179 |         Query {
180 |             reader: self,
181 |             exact_regex,
182 |             hash: None,
183 |             package_pattern: None,
184 |         }
185 |     }
186 | 
187 |     /// Dumps the contents of the database to stdout, for debugging.
188 |     #[allow(clippy::print_stdout)]
189 |     pub fn dump(&mut self) -> Result<()> {
190 |         loop {
191 |             let block = self.decoder.decode()?;
192 |             if block.is_empty() {
193 |                 break;
194 |             }
195 |             for line in block.split(|c| *c == b'\n') {
196 |                 println!("{:?}", String::from_utf8_lossy(line));
197 |             }
198 |             println!("-- block boundary");
199 |         }
200 |         Ok(())
201 |     }
202 | }
203 | 
204 | /// A builder for a `ReaderIter` to iterate over entries in the database matching a given pattern.
205 | pub struct Query<'a, 'b> {
206 |     /// The underlying reader from which we read input.
207 |     reader: Reader,
208 | 
209 |     /// The pattern that file paths have to match.
210 |     exact_regex: &'a Regex,
211 | 
212 |     /// Only include the package with the given hash.
213 |     hash: Option<String>,
214 | 
215 |     /// Only include packages whose name matches the given pattern.
216 |     package_pattern: Option<&'b Regex>,
217 | }
218 | 
219 | impl<'a, 'b> Query<'a, 'b> {
220 |     /// Limit results to entries from the package with the specified hash if `Some`.
221 |     pub fn hash(self, hash: Option<String>) -> Query<'a, 'b> {
222 |         Query { hash, ..self }
223 |     }
224 | 
225 |     /// Limit results to entries from packages whose name matches the given regex if `Some`.
226 |     pub fn package_pattern(self, package_pattern: Option<&'b Regex>) -> Query<'a, 'b> {
227 |         Query {
228 |             package_pattern,
229 |             ..self
230 |         }
231 |     }
232 | 
233 |     /// Runs the query, returning an Iterator that will yield all entries matching the conditions.
234 |     ///
235 |     /// There is no guarantee about the order of the returned matches.
236 |     pub fn run(self) -> Result<ReaderIter<'a, 'b>> {
237 |         let mut expr = regex_syntax::ast::parse::Parser::new()
238 |             .parse(self.exact_regex.as_str())
239 |             .expect("regex cannot be invalid");
240 |         // replace the ^ anchor by a NUL byte, since each entry is of the form `METADATA\0PATH`
241 |         // (so the NUL byte marks the start of the path).
242 |         {
243 |             let mut stack = vec![&mut expr];
244 |             while let Some(e) = stack.pop() {
245 |                 match *e {
246 |                     Ast::Assertion(Assertion {
247 |                         kind: AssertionKind::StartLine,
248 |                         span,
249 |                     }) => {
250 |                         *e = Ast::Literal(Literal {
251 |                             span,
252 |                             c: '\0',
253 |                             kind: regex_syntax::ast::LiteralKind::Verbatim,
254 |                         })
255 |                     }
256 |                     Ast::Group(Group { ref mut ast, .. }) => stack.push(ast),
257 |                     Ast::Repetition(Repetition { ref mut ast, .. }) => stack.push(ast),
258 |                     Ast::Concat(Concat { ref mut asts, .. })
259 |                     | Ast::Alternation(Alternation { ref mut asts, .. }) => stack.extend(asts),
259 |                     | Ast::Alternation(Alternation { ref mut asts, .. }) => stack.extend(asts),
260 |                     _ => {}
261 |                 }
262 |             }
263 |         }
264 |         let mut regex_builder = grep::regex::RegexMatcherBuilder::new();
265 |         regex_builder.line_terminator(Some(b'\n')).multi_line(true);
266 | 
267 |         let grep = regex_builder.build(&format!("{}", expr))?;
268 |         Ok(ReaderIter {
269 |             reader: self.reader,
270 |             found: Vec::new(),
271 |             found_without_package: Vec::new(),
272 |             pattern: grep,
273 |             exact_pattern: self.exact_regex,
274 |             package_entry_pattern: regex_builder.build("^p\0").expect("valid regex"),
275 |             package_name_pattern: self.package_pattern,
276 |             package_hash: self.hash,
277 |         })
278 |     }
279 | }
280 | 
281 | /// An iterator for entries in a database matching a given pattern.
282 | pub struct ReaderIter<'a, 'b> {
283 |     /// The underlying reader from which we read input.
284 |     reader: Reader,
285 |     /// Entries that matched the pattern but have not been returned by `next` yet.
286 |     found: Vec<(StorePath, FileTreeEntry)>,
287 |     /// Entries that matched the pattern but for which we don't know yet what package they belong to.
288 |     /// This may happen if the entry we matched was at the end of the search buffer, so that the entry
289 |     /// for the package did not fit into the buffer anymore (since the package is stored after the entries
290 |     /// of the package). In this case, we need to look for the package entry in the next iteration when
291 |     /// we read the next block of input.
292 |     found_without_package: Vec<FileTreeEntry>,
293 |     /// The pattern used to search for matching file entries.
294 |     ///
295 |     /// This pattern should work on the raw bytes of file entries. In particular, the file path is not the
296 |     /// first data in a file entry, so the regex `^` anchor will not work correctly.
297 |     ///
298 |     /// The pattern here may produce false positives (for example, if it matches inside the metadata of a file
299 |     /// entry). This is not a problem, as matches are later checked against `exact_pattern`.
300 |     pattern: grep::regex::RegexMatcher,
301 |     /// The raw pattern, as supplied to `Reader::query`. This is used to verify matches, since `pattern` itself
302 |     /// may produce false positives.
303 |     exact_pattern: &'a Regex,
304 |     /// Pattern that matches only package entries.
305 |     package_entry_pattern: grep::regex::RegexMatcher,
306 |     /// Pattern that the package name should match.
307 |     package_name_pattern: Option<&'b Regex>,
308 |     /// Only search the package with the given hash.
309 |     package_hash: Option<String>,
310 | }
311 | 
312 | fn consume_no_error<T>(e: NoError) -> T {
313 |     panic!("impossible: {}", e)
314 | }
315 | 
316 | fn next_matching_line<M: Matcher<Error = NoError>>(
317 |     matcher: M,
318 |     buf: &[u8],
319 |     mut start: usize,
320 | ) -> Option<Match> {
321 |     while let Some(candidate) = matcher
322 |         .find_candidate_line(&buf[start..])
323 |         .unwrap_or_else(consume_no_error)
324 |     {
325 |         // the buffer may end with a newline character, so we may get a match
326 |         // for an empty "line" at the end of the buffer
327 |         // since this is not a line match, return None
328 |         if start == buf.len() {
329 |             return None;
330 |         };
331 | 
332 |         let (pos, confirmed) = match candidate {
333 |             LineMatchKind::Confirmed(pos) => (start + pos, true),
334 |             LineMatchKind::Candidate(pos) => (start + pos, false),
335 |         };
336 | 
337 |         let line_start = memrchr(b'\n', &buf[..pos]).map_or(0, |x| x + 1);
338 |         let line_end = memchr(b'\n', &buf[pos..]).map_or(buf.len(), |x| x + pos + 1);
339 | 
340 |         if !confirmed
341 |             && !matcher
342 |                 .is_match(&buf[line_start..line_end])
343 |                 .unwrap_or_else(consume_no_error)
344 |         {
345 |             start = line_end;
346 |             continue;
347 |         }
348 | 
349 |         return Some(Match::new(line_start, line_end));
350 |     }
351 |     None
352 | }
353 | 
354 | impl<'a, 'b> ReaderIter<'a, 'b> {
355 |     /// Reads input until `self.found` contains at least one entry or the end of the input has been reached.
356 |     fn fill_buf(&mut self) -> Result<()> {
357 |         // the input is processed in blocks until we've found at least a single entry
358 |         while self.found.is_empty() {
359 |             let &mut ReaderIter {
360 |                 ref mut reader,
361 |                 ref package_entry_pattern,
362 |                 ref package_name_pattern,
363 |                 ref package_hash,
364 |                 ..
365 |             } = self;
366 |             let block = reader.decoder.decode()?;
367 | 
368 |             // if the block is empty, the end of input has been reached
369 |             if block.is_empty() {
370 |                 return Ok(());
371 |             }
372 | 
373 |             // when we find a match, we need to know the package that this match belongs to.
374 |             // the `find_package` function will skip forward until a package entry is found
375 |             // (the package entry comes after all file entries for a package).
376 |             //
377 |             // to be more efficient if there are many matches, we cache the current package here.
378 |             // this package is valid for all positions up to the second element of the tuple
379 |             // (after that, a new package begins).
380 |             let mut cached_package: Option<(StorePath, usize)> = None;
381 |             let mut no_more_package = false;
382 |             let mut find_package = |item_end| -> Result<_> {
383 |                 if let Some((ref pkg, end)) = cached_package {
384 |                     if item_end < end {
385 |                         return Ok(Some((pkg.clone(), end)));
386 |                     }
387 |                 }
388 | 
389 |                 if no_more_package {
390 |                     return Ok(None);
391 |                 }
392 | 
393 |                 let mat = match next_matching_line(package_entry_pattern, block, item_end) {
394 |                     Some(v) => v,
395 |                     None => {
396 |                         no_more_package = true;
397 |                         return Ok(None);
398 |                     }
399 |                 };
400 | 
401 |                 let json = &block[mat.start() + 2..mat.end() - 1];
402 |                 let pkg: StorePath = serde_json::from_slice(json)
403 |                     .chain_err(|| ErrorKind::StorePathParse(json.to_vec()))?;
404 |                 cached_package = Some((pkg.clone(), mat.end()));
405 |                 Ok(Some((pkg, mat.end())))
406 |             };
407 | 
408 |             // Tests if a store path matches the `package_name_pattern` and `package_hash` constraints.
409 |             let should_search_package = |pkg: &StorePath| -> bool {
410 |                 package_name_pattern.map_or(true, |r| r.is_match(pkg.name().as_bytes()))
411 |                     && package_hash.as_ref().map_or(true, |h| h == &pkg.hash())
412 |             };
413 | 
414 |             let mut pos = 0;
415 |             // if there are any entries without a package left over from the previous iteration, see
416 |             // if this block contains the package entry.
417 |             if !self.found_without_package.is_empty() {
418 |                 if let Some((pkg, end)) = find_package(0)? {
419 |                     if !should_search_package(&pkg) {
420 |                         // all entries before end will have the same package
421 |                         pos = end;
422 |                         self.found_without_package.truncate(0);
423 |                     } else {
424 |                         for entry in self.found_without_package.split_off(0) {
425 |                             self.found.push((pkg.clone(), entry));
426 |                         }
427 |                     }
428 |                 }
429 |             }
430 | 
431 |             // process all matches in this block
432 |             while let Some(mat) = next_matching_line(&self.pattern, block, pos) {
433 |                 pos = mat.end();
434 |                 let entry = &block[mat.start()..mat.end() - 1];
435 |                 // skip entries that aren't describing file paths
436 |                 if self
437 |                     .package_entry_pattern
438 |                     .is_match(entry)
439 |                     .unwrap_or_else(consume_no_error)
440 |                 {
441 |                     continue;
442 |                 }
443 | 
444 |                 // skip if package name or hash doesn't match
445 |                 // we can only skip if we know the package
446 |                 if let Some((pkg, end)) = find_package(mat.end())? {
447 |                     if !should_search_package(&pkg) {
448 |                         // all entries before end will have the same package
449 |                         pos = end;
450 |                         continue;
451 |                     }
452 |                 }
453 | 
454 |                 let entry = FileTreeEntry::decode(entry)
455 |                     .ok_or_else(|| Error::from(ErrorKind::EntryParse(entry.to_vec())))?;
456 | 
457 |                 // check for false positives
458 |                 if !self.exact_pattern.is_match(&entry.path) {
459 |                     continue;
460 |                 }
461 | 
462 |                 match find_package(mat.end())? {
463 |                     None => self.found_without_package.push(entry),
464 |                     Some((pkg, _)) => self.found.push((pkg, entry)),
465 |                 }
466 |             }
467 |         }
468 |         Ok(())
469 |     }
470 | 
471 |     /// Returns the next match in the database.
472 |     fn next_match(&mut self) -> Result<Option<(StorePath, FileTreeEntry)>> {
473 |         self.fill_buf()?;
474 |         Ok(self.found.pop())
475 |     }
476 | }
477 | 
478 | impl<'a, 'b> Iterator for ReaderIter<'a, 'b> {
479 |     type Item = Result<(StorePath, FileTreeEntry)>;
480 | 
481 |     fn next(&mut self) -> Option<Self::Item> {
482 |         match self.next_match() {
483 |             Err(e) => Some(Err(e)),
484 |             Ok(v) => v.map(Ok),
485 |         }
486 |     }
487 | }
488 | 
489 | #[cfg(test)]
490 | mod tests {
491 |     use super::*;
492 | 
493 |     #[test]
494 |     fn test_next_matching_line_package() {
495 |         let matcher = grep::regex::RegexMatcherBuilder::new()
496 |             .line_terminator(Some(b'\n'))
497 |             .multi_line(true)
498 |             .build("^p")
499 |             .expect("valid regex");
500 |         let buffer = br#"
501 | SOME LINE
502 | pDATA
503 | ANOTHER LINE
504 | "#;
505 | 
506 |         let mat = next_matching_line(matcher, buffer, 0);
507 |         assert_eq!(mat, Some(Match::new(11, 17)));
508 |     }
509 | }
510 | 
--------------------------------------------------------------------------------
/src/errors.rs:
--------------------------------------------------------------------------------
1 | use std::path::PathBuf;
2 | 
3 | use crate::package::StorePath;
4 | 
5 | error_chain::error_chain! {
6 |     errors {
7 |         QueryPackages {
8 |             description("query packages error")
9 |             display("querying available packages failed")
10 |         }
11 |         FetchFiles(path: StorePath) {
12 |             description("file listing fetch error")
13 |             display("fetching the file listing for store path '{}' failed", path.as_str())
14 |         }
15 |         FetchReferences(path: StorePath) {
16 |             description("references fetch error")
17 |             display("fetching the references of store path '{}' failed", path.as_str())
18 |         }
19 |         LoadPathsCache {
20 |             description("paths.cache load error")
21 |             display("loading the paths.cache file failed")
22 |         }
23 |         WritePathsCache {
24 |             description("paths.cache write error")
25 |             display("writing the paths.cache file failed")
26 |         }
27 |         CreateDatabase(path: PathBuf) {
28 |             description("create database error")
29 |             display("creating the database at '{}' failed", path.to_string_lossy())
30 |         }
31 |         CreateDatabaseDir(path: PathBuf) {
32 |             description("create database directory error")
33 |             display("creating the directory for the database at '{}' failed", path.to_string_lossy())
34 |         }
35 |         WriteDatabase(path: PathBuf) {
36 |             description("database write error")
37 |             display("writing to the database '{}' failed", path.to_string_lossy())
38 |         }
39 |         ParseProxy(err: crate::hydra::Error) {
40 |             description("proxy parse error")
41 |             display("cannot parse proxy settings")
42 |         }
43 |     }
44 | }
45 | 
--------------------------------------------------------------------------------
/src/files.rs:
--------------------------------------------------------------------------------
1 | //! Data types for working with trees of files.
2 | //!
3 | //! The main type here is `FileTree`, which represents a tree of files,
4 | //! such as the file listing for a store path.
5 | use std::collections::HashMap;
6 | use std::io::{self, Write};
7 | use std::str::{self, FromStr};
8 | 
9 | use clap::builder::PossibleValue;
10 | use clap::ValueEnum;
11 | use memchr::memchr;
12 | use serde::{Deserialize, Serialize};
13 | use serde_bytes::ByteBuf;
14 | 
15 | use crate::frcode;
16 | 
17 | /// This enum represents a single node in a file tree.
18 | ///
19 | /// The type is generic over the contents of a directory node,
20 | /// because we want to use this enum to represent both a flat
21 | /// structure where a directory only stores some meta-information about itself
22 | /// (such as the number of children) and full file trees, where a
23 | /// directory contains all the child nodes.
24 | ///
25 | /// Note that file nodes by themselves do not have names. Names are given
26 | /// to file nodes by the parent directory, which has a map of entry names to
27 | /// file nodes.
28 | #[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
29 | pub enum FileNode<T> {
30 |     /// A regular file. This is the normal kind of file which is
31 |     /// neither a directory nor a symlink.
32 |     Regular {
33 |         /// The size of this file, in bytes.
34 |         size: u64,
35 |         /// Whether or not this file has the `executable` bit set.
36 |         executable: bool,
37 |     },
38 |     /// A symbolic link that points to another file path.
39 |     Symlink {
40 |         /// The path that this symlink points to.
41 |         target: ByteBuf,
42 |     },
43 |     /// A directory. It usually has a mapping of names to child nodes (in
44 |     /// the case of a full tree), but we also support a reduced form where
45 |     /// we only store the number of entries in the directory.
46 |     Directory {
47 |         /// The size of a directory is the number of children it contains.
48 |         size: u64,
49 | 
50 |         /// The contents of this directory. These are generic, as explained
51 |         /// in the documentation for this type.
52 |         contents: T,
53 |     },
54 | }
55 | 
56 | /// The type of a file.
57 | ///
58 | /// This mirrors the variants of `FileNode`, but without storing
59 | /// data in each variant.
60 | ///
61 | /// An exception to this is the `executable` field for the regular type.
62 | /// This is needed since we present `regular` and `executable` files as different
63 | /// to the user, so we need a way to represent both types.
64 | #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
65 | pub enum FileType {
66 |     Regular { executable: bool },
67 |     Directory,
68 |     Symlink,
69 | }
70 | 
71 | impl ValueEnum for FileType {
72 |     fn value_variants<'a>() -> &'a [Self] {
73 |         &[
74 |             FileType::Regular { executable: false },
75 |             FileType::Regular { executable: true },
76 |             FileType::Directory,
77 |             FileType::Symlink,
78 |         ]
79 |     }
80 | 
81 |     fn to_possible_value(&self) -> Option<PossibleValue> {
82 |         match self {
83 |             FileType::Regular { executable: false } => Some(PossibleValue::new("r")),
84 |             FileType::Regular { executable: true } => Some(PossibleValue::new("x")),
85 |             FileType::Directory => Some(PossibleValue::new("d")),
86 |             FileType::Symlink => Some(PossibleValue::new("s")),
87 |         }
88 |     }
89 | }
90 | 
91 | impl FromStr for FileType {
92 |     type Err = &'static str;
93 | 
94 |     fn from_str(s: &str) -> Result<Self, Self::Err> {
95 |         match s {
96 |             "r" => Ok(FileType::Regular { executable: false }),
97 |             "x" => Ok(FileType::Regular { executable: true }),
98 |             "d" => Ok(FileType::Directory),
99 |             "s" => Ok(FileType::Symlink),
100 |             _ => Err("invalid file type"),
101 |         }
102 |     }
103 | }
104 | 
105 | /// This lists all file types that can currently be represented.
106 | pub const ALL_FILE_TYPES: &[FileType] = &[
107 |     FileType::Regular { executable: true },
108 |     FileType::Regular { executable: false },
109 |     FileType::Directory,
110 |     FileType::Symlink,
111 | ];
112 | 
113 | impl<T> FileNode<T> {
114 |     /// Split this node into a node without contents and optionally the contents themselves,
115 |     /// if the node was a directory.
116 |     pub fn split_contents(&self) -> (FileNode<()>, Option<&T>) {
117 |         use self::FileNode::*;
118 |         match *self {
119 |             Regular { size, executable } => (Regular { size, executable }, None),
120 |             Symlink { ref target } => (
121 |                 Symlink {
122 |                     target: target.clone(),
123 |                 },
124 |                 None,
125 |             ),
126 |             Directory { size, ref contents } => (Directory { size, contents: () }, Some(contents)),
127 |         }
128 |     }
129 | 
130 |     /// Return the type of this file.
131 |     pub fn get_type(&self) -> FileType {
132 |         match *self {
133 |             FileNode::Regular { executable, .. } => FileType::Regular { executable },
134 |             FileNode::Directory { .. } => FileType::Directory,
135 |             FileNode::Symlink { .. } => FileType::Symlink,
136 |         }
137 |     }
138 | }
139 | 
140 | impl FileNode<()> {
141 |     fn encode<W: Write>(&self, encoder: &mut frcode::Encoder<W>) -> io::Result<()> {
142 |         use self::FileNode::*;
143 |         match *self {
144 |             Regular { executable, size } => {
145 |                 let e = if executable { "x" } else { "r" };
146 |                 encoder.write_meta(format!("{}{}", size, e).as_bytes())?;
147 |             }
148 |             Symlink { ref target } => {
149 |                 encoder.write_meta(target)?;
150 |                 encoder.write_meta(b"s")?;
151 |             }
152 |             Directory { size, contents: () } => {
153 |                 encoder.write_meta(format!("{}d", size).as_bytes())?;
154 |             }
155 |         }
156 |         Ok(())
157 |     }
158 | 
159 |     pub fn decode(buf: &[u8]) -> Option<Self> {
160 |         use self::FileNode::*;
161 |         buf.split_last().and_then(|(kind, buf)| match *kind {
162 |             b'x' | b'r' => {
163 |                 let executable = *kind == b'x';
164 |                 str::from_utf8(buf)
165 |                     .ok()
166 |                     .and_then(|s| s.parse().ok())
167 |                     .map(|size| Regular { executable, size })
168 |             }
169 |             b's' => Some(Symlink {
170 |                 target: ByteBuf::from(buf),
171 |             }),
172 |             b'd' => str::from_utf8(buf)
173 |                 .ok()
174 |                 .and_then(|s| s.parse().ok())
175 |                 .map(|size| Directory { size, contents: () }),
176 |             _ => None,
177 |         })
178 |     }
179 | }
180 | 
181 | /// This type represents a full tree of files.
182 | ///
183 | /// A *file tree* is a *file node* where each directory contains
184 | /// the tree for its children.
185 | #[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
186 | pub struct FileTree(FileNode<HashMap<ByteBuf, FileTree>>);
187 | 
188 | /// An entry in a file tree is a path to a node paired with that node.
189 | ///
190 | /// If the entry refers to a directory, it only stores information about that
191 | /// directory itself. It does not contain the children of the directory.
192 | pub struct FileTreeEntry {
193 |     pub path: Vec<u8>,
194 |     pub node: FileNode<()>,
195 | }
196 | 
197 | impl FileTreeEntry {
198 |     pub fn encode<W: Write>(self, encoder: &mut frcode::Encoder<W>) -> io::Result<()> {
199 |         self.node.encode(encoder)?;
200 |         encoder.write_path(self.path)?;
201 |         Ok(())
202 |     }
203 | 
204 |     pub fn decode(buf: &[u8]) -> Option<Self> {
205 |         memchr(b'\0', buf).and_then(|sep| {
206 |             let path = &buf[(sep + 1)..];
207 |             let node = &buf[0..sep];
208 |             FileNode::decode(node).map(|node| FileTreeEntry {
209 |                 path: path.to_vec(),
210 |                 node,
211 |             })
212 |         })
213 |     }
214 | }
215 | 
216 | impl FileTree {
217 |     pub fn regular(size: u64, executable: bool) -> Self {
218 |         FileTree(FileNode::Regular { size, executable })
219 |     }
220 | 
221 |     pub fn symlink(target: ByteBuf) -> Self {
222 |         FileTree(FileNode::Symlink { target })
223 |     }
224 | 
225 |     pub fn directory(entries: HashMap<ByteBuf, FileTree>) -> Self {
226 |         FileTree(FileNode::Directory {
227 |             size: entries.len() as u64,
228 |             contents: entries,
229 |         })
230 |     }
231 | 
232 |     pub fn to_list(&self, filter_prefix: &[u8]) -> Vec<FileTreeEntry> {
233 |         let mut result = Vec::new();
234 | 
235 |         let mut stack = Vec::with_capacity(16);
236 |         stack.push((Vec::new(), self));
237 | 
238 |         while let Some(entry) = stack.pop() {
239 |             let path = entry.0;
240 |             let FileTree(current) = entry.1;
241 |             let (node, contents) = current.split_contents();
242 |             if let Some(entries) = contents {
243 |                 let mut entries = entries.iter().collect::<Vec<_>>();
244 |                 entries.sort_by(|a, b| Ord::cmp(a.0, b.0));
245 |                 for (name, entry) in entries {
246 |                     let mut path = path.clone();
247 |                     path.push(b'/');
248 |                     path.extend_from_slice(name);
249 |                     stack.push((path, entry));
250 |                 }
251 |             }
252 |             if path.starts_with(filter_prefix) {
253 |                 result.push(FileTreeEntry { path, node });
254 |             }
255 |         }
256 |         result
257 |     }
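    // A worked example of `to_list` (illustrative, not part of the original docs):
    // for a tree containing a directory `bin` with a single file `hello`, calling
    // `to_list(b"/bin")` yields entries for `/bin` (a directory node that only
    // carries its child count) and `/bin/hello` (a regular file node), while the
    // root entry (empty path) is filtered out because it does not start with the prefix.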
258 | }
259 | 
--------------------------------------------------------------------------------
/src/frcode.rs:
--------------------------------------------------------------------------------
1 | //! A compact encoding for file tree entries based on sharing prefixes.
2 | //!
3 | //! This module contains a rust implementation of a variant of the `frcode` tool
4 | //! used by GNU findutils' locate. It has been extended to allow meta information
5 | //! to be attached to each entry so it is no longer compatible with the original
6 | //! frcode format.
7 | //! (See http://www.delorie.com/gnu/docs/findutils/locatedb.5.html for a description of the frcode format.)
8 | //!
9 | //! The basic building block of the encoding is a line. Each line has the following format:
10 | //! (the spaces are for readability only, they are not present in the encoding)
11 | //!
12 | //! ```text
13 | //! <metadata> <\x00 byte> <shared prefix length differential> <non-shared path suffix>
14 | //! ```
15 | //!
16 | //! Each entry holds two parts of data: metadata, which is just some arbitrary blob of NUL-terminated bytes
17 | //! and a path. Because we are storing file trees, the path will likely share a long prefix with the previous
18 | //! entry's path (we traverse directory entries in sorted order to maximize this chance), so we first store
19 | //! the length of the shared prefix.
20 | //!
21 | //! Since this length will likely be similar to the previous one (if there are many entries in `/foo/bar`, then they will
22 | //! all share a prefix of at least the length of `/foo/bar`) we only store the signed *difference* to the previous shared prefix length
23 | //! (This is why it's called a differential). For differences smaller than +/-127 we store them directly as a single byte. If the
24 | //! difference is greater than that, the first byte will be `0x80` (-128) indicating that the following two bytes represent the
25 | //! difference (with the high byte first [big endian]).
26 | //!
27 | //! As an example, consider the following non-encoded plaintext, where `:` separates the metadata from the path:
28 | //!
29 | //! ```text
30 | //! d:/
31 | //! d:/foo
32 | //! d:/foo/bar
33 | //! f:/foo/bar/test.txt
34 | //! f:/foo/bar/text.txt
35 | //! d:/foo/baz
36 | //! ```
37 | //!
38 | //! This text would be encoded as (using `[v]` to indicate a byte with the value of v)
39 | //!
40 | //! ```text
41 | //! d[0][0]/
42 | //! d[0][1]foo
43 | //! d[0][3]/bar
44 | //! f[0][4]/test.txt
45 | //! f[0][3]xt.txt
46 | //! d[0][-4]z
47 | //! ```
48 | //!
49 | //! At the beginning, there is no previous entry, so the shared prefix length must always be `0` (and so must the shared prefix differential).
50 | //! The second entry shares `1` byte with the first path so the difference is `1`. The third entry shares `4` bytes with the second one, which
51 | //! is `3` more than the shared length of the second one, so we encode a `3` followed by the non-shared bytes, and so on for the remaining entries.
52 | //! The last entry shares four bytes less than the second to last one did with its predecessor, so here the differential is negative.
53 | //!
54 | //! Through this encoding, the size of the index is typically reduced by a factor of 3 to 5.
55 | use std::cmp;
56 | use std::io::{self, BufRead, Write};
57 | use std::ops::{Deref, DerefMut};
58 | 
59 | use error_chain::{bail, error_chain};
60 | use memchr;
61 | 
62 | error_chain! {
63 |     foreign_links {
64 |         Io(io::Error);
65 |     }
66 |     errors {
67 |         SharedOutOfRange { previous_len: usize, shared_len: isize } {
68 |             description("shared prefix length out of bounds")
69 |             display("length of shared prefix must be >= 0 and <= {} (length of previous item), but found: {}", previous_len, shared_len)
70 |         }
71 |         SharedOverflow { shared_len: isize, diff: isize } {
72 |             description("shared prefix length too big (overflow)")
73 |             display("length of shared prefix too big: cannot add {} to {} without overflow", shared_len, diff)
74 |         }
75 |         MissingNul {
76 |             description("missing terminating NUL byte for entry")
77 |         }
78 |         MissingNewline {
79 |             description("missing newline separator for entry")
80 |         }
81 |         MissingPrefixDifferential {
82 |             description("missing the shared prefix length differential for entry")
83 |         }
84 |     }
85 | }
86 | 
87 | /// A buffer that may be resizable or not. This is used for decoding,
88 | /// where we want to make the buffer resizable as long as we haven't decoded
89 | /// a full entry yet but want to lock it as soon as we got a full entry.
90 | ///
91 | /// This is necessary because we always need to be able to decode at least
92 | /// one entry to make progress, as we never return partial entries during decoding.
93 | struct ResizableBuf {
94 |     allow_resize: bool,
95 |     data: Vec<u8>,
96 | }
97 | 
98 | impl ResizableBuf {
99 |     /// Allocates a new resizable buffer with the given initial size.
100 |     ///
101 |     /// The new buffer will allow resizing initially.
102 |     fn new(capacity: usize) -> ResizableBuf {
103 |         ResizableBuf {
104 |             data: vec![0; capacity],
105 |             allow_resize: true,
106 |         }
107 |     }
108 | 
109 |     /// Resizes the buffer to hold at least `new_size` elements. Returns `true`
110 |     /// if resizing was successful (so that the buffer can now hold at least `new_size` elements)
111 |     /// or `false` if not (meaning `new_size` is greater than the current size and resizing
112 |     /// was not allowed).
113 |     fn resize(&mut self, new_size: usize) -> bool {
114 |         if new_size <= self.data.len() {
115 |             return true;
116 |         }
117 | 
118 |         if !self.allow_resize {
119 |             return false;
120 |         }
121 | 
122 |         self.data.resize(new_size, b'\x00');
123 |         true
124 |     }
125 | }
126 | 
127 | impl Deref for ResizableBuf {
128 |     type Target = [u8];
129 | 
130 |     fn deref(&self) -> &[u8] {
131 |         &self.data
132 |     }
133 | }
134 | 
135 | impl DerefMut for ResizableBuf {
136 |     fn deref_mut(&mut self) -> &mut [u8] {
137 |         &mut self.data
138 |     }
139 | }
140 | 
141 | /// A decoder for the frcode format. It reads data from some input source
142 | /// and returns blocks of decoded entries.
143 | ///
144 | /// It will not split the metadata/path parts of individual entries since
145 | /// the primary use case for this is searching, where it is enough to decode
146 | /// the entries that match.
147 | pub struct Decoder<R> {
148 |     /// The input source from which we decode.
149 |     reader: R,
150 |     /// Position of the first byte of the path part of the last entry.
151 |     /// We need this to copy the shared prefix.
152 |     last_path: usize,
153 |     /// Position of the start of the entry that didn't fully fit in the buffer in the
154 |     /// last decode iteration. Since this entry was partial, it hasn't been returned to
155 |     /// the user yet and we need to continue decoding this entry in this iteration.
156 |     partial_entry_start: usize,
157 |     /// The length of the shared prefix for the current entry. This is necessary because
158 |     /// the shared length is stored as a difference, so we need the previous value to update it.
159 |     shared_len: isize,
160 |     /// The buffer into which we store the decoded bytes.
161 |     buf: ResizableBuf,
162 |     /// Current write position in buf. The next decoded byte should be written to buf[pos].
163 |     pos: usize,
164 | }
165 | 
166 | impl<R: BufRead> Decoder<R> {
167 |     /// Construct a new decoder for the given source.
168 |     pub fn new(reader: R) -> Decoder<R> {
169 |         let capacity = 1_000_000;
170 |         Decoder {
171 |             reader,
172 |             buf: ResizableBuf::new(capacity),
173 |             pos: 0,
174 |             last_path: 0,
175 |             shared_len: 0,
176 |             partial_entry_start: 0,
177 |         }
178 |     }
179 | 
180 |     /// Copies `self.shared_len` bytes from the previous entry's path into the output buffer.
181 |     ///
182 |     /// Returns false if the buffer was too small and could not be resized. In this case, no
183 |     /// bytes will be copied.
184 |     fn copy_shared(&mut self) -> Result<bool> {
185 |         let shared_len = self.shared_len as usize;
186 |         let new_pos = self.pos + shared_len;
187 |         let new_last_path = self.pos;
188 |         if !self.buf.resize(new_pos) {
189 |             return Ok(false);
190 |         }
191 | 
192 |         if self.shared_len < 0 || self.last_path + shared_len > self.pos {
193 |             bail!(ErrorKind::SharedOutOfRange {
194 |                 previous_len: self.pos - self.last_path,
195 |                 shared_len: self.shared_len,
196 |             });
197 |         }
198 | 
199 |         let (_, last) = self.buf.split_at_mut(self.last_path);
200 |         let (last, new) = last.split_at_mut(self.pos - self.last_path);
201 |         new[..shared_len].copy_from_slice(&last[..shared_len]);
202 | 
203 |         self.pos += shared_len;
204 |         self.last_path = new_last_path;
205 |         Ok(true)
206 |     }
207 | 
208 |     /// Copies bytes from the input reader to the output buffer until a `\x00` byte is read.
209 |     /// The NUL byte is included in the output buffer.
210 |     ///
211 |     /// Returns false if the output buffer was exhausted before a NUL byte could be found and
212 |     /// could not be resized. All bytes that were read before this situation was detected will
213 |     /// have already been copied to the output buffer in this case.
214 |     ///
215 |     /// It will also return false if the end of the input was reached.
216 |     fn read_to_nul(&mut self) -> Result<bool> {
217 |         loop {
218 |             let (done, len) = {
219 |                 let &mut Decoder {
220 |                     ref mut reader,
221 |                     ref mut buf,
222 |                     ref mut pos,
223 |                     ..
224 |                 } = self;
225 |                 let input = match reader.fill_buf() {
226 |                     Ok(data) => data,
227 |                     Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
228 |                     Err(e) => return Err(Error::from(e)),
229 |                 };
230 | 
231 |                 if input.is_empty() {
232 |                     return Ok(false);
233 |                 }
234 | 
235 |                 let (done, len) = match memchr::memchr(b'\x00', input) {
236 |                     Some(i) => (true, i + 1),
237 |                     None => (false, input.len()),
238 |                 };
239 | 
240 |                 let new_pos = *pos + len;
241 |                 if buf.resize(new_pos) {
242 |                     buf[*pos..new_pos].copy_from_slice(&input[..len]);
243 |                     *pos = new_pos;
244 |                     (done, len)
245 |                 } else {
246 |                     return Ok(false);
247 |                 }
248 |             };
249 |             self.reader.consume(len);
250 |             if done {
251 |                 return Ok(true);
252 |             }
253 |         }
254 |     }
255 | 
256 |     /// Read the differential from the input reader. This function will return an error
257 |     /// if the end of input has been reached.
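    ///
    /// Worked examples of the variable-length encoding (illustrative bytes):
    /// `0x05` alone decodes to `+5`; `0xFB` (as i8: `-5`) decodes to `-5`; and the
    /// escape byte `0x80` followed by `0x01 0x2C` decodes to the 16-bit
    /// big-endian value `+300`.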
258 |     fn decode_prefix_diff(&mut self) -> Result<i16> {
259 |         let mut buf = [0; 1];
260 |         self.reader
261 |             .read_exact(&mut buf)
262 |             .chain_err(|| ErrorKind::MissingPrefixDifferential)?;
263 | 
264 |         if buf[0] != 0x80 {
265 |             Ok((buf[0] as i8) as i16)
266 |         } else {
267 |             let mut buf = [0; 2];
268 |             self.reader
269 |                 .read_exact(&mut buf)
270 |                 .chain_err(|| ErrorKind::MissingPrefixDifferential)?;
271 |             let high = buf[0] as i16;
272 |             let low = buf[1] as i16;
273 |             Ok(high << 8 | low)
274 |         }
275 |     }
276 | 
277 |     /// Decodes some entries to fill the buffer and returns a block of decoded entries.
278 |     ///
279 |     /// It will decode as many entries as fit into the internal buffer, but at least one.
280 |     /// In the returned block of bytes, an entry's metadata and path will be separated by a NUL byte
281 |     /// and entries will be terminated with a newline character. This allows for fast searching with
282 |     /// a line based searcher.
283 |     ///
284 |     /// The function does not return partially decoded entries. Because of this, the size of the returned
285 |     /// slice will vary from call to call. The last entry which did not fully fit into the buffer yet
286 |     /// will be returned as the first entry at the next call.
287 |     pub fn decode(&mut self) -> Result<&mut [u8]> {
288 |         // Save end pointer from previous iteration and reset write position
289 |         let end = self.pos;
290 |         self.pos = 0;
291 | 
292 |         // We need to preserve some data from the previous iteration, namely:
293 |         //
294 |         // * all data after the `self.last_path` position, for copying the shared prefix
295 |         // * everything from the start of the partial entry, since this entry wasn't fully decoded
296 |         //   in the last iteration and we want to continue decoding it now
297 |         //
298 |         // If we stopped decoding the partial entry after already copying the shared prefix, then
299 |         // `last_path` will already point to the partial entry so it will be greater than `partial_entry_start`.
300 |         //
301 |         // If we stopped decoding during copying the metadata though, which comes before we copy the shared
302 |         // prefix, then `last_path` will point to the previous entry's path, so it will be smaller than
303 |         // `partial_entry_start`.
304 |         //
305 |         // To support both these cases, we take the minimum here.
306 |         let mut copy_pos = cmp::min(self.partial_entry_start, self.last_path);
307 | 
308 |         // Since we sometimes copy more than just the partial entry, we need to know where the partial entry
309 |         // starts as that is the first position that we want to return (everything before that was already
310 |         // part of an entry returned in the last iteration).
311 |         let item_start = self.partial_entry_start - copy_pos;
312 | 
313 |         // Shift the last path, because we copy it from copy_pos.. to 0..
314 |         self.last_path -= copy_pos;
315 | 
316 |         // Now we can do the actual copying. We cannot use copy_from_slice here since source and target
317 |         // may overlap.
318 |         while copy_pos < end {
319 |             self.buf[self.pos] = self.buf[copy_pos];
320 |             self.pos += 1;
321 |             copy_pos += 1;
322 |         }
323 | 
324 |         // Allow resizing the buffer, since we haven't decoded a full entry yet
325 |         self.buf.allow_resize = true;
326 | 
327 |         // If the last decoded byte in the buffer is a NUL byte, that means that
328 |         // we are now at the start of the path part of the entry. This means that
329 |         // we need to copy the shared prefix now.
330 |         let mut found_nul = self.pos > 0 && self.buf[self.pos - 1] == b'\x00';
331 |         if found_nul {
332 |             self.copy_shared()?;
333 |         }
334 | 
335 |         // At this point, we are guaranteed to be in either the metadata part or the non-shared part
336 |         // of an entry. In both cases, the action that we need to take is the same: copy data till
337 |         // the next NUL byte. After the NUL byte, we know that we are at the end of the metadata part,
338 |         // so we read a differential and copy the shared prefix, and repeat.
339 |         //
340 |         // Note that this loop doesn't care about where entries end. Only the path part of each entry requires
341 |         // special processing, so we can jump from NUL byte to NUL byte, decode the path and then just copy
342 |         // the data from the source when jumping to the next NUL byte.
343 |         loop {
344 |             // Read data up to the next nul byte.
345 |             if !self.read_to_nul()? {
346 |                 break;
347 |             }
348 | 
349 |             // If we had already found a NUL byte before this one, we've now seen two NUL bytes,
350 |             // so there is at least one full entry in between.
351 |             self.buf.allow_resize = !found_nul;
352 | 
353 |             // We found a NUL byte. Note that we need to set this *after* updating allow_resize,
354 |             // since allow_resize should be set to false only after we've found two NUL bytes.
355 |             found_nul = true;
356 | 
357 |             // Parse the next prefix length difference
358 |             let diff = self.decode_prefix_diff()? as isize;
359 | 
360 |             // Update the shared len
361 |             self.shared_len =
362 |                 self.shared_len
363 |                     .checked_add(diff)
364 |                     .ok_or(ErrorKind::SharedOverflow {
365 |                         shared_len: self.shared_len,
366 |                         diff,
367 |                     })?;
368 | 
369 |             // Copy the shared prefix
370 |             if !self.copy_shared()? {
371 |                 break;
372 |             }
373 |         }
374 | 
375 |         // Since we don't want to return partially decoded items, we need to find the end of the last entry.
376 |         self.partial_entry_start = memchr::memrchr(b'\n', &self.buf[..self.pos])
377 |             .ok_or_else(|| ErrorKind::MissingNewline)?
378 |             + 1;
379 |         Ok(&mut self.buf[item_start..self.partial_entry_start])
380 |     }
381 | }
382 | 
383 | /// This struct implements an encoder for the frcode format. The encoder
384 | /// writes directly to the underlying `Write` instance.
385 | ///
386 | /// To encode an entry you should first call `write_meta` a number of times
387 | /// to fill the meta data portion. Then, call `write_path` once to finalize the entry.
388 | ///
389 | /// One important property of this encoder is that it is safe to open and close
390 | /// it multiple times on the same stream, like this:
391 | ///
392 | /// ```text
393 | /// {
394 | ///     let encoder1 = Encoder::new(&mut stream);
395 | /// } // encoder1 gets dropped here
396 | /// {
397 | ///     let encoder2 = Encoder::new(&mut stream);
398 | /// }
399 | /// ```
400 | ///
401 | /// To support this, the encoder has a "footer" item that will get written when it is dropped.
402 | /// This is necessary because we need to write at least one more entry to reset the shared prefix
403 | /// length to zero, since the next encoder will expect that as initial state.
404 | pub struct Encoder<W: Write> {
405 |     writer: W,
406 |     last: Vec<u8>,
407 |     shared_len: i16,
408 |     footer_meta: Vec<u8>,
409 |     footer_path: Vec<u8>,
410 |     footer_written: bool,
411 | }
412 | 
413 | impl<W: Write> Drop for Encoder<W> {
414 |     fn drop(&mut self) {
415 |         self.write_footer().expect("failed to write footer")
416 |     }
417 | }
418 | 
419 | impl<W: Write> Encoder<W> {
420 |     /// Constructs a new encoder for the given writer.
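    ///
    /// (For a concrete use of this API in this crate, see `database::Writer::add`,
    /// which creates one encoder per package, passing `b"p"` as the footer metadata
    /// and the JSON-encoded store path as the footer path.)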
421 |     ///
422 |     /// The encoder will write the given `footer_meta` and `footer_path` as the last entry.
423 |     ///
424 |     /// # Panics
425 |     ///
426 |     /// If either `footer_meta` or `footer_path` contain NUL or newline bytes.
427 |     pub fn new(writer: W, footer_meta: Vec<u8>, footer_path: Vec<u8>) -> Encoder<W> {
428 |         assert!(
429 |             !footer_meta.contains(&b'\x00'),
430 |             "footer meta must not contain null bytes"
431 |         );
432 |         assert!(
433 |             !footer_path.contains(&b'\x00'),
434 |             "footer path must not contain null bytes"
435 |         );
436 |         assert!(
437 |             !footer_meta.contains(&b'\n'),
438 |             "footer meta must not contain newlines"
439 |         );
440 |         assert!(
441 |             !footer_path.contains(&b'\n'),
442 |             "footer path must not contain newlines"
443 |         );
444 |         Encoder {
445 |             writer,
446 |             last: Vec::new(),
447 |             shared_len: 0,
448 |             footer_meta,
449 |             footer_path,
450 |             footer_written: false,
451 |         }
452 |     }
453 | 
454 |     /// Writes the specified shared prefix differential to the output stream.
455 |     ///
456 |     /// This function takes care of the variable-length encoding used for prefix differentials
457 |     /// in the frcode format.
458 |     fn encode_diff(&mut self, diff: i16) -> io::Result<()> {
459 |         let low = (diff & 0xFF) as u8;
460 |         if diff.abs() < i8::max_value() as i16 {
461 |             self.writer.write_all(&[low])?;
462 |         } else {
463 |             let high = ((diff >> 8) & 0xFF) as u8;
464 |             self.writer.write_all(&[0x80, high, low])?;
465 |         }
466 |         Ok(())
467 |     }
468 | 
469 |     /// Writes the meta data of an entry to the output stream.
470 |     ///
471 |     /// This function can be called multiple times to extend the current meta data part.
472 |     /// Since the meta data is written as-is to the output stream, calling the function
473 |     /// multiple times will concatenate the meta data of all calls.
474 |     ///
475 |     /// # Panics
476 |     ///
477 |     /// If the meta data contains NUL bytes or newlines.
478 |     pub fn write_meta(&mut self, meta: &[u8]) -> io::Result<()> {
479 |         assert!(
480 |             !meta.contains(&b'\x00'),
481 |             "entry must not contain null bytes"
482 |         );
483 |         assert!(!meta.contains(&b'\n'), "entry must not contain newlines");
484 | 
485 |         self.writer.write_all(meta)?;
486 |         Ok(())
487 |     }
488 | 
489 |     /// Finalizes an entry by encoding its path to the output stream.
490 |     ///
491 |     /// This function should be called after you've finished writing the meta data for
492 |     /// the current entry. It will terminate the meta data part by writing the NUL byte
493 |     /// and then encode the path into the output stream.
494 |     ///
495 |     /// The entry will be terminated with a newline.
496 |     ///
497 |     /// # Panics
498 |     ///
499 |     /// If the path contains NUL bytes or newlines.
500 |     pub fn write_path(&mut self, path: Vec<u8>) -> io::Result<()> {
501 |         assert!(
502 |             !path.contains(&b'\x00'),
503 |             "entry must not contain null bytes"
504 |         );
505 |         assert!(!path.contains(&b'\n'), "entry must not contain newlines");
506 |         self.writer.write_all(&[b'\x00'])?;
507 | 
508 |         let mut shared: isize = 0;
509 |         let max_shared = i16::max_value() as isize;
510 |         for (a, b) in self.last.iter().zip(path.iter()) {
511 |             if a != b || shared > max_shared {
512 |                 break;
513 |             }
514 |             shared += 1;
515 |         }
516 |         let shared = shared as i16;
517 | 
518 |         let diff = shared - self.shared_len;
519 |         self.encode_diff(diff)?;
520 | 
521 |         self.last = path;
522 |         self.shared_len = shared;
523 | 
524 |         let pos = shared as usize;
525 |         self.writer.write_all(&self.last[pos..])?;
526 |         self.writer.write_all(b"\n")?;
527 | 
528 |         Ok(())
529 |     }
530 | 
531 |     /// Writes the footer entry.
532 |     ///
533 |     /// The footer entry will not share any prefix with the preceding entry,
534 |     /// so after this function, the shared prefix length is zero. This guarantees
535 |     /// that we can start another Encoder after this item, since the Encoder expects
536 |     /// the initial shared prefix length to be zero.
537 |     fn write_footer(&mut self) -> io::Result<()> {
538 |         if self.footer_written {
539 |             return Ok(());
540 |         }
541 | 
542 |         let diff = -self.shared_len;
543 |         self.writer.write_all(&self.footer_meta)?;
544 |         self.writer.write_all(b"\x00")?;
545 |         self.encode_diff(diff)?;
546 |         self.writer.write_all(&self.footer_path)?;
547 |         self.writer.write_all(b"\n")?;
548 |         self.footer_written = true;
549 |         Ok(())
550 |     }
551 | 
552 |     /// Finishes the encoder by writing the footer entry.
553 |     ///
554 |     /// This function is called by drop, but calling it explicitly is recommended as
555 |     /// drop has no way to report IO errors that may occur during writing the footer.
556 |     pub fn finish(mut self) -> io::Result<()> {
557 |         self.write_footer()?;
558 | 
559 |         Ok(())
560 |     }
561 | }
--------------------------------------------------------------------------------
/src/hydra.rs:
--------------------------------------------------------------------------------
1 | //! Interacting with hydra and the binary cache.
2 | //!
3 | //! This module has all functions that deal with accessing hydra or the binary cache.
4 | //! Currently, it only provides two functions: `fetch_files` to get the file listing for
5 | //! a store path and `fetch_references` to retrieve the references from the narinfo.
6 | use std::collections::HashMap;
7 | use std::fmt;
8 | use std::io::{self, Read, Write};
9 | use std::path::PathBuf;
10 | use std::pin::Pin;
11 | use std::result;
12 | use std::str::{self, Utf8Error};
13 | use std::time::{Duration, Instant};
14 | 
15 | use error_chain::error_chain;
16 | use futures::future;
17 | use futures::{Future, TryFutureExt};
18 | use reqwest::header::{HeaderValue, ACCEPT_ENCODING};
19 | use reqwest::Url;
20 | use reqwest::{Client, ClientBuilder, StatusCode};
21 | use serde::de::{Deserializer, MapAccess, Visitor};
22 | use serde::{self, Deserialize};
23 | use serde_bytes::ByteBuf;
24 | use serde_json;
25 | use tokio::time::error::Elapsed;
26 | use tokio_retry::strategy::ExponentialBackoff;
27 | use tokio_retry::{self, Retry};
28 | use xz2::read::XzDecoder;
29 | 
30 | use crate::files::FileTree;
31 | use crate::package::{PathOrigin, StorePath};
32 | use crate::util;
33 | 
34 | error_chain! {
35 |     errors {
36 |         Http(url: String, code: StatusCode) {
37 |             description("http status code error")
38 |             display("request GET '{}' failed with HTTP error {}", url, code)
39 |         }
40 |         ParseResponse(url: String, tmp_file: Option<PathBuf>) {
41 |             description("response parse error")
42 |             display("response to GET '{}' failed to parse{}", url, tmp_file.as_ref().map_or("".into(), |f| format!(" (response saved to {})", f.to_string_lossy())))
43 |         }
44 |         ParseStorePath(url: String, path: String) {
45 |             description("store path parse error")
46 |             display("response to GET '{}' contained invalid store path '{}', expected string matching format $(NIX_STORE_DIR)$(HASH)-$(NAME)", url, path)
47 |         }
48 |         Unicode(url: String, bytes: Vec<u8>, err: Utf8Error) {
49 |             description("unicode error")
50 |             display("response to GET '{}' contained invalid unicode byte {}: {}", url, bytes[err.valid_up_to()], err)
51 |         }
52 |         Decode(url: String) {
53 |             description("decoder error")
54 |             display("response to GET '{}' could not be decoded", url)
55 |         }
56 |         UnsupportedEncoding(url: String, encoding: Option<String>) {
57 |             description("unsupported content-encoding")
58 |             display(
59 |                 "response to GET '{}' had unsupported content-encoding ({})",
60 |                 url,
61 |                 encoding.as_ref().map_or("not present".to_string(), |v| format!("'{}'", v)),
62 |             )
63 |         }
64 |         Timeout {
65 |             description("timeout exceeded")
66 |         }
67 |         TimerError {
68 |             description("timer failure")
69 |         }
70 |         ParseProxy(url: String) {
71 |             description("proxy config error")
72 |             display("cannot parse proxy url ({})", url)
73 |         }
74 |     }
75 |     foreign_links {
76 |         Reqwest(reqwest::Error);
77 |     }
78 | }
79 | 
80 | impl From<Elapsed> for Error {
81 |     fn from(_err: Elapsed) -> Self {
82 |         Error::from(ErrorKind::Timeout)
83 |     }
84 | }
85 | 
86 | /// A Fetcher allows you to make requests to Hydra/the binary cache.
87 | ///
88 | /// It holds all the relevant state for performing requests, such as for example
89 | /// the HTTP client instance and a timer for timeouts.
90 | ///
91 | /// You should use a single instance of this struct to make all your hydra/binary cache
92 | /// requests.
93 | pub struct Fetcher {
94 |     client: Client,
95 |     cache_url: String,
96 | }
97 | 
98 | const RESPONSE_TIMEOUT: Duration = Duration::from_secs(1);
99 | const CONNECT_TIMEOUT: Duration = Duration::from_secs(10);
100 | 
101 | /// A boxed future using this module's error type.
102 | type BoxFuture<'a, I> = Pin<Box<dyn Future<Output = Result<I>> + 'a>>;
103 | 
104 | pub struct ParsedNAR {
105 |     pub store_path: StorePath,
106 |     pub nar_path: String,
107 |     pub references: Vec<StorePath>,
108 | }
109 | 
110 | impl Fetcher {
111 |     /// Initializes a new instance of the `Fetcher` struct.
112 |     /// The underlying HTTP client is configured with `CONNECT_TIMEOUT` and `RESPONSE_TIMEOUT`.
113 |     ///
114 |     /// `cache_url` specifies the URL of the binary cache (example: `https://cache.nixos.org`).
115 |     pub fn new(cache_url: String) -> Result<Fetcher> {
116 |         let client = ClientBuilder::new()
117 |             .connect_timeout(CONNECT_TIMEOUT)
118 |             .timeout(RESPONSE_TIMEOUT)
119 |             .build()?;
120 |         Ok(Fetcher { client, cache_url })
121 |     }
122 | 
123 |     /// Sends a GET request to the given URL and decodes the response.
124 |     ///
125 |     /// The response encoding is detected automatically by reading
126 |     /// the `Content-Encoding` header.
127 |     ///
128 |     /// The returned future resolves to `(url, None)` if the server returned a 404 error. On any
129 |     /// other error, the future resolves to an error. If the request was successful, it returns
130 |     /// `(url, Some(response_content))`.
131 |     ///
132 |     /// This function will automatically retry the request a few times to mitigate intermittent network
133 |     /// failures.
134 |     fn fetch(&self, url: String) -> BoxFuture<(String, Option<Vec<u8>>)> {
135 |         let strategy = ExponentialBackoff::from_millis(50)
136 |             .max_delay(Duration::from_millis(5000))
137 |             .take(20)
138 |             // add some jitter
139 |             .map(tokio_retry::strategy::jitter)
140 |             // wait at least 5 seconds, as that is the time that cache.nixos.org caches 500 internal server errors
141 |             .map(|x| x + Duration::from_secs(5));
142 |         Box::pin(Retry::spawn(strategy, move || {
143 |             Box::pin(self.fetch_noretry(url.clone()))
144 |         }))
145 |     }
146 | 
147 |     /// The implementation of `fetch`, without the retry logic.
148 |     async fn fetch_noretry(&self, url: String) -> Result<(String, Option<Vec<u8>>)> {
149 |         let uri = Url::parse(&url).expect("url passed to fetch must be valid");
150 |         let request = self
151 |             .client
152 |             .get(uri)
153 |             .header(
154 |                 ACCEPT_ENCODING,
155 |                 HeaderValue::from_static("br, gzip, deflate"),
156 |             )
157 |             .build()
158 |             .expect("HTTP request is valid");
159 | 
160 |         let res = self.client.execute(request).await?;
161 | 
162 |         let code = res.status();
163 | 
164 |         if code == StatusCode::NOT_FOUND {
165 |             return Ok((url, None));
166 |         }
167 | 
168 |         if !code.is_success() {
169 |             return Err(Error::from(ErrorKind::Http(url, code)));
170 |         }
171 | 
172 |         let decoded = res.bytes().await?.into();
173 | 
174 |         Ok((url, Some(decoded)))
175 |     }
176 | 
177 |     /// Fetches the references of a given store path.
178 |     ///
179 |     /// Returns the references of the store path and the store path itself. Note that this
180 |     /// function only requires the hash part of the store path that is passed as argument,
181 |     /// but it will return a full store path as a result. So you can use this function to
182 |     /// resolve hashes to full store paths as well.
183 |     ///
184 |     /// The references will be `None` if no information about the store path could be found
185 |     /// (happens if the narinfo wasn't found, which means that hydra didn't build this path).
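    ///
    /// For reference, the narinfo lines this parser looks at have roughly this
    /// shape (illustrative values, with `<hash>` standing in for real store hashes):
    ///
    /// ```text
    /// StorePath: /nix/store/<hash>-hello-2.12
    /// URL: nar/<hash>.nar.xz
    /// References: <hash>-glibc-2.37 <hash>-hello-2.12
    /// ```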
187 |     pub fn fetch_references(&self, mut path: StorePath) -> BoxFuture<Option<ParsedNAR>> {
188 |         let url = format!("{}/{}.narinfo", self.cache_url, path.hash());
189 | 
190 |         let parse_response = move |(url, data)| {
191 |             let url: String = url;
192 |             let data: Vec<u8> = match data {
193 |                 Some(v) => v,
194 |                 None => return Ok(None),
195 |             };
196 | 
197 |             let mut nar_path = None;
198 |             let mut result = Vec::new();
199 |             for line in data.split(|x| x == &b'\n') {
200 |                 if let Some(line) = line.strip_prefix(b"References: ") {
201 |                     let line = str::from_utf8(line)
202 |                         .map_err(|e| ErrorKind::Unicode(url.clone(), line.to_vec(), e))?;
203 |                     result = line
204 |                         .split_whitespace()
205 |                         .map(|new_path| {
206 |                             let new_origin = PathOrigin {
207 |                                 toplevel: false,
208 |                                 ..path.origin().into_owned()
209 |                             };
210 |                             StorePath::parse(new_origin, new_path).ok_or_else(|| {
211 |                                 ErrorKind::ParseStorePath(url.clone(), new_path.to_string()).into()
212 |                             })
213 |                         })
214 |                         .collect::<Result<Vec<_>>>()?;
215 |                 }
216 | 
217 |                 if let Some(line) = line.strip_prefix(b"StorePath: ") {
218 |                     let line = str::from_utf8(line)
219 |                         .map_err(|e| ErrorKind::Unicode(url.clone(), line.to_vec(), e))?;
220 |                     let line = line.trim();
221 | 
222 |                     path = StorePath::parse(path.origin().into_owned(), line)
223 |                         .ok_or_else(|| ErrorKind::ParseStorePath(url.clone(), line.to_string()))?;
224 |                 }
225 | 
226 |                 if let Some(line) = line.strip_prefix(b"URL: ") {
227 |                     let line = str::from_utf8(line)
228 |                         .map_err(|e| ErrorKind::Unicode(url.clone(), line.to_vec(), e))?;
229 |                     let line = line.trim();
230 | 
231 |                     nar_path = Some(line.to_owned());
232 |                 }
233 |             }
234 | 
235 |             Ok(Some(ParsedNAR {
236 |                 store_path: path,
237 |                 nar_path: nar_path
238 |                     .ok_or(ErrorKind::ParseStorePath(url, "no URL line found".into()))?,
239 |                 references: result,
240 |             }))
241 |         };
242 | 
243 |         Box::pin(
244 |             self.fetch(url)
245 |                 .and_then(|r| future::ready(parse_response(r))),
246 |         )
247 |     }
248 | 
249 |     /// Fetches the file listing for the given store path.
250 |     ///
251 |     /// A file listing is a tree of the files that the given store path contains.
252 |     pub async fn fetch_files<'a>(&self, path: &StorePath) -> Result<Option<FileTree>> {
253 |         let url_xz = format!("{}/{}.ls.xz", self.cache_url, path.hash());
254 |         let url_generic = format!("{}/{}.ls", self.cache_url, path.hash());
255 |         let name = format!("{}.json", path.hash());
256 | 
257 |         let (url, body) = self.fetch(url_generic).await?;
258 |         let contents = match body {
259 |             Some(v) => v,
260 |             None => {
261 |                 let (_, Some(body)) = self.fetch(url_xz.clone()).await? else {
262 |                     return Ok(None);
263 |                 };
264 | 
265 |                 let mut unpacked = vec![];
266 |                 XzDecoder::new(&body[..])
267 |                     .read_to_end(&mut unpacked)
268 |                     .chain_err(|| ErrorKind::Decode(url_xz))?;
269 | 
270 |                 unpacked
271 |             }
272 |         };
273 | 
274 |         let now = Instant::now();
275 |         let response: FileListingResponse =
276 |             serde_json::from_slice(&contents[..]).chain_err(|| {
277 |                 ErrorKind::ParseResponse(url, util::write_temp_file("file_listing.json", &contents))
278 |             })?;
279 |         let duration = now.elapsed();
280 | 
281 |         if duration > Duration::from_millis(2000) {
282 |             let secs = duration.as_secs();
283 |             let millis = duration.subsec_millis();
284 | 
285 |             writeln!(
286 |                 &mut io::stderr(),
287 |                 "warning: took a long time to parse: {}s:{:03}ms",
288 |                 secs,
289 |                 millis
290 |             )
291 |             .unwrap_or(());
292 |             if let Some(p) = util::write_temp_file(&name, &contents) {
293 |                 writeln!(
294 |                     &mut io::stderr(),
295 |                     "saved response to file: {}",
296 |                     p.to_string_lossy()
297 |                 )
298 |                 .unwrap_or(());
299 |             }
300 |         }
301 | 
302 |         Ok(Some(response.root.0))
303 |     }
304 | }
305 | 
306 | /// This data type represents the format of the `.ls` files fetched from the binary cache.
307 | ///
308 | /// The `.ls` file contains a JSON object. The structure of that object is mirrored by this
309 | /// struct for parsing the file.
310 | #[derive(Deserialize, Debug, PartialEq)]
311 | struct FileListingResponse {
312 |     /// Each `.ls` file has a "root" key that contains the file listing.
313 |     root: HydraFileListing,
314 | }
315 | 
316 | /// A wrapper for `FileTree` so that we can add trait implementations for it.
317 | ///
318 | /// (`FileTree` is defined in another module, so we cannot directly implement `Deserialize` for
319 | /// `FileTree` since that would be an orphan impl).
320 | #[derive(Debug, PartialEq)]
321 | struct HydraFileListing(FileTree);
322 | 
323 | /// We need a manual implementation for Deserialize here because file listings can contain non-unicode
324 | /// bytes, so we need to explicitly request that keys be deserialized as `ByteBuf` and not String.
325 | ///
326 | /// We cannot use the serde-derive machinery because the `tagged` enum variant does not support map keys
327 | /// that aren't valid unicode (since it relies on the Deserializer to tell it the type, and the JSON Deserializer
328 | /// will default to String for map keys).
329 | impl<'de> Deserialize<'de> for HydraFileListing {
330 |     fn deserialize<D: Deserializer<'de>>(d: D) -> result::Result<Self, D::Error> {
331 |         struct Root;
332 | 
333 |         // The visitor that implements deserialization for a file tree
334 |         impl<'de> Visitor<'de> for Root {
335 |             type Value = FileTree;
336 | 
337 |             fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
338 |                 write!(f, "a file listing (map)")
339 |             }
340 | 
341 |             fn visit_map<V: MapAccess<'de>>(
342 |                 self,
343 |                 mut access: V,
344 |             ) -> result::Result<Self::Value, V::Error> {
345 |                 const VARIANTS: &[&str] = &["regular", "directory", "symlink"];
346 | 
347 |                 // These will get filled in as we visit the map.
348 |                 // Note that not all of them will be available, depending on the `type` of the file listing
349 |                 // (`directory`, `symlink` or `regular`)
350 |                 let mut typ: Option<ByteBuf> = None;
351 |                 let mut size: Option<u64> = None;
352 |                 let mut executable: Option<bool> = None;
353 |                 let mut entries: Option<HashMap<ByteBuf, HydraFileListing>> = None;
354 |                 let mut target: Option<ByteBuf> = None;
355 | 
356 |                 while let Some(key) = access.next_key::<ByteBuf>()? {
357 |                     match &key as &[u8] {
358 |                         b"type" => {
359 |                             if typ.is_some() {
360 |                                 return Err(serde::de::Error::duplicate_field("type"));
361 |                             }
362 |                             typ = Some(access.next_value()?)
363 | } 364 | b"size" => { 365 | if size.is_some() { 366 | return Err(serde::de::Error::duplicate_field("size")); 367 | } 368 | size = Some(access.next_value()?) 369 | } 370 | b"executable" => { 371 | if executable.is_some() { 372 | return Err(serde::de::Error::duplicate_field("executable")); 373 | } 374 | executable = Some(access.next_value()?) 375 | } 376 | b"entries" => { 377 | if entries.is_some() { 378 | return Err(serde::de::Error::duplicate_field("entries")); 379 | } 380 | entries = Some(access.next_value()?) 381 | } 382 | b"target" => { 383 | if target.is_some() { 384 | return Err(serde::de::Error::duplicate_field("target")); 385 | } 386 | target = Some(access.next_value()?) 387 | } 388 | _ => { 389 | // We ignore all other fields to be more robust against changes in 390 | // the format 391 | access.next_value::()?; 392 | } 393 | } 394 | } 395 | 396 | // the type field must always be present so we know which type to expect 397 | let typ: &[u8] = &typ.ok_or_else(|| serde::de::Error::missing_field("type"))?; 398 | 399 | match typ { 400 | b"regular" => { 401 | let size = size.ok_or_else(|| serde::de::Error::missing_field("size"))?; 402 | let executable = executable.unwrap_or(false); 403 | Ok(FileTree::regular(size, executable)) 404 | } 405 | b"directory" => { 406 | let entries = 407 | entries.ok_or_else(|| serde::de::Error::missing_field("entries"))?; 408 | let entries = entries.into_iter().map(|(k, v)| (k, v.0)).collect(); 409 | Ok(FileTree::directory(entries)) 410 | } 411 | b"symlink" => { 412 | let target = 413 | target.ok_or_else(|| serde::de::Error::missing_field("target"))?; 414 | Ok(FileTree::symlink(target)) 415 | } 416 | _ => Err(serde::de::Error::unknown_variant( 417 | &String::from_utf8_lossy(typ), 418 | VARIANTS, 419 | )), 420 | } 421 | } 422 | } 423 | d.deserialize_map(Root).map(HydraFileListing) 424 | } 425 | } 426 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr( 2 | feature = "cargo-clippy", 3 | warn( 4 | clippy::manual_filter_map, 5 | clippy::map_unwrap_or, 6 | clippy::module_name_repetitions, 7 | clippy::print_stdout, 8 | clippy::unwrap_used, 9 | ) 10 | )] 11 | 12 | pub mod database; 13 | pub mod errors; 14 | pub mod files; 15 | pub mod frcode; 16 | pub mod hydra; 17 | pub mod listings; 18 | pub mod nixpkgs; 19 | pub mod package; 20 | pub mod util; 21 | pub mod workset; 22 | 23 | /// The URL of the binary cache that we use to fetch file listings and references. 24 | /// 25 | /// Hardcoded for now, but may be made a configurable option in the future. 26 | pub const CACHE_URL: &str = "https://cache.nixos.org"; 27 | -------------------------------------------------------------------------------- /src/listings.rs: -------------------------------------------------------------------------------- 1 | use std::fs::File; 2 | use std::io; 3 | use std::iter::FromIterator; 4 | 5 | use futures::{Stream, StreamExt, TryFutureExt}; 6 | use indexmap::map::Entry; 7 | use indexmap::IndexMap; 8 | use rayon::prelude::{IntoParallelRefIterator, ParallelIterator}; 9 | 10 | use crate::errors::{Error, ErrorKind, Result, ResultExt}; 11 | use crate::files::FileTree; 12 | use crate::hydra::Fetcher; 13 | use crate::nixpkgs; 14 | use crate::package::StorePath; 15 | use crate::workset::{WorkSet, WorkSetHandle, WorkSetWatch}; 16 | 17 | // We also add some additional sets that only show up in `nix-env -qa -A someSet`. 
18 | // 19 | // Some of these sets are not built directly by hydra. We still include them here 20 | // since parts of these sets may be built as dependencies of other packages 21 | // that are built by hydra. This way, our attribute path information is more 22 | // accurate. 23 | // 24 | // We only need sets that are not marked "recurseIntoAttrs" here, since if they are, 25 | // they are already part of normal_paths. 26 | pub const EXTRA_SCOPES: [&str; 6] = [ 27 | "xorg", 28 | "haskellPackages", 29 | "rPackages", 30 | "nodePackages", 31 | "coqPackages", 32 | "texlive.pkgs", 33 | ]; 34 | 35 | /// A stream of store paths (packages) with their associated file listings. 36 | /// 37 | /// If a store path has no file listing (for example, because it is not built by hydra), 38 | /// the file listing will be `None` instead. 39 | pub trait FileListingStream: Stream<Item = Result<Option<(StorePath, String, FileTree)>>> {} 40 | impl<T> FileListingStream for T where T: Stream<Item = Result<Option<(StorePath, String, FileTree)>>> 41 | {} 42 | 43 | /// Fetches all the file listings for the full closure of the given starting set of paths. 44 | /// 45 | /// This function will fetch the file listings of each path in the starting set. Additionally, it 46 | /// will also determine the references of each path and recursively fetch the file listings for those 47 | /// paths. 48 | /// 49 | /// The `jobs` argument is used to specify how many requests should be done in parallel. No more than 50 | /// `jobs` requests will be in-flight at any given time. 51 | fn fetch_listings_impl( 52 | fetcher: &Fetcher, 53 | jobs: usize, 54 | starting_set: Vec<StorePath>, 55 | ) -> (impl FileListingStream + '_, WorkSetWatch) { 56 | // Create the queue that will hold all the paths that still need processing. 57 | // Initially, only the starting set needs processing. 58 | 59 | // We can't use FromIterator here as we want shorter paths to win 60 | let mut map: IndexMap<String, StorePath> = IndexMap::with_capacity(starting_set.len()); 61 | 62 | for path in starting_set { 63 | let hash = path.hash().into(); 64 | match map.entry(hash) { 65 | Entry::Occupied(mut e) => { 66 | if e.get().origin().attr.len() > path.origin().attr.len() { 67 | e.insert(path); 68 | } 69 | } 70 | Entry::Vacant(e) => { 71 | e.insert(path); 72 | } 73 | }; 74 | } 75 | 76 | let workset = WorkSet::from_queue(map); 77 | 78 | // Processes a single store path, fetching the file listing for it and 79 | // adding its references to the queue 80 | let process = move |mut handle: WorkSetHandle<_, _>, path: StorePath| async move { 81 | let Some(parsed) = fetcher 82 | .fetch_references(path.clone()) 83 | .map_err(|e| Error::with_chain(e, ErrorKind::FetchReferences(path))) 84 | .await? 85 | else { 86 | return Ok(None); 87 | }; 88 | 89 | for reference in parsed.references { 90 | let hash = reference.hash().into_owned(); 91 | handle.add_work(hash, reference); 92 | } 93 | 94 | let path = parsed.store_path.clone(); 95 | let nar_path = parsed.nar_path; 96 | 97 | match fetcher.fetch_files(&parsed.store_path).await { 98 | Err(e) => Err(Error::with_chain(e, ErrorKind::FetchFiles(path))), 99 | Ok(Some(files)) => Ok(Some((path, nar_path, files))), 100 | Ok(None) => Ok(None), 101 | } 102 | }; 103 | 104 | // Process all paths in the queue, until the queue becomes empty. 105 | let watch = workset.watch(); 106 | let stream = workset 107 | .map(move |(handle, path)| process(handle, path)) 108 | .buffer_unordered(jobs); 109 | (stream, watch) 110 | } 111 | 112 | /// Tries to load the file listings for all paths from a cache file named `paths.cache`.
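/// The cache file is expected to hold a bincode-encoded `Vec<(StorePath, String, FileTree)>`
/// (this matches what the function deserializes below); a missing file is not treated as an
/// error and simply results in `Ok(None)`.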
113 | /// 114 | /// This function is used to implement the `--path-cache` option. 115 | pub fn try_load_paths_cache() -> Result<Option<(impl FileListingStream, WorkSetWatch)>> { 116 | let file = match File::open("paths.cache") { 117 | Ok(file) => file, 118 | Err(ref e) if e.kind() == io::ErrorKind::NotFound => return Ok(None), 119 | Err(e) => return Err(e).chain_err(|| ErrorKind::LoadPathsCache)?, 120 | }; 121 | 122 | let mut input = io::BufReader::new(file); 123 | let fetched: Vec<(StorePath, String, FileTree)> = 124 | bincode::deserialize_from(&mut input).chain_err(|| ErrorKind::LoadPathsCache)?; 125 | let workset = WorkSet::from_iter( 126 | fetched 127 | .into_iter() 128 | .map(|(path, nar, tree)| (path.hash().to_string(), Some((path, nar, tree)))), 129 | ); 130 | let watch = workset.watch(); 131 | let stream = workset.map(|r| { 132 | let (_handle, v) = r; 133 | Ok(v) 134 | }); 135 | 136 | Ok(Some((stream, watch))) 137 | } 138 | 139 | pub fn fetch_listings<'a>( 140 | fetcher: &'a Fetcher, 141 | jobs: usize, 142 | nixpkgs: &str, 143 | systems: Vec<Option<&str>>, 144 | show_trace: bool, 145 | ) -> Result<(impl FileListingStream + 'a, WorkSetWatch)> { 146 | let mut scopes = vec![None]; 147 | scopes.extend(EXTRA_SCOPES.map(Some)); 148 | 149 | let mut all_queries = vec![]; 150 | for system in systems { 151 | for scope in &scopes { 152 | all_queries.push((system, scope)); 153 | } 154 | } 155 | 156 | // Collect results in parallel. 157 | let all_paths = all_queries 158 | .par_iter() 159 | .flat_map_iter(|&(system, scope)| { 160 | nixpkgs::query_packages(nixpkgs, system, scope.as_deref(), show_trace) 161 | .map(|x| x.chain_err(|| ErrorKind::QueryPackages)) 162 | }) 163 | .collect::<Result<Vec<_>>>()?; 164 | 165 | Ok(fetch_listings_impl(fetcher, jobs, all_paths)) 166 | } 167 | -------------------------------------------------------------------------------- /src/nixpkgs.rs: -------------------------------------------------------------------------------- 1 | //! Read package information from nix-env. 2 | //! 3 | //! This module implements the gathering of the initial set of root store paths to fetch. 4 | //! We parse the output of `nix-env --query` to figure out all accessible store paths with their attribute paths 5 | //! and hashes. 6 | use std::error; 7 | use std::fmt; 8 | use std::io::{self, Read}; 9 | use std::process::{Child, ChildStdout, Command, Stdio}; 10 | 11 | use xml; 12 | use xml::common::{Position, TextPosition}; 13 | use xml::reader::{EventReader, XmlEvent}; 14 | 15 | use crate::package::{PathOrigin, StorePath}; 16 | 17 | /// Calls `nix-env` to list the packages in the given nixpkgs. 18 | /// 19 | /// The `nixpkgs` argument can either be a path to a nixpkgs checkout or another expression 20 | /// accepted by `nix-env -f`, such as `<nixpkgs>` or `http://example.org/nixpkgs.tar.bz`. 21 | /// 22 | /// If system is `Some(platform)`, nix-env is called with the `--argstr system <system>` argument so that 23 | /// the specified platform is used instead of the default host system platform. 24 | /// 25 | /// If scope is `Some(attr)`, nix-env is called with the `-A attr` argument so only packages that are a member 26 | /// of `attr` are returned. 27 | /// 28 | /// The function returns an Iterator over the packages returned by nix-env.
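///
/// # Example
///
/// A minimal usage sketch (not part of the original docs; the `<nixpkgs>` argument and
/// the error handling are illustrative only):
///
/// ```no_run
/// use nix_index::nixpkgs;
///
/// // Query every package of the default nixpkgs for the host platform.
/// for package in nixpkgs::query_packages("<nixpkgs>", None, None, false) {
///     let package = package.expect("nix-env failed");
///     println!("{}", package.as_str());
/// }
/// ```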
29 | pub fn query_packages( 30 | nixpkgs: &str, 31 | system: Option<&str>, 32 | scope: Option<&str>, 33 | show_trace: bool, 34 | ) -> PackagesQuery<ChildStdout> { 35 | let mut cmd = Command::new("nix-env"); 36 | cmd.arg("-qaP") 37 | .arg("--out-path") 38 | .arg("--xml") 39 | .arg("--arg") 40 | .arg("config") 41 | .arg("{ allowAliases = false; }") // override default nixpkgs config discovery 42 | .arg("--arg") 43 | .arg("overlays") 44 | .arg("[ ]") 45 | .arg("--file") 46 | .arg(nixpkgs) 47 | .stdout(Stdio::piped()) 48 | .stderr(Stdio::piped()) 49 | .stdin(Stdio::null()); 50 | 51 | if let Some(system) = system { 52 | cmd.arg("--argstr").arg("system").arg(system); 53 | } 54 | 55 | if let Some(scope) = scope { 56 | cmd.arg("-A").arg(scope); 57 | } 58 | 59 | if show_trace { 60 | cmd.arg("--show-trace"); 61 | } 62 | 63 | PackagesQuery { 64 | parser: None, 65 | child: None, 66 | cmd: Some(cmd), 67 | } 68 | } 69 | 70 | /// An iterator that parses the output of nix-env and returns parsed store paths. 71 | /// 72 | /// Use `query_packages` to create a value of this type. 73 | pub struct PackagesQuery<R: Read> { 74 | parser: Option<PackagesParser<R>>, 75 | child: Option<Child>, 76 | cmd: Option<Command>, 77 | } 78 | 79 | impl PackagesQuery<ChildStdout> { 80 | /// Spawns the nix-env subprocess and initializes the parser. 81 | /// 82 | /// If the subprocess was already spawned, does nothing. 83 | fn ensure_initialized(&mut self) -> Result<(), Error> { 84 | if let Some(mut cmd) = self.cmd.take() { 85 | let mut child = cmd.spawn()?; 86 | 87 | let stdout = child.stdout.take().expect("should have stdout pipe"); 88 | let parser = PackagesParser::new(stdout); 89 | 90 | self.child = Some(child); 91 | self.parser = Some(parser); 92 | } 93 | Ok(()) 94 | } 95 | 96 | /// Waits for the subprocess to exit and checks whether it has returned a non-zero exit code 97 | /// (= failed with an error). 98 | /// 99 | /// If the exit code was non-zero, returns Some(err), else it returns None. 100 | fn check_error(&mut self) -> Option<Error> { 101 | let mut run = || { 102 | let child = match self.child.take() { 103 | Some(c) => c, 104 | None => return Ok(()), 105 | }; 106 | let result = child.wait_with_output()?; 107 | 108 | if !result.status.success() { 109 | let message = String::from_utf8_lossy(&result.stderr); 110 | 111 | return Err(Error::Command(format!( 112 | "nix-env failed with {}:\n{}", 113 | result.status, message, 114 | ))); 115 | } 116 | 117 | Ok(()) 118 | }; 119 | 120 | run().err() 121 | } 122 | } 123 | 124 | impl Iterator for PackagesQuery<ChildStdout> { 125 | type Item = Result<StorePath, Error>; 126 | 127 | fn next(&mut self) -> Option<Self::Item> { 128 | if let Err(e) = self.ensure_initialized() { 129 | return Some(Err(e)); 130 | } 131 | self.parser.take().and_then(|mut parser| { 132 | parser 133 | .next() 134 | .map(|v| { 135 | self.parser = Some(parser); 136 | // When the parser throws an error, we first wait for the subprocess to exit. 137 | // 138 | // If the subprocess returned an error, then the parser probably tried to parse garbage output 139 | // so we will ignore the parser error and instead return the error printed by the subprocess. 140 | v.map_err(|e| self.check_error().unwrap_or_else(|| Error::from(e))) 141 | }) 142 | .or_else(|| { 143 | self.parser = None; 144 | // At the end, we should check if the subprocess exited successfully. 145 | self.check_error().map(Err) 146 | }) 147 | }) 148 | } 149 | } 150 | 151 | /// Parses the XML output of `nix-env` and returns individual store paths.
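///
/// The parser only cares about `item` and `output` elements. A hand-written sketch of the
/// XML shape it expects (not captured from a real `nix-env` run; the store hash is elided):
///
/// ```text
/// <items>
///   <item attrPath="hello" system="x86_64-linux">
///     <output name="out" path="/nix/store/<hash>-hello-2.12.1" />
///   </item>
/// </items>
/// ```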
152 | struct PackagesParser<R: Read> { 153 | events: EventReader<R>, 154 | current_item: Option<(String, String)>, 155 | } 156 | 157 | /// A parser error that may occur during parsing `nix-env`'s output. 158 | #[derive(Debug)] 159 | pub struct ParserError { 160 | position: TextPosition, 161 | kind: ParserErrorKind, 162 | } 163 | 164 | /// Enumerates all possible error kinds that may occur during parsing. 165 | #[derive(Debug)] 166 | pub enum ParserErrorKind { 167 | /// Found an element with the tag `element_name` that should only occur inside 168 | /// elements with the tag `expected_parent` but it occurred as child of a different parent. 169 | MissingParent { 170 | element_name: String, 171 | expected_parent: String, 172 | }, 173 | 174 | /// An element occurred as a child of `found_parent`, but 175 | /// we know that elements with the tag `element_name` should never have that as 176 | /// a parent. 177 | ParentNotAllowed { 178 | element_name: String, 179 | found_parent: String, 180 | }, 181 | 182 | /// The required attribute `attribute_name` was missing on an element with the tag `element_name`. 183 | MissingAttribute { 184 | element_name: String, 185 | attribute_name: String, 186 | }, 187 | 188 | /// Found the end tag for `element_name` without a matching start tag. 189 | MissingStartTag { element_name: String }, 190 | 191 | /// An XML syntax error. 192 | XmlError { error: xml::reader::Error }, 193 | 194 | /// A store path in the output of `nix-env` could not be parsed. All valid store paths 195 | /// need to match the format `$(STOREDIR)/$(HASH)-$(NAME)`. 196 | InvalidStorePath { path: String }, 197 | } 198 | 199 | impl fmt::Display for ParserError { 200 | fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { 201 | use self::ParserErrorKind::*; 202 | write!(f, "error at {}: ", self.position)?; 203 | match self.kind { 204 | MissingParent { 205 | ref element_name, 206 | ref expected_parent, 207 | } => { 208 | write!( 209 | f, 210 | "element {} appears outside of expected parent {}", 211 | element_name, expected_parent 212 | ) 213 | } 214 | ParentNotAllowed { 215 | ref element_name, 216 | ref found_parent, 217 | } => { 218 | write!( 219 | f, 220 | "element {} must not appear as child of {}", 221 | element_name, found_parent 222 | ) 223 | } 224 | MissingAttribute { 225 | ref element_name, 226 | ref attribute_name, 227 | } => { 228 | write!( 229 | f, 230 | "element {} must have an attribute named {}", 231 | element_name, attribute_name 232 | ) 233 | } 234 | MissingStartTag { ref element_name } => { 235 | write!(f, "element {} does not have a start tag", element_name) 236 | } 237 | XmlError { ref error } => write!(f, "document not well-formed: {}", error), 238 | InvalidStorePath { ref path } => { 239 | write!( 240 | f, 241 | "store path does not match expected format /prefix/hash-name: {}", 242 | path 243 | ) 244 | } 245 | } 246 | } 247 | } 248 | 249 | impl<R: Read> PackagesParser<R> { 250 | /// Creates a new parser that reads the `nix-env` XML output from the given reader. 251 | pub fn new(reader: R) -> PackagesParser<R> { 252 | PackagesParser { 253 | events: EventReader::new(reader), 254 | current_item: None, 255 | } 256 | } 257 | 258 | /// Shorthand for exiting with an error at the current position. 259 | fn err(&self, kind: ParserErrorKind) -> ParserError { 260 | ParserError { 261 | position: self.events.position(), 262 | kind, 263 | } 264 | } 265 | 266 | /// Tries to read the next `StorePath` from the reader or fails with an error 267 | /// if there was a parse failure.
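///
/// The only parser state is `current_item`: it is set when an `item` start tag with its
/// `attrPath` and `system` attributes is seen, consulted when a nested `output` element is
/// turned into a `StorePath`, and cleared again on the matching end tag.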
268 | /// 269 | /// Returns Ok(None) if the end of the stream was reached. 270 | /// 271 | /// This function is like `.next` from `Iterator`, but allows us to use `try! / ?` since it 272 | /// returns `Result<Option<StorePath>, ParserError>` instead of `Option<Result<StorePath, ParserError>>`. 273 | fn next_err(&mut self) -> Result<Option<StorePath>, ParserError> { 274 | use self::ParserErrorKind::*; 275 | use self::XmlEvent::*; 276 | 277 | loop { 278 | let event = self 279 | .events 280 | .next() 281 | .map_err(|e| self.err(XmlError { error: e }))?; 282 | match event { 283 | StartElement { 284 | name: element_name, 285 | attributes, 286 | .. 287 | } => { 288 | if element_name.local_name == "item" { 289 | if self.current_item.is_some() { 290 | return Err(self.err(ParentNotAllowed { 291 | element_name: "item".to_string(), 292 | found_parent: "item".to_string(), 293 | })); 294 | } 295 | 296 | let mut attr_path = None; 297 | let mut system = None; 298 | 299 | for attr in attributes { 300 | if attr.name.local_name == "attrPath" { 301 | attr_path = Some(attr.value); 302 | continue; 303 | } 304 | 305 | if attr.name.local_name == "system" { 306 | system = Some(attr.value); 307 | continue; 308 | } 309 | } 310 | 311 | let attr_path = attr_path.ok_or_else(|| { 312 | self.err(MissingAttribute { 313 | element_name: "item".into(), 314 | attribute_name: "attrPath".into(), 315 | }) 316 | })?; 317 | 318 | let system = system.ok_or_else(|| { 319 | self.err(MissingAttribute { 320 | element_name: "item".into(), 321 | attribute_name: "system".into(), 322 | }) 323 | })?; 324 | 325 | self.current_item = Some((attr_path, system)); 326 | continue; 327 | } 328 | 329 | if element_name.local_name == "output" { 330 | if let Some((item, system)) = self.current_item.clone() { 331 | let mut output_name = None; 332 | let mut output_path = None; 333 | 334 | for attr in attributes { 335 | if attr.name.local_name == "name" { 336 | output_name = Some(attr.value); 337 | continue; 338 | } 339 | 340 | if attr.name.local_name == "path" { 341 | output_path = Some(attr.value); 342 | continue; 343 | } 344 | } 345 | 346 | let output_name = output_name.ok_or_else(|| { 347 | self.err(MissingAttribute { 348 | element_name: "output".into(), 349 | attribute_name: "name".into(), 350 | }) 351 | })?; 352 | 353 | let output_path = output_path.ok_or_else(|| { 354 | self.err(MissingAttribute { 355 | element_name: "output".into(), 356 | attribute_name: "path".into(), 357 | }) 358 | })?; 359 | 360 | let origin = PathOrigin { 361 | attr: item, 362 | output: output_name, 363 | toplevel: true, 364 | system: Some(system), 365 | }; 366 | let store_path = StorePath::parse(origin, &output_path); 367 | let store_path = store_path 368 | .ok_or_else(|| self.err(InvalidStorePath { path: output_path }))?; 369 | 370 | return Ok(Some(store_path)); 371 | } else { 372 | return Err(self.err(MissingParent { 373 | element_name: "output".into(), 374 | expected_parent: "item".into(), 375 | })); 376 | } 377 | } 378 | } 379 | 380 | EndElement { name: element_name } => { 381 | if element_name.local_name == "item" { 382 | if self.current_item.is_none() { 383 | return Err(self.err(MissingStartTag { 384 | element_name: "item".into(), 385 | })); 386 | } 387 | self.current_item = None 388 | } 389 | } 390 | 391 | EndDocument => break, 392 | 393 | _ => {} 394 | } 395 | } 396 | 397 | Ok(None) 398 | } 399 | } 400 | 401 | impl<R: Read> Iterator for PackagesParser<R> { 402 | type Item = Result<StorePath, ParserError>; 403 | 404 | fn next(&mut self) -> Option<Result<StorePath, ParserError>> { 405 | match self.next_err() { 406 | Err(e) => Some(Err(e)), 407 | Ok(Some(i)) => Some(Ok(i)), 408 | Ok(None) => None, 409 | } 410
| } 411 | } 412 | 413 | /// Enumeration of all the possible errors that may happen during querying the packages. 414 | #[derive(Debug)] 415 | pub enum Error { 416 | /// Parsing of the output failed 417 | Parse(ParserError), 418 | 419 | /// An IO error occurred 420 | Io(io::Error), 421 | 422 | /// nix-env failed with an error message 423 | Command(String), 424 | } 425 | 426 | impl error::Error for Error { 427 | fn description(&self) -> &str { 428 | match *self { 429 | Error::Parse(_) => "nix-env output parse error", 430 | Error::Io(_) => "io error", 431 | Error::Command(_) => "nix-env error", 432 | } 433 | } 434 | } 435 | 436 | impl fmt::Display for Error { 437 | fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { 438 | use self::Error::*; 439 | match *self { 440 | Parse(ref e) => write!(f, "parsing XML output of nix-env failed: {}", e), 441 | Io(ref e) => write!(f, "IO error: {}", e), 442 | Command(ref e) => write!(f, "nix-env failed with error: {}", e), 443 | } 444 | } 445 | } 446 | 447 | impl From<io::Error> for Error { 448 | fn from(err: io::Error) -> Error { 449 | Error::Io(err) 450 | } 451 | } 452 | 453 | impl From<ParserError> for Error { 454 | fn from(err: ParserError) -> Error { 455 | Error::Parse(err) 456 | } 457 | } 458 | -------------------------------------------------------------------------------- /src/package.rs: -------------------------------------------------------------------------------- 1 | //! Data types for representing meta information about packages and store paths. 2 | //! 3 | //! The main data type in this module is `StorePath`, which represents a single output of 4 | //! some nix derivation. We also sometimes call a `StorePath` a package, to avoid 5 | //! confusion with file paths. 6 | use std::borrow::Cow; 7 | use std::io::{self, Write}; 8 | use std::str; 9 | 10 | use serde::{Deserialize, Serialize}; 11 | 12 | /// A type for describing how to reach a given store path. 13 | /// 14 | /// When building an index, we collect store paths from various sources, such 15 | /// as the output of nix-env -qa and the references of those store paths. 16 | /// 17 | /// To show the user how we reached a given store path, each store path tracks 18 | /// its origin. For example, for top-level store paths, we know which attribute 19 | /// of nixpkgs builds this store path. 20 | #[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord, Hash)] 21 | pub struct PathOrigin { 22 | /// The attribute of nixpkgs that led to this store path being discovered. 23 | /// 24 | /// If the store path is a top-level path, then the store path corresponds 25 | /// to an output of the derivation assigned to this attribute path. 26 | pub attr: String, 27 | 28 | /// The output of the derivation specified by `attr` that we want to refer to. 29 | /// 30 | /// If a derivation does not support multiple outputs, then this should just be "out", 31 | /// the default output. 32 | pub output: String, 33 | 34 | /// Indicates that this path is listed in the output of nix-env -qaP --out-path. 35 | /// 36 | /// We may index paths for which we do not know the exact attribute path. In this 37 | /// case, `attr` and `output` will be set to the values for the top-level path that 38 | /// contains the path in its closure. (This is also how we discovered the path in the 39 | /// first place: through being referenced by another, top-level path). It is unspecified 40 | /// which top-level path they will refer to, though, if there exist multiple ones whose closure 41 | /// contains this path.
42 | pub toplevel: bool, 43 | 44 | /// Target system 45 | pub system: Option<String>, 46 | } 47 | 48 | impl PathOrigin { 49 | /// Encodes a path origin as a sequence of bytes, such that it can be decoded using `decode`. 50 | /// 51 | /// The encoding does not use the bytes `0x00` or `0x01`, as long as neither `attr` nor `output` 52 | /// contain them. This is important since it allows the result to be encoded with [frcode](mod.frcode.html). 53 | /// 54 | /// # Panics 55 | /// 56 | /// The `attr` and `output` of the path origin must not contain the byte value `0x02`, otherwise 57 | /// this function panics. 58 | /// 59 | /// # Errors 60 | /// 61 | /// Returns any errors that were encountered while writing to the supplied `Writer`. 62 | pub fn encode<W: Write>(&self, writer: &mut W) -> io::Result<()> { 63 | assert!( 64 | !self.attr.contains('\x02'), 65 | "origin attribute path must not contain the byte value 0x02 anywhere" 66 | ); 67 | assert!( 68 | !self.output.contains('\x02'), 69 | "origin output name must not contain the byte value 0x02 anywhere" 70 | ); 71 | write!( 72 | writer, 73 | "{}\x02{}{}", 74 | self.attr, 75 | self.output, 76 | if self.toplevel { "" } else { "\x02" } 77 | )?; 78 | Ok(()) 79 | } 80 | 81 | /// Decodes a path origin that was encoded by the `encode` function of this type. 82 | /// 83 | /// Returns the decoded path origin, or `None` if `buf` could not be decoded as path origin. 84 | pub fn decode(buf: &[u8]) -> Option<PathOrigin> { 85 | let mut iter = buf.splitn(2, |c| *c == b'\x02'); 86 | iter.next() 87 | .and_then(|v| String::from_utf8(v.to_vec()).ok()) 88 | .and_then(|attr| { 89 | iter.next() 90 | .and_then(|v| String::from_utf8(v.to_vec()).ok()) 91 | .map(|mut output| { 92 | let mut toplevel = true; 93 | if let Some(l) = output.pop() { 94 | if l == '\x02' { 95 | toplevel = false 96 | } else { 97 | output.push(l) 98 | } 99 | } 100 | PathOrigin { 101 | attr, 102 | output, 103 | toplevel, 104 | system: None, 105 | } 106 | }) 107 | }) 108 | } 109 | } 110 | 111 | /// Represents a store path which is something that is produced by `nix-build`. 112 | /// 113 | /// A store path represents an output in the nix store, matching the pattern 114 | /// `store_dir/hash-name` (most often, `store_dir` will be `/nix/store`). 115 | /// 116 | /// Using nix, a store path can be produced by calling `nix-build`. 117 | /// 118 | /// Note that even if a store path is a directory, the files inside that directory 119 | /// themselves are *not* store paths. For example, the following is a store path: 120 | /// 121 | /// ```text 122 | /// /nix/store/010yd8jls8w4vcnql4zhjbnyp2yay5pl-bash-4.4-p5 123 | /// ``` 124 | /// 125 | /// while this is not: 126 | /// 127 | /// ```text 128 | /// /nix/store/010yd8jls8w4vcnql4zhjbnyp2yay5pl-bash-4.4-p5/bin/ 129 | /// ``` 130 | /// 131 | /// To avoid any confusion with file paths, we sometimes also refer to a store path as a *package*. 132 | #[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord, Hash)] 133 | pub struct StorePath { 134 | store_dir: String, 135 | hash: String, 136 | name: String, 137 | origin: PathOrigin, 138 | } 139 | 140 | impl StorePath { 141 | /// Parse a store path from an absolute file path. 142 | /// 143 | /// Since this function does not know where that path comes from, it takes 144 | /// `origin` as an argument. 145 | /// 146 | /// This function returns `None` if the path could not be parsed as a 147 | /// store path.
You should not rely on that to check whether a path is a store 148 | /// path though, since it only does minimal validation (for example, it does 149 | /// not check the length of the hash). 150 | pub fn parse(origin: PathOrigin, path: &str) -> Option<StorePath> { 151 | let mut parts = path.splitn(2, '-'); 152 | parts.next().and_then(|prefix| { 153 | parts.next().and_then(|name| { 154 | let mut iter = prefix.rsplitn(2, '/'); 155 | iter.next().map(|hash| { 156 | let store_dir = iter.next().unwrap_or(""); 157 | StorePath { 158 | store_dir: store_dir.to_string(), 159 | hash: hash.to_string(), 160 | name: name.to_string(), 161 | origin, 162 | } 163 | }) 164 | }) 165 | }) 166 | } 167 | 168 | /// Encodes a store path as a sequence of bytes, so that it can be decoded with `decode`. 169 | /// 170 | /// The encoding does not use the bytes `0x00` or `0x01`, as long as none of the fields of 171 | /// this path contain those bytes (this includes `store_dir`, `hash`, `name` and `origin`). 172 | /// This is important since it allows the result to be encoded with [frcode](mod.frcode.html). 173 | /// 174 | /// # Panics 175 | /// 176 | /// The `attr` and `output` of the path origin must not contain the byte value `0x02`, otherwise 177 | /// this function panics. 178 | pub fn encode(&self) -> io::Result<Vec<u8>> { 179 | let mut result = Vec::with_capacity(self.as_str().len()); 180 | result.extend(self.as_str().bytes()); 181 | result.push(b'\n'); 182 | self.origin().encode(&mut result)?; 183 | Ok(result) 184 | } 185 | 186 | pub fn decode(buf: &[u8]) -> Option<StorePath> { 187 | let mut parts = buf.splitn(2, |c| *c == b'\n'); 188 | parts 189 | .next() 190 | .and_then(|v| str::from_utf8(v).ok()) 191 | .and_then(|path| { 192 | parts 193 | .next() 194 | .and_then(PathOrigin::decode) 195 | .and_then(|origin| StorePath::parse(origin, path)) 196 | }) 197 | } 198 | 199 | /// Returns the name of the store path, which is the part of the file name that 200 | /// is not the hash. In the above example, it would be `bash-4.4-p5`. 201 | /// 202 | /// # Example 203 | /// 204 | /// ``` 205 | /// use nix_index::package::{PathOrigin, StorePath}; 206 | /// 207 | /// let origin = PathOrigin { attr: "dummy".to_string(), output: "out".to_string(), toplevel: true, system: None }; 208 | /// let store_path = StorePath::parse(origin, "/nix/store/010yd8jls8w4vcnql4zhjbnyp2yay5pl-bash-4.4-p5").unwrap(); 209 | /// assert_eq!(&store_path.name(), "bash-4.4-p5"); 210 | /// ``` 211 | pub fn name(&self) -> Cow<str> { 212 | Cow::Borrowed(&self.name) 213 | } 214 | 215 | /// The hash of the store path. This is the part just before the name of 216 | /// the path. 217 | /// 218 | /// # Example 219 | /// 220 | /// ``` 221 | /// use nix_index::package::{PathOrigin, StorePath}; 222 | /// 223 | /// let origin = PathOrigin { attr: "dummy".to_string(), output: "out".to_string(), toplevel: true, system: None }; 224 | /// let store_path = StorePath::parse(origin, "/nix/store/010yd8jls8w4vcnql4zhjbnyp2yay5pl-bash-4.4-p5").unwrap(); 225 | /// assert_eq!(&store_path.hash(), "010yd8jls8w4vcnql4zhjbnyp2yay5pl"); 226 | /// ``` 227 | pub fn hash(&self) -> Cow<str> { 228 | Cow::Borrowed(&self.hash) 229 | } 230 | 231 | /// The store dir for which this store path was built. 232 | /// 233 | /// Currently, this will be `/nix/store` in almost all cases, but 234 | /// we include it here anyway for completeness.
235 | /// 236 | /// # Example 237 | /// 238 | /// ``` 239 | /// use nix_index::package::{PathOrigin, StorePath}; 240 | /// 241 | /// let origin = PathOrigin { attr: "dummy".to_string(), output: "out".to_string(), toplevel: true, system: None }; 242 | /// let store_path = StorePath::parse(origin, "/nix/store/010yd8jls8w4vcnql4zhjbnyp2yay5pl-bash-4.4-p5").unwrap(); 243 | /// assert_eq!(&store_path.store_dir(), "/nix/store"); 244 | /// ``` 245 | pub fn store_dir(&self) -> Cow<str> { 246 | Cow::Borrowed(&self.store_dir) 247 | } 248 | 249 | /// Converts the store path back into an absolute path. 250 | /// 251 | /// # Example 252 | /// 253 | /// ``` 254 | /// use nix_index::package::{PathOrigin, StorePath}; 255 | /// 256 | /// let origin = PathOrigin { attr: "dummy".to_string(), output: "out".to_string(), toplevel: true, system: None }; 257 | /// let store_path = StorePath::parse(origin, "/nix/store/010yd8jls8w4vcnql4zhjbnyp2yay5pl-bash-4.4-p5").unwrap(); 258 | /// assert_eq!(&store_path.as_str(), "/nix/store/010yd8jls8w4vcnql4zhjbnyp2yay5pl-bash-4.4-p5"); 259 | /// ``` 260 | pub fn as_str(&self) -> Cow<str> { 261 | Cow::Owned(format!("{}/{}-{}", self.store_dir, self.hash, self.name)) 262 | } 263 | 264 | /// Returns the origin that describes how we discovered this store path. 265 | /// 266 | /// See the documentation of `PathOrigin` for more information about this field. 267 | /// 268 | /// # Example 269 | /// 270 | /// ``` 271 | /// use nix_index::package::{PathOrigin, StorePath}; 272 | /// 273 | /// let origin = PathOrigin { attr: "dummy".to_string(), output: "out".to_string(), toplevel: true, system: None }; 274 | /// let store_path = StorePath::parse(origin.clone(), "/nix/store/010yd8jls8w4vcnql4zhjbnyp2yay5pl-bash-4.4-p5").unwrap(); 275 | /// assert_eq!(store_path.origin().as_ref(), &origin); 276 | /// ``` 277 | pub fn origin(&self) -> Cow<PathOrigin> { 278 | Cow::Borrowed(&self.origin) 279 | } 280 | } 281 | -------------------------------------------------------------------------------- /src/util.rs: -------------------------------------------------------------------------------- 1 | //! Small but reusable helper functions. 2 | use std::env; 3 | use std::fs::OpenOptions; 4 | use std::io::{self, Write}; 5 | use std::path::PathBuf; 6 | 7 | /// Writes a file to the temp directory with a name that is made of the supplied 8 | /// base and a suffix if a file with that name already exists. 9 | /// 10 | /// Returns the path of the file if the file was written successfully, None otherwise. 11 | /// None means that an IO error occurred during writing the file. 12 | pub fn write_temp_file(base_name: &str, contents: &[u8]) -> Option<PathBuf> { 13 | let mut path = None; 14 | for i in 0.. { 15 | let mut this_path = env::temp_dir(); 16 | if i == 0 { 17 | this_path.push(base_name); 18 | } else { 19 | this_path.push(format!("{}.{}", base_name, i)); 20 | } 21 | let temp_file = OpenOptions::new() 22 | .write(true) 23 | .create_new(true) 24 | .open(&this_path); 25 | match temp_file { 26 | Ok(mut file) => { 27 | path = file.write_all(contents).map(|_| this_path).ok(); 28 | break; 29 | } 30 | Err(e) => { 31 | if e.kind() != io::ErrorKind::AlreadyExists { 32 | break; 33 | } 34 | } 35 | } 36 | } 37 | path 38 | } 39 | -------------------------------------------------------------------------------- /src/workset.rs: -------------------------------------------------------------------------------- 1 | //! A task queue where the processing of tasks can generate additional subtasks. 2 | //! 3 | //!
This module implements a stream where the consumer of the stream can request 4 | //! additional items to be added to the stream. An example where this is useful 5 | //! is fetching a package including all the transitive dependencies: we start 6 | //! with a stream that just yields the package we want to fetch. The consumer can 7 | //! then fetch a package and add all dependencies of that package to the stream, 8 | //! adding them to the set of packages that need to be fetched. 9 | //! 10 | //! The data structure is called a work set because it allows assigning a key to 11 | //! each item to avoid duplicates. A new item will only be added if no prior item 12 | //! had the same key. 13 | //! 14 | //! # Example 15 | //! 16 | //! ```rust 17 | //! extern crate futures; 18 | //! extern crate nix_index; 19 | //! 20 | //! use futures::{Stream, stream::StreamExt}; 21 | //! use nix_index::workset::{WorkSet}; 22 | //! use std::iter::{self, FromIterator}; 23 | //! 24 | //! #[derive(Clone)] 25 | //! struct Package { 26 | //! name: String, 27 | //! dependencies: Vec<Package>, 28 | //! } 29 | //! 30 | //! fn main() { 31 | //! // set up some data 32 | //! let pkgA = Package { name: "a".to_string(), dependencies: vec![] }; 33 | //! let pkgB = Package { name: "b".to_string(), dependencies: vec![] }; 34 | //! let pkgC = Package { name: "c".to_string(), dependencies: vec![pkgA.clone(), pkgB] }; 35 | //! let pkgD = Package { name: "d".to_string(), dependencies: vec![pkgA, pkgC] }; 36 | //! 37 | //! // construct a workset that has `pkgD` as initial item. 38 | //! let workset = WorkSet::from_iter(iter::once((pkgD.name.clone(), pkgD))); 39 | //! 40 | //! // fetch the names of all transitive dependencies of `pkgD`. In real cases, 41 | //! // this would probably perform some network requests or other IO with futures. 42 | //! let all_packages = workset.map(|(mut handle, pkg)| { 43 | //! let Package { name, dependencies } = pkg; 44 | //! // add all dependencies to the workset 45 | //! for pkg in dependencies { 46 | //! handle.add_work(pkg.name.clone(), pkg); 47 | //! } 48 | //! name 49 | //! }); 50 | //! 51 | //! // all_packages is now a stream of all the names of the transitive dependencies of pkgD 52 | //! // and pkgD itself 53 | //! } 54 | //! ``` 55 | use std::cell::RefCell; 56 | use std::collections::HashSet; 57 | use std::hash::Hash; 58 | use std::iter::FromIterator; 59 | use std::pin::Pin; 60 | use std::rc::{Rc, Weak}; 61 | use std::task::{Context, Poll}; 62 | 63 | use futures::Stream; 64 | use indexmap::IndexMap; 65 | 66 | /// This structure holds the internal state of our queue. 67 | struct Shared<K, V> { 68 | /// The set of keys that have already been added to the queue sometime in the past. 69 | /// Any item whose key is in this set does not need to be added again. 70 | seen: HashSet<K>, 71 | 72 | /// The map of items that still need to be processed. As long as this is non-empty, 73 | /// there is still work remaining. 74 | queue: IndexMap<K, V>, 75 | } 76 | 77 | impl<K: Eq + Hash, V> Shared<K, V> { 78 | /// Add a task to the work queue if the given key still needs to be processed. 79 | /// Returns `true` if a new item was added, `false` otherwise. 80 | fn insert(&mut self, k: K, v: V) -> bool { 81 | use indexmap::map::Entry::*; 82 | if !self.seen.contains(&k) { 83 | match self.queue.entry(k) { 84 | Occupied(_) => return false, 85 | Vacant(e) => { 86 | e.insert(v); 87 | return true; 88 | } 89 | } 90 | } 91 | false 92 | } 93 | } 94 | 95 | /// A queue where the consumer can request new items to be added to the queue.
96 | /// 97 | /// To construct a new instance of this type, use `WorkSet::from_iter`. 98 | /// 99 | /// The queue terminates if there is no work left that needs processing and all 100 | /// `WorkSetHandle`s have been dropped (if there are `WorkSetHandle`s alive 101 | /// then it is still possible to call `add_work`, so the stream cannot end even 102 | /// if there is no work item available at the current time). 103 | pub struct WorkSet<K, V> { 104 | /// A reference to the state of the queue. 105 | /// This reference is shared with all `WorkSetHandle`s. 106 | state: Rc<RefCell<Shared<K, V>>>, 107 | } 108 | 109 | /// A work set handle allows you to add new items to the queue. 110 | /// 111 | /// As long as there are still `WorkSetHandle`s alive, the queue 112 | /// will not terminate. 113 | pub struct WorkSetHandle<K, V> { 114 | state: Rc<RefCell<Shared<K, V>>>, 115 | } 116 | 117 | impl<K: Eq + Hash, V> WorkSetHandle<K, V> { 118 | /// Adds a new item to the queue but only if this is 119 | /// the first time an item with the specified key is added. 120 | /// 121 | /// Returns `true` if this was a new item and therefore new work 122 | /// was added to the queue or `false` if there already was an item for 123 | /// the given key. 124 | pub fn add_work(&mut self, key: K, work: V) -> bool { 125 | self.state.borrow_mut().insert(key, work) 126 | } 127 | } 128 | 129 | /// An observer for `WorkSet` that provides status information 130 | /// about the queue. 131 | /// 132 | /// Note that this trait is not dependent on the type of items or keys 133 | /// in the work set, as it only provides meta information about the queue. 134 | pub trait WorkSetObserver { 135 | /// Returns the number of items in the queue that still need processing. 136 | fn queue_len(&self) -> usize; 137 | } 138 | 139 | /// A work set watch is any implementation of a `WorkSetObserver`. 140 | /// 141 | /// The watch does not prevent the queue from terminating. If the queue has already 142 | /// terminated, the number of remaining items will be zero. 143 | pub type WorkSetWatch = Box<dyn WorkSetObserver>; 144 | 145 | /// This is a concrete implementation of a `WorkSetObserver`. 146 | /// 147 | /// The indirection through the `WorkSetObserver` trait and `WorkSetWatch` type is 148 | /// necessary to allow hiding the concrete types `K` and `V` of the queue. 149 | /// Hiding the concrete types makes the interface much nicer. 150 | #[derive(Clone)] 151 | struct WorkSetObserverImpl<K, V> { 152 | /// A weak reference to the queue state. The reference is weak 153 | /// so that the observer does not prevent the queue from terminating. 154 | state: Weak<RefCell<Shared<K, V>>>, 155 | } 156 | 157 | impl<K, V> WorkSetObserver for WorkSetObserverImpl<K, V> { 158 | fn queue_len(&self) -> usize { 159 | self.state 160 | .upgrade() 161 | .map_or(0, |shared: Rc<RefCell<Shared<K, V>>>| { 162 | shared.as_ref().borrow().queue.len() 163 | }) 164 | } 165 | } 166 | 167 | impl<K: 'static, V: 'static> WorkSet<K, V> { 168 | /// Returns a watch for this work set that provides status information. 169 | pub fn watch(&self) -> WorkSetWatch { 170 | Box::new(WorkSetObserverImpl { 171 | state: Rc::downgrade(&self.state), 172 | }) 173 | } 174 | 175 | /// Constructs a new work set with the given initial work items. 176 | pub fn from_queue(queue: IndexMap<K, V>) -> Self { 177 | let shared = Shared { 178 | seen: HashSet::new(), 179 | queue, 180 | }; 181 | 182 | Self { 183 | state: Rc::new(RefCell::new(shared)), 184 | } 185 | } 186 | } 187 | 188 | impl<K: Eq + Hash + 'static, V: 'static> FromIterator<(K, V)> for WorkSet<K, V> { 189 | fn from_iter<I: IntoIterator<Item = (K, V)>>(iter: I) -> WorkSet<K, V> { 190 | Self::from_queue(IndexMap::from_iter(iter)) 191 | } 192 | } 193 | 194 | /// A work set implements the `Stream` trait.
The stream will produce the work 195 | /// that still needs processing. Along with every work item it also provides 196 | /// a handle to the queue that allows the consumer to add more items to the queue. 197 | /// 198 | /// The stream ends if the queue terminates; see the documentation of `WorkSet` 199 | /// for when exactly that happens. 200 | impl<K: Eq + Hash, V> Stream for WorkSet<K, V> { 201 | type Item = (WorkSetHandle<K, V>, V); 202 | 203 | fn poll_next(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Option<Self::Item>> { 204 | let (k, v) = match self.state.borrow_mut().queue.pop() { 205 | Some(e) => e, 206 | None => { 207 | return if Rc::strong_count(&self.state) == 1 { 208 | Poll::Ready(None) 209 | } else { 210 | Poll::Pending 211 | } 212 | } 213 | }; 214 | 215 | self.state.borrow_mut().seen.insert(k); 216 | let handle = WorkSetHandle { 217 | state: self.state.clone(), 218 | }; 219 | Poll::Ready(Some((handle, v))) 220 | } 221 | } 222 | --------------------------------------------------------------------------------