├── .github
│   └── workflows
│       └── rust.yml
├── .gitignore
├── .travis.yml
├── CHANGELOG.md
├── Cargo.lock
├── Cargo.toml
├── LICENSE
├── README.md
├── command-not-found.nu
├── command-not-found.sh
├── default.nix
├── examples
│   └── nix-index-debug.rs
├── flake.lock
├── flake.nix
├── recover.py
├── rustfmt.toml
└── src
    ├── bin
    │   ├── nix-channel-index.rs
    │   ├── nix-index.rs
    │   └── nix-locate.rs
    ├── database.rs
    ├── errors.rs
    ├── files.rs
    ├── frcode.rs
    ├── hydra.rs
    ├── lib.rs
    ├── listings.rs
    ├── nixpkgs.rs
    ├── package.rs
    ├── util.rs
    └── workset.rs

/.github/workflows/rust.yml:
--------------------------------------------------------------------------------
1 | name: Rust
2 | 
3 | on:
4 |   push:
5 |     branches: [master]
6 |   pull_request:
7 |     branches: [master]
8 | 
9 | env:
10 |   CARGO_TERM_COLOR: always
11 | 
12 | jobs:
13 |   build:
14 |     runs-on: ${{ matrix.os }}
15 |     strategy:
16 |       matrix:
17 |         os: [macos-latest, ubuntu-latest]
18 | 
19 |     steps:
20 |       - uses: actions/checkout@v3
21 |       - name: Build
22 |         run: cargo build --verbose
23 |       - name: Run tests
24 |         run: cargo test --verbose
25 | 
26 |   format:
27 |     runs-on: ubuntu-latest
28 |     steps:
29 |       - uses: actions/checkout@v3
30 |       - name: Cargo fmt
31 |         run: |
32 |           rustup toolchain install nightly --profile minimal -c rustfmt
33 |           cargo +nightly fmt --check
34 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | /data
3 | /result
4 | /target-*
5 | .direnv/
6 | .envrc
7 | *.sqlite
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: rust
2 | rust:
3 |   - stable
4 |   - beta
5 |   - nightly
6 | matrix:
7 |   allow_failures:
8 |     - rust: nightly
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | ## 0.1.9 - [Unreleased]
2 | ### Added
3 | ### Fixed
4 | ### Changed
5 | 
6 | ## 0.1.8
7 | ### Added
8 | 
9 | * use HTTPS for cache.nixos.org (#246)
10 | * client now uses reqwest to obtain HTTPS, content decoding, timeouts and more for free (#247)
11 | 
12 | ### Fixed
13 | 
14 | * nix-community buildbot CI builds all attributes out of the box (#240)
15 | 
16 | ### Changed
17 | 
18 | ### Removed
19 | 
20 | ## 0.1.7
21 | ### Added
22 | * improve help message for `nix-locate --type` (issue #204)
23 | * improve error message when `nix-env` is killed by a signal
24 | ### Fixed
25 | * disable nixpkgs overlays (issue #161)
26 | * fix Nix command suggestions for command-not-found.sh (issue #185)
27 | ### Changed
28 | * update dependencies
29 | ### Removed
30 | 
31 | ## 0.1.6
32 | ### Added
33 | * support setting `NIX_INDEX_DATABASE` environment variable to set database location (#213), thanks @mmarx
34 | * this version and future versions will be published to https://crates.io, simplifying library usage in other projects
35 | ### Fixed
36 | ### Changed
37 | ### Removed
38 | 
39 | ## 0.1.5
40 | ### Added
41 | ### Fixed
42 | * fix crash when using wildcard pattern with nix-locate (issue #205)
43 | ### Changed
44 | ### Removed
45 | 
46 | ## 0.1.4 - 2023-01-13
47 | ### Added
48 | ### Fixed
49 | * fix RUSTSEC-2021-0131 (integer overflow in brotli) by migrating away from `brotli2` crate
50 | * fix RUSTSEC-2022-0006 (data race in `thread_local`) by updating `thread_local`
51 | * fix panic when using the `--type`
CLI (issue #202)
52 | ### Changed
53 | * update all dependencies in Cargo.lock
54 | 
55 | ## 0.1.3 - 2023-01-10
56 | ### Added
57 | * flake.nix added to repository, allows directly running nix-index from git (#162), thanks @matthewbauer
58 | * support for proxies (#132), thanks @whizsid
59 | * command-not-found.sh suggests new `nix profile` command if manifest.json exists (#135), thanks @matthewbauer
60 | * support building project via Nix on Darwin (#175), thanks @BrianHicks
61 | * indexer supports prefix filtering (#177), thanks @virchau13
62 | * command-line option to specify system for which to build the index (#183), thanks @usertam
63 | * nix-channel-index: new command to build a programs.sqlite as currently distributed with nix channels (#192), thanks @K900
64 | ### Fixed
65 | * command-not-found.sh never accesses undefined variables anymore (allows set -u) (#123), thanks @matthewbauer
66 | * support xlibs renamed to xorg in recent nixpkgs (#179), thanks @cole-h
67 | ### Changed
68 | * rust dependencies updated to latest versions, thanks @elude03, @berbiche, @Sciecentistguy, @Mic92
69 | * nix-env is now invoked in parallel to query paths (improves performance)
70 | * performance improvement: multithread compression (#152), thanks @enolan
71 | * performance improvement: reduce compression level from 22 to 19 (#152), thanks @enolan
72 | * performance improvement: get store paths from nix-env in parallel (#152), thanks @enolan
73 | 
74 | ## 0.1.2 - 2018-09-18
75 | ### Added
76 | ### Fixed
77 | * don't stop when a single request fails (thanks @jameysharp)
78 | ### Changed
79 | ### Removed
80 | 
81 | ## 0.1.1 - 2018-01-26
82 | ### Added
83 | * `--show-trace` command line option
84 | ### Fixed
85 | ### Changed
86 | ### Removed
87 | 
88 | ## 0.1.0 - 2017-07-22
89 | ### Added
90 | * Initial release
91 | 
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | description = "Nix (package manager) indexing primitives"
3 | authors = ["Benno Fünfstück "]
4 | edition = "2021"
5 | name = "nix-index"
6 | version = "0.1.9"
7 | license = "BSD-3-Clause"
8 | homepage = "https://github.com/nix-community/nix-index"
9 | repository = "https://github.com/nix-community/nix-index"
10 | readme = "README.md"
11 | keywords = [ "nixpkgs", "nixos", "nix", "indexing" ]
12 | include = [
13 |     "examples/",
14 |     "src/*.rs",
15 |     "src/bin/*.rs",
16 |     "Cargo.toml",
17 |     "README.md",
18 |     "LICENSE"
19 | ]
20 | 
21 | [[bin]]
22 | doc = false
23 | name = "nix-index"
24 | 
25 | [[bin]]
26 | name = "nix-locate"
27 | 
28 | [dependencies]
29 | bincode = "1.3.3"
30 | byteorder = "1.5.0"
31 | error-chain = "0.12.4"
32 | futures = "0.3.30"
33 | grep = "0.3.1"
34 | atty = "0.2.14"
35 | memchr = "2.7.2"
36 | num_cpus = "1.16.0"
37 | indexmap = "2.2.6"
38 | owo-colors = { version = "4.0.0", features = ["supports-colors"] }
39 | rayon = "1.10.0"
40 | regex = "1.10.4"
41 | regex-syntax = "0.7.4"
42 | reqwest = { version = "0.12.3", features = [ "brotli" ] }
43 | separator = "0.4.1"
44 | serde = { version = "1.0.198", features = [ "derive" ] }
45 | serde_bytes = "0.11.14"
46 | serde_json = "1.0.116"
47 | tokio-retry = "0.3.0"
48 | xdg = "2.5.2"
49 | xml-rs = "0.8.20"
50 | xz2 = "0.1.7"
51 | zstd = { version = "0.12.4", features = [ "zstdmt" ] }
52 | 
53 | [dependencies.hyper]
54 | features = ["client", "http1", "http2", "runtime", "stream"]
55 | version = "0.14.27"
56 | 
57 | 
58 | [dependencies.tokio]
59 | 
features = ["full"] 60 | version = "1.32.0" 61 | 62 | [dependencies.clap] 63 | version = "4.3.24" 64 | features = ["derive", "env"] 65 | 66 | [dependencies.rusqlite] 67 | features = ["backup"] 68 | version = "0.31.0" 69 | 70 | [[example]] 71 | name = "nix-index-debug" 72 | 73 | [profile] 74 | [profile.release] 75 | debug = true 76 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017 Benno Fünfstück 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the author nor the names of his contributors 17 | may be used to endorse or promote products derived from this software 18 | without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 21 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 22 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR 24 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 28 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 | POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # nix-index 2 | ## A files database for nixpkgs 3 | **nix-index** is a tool to quickly locate the package providing a certain file in [`nixpkgs`](https://github.com/NixOS/nixpkgs). It indexes built derivations found in binary caches. 4 | 5 | ###### Demo 6 | 7 | ``` 8 | $ nix-locate 'bin/hello' 9 | hello.out 29,488 x /nix/store/bdjyhh70npndlq3rzmggh4f2dzdsj4xy-hello-2.10/bin/hello 10 | linuxPackages_4_4.dpdk.examples 2,022,224 x /nix/store/jlnk3d38zsk0bp02rp9skpqk4vjfijnn-dpdk-16.07.2-4.4.52-examples/bin/helloworld 11 | linuxPackages.dpdk.examples 2,022,224 x /nix/store/rzx4k0pb58gd1dr9kzwam3vk9r8bfyv1-dpdk-16.07.2-4.9.13-examples/bin/helloworld 12 | linuxPackages_4_10.dpdk.examples 2,022,224 x /nix/store/wya1b0910qidfc9v3i6r9rnbnc9ykkwq-dpdk-16.07.2-4.10.1-examples/bin/helloworld 13 | linuxPackages_grsec_nixos.dpdk.examples 2,022,224 x /nix/store/2wqv94290pa38aclld7sc548a7hnz35k-dpdk-16.07.2-4.9.13-examples/bin/helloworld 14 | camlistore.out 7,938,952 x /nix/store/xn5ivjdyslxldhm5cb4x0lfz48zf21rl-camlistore-0.9/bin/hello 15 | ``` 16 | ## Installation 17 | 18 | ### Flakes 19 | 20 | 1. create the database: 21 | 22 | ``` 23 | $ nix run github:nix-community/nix-index#nix-index 24 | ``` 25 | 26 | 2. 
query for a file:
27 | 
28 | ```
29 | $ nix run github:nix-community/nix-index#nix-locate -- bin/hello
30 | ```
31 | 
32 | ### Latest Git version
33 | 
34 | To install the latest development version of nix-index, simply clone the repo and run `nix-env -if.`:
35 | 
36 | ```
37 | $ git clone https://github.com/nix-community/nix-index
38 | $ cd nix-index
39 | $ nix-env -if.
40 | ```
41 | 
42 | ### Stable
43 | 
44 | For the stable version, you can either [checkout](https://git-scm.com/docs/git-checkout) the latest [tag](https://git-scm.com/docs/git-tag) (see the list [here](https://github.com/nix-community/nix-index/tags)) or use the Nixpkgs repositories and install it with:
45 | 
46 | ```
47 | $ nix-env -iA nixos.nix-index
48 | ```
49 | 
50 | ## Usage
51 | First, you need to generate an index by running `nix-index` (it takes around 5 minutes). Then, you can use `nix-locate pattern`. For more information, see `nix-locate --help` and `nix-index --help`.
52 | 
53 | ### Use pre-generated database
54 | 
55 | [nix-index-database](https://github.com/Mic92/nix-index-database) provides pre-generated databases if you don't want to generate a database locally.
56 | It also comes with nixos/home-manager modules to use those databases.
57 | 
58 | ### Usage as a command-not-found replacement
59 | 
60 | Nix-index provides a "command-not-found" script that can print the attribute path of commands that are not found in your shell. You can either source `${pkgs.nix-index}/etc/profile.d/command-not-found.sh` in your own shell init files (works for Zsh and Bash, as far as we know) or you can use the following in home-manager / `/etc/nixos/configuration.nix`:
61 | 
62 | ```nix
63 | programs.command-not-found.enable = false;
64 | # for home-manager, use programs.bash.initExtra instead
65 | programs.bash.interactiveShellInit = ''
66 |   source ${pkgs.nix-index}/etc/profile.d/command-not-found.sh
67 | '';
68 | ```
69 | 
70 | Replace `bash` with `zsh` if you use `zsh`.
71 | 
72 | Example output:
73 | 
74 | ```
75 | $ blender
76 | The program 'blender' is currently not installed. You can install it
77 | by typing:
78 |   nix-env -iA nixpkgs.blender.out
79 | 
80 | Or run it once with:
81 |   nix-shell -p blender.out --run ...
82 | ```
83 | 
84 | A [`home-manager` module](https://nix-community.github.io/home-manager/options.html#opt-programs.nix-index.enable) is now available to integrate `nix-index` with `bash`, `zsh`, and `fish` using this script.
85 | 
86 | You can also use `command-not-found.nu` as a Nushell hook by adding the
87 | following to your Nushell config:
88 | 
89 | ```nix
90 | programs.nushell = {
91 |   enable = true;
92 |   extraConfig = ''
93 |     $env.config.hooks.command_not_found = source ${pkgs.nix-index}/etc/profile.d/command-not-found.nu
94 |   '';
95 | };
96 | ```
97 | 
98 | ## Contributing
99 | If you find any missing features that you would like to implement, I'm very happy about any PRs! You can also create an issue first if the feature is more complex so we can discuss possible implementations.
100 | 
101 | Here is a quick description of all relevant files:
102 | 
103 | * `src/bin/{nix-index, nix-locate}.rs`: Implementation of the nix-index / nix-locate command line tools
104 | * `src/database.rs`: High-level functions for working with the database format
105 | * `src/files.rs`: The data types for working with file listings
106 | * `src/frcode.rs`: Low-level implementation of an encoder to efficiently store many file paths (see comments in the file for more details). Used by `database.rs`. A toy sketch of the idea follows this list.
107 | * `src/hydra.rs`: Deals with everything that has to do with downloading from the binary cache (fetching file listings and references)
108 | * `src/nixpkgs.rs`: Implements the gathering of the packages (store paths and attributes) using `nix-env`
109 | * `src/package.rs`: High-level data types for representing store paths (sometimes also referred to as a package)
110 | * `src/workset.rs`: A queue used by `nix-index` to implement the recursive fetching (fetching references of everything)
111 | 
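The sketch below illustrates front coding, the classic `locate(1)` idea that `frcode.rs` is named after. It is a deliberately simplified toy, not the actual on-disk format (which also carries per-file metadata, package entries, and zstd compression on top); the function name and example paths here are made up for illustration:

```rust
/// Toy front coding: store each path as (shared_prefix_len, suffix)
/// relative to the previous path. Sorted listings share long prefixes,
/// so most of each path collapses into the shared-length counter.
fn front_encode(paths: &[&str]) -> Vec<(usize, String)> {
    let mut encoded = Vec::new();
    let mut prev = "";
    for &path in paths {
        // Length of the common prefix with the previous path (byte-wise;
        // fine here because the example paths are ASCII).
        let shared = prev
            .bytes()
            .zip(path.bytes())
            .take_while(|(a, b)| a == b)
            .count();
        encoded.push((shared, path[shared..].to_string()));
        prev = path;
    }
    encoded
}

fn main() {
    let paths = ["/bin/hello", "/bin/hexdump", "/share/man/man1/hello.1"];
    for (shared, suffix) in front_encode(&paths) {
        println!("keep {:2} bytes, append {:?}", shared, suffix);
    }
}
```

Because consecutive sorted paths share long prefixes, most of each entry shrinks to a small counter. A consequence of this encoding is that paths can only be reconstructed by walking entries in order, which matches how the database is scanned as a stream during queries.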
--------------------------------------------------------------------------------
/command-not-found.nu:
--------------------------------------------------------------------------------
1 | { |cmd_name|
2 |     let install = { |pkgs|
3 |         $pkgs | each {|pkg| $" nix shell nixpkgs#($pkg)" }
4 |     }
5 |     let run_once = { |pkgs|
6 |         $pkgs | each {|pkg| $" nix shell nixpkgs#($pkg) --command '($cmd_name) ...'" }
7 |     }
8 |     let single_pkg = { |pkg|
9 |         let lines = [
10 |             $"The program '($cmd_name)' is currently not installed."
11 |             ""
12 |             "You can install it by typing:"
13 |             (do $install [$pkg] | get 0)
14 |             ""
15 |             "Or run it once with:"
16 |             (do $run_once [$pkg] | get 0)
17 |         ]
18 |         $lines | str join "\n"
19 |     }
20 |     let multiple_pkgs = { |pkgs|
21 |         let lines = [
22 |             $"The program '($cmd_name)' is currently not installed. It is provided by several packages."
23 |             ""
24 |             "You can install it by typing one of the following:"
25 |             (do $install $pkgs | str join "\n")
26 |             ""
27 |             "Or run it once with:"
28 |             (do $run_once $pkgs | str join "\n")
29 |         ]
30 |         $lines | str join "\n"
31 |     }
32 |     let pkgs = (@out@/bin/nix-locate --minimal --no-group --type x --type s --top-level --whole-name --at-root $"/bin/($cmd_name)" | lines)
33 |     let len = ($pkgs | length)
34 |     let ret = match $len {
35 |         0 => null,
36 |         1 => (do $single_pkg ($pkgs | get 0)),
37 |         _ => (do $multiple_pkgs $pkgs),
38 |     }
39 |     return $ret
40 | }
--------------------------------------------------------------------------------
/command-not-found.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | # for bash 4
4 | # this will be called when a command is entered
5 | # but not found in the user’s path + environment
6 | command_not_found_handle () {
7 | 
8 |     # TODO: use "command not found" gettext translations
9 | 
10 |     # taken from http://www.linuxjournal.com/content/bash-command-not-found
11 |     # - do not run when inside Midnight Commander or within a Pipe
12 |     if [ -n "${MC_SID-}" ] || ! [ -t 1 ]; then
13 |         >&2 echo "$1: command not found"
14 |         return 127
15 |     fi
16 | 
17 |     toplevel=nixpkgs # nixpkgs should always be available even in NixOS
18 |     cmd=$1
19 |     attrs=$(@out@/bin/nix-locate --minimal --no-group --type x --type s --top-level --whole-name --at-root "/bin/$cmd")
20 |     len=$(echo -n "$attrs" | grep -c "^")
21 | 
22 |     case $len in
23 |         0)
24 |             >&2 echo "$cmd: command not found"
25 |             ;;
26 |         1)
27 |             # if only 1 package provides this, then we can invoke it
28 |             # without asking the users if they have opted in with one
29 |             # of 2 environment variables
30 | 
31 |             # they are based on the ones found in
32 |             # command-not-found.sh:
33 | 
34 |             # NIX_AUTO_INSTALL : install the missing command into the
35 |             #                    user’s environment
36 |             # NIX_AUTO_RUN     : run the command transparently inside of
37 |             #                    nix shell
38 | 
39 |             # these will not return 127 if they worked correctly
40 | 
41 |             if ! [ -z "${NIX_AUTO_INSTALL-}" ]; then
42 |                 >&2 cat <&2 cat <"
62 |                 if [ "$?" -eq 0 ]; then
63 |                     # how nix-shell handles commands is weird
64 |                     # $(echo $@) is needed to handle this
65 |                     nix-shell -p $attrs --run "$(echo $@)"
66 |                     return $?
67 |                 else
68 |                     >&2 cat <&2 cat <&2 cat <&2 cat <&2 echo " nix profile install $toplevel#$attr"
106 |                 else
107 |                     >&2 echo " nix-env -iA $toplevel.$attr"
108 |                 fi
109 |             done <<< "$attrs"
110 | 
111 |             >&2 cat <&2 echo " nix shell $toplevel#$attr -c $cmd ..."
119 |                 else
120 |                     >&2 echo " nix-shell -p $attr --run '$cmd ...'"
121 |                 fi
122 |             done <<< "$attrs"
123 |             ;;
124 |     esac
125 | 
126 |     return 127 # command not found should always exit with 127
127 | }
128 | 
129 | # for zsh...
130 | # we just pass it to the bash handler above
131 | # apparently they work identically
132 | command_not_found_handler () {
133 |     command_not_found_handle $@
134 |     return $?
135 | }
136 | 
--------------------------------------------------------------------------------
/default.nix:
--------------------------------------------------------------------------------
1 | # This file is the compat layer of flakes: https://github.com/edolstra/flake-compat
2 | # See flake.nix for details
3 | (import (
4 |   let
5 |     lock = builtins.fromJSON (builtins.readFile ./flake.lock);
6 |   in fetchTarball {
7 |     url = "https://github.com/edolstra/flake-compat/archive/${lock.nodes.flake-compat.locked.rev}.tar.gz";
8 |     sha256 = lock.nodes.flake-compat.locked.narHash; }
9 | ) {
10 |   src = ./.;
11 | }).defaultNix
--------------------------------------------------------------------------------
/examples/nix-index-debug.rs:
--------------------------------------------------------------------------------
1 | extern crate nix_index;
2 | 
3 | use nix_index::database::Reader;
4 | 
5 | fn main() {
6 |     let f = std::env::args().nth(1).expect("file name given as 1st arg");
7 |     let mut db = Reader::open(f).unwrap();
8 |     db.dump().unwrap();
9 | }
10 | 
--------------------------------------------------------------------------------
/flake.lock:
--------------------------------------------------------------------------------
1 | {
2 |   "nodes": {
3 |     "flake-compat": {
4 |       "flake": false,
5 |       "locked": {
6 |         "lastModified": 1696426674,
7 |         "narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=",
8 |         "owner": "edolstra",
9 |         "repo": "flake-compat",
10 |         "rev": "0f9255e01c2351cc7d116c072cb317785dd33b33",
11 |         "type": "github"
12 |       },
13 |       "original": {
14 |         "owner": "edolstra",
15 |         "repo": "flake-compat",
16 |         "type": "github"
17 |       }
18 |     },
19 |     "nixpkgs": {
20 |       "locked": {
21 |         "lastModified": 1713248628,
22 |         "narHash": "sha256-NLznXB5AOnniUtZsyy/aPWOk8ussTuePp2acb9U+ISA=",
23 |         "owner": "NixOS",
24 |         "repo": "nixpkgs",
25 |         "rev": "5672bc9dbf9d88246ddab5ac454e82318d094bb8",
26 |         "type": "github"
27 |       },
28 |       "original": {
29 |         "id": "nixpkgs",
30 |         "ref": "nixos-unstable",
31 |         "type": "indirect"
32 |       }
33 |     },
34 |     "root": {
35 |       "inputs": {
36 |         "flake-compat": "flake-compat",
37 |         "nixpkgs": "nixpkgs"
38 |       }
39 |     }
40 |   },
41 |   "root": "root",
42 |   "version": 7
43 | }
--------------------------------------------------------------------------------
/flake.nix:
--------------------------------------------------------------------------------
1 | {
2 |   description = "A files database for nixpkgs";
3 | 
4 |   inputs = {
5 |     nixpkgs.url = "nixpkgs/nixos-unstable";
6 |     flake-compat = {
7 |       url = "github:edolstra/flake-compat";
8 |       flake = false;
9 |     };
10 |   };
11 | 
12 |   outputs = { self, nixpkgs, flake-compat }:
13 |     let
14 |       inherit (nixpkgs) lib;
15 |       systems = [ "x86_64-linux"
"x86_64-darwin" "aarch64-darwin" "aarch64-linux" ]; 16 | forAllSystems = lib.genAttrs systems; 17 | nixpkgsFor = nixpkgs.legacyPackages; 18 | in 19 | { 20 | packages = forAllSystems (system: { 21 | default = with nixpkgsFor.${system}; rustPlatform.buildRustPackage { 22 | pname = "nix-index"; 23 | inherit ((lib.importTOML ./Cargo.toml).package) version; 24 | 25 | src = lib.sourceByRegex self [ 26 | "(examples|src)(/.*)?" 27 | ''Cargo\.(toml|lock)'' 28 | ''command-not-found\.sh'' 29 | ''command-not-found\.nu'' 30 | ]; 31 | 32 | cargoLock = { 33 | lockFile = ./Cargo.lock; 34 | }; 35 | 36 | nativeBuildInputs = [ pkg-config ]; 37 | buildInputs = [ openssl curl sqlite ] 38 | ++ lib.optionals stdenv.isDarwin [ darwin.apple_sdk.frameworks.Security ]; 39 | 40 | postInstall = '' 41 | substituteInPlace command-not-found.sh \ 42 | --subst-var out 43 | install -Dm555 command-not-found.sh -t $out/etc/profile.d 44 | substituteInPlace command-not-found.nu \ 45 | --subst-var out 46 | install -Dm555 command-not-found.nu -t $out/etc/profile.d 47 | ''; 48 | 49 | meta = with lib; { 50 | description = "A files database for nixpkgs"; 51 | homepage = "https://github.com/nix-community/nix-index"; 52 | license = with licenses; [ bsd3 ]; 53 | maintainers = [ maintainers.bennofs ]; 54 | }; 55 | }; 56 | }); 57 | 58 | checks = forAllSystems (system: 59 | let 60 | packages = lib.mapAttrs' (n: lib.nameValuePair "package-${n}") self.packages.${system}; 61 | devShells = lib.mapAttrs' (n: lib.nameValuePair "devShell-${n}") self.devShells.${system}; 62 | in packages // devShells 63 | ); 64 | 65 | devShells = forAllSystems (system: { 66 | minimal = with nixpkgsFor.${system}; mkShell { 67 | name = "nix-index"; 68 | 69 | nativeBuildInputs = [ 70 | pkg-config 71 | ]; 72 | 73 | buildInputs = [ 74 | openssl 75 | sqlite 76 | ] ++ lib.optionals stdenv.isDarwin [ 77 | darwin.apple_sdk.frameworks.Security 78 | ]; 79 | 80 | env.LD_LIBRARY_PATH = lib.makeLibraryPath [ openssl ]; 81 | }; 82 | 83 | default = with nixpkgsFor.${system}; mkShell { 84 | name = "nix-index"; 85 | 86 | inputsFrom = [ self.devShells.${system}.minimal ]; 87 | 88 | nativeBuildInputs = [ rustc cargo clippy rustfmt ]; 89 | 90 | env = { 91 | LD_LIBRARY_PATH = lib.makeLibraryPath [ openssl ]; 92 | RUST_SRC_PATH = rustPlatform.rustLibSrc; 93 | }; 94 | }; 95 | }); 96 | 97 | apps = forAllSystems (system: { 98 | nix-index = { 99 | type = "app"; 100 | program = "${self.packages.${system}.default}/bin/nix-index"; 101 | }; 102 | nix-locate = { 103 | type = "app"; 104 | program = "${self.packages.${system}.default}/bin/nix-locate"; 105 | }; 106 | default = self.apps.${system}.nix-locate; 107 | }); 108 | }; 109 | } 110 | -------------------------------------------------------------------------------- /recover.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | import json 4 | 5 | 6 | CHUNK_SIZE = 4*32*1024 7 | 8 | 9 | def wrong_written_size(x): 10 | out = 0 11 | while x >= 0: 12 | out += x 13 | x -= CHUNK_SIZE 14 | return out 15 | 16 | 17 | if __name__ == '__main__': 18 | with open(sys.argv[1], 'rb') as f: 19 | data = f.read() 20 | 21 | print(sys.argv[1]) 22 | try: 23 | json.loads(data) 24 | except json.JSONDecodeError as e: 25 | exc = e 26 | for margin in range(10): 27 | if len(data) == wrong_written_size(e.pos + margin): 28 | print(margin, exc, len(data), e.pos, data[e.pos:][:10], data[:10]) 29 | sys.exit(0) 30 | 31 | sys.exit(1) 32 | 
--------------------------------------------------------------------------------
/rustfmt.toml:
--------------------------------------------------------------------------------
1 | unstable_features = true
2 | 
3 | group_imports = "StdExternalCrate"
4 | newline_style = "Unix"
5 | reorder_impl_items = true
6 | use_field_init_shorthand = true
7 | use_try_shorthand = true
--------------------------------------------------------------------------------
/src/bin/nix-channel-index.rs:
--------------------------------------------------------------------------------
1 | //! Tool for generating a nix-index database.
2 | use std::ffi::OsString;
3 | use std::io::{self, Write};
4 | use std::os::unix::ffi::OsStringExt;
5 | use std::path::PathBuf;
6 | use std::process;
7 | 
8 | use clap::Parser;
9 | use error_chain::ChainedError;
10 | use futures::{future, StreamExt};
11 | use nix_index::files::FileNode;
12 | use nix_index::hydra::Fetcher;
13 | use nix_index::listings::fetch_listings;
14 | use nix_index::{errors::*, CACHE_URL};
15 | use rusqlite::{Connection, DatabaseName};
16 | 
17 | /// The main function of this module: creates a new command-not-found database.
18 | async fn update_index(args: &Args) -> Result<()> {
19 |     let fetcher = Fetcher::new(CACHE_URL.to_string()).map_err(ErrorKind::ParseProxy)?;
20 |     let connection =
21 |         Connection::open_in_memory().map_err(|_| ErrorKind::CreateDatabase(args.output.clone()))?;
22 | 
23 |     connection
24 |         .execute(
25 |             r#"
26 |             create table Programs (
27 |                 name text not null,
28 |                 system text not null,
29 |                 package text not null,
30 |                 primary key (name, system, package)
31 |             );
32 |             "#,
33 |             (),
34 |         )
35 |         .map_err(|_| ErrorKind::CreateDatabase(args.output.clone()))?;
36 | 
37 |     let debug_connection = Connection::open_in_memory()
38 |         .map_err(|_| ErrorKind::CreateDatabase(args.debug_output.clone()))?;
39 |     debug_connection
40 |         .execute(
41 |             r#"
42 |             create table DebugInfo (
43 |                 build_id text unique not null,
44 |                 url text not null,
45 |                 filename text not null,
46 |                 primary key (build_id)
47 |             );
48 |             "#,
49 |             (),
50 |         )
51 |         .map_err(|_| ErrorKind::CreateDatabase(args.debug_output.clone()))?;
52 | 
53 |     let systems = match &args.systems {
54 |         Some(systems) => systems.iter().map(|x| Some(x.as_str())).collect(),
55 |         None => vec![None],
56 |     };
57 | 
58 |     eprint!("+ querying available packages");
59 |     let (files, watch) =
60 |         fetch_listings(&fetcher, args.jobs, &args.nixpkgs, systems, args.show_trace)?;
61 | 
62 |     // Treat request errors as if the file list were missing
63 |     let files = files.map(|r| {
64 |         r.unwrap_or_else(|e| {
65 |             eprint!("\n{}", e.display_chain());
66 |             None
67 |         })
68 |     });
69 | 
70 |     // Add progress output
71 |     let (mut indexed, mut missing) = (0, 0);
72 |     let files = files.inspect(|entry| {
73 |         if entry.is_some() {
74 |             indexed += 1;
75 |         } else {
76 |             missing += 1;
77 |         };
78 | 
79 |         eprint!("+ generating index: {:05} paths found :: {:05} paths not in binary cache :: {:05} paths in queue \r",
80 |                 indexed, missing, watch.queue_len());
81 |         io::stderr().flush().expect("flushing stderr failed");
82 |     });
83 | 
84 |     let mut files = files.filter_map(future::ready);
85 | 
86 |     eprint!("+ generating index");
87 |     eprint!("\r");
88 | 
89 |     while let Some((path, nar, files)) = files.next().await {
90 |         let origin = path.origin();
91 | 
92 |         if !origin.toplevel {
93 |             // skip dependencies
94 |             continue;
95 |         }
96 | 
97 |         for item in files.to_list(&[]) {
98 |             if let FileNode::Symlink { target: _ } // FIXME: should probably check if the target is
executable...
99 |                 | FileNode::Regular {
100 |                     size: _,
101 |                     executable: true,
102 |                 } = item.node
103 |             {
104 |                 let path = PathBuf::from(OsString::from_vec(item.path));
105 | 
106 |                 if let Ok(binary) = path.strip_prefix("/bin") {
107 |                     let attr = origin.attr.clone();
108 |                     let system = origin.system.clone();
109 |                     let binary: String = binary.to_string_lossy().into();
110 | 
111 |                     if binary.starts_with('.') || binary.contains('/') || binary.is_empty() {
112 |                         continue;
113 |                     }
114 | 
115 |                     connection
116 |                         .execute(
117 |                             "insert or replace into Programs(name, system, package) values (?, ?, ?)",
118 |                             (binary, system, attr),
119 |                         )
120 |                         .map_err(|_| ErrorKind::CreateDatabase(args.output.clone()))?;
121 |                 }
122 | 
123 |                 if let Ok(debuginfo) = path.strip_prefix("/lib/debug/.build-id") {
124 |                     let build_id: String = debuginfo
125 |                         .to_string_lossy()
126 |                         .replace('/', "")
127 |                         .strip_suffix(".debug")
128 |                         .expect("Debug info files must end with .debug")
129 |                         .into();
130 | 
131 |                     debug_connection
132 |                         .execute(
133 |                             "insert or replace into DebugInfo(build_id, url, filename) values (?, ?, ?)",
134 |                             (build_id, format!("../{}", nar), path.to_string_lossy().strip_prefix('/')),
135 |                         )
136 |                         .map_err(|_| ErrorKind::CreateDatabase(args.debug_output.clone()))?;
137 |                 }
138 |             }
139 |         }
140 |     }
141 |     eprintln!();
142 | 
143 |     eprint!("+ dumping index");
144 | 
145 |     connection
146 |         .backup(DatabaseName::Main, &args.output, None)
147 |         .map_err(|_| ErrorKind::CreateDatabase(args.output.clone()))?;
148 | 
149 |     debug_connection
150 |         .backup(DatabaseName::Main, &args.debug_output, None)
151 |         .map_err(|_| ErrorKind::CreateDatabase(args.debug_output.clone()))?;
152 | 
153 |     Ok(())
154 | }
155 | 
156 | #[derive(Debug, Parser)]
157 | #[clap(author, about, version)]
158 | struct Args {
159 |     /// Make REQUESTS http requests in parallel
160 |     #[clap(short = 'r', long = "requests", default_value = "500")]
161 |     jobs: usize,
162 | 
163 |     /// Path to nixpkgs for which to build the index, as accepted by nix-env -f
164 |     #[clap(short = 'f', long, default_value = "")]
165 |     nixpkgs: String,
166 | 
167 |     /// Path for resulting database file
168 |     #[clap(short, long, default_value = "programs.sqlite")]
169 |     output: PathBuf,
170 | 
171 |     /// Path for debuginfo database file
172 |     #[clap(short, long, default_value = "debug.sqlite")]
173 |     debug_output: PathBuf,
174 | 
175 |     /// Systems to include in generated database
176 |     #[clap(short = 's', long = "platform")]
177 |     systems: Option<Vec<String>>,
178 | 
179 |     /// Show a stack trace in the case of a Nix evaluation error
180 |     #[clap(long)]
181 |     show_trace: bool,
182 | }
183 | 
184 | #[tokio::main]
185 | async fn main() {
186 |     let args = Args::parse();
187 | 
188 |     if let Err(e) = update_index(&args).await {
189 |         eprintln!("error: {}", e);
190 | 
191 |         for e in e.iter().skip(1) {
192 |             eprintln!("caused by: {}", e);
193 |         }
194 | 
195 |         if let Some(backtrace) = e.backtrace() {
196 |             eprintln!("backtrace: {:?}", backtrace);
197 |         }
198 |         process::exit(2);
199 |     }
200 | }
201 | 
--------------------------------------------------------------------------------
/src/bin/nix-index.rs:
--------------------------------------------------------------------------------
1 | //! Tool for generating a nix-index database.
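//!
//! Rough flow of `update_index` below: query the available packages from
//! nixpkgs (or load a previously saved `paths.cache`), fetch the file
//! listings for those store paths from the binary cache, and stream every
//! entry into the compressed database that `nix-locate` later searches.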
2 | use std::ffi::OsStr; 3 | use std::fs::{self, File}; 4 | use std::io::{self, Write}; 5 | use std::path::PathBuf; 6 | use std::process; 7 | 8 | use clap::Parser; 9 | use error_chain::ChainedError; 10 | use futures::future::Either; 11 | use futures::{future, StreamExt}; 12 | use nix_index::database::Writer; 13 | use nix_index::errors::*; 14 | use nix_index::files::FileTree; 15 | use nix_index::hydra::Fetcher; 16 | use nix_index::listings::{fetch_listings, try_load_paths_cache}; 17 | use nix_index::package::StorePath; 18 | use nix_index::CACHE_URL; 19 | use separator::Separatable; 20 | 21 | /// The main function of this module: creates a new nix-index database. 22 | async fn update_index(args: &Args) -> Result<()> { 23 | // first try to load the paths.cache if requested, otherwise query 24 | // the packages normally. Also fall back to normal querying if the paths.cache 25 | // fails to load. 26 | let cached = if args.path_cache { 27 | eprintln!("+ loading paths from cache"); 28 | try_load_paths_cache()? 29 | } else { 30 | None 31 | }; 32 | 33 | eprintln!("+ querying available packages"); 34 | let fetcher = Fetcher::new(CACHE_URL.to_string()).map_err(ErrorKind::ParseProxy)?; 35 | let (files, watch) = match cached { 36 | Some((f, w)) => (Either::Left(f), w), 37 | None => { 38 | let (f, w) = fetch_listings( 39 | &fetcher, 40 | args.jobs, 41 | &args.nixpkgs, 42 | vec![args.system.as_deref()], 43 | args.show_trace, 44 | )?; 45 | (Either::Right(f), w) 46 | } 47 | }; 48 | 49 | // Treat request errors as if the file list were missing 50 | let files = files.map(|r| { 51 | r.unwrap_or_else(|e| { 52 | eprint!("\n{}", e.display_chain()); 53 | None 54 | }) 55 | }); 56 | 57 | // Add progress output 58 | let (mut indexed, mut missing) = (0, 0); 59 | let files = files.inspect(|entry| { 60 | if entry.is_some() { 61 | indexed += 1; 62 | } else { 63 | missing += 1; 64 | }; 65 | 66 | eprint!("+ generating index: {:05} paths found :: {:05} paths not in binary cache :: {:05} paths in queue \r", 67 | indexed, missing, watch.queue_len()); 68 | io::stderr().flush().expect("flushing stderr failed"); 69 | }); 70 | 71 | // Filter packages with no file listings available 72 | let mut files = files.filter_map(future::ready); 73 | 74 | eprint!("+ generating index"); 75 | if !args.filter_prefix.is_empty() { 76 | eprint!(" (filtering by `{}`)", args.filter_prefix); 77 | } 78 | eprint!("\r"); 79 | fs::create_dir_all(&args.database) 80 | .chain_err(|| ErrorKind::CreateDatabaseDir(args.database.clone()))?; 81 | let mut db = Writer::create(args.database.join("files"), args.compression_level) 82 | .chain_err(|| ErrorKind::CreateDatabase(args.database.clone()))?; 83 | 84 | let mut results: Vec<(StorePath, String, FileTree)> = Vec::new(); 85 | while let Some(entry) = files.next().await { 86 | if args.path_cache { 87 | results.push(entry.clone()); 88 | } 89 | let (path, _, files) = entry; 90 | db.add(path, files, args.filter_prefix.as_bytes()) 91 | .chain_err(|| ErrorKind::WriteDatabase(args.database.clone()))?; 92 | } 93 | eprintln!(); 94 | 95 | if args.path_cache { 96 | eprintln!("+ writing path cache"); 97 | let mut output = io::BufWriter::new( 98 | File::create("paths.cache").chain_err(|| ErrorKind::WritePathsCache)?, 99 | ); 100 | bincode::serialize_into(&mut output, &results).chain_err(|| ErrorKind::WritePathsCache)?; 101 | } 102 | 103 | let index_size = db 104 | .finish() 105 | .chain_err(|| ErrorKind::WriteDatabase(args.database.clone()))?; 106 | eprintln!("+ wrote index of {} bytes", index_size.separated_string()); 107 | 
108 |     Ok(())
109 | }
110 | 
111 | fn cache_dir() -> &'static OsStr {
112 |     let base = xdg::BaseDirectories::with_prefix("nix-index").unwrap();
113 |     let cache_dir = Box::new(base.get_cache_home());
114 |     let cache_dir = Box::leak(cache_dir);
115 |     cache_dir.as_os_str()
116 | }
117 | 
118 | /// Builds an index for nix-locate
119 | #[derive(Debug, Parser)]
120 | #[clap(author, about, version)]
121 | struct Args {
122 |     /// Make REQUESTS http requests in parallel
123 |     #[clap(short = 'r', long = "requests", default_value = "100")]
124 |     jobs: usize,
125 | 
126 |     /// Directory where the index is stored
127 |     #[clap(short, long = "db", default_value_os = cache_dir(), env = "NIX_INDEX_DATABASE")]
128 |     database: PathBuf,
129 | 
130 |     /// Path to nixpkgs for which to build the index, as accepted by nix-env -f
131 |     #[clap(short = 'f', long, default_value = "")]
132 |     nixpkgs: String,
133 | 
134 |     /// Specify system platform for which to build the index, accepted by nix-env --argstr system
135 |     #[clap(short = 's', long, value_name = "platform")]
136 |     system: Option<String>,
137 | 
138 |     /// Zstandard compression level
139 |     #[clap(short, long = "compression", default_value = "22")]
140 |     compression_level: i32,
141 | 
142 |     /// Show a stack trace in the case of a Nix evaluation error
143 |     #[clap(long)]
144 |     show_trace: bool,
145 | 
146 |     /// Only add paths starting with PREFIX (e.g. `/bin/`)
147 |     #[clap(long, default_value = "")]
148 |     filter_prefix: String,
149 | 
150 |     /// Store and load results of fetch phase in a file called paths.cache. This speeds up testing
151 |     /// different database formats / compression.
152 |     ///
153 |     /// Note: does not check if the cached data is up to date! Use only for development.
154 |     #[clap(long)]
155 |     path_cache: bool,
156 | }
157 | 
158 | #[tokio::main]
159 | async fn main() {
160 |     let args = Args::parse();
161 | 
162 |     if let Err(e) = update_index(&args).await {
163 |         eprintln!("error: {}", e);
164 | 
165 |         for e in e.iter().skip(1) {
166 |             eprintln!("caused by: {}", e);
167 |         }
168 | 
169 |         if let Some(backtrace) = e.backtrace() {
170 |             eprintln!("backtrace: {:?}", backtrace);
171 |         }
172 |         process::exit(2);
173 |     }
174 | }
175 | 
--------------------------------------------------------------------------------
/src/bin/nix-locate.rs:
--------------------------------------------------------------------------------
1 | //! Tool for searching for files in nixpkgs packages
2 | use std::collections::HashSet;
3 | use std::ffi::OsStr;
4 | use std::path::PathBuf;
5 | use std::process;
6 | use std::result;
7 | use std::str;
8 | use std::str::FromStr;
9 | 
10 | use clap::{value_parser, Parser};
11 | use error_chain::error_chain;
12 | use nix_index::database;
13 | use nix_index::files::{self, FileTreeEntry, FileType};
14 | use owo_colors::{OwoColorize, Stream};
15 | use regex::bytes::Regex;
16 | use separator::Separatable;
17 | 
18 | error_chain! {
19 |     errors {
20 |         ReadDatabase(database: PathBuf) {
21 |             description("database read error")
22 |             display("reading from the database at '{}' failed.\n\
23 |                      This may be caused by a corrupt or missing database, try (re)running `nix-index` to generate the database.\n\
24 |                      If the error persists please file a bug report at https://github.com/nix-community/nix-index.", database.to_string_lossy())
25 |         }
26 |         Grep(pattern: String) {
27 |             description("grep builder error")
28 |             display("constructing the regular expression from the pattern '{}' failed.", pattern)
29 |         }
30 |     }
31 | }
32 | 
33 | /// The struct holding the parsed arguments for searching
34 | struct Args {
35 |     /// Path of the nix-index database.
36 |     database: PathBuf,
37 |     /// The pattern to search for. This is always in regex syntax.
38 |     pattern: String,
39 |     group: bool,
40 |     hash: Option<String>,
41 |     package_pattern: Option<String>,
42 |     file_type: Vec<FileType>,
43 |     only_toplevel: bool,
44 |     color: bool,
45 |     minimal: bool,
46 | }
47 | 
48 | /// The main function of this module: searches with the given options in the database.
49 | fn locate(args: &Args) -> Result<()> {
50 |     // Build the regular expression matcher
51 |     let pattern = Regex::new(&args.pattern).chain_err(|| ErrorKind::Grep(args.pattern.clone()))?;
52 |     let package_pattern = if let Some(ref pat) = args.package_pattern {
53 |         Some(Regex::new(pat).chain_err(|| ErrorKind::Grep(pat.clone()))?)
54 |     } else {
55 |         None
56 |     };
57 | 
58 |     // Open the database
59 |     let index_file = args.database.join("files");
60 |     let db = database::Reader::open(&index_file)
61 |         .chain_err(|| ErrorKind::ReadDatabase(index_file.clone()))?;
62 | 
63 |     let results = db
64 |         .query(&pattern)
65 |         .package_pattern(package_pattern.as_ref())
66 |         .hash(args.hash.clone())
67 |         .run()
68 |         .chain_err(|| ErrorKind::Grep(args.pattern.clone()))?
69 |         .filter(|v| {
70 |             v.as_ref().ok().map_or(true, |v| {
71 |                 let &(ref store_path, FileTreeEntry { ref path, ref node }) = v;
72 |                 let m = pattern
73 |                     .find_iter(path)
74 |                     .last()
75 |                     .expect("path should match the pattern");
76 | 
77 |                 let conditions = [
78 |                     !args.group || !path[m.end()..].contains(&b'/'),
79 |                     !args.only_toplevel || store_path.origin().toplevel,
80 |                     args.file_type.iter().any(|t| &node.get_type() == t),
81 |                 ];
82 | 
83 |                 conditions.iter().all(|c| *c)
84 |             })
85 |         });
86 | 
87 |     let mut printed_attrs = HashSet::new();
88 |     for v in results {
89 |         let (store_path, FileTreeEntry { path, node }) =
90 |             v.chain_err(|| ErrorKind::ReadDatabase(index_file.clone()))?;
91 | 
92 |         use crate::files::FileNode::*;
93 |         let (typ, size) = match node {
94 |             Regular { executable, size } => (if executable { "x" } else { "r" }, size),
95 |             Directory { size, contents: () } => ("d", size),
96 |             Symlink { ..
} => ("s", 0), 97 | }; 98 | 99 | let mut attr = format!( 100 | "{}.{}", 101 | store_path.origin().attr, 102 | store_path.origin().output 103 | ); 104 | 105 | if !store_path.origin().toplevel { 106 | attr = format!("({})", attr); 107 | } 108 | 109 | if args.minimal { 110 | // only print each package once, even if there are multiple matches 111 | if printed_attrs.insert(attr.clone()) { 112 | println!("{}", attr); 113 | } 114 | } else { 115 | print!( 116 | "{:<40} {:>14} {:>1} {}", 117 | attr, 118 | size.separated_string(), 119 | typ, 120 | store_path.as_str() 121 | ); 122 | 123 | let path = String::from_utf8_lossy(&path); 124 | 125 | if args.color { 126 | let mut prev = 0; 127 | for mat in pattern.find_iter(path.as_bytes()) { 128 | // if the match is empty, we need to make sure we don't use string 129 | // indexing because the match may be "inside" a single multibyte character 130 | // in that case (for example, the pattern may match the second byte of a multibyte character) 131 | if mat.start() == mat.end() { 132 | continue; 133 | } 134 | print!( 135 | "{}{}", 136 | &path[prev..mat.start()], 137 | (&path[mat.start()..mat.end()]) 138 | .if_supports_color(Stream::Stdout, |txt| txt.red()), 139 | ); 140 | prev = mat.end(); 141 | } 142 | println!("{}", &path[prev..]); 143 | } else { 144 | println!("{}", path); 145 | } 146 | } 147 | } 148 | 149 | Ok(()) 150 | } 151 | 152 | /// Extract the parsed arguments for clap's arg matches. 153 | /// 154 | /// Handles parsing the values of more complex arguments. 155 | fn process_args(matches: Opts) -> result::Result { 156 | let pattern_arg = matches.pattern; 157 | let package_arg = matches.package; 158 | 159 | let start_anchor = if matches.at_root { "^" } else { "" }; 160 | let end_anchor = if matches.whole_name { "$" } else { "" }; 161 | 162 | let make_pattern = |s: &str, wrap: bool| { 163 | let regex = if matches.regex { 164 | s.to_string() 165 | } else { 166 | regex::escape(s) 167 | }; 168 | if wrap { 169 | format!("{}{}{}", start_anchor, regex, end_anchor) 170 | } else { 171 | regex 172 | } 173 | }; 174 | 175 | let color = match matches.color { 176 | Color::Auto => atty::is(atty::Stream::Stdout), 177 | Color::Always => true, 178 | Color::Never => false, 179 | }; 180 | 181 | let args = Args { 182 | database: matches.database, 183 | group: !matches.no_group, 184 | pattern: make_pattern(&pattern_arg, true), 185 | package_pattern: package_arg.as_deref().map(|p| make_pattern(p, false)), 186 | hash: matches.hash, 187 | file_type: matches 188 | .r#type 189 | .unwrap_or_else(|| files::ALL_FILE_TYPES.to_vec()), 190 | only_toplevel: matches.top_level, 191 | color, 192 | minimal: matches.minimal, 193 | }; 194 | Ok(args) 195 | } 196 | 197 | const LONG_USAGE: &str = r#" 198 | How to use 199 | ========== 200 | 201 | In the simplest case, just run `nix-locate part/of/file/path` to search for all packages that contain 202 | a file matching that path: 203 | 204 | $ nix-locate 'bin/firefox' 205 | ...all packages containing a file named 'bin/firefox' 206 | 207 | Before using this tool, you first need to generate a nix-index database. 208 | Use the `nix-index` tool to do that. 
209 | 
210 | Limitations
211 | ===========
212 | 
213 | * this tool can only find packages which are built by Hydra, because only those packages
214 |   will have file listings that are indexed by nix-index
215 | 
216 | * we can't know the precise attribute path for every package, so if you see the syntax `(attr)`
217 |   in the output, that means that `attr` is not the target package but that it
218 |   depends (perhaps indirectly) on the package that contains the searched file. Example:
219 | 
220 |   $ nix-locate 'bin/xmonad'
221 |   (xmonad-with-packages.out)    0 s /nix/store/nl581g5kv3m2xnmmfgb678n91d7ll4vv-ghc-8.0.2-with-packages/bin/xmonad
222 | 
223 |   This means that we don't know what nixpkgs attribute produces /nix/store/nl581g5kv3m2xnmmfgb678n91d7ll4vv-ghc-8.0.2-with-packages,
224 |   but we know that `xmonad-with-packages.out` requires it.
225 | "#;
226 | 
227 | fn cache_dir() -> &'static OsStr {
228 |     let base = xdg::BaseDirectories::with_prefix("nix-index").unwrap();
229 |     let cache_dir = Box::new(base.get_cache_home());
230 |     let cache_dir = Box::leak(cache_dir);
231 |     cache_dir.as_os_str()
232 | }
233 | 
234 | /// Quickly finds the derivation providing a certain file
235 | #[derive(Debug, Parser)]
236 | #[clap(author, about, version, after_help = LONG_USAGE)]
237 | struct Opts {
238 |     /// Pattern for which to search
239 |     // #[clap(name = "PATTERN")]
240 |     pattern: String,
241 | 
242 |     /// Directory where the index is stored
243 |     #[clap(short, long = "db", default_value_os = cache_dir(), env = "NIX_INDEX_DATABASE")]
244 |     database: PathBuf,
245 | 
246 |     /// Treat PATTERN as regex instead of literal text. Also applies to NAME.
247 |     #[clap(short, long)]
248 |     regex: bool,
249 | 
250 |     /// Only print matches from packages whose name matches PACKAGE.
251 |     #[clap(short, long)]
252 |     package: Option<String>,
253 | 
254 |     /// Only print matches from the package that has the given HASH.
255 |     #[clap(long, name = "HASH")]
256 |     hash: Option<String>,
257 | 
258 |     /// Only print matches from packages that show up in `nix-env -qa`.
259 |     #[clap(long)]
260 |     top_level: bool,
261 | 
262 |     /// Only print matches for files that have this type. If the option is given multiple times,
263 |     /// a file will be printed if it has any of the given types.
264 |     /// [options: (r)egular file, e(x)ecutable, (d)irectory, (s)ymlink]
265 |     #[clap(short, long, value_parser=value_parser!(FileType))]
266 |     r#type: Option<Vec<FileType>>,
267 | 
268 |     /// Disables grouping of paths with the same matching part. By default, a path will only be
269 |     /// printed if the pattern matches some part of the last component of the path. For example,
270 |     /// the pattern `a/foo` would match all of `a/foo`, `a/foo/some_file` and `a/foo/another_file`,
271 |     /// but only the first match will be printed. This option disables that behavior and prints
272 |     /// all matches.
273 |     #[clap(long)]
274 |     no_group: bool,
275 | 
276 |     /// Whether to use colors in output. If auto, only use colors if outputting to a terminal.
277 |     #[clap(long, value_enum, default_value = "auto")]
278 |     color: Color,
279 | 
280 |     /// Only print matches for files or directories whose basename matches PATTERN exactly.
281 |     /// This means that the pattern `bin/foo` will only match a file called `bin/foo` or
282 |     /// `xx/bin/foo` but not `bin/foobar`.
283 |     #[clap(short, long)]
284 |     whole_name: bool,
285 | 
286 |     /// Treat PATTERN as an absolute file path, so it only matches starting from the root of a
287 |     /// package. This means that the pattern `/bin/foo` only matches a file called `/bin/foo` or
288 |     /// `/bin/foobar` but not `/libexec/bin/foo`.
289 |     #[clap(long)]
290 |     at_root: bool,
291 | 
292 |     /// Only print attribute names of found files or directories. Other details such as size or
293 |     /// store path are omitted. This is useful for scripts that use the output of nix-locate.
294 |     #[clap(long)]
295 |     minimal: bool,
296 | }
297 | 
298 | #[derive(clap::ValueEnum, Clone, Copy, Debug)]
299 | enum Color {
300 |     Always,
301 |     Never,
302 |     Auto,
303 | }
304 | 
305 | impl FromStr for Color {
306 |     type Err = &'static str;
307 | 
308 |     fn from_str(s: &str) -> core::result::Result<Self, Self::Err> {
309 |         match s {
310 |             "always" => Ok(Color::Always),
311 |             "never" => Ok(Color::Never),
312 |             "auto" => Ok(Color::Auto),
313 |             _ => Err(""),
314 |         }
315 |     }
316 | }
317 | 
318 | fn main() {
319 |     let args = Opts::parse();
320 | 
321 |     let args = process_args(args).unwrap_or_else(|e| e.exit());
322 | 
323 |     if let Err(e) = locate(&args) {
324 |         eprintln!("error: {}", e);
325 | 
326 |         for e in e.iter().skip(1) {
327 |             eprintln!("caused by: {}", e);
328 |         }
329 | 
330 |         if let Some(backtrace) = e.backtrace() {
331 |             eprintln!("backtrace: {:?}", backtrace);
332 |         }
333 |         process::exit(2);
334 |     }
335 | }
336 | 
--------------------------------------------------------------------------------
/src/database.rs:
--------------------------------------------------------------------------------
1 | use std::fs::File;
2 | /// Creating and searching file databases.
3 | ///
4 | /// This module implements an abstraction for creating an index of files with meta information
5 | /// and searching that index for paths matching a specific pattern.
6 | use std::io::{self, BufReader, BufWriter, Read, Seek, Write};
7 | use std::path::Path;
8 | 
9 | use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
10 | use error_chain::error_chain;
11 | use grep;
12 | use grep::matcher::{LineMatchKind, Match, Matcher, NoError};
13 | use memchr::{memchr, memrchr};
14 | use regex::bytes::Regex;
15 | use regex_syntax::ast::{
16 |     Alternation, Assertion, AssertionKind, Ast, Concat, Group, Literal, Repetition,
17 | };
18 | use serde_json;
19 | use zstd;
20 | 
21 | use crate::files::{FileTree, FileTreeEntry};
22 | use crate::frcode;
23 | use crate::package::StorePath;
24 | 
25 | /// The version of the database format supported by this nix-index version.
26 | ///
27 | /// This should be updated whenever you make an incompatible change to the database format.
28 | const FORMAT_VERSION: u64 = 1;
29 | 
30 | /// The magic for nix-index database files, used to ensure that the file we're passed is
31 | /// actually a file generated by nix-index.
32 | const FILE_MAGIC: &[u8] = b"NIXI";
33 | 
34 | /// A writer for creating a new file database.
35 | pub struct Writer {
36 |     /// The encoder used to compress the database. Will be set to `None` when the value
37 |     /// is dropped.
38 |     writer: Option<BufWriter<zstd::Encoder<'static, File>>>,
39 | }
40 | 
41 | // We need to make sure that the encoder is `finish`ed in all cases, so we need
42 | // a custom Drop.
43 | impl Drop for Writer {
44 |     fn drop(&mut self) {
45 |         if self.writer.is_some() {
46 |             self.finish_encoder().unwrap();
47 |         }
48 |     }
49 | }
50 | 
51 | impl Writer {
52 |     /// Creates a new database at the given path with the specified zstd compression level
53 |     /// (currently, supported values range from 0 to 22).
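    ///
    /// A minimal usage sketch (the file name and level below are made up;
    /// real callers obtain `StorePath`/`FileTree` values from the fetch phase):
    ///
    /// ```no_run
    /// # use nix_index::database::Writer;
    /// # fn demo() -> std::io::Result<()> {
    /// let mut db = Writer::create("files", 19)?;
    /// // for every indexed package: db.add(store_path, file_tree, b"")?;
    /// let compressed_bytes = db.finish()?;
    /// # let _ = compressed_bytes;
    /// # Ok(())
    /// # }
    /// ```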
54 |     pub fn create<P: AsRef<Path>>(path: P, level: i32) -> io::Result<Writer> {
55 |         let mut file = File::create(path)?;
56 |         file.write_all(FILE_MAGIC)?;
57 |         file.write_u64::<LittleEndian>(FORMAT_VERSION)?;
58 |         let mut encoder = zstd::Encoder::new(file, level)?;
59 |         encoder.multithread(num_cpus::get() as u32)?;
60 | 
61 |         Ok(Writer {
62 |             writer: Some(BufWriter::new(encoder)),
63 |         })
64 |     }
65 | 
66 |     /// Add a new package to the database for the given store path with its corresponding
67 |     /// file tree. Entries are only added if they match `filter_prefix`.
68 |     pub fn add(
69 |         &mut self,
70 |         path: StorePath,
71 |         files: FileTree,
72 |         filter_prefix: &[u8],
73 |     ) -> io::Result<()> {
74 |         let entries = files.to_list(filter_prefix);
75 | 
76 |         // Don't add packages with no file entries to the database.
77 |         if entries.is_empty() {
78 |             return Ok(());
79 |         }
80 |         let writer = self.writer.as_mut().expect("not dropped yet");
81 |         let mut encoder =
82 |             frcode::Encoder::new(writer, b"p".to_vec(), serde_json::to_vec(&path).unwrap());
83 |         for entry in entries {
84 |             entry.encode(&mut encoder)?;
85 |         }
86 |         Ok(())
87 |     }
88 | 
89 |     /// Finishes encoding. After calling this function, `add` may no longer be called, since this function
90 |     /// closes the stream.
91 |     ///
92 |     /// The return value is the underlying File.
93 |     fn finish_encoder(&mut self) -> io::Result<File> {
94 |         let writer = self.writer.take().expect("not dropped yet");
95 |         let encoder = writer.into_inner()?;
96 |         encoder.finish()
97 |     }
98 | 
99 |     /// Finish the encoding and return the size in bytes of the compressed file that was created.
100 |     pub fn finish(mut self) -> io::Result<u64> {
101 |         let mut file = self.finish_encoder()?;
102 |         file.stream_position()
103 |     }
104 | }
105 | 
106 | error_chain! {
107 |     errors {
108 |         UnsupportedFileType(found: Vec<u8>) {
109 |             description("unsupported file type")
110 |             display("expected file to start with nix-index file magic 'NIXI', but found '{}' (is this a valid nix-index database file?)", String::from_utf8_lossy(found))
111 |         }
112 |         UnsupportedVersion(found: u64) {
113 |             description("unsupported file version")
114 |             display("this executable only supports the nix-index database version {}, but found a database with version {}", FORMAT_VERSION, found)
115 |         }
116 |         MissingPackageEntry {
117 |             description("missing package entry for path")
118 |             display("database corrupt, found a file entry without a matching package entry")
119 |         }
120 |         Frcode(err: frcode::Error) {
121 |             description("frcode error")
122 |             display("database corrupt, frcode error: {}", err)
123 |         }
124 |         EntryParse(entry: Vec<u8>) {
125 |             description("entry parse failure")
126 |             display("database corrupt, could not parse entry: {:?}", String::from_utf8_lossy(entry))
127 |         }
128 |         StorePathParse(path: Vec<u8>) {
129 |             description("store path parse failure")
130 |             display("database corrupt, could not parse store path: {:?}", String::from_utf8_lossy(path))
131 |         }
132 |     }
133 | 
134 |     foreign_links {
135 |         Io(io::Error);
136 |         Grep(grep::regex::Error);
137 |     }
138 | }
139 | 
140 | impl From<frcode::Error> for Error {
141 |     fn from(err: frcode::Error) -> Error {
142 |         ErrorKind::Frcode(err).into()
143 |     }
144 | }
145 | 
146 | /// A Reader allows fast querying of a nix-index database.
147 | pub struct Reader {
148 |     decoder: frcode::Decoder<BufReader<zstd::Decoder<'static, BufReader<File>>>>,
149 | }
150 | 
151 | impl Reader {
152 |     /// Opens a nix-index database located at the given path.
153 |     ///
154 |     /// If the path does not exist or is not a valid database, an error is returned.
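    ///
    /// Sketch of a typical query against an opened database (file name and
    /// pattern are made up; `nix-locate` is the real consumer of this API):
    ///
    /// ```no_run
    /// # use nix_index::database::{Reader, Result};
    /// # use regex::bytes::Regex;
    /// # fn demo() -> Result<()> {
    /// let db = Reader::open("files")?;
    /// let pattern = Regex::new("bin/hello$").unwrap();
    /// for entry in db.query(&pattern).run()? {
    ///     let (store_path, file_entry) = entry?;
    ///     // inspect store_path and file_entry here
    /// }
    /// # Ok(())
    /// # }
    /// ```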
155 |     pub fn open<P: AsRef<Path>>(path: P) -> Result<Reader> {
156 |         let mut file = File::open(path)?;
157 |         let mut magic = [0u8; 4];
158 |         file.read_exact(&mut magic)?;
159 | 
160 |         if magic != FILE_MAGIC {
161 |             return Err(ErrorKind::UnsupportedFileType(magic.to_vec()).into());
162 |         }
163 | 
164 |         let version = file.read_u64::<LittleEndian>()?;
165 |         if version != FORMAT_VERSION {
166 |             return Err(ErrorKind::UnsupportedVersion(version).into());
167 |         }
168 | 
169 |         let decoder = zstd::Decoder::new(file)?;
170 |         Ok(Reader {
171 |             decoder: frcode::Decoder::new(BufReader::new(decoder)),
172 |         })
173 |     }
174 | 
175 |     /// Builds a query to find all entries in the database that have a filename matching the given pattern.
176 |     ///
177 |     /// Afterwards, use `Query::into_iter` to iterate over the items.
178 |     pub fn query(self, exact_regex: &Regex) -> Query {
179 |         Query {
180 |             reader: self,
181 |             exact_regex,
182 |             hash: None,
183 |             package_pattern: None,
184 |         }
185 |     }
186 | 
187 |     /// Dumps the contents of the database to stdout, for debugging.
188 |     #[allow(clippy::print_stdout)]
189 |     pub fn dump(&mut self) -> Result<()> {
190 |         loop {
191 |             let block = self.decoder.decode()?;
192 |             if block.is_empty() {
193 |                 break;
194 |             }
195 |             for line in block.split(|c| *c == b'\n') {
196 |                 println!("{:?}", String::from_utf8_lossy(line));
197 |             }
198 |             println!("-- block boundary");
199 |         }
200 |         Ok(())
201 |     }
202 | }
203 | 
204 | /// A builder for a `ReaderIter` to iterate over entries in the database matching a given pattern.
205 | pub struct Query<'a, 'b> {
206 |     /// The underlying reader from which we read input.
207 |     reader: Reader,
208 | 
209 |     /// The pattern that file paths have to match.
210 |     exact_regex: &'a Regex,
211 | 
212 |     /// Only include the package with the given hash.
213 |     hash: Option<String>,
214 | 
215 |     /// Only include packages whose name matches the given pattern.
216 |     package_pattern: Option<&'b Regex>,
217 | }
218 | 
219 | impl<'a, 'b> Query<'a, 'b> {
220 |     /// Limit results to entries from the package with the specified hash if `Some`.
221 |     pub fn hash(self, hash: Option<String>) -> Query<'a, 'b> {
222 |         Query { hash, ..self }
223 |     }
224 | 
225 |     /// Limit results to entries from packages whose name matches the given regex if `Some`.
226 |     pub fn package_pattern(self, package_pattern: Option<&'b Regex>) -> Query<'a, 'b> {
227 |         Query {
228 |             package_pattern,
229 |             ..self
230 |         }
231 |     }
232 | 
233 |     /// Runs the query, returning an Iterator that will yield all entries matching the conditions.
234 |     ///
235 |     /// There is no guarantee about the order of the returned matches.
236 |     pub fn run(self) -> Result<ReaderIter<'a, 'b>> {
237 |         let mut expr = regex_syntax::ast::parse::Parser::new()
238 |             .parse(self.exact_regex.as_str())
239 |             .expect("regex cannot be invalid");
240 |         // replace the ^ anchor by a NUL byte, since each entry is of the form `METADATA\0PATH`
241 |         // (so the NUL byte marks the start of the path).
242 |         {
243 |             let mut stack = vec![&mut expr];
244 |             while let Some(e) = stack.pop() {
245 |                 match *e {
246 |                     Ast::Assertion(Assertion {
247 |                         kind: AssertionKind::StartLine,
248 |                         span,
249 |                     }) => {
250 |                         *e = Ast::Literal(Literal {
251 |                             span,
252 |                             c: '\0',
253 |                             kind: regex_syntax::ast::LiteralKind::Verbatim,
254 |                         })
255 |                     }
256 |                     Ast::Group(Group { ref mut ast, .. }) => stack.push(ast),
257 |                     Ast::Repetition(Repetition { ref mut ast, .. }) => stack.push(ast),
258 |                     Ast::Concat(Concat { ref mut asts, .. })
259 |                     | Ast::Alternation(Alternation { ref mut asts, .. }) => stack.extend(asts),
259 |                     | Ast::Alternation(Alternation { ref mut asts, .. }) => stack.extend(asts),
260 |                     _ => {}
261 |                 }
262 |             }
263 |         }
264 |         let mut regex_builder = grep::regex::RegexMatcherBuilder::new();
265 |         regex_builder.line_terminator(Some(b'\n')).multi_line(true);
266 | 
267 |         let grep = regex_builder.build(&format!("{}", expr))?;
268 |         Ok(ReaderIter {
269 |             reader: self.reader,
270 |             found: Vec::new(),
271 |             found_without_package: Vec::new(),
272 |             pattern: grep,
273 |             exact_pattern: self.exact_regex,
274 |             package_entry_pattern: regex_builder.build("^p\0").expect("valid regex"),
275 |             package_name_pattern: self.package_pattern,
276 |             package_hash: self.hash,
277 |         })
278 |     }
279 | }
280 | 
281 | /// An iterator for entries in a database matching a given pattern.
282 | pub struct ReaderIter<'a, 'b> {
283 |     /// The underlying reader from which we read input.
284 |     reader: Reader,
285 |     /// Entries that matched the pattern but have not been returned by `next` yet.
286 |     found: Vec<(StorePath, FileTreeEntry)>,
287 |     /// Entries that matched the pattern but for which we don't know yet what package they belong to.
288 |     /// This may happen if the entry we matched was at the end of the search buffer, so that the entry
289 |     /// for the package did not fit into the buffer anymore (since the package is stored after the entries
290 |     /// of the package). In this case, we need to look for the package entry in the next iteration when
291 |     /// we read the next block of input.
292 |     found_without_package: Vec<FileTreeEntry>,
293 |     /// The pattern used to search for matching file entries.
294 |     ///
295 |     /// This pattern should work on the raw bytes of file entries. In particular, the file path is not the
296 |     /// first data in a file entry, so the regex `^` anchor will not work correctly.
297 |     ///
298 |     /// The pattern here may produce false positives (for example, if it matches inside the metadata of a file
299 |     /// entry). This is not a problem, as matches are later checked against `exact_pattern`.
300 |     pattern: grep::regex::RegexMatcher,
301 |     /// The raw pattern, as supplied to `Reader::query`. This is used to verify matches, since `pattern` itself
302 |     /// may produce false positives.
303 |     exact_pattern: &'a Regex,
304 |     /// Pattern that matches only package entries.
305 |     package_entry_pattern: grep::regex::RegexMatcher,
306 |     /// Pattern that the package name should match.
307 |     package_name_pattern: Option<&'b Regex>,
308 |     /// Only search the package with the given hash.
309 |     package_hash: Option<String>,
310 | }
311 | 
312 | fn consume_no_error<T>(e: NoError) -> T {
313 |     panic!("impossible: {}", e)
314 | }
315 | 
316 | fn next_matching_line<M: Matcher<Error = NoError>>(
317 |     matcher: M,
318 |     buf: &[u8],
319 |     mut start: usize,
320 | ) -> Option<Match> {
321 |     while let Some(candidate) = matcher
322 |         .find_candidate_line(&buf[start..])
323 |         .unwrap_or_else(consume_no_error)
324 |     {
325 |         // the buffer may end with a newline character, so we may get a match
326 |         // for an empty "line" at the end of the buffer
327 |         // since this is not a line match, return None
328 |         if start == buf.len() {
329 |             return None;
330 |         };
331 | 
332 |         let (pos, confirmed) = match candidate {
333 |             LineMatchKind::Confirmed(pos) => (start + pos, true),
334 |             LineMatchKind::Candidate(pos) => (start + pos, false),
335 |         };
336 | 
337 |         let line_start = memrchr(b'\n', &buf[..pos]).map_or(0, |x| x + 1);
338 |         let line_end = memchr(b'\n', &buf[pos..]).map_or(buf.len(), |x| x + pos + 1);
339 | 
340 |         if !confirmed
341 |             && !matcher
342 |                 .is_match(&buf[line_start..line_end])
343 |                 .unwrap_or_else(consume_no_error)
344 |         {
345 |             start = line_end;
346 |             continue;
347 |         }
348 | 
349 |         return Some(Match::new(line_start, line_end));
350 |     }
351 |     None
352 | }
353 | 
354 | impl<'a, 'b> ReaderIter<'a, 'b> {
355 |     /// Reads input until `self.found` contains at least one entry or the end of the input has been reached.
356 |     fn fill_buf(&mut self) -> Result<()> {
357 |         // the input is processed in blocks until we've found at least a single entry
358 |         while self.found.is_empty() {
359 |             let &mut ReaderIter {
360 |                 ref mut reader,
361 |                 ref package_entry_pattern,
362 |                 ref package_name_pattern,
363 |                 ref package_hash,
364 |                 ..
365 |             } = self;
366 |             let block = reader.decoder.decode()?;
367 | 
368 |             // if the block is empty, the end of input has been reached
369 |             if block.is_empty() {
370 |                 return Ok(());
371 |             }
372 | 
373 |             // when we find a match, we need to know the package that this match belongs to.
374 |             // the `find_package` function will skip forward until a package entry is found
375 |             // (the package entry comes after all file entries for a package).
376 |             //
377 |             // to be more efficient if there are many matches, we cache the current package here.
378 |             // this package is valid for all positions up to the second element of the tuple
379 |             // (after that, a new package begins).
380 |             let mut cached_package: Option<(StorePath, usize)> = None;
381 |             let mut no_more_package = false;
382 |             let mut find_package = |item_end| -> Result<_> {
383 |                 if let Some((ref pkg, end)) = cached_package {
384 |                     if item_end < end {
385 |                         return Ok(Some((pkg.clone(), end)));
386 |                     }
387 |                 }
388 | 
389 |                 if no_more_package {
390 |                     return Ok(None);
391 |                 }
392 | 
393 |                 let mat = match next_matching_line(package_entry_pattern, block, item_end) {
394 |                     Some(v) => v,
395 |                     None => {
396 |                         no_more_package = true;
397 |                         return Ok(None);
398 |                     }
399 |                 };
400 | 
401 |                 let json = &block[mat.start() + 2..mat.end() - 1];
402 |                 let pkg: StorePath = serde_json::from_slice(json)
403 |                     .chain_err(|| ErrorKind::StorePathParse(json.to_vec()))?;
404 |                 cached_package = Some((pkg.clone(), mat.end()));
405 |                 Ok(Some((pkg, mat.end())))
406 |             };
407 | 
408 |             // Tests if a store path matches the `package_name_pattern` and `package_hash` constraints.
409 |             let should_search_package = |pkg: &StorePath| -> bool {
410 |                 package_name_pattern.map_or(true, |r| r.is_match(pkg.name().as_bytes()))
411 |                     && package_hash.as_ref().map_or(true, |h| h == &pkg.hash())
412 |             };
413 | 
414 |             let mut pos = 0;
415 |             // if there are any entries without a package left over from the previous iteration, see
416 |             // if this block contains the package entry.
417 |             if !self.found_without_package.is_empty() {
418 |                 if let Some((pkg, end)) = find_package(0)? {
419 |                     if !should_search_package(&pkg) {
420 |                         // all entries before end will have the same package
421 |                         pos = end;
422 |                         self.found_without_package.truncate(0);
423 |                     } else {
424 |                         for entry in self.found_without_package.split_off(0) {
425 |                             self.found.push((pkg.clone(), entry));
426 |                         }
427 |                     }
428 |                 }
429 |             }
430 | 
431 |             // process all matches in this block
432 |             while let Some(mat) = next_matching_line(&self.pattern, block, pos) {
433 |                 pos = mat.end();
434 |                 let entry = &block[mat.start()..mat.end() - 1];
435 |                 // skip entries that aren't describing file paths
436 |                 if self
437 |                     .package_entry_pattern
438 |                     .is_match(entry)
439 |                     .unwrap_or_else(consume_no_error)
440 |                 {
441 |                     continue;
442 |                 }
443 | 
444 |                 // skip if package name or hash doesn't match
445 |                 // we can only skip if we know the package
446 |                 if let Some((pkg, end)) = find_package(mat.end())? {
447 |                     if !should_search_package(&pkg) {
448 |                         // all entries before end will have the same package
449 |                         pos = end;
450 |                         continue;
451 |                     }
452 |                 }
453 | 
454 |                 let entry = FileTreeEntry::decode(entry)
455 |                     .ok_or_else(|| Error::from(ErrorKind::EntryParse(entry.to_vec())))?;
456 | 
457 |                 // check for false positives
458 |                 if !self.exact_pattern.is_match(&entry.path) {
459 |                     continue;
460 |                 }
461 | 
462 |                 match find_package(mat.end())? {
463 |                     None => self.found_without_package.push(entry),
464 |                     Some((pkg, _)) => self.found.push((pkg, entry)),
465 |                 }
466 |             }
467 |         }
468 |         Ok(())
469 |     }
470 | 
471 |     /// Returns the next match in the database.
472 |     fn next_match(&mut self) -> Result<Option<(StorePath, FileTreeEntry)>> {
473 |         self.fill_buf()?;
474 |         Ok(self.found.pop())
475 |     }
476 | }
477 | 
478 | impl<'a, 'b> Iterator for ReaderIter<'a, 'b> {
479 |     type Item = Result<(StorePath, FileTreeEntry)>;
480 | 
481 |     fn next(&mut self) -> Option<Self::Item> {
482 |         match self.next_match() {
483 |             Err(e) => Some(Err(e)),
484 |             Ok(v) => v.map(Ok),
485 |         }
486 |     }
487 | }
488 | 
489 | #[cfg(test)]
490 | mod tests {
491 |     use super::*;
492 | 
493 |     #[test]
494 |     fn test_next_matching_line_package() {
495 |         let matcher = grep::regex::RegexMatcherBuilder::new()
496 |             .line_terminator(Some(b'\n'))
497 |             .multi_line(true)
498 |             .build("^p")
499 |             .expect("valid regex");
500 |         let buffer = br#"
501 | SOME LINE
502 | pDATA
503 | ANOTHER LINE
504 | "#;
505 | 
506 |         let mat = next_matching_line(matcher, buffer, 0);
507 |         assert_eq!(mat, Some(Match::new(11, 17)));
508 |     }
509 | }
510 | 
--------------------------------------------------------------------------------
/src/errors.rs:
--------------------------------------------------------------------------------
1 | use std::path::PathBuf;
2 | 
3 | use crate::package::StorePath;
4 | 
5 | error_chain::error_chain! {
6 |     errors {
7 |         QueryPackages {
8 |             description("query packages error")
9 |             display("querying available packages failed")
10 |         }
11 |         FetchFiles(path: StorePath) {
12 |             description("file listing fetch error")
13 |             display("fetching the file listing for store path '{}' failed", path.as_str())
14 |         }
15 |         FetchReferences(path: StorePath) {
16 |             description("references fetch error")
17 |             display("fetching the references of store path '{}' failed", path.as_str())
18 |         }
19 |         LoadPathsCache {
20 |             description("paths.cache load error")
21 |             display("loading the paths.cache file failed")
22 |         }
23 |         WritePathsCache {
24 |             description("paths.cache write error")
25 |             display("writing the paths.cache file failed")
26 |         }
27 |         CreateDatabase(path: PathBuf) {
28 |             description("create database error")
29 |             display("creating the database at '{}' failed", path.to_string_lossy())
30 |         }
31 |         CreateDatabaseDir(path: PathBuf) {
32 |             description("create database directory error")
33 |             display("creating the directory for the database at '{}' failed", path.to_string_lossy())
34 |         }
35 |         WriteDatabase(path: PathBuf) {
36 |             description("database write error")
37 |             display("writing to the database '{}' failed", path.to_string_lossy())
38 |         }
39 |         ParseProxy(err: crate::hydra::Error) {
40 |             description("proxy parse error")
41 |             display("cannot parse proxy settings")
42 |         }
43 |     }
44 | }
45 | 
--------------------------------------------------------------------------------
/src/files.rs:
--------------------------------------------------------------------------------
1 | //! Data types for working with trees of files.
2 | //!
3 | //! The main type here is `FileTree`, which represents a tree of files,
4 | //! such as the file listing for a store path.
5 | use std::collections::HashMap;
6 | use std::io::{self, Write};
7 | use std::str::{self, FromStr};
8 | 
9 | use clap::builder::PossibleValue;
10 | use clap::ValueEnum;
11 | use memchr::memchr;
12 | use serde::{Deserialize, Serialize};
13 | use serde_bytes::ByteBuf;
14 | 
15 | use crate::frcode;
16 | 
17 | /// This enum represents a single node in a file tree.
18 | ///
19 | /// The type is generic over the contents of a directory node,
20 | /// because we want to use this enum to represent both a flat
21 | /// structure where a directory only stores some meta-information about itself
22 | /// (such as the number of children) and full file trees, where a
23 | /// directory contains all the child nodes.
24 | ///
25 | /// Note that file nodes by themselves do not have names. Names are given
26 | /// to file nodes by the parent directory, which has a map of entry names to
27 | /// file nodes.
28 | #[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
29 | pub enum FileNode<T> {
30 |     /// A regular file. This is the normal kind of file which is
31 |     /// neither a directory nor a symlink.
32 |     Regular {
33 |         /// The size of this file, in bytes.
34 |         size: u64,
35 |         /// Whether or not this file has the `executable` bit set.
36 |         executable: bool,
37 |     },
38 |     /// A symbolic link that points to another file path.
39 |     Symlink {
40 |         /// The path that this symlink points to.
41 |         target: ByteBuf,
42 |     },
43 |     /// A directory. It usually has a mapping of names to child nodes (in
44 |     /// the case of a full tree), but we also support a reduced form where
45 |     /// we only store the number of entries in the directory.
46 |     Directory {
47 |         /// The size of a directory is the number of children it contains.
48 |         size: u64,
49 | 
50 |         /// The contents of this directory. These are generic, as explained
51 |         /// in the documentation for this type.
52 |         contents: T,
53 |     },
54 | }
55 | 
56 | /// The type of a file.
57 | ///
58 | /// This mirrors the variants of `FileNode`, but without storing
59 | /// data in each variant.
60 | ///
61 | /// An exception to this is the `executable` field for the regular type.
62 | /// This is needed since we present `regular` and `executable` files as different
63 | /// to the user, so we need a way to represent both types.
64 | #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
65 | pub enum FileType {
66 |     Regular { executable: bool },
67 |     Directory,
68 |     Symlink,
69 | }
70 | 
71 | impl ValueEnum for FileType {
72 |     fn value_variants<'a>() -> &'a [Self] {
73 |         &[
74 |             FileType::Regular { executable: false },
75 |             FileType::Regular { executable: true },
76 |             FileType::Directory,
77 |             FileType::Symlink,
78 |         ]
79 |     }
80 | 
81 |     fn to_possible_value(&self) -> Option<PossibleValue> {
82 |         match self {
83 |             FileType::Regular { executable: false } => Some(PossibleValue::new("r")),
84 |             FileType::Regular { executable: true } => Some(PossibleValue::new("x")),
85 |             FileType::Directory => Some(PossibleValue::new("d")),
86 |             FileType::Symlink => Some(PossibleValue::new("s")),
87 |         }
88 |     }
89 | }
90 | 
91 | impl FromStr for FileType {
92 |     type Err = &'static str;
93 | 
94 |     fn from_str(s: &str) -> Result<Self, Self::Err> {
95 |         match s {
96 |             "r" => Ok(FileType::Regular { executable: false }),
97 |             "x" => Ok(FileType::Regular { executable: true }),
98 |             "d" => Ok(FileType::Directory),
99 |             "s" => Ok(FileType::Symlink),
100 |             _ => Err("invalid file type"),
101 |         }
102 |     }
103 | }
104 | 
105 | /// This lists all file types that can currently be represented.
106 | pub const ALL_FILE_TYPES: &[FileType] = &[
107 |     FileType::Regular { executable: true },
108 |     FileType::Regular { executable: false },
109 |     FileType::Directory,
110 |     FileType::Symlink,
111 | ];
112 | 
113 | impl<T> FileNode<T> {
114 |     /// Split this node into a node without contents and optionally the contents themselves,
115 |     /// if the node was a directory.
116 |     pub fn split_contents(&self) -> (FileNode<()>, Option<&T>) {
117 |         use self::FileNode::*;
118 |         match *self {
119 |             Regular { size, executable } => (Regular { size, executable }, None),
120 |             Symlink { ref target } => (
121 |                 Symlink {
122 |                     target: target.clone(),
123 |                 },
124 |                 None,
125 |             ),
126 |             Directory { size, ref contents } => (Directory { size, contents: () }, Some(contents)),
127 |         }
128 |     }
129 | 
130 |     /// Return the type of this file.
131 |     pub fn get_type(&self) -> FileType {
132 |         match *self {
133 |             FileNode::Regular { executable, .. } => FileType::Regular { executable },
134 |             FileNode::Directory { .. } => FileType::Directory,
135 |             FileNode::Symlink { .. } => FileType::Symlink,
136 |         }
137 |     }
138 | }
139 | 
140 | impl FileNode<()> {
141 |     fn encode<W: Write>(&self, encoder: &mut frcode::Encoder<W>) -> io::Result<()> {
142 |         use self::FileNode::*;
143 |         match *self {
144 |             Regular { executable, size } => {
145 |                 let e = if executable { "x" } else { "r" };
146 |                 encoder.write_meta(format!("{}{}", size, e).as_bytes())?;
147 |             }
148 |             Symlink { ref target } => {
149 |                 encoder.write_meta(target)?;
150 |                 encoder.write_meta(b"s")?;
151 |             }
152 |             Directory { size, contents: () } => {
153 |                 encoder.write_meta(format!("{}d", size).as_bytes())?;
154 |             }
155 |         }
156 |         Ok(())
157 |     }
158 | 
159 |     pub fn decode(buf: &[u8]) -> Option<Self> {
160 |         use self::FileNode::*;
161 |         buf.split_last().and_then(|(kind, buf)| match *kind {
162 |             b'x' | b'r' => {
163 |                 let executable = *kind == b'x';
164 |                 str::from_utf8(buf)
165 |                     .ok()
166 |                     .and_then(|s| s.parse().ok())
167 |                     .map(|size| Regular { executable, size })
168 |             }
169 |             b's' => Some(Symlink {
170 |                 target: ByteBuf::from(buf),
171 |             }),
172 |             b'd' => str::from_utf8(buf)
173 |                 .ok()
174 |                 .and_then(|s| s.parse().ok())
175 |                 .map(|size| Directory { size, contents: () }),
176 |             _ => None,
177 |         })
178 |     }
179 | }
180 | 
181 | /// This type represents a full tree of files.
182 | ///
183 | /// A *file tree* is a *file node* where each directory contains
184 | /// the tree for its children.
185 | #[derive(Debug, PartialEq, Serialize, Deserialize, Clone)]
186 | pub struct FileTree(FileNode<HashMap<ByteBuf, FileTree>>);
187 | 
188 | /// An entry in a file tree is a path to a node paired with that node.
189 | ///
190 | /// If the entry refers to a directory, it only stores information about that
191 | /// directory itself. It does not contain the children of the directory.
192 | pub struct FileTreeEntry {
193 |     pub path: Vec<u8>,
194 |     pub node: FileNode<()>,
195 | }
196 | 
197 | impl FileTreeEntry {
198 |     pub fn encode<W: Write>(self, encoder: &mut frcode::Encoder<W>) -> io::Result<()> {
199 |         self.node.encode(encoder)?;
200 |         encoder.write_path(self.path)?;
201 |         Ok(())
202 |     }
203 | 
204 |     pub fn decode(buf: &[u8]) -> Option<Self> {
205 |         memchr(b'\0', buf).and_then(|sep| {
206 |             let path = &buf[(sep + 1)..];
207 |             let node = &buf[0..sep];
208 |             FileNode::decode(node).map(|node| FileTreeEntry {
209 |                 path: path.to_vec(),
210 |                 node,
211 |             })
212 |         })
213 |     }
214 | }
215 | 
216 | impl FileTree {
217 |     pub fn regular(size: u64, executable: bool) -> Self {
218 |         FileTree(FileNode::Regular { size, executable })
219 |     }
220 | 
221 |     pub fn symlink(target: ByteBuf) -> Self {
222 |         FileTree(FileNode::Symlink { target })
223 |     }
224 | 
225 |     pub fn directory(entries: HashMap<ByteBuf, FileTree>) -> Self {
226 |         FileTree(FileNode::Directory {
227 |             size: entries.len() as u64,
228 |             contents: entries,
229 |         })
230 |     }
231 | 
232 |     pub fn to_list(&self, filter_prefix: &[u8]) -> Vec<FileTreeEntry> {
233 |         let mut result = Vec::new();
234 | 
235 |         let mut stack = Vec::with_capacity(16);
236 |         stack.push((Vec::new(), self));
237 | 
238 |         while let Some(entry) = stack.pop() {
239 |             let path = entry.0;
240 |             let FileTree(current) = entry.1;
241 |             let (node, contents) = current.split_contents();
242 |             if let Some(entries) = contents {
243 |                 let mut entries = entries.iter().collect::<Vec<_>>();
244 |                 entries.sort_by(|a, b| Ord::cmp(a.0, b.0));
245 |                 for (name, entry) in entries {
246 |                     let mut path = path.clone();
247 |                     path.push(b'/');
248 |                     path.extend_from_slice(name);
249 |                     stack.push((path, entry));
250 |                 }
251 |             }
252 |             if path.starts_with(filter_prefix) {
253 |                 result.push(FileTreeEntry { path, node });
254 |             }
255 |         }
256 |         result
257 |     }
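    // A worked example of `to_list` (illustrative, not part of the original docs):
    // for a tree containing a directory `bin` with a single file `hello`, calling
    // `to_list(b"/bin")` yields entries for `/bin` (a directory node that only
    // carries its child count) and `/bin/hello` (a regular file node), while the
    // root entry (empty path) is filtered out because it does not start with the prefix.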
258 | }
259 | 
--------------------------------------------------------------------------------
/src/frcode.rs:
--------------------------------------------------------------------------------
1 | //! A compact encoding for file tree entries based on sharing prefixes.
2 | //!
3 | //! This module contains a rust implementation of a variant of the `frcode` tool
4 | //! used by GNU findutils' locate. It has been extended to allow meta information
5 | //! to be attached to each entry so it is no longer compatible with the original
6 | //! frcode format.
7 | //! (See http://www.delorie.com/gnu/docs/findutils/locatedb.5.html for a description of the frcode format.)
8 | //!
9 | //! The basic building block of the encoding is a line. Each line has the following format:
10 | //! (the spaces are for readability only, they are not present in the encoding)
11 | //!
12 | //! ```text
13 | //! <metadata> <\x00 byte> <shared prefix length differential> <non-shared path suffix>
14 | //! ```
15 | //!
16 | //! Each entry holds two parts of data: metadata, which is just some arbitrary blob of NUL-terminated bytes
17 | //! and a path. Because we are storing file trees, the path will likely share a long prefix with the previous
18 | //! entry's path (we traverse directory entries in sorted order to maximize this chance), so we first store
19 | //! the length of the shared prefix.
20 | //!
21 | //! Since this length will likely be similar to the previous one (if there are many entries in `/foo/bar`, then they will
22 | //! all share a prefix of at least the length of `/foo/bar`) we only store the signed *difference* to the previous shared prefix length
23 | //! (This is why it's called a differential). For differences smaller than +/-127 we store them directly as a single byte. If the
24 | //! difference is greater than that, the first byte will be `0x80` (-128) indicating that the following two bytes represent the
25 | //! difference (with the high byte first [big endian]).
26 | //!
27 | //! As an example, consider the following non-encoded plaintext, where `:` separates the metadata from the path:
28 | //!
29 | //! ```text
30 | //! d:/
31 | //! d:/foo
32 | //! d:/foo/bar
33 | //! f:/foo/bar/test.txt
34 | //! f:/foo/bar/text.txt
35 | //! d:/foo/baz
36 | //! ```
37 | //!
38 | //! This text would be encoded as (using `[v]` to indicate a byte with the value of v)
39 | //!
40 | //! ```text
41 | //! d[0][0]/
42 | //! d[0][1]foo
43 | //! d[0][3]/bar
44 | //! f[0][4]/test.txt
45 | //! f[0][3]xt.txt
46 | //! d[0][-4]z
47 | //! ```
48 | //!
49 | //! At the beginning, there is no previous entry, so the shared prefix length must always be `0` (and so must the shared prefix differential).
50 | //! The second entry shares `1` byte with the first path so the difference is `1`. The third entry shares `4` bytes with the second one, which
51 | //! is `3` more than the shared length of the second one, so we encode a `3` followed by the non-shared bytes, and so on for the remaining entries.
52 | //! The last entry shares four bytes less than the second to last one did with its predecessor, so here the differential is negative.
53 | //!
54 | //! Through this encoding, the size of the index is typically reduced by a factor of 3 to 5.
55 | use std::cmp;
56 | use std::io::{self, BufRead, Write};
57 | use std::ops::{Deref, DerefMut};
58 | 
59 | use error_chain::{bail, error_chain};
60 | use memchr;
61 | 
62 | error_chain! {
63 |     foreign_links {
64 |         Io(io::Error);
65 |     }
66 |     errors {
67 |         SharedOutOfRange { previous_len: usize, shared_len: isize } {
68 |             description("shared prefix length out of bounds")
69 |             display("length of shared prefix must be >= 0 and <= {} (length of previous item), but found: {}", previous_len, shared_len)
70 |         }
71 |         SharedOverflow { shared_len: isize, diff: isize } {
72 |             description("shared prefix length too big (overflow)")
73 |             display("length of shared prefix too big: cannot add {} to {} without overflow", shared_len, diff)
74 |         }
75 |         MissingNul {
76 |             description("missing terminating NUL byte for entry")
77 |         }
78 |         MissingNewline {
79 |             description("missing newline separator for entry")
80 |         }
81 |         MissingPrefixDifferential {
82 |             description("missing the shared prefix length differential for entry")
83 |         }
84 |     }
85 | }
86 | 
87 | /// A buffer that may be resizable or not. This is used for decoding,
88 | /// where we want to make the buffer resizable as long as we haven't decoded
89 | /// a full entry yet but want to lock it as soon as we got a full entry.
90 | ///
91 | /// This is necessary because we always need to be able to decode at least
92 | /// one entry to make progress, as we never return partial entries during decoding.
93 | struct ResizableBuf {
94 |     allow_resize: bool,
95 |     data: Vec<u8>,
96 | }
97 | 
98 | impl ResizableBuf {
99 |     /// Allocates a new resizable buffer with the given initial size.
100 |     ///
101 |     /// The new buffer will allow resizing initially.
102 |     fn new(capacity: usize) -> ResizableBuf {
103 |         ResizableBuf {
104 |             data: vec![0; capacity],
105 |             allow_resize: true,
106 |         }
107 |     }
108 | 
109 |     /// Resizes the buffer to hold at least `new_size` elements. Returns `true`
110 |     /// if resizing was successful (so that the buffer can now hold at least `new_size` elements)
111 |     /// or `false` if not (meaning `new_size` is greater than the current size and resizing
112 |     /// was not allowed).
113 |     fn resize(&mut self, new_size: usize) -> bool {
114 |         if new_size <= self.data.len() {
115 |             return true;
116 |         }
117 | 
118 |         if !self.allow_resize {
119 |             return false;
120 |         }
121 | 
122 |         self.data.resize(new_size, b'\x00');
123 |         true
124 |     }
125 | }
126 | 
127 | impl Deref for ResizableBuf {
128 |     type Target = [u8];
129 | 
130 |     fn deref(&self) -> &[u8] {
131 |         &self.data
132 |     }
133 | }
134 | 
135 | impl DerefMut for ResizableBuf {
136 |     fn deref_mut(&mut self) -> &mut [u8] {
137 |         &mut self.data
138 |     }
139 | }
140 | 
141 | /// A decoder for the frcode format. It reads data from some input source
142 | /// and returns blocks of decoded entries.
143 | ///
144 | /// It will not split the metadata/path parts of individual entries since
145 | /// the primary use case for this is searching, where it is enough to decode
146 | /// the entries that match.
147 | pub struct Decoder<R> {
148 |     /// The input source from which we decode.
149 |     reader: R,
150 |     /// Position of the first byte of the path part of the last entry.
151 |     /// We need this to copy the shared prefix.
152 |     last_path: usize,
153 |     /// Position of the start of the entry that didn't fully fit in the buffer in the
154 |     /// last decode iteration. Since this entry was partial, it hasn't been returned to
155 |     /// the user yet and we need to continue decoding this entry in this iteration.
156 |     partial_entry_start: usize,
157 |     /// The length of the shared prefix for the current entry. This is necessary because
158 |     /// the shared length is stored as a difference, so we need the previous value to update it.
159 |     shared_len: isize,
160 |     /// The buffer into which we store the decoded bytes.
161 |     buf: ResizableBuf,
162 |     /// Current write position in buf. The next decoded byte should be written to buf[pos].
163 |     pos: usize,
164 | }
165 | 
166 | impl<R: BufRead> Decoder<R> {
167 |     /// Construct a new decoder for the given source.
168 |     pub fn new(reader: R) -> Decoder<R> {
169 |         let capacity = 1_000_000;
170 |         Decoder {
171 |             reader,
172 |             buf: ResizableBuf::new(capacity),
173 |             pos: 0,
174 |             last_path: 0,
175 |             shared_len: 0,
176 |             partial_entry_start: 0,
177 |         }
178 |     }
179 | 
180 |     /// Copies `self.shared_len` bytes from the previous entry's path into the output buffer.
181 |     ///
182 |     /// Returns false if the buffer was too small and could not be resized. In this case, no
183 |     /// bytes will be copied.
184 |     fn copy_shared(&mut self) -> Result<bool> {
185 |         let shared_len = self.shared_len as usize;
186 |         let new_pos = self.pos + shared_len;
187 |         let new_last_path = self.pos;
188 |         if !self.buf.resize(new_pos) {
189 |             return Ok(false);
190 |         }
191 | 
192 |         if self.shared_len < 0 || self.last_path + shared_len > self.pos {
193 |             bail!(ErrorKind::SharedOutOfRange {
194 |                 previous_len: self.pos - self.last_path,
195 |                 shared_len: self.shared_len,
196 |             });
197 |         }
198 | 
199 |         let (_, last) = self.buf.split_at_mut(self.last_path);
200 |         let (last, new) = last.split_at_mut(self.pos - self.last_path);
201 |         new[..shared_len].copy_from_slice(&last[..shared_len]);
202 | 
203 |         self.pos += shared_len;
204 |         self.last_path = new_last_path;
205 |         Ok(true)
206 |     }
207 | 
208 |     /// Copies bytes from the input reader to the output buffer until a `\x00` byte is read.
209 |     /// The NUL byte is included in the output buffer.
210 |     ///
211 |     /// Returns false if the output buffer was exhausted before a NUL byte could be found and
212 |     /// could not be resized. All bytes that were read before this situation was detected will
213 |     /// have already been copied to the output buffer in this case.
214 |     ///
215 |     /// It will also return false if the end of the input was reached.
216 |     fn read_to_nul(&mut self) -> Result<bool> {
217 |         loop {
218 |             let (done, len) = {
219 |                 let &mut Decoder {
220 |                     ref mut reader,
221 |                     ref mut buf,
222 |                     ref mut pos,
223 |                     ..
224 |                 } = self;
225 |                 let input = match reader.fill_buf() {
226 |                     Ok(data) => data,
227 |                     Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
228 |                     Err(e) => return Err(Error::from(e)),
229 |                 };
230 | 
231 |                 if input.is_empty() {
232 |                     return Ok(false);
233 |                 }
234 | 
235 |                 let (done, len) = match memchr::memchr(b'\x00', input) {
236 |                     Some(i) => (true, i + 1),
237 |                     None => (false, input.len()),
238 |                 };
239 | 
240 |                 let new_pos = *pos + len;
241 |                 if buf.resize(new_pos) {
242 |                     buf[*pos..new_pos].copy_from_slice(&input[..len]);
243 |                     *pos = new_pos;
244 |                     (done, len)
245 |                 } else {
246 |                     return Ok(false);
247 |                 }
248 |             };
249 |             self.reader.consume(len);
250 |             if done {
251 |                 return Ok(true);
252 |             }
253 |         }
254 |     }
255 | 
256 |     /// Read the differential from the input reader. This function will return an error
257 |     /// if the end of input has been reached.
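    ///
    /// Worked examples of the variable-length encoding (illustrative bytes):
    /// `0x05` alone decodes to `+5`; `0xFB` (as i8: `-5`) decodes to `-5`; and the
    /// escape byte `0x80` followed by `0x01 0x2C` decodes to the 16-bit
    /// big-endian value `+300`.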
258 |     fn decode_prefix_diff(&mut self) -> Result<i16> {
259 |         let mut buf = [0; 1];
260 |         self.reader
261 |             .read_exact(&mut buf)
262 |             .chain_err(|| ErrorKind::MissingPrefixDifferential)?;
263 | 
264 |         if buf[0] != 0x80 {
265 |             Ok((buf[0] as i8) as i16)
266 |         } else {
267 |             let mut buf = [0; 2];
268 |             self.reader
269 |                 .read_exact(&mut buf)
270 |                 .chain_err(|| ErrorKind::MissingPrefixDifferential)?;
271 |             let high = buf[0] as i16;
272 |             let low = buf[1] as i16;
273 |             Ok(high << 8 | low)
274 |         }
275 |     }
276 | 
277 |     /// Decodes some entries to fill the buffer and returns a block of decoded entries.
278 |     ///
279 |     /// It will decode as many entries as fit into the internal buffer, but at least one.
280 |     /// In the returned block of bytes, an entry's metadata and path will be separated by a NUL byte
281 |     /// and entries will be terminated with a newline character. This allows for fast searching with
282 |     /// a line based searcher.
283 |     ///
284 |     /// The function does not return partially decoded entries. Because of this, the size of the returned
285 |     /// slice will vary from call to call. The last entry which did not fully fit into the buffer yet
286 |     /// will be returned as the first entry at the next call.
287 |     pub fn decode(&mut self) -> Result<&mut [u8]> {
288 |         // Save end pointer from previous iteration and reset write position
289 |         let end = self.pos;
290 |         self.pos = 0;
291 | 
292 |         // We need to preserve some data from the previous iteration, namely:
293 |         //
294 |         // * all data after the `self.last_path` position, for copying the shared prefix
295 |         // * everything from the start of the partial entry, since this entry wasn't fully decoded
296 |         //   in the last iteration and we want to continue decoding it now
297 |         //
298 |         // If we stopped decoding the partial entry after already copying the shared prefix, then
299 |         // `last_path` will already point to the partial entry so it will be greater than `partial_entry_start`.
300 |         //
301 |         // If we stopped decoding during copying the metadata though, which comes before we copy the shared
302 |         // prefix, then `last_path` will point to the previous entry's path, so it will be smaller than
303 |         // `partial_entry_start`.
304 |         //
305 |         // To support both these cases, we take the minimum here.
306 |         let mut copy_pos = cmp::min(self.partial_entry_start, self.last_path);
307 | 
308 |         // Since we sometimes copy more than just the partial entry, we need to know where the partial entry
309 |         // starts as that is the first position that we want to return (everything before that was already
310 |         // part of an entry returned in the last iteration).
311 |         let item_start = self.partial_entry_start - copy_pos;
312 | 
313 |         // Shift the last path, because we copy it from copy_pos.. to 0..
314 |         self.last_path -= copy_pos;
315 | 
316 |         // Now we can do the actual copying. We cannot use copy_from_slice here since source and target
317 |         // may overlap.
318 |         while copy_pos < end {
319 |             self.buf[self.pos] = self.buf[copy_pos];
320 |             self.pos += 1;
321 |             copy_pos += 1;
322 |         }
323 | 
324 |         // Allow resizing the buffer, since we haven't decoded a full entry yet
325 |         self.buf.allow_resize = true;
326 | 
327 |         // If the last decoded byte in the buffer is a NUL byte, that means that
328 |         // we are now at the start of the path part of the entry. This means that
329 |         // we need to copy the shared prefix now.
330 |         let mut found_nul = self.pos > 0 && self.buf[self.pos - 1] == b'\x00';
331 |         if found_nul {
332 |             self.copy_shared()?;
333 |         }
334 | 
335 |         // At this point, we are guaranteed to be in either the metadata part or the non-shared part
336 |         // of an entry. In both cases, the action that we need to take is the same: copy data till
337 |         // the next NUL byte. After the NUL byte, we know that we are at the end of the metadata part,
338 |         // so we read a differential and copy the shared prefix, and repeat.
339 |         //
340 |         // Note that this loop doesn't care about where entries end. Only the path part of each entry requires
341 |         // special processing, so we can jump from NUL byte to NUL byte, decode the path and then just copy
342 |         // the data from the source when jumping to the next NUL byte.
343 |         loop {
344 |             // Read data up to the next nul byte.
345 |             if !self.read_to_nul()? {
346 |                 break;
347 |             }
348 | 
349 |             // If we had already found a NUL byte before this one, we've now seen two NUL bytes,
350 |             // so there is at least one full entry in between.
351 |             self.buf.allow_resize = !found_nul;
352 | 
353 |             // We found a NUL byte. Note that we need to set this *after* updating allow_resize,
354 |             // since allow_resize should be set to false only after we've found two NUL bytes.
355 |             found_nul = true;
356 | 
357 |             // Parse the next prefix length difference
358 |             let diff = self.decode_prefix_diff()? as isize;
359 | 
360 |             // Update the shared len
361 |             self.shared_len =
362 |                 self.shared_len
363 |                     .checked_add(diff)
364 |                     .ok_or(ErrorKind::SharedOverflow {
365 |                         shared_len: self.shared_len,
366 |                         diff,
367 |                     })?;
368 | 
369 |             // Copy the shared prefix
370 |             if !self.copy_shared()? {
371 |                 break;
372 |             }
373 |         }
374 | 
375 |         // Since we don't want to return partially decoded items, we need to find the end of the last entry.
376 |         self.partial_entry_start = memchr::memrchr(b'\n', &self.buf[..self.pos])
377 |             .ok_or_else(|| ErrorKind::MissingNewline)?
378 |             + 1;
379 |         Ok(&mut self.buf[item_start..self.partial_entry_start])
380 |     }
381 | }
382 | 
383 | /// This struct implements an encoder for the frcode format. The encoder
384 | /// writes directly to the underlying `Write` instance.
385 | ///
386 | /// To encode an entry you should first call `write_meta` a number of times
387 | /// to fill the meta data portion. Then, call `write_path` once to finalize the entry.
388 | ///
389 | /// One important property of this encoder is that it is safe to open and close
390 | /// it multiple times on the same stream, like this:
391 | ///
392 | /// ```text
393 | /// {
394 | ///     let encoder1 = Encoder::new(&mut stream);
395 | /// } // encoder1 gets dropped here
396 | /// {
397 | ///     let encoder2 = Encoder::new(&mut stream);
398 | /// }
399 | /// ```
400 | ///
401 | /// To support this, the encoder has a "footer" item that will get written when it is dropped.
402 | /// This is necessary because we need to write at least one more entry to reset the shared prefix
403 | /// length to zero, since the next encoder will expect that as initial state.
404 | pub struct Encoder<W: Write> {
405 |     writer: W,
406 |     last: Vec<u8>,
407 |     shared_len: i16,
408 |     footer_meta: Vec<u8>,
409 |     footer_path: Vec<u8>,
410 |     footer_written: bool,
411 | }
412 | 
413 | impl<W: Write> Drop for Encoder<W> {
414 |     fn drop(&mut self) {
415 |         self.write_footer().expect("failed to write footer")
416 |     }
417 | }
418 | 
419 | impl<W: Write> Encoder<W> {
420 |     /// Constructs a new encoder for the given writer.
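    ///
    /// (For a concrete use of this API in this crate, see `database::Writer::add`,
    /// which creates one encoder per package, passing `b"p"` as the footer metadata
    /// and the JSON-encoded store path as the footer path.)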
421 |     ///
422 |     /// The encoder will write the given `footer_meta` and `footer_path` as the last entry.
423 |     ///
424 |     /// # Panics
425 |     ///
426 |     /// If either `footer_meta` or `footer_path` contain NUL or newline bytes.
427 |     pub fn new(writer: W, footer_meta: Vec<u8>, footer_path: Vec<u8>) -> Encoder<W> {
428 |         assert!(
429 |             !footer_meta.contains(&b'\x00'),
430 |             "footer meta must not contain null bytes"
431 |         );
432 |         assert!(
433 |             !footer_path.contains(&b'\x00'),
434 |             "footer path must not contain null bytes"
435 |         );
436 |         assert!(
437 |             !footer_meta.contains(&b'\n'),
438 |             "footer meta must not contain newlines"
439 |         );
440 |         assert!(
441 |             !footer_path.contains(&b'\n'),
442 |             "footer path must not contain newlines"
443 |         );
444 |         Encoder {
445 |             writer,
446 |             last: Vec::new(),
447 |             shared_len: 0,
448 |             footer_meta,
449 |             footer_path,
450 |             footer_written: false,
451 |         }
452 |     }
453 | 
454 |     /// Writes the specified shared prefix differential to the output stream.
455 |     ///
456 |     /// This function takes care of the variable-length encoding used for prefix differentials
457 |     /// in the frcode format.
458 |     fn encode_diff(&mut self, diff: i16) -> io::Result<()> {
459 |         let low = (diff & 0xFF) as u8;
460 |         if diff.abs() < i8::max_value() as i16 {
461 |             self.writer.write_all(&[low])?;
462 |         } else {
463 |             let high = ((diff >> 8) & 0xFF) as u8;
464 |             self.writer.write_all(&[0x80, high, low])?;
465 |         }
466 |         Ok(())
467 |     }
468 | 
469 |     /// Writes the meta data of an entry to the output stream.
470 |     ///
471 |     /// This function can be called multiple times to extend the current meta data part.
472 |     /// Since the meta data is written as-is to the output stream, calling the function
473 |     /// multiple times will concatenate the meta data of all calls.
474 |     ///
475 |     /// # Panics
476 |     ///
477 |     /// If the meta data contains NUL bytes or newlines.
478 |     pub fn write_meta(&mut self, meta: &[u8]) -> io::Result<()> {
479 |         assert!(
480 |             !meta.contains(&b'\x00'),
481 |             "entry must not contain null bytes"
482 |         );
483 |         assert!(!meta.contains(&b'\n'), "entry must not contain newlines");
484 | 
485 |         self.writer.write_all(meta)?;
486 |         Ok(())
487 |     }
488 | 
489 |     /// Finalizes an entry by encoding its path to the output stream.
490 |     ///
491 |     /// This function should be called after you've finished writing the meta data for
492 |     /// the current entry. It will terminate the meta data part by writing the NUL byte
493 |     /// and then encode the path into the output stream.
494 |     ///
495 |     /// The entry will be terminated with a newline.
496 |     ///
497 |     /// # Panics
498 |     ///
499 |     /// If the path contains NUL bytes or newlines.
500 |     pub fn write_path(&mut self, path: Vec<u8>) -> io::Result<()> {
501 |         assert!(
502 |             !path.contains(&b'\x00'),
503 |             "entry must not contain null bytes"
504 |         );
505 |         assert!(!path.contains(&b'\n'), "entry must not contain newlines");
506 |         self.writer.write_all(&[b'\x00'])?;
507 | 
508 |         let mut shared: isize = 0;
509 |         let max_shared = i16::max_value() as isize;
510 |         for (a, b) in self.last.iter().zip(path.iter()) {
511 |             if a != b || shared > max_shared {
512 |                 break;
513 |             }
514 |             shared += 1;
515 |         }
516 |         let shared = shared as i16;
517 | 
518 |         let diff = shared - self.shared_len;
519 |         self.encode_diff(diff)?;
520 | 
521 |         self.last = path;
522 |         self.shared_len = shared;
523 | 
524 |         let pos = shared as usize;
525 |         self.writer.write_all(&self.last[pos..])?;
526 |         self.writer.write_all(b"\n")?;
527 | 
528 |         Ok(())
529 |     }
530 | 
531 |     /// Writes the footer entry.
532 |     ///
533 |     /// The footer entry will not share any prefix with the preceding entry,
534 |     /// so after this function, the shared prefix length is zero. This guarantees
535 |     /// that we can start another Encoder after this item, since the Encoder expects
536 |     /// the initial shared prefix length to be zero.
537 |     fn write_footer(&mut self) -> io::Result<()> {
538 |         if self.footer_written {
539 |             return Ok(());
540 |         }
541 | 
542 |         let diff = -self.shared_len;
543 |         self.writer.write_all(&self.footer_meta)?;
544 |         self.writer.write_all(b"\x00")?;
545 |         self.encode_diff(diff)?;
546 |         self.writer.write_all(&self.footer_path)?;
547 |         self.writer.write_all(b"\n")?;
548 |         self.footer_written = true;
549 |         Ok(())
550 |     }
551 | 
552 |     /// Finishes the encoder by writing the footer entry.
553 |     ///
554 |     /// This function is called by drop, but calling it explicitly is recommended as
555 |     /// drop has no way to report IO errors that may occur during writing the footer.
556 |     pub fn finish(mut self) -> io::Result<()> {
557 |         self.write_footer()?;
558 | 
559 |         Ok(())
560 |     }
561 | }
--------------------------------------------------------------------------------
/src/hydra.rs:
--------------------------------------------------------------------------------
1 | //! Interacting with hydra and the binary cache.
2 | //!
3 | //! This module has all functions that deal with accessing hydra or the binary cache.
4 | //! Currently, it only provides two functions: `fetch_files` to get the file listing for
5 | //! a store path and `fetch_references` to retrieve the references from the narinfo.
6 | use std::collections::HashMap;
7 | use std::fmt;
8 | use std::io::{self, Read, Write};
9 | use std::path::PathBuf;
10 | use std::pin::Pin;
11 | use std::result;
12 | use std::str::{self, Utf8Error};
13 | use std::time::{Duration, Instant};
14 | 
15 | use error_chain::error_chain;
16 | use futures::future;
17 | use futures::{Future, TryFutureExt};
18 | use reqwest::header::{HeaderValue, ACCEPT_ENCODING};
19 | use reqwest::Url;
20 | use reqwest::{Client, ClientBuilder, StatusCode};
21 | use serde::de::{Deserializer, MapAccess, Visitor};
22 | use serde::{self, Deserialize};
23 | use serde_bytes::ByteBuf;
24 | use serde_json;
25 | use tokio::time::error::Elapsed;
26 | use tokio_retry::strategy::ExponentialBackoff;
27 | use tokio_retry::{self, Retry};
28 | use xz2::read::XzDecoder;
29 | 
30 | use crate::files::FileTree;
31 | use crate::package::{PathOrigin, StorePath};
32 | use crate::util;
33 | 
34 | error_chain! {
35 |     errors {
36 |         Http(url: String, code: StatusCode) {
37 |             description("http status code error")
38 |             display("request GET '{}' failed with HTTP error {}", url, code)
39 |         }
40 |         ParseResponse(url: String, tmp_file: Option<PathBuf>) {
41 |             description("response parse error")
42 |             display("response to GET '{}' failed to parse{}", url, tmp_file.as_ref().map_or("".into(), |f| format!(" (response saved to {})", f.to_string_lossy())))
43 |         }
44 |         ParseStorePath(url: String, path: String) {
45 |             description("store path parse error")
46 |             display("response to GET '{}' contained invalid store path '{}', expected string matching format $(NIX_STORE_DIR)$(HASH)-$(NAME)", url, path)
47 |         }
48 |         Unicode(url: String, bytes: Vec<u8>, err: Utf8Error) {
49 |             description("unicode error")
50 |             display("response to GET '{}' contained invalid unicode byte {}: {}", url, bytes[err.valid_up_to()], err)
51 |         }
52 |         Decode(url: String) {
53 |             description("decoder error")
54 |             display("response to GET '{}' could not be decoded", url)
55 |         }
56 |         UnsupportedEncoding(url: String, encoding: Option<String>) {
57 |             description("unsupported content-encoding")
58 |             display(
59 |                 "response to GET '{}' had unsupported content-encoding ({})",
60 |                 url,
61 |                 encoding.as_ref().map_or("not present".to_string(), |v| format!("'{}'", v)),
62 |             )
63 |         }
64 |         Timeout {
65 |             description("timeout exceeded")
66 |         }
67 |         TimerError {
68 |             description("timer failure")
69 |         }
70 |         ParseProxy(url: String) {
71 |             description("proxy config error")
72 |             display("cannot parse proxy url ({})", url)
73 |         }
74 |     }
75 |     foreign_links {
76 |         Reqwest(reqwest::Error);
77 |     }
78 | }
79 | 
80 | impl From<Elapsed> for Error {
81 |     fn from(_err: Elapsed) -> Self {
82 |         Error::from(ErrorKind::Timeout)
83 |     }
84 | }
85 | 
86 | /// A Fetcher allows you to make requests to Hydra/the binary cache.
87 | ///
88 | /// It holds all the relevant state for performing requests, such as for example
89 | /// the HTTP client instance and a timer for timeouts.
90 | ///
91 | /// You should use a single instance of this struct to make all your hydra/binary cache
92 | /// requests.
93 | pub struct Fetcher {
94 |     client: Client,
95 |     cache_url: String,
96 | }
97 | 
98 | const RESPONSE_TIMEOUT: Duration = Duration::from_secs(1);
99 | const CONNECT_TIMEOUT: Duration = Duration::from_secs(10);
100 | 
101 | /// A boxed future using this module's error type.
102 | type BoxFuture<'a, I> = Pin<Box<dyn Future<Output = Result<I>> + 'a>>;
103 | 
104 | pub struct ParsedNAR {
105 |     pub store_path: StorePath,
106 |     pub nar_path: String,
107 |     pub references: Vec<StorePath>,
108 | }
109 | 
110 | impl Fetcher {
111 |     /// Initializes a new instance of the `Fetcher` struct.
112 |     /// The underlying HTTP client is configured with `CONNECT_TIMEOUT` and `RESPONSE_TIMEOUT`.
113 |     ///
114 |     /// `cache_url` specifies the URL of the binary cache (example: `https://cache.nixos.org`).
115 |     pub fn new(cache_url: String) -> Result<Fetcher> {
116 |         let client = ClientBuilder::new()
117 |             .connect_timeout(CONNECT_TIMEOUT)
118 |             .timeout(RESPONSE_TIMEOUT)
119 |             .build()?;
120 |         Ok(Fetcher { client, cache_url })
121 |     }
122 | 
123 |     /// Sends a GET request to the given URL and decodes the response.
124 |     ///
125 |     /// The response encoding is detected automatically by reading
126 |     /// the `Content-Encoding` header.
127 |     ///
128 |     /// The returned future resolves to `(url, None)` if the server returned a 404 error. On any
129 |     /// other error, the future resolves to an error. If the request was successful, it returns
130 |     /// `(url, Some(response_content))`.
131 |     ///
132 |     /// This function will automatically retry the request a few times to mitigate intermittent network
133 |     /// failures.
134 |     fn fetch(&self, url: String) -> BoxFuture<(String, Option<Vec<u8>>)> {
135 |         let strategy = ExponentialBackoff::from_millis(50)
136 |             .max_delay(Duration::from_millis(5000))
137 |             .take(20)
138 |             // add some jitter
139 |             .map(tokio_retry::strategy::jitter)
140 |             // wait at least 5 seconds, as that is the time that cache.nixos.org caches 500 internal server errors
141 |             .map(|x| x + Duration::from_secs(5));
142 |         Box::pin(Retry::spawn(strategy, move || {
143 |             Box::pin(self.fetch_noretry(url.clone()))
144 |         }))
145 |     }
146 | 
147 |     /// The implementation of `fetch`, without the retry logic.
148 |     async fn fetch_noretry(&self, url: String) -> Result<(String, Option<Vec<u8>>)> {
149 |         let uri = Url::parse(&url).expect("url passed to fetch must be valid");
150 |         let request = self
151 |             .client
152 |             .get(uri)
153 |             .header(
154 |                 ACCEPT_ENCODING,
155 |                 HeaderValue::from_static("br, gzip, deflate"),
156 |             )
157 |             .build()
158 |             .expect("HTTP request is valid");
159 | 
160 |         let res = self.client.execute(request).await?;
161 | 
162 |         let code = res.status();
163 | 
164 |         if code == StatusCode::NOT_FOUND {
165 |             return Ok((url, None));
166 |         }
167 | 
168 |         if !code.is_success() {
169 |             return Err(Error::from(ErrorKind::Http(url, code)));
170 |         }
171 | 
172 |         let decoded = res.bytes().await?.into();
173 | 
174 |         Ok((url, Some(decoded)))
175 |     }
176 | 
177 |     /// Fetches the references of a given store path.
178 |     ///
179 |     /// Returns the references of the store path and the store path itself. Note that this
180 |     /// function only requires the hash part of the store path that is passed as argument,
181 |     /// but it will return a full store path as a result. So you can use this function to
182 |     /// resolve hashes to full store paths as well.
183 |     ///
184 |     /// The references will be `None` if no information about the store path could be found
185 |     /// (happens if the narinfo wasn't found, which means that hydra didn't build this path).
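    ///
    /// For reference, the narinfo lines this parser looks at have roughly this
    /// shape (illustrative values, with `<hash>` standing in for real store hashes):
    ///
    /// ```text
    /// StorePath: /nix/store/<hash>-hello-2.12
    /// URL: nar/<hash>.nar.xz
    /// References: <hash>-glibc-2.37 <hash>-hello-2.12
    /// ```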
187 |     pub fn fetch_references(&self, mut path: StorePath) -> BoxFuture<Option<ParsedNAR>> {
188 |         let url = format!("{}/{}.narinfo", self.cache_url, path.hash());
189 | 
190 |         let parse_response = move |(url, data)| {
191 |             let url: String = url;
192 |             let data: Vec<u8> = match data {
193 |                 Some(v) => v,
194 |                 None => return Ok(None),
195 |             };
196 | 
197 |             let mut nar_path = None;
198 |             let mut result = Vec::new();
199 |             for line in data.split(|x| x == &b'\n') {
200 |                 if let Some(line) = line.strip_prefix(b"References: ") {
201 |                     let line = str::from_utf8(line)
202 |                         .map_err(|e| ErrorKind::Unicode(url.clone(), line.to_vec(), e))?;
203 |                     result = line
204 |                         .split_whitespace()
205 |                         .map(|new_path| {
206 |                             let new_origin = PathOrigin {
207 |                                 toplevel: false,
208 |                                 ..path.origin().into_owned()
209 |                             };
210 |                             StorePath::parse(new_origin, new_path).ok_or_else(|| {
211 |                                 ErrorKind::ParseStorePath(url.clone(), new_path.to_string()).into()
212 |                             })
213 |                         })
214 |                         .collect::<Result<Vec<_>>>()?;
215 |                 }
216 | 
217 |                 if let Some(line) = line.strip_prefix(b"StorePath: ") {
218 |                     let line = str::from_utf8(line)
219 |                         .map_err(|e| ErrorKind::Unicode(url.clone(), line.to_vec(), e))?;
220 |                     let line = line.trim();
221 | 
222 |                     path = StorePath::parse(path.origin().into_owned(), line)
223 |                         .ok_or_else(|| ErrorKind::ParseStorePath(url.clone(), line.to_string()))?;
224 |                 }
225 | 
226 |                 if let Some(line) = line.strip_prefix(b"URL: ") {
227 |                     let line = str::from_utf8(line)
228 |                         .map_err(|e| ErrorKind::Unicode(url.clone(), line.to_vec(), e))?;
229 |                     let line = line.trim();
230 | 
231 |                     nar_path = Some(line.to_owned());
232 |                 }
233 |             }
234 | 
235 |             Ok(Some(ParsedNAR {
236 |                 store_path: path,
237 |                 nar_path: nar_path
238 |                     .ok_or(ErrorKind::ParseStorePath(url, "no URL line found".into()))?,
239 |                 references: result,
240 |             }))
241 |         };
242 | 
243 |         Box::pin(
244 |             self.fetch(url)
245 |                 .and_then(|r| future::ready(parse_response(r))),
246 |         )
247 |     }
248 | 
249 |     /// Fetches the file listing for the given store path.
250 |     ///
251 |     /// A file listing is a tree of the files that the given store path contains.
252 |     pub async fn fetch_files<'a>(&self, path: &StorePath) -> Result<Option<FileTree>> {
253 |         let url_xz = format!("{}/{}.ls.xz", self.cache_url, path.hash());
254 |         let url_generic = format!("{}/{}.ls", self.cache_url, path.hash());
255 |         let name = format!("{}.json", path.hash());
256 | 
257 |         let (url, body) = self.fetch(url_generic).await?;
258 |         let contents = match body {
259 |             Some(v) => v,
260 |             None => {
261 |                 let (_, Some(body)) = self.fetch(url_xz.clone()).await? else {
262 |                     return Ok(None);
263 |                 };
264 | 
265 |                 let mut unpacked = vec![];
266 |                 XzDecoder::new(&body[..])
267 |                     .read_to_end(&mut unpacked)
268 |                     .chain_err(|| ErrorKind::Decode(url_xz))?;
269 | 
270 |                 unpacked
271 |             }
272 |         };
273 | 
274 |         let now = Instant::now();
275 |         let response: FileListingResponse =
276 |             serde_json::from_slice(&contents[..]).chain_err(|| {
277 |                 ErrorKind::ParseResponse(url, util::write_temp_file("file_listing.json", &contents))
278 |             })?;
279 |         let duration = now.elapsed();
280 | 
281 |         if duration > Duration::from_millis(2000) {
282 |             let secs = duration.as_secs();
283 |             let millis = duration.subsec_millis();
284 | 
285 |             writeln!(
286 |                 &mut io::stderr(),
287 |                 "warning: took a long time to parse: {}s:{:03}ms",
288 |                 secs,
289 |                 millis
290 |             )
291 |             .unwrap_or(());
292 |             if let Some(p) = util::write_temp_file(&name, &contents) {
293 |                 writeln!(
294 |                     &mut io::stderr(),
295 |                     "saved response to file: {}",
296 |                     p.to_string_lossy()
297 |                 )
298 |                 .unwrap_or(());
299 |             }
300 |         }
301 | 
302 |         Ok(Some(response.root.0))
303 |     }
304 | }
305 | 
306 | /// This data type represents the format of the `.ls` files fetched from the binary cache.
307 | ///
308 | /// The `.ls` file contains a JSON object. The structure of that object is mirrored by this
309 | /// struct for parsing the file.
310 | #[derive(Deserialize, Debug, PartialEq)]
311 | struct FileListingResponse {
312 |     /// Each `.ls` file has a "root" key that contains the file listing.
313 |     root: HydraFileListing,
314 | }
315 | 
316 | /// A wrapper for `FileTree` so that we can add trait implementations for it.
317 | ///
318 | /// (`FileTree` is defined in another module, so we cannot directly implement `Deserialize` for
319 | /// `FileTree` since that would be an orphan impl).
320 | #[derive(Debug, PartialEq)]
321 | struct HydraFileListing(FileTree);
322 | 
323 | /// We need a manual implementation for Deserialize here because file listings can contain non-unicode
324 | /// bytes, so we need to explicitly request that keys be deserialized as `ByteBuf` and not String.
325 | ///
326 | /// We cannot use the serde-derive machinery because the `tagged` enum variant does not support map keys
327 | /// that aren't valid unicode (since it relies on the Deserializer to tell it the type, and the JSON Deserializer
328 | /// will default to String for map keys).
329 | impl<'de> Deserialize<'de> for HydraFileListing {
330 |     fn deserialize<D: Deserializer<'de>>(d: D) -> result::Result<Self, D::Error> {
331 |         struct Root;
332 | 
333 |         // The visitor that implements deserialization for a file tree
334 |         impl<'de> Visitor<'de> for Root {
335 |             type Value = FileTree;
336 | 
337 |             fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
338 |                 write!(f, "a file listing (map)")
339 |             }
340 | 
341 |             fn visit_map<V: MapAccess<'de>>(
342 |                 self,
343 |                 mut access: V,
344 |             ) -> result::Result<Self::Value, V::Error> {
345 |                 const VARIANTS: &[&str] = &["regular", "directory", "symlink"];
346 | 
347 |                 // These will get filled in as we visit the map.
348 |                 // Note that not all of them will be available, depending on the `type` of the file listing
349 |                 // (`directory`, `symlink` or `regular`)
350 |                 let mut typ: Option<ByteBuf> = None;
351 |                 let mut size: Option<u64> = None;
352 |                 let mut executable: Option<bool> = None;
353 |                 let mut entries: Option<HashMap<ByteBuf, HydraFileListing>> = None;
354 |                 let mut target: Option<ByteBuf> = None;
355 | 
356 |                 while let Some(key) = access.next_key::<ByteBuf>()? {
357 |                     match &key as &[u8] {
358 |                         b"type" => {
359 |                             if typ.is_some() {
360 |                                 return Err(serde::de::Error::duplicate_field("type"));
361 |                             }
362 |                             typ = Some(access.next_value()?)
363 | } 364 | b"size" => { 365 | if size.is_some() { 366 | return Err(serde::de::Error::duplicate_field("size")); 367 | } 368 | size = Some(access.next_value()?) 369 | } 370 | b"executable" => { 371 | if executable.is_some() { 372 | return Err(serde::de::Error::duplicate_field("executable")); 373 | } 374 | executable = Some(access.next_value()?) 375 | } 376 | b"entries" => { 377 | if entries.is_some() { 378 | return Err(serde::de::Error::duplicate_field("entries")); 379 | } 380 | entries = Some(access.next_value()?) 381 | } 382 | b"target" => { 383 | if target.is_some() { 384 | return Err(serde::de::Error::duplicate_field("target")); 385 | } 386 | target = Some(access.next_value()?) 387 | } 388 | _ => { 389 | // We ignore all other fields to be more robust against changes in 390 | // the format 391 | access.next_value::()?; 392 | } 393 | } 394 | } 395 | 396 | // the type field must always be present so we know which type to expect 397 | let typ: &[u8] = &typ.ok_or_else(|| serde::de::Error::missing_field("type"))?; 398 | 399 | match typ { 400 | b"regular" => { 401 | let size = size.ok_or_else(|| serde::de::Error::missing_field("size"))?; 402 | let executable = executable.unwrap_or(false); 403 | Ok(FileTree::regular(size, executable)) 404 | } 405 | b"directory" => { 406 | let entries = 407 | entries.ok_or_else(|| serde::de::Error::missing_field("entries"))?; 408 | let entries = entries.into_iter().map(|(k, v)| (k, v.0)).collect(); 409 | Ok(FileTree::directory(entries)) 410 | } 411 | b"symlink" => { 412 | let target = 413 | target.ok_or_else(|| serde::de::Error::missing_field("target"))?; 414 | Ok(FileTree::symlink(target)) 415 | } 416 | _ => Err(serde::de::Error::unknown_variant( 417 | &String::from_utf8_lossy(typ), 418 | VARIANTS, 419 | )), 420 | } 421 | } 422 | } 423 | d.deserialize_map(Root).map(HydraFileListing) 424 | } 425 | } 426 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr( 2 | feature = "cargo-clippy", 3 | warn( 4 | clippy::manual_filter_map, 5 | clippy::map_unwrap_or, 6 | clippy::module_name_repetitions, 7 | clippy::print_stdout, 8 | clippy::unwrap_used, 9 | ) 10 | )] 11 | 12 | pub mod database; 13 | pub mod errors; 14 | pub mod files; 15 | pub mod frcode; 16 | pub mod hydra; 17 | pub mod listings; 18 | pub mod nixpkgs; 19 | pub mod package; 20 | pub mod util; 21 | pub mod workset; 22 | 23 | /// The URL of the binary cache that we use to fetch file listings and references. 24 | /// 25 | /// Hardcoded for now, but may be made a configurable option in the future. 26 | pub const CACHE_URL: &str = "https://cache.nixos.org"; 27 | -------------------------------------------------------------------------------- /src/listings.rs: -------------------------------------------------------------------------------- 1 | use std::fs::File; 2 | use std::io; 3 | use std::iter::FromIterator; 4 | 5 | use futures::{Stream, StreamExt, TryFutureExt}; 6 | use indexmap::map::Entry; 7 | use indexmap::IndexMap; 8 | use rayon::prelude::{IntoParallelRefIterator, ParallelIterator}; 9 | 10 | use crate::errors::{Error, ErrorKind, Result, ResultExt}; 11 | use crate::files::FileTree; 12 | use crate::hydra::Fetcher; 13 | use crate::nixpkgs; 14 | use crate::package::StorePath; 15 | use crate::workset::{WorkSet, WorkSetHandle, WorkSetWatch}; 16 | 17 | // We also add some additional sets that only show up in `nix-env -qa -A someSet`. 
18 | // 19 | // Some of these sets are not built directly by hydra. We still include them here 20 | // since parts of these sets may be built as dependencies of other packages 21 | // that are built by hydra. This way, our attribute path information is more 22 | // accurate. 23 | // 24 | // We only need sets that are not marked "recurseIntoAttrs" here, since if they are, 25 | // they are already part of normal_paths. 26 | pub const EXTRA_SCOPES: [&str; 6] = [ 27 | "xorg", 28 | "haskellPackages", 29 | "rPackages", 30 | "nodePackages", 31 | "coqPackages", 32 | "texlive.pkgs", 33 | ]; 34 | 35 | /// A stream of store paths (packages) with their associated file listings. 36 | /// 37 | /// If a store path has no file listing (for example, because it is not built by hydra), 38 | /// the file listing will be `None` instead. 39 | pub trait FileListingStream: Stream<Item = Result<Option<(StorePath, String, FileTree)>>> {} 40 | impl<T> FileListingStream for T where T: Stream<Item = Result<Option<(StorePath, String, FileTree)>>> 41 | {} 42 | 43 | /// Fetches all the file listings for the full closure of the given starting set of paths. 44 | /// 45 | /// This function will fetch the file listings of each path in the starting set. Additionally, it 46 | /// will also determine the references of each path and recursively fetch the file listings for those 47 | /// paths. 48 | /// 49 | /// The `jobs` argument is used to specify how many requests should be done in parallel. No more than 50 | /// `jobs` requests will be in-flight at any given time. 51 | fn fetch_listings_impl( 52 | fetcher: &Fetcher, 53 | jobs: usize, 54 | starting_set: Vec<StorePath>, 55 | ) -> (impl FileListingStream + '_, WorkSetWatch) { 56 | // Create the queue that will hold all the paths that still need processing. 57 | // Initially, only the starting set needs processing. 58 | 59 | // We can't use FromIterator here as we want shorter paths to win 60 | let mut map: IndexMap<String, StorePath> = IndexMap::with_capacity(starting_set.len()); 61 | 62 | for path in starting_set { 63 | let hash = path.hash().into(); 64 | match map.entry(hash) { 65 | Entry::Occupied(mut e) => { 66 | if e.get().origin().attr.len() > path.origin().attr.len() { 67 | e.insert(path); 68 | } 69 | } 70 | Entry::Vacant(e) => { 71 | e.insert(path); 72 | } 73 | }; 74 | } 75 | 76 | let workset = WorkSet::from_queue(map); 77 | 78 | // Processes a single store path, fetching the file listing for it and 79 | // adding its references to the queue 80 | let process = move |mut handle: WorkSetHandle<_, _>, path: StorePath| async move { 81 | let Some(parsed) = fetcher 82 | .fetch_references(path.clone()) 83 | .map_err(|e| Error::with_chain(e, ErrorKind::FetchReferences(path))) 84 | .await? 85 | else { 86 | return Ok(None); 87 | }; 88 | 89 | for reference in parsed.references { 90 | let hash = reference.hash().into_owned(); 91 | handle.add_work(hash, reference); 92 | } 93 | 94 | let path = parsed.store_path.clone(); 95 | let nar_path = parsed.nar_path; 96 | 97 | match fetcher.fetch_files(&parsed.store_path).await { 98 | Err(e) => Err(Error::with_chain(e, ErrorKind::FetchFiles(path))), 99 | Ok(Some(files)) => Ok(Some((path, nar_path, files))), 100 | Ok(None) => Ok(None), 101 | } 102 | }; 103 | 104 | // Process all paths in the queue, until the queue becomes empty. 105 | let watch = workset.watch(); 106 | let stream = workset 107 | .map(move |(handle, path)| process(handle, path)) 108 | .buffer_unordered(jobs); 109 | (stream, watch) 110 | } 111 | 112 | /// Tries to load the file listings for all paths from a cache file named `paths.cache`.
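/// The cache file is expected to hold a bincode-encoded `Vec<(StorePath, String, FileTree)>`
/// (this matches what the function deserializes below); a missing file is not treated as an
/// error and simply results in `Ok(None)`.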
113 | /// 114 | /// This function is used to implement the `--path-cache` option. 115 | pub fn try_load_paths_cache() -> Result<Option<(impl FileListingStream, WorkSetWatch)>> { 116 | let file = match File::open("paths.cache") { 117 | Ok(file) => file, 118 | Err(ref e) if e.kind() == io::ErrorKind::NotFound => return Ok(None), 119 | Err(e) => return Err(e).chain_err(|| ErrorKind::LoadPathsCache)?, 120 | }; 121 | 122 | let mut input = io::BufReader::new(file); 123 | let fetched: Vec<(StorePath, String, FileTree)> = 124 | bincode::deserialize_from(&mut input).chain_err(|| ErrorKind::LoadPathsCache)?; 125 | let workset = WorkSet::from_iter( 126 | fetched 127 | .into_iter() 128 | .map(|(path, nar, tree)| (path.hash().to_string(), Some((path, nar, tree)))), 129 | ); 130 | let watch = workset.watch(); 131 | let stream = workset.map(|r| { 132 | let (_handle, v) = r; 133 | Ok(v) 134 | }); 135 | 136 | Ok(Some((stream, watch))) 137 | } 138 | 139 | pub fn fetch_listings<'a>( 140 | fetcher: &'a Fetcher, 141 | jobs: usize, 142 | nixpkgs: &str, 143 | systems: Vec<Option<&str>>, 144 | show_trace: bool, 145 | ) -> Result<(impl FileListingStream + 'a, WorkSetWatch)> { 146 | let mut scopes = vec![None]; 147 | scopes.extend(EXTRA_SCOPES.map(Some)); 148 | 149 | let mut all_queries = vec![]; 150 | for system in systems { 151 | for scope in &scopes { 152 | all_queries.push((system, scope)); 153 | } 154 | } 155 | 156 | // Collect results in parallel. 157 | let all_paths = all_queries 158 | .par_iter() 159 | .flat_map_iter(|&(system, scope)| { 160 | nixpkgs::query_packages(nixpkgs, system, scope.as_deref(), show_trace) 161 | .map(|x| x.chain_err(|| ErrorKind::QueryPackages)) 162 | }) 163 | .collect::<Result<Vec<_>>>()?; 164 | 165 | Ok(fetch_listings_impl(fetcher, jobs, all_paths)) 166 | } 167 | -------------------------------------------------------------------------------- /src/nixpkgs.rs: -------------------------------------------------------------------------------- 1 | //! Read package information from nix-env. 2 | //! 3 | //! This module implements the gathering of the initial set of root store paths to fetch. 4 | //! We parse the output of `nix-env --query` to figure out all accessible store paths with their attribute paths 5 | //! and hashes. 6 | use std::error; 7 | use std::fmt; 8 | use std::io::{self, Read}; 9 | use std::process::{Child, ChildStdout, Command, Stdio}; 10 | 11 | use xml; 12 | use xml::common::{Position, TextPosition}; 13 | use xml::reader::{EventReader, XmlEvent}; 14 | 15 | use crate::package::{PathOrigin, StorePath}; 16 | 17 | /// Calls `nix-env` to list the packages in the given nixpkgs. 18 | /// 19 | /// The `nixpkgs` argument can either be a path to a nixpkgs checkout or another expression 20 | /// accepted by `nix-env -f`, such as `<nixpkgs>` or `http://example.org/nixpkgs.tar.bz`. 21 | /// 22 | /// If system is `Some(platform)`, nix-env is called with the `--argstr system <system>` argument so that 23 | /// the specified platform is used instead of the default host system platform. 24 | /// 25 | /// If scope is `Some(attr)`, nix-env is called with the `-A attr` argument so only packages that are a member 26 | /// of `attr` are returned. 27 | /// 28 | /// The function returns an Iterator over the packages returned by nix-env.
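///
/// # Example
///
/// A minimal usage sketch (not part of the original docs; the `<nixpkgs>` argument and
/// the error handling are illustrative only):
///
/// ```no_run
/// use nix_index::nixpkgs;
///
/// // Query every package of the default nixpkgs for the host platform.
/// for package in nixpkgs::query_packages("<nixpkgs>", None, None, false) {
///     let package = package.expect("nix-env failed");
///     println!("{}", package.as_str());
/// }
/// ```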
29 | pub fn query_packages( 30 | nixpkgs: &str, 31 | system: Option<&str>, 32 | scope: Option<&str>, 33 | show_trace: bool, 34 | ) -> PackagesQuery<ChildStdout> { 35 | let mut cmd = Command::new("nix-env"); 36 | cmd.arg("-qaP") 37 | .arg("--out-path") 38 | .arg("--xml") 39 | .arg("--arg") 40 | .arg("config") 41 | .arg("{ allowAliases = false; }") // override default nixpkgs config discovery 42 | .arg("--arg") 43 | .arg("overlays") 44 | .arg("[ ]") 45 | .arg("--file") 46 | .arg(nixpkgs) 47 | .stdout(Stdio::piped()) 48 | .stderr(Stdio::piped()) 49 | .stdin(Stdio::null()); 50 | 51 | if let Some(system) = system { 52 | cmd.arg("--argstr").arg("system").arg(system); 53 | } 54 | 55 | if let Some(scope) = scope { 56 | cmd.arg("-A").arg(scope); 57 | } 58 | 59 | if show_trace { 60 | cmd.arg("--show-trace"); 61 | } 62 | 63 | PackagesQuery { 64 | parser: None, 65 | child: None, 66 | cmd: Some(cmd), 67 | } 68 | } 69 | 70 | /// An iterator that parses the output of nix-env and returns parsed store paths. 71 | /// 72 | /// Use `query_packages` to create a value of this type. 73 | pub struct PackagesQuery<R: Read> { 74 | parser: Option<PackagesParser<R>>, 75 | child: Option<Child>, 76 | cmd: Option<Command>, 77 | } 78 | 79 | impl PackagesQuery<ChildStdout> { 80 | /// Spawns the nix-env subprocess and initializes the parser. 81 | /// 82 | /// If the subprocess was already spawned, does nothing. 83 | fn ensure_initialized(&mut self) -> Result<(), Error> { 84 | if let Some(mut cmd) = self.cmd.take() { 85 | let mut child = cmd.spawn()?; 86 | 87 | let stdout = child.stdout.take().expect("should have stdout pipe"); 88 | let parser = PackagesParser::new(stdout); 89 | 90 | self.child = Some(child); 91 | self.parser = Some(parser); 92 | } 93 | Ok(()) 94 | } 95 | 96 | /// Waits for the subprocess to exit and checks whether it has returned a non-zero exit code 97 | /// (= failed with an error). 98 | /// 99 | /// If the exit code was non-zero, returns Some(err), else it returns None. 100 | fn check_error(&mut self) -> Option<Error> { 101 | let mut run = || { 102 | let child = match self.child.take() { 103 | Some(c) => c, 104 | None => return Ok(()), 105 | }; 106 | let result = child.wait_with_output()?; 107 | 108 | if !result.status.success() { 109 | let message = String::from_utf8_lossy(&result.stderr); 110 | 111 | return Err(Error::Command(format!( 112 | "nix-env failed with {}:\n{}", 113 | result.status, message, 114 | ))); 115 | } 116 | 117 | Ok(()) 118 | }; 119 | 120 | run().err() 121 | } 122 | } 123 | 124 | impl Iterator for PackagesQuery<ChildStdout> { 125 | type Item = Result<StorePath, Error>; 126 | 127 | fn next(&mut self) -> Option<Self::Item> { 128 | if let Err(e) = self.ensure_initialized() { 129 | return Some(Err(e)); 130 | } 131 | self.parser.take().and_then(|mut parser| { 132 | parser 133 | .next() 134 | .map(|v| { 135 | self.parser = Some(parser); 136 | // When the parser throws an error, we first wait for the subprocess to exit. 137 | // 138 | // If the subprocess returned an error, then the parser probably tried to parse garbage output 139 | // so we will ignore the parser error and instead return the error printed by the subprocess. 140 | v.map_err(|e| self.check_error().unwrap_or_else(|| Error::from(e))) 141 | }) 142 | .or_else(|| { 143 | self.parser = None; 144 | // At the end, we should check if the subprocess exited successfully. 145 | self.check_error().map(Err) 146 | }) 147 | }) 148 | } 149 | } 150 | 151 | /// Parses the XML output of `nix-env` and returns individual store paths.
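///
/// The parser only cares about `item` and `output` elements. A hand-written sketch of the
/// XML shape it expects (not captured from a real `nix-env` run; the store hash is elided):
///
/// ```text
/// <items>
///   <item attrPath="hello" system="x86_64-linux">
///     <output name="out" path="/nix/store/<hash>-hello-2.12.1" />
///   </item>
/// </items>
/// ```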
152 | struct PackagesParser<R: Read> { 153 | events: EventReader<R>, 154 | current_item: Option<(String, String)>, 155 | } 156 | 157 | /// A parser error that may occur during parsing `nix-env`'s output. 158 | #[derive(Debug)] 159 | pub struct ParserError { 160 | position: TextPosition, 161 | kind: ParserErrorKind, 162 | } 163 | 164 | /// Enumerates all possible error kinds that may occur during parsing. 165 | #[derive(Debug)] 166 | pub enum ParserErrorKind { 167 | /// Found an element with the tag `element_name` that should only occur inside 168 | /// elements with the tag `expected_parent` but it occurred as child of a different parent. 169 | MissingParent { 170 | element_name: String, 171 | expected_parent: String, 172 | }, 173 | 174 | /// An element occurred as a child of `found_parent`, but 175 | /// we know that elements with the tag `element_name` should never have that as 176 | /// a parent. 177 | ParentNotAllowed { 178 | element_name: String, 179 | found_parent: String, 180 | }, 181 | 182 | /// The required attribute `attribute_name` was missing on an element with the tag `element_name`. 183 | MissingAttribute { 184 | element_name: String, 185 | attribute_name: String, 186 | }, 187 | 188 | /// Found the end tag for `element_name` without a matching start tag. 189 | MissingStartTag { element_name: String }, 190 | 191 | /// An XML syntax error. 192 | XmlError { error: xml::reader::Error }, 193 | 194 | /// A store path in the output of `nix-env` could not be parsed. All valid store paths 195 | /// need to match the format `$(STOREDIR)/$(HASH)-$(NAME)`. 196 | InvalidStorePath { path: String }, 197 | } 198 | 199 | impl fmt::Display for ParserError { 200 | fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { 201 | use self::ParserErrorKind::*; 202 | write!(f, "error at {}: ", self.position)?; 203 | match self.kind { 204 | MissingParent { 205 | ref element_name, 206 | ref expected_parent, 207 | } => { 208 | write!( 209 | f, 210 | "element {} appears outside of expected parent {}", 211 | element_name, expected_parent 212 | ) 213 | } 214 | ParentNotAllowed { 215 | ref element_name, 216 | ref found_parent, 217 | } => { 218 | write!( 219 | f, 220 | "element {} must not appear as child of {}", 221 | element_name, found_parent 222 | ) 223 | } 224 | MissingAttribute { 225 | ref element_name, 226 | ref attribute_name, 227 | } => { 228 | write!( 229 | f, 230 | "element {} must have an attribute named {}", 231 | element_name, attribute_name 232 | ) 233 | } 234 | MissingStartTag { ref element_name } => { 235 | write!(f, "element {} does not have a start tag", element_name) 236 | } 237 | XmlError { ref error } => write!(f, "document not well-formed: {}", error), 238 | InvalidStorePath { ref path } => { 239 | write!( 240 | f, 241 | "store path does not match expected format /prefix/hash-name: {}", 242 | path 243 | ) 244 | } 245 | } 246 | } 247 | } 248 | 249 | impl<R: Read> PackagesParser<R> { 250 | /// Creates a new parser that reads the `nix-env` XML output from the given reader. 251 | pub fn new(reader: R) -> PackagesParser<R> { 252 | PackagesParser { 253 | events: EventReader::new(reader), 254 | current_item: None, 255 | } 256 | } 257 | 258 | /// Shorthand for exiting with an error at the current position. 259 | fn err(&self, kind: ParserErrorKind) -> ParserError { 260 | ParserError { 261 | position: self.events.position(), 262 | kind, 263 | } 264 | } 265 | 266 | /// Tries to read the next `StorePath` from the reader or fails with an error 267 | /// if there was a parse failure.
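///
/// The only parser state is `current_item`: it is set when an `item` start tag with its
/// `attrPath` and `system` attributes is seen, consulted when a nested `output` element is
/// turned into a `StorePath`, and cleared again on the matching end tag.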
268 | /// 269 | /// Returns Ok(None) if the end of the stream was reached. 270 | /// 271 | /// This function is like `.next` from `Iterator`, but allows us to use `try! / ?` since it 272 | /// returns `Result<Option<StorePath>, ParserError>` instead of `Option<Result<StorePath, ParserError>>`. 273 | fn next_err(&mut self) -> Result<Option<StorePath>, ParserError> { 274 | use self::ParserErrorKind::*; 275 | use self::XmlEvent::*; 276 | 277 | loop { 278 | let event = self 279 | .events 280 | .next() 281 | .map_err(|e| self.err(XmlError { error: e }))?; 282 | match event { 283 | StartElement { 284 | name: element_name, 285 | attributes, 286 | .. 287 | } => { 288 | if element_name.local_name == "item" { 289 | if self.current_item.is_some() { 290 | return Err(self.err(ParentNotAllowed { 291 | element_name: "item".to_string(), 292 | found_parent: "item".to_string(), 293 | })); 294 | } 295 | 296 | let mut attr_path = None; 297 | let mut system = None; 298 | 299 | for attr in attributes { 300 | if attr.name.local_name == "attrPath" { 301 | attr_path = Some(attr.value); 302 | continue; 303 | } 304 | 305 | if attr.name.local_name == "system" { 306 | system = Some(attr.value); 307 | continue; 308 | } 309 | } 310 | 311 | let attr_path = attr_path.ok_or_else(|| { 312 | self.err(MissingAttribute { 313 | element_name: "item".into(), 314 | attribute_name: "attrPath".into(), 315 | }) 316 | })?; 317 | 318 | let system = system.ok_or_else(|| { 319 | self.err(MissingAttribute { 320 | element_name: "item".into(), 321 | attribute_name: "system".into(), 322 | }) 323 | })?; 324 | 325 | self.current_item = Some((attr_path, system)); 326 | continue; 327 | } 328 | 329 | if element_name.local_name == "output" { 330 | if let Some((item, system)) = self.current_item.clone() { 331 | let mut output_name = None; 332 | let mut output_path = None; 333 | 334 | for attr in attributes { 335 | if attr.name.local_name == "name" { 336 | output_name = Some(attr.value); 337 | continue; 338 | } 339 | 340 | if attr.name.local_name == "path" { 341 | output_path = Some(attr.value); 342 | continue; 343 | } 344 | } 345 | 346 | let output_name = output_name.ok_or_else(|| { 347 | self.err(MissingAttribute { 348 | element_name: "output".into(), 349 | attribute_name: "name".into(), 350 | }) 351 | })?; 352 | 353 | let output_path = output_path.ok_or_else(|| { 354 | self.err(MissingAttribute { 355 | element_name: "output".into(), 356 | attribute_name: "path".into(), 357 | }) 358 | })?; 359 | 360 | let origin = PathOrigin { 361 | attr: item, 362 | output: output_name, 363 | toplevel: true, 364 | system: Some(system), 365 | }; 366 | let store_path = StorePath::parse(origin, &output_path); 367 | let store_path = store_path 368 | .ok_or_else(|| self.err(InvalidStorePath { path: output_path }))?; 369 | 370 | return Ok(Some(store_path)); 371 | } else { 372 | return Err(self.err(MissingParent { 373 | element_name: "output".into(), 374 | expected_parent: "item".into(), 375 | })); 376 | } 377 | } 378 | } 379 | 380 | EndElement { name: element_name } => { 381 | if element_name.local_name == "item" { 382 | if self.current_item.is_none() { 383 | return Err(self.err(MissingStartTag { 384 | element_name: "item".into(), 385 | })); 386 | } 387 | self.current_item = None 388 | } 389 | } 390 | 391 | EndDocument => break, 392 | 393 | _ => {} 394 | } 395 | } 396 | 397 | Ok(None) 398 | } 399 | } 400 | 401 | impl<R: Read> Iterator for PackagesParser<R> { 402 | type Item = Result<StorePath, ParserError>; 403 | 404 | fn next(&mut self) -> Option<Result<StorePath, ParserError>> { 405 | match self.next_err() { 406 | Err(e) => Some(Err(e)), 407 | Ok(Some(i)) => Some(Ok(i)), 408 | Ok(None) => None, 409 | } 410
| } 411 | } 412 | 413 | /// Enumeration of all the possible errors that may happen during querying the packages. 414 | #[derive(Debug)] 415 | pub enum Error { 416 | /// Parsing of the output failed 417 | Parse(ParserError), 418 | 419 | /// An IO error occurred 420 | Io(io::Error), 421 | 422 | /// nix-env failed with an error message 423 | Command(String), 424 | } 425 | 426 | impl error::Error for Error { 427 | fn description(&self) -> &str { 428 | match *self { 429 | Error::Parse(_) => "nix-env output parse error", 430 | Error::Io(_) => "io error", 431 | Error::Command(_) => "nix-env error", 432 | } 433 | } 434 | } 435 | 436 | impl fmt::Display for Error { 437 | fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { 438 | use self::Error::*; 439 | match *self { 440 | Parse(ref e) => write!(f, "parsing XML output of nix-env failed: {}", e), 441 | Io(ref e) => write!(f, "IO error: {}", e), 442 | Command(ref e) => write!(f, "nix-env failed with error: {}", e), 443 | } 444 | } 445 | } 446 | 447 | impl From<io::Error> for Error { 448 | fn from(err: io::Error) -> Error { 449 | Error::Io(err) 450 | } 451 | } 452 | 453 | impl From<ParserError> for Error { 454 | fn from(err: ParserError) -> Error { 455 | Error::Parse(err) 456 | } 457 | } 458 | -------------------------------------------------------------------------------- /src/package.rs: -------------------------------------------------------------------------------- 1 | //! Data types for representing meta information about packages and store paths. 2 | //! 3 | //! The main data type in this module is `StorePath`, which represents a single output of 4 | //! some nix derivation. We also sometimes call a `StorePath` a package, to avoid 5 | //! confusion with file paths. 6 | use std::borrow::Cow; 7 | use std::io::{self, Write}; 8 | use std::str; 9 | 10 | use serde::{Deserialize, Serialize}; 11 | 12 | /// A type for describing how to reach a given store path. 13 | /// 14 | /// When building an index, we collect store paths from various sources, such 15 | /// as the output of nix-env -qa and the references of those store paths. 16 | /// 17 | /// To show the user how we reached a given store path, each store path tracks 18 | /// its origin. For example, for top-level store paths, we know which attribute 19 | /// of nixpkgs builds this store path. 20 | #[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord, Hash)] 21 | pub struct PathOrigin { 22 | /// The attribute of nixpkgs that led to this store path being discovered. 23 | /// 24 | /// If the store path is a top-level path, then the store path corresponds 25 | /// to an output of the derivation assigned to this attribute path. 26 | pub attr: String, 27 | 28 | /// The output of the derivation specified by `attr` that we want to refer to. 29 | /// 30 | /// If a derivation does not support multiple outputs, then this should just be "out", 31 | /// the default output. 32 | pub output: String, 33 | 34 | /// Indicates that this path is listed in the output of nix-env -qaP --out-path. 35 | /// 36 | /// We may index paths for which we do not know the exact attribute path. In this 37 | /// case, `attr` and `output` will be set to the values for the top-level path that 38 | /// contains the path in its closure. (This is also how we discovered the path in the 39 | /// first place: through being referenced by another, top-level path). It is unspecified 40 | /// which top-level path they will refer to, though, if there exist multiple ones whose closure 41 | /// contains this path.
42 | pub toplevel: bool, 43 | 44 | /// Target system 45 | pub system: Option<String>, 46 | } 47 | 48 | impl PathOrigin { 49 | /// Encodes a path origin as a sequence of bytes, such that it can be decoded using `decode`. 50 | /// 51 | /// The encoding does not use the bytes `0x00` or `0x01`, as long as neither `attr` nor `output` 52 | /// contain them. This is important since it allows the result to be encoded with [frcode](mod.frcode.html). 53 | /// 54 | /// # Panics 55 | /// 56 | /// The `attr` and `output` of the path origin must not contain the byte value `0x02`, otherwise 57 | /// this function panics. 58 | /// 59 | /// # Errors 60 | /// 61 | /// Returns any errors that were encountered while writing to the supplied `Writer`. 62 | pub fn encode<W: Write>(&self, writer: &mut W) -> io::Result<()> { 63 | assert!( 64 | !self.attr.contains('\x02'), 65 | "origin attribute path must not contain the byte value 0x02 anywhere" 66 | ); 67 | assert!( 68 | !self.output.contains('\x02'), 69 | "origin output name must not contain the byte value 0x02 anywhere" 70 | ); 71 | write!( 72 | writer, 73 | "{}\x02{}{}", 74 | self.attr, 75 | self.output, 76 | if self.toplevel { "" } else { "\x02" } 77 | )?; 78 | Ok(()) 79 | } 80 | 81 | /// Decodes a path origin that was encoded by the `encode` function of this type. 82 | /// 83 | /// Returns the decoded path origin, or `None` if `buf` could not be decoded as path origin. 84 | pub fn decode(buf: &[u8]) -> Option<PathOrigin> { 85 | let mut iter = buf.splitn(2, |c| *c == b'\x02'); 86 | iter.next() 87 | .and_then(|v| String::from_utf8(v.to_vec()).ok()) 88 | .and_then(|attr| { 89 | iter.next() 90 | .and_then(|v| String::from_utf8(v.to_vec()).ok()) 91 | .map(|mut output| { 92 | let mut toplevel = true; 93 | if let Some(l) = output.pop() { 94 | if l == '\x02' { 95 | toplevel = false 96 | } else { 97 | output.push(l) 98 | } 99 | } 100 | PathOrigin { 101 | attr, 102 | output, 103 | toplevel, 104 | system: None, 105 | } 106 | }) 107 | }) 108 | } 109 | } 110 | 111 | /// Represents a store path which is something that is produced by `nix-build`. 112 | /// 113 | /// A store path represents an output in the nix store, matching the pattern 114 | /// `store_dir/hash-name` (most often, `store_dir` will be `/nix/store`). 115 | /// 116 | /// Using nix, a store path can be produced by calling `nix-build`. 117 | /// 118 | /// Note that even if a store path is a directory, the files inside that directory 119 | /// themselves are *not* store paths. For example, the following is a store path: 120 | /// 121 | /// ```text 122 | /// /nix/store/010yd8jls8w4vcnql4zhjbnyp2yay5pl-bash-4.4-p5 123 | /// ``` 124 | /// 125 | /// while this is not: 126 | /// 127 | /// ```text 128 | /// /nix/store/010yd8jls8w4vcnql4zhjbnyp2yay5pl-bash-4.4-p5/bin/ 129 | /// ``` 130 | /// 131 | /// To avoid any confusion with file paths, we sometimes also refer to a store path as a *package*. 132 | #[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, PartialOrd, Ord, Hash)] 133 | pub struct StorePath { 134 | store_dir: String, 135 | hash: String, 136 | name: String, 137 | origin: PathOrigin, 138 | } 139 | 140 | impl StorePath { 141 | /// Parse a store path from an absolute file path. 142 | /// 143 | /// Since this function does not know where that path comes from, it takes 144 | /// `origin` as an argument. 145 | /// 146 | /// This function returns `None` if the path could not be parsed as a 147 | /// store path.
You should not rely on that to check whether a path is a store 148 | /// path though, since it only does minimal validation (for example, it does 149 | /// not check the length of the hash). 150 | pub fn parse(origin: PathOrigin, path: &str) -> Option<StorePath> { 151 | let mut parts = path.splitn(2, '-'); 152 | parts.next().and_then(|prefix| { 153 | parts.next().and_then(|name| { 154 | let mut iter = prefix.rsplitn(2, '/'); 155 | iter.next().map(|hash| { 156 | let store_dir = iter.next().unwrap_or(""); 157 | StorePath { 158 | store_dir: store_dir.to_string(), 159 | hash: hash.to_string(), 160 | name: name.to_string(), 161 | origin, 162 | } 163 | }) 164 | }) 165 | }) 166 | } 167 | 168 | /// Encodes a store path as a sequence of bytes, so that it can be decoded with `decode`. 169 | /// 170 | /// The encoding does not use the bytes `0x00` or `0x01`, as long as none of the fields of 171 | /// this path contain those bytes (this includes `store_dir`, `hash`, `name` and `origin`). 172 | /// This is important since it allows the result to be encoded with [frcode](mod.frcode.html). 173 | /// 174 | /// # Panics 175 | /// 176 | /// The `attr` and `output` of the path origin must not contain the byte value `0x02`, otherwise 177 | /// this function panics. 178 | pub fn encode(&self) -> io::Result<Vec<u8>> { 179 | let mut result = Vec::with_capacity(self.as_str().len()); 180 | result.extend(self.as_str().bytes()); 181 | result.push(b'\n'); 182 | self.origin().encode(&mut result)?; 183 | Ok(result) 184 | } 185 | 186 | pub fn decode(buf: &[u8]) -> Option<StorePath> { 187 | let mut parts = buf.splitn(2, |c| *c == b'\n'); 188 | parts 189 | .next() 190 | .and_then(|v| str::from_utf8(v).ok()) 191 | .and_then(|path| { 192 | parts 193 | .next() 194 | .and_then(PathOrigin::decode) 195 | .and_then(|origin| StorePath::parse(origin, path)) 196 | }) 197 | } 198 | 199 | /// Returns the name of the store path, which is the part of the file name that 200 | /// is not the hash. In the above example, it would be `bash-4.4-p5`. 201 | /// 202 | /// # Example 203 | /// 204 | /// ``` 205 | /// use nix_index::package::{PathOrigin, StorePath}; 206 | /// 207 | /// let origin = PathOrigin { attr: "dummy".to_string(), output: "out".to_string(), toplevel: true, system: None }; 208 | /// let store_path = StorePath::parse(origin, "/nix/store/010yd8jls8w4vcnql4zhjbnyp2yay5pl-bash-4.4-p5").unwrap(); 209 | /// assert_eq!(&store_path.name(), "bash-4.4-p5"); 210 | /// ``` 211 | pub fn name(&self) -> Cow<str> { 212 | Cow::Borrowed(&self.name) 213 | } 214 | 215 | /// The hash of the store path. This is the part just before the name of 216 | /// the path. 217 | /// 218 | /// # Example 219 | /// 220 | /// ``` 221 | /// use nix_index::package::{PathOrigin, StorePath}; 222 | /// 223 | /// let origin = PathOrigin { attr: "dummy".to_string(), output: "out".to_string(), toplevel: true, system: None }; 224 | /// let store_path = StorePath::parse(origin, "/nix/store/010yd8jls8w4vcnql4zhjbnyp2yay5pl-bash-4.4-p5").unwrap(); 225 | /// assert_eq!(&store_path.hash(), "010yd8jls8w4vcnql4zhjbnyp2yay5pl"); 226 | /// ``` 227 | pub fn hash(&self) -> Cow<str> { 228 | Cow::Borrowed(&self.hash) 229 | } 230 | 231 | /// The store dir for which this store path was built. 232 | /// 233 | /// Currently, this will be `/nix/store` in almost all cases, but 234 | /// we include it here anyway for completeness.
235 | /// 236 | /// # Example 237 | /// 238 | /// ``` 239 | /// use nix_index::package::{PathOrigin, StorePath}; 240 | /// 241 | /// let origin = PathOrigin { attr: "dummy".to_string(), output: "out".to_string(), toplevel: true, system: None }; 242 | /// let store_path = StorePath::parse(origin, "/nix/store/010yd8jls8w4vcnql4zhjbnyp2yay5pl-bash-4.4-p5").unwrap(); 243 | /// assert_eq!(&store_path.store_dir(), "/nix/store"); 244 | /// ``` 245 | pub fn store_dir(&self) -> Cow<str> { 246 | Cow::Borrowed(&self.store_dir) 247 | } 248 | 249 | /// Converts the store path back into an absolute path. 250 | /// 251 | /// # Example 252 | /// 253 | /// ``` 254 | /// use nix_index::package::{PathOrigin, StorePath}; 255 | /// 256 | /// let origin = PathOrigin { attr: "dummy".to_string(), output: "out".to_string(), toplevel: true, system: None }; 257 | /// let store_path = StorePath::parse(origin, "/nix/store/010yd8jls8w4vcnql4zhjbnyp2yay5pl-bash-4.4-p5").unwrap(); 258 | /// assert_eq!(&store_path.as_str(), "/nix/store/010yd8jls8w4vcnql4zhjbnyp2yay5pl-bash-4.4-p5"); 259 | /// ``` 260 | pub fn as_str(&self) -> Cow<str> { 261 | Cow::Owned(format!("{}/{}-{}", self.store_dir, self.hash, self.name)) 262 | } 263 | 264 | /// Returns the origin that describes how we discovered this store path. 265 | /// 266 | /// See the documentation of `PathOrigin` for more information about this field. 267 | /// 268 | /// # Example 269 | /// 270 | /// ``` 271 | /// use nix_index::package::{PathOrigin, StorePath}; 272 | /// 273 | /// let origin = PathOrigin { attr: "dummy".to_string(), output: "out".to_string(), toplevel: true, system: None }; 274 | /// let store_path = StorePath::parse(origin.clone(), "/nix/store/010yd8jls8w4vcnql4zhjbnyp2yay5pl-bash-4.4-p5").unwrap(); 275 | /// assert_eq!(store_path.origin().as_ref(), &origin); 276 | /// ``` 277 | pub fn origin(&self) -> Cow<PathOrigin> { 278 | Cow::Borrowed(&self.origin) 279 | } 280 | } 281 | -------------------------------------------------------------------------------- /src/util.rs: -------------------------------------------------------------------------------- 1 | //! Small but reusable helper functions. 2 | use std::env; 3 | use std::fs::OpenOptions; 4 | use std::io::{self, Write}; 5 | use std::path::PathBuf; 6 | 7 | /// Writes a file to the temp directory with a name that is made of the supplied 8 | /// base and a suffix if a file with that name already exists. 9 | /// 10 | /// Returns the path of the file if the file was written successfully, None otherwise. 11 | /// None means that an IO error occurred during writing the file. 12 | pub fn write_temp_file(base_name: &str, contents: &[u8]) -> Option<PathBuf> { 13 | let mut path = None; 14 | for i in 0.. { 15 | let mut this_path = env::temp_dir(); 16 | if i == 0 { 17 | this_path.push(base_name); 18 | } else { 19 | this_path.push(format!("{}.{}", base_name, i)); 20 | } 21 | let temp_file = OpenOptions::new() 22 | .write(true) 23 | .create_new(true) 24 | .open(&this_path); 25 | match temp_file { 26 | Ok(mut file) => { 27 | path = file.write_all(contents).map(|_| this_path).ok(); 28 | break; 29 | } 30 | Err(e) => { 31 | if e.kind() != io::ErrorKind::AlreadyExists { 32 | break; 33 | } 34 | } 35 | } 36 | } 37 | path 38 | } 39 | -------------------------------------------------------------------------------- /src/workset.rs: -------------------------------------------------------------------------------- 1 | //! A task queue where the processing of tasks can generate additional subtasks. 2 | //! 3 | //!
This module implements a stream where the consumer of the stream can request 4 | //! additional items to be added to the stream. An example where this is useful 5 | //! is fetching a package including all the transitive dependencies: we start 6 | //! with a stream that just yields the package we want to fetch. The consumer can 7 | //! then fetch a package and add all dependencies of that package to the stream, 8 | //! adding them to the set of packages that need to be fetched. 9 | //! 10 | //! The data structure is called a work set because it allows assigning a key to 11 | //! each item to avoid duplicates. A new item will only be added if no prior item 12 | //! had the same key. 13 | //! 14 | //! # Example 15 | //! 16 | //! ```rust 17 | //! extern crate futures; 18 | //! extern crate nix_index; 19 | //! 20 | //! use futures::{Stream, stream::StreamExt}; 21 | //! use nix_index::workset::{WorkSet}; 22 | //! use std::iter::{self, FromIterator}; 23 | //! 24 | //! #[derive(Clone)] 25 | //! struct Package { 26 | //! name: String, 27 | //! dependencies: Vec<Package>, 28 | //! } 29 | //! 30 | //! fn main() { 31 | //! // set up some data 32 | //! let pkgA = Package { name: "a".to_string(), dependencies: vec![] }; 33 | //! let pkgB = Package { name: "b".to_string(), dependencies: vec![] }; 34 | //! let pkgC = Package { name: "c".to_string(), dependencies: vec![pkgA.clone(), pkgB] }; 35 | //! let pkgD = Package { name: "d".to_string(), dependencies: vec![pkgA, pkgC] }; 36 | //! 37 | //! // construct a workset that has `pkgD` as initial item. 38 | //! let workset = WorkSet::from_iter(iter::once((pkgD.name.clone(), pkgD))); 39 | //! 40 | //! // fetch the names of all transitive dependencies of `pkgD`. In real cases, 41 | //! // this would probably perform some network requests or other IO with futures. 42 | //! let all_packages = workset.map(|(mut handle, pkg)| { 43 | //! let Package { name, dependencies } = pkg; 44 | //! // add all dependencies to the workset 45 | //! for pkg in dependencies { 46 | //! handle.add_work(pkg.name.clone(), pkg); 47 | //! } 48 | //! name 49 | //! }); 50 | //! 51 | //! // all_packages is now a stream of all the names of the transitive dependencies of pkgD 52 | //! // and pkgD itself 53 | //! } 54 | //! ``` 55 | use std::cell::RefCell; 56 | use std::collections::HashSet; 57 | use std::hash::Hash; 58 | use std::iter::FromIterator; 59 | use std::pin::Pin; 60 | use std::rc::{Rc, Weak}; 61 | use std::task::{Context, Poll}; 62 | 63 | use futures::Stream; 64 | use indexmap::IndexMap; 65 | 66 | /// This structure holds the internal state of our queue. 67 | struct Shared<K, V> { 68 | /// The set of keys that have already been added to the queue sometime in the past. 69 | /// Any item whose key is in this set does not need to be added again. 70 | seen: HashSet<K>, 71 | 72 | /// The map of items that still need to be processed. As long as this is non-empty, 73 | /// there is still work remaining. 74 | queue: IndexMap<K, V>, 75 | } 76 | 77 | impl<K: Eq + Hash, V> Shared<K, V> { 78 | /// Add a task to the work queue if the given key still needs to be processed. 79 | /// Returns `true` if a new item was added, `false` otherwise. 80 | fn insert(&mut self, k: K, v: V) -> bool { 81 | use indexmap::map::Entry::*; 82 | if !self.seen.contains(&k) { 83 | match self.queue.entry(k) { 84 | Occupied(_) => return false, 85 | Vacant(e) => { 86 | e.insert(v); 87 | return true; 88 | } 89 | } 90 | } 91 | false 92 | } 93 | } 94 | 95 | /// A queue where the consumer can request new items to be added to the queue.
96 | /// 97 | /// To construct a new instance of this type, use `WorkSet::from_iter`. 98 | /// 99 | /// The queue terminates if there is no work left that needs processing and all 100 | /// `WorkSetHandle`s have been dropped (if there are `WorkSetHandle`s alive 101 | /// then it is still possible to call `add_work`, so the stream cannot end even 102 | /// if there is no work item available at the current time). 103 | pub struct WorkSet<K, V> { 104 | /// A reference to the state of the queue. 105 | /// This reference is shared with all `WorkSetHandle`s. 106 | state: Rc<RefCell<Shared<K, V>>>, 107 | } 108 | 109 | /// A work set handle allows you to add new items to the queue. 110 | /// 111 | /// As long as there are still `WorkSetHandle`s alive, the queue 112 | /// will not terminate. 113 | pub struct WorkSetHandle<K, V> { 114 | state: Rc<RefCell<Shared<K, V>>>, 115 | } 116 | 117 | impl<K: Eq + Hash, V> WorkSetHandle<K, V> { 118 | /// Adds a new item to the queue but only if this is 119 | /// the first time an item with the specified key is added. 120 | /// 121 | /// Returns `true` if this was a new item and therefore new work 122 | /// was added to the queue or `false` if there already was an item for 123 | /// the given key. 124 | pub fn add_work(&mut self, key: K, work: V) -> bool { 125 | self.state.borrow_mut().insert(key, work) 126 | } 127 | } 128 | 129 | /// An observer for `WorkSet` that provides status information 130 | /// about the queue. 131 | /// 132 | /// Note that this trait is not dependent on the type of items or keys 133 | /// in the work set, as it only provides meta information about the queue. 134 | pub trait WorkSetObserver { 135 | /// Returns the number of items in the queue that still need processing. 136 | fn queue_len(&self) -> usize; 137 | } 138 | 139 | /// A work set watch is any implementation of a `WorkSetObserver`. 140 | /// 141 | /// The watch does not prevent the queue from terminating. If the queue has already 142 | /// terminated, the number of remaining items will be zero. 143 | pub type WorkSetWatch = Box<dyn WorkSetObserver>; 144 | 145 | /// This is a concrete implementation of a `WorkSetObserver`. 146 | /// 147 | /// The indirection through the `WorkSetObserver` trait and `WorkSetWatch` type is 148 | /// necessary to allow hiding the concrete types `K` and `V` of the queue. 149 | /// Hiding the concrete types makes the interface much nicer. 150 | #[derive(Clone)] 151 | struct WorkSetObserverImpl<K, V> { 152 | /// A weak reference to the queue state. The reference is weak 153 | /// so that the observer does not prevent the queue from terminating. 154 | state: Weak<RefCell<Shared<K, V>>>, 155 | } 156 | 157 | impl<K, V> WorkSetObserver for WorkSetObserverImpl<K, V> { 158 | fn queue_len(&self) -> usize { 159 | self.state 160 | .upgrade() 161 | .map_or(0, |shared: Rc<RefCell<Shared<K, V>>>| { 162 | shared.as_ref().borrow().queue.len() 163 | }) 164 | } 165 | } 166 | 167 | impl<K: 'static, V: 'static> WorkSet<K, V> { 168 | /// Returns a watch for this work set that provides status information. 169 | pub fn watch(&self) -> WorkSetWatch { 170 | Box::new(WorkSetObserverImpl { 171 | state: Rc::downgrade(&self.state), 172 | }) 173 | } 174 | 175 | /// Constructs a new work set with the given initial work items. 176 | pub fn from_queue(queue: IndexMap<K, V>) -> Self { 177 | let shared = Shared { 178 | seen: HashSet::new(), 179 | queue, 180 | }; 181 | 182 | Self { 183 | state: Rc::new(RefCell::new(shared)), 184 | } 185 | } 186 | } 187 | 188 | impl<K: Eq + Hash + 'static, V: 'static> FromIterator<(K, V)> for WorkSet<K, V> { 189 | fn from_iter<I: IntoIterator<Item = (K, V)>>(iter: I) -> WorkSet<K, V> { 190 | Self::from_queue(IndexMap::from_iter(iter)) 191 | } 192 | } 193 | 194 | /// A work set implements the `Stream` trait.
The stream will produce the work 195 | /// that still needs processing. Along with every work item it also provides 196 | /// a handle to the queue that allows the consumer to add more items to the queue. 197 | /// 198 | /// The stream ends if the queue terminates; see the documentation of `WorkSet` 199 | /// for when exactly that happens. 200 | impl<K: Eq + Hash, V> Stream for WorkSet<K, V> { 201 | type Item = (WorkSetHandle<K, V>, V); 202 | 203 | fn poll_next(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Option<Self::Item>> { 204 | let (k, v) = match self.state.borrow_mut().queue.pop() { 205 | Some(e) => e, 206 | None => { 207 | return if Rc::strong_count(&self.state) == 1 { 208 | Poll::Ready(None) 209 | } else { 210 | Poll::Pending 211 | } 212 | } 213 | }; 214 | 215 | self.state.borrow_mut().seen.insert(k); 216 | let handle = WorkSetHandle { 217 | state: self.state.clone(), 218 | }; 219 | Poll::Ready(Some((handle, v))) 220 | } 221 | } 222 | --------------------------------------------------------------------------------