├── .github └── workflows │ └── deploy.yml ├── .gitignore ├── LICENSE.md ├── README.md ├── jobsets ├── hello.nix ├── python.nix └── r.nix ├── nixpkgs.nix ├── nonce.md ├── overlays.nix ├── spec.json └── spec.nix /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: Build nix-data packages & cache 2 | on: [push] 3 | jobs: 4 | nix-data: 5 | strategy: 6 | matrix: 7 | jobset: 8 | - hello 9 | - python 10 | - r 11 | 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | - uses: cachix/install-nix-action@v13 16 | - uses: cachix/cachix-action@v10 17 | with: 18 | name: tbenst 19 | # If you chose signing key for write access 20 | signingKey: '${{ secrets.NIX_DATA_CACHIX_SIGNING_KEY }}' 21 | # If you chose API tokens for write access OR if you have a private cache 22 | authToken: '${{ secrets.NIX_DATA_CACHIX_AUTH_TOKEN }}' 23 | - name: build 24 | run: nix-build --show-trace jobsets/${{ matrix.jobset }}.nix -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | result -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 tbenst 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Nix for Data Science 2 | 3 | This repository aims to define a batteries-included set of packages for data scientists. By using [Nix](https://nixos.org/nix/), the purely functional package manager, we strive to provide reproducible builds for the entire software stack, from low-level packages like glibc to high-level packages like PyTorch. 4 | 5 | By using the same overlays (roughly "compilation flags" for those not versed in Nix) and by pinning nixpkgs to a particular SHA, we aim to improve the stability and ease of use of the ecosystem by eventually providing access to a binary cache. This will greatly reduce the compilation burden and improve data scientist productivity. 6 | 7 | Collaboration is encouraged! Feel free to create pull requests or file an issue if you'd like to contribute. 8 | 9 | ## Focus 10 | Currently, the repository targets the Python and R ecosystems, and builds against Intel's MKL and NVIDIA's CUDA/cuDNN. Please get in touch if you would like to add focus areas! 
11 | 12 | ## Get in touch 13 | 14 | - [Matrix chat](https://matrix.to/#/#datascience:nixos.org) at `#datascience:nixos.org` 15 | - [Discord](https://discord.gg/wXZDqVYgjZ) at #data-science 16 | - [Slack workspace](https://join.slack.com/t/nix-data/shared_invite/zt-ca8csgcz-N9Fyh~tnoZPY8x5lE_slFA) 17 | 18 | ## Repo format 19 | See https://www.reddit.com/r/NixOS/comments/8tkllx/standard_project_structure/ for more info. 20 | -------------------------------------------------------------------------------- /jobsets/hello.nix: -------------------------------------------------------------------------------- 1 | let 2 | pkgs = import ../nixpkgs.nix; 3 | in 4 | { 5 | hello = pkgs.hello.overrideAttrs (old: { 6 | # change string to rebuild hello as a quick test 7 | # also recommend moving all other jobsets to "disabled-jobsets" folder 8 | madeup = "hi26"; 9 | }); 10 | } 11 | -------------------------------------------------------------------------------- /jobsets/python.nix: -------------------------------------------------------------------------------- 1 | let pkgs = import ../nixpkgs.nix; 2 | in 3 | { 4 | # is this needed for shell to be cached properly? 5 | # python-env = pkgs.python3.withPackages (ps: with ps; [ 6 | # ... 
7 | # ]); 8 | 9 | apache-airflow = pkgs.python37Packages.apache-airflow; 10 | av = pkgs.python3Packages.av; 11 | bokeh = pkgs.python3Packages.bokeh; 12 | cython = pkgs.python3Packages.cython; 13 | click = pkgs.python3Packages.click; 14 | dill = pkgs.python3Packages.dill; 15 | future = pkgs.python3Packages.future; 16 | h5py = pkgs.python3Packages.h5py; 17 | ipython = pkgs.python3Packages.ipython; 18 | imgaug = pkgs.python3Packages.imgaug; 19 | ipywidgets = pkgs.python3Packages.ipywidgets; 20 | joblib = pkgs.python3Packages.joblib; 21 | jupyterlab = pkgs.python3Packages.jupyterlab; 22 | matplotlib = pkgs.python3Packages.matplotlib; 23 | moviepy = pkgs.python3Packages.moviepy; 24 | # mlflow = pkgs.python3Packages.mlflow; 25 | # mlflow-server = pkgs.mlflow-server; 26 | mypy = pkgs.python3Packages.mypy; 27 | nbdime = pkgs.python3Packages.nbdime; 28 | nose = pkgs.python3Packages.nose; 29 | numpy = pkgs.python3Packages.numpy; 30 | opencv3 = pkgs.python3Packages.opencv3; 31 | opencv4 = pkgs.python3Packages.opencv4; 32 | pandas = pkgs.python3Packages.pandas; 33 | pims = pkgs.python3Packages.pims; 34 | pytest = pkgs.python3Packages.pytest; 35 | pytorch = pkgs.python3Packages.pytorch; 36 | # not yet merged... 
37 | # pytorch-lightning = pkgs.python3Packages.pytorch-lightning; 38 | pyyaml = pkgs.python3Packages.pyyaml; 39 | requests = pkgs.python3Packages.requests; 40 | # rpy2 = pkgs.python3Packages.rpy2; 41 | scikitimage = pkgs.python3Packages.scikitimage; 42 | scikitlearn = pkgs.python3Packages.scikitlearn; 43 | scipy = pkgs.python3Packages.scipy; 44 | seaborn = pkgs.python3Packages.seaborn; 45 | tables = pkgs.python3Packages.tables; 46 | tensorflow = pkgs.python37Packages.tensorflow; 47 | tensorflow_avx2 = pkgs.python37Packages.tensorflow_avx2; 48 | tensorflow-probability = pkgs.python37Packages.tensorflow-probability; 49 | tifffile = pkgs.python3Packages.tifffile; 50 | torchvision = pkgs.python3Packages.torchvision; 51 | tqdm = pkgs.python3Packages.tqdm; 52 | } 53 | -------------------------------------------------------------------------------- /jobsets/r.nix: -------------------------------------------------------------------------------- 1 | let 2 | pkgs = import ../nixpkgs.nix; 3 | rPacks = with pkgs.rPackages; [ 4 | ggplot2 5 | dplyr 6 | xts 7 | freqparcoord 8 | RANN 9 | MASS 10 | Rcpp 11 | tidyverse 12 | ]; 13 | in 14 | { 15 | inherit rPacks; 16 | RStudio = pkgs.rstudioWrapper.override { packages = rPacks; }; 17 | R = pkgs.rWrapper.override { packages = rPacks; }; 18 | } 19 | -------------------------------------------------------------------------------- /nixpkgs.nix: -------------------------------------------------------------------------------- 1 | let 2 | # 20.09-release as of 2021-04-11 3 | nixpkgsSHA = "b39544be6c2e554fc494140d1d8d1b412b2762a5"; 4 | pkgs = import 5 | (fetchTarball 6 | "https://github.com/NixOS/nixpkgs/archive/${nixpkgsSHA}.tar.gz") 7 | { 8 | system = builtins.currentSystem; 9 | overlays = import ./overlays.nix; 10 | config = with pkgs.stdenv; { 11 | whitelistedLicenses = with lib.licenses; [ 12 | unfreeRedistributable 13 | issl 14 | ]; 15 | allowUnfreePredicate = pkg: builtins.elem (lib.getName pkg) [ 16 | "cudnn_cudatoolkit" 17 | 
"cudatoolkit" 18 | ]; 19 | }; 20 | }; 21 | 22 | in 23 | pkgs 24 | -------------------------------------------------------------------------------- /nonce.md: -------------------------------------------------------------------------------- 1 | ## increment me to force rebuild :) 2 | 1 -------------------------------------------------------------------------------- /overlays.nix: -------------------------------------------------------------------------------- 1 | [ 2 | # top-level pkgs overlays 3 | (self: super: { 4 | openmpi = super.openmpi.override { cudaSupport = true; }; 5 | 6 | # batteries included :) 7 | ffmpeg = super.ffmpeg-full.override { 8 | nonfreeLicensing = true; 9 | nvenc = true; # nvidia support 10 | }; 11 | 12 | ffmpeg-full = super.ffmpeg-full.override { 13 | nonfreeLicensing = true; 14 | nvenc = true; # nvidia support 15 | }; 16 | 17 | }) 18 | 19 | # python pkgs overlays 20 | (self: super: { 21 | 22 | blas = super.blas.override { 23 | blasProvider = self.mkl; 24 | }; 25 | lapack = super.lapack.override { 26 | lapackProvider = self.mkl; 27 | }; 28 | 29 | pythonOverrides = python-self: python-super: { 30 | pytorch = python-super.pytorch.override { 31 | openMPISupport = true; 32 | cudaSupport = true; 33 | buildNamedTensor = true; 34 | cudaArchList = [ 35 | "5.0" 36 | "5.2" 37 | "6.0" 38 | "6.1" 39 | "7.0" 40 | "7.5" 41 | "7.5+PTX" 42 | ]; 43 | }; 44 | 45 | tensorflow = python-super.tensorflow.override { 46 | cudaSupport = true; 47 | cudatoolkit = super.cudatoolkit_10_1; 48 | cudnn = super.cudnn_cudatoolkit_10_1; 49 | # https://docs.nvidia.com/deeplearning/frameworks/tensorflow-user-guide/index.html 50 | cudaCapabilities = [ 51 | "5.0" 52 | "5.2" 53 | "6.0" 54 | "6.1" 55 | "7.0" 56 | "7.5" 57 | ]; 58 | sse42Support = true; 59 | avx2Support = false; 60 | fmaSupport = true; 61 | 62 | }; 63 | 64 | tensorflow_avx2 = python-super.tensorflow.override { 65 | cudaSupport = true; 66 | cudatoolkit = super.cudatoolkit_10_1; 67 | cudnn = 
super.cudnn_cudatoolkit_10_1; 68 | # https://docs.nvidia.com/deeplearning/frameworks/tensorflow-user-guide/index.html 69 | cudaCapabilities = [ 70 | "5.0" 71 | "5.2" 72 | "6.0" 73 | "6.1" 74 | "7.0" 75 | "7.5" 76 | ]; 77 | sse42Support = true; 78 | avx2Support = true; 79 | fmaSupport = true; 80 | }; 81 | 82 | tensorflow_2 = python-super.tensorflow_2.override { 83 | cudaSupport = true; 84 | cudatoolkit = super.cudatoolkit_10_1; 85 | cudnn = super.cudnn_cudatoolkit_10_1; 86 | # https://docs.nvidia.com/deeplearning/frameworks/tensorflow-user-guide/index.html 87 | cudaCapabilities = [ 88 | "5.0" 89 | "5.2" 90 | "6.0" 91 | "6.1" 92 | "7.0" 93 | "7.5" 94 | ]; 95 | sse42Support = true; 96 | avx2Support = false; 97 | fmaSupport = true; 98 | }; 99 | 100 | opencv3 = python-super.opencv3.override { 101 | enableCuda = true; 102 | enableFfmpeg = true; 103 | }; 104 | 105 | opencv4 = python-super.opencv4.override { 106 | enableCuda = true; 107 | enableFfmpeg = true; 108 | }; 109 | }; 110 | 111 | python3 = 112 | super.python3.override { packageOverrides = self.pythonOverrides; }; 113 | 114 | }) 115 | ] 116 | -------------------------------------------------------------------------------- /spec.json: -------------------------------------------------------------------------------- 1 | { 2 | "enabled": 1, 3 | "hidden": false, 4 | "description": "nix-data jobsets", 5 | "nixexprinput": "src", 6 | "nixexprpath": "spec.nix", 7 | "checkinterval": 300, 8 | "schedulingshares": 100, 9 | "enableemail": false, 10 | "emailoverride": "", 11 | "keepnr": 3, 12 | "type": 0, 13 | "inputs": { 14 | "src": { 15 | "type": "git", 16 | "value": "git://github.com/nix-community/nix-data.git", 17 | "emailresponsible": false 18 | }, 19 | "nixpkgs": { 20 | "type": "git", 21 | "value": "git://github.com/NixOS/nixpkgs.git nixos-unstable", 22 | "emailresponsible": false 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /spec.nix: 
# Declarative jobset specification (Hydra-style): evaluating this file yields
# a `jobsets` derivation whose output is a JSON document describing one
# jobset per entry of the `jobsets` attribute set below.
#
# Arguments:
#   nixpkgs   - path to a nixpkgs checkout; imported only to obtain
#               `runCommand`.
#   declInput - value supplied by the caller; it is only echoed (as XML) to
#               the build log.
{ nixpkgs, declInput }:
let
  pkgs = import nixpkgs { };

  # Repositories used as jobset inputs.
  # GitHub has disabled the unauthenticated `git://` protocol, so the inputs
  # are fetched over HTTPS instead.
  srcRepo = "https://github.com/nix-community/nix-data.git";
  nixpkgsRepo = "https://github.com/NixOS/nixpkgs.git";

  # Build the attribute set describing a single jobset.
  #   nixpkgsRelease  - nixpkgs branch the jobset tracks, e.g. "nixos-20.09"
  #   nixFile         - file name under jobsets/ that defines the jobs
  #   descriptionNote - short label appended to the jobset description
  mkJobset =
    { nixpkgsRelease
    , nixFile
    , descriptionNote
    }: {
      enabled = 1;
      hidden = false;
      description = "nix-data jobset for nixpkgs branch ${nixpkgsRelease} (${descriptionNote})";
      nixexprinput = "src";
      nixexprpath = "jobsets/${nixFile}";
      checkinterval = 300;
      schedulingshares = 100;
      enableemail = false;
      emailoverride = "";
      keepnr = 3;
      type = 0; # Non-flake (legacy)
      inputs = {
        src = {
          type = "git";
          value = srcRepo;
          emailresponsible = false;
        };
        nixpkgs = {
          type = "git";
          # "<url> <branch>"
          value = "${nixpkgsRepo} ${nixpkgsRelease}";
          emailresponsible = false;
        };
      };
    };

  # One jobset per (jobset file, nixpkgs branch) pair.
  jobsets = {
    # Previously pointed at r.nix (copy-paste of the "r-20.09" entry), which
    # made this jobset build the R packages instead of hello.
    "hello-20.09" = mkJobset {
      nixpkgsRelease = "nixos-20.09";
      nixFile = "hello.nix";
      descriptionNote = "hello";
    };

    hello-unstable = mkJobset {
      nixpkgsRelease = "nixos-unstable";
      nixFile = "hello.nix";
      descriptionNote = "hello";
    };

    "python-20.09" = mkJobset {
      nixpkgsRelease = "nixos-20.09";
      nixFile = "python.nix";
      descriptionNote = "python";
    };

    python-unstable = mkJobset {
      nixpkgsRelease = "nixos-unstable";
      nixFile = "python.nix";
      descriptionNote = "python";
    };

    "r-20.09" = mkJobset {
      nixpkgsRelease = "nixos-20.09";
      nixFile = "r.nix";
      descriptionNote = "r";
    };

    r-unstable = mkJobset {
      nixpkgsRelease = "nixos-unstable";
      nixFile = "r.nix";
      descriptionNote = "r";
    };
  };

in
{
  # NOTE(review): the body of this script was damaged in the copy under review
  # (only `cat < spec.json < $out` survived). It is reconstructed here as:
  # log `declInput`, serialise `jobsets` to spec.json, copy that to $out.
  # If the original post-processed the JSON (e.g. pretty-printing) before
  # writing $out, restore that step - TODO confirm against the repository.
  jobsets = pkgs.runCommand "spec.json" { } ''
    cat <<EOF
    ${builtins.toXML declInput}
    EOF
    cat > spec.json <<EOF
    ${builtins.toJSON jobsets}
    EOF
    cat spec.json > $out
  '';
}