├── .appveyor.yml ├── .codeclimate.yml ├── .codespellrc ├── .datalad-release-action.yaml ├── .gitattributes ├── .github ├── dependabot.yml └── workflows │ ├── add-changelog-snippet.yml │ ├── codespell.yml │ ├── docbuild.yml │ └── release.yml ├── .gitignore ├── .noannex ├── .readthedocs.yaml ├── CHANGELOG.md ├── CONTRIBUTORS ├── COPYING ├── MANIFEST.in ├── Makefile ├── README.md ├── _datalad_buildsupport ├── __init__.py ├── formatters.py └── setup.py ├── changelog.d ├── scriv.ini └── templates │ ├── entry_title.md.j2 │ └── new_fragment.md.j2 ├── datalad_container ├── __init__.py ├── _version.py ├── adapters │ ├── __init__.py │ ├── docker.py │ └── tests │ │ ├── __init__.py │ │ └── test_docker.py ├── conftest.py ├── containers_add.py ├── containers_list.py ├── containers_remove.py ├── containers_run.py ├── extractors │ ├── __init__.py │ ├── _load_singularity_versions.py │ ├── metalad_container.py │ └── tests │ │ ├── __init__.py │ │ └── test_metalad_container.py ├── find_container.py ├── tests │ ├── __init__.py │ ├── fixtures │ │ ├── __init__.py │ │ └── singularity_image.py │ ├── test_add.py │ ├── test_containers.py │ ├── test_find.py │ ├── test_register.py │ ├── test_run.py │ ├── test_schemes.py │ └── utils.py ├── utils.py └── version.py ├── docs ├── Makefile ├── examples │ └── basic_demo.sh ├── source │ ├── _static │ │ └── datalad_logo.png │ ├── _templates │ │ └── autosummary │ │ │ └── module.rst │ ├── acknowledgements.rst │ ├── changelog.rst │ ├── conf.py │ ├── index.rst │ └── metadata-extraction.rst └── utils │ └── pygments_ansi_color.py ├── pyproject.toml ├── requirements-devel.txt ├── requirements.txt ├── setup.cfg ├── setup.py ├── tools ├── Singularity.testhelper ├── appveyor_env_setup.bat ├── ci │ ├── install-singularity.sh │ ├── prep-travis-forssh-sudo.sh │ └── prep-travis-forssh.sh ├── containers_add_dhub_tags.py └── mk_minimal_chroot.sh └── versioneer.py /.appveyor.yml: -------------------------------------------------------------------------------- 1 | # This CI setup provides a largely homogeneous configuration across all 2 | # major platforms (Windows, MacOS, and Linux). The aim of this test setup is 3 | # to create a "native" platform experience, using as few cross-platform 4 | # helper tools as possible. 5 | # 6 | # On Linux/Mac a venv is used for testing. The effective virtual env 7 | # is available under ~/VENV. 8 | # 9 | # All workers support remote login. Login details are shown at the top of each 10 | # CI run log. 11 | # 12 | # - Linux/Mac workers (via SSH): 13 | # 14 | # - A permitted SSH key must be defined in an APPVEYOR_SSH_KEY environment 15 | # variable (via the appveyor project settings) 16 | # 17 | # - SSH login info is given in the form of: 'appveyor@67.225.164.xx -p 22xxx' 18 | # 19 | # - Login with: 20 | # 21 | # ssh -o StrictHostKeyChecking=no 22 | # 23 | # - to prevent the CI run from exiting, `touch` a file named `BLOCK` in the 24 | # user HOME directory (current directory directly after login). 
The session 25 | # will run until the file is removed (or 60 min have passed) 26 | # 27 | # - Windows workers (via RDP): 28 | # 29 | # - An RDP password should be defined in an APPVEYOR_RDP_PASSWORD environment 30 | # variable (via the appveyor project settings), or a random password is used 31 | # every time 32 | # 33 | # - RDP login info is given in the form of IP:PORT 34 | # 35 | # - Login with: 36 | # 37 | # xfreerdp /cert:ignore /dynamic-resolution /u:appveyor /p: /v: 38 | # 39 | # - to prevent the CI run from exiting, create a text file named `BLOCK` on the 40 | # Desktop (the required .txt extension will be added automatically). The session 41 | # will run until the file is removed (or 60 min have passed) 42 | # 43 | # - in a terminal execute, for example, `C:\datalad_debug.bat 39` to set up the 44 | # environment to debug in a Python 3.9 session (should generally match the 45 | # respective CI run configuration). 46 | 47 | 48 | # do not make repository clone cheap: interferes with versioneer 49 | shallow_clone: false 50 | 51 | 52 | environment: 53 | DATALAD_TESTS_SSH: 1 54 | 55 | # Do not use `image` as a matrix dimension, to have fine-grained control over 56 | # what tests run on which platform 57 | # The ID variable has no impact, but sorts first in the CI run overview; 58 | # an intelligible name can help to locate a specific test run 59 | 60 | # All of these are common to all matrix runs ATM, so pre-defined here and to be overloaded if needed 61 | DTS: datalad_container 62 | APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu2004 63 | INSTALL_SYSPKGS: python3-venv xz-utils jq 64 | # system git-annex is way too old, use better one 65 | INSTALL_GITANNEX: git-annex -m deb-url --url http://snapshot.debian.org/archive/debian/20210906T204127Z/pool/main/g/git-annex/git-annex_8.20210903-1_amd64.deb 66 | CODECOV_BINARY: https://uploader.codecov.io/latest/linux/codecov 67 | 68 | matrix: 69 | # List a CI run for each platform first, to have immediate access when there 70 | # is a need for debugging 71 | 72 | # Ubuntu core tests 73 | - ID: Ubu 74 | 75 | # The same but with the oldest supported Python. 76 | - ID: Ubu-3.8 77 | PY: '3.8' 78 | 79 | # The same but removing busybox first - triggers different code paths in the tests 80 | - ID: Ubu-nobusybox 81 | BEFORE_CMD: docker rmi busybox:latest 82 | 83 | # Windows core tests 84 | #- ID: WinP39core 85 | # # ~35 min 86 | # DTS: datalad_container 87 | # APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019 88 | # # Python version specification is non-standard on windows 89 | # PY: 39-x64 90 | # INSTALL_GITANNEX: git-annex -m datalad/packages 91 | ## macOS core tests 92 | #- ID: MacP38core 93 | # DTS: datalad_container 94 | # APPVEYOR_BUILD_WORKER_IMAGE: macOS 95 | # PY: 3.8 96 | # INSTALL_GITANNEX: git-annex 97 | # DATALAD_LOCATIONS_SOCKETS: /Users/appveyor/DLTMP/sockets 98 | # CODECOV_BINARY: https://uploader.codecov.io/latest/macos/codecov 99 | 100 | matrix: 101 | allow_failures: 102 | - KNOWN2FAIL: 1 103 | 104 | 105 | # it is OK to specify paths that may not exist for a particular test run 106 | cache: 107 | # pip cache 108 | - C:\Users\appveyor\AppData\Local\pip\Cache -> .appveyor.yml 109 | - /home/appveyor/.cache/pip -> .appveyor.yml 110 | # TODO: where is the cache on macOS? 111 | #- /Users/appveyor/.cache/pip -> .appveyor.yml 112 | # TODO: Can we cache `brew`?
113 | #- /usr/local/Cellar 114 | #- /usr/local/bin 115 | 116 | 117 | # turn off MS project build support (not needed) 118 | build: off 119 | 120 | 121 | # init cannot use any components from the repo, because it runs prior to 122 | # cloning it 123 | init: 124 | # remove windows 260-char limit on path names 125 | - cmd: powershell Set-Itemproperty -path "HKLM:\SYSTEM\CurrentControlSet\Control\FileSystem" -Name LongPathsEnabled -value 1 126 | # enable developer mode on windows 127 | # this should enable mklink without admin privileges, but it doesn't seem to work 128 | #- cmd: powershell tools\ci\appveyor_enable_windevmode.ps1 129 | # enable RDP access on windows (RDP password is in appveyor project config) 130 | # this is relatively expensive (1-2min), but very convenient to jump into any build at any time 131 | - cmd: powershell.exe iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1')) 132 | # enable external SSH access to CI worker on all other systems 133 | # needs APPVEYOR_SSH_KEY defined in project settings (or environment) 134 | - sh: curl -sflL 'https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-ssh.sh' | bash -e - 135 | # Identity setup 136 | - git config --global user.email "test@appveyor.land" 137 | - git config --global user.name "Appveyor Almighty" 138 | # Scratch space 139 | - cmd: md C:\DLTMP 140 | # we place the "unix" one into the user's HOME to avoid git-annex issues on macOS 141 | # gh-5291 142 | - sh: mkdir ~/DLTMP 143 | # and use that scratch space to get short paths in test repos 144 | # (avoiding length-limits as much as possible) 145 | - cmd: "set TMP=C:\\DLTMP" 146 | - cmd: "set TEMP=C:\\DLTMP" 147 | - sh: export TMPDIR=~/DLTMP 148 | # docker login to get "personalized" rate limit (rather than IP-based) 149 | - sh: docker login -p "$DOCKERHUB_TOKEN" -u "$DOCKERHUB_USERNAME" 150 | 151 | 152 | install: 153 | # place a debug setup helper at a convenient location 154 | - cmd: copy tools\ci\appveyor_env_setup.bat C:\\datalad_debug.bat 155 | # Missing system software 156 | - sh: "[ -n \"$INSTALL_SYSPKGS\" ] && ( [ \"x${APPVEYOR_BUILD_WORKER_IMAGE}\" = \"xmacOS\" ] && brew install -q ${INSTALL_SYSPKGS} || { sudo apt-get update -y && sudo apt-get install --no-install-recommends -y ${INSTALL_SYSPKGS}; } ) || true" 157 | # If a particular Python version is requested, use env setup (using the 158 | # appveyor provided environments/installation). 159 | # Otherwise create a venv using the default Python 3, to enable uniform 160 | # use of python/pip executables below 161 | - sh: "[ \"x$PY\" != x ] && . ${HOME}/venv${PY}/bin/activate || python3 -m venv ${HOME}/dlvenv && . 
${HOME}/dlvenv/bin/activate; ln -s \"$VIRTUAL_ENV\" \"${HOME}/VENV\"" 162 | - cmd: "set PATH=C:\\Python%PY%;C:\\Python%PY%\\Scripts;%PATH%" 163 | # deploy the datalad installer, override version via DATALAD_INSTALLER_VERSION 164 | - cmd: 165 | IF DEFINED DATALAD_INSTALLER_VERSION ( 166 | python -m pip install -q "datalad-installer%DATALAD_INSTALLER_VERSION%" 167 | ) ELSE ( 168 | python -m pip install -q datalad-installer 169 | ) 170 | - sh: python -m pip install datalad-installer${DATALAD_INSTALLER_VERSION:-} 171 | - pip install wheel 172 | # setup neurodebian, needs update of sources.list when base release changes 173 | - sh: "echo $ID | grep -q '^Ubu' && wget -O- http://neuro.debian.net/lists/focal.us-nh.full | sudo tee /etc/apt/sources.list.d/neurodebian.sources.list && ( sudo apt-key adv --recv-keys --keyserver hkps://keyserver.ubuntu.com 0xA5D32F012649A5A9 || { wget -q -O- http://neuro.debian.net/_static/neuro.debian.net.asc | sudo apt-key add -; } )" 174 | # Missing system software 175 | - sh: "[ -z \"$INSTALL_SYSPKGS\" ] || { if [ \"x${APPVEYOR_BUILD_WORKER_IMAGE}\" = \"xmacOS\" ]; then brew install -q ${INSTALL_SYSPKGS}; else sudo apt-get update -y -qq --allow-releaseinfo-change && sudo apt-get install -qq --no-install-recommends -y ${INSTALL_SYSPKGS}; fi }" 176 | # Install singularity 177 | - sh: tools/ci/install-singularity.sh 178 | # Install git-annex on windows, otherwise INSTALL_SYSPKGS can be used 179 | # deploy git-annex, if desired 180 | - cmd: IF DEFINED INSTALL_GITANNEX datalad-installer --sudo ok %INSTALL_GITANNEX% 181 | - sh: "[ -n \"${INSTALL_GITANNEX}\" ] && datalad-installer --sudo ok ${INSTALL_GITANNEX}" 182 | # in case of a snapshot installation, use the following approach to adjust 183 | # the PATH as necessary 184 | #- sh: "[ -n \"${INSTALL_GITANNEX}\" ] && datalad-installer -E ${HOME}/dlinstaller_env.sh --sudo ok ${INSTALL_GITANNEX}" 185 | # add location of datalad installer results to PATH 186 | #- sh: "[ -f ${HOME}/dlinstaller_env.sh ] && . ${HOME}/dlinstaller_env.sh || true" 187 | 188 | 189 | #before_build: 190 | # 191 | 192 | 193 | build_script: 194 | - python -m pip install -q -r requirements-devel.txt 195 | - python -m pip install . 
196 | 197 | 198 | #after_build: 199 | # 200 | 201 | 202 | before_test: 203 | # simple call to see if datalad and git-annex are installed properly 204 | - datalad wtf 205 | # remove busybox:latest so tests can fetch/drop it as needed 206 | - sh: "[ -n \"${BEFORE_CMD}\" ] && ${BEFORE_CMD} || :" 207 | 208 | 209 | test_script: 210 | # run tests on installed module, not source tree files 211 | - cmd: md __testhome__ 212 | - sh: mkdir __testhome__ 213 | - cd __testhome__ 214 | # run test selection (--traverse-namespace needed from Python 3.8 onwards) 215 | - cmd: python -m pytest -s -v -m "not (turtle)" --doctest-modules --cov=datalad_container --pyargs %DTS% 216 | - sh: python -m pytest -s -v -m "not (turtle)" --doctest-modules --cov=datalad_container --pyargs ${DTS} 217 | 218 | after_test: 219 | - python -m coverage xml 220 | - cmd: curl -fsSL -o codecov.exe "https://uploader.codecov.io/latest/windows/codecov.exe" 221 | - cmd: .\codecov.exe -f "coverage.xml" 222 | - sh: "curl -Os $CODECOV_BINARY" 223 | - sh: chmod +x codecov 224 | - sh: ./codecov 225 | 226 | 227 | #on_success: 228 | # 229 | 230 | 231 | #on_failure: 232 | # 233 | 234 | 235 | on_finish: 236 | # conditionally block the exit of a CI run for direct debugging 237 | - sh: while [ -f ~/BLOCK ]; do sleep 5; done 238 | - cmd: powershell.exe while ((Test-Path "C:\Users\\appveyor\\Desktop\\BLOCK.txt")) { Start-Sleep 5 } 239 | -------------------------------------------------------------------------------- /.codeclimate.yml: -------------------------------------------------------------------------------- 1 | version: "2" 2 | checks: 3 | file-lines: 4 | config: 5 | threshold: 500 6 | plugins: 7 | bandit: 8 | enabled: true 9 | checks: 10 | assert_used: 11 | enabled: false 12 | exclude_patterns: 13 | - "_datalad_buildsupport/" 14 | - "versioneer.py" 15 | - "*/_version.py" 16 | - "tools/" 17 | - "**/tests/" 18 | -------------------------------------------------------------------------------- /.codespellrc: -------------------------------------------------------------------------------- 1 | [codespell] 2 | skip = .venv,venvs,.git,build,*.egg-info,*.lock,.asv,.mypy_cache,.tox,fixtures,_version.py,*.pem 3 | # ignore-words-list = 4 | # exclude-file = 5 | -------------------------------------------------------------------------------- /.datalad-release-action.yaml: -------------------------------------------------------------------------------- 1 | fragment_directory: changelog.d 2 | 3 | # Categories must be listed in descending order of precedence for determining 4 | # what category to apply to a PR with multiple labels.
5 | # The category names must align with the categories in changelog.d/scriv.ini 6 | categories: 7 | - name: 💥 Breaking Changes 8 | bump: major 9 | label: major 10 | - name: 🚀 Enhancements and New Features 11 | bump: minor 12 | label: minor 13 | - name: 🐛 Bug Fixes 14 | label: patch 15 | - name: 🔩 Dependencies 16 | label: dependencies 17 | - name: 📝 Documentation 18 | label: documentation 19 | - name: 🏠 Internal 20 | label: internal 21 | - name: 🏎 Performance 22 | label: performance 23 | - name: 🧪 Tests 24 | label: tests 25 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | datalad_container/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # This configuration keeps the versions of all GitHub Actions up-to-date 2 | version: 2 3 | updates: 4 | - package-ecosystem: github-actions 5 | directory: / 6 | schedule: 7 | interval: weekly 8 | commit-message: 9 | prefix: "[gh-actions]" 10 | include: scope 11 | labels: 12 | - internal 13 | -------------------------------------------------------------------------------- /.github/workflows/add-changelog-snippet.yml: -------------------------------------------------------------------------------- 1 | name: Add changelog.d snippet 2 | 3 | on: 4 | pull_request_target: 5 | # Run whenever the PR is pushed to, receives a label, or is created with 6 | # one or more labels: 7 | types: [synchronize, labeled] 8 | 9 | # Prevent the workflow from running multiple jobs at once when a PR is created 10 | # with multiple labels: 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.ref_name }} 13 | cancel-in-progress: true 14 | 15 | jobs: 16 | add: 17 | runs-on: ubuntu-latest 18 | # Only run on PRs that have the "CHANGELOG-missing" label: 19 | if: contains(github.event.pull_request.labels.*.name, 'CHANGELOG-missing') 20 | steps: 21 | - name: Check out repository 22 | uses: actions/checkout@v4 23 | with: 24 | ref: ${{ github.event.pull_request.head.ref }} 25 | repository: ${{ github.event.pull_request.head.repo.full_name }} 26 | 27 | - name: Add changelog snippet 28 | uses: datalad/release-action/add-changelog-snippet@master 29 | with: 30 | token: ${{ secrets.GITHUB_TOKEN }} 31 | rm-labels: CHANGELOG-missing 32 | -------------------------------------------------------------------------------- /.github/workflows/codespell.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Codespell 3 | 4 | on: 5 | push: 6 | branches: [master] 7 | pull_request: 8 | branches: [master] 9 | 10 | permissions: 11 | contents: read 12 | 13 | jobs: 14 | codespell: 15 | name: Check for spelling errors 16 | runs-on: ubuntu-latest 17 | 18 | steps: 19 | - name: Checkout 20 | uses: actions/checkout@v4 21 | - name: Codespell 22 | uses: codespell-project/actions-codespell@v2 23 | -------------------------------------------------------------------------------- /.github/workflows/docbuild.yml: -------------------------------------------------------------------------------- 1 | name: docs 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - name: Set up environment 12 | run: | 13 | git config --global user.email "test@github.land" 14 | git config --global user.name "GitHub Almighty" 15 | - 
uses: actions/checkout@v4 16 | - name: Set up Python 3.9 17 | uses: actions/setup-python@v5 18 | with: 19 | python-version: 3.9 20 | - name: Install dependencies 21 | run: | 22 | python -m pip install --upgrade pip setuptools 23 | pip install -r requirements-devel.txt 24 | pip install . 25 | - name: Build docs 26 | run: | 27 | make -C docs html 28 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Auto-release on PR merge 2 | 3 | on: 4 | pull_request_target: 5 | branches: 6 | # Create a release whenever a PR is merged into one of these branches: 7 | - master 8 | types: 9 | - closed 10 | 11 | jobs: 12 | release: 13 | runs-on: ubuntu-latest 14 | # Only run for merged PRs with the "release" label: 15 | if: github.event.pull_request.merged == true && contains(github.event.pull_request.labels.*.name, 'release') 16 | steps: 17 | - name: Checkout source 18 | uses: actions/checkout@v4 19 | with: 20 | # Check out all history so that the previous release tag can be 21 | # found: 22 | fetch-depth: 0 23 | 24 | - name: Prepare release 25 | uses: datalad/release-action/release@master 26 | with: 27 | token: ${{ secrets.GITHUB_TOKEN }} 28 | pypi-token: ${{ secrets.PYPI_TOKEN }} 29 | pre-tag: | 30 | version_file=datalad_container/version.py 31 | printf '__version__ = "%s"\n' "$new_version" > "$version_file" 32 | git commit -m "Update __version__ to $new_version" "$version_file" 33 | 34 | # vim:set et sts=2: 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .pybuild/ 2 | .coverage 3 | /.tox 4 | *.egg-info 5 | *.py[coe] 6 | .#* 7 | .*.swp 8 | docs/build 9 | docs/source/generated 10 | build # manpage 11 | .idea/ 12 | venvs/ 13 | -------------------------------------------------------------------------------- /.noannex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalad/datalad-container/e9bba2a6566abf0e0a788dc06c56a468f8400d1f/.noannex -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the version of Python and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.10" 13 | 14 | # Build documentation in the docs/ directory with Sphinx 15 | sphinx: 16 | configuration: docs/source/conf.py 17 | 18 | 19 | formats: all 20 | 21 | # Optionally declare the Python requirements required to build your docs 22 | python: 23 | install: 24 | - path: . 25 | method: pip 26 | - requirements: requirements-devel.txt 27 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # 1.2.6 (2025-05-18) 4 | 5 | ## 🐛 Bug Fixes 6 | 7 | - MNT: Account for a number of deprecations in core. 
[PR #268](https://github.com/datalad/datalad-container/pull/268) (by [@adswa](https://github.com/adswa)) 8 | 9 | 10 | # 1.2.5 (2024-01-17) 11 | 12 | ## 🏠 Internal 13 | 14 | - Run isort across entire codebase to harmonize imports order/appearance. 15 | https://github.com/datalad/datalad-container/pull/260 (by @yarikoptic) 16 | 17 | 18 | # 1.2.4 (2024-01-17) 19 | 20 | ## 🚀 Enhancements and New Features 21 | 22 | - A new placeholder `{python}` is supported by container execution. 23 | It resolves to the Python interpreter executable running DataLad 24 | on container execution. This solves portability issues with the 25 | previous approach of hard-coding a command name on container 26 | configuration. 27 | Fixes https://github.com/datalad/datalad-container/issues/226 via 28 | https://github.com/datalad/datalad-container/pull/227 (by @mih) 29 | 30 | 31 | # 1.2.3 (2023-10-02) 32 | 33 | ## 🏠 Internal 34 | 35 | - Add [extras] extras_require with datalad-metalad and add all those extras to [devel]. [PR #215](https://github.com/datalad/datalad-container/pull/215) (by [@yarikoptic](https://github.com/yarikoptic)) 36 | 37 | - Robustify installation of singularity (install libfuse2). [PR #221](https://github.com/datalad/datalad-container/pull/221) (by [@yarikoptic](https://github.com/yarikoptic)) 38 | 39 | 40 | # 1.2.2 (2023-08-09) 41 | 42 | ## 🐛 Bug Fixes 43 | 44 | - BF: make it [] in case of None being returned. [PR #217](https://github.com/datalad/datalad-container/pull/217) (by [@yarikoptic](https://github.com/yarikoptic)) 45 | 46 | 47 | # 1.2.1 (2023-06-09) 48 | 49 | ## 🐛 Bug Fixes 50 | 51 | - Capture stderr as well while trying for singularity or apptainer to avoid spurious stderr display. [PR #208](https://github.com/datalad/datalad-container/pull/208) (by [@yarikoptic](https://github.com/yarikoptic)) 52 | 53 | - BF: by default stop containers-run on error, to not proceed to save. [PR #209](https://github.com/datalad/datalad-container/pull/209) (by [@yarikoptic](https://github.com/yarikoptic)) 54 | 55 | 56 | # 1.2.0 (2023-05-25) 57 | 58 | ## 🚀 Enhancements and New Features 59 | 60 | - Add metalad extractor using `singularity inspect`. 61 | Fixes https://github.com/datalad/datalad-container/issues/198 via 62 | https://github.com/datalad/datalad-container/pull/200 (by @asmacdo) 63 | 64 | - Add `--extra-inputs` to `containers-add`. Fixes [#189](https://github.com/datalad/datalad-container/issues/189) via [PR #190](https://github.com/datalad/datalad-container/pull/190) (by [@nobodyinperson](https://github.com/nobodyinperson)) 65 | 66 | ## 🐛 Bug Fixes 67 | 68 | - Make `datalad_container.adapters.docker save` assume `latest` if no image version given. Fixes [#105](https://github.com/datalad/datalad-container/issues/105) via [PR #206](https://github.com/datalad/datalad-container/pull/206) (by [@jwodder](https://github.com/jwodder)) 69 | 70 | ## 🏠 Internal 71 | 72 | - Eliminate use of distutils. [PR #203](https://github.com/datalad/datalad-container/pull/203) (by [@jwodder](https://github.com/jwodder)) 73 | 74 | - Add codespell action, config and fix 1 typo. [PR #207](https://github.com/datalad/datalad-container/pull/207) (by [@yarikoptic](https://github.com/yarikoptic)) 75 | 76 | 77 | # 1.1.9 (2023-02-06) 78 | 79 | ## 🏠 Internal 80 | 81 | - Fix the "bump" level for breaking changes in .datalad-release-action.yaml. [PR #186](https://github.com/datalad/datalad-container/pull/186) (by [@jwodder](https://github.com/jwodder)) 82 | 83 | - Account for move of @eval_results in datalad core. 
[PR #192](https://github.com/datalad/datalad-container/pull/192) (by [@yarikoptic](https://github.com/yarikoptic)) 84 | 85 | - scriv.ini: Provide full relative path to the templates. [PR #193](https://github.com/datalad/datalad-container/pull/193) (by [@yarikoptic](https://github.com/yarikoptic)) 86 | 87 | ## 🧪 Tests 88 | 89 | - Install Singularity 3 from an official .deb, use newer ubuntu (jammy) on travis. [PR #188](https://github.com/datalad/datalad-container/pull/188) (by [@bpoldrack](https://github.com/bpoldrack)) 90 | # 1.1.8 (Mon Oct 10 2022) 91 | 92 | #### 🐛 Bug Fix 93 | 94 | - Replace `simplejson` with `json` [#182](https://github.com/datalad/datalad-container/pull/182) ([@christian-monch](https://github.com/christian-monch)) 95 | 96 | #### 📝 Documentation 97 | 98 | - codespell fix some typos [#184](https://github.com/datalad/datalad-container/pull/184) ([@yarikoptic](https://github.com/yarikoptic)) 99 | 100 | #### 🧪 Tests 101 | 102 | - Reenabling tests using SingularityHub [#180](https://github.com/datalad/datalad-container/pull/180) ([@yarikoptic](https://github.com/yarikoptic)) 103 | 104 | #### Authors: 2 105 | 106 | - Christian Mönch ([@christian-monch](https://github.com/christian-monch)) 107 | - Yaroslav Halchenko ([@yarikoptic](https://github.com/yarikoptic)) 108 | 109 | --- 110 | 111 | # 1.1.7 (Tue Aug 30 2022) 112 | 113 | #### 🐛 Bug Fix 114 | 115 | - DOC: Set language in Sphinx config to en [#178](https://github.com/datalad/datalad-container/pull/178) ([@adswa](https://github.com/adswa)) 116 | 117 | #### 🧪 Tests 118 | 119 | - nose -> pytest, isort imports in tests, unify requirements-devel to correspond to the form as in core [#179](https://github.com/datalad/datalad-container/pull/179) ([@yarikoptic](https://github.com/yarikoptic)) 120 | 121 | #### Authors: 2 122 | 123 | - Adina Wagner ([@adswa](https://github.com/adswa)) 124 | - Yaroslav Halchenko ([@yarikoptic](https://github.com/yarikoptic)) 125 | 126 | --- 127 | 128 | # 1.1.6 (Mon Apr 11 2022) 129 | 130 | #### 🐛 Bug Fix 131 | 132 | - BF: Disable subdataset result rendering [#175](https://github.com/datalad/datalad-container/pull/175) ([@adswa](https://github.com/adswa)) 133 | - DOC: A few typos in comments/docstrings [#173](https://github.com/datalad/datalad-container/pull/173) ([@yarikoptic](https://github.com/yarikoptic)) 134 | - Update badges [#172](https://github.com/datalad/datalad-container/pull/172) ([@mih](https://github.com/mih)) 135 | - Build docs in standard workflow, not with travis [#171](https://github.com/datalad/datalad-container/pull/171) ([@mih](https://github.com/mih)) 136 | - Make six obsolete [#170](https://github.com/datalad/datalad-container/pull/170) ([@mih](https://github.com/mih)) 137 | - Adopt standard extension setup [#169](https://github.com/datalad/datalad-container/pull/169) ([@mih](https://github.com/mih) [@jwodder](https://github.com/jwodder) [@yarikoptic](https://github.com/yarikoptic)) 138 | - Adopt standard appveyor config [#167](https://github.com/datalad/datalad-container/pull/167) ([@mih](https://github.com/mih)) 139 | - Clarify documentation for docker usage [#164](https://github.com/datalad/datalad-container/pull/164) ([@mih](https://github.com/mih)) 140 | - Strip unsupported scenarios from travis [#166](https://github.com/datalad/datalad-container/pull/166) ([@mih](https://github.com/mih)) 141 | - WIP: Implement the actual command "containers" [#2](https://github.com/datalad/datalad-container/pull/2) ([@mih](https://github.com/mih) [@bpoldrack](https://github.com/bpoldrack)) 
142 | - Stop using deprecated Repo.add_submodule() [#161](https://github.com/datalad/datalad-container/pull/161) ([@mih](https://github.com/mih)) 143 | - BF:Docs: replace incorrect dashes with spaces in command names [#154](https://github.com/datalad/datalad-container/pull/154) ([@loj](https://github.com/loj)) 144 | 145 | #### ⚠️ Pushed to `master` 146 | 147 | - Adjust test to acknowledge reckless behavior ([@mih](https://github.com/mih)) 148 | - Slightly relax tests to account for upcoming remove() change ([@mih](https://github.com/mih)) 149 | 150 | #### 📝 Documentation 151 | 152 | - Mention that could be installed from conda-forge [#177](https://github.com/datalad/datalad-container/pull/177) ([@yarikoptic](https://github.com/yarikoptic)) 153 | 154 | #### Authors: 6 155 | 156 | - Adina Wagner ([@adswa](https://github.com/adswa)) 157 | - Benjamin Poldrack ([@bpoldrack](https://github.com/bpoldrack)) 158 | - John T. Wodder II ([@jwodder](https://github.com/jwodder)) 159 | - Laura Waite ([@loj](https://github.com/loj)) 160 | - Michael Hanke ([@mih](https://github.com/mih)) 161 | - Yaroslav Halchenko ([@yarikoptic](https://github.com/yarikoptic)) 162 | 163 | --- 164 | 165 | # 1.1.5 (Mon Jun 07 2021) 166 | 167 | #### 🐛 Bug Fix 168 | 169 | - BF: fix special remotes without "externaltype" [#156](https://github.com/datalad/datalad-container/pull/156) ([@loj](https://github.com/loj)) 170 | 171 | #### Authors: 1 172 | 173 | - Laura Waite ([@loj](https://github.com/loj)) 174 | 175 | --- 176 | 177 | # 1.1.4 (Mon Apr 19 2021) 178 | 179 | #### 🐛 Bug Fix 180 | 181 | - BF+RF: no need to pandoc long description for pypi + correctly boost MODULE/version.py for the release [#152](https://github.com/datalad/datalad-container/pull/152) ([@yarikoptic](https://github.com/yarikoptic)) 182 | 183 | #### Authors: 1 184 | 185 | - Yaroslav Halchenko ([@yarikoptic](https://github.com/yarikoptic)) 186 | 187 | --- 188 | 189 | # 1.1.3 (Thu Apr 15 2021) 190 | 191 | #### 🐛 Bug Fix 192 | 193 | - Set up workflow with auto for releasing & PyPI uploads [#151](https://github.com/datalad/datalad-container/pull/151) ([@yarikoptic](https://github.com/yarikoptic)) 194 | - TST: docker_adapter: Skip tests if 'docker pull' in setup fails [#148](https://github.com/datalad/datalad-container/pull/148) ([@kyleam](https://github.com/kyleam)) 195 | 196 | #### 🏠 Internal 197 | 198 | - ENH: containers-add-dhub - add multiple images/tags/repositories from docker hub [#135](https://github.com/datalad/datalad-container/pull/135) ([@kyleam](https://github.com/kyleam) [@yarikoptic](https://github.com/yarikoptic)) 199 | 200 | #### Authors: 2 201 | 202 | - Kyle Meyer ([@kyleam](https://github.com/kyleam)) 203 | - Yaroslav Halchenko ([@yarikoptic](https://github.com/yarikoptic)) 204 | 205 | --- 206 | 207 | # 1.1.2 (January 16, 2021) -- 208 | 209 | - Replace use of `mock` with `unittest.mock` as we do no longer support 210 | Python 2 211 | 212 | # 1.1.1 (January 03, 2021) -- 213 | 214 | - Drop use of `Runner` (to be removed in datalad 0.14.0) in favor of 215 | `WitlessRunner` 216 | 217 | # 1.1.0 (October 30, 2020) -- 218 | 219 | - Datalad version 0.13.0 or later is now required. 220 | 221 | - In the upcoming 0.14.0 release of DataLad, the datalad special 222 | remote will have built-in support for "shub://" URLs. If 223 | `containers-add` detects support for this feature, it will now add 224 | the "shub://" URL as is rather than resolving the URL itself. 
This 225 | avoids registering short-lived URLs, allowing the image to be 226 | retrieved later with `datalad get`. 227 | 228 | - `containers-run` learned to install necessary subdatasets when asked 229 | to execute a container from underneath an uninstalled subdataset. 230 | 231 | 232 | # 1.0.1 (June 23, 2020) -- 233 | 234 | - Prefer `datalad.core.local.run` to `datalad.interface.run`. The 235 | latter has been marked as obsolete since DataLad v0.12 (our minimum 236 | requirement) and will be removed in DataLad's next feature release. 237 | 238 | # 1.0.0 (Feb 23, 2020) -- not-as-a-shy-one 239 | 240 | The extension is pretty stable, so it is released as version 1. As a MAJOR 241 | release, it lets us start tracking API breakages and enhancements properly. 242 | 243 | - Drops support for Python 2 and DataLad prior to 0.12 244 | 245 | # 0.5.2 (Nov 12, 2019) -- 246 | 247 | ### Fixes 248 | 249 | - The Docker adapter unconditionally called `docker run` with 250 | `--interactive` and `--tty` even when stdin was not attached to a 251 | TTY, leading to an error. 252 | 253 | # 0.5.1 (Nov 08, 2019) -- 254 | 255 | ### Fixes 256 | 257 | - The Docker adapter, which is used for the "dhub://" URL scheme, 258 | assumed the Python executable was spelled "python". 259 | 260 | - A call to DataLad's `resolve_path` helper assumed a string return 261 | value, which isn't true as of the latest DataLad release candidate, 262 | 0.12.0rc6. 263 | 264 | # 0.5.0 (Jul 12, 2019) -- damn-you-malicious-users 265 | 266 | ### New features 267 | 268 | - The default result renderer for `containers-list` is now a custom 269 | renderer that includes the container name in the output. 270 | 271 | ### Fixes 272 | 273 | - Temporarily skip two tests relying on SingularityHub -- it is down. 274 | 275 | # 0.4.0 (May 29, 2019) -- run-baby-run 276 | 277 | The minimum required DataLad version is now 0.11.5. 278 | 279 | ### New features 280 | 281 | - The call format gained the "{img_dspath}" placeholder, which expands 282 | to the relative path of the dataset that contains the image. This 283 | is useful for pointing to a wrapper script that is bundled in the 284 | same subdataset as a container. 285 | 286 | - `containers-run` now passes the container image to `run` via its 287 | `extra_inputs` argument so that a run command's "{inputs}" field is 288 | restricted to inputs that the caller explicitly specified. 289 | 290 | - During execution, `containers-run` now sets the environment variable 291 | `DATALAD_CONTAINER_NAME` to the name of the container. 292 | 293 | ### Fixes 294 | 295 | - `containers-run` mishandled paths when called from a subdirectory. 296 | 297 | - `containers-run` didn't provide an informative error message when 298 | `cmdexec` contained an unknown placeholder. 299 | 300 | - `containers-add` ignored the `--update` flag when the container 301 | didn't yet exist, but it confusingly still used the word "update" 302 | in the commit message. 303 | 304 | # 0.3.1 (Mar 05, 2019) -- Upgrayeddd 305 | 306 | ### Fixes 307 | 308 | - `containers-list` recursion actually does recursion. 309 | 310 | 311 | # 0.3.0 (Mar 05, 2019) -- Upgrayedd 312 | 313 | ### API changes 314 | 315 | - `containers-list` no longer lists containers from subdatasets by 316 | default. Specify `--recursive` to do so. 317 | 318 | - `containers-run` no longer considers subdataset containers in its 319 | automatic selection of a container name when no name is specified. 320 | If the current dataset has one container, that container is 321 | selected. 
Subdataset containers must always be explicitly 322 | specified. 323 | 324 | ### New features 325 | 326 | - `containers-add` learned to update a previous container when passed 327 | `--update`. 328 | 329 | - `containers-add` now supports Singularity's "docker://" scheme in 330 | the URL. 331 | 332 | - To avoid unnecessary recursion into subdatasets, `containers-run` 333 | now decides to look for containers in subdatasets based on whether 334 | the name has a slash (which is true of all subdataset containers). 335 | 336 | # 0.2.2 (Dec 19, 2018) -- The more the merrier 337 | 338 | - list/use containers recursively from installed subdatasets 339 | - Allow specifying a container by path rather than just by name 340 | - Adding a container from the local filesystem now copies it 341 | 342 | # 0.2.1 (Jul 14, 2018) -- Explicit lyrics 343 | 344 | - Add support for `datalad run --explicit`. 345 | 346 | # 0.2 (Jun 08, 2018) -- Docker 347 | 348 | - Initial support for adding and running Docker containers. 349 | - Add support for `datalad run --sidecar`. 350 | - Simplify storage of `call_fmt` arguments in the Git config, by benefiting 351 | from `datalad run` being able to work with single-string compound commands. 352 | 353 | # 0.1.2 (May 28, 2018) -- The docs 354 | 355 | - Basic beginner documentation 356 | 357 | # 0.1.1 (May 22, 2018) -- The fixes 358 | 359 | ### New features 360 | 361 | - Add container images straight from singularity-hub, no need to manually 362 | specify `--call-fmt` arguments. 363 | 364 | ### API changes 365 | 366 | - Use "name" instead of "label" for referring to a container (e.g. 367 | `containers-run -n ...` instead of `containers-run -l`). 368 | 369 | ### Fixes 370 | 371 | - Pass relative container path to `datalad run`. 372 | - `containers-run` no longer hides `datalad run` failures. 373 | 374 | # 0.1 (May 19, 2018) -- The Release 375 | 376 | - Initial release with basic functionality to add, remove, and list 377 | containers in a dataset, plus a `run` command wrapper that injects 378 | the container image as an input dependency of a command call. 379 | -------------------------------------------------------------------------------- /CONTRIBUTORS: -------------------------------------------------------------------------------- 1 | The following people have contributed to this project: 2 | 3 | Benjamin Poldrack 4 | Kyle Meyer 5 | Michael Hanke 6 | Yaroslav Halchenko 7 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | # Main Copyright/License 2 | 3 | DataLad, including all examples, code snippets and attached 4 | documentation is covered by the MIT license. 5 | 6 | The MIT License 7 | 8 | Copyright (c) 2018- DataLad Team 9 | 10 | Permission is hereby granted, free of charge, to any person obtaining a copy 11 | of this software and associated documentation files (the "Software"), to deal 12 | in the Software without restriction, including without limitation the rights 13 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 | copies of the Software, and to permit persons to whom the Software is 15 | furnished to do so, subject to the following conditions: 16 | 17 | The above copyright notice and this permission notice shall be included in 18 | all copies or substantial portions of the Software. 
19 | 20 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 23 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 26 | THE SOFTWARE. 27 | 28 | See CONTRIBUTORS file for a full list of contributors. 29 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include CONTRIBUTORS LICENSE versioneer.py 2 | graft _datalad_buildsupport 3 | graft docs 4 | prune docs/build 5 | global-exclude *.py[cod] 6 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PYTHON ?= python 2 | 3 | clean: 4 | $(PYTHON) setup.py clean 5 | rm -rf dist build bin docs/build docs/source/generated *.egg-info 6 | -find . -name '*.pyc' -delete 7 | -find . -name '__pycache__' -type d -delete 8 | 9 | release-pypi: 10 | # avoid upload of stale builds 11 | test ! -e dist 12 | $(PYTHON) setup.py sdist bdist_wheel 13 | twine upload dist/* 14 | 15 | update-buildsupport: 16 | git subtree pull \ 17 | -m "Update DataLad build helper" \ 18 | --squash \ 19 | --prefix _datalad_buildsupport \ 20 | https://github.com/datalad/datalad-buildsupport.git \ 21 | master 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ____ _ _ _ 2 | | _ \ __ _ | |_ __ _ | | __ _ __| | 3 | | | | | / _` || __| / _` || | / _` | / _` | 4 | | |_| || (_| || |_ | (_| || |___ | (_| || (_| | 5 | |____/ \__,_| \__| \__,_||_____| \__,_| \__,_| 6 | Container 7 | 8 | [![Build status](https://ci.appveyor.com/api/projects/status/k4eyq1yygcvwf7wk/branch/master?svg=true)](https://ci.appveyor.com/project/mih/datalad-container/branch/master) [![Travis tests status](https://app.travis-ci.com/datalad/datalad-container.svg?branch=master)](https://app.travis-ci.com/datalad/datalad-container) [![codecov.io](https://codecov.io/github/datalad/datalad-container/coverage.svg?branch=master)](https://codecov.io/github/datalad/datalad-container?branch=master) [![Documentation](https://readthedocs.org/projects/datalad-container/badge/?version=latest)](http://datalad-container.rtfd.org) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![GitHub release](https://img.shields.io/github/release/datalad/datalad-container.svg)](https://GitHub.com/datalad/datalad-container/releases/) [![PyPI version fury.io](https://badge.fury.io/py/datalad-container.svg)](https://pypi.python.org/pypi/datalad-container/) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3368666.svg)](https://doi.org/10.5281/zenodo.3368666) ![Conda](https://anaconda.org/conda-forge/datalad-container/badges/version.svg) 9 | 10 | This extension enhances DataLad (http://datalad.org) for working with 11 | computational containers. Please see the [extension 12 | documentation](http://datalad-container.rtfd.org) 13 | for a description on additional commands and functionality. 
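A minimal sketch of the workflow these commands enable (the container name `my-container` and the `docker://busybox:latest` image are illustrative choices for this example, not project defaults):

    # inside an existing DataLad dataset: register a container image
    datalad containers-add my-container --url docker://busybox:latest

    # list containers known to the dataset
    datalad containers-list

    # run a command inside the registered container, with provenance
    # capture analogous to 'datalad run'
    datalad containers-run -n my-container echo "hello from a container"
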
14 | 15 | For general information on how to use or contribute to DataLad (and this 16 | extension), please see the [DataLad website](http://datalad.org) or the 17 | [main GitHub project page](https://github.com/datalad/datalad). 18 | 19 | 20 | ## Installation 21 | 22 | Before you install this package, please make sure that you [install a recent 23 | version of git-annex](https://git-annex.branchable.com/install). Afterwards, 24 | install the latest version of `datalad-container` from 25 | [PyPi](https://pypi.org/project/datalad-container). It is recommended to use 26 | a dedicated [virtualenv](https://virtualenv.pypa.io): 27 | 28 | # create and enter a new virtual environment (optional) 29 | virtualenv --system-site-packages --python=python3 ~/env/datalad 30 | . ~/env/datalad/bin/activate 31 | 32 | # install from PyPi 33 | pip install datalad_container 34 | 35 | It is also available via the conda package manager from conda-forge: 36 | 37 | conda install -c conda-forge datalad-container 38 | 39 | 40 | ## Support 41 | 42 | The documentation of this project is found here: 43 | http://docs.datalad.org/projects/container 44 | 45 | All bugs, concerns and enhancement requests for this software can be submitted here: 46 | https://github.com/datalad/datalad-container/issues 47 | 48 | If you have a problem or would like to ask a question about how to use DataLad, 49 | please [submit a question to 50 | NeuroStars.org](https://neurostars.org/tags/datalad) with a ``datalad`` tag. 51 | NeuroStars.org is a platform similar to StackOverflow but dedicated to 52 | neuroinformatics. 53 | 54 | All previous DataLad questions are available here: 55 | http://neurostars.org/tags/datalad/ 56 | 57 | ## Acknowledgements 58 | 59 | DataLad development is supported by a US-German collaboration in computational 60 | neuroscience (CRCNS) project "DataGit: converging catalogues, warehouses, and 61 | deployment logistics into a federated 'data distribution'" (Halchenko/Hanke), 62 | co-funded by the US National Science Foundation (NSF 1429999) and the German 63 | Federal Ministry of Education and Research (BMBF 01GQ1411). Additional support 64 | is provided by the German federal state of Saxony-Anhalt and the European 65 | Regional Development Fund (ERDF), Project: Center for Behavioral Brain 66 | Sciences, Imaging Platform. This work is further facilitated by the ReproNim 67 | project (NIH 1P41EB019936-01A1). 68 | -------------------------------------------------------------------------------- /_datalad_buildsupport/__init__.py: -------------------------------------------------------------------------------- 1 | # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 2 | # 3 | # See COPYING file distributed along with the DataLad package for the 4 | # copyright and license terms. 5 | # 6 | # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 7 | """Python package for functionality needed at package 'build' time by DataLad and its extensions 8 | 9 | __init__ here should be really minimalistic, not import submodules by default, 10 | and submodules should also not require heavy dependencies. 
11 | """ 12 | 13 | __version__ = '0.1' 14 | -------------------------------------------------------------------------------- /_datalad_buildsupport/formatters.py: -------------------------------------------------------------------------------- 1 | # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 2 | # 3 | # See COPYING file distributed along with the DataLad package for the 4 | # copyright and license terms. 5 | # 6 | # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 7 | 8 | import argparse 9 | import datetime 10 | import re 11 | 12 | 13 | class ManPageFormatter(argparse.HelpFormatter): 14 | # This code was originally distributed 15 | # under the same License of Python 16 | # Copyright (c) 2014 Oz Nahum Tiram 17 | def __init__(self, 18 | prog, 19 | indent_increment=2, 20 | max_help_position=4, 21 | width=1000000, 22 | section=1, 23 | ext_sections=None, 24 | authors=None, 25 | version=None 26 | ): 27 | 28 | super(ManPageFormatter, self).__init__( 29 | prog, 30 | indent_increment=indent_increment, 31 | max_help_position=max_help_position, 32 | width=width) 33 | 34 | self._prog = prog 35 | self._section = 1 36 | self._today = datetime.date.today().strftime('%Y\\-%m\\-%d') 37 | self._ext_sections = ext_sections 38 | self._version = version 39 | 40 | def _get_formatter(self, **kwargs): 41 | return self.formatter_class(prog=self.prog, **kwargs) 42 | 43 | def _markup(self, txt): 44 | return txt.replace('-', '\\-') 45 | 46 | def _underline(self, string): 47 | return "\\fI\\s-1" + string + "\\s0\\fR" 48 | 49 | def _bold(self, string): 50 | if not string.strip().startswith('\\fB'): 51 | string = '\\fB' + string 52 | if not string.strip().endswith('\\fR'): 53 | string = string + '\\fR' 54 | return string 55 | 56 | def _mk_synopsis(self, parser): 57 | self.add_usage(parser.usage, parser._actions, 58 | parser._mutually_exclusive_groups, prefix='') 59 | usage = self._format_usage(None, parser._actions, 60 | parser._mutually_exclusive_groups, '') 61 | # replace too long list of commands with a single placeholder 62 | usage = re.sub(r'{[^]]*?create,.*?}', ' COMMAND ', usage, flags=re.MULTILINE) 63 | # take care of proper wrapping 64 | usage = re.sub(r'\[([-a-zA-Z0-9]*)\s([a-zA-Z0-9{}|_]*)\]', r'[\1\~\2]', usage) 65 | 66 | usage = usage.replace('%s ' % self._prog, '') 67 | usage = '.SH SYNOPSIS\n.nh\n.HP\n\\fB%s\\fR %s\n.hy\n' % (self._markup(self._prog), 68 | usage) 69 | return usage 70 | 71 | def _mk_title(self, prog): 72 | name_version = "{0} {1}".format(prog, self._version) 73 | return '.TH "{0}" "{1}" "{2}" "{3}"\n'.format( 74 | prog, self._section, self._today, name_version) 75 | 76 | def _mk_name(self, prog, desc): 77 | """ 78 | this method is in consistent with others ... 
it relies on 79 | distribution 80 | """ 81 | desc = desc.splitlines()[0] if desc else 'it is in the name' 82 | # ensure starting lower case 83 | desc = desc[0].lower() + desc[1:] 84 | return '.SH NAME\n%s \\- %s\n' % (self._bold(prog), desc) 85 | 86 | def _mk_description(self, parser): 87 | desc = parser.description 88 | desc = '\n'.join(desc.splitlines()[1:]) 89 | if not desc: 90 | return '' 91 | desc = desc.replace('\n\n', '\n.PP\n') 92 | # sub-section headings 93 | desc = re.sub(r'^\*(.*)\*$', r'.SS \1', desc, flags=re.MULTILINE) 94 | # italic commands 95 | desc = re.sub(r'^ ([-a-z]*)$', r'.TP\n\\fI\1\\fR', desc, flags=re.MULTILINE) 96 | # deindent body text, leave to troff viewer 97 | desc = re.sub(r'^ (\S.*)\n', '\\1\n', desc, flags=re.MULTILINE) 98 | # format NOTEs as indented paragraphs 99 | desc = re.sub(r'^NOTE\n', '.TP\nNOTE\n', desc, flags=re.MULTILINE) 100 | # deindent indented paragraphs after heading setup 101 | desc = re.sub(r'^ (.*)$', '\\1', desc, flags=re.MULTILINE) 102 | 103 | return '.SH DESCRIPTION\n%s\n' % self._markup(desc) 104 | 105 | def _mk_footer(self, sections): 106 | if not hasattr(sections, '__iter__'): 107 | return '' 108 | 109 | footer = [] 110 | for section, value in sections.items(): 111 | part = ".SH {}\n {}".format(section.upper(), value) 112 | footer.append(part) 113 | 114 | return '\n'.join(footer) 115 | 116 | def format_man_page(self, parser): 117 | page = [] 118 | page.append(self._mk_title(self._prog)) 119 | page.append(self._mk_name(self._prog, parser.description)) 120 | page.append(self._mk_synopsis(parser)) 121 | page.append(self._mk_description(parser)) 122 | page.append(self._mk_options(parser)) 123 | page.append(self._mk_footer(self._ext_sections)) 124 | 125 | return ''.join(page) 126 | 127 | def _mk_options(self, parser): 128 | 129 | formatter = parser._get_formatter() 130 | 131 | # positionals, optionals and user-defined groups 132 | for action_group in parser._action_groups: 133 | formatter.start_section(None) 134 | formatter.add_text(None) 135 | formatter.add_arguments(action_group._group_actions) 136 | formatter.end_section() 137 | 138 | # epilog 139 | formatter.add_text(parser.epilog) 140 | 141 | # determine help from format above 142 | help = formatter.format_help() 143 | # add spaces after comma delimiters for easier reformatting 144 | help = re.sub(r'([a-z]),([a-z])', '\\1, \\2', help) 145 | # get proper indentation for argument items 146 | help = re.sub(r'^ (\S.*)\n', '.TP\n\\1\n', help, flags=re.MULTILINE) 147 | # deindent body text, leave to troff viewer 148 | help = re.sub(r'^ (\S.*)\n', '\\1\n', help, flags=re.MULTILINE) 149 | return '.SH OPTIONS\n' + help 150 | 151 | def _format_action_invocation(self, action, doubledash='--'): 152 | if not action.option_strings: 153 | metavar, = self._metavar_formatter(action, action.dest)(1) 154 | return metavar 155 | 156 | else: 157 | parts = [] 158 | 159 | # if the Optional doesn't take a value, format is: 160 | # -s, --long 161 | if action.nargs == 0: 162 | parts.extend([self._bold(action_str) for action_str in 163 | action.option_strings]) 164 | 165 | # if the Optional takes a value, format is: 166 | # -s ARGS, --long ARGS 167 | else: 168 | default = self._underline(action.dest.upper()) 169 | args_string = self._format_args(action, default) 170 | for option_string in action.option_strings: 171 | parts.append('%s %s' % (self._bold(option_string), 172 | args_string)) 173 | 174 | return ', '.join(p.replace('--', doubledash) for p in parts) 175 | 176 | 177 | class 
RSTManPageFormatter(ManPageFormatter): 178 | def _get_formatter(self, **kwargs): 179 | return self.formatter_class(prog=self.prog, **kwargs) 180 | 181 | def _markup(self, txt): 182 | # put general tune-ups here 183 | return txt 184 | 185 | def _underline(self, string): 186 | return "*{0}*".format(string) 187 | 188 | def _bold(self, string): 189 | return "**{0}**".format(string) 190 | 191 | def _mk_synopsis(self, parser): 192 | self.add_usage(parser.usage, parser._actions, 193 | parser._mutually_exclusive_groups, prefix='') 194 | usage = self._format_usage(None, parser._actions, 195 | parser._mutually_exclusive_groups, '') 196 | 197 | usage = usage.replace('%s ' % self._prog, '') 198 | usage = 'Synopsis\n--------\n::\n\n %s %s\n' \ 199 | % (self._markup(self._prog), usage) 200 | return usage 201 | 202 | def _mk_title(self, prog): 203 | # and an easy to use reference point 204 | title = ".. _man_%s:\n\n" % prog.replace(' ', '-') 205 | title += "{0}".format(prog) 206 | title += '\n{0}\n\n'.format('=' * len(prog)) 207 | return title 208 | 209 | def _mk_name(self, prog, desc): 210 | return '' 211 | 212 | def _mk_description(self, parser): 213 | desc = parser.description 214 | if not desc: 215 | return '' 216 | return 'Description\n-----------\n%s\n' % self._markup(desc) 217 | 218 | def _mk_footer(self, sections): 219 | if not hasattr(sections, '__iter__'): 220 | return '' 221 | 222 | footer = [] 223 | for section, value in sections.items(): 224 | part = "\n{0}\n{1}\n{2}\n".format( 225 | section, 226 | '-' * len(section), 227 | value) 228 | footer.append(part) 229 | 230 | return '\n'.join(footer) 231 | 232 | def _mk_options(self, parser): 233 | 234 | # this non-obvious maneuver is really necessary! 235 | formatter = self.__class__(self._prog) 236 | 237 | # positionals, optionals and user-defined groups 238 | for action_group in parser._action_groups: 239 | formatter.start_section(None) 240 | formatter.add_text(None) 241 | formatter.add_arguments(action_group._group_actions) 242 | formatter.end_section() 243 | 244 | # epilog 245 | formatter.add_text(parser.epilog) 246 | 247 | # determine help from format above 248 | option_sec = formatter.format_help() 249 | 250 | return '\n\nOptions\n-------\n{0}'.format(option_sec) 251 | 252 | def _format_action(self, action): 253 | # determine the required width and the entry label 254 | action_header = self._format_action_invocation(action, doubledash='-\\\\-') 255 | 256 | if action.help: 257 | help_text = self._expand_help(action) 258 | help_lines = self._split_lines(help_text, 80) 259 | help = ' '.join(help_lines) 260 | else: 261 | help = '' 262 | 263 | # return a single string 264 | return '{0}\n{1}\n{2}\n\n'.format( 265 | action_header, 266 | 267 | '~' * len(action_header), 268 | help) 269 | 270 | 271 | def cmdline_example_to_rst(src, out=None, ref=None): 272 | if out is None: 273 | from io import StringIO 274 | out = StringIO() 275 | 276 | # place header 277 | out.write('.. AUTO-GENERATED FILE -- DO NOT EDIT!\n\n') 278 | if ref: 279 | # place cross-ref target 280 | out.write('.. 
{0}:\n\n'.format(ref)) 281 | 282 | # parser status vars 283 | inexample = False 284 | incodeblock = False 285 | 286 | for line in src: 287 | if line.startswith('#% EXAMPLE START'): 288 | inexample = True 289 | incodeblock = False 290 | continue 291 | if not inexample: 292 | continue 293 | if line.startswith('#% EXAMPLE END'): 294 | break 295 | if not inexample: 296 | continue 297 | if line.startswith('#%'): 298 | incodeblock = not incodeblock 299 | if incodeblock: 300 | out.write('\n.. code-block:: sh\n\n') 301 | continue 302 | if not incodeblock and line.startswith('#'): 303 | out.write(line[(min(2, len(line) - 1)):]) 304 | continue 305 | if incodeblock: 306 | if not line.rstrip().endswith('#% SKIP'): 307 | out.write(' %s' % line) 308 | continue 309 | if not len(line.strip()): 310 | continue 311 | else: 312 | raise RuntimeError("this should not happen") 313 | 314 | return out 315 | -------------------------------------------------------------------------------- /_datalad_buildsupport/setup.py: -------------------------------------------------------------------------------- 1 | # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 2 | # 3 | # See COPYING file distributed along with the DataLad package for the 4 | # copyright and license terms. 5 | # 6 | # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 7 | 8 | 9 | import datetime 10 | import os 11 | from os.path import ( 12 | dirname, 13 | join as opj, 14 | ) 15 | from setuptools import Command 16 | from setuptools.config import read_configuration 17 | from setuptools.errors import OptionError 18 | 19 | import versioneer 20 | 21 | from . import formatters as fmt 22 | 23 | 24 | class BuildManPage(Command): 25 | # The BuildManPage code was originally distributed 26 | # under the same License of Python 27 | # Copyright (c) 2014 Oz Nahum Tiram 28 | 29 | description = 'Generate man page from an ArgumentParser instance.' 30 | 31 | user_options = [ 32 | ('manpath=', None, 33 | 'output path for manpages (relative paths are relative to the ' 34 | 'datalad package)'), 35 | ('rstpath=', None, 36 | 'output path for RST files (relative paths are relative to the ' 37 | 'datalad package)'), 38 | ('parser=', None, 'module path to an ArgumentParser instance' 39 | '(e.g. mymod:func, where func is a method or function which return' 40 | 'a dict with one or more arparse.ArgumentParser instances.'), 41 | ('cmdsuite=', None, 'module path to an extension command suite ' 42 | '(e.g. 
mymod:command_suite) to limit the build to the contained ' 43 | 'commands.'), 44 | ] 45 | 46 | def initialize_options(self): 47 | self.manpath = opj('build', 'man') 48 | self.rstpath = opj('docs', 'source', 'generated', 'man') 49 | self.parser = 'datalad.cmdline.main:setup_parser' 50 | self.cmdsuite = None 51 | 52 | def finalize_options(self): 53 | if self.manpath is None: 54 | raise OptionError('\'manpath\' option is required') 55 | if self.rstpath is None: 56 | raise OptionError('\'rstpath\' option is required') 57 | if self.parser is None: 58 | raise OptionError('\'parser\' option is required') 59 | mod_name, func_name = self.parser.split(':') 60 | fromlist = mod_name.split('.') 61 | try: 62 | mod = __import__(mod_name, fromlist=fromlist) 63 | self._parser = getattr(mod, func_name)( 64 | ['datalad'], 65 | formatter_class=fmt.ManPageFormatter, 66 | return_subparsers=True, 67 | # ignore extensions only for the main package to avoid pollution 68 | # with all extension commands that happen to be installed 69 | help_ignore_extensions=self.distribution.get_name() == 'datalad') 70 | 71 | except ImportError as err: 72 | raise err 73 | if self.cmdsuite: 74 | mod_name, suite_name = self.cmdsuite.split(':') 75 | mod = __import__(mod_name, fromlist=mod_name.split('.')) 76 | suite = getattr(mod, suite_name) 77 | self.cmdlist = [c[2] if len(c) > 2 else c[1].replace('_', '-').lower() 78 | for c in suite[1]] 79 | 80 | self.announce('Writing man page(s) to %s' % self.manpath) 81 | self._today = datetime.date.today() 82 | 83 | @classmethod 84 | def handle_module(cls, mod_name, **kwargs): 85 | """Module specific handling. 86 | 87 | This particular one does 88 | 1. Memorize (at class level) the module name of interest here 89 | 2. Check if 'datalad.extensions' are specified for the module, 90 | and then analyzes them to obtain command names it provides 91 | 92 | If cmdline commands are found, its entries are to be used instead of 93 | the ones in datalad's _parser. 
94 | 95 | Parameters 96 | ---------- 97 | **kwargs: 98 | all the kwargs which might be provided to setuptools.setup 99 | """ 100 | cls.mod_name = mod_name 101 | 102 | exts = kwargs.get('entry_points', {}).get('datalad.extensions', []) 103 | for ext in exts: 104 | assert '=' in ext # should be label=module:obj 105 | ext_label, mod_obj = ext.split('=', 1) 106 | assert ':' in mod_obj # should be module:obj 107 | mod, obj = mod_obj.split(':', 1) 108 | assert mod_name == mod # AFAIK should be identical 109 | 110 | mod = __import__(mod_name) 111 | if hasattr(mod, obj): 112 | command_suite = getattr(mod, obj) 113 | assert len(command_suite) == 2 # as far as I see it 114 | if not hasattr(cls, 'cmdline_names'): 115 | cls.cmdline_names = [] 116 | cls.cmdline_names += [ 117 | cmd 118 | for _, _, cmd, _ in command_suite[1] 119 | ] 120 | 121 | def run(self): 122 | 123 | dist = self.distribution 124 | #homepage = dist.get_url() 125 | #appname = self._parser.prog 126 | appname = 'datalad' 127 | 128 | cfg = read_configuration( 129 | opj(dirname(dirname(__file__)), 'setup.cfg'))['metadata'] 130 | 131 | sections = { 132 | 'Authors': """{0} is developed by {1} <{2}>.""".format( 133 | appname, cfg['author'], cfg['author_email']), 134 | } 135 | 136 | for cls, opath, ext in ((fmt.ManPageFormatter, self.manpath, '1'), 137 | (fmt.RSTManPageFormatter, self.rstpath, 'rst')): 138 | if not os.path.exists(opath): 139 | os.makedirs(opath) 140 | for cmdname in getattr(self, 'cmdline_names', list(self._parser)): 141 | if hasattr(self, 'cmdlist') and cmdname not in self.cmdlist: 142 | continue 143 | p = self._parser[cmdname] 144 | cmdname = "{0}{1}".format( 145 | 'datalad ' if cmdname != 'datalad' else '', 146 | cmdname) 147 | format = cls( 148 | cmdname, 149 | ext_sections=sections, 150 | version=versioneer.get_version()) 151 | formatted = format.format_man_page(p) 152 | with open(opj(opath, '{0}.{1}'.format( 153 | cmdname.replace(' ', '-'), 154 | ext)), 155 | 'w') as f: 156 | f.write(formatted) 157 | 158 | 159 | class BuildRSTExamplesFromScripts(Command): 160 | description = 'Generate RST variants of example shell scripts.' 161 | 162 | user_options = [ 163 | ('expath=', None, 'path to look for example scripts'), 164 | ('rstpath=', None, 'output path for RST files'), 165 | ] 166 | 167 | def initialize_options(self): 168 | self.expath = opj('docs', 'examples') 169 | self.rstpath = opj('docs', 'source', 'generated', 'examples') 170 | 171 | def finalize_options(self): 172 | if self.expath is None: 173 | raise OptionError('\'expath\' option is required') 174 | if self.rstpath is None: 175 | raise OptionError('\'rstpath\' option is required') 176 | self.announce('Converting example scripts') 177 | 178 | def run(self): 179 | opath = self.rstpath 180 | if not os.path.exists(opath): 181 | os.makedirs(opath) 182 | 183 | from glob import glob 184 | for example in glob(opj(self.expath, '*.sh')): 185 | exname = os.path.basename(example)[:-3] 186 | with open(opj(opath, '{0}.rst'.format(exname)), 'w') as out: 187 | fmt.cmdline_example_to_rst( 188 | open(example), 189 | out=out, 190 | ref='_example_{0}'.format(exname)) 191 | 192 | 193 | class BuildConfigInfo(Command): 194 | description = 'Generate RST documentation for all config items.' 
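    # For orientation: run() below renders each config item into a reST
    # glossary, one '<category>.rst.in' file per category (global, local,
    # dataset, misc), roughly shaped like this (the item name and values
    # are made up):
    #
    #   .. glossary::
    #      datalad.some.option
    #        Title:
    #        Explanatory text
    #        Default: <value>
    #        [<type>]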
195 |
196 | user_options = [
197 | ('rstpath=', None, 'output path for RST file'),
198 | ]
199 |
200 | def initialize_options(self):
201 | self.rstpath = opj('docs', 'source', 'generated', 'cfginfo')
202 |
203 | def finalize_options(self):
204 | if self.rstpath is None:
205 | raise OptionError('\'rstpath\' option is required')
206 | self.announce('Generating configuration documentation')
207 |
208 | def run(self):
209 | opath = self.rstpath
210 | if not os.path.exists(opath):
211 | os.makedirs(opath)
212 |
213 | from datalad.dochelpers import _indent
214 | from datalad.interface.common_cfg import definitions as cfgdefs
215 |
216 | categories = {
217 | 'global': {},
218 | 'local': {},
219 | 'dataset': {},
220 | 'misc': {}
221 | }
222 | for term, v in cfgdefs.items():
223 | categories[v.get('destination', 'misc')][term] = v
224 |
225 | for cat in categories:
226 | with open(opj(opath, '{}.rst.in'.format(cat)), 'w') as rst:
227 | rst.write('.. glossary::\n')
228 | for term, v in sorted(categories[cat].items(), key=lambda x: x[0]):
229 | rst.write(_indent(term, '\n '))
230 | qtype, docs = v.get('ui', (None, {}))
231 | desc_tmpl = '\n'
232 | if 'title' in docs:
233 | desc_tmpl += '{title}:\n'
234 | if 'text' in docs:
235 | desc_tmpl += '{text}\n'
236 | if 'default' in v:
237 | default = v['default']
238 | if hasattr(default, 'replace'):
239 | # protect against leaking specific home dirs
240 | v['default'] = default.replace(os.path.expanduser('~'), '~')
241 | desc_tmpl += 'Default: {default}\n'
242 | if 'type' in v:
243 | type_ = v['type']
244 | if hasattr(type_, 'long_description'):
245 | type_ = type_.long_description()
246 | else:
247 | type_ = type_.__name__
248 | desc_tmpl += '\n[{type}]\n'
249 | v['type'] = type_
250 | if desc_tmpl == '\n':
251 | # we need something to avoid joining terms
252 | desc_tmpl += 'undocumented\n'
253 | v.update(docs)
254 | rst.write(_indent(desc_tmpl.format(**v), ' '))
255 |
-------------------------------------------------------------------------------- /changelog.d/scriv.ini: --------------------------------------------------------------------------------
1 | [scriv]
2 | fragment_directory = changelog.d
3 | entry_title_template = file: changelog.d/templates/entry_title.md.j2
4 | new_fragment_template = file: changelog.d/templates/new_fragment.md.j2
5 | format = md
6 | categories = 💥 Breaking Changes, 🚀 Enhancements and New Features, 🐛 Bug Fixes, 🔩 Dependencies, 📝 Documentation, 🏠 Internal, 🏎 Performance, 🧪 Tests
7 |
-------------------------------------------------------------------------------- /changelog.d/templates/entry_title.md.j2: --------------------------------------------------------------------------------
1 | {{ version if version else "VERSION" }} ({{ date.strftime('%Y-%m-%d') }})
2 |
-------------------------------------------------------------------------------- /changelog.d/templates/new_fragment.md.j2: --------------------------------------------------------------------------------
1 |
2 | {% for cat in config.categories -%}
3 |
10 | {% endfor -%}
11 |
-------------------------------------------------------------------------------- /datalad_container/__init__.py: --------------------------------------------------------------------------------
1 | """DataLad container extension"""
2 |
3 | __docformat__ = 'restructuredtext'
4 |
5 | # Imported to set singularity/apptainer version commands at init
6 | import datalad_container.extractors._load_singularity_versions # noqa
7 |
8 | # defines a datalad command suite
9 | # this symbol must be identified
as a setuptools entrypoint 10 | # to be found by datalad 11 | command_suite = ( 12 | # description of the command suite, displayed in cmdline help 13 | "Containerized environments", 14 | [ 15 | # specification of a command, any number of commands can be defined 16 | ( 17 | # importable module that contains the command implementation 18 | 'datalad_container.containers_list', 19 | # name of the command class implementation in above module 20 | 'ContainersList', 21 | 'containers-list', 22 | 'containers_list', 23 | ), 24 | ( 25 | 'datalad_container.containers_remove', 26 | # name of the command class implementation in above module 27 | 'ContainersRemove', 28 | 'containers-remove', 29 | 'containers_remove', 30 | 31 | ), 32 | ( 33 | 'datalad_container.containers_add', 34 | # name of the command class implementation in above module 35 | 'ContainersAdd', 36 | 'containers-add', 37 | 'containers_add', 38 | 39 | ), 40 | ( 41 | 'datalad_container.containers_run', 42 | 'ContainersRun', 43 | 'containers-run', 44 | 'containers_run', 45 | 46 | ) 47 | ] 48 | ) 49 | 50 | from os.path import join as opj 51 | 52 | from datalad.support.constraints import EnsureStr 53 | from datalad.support.extensions import register_config 54 | 55 | register_config( 56 | 'datalad.containers.location', 57 | 'Container location', 58 | description='path within the dataset where to store containers', 59 | type=EnsureStr(), 60 | default=opj(".datalad", "environments"), 61 | dialog='question', 62 | scope='dataset', 63 | ) 64 | 65 | from . import _version 66 | 67 | __version__ = _version.get_versions()['version'] 68 | -------------------------------------------------------------------------------- /datalad_container/adapters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalad/datalad-container/e9bba2a6566abf0e0a788dc06c56a468f8400d1f/datalad_container/adapters/__init__.py -------------------------------------------------------------------------------- /datalad_container/adapters/docker.py: -------------------------------------------------------------------------------- 1 | """Work with Docker images as local paths. 2 | 3 | This module provides support for saving a Docker image in a local directory and 4 | then loading it on-the-fly before calling `docker run ...`. The motivation for 5 | this is that it allows the components of an image to be tracked as objects in a 6 | DataLad dataset. 7 | 8 | Run `python -m datalad_container.adapters.docker --help` for details about the 9 | command-line interface. 10 | """ 11 | 12 | import hashlib 13 | import json 14 | import logging 15 | import os 16 | import os.path as op 17 | import subprocess as sp 18 | import sys 19 | import tarfile 20 | import tempfile 21 | 22 | from datalad.utils import on_windows 23 | 24 | lgr = logging.getLogger("datalad.containers.adapters.docker") 25 | 26 | # Note: A dockerpy dependency probably isn't worth it in the current 27 | # state but is worth thinking about if this module gets more 28 | # complicated. 29 | 30 | # FIXME: These functions assume that there is a "docker" on the path 31 | # that can be managed by a non-root user. At the least, this should 32 | # be documented somewhere. 33 | 34 | 35 | def save(image, path): 36 | """Save and extract a docker image to a directory. 37 | 38 | Parameters 39 | ---------- 40 | image : str 41 | A unique identifier for a docker image. 42 | path : str 43 | A directory to extract the image to. 
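
    Examples
    --------
    A typical invocation via this module's CLI (image name and target
    directory are just examples)::

        python -m datalad_container.adapters.docker save busybox:latest /tmp/busybox-image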
44 | """ 45 | # Use a temporary file because docker save (or actually tar underneath) 46 | # complains that stdout needs to be redirected if we use Popen and PIPE. 47 | if ":" not in image: 48 | image = f"{image}:latest" 49 | with tempfile.NamedTemporaryFile() as stream: 50 | # Windows can't write to an already opened file 51 | stream.close() 52 | sp.check_call(["docker", "save", "-o", stream.name, image]) 53 | with tarfile.open(stream.name, mode="r:") as tar: 54 | if not op.exists(path): 55 | lgr.debug("Creating new directory at %s", path) 56 | os.makedirs(path) 57 | elif os.listdir(path): 58 | raise OSError("Directory {} is not empty".format(path)) 59 | def is_within_directory(directory, target): 60 | 61 | abs_directory = os.path.abspath(directory) 62 | abs_target = os.path.abspath(target) 63 | 64 | prefix = os.path.commonprefix([abs_directory, abs_target]) 65 | 66 | return prefix == abs_directory 67 | 68 | def safe_extract(tar, path=".", members=None, *, numeric_owner=False): 69 | 70 | for member in tar.getmembers(): 71 | member_path = os.path.join(path, member.name) 72 | if not is_within_directory(path, member_path): 73 | raise Exception("Attempted Path Traversal in Tar File") 74 | 75 | tar.extractall(path, members, numeric_owner=numeric_owner) 76 | 77 | 78 | safe_extract(tar, path=path) 79 | lgr.info("Saved %s to %s", image, path) 80 | 81 | 82 | def _list_images(): 83 | out = sp.check_output( 84 | ["docker", "images", "--all", "--quiet", "--no-trunc"]) 85 | return out.decode().splitlines() 86 | 87 | 88 | def get_image(path, repo_tag=None, config=None): 89 | """Return the image ID of the image extracted at `path`. 90 | """ 91 | manifest_path = op.join(path, "manifest.json") 92 | with open(manifest_path) as fp: 93 | manifest = json.load(fp) 94 | if repo_tag is not None: 95 | manifest = [img for img in manifest if repo_tag in (img.get("RepoTags") or [])] 96 | if config is not None: 97 | manifest = [img for img in manifest if img["Config"].startswith(config)] 98 | if len(manifest) == 0: 99 | raise ValueError(f"No matching images found in {manifest_path}") 100 | elif len(manifest) > 1: 101 | raise ValueError( 102 | f"Multiple images found in {manifest_path}; disambiguate with" 103 | " --repo-tag or --config" 104 | ) 105 | 106 | with open(op.join(path, manifest[0]["Config"]), "rb") as stream: 107 | return hashlib.sha256(stream.read()).hexdigest() 108 | 109 | 110 | def load(path, repo_tag, config): 111 | """Load the Docker image from `path`. 112 | 113 | Parameters 114 | ---------- 115 | path : str 116 | A directory with an extracted tar archive. 117 | repo_tag : str or None 118 | `image:tag` of image to load 119 | config : str or None 120 | "Config" value or prefix of image to load 121 | 122 | Returns 123 | ------- 124 | The image ID (str) 125 | """ 126 | # FIXME: If we load a dataset, it may overwrite the current tag. Say that 127 | # (1) a dataset has a saved neurodebian:latest from a month ago, (2) a 128 | # newer neurodebian:latest has been pulled, and (3) the old image have been 129 | # deleted (e.g., with 'docker image prune --all'). Given all three of these 130 | # things, loading the image from the dataset will tag the old neurodebian 131 | # image as the latest. 
132 | image_id = "sha256:" + get_image(path, repo_tag, config) 133 | if image_id not in _list_images(): 134 | lgr.debug("Loading %s", image_id) 135 | cmd = ["docker", "load"] 136 | p = sp.Popen(cmd, stdin=sp.PIPE, stdout=sp.PIPE, stderr=sp.PIPE) 137 | with tarfile.open(fileobj=p.stdin, mode="w|", dereference=True) as tar: 138 | tar.add(path, arcname="") 139 | out, err = p.communicate() 140 | return_code = p.poll() 141 | if return_code: 142 | lgr.warning("Running %r failed: %s", cmd, err.decode()) 143 | raise sp.CalledProcessError(return_code, cmd, output=out) 144 | else: 145 | lgr.debug("Image %s is already present", image_id) 146 | 147 | if image_id not in _list_images(): 148 | raise RuntimeError( 149 | "docker image {} was not successfully loaded".format(image_id)) 150 | return image_id 151 | 152 | 153 | # Command-line 154 | 155 | 156 | def cli_save(namespace): 157 | save(namespace.image, namespace.path) 158 | 159 | 160 | def cli_run(namespace): 161 | image_id = load(namespace.path, namespace.repo_tag, namespace.config) 162 | prefix = ["docker", "run", 163 | # FIXME: The -v/-w settings are convenient for testing, but they 164 | # should be configurable. 165 | "-v", "{}:/tmp".format(os.getcwd()), 166 | "-w", "/tmp", 167 | "--rm", 168 | "--interactive"] 169 | if not on_windows: 170 | # Make it possible for the output files to be added to the 171 | # dataset without the user needing to manually adjust the 172 | # permissions. 173 | prefix.extend(["-u", "{}:{}".format(os.getuid(), os.getgid())]) 174 | 175 | if sys.stdin.isatty(): 176 | prefix.append("--tty") 177 | prefix.append(image_id) 178 | cmd = prefix + namespace.cmd 179 | lgr.debug("Running %r", cmd) 180 | sp.check_call(cmd) 181 | 182 | 183 | def main(args): 184 | import argparse 185 | 186 | parser = argparse.ArgumentParser( 187 | prog="python -m datalad_container.adapters.docker", 188 | description="Work with Docker images as local paths") 189 | parser.add_argument( 190 | "-v", "--verbose", 191 | action="store_true") 192 | 193 | subparsers = parser.add_subparsers(title="subcommands") 194 | # Don't continue without a subcommand. 195 | subparsers.required = True 196 | subparsers.dest = "command" 197 | 198 | parser_save = subparsers.add_parser( 199 | "save", 200 | help="save and extract a Docker image to a directory") 201 | parser_save.add_argument( 202 | "image", metavar="NAME", 203 | help="image to save") 204 | parser_save.add_argument( 205 | "path", metavar="PATH", 206 | help="directory to save image in") 207 | parser_save.set_defaults(func=cli_save) 208 | # TODO: Add command for updating an archive directory. 
209 | 210 | parser_run = subparsers.add_parser( 211 | "run", 212 | help="run a command with a directory's image") 213 | parser_run.add_argument( 214 | "--repo-tag", metavar="IMAGE:TAG", help="Tag of image to load" 215 | ) 216 | parser_run.add_argument( 217 | "--config", 218 | metavar="IDPREFIX", 219 | help="Config value or prefix of image to load" 220 | ) 221 | parser_run.add_argument( 222 | "path", metavar="PATH", 223 | help="run the image in this directory") 224 | parser_run.add_argument( 225 | "cmd", metavar="CMD", nargs=argparse.REMAINDER, 226 | help="command to execute") 227 | parser_run.set_defaults(func=cli_run) 228 | 229 | namespace = parser.parse_args(args[1:]) 230 | 231 | logging.basicConfig( 232 | level=logging.DEBUG if namespace.verbose else logging.INFO, 233 | format="%(message)s") 234 | 235 | namespace.func(namespace) 236 | 237 | 238 | if __name__ == "__main__": 239 | try: 240 | main(sys.argv) 241 | except Exception as exc: 242 | lgr.exception("Failed to execute %s", sys.argv) 243 | if isinstance(exc, sp.CalledProcessError): 244 | excode = exc.returncode 245 | else: 246 | excode = 1 247 | sys.exit(excode) 248 | -------------------------------------------------------------------------------- /datalad_container/adapters/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalad/datalad-container/e9bba2a6566abf0e0a788dc06c56a468f8400d1f/datalad_container/adapters/tests/__init__.py -------------------------------------------------------------------------------- /datalad_container/adapters/tests/test_docker.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os.path as op 3 | import sys 4 | from shutil import ( 5 | unpack_archive, 6 | which, 7 | ) 8 | 9 | import pytest 10 | from datalad.cmd import ( 11 | StdOutCapture, 12 | WitlessRunner, 13 | ) 14 | from datalad.support.exceptions import CommandError 15 | from datalad.tests.utils_pytest import ( 16 | SkipTest, 17 | assert_in, 18 | assert_raises, 19 | eq_, 20 | ok_exists, 21 | with_tempfile, 22 | with_tree, 23 | ) 24 | 25 | import datalad_container.adapters.docker as da 26 | 27 | if not which("docker"): 28 | raise SkipTest("'docker' not found on path") 29 | 30 | 31 | def call(args, **kwds): 32 | return WitlessRunner().run( 33 | [sys.executable, "-m", "datalad_container.adapters.docker"] + args, 34 | **kwds) 35 | 36 | 37 | def list_images(args): 38 | cmd = ["docker", "images", "--quiet", "--no-trunc"] + args 39 | res = WitlessRunner().run(cmd, protocol=StdOutCapture) 40 | return res["stdout"].strip().split() 41 | 42 | 43 | def images_exist(args): 44 | return bool(list_images(args)) 45 | 46 | 47 | @with_tempfile 48 | def test_docker_save_doesnt_exist(path=None): 49 | image_name = "idonotexistsurely" 50 | if images_exist([image_name]): 51 | raise SkipTest("Image wasn't supposed to exist, but does: {}" 52 | .format(image_name)) 53 | with assert_raises(CommandError): 54 | call(["save", image_name, path]) 55 | 56 | 57 | class TestAdapterBusyBox(object): 58 | 59 | @classmethod 60 | def setup_class(cls): 61 | cls.image_name = "busybox:latest" 62 | if images_exist([cls.image_name]): 63 | cls.image_existed = True 64 | else: 65 | cls.image_existed = False 66 | try: 67 | WitlessRunner().run(["docker", "pull", cls.image_name]) 68 | except CommandError: 69 | # This is probably due to rate limiting. 
70 | raise SkipTest("Plain `docker pull` failed; skipping") 71 | 72 | @classmethod 73 | def teardown_class(cls): 74 | if not cls.image_existed and images_exist([cls.image_name]): 75 | WitlessRunner().run(["docker", "rmi", cls.image_name]) 76 | 77 | @with_tempfile(mkdir=True) 78 | def test_save_and_run(self, path=None): 79 | image_dir = op.join(path, "image") 80 | call(["save", self.image_name, image_dir]) 81 | ok_exists(op.join(image_dir, "manifest.json")) 82 | img_ids = list_images([self.image_name]) 83 | assert len(img_ids) == 1 84 | eq_("sha256:" + da.get_image(image_dir), 85 | img_ids[0]) 86 | 87 | if not self.image_existed: 88 | WitlessRunner().run(["docker", "rmi", self.image_name]) 89 | 90 | out = call(["run", image_dir, "ls"], cwd=path, 91 | protocol=StdOutCapture) 92 | 93 | assert images_exist([self.image_name]) 94 | assert_in("image", out["stdout"]) 95 | 96 | @with_tree({"foo": "content"}) 97 | def test_containers_run(self, path=None): 98 | if self.image_existed: 99 | raise SkipTest( 100 | "Not pulling with containers-run due to existing image: {}" 101 | .format(self.image_name)) 102 | 103 | from datalad.api import Dataset 104 | ds = Dataset(path).create(force=True) 105 | ds.save(path="foo") 106 | ds.containers_add("bb", url="dhub://" + self.image_name) 107 | 108 | out = WitlessRunner(cwd=ds.path).run( 109 | ["datalad", "containers-run", "-n", "bb", "cat foo"], 110 | protocol=StdOutCapture) 111 | assert_in("content", out["stdout"]) 112 | 113 | # Data can be received on stdin. 114 | with (ds.pathobj / "foo").open() as ifh: 115 | out = WitlessRunner(cwd=ds.path).run( 116 | ["datalad", "containers-run", "-n", "bb", "cat"], 117 | protocol=StdOutCapture, 118 | stdin=ifh) 119 | assert_in("content", out["stdout"]) 120 | 121 | 122 | def test_load_multi_image(tmp_path): 123 | for v in ["3.15", "3.16", "3.17"]: 124 | WitlessRunner().run(["docker", "pull", f"alpine:{v}"]) 125 | WitlessRunner().run(["docker", "save", "alpine", "-o", str(tmp_path / "alpine.tar")]) 126 | unpack_archive(tmp_path / "alpine.tar", tmp_path / "alpine") 127 | with pytest.raises(CommandError): 128 | call(["run", str(tmp_path / "alpine"), "ls"]) 129 | call(["run", "--repo-tag", "alpine:3.16", str(tmp_path / "alpine"), "ls"]) 130 | 131 | 132 | def test_save_multi_image(tmp_path): 133 | for v in ["3.15", "3.16", "latest"]: 134 | WitlessRunner().run(["docker", "pull", f"alpine:{v}"]) 135 | call(["save", "alpine", str(tmp_path)]) 136 | with (tmp_path / "manifest.json").open() as fp: 137 | manifest = json.load(fp) 138 | assert len(manifest) == 1 139 | assert manifest[0]["RepoTags"] == ["alpine:latest"] 140 | -------------------------------------------------------------------------------- /datalad_container/conftest.py: -------------------------------------------------------------------------------- 1 | from datalad.conftest import setup_package 2 | 3 | from .tests.fixtures import * # noqa: F401, F403 # lgtm [py/polluting-import] 4 | -------------------------------------------------------------------------------- /datalad_container/containers_add.py: -------------------------------------------------------------------------------- 1 | """Add a container environment to a dataset""" 2 | 3 | __docformat__ = 'restructuredtext' 4 | 5 | import json 6 | import logging 7 | import os 8 | import os.path as op 9 | import re 10 | from pathlib import ( 11 | Path, 12 | PurePosixPath, 13 | ) 14 | from shutil import copyfile 15 | 16 | from datalad.cmd import WitlessRunner 17 | from datalad.distribution.dataset import ( 18 | EnsureDataset, 
19 | datasetmethod,
20 | require_dataset,
21 | )
22 | from datalad.interface.base import (
23 | Interface,
24 | build_doc,
25 | eval_results,
26 | )
27 | from datalad.interface.results import get_status_dict
28 | from datalad.support.constraints import (
29 | EnsureNone,
30 | EnsureStr,
31 | )
32 | from datalad.support.exceptions import InsufficientArgumentsError
33 | from datalad.support.param import Parameter
34 |
35 | from .utils import get_container_configuration
36 |
37 | lgr = logging.getLogger("datalad.containers.containers_add")
38 |
39 | # The DataLad special remote has built-in support for Singularity Hub URLs. Let
40 | # it handle shub:// URLs if it's available.
41 | _HAS_SHUB_DOWNLOADER = True
42 | try:
43 | import datalad.downloaders.shub
44 | except ImportError:
45 | lgr.debug("DataLad's shub downloader not found. "
46 | "Custom handling for shub:// will be used")
47 | _HAS_SHUB_DOWNLOADER = False
48 |
49 |
50 | def _resolve_img_url(url):
51 | """Takes a URL and tries to resolve it to an actual download
52 | URL that `annex addurl` can handle"""
53 | if not _HAS_SHUB_DOWNLOADER and url.startswith('shub://'):
54 | # TODO: Remove this handling once the minimum DataLad version is at
55 | # least 0.14.
56 | lgr.debug('Query singularity-hub for image download URL')
57 | import requests
58 | req = requests.get(
59 | 'https://www.singularity-hub.org/api/container/{}'.format(
60 | url[7:]))
61 | shub_info = json.loads(req.text)
62 | url = shub_info['image']
63 | return url
64 |
65 |
66 | def _guess_call_fmt(ds, name, url):
67 | """Helper to guess a container exec setup based on
68 | - a name (to be able to look up more config)
69 | - a plain url to make inference based on the source location
70 |
71 | Should return `None` if no guess can be made.
72 | """
73 | if url is None:
74 | return None
75 | elif url.startswith('shub://') or url.startswith('docker://'):
76 | return 'singularity exec {img} {cmd}'
77 | elif url.startswith('dhub://'):
78 | # {python} is replaced with sys.executable on *execute*
79 | return '{python} -m datalad_container.adapters.docker run {img} {cmd}'
80 |
81 |
82 | def _ensure_datalad_remote(repo):
83 | """Initialize and enable datalad special remote if it isn't already."""
84 | dl_remote = None
85 | for info in repo.get_special_remotes().values():
86 | if info.get("externaltype") == "datalad":
87 | dl_remote = info["name"]
88 | break
89 |
90 | if not dl_remote:
91 | from datalad.consts import DATALAD_SPECIAL_REMOTE
92 | from datalad.customremotes.base import init_datalad_remote
93 |
94 | init_datalad_remote(repo, DATALAD_SPECIAL_REMOTE, autoenable=True)
95 | elif repo.is_special_annex_remote(dl_remote, check_if_known=False):
96 | lgr.debug("datalad special remote '%s' is already enabled",
97 | dl_remote)
98 | else:
99 | lgr.debug("datalad special remote '%s' found. Enabling",
100 | dl_remote)
101 | repo.enable_remote(dl_remote)
102 |
103 |
104 | @build_doc
105 | # all commands must be derived from Interface
106 | class ContainersAdd(Interface):
107 | # first docstring line is used as a short description in the cmdline help
108 | # the rest is put in the verbose help and manpage
109 | """Add a container to a dataset
110 | """
111 |
112 | # parameters of the command, must be exhaustive
113 | _params_ = dict(
114 | dataset=Parameter(
115 | args=("-d", "--dataset"),
116 | doc="""specify the dataset to add the container to.
If no dataset is 117 | given, an attempt is made to identify the dataset based on the 118 | current working directory""", 119 | constraints=EnsureDataset() | EnsureNone() 120 | ), 121 | name=Parameter( 122 | args=("name",), 123 | doc="""The name to register the container under. This also 124 | determines the default location of the container image 125 | within the dataset.""", 126 | metavar="NAME", 127 | constraints=EnsureStr(), 128 | ), 129 | url=Parameter( 130 | args=("-u", "--url"), 131 | doc="""A URL (or local path) to get the container image from. If 132 | the URL scheme is one recognized by Singularity (e.g., 133 | 'shub://neurodebian/dcm2niix:latest' or 134 | 'docker://debian:stable-slim'), a command format string for 135 | Singularity-based execution will be auto-configured when 136 | [CMD: --call-fmt CMD][PY: call_fmt PY] is not specified. 137 | For Docker-based container execution with the URL scheme 'dhub://', 138 | the rest of the URL will be interpreted as the argument to 139 | 'docker pull', the image will be saved to a location 140 | specified by `name`, and the call format will be auto-configured 141 | to run docker, unless overwritten. The auto-configured call to docker 142 | run mounts the CWD to '/tmp' and sets the working directory to '/tmp'.""", 143 | metavar="URL", 144 | constraints=EnsureStr() | EnsureNone(), 145 | ), 146 | 147 | # TODO: The "prepared command stuff should ultimately go somewhere else 148 | # (probably datalad-run). But first figure out, how exactly to address 149 | # container datasets 150 | call_fmt=Parameter( 151 | args=("--call-fmt",), 152 | doc="""Command format string indicating how to execute a command in 153 | this container, e.g. "singularity exec {img} {cmd}". Where '{img}' 154 | is a placeholder for the path to the container image and '{cmd}' is 155 | replaced with the desired command. Additional placeholders: 156 | '{img_dspath}' is relative path to the dataset containing the image, 157 | '{img_dirpath}' is the directory containing the '{img}'. 158 | '{python}' expands to the path of the Python executable that is 159 | running the respective DataLad session, for example a 160 | 'datalad containers-run' command. 161 | """, 162 | metavar="FORMAT", 163 | constraints=EnsureStr() | EnsureNone(), 164 | ), 165 | extra_input=Parameter( 166 | args=("--extra-input",), 167 | doc="""Additional file the container invocation depends on (e.g. 168 | overlays used in --call-fmt). Can be specified multiple times. 169 | Similar to --call-fmt, the placeholders {img_dspath} and 170 | {img_dirpath} are available. Will be stored in the dataset config and 171 | later added alongside the container image to the `extra_inputs` 172 | field in the run-record and thus automatically be fetched when 173 | needed. 174 | """, 175 | action="append", 176 | default=[], 177 | metavar="FILE", 178 | # Can't use EnsureListOf(str) yet as it handles strings as iterables... 179 | # See this PR: https://github.com/datalad/datalad/pull/7267 180 | # constraints=EnsureListOf(str) | EnsureNone(), 181 | ), 182 | image=Parameter( 183 | args=("-i", "--image"), 184 | doc="""Relative path of the container image within the dataset. If not 185 | given, a default location will be determined using the 186 | `name` argument.""", 187 | metavar="IMAGE", 188 | constraints=EnsureStr() | EnsureNone(), 189 | 190 | ), 191 | update=Parameter( 192 | args=("--update",), 193 | action="store_true", 194 | doc="""Update the existing container for `name`. 
If no other 195 | options are specified, URL will be set to 'updateurl', if 196 | configured. If a container with `name` does not already exist, this 197 | option is ignored.""" 198 | ) 199 | ) 200 | 201 | @staticmethod 202 | @datasetmethod(name='containers_add') 203 | @eval_results 204 | def __call__(name, url=None, dataset=None, call_fmt=None, image=None, 205 | update=False, extra_input=None): 206 | if not name: 207 | raise InsufficientArgumentsError("`name` argument is required") 208 | 209 | ds = require_dataset(dataset, check_installed=True, 210 | purpose='add container') 211 | runner = WitlessRunner() 212 | 213 | # prevent madness in the config file 214 | if not re.match(r'^[0-9a-zA-Z-]+$', name): 215 | raise ValueError( 216 | "Container names can only contain alphanumeric characters " 217 | "and '-', got: '{}'".format(name)) 218 | 219 | container_cfg = get_container_configuration(ds, name) 220 | if 'image' in container_cfg: 221 | if not update: 222 | yield get_status_dict( 223 | action="containers_add", ds=ds, logger=lgr, 224 | status="impossible", 225 | message=("Container named %r already exists. " 226 | "Use --update to reconfigure.", 227 | name)) 228 | return 229 | 230 | if not (url or image or call_fmt): 231 | # No updated values were provided. See if an update url is 232 | # configured (currently relevant only for Singularity Hub). 233 | url = container_cfg.get("updateurl") 234 | if not url: 235 | yield get_status_dict( 236 | action="containers_add", ds=ds, logger=lgr, 237 | status="impossible", 238 | message="No values to update specified") 239 | return 240 | 241 | call_fmt = call_fmt or container_cfg.get("cmdexec") 242 | image = image or container_cfg.get("image") 243 | 244 | if not image: 245 | loc_cfg_var = "datalad.containers.location" 246 | container_loc = \ 247 | ds.config.obtain( 248 | loc_cfg_var, 249 | # if not False it would actually modify the 250 | # dataset config file -- undesirable 251 | store=False, 252 | ) 253 | image = op.join(ds.path, container_loc, name, 'image') 254 | else: 255 | image = op.join(ds.path, image) 256 | 257 | result = get_status_dict( 258 | action="containers_add", 259 | path=image, 260 | type="file", 261 | logger=lgr, 262 | ) 263 | 264 | if call_fmt is None: 265 | # maybe built in knowledge can help 266 | call_fmt = _guess_call_fmt(ds, name, url) 267 | 268 | # collect bits for a final and single save() call 269 | to_save = [] 270 | imgurl = url 271 | was_updated = False 272 | if url: 273 | if update and op.lexists(image): 274 | was_updated = True 275 | # XXX: check=False is used to avoid dropping the image. It 276 | # should use drop=False if remove() gets such an option (see 277 | # DataLad's gh-2673). 
278 | for r in ds.remove(image, reckless='availability', 279 | return_type="generator"): 280 | yield r 281 | 282 | imgurl = _resolve_img_url(url) 283 | lgr.debug('Attempt to obtain container image from: %s', imgurl) 284 | if url.startswith("dhub://"): 285 | from .adapters import docker 286 | 287 | docker_image = url[len("dhub://"):] 288 | 289 | lgr.debug( 290 | "Running 'docker pull %s and saving image to %s", 291 | docker_image, image) 292 | runner.run(["docker", "pull", docker_image]) 293 | docker.save(docker_image, image) 294 | elif url.startswith("docker://"): 295 | image_dir, image_basename = op.split(image) 296 | if not image_basename: 297 | raise ValueError("No basename in path {}".format(image)) 298 | if image_dir and not op.exists(image_dir): 299 | os.makedirs(image_dir) 300 | 301 | lgr.info("Building Singularity image for %s " 302 | "(this may take some time)", 303 | url) 304 | runner.run(["singularity", "build", image_basename, url], 305 | cwd=image_dir or None) 306 | elif op.exists(url): 307 | lgr.info("Copying local file %s to %s", url, image) 308 | image_dir = op.dirname(image) 309 | if image_dir and not op.exists(image_dir): 310 | os.makedirs(image_dir) 311 | copyfile(url, image) 312 | else: 313 | if _HAS_SHUB_DOWNLOADER and url.startswith('shub://'): 314 | _ensure_datalad_remote(ds.repo) 315 | 316 | try: 317 | ds.repo.add_url_to_file(image, imgurl) 318 | except Exception as e: 319 | result["status"] = "error" 320 | result["message"] = str(e) 321 | yield result 322 | # TODO do we have to take care of making the image executable 323 | # if --call_fmt is not provided? 324 | to_save.append(image) 325 | # continue despite a remote access failure, the following config 326 | # setting will enable running the command again with just the name 327 | # given to ease a re-run 328 | if not op.lexists(image): 329 | result["status"] = "error" 330 | result["message"] = ('no image at %s', image) 331 | yield result 332 | return 333 | 334 | # store configs 335 | cfgbasevar = "datalad.containers.{}".format(name) 336 | if imgurl != url: 337 | # store originally given URL, as it resolves to something 338 | # different and maybe can be used to update the container 339 | # at a later point in time 340 | ds.config.set("{}.updateurl".format(cfgbasevar), url) 341 | # force store the image, and prevent multiple entries 342 | ds.config.set( 343 | "{}.image".format(cfgbasevar), 344 | # always store a POSIX path, relative to dataset root 345 | str(PurePosixPath(Path(image).relative_to(ds.pathobj))), 346 | force=True) 347 | if call_fmt: 348 | ds.config.set( 349 | "{}.cmdexec".format(cfgbasevar), 350 | call_fmt, 351 | force=True) 352 | # --extra-input sanity check 353 | # TODO: might also want to do that for --call-fmt above? 354 | extra_input_placeholders = dict(img_dirpath="", img_dspath="") 355 | for xi in (extra_input or []): 356 | try: 357 | xi.format(**extra_input_placeholders) 358 | except KeyError as exc: 359 | yield get_status_dict( 360 | action="containers_add", ds=ds, logger=lgr, 361 | status="error", 362 | message=("--extra-input %r contains unknown placeholder %s. 
" 363 | "Available placeholders: %s", 364 | repr(xi), exc, ', '.join(extra_input_placeholders))) 365 | return 366 | 367 | # actually setting --extra-input config 368 | cfgextravar = "{}.extra-input".format(cfgbasevar) 369 | if ds.config.get(cfgextravar) is not None: 370 | ds.config.unset(cfgextravar) 371 | for xi in (extra_input or []): 372 | ds.config.add(cfgextravar, xi) 373 | 374 | # store changes 375 | to_save.append(op.join(".datalad", "config")) 376 | for r in ds.save( 377 | path=to_save, 378 | message="[DATALAD] {do} containerized environment '{name}'".format( 379 | do="Update" if was_updated else "Configure", 380 | name=name)): 381 | yield r 382 | result["status"] = "ok" 383 | yield result 384 | -------------------------------------------------------------------------------- /datalad_container/containers_list.py: -------------------------------------------------------------------------------- 1 | """List known container environments of a dataset""" 2 | 3 | __docformat__ = 'restructuredtext' 4 | 5 | import logging 6 | import os.path as op 7 | 8 | import datalad.support.ansi_colors as ac 9 | from datalad.coreapi import subdatasets 10 | from datalad.distribution.dataset import ( 11 | Dataset, 12 | EnsureDataset, 13 | datasetmethod, 14 | require_dataset, 15 | ) 16 | from datalad.interface.base import ( 17 | Interface, 18 | build_doc, 19 | eval_results, 20 | ) 21 | from datalad.interface.common_opts import recursion_flag 22 | from datalad.interface.results import get_status_dict 23 | from datalad.interface.utils import default_result_renderer 24 | from datalad.support.constraints import EnsureNone 25 | from datalad.support.param import Parameter 26 | from datalad.ui import ui 27 | 28 | from datalad_container.utils import get_container_configuration 29 | 30 | lgr = logging.getLogger("datalad.containers.containers_list") 31 | 32 | 33 | @build_doc 34 | # all commands must be derived from Interface 35 | class ContainersList(Interface): 36 | # first docstring line is used a short description in the cmdline help 37 | # the rest is put in the verbose help and manpage 38 | """List containers known to a dataset 39 | """ 40 | 41 | result_renderer = 'tailored' 42 | # parameters of the command, must be exhaustive 43 | _params_ = dict( 44 | dataset=Parameter( 45 | args=("-d", "--dataset"), 46 | doc="""specify the dataset to query. If no dataset is given, an 47 | attempt is made to identify the dataset based on the current 48 | working directory""", 49 | constraints=EnsureDataset() | EnsureNone()), 50 | contains=Parameter( 51 | args=('--contains',), 52 | metavar='PATH', 53 | action='append', 54 | doc="""when operating recursively, restrict the reported containers 55 | to those from subdatasets that contain the given path (i.e. the 56 | subdatasets that are reported by :command:`datalad subdatasets 57 | --contains=PATH`). 
Top-level containers are always reported."""), 58 | recursive=recursion_flag, 59 | ) 60 | 61 | @staticmethod 62 | @datasetmethod(name='containers_list') 63 | @eval_results 64 | def __call__(dataset=None, recursive=False, contains=None): 65 | ds = require_dataset(dataset, check_installed=True, 66 | purpose='list containers') 67 | refds = ds.path 68 | 69 | if recursive: 70 | for sub in ds.subdatasets( 71 | contains=contains, 72 | on_failure='ignore', 73 | return_type='generator', 74 | result_renderer='disabled'): 75 | subds = Dataset(sub['path']) 76 | if subds.is_installed(): 77 | for c in subds.containers_list(recursive=recursive, 78 | return_type='generator', 79 | on_failure='ignore', 80 | result_filter=None, 81 | result_renderer=None, 82 | result_xfm=None): 83 | c['name'] = sub['gitmodule_name'] + '/' + c['name'] 84 | c['refds'] = refds 85 | yield c 86 | 87 | # all info is in the dataset config! 88 | containers = get_container_configuration(ds) 89 | 90 | for k, v in containers.items(): 91 | if 'image' not in v: 92 | # there is no container location configured 93 | continue 94 | res = get_status_dict( 95 | status='ok', 96 | action='containers', 97 | name=k, 98 | type='file', 99 | path=op.join(ds.path, v.pop('image')), 100 | refds=refds, 101 | parentds=ds.path, 102 | # TODO 103 | #state='absent' if ... else 'present' 104 | **v) 105 | yield res 106 | 107 | @staticmethod 108 | def custom_result_renderer(res, **kwargs): 109 | if res["action"] != "containers": 110 | default_result_renderer(res) 111 | else: 112 | ui.message( 113 | "{name} -> {path}" 114 | .format(name=ac.color_word(res["name"], ac.MAGENTA), 115 | path=op.relpath(res["path"], res["refds"]))) 116 | -------------------------------------------------------------------------------- /datalad_container/containers_remove.py: -------------------------------------------------------------------------------- 1 | """Remove a container environment from a dataset""" 2 | 3 | __docformat__ = 'restructuredtext' 4 | 5 | import logging 6 | import os.path as op 7 | 8 | from datalad.distribution.dataset import ( 9 | EnsureDataset, 10 | datasetmethod, 11 | require_dataset, 12 | ) 13 | from datalad.interface.base import ( 14 | Interface, 15 | build_doc, 16 | eval_results, 17 | ) 18 | from datalad.interface.results import get_status_dict 19 | from datalad.support.constraints import ( 20 | EnsureNone, 21 | EnsureStr, 22 | ) 23 | from datalad.support.param import Parameter 24 | from datalad.utils import rmtree 25 | 26 | from datalad_container.utils import get_container_configuration 27 | 28 | lgr = logging.getLogger("datalad.containers.containers_remove") 29 | 30 | 31 | @build_doc 32 | # all commands must be derived from Interface 33 | class ContainersRemove(Interface): 34 | # first docstring line is used a short description in the cmdline help 35 | # the rest is put in the verbose help and manpage 36 | """Remove a known container from a dataset 37 | 38 | This command is only removing a container from the committed 39 | Dataset configuration (configuration scope ``branch``). It will not 40 | modify any other configuration scopes. 41 | 42 | This command is *not* dropping the container image associated with the 43 | removed record, because it may still be needed for other dataset versions. 44 | In order to drop the container image, use the 'drop' command prior 45 | to removing the container configuration. 
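
    Example (the container name is illustrative)::

        % datalad drop .datalad/environments/myname/image
        % datalad containers-remove myname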
46 | """ 47 | 48 | # parameters of the command, must be exhaustive 49 | _params_ = dict( 50 | dataset=Parameter( 51 | args=("-d", "--dataset"), 52 | doc="""specify the dataset from removing a container. If no dataset 53 | is given, an attempt is made to identify the dataset based on the 54 | current working directory""", 55 | constraints=EnsureDataset() | EnsureNone()), 56 | name=Parameter( 57 | args=("name",), 58 | doc="""name of the container to remove""", 59 | metavar="NAME", 60 | constraints=EnsureStr(), 61 | ), 62 | remove_image=Parameter( 63 | args=("-i", "--remove-image",), 64 | doc="""if set, remove container image as well. Even with this flag, 65 | the container image content will not be dropped. Use the 'drop' 66 | command explicitly before removing the container configuration.""", 67 | action="store_true", 68 | ), 69 | ) 70 | 71 | @staticmethod 72 | @datasetmethod(name='containers_remove') 73 | @eval_results 74 | def __call__(name, dataset=None, remove_image=False): 75 | ds = require_dataset(dataset, check_installed=True, 76 | purpose='remove a container') 77 | 78 | res = get_status_dict( 79 | ds=ds, 80 | action='containers_remove', 81 | logger=lgr) 82 | 83 | container_cfg = get_container_configuration(ds, name) 84 | 85 | to_save = [] 86 | if remove_image and 'image' in container_cfg: 87 | imagepath = ds.pathobj / container_cfg['image'] 88 | # we use rmtree() and not .unlink(), because 89 | # the image could be more than a single file underneath 90 | # this location (e.g., docker image dumps) 91 | rmtree(imagepath) 92 | # at the very end, save() will take care of committing 93 | # any removal that just occurred 94 | to_save.append(imagepath) 95 | 96 | if container_cfg: 97 | ds.config.remove_section( 98 | f'datalad.containers.{name}', 99 | scope='branch', 100 | reload=True) 101 | res['status'] = 'ok' 102 | to_save.append(op.join('.datalad', 'config')) 103 | else: 104 | res['status'] = 'notneeded' 105 | if to_save: 106 | for r in ds.save( 107 | path=to_save, 108 | message='[DATALAD] Remove container {}'.format(name)): 109 | yield r 110 | yield res 111 | -------------------------------------------------------------------------------- /datalad_container/containers_run.py: -------------------------------------------------------------------------------- 1 | """Drop-in replacement for `datalad run` for command execution in a container""" 2 | 3 | __docformat__ = 'restructuredtext' 4 | 5 | import logging 6 | import os.path as op 7 | import sys 8 | 9 | from datalad.core.local.run import ( 10 | Run, 11 | get_command_pwds, 12 | normalize_command, 13 | run_command, 14 | ) 15 | from datalad.distribution.dataset import ( 16 | datasetmethod, 17 | require_dataset, 18 | ) 19 | from datalad.interface.base import ( 20 | Interface, 21 | build_doc, 22 | eval_results, 23 | ) 24 | from datalad.interface.results import get_status_dict 25 | from datalad.support.param import Parameter 26 | from datalad.utils import ensure_iter 27 | 28 | from datalad_container.find_container import find_container_ 29 | 30 | lgr = logging.getLogger("datalad.containers.containers_run") 31 | 32 | # Environment variable to be set during execution to possibly 33 | # inform underlying shim scripts about the original name of 34 | # the container 35 | CONTAINER_NAME_ENVVAR = 'DATALAD_CONTAINER_NAME' 36 | 37 | _run_params = dict( 38 | Run._params_, 39 | container_name=Parameter( 40 | args=('-n', '--container-name',), 41 | metavar="NAME", 42 | doc="""Specify the name of or a path to a known container to use 43 | for execution, in 
44 | )
45 |
46 |
47 | @build_doc
48 | # all commands must be derived from Interface
49 | class ContainersRun(Interface):
50 | # first docstring line is used as a short description in the cmdline help
51 | # the rest is put in the verbose help and manpage
52 | """Drop-in replacement of 'run' to perform containerized command execution
53 |
54 | Container(s) need to be configured beforehand (see containers-add). If no
55 | container is specified and only one container is configured in the current
56 | dataset, it will be selected automatically. If more than one container is
57 | registered in the current dataset or to access containers from subdatasets,
58 | the container has to be specified.
59 |
60 | A command is generated based on the input arguments such that the
61 | container image itself will be recorded as an input dependency of
62 | the command execution in the `run` record in the git history.
63 |
64 | During execution the environment variable {name_envvar} is set to the
65 | name of the used container.
66 | """
67 |
68 | _docs_ = dict(
69 | name_envvar=CONTAINER_NAME_ENVVAR
70 | )
71 |
72 | _params_ = _run_params
73 |
74 | # Analogous to 'run' command - stop on first error
75 | on_failure = 'stop'
76 |
77 | @staticmethod
78 | @datasetmethod(name='containers_run')
79 | @eval_results
80 | def __call__(cmd, container_name=None, dataset=None,
81 | inputs=None, outputs=None, message=None, expand=None,
82 | explicit=False, sidecar=None):
83 | from unittest.mock import \
84 | patch # delayed, since takes long (~600ms for yoh)
85 | pwd, _ = get_command_pwds(dataset)
86 | ds = require_dataset(dataset, check_installed=True,
87 | purpose='run a containerized command execution')
88 |
89 | # the following block locates the target container. This involves a
90 | # configuration look-up. This is not using
91 | # get_container_configuration(), because it needs to account for a
92 | # wide range of scenarios, including the installation of the dataset(s)
93 | # that will eventually provide (the configuration) for the container.
94 | # However, internally this is calling `containers_list()`, which is
95 | # using get_container_configuration(), so any normalization of
96 | # configuration on-read can still be implemented in this helper.
97 | container = None
98 | for res in find_container_(ds, container_name):
99 | if res.get("action") == "containers":
100 | container = res
101 | else:
102 | yield res
103 | assert container, "bug: container should always be defined here"
104 |
105 | image_path = op.relpath(container["path"], pwd)
106 | # container record would contain path to the (sub)dataset containing
107 | # it. If not - take current dataset, as it must be coming from it
108 | image_dspath = op.relpath(container.get('parentds', ds.path), pwd)
109 |
110 | # sure we could check whether the container image is present,
111 | # but it might live in a subdataset that isn't even installed yet,
112 | # so let's leave all this business to `get`, which is called by `run`
113 |
114 | cmd = normalize_command(cmd)
115 | # expand the command with container execution
116 | if 'cmdexec' in container:
117 | callspec = container['cmdexec']
118 |
119 | # Temporary kludge to give a more helpful message
120 | if callspec.startswith("["):
121 | import json
122 | try:
123 | json.loads(callspec)
124 | except json.JSONDecodeError:
125 | pass # Never mind, false positive.
126 | else:
127 | raise ValueError(
128 | 'cmdexec {!r} is in an old, unsupported format.
' 129 | 'Convert it to a plain string.'.format(callspec)) 130 | try: 131 | cmd_kwargs = dict( 132 | # point to the python installation that runs *this* code 133 | # we know that it would have things like the docker 134 | # adaptor installed with this extension package 135 | python=sys.executable, 136 | img=image_path, 137 | cmd=cmd, 138 | img_dspath=image_dspath, 139 | img_dirpath=op.dirname(image_path) or ".", 140 | ) 141 | cmd = callspec.format(**cmd_kwargs) 142 | except KeyError as exc: 143 | yield get_status_dict( 144 | 'run', 145 | ds=ds, 146 | status='error', 147 | message=( 148 | 'Unrecognized cmdexec placeholder: %s. ' 149 | 'See containers-add for information on known ones: %s', 150 | exc, 151 | ", ".join(cmd_kwargs))) 152 | return 153 | else: 154 | # just prepend and pray 155 | cmd = container['path'] + ' ' + cmd 156 | 157 | extra_inputs = [] 158 | for extra_input in ensure_iter(container.get("extra-input",[]), set): 159 | try: 160 | xi_kwargs = dict( 161 | img_dspath=image_dspath, 162 | img_dirpath=op.dirname(image_path) or ".", 163 | ) 164 | extra_inputs.append(extra_input.format(**xi_kwargs)) 165 | except KeyError as exc: 166 | yield get_status_dict( 167 | 'run', 168 | ds=ds, 169 | status='error', 170 | message=( 171 | 'Unrecognized extra_input placeholder: %s. ' 172 | 'See containers-add for information on known ones: %s', 173 | exc, 174 | ", ".join(xi_kwargs))) 175 | return 176 | 177 | lgr.debug("extra_inputs = %r", extra_inputs) 178 | 179 | with patch.dict('os.environ', 180 | {CONTAINER_NAME_ENVVAR: container['name']}): 181 | # fire! 182 | for r in run_command( 183 | cmd=cmd, 184 | dataset=dataset or (ds if ds.path == pwd else None), 185 | inputs=inputs, 186 | extra_inputs=[image_path] + extra_inputs, 187 | outputs=outputs, 188 | message=message, 189 | expand=expand, 190 | explicit=explicit, 191 | sidecar=sidecar): 192 | yield r 193 | -------------------------------------------------------------------------------- /datalad_container/extractors/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /datalad_container/extractors/_load_singularity_versions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Importing this file extends datalad.support.external_version: 3 | 4 | Adds: 5 | - external_versions["cmd:apptainer"] 6 | - external_versions["cmd:singularity"] 7 | """ 8 | 9 | from datalad.cmd import ( 10 | StdOutErrCapture, 11 | WitlessRunner, 12 | ) 13 | from datalad.support.external_versions import external_versions 14 | 15 | 16 | def __get_apptainer_version(): 17 | version = WitlessRunner().run("apptainer --version", protocol=StdOutErrCapture)['stdout'].strip() 18 | return version.split("apptainer version ")[1] 19 | 20 | 21 | def __get_singularity_version(): 22 | return WitlessRunner().run("singularity version", protocol=StdOutErrCapture)['stdout'].strip() 23 | 24 | 25 | # Load external_versions and patch with "cmd:singularity" and "cmd:apptainer" 26 | external_versions.add("cmd:apptainer", func=__get_apptainer_version) 27 | external_versions.add("cmd:singularity", func=__get_singularity_version) 28 | -------------------------------------------------------------------------------- /datalad_container/extractors/metalad_container.py: -------------------------------------------------------------------------------- 1 | # emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil 
-*- 2 | # ex: set sts=4 ts=4 sw=4: 3 | # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 4 | # 5 | # See COPYING file distributed along with the datalad package for the 6 | # copyright and license terms. 7 | # 8 | # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 9 | """Metadata extractors for Container Images stored in Datalad's own core storage""" 10 | import json 11 | import logging 12 | import subprocess 13 | import time 14 | from uuid import UUID 15 | 16 | from datalad.support.external_versions import ( 17 | UnknownVersion, 18 | external_versions, 19 | ) 20 | from datalad_metalad import get_file_id 21 | from datalad_metalad.extractors.base import ( 22 | DataOutputCategory, 23 | ExtractorResult, 24 | FileMetadataExtractor, 25 | ) 26 | 27 | from datalad_container.utils import get_container_command 28 | 29 | CURRENT_VERSION = "0.0.1" 30 | 31 | lgr = logging.getLogger('datalad.metadata.extractors.metalad_container') 32 | 33 | 34 | class MetaladContainerInspect(FileMetadataExtractor): 35 | """ 36 | Populates metadata singularity/apptainer version and `inspect` output. 37 | """ 38 | 39 | def get_data_output_category(self) -> DataOutputCategory: 40 | return DataOutputCategory.IMMEDIATE 41 | 42 | def is_content_required(self) -> bool: 43 | return True 44 | 45 | def get_id(self) -> UUID: 46 | # Nothing special, made this up - asmacdo 47 | return UUID('3a28cca6-b7a1-11ed-b106-fc3497650c92') 48 | 49 | @staticmethod 50 | def get_version() -> str: 51 | return CURRENT_VERSION 52 | 53 | def extract(self, _=None) -> ExtractorResult: 54 | container_command = get_container_command() 55 | return ExtractorResult( 56 | extractor_version=self.get_version(), 57 | extraction_parameter=self.parameter or {}, 58 | extraction_success=True, 59 | datalad_result_dict={ 60 | "type": "container", 61 | "status": "ok" 62 | }, 63 | immediate_data={ 64 | "@id": get_file_id(dict( 65 | path=self.file_info.path, 66 | type=self.file_info.type)), 67 | "type": self.file_info.type, 68 | "path": self.file_info.intra_dataset_path, 69 | "content_byte_size": self.file_info.byte_size, 70 | "comment": f"SingularityInspect extractor executed at {time.time()}", 71 | "container_system": container_command, 72 | "container_system_version": str(external_versions[container_command]), 73 | "container_inspect": self._container_inspect(container_command, self.file_info.path), 74 | }) 75 | 76 | @staticmethod 77 | def _container_inspect(command, path) -> str: 78 | data = subprocess.run( 79 | [command, "inspect", "--json", path], 80 | check=True, 81 | stdout=subprocess.PIPE).stdout.decode() 82 | return json.loads(data) 83 | -------------------------------------------------------------------------------- /datalad_container/extractors/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalad/datalad-container/e9bba2a6566abf0e0a788dc06c56a468f8400d1f/datalad_container/extractors/tests/__init__.py -------------------------------------------------------------------------------- /datalad_container/extractors/tests/test_metalad_container.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | import pytest 4 | from datalad.support.external_versions import external_versions 5 | 6 | # Early detection before we try to import meta_extract 7 | from datalad.tests.utils_pytest import SkipTest 8 | 9 | if not external_versions["datalad_metalad"]: 10 | raise 
SkipTest("skipping metalad tests") 11 | 12 | from datalad.api import meta_extract 13 | from datalad.tests.utils_pytest import ( 14 | SkipTest, 15 | with_tempfile, 16 | ) 17 | 18 | from datalad_container.utils import get_container_command 19 | 20 | try: 21 | container_command = get_container_command() 22 | except RuntimeError: 23 | raise SkipTest("skipping singularity/apptainer tests") 24 | 25 | # Must come after skiptest or imports will not work 26 | from datalad_container.extractors.metalad_container import ( 27 | MetaladContainerInspect, 28 | ) 29 | 30 | 31 | @with_tempfile 32 | def test__container_inspect_nofile(path=None): 33 | """Singularity causes CalledProcessError if path DNE.""" 34 | with pytest.raises(subprocess.CalledProcessError): 35 | MetaladContainerInspect._container_inspect(container_command, path) 36 | 37 | 38 | def test__container_inspect_valid(singularity_test_image): 39 | """Call inspect on a valid singularity container image.""" 40 | result = MetaladContainerInspect._container_inspect( 41 | container_command, 42 | singularity_test_image["img_path"], 43 | ) 44 | expected_result = { 45 | 'data': { 46 | 'attributes': { 47 | 'labels': { 48 | 'org.label-schema.build-date': 'Sat,_19_May_2018_07:06:48_+0000', 49 | 'org.label-schema.build-size': '62MB', 50 | 'org.label-schema.schema-version': '1.0', 51 | 'org.label-schema.usage.singularity.deffile': 'Singularity.testhelper', 52 | 'org.label-schema.usage.singularity.deffile.bootstrap': 'docker', 53 | 'org.label-schema.usage.singularity.deffile.from': 'debian:stable-slim', 'org.label-schema.usage.singularity.version': 54 | '2.5.0-feature-squashbuild-secbuild-2.5.0.gddf62fb5' 55 | } 56 | } 57 | }, 58 | 'type': 'container' 59 | } 60 | assert result == expected_result 61 | 62 | 63 | def test_extract(singularity_test_image): 64 | ds = singularity_test_image["ds"] 65 | path = singularity_test_image["img_path"] 66 | result = meta_extract(dataset=ds, extractorname="container_inspect", path=path) 67 | assert len(result) == 1 68 | 69 | assert result[0]["metadata_record"]["extracted_metadata"] 70 | assert result[0]["metadata_record"]["extractor_name"] == 'container_inspect' 71 | assert result[0]["metadata_record"]["extractor_version"] == MetaladContainerInspect.get_version() 72 | -------------------------------------------------------------------------------- /datalad_container/find_container.py: -------------------------------------------------------------------------------- 1 | """Support module for selecting a container from a dataset and its subdatasets. 2 | """ 3 | 4 | import logging 5 | 6 | from datalad.distribution.dataset import Dataset 7 | from datalad.utils import Path 8 | 9 | from datalad_container.containers_list import ContainersList 10 | 11 | lgr = logging.getLogger("datalad_container.find_container") 12 | 13 | 14 | def _list_containers(dataset, recursive, contains=None): 15 | return {c['name']: c 16 | for c in ContainersList.__call__(dataset=dataset, 17 | recursive=recursive, 18 | contains=contains, 19 | return_type='generator', 20 | on_failure='ignore', 21 | result_filter=None, 22 | result_renderer=None, 23 | result_xfm=None)} 24 | 25 | 26 | def _get_subdataset_container(ds, container_name): 27 | """Try to get subdataset container matching `container_name`. 28 | 29 | This is the primary function tried by find_container_() when the container 30 | name looks like it is from a subdataset (i.e. has a slash). 
31 | 32 | Parameters 33 | ---------- 34 | ds : Dataset 35 | container_name : str 36 | 37 | Yields 38 | ------- 39 | Result records for any installed subdatasets and a containers-list record 40 | for the container, if any, found for `container_name`. 41 | """ 42 | name_parts = container_name.split('/') 43 | subds_names = name_parts[:-1] 44 | if Dataset(ds.pathobj / Path(*subds_names)).is_installed(): 45 | # This avoids unnecessary work in the common case, but it can result in 46 | # not installing the necessary subdatasets in the rare case that chain 47 | # of submodule names point to a subdataset path that is installed while 48 | # the actual submodule paths contains uninstalled parts. 49 | lgr.debug( 50 | "Subdataset for %s is probably installed. Skipping install logic", 51 | container_name) 52 | return 53 | 54 | curds = ds 55 | for name in subds_names: 56 | for sub in curds.subdatasets(return_type='generator'): 57 | if sub['gitmodule_name'] == name: 58 | path = sub['path'] 59 | yield from curds.get( 60 | path, get_data=False, 61 | on_failure='ignore', return_type='generator') 62 | curds = Dataset(path) 63 | break 64 | else: 65 | # There wasn't a submodule name chain that matched container_name. 66 | # Aside from an invalid name, the main case where this can happen 67 | # is when an image path is given for the container name. 68 | lgr.debug("Did not find submodule name %s in %s", 69 | name, curds) 70 | return 71 | containers = _list_containers(dataset=ds, recursive=True, 72 | contains=curds.path) 73 | res = containers.get(container_name) 74 | if res: 75 | yield res 76 | 77 | 78 | # Fallback functions tried by find_container_. These are called with the 79 | # current dataset, the container name, and a dictionary mapping the container 80 | # name to a record (as returned by containers-list). 81 | 82 | 83 | def _get_the_one_and_only(_, name, containers): 84 | if name is None: 85 | if len(containers) == 1: 86 | # no questions asked, take container and run 87 | return list(containers.values())[0] 88 | else: 89 | raise ValueError("Must explicitly specify container" 90 | " (known containers are: {})" 91 | .format(', '.join(containers))) 92 | 93 | 94 | def _get_container_by_name(_, name, containers): 95 | return containers.get(name) 96 | 97 | 98 | def _get_container_by_path(ds, name, containers): 99 | from datalad.distribution.dataset import resolve_path 100 | 101 | # Note: since datalad0.12.0rc6 resolve_path returns a Path object here, 102 | # which then fails to equal c['path'] below as this is taken from 103 | # config as a string 104 | container_path = str(resolve_path(name, ds)) 105 | container = [c for c in containers.values() 106 | if c['path'] == container_path] 107 | if len(container) == 1: 108 | return container[0] 109 | 110 | 111 | # Entry points 112 | 113 | 114 | def find_container_(ds, container_name=None): 115 | """Find the container in dataset `ds` specified by `container_name`. 116 | 117 | Parameters 118 | ---------- 119 | ds : Dataset 120 | Dataset to query. 121 | container_name : str or None 122 | Name in the form of how `containers-list -d ds -r` would report it 123 | (e.g., "s0/s1/cname"). 124 | 125 | Yields 126 | ------ 127 | The container record, as returned by containers-list. Before that record, 128 | it may yield records of other action types, in particular "install" records 129 | for subdatasets that were installed to try to get access to a subdataset 130 | container. 131 | 132 | Raises 133 | ------ 134 | ValueError if a uniquely matching container cannot be found. 
135 | """ 136 | recurse = container_name and "/" in container_name 137 | if recurse: 138 | for res in _get_subdataset_container(ds, container_name): 139 | # Before the container record, the results may include install 140 | # records. Don't relay "notneeded" results to avoid noise. Also, 141 | # don't propagate install failures, which may be due to an image 142 | # path being given or a non-existent container, both cases that are 143 | # handled downstream. 144 | if res.get("status") == "ok": 145 | yield res 146 | if res.get("action") == "containers": 147 | return 148 | 149 | containers = _list_containers(dataset=ds, recursive=recurse) 150 | if not containers: 151 | raise ValueError("No known containers. Use containers-add") 152 | 153 | fns = [ 154 | _get_the_one_and_only, 155 | _get_container_by_name, 156 | _get_container_by_path, 157 | ] 158 | 159 | for fn in fns: 160 | lgr.debug("Trying to find container with %s", fn) 161 | container = fn(ds, container_name, containers) 162 | if container: 163 | yield container 164 | return 165 | 166 | raise ValueError( 167 | 'Container selection impossible: not specified, ambiguous ' 168 | 'or unknown (known containers are: {})' 169 | .format(', '.join(containers)) 170 | ) 171 | 172 | 173 | def find_container(ds, container_name=None): 174 | """Like `find_container_`, but just return the container record. 175 | """ 176 | # Note: This function was once used directly by containers_run(), but that 177 | # now uses the find_container_() generator function directly. Now 178 | # find_container() exists for compatibility with third-party tools 179 | # (reproman) and the test_find.py tests. 180 | for res in find_container_(ds, container_name): 181 | if res.get("action") == "containers": 182 | return res 183 | raise RuntimeError( 184 | "bug: find_container_() should return container or raise exception") 185 | -------------------------------------------------------------------------------- /datalad_container/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalad/datalad-container/e9bba2a6566abf0e0a788dc06c56a468f8400d1f/datalad_container/tests/__init__.py -------------------------------------------------------------------------------- /datalad_container/tests/fixtures/__init__.py: -------------------------------------------------------------------------------- 1 | from .singularity_image import singularity_test_image 2 | -------------------------------------------------------------------------------- /datalad_container/tests/fixtures/singularity_image.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | from datalad.api import Dataset 5 | from datalad.tests.utils_pytest import with_tempfile 6 | 7 | from datalad_container.tests.utils import add_pyscript_image 8 | from datalad_container.utils import get_container_command 9 | 10 | TEST_IMG_URL = 'shub://datalad/datalad-container:testhelper' 11 | 12 | @pytest.fixture(scope="session") 13 | def singularity_test_image(tmp_path_factory: pytest.TempPathFactory) -> str: 14 | fixture_file_name = "fixture.sing" 15 | ds = Dataset(tmp_path_factory.mktemp("singularity_image")) 16 | ds.create(force=True) 17 | ds.containers_add( 18 | 'mycontainer', 19 | url=TEST_IMG_URL, 20 | image=fixture_file_name, 21 | ) 22 | img_path = ds.pathobj / fixture_file_name 23 | ds.get(img_path) 24 | return {"ds": ds, "img_path": img_path} 25 | 26 | 
@pytest.fixture(scope="session") 27 | def container_command(): 28 | """Not a very useful function other than to add session scope.""" 29 | return get_container_command() 30 | -------------------------------------------------------------------------------- /datalad_container/tests/test_add.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from datalad.api import ( 3 | Dataset, 4 | clone, 5 | ) 6 | from datalad.consts import DATALAD_SPECIAL_REMOTE 7 | from datalad.customremotes.base import init_datalad_remote 8 | from datalad.tests.utils_pytest import ( 9 | assert_false, 10 | assert_in, 11 | assert_not_in, 12 | with_tempfile, 13 | ) 14 | from datalad.utils import Path 15 | 16 | from datalad_container.containers_add import _ensure_datalad_remote 17 | 18 | # NOTE: At the moment, testing of the containers-add itself happens implicitly 19 | # via use in other tests. 20 | 21 | 22 | @with_tempfile 23 | def test_ensure_datalad_remote_init_and_enable_needed(path=None): 24 | ds = Dataset(path).create(force=True) 25 | repo = ds.repo 26 | assert_false(repo.get_remotes()) 27 | _ensure_datalad_remote(repo) 28 | assert_in("datalad", repo.get_remotes()) 29 | 30 | 31 | @pytest.mark.parametrize("autoenable", [False, True]) 32 | @with_tempfile 33 | def test_ensure_datalad_remote_maybe_enable(path=None, *, autoenable): 34 | path = Path(path) 35 | ds_a = Dataset(path / "a").create(force=True) 36 | init_datalad_remote(ds_a.repo, DATALAD_SPECIAL_REMOTE, 37 | autoenable=autoenable) 38 | 39 | ds_b = clone(source=ds_a.path, path=path / "b") 40 | repo = ds_b.repo 41 | if not autoenable: 42 | assert_not_in("datalad", repo.get_remotes()) 43 | _ensure_datalad_remote(repo) 44 | assert_in("datalad", repo.get_remotes()) -------------------------------------------------------------------------------- /datalad_container/tests/test_containers.py: -------------------------------------------------------------------------------- 1 | import os.path as op 2 | 3 | from datalad.api import ( 4 | Dataset, 5 | containers_add, 6 | containers_list, 7 | containers_remove, 8 | install, 9 | ) 10 | from datalad.support.network import get_local_file_url 11 | from datalad.tests.utils_pytest import ( 12 | SkipTest, 13 | assert_equal, 14 | assert_in, 15 | assert_in_results, 16 | assert_not_in, 17 | assert_raises, 18 | assert_re_in, 19 | assert_result_count, 20 | assert_status, 21 | ok_, 22 | ok_clean_git, 23 | ok_file_has_content, 24 | serve_path_via_http, 25 | with_tempfile, 26 | with_tree, 27 | ) 28 | from datalad.utils import swallow_outputs 29 | 30 | from datalad_container.tests.utils import add_pyscript_image 31 | 32 | common_kwargs = {'result_renderer': 'disabled'} 33 | 34 | 35 | @with_tempfile 36 | def test_add_noop(path=None): 37 | ds = Dataset(path).create(**common_kwargs) 38 | ok_clean_git(ds.path) 39 | assert_raises(TypeError, ds.containers_add) 40 | # fails when there is no image 41 | assert_status( 42 | 'error', 43 | ds.containers_add('name', on_failure='ignore', **common_kwargs)) 44 | # no config change 45 | ok_clean_git(ds.path) 46 | # place a dummy "image" file 47 | with open(op.join(ds.path, 'dummy'), 'w') as f: 48 | f.write('some') 49 | ds.save('dummy', **common_kwargs) 50 | ok_clean_git(ds.path) 51 | # config will be added, as long as there is a file, even when URL access 52 | # fails 53 | res = ds.containers_add( 54 | 'broken', 55 | url='bogus-protocol://bogus-server', image='dummy', 56 | on_failure='ignore', 57 | **common_kwargs 58 | ) 59 | assert_status('ok', res) 60 
| assert_result_count(res, 1, action='save', status='ok') 61 | 62 | 63 | @with_tempfile 64 | @with_tree(tree={"foo.img": "doesn't matter 0", 65 | "bar.img": "doesn't matter 1"}) 66 | def test_add_local_path(path=None, local_file=None): 67 | ds = Dataset(path).create(**common_kwargs) 68 | res = ds.containers_add(name="foobert", 69 | url=op.join(local_file, "foo.img")) 70 | foo_target = op.join(path, ".datalad", "environments", "foobert", "image") 71 | assert_result_count(res, 1, status="ok", type="file", path=foo_target, 72 | action="containers_add") 73 | # the image path configuration is always in POSIX format 74 | assert ds.config.get('datalad.containers.foobert.image') \ 75 | == '.datalad/environments/foobert/image' 76 | 77 | # We've just copied and added the file. 78 | assert_not_in(ds.repo.WEB_UUID, ds.repo.whereis(foo_target)) 79 | 80 | # We can force the URL to be added. (Note: This works because datalad 81 | # overrides 'annex.security.allowed-url-schemes' in its tests.) 82 | ds.containers_add(name="barry", 83 | url=get_local_file_url(op.join(local_file, "bar.img"))) 84 | bar_target = op.join(path, ".datalad", "environments", "barry", "image") 85 | assert_in(ds.repo.WEB_UUID, ds.repo.whereis(bar_target)) 86 | 87 | 88 | RAW_KWDS = dict(return_type='list', 89 | result_filter=None, 90 | result_renderer=None, 91 | result_xfm=None) 92 | 93 | 94 | @with_tempfile 95 | @with_tree(tree={'some_container.img': "doesn't matter"}) 96 | @serve_path_via_http 97 | def test_container_files(ds_path=None, local_file=None, url=None): 98 | # setup things to add 99 | # 100 | # Note: Since "adding" as a container doesn't actually call anything or use 101 | # the container in some way, but simply registers it, for testing any file 102 | # is sufficient. 103 | local_file = get_local_file_url(op.join(local_file, 'some_container.img')) 104 | 105 | # prepare dataset: 106 | ds = Dataset(ds_path).create(**common_kwargs) 107 | # non-default location: 108 | ds.config.add("datalad.containers.location", 109 | value=op.join(".datalad", "test-environments"), 110 | scope='branch') 111 | ds.save(message="Configure container mountpoint", **common_kwargs) 112 | 113 | # no containers yet: 114 | res = ds.containers_list(**RAW_KWDS) 115 | assert_result_count(res, 0) 116 | 117 | # add first "image": must end up at the configured default location 118 | target_path = op.join( 119 | ds.path, ".datalad", "test-environments", "first", "image") 120 | res = ds.containers_add(name="first", url=local_file, **common_kwargs) 121 | ok_clean_git(ds.repo) 122 | 123 | assert_result_count(res, 1, status="ok", type="file", path=target_path, 124 | action="containers_add") 125 | ok_(op.lexists(target_path)) 126 | 127 | res = ds.containers_list(**RAW_KWDS) 128 | assert_result_count(res, 1) 129 | assert_result_count( 130 | res, 1, 131 | name='first', type='file', action='containers', status='ok', 132 | path=target_path) 133 | 134 | # and kill it again 135 | # but needs name 136 | assert_raises(TypeError, ds.containers_remove) 137 | res = ds.containers_remove('first', remove_image=True, **common_kwargs) 138 | assert_status('ok', res) 139 | assert_result_count(ds.containers_list(**RAW_KWDS), 0) 140 | # image removed 141 | assert(not op.lexists(target_path)) 142 | 143 | 144 | @with_tree(tree={ 145 | "container.img": "container", 146 | "overlay1.img": "overlay 1", 147 | "overlay2.img": "overlay 2", 148 | }) 149 | def test_extra_inputs(ds_path=None): 150 | container_file = 'container.img' 151 | overlay1_file = 'overlay1.img' 152 | overlay2_file = 
'overlay2.img' 153 | 154 | # prepare dataset: 155 | ds = Dataset(ds_path).create(force=True, **common_kwargs) 156 | ds.save(**common_kwargs) 157 | 158 | ds.containers_add( 159 | name="container", 160 | image=container_file, 161 | call_fmt="apptainer exec {img} {cmd}", 162 | **common_kwargs 163 | ) 164 | ds.containers_add( 165 | name="container-with-overlay", 166 | image=container_file, 167 | call_fmt="apptainer exec --overlay {img_dirpath}/overlay1.img {img} {cmd}", 168 | extra_input=[overlay1_file], 169 | **common_kwargs 170 | ) 171 | ds.containers_add( 172 | name="container-with-two-overlays", 173 | image=container_file, 174 | call_fmt="apptainer exec --overlay {img_dirpath}/overlay1.img --overlay {img_dirpath}/overlay2.img:ro {img} {cmd}", 175 | extra_input=[overlay1_file, overlay2_file], 176 | **common_kwargs 177 | ) 178 | 179 | res = ds.containers_list(**RAW_KWDS) 180 | assert_result_count(res, 3) 181 | 182 | assert_equal(ds.config.get("datalad.containers.container.extra-input"), None) 183 | assert_equal(ds.config.get("datalad.containers.container-with-overlay.extra-input",get_all=True), "overlay1.img") 184 | assert_equal(ds.config.get("datalad.containers.container-with-two-overlays.extra-input",get_all=True), ("overlay1.img", "overlay2.img")) 185 | 186 | 187 | @with_tempfile 188 | @with_tree(tree={'foo.img': "foo", 189 | 'bar.img': "bar"}) 190 | @serve_path_via_http 191 | def test_container_update(ds_path=None, local_file=None, url=None): 192 | url_foo = get_local_file_url(op.join(local_file, 'foo.img')) 193 | url_bar = get_local_file_url(op.join(local_file, 'bar.img')) 194 | img = op.join(".datalad", "environments", "foo", "image") 195 | 196 | ds = Dataset(ds_path).create(**common_kwargs) 197 | 198 | ds.containers_add(name="foo", call_fmt="call-fmt1", url=url_foo, 199 | **common_kwargs) 200 | 201 | # Abort without --update flag. 202 | res = ds.containers_add(name="foo", on_failure="ignore", 203 | **common_kwargs) 204 | assert_result_count(res, 1, action="containers_add", status="impossible") 205 | 206 | # Abort if nothing to update is specified. 207 | res = ds.containers_add(name="foo", update=True, on_failure="ignore", 208 | **common_kwargs) 209 | assert_result_count(res, 1, action="containers_add", status="impossible", 210 | message="No values to update specified") 211 | 212 | # Update call format. 213 | ds.containers_add(name="foo", update=True, call_fmt="call-fmt2", 214 | **common_kwargs) 215 | assert_equal(ds.config.get("datalad.containers.foo.cmdexec"), 216 | "call-fmt2") 217 | ok_file_has_content(op.join(ds.path, img), "foo") 218 | 219 | # Update URL/image. 220 | ds.drop(img, **common_kwargs) # Make sure it works even with absent content. 
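    # Updating with a new URL while the image content is locally absent should
    # still swap the registered image: the old file is removed, the new URL is
    # recorded, and the change is saved (asserted below via the result records
    # and the content check).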
221 | res = ds.containers_add(name="foo", update=True, url=url_bar, 222 | **common_kwargs) 223 | assert_in_results(res, action="remove", status="ok") 224 | assert_in_results(res, action="save", status="ok") 225 | ok_file_has_content(op.join(ds.path, img), "bar") 226 | # the image path configuration is (still) always in POSIX format 227 | assert ds.config.get('datalad.containers.foo.image') \ 228 | == '.datalad/environments/foo/image' 229 | 230 | # Test commit message 231 | # In the above case it was updating existing image so should have "Update " 232 | get_commit_msg = lambda *args: ds.repo.format_commit('%B') 233 | assert_in("Update ", get_commit_msg()) 234 | 235 | # If we add a new image with update=True should say Configure 236 | res = ds.containers_add(name="foo2", update=True, url=url_bar, 237 | **common_kwargs) 238 | assert_in("Configure ", get_commit_msg()) 239 | 240 | 241 | @with_tempfile 242 | @with_tempfile 243 | @with_tree(tree={'some_container.img': "doesn't matter"}) 244 | def test_container_from_subdataset(ds_path=None, src_subds_path=None, local_file=None): 245 | 246 | # prepare a to-be subdataset with a registered container 247 | src_subds = Dataset(src_subds_path).create(**common_kwargs) 248 | src_subds.containers_add( 249 | name="first", 250 | url=get_local_file_url(op.join(local_file, 'some_container.img')), 251 | **common_kwargs 252 | ) 253 | # add it as subdataset to a super ds: 254 | ds = Dataset(ds_path).create(**common_kwargs) 255 | subds = ds.install("sub", source=src_subds_path, **common_kwargs) 256 | # add it again one level down to see actual recursion: 257 | subds.install("subsub", source=src_subds_path, **common_kwargs) 258 | 259 | # We come up empty without recursive: 260 | res = ds.containers_list(recursive=False, **RAW_KWDS) 261 | assert_result_count(res, 0) 262 | 263 | # query available containers from within super: 264 | res = ds.containers_list(recursive=True, **RAW_KWDS) 265 | assert_result_count(res, 2) 266 | assert_in_results(res, action="containers", refds=ds.path) 267 | 268 | # default location within the subdataset: 269 | target_path = op.join(subds.path, 270 | '.datalad', 'environments', 'first', 'image') 271 | assert_result_count( 272 | res, 1, 273 | name='sub/first', type='file', action='containers', status='ok', 274 | path=target_path, 275 | parentds=subds.path 276 | ) 277 | 278 | # not installed subdataset doesn't pose an issue: 279 | sub2 = ds.create("sub2", **common_kwargs) 280 | assert_result_count(ds.subdatasets(**common_kwargs), 2, type="dataset") 281 | ds.drop("sub2", reckless='availability', what='datasets', **common_kwargs) 282 | from datalad.tests.utils_pytest import assert_false 283 | assert_false(sub2.is_installed()) 284 | 285 | # same results as before, not crashing or somehow confused by a not present 286 | # subds: 287 | res = ds.containers_list(recursive=True, **RAW_KWDS) 288 | assert_result_count(res, 2) 289 | assert_result_count( 290 | res, 1, 291 | name='sub/first', type='file', action='containers', status='ok', 292 | path=target_path, 293 | parentds=subds.path 294 | ) 295 | 296 | # The default renderer includes the image names. 297 | with swallow_outputs() as out: 298 | ds.containers_list(recursive=True) 299 | lines = out.out.splitlines() 300 | assert_re_in("sub/first", lines) 301 | assert_re_in("sub/subsub/first", lines) 302 | # But we are careful not to render partial names from subdataset traversals 303 | # (i.e. we recurse with containers_list(..., result_renderer=None)). 
304 | with assert_raises(AssertionError): 305 | assert_re_in("subsub/first", lines) 306 | 307 | 308 | @with_tempfile 309 | def test_list_contains(path=None): 310 | ds = Dataset(path).create(**common_kwargs) 311 | subds_a = ds.create("a", **common_kwargs) 312 | subds_b = ds.create("b", **common_kwargs) 313 | subds_a_c = subds_a.create("c", **common_kwargs) 314 | 315 | add_pyscript_image(subds_a_c, "in-c", "img") 316 | add_pyscript_image(subds_a, "in-a", "img") 317 | add_pyscript_image(subds_b, "in-b", "img") 318 | add_pyscript_image(ds, "in-top", "img") 319 | 320 | ds.save(recursive=True, **common_kwargs) 321 | 322 | assert_result_count(ds.containers_list(recursive=True, **RAW_KWDS), 323 | 4) 324 | 325 | assert_result_count( 326 | ds.containers_list(contains=["nowhere"], recursive=True, **RAW_KWDS), 327 | 1, name="in-top", action='containers') 328 | 329 | res = ds.containers_list(contains=[subds_a.path], recursive=True, 330 | **RAW_KWDS) 331 | assert_result_count(res, 3) 332 | assert_in_results(res, name="in-top") 333 | assert_in_results(res, name="a/in-a") 334 | assert_in_results(res, name="a/c/in-c") 335 | 336 | res = ds.containers_list(contains=[subds_a_c.path], recursive=True, 337 | **RAW_KWDS) 338 | assert_result_count(res, 3) 339 | assert_in_results(res, name="in-top") 340 | assert_in_results(res, name="a/in-a") 341 | assert_in_results(res, name="a/c/in-c") 342 | 343 | res = ds.containers_list(contains=[subds_b.path], recursive=True, 344 | **RAW_KWDS) 345 | assert_result_count(res, 2) 346 | assert_in_results(res, name="in-top") 347 | assert_in_results(res, name="b/in-b") 348 | -------------------------------------------------------------------------------- /datalad_container/tests/test_find.py: -------------------------------------------------------------------------------- 1 | import os.path as op 2 | 3 | from datalad.api import Dataset 4 | from datalad.tests.utils_pytest import ( 5 | assert_in, 6 | assert_in_results, 7 | assert_is_instance, 8 | assert_raises, 9 | assert_result_count, 10 | ok_clean_git, 11 | with_tree, 12 | ) 13 | 14 | from datalad_container.find_container import find_container 15 | 16 | 17 | @with_tree(tree={"sub": {"i.img": "doesn't matter"}}) 18 | def test_find_containers(path=None): 19 | ds = Dataset(path).create(force=True) 20 | ds.save(path=[op.join('sub', 'i.img')], message="dummy container") 21 | ds.containers_add("i", image=op.join('sub', 'i.img')) 22 | ok_clean_git(path) 23 | 24 | # find the only one 25 | res = find_container(ds) 26 | assert_is_instance(res, dict) 27 | assert_result_count([res], 1, status="ok", path=op.join(ds.path, "sub", "i.img")) 28 | 29 | # find by name 30 | res = find_container(ds, "i") 31 | assert_is_instance(res, dict) 32 | assert_result_count([res], 1, status="ok", path=op.join(ds.path, "sub", "i.img")) 33 | 34 | # find by path 35 | res = find_container(ds, op.join("sub", "i.img")) 36 | assert_is_instance(res, dict) 37 | assert_result_count([res], 1, status="ok", path=op.join(ds.path, "sub", "i.img")) 38 | 39 | # don't find another thing 40 | assert_raises(ValueError, find_container, ds, "nothere") 41 | -------------------------------------------------------------------------------- /datalad_container/tests/test_register.py: -------------------------------------------------------------------------------- 1 | from datalad.tests.utils_pytest import assert_result_count 2 | 3 | 4 | def test_register(): 5 | import datalad.api as da 6 | assert hasattr(da, 'containers_list') 7 | assert hasattr(da, 'containers_add') 8 | 
-------------------------------------------------------------------------------- /datalad_container/tests/test_schemes.py: -------------------------------------------------------------------------------- 1 | import os.path as op 2 | 3 | from datalad.api import ( 4 | Dataset, 5 | containers_add, 6 | containers_list, 7 | containers_run, 8 | create, 9 | ) 10 | from datalad.cmd import ( 11 | StdOutCapture, 12 | WitlessRunner, 13 | ) 14 | from datalad.tests.utils_pytest import ( 15 | assert_result_count, 16 | ok_clean_git, 17 | ok_file_has_content, 18 | skip_if_no_network, 19 | with_tempfile, 20 | ) 21 | 22 | 23 | @skip_if_no_network 24 | @with_tempfile 25 | def test_docker(path=None): # Singularity's "docker://" scheme. 26 | ds = Dataset(path).create() 27 | ds.containers_add( 28 | "bb", 29 | url=("docker://busybox@sha256:" 30 | "7964ad52e396a6e045c39b5a44438424ac52e12e4d5a25d94895f2058cb863a0")) 31 | 32 | img = op.join(ds.path, ".datalad", "environments", "bb", "image") 33 | assert_result_count(ds.containers_list(), 1, path=img, name="bb") 34 | ok_clean_git(path) 35 | 36 | WitlessRunner(cwd=ds.path).run( 37 | ["datalad", "containers-run", "ls", "/singularity"], 38 | protocol=StdOutCapture) 39 | -------------------------------------------------------------------------------- /datalad_container/tests/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as op 3 | import sys 4 | 5 | from datalad.api import containers_add 6 | from datalad.interface.common_cfg import dirs as appdirs 7 | from datalad.tests.utils_pytest import SkipTest 8 | from datalad.utils import chpwd 9 | 10 | 11 | def add_pyscript_image(ds, container_name, file_name): 12 | """Set up simple Python script as image. 13 | 14 | Parameters 15 | ---------- 16 | ds : Dataset 17 | container_name : str 18 | Add container with this name. 19 | file_name : str 20 | Write script to this file and use it as the image. 
21 | """ 22 | ds_file = (ds.pathobj / file_name) 23 | ds_file.write_text("import sys\nprint(sys.argv)\n") 24 | ds.save(ds_file, message="Add dummy container") 25 | containers_add(container_name, image=str(ds_file), 26 | call_fmt=sys.executable + " {img} {cmd}", 27 | dataset=ds) 28 | 29 | 30 | def get_singularity_image(): 31 | imgname = 'datalad_container_singularity_testimg.simg' 32 | targetpath = op.join( 33 | appdirs.user_cache_dir, 34 | imgname) 35 | if op.exists(targetpath): 36 | return targetpath 37 | 38 | with chpwd(appdirs.user_cache_dir): 39 | os.system( 40 | 'singularity pull --name "{}" shub://datalad/datalad-container:testhelper'.format( 41 | imgname)) 42 | 43 | if op.exists(targetpath): 44 | return targetpath 45 | 46 | raise SkipTest 47 | -------------------------------------------------------------------------------- /datalad_container/utils.py: -------------------------------------------------------------------------------- 1 | """Collection of common utilities""" 2 | 3 | from __future__ import annotations 4 | 5 | # the pathlib equivalent is only available in PY3.12 6 | from os.path import lexists 7 | from pathlib import ( 8 | PurePath, 9 | PurePosixPath, 10 | PureWindowsPath, 11 | ) 12 | 13 | from datalad.distribution.dataset import Dataset 14 | from datalad.support.external_versions import external_versions 15 | 16 | 17 | def get_container_command(): 18 | for command in ["apptainer", "singularity"]: 19 | container_system_version = external_versions[f"cmd:{command}"] 20 | if container_system_version: 21 | return command 22 | else: 23 | raise RuntimeError("Did not find apptainer or singularity") 24 | 25 | 26 | def get_container_configuration( 27 | ds: Dataset, 28 | name: str | None = None, 29 | ) -> dict: 30 | """Report all container-related configuration in a dataset 31 | 32 | Such configuration is identified by the item name pattern:: 33 | 34 | datalad.containers.. 35 | 36 | Parameters 37 | ---------- 38 | ds: Dataset 39 | Dataset instance to report configuration on. 40 | name: str, optional 41 | If given, the reported configuration will be limited to the container 42 | with this exact name. In this case, only a single ``dict`` is returned, 43 | not nested dictionaries. 44 | 45 | Returns 46 | ------- 47 | dict 48 | Keys are the names of configured containers and values are dictionaries 49 | with their respective configuration items (with the 50 | ``datalad.containers..`` prefix removed from their 51 | keys). 52 | If `name` is given, only a single ``dict`` with the configuration 53 | items of the matching container is returned (i.e., there will be no 54 | outer ``dict`` with container names as keys). 55 | If not (matching) container configuration exists, and empty dictionary 56 | is returned. 57 | """ 58 | var_prefix = 'datalad.containers.' 59 | 60 | containers = {} 61 | # all info is in the dataset config! 62 | for var, value in ds.config.items(): 63 | if not var.startswith(var_prefix): 64 | # not an interesting variable 65 | continue 66 | var_comps = var.split('.') 67 | # container name is the 3rd after 'datalad'.'container'. 68 | cname = var_comps[2] 69 | if name and name != cname: 70 | # we are looking for a specific container's configuration 71 | # and this is not it 72 | continue 73 | # reconstruct config item name, anything after 74 | # datalad.containers.. 
75 | ccfgname = '.'.join(var_comps[3:]) 76 | if not ccfgname: 77 | continue 78 | 79 | if ccfgname == 'image': 80 | # run image path normalization to get a relative path 81 | # in platform conventions, regardless of the input. 82 | # for now we report a str, because the rest of the code 83 | # is not using pathlib 84 | value = str(_normalize_image_path(value, ds)) 85 | 86 | cinfo = containers.get(cname, {}) 87 | cinfo[ccfgname] = value 88 | 89 | containers[cname] = cinfo 90 | 91 | return containers if name is None else containers.get(name, {}) 92 | 93 | 94 | def _normalize_image_path(path: str, ds: Dataset) -> PurePath: 95 | """Helper to standardize container image path handling 96 | 97 | Previously, container configuration would contain platform-paths 98 | for container image location (e.g., windows paths when added on 99 | windows, POSIX paths elsewhere). This made cross-platform reuse 100 | impossible out of the box, but it also means that such datasets 101 | are out there in unknown numbers. 102 | 103 | This helper inspects an image path READ FROM CONFIG(!) and ensures 104 | that it matches platform conventions (because all other arguments 105 | also come in platform conventions). This enables standardizing 106 | the storage conventions to be POSIX-only (for the future). 107 | 108 | Parameters 109 | ---------- 110 | path: str 111 | A str-path, as read from the configuration, matching its conventions 112 | (relative path, pointing to a container image relative to the 113 | dataset's root). 114 | ds: Dataset 115 | This dataset's base path is used as a reference for resolving 116 | the relative image path to an absolute location on the file system. 117 | 118 | Returns 119 | ------- 120 | PurePath 121 | Relative path in platform conventions 122 | """ 123 | # we only need to act differently when an incoming path is 124 | # windows. This is not possible to say with 100% confidence, 125 | # because a POSIX path can also contain a backslash. We support 126 | # a few standard cases where we CAN tell 127 | pathobj = None 128 | if '\\' not in path: 129 | # no windows pathsep, no problem 130 | pathobj = PurePosixPath(path) 131 | elif path.startswith('.datalad\\environments\\'): 132 | # this is the default location setup in windows conventions 133 | pathobj = PureWindowsPath(path) 134 | else: 135 | # let's assume it is windows for a moment 136 | if lexists(str(ds.pathobj / PureWindowsPath(path))): 137 | # if there is something on the filesystem for this path, 138 | # we can be reasonably sure that this is indeed a windows 139 | # path. This won't catch images in uninstalled subdatasets, 140 | # but better than nothing 141 | pathobj = PureWindowsPath(path) 142 | else: 143 | # if we get here, we have no idea, and no means to verify 144 | # further hypotheses -- go with the POSIX assumption 145 | # and hope for the best 146 | pathobj = PurePosixPath(path) 147 | 148 | assert pathobj is not None 149 | # we report in platform-conventions 150 | return PurePath(pathobj) 151 | -------------------------------------------------------------------------------- /datalad_container/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "1.2.6" 2 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line.
5 | SPHINXOPTS = -W 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext 23 | 24 | help: 25 | @echo "Please use \`make <target>' where <target> is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " applehelp to make an Apple Help Book" 34 | @echo " devhelp to make HTML files and a Devhelp project" 35 | @echo " epub to make an epub" 36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 37 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 38 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 39 | @echo " text to make text files" 40 | @echo " man to make manual pages" 41 | @echo " texinfo to make Texinfo files" 42 | @echo " info to make Texinfo files and run them through makeinfo" 43 | @echo " gettext to make PO message catalogs" 44 | @echo " changes to make an overview of all changed/added/deprecated items" 45 | @echo " xml to make Docutils-native XML files" 46 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 47 | @echo " linkcheck to check all external links for integrity" 48 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 49 | @echo " coverage to run coverage check of the documentation (if enabled)" 50 | 51 | clean: 52 | rm -rf $(BUILDDIR)/* source/generated source/_extras/schema.json 53 | 54 | html: 55 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 56 | @echo 57 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 58 | 59 | dirhtml: 60 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 61 | @echo 62 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 63 | 64 | singlehtml: 65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 66 | @echo 67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 68 | 69 | pickle: 70 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 71 | @echo 72 | @echo "Build finished; now you can process the pickle files." 73 | 74 | json: 75 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 76 | @echo 77 | @echo "Build finished; now you can process the JSON files."
78 | 79 | htmlhelp: 80 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 81 | @echo 82 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 83 | ".hhp project file in $(BUILDDIR)/htmlhelp." 84 | 85 | qthelp: 86 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 87 | @echo 88 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 89 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 90 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/datalad_container.qhcp" 91 | @echo "To view the help file:" 92 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/datalad_container.qhc" 93 | 94 | applehelp: 95 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 96 | @echo 97 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 98 | @echo "N.B. You won't be able to view it unless you put it in" \ 99 | "~/Library/Documentation/Help or install it in your application" \ 100 | "bundle." 101 | 102 | devhelp: 103 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 104 | @echo 105 | @echo "Build finished." 106 | @echo "To view the help file:" 107 | @echo "# mkdir -p $$HOME/.local/share/devhelp/datalad_container" 108 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/datalad_container" 109 | @echo "# devhelp" 110 | 111 | epub: 112 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 113 | @echo 114 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 115 | 116 | latex: 117 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 118 | @echo 119 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 120 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 121 | "(use \`make latexpdf' here to do that automatically)." 122 | 123 | latexpdf: 124 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 125 | @echo "Running LaTeX files through pdflatex..." 126 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 127 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 128 | 129 | latexpdfja: 130 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 131 | @echo "Running LaTeX files through platex and dvipdfmx..." 132 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 133 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 134 | 135 | text: 136 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 137 | @echo 138 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 139 | 140 | man: 141 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 142 | @echo 143 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 144 | 145 | texinfo: 146 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 147 | @echo 148 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 149 | @echo "Run \`make' in that directory to run these through makeinfo" \ 150 | "(use \`make info' here to do that automatically)." 151 | 152 | info: 153 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 154 | @echo "Running Texinfo files through makeinfo..." 155 | make -C $(BUILDDIR)/texinfo info 156 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 157 | 158 | gettext: 159 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 160 | @echo 161 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 
162 | 163 | changes: 164 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 165 | @echo 166 | @echo "The overview file is in $(BUILDDIR)/changes." 167 | 168 | linkcheck: 169 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 170 | @echo 171 | @echo "Link check complete; look for any errors in the above output " \ 172 | "or in $(BUILDDIR)/linkcheck/output.txt." 173 | 174 | doctest: 175 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 176 | @echo "Testing of doctests in the sources finished, look at the " \ 177 | "results in $(BUILDDIR)/doctest/output.txt." 178 | 179 | coverage: 180 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 181 | @echo "Testing of coverage in the sources finished, look at the " \ 182 | "results in $(BUILDDIR)/coverage/python.txt." 183 | 184 | xml: 185 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 186 | @echo 187 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 188 | 189 | pseudoxml: 190 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 191 | @echo 192 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 193 | -------------------------------------------------------------------------------- /docs/examples/basic_demo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # SKIP_IN_V6 3 | 4 | set -e 5 | 6 | OLD_PWD=$PWD 7 | 8 | # BOILERPLATE 9 | 10 | #% EXAMPLE START 11 | # 12 | # Getting started 13 | # *************** 14 | # 15 | # The Datalad container extension provides a few commands to register 16 | # containers with a dataset and use them for execution of arbitrary 17 | # commands. In order to get going quickly, we only need a dataset 18 | # and a ready-made container. For this demo we will start with a 19 | # fresh dataset and a demo container from Singularity-Hub. 20 | #% 21 | 22 | # fresh dataset 23 | datalad create demo 24 | cd demo 25 | 26 | # register container straight from Singularity-Hub 27 | datalad containers-add my1st --url shub://datalad/datalad-container:testhelper 28 | 29 | #% 30 | # This will download the container image, add it to the dataset, and record 31 | # basic information on the container under its name "my1st" in the dataset's 32 | # configuration at ``.datalad/config``. 33 | # 34 | # Now we are all set to use this container for command execution. All it needs 35 | # is to swap the command `datalad run` with `datalad containers-run`. The 36 | # command is automatically executed in the registered container and the results 37 | # (if there are any) will be added to the dataset: 38 | #% 39 | 40 | datalad containers-run cp /etc/debian_version proof.txt 41 | 42 | #% 43 | # If there is more than one container registered, the desired container needs 44 | # to be specified via the ``--name`` option. Containers do not need to come from 45 | # Singularity-Hub, but can be local images too. Via the ``containers-add 46 | # --call-fmt`` option it is possible to configure how exactly a container 47 | # is being executed, or which local directories shall be made available to 48 | # a container. 49 | # 50 | # At the moment there is built-in support for Singularity and Docker, but other 51 | # container execution systems can be used together with custom helper scripts. 52 | #% EXAMPLE END 53 | 54 | testEquality() { 55 | assertEquals 1 1 56 | } 57 | 58 | cd "$OLD_PWD" 59 | [ -n "$DATALAD_TESTS_RUNCMDLINE" ] && . 
shunit2 || true 60 | -------------------------------------------------------------------------------- /docs/source/_static/datalad_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalad/datalad-container/e9bba2a6566abf0e0a788dc06c56a468f8400d1f/docs/source/_static/datalad_logo.png -------------------------------------------------------------------------------- /docs/source/_templates/autosummary/module.rst: -------------------------------------------------------------------------------- 1 | {% if fullname == 'datalad.api' -%} 2 | `{{ name }}` 3 | =={%- for c in name %}={%- endfor %} 4 | .. automodule:: datalad.api 5 | 6 | .. currentmodule:: datalad.api 7 | 8 | {% for item in members if not item.startswith('_') %} 9 | `{{ item }}` 10 | --{%- for c in item %}-{%- endfor %} 11 | 12 | .. autofunction:: {{ item }} 13 | {% endfor %} 14 | 15 | {% else -%} 16 | {{ fullname }} 17 | {{ underline }} 18 | 19 | .. automodule:: {{ fullname }} 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | {% endif %} 24 | -------------------------------------------------------------------------------- /docs/source/acknowledgements.rst: -------------------------------------------------------------------------------- 1 | Acknowledgments 2 | *************** 3 | 4 | DataLad development is being performed as part of a US-German collaboration in 5 | computational neuroscience (CRCNS) project "DataGit: converging catalogues, 6 | warehouses, and deployment logistics into a federated 'data distribution'" 7 | (Halchenko_/Hanke_), co-funded by the US National Science Foundation (`NSF 8 | 1429999`_) and the German Federal Ministry of Education and Research (`BMBF 9 | 01GQ1411`_). Additional support is provided by the German federal state of 10 | Saxony-Anhalt and the European Regional Development 11 | Fund (ERDF), Project: `Center for Behavioral Brain Sciences`_, Imaging Platform 12 | 13 | DataLad is built atop the git-annex_ software that is being developed and 14 | maintained by `Joey Hess`_. 15 | 16 | .. _Halchenko: http://haxbylab.dartmouth.edu/ppl/yarik.html 17 | .. _Hanke: http://www.psychoinformatics.de 18 | .. _NSF 1429999: http://www.nsf.gov/awardsearch/showAward?AWD_ID=1429999 19 | .. _BMBF 01GQ1411: http://www.gesundheitsforschung-bmbf.de/de/2550.php 20 | .. _Center for Behavioral Brain Sciences: http://cbbs.eu/en/ 21 | .. _git-annex: http://git-annex.branchable.com 22 | .. _Joey Hess: https://joeyh.name 23 | -------------------------------------------------------------------------------- /docs/source/changelog.rst: -------------------------------------------------------------------------------- 1 | .. This file is auto-converted from CHANGELOG.md (make update-changelog) -- do not edit 2 | 3 | Change log 4 | ********** 5 | :: 6 | 7 | ____ _ _ _ 8 | | _ \ __ _ | |_ __ _ | | __ _ __| | 9 | | | | | / _` || __| / _` || | / _` | / _` | 10 | | |_| || (_| || |_ | (_| || |___ | (_| || (_| | 11 | |____/ \__,_| \__| \__,_||_____| \__,_| \__,_| 12 | Container 13 | 14 | This is a high level and scarce summary of the changes between releases. 15 | We would recommend to consult log of the `DataLad git 16 | repository `__ for more 17 | details. 
18 | 19 | 1.1.2 (January 16, 2021) – 20 | -------------------------- 21 | 22 | - Replace use of ``mock`` with ``unittest.mock`` as we do no longer 23 | support Python 2 24 | 25 | 1.1.1 (January 03, 2021) – 26 | -------------------------- 27 | 28 | - Drop use of ``Runner`` (to be removed in datalad 0.14.0) in favor of 29 | ``WitlessRunner`` 30 | 31 | 1.1.0 (October 30, 2020) – 32 | -------------------------- 33 | 34 | - Datalad version 0.13.0 or later is now required. 35 | 36 | - In the upcoming 0.14.0 release of DataLad, the datalad special remote 37 | will have built-in support for “shub://” URLs. If ``containers-add`` 38 | detects support for this feature, it will now add the “shub://” URL 39 | as is rather than resolving the URL itself. This avoids registering 40 | short-lived URLs, allowing the image to be retrieved later with 41 | ``datalad get``. 42 | 43 | - ``containers-run`` learned to install necessary subdatasets when 44 | asked to execute a container from underneath an uninstalled 45 | subdataset. 46 | 47 | 1.0.1 (June 23, 2020) – 48 | ----------------------- 49 | 50 | - Prefer ``datalad.core.local.run`` to ``datalad.interface.run``. The 51 | latter has been marked as obsolete since DataLad v0.12 (our minimum 52 | requirement) and will be removed in DataLad’s next feature release. 53 | 54 | 1.0.0 (Feb 23, 2020) – not-as-a-shy-one 55 | --------------------------------------- 56 | 57 | Extension is pretty stable so releasing as 1. MAJOR release, so we could 58 | start tracking API breakages and enhancements properly. 59 | 60 | - Drops support for Python 2 and DataLad prior 0.12 61 | 62 | 0.5.2 (Nov 12, 2019) – 63 | ---------------------- 64 | 65 | Fixes 66 | ~~~~~ 67 | 68 | - The Docker adapter unconditionally called ``docker run`` with 69 | ``--interactive`` and ``--tty`` even when stdin was not attached to a 70 | TTY, leading to an error. 71 | 72 | 0.5.1 (Nov 08, 2019) – 73 | ---------------------- 74 | 75 | .. _fixes-1: 76 | 77 | Fixes 78 | ~~~~~ 79 | 80 | - The Docker adapter, which is used for the “dhub://” URL scheme, 81 | assumed the Python executable was spelled “python”. 82 | 83 | - A call to DataLad’s ``resolve_path`` helper assumed a string return 84 | value, which isn’t true as of the latest DataLad release candidate, 85 | 0.12.0rc6. 86 | 87 | 0.5.0 (Jul 12, 2019) – damn-you-malicious-users 88 | ----------------------------------------------- 89 | 90 | New features 91 | ~~~~~~~~~~~~ 92 | 93 | - The default result renderer for ``containers-list`` is now a custom 94 | renderer that includes the container name in the output. 95 | 96 | .. _fixes-2: 97 | 98 | Fixes 99 | ~~~~~ 100 | 101 | - Temporarily skip two tests relying on SingularityHub – it is down. 102 | 103 | 0.4.0 (May 29, 2019) – run-baby-run 104 | ----------------------------------- 105 | 106 | The minimum required DataLad version is now 0.11.5. 107 | 108 | .. _new-features-1: 109 | 110 | New features 111 | ~~~~~~~~~~~~ 112 | 113 | - The call format gained the “{img_dspath}” placeholder, which expands 114 | to the relative path of the dataset that contains the image. This is 115 | useful for pointing to a wrapper script that is bundled in the same 116 | subdataset as a container. 117 | 118 | - ``containers-run`` now passes the container image to ``run`` via its 119 | ``extra_inputs`` argument so that a run command’s “{inputs}” field is 120 | restricted to inputs that the caller explicitly specified. 
121 | 122 | - During execution, ``containers-run`` now sets the environment 123 | variable ``DATALAD_CONTAINER_NAME`` to the name of the container. 124 | 125 | .. _fixes-3: 126 | 127 | Fixes 128 | ~~~~~ 129 | 130 | - ``containers-run`` mishandled paths when called from a subdirectory. 131 | 132 | - ``containers-run`` didn’t provide an informative error message when 133 | ``cmdexec`` contained an unknown placeholder. 134 | 135 | - ``containers-add`` ignores the ``--update`` flag when the container 136 | doesn’t yet exist, but it confusingly still used the word “update” in 137 | the commit message. 138 | 139 | 0.3.1 (Mar 05, 2019) – Upgrayeddd 140 | --------------------------------- 141 | 142 | .. _fixes-4: 143 | 144 | Fixes 145 | ~~~~~ 146 | 147 | - ``containers-list`` recursion actually does recursion. 148 | 149 | 0.3.0 (Mar 05, 2019) – Upgrayedd 150 | -------------------------------- 151 | 152 | API changes 153 | ~~~~~~~~~~~ 154 | 155 | - ``containers-list`` no longer lists containers from subdatasets by 156 | default. Specify ``--recursive`` to do so. 157 | 158 | - ``containers-run`` no longer considers subdataset containers in its 159 | automatic selection of a container name when no name is specified. If 160 | the current dataset has one container, that container is selected. 161 | Subdataset containers must always be explicitly specified. 162 | 163 | .. _new-features-2: 164 | 165 | New features 166 | ~~~~~~~~~~~~ 167 | 168 | - ``containers-add`` learned to update a previous container when passed 169 | ``--update``. 170 | 171 | - ``containers-add`` now supports Singularity’s “docker://” scheme in 172 | the URL. 173 | 174 | - To avoid unnecessary recursion into subdatasets, ``containers-run`` 175 | now decides to look for containers in subdatasets based on whether 176 | the name has a slash (which is true of all subdataset containers). 177 | 178 | 0.2.2 (Dec 19, 2018) – The more the merrier 179 | ------------------------------------------- 180 | 181 | - list/use containers recursively from installed subdatasets 182 | - Allow to specify container by path rather than just by name 183 | - Adding a container from local filesystem will copy it now 184 | 185 | 0.2.1 (Jul 14, 2018) – Explicit lyrics 186 | -------------------------------------- 187 | 188 | - Add support ``datalad run --explicit``. 189 | 190 | 0.2 (Jun 08, 2018) – Docker 191 | --------------------------- 192 | 193 | - Initial support for adding and running Docker containers. 194 | - Add support ``datalad run --sidecar``. 195 | - Simplify storage of ``call_fmt`` arguments in the Git config, by 196 | benefiting from ``datalad run`` being able to work with single-string 197 | compound commands. 198 | 199 | 0.1.2 (May 28, 2018) – The docs 200 | ------------------------------- 201 | 202 | - Basic beginner documentation 203 | 204 | 0.1.1 (May 22, 2018) – The fixes 205 | -------------------------------- 206 | 207 | .. _new-features-3: 208 | 209 | New features 210 | ~~~~~~~~~~~~ 211 | 212 | - Add container images straight from singularity-hub, no need to 213 | manually specify ``--call-fmt`` arguments. 214 | 215 | .. _api-changes-1: 216 | 217 | API changes 218 | ~~~~~~~~~~~ 219 | 220 | - Use “name” instead of “label” for referring to a container (e.g. 221 | ``containers-run -n ...`` instead of ``containers-run -l``. 222 | 223 | .. _fixes-5: 224 | 225 | Fixes 226 | ~~~~~ 227 | 228 | - Pass relative container path to ``datalad run``. 229 | - ``containers-run`` no longer hides ``datalad run`` failures. 
230 | 231 | 0.1 (May 19, 2018) – The Release 232 | -------------------------------- 233 | 234 | - Initial release with basic functionality to add, remove, and list 235 | containers in a dataset, plus a ``run`` command wrapper that injects 236 | the container image as an input dependency of a command call. 237 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # datalad_container documentation build configuration file, created by 4 | # sphinx-quickstart on Tue Oct 13 08:41:19 2015. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import datetime 16 | import os 17 | import sys 18 | from os import pardir 19 | from os.path import ( 20 | abspath, 21 | dirname, 22 | exists, 23 | join as opj, 24 | ) 25 | 26 | import datalad_container 27 | 28 | # If extensions (or modules to document with autodoc) are in another directory, 29 | # add these directories to sys.path here. If the directory is relative to the 30 | # documentation root, use os.path.abspath to make it absolute, like shown here. 31 | #sys.path.insert(0, os.path.abspath('.')) 32 | 33 | # generate missing pieces 34 | for setup_py_path in (opj(pardir, 'setup.py'), # travis 35 | opj(pardir, pardir, 'setup.py')): # RTD 36 | if exists(setup_py_path): 37 | sys.path.insert(0, os.path.abspath(dirname(setup_py_path))) 38 | try: 39 | for cmd in 'manpage',: #'examples': 40 | os.system( 41 | '{} build_{} --cmdsuite {} --manpath {} --rstpath {}'.format( 42 | setup_py_path, 43 | cmd, 44 | 'datalad_container:command_suite', 45 | abspath(opj(dirname(setup_py_path), 'build', 'man')), 46 | opj(dirname(__file__), 'generated', 'man'))) 47 | except: 48 | # shut up and do your best 49 | pass 50 | 51 | # -- General configuration ------------------------------------------------ 52 | 53 | # If your documentation needs a minimal Sphinx version, state it here. 54 | #needs_sphinx = '1.0' 55 | 56 | # Add any Sphinx extension module names here, as strings. They can be 57 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 58 | # ones. 59 | extensions = [ 60 | 'sphinx.ext.autodoc', 61 | 'sphinx.ext.autosummary', 62 | 'sphinx.ext.doctest', 63 | 'sphinx.ext.intersphinx', 64 | 'sphinx.ext.todo', 65 | 'sphinx.ext.coverage', 66 | 'sphinx.ext.mathjax', 67 | 'sphinx.ext.ifconfig', 68 | 'sphinx.ext.inheritance_diagram', 69 | 'sphinx.ext.viewcode', 70 | 'sphinx.ext.napoleon', 71 | 'sphinx_copybutton', 72 | ] 73 | 74 | # for the module reference 75 | autosummary_generate = True 76 | 77 | # Add any paths that contain templates here, relative to this directory. 78 | templates_path = ['_templates'] 79 | 80 | # The suffix(es) of source filenames. 81 | # You can specify multiple suffix as a list of string: 82 | # source_suffix = ['.rst', '.md'] 83 | source_suffix = '.rst' 84 | 85 | # The master toctree document. 86 | master_doc = 'index' 87 | 88 | # General information about the project. 
89 | project = u'Datalad for containerized environments' 90 | copyright = u'2018-{}, DataLad team'.format(datetime.datetime.now().year) 91 | author = u'DataLad team' 92 | 93 | # The version info for the project you're documenting, acts as replacement for 94 | # |version| and |release|, also used in various other places throughout the 95 | # built documents. 96 | version = datalad_container.__version__ 97 | release = version 98 | 99 | # The language for content autogenerated by Sphinx. Refer to documentation 100 | # for a list of supported languages. 101 | # 102 | # This is also used if you do content translation via gettext catalogs. 103 | # Usually you set "language" from the command line for these cases. 104 | language = 'en' 105 | 106 | # List of patterns, relative to source directory, that match files and 107 | # directories to ignore when looking for source files. 108 | exclude_patterns = [] 109 | 110 | # The name of the Pygments (syntax highlighting) style to use. 111 | pygments_style = 'sphinx' 112 | 113 | # If true, `todo` and `todoList` produce output, else they produce nothing. 114 | todo_include_todos = True 115 | 116 | # Example configuration for intersphinx: refer to the Python standard library. 117 | intersphinx_mapping = {"python": ('https://docs.python.org/', None)} 118 | 119 | # -- Options for HTML output ---------------------------------------------- 120 | 121 | # The theme to use for HTML and HTML Help pages. See the documentation for 122 | # a list of builtin themes. 123 | html_theme = 'sphinx_rtd_theme' 124 | 125 | # The name of an image file (relative to this directory) to place at the top 126 | # of the sidebar. 127 | html_logo = '_static/datalad_logo.png' 128 | 129 | # Add any paths that contain custom static files (such as style sheets) here, 130 | # relative to this directory. They are copied after the builtin static files, 131 | # so a file named "default.css" will overwrite the builtin "default.css". 132 | html_static_path = ['_static'] 133 | 134 | # If true, the index is split into individual pages for each letter. 135 | html_split_index = True 136 | 137 | # If true, links to the reST sources are added to the pages. 138 | html_show_sourcelink = False 139 | 140 | # smart quotes are incompatible with the RST flavor of the generated manpages 141 | # but see `smartquotes_action` for more fine-grained control, in case 142 | # some of this functionality is needed 143 | smartquotes = False 144 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | DataLad extension for containerized environments 2 | ************************************************ 3 | 4 | This extension equips DataLad's `run/rerun 5 | `_ functionality with the ability 6 | to transparently execute commands in containerized computational environments. 7 | On re-run, DataLad will automatically obtain any required container at the 8 | correct version prior to execution. 9 | 10 | Documentation 11 | ============= 12 | 13 | This is the technical documentation of the functionality and commands provided by this DataLad extension package. 14 | For an introduction to the general topic and a tutorial, please see the DataLad Handbook at https://handbook.datalad.org/r?containers. 15 | 16 | * :ref:`Documentation index <genindex>` 17 | * `API reference`_ 18 | 19 | .. toctree::
toctree:: 20 | :maxdepth: 1 21 | 22 | changelog 23 | acknowledgements 24 | metadata-extraction 25 | 26 | 27 | API Reference 28 | ============= 29 | 30 | Command manuals 31 | --------------- 32 | 33 | .. toctree:: 34 | :maxdepth: 1 35 | 36 | generated/man/datalad-containers-add 37 | generated/man/datalad-containers-remove 38 | generated/man/datalad-containers-list 39 | generated/man/datalad-containers-run 40 | 41 | 42 | Python API 43 | ---------- 44 | 45 | .. currentmodule:: datalad_container 46 | .. autosummary:: 47 | :toctree: generated 48 | 49 | containers_add 50 | containers_remove 51 | containers_list 52 | containers_run 53 | 54 | utils 55 | 56 | .. |---| unicode:: U+02014 .. em dash 57 | -------------------------------------------------------------------------------- /docs/source/metadata-extraction.rst: -------------------------------------------------------------------------------- 1 | Metadata Extraction 2 | ******************* 3 | 4 | If the `datalad-metalad`_ extension is installed, `datalad-container` can 5 | extract metadata from Singularity container images. 6 | 7 | (It is recommended to use a tool like `jq` if you would like to read the 8 | output yourself.) 9 | 10 | Singularity Inspect 11 | ------------------- 12 | 13 | The extractor adds metadata gathered from `singularity inspect`, together with 14 | the version of `singularity` or `apptainer`. 15 | 16 | For example: 17 | 18 | (from the ReproNim/containers repository) 19 | 20 | `datalad meta-extract -d . container_inspect images/bids/bids-pymvpa--1.0.2.sing | jq` 21 | 22 | .. code-block:: json 23 | 24 | { 25 | "type": "file", 26 | "dataset_id": "b02e63c2-62c1-11e9-82b0-52540040489c", 27 | "dataset_version": "9ed0a39406e518f0309bb665a99b64dec719fb08", 28 | "path": "images/bids/bids-pymvpa--1.0.2.sing", 29 | "extractor_name": "container_inspect", 30 | "extractor_version": "0.0.1", 31 | "extraction_parameter": {}, 32 | "extraction_time": 1680097317.7093463, 33 | "agent_name": "Austin Macdonald", 34 | "agent_email": "austin@dartmouth.edu", 35 | "extracted_metadata": { 36 | "@id": "datalad:SHA1-s993116191--cc7ac6e6a31e9ac131035a88f699dfcca785b844", 37 | "type": "file", 38 | "path": "images/bids/bids-pymvpa--1.0.2.sing", 39 | "content_byte_size": 0, 40 | "comment": "SingularityInspect extractor executed at 1680097317.6012993", 41 | "container_system": "apptainer", 42 | "container_system_version": "1.1.6-1.fc37", 43 | "container_inspect": { 44 | "data": { 45 | "attributes": { 46 | "labels": { 47 | "org.label-schema.build-date": "Thu,_19_Dec_2019_14:58:41_+0000", 48 | "org.label-schema.build-size": "2442MB", 49 | "org.label-schema.schema-version": "1.0", 50 | "org.label-schema.usage.singularity.deffile": "Singularity.bids-pymvpa--1.0.2", 51 | "org.label-schema.usage.singularity.deffile.bootstrap": "docker", 52 | "org.label-schema.usage.singularity.deffile.from": "bids/pymvpa:v1.0.2", 53 | "org.label-schema.usage.singularity.version": "2.5.2-feature-squashbuild-secbuild-2.5.6e68f9725" 54 | } 55 | } 56 | }, 57 | "type": "container" 58 | } 59 | } 60 | } 61 | 62 | .. 
_datalad-metalad: http://docs.datalad.org/projects/metalad/en/latest/ 63 | -------------------------------------------------------------------------------- /docs/utils/pygments_ansi_color.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Pygments lexer for text containing ANSI color codes.""" 3 | from __future__ import ( 4 | absolute_import, 5 | unicode_literals, 6 | ) 7 | 8 | import itertools 9 | import re 10 | 11 | import pygments.lexer 12 | import pygments.token 13 | 14 | Color = pygments.token.Token.Color 15 | 16 | _ansi_code_to_color = { 17 | 0: 'Black', 18 | 1: 'Red', 19 | 2: 'Green', 20 | 3: 'Yellow', 21 | 4: 'Blue', 22 | 5: 'Magenta', 23 | 6: 'Cyan', 24 | 7: 'White', 25 | } 26 | 27 | 28 | def _token_from_lexer_state(bold, fg_color, bg_color): 29 | """Construct a token given the current lexer state. 30 | 31 | We can only emit one token even though we track a multi-part (bold, 32 | fg, bg) state. To work around this, we construct tokens like "BoldRed". 33 | """ 34 | token_name = '' 35 | 36 | if bold: 37 | token_name += 'Bold' 38 | 39 | if fg_color: 40 | token_name += fg_color 41 | 42 | if bg_color: 43 | token_name += 'BG' + bg_color 44 | 45 | if token_name == '': 46 | return pygments.token.Text 47 | else: 48 | return getattr(Color, token_name) 49 | 50 | 51 | def color_tokens(fg_colors, bg_colors): 52 | """Return color tokens for a given set of colors. 53 | 54 | Pygments doesn't have a generic "color" token; instead everything is 55 | contextual (e.g. "comment" or "variable"). That doesn't make sense for us, 56 | where the colors actually *are* what we care about. 57 | 58 | This function will register combinations of tokens (things like "Red" or 59 | "BoldRedBGGreen") based on the colors passed in. 60 | 61 | You can also define the tokens yourself, but note that the token names are 62 | *not* currently guaranteed to be stable between releases as I'm not really 63 | happy with this approach. 
64 | 65 | Usage: 66 | 67 | fg_colors = bg_colors = { 68 | 'Black': '#000000', 69 | 'Red': '#EF2929', 70 | 'Green': '#8AE234', 71 | 'Yellow': '#FCE94F', 72 | 'Blue': '#3465A4', 73 | 'Magenta': '#c509c5', 74 | 'Cyan': '#34E2E2', 75 | 'White': '#ffffff', 76 | } 77 | class MyStyle(pygments.styles.SomeStyle): 78 | styles = dict(pygments.styles.SomeStyle.styles) 79 | styles.update(color_tokens(fg_colors, bg_colors)) 80 | """ 81 | styles = {} 82 | 83 | for bold, fg_color, bg_color in itertools.product( 84 | (False, True), 85 | {None} | set(fg_colors), 86 | {None} | set(bg_colors), 87 | ): 88 | token = _token_from_lexer_state(bold, fg_color, bg_color) 89 | if token is not pygments.token.Text: 90 | value = [] 91 | if bold: 92 | value.append('bold') 93 | if fg_color: 94 | value.append(fg_colors[fg_color]) 95 | if bg_color: 96 | value.append('bg:' + bg_colors[bg_color]) 97 | styles[token] = ' '.join(value) 98 | 99 | return styles 100 | 101 | 102 | class AnsiColorLexer(pygments.lexer.RegexLexer): 103 | name = 'ANSI Color' 104 | aliases = ('ansi-color', 'ansi', 'ansi-terminal') 105 | flags = re.DOTALL | re.MULTILINE 106 | 107 | def __init__(self, *args, **kwargs): 108 | super(AnsiColorLexer, self).__init__(*args, **kwargs) 109 | self.reset_state() 110 | 111 | def reset_state(self): 112 | self.bold = False 113 | self.fg_color = None 114 | self.bg_color = None 115 | 116 | @property 117 | def current_token(self): 118 | return _token_from_lexer_state( 119 | self.bold, self.fg_color, self.bg_color, 120 | ) 121 | 122 | def process(self, match): 123 | """Produce the next token and bit of text. 124 | 125 | Interprets the ANSI code (which may be a color code or some other 126 | code), changing the lexer state and producing a new token. If it's not 127 | a color code, we just strip it out and move on. 128 | 129 | Some useful reference for ANSI codes: 130 | * http://ascii-table.com/ansi-escape-sequences.php 131 | """ 132 | # "after_escape" contains everything after the start of the escape 133 | # sequence, up to the next escape sequence. We still need to separate 134 | # the content from the end of the escape sequence. 135 | after_escape = match.group(1) 136 | 137 | # TODO: this doesn't handle the case where the values are non-numeric. 138 | # This is rare but can happen for keyboard remapping, e.g. 139 | # '\x1b[0;59;"A"p' 140 | parsed = re.match( 141 | r'([0-9;=]*?)?([a-zA-Z])(.*)$', 142 | after_escape, 143 | re.DOTALL | re.MULTILINE, 144 | ) 145 | if parsed is None: 146 | # This shouldn't ever happen if we're given valid text + ANSI, but 147 | # people can provide us with utter junk, and we should tolerate it. 148 | text = after_escape 149 | else: 150 | value, code, text = parsed.groups() 151 | 152 | if code == 'm': # "m" is "Set Graphics Mode" 153 | # Special case \x1b[m is a reset code 154 | if value == '': 155 | self.reset_state() 156 | else: 157 | values = value.split(';') 158 | for value in values: 159 | try: 160 | value = int(value) 161 | except ValueError: 162 | # Shouldn't ever happen, but could with invalid 163 | # ANSI. 
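# (For illustration, an assumed input: the escape '\x1b[1;m' yields
# value '1;', which splits into ('1', ''); int('') raises ValueError,
# so only that empty component is skipped.)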
164 | continue 165 | else: 166 | fg_color = _ansi_code_to_color.get(value - 30) 167 | bg_color = _ansi_code_to_color.get(value - 40) 168 | if fg_color: 169 | self.fg_color = fg_color 170 | elif bg_color: 171 | self.bg_color = bg_color 172 | elif value == 1: 173 | self.bold = True 174 | elif value == 22: 175 | self.bold = False 176 | elif value == 39: 177 | self.fg_color = None 178 | elif value == 49: 179 | self.bg_color = None 180 | elif value == 0: 181 | self.reset_state() 182 | 183 | yield match.start(), self.current_token, text 184 | 185 | tokens = { 186 | # states have to be native strings 187 | str('root'): [ 188 | (r'\x1b\[([^\x1b]*)', process), 189 | (r'[^\x1b]+', pygments.token.Text), 190 | ], 191 | } 192 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools >= 59.0.0", "tomli", "wheel"] 3 | 4 | [tool.isort] 5 | force_grid_wrap = 2 6 | include_trailing_comma = true 7 | multi_line_output = 3 8 | combine_as_imports = true 9 | 10 | [tool.codespell] 11 | skip = '.git,*.pdf,*.svg,venvs,versioneer.py' 12 | # DNE - does not exist 13 | ignore-words-list = 'dne' 14 | 15 | [tool.versioneer] 16 | # See the docstring in versioneer.py for instructions. Note that you must 17 | # re-run 'versioneer.py setup' after changing this section, and commit the 18 | # resulting files. 19 | VCS = 'git' 20 | style = 'pep440' 21 | versionfile_source = 'datalad_container/_version.py' 22 | versionfile_build = 'datalad_container/_version.py' 23 | tag_prefix = '' 24 | parentdir_prefix = '' 25 | -------------------------------------------------------------------------------- /requirements-devel.txt: -------------------------------------------------------------------------------- 1 | # requirements for a development environment 2 | -e .[devel] 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # If you want to develop, use requirements-devel.txt 2 | # git+https://github.com/datalad/datalad.git 3 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | url = https://github.com/datalad/datalad-container 3 | author = The DataLad Team and Contributors 4 | author_email = team@datalad.org 5 | description = DataLad extension package for working with containerized environments 6 | long_description = file:README.md 7 | long_description_content_type = text/markdown; charset=UTF-8 8 | license = MIT 9 | classifiers = 10 | Programming Language :: Python 11 | License :: OSI Approved :: MIT License 12 | Programming Language :: Python :: 3 13 | 14 | [options] 15 | python_requires = >= 3.7 16 | install_requires = 17 | datalad >= 0.18.0 18 | requests>=1.2 # to talk to Singularity-hub 19 | packages = find: 20 | include_package_data = True 21 | 22 | [options.extras_require] 23 | extras = 24 | datalad-metalad 25 | # this matches the name used by -core and what is expected by some CI setups 26 | devel = 27 | %(extras)s 28 | pytest 29 | pytest-cov 30 | coverage 31 | sphinx 32 | sphinx-rtd-theme 33 | sphinx-copybutton 34 | 35 | [options.packages.find] 36 | # do not ship the build helpers 37 | exclude= 38 | _datalad_buildsupport 39 | 40 | [options.entry_points] 41 | # 'datalad.extensions' is 
THE entrypoint inspected by the datalad API builders 42 | datalad.extensions = 43 | # the label in front of '=' is the command suite label 44 | # the entrypoint can point to any symbol of any name, as long as it is 45 | # a valid datalad interface specification (see the demo in this extension) 46 | container = datalad_container:command_suite 47 | 48 | datalad.metadata.extractors = 49 | container_inspect = datalad_container.extractors.metalad_container:MetaladContainerInspect 50 | 51 | [coverage:report] 52 | show_missing = True 53 | omit = 54 | # versioneer code 55 | datalad_container/_version.py 56 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | import versioneer 5 | 6 | from _datalad_buildsupport.setup import ( 7 | BuildManPage, 8 | ) 9 | 10 | cmdclass = versioneer.get_cmdclass() 11 | cmdclass.update(build_manpage=BuildManPage) 12 | 13 | if __name__ == '__main__': 14 | setup(name='datalad_container', 15 | version=versioneer.get_version(), 16 | cmdclass=cmdclass, 17 | ) 18 | -------------------------------------------------------------------------------- /tools/Singularity.testhelper: -------------------------------------------------------------------------------- 1 | # 2 | # This produces a minimal image that can be used for testing the 3 | # extension itself. 4 | # 5 | 6 | Bootstrap:docker 7 | From:debian:stable-slim 8 | -------------------------------------------------------------------------------- /tools/appveyor_env_setup.bat: -------------------------------------------------------------------------------- 1 | set PY=%1-x64 2 | set TMP=C:\DLTMP 3 | set TEMP=C:\DLTMP 4 | set PATH=C:\Python%PY%;C:\Python%PY%\Scripts;%PATH% 5 | -------------------------------------------------------------------------------- /tools/ci/install-singularity.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ex -o pipefail 3 | release="$(curl -fsSL https://api.github.com/repos/sylabs/singularity/releases/latest | jq -r .tag_name)" 4 | codename="$(lsb_release -cs)" 5 | arch="$(dpkg --print-architecture)" 6 | wget -O /tmp/singularity-ce.deb "https://github.com/sylabs/singularity/releases/download/$release/singularity-ce_${release#v}-${codename}_$arch.deb" 7 | set -x 8 | sudo DEBIAN_FRONTEND=noninteractive apt-get install -y uidmap libfuse2 fuse2fs 9 | sudo dpkg -i /tmp/singularity-ce.deb 10 | sudo DEBIAN_FRONTEND=noninteractive apt-get install -f 11 | -------------------------------------------------------------------------------- /tools/ci/prep-travis-forssh-sudo.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | echo "127.0.0.1 datalad-test" >> /etc/hosts 4 | apt-get install openssh-client 5 | -------------------------------------------------------------------------------- /tools/ci/prep-travis-forssh.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mkdir -p ~/.ssh 4 | echo -e "Host localhost\n\tStrictHostKeyChecking no\n\tIdentityFile /tmp/dl-test-ssh-id\n" >> ~/.ssh/config 5 | echo -e "Host datalad-test\n\tStrictHostKeyChecking no\n\tIdentityFile /tmp/dl-test-ssh-id\n" >> ~/.ssh/config 6 | ssh-keygen -f /tmp/dl-test-ssh-id -N "" 7 | cat /tmp/dl-test-ssh-id.pub >> ~/.ssh/authorized_keys 8 | eval $(ssh-agent) 9 | ssh-add /tmp/dl-test-ssh-id 10 
| 11 | echo "DEBUG: test connection to localhost ..." 12 | ssh -v localhost exit 13 | echo "DEBUG: test connection to datalad-test ..." 14 | ssh -v datalad-test exit 15 | 16 | # tmp: don't run the actual tests: 17 | # exit 1 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /tools/containers_add_dhub_tags.py: -------------------------------------------------------------------------------- 1 | """Feed tagged Docker Hub images to datalad-containers-add. 2 | 3 | This command takes a set of Docker Hub repositories, looks up the 4 | tags, and calls `datalad containers-add ... dhub://REPO:TAG@digest`. The 5 | output of datalad-container's Docker adapter is dumped to 6 | 7 | images/REPO/TAG/ARCH-DATE-SHORTDIGEST/ 8 | 9 | where SHORTDIGEST is the first 12 characters of the .config.digest key of 10 | the manifest returned by Docker Hub for the image of that ARCH which was 11 | uploaded on DATE. In addition, the image record and manifest are written 12 | to satellite .image.json and .manifest.json files next to that directory. 13 | The step of adding the image is skipped if the path is already present locally. 14 | """ 15 | 16 | import fileinput 17 | import json 18 | import logging 19 | import re 20 | from pathlib import Path 21 | from pprint import pprint 22 | 23 | import requests 24 | from datalad.api import ( 25 | containers_add, 26 | save, 27 | ) 28 | 29 | lgr = logging.getLogger("containers_add_dhub_tags") 30 | 31 | REGISTRY_AUTH_URL = ("https://auth.docker.io/token?service=registry.docker.io" 32 | "&scope=repository:{repo}:pull") 33 | REGISTRY_ENDPOINT = "https://registry-1.docker.io/v2" 34 | DHUB_ENDPOINT = "https://hub.docker.com/v2" 35 | 36 | # TODO: wrap it up with feeding in the repositories to consider, 37 | # or, if we just do one repository at a time, these could become CLI options 38 | target_architectures = '.*' 39 | target_tags = '.*' 40 | # TODO: forget_tags = 'master' -- those for which we might not want to retain prior versions, 41 | # or maybe exclude them completely since they change too frequently, etc.? 42 | 43 | # TEST on busybox with just a few architectures and tags - it is tiny but has too many of both 44 | #target_architectures = '^(amd64|.*86)$' 45 | #target_tags = '(latest|1.32.0)' 46 | 47 | # TODO: this could be a CLI option 48 | default_architecture = 'amd64' 49 | 50 | 51 | def clean_container_name(name): 52 | """Transform `name` for use in datalad-containers-add. 53 | 54 | Note that, although it probably doesn't matter in practice, this 55 | transformation is susceptible to conflicts and ambiguity. 56 | """ 57 | if name.startswith("_/"): 58 | name = name[2:] 59 | name = name.replace("_", "-") 60 | # TODO: research feasibility to create "hierarchical" organization 61 | # by using . as a separator. Then we could have a "default" 62 | # one and then various past instances in sublevels of 63 | # .version.architecture.date--shortdigest 64 | return re.sub(r"[^0-9a-zA-Z-]", "--", name) 65 | 66 | 67 | def add_container(url, name, target): 68 | lgr.info("Adding %s as %s", url, name) 69 | # TODO: This would result in a commit for each image, which would 70 | # be good to avoid. 71 | # 72 | # This containers_add() call also prevents doing things in 73 | # parallel. 74 | containers_add( 75 | name=name, url=url, image=str(target), 76 | # Pass update=True to let the image for an existing entry 77 | # (particularly the one for the "latest" tag) be updated. 
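# For illustration only (all values below are hypothetical), a
# resulting call looks like:
#   containers_add(name='busybox--latest--20210901-abcdef123456',
#                  url='dhub://library/busybox:latest@sha256:...',
#                  image='images/_/busybox/latest/20210901-abcdef123456',
#                  update=True)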
78 | update=True) 79 | return name 80 | 81 | 82 | def write_json(target, content): 83 | lgr.info("Writing %s", target) 84 | target.parent.mkdir(parents=True, exist_ok=True) 85 | target.write_text(json.dumps(content)) 86 | return target 87 | 88 | # 89 | # Registry -- requires authentication to query 90 | # 91 | 92 | 93 | 94 | class RepoRegistry(object): 95 | def __init__(self, repo): 96 | resp_auth = requests.get(REGISTRY_AUTH_URL.format(repo=repo)) 97 | resp_auth.raise_for_status() 98 | self.repo = repo 99 | self._headers = { 100 | "Authorization": "Bearer " + resp_auth.json()["token"], 101 | } 102 | 103 | def get(self, query, headers=None): 104 | headers = headers or {} 105 | headers.update(self._headers) 106 | resp_man = requests.get(f"{REGISTRY_ENDPOINT}/{self.repo}/{query}", 107 | headers=headers) 108 | resp_man.raise_for_status() 109 | return resp_man.json() 110 | 111 | def get_manifest(self, reference): 112 | lgr.debug("Getting manifest for %s:%s", self.repo, reference) 113 | # TODO: Can we check with HEAD first to see if the digest 114 | # matches what we have locally? 115 | return self.get( 116 | f'manifests/{reference}', 117 | # return the single (first, if multiple, e.g. for a reference being a tag) 118 | # manifest 119 | headers={"Accept": "application/vnd.docker.distribution.manifest.v2+json"} 120 | ) 121 | 122 | # 123 | # HUB -- no authentication required 124 | # 125 | 126 | 127 | def walk_pages(url): 128 | next_page = url 129 | while next_page: 130 | lgr.debug("GET %s", next_page) 131 | response = requests.get(next_page) 132 | response.raise_for_status() 133 | data = response.json() 134 | next_page = data.get("next") 135 | yield from data.get("results", []) 136 | 137 | 138 | def get_repo_tag_images(repo): 139 | url = f"{DHUB_ENDPOINT}/repositories/{repo}/tags" 140 | for result in walk_pages(url): 141 | images = result["images"] 142 | # there could be records for images that have not been uploaded; 143 | # then it seems the digest is missing and 'last_pushed' is None 144 | for i, image in list(enumerate(images))[::-1]: 145 | if 'digest' not in image: 146 | assert not image.get('last_pushed') 147 | images.pop(i) 148 | yield result["name"], sorted(images, key=lambda i: i['digest']) 149 | 150 | 151 | def get_namespace_repos(name): 152 | lgr.info("Getting repositories for %s...", name) 153 | url = f"{DHUB_ENDPOINT}/repositories/{name}/" 154 | for result in walk_pages(url): 155 | assert name == result["namespace"] 156 | yield f"{name}/{result['name']}" 157 | 158 | 159 | def parse_input(line): 160 | line = line.strip() 161 | lgr.debug("Processing input: %s", line) 162 | if line.endswith("/"): 163 | kind = "namespace" 164 | name = line[:-1] 165 | else: 166 | kind = "repository" 167 | if "/" in line: 168 | name = line 169 | else: 170 | lgr.debug( 171 | "Assuming official image and assigning library/ namespace") 172 | name = "library/" + line 173 | return name, kind 174 | 175 | 176 | def process_files(files): 177 | failed = [] 178 | for line in fileinput.input(files): 179 | name, kind = parse_input(line) 180 | if kind == "namespace": 181 | try: 182 | repos = list(get_namespace_repos(name)) 183 | except requests.HTTPError as exc: 184 | lgr.warning( 185 | "Failed to list repositories for %s (status %s). 
Skipping", 186 | name, exc.response.status_code) 187 | failed.append(name) 188 | continue 189 | else: 190 | repos = [name] 191 | 192 | target_architectures_re = re.compile(target_architectures) 193 | target_tags_re = re.compile(target_tags) 194 | for repo in repos: 195 | lgr.info("Working on %s", repo) 196 | try: 197 | registry = RepoRegistry(repo) 198 | #pprint(list(zip(sorted(_all_tags['latest'], key=lambda r: r['digest']), sorted(_all_tags['1.32.0'], 199 | # key=lambda r: r['digest'])))) 200 | tag_images = dict(get_repo_tag_images(repo)) 201 | 202 | # The 'latest' tag is special in docker: it is the default one, 203 | # and typically just points to some other release/version. 204 | # If we find that to be the case, we do not create a dedicated "latest" 205 | # image/datalad container -- we just add a container entry pointing to that 206 | # one. If there is no matching one -- we do get "latest" 207 | latest_matching_tag = None 208 | # NOTE: "master" is also often used to signal a moving target; 209 | # it might, or might not, correspond to a tagged release. I guess we are just 210 | # doomed to breed those 211 | if target_tags_re.match('latest'): 212 | matching_tags = [] 213 | for tag, images in tag_images.items(): 214 | if tag == 'latest' or not target_tags_re.match(tag): 215 | lgr.debug("Skipping tag %s", tag) 216 | continue 217 | 218 | if images == tag_images['latest']: 219 | matching_tags.append(tag) 220 | if len(matching_tags) >= 1: 221 | if len(matching_tags) > 1: 222 | lgr.info( 223 | "Multiple tags' images match 'latest', taking the first: %s", 224 | ', '.join(matching_tags)) 225 | latest_matching_tag = matching_tags[0] 226 | lgr.info("Taking %s as the one for 'latest'", latest_matching_tag) 227 | else: 228 | # TODO: if there is no latest, we should at least establish the 229 | # convenient one for each tag 230 | pass 231 | for tag, images in tag_images.items(): 232 | if tag == 'latest' and latest_matching_tag: 233 | continue # skip since we will handle it 234 | if not target_tags_re.match(tag): 235 | lgr.debug("Skipping tag %s", tag) 236 | continue 237 | multiarch = len({i['architecture'] for i in images}) > 1 238 | for image in images: 239 | architecture = image['architecture'] 240 | if not target_architectures_re.match(architecture): 241 | lgr.debug("Skipping architecture %(architecture)s", image) 242 | continue 243 | manifest = registry.get_manifest(image['digest']) 244 | digest = manifest["config"]["digest"] 245 | # yoh: if I got it right, this is the actual image ID we see in `docker images` 246 | assert digest.startswith("sha256:") 247 | digest = digest[7:] 248 | digest_short = digest[:12] # use short version in name 249 | last_pushed = image.get('last_pushed') 250 | if last_pushed: 251 | assert last_pushed.endswith('Z') 252 | # take only date 253 | last_pushed = last_pushed[:10].replace('-', '') 254 | assert len(last_pushed) == 8 255 | cleaner_repo = repo 256 | # this is how it looks in the hub.docker.com URL 257 | if repo.startswith('library/'): 258 | cleaner_repo = "_/" + cleaner_repo[len('library/'):] 259 | image_name = f"{cleaner_repo}/{tag}/" 260 | if multiarch: 261 | image_name += f"{architecture}-" 262 | if last_pushed: 263 | # apparently not present in all records, e.g. not for repronim/neurodocker 264 | # (may be None for those built on the hub?) 
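# To illustrate the naming scheme (assumed values): a multiarch image
# pushed on 2021-09-01 would get
#   image_name == "_/busybox/latest/amd64-20210901-abcdef123456",
# with the arch/date segments present only when applicable.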
265 | image_name += f"{last_pushed}-" 266 | image_name += f"{digest_short}" 267 | dl_container_name = clean_container_name(str(image_name)) 268 | image_path = Path("images") / image_name 269 | url = f"dhub://{repo}:{tag}@{image['digest']}" 270 | save_paths = [] 271 | if image_path.exists(): 272 | lgr.info("%s already exists, skipping adding", str(image_path)) 273 | else: 274 | save_paths.append(write_json(Path(str(image_path) + '.manifest.json'), manifest)) 275 | save_paths.append(write_json(Path(str(image_path) + '.image.json'), image)) 276 | add_container(url, dl_container_name, image_path) 277 | # TODO: either fix datalad-container for https://github.com/datalad/datalad-container/issues/98 278 | # or here, since we have manifest, we can datalad download-url, and add-archive-content 279 | # of the gzipped layers (but without untarring) - that should add datalad-archive 280 | # urls to individual layers in the "saved" version 281 | # TODO: make it in a single commit with add_container at least, 282 | # or one commit for the whole repo sweep 283 | save(path=save_paths, message=f"Added manifest and image records for {dl_container_name}") 284 | # TODO: ensure .datalad/config to have additional useful fields: 285 | # architecture, os, and manually "updateurl" since not added for 286 | # dhub:// ATM 287 | if tag == latest_matching_tag and architecture == default_architecture: 288 | # TODO remove section if exists, copy this one 289 | lgr.warning("Tracking of 'latest' is not yet implemented") 290 | except requests.HTTPError as exc: 291 | lgr.warning( 292 | "Failed processing %s. Skipping\n status %s for %s", 293 | repo, exc.response.status_code, exc.response.url) 294 | failed.append(name) 295 | continue 296 | return failed 297 | 298 | 299 | def main(args): 300 | import argparse 301 | 302 | parser = argparse.ArgumentParser( 303 | description=__doc__, 304 | formatter_class=argparse.RawDescriptionHelpFormatter) 305 | parser.add_argument( 306 | "-v", "--verbose", action="store_true") 307 | parser.add_argument( 308 | "files", metavar="FILE", nargs="*", 309 | help=("File with list of names. " 310 | "If a name doesn't contain a slash, " 311 | "it's treated as an official image by prepending 'library/'. " 312 | "A name ending with a slash is taken as a namespace, " 313 | "and Docker Hub is queried to obtain a list of repositories " 314 | "under that namespace (e.g., all the repositories of a user). 
" 315 | "If not specified, the names are read from stdin.")) 316 | namespace = parser.parse_args(args[1:]) 317 | 318 | logging.basicConfig( 319 | level=logging.DEBUG if namespace.verbose else logging.INFO, 320 | format="%(message)s") 321 | 322 | return process_files(namespace.files) 323 | 324 | 325 | if __name__ == "__main__": 326 | import sys 327 | failed = main(sys.argv) 328 | sys.exit(len(failed) > 0) 329 | -------------------------------------------------------------------------------- /tools/mk_minimal_chroot.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # bootstrap a tiny chroot (26MB compressed) 4 | # 5 | # run with sudo 6 | 7 | set -e -u 8 | 9 | chrootdir=$(mktemp -d) 10 | echo "Working in $chrootdir" 11 | debootstrap --variant=minbase --no-check-gpg stretch "$chrootdir" 12 | find "$chrootdir"/var/cache/apt/archives -type f -delete 13 | find "$chrootdir"/var/lib/apt/lists/ -type f -delete 14 | rm -rf "$chrootdir"/usr/share/doc/* 15 | rm -rf "$chrootdir"/usr/share/man 16 | tar --show-transformed-names --transform=s,^.*$(basename $chrootdir),minichroot, -cvjf minichroot.tar.xz "$chrootdir" 17 | echo "chroot tarball at minichroot.tar.xz" 18 | --------------------------------------------------------------------------------