├── dandi ├── tests │ ├── __init__.py │ ├── data │ │ ├── zarr3_stores │ │ │ ├── invalid_stores │ │ │ │ ├── array_v3_corrupt_zarr_json.zarr │ │ │ │ │ ├── zarr.json │ │ │ │ │ └── c │ │ │ │ │ │ ├── 0 │ │ │ │ │ │ ├── 0 │ │ │ │ │ │ └── 1 │ │ │ │ │ │ └── 1 │ │ │ │ │ │ ├── 0 │ │ │ │ │ │ └── 1 │ │ │ │ ├── arrays_in_groups_node_type_problem.zarr │ │ │ │ │ ├── foo │ │ │ │ │ │ ├── zarr.json │ │ │ │ │ │ └── bar │ │ │ │ │ │ │ ├── zarr.json │ │ │ │ │ │ │ └── z │ │ │ │ │ │ │ └── zarr.json │ │ │ │ │ ├── zarr.json │ │ │ │ │ └── foo1 │ │ │ │ │ │ ├── zarr.json │ │ │ │ │ │ └── bar1 │ │ │ │ │ │ ├── zarr.json │ │ │ │ │ │ └── z1 │ │ │ │ │ │ └── zarr.json │ │ │ │ ├── arrays_in_groups_missing_zarr_json.zarr │ │ │ │ │ ├── foo │ │ │ │ │ │ ├── zarr.json │ │ │ │ │ │ └── bar │ │ │ │ │ │ │ ├── zarr.json │ │ │ │ │ │ │ └── z │ │ │ │ │ │ │ └── zarr.json │ │ │ │ │ └── foo1 │ │ │ │ │ │ ├── zarr.json │ │ │ │ │ │ └── bar1 │ │ │ │ │ │ ├── zarr.json │ │ │ │ │ │ └── z1 │ │ │ │ │ │ └── zarr.json │ │ │ │ ├── single_array_missing_zarr_json.zarr │ │ │ │ │ └── c │ │ │ │ │ │ ├── 0 │ │ │ │ │ │ ├── 0 │ │ │ │ │ │ └── 1 │ │ │ │ │ │ └── 1 │ │ │ │ │ │ ├── 0 │ │ │ │ │ │ └── 1 │ │ │ │ └── single_array_node_type_problem.zarr │ │ │ │ │ ├── c │ │ │ │ │ ├── 0 │ │ │ │ │ │ ├── 0 │ │ │ │ │ │ └── 1 │ │ │ │ │ └── 1 │ │ │ │ │ │ ├── 0 │ │ │ │ │ │ └── 1 │ │ │ │ │ └── zarr.json │ │ │ └── valid_stores │ │ │ │ ├── single_array.zarr │ │ │ │ ├── c │ │ │ │ │ ├── 0 │ │ │ │ │ │ ├── 0 │ │ │ │ │ │ └── 1 │ │ │ │ │ └── 1 │ │ │ │ │ │ ├── 0 │ │ │ │ │ │ └── 1 │ │ │ │ └── zarr.json │ │ │ │ └── arrays_in_groups.zarr │ │ │ │ ├── zarr.json │ │ │ │ ├── foo │ │ │ │ ├── zarr.json │ │ │ │ └── bar │ │ │ │ │ ├── zarr.json │ │ │ │ │ └── z │ │ │ │ │ └── zarr.json │ │ │ │ └── foo1 │ │ │ │ ├── zarr.json │ │ │ │ └── bar1 │ │ │ │ ├── zarr.json │ │ │ │ └── z1 │ │ │ │ └── zarr.json │ │ ├── metadata │ │ │ ├── metadata2asset_simple1.json │ │ │ ├── metadata2asset.json │ │ │ ├── metadata2asset_cellline.json │ │ │ └── metadata2asset_3.json │ │ └── dandiarchive-docker │ │ │ └── docker-compose.yml │ ├── test_dandiset.py │ ├── test_fixtures.py │ ├── test_helptext.py │ ├── xfail.py │ ├── test_helpers.py │ ├── test_pynwb_utils.py │ └── test_validate_types.py ├── cli │ ├── tests │ │ ├── __init__.py │ │ ├── test_shell_completion.py │ │ ├── test_instances.py │ │ ├── test_formatter.py │ │ ├── test_command.py │ │ ├── test_move.py │ │ ├── test_digest.py │ │ ├── data │ │ │ └── update_dandiset_from_doi │ │ │ │ ├── neuron.json │ │ │ │ ├── nature.json │ │ │ │ └── jneurosci.json │ │ ├── test_cmd_ls.py │ │ └── test_service_scripts.py │ ├── cmd_instances.py │ ├── __init__.py │ ├── cmd_digest.py │ ├── cmd_delete.py │ ├── cmd_shell_completion.py │ ├── formatter.py │ ├── cmd_move.py │ ├── cmd_upload.py │ ├── base.py │ └── cmd_organize.py ├── metadata │ ├── __init__.py │ └── core.py ├── support │ ├── tests │ │ ├── __init__.py │ │ └── test_iterators.py │ ├── __init__.py │ ├── threaded_walk.py │ └── iterators.py ├── bids_validator_deno │ ├── __init__.py │ └── _models.py ├── __init__.py ├── exceptions.py ├── due.py ├── pytest_plugin.py ├── files │ └── _private.py └── keyring_utils.py ├── docs ├── requirements.txt ├── source │ ├── modref │ │ ├── utils.rst │ │ ├── misctypes.rst │ │ ├── files.rst │ │ ├── support.digests.rst │ │ ├── dandiarchive.rst │ │ ├── consts.rst │ │ ├── index.rst │ │ └── dandiapi.rst │ ├── ref │ │ ├── index.rst │ │ └── urls.rst │ ├── cmdline │ │ ├── index.rst │ │ ├── digest.rst │ │ ├── shell-completion.rst │ │ ├── dandi.rst │ │ ├── instances.rst │ │ ├── validate.rst │ │ ├── delete.rst │ │ ├── 
ls.rst │ │ ├── download.rst │ │ ├── service-scripts.rst │ │ ├── upload.rst │ │ ├── organize.rst │ │ └── move.rst │ ├── index.rst │ ├── examples │ │ ├── dandiapi-as_readable.py │ │ └── dandiapi-example.py │ └── conf.py ├── Makefile ├── make.bat └── demos │ └── basic-workflow1.sh ├── .mailmap ├── .gitattributes ├── codeql.yml ├── lgtm.yml ├── .gitignore ├── MANIFEST.in ├── .github ├── dependabot.yml └── workflows │ ├── update-year.yml │ ├── typing.yml │ ├── lint.yml │ ├── labels.yml │ ├── docs.yml │ ├── codeql.yml │ ├── claude.yml │ └── release.yml ├── setup.cfg ├── .readthedocs.yaml ├── .autorc ├── .et ├── .pre-commit-config.yaml ├── setup.py ├── tools ├── migrate-dandisets.py └── update-assets-on-server ├── CLAUDE.md ├── tox.ini └── README.md /dandi/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dandi/cli/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dandi/metadata/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dandi/support/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | furo 2 | Sphinx 3 | -------------------------------------------------------------------------------- /.mailmap: -------------------------------------------------------------------------------- 1 | Ben Dichter 2 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | dandi/_version.py export-subst 2 | -------------------------------------------------------------------------------- /codeql.yml: -------------------------------------------------------------------------------- 1 | paths-ignore: 2 | - dandi/_version.py 3 | - dandi/due.py 4 | - versioneer.py 5 | -------------------------------------------------------------------------------- /docs/source/modref/utils.rst: -------------------------------------------------------------------------------- 1 | ``dandi.utils`` 2 | =============== 3 | 4 | .. automodule:: dandi.utils 5 | -------------------------------------------------------------------------------- /dandi/support/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Various utilities, typically to support use of external modules etc 3 | """ 4 | -------------------------------------------------------------------------------- /docs/source/modref/misctypes.rst: -------------------------------------------------------------------------------- 1 | ``dandi.misctypes`` 2 | =================== 3 | 4 | .. automodule:: dandi.misctypes 5 | -------------------------------------------------------------------------------- /docs/source/ref/index.rst: -------------------------------------------------------------------------------- 1 | ********** 2 | References 3 | ********** 4 | 5 | .. 
toctree:: 6 | :glob: 7 | 8 | * 9 | -------------------------------------------------------------------------------- /lgtm.yml: -------------------------------------------------------------------------------- 1 | path_classifiers: 2 | external: 3 | - dandi/_version.py 4 | - dandi/due.py 5 | - versioneer.py 6 | -------------------------------------------------------------------------------- /docs/source/modref/files.rst: -------------------------------------------------------------------------------- 1 | ``dandi.files`` 2 | =============== 3 | 4 | .. automodule:: dandi.files 5 | :show-inheritance: 6 | -------------------------------------------------------------------------------- /docs/source/modref/support.digests.rst: -------------------------------------------------------------------------------- 1 | ``dandi.support.digests`` 2 | ========================= 3 | 4 | .. automodule:: dandi.support.digests 5 | -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/invalid_stores/array_v3_corrupt_zarr_json.zarr/zarr.json: -------------------------------------------------------------------------------- 1 | { 2 | "zarr_format": 3, 3 | "node_type": "array" 4 | } 5 | -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/valid_stores/single_array.zarr/c/0/0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dandi/dandi-cli/HEAD/dandi/tests/data/zarr3_stores/valid_stores/single_array.zarr/c/0/0 -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/valid_stores/single_array.zarr/c/0/1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dandi/dandi-cli/HEAD/dandi/tests/data/zarr3_stores/valid_stores/single_array.zarr/c/0/1 -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/valid_stores/single_array.zarr/c/1/0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dandi/dandi-cli/HEAD/dandi/tests/data/zarr3_stores/valid_stores/single_array.zarr/c/1/0 -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/valid_stores/single_array.zarr/c/1/1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dandi/dandi-cli/HEAD/dandi/tests/data/zarr3_stores/valid_stores/single_array.zarr/c/1/1 -------------------------------------------------------------------------------- /docs/source/modref/dandiarchive.rst: -------------------------------------------------------------------------------- 1 | ``dandi.dandiarchive`` 2 | ====================== 3 | 4 | .. 
automodule:: dandi.dandiarchive 5 | :member-order: bysource 6 | :show-inheritance: 7 | -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/valid_stores/arrays_in_groups.zarr/zarr.json: -------------------------------------------------------------------------------- 1 | { 2 | "attributes": {}, 3 | "zarr_format": 3, 4 | "consolidated_metadata": null, 5 | "node_type": "group" 6 | } 7 | -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/valid_stores/arrays_in_groups.zarr/foo/zarr.json: -------------------------------------------------------------------------------- 1 | { 2 | "attributes": {}, 3 | "zarr_format": 3, 4 | "consolidated_metadata": null, 5 | "node_type": "group" 6 | } 7 | -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/valid_stores/arrays_in_groups.zarr/foo1/zarr.json: -------------------------------------------------------------------------------- 1 | { 2 | "attributes": {}, 3 | "zarr_format": 3, 4 | "consolidated_metadata": null, 5 | "node_type": "group" 6 | } 7 | -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/valid_stores/arrays_in_groups.zarr/foo/bar/zarr.json: -------------------------------------------------------------------------------- 1 | { 2 | "attributes": {}, 3 | "zarr_format": 3, 4 | "consolidated_metadata": null, 5 | "node_type": "group" 6 | } 7 | -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/valid_stores/arrays_in_groups.zarr/foo1/bar1/zarr.json: -------------------------------------------------------------------------------- 1 | { 2 | "attributes": {}, 3 | "zarr_format": 3, 4 | "consolidated_metadata": null, 5 | "node_type": "group" 6 | } 7 | -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/invalid_stores/array_v3_corrupt_zarr_json.zarr/c/0/0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dandi/dandi-cli/HEAD/dandi/tests/data/zarr3_stores/invalid_stores/array_v3_corrupt_zarr_json.zarr/c/0/0 -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/invalid_stores/array_v3_corrupt_zarr_json.zarr/c/0/1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dandi/dandi-cli/HEAD/dandi/tests/data/zarr3_stores/invalid_stores/array_v3_corrupt_zarr_json.zarr/c/0/1 -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/invalid_stores/array_v3_corrupt_zarr_json.zarr/c/1/0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dandi/dandi-cli/HEAD/dandi/tests/data/zarr3_stores/invalid_stores/array_v3_corrupt_zarr_json.zarr/c/1/0 -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/invalid_stores/array_v3_corrupt_zarr_json.zarr/c/1/1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dandi/dandi-cli/HEAD/dandi/tests/data/zarr3_stores/invalid_stores/array_v3_corrupt_zarr_json.zarr/c/1/1 -------------------------------------------------------------------------------- 
/dandi/tests/data/zarr3_stores/invalid_stores/arrays_in_groups_node_type_problem.zarr/foo/zarr.json: -------------------------------------------------------------------------------- 1 | { 2 | "attributes": {}, 3 | "zarr_format": 3, 4 | "consolidated_metadata": null, 5 | "node_type": 1 6 | } 7 | -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/invalid_stores/arrays_in_groups_node_type_problem.zarr/zarr.json: -------------------------------------------------------------------------------- 1 | { 2 | "attributes": {}, 3 | "zarr_format": 3, 4 | "consolidated_metadata": null, 5 | "node_type": "group" 6 | } 7 | -------------------------------------------------------------------------------- /docs/source/cmdline/index.rst: -------------------------------------------------------------------------------- 1 | .. _chap_cmdline: 2 | 3 | ********************** 4 | Command-Line Interface 5 | ********************** 6 | 7 | .. toctree:: 8 | :glob: 9 | 10 | dandi 11 | * 12 | -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/invalid_stores/arrays_in_groups_missing_zarr_json.zarr/foo/zarr.json: -------------------------------------------------------------------------------- 1 | { 2 | "attributes": {}, 3 | "zarr_format": 3, 4 | "consolidated_metadata": null, 5 | "node_type": "group" 6 | } 7 | -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/invalid_stores/arrays_in_groups_missing_zarr_json.zarr/foo1/zarr.json: -------------------------------------------------------------------------------- 1 | { 2 | "attributes": {}, 3 | "zarr_format": 3, 4 | "consolidated_metadata": null, 5 | "node_type": "group" 6 | } 7 | -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/invalid_stores/arrays_in_groups_node_type_problem.zarr/foo1/zarr.json: -------------------------------------------------------------------------------- 1 | { 2 | "attributes": {}, 3 | "zarr_format": 3, 4 | "consolidated_metadata": null, 5 | "node_type": "group" 6 | } 7 | -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/invalid_stores/single_array_missing_zarr_json.zarr/c/0/0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dandi/dandi-cli/HEAD/dandi/tests/data/zarr3_stores/invalid_stores/single_array_missing_zarr_json.zarr/c/0/0 -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/invalid_stores/single_array_missing_zarr_json.zarr/c/0/1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dandi/dandi-cli/HEAD/dandi/tests/data/zarr3_stores/invalid_stores/single_array_missing_zarr_json.zarr/c/0/1 -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/invalid_stores/single_array_missing_zarr_json.zarr/c/1/0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dandi/dandi-cli/HEAD/dandi/tests/data/zarr3_stores/invalid_stores/single_array_missing_zarr_json.zarr/c/1/0 -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/invalid_stores/single_array_missing_zarr_json.zarr/c/1/1: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dandi/dandi-cli/HEAD/dandi/tests/data/zarr3_stores/invalid_stores/single_array_missing_zarr_json.zarr/c/1/1 -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/invalid_stores/single_array_node_type_problem.zarr/c/0/0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dandi/dandi-cli/HEAD/dandi/tests/data/zarr3_stores/invalid_stores/single_array_node_type_problem.zarr/c/0/0 -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/invalid_stores/single_array_node_type_problem.zarr/c/0/1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dandi/dandi-cli/HEAD/dandi/tests/data/zarr3_stores/invalid_stores/single_array_node_type_problem.zarr/c/0/1 -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/invalid_stores/single_array_node_type_problem.zarr/c/1/0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dandi/dandi-cli/HEAD/dandi/tests/data/zarr3_stores/invalid_stores/single_array_node_type_problem.zarr/c/1/0 -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/invalid_stores/single_array_node_type_problem.zarr/c/1/1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dandi/dandi-cli/HEAD/dandi/tests/data/zarr3_stores/invalid_stores/single_array_node_type_problem.zarr/c/1/1 -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/invalid_stores/arrays_in_groups_missing_zarr_json.zarr/foo/bar/zarr.json: -------------------------------------------------------------------------------- 1 | { 2 | "attributes": {}, 3 | "zarr_format": 3, 4 | "consolidated_metadata": null, 5 | "node_type": "group" 6 | } 7 | -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/invalid_stores/arrays_in_groups_missing_zarr_json.zarr/foo1/bar1/zarr.json: -------------------------------------------------------------------------------- 1 | { 2 | "attributes": {}, 3 | "zarr_format": 3, 4 | "consolidated_metadata": null, 5 | "node_type": "group" 6 | } 7 | -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/invalid_stores/arrays_in_groups_node_type_problem.zarr/foo/bar/zarr.json: -------------------------------------------------------------------------------- 1 | { 2 | "attributes": {}, 3 | "zarr_format": 3, 4 | "consolidated_metadata": null, 5 | "node_type": "group" 6 | } 7 | -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/invalid_stores/arrays_in_groups_node_type_problem.zarr/foo1/bar1/zarr.json: -------------------------------------------------------------------------------- 1 | { 2 | "attributes": {}, 3 | "zarr_format": 3, 4 | "consolidated_metadata": null, 5 | "node_type": "group" 6 | } 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info/ 2 | *.pyc 3 | 
.*.swp 4 | .coverage 5 | .coverage.* 6 | .docker/ 7 | .eggs 8 | .idea 9 | .tox/ 10 | __pycache__/ 11 | build/ 12 | dist/ 13 | docs/**/generated/ 14 | pip-wheel-metadata/ 15 | sandbox/ 16 | venv/ 17 | venvs/ 18 | .DS_Store 19 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | # This line includes versioneer.py in sdists, which is necessary for wheels 2 | # built from sdists to have the version set in their metadata. 3 | include versioneer.py 4 | include CHANGELOG.md tox.ini 5 | 6 | graft dandi 7 | 8 | global-exclude *.py[cod] 9 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: github-actions 4 | directory: / 5 | schedule: 6 | interval: weekly 7 | commit-message: 8 | prefix: "[gh-actions]" 9 | include: scope 10 | labels: 11 | - internal 12 | -------------------------------------------------------------------------------- /dandi/tests/test_dandiset.py: -------------------------------------------------------------------------------- 1 | from ..dandiset import Dandiset 2 | 3 | 4 | def test_get_dandiset_record() -> None: 5 | out = Dandiset.get_dandiset_record({"identifier": "000000"}) 6 | # Should have only header with "DO NOT EDIT" 7 | assert out.startswith("# DO NOT EDIT") 8 | assert "000000" in out 9 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | # This file is intentionally minimal. All package metadata has been moved to pyproject.toml 2 | # Only tool-specific configurations that require setup.cfg format remain here. 3 | 4 | [flake8] 5 | max-line-length = 100 6 | ignore = E203,W503 7 | extend-exclude = 8 | _version.py 9 | versioneer.py 10 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | build: 3 | os: ubuntu-22.04 4 | tools: 5 | python: "3.10" 6 | python: 7 | install: 8 | - requirements: docs/requirements.txt 9 | - method: pip 10 | path: . 11 | extra_requirements: [test] 12 | sphinx: 13 | configuration: docs/source/conf.py 14 | fail_on_warning: true 15 | -------------------------------------------------------------------------------- /dandi/bids_validator_deno/__init__.py: -------------------------------------------------------------------------------- 1 | """Package providing an interface to the deno-compiled BIDS validator""" 2 | 3 | from ._validator import bids_validate, get_version 4 | 5 | __all__ = ["bids_validate", "get_version"] 6 | 7 | 8 | def __dir__() -> list[str]: 9 | return list(__all__)  # return a copy of `__all__` to avoid modifying the original 10 | -------------------------------------------------------------------------------- /docs/source/cmdline/digest.rst: -------------------------------------------------------------------------------- 1 | :program:`dandi digest` 2 | ======================= 3 | 4 | :: 5 | 6 | dandi [<global options>] digest [<options>] [<path> ...] 7 | 8 | Calculate file digests 9 | 10 | Options 11 | ------- 12 | 13 | .. 
option:: -d, --digest [dandi-etag|md5|sha1|sha256|sha512|zarr-checksum] 14 | 15 | Digest algorithm to use [default: ``dandi-etag``]
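 16 | 17 | Example 18 | ------- 19 | 20 | A minimal sketch of typical usage, where ``myfile.nwb`` stands in for a hypothetical local file:: 21 | 22 | dandi digest -d sha256 myfile.nwb 23 | 24 | Each argument produces one ``<path>: <digest>`` line on standard output. 25 | 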
toctree:: 9 | :maxdepth: 2 10 | :caption: Contents: 11 | 12 | cmdline/index 13 | modref/index 14 | ref/index 15 | 16 | 17 | Indices and tables 18 | ================== 19 | 20 | * :ref:`genindex` 21 | * :ref:`modindex` 22 | * :ref:`search` 23 | -------------------------------------------------------------------------------- /dandi/tests/test_fixtures.py: -------------------------------------------------------------------------------- 1 | # Largely a helper to quickly trigger fixtures to smoke test them 2 | # and possibly go through their internal asserts 3 | 4 | from pathlib import Path 5 | 6 | from .xfail import mark_xfail_windows_python313_posixsubprocess 7 | 8 | 9 | def test_organized_nwb_dir(organized_nwb_dir: Path) -> None: 10 | pass # Just a smoke test to trigger fixture's asserts 11 | 12 | 13 | @mark_xfail_windows_python313_posixsubprocess 14 | def test_organized_nwb_dir2(organized_nwb_dir2: Path) -> None: 15 | pass # Just a smoke test to trigger fixture's asserts 16 | -------------------------------------------------------------------------------- /dandi/cli/cmd_instances.py: -------------------------------------------------------------------------------- 1 | from dataclasses import asdict 2 | import sys 3 | 4 | import click 5 | import ruamel.yaml 6 | 7 | from .base import map_to_click_exceptions 8 | from ..consts import known_instances 9 | 10 | 11 | @click.command() 12 | @map_to_click_exceptions 13 | def instances(): 14 | """List known DANDI instances that the CLI can interact with""" 15 | yaml = ruamel.yaml.YAML(typ="safe") 16 | yaml.default_flow_style = False 17 | instances = {} 18 | for inst in known_instances.values(): 19 | data = asdict(inst) 20 | data.pop("name") 21 | instances[inst.name] = data 22 | yaml.dump(instances, sys.stdout) 23 | -------------------------------------------------------------------------------- /.github/workflows/typing.yml: -------------------------------------------------------------------------------- 1 | name: Type-check 2 | 3 | on: 4 | - push 5 | - pull_request 6 | 7 | jobs: 8 | typing: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Check out repository 12 | uses: actions/checkout@v6 13 | with: 14 | fetch-depth: 0 15 | 16 | - name: Set up Python 17 | uses: actions/setup-python@v6 18 | with: 19 | python-version: '3.10' 20 | 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | python -m pip install --upgrade tox 25 | 26 | - name: Run type checker 27 | run: tox -e typing 28 | -------------------------------------------------------------------------------- /docs/source/cmdline/shell-completion.rst: -------------------------------------------------------------------------------- 1 | :program:`dandi shell-completion` 2 | ================================= 3 | 4 | :: 5 | 6 | dandi [] shell-completion [] 7 | 8 | Emit a shell script for enabling command completion. 9 | 10 | The output of this command should be "sourced" by bash or zsh to enable command 11 | completion. 12 | 13 | Example:: 14 | 15 | $ source <(dandi shell-completion) 16 | $ dandi -- 17 | 18 | Options 19 | ------- 20 | 21 | .. 
option:: -s, --shell [bash|zsh|fish|auto] 22 | 23 | The shell for which to generate completion code; ``auto`` (default) 24 | attempts autodetection 25 | -------------------------------------------------------------------------------- /dandi/tests/test_helptext.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | 4 | def get_helptext(command): 5 | result = subprocess.run( 6 | [*command, '--help'], 7 | stdout=subprocess.PIPE, 8 | stderr=subprocess.PIPE, 9 | text=True 10 | ) 11 | return result.stdout 12 | 13 | 14 | def test_resource_identifier_helptext(): 15 | # The \n chars must be included for correct rendering 16 | correct = "Accepted resource identifier patterns:\n - <instance name>:<dandiset id>[/<version>]\n" 17 | 18 | ls_helptext = get_helptext(['dandi', 'ls']) 19 | assert correct in ls_helptext 20 | 21 | download_helptext = get_helptext(['dandi', 'download']) 22 | assert correct in download_helptext 23 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Linters 2 | 3 | on: 4 | - push 5 | - pull_request 6 | 7 | jobs: 8 | lint: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Set up environment 12 | uses: actions/checkout@v6 13 | with: 14 | fetch-depth: 0 15 | - name: Set up Python 16 | uses: actions/setup-python@v6 17 | with: 18 | python-version: '3.10' 19 | - name: Install dependencies 20 | run: | 21 | python -m pip install --upgrade pip 22 | python -m pip install --upgrade tox 23 | # Annotate codespell within PR 24 | - uses: codespell-project/codespell-problem-matcher@v1 25 | - name: Run linters 26 | run: | 27 | tox -e lint 28 | -------------------------------------------------------------------------------- /docs/source/examples/dandiapi-as_readable.py: -------------------------------------------------------------------------------- 1 | from dandi.dandiapi import DandiAPIClient 2 | 3 | dandiset_id = "000006"  # ephys dataset from the Svoboda Lab 4 | filepath = "sub-anm372795/sub-anm372795_ses-20170718.nwb"  # 450 kB file 5 | 6 | with DandiAPIClient() as client: 7 | asset = client.get_dandiset(dandiset_id, "draft").get_asset_by_path(filepath) 8 | # https://dandi.readthedocs.io/en/latest/modref/dandiapi.html#dandi.dandiapi.BaseRemoteBlobAsset.as_readable 9 | # provides file-like object which uses fsspec to provide sparse access to content 10 | # of the file on S3: 11 | with asset.as_readable().open() as f: 12 | print(f.read(4)) 13 | f.seek(100) 14 | print(f.read(4))
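 15 | # Since access is sparse, roughly only the byte ranges requested above (plus 16 | # any fsspec read-ahead) are fetched over the network, so even very large 17 | # assets can be sampled this way without downloading them in full. 18 | 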
-------------------------------------------------------------------------------- /dandi/cli/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Command line interface for DANDI client 3 | 4 | TODO: 5 | - consider placing common option definitions into options.py submodule. 6 | pipenv is a nice example although common command definitions are somewhat 7 | too cumbersome. yoh thinks he saw a bit more lightweight somewhere, 8 | e.g. girder-client 9 | """ 10 | 11 | try: 12 | # A trick found on https://github.com/h5py/h5py/issues/1079#issuecomment-567081386 13 | # to avoid some weird behavior on Yarik's laptop where MPI fails to initialize 14 | # and that takes h5py additional 5 seconds to import 15 | import mpi4py  # type: ignore[import-not-found] 16 | 17 | mpi4py.rc(initialize=False) 18 | except Exception: 19 | pass 20 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= -W 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/source/cmdline/dandi.rst: -------------------------------------------------------------------------------- 1 | :program:`dandi` 2 | ================ 3 | 4 | :: 5 | 6 | dandi [<global options>] <command> [<command options>] 7 | 8 | A command-line client for interacting with a DANDI instance, such as the 9 | `DANDI Archive <https://dandiarchive.org>`_. 10 | 11 | Global Options 12 | -------------- 13 | 14 | .. option:: -l <level>, --log-level <level> 15 | 16 | Set the `logging level`_ to the given value; default: ``INFO``. The level 17 | can be given as a case-insensitive level name or as a numeric value. 18 | 19 | .. _logging level: https://docs.python.org/3/library/logging.html#logging-levels 20 | 21 | .. option:: --pdb 22 | 23 | Handle errors by opening `pdb (the Python Debugger) 24 | <https://docs.python.org/3/library/pdb.html>`_ 25 | -------------------------------------------------------------------------------- /dandi/cli/tests/test_shell_completion.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import subprocess 4 | 5 | import pytest 6 | 7 | from ...utils import on_windows 8 | 9 | 10 | # Process substitution is apparently broken on certain older versions of Bash 11 | # on Windows, which includes the version used by conda-forge as of 2021-07-08, 12 | # so we need to skip this test entirely on Windows. 
13 | @pytest.mark.skipif( 14 | shutil.which("bash") is None or on_windows, reason="Bash on POSIX required" 15 | ) 16 | def test_shell_completion_sourceable(): 17 | subprocess.run( 18 | ["bash", "-c", "source <(dandi shell-completion)"], 19 | check=True, 20 | # When testing for conda-forge on Windows, SHELL doesn't seem to be 21 | # set, so we need to set it ourselves: 22 | env={**os.environ, "SHELL": shutil.which("bash")}, 23 | ) 24 | -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/valid_stores/single_array.zarr/zarr.json: -------------------------------------------------------------------------------- 1 | { 2 | "shape": [ 3 | 20, 4 | 20 5 | ], 6 | "data_type": "float32", 7 | "chunk_grid": { 8 | "name": "regular", 9 | "configuration": { 10 | "chunk_shape": [ 11 | 10, 12 | 10 13 | ] 14 | } 15 | }, 16 | "chunk_key_encoding": { 17 | "name": "default", 18 | "configuration": { 19 | "separator": "/" 20 | } 21 | }, 22 | "fill_value": 0.0, 23 | "codecs": [ 24 | { 25 | "name": "bytes", 26 | "configuration": { 27 | "endian": "little" 28 | } 29 | }, 30 | { 31 | "name": "zstd", 32 | "configuration": { 33 | "level": 0, 34 | "checksum": false 35 | } 36 | } 37 | ], 38 | "attributes": {}, 39 | "zarr_format": 3, 40 | "node_type": "array", 41 | "storage_transformers": [] 42 | } 43 | -------------------------------------------------------------------------------- /dandi/cli/cmd_digest.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import click 4 | 5 | from .base import map_to_click_exceptions 6 | 7 | 8 | @click.command() 9 | @click.option( 10 | "-d", 11 | "--digest", 12 | "digest_alg", 13 | type=click.Choice( 14 | ["dandi-etag", "md5", "sha1", "sha256", "sha512", "zarr-checksum"], 15 | case_sensitive=False, 16 | ), 17 | default="dandi-etag", 18 | help="Digest algorithm to use", 19 | show_default=True, 20 | ) 21 | @click.argument("paths", nargs=-1, type=click.Path(exists=True)) 22 | @map_to_click_exceptions 23 | def digest(paths: tuple[str, ...], digest_alg: str) -> None: 24 | """Calculate file digests""" 25 | # Avoid heavy import by importing within function: 26 | from ..support.digests import get_digest 27 | 28 | for p in paths: 29 | print(f"{p}:", get_digest(p, digest=digest_alg)) 30 | -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/invalid_stores/arrays_in_groups_node_type_problem.zarr/foo1/bar1/z1/zarr.json: -------------------------------------------------------------------------------- 1 | { 2 | "shape": [ 3 | 10000, 4 | 10000 5 | ], 6 | "data_type": "int32", 7 | "chunk_grid": { 8 | "name": "regular", 9 | "configuration": { 10 | "chunk_shape": [ 11 | 1000, 12 | 1000 13 | ] 14 | } 15 | }, 16 | "chunk_key_encoding": { 17 | "name": "default", 18 | "configuration": { 19 | "separator": "/" 20 | } 21 | }, 22 | "fill_value": 0, 23 | "codecs": [ 24 | { 25 | "name": "bytes", 26 | "configuration": { 27 | "endian": "little" 28 | } 29 | }, 30 | { 31 | "name": "zstd", 32 | "configuration": { 33 | "level": 0, 34 | "checksum": false 35 | } 36 | } 37 | ], 38 | "attributes": {}, 39 | "zarr_format": 3, 40 | "storage_transformers": [] 41 | } 42 | -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/invalid_stores/single_array_node_type_problem.zarr/zarr.json: -------------------------------------------------------------------------------- 1 | { 2 | 
"shape": [ 3 | 20, 4 | 20 5 | ], 6 | "data_type": "float32", 7 | "chunk_grid": { 8 | "name": "regular", 9 | "configuration": { 10 | "chunk_shape": [ 11 | 10, 12 | 10 13 | ] 14 | } 15 | }, 16 | "chunk_key_encoding": { 17 | "name": "default", 18 | "configuration": { 19 | "separator": "/" 20 | } 21 | }, 22 | "fill_value": 0.0, 23 | "codecs": [ 24 | { 25 | "name": "bytes", 26 | "configuration": { 27 | "endian": "little" 28 | } 29 | }, 30 | { 31 | "name": "zstd", 32 | "configuration": { 33 | "level": 0, 34 | "checksum": false 35 | } 36 | } 37 | ], 38 | "attributes": {}, 39 | "zarr_format": 3, 40 | "node_type": 42, 41 | "storage_transformers": [] 42 | } 43 | -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/valid_stores/arrays_in_groups.zarr/foo/bar/z/zarr.json: -------------------------------------------------------------------------------- 1 | { 2 | "shape": [ 3 | 10000, 4 | 10000 5 | ], 6 | "data_type": "int32", 7 | "chunk_grid": { 8 | "name": "regular", 9 | "configuration": { 10 | "chunk_shape": [ 11 | 1000, 12 | 1000 13 | ] 14 | } 15 | }, 16 | "chunk_key_encoding": { 17 | "name": "default", 18 | "configuration": { 19 | "separator": "/" 20 | } 21 | }, 22 | "fill_value": 0, 23 | "codecs": [ 24 | { 25 | "name": "bytes", 26 | "configuration": { 27 | "endian": "little" 28 | } 29 | }, 30 | { 31 | "name": "zstd", 32 | "configuration": { 33 | "level": 0, 34 | "checksum": false 35 | } 36 | } 37 | ], 38 | "attributes": {}, 39 | "zarr_format": 3, 40 | "node_type": "array", 41 | "storage_transformers": [] 42 | } 43 | -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/valid_stores/arrays_in_groups.zarr/foo1/bar1/z1/zarr.json: -------------------------------------------------------------------------------- 1 | { 2 | "shape": [ 3 | 10000, 4 | 10000 5 | ], 6 | "data_type": "int32", 7 | "chunk_grid": { 8 | "name": "regular", 9 | "configuration": { 10 | "chunk_shape": [ 11 | 1000, 12 | 1000 13 | ] 14 | } 15 | }, 16 | "chunk_key_encoding": { 17 | "name": "default", 18 | "configuration": { 19 | "separator": "/" 20 | } 21 | }, 22 | "fill_value": 0, 23 | "codecs": [ 24 | { 25 | "name": "bytes", 26 | "configuration": { 27 | "endian": "little" 28 | } 29 | }, 30 | { 31 | "name": "zstd", 32 | "configuration": { 33 | "level": 0, 34 | "checksum": false 35 | } 36 | } 37 | ], 38 | "attributes": {}, 39 | "zarr_format": 3, 40 | "node_type": "array", 41 | "storage_transformers": [] 42 | } 43 | -------------------------------------------------------------------------------- /.github/workflows/labels.yml: -------------------------------------------------------------------------------- 1 | name: Check PR Labels 2 | 3 | on: 4 | pull_request: 5 | types: [opened, labeled, unlabeled, synchronize] 6 | 7 | jobs: 8 | check_labels: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Check PR Labels 12 | uses: actions/github-script@v8 13 | with: 14 | script: | 15 | const allowedLabels = ['major', 'minor', 'patch', 'performance', 'internal', 'documentation', 'tests', 'dependencies']; 16 | 17 | const labels = context.payload.pull_request.labels.map(label => label.name); 18 | 19 | const hasValidLabel = labels.some(label => allowedLabels.includes(label)); 20 | if (!hasValidLabel) { 21 | core.setFailed(`The pull request must have one of these labels: ${allowedLabels.join(', ')}`); 22 | } else { 23 | console.log('PR has a valid label.'); 24 | } 25 | 
-------------------------------------------------------------------------------- /dandi/tests/xfail.py: -------------------------------------------------------------------------------- 1 | # ex: set sts=4 ts=4 sw=4 noet: 2 | # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 3 | # 4 | # See LICENSE file distributed along with the dandi-cli package for the 5 | # copyright and license terms. 6 | # 7 | # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 8 | """Define reusable xfail markers for tests. 9 | 10 | This module provides commonly used xfail markers that can be shared across 11 | multiple test modules to avoid duplication. 12 | """ 13 | import sys 14 | 15 | import pytest 16 | 17 | # Reusable xfail markers 18 | 19 | mark_xfail_windows_python313_posixsubprocess = pytest.mark.xfail( 20 | condition=sys.platform == "win32" and sys.version_info >= (3, 13), 21 | reason="Fails on Windows with Python 3.13 due to multiprocessing _posixsubprocess module error", 22 | strict=False, 23 | raises=AssertionError, 24 | ) 25 | -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/invalid_stores/arrays_in_groups_missing_zarr_json.zarr/foo/bar/z/zarr.json: -------------------------------------------------------------------------------- 1 | { 2 | "shape": [ 3 | 10000, 4 | 10000 5 | ], 6 | "data_type": "int32", 7 | "chunk_grid": { 8 | "name": "regular", 9 | "configuration": { 10 | "chunk_shape": [ 11 | 1000, 12 | 1000 13 | ] 14 | } 15 | }, 16 | "chunk_key_encoding": { 17 | "name": "default", 18 | "configuration": { 19 | "separator": "/" 20 | } 21 | }, 22 | "fill_value": 0, 23 | "codecs": [ 24 | { 25 | "name": "bytes", 26 | "configuration": { 27 | "endian": "little" 28 | } 29 | }, 30 | { 31 | "name": "zstd", 32 | "configuration": { 33 | "level": 0, 34 | "checksum": false 35 | } 36 | } 37 | ], 38 | "attributes": {}, 39 | "zarr_format": 3, 40 | "node_type": "array", 41 | "storage_transformers": [] 42 | } 43 | -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/invalid_stores/arrays_in_groups_node_type_problem.zarr/foo/bar/z/zarr.json: -------------------------------------------------------------------------------- 1 | { 2 | "shape": [ 3 | 10000, 4 | 10000 5 | ], 6 | "data_type": "int32", 7 | "chunk_grid": { 8 | "name": "regular", 9 | "configuration": { 10 | "chunk_shape": [ 11 | 1000, 12 | 1000 13 | ] 14 | } 15 | }, 16 | "chunk_key_encoding": { 17 | "name": "default", 18 | "configuration": { 19 | "separator": "/" 20 | } 21 | }, 22 | "fill_value": 0, 23 | "codecs": [ 24 | { 25 | "name": "bytes", 26 | "configuration": { 27 | "endian": "little" 28 | } 29 | }, 30 | { 31 | "name": "zstd", 32 | "configuration": { 33 | "level": 0, 34 | "checksum": false 35 | } 36 | } 37 | ], 38 | "attributes": {}, 39 | "zarr_format": 3, 40 | "node_type": "array", 41 | "storage_transformers": [] 42 | } 43 | -------------------------------------------------------------------------------- /dandi/tests/data/zarr3_stores/invalid_stores/arrays_in_groups_missing_zarr_json.zarr/foo1/bar1/z1/zarr.json: -------------------------------------------------------------------------------- 1 | { 2 | "shape": [ 3 | 10000, 4 | 10000 5 | ], 6 | "data_type": "int32", 7 | "chunk_grid": { 8 | "name": "regular", 9 | "configuration": { 10 | "chunk_shape": [ 11 | 1000, 12 | 1000 13 | ] 14 | } 15 | }, 16 | "chunk_key_encoding": { 17 | "name": "default", 18 | "configuration": { 19 | 
"separator": "/" 20 | } 21 | }, 22 | "fill_value": 0, 23 | "codecs": [ 24 | { 25 | "name": "bytes", 26 | "configuration": { 27 | "endian": "little" 28 | } 29 | }, 30 | { 31 | "name": "zstd", 32 | "configuration": { 33 | "level": 0, 34 | "checksum": false 35 | } 36 | } 37 | ], 38 | "attributes": {}, 39 | "zarr_format": 3, 40 | "node_type": "array", 41 | "storage_transformers": [] 42 | } 43 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: Build Docs 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | 9 | jobs: 10 | docs: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | python: 16 | - '3.10' 17 | #- 3.11 18 | steps: 19 | - name: Check out repository 20 | uses: actions/checkout@v6 21 | with: 22 | # Fetch all commits so that versioneer will return something compatible 23 | # with semantic-version 24 | fetch-depth: 0 25 | 26 | - name: Set up Python ${{ matrix.python }} 27 | uses: actions/setup-python@v6 28 | with: 29 | python-version: ${{ matrix.python }} 30 | 31 | - name: Install dependencies 32 | run: | 33 | python -m pip install --upgrade pip wheel 34 | python -m pip install --upgrade tox 35 | 36 | - name: Build docs 37 | run: tox -e docs 38 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /dandi/cli/cmd_delete.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from .base import devel_debug_option, instance_option, map_to_click_exceptions 4 | 5 | 6 | @click.command() 7 | @click.option("--skip-missing", is_flag=True) 8 | @click.option( 9 | "--force", 10 | is_flag=True, 11 | help="Force deletion without requesting interactive confirmation", 12 | ) 13 | @click.argument("paths", nargs=-1, type=click.Path(exists=False, dir_okay=True)) 14 | @instance_option() 15 | @devel_debug_option() 16 | @map_to_click_exceptions 17 | def delete(paths, skip_missing, dandi_instance, force, devel_debug=False): 18 | """Delete dandisets and assets from the server. 19 | 20 | PATH could be a local path or a URL to an asset, directory, or an entire 21 | dandiset. 
22 | """ 23 | from ..delete import delete 24 | 25 | delete( 26 | paths, 27 | dandi_instance=dandi_instance, 28 | devel_debug=devel_debug, 29 | force=force, 30 | skip_missing=skip_missing, 31 | ) 32 | -------------------------------------------------------------------------------- /dandi/tests/test_helpers.py: -------------------------------------------------------------------------------- 1 | from operator import attrgetter 2 | from pathlib import Path 3 | 4 | 5 | # This needs to be in a file named "test_*.py" so that pytest performs its 6 | # assertion rewriting on it. 7 | def assert_dirtrees_eq(tree1: Path, tree2: Path) -> None: 8 | """Assert that the file trees at the given paths are equal""" 9 | assert sorted(map(attrgetter("name"), tree1.iterdir())) == sorted( 10 | map(attrgetter("name"), tree2.iterdir()) 11 | ) 12 | for p1 in tree1.iterdir(): 13 | p2 = tree2 / p1.name 14 | assert p1.is_dir() == p2.is_dir() 15 | if p1.is_dir(): 16 | assert_dirtrees_eq(p1, p2) 17 | # TODO: Considering using the identify library to test for binary-ness. 18 | # (We can't use mimetypes, as .json maps to application/json instead of 19 | # text/json.) 20 | elif p1.suffix in {".txt", ".py", ".json"}: 21 | assert p1.read_text() == p2.read_text() 22 | else: 23 | assert p1.read_bytes() == p2.read_bytes() 24 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL" 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | schedule: 9 | - cron: "31 5 * * 5" 10 | 11 | jobs: 12 | analyze: 13 | name: Analyze 14 | runs-on: ubuntu-latest 15 | permissions: 16 | actions: read 17 | contents: read 18 | security-events: write 19 | 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | language: [ python ] 24 | 25 | steps: 26 | - name: Checkout 27 | uses: actions/checkout@v6 28 | 29 | - name: Initialize CodeQL 30 | uses: github/codeql-action/init@v4 31 | with: 32 | languages: ${{ matrix.language }} 33 | queries: +security-and-quality 34 | config-file: codeql.yml 35 | 36 | - name: Autobuild 37 | uses: github/codeql-action/autobuild@v4 38 | 39 | - name: Perform CodeQL Analysis 40 | uses: github/codeql-action/analyze@v4 41 | with: 42 | category: "/language:${{ matrix.language }}" 43 | -------------------------------------------------------------------------------- /.et: -------------------------------------------------------------------------------- 1 | { "bad_versions" : [ 2 | "0.0.1", 3 | "0.0.2", 4 | "0.1", 5 | "0.1.0", 6 | "0.2.0", 7 | "0.3.0", 8 | "0.4.0", 9 | "0.4.1", 10 | "0.4.2", 11 | "0.4.3", 12 | "0.4.4", 13 | "0.4.5", 14 | "0.4.6", 15 | "0.5.0", 16 | "0.6.0", 17 | "0.6.1", 18 | "0.6.2", 19 | "0.6.4", 20 | "0.7.0", 21 | "0.7.1", 22 | "0.7.2", 23 | "0.8.0", 24 | "0.9.0", 25 | "0.10.0", 26 | "0.11.0", 27 | "0.12.0", 28 | "0.12.1", 29 | "0.13.0", 30 | "0.13.1", 31 | "0.13.2", 32 | "0.14.0", 33 | "0.14.1", 34 | "0.14.2", 35 | "0.15.0", 36 | "0.16.0", 37 | "0.17.0", 38 | "0.18.0", 39 | "0.19.0", 40 | "0.20.0", 41 | "0.21.0", 42 | "0.22.0", 43 | "0.23.0", 44 | "0.23.1", 45 | "0.23.2", 46 | "0.24.0", 47 | "0.25.0", 48 | "0.26.0", 49 | "0.26.1", 50 | "0.27.0", 51 | "0.27.1", 52 | "0.27.2", 53 | "0.27.3", 54 | "0.28.0", 55 | "0.29.0", 56 | "0.30.0", 57 | "0.30.1", 58 | "0.30.2", 59 | "0.30.3", 60 | "0.31.0", 61 | "0.32.0", 62 | "0.32.1", 63 | "0.32.2", 64 | "0.33.0" 65 | ] 66 | } 67 | 
-------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v4.0.1 6 | hooks: 7 | - id: trailing-whitespace 8 | - id: end-of-file-fixer 9 | - id: check-yaml 10 | - id: check-added-large-files 11 | - repo: https://github.com/psf/black 12 | rev: 22.3.0 13 | hooks: 14 | - id: black 15 | exclude: ^(dandi/_version\.py|dandi/due\.py|versioneer\.py)$ 16 | - repo: https://github.com/PyCQA/isort 17 | rev: 5.12.0 18 | hooks: 19 | - id: isort 20 | exclude: ^(dandi/_version\.py|dandi/due\.py|versioneer\.py)$ 21 | - repo: https://github.com/codespell-project/codespell 22 | rev: v2.4.1 23 | hooks: 24 | - id: codespell 25 | exclude: ^(dandi/_version\.py|dandi/due\.py|versioneer\.py|pyproject\.toml)$ 26 | additional_dependencies: 27 | - tomli; python_version<'3.11' 28 | - repo: https://github.com/PyCQA/flake8 29 | rev: 7.0.0 30 | hooks: 31 | - id: flake8 32 | exclude: ^(dandi/_version\.py|dandi/due\.py|versioneer\.py)$ 33 | -------------------------------------------------------------------------------- /docs/source/cmdline/instances.rst: -------------------------------------------------------------------------------- 1 | :program:`dandi instances` 2 | ========================== 3 | 4 | :: 5 | 6 | dandi [<global options>] instances 7 | 8 | List known DANDI instances that can be passed to the 9 | ``-i``/``--dandi-instance`` option of other subcommands for the CLI to 10 | interact with. Output is in YAML. 11 | 12 | Example output: 13 | 14 | .. code:: yaml 15 | 16 | dandi: 17 | api: https://api.dandiarchive.org/api 18 | gui: https://dandiarchive.org 19 | dandi-api-local-docker-tests: 20 | api: http://localhost:8000/api 21 | gui: http://localhost:8085 22 | dandi-sandbox: 23 | api: https://api.sandbox.dandiarchive.org/api 24 | gui: https://sandbox.dandiarchive.org 25 | linc-staging: 26 | api: https://staging-api.lincbrain.org/api 27 | gui: https://staging.lincbrain.org 28 | linc: 29 | api: https://api.lincbrain.org/api 30 | gui: https://lincbrain.org 31 | ember-dandi-sandbox: 32 | api: https://api-dandi-sandbox.emberarchive.org/api 33 | gui: https://apl-setup--ember-dandi-archive.netlify.app/ 34 | ember-dandi: 35 | api: https://api-dandi.emberarchive.org/api 36 | gui: https://dandi.emberarchive.org 37 | -------------------------------------------------------------------------------- /dandi/tests/data/metadata/metadata2asset_simple1.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "dandiasset:bfc23fb6192b41c083a7257e09a3702b", 3 | "schemaKey": "Asset", 4 | "schemaVersion": "0.4.1", 5 | "keywords": [ 6 | "keyword1", 7 | "keyword 2" 8 | ], 9 | "access": [ 10 | { 11 | "schemaKey": "AccessRequirements", 12 | "status": "dandi:OpenAccess" 13 | } 14 | ], 15 | "wasGeneratedBy": [ 16 | { 17 | "schemaKey": "Session", 18 | "identifier": "session_id1", 19 | "name": "session_id1", 20 | "description": "session_description1", 21 | "startDate": "2017-04-15T12:00:00Z" 22 | } 23 | ], 24 | "contentSize": 69105, 25 | "encodingFormat": "application/x-nwb", 26 | "digest": { 27 | "dandi:dandi-etag": "e455839e5ab2fa659861f58a423fd17f-1" 28 | }, 29 | "path": "/test/path", 30 | "wasDerivedFrom": [ 31 | { 32 | "schemaKey": "BioSample", 33 | "identifier": "tissue42", 34 | 
"sampleType": { 35 | "schemaKey": "SampleType", 36 | "name": "tissuesample" 37 | } 38 | } 39 | ], 40 | "wasAttributedTo": [ 41 | { 42 | "schemaKey": "Participant", 43 | "identifier": "sub-01" 44 | } 45 | ], 46 | "relatedResource": [] 47 | } 48 | -------------------------------------------------------------------------------- /.github/workflows/claude.yml: -------------------------------------------------------------------------------- 1 | name: Claude Code 2 | 3 | on: 4 | issue_comment: 5 | types: [created] 6 | pull_request_review_comment: 7 | types: [created] 8 | issues: 9 | types: [opened, assigned] 10 | pull_request_review: 11 | types: [submitted] 12 | 13 | jobs: 14 | claude: 15 | if: | 16 | (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) || 17 | (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) || 18 | (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) || 19 | (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude'))) 20 | runs-on: ubuntu-latest 21 | permissions: 22 | contents: read 23 | pull-requests: read 24 | issues: read 25 | id-token: write 26 | steps: 27 | - name: Checkout repository 28 | uses: actions/checkout@v6 29 | with: 30 | fetch-depth: 1 31 | 32 | - name: Run Claude Code 33 | id: claude 34 | uses: anthropics/claude-code-action@beta 35 | with: 36 | anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} 37 | 38 | -------------------------------------------------------------------------------- /docs/source/cmdline/validate.rst: -------------------------------------------------------------------------------- 1 | :program:`dandi validate` 2 | ========================= 3 | 4 | :: 5 | 6 | dandi [] validate [ ...] 7 | 8 | Validate files for data standards compliance. 9 | 10 | Exits with non-zero exit code if any file is not compliant. 11 | 12 | Options 13 | ------- 14 | 15 | .. option:: -g, --grouping [none|path] 16 | 17 | Set how to group reported errors & warnings: by path or not at all 18 | (default) 19 | 20 | .. option:: --ignore REGEX 21 | 22 | Ignore any validation errors & warnings whose ID matches the given regular 23 | expression 24 | 25 | .. option:: --min-severity [HINT|WARNING|ERROR] 26 | 27 | Only display issues with severities above this level (HINT by default) 28 | 29 | 30 | Development Options 31 | ------------------- 32 | 33 | The following options are intended only for development & testing purposes. 34 | They are only available if the :envvar:`DANDI_DEVEL` environment variable is 35 | set to a nonempty value. 36 | 37 | .. option:: --allow-any-path 38 | 39 | Validate all file types, not just NWBs and Zarrs 40 | 41 | .. option:: --devel-debug 42 | 43 | Do not use pyout callbacks, do not swallow exceptions, do not parallelize. 44 | 45 | .. 
option:: --schema 46 | 47 | Validate against new schema version 48 | -------------------------------------------------------------------------------- /dandi/cli/tests/test_instances.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from click.testing import CliRunner 4 | 5 | from ..cmd_instances import instances 6 | 7 | 8 | def test_cmd_instances(monkeypatch): 9 | instancehost = os.environ.get("DANDI_INSTANCEHOST", "localhost") 10 | r = CliRunner().invoke(instances, []) 11 | assert r.exit_code == 0 12 | assert r.output == ( 13 | "dandi:\n" 14 | " api: https://api.dandiarchive.org/api\n" 15 | " gui: https://dandiarchive.org\n" 16 | "dandi-api-local-docker-tests:\n" 17 | f" api: http://{instancehost}:8000/api\n" 18 | f" gui: http://{instancehost}:8085\n" 19 | "dandi-sandbox:\n" 20 | " api: https://api.sandbox.dandiarchive.org/api\n" 21 | " gui: https://sandbox.dandiarchive.org\n" 22 | "ember-dandi:\n" 23 | " api: https://api-dandi.emberarchive.org/api\n" 24 | " gui: https://dandi.emberarchive.org\n" 25 | "ember-dandi-sandbox:\n" 26 | " api: https://api-dandi-sandbox.emberarchive.org/api\n" 27 | " gui: https://apl-setup--ember-dandi-archive.netlify.app/\n" 28 | "linc:\n" 29 | " api: https://api.lincbrain.org/api\n" 30 | " gui: https://lincbrain.org\n" 31 | "linc-staging:\n" 32 | " api: https://staging-api.lincbrain.org/api\n" 33 | " gui: https://staging.lincbrain.org\n" 34 | ) 35 | -------------------------------------------------------------------------------- /docs/source/modref/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | .. vi: set ft=rst sts=4 ts=4 sw=4 et tw=79: 3 | 4 | .. currentmodule:: dandi 5 | 6 | .. _chap_modref: 7 | 8 | ********** 9 | Python API 10 | ********** 11 | 12 | High-level user interfaces 13 | ========================== 14 | 15 | Such interfaces mirror :ref:`Command-Line Interfaces `. 16 | 17 | .. autosummary:: 18 | :toctree: generated 19 | 20 | delete 21 | download 22 | move 23 | organize 24 | upload 25 | validate 26 | 27 | Mid-level user interfaces 28 | ========================== 29 | 30 | Object-oriented interfaces to manipulate Dandisets and assets on a DANDI instance. 31 | 32 | .. toctree:: 33 | 34 | dandiarchive 35 | 36 | Low-level user interfaces 37 | ========================= 38 | 39 | Low level interfaces to e.g. interact with the DANDI REST API and files directly. 40 | 41 | .. toctree:: 42 | 43 | dandiapi 44 | files 45 | misctypes 46 | 47 | Support functionality 48 | ===================== 49 | 50 | .. toctree:: 51 | 52 | consts 53 | utils 54 | support.digests 55 | 56 | Test infrastructure 57 | =================== 58 | 59 | .. autosummary:: 60 | :toctree: generated 61 | 62 | tests.fixtures 63 | tests.skip 64 | 65 | .. 66 | Command line interface infrastructure 67 | ===================================== 68 | 69 | .. autosummary:: 70 | :toctree: generated 71 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- 3 | # vi: set ft=python sts=4 ts=4 sw=4 et: 4 | # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 5 | # 6 | # See LICENSE file distributed along with the dandi-cli package for the 7 | # copyright and license terms. 
8 | # 9 | # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 10 | """Build helper.""" 11 | 12 | import os.path 13 | import sys 14 | 15 | from setuptools import setup 16 | 17 | if sys.version_info < (3,): 18 | raise RuntimeError( 19 | "dandi-cli's setup.py requires python 3 or later. " 20 | "You are using %s" % sys.version 21 | ) 22 | 23 | # This is needed for versioneer to be importable when building with PEP 517. 24 | # See and links 25 | # therein for more information. 26 | sys.path.insert(0, os.path.dirname(__file__)) 27 | 28 | try: 29 | import versioneer 30 | 31 | setup_kw = { 32 | "version": versioneer.get_version(), 33 | "cmdclass": versioneer.get_cmdclass(), 34 | } 35 | except ImportError: 36 | # see https://github.com/warner/python-versioneer/issues/192 37 | print("WARNING: failed to import versioneer, falling back to no version for now") 38 | setup_kw = {} 39 | 40 | if __name__ == "__main__": 41 | setup(name="dandi", **setup_kw) 42 | -------------------------------------------------------------------------------- /docs/source/cmdline/delete.rst: -------------------------------------------------------------------------------- 1 | :program:`dandi delete` 2 | ======================= 3 | 4 | :: 5 | 6 | dandi [] delete [] [ ...] 7 | 8 | Delete Dandisets and assets from the server. 9 | 10 | Each argument must be either a file path pointing to an asset file or directory 11 | in a local Dandiset (in which case the corresponding assets are deleted on the 12 | remote server) or a :ref:`resource identifier ` pointing to a 13 | remote asset, directory, or entire Dandiset. 14 | 15 | Options 16 | ------- 17 | 18 | .. option:: --force 19 | 20 | Force deletion without requesting interactive confirmation 21 | 22 | .. option:: -i, --dandi-instance 23 | 24 | DANDI instance (either a base URL or a known instance name) to delete 25 | assets & Dandisets from [default: ``dandi``] 26 | 27 | .. option:: --skip-missing 28 | 29 | By default, if an argument points to a remote resource that does not exist, 30 | an error is raised. If :option:`--skip-missing` is supplied, missing 31 | resources are instead simply silently ignored. 32 | 33 | 34 | Development Options 35 | ------------------- 36 | 37 | The following options are intended only for development & testing purposes. 38 | They are only available if the :envvar:`DANDI_DEVEL` environment variable is 39 | set to a nonempty value. 40 | 41 | .. option:: --devel-debug 42 | 43 | Do not use pyout callbacks, do not swallow exceptions, do not parallelize. 44 | -------------------------------------------------------------------------------- /docs/source/examples/dandiapi-example.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from dandi.dandiapi import DandiAPIClient 4 | 5 | with DandiAPIClient.for_dandi_instance("dandi") as client: 6 | for dandiset in client.get_dandisets(): 7 | # Note: for demo purposes we go only through a few dandisets, and skip all others 8 | # so comment out/remove this condition if you would like to go through all. 
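        # (The numeric range below is an arbitrary demo filter; any predicate on
        # the identifier works, e.g. a hypothetical explicit allow-list:
        #     if dandiset.identifier not in {"000036", "000037"}: continue
        # Note that dandiset.identifier is a zero-padded string such as
        # "000036", hence the int() conversion.)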
9 | if not (35 < int(dandiset.identifier) < 40): 10 | print(f"For demo purposes skipping {dandiset}") 11 | continue 12 | if dandiset.most_recent_published_version is None: 13 | continue 14 | latest_dandiset = dandiset.for_version(dandiset.most_recent_published_version) 15 | for asset in latest_dandiset.get_assets(): 16 | metadata = asset.get_metadata() 17 | if any( 18 | mtt is not None and "two-photon" in mtt.name 19 | for mtt in (metadata.measurementTechnique or []) 20 | ): 21 | print(json.dumps(metadata.json_dict(), indent=4)) 22 | # Can be used to also download the asset: 23 | # asset.download(pathlib.Path(dandiset.identifier, asset.path)) 24 | # Note: for demonstration purposes we stop at a single asset found 25 | print( 26 | f"\n\nAn example two-photon measurement technique was found in dandiset {dandiset}. For demonstration purposes, skipping other assets." 27 | ) 28 | break 29 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Auto-release on PR merge 2 | 3 | on: 4 | # ATM, this is the closest trigger to a PR merging 5 | push: 6 | branches: 7 | - master 8 | workflow_dispatch: 9 | 10 | jobs: 11 | auto-release: 12 | runs-on: ubuntu-latest 13 | if: "!contains(github.event.head_commit.message, 'ci skip') && !contains(github.event.head_commit.message, 'skip ci')" 14 | steps: 15 | - name: Checkout source 16 | uses: actions/checkout@v6 17 | with: 18 | fetch-depth: 0 19 | 20 | - name: Download latest auto 21 | run: | 22 | auto_download_url="$(curl -fsSL https://api.github.com/repos/intuit/auto/releases/latest | jq -r '.assets[] | select(.name == "auto-linux.gz") | .browser_download_url')" 23 | wget -O- "$auto_download_url" | gunzip > ~/auto 24 | chmod a+x ~/auto 25 | 26 | - name: Set up Python 27 | uses: actions/setup-python@v6 28 | with: 29 | python-version: '3.10' 30 | 31 | - name: Install build & twine 32 | run: python -m pip install build twine 33 | 34 | - name: Create release 35 | run: | 36 | echo "@${{ github.actor }} is creating a release triggered by ${{ github.event_name }}" 37 | if [ "${{ github.event_name }}" = workflow_dispatch ] 38 | then opts= 39 | else opts=--only-publish-with-release-label 40 | fi 41 | ~/auto shipit -vv $opts 42 | env: 43 | PROTECTED_BRANCH_REVIEWER_TOKEN: ${{ secrets.GH_TOKEN }} 44 | GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 45 | TWINE_USERNAME: __token__ 46 | TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} 47 | 48 | # vim:set sts=2: 49 | -------------------------------------------------------------------------------- /dandi/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | DANDI 3 | ===== 4 | 5 | The `dandi `_ library provides both a 6 | command line interface (CLI) and a Python API for interacting with `DANDI 7 | Archive `_. 8 | 9 | Additional references: 10 | 11 | - `Source Git repository `_ 12 | - `Library documentation `_ 13 | - `DANDI Docs `_ 14 | """ 15 | 16 | import logging 17 | import os 18 | 19 | from . import _version 20 | 21 | __version__ = _version.get_versions()["version"] 22 | 23 | 24 | from .due import Doi, due 25 | 26 | due.cite( 27 | Doi("10.5281/zenodo.3692138"), # lgtm [py/procedure-return-value-used] 28 | cite_module=True, # highly specialized -- if imported, means used. 
29 | description="Client to interact with DANDI Archive", 30 | path="dandi-cli", 31 | version=__version__, # since yoh hijacked dandi for module but is not brave enough 32 | # to claim it to be dandi as the whole 33 | ) 34 | 35 | 36 | # 37 | # Basic logger configuration 38 | # 39 | 40 | 41 | def get_logger(name=None): 42 | """Return a logger to use""" 43 | return logging.getLogger("dandi" + (".%s" % name if name else "")) 44 | 45 | 46 | def set_logger_level(lgr, level): 47 | if isinstance(level, int): 48 | pass 49 | elif level.isnumeric(): 50 | level = int(level) 51 | elif level.isalpha(): 52 | level = getattr(logging, level) 53 | else: 54 | lgr.warning("Do not know how to treat loglevel %s" % level) 55 | return 56 | lgr.setLevel(level) 57 | 58 | 59 | lgr = get_logger() 60 | set_logger_level(lgr, os.environ.get("DANDI_LOG_LEVEL", logging.INFO)) 61 | -------------------------------------------------------------------------------- /dandi/cli/cmd_shell_completion.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import basename, normcase, splitext 3 | 4 | import click 5 | from packaging.version import Version 6 | 7 | SHELLS = ["bash", "zsh", "fish"] 8 | 9 | 10 | @click.command("shell-completion") 11 | @click.option( 12 | "-s", 13 | "--shell", 14 | type=click.Choice(["auto"] + SHELLS), 15 | default="auto", 16 | show_default=True, 17 | help="The shell for which to generate completion code; `auto` attempts autodetection", 18 | ) 19 | def shell_completion(shell): 20 | """ 21 | Emit shell script for enabling command completion. 22 | 23 | The output of this command should be "sourced" by bash or zsh to enable 24 | command completion. 25 | 26 | Example: 27 | 28 | \b 29 | $ source <(dandi shell-completion) 30 | $ dandi -- 31 | """ 32 | if shell == "auto": 33 | try: 34 | shell = basename(os.environ["SHELL"]) 35 | except KeyError: 36 | raise click.UsageError( 37 | "Could not determine running shell: SHELL environment variable not set" 38 | ) 39 | shell = normcase(shell) 40 | stem, ext = splitext(shell) 41 | if ext in (".com", ".exe", ".bat"): 42 | shell = stem 43 | if shell not in SHELLS: 44 | raise click.UsageError(f"Unsupported/unrecognized shell {shell!r}") 45 | if Version(click.__version__) < Version("8.0.0"): 46 | varfmt = "source_{shell}" 47 | else: 48 | varfmt = "{shell}_source" 49 | os.environ["_DANDI_COMPLETE"] = varfmt.format(shell=shell) 50 | 51 | # Avoid circular import by importing within function: 52 | from .command import main 53 | 54 | main.main(args=[]) 55 | -------------------------------------------------------------------------------- /tools/migrate-dandisets.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import click 3 | import requests 4 | 5 | from dandi.dandiapi import DandiAPIClient 6 | from dandi.dandiset import APIDandiset 7 | 8 | 9 | @click.command() 10 | @click.option( 11 | "-d", "--delete-extant", is_flag=True, help="Delete Dandisets that already exist" 12 | ) 13 | @click.option("--only-metadata", is_flag=True, help="Only update Dandiset metadata") 14 | @click.argument("api_url") 15 | @click.argument("token") 16 | @click.argument("dandiset_path", nargs=-1) 17 | def main(api_url, token, dandiset_path, delete_extant, only_metadata): 18 | client = DandiAPIClient(api_url=api_url, token=token) 19 | with client.session(): 20 | for dpath in dandiset_path: 21 | dandiset = APIDandiset(dpath) 22 | if delete_extant: 23 | try: 24 | 
client.get_dandiset(dandiset.identifier, "draft") 25 | except requests.HTTPError as e: 26 | if e.response.status_code != 404: 27 | raise 28 | else: 29 | print("Dandiset", dandiset.identifier, "already exists; deleting") 30 | client.delete(f"/dandisets/{dandiset.identifier}/") 31 | if only_metadata: 32 | print("Setting metadata for Dandiset", dandiset.identifier) 33 | client.set_dandiset_metadata( 34 | dandiset.identifier, metadata=dandiset.metadata 35 | ) 36 | else: 37 | print("Creating Dandiset", dandiset.identifier) 38 | client.create_dandiset( 39 | name=dandiset.metadata.get("name", ""), metadata=dandiset.metadata 40 | ) 41 | 42 | 43 | if __name__ == "__main__": 44 | main() 45 | -------------------------------------------------------------------------------- /dandi/cli/tests/test_formatter.py: -------------------------------------------------------------------------------- 1 | from io import StringIO 2 | 3 | import pytest 4 | 5 | from ..formatter import JSONFormatter, JSONLinesFormatter 6 | 7 | 8 | def test_json_formatter(): 9 | out = StringIO() 10 | fmtr = JSONFormatter(out=out) 11 | with fmtr: 12 | fmtr({"foo": 23, "bar": 42}) 13 | fmtr({"bar": "gnusto", "foo": "cleesh"}) 14 | assert out.getvalue() == ( 15 | "[\n" 16 | ' {"bar": 42, "foo": 23},\n' 17 | ' {"bar": "gnusto", "foo": "cleesh"}\n' 18 | "]\n" 19 | ) 20 | 21 | 22 | def test_json_formatter_indented(): 23 | out = StringIO() 24 | fmtr = JSONFormatter(indent=2, out=out) 25 | with fmtr: 26 | fmtr({"foo": 23, "bar": 42}) 27 | fmtr({"bar": "gnusto", "foo": "cleesh"}) 28 | assert out.getvalue() == ( 29 | "[\n" 30 | " {\n" 31 | ' "bar": 42,\n' 32 | ' "foo": 23\n' 33 | " },\n" 34 | " {\n" 35 | ' "bar": "gnusto",\n' 36 | ' "foo": "cleesh"\n' 37 | " }\n" 38 | "]\n" 39 | ) 40 | 41 | 42 | @pytest.mark.parametrize("indent", [None, 2]) 43 | def test_json_formatter_empty(indent): 44 | out = StringIO() 45 | fmtr = JSONFormatter(indent=indent, out=out) 46 | with fmtr: 47 | pass 48 | assert out.getvalue() == "[]\n" 49 | 50 | 51 | def test_json_lines_formatter(): 52 | out = StringIO() 53 | fmtr = JSONLinesFormatter(out=out) 54 | with fmtr: 55 | fmtr({"foo": 23, "bar": 42}) 56 | fmtr({"bar": "gnusto", "foo": "cleesh"}) 57 | assert out.getvalue() == ( 58 | '{"bar": 42, "foo": 23}\n{"bar": "gnusto", "foo": "cleesh"}\n' 59 | ) 60 | 61 | 62 | def test_json_lines_formatter_empty(): 63 | out = StringIO() 64 | fmtr = JSONLinesFormatter(out=out) 65 | with fmtr: 66 | pass 67 | assert out.getvalue() == "" 68 | -------------------------------------------------------------------------------- /CLAUDE.md: -------------------------------------------------------------------------------- 1 | # CLAUDE.md 2 | 3 | This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. 4 | 5 | ## Build/Test Commands 6 | - Run tests: `tox -e py3` but should also work with just `python -m pytest dandi` if in a venv 7 | - Tests which require an instance of the archive would use a fixture to start one using docker-compose. 
8 | - Set env var `DANDI_TESTS_PULL_DOCKER_COMPOSE=""` (to empty value) to avoid `docker compose pull` to speed up repetitive runs 9 | - Run single test: `tox r -e py3 -- dandi/tests/test_file.py::test_function -v` 10 | - Lint and type checking: `tox -e lint,typing` 11 | - Install pre-commit hooks (if not installed as could be indicated by absence of 12 | `.git/hooks/pre-commit`): `pre-commit install` 13 | 14 | ## Committing 15 | - Due to use of `pre-commit` with black and other commands which auto-fix, if changes 16 | were reported to be done, just rerun commit again 2nd time, and if only then if still 17 | does not commit analyze output more 18 | 19 | ## Test Markers 20 | - When adding AI-generated tests, mark them with `@pytest.mark.ai_generated` 21 | - Any new pytest markers must be registered in `tox.ini` under `[pytest]` section in the `markers` list 22 | 23 | ## Code Style 24 | - Code is formatted with Black (line length 100) 25 | - Imports sorted with isort (profile="black") 26 | - Type annotations required for new code 27 | - Use PEP 440 for versioning 28 | - Class names: CamelCase; functions/variables: snake_case 29 | - Exception names end with "Error" (e.g., `ValidateError`) 30 | - Docstrings in NumPy style for public APIs 31 | - Prefer specific exceptions over generic ones 32 | - For CLI, use click library patterns 33 | - Imports organized: stdlib, third-party, local (alphabetical within groups) 34 | 35 | ## Documentation 36 | - Keep docstrings updated when changing function signatures 37 | - CLI help text should be clear and include examples where appropriate 38 | -------------------------------------------------------------------------------- /docs/source/cmdline/ls.rst: -------------------------------------------------------------------------------- 1 | :program:`dandi ls` 2 | =================== 3 | 4 | :: 5 | 6 | dandi [] ls [] [ ...] 7 | 8 | List :file:`*.nwb` files' and Dandisets' metadata. 9 | 10 | The arguments may be either :ref:`resource identifiers ` or paths 11 | to local files/directories. 12 | 13 | Options 14 | ------- 15 | 16 | .. option:: -f, --format [auto|pyout|json|json_pp|json_lines|yaml] 17 | 18 | Choose the format/frontend for output. If ``auto`` (the default), 19 | ``pyout`` will be used in case of multiple files, ``yaml`` for a single 20 | file. 21 | 22 | .. option:: -F, --fields 23 | 24 | Comma-separated list of fields to display. Specifying ``-F ""`` causes a 25 | list of available fields to be printed out. 26 | 27 | .. option:: -J, --jobs 28 | 29 | Number of parallel download jobs [default: 6] 30 | 31 | .. option:: --metadata [api|all|assets] 32 | 33 | Control when to include asset metadata for remote assets: 34 | 35 | - ``api`` [default] — only include asset metadata if returned by the API 36 | response (i.e., if a URL identifying an asset by ID was supplied) 37 | 38 | - ``all`` — make an additional request to fetch asset metadata if not 39 | returned by initial API response 40 | 41 | - ``assets`` — same as ``all`` 42 | 43 | .. option:: -r, --recursive 44 | 45 | Recurse into Dandisets/directories. Only :file:`*.nwb` files will be 46 | considered. 47 | 48 | .. option:: --schema 49 | 50 | Convert metadata to new schema version 51 | 52 | 53 | Development Options 54 | ------------------- 55 | 56 | The following options are intended only for development & testing purposes. 57 | They are only available if the :envvar:`DANDI_DEVEL` environment variable is 58 | set to a nonempty value. 59 | 60 | .. 
option:: --use-fake-digest 61 | 62 | Use dummy value for digests of local files instead of computing 63 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | import dandi 21 | 22 | project = "dandi" 23 | copyright = "2021-%Y The DANDI Team" 24 | author = "The DANDI Team" 25 | 26 | # The full version, including alpha/beta/rc tags 27 | version = dandi.__version__ 28 | release = dandi.__version__ 29 | 30 | 31 | # -- General configuration --------------------------------------------------- 32 | 33 | # Add any Sphinx extension module names here, as strings. They can be 34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 35 | # ones. 36 | extensions = [ 37 | "sphinx.ext.autodoc", 38 | "sphinx.ext.autosummary", 39 | "sphinx.ext.napoleon", 40 | "sphinx.ext.viewcode", 41 | ] 42 | 43 | # Add any paths that contain templates here, relative to this directory. 44 | templates_path = ["_templates"] 45 | 46 | # List of patterns, relative to source directory, that match files and 47 | # directories to ignore when looking for source files. 48 | # This pattern also affects html_static_path and html_extra_path. 49 | exclude_patterns = [] 50 | 51 | autodoc_default_options = { 52 | "members": True, 53 | "undoc-members": True, 54 | } 55 | 56 | napoleon_google_docstring = False 57 | napoleon_numpy_docstring = True 58 | default_role = "py:obj" 59 | 60 | 61 | # -- Options for HTML output ------------------------------------------------- 62 | 63 | # The theme to use for HTML and HTML Help pages. See the documentation for 64 | # a list of builtin themes. 65 | # 66 | html_theme = "furo" 67 | -------------------------------------------------------------------------------- /dandi/exceptions.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import requests 4 | from semantic_version import Version 5 | 6 | 7 | class OrganizeImpossibleError(ValueError): 8 | """Exception to be raised if given current list of files it is impossible 9 | 10 | E.g. 
if metadata is not sufficient or conflicting 11 | """ 12 | 13 | pass 14 | 15 | 16 | class UnknownURLError(ValueError): 17 | """Given url is not known to correspond to DANDI schema(s)""" 18 | 19 | pass 20 | 21 | 22 | class NotFoundError(RuntimeError): 23 | """Online resource which we tried to connect to is not found""" 24 | 25 | pass 26 | 27 | 28 | class FailedToConnectError(RuntimeError): 29 | """Failed to connect to online resource""" 30 | 31 | pass 32 | 33 | 34 | class LockingError(RuntimeError): 35 | """Failed to lock or unlock a resource""" 36 | 37 | pass 38 | 39 | 40 | class CliVersionError(RuntimeError): 41 | """Base class for `CliVersionTooOldError` and `BadCliVersionError`""" 42 | 43 | def __init__( 44 | self, our_version: Version, minversion: Version, bad_versions: list[Version] 45 | ) -> None: 46 | self.our_version = our_version 47 | self.minversion = minversion 48 | self.bad_versions = bad_versions 49 | 50 | def server_requirements(self) -> str: 51 | s = f"Server requires at least version {self.minversion}" 52 | if self.bad_versions: 53 | s += f" (but not {', '.join(map(str, self.bad_versions))})" 54 | return s 55 | 56 | 57 | class CliVersionTooOldError(CliVersionError): 58 | def __str__(self) -> str: 59 | return ( 60 | f"Client version {self.our_version} is too old! " 61 | + self.server_requirements() 62 | ) 63 | 64 | 65 | class BadCliVersionError(CliVersionError): 66 | def __str__(self) -> str: 67 | return ( 68 | f"Client version {self.our_version} is rejected by server! " 69 | + self.server_requirements() 70 | ) 71 | 72 | 73 | class SchemaVersionError(Exception): 74 | pass 75 | 76 | 77 | class UnknownAssetError(ValueError): 78 | pass 79 | 80 | 81 | class HTTP404Error(requests.HTTPError): 82 | pass 83 | 84 | 85 | class UploadError(Exception): 86 | pass 87 | -------------------------------------------------------------------------------- /dandi/due.py: -------------------------------------------------------------------------------- 1 | # emacs: at the end of the file 2 | # ex: set sts=4 ts=4 sw=4 et: 3 | # ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### # 4 | """ 5 | 6 | Stub file for a guaranteed safe import of duecredit constructs: if duecredit 7 | is not available. 8 | 9 | To use it, place it into your project codebase to be imported, e.g. copy as 10 | 11 | cp stub.py /path/tomodule/module/due.py 12 | 13 | Note that it might be better to avoid naming it duecredit.py to avoid shadowing 14 | installed duecredit. 15 | 16 | Then use in your code as 17 | 18 | from .due import due, Doi, BibTeX, Text 19 | 20 | See https://github.com/duecredit/duecredit/blob/master/README.md for examples. 
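A minimal usage sketch (the DOI and function below are placeholders, not
from this codebase):

    from .due import due, Doi

    @due.dcite(Doi("10.0000/placeholder"), description="What this step does")
    def compute():
        ...

With duecredit installed, the reference is collected for citation; without
it, the `dcite` stub defined below simply returns the function unchanged.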
21 | 22 | Origin: Originally a part of the duecredit 23 | Copyright: 2015-2019 DueCredit developers 24 | License: BSD-2 25 | """ 26 | 27 | __version__ = "0.0.8" 28 | 29 | 30 | class InactiveDueCreditCollector: 31 | """Just a stub at the Collector which would not do anything""" 32 | 33 | def _donothing(self, *args, **kwargs): 34 | """Perform no good and no bad""" 35 | pass 36 | 37 | def dcite(self, *args, **kwargs): 38 | """If I could cite I would""" 39 | 40 | def nondecorating_decorator(func): 41 | return func 42 | 43 | return nondecorating_decorator 44 | 45 | active = False 46 | activate = add = cite = dump = load = _donothing 47 | 48 | def __repr__(self): 49 | return self.__class__.__name__ + "()" 50 | 51 | 52 | def _donothing_func(*args, **kwargs): 53 | """Perform no good and no bad""" 54 | pass 55 | 56 | 57 | try: 58 | from duecredit import due, BibTeX, Doi, Url, Text 59 | 60 | if "due" in locals() and not hasattr(due, "cite"): 61 | raise RuntimeError("Imported due lacks .cite. DueCredit is now disabled") 62 | except Exception as e: 63 | if not isinstance(e, ImportError): 64 | import logging 65 | 66 | logging.getLogger("duecredit").error( 67 | "Failed to import duecredit due to %s" % str(e) 68 | ) 69 | # Initiate due stub 70 | due = InactiveDueCreditCollector() 71 | BibTeX = Doi = Url = Text = _donothing_func 72 | 73 | # Emacs mode definitions 74 | # Local Variables: 75 | # mode: python 76 | # py-indent-offset: 4 77 | # tab-width: 4 78 | # indent-tabs-mode: nil 79 | # End: 80 | -------------------------------------------------------------------------------- /dandi/bids_validator_deno/_models.py: -------------------------------------------------------------------------------- 1 | # This file holds the models used to interface with the deno-compiled BIDS validator 2 | # with the `--json` option. The defined entities in this file share the same names and 3 | # structure as those defined in 4 | # https://github.com/bids-standard/bids-validator/blob/main/src/types/validation-result.ts 5 | # and 6 | # https://github.com/bids-standard/bids-validator/blob/main/src/issues/datasetIssues.ts 7 | # The only exception to that rule is that the `ValidationResult` type in the 8 | # BIDS validator source is named `BidsValidationResult` in this file. 
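# A minimal parsing sketch (an assumption for illustration: `output` holds the
# validator's `--json` stdout as a string; pydantic v2 API):
#
#     result = BidsValidationResult.model_validate_json(output)
#     errors = [i for i in result.issues.issues if i.severity is Severity.error]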
9 | 10 | from __future__ import annotations 11 | 12 | from enum import auto 13 | from typing import Any, Literal, Optional, Union 14 | 15 | from pydantic import BaseModel, ConfigDict 16 | 17 | from dandi.utils import StrEnum 18 | 19 | 20 | class _BaseModel(BaseModel): 21 | """ 22 | The base model for all models in this module 23 | """ 24 | 25 | model_config = ConfigDict(strict=True) 26 | 27 | 28 | class BidsValidationResult(_BaseModel): 29 | issues: DatasetIssues 30 | summary: SummaryOutput 31 | derivativesSummary: Optional[dict[str, BidsValidationResult]] = None 32 | 33 | 34 | class DatasetIssues(_BaseModel): 35 | issues: list[Issue] 36 | codeMessages: dict[str, str] 37 | 38 | 39 | class Issue(_BaseModel): 40 | code: str 41 | subCode: Optional[str] = None 42 | severity: Optional[Severity] = None 43 | location: Optional[str] = None 44 | issueMessage: Optional[str] = None 45 | suggestion: Optional[str] = None 46 | affects: Optional[list[str]] = None 47 | rule: Optional[str] = None 48 | line: Optional[int] = None 49 | character: Optional[int] = None 50 | 51 | 52 | class Severity(StrEnum): 53 | warning = auto() 54 | error = auto() 55 | ignore = auto() 56 | 57 | 58 | class SummaryOutput(_BaseModel): 59 | sessions: list[str] 60 | subjects: list[str] 61 | subjectMetadata: list[SubjectMetadata] 62 | tasks: list[str] 63 | modalities: list[str] 64 | secondaryModalities: list[str] 65 | totalFiles: int 66 | size: int 67 | dataProcessed: bool 68 | pet: dict[str, Any] 69 | dataTypes: list[str] 70 | schemaVersion: str 71 | 72 | 73 | class SubjectMetadata(_BaseModel): 74 | participantId: str 75 | age: Union[float, Literal["89+"], None] = None 76 | sex: Optional[str] = None 77 | -------------------------------------------------------------------------------- /docs/source/cmdline/download.rst: -------------------------------------------------------------------------------- 1 | :program:`dandi download` 2 | ========================= 3 | 4 | :: 5 | 6 | dandi [] download [] ... 7 | 8 | Download one or more Dandisets, assets, or folders of assets from DANDI. 9 | 10 | See :ref:`resource_ids` for allowed URL formats. 11 | 12 | Options 13 | ------- 14 | 15 | .. option:: --download [dandiset.yaml,assets,all] 16 | 17 | Comma-separated list of elements to download [default: ``all``] 18 | 19 | .. option:: -e, --existing [error|skip|overwrite|overwrite-different|refresh] 20 | 21 | How to handle paths that already exist locally [default: ``error``] 22 | 23 | For ``error``, if the local file exists, display an error and skip downloading that asset. 24 | 25 | For ``skip``, if the local file exists, skip downloading that asset. 26 | 27 | For ``overwrite``, if the local file exists, overwrite that asset. 28 | 29 | For ``overwrite-different``, if the local file's hash is the same as on the 30 | server, the asset is skipped; otherwise, it is redownloaded. 31 | 32 | For ``refresh``, if the local file's size and mtime are the same as on the 33 | server, the asset is skipped; otherwise, it is redownloaded. 34 | 35 | .. option:: -f, --format [pyout|debug] 36 | 37 | Choose the format/frontend for output [default: ``pyout``] 38 | 39 | .. option:: -i, --dandi-instance 40 | 41 | DANDI instance (either a base URL or a known instance name) to download 42 | from [default: ``dandi``] 43 | 44 | .. option:: -J, --jobs N[:M] 45 | 46 | Number of parallel download jobs and, optionally, number of download subjobs 47 | per Zarr asset [default: 6:4] 48 | 49 | .. option:: -o, --output-dir 50 | 51 | Directory to download to (must exist). 
Files will be downloaded with paths 52 | relative to that directory. [default: current working directory] 53 | 54 | .. option:: --path-type [exact|glob] 55 | 56 | Whether to interpret asset paths in URLs as exact matches or glob patterns 57 | 58 | .. option:: --preserve-tree 59 | 60 | When downloading only part of a Dandiset, also download 61 | :file:`dandiset.yaml` (unless downloading an asset URL that does not 62 | include a Dandiset ID) and do not strip leading directories from asset 63 | paths. Implies ``--download all``. 64 | 65 | .. option:: --sync 66 | 67 | Delete local assets that do not exist on the server after downloading 68 | -------------------------------------------------------------------------------- /dandi/cli/tests/test_command.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from subprocess import PIPE, Popen 4 | import sys 5 | 6 | from click.testing import CliRunner 7 | import pytest 8 | 9 | from ..cmd_ls import ls 10 | from ..cmd_validate import validate 11 | from ..command import __all_commands__ 12 | 13 | 14 | @pytest.mark.parametrize("command", (ls, validate)) 15 | def test_smoke(organized_nwb_dir, command): 16 | runner = CliRunner() 17 | r = runner.invoke(command, [str(organized_nwb_dir)]) 18 | assert r.exit_code == 0, f"Exited abnormally. out={r.stdout}" 19 | assert r.stdout, "There were no output whatsoever" 20 | 21 | # empty invocation should not crash 22 | # But we must cd to the temp directory since current directory could 23 | # have all kinds of files which could trip the command, e.g. validate 24 | # could find some broken test files in the code base 25 | if command is not validate: 26 | with runner.isolated_filesystem(): 27 | r = runner.invoke(command, []) 28 | assert r.exit_code == 0, f"Exited abnormally. out={r.stdout}" 29 | 30 | 31 | @pytest.mark.parametrize("command", __all_commands__) 32 | def test_smoke_help(command): 33 | runner = CliRunner() 34 | r = runner.invoke(command, ["--help"]) 35 | assert r.exit_code == 0, f"Exited abnormally. 
out={r.stdout}" 36 | assert r.stdout, "There were no output whatsoever" 37 | 38 | assert re.match("Usage: .*Options:.*--help", r.stdout, flags=re.DOTALL) is not None 39 | 40 | 41 | def test_no_heavy_imports(): 42 | # Timing --version for being fast is unreliable, so we will verify that 43 | # no h5py or numpy (just in case) module is imported upon import of the 44 | # command 45 | heavy_modules = {"pynwb", "h5py", "numpy"} 46 | env = os.environ.copy() 47 | env["NO_ET"] = "1" 48 | p = Popen( 49 | [ 50 | sys.executable, 51 | "-c", 52 | ( 53 | "import sys; " 54 | "import dandi.cli.command; " 55 | "print(','.join(set(m.split('.')[0] for m in sys.modules)));" 56 | ), 57 | ], 58 | env=env, 59 | stdout=PIPE, 60 | stderr=PIPE, 61 | ) 62 | stdout, stderr = p.communicate() 63 | modules = stdout.decode().split(",") 64 | loaded_heavy = set(modules).intersection(heavy_modules) 65 | 66 | assert not loaded_heavy 67 | assert not stderr or b"Failed to check" in stderr or b"dandi version" in stderr 68 | assert not p.wait() 69 | -------------------------------------------------------------------------------- /dandi/support/tests/test_iterators.py: -------------------------------------------------------------------------------- 1 | from time import sleep 2 | 3 | import pytest 4 | 5 | from dandi.utils import on_windows 6 | 7 | from ..iterators import IteratorWithAggregation 8 | 9 | 10 | def sleeping_range(n, secs=0.01, thr=None): 11 | """Fast generator based on range 12 | 13 | Parameters 14 | ---------- 15 | n : int 16 | Number to pass to `range` 17 | secs : float, optional 18 | Seconds to sleep between iterations 19 | thr : int, optional 20 | If specified, will cause loop to raise ValueError when it 21 | reaches that value 22 | 23 | Yields 24 | ------ 25 | int 26 | Integers like range does 27 | 28 | """ 29 | for i in range(n): 30 | yield i 31 | sleep(secs) 32 | if thr and i >= thr: 33 | raise ValueError(i) 34 | 35 | 36 | @pytest.mark.xfail(on_windows, reason="https://github.com/dandi/dandi-cli/issues/1510") 37 | def test_IteratorWithAggregation(): 38 | def sumup(v, t=0): 39 | return v + t 40 | 41 | it = IteratorWithAggregation(sleeping_range(3, 0.0001), agg=sumup) 42 | # we should get our summary available after 2nd iteration and before it finishes 43 | slow_machine = False 44 | for t, i in enumerate(it): 45 | sleep(0.005) # 0.0003 should be sufficient but to deal with Windows failures, 46 | # making it longer 47 | assert t == i # it is just a range after all 48 | if i: 49 | if not it.finished: 50 | # give considerably more time for poor Windows VM 51 | slow_machine = True 52 | sleep(0.1) 53 | assert it.finished 54 | 55 | # If there is an exception thrown, it would be raised only by the end 56 | it = IteratorWithAggregation(sleeping_range(5, 0.0001, thr=2), agg=sumup) 57 | got = [] 58 | with pytest.raises(ValueError): 59 | for i in it: 60 | got.append(i) 61 | sleep(0.001 if not slow_machine else 0.1) 62 | assert got == [0, 1, 2] 63 | assert it.finished 64 | 65 | # If there is an exception thrown, it would be raised immediately 66 | it = IteratorWithAggregation( 67 | sleeping_range(5, 0.0001, thr=2), agg=sumup, reraise_immediately=True 68 | ) 69 | got = [] 70 | with pytest.raises(ValueError): 71 | for i in it: 72 | got.append(i) 73 | # sleep long enough to trigger exception before next iteration 74 | sleep(0.02 if not slow_machine else 0.1) 75 | assert got in ([], [0]) 76 | assert it.finished 77 | -------------------------------------------------------------------------------- 
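The tests above exercise `IteratorWithAggregation` from `dandi/support/iterators.py`, which consumes the wrapped generator while folding every yielded value into a running aggregate via the `agg` callback. A minimal standalone sketch (hedged: it assumes the aggregate is exposed as `.total`, consistent with the `sumup(v, t=0)`-style callbacks used in the tests):

    from dandi.support.iterators import IteratorWithAggregation

    # agg receives each new value and, after the first call, the prior aggregate
    it = IteratorWithAggregation(iter(range(5)), agg=lambda v, t=0: t + v)
    for item in it:
        print(item)  # values pass through unchanged: 0, 1, 2, 3, 4
    print(it.total, it.finished)  # running sum (10) and completion flag (True)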
/dandi/tests/data/metadata/metadata2asset.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "dandiasset:0b0a1a0b-e3ea-4cf6-be94-e02c830d54be", 3 | "schemaKey": "Asset", 4 | "schemaVersion": "0.4.1", 5 | "keywords": [ 6 | "test", 7 | "sample", 8 | "example", 9 | "test-case" 10 | ], 11 | "access": [ 12 | { 13 | "schemaKey": "AccessRequirements", 14 | "status": "dandi:OpenAccess" 15 | } 16 | ], 17 | "wasGeneratedBy": [ 18 | { 19 | "schemaKey": "Session", 20 | "identifier": "XYZ789", 21 | "name": "XYZ789", 22 | "description": "Some test data", 23 | "startDate": "2020-08-31T15:58:28-04:00", 24 | "used": [ 25 | { 26 | "schemaKey": "Equipment", 27 | "identifier": "probe:probe04", 28 | "name": "Ecephys Probe" 29 | } 30 | ] 31 | } 32 | ], 33 | "contentSize": 69105, 34 | "encodingFormat": "application/x-nwb", 35 | "digest": { 36 | "dandi:dandi-etag": "e455839e5ab2fa659861f58a423fd17f-1" 37 | }, 38 | "path": "/test/path", 39 | "wasDerivedFrom": [ 40 | { 41 | "schemaKey": "BioSample", 42 | "identifier": "cell01", 43 | "sampleType": { 44 | "schemaKey": "SampleType", 45 | "name": "cell" 46 | }, 47 | "wasDerivedFrom": [ 48 | { 49 | "schemaKey": "BioSample", 50 | "identifier": "slice02", 51 | "sampleType": { 52 | "schemaKey": "SampleType", 53 | "name": "slice" 54 | }, 55 | "wasDerivedFrom": [ 56 | { 57 | "schemaKey": "BioSample", 58 | "identifier": "tissue03", 59 | "sampleType": { 60 | "schemaKey": "SampleType", 61 | "name": "tissuesample" 62 | } 63 | } 64 | ] 65 | } 66 | ] 67 | } 68 | ], 69 | "wasAttributedTo": [ 70 | { 71 | "schemaKey": "Participant", 72 | "identifier": "a1b2c3", 73 | "age": { 74 | "unitText": "ISO-8601 duration", 75 | "value": "P170DT12212S", 76 | "schemaKey": "PropertyValue", 77 | "valueReference": {"schemaKey": "PropertyValue", "value": "dandi:BirthReference"} 78 | }, 79 | "sex": { 80 | "schemaKey": "SexType", 81 | "identifier": "http://purl.obolibrary.org/obo/PATO_0000384", 82 | "name": "Male" 83 | }, 84 | "genotype": "Typical", 85 | "species": { 86 | "schemaKey": "SpeciesType", 87 | "identifier": "http://purl.obolibrary.org/obo/NCBITaxon_9606", 88 | "name": "Homo sapiens - Human" 89 | } 90 | } 91 | ] 92 | } 93 | -------------------------------------------------------------------------------- /dandi/tests/data/metadata/metadata2asset_cellline.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "dandiasset:0b0a1a0b-e3ea-4cf6-be94-e02c830d54be", 3 | "schemaKey": "Asset", 4 | "schemaVersion": "0.4.1", 5 | "keywords": [ 6 | "test", 7 | "sample", 8 | "example", 9 | "test-case" 10 | ], 11 | "access": [ 12 | { 13 | "schemaKey": "AccessRequirements", 14 | "status": "dandi:OpenAccess" 15 | } 16 | ], 17 | "wasGeneratedBy": [ 18 | { 19 | "schemaKey": "Session", 20 | "identifier": "XYZ789", 21 | "name": "XYZ789", 22 | "description": "Some test data", 23 | "startDate": "2020-08-31T15:58:28-04:00", 24 | "used": [ 25 | { 26 | "schemaKey": "Equipment", 27 | "identifier": "probe:probe04", 28 | "name": "Ecephys Probe" 29 | } 30 | ] 31 | } 32 | ], 33 | "contentSize": 69105, 34 | "encodingFormat": "application/x-nwb", 35 | "digest": { 36 | "dandi:dandi-etag": "e455839e5ab2fa659861f58a423fd17f-1" 37 | }, 38 | "path": "/test/path", 39 | "wasDerivedFrom": [ 40 | { 41 | "schemaKey": "BioSample", 42 | "identifier": "cell01", 43 | "sampleType": { 44 | "schemaKey": "SampleType", 45 | "name": "cell" 46 | }, 47 | "wasDerivedFrom": [ 48 | { 49 | "schemaKey": "BioSample", 50 | "identifier": "slice02", 51 | 
"sampleType": { 52 | "schemaKey": "SampleType", 53 | "name": "slice" 54 | }, 55 | "wasDerivedFrom": [ 56 | { 57 | "schemaKey": "BioSample", 58 | "identifier": "tissue03", 59 | "sampleType": { 60 | "schemaKey": "SampleType", 61 | "name": "tissuesample" 62 | } 63 | } 64 | ] 65 | } 66 | ] 67 | } 68 | ], 69 | "wasAttributedTo": [ 70 | { 71 | "schemaKey": "Participant", 72 | "identifier": "a1b2c3", 73 | "age": { 74 | "unitText": "ISO-8601 duration", 75 | "value": "P170DT12212S", 76 | "schemaKey": "PropertyValue", 77 | "valueReference": {"schemaKey": "PropertyValue", "value": "dandi:BirthReference"} 78 | }, 79 | "sex": { 80 | "schemaKey": "SexType", 81 | "identifier": "http://purl.obolibrary.org/obo/PATO_0000384", 82 | "name": "Male" 83 | }, 84 | "cellLine": "abcdef/1", 85 | "genotype": "Typical", 86 | "species": { 87 | "schemaKey": "SpeciesType", 88 | "identifier": "http://purl.obolibrary.org/obo/NCBITaxon_1234175", 89 | "name": "Cyperus bulbosus" 90 | } 91 | } 92 | ] 93 | } 94 | -------------------------------------------------------------------------------- /dandi/metadata/core.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from datetime import datetime 4 | from pathlib import Path 5 | import re 6 | 7 | from dandischema import models 8 | from pydantic import ByteSize 9 | 10 | from .util import extract_model, get_generator 11 | from .. import get_logger 12 | from ..misctypes import Digest, LocalReadableFile, Readable 13 | from ..utils import get_mime_type, get_utcnow_datetime 14 | 15 | lgr = get_logger() 16 | 17 | 18 | def get_default_metadata( 19 | path: str | Path | Readable, digest: Digest | None = None 20 | ) -> models.BareAsset: 21 | metadata = models.BareAsset.model_construct() # type: ignore[call-arg] 22 | start_time = end_time = datetime.now().astimezone() 23 | add_common_metadata(metadata, path, start_time, end_time, digest) 24 | return metadata 25 | 26 | 27 | def add_common_metadata( 28 | metadata: models.BareAsset, 29 | path: str | Path | Readable, 30 | start_time: datetime, 31 | end_time: datetime, 32 | digest: Digest | None = None, 33 | ) -> None: 34 | """ 35 | Update a `dict` of raw "schemadata" with the fields that are common to both 36 | NWB assets and non-NWB assets 37 | """ 38 | if digest is not None: 39 | metadata.digest = digest.asdict() 40 | else: 41 | metadata.digest = {} 42 | metadata.dateModified = get_utcnow_datetime() 43 | if isinstance(path, Readable): 44 | r = path 45 | else: 46 | r = LocalReadableFile(path) 47 | mtime = r.get_mtime() 48 | if mtime is not None: 49 | metadata.blobDateModified = mtime 50 | if mtime > metadata.dateModified: 51 | lgr.warning("mtime %s of %s is in the future", mtime, r) 52 | size = r.get_size() 53 | if digest is not None and digest.algorithm is models.DigestType.dandi_zarr_checksum: 54 | m = re.fullmatch( 55 | r"(?P[0-9a-f]{32})-(?P[0-9]+)--(?P[0-9]+)", digest.value 56 | ) 57 | if m: 58 | size = int(m["size"]) 59 | metadata.contentSize = ByteSize(size) 60 | if metadata.wasGeneratedBy is None: 61 | metadata.wasGeneratedBy = [] 62 | metadata.wasGeneratedBy.append(get_generator(start_time, end_time)) 63 | metadata.encodingFormat = get_mime_type(r.get_filename()) 64 | 65 | 66 | def prepare_metadata(metadata: dict) -> models.BareAsset: 67 | """ 68 | Convert "flatdata" [1]_ for an asset into "schemadata" [2]_ as a 69 | `BareAsset` 70 | 71 | .. [1] a flat `dict` mapping strings to strings & other primitive types; 72 | returned by `get_metadata()` 73 | 74 | .. 
[2] metadata in the form used by the ``dandischema`` library 75 | """ 76 | return extract_model(models.BareAsset, metadata) 77 | -------------------------------------------------------------------------------- /docs/demos/basic-workflow1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 4 | # This is a demonstration of a typical workflow to prepare and upload 5 | # dataset to dandi archive. 6 | # 7 | # HOWTO: 8 | # - Provide this script with a folder containing .nwb files, which 9 | # you would like to upload to DANDI. 10 | # 11 | # This script will 12 | # - organize your data 13 | # - validate it 14 | # - register a new dandiset in the archive 15 | # - upload to the archive 16 | # and to demonstrate full round-trip/possible simple collaborative workflow 17 | # - redownload it from the archive 18 | 19 | # 20 | # By default let's use our local instance 21 | # 22 | set -eu 23 | 24 | : "${DANDI_DEVEL:=1}" 25 | : "${DANDI_INSTANCE:=local-docker}" 26 | : "${ORGANIZE_FILE_MODE:=symlink}" # Mode which will be used by organize 27 | export DANDI_DEVEL 28 | 29 | function info() { 30 | echo 31 | echo "I: $*" 32 | } 33 | 34 | function indent () { 35 | sed -e 's,^, ,g' 36 | } 37 | 38 | function sneak() { 39 | if hash tree && hash head ; then 40 | info "sneak preview of $1:" 41 | tree "$1" | head -n 10 | indent 42 | fi 43 | } 44 | 45 | 46 | TOPPATH=$(mktemp -d "${TMPDIR:-/tmp}/dandiset-XXXXXXX") 47 | 48 | if [ "$#" != 1 ]; then 49 | echo "No path was provided, we will use some really lightweight data repo with a single file" 50 | git clone http://github.com/dandi-datasets/nwb_test_data $TOPPATH/sourcedata 51 | SRCPATH="$TOPPATH/sourcedata/v2.0.0/test_Subject.nwb" 52 | else 53 | SRCPATH="$1" 54 | fi 55 | OUTPATH=$TOPPATH/organized 56 | 57 | info "Working on $SRCPATH under $OUTPATH" 58 | 59 | info "Organizing" 60 | dandi organize -f "$ORGANIZE_FILE_MODE" -d "$OUTPATH" "$SRCPATH" 61 | sneak "$OUTPATH" 62 | 63 | info "Now we will work in $OUTPATH" 64 | cd "$OUTPATH" 65 | 66 | info "Register a new dandiset" 67 | info "Before that you might need to obtain an API key from the archive" 68 | dandi register -i "$DANDI_INSTANCE" -n "$(basename $SRCPATH)" -D "description" 69 | # TODO: avoid -i if env var is set 70 | info "dandiset.yaml now contains dandiset identifier" 71 | 72 | info "Validating dandiset" 73 | dandi validate 74 | 75 | info "Uploading to the archive" 76 | # TODO: should pick up identifier from dandiset.yaml, 77 | # TODO: avoid -i if env var is set 78 | dandi upload -i "$DANDI_INSTANCE" 79 | 80 | # TODO: with dandi download it is impossible ATM to (re)download into the same dandiset 81 | 82 | info "You can use dandi download now to download the dandiset" 83 | #info "Downloading from the archive to a new directory" 84 | # Cannot do for a local one yet since no router configured, so even if I know 85 | # top url, would need to know girder id 86 | #dandi download 87 | 88 | info "We are done -- you can explore "$OUTPATH" and/or remove it" 89 | -------------------------------------------------------------------------------- /docs/source/cmdline/service-scripts.rst: -------------------------------------------------------------------------------- 1 | :program:`dandi service-scripts` 2 | ================================ 3 | 4 | :: 5 | 6 | dandi [] service-scripts [] 7 | 8 | :program:`dandi service-scripts` is a collection of subcommands for various 9 | utility operations. 
10 | 11 | ``reextract-metadata`` 12 | ---------------------- 13 | 14 | :: 15 | 16 | dandi [] service-scripts reextract-metadata [] 17 | 18 | Recompute & update the metadata for NWB assets on a remote server. 19 | 20 | ```` must point to a draft Dandiset or one or more assets inside a draft 21 | Dandiset. See :ref:`resource_ids` for allowed URL formats. 22 | 23 | Running this command requires the fsspec_ library to be installed with the 24 | ``http`` extra (e.g., ``pip install "fsspec[http]"``). 25 | 26 | .. _fsspec: http://github.com/fsspec/filesystem_spec 27 | 28 | Options 29 | ^^^^^^^ 30 | 31 | .. option:: --diff 32 | 33 | Show diffs of old & new metadata for each re-extracted asset 34 | 35 | .. option:: --when [newer-schema-version|always] 36 | 37 | Specify when to re-extract an asset's metadata: 38 | 39 | - ``newer-schema-version`` (default) — when the ``schemaVersion`` in the 40 | asset's current metadata is missing or older than the schema version 41 | currently in use by DANDI 42 | 43 | - ``always`` — always 44 | 45 | 46 | ``update-dandiset-from-doi`` 47 | ---------------------------- 48 | 49 | :: 50 | 51 | dandi [] service-scripts update-dandiset-from-doi --dandiset= [] 52 | 53 | Update the metadata for the draft version of a Dandiset with information from a 54 | given DOI record. 55 | 56 | Options 57 | ^^^^^^^ 58 | 59 | .. option:: -d, --dandiset 60 | 61 | Specify the ID of the Dandiset to operate on. This option is required. 62 | 63 | .. option:: -i, --dandi-instance 64 | 65 | DANDI instance (either a base URL or a known instance name) where the 66 | Dandiset is located [default: ``dandi``] 67 | 68 | .. option:: -e, --existing [ask|overwrite|skip] 69 | 70 | Specify the behavior when a value would be set on or added to the Dandiset 71 | metadata: 72 | 73 | - ``ask`` [default] — Ask the user with confirmation before making the 74 | change 75 | 76 | - ``overwrite`` — Make the change without asking for confirmation 77 | 78 | - ``skip`` — Do not change anything, but still print out details on what 79 | would have been changed 80 | 81 | .. option:: -F, --fields [contributor,name,description,relatedResource,all] 82 | 83 | Comma-separated list of Dandiset metadata fields to update [default: 84 | ``all``] 85 | 86 | .. option:: -y, --yes 87 | 88 | Show the final metadata diff and save any changes without asking for 89 | confirmation 90 | -------------------------------------------------------------------------------- /docs/source/cmdline/upload.rst: -------------------------------------------------------------------------------- 1 | :program:`dandi upload` 2 | ======================= 3 | 4 | :: 5 | 6 | dandi [] upload [] [ ...] 7 | 8 | Upload Dandiset files to DANDI Archive. 9 | 10 | The target Dandiset to upload to must already be registered in the archive, and 11 | a :file:`dandiset.yaml` file must exist in the common ancestor of the given 12 | paths (or the current directory, if no paths are specified) or a parent 13 | directory thereof. 14 | 15 | Local Dandisets should pass validation. For that, the assets should first be 16 | organized using the :ref:`dandi_organize` command. 17 | 18 | By default, all :file:`*.nwb`, :file:`*.zarr`, and :file:`*.ngff` assets in the 19 | Dandiset (ignoring directories starting with a period) will be considered for 20 | the upload. You can point to specific files you would like to validate and 21 | have uploaded. 22 | 23 | Options 24 | ------- 25 | 26 | .. 
option:: -e, --existing [error|skip|force|overwrite|refresh] 27 | 28 | How to handle files that already exist on the server: 29 | 30 | - ``error`` — raise an error 31 | - ``skip`` — skip the file 32 | - ``force`` — force reupload 33 | - ``overwrite`` — force upload if either size or modification time differs 34 | - ``refresh`` [default] — upload only if local modification time is ahead 35 | of the remote 36 | 37 | .. option:: -i, --dandi-instance 38 | 39 | DANDI instance (either a base URL or a known instance name) to upload to 40 | [default: ``dandi``] 41 | 42 | .. option:: -J, --jobs N[:M] 43 | 44 | Number of assets to upload in parallel and, optionally, number of upload 45 | threads per asset [default: ``5:5``] 46 | 47 | .. option:: --sync 48 | 49 | Delete assets on the server that do not exist locally after uploading 50 | 51 | .. option:: --validation [require|skip|ignore] 52 | 53 | How to handle invalid assets: 54 | 55 | - ``require`` [default] — Do not upload any invalid assets 56 | - ``skip`` — Do not check assets for validity 57 | - ``ignore`` — Emit an error message for invalid assets but upload them 58 | anyway 59 | 60 | Data should pass validation before uploading. Use of this option is highly 61 | discouraged. 62 | 63 | 64 | Development Options 65 | ------------------- 66 | 67 | The following options are intended only for development & testing purposes. 68 | They are only available if the :envvar:`DANDI_DEVEL` environment variable is 69 | set to a nonempty value. 70 | 71 | .. option:: --allow-any-path 72 | 73 | Upload all file types, not just NWBs and Zarrs 74 | 75 | .. option:: --devel-debug 76 | 77 | Do not use pyout callbacks, do not swallow exceptions, do not parallelize. 78 | 79 | .. option:: --upload-dandiset-metadata 80 | 81 | Update Dandiset metadata based on the local :file:`dandiset.yaml` file 82 | -------------------------------------------------------------------------------- /dandi/cli/formatter.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | import sys 4 | from textwrap import indent 5 | 6 | import ruamel.yaml 7 | 8 | from .. 
import get_logger 9 | from ..support import pyout as pyouts 10 | 11 | lgr = get_logger() 12 | 13 | 14 | class Formatter: 15 | def __enter__(self): 16 | pass 17 | 18 | def __exit__(self, exc_type, exc_value, traceback): 19 | pass 20 | 21 | def __call__(self, rec): 22 | pass 23 | 24 | 25 | class JSONFormatter(Formatter): 26 | def __init__(self, indent=None, out=None): 27 | self.out = out or sys.stdout 28 | self.indent = indent 29 | self.first = True 30 | 31 | @staticmethod 32 | def _serializer(o): 33 | if isinstance(o, datetime.datetime): 34 | return str(o) 35 | return o 36 | 37 | def __enter__(self): 38 | print("[", end="", file=self.out) 39 | 40 | def __exit__(self, exc_type, exc_value, traceback): 41 | if not self.first: 42 | print(file=self.out) 43 | print("]", file=self.out) 44 | 45 | def __call__(self, rec): 46 | if self.first: 47 | print(file=self.out) 48 | self.first = False 49 | else: 50 | print(",", file=self.out) 51 | 52 | s = json.dumps( 53 | rec, indent=self.indent, sort_keys=True, default=self._serializer 54 | ) 55 | print(indent(s, " " * (self.indent or 2)), end="", file=self.out) 56 | 57 | 58 | class JSONLinesFormatter(Formatter): 59 | def __init__(self, indent=None, out=None): 60 | self.out = out or sys.stdout 61 | self.indent = indent 62 | 63 | @staticmethod 64 | def _serializer(o): 65 | if isinstance(o, datetime.datetime): 66 | return str(o) 67 | return o 68 | 69 | def __call__(self, rec): 70 | print( 71 | json.dumps( 72 | rec, indent=self.indent, sort_keys=True, default=self._serializer 73 | ), 74 | file=self.out, 75 | ) 76 | 77 | 78 | class YAMLFormatter(Formatter): 79 | def __init__(self, out=None): 80 | self.out = out or sys.stdout 81 | self.records = [] 82 | 83 | def __exit__(self, exc_type, exc_value, traceback): 84 | yaml = ruamel.yaml.YAML(typ="safe") 85 | yaml.default_flow_style = False 86 | yaml.dump(self.records, self.out) 87 | 88 | def __call__(self, rec): 89 | self.records.append(rec) 90 | 91 | 92 | class PYOUTFormatter(pyouts.LogSafeTabular): 93 | def __init__(self, fields, **kwargs): 94 | PYOUT_STYLE = pyouts.get_style(hide_if_missing=not fields) 95 | 96 | kw = dict(style=PYOUT_STYLE) 97 | kw.update(kwargs) 98 | if fields: 99 | kw["columns"] = fields 100 | super().__init__(**kw) 101 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = lint,typing,py3 3 | 4 | [testenv] 5 | setenv = 6 | DANDI_ALLOW_LOCALHOST_URLS=1 7 | DANDI_PAGINATION_DISABLE_FALLBACK=1 8 | passenv = DANDI_*,USER,DBUS_SESSION_BUS_ADDRESS 9 | extras = 10 | extras 11 | test 12 | commands = 13 | # Using pytest-cov instead of using coverage directly leaves a bunch of 14 | # .coverage.$HOSTNAME.#.# files lying around for some reason 15 | coverage erase 16 | coverage run -m pytest -v {posargs} dandi 17 | coverage combine 18 | coverage report 19 | 20 | [testenv:lint] 21 | skip_install = true 22 | deps = 23 | codespell~=2.4.1 24 | flake8 25 | tomli; python_version < '3.11' 26 | commands = 27 | codespell dandi docs tools setup.py 28 | flake8 --config=setup.cfg {posargs} dandi setup.py 29 | 30 | [testenv:typing] 31 | deps = 32 | mypy != 1.11.0 33 | types-python-dateutil 34 | types-requests 35 | commands = 36 | mypy dandi 37 | 38 | [testenv:docs] 39 | basepython = python3 40 | deps = -rdocs/requirements.txt 41 | extras = test 42 | changedir = docs 43 | commands = sphinx-build -E -W -b html source build 44 | 45 | [pytest] 46 | addopts = --tb=short --durations=10 
--timeout=300 47 | filterwarnings = 48 | error 49 | ignore:No cached namespaces found .*:UserWarning 50 | ignore:ignoring namespace '.*' because it already exists:UserWarning 51 | ignore::DeprecationWarning:responses 52 | ignore::DeprecationWarning:requests_toolbelt 53 | # 54 | # 55 | ignore:.* size changed, may indicate binary incompatibility.*:RuntimeWarning 56 | # 57 | ignore:\s*safe_load will be removed.*:PendingDeprecationWarning:hdmf 58 | ignore:\s*load will be removed.*:PendingDeprecationWarning:ruamel.yaml 59 | ignore:Passing None into shape arguments.*:DeprecationWarning:h5py 60 | ignore:the imp module is deprecated:DeprecationWarning 61 | ignore:`Unit` has been deprecated:DeprecationWarning:humanize 62 | ignore:The distutils package is deprecated:DeprecationWarning:joblib 63 | ignore:datetime.datetime.utcfromtimestamp\(\) is deprecated.*:DeprecationWarning:dateutil 64 | ignore:\s*Pyarrow will become a required dependency of pandas:DeprecationWarning 65 | ignore:.*Value with data type .* is being converted:hdmf.build.warnings.DtypeConversionWarning 66 | ignore:.*find_spec\(\) not found:ImportWarning 67 | ignore:'cgi' is deprecated:DeprecationWarning:botocore 68 | ignore:.*unclosed.*:ResourceWarning:vcr 69 | # addressed in joblib 0.8.2-826-g05caf07 70 | ignore:(ast.Num|Attribute n) is deprecated.*:DeprecationWarning:joblib 71 | 72 | [coverage:run] 73 | parallel = True 74 | source = dandi 75 | 76 | [coverage:report] 77 | precision = 2 78 | exclude_lines = 79 | pragma: no cover 80 | if TYPE_CHECKING: 81 | \.\.\. 82 | -------------------------------------------------------------------------------- /dandi/tests/data/metadata/metadata2asset_3.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "dandiasset:0b0a1a0b-e3ea-4cf6-be94-e02c830d54be", 3 | "schemaKey": "Asset", 4 | "schemaVersion": "0.4.1", 5 | "keywords": [ 6 | "test", 7 | "sample", 8 | "example", 9 | "test-case" 10 | ], 11 | "access": [ 12 | { 13 | "schemaKey": "AccessRequirements", 14 | "status": "dandi:OpenAccess" 15 | } 16 | ], 17 | "wasGeneratedBy": [ 18 | { 19 | "schemaKey": "Session", 20 | "identifier": "XYZ789", 21 | "name": "XYZ789", 22 | "description": "Some test data", 23 | "startDate": "2020-08-31T15:58:28-04:00", 24 | "used": [ 25 | { 26 | "schemaKey": "Equipment", 27 | "identifier": "probe:probe04", 28 | "name": "Ecephys Probe" 29 | } 30 | ] 31 | } 32 | ], 33 | "contentSize": 69105, 34 | "encodingFormat": "application/x-nwb", 35 | "digest": { 36 | "dandi:dandi-etag": "e455839e5ab2fa659861f58a423fd17f-1" 37 | }, 38 | "path": "/test/path", 39 | "wasDerivedFrom": [ 40 | { 41 | "schemaKey": "BioSample", 42 | "identifier": "cell01", 43 | "sampleType": { 44 | "schemaKey": "SampleType", 45 | "name": "cell" 46 | }, 47 | "wasDerivedFrom": [ 48 | { 49 | "schemaKey": "BioSample", 50 | "identifier": "slice02", 51 | "sampleType": { 52 | "schemaKey": "SampleType", 53 | "name": "slice" 54 | }, 55 | "wasDerivedFrom": [ 56 | { 57 | "schemaKey": "BioSample", 58 | "identifier": "tissue03", 59 | "sampleType": { 60 | "schemaKey": "SampleType", 61 | "name": "tissuesample" 62 | } 63 | } 64 | ] 65 | } 66 | ] 67 | } 68 | ], 69 | "wasAttributedTo": [ 70 | { 71 | "schemaKey": "Participant", 72 | "identifier": "a1b2c3", 73 | "age": { 74 | "unitText": "ISO-8601 duration", 75 | "value": "P170DT12212S", 76 | "schemaKey": "PropertyValue", 77 | "valueReference": {"schemaKey": "PropertyValue", "value": "dandi:BirthReference"} 78 | }, 79 | "sex": { 80 | "schemaKey": "SexType", 81 | "identifier": 
"http://purl.obolibrary.org/obo/PATO_0000384", 82 | "name": "Male" 83 | }, 84 | "strain": { 85 | "name": "abcdef/1", 86 | "schemaKey": "StrainType" 87 | }, 88 | "genotype": "Typical", 89 | "species": { 90 | "schemaKey": "SpeciesType", 91 | "identifier": "http://purl.obolibrary.org/obo/NCBITaxon_1234175", 92 | "name": "Cyperus bulbosus" 93 | } 94 | } 95 | ], 96 | "relatedResource": [ 97 | { 98 | "schemaKey": "Resource", 99 | "identifier": "https://doi.org/10.48324/dandi.000027/0.210831.2033", 100 | "relation": "dcite:IsDescribedBy" 101 | } 102 | ] 103 | } 104 | -------------------------------------------------------------------------------- /dandi/cli/tests/test_move.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from typing import Any 4 | 5 | import click 6 | from click.testing import CliRunner 7 | import pytest 8 | from pytest_mock import MockerFixture 9 | 10 | from ..cmd_move import move 11 | 12 | 13 | @pytest.mark.parametrize( 14 | "cmdline,srcs,kwargs", 15 | [ 16 | ( 17 | ["src.txt", "dest/"], 18 | ["src.txt"], 19 | { 20 | "dest": "dest/", 21 | "dandiset": None, 22 | "dry_run": False, 23 | "existing": "error", 24 | "jobs": None, 25 | "regex": False, 26 | "work_on": "auto", 27 | "dandi_instance": "dandi", 28 | "devel_debug": False, 29 | }, 30 | ), 31 | ( 32 | ["src.txt", "other.txt", "dest/"], 33 | ["src.txt", "other.txt"], 34 | { 35 | "dest": "dest/", 36 | "dandiset": None, 37 | "dry_run": False, 38 | "existing": "error", 39 | "jobs": None, 40 | "regex": False, 41 | "work_on": "auto", 42 | "dandi_instance": "dandi", 43 | "devel_debug": False, 44 | }, 45 | ), 46 | ( 47 | [ 48 | "-d", 49 | "DANDI:000027", 50 | "--existing=skip", 51 | "--dry-run", 52 | "--jobs", 53 | "5", 54 | "--regex", 55 | "--work-on=remote", 56 | "--dandi-instance", 57 | "dandi-sandbox", 58 | "src.txt", 59 | "dest/", 60 | ], 61 | ["src.txt"], 62 | { 63 | "dest": "dest/", 64 | "dandiset": "DANDI:000027", 65 | "dry_run": True, 66 | "existing": "skip", 67 | "jobs": 5, 68 | "regex": True, 69 | "work_on": "remote", 70 | "dandi_instance": "dandi-sandbox", 71 | "devel_debug": False, 72 | }, 73 | ), 74 | ], 75 | ) 76 | def test_move_command( 77 | mocker: MockerFixture, cmdline: list[str], srcs: list[str], kwargs: dict[str, Any] 78 | ) -> None: 79 | mock_move = mocker.patch("dandi.move.move") 80 | r = CliRunner().invoke(move, cmdline) 81 | assert r.exit_code == 0 82 | mock_move.assert_called_once_with(*srcs, **kwargs) 83 | 84 | 85 | def test_move_command_too_few_paths(mocker: MockerFixture) -> None: 86 | mock_move = mocker.patch("dandi.move.move") 87 | r = CliRunner().invoke(move, ["foo"], standalone_mode=False) 88 | assert r.exit_code != 0 89 | # This is a ClickException when map_to_click_exceptions is in effect and a 90 | # ValueError when it's not (which happens when DANDI_DEVEL is set). 91 | assert isinstance(r.exception, (click.ClickException, ValueError)) 92 | assert str(r.exception) == "At least two paths are required" 93 | mock_move.assert_not_called() 94 | -------------------------------------------------------------------------------- /docs/source/modref/dandiapi.rst: -------------------------------------------------------------------------------- 1 | .. module:: dandi.dandiapi 2 | 3 | ``dandi.dandiapi`` 4 | ================== 5 | 6 | This module provides functionality for interacting with a DANDI instance 7 | via the REST API. 
Interaction begins with the creation of a `DandiAPIClient` 8 | instance, which can be used to retrieve `RemoteDandiset` objects (representing 9 | Dandisets on the server) and `BaseRemoteAsset` objects (representing assets 10 | without any data associating them with their Dandisets). `RemoteDandiset` 11 | objects can, in turn, be used to retrieve `RemoteAsset` objects (representing 12 | assets associated with Dandisets). Aside from `DandiAPIClient`, none of these 13 | classes should be instantiated directly by the user. 14 | 15 | All operations that merely fetch data from the server can be done without 16 | authenticating, but any operation that writes, uploads, modifies, or deletes 17 | data requires the user to authenticate the `DandiAPIClient` instance by 18 | supplying an API key, either when creating the instance or by calling the 19 | `~DandiAPIClient.authenticate()` or `~DandiAPIClient.dandi_authenticate()` 20 | method. 21 | 22 | Example code for printing the metadata of all assets with "two-photon" in their 23 | ``metadata.measurementTechnique[].name`` for the latest published version of 24 | every Dandiset: 25 | 26 | .. literalinclude:: /examples/dandiapi-example.py 27 | :language: python 28 | 29 | Example code for accessing asset files as regular Python file objects without 30 | downloading their entire content locally; such file objects can then 31 | be passed to functions from pynwb and similar libraries: 32 | 33 | .. literalinclude:: /examples/dandiapi-as_readable.py 34 | :language: python 35 | 36 | More examples of using the DANDI API to assist with data streaming are 37 | available at `PyNWB: Streaming NWB files `_. 38 | 39 | Client 40 | ------ 41 | 42 | .. autoclass:: RESTFullAPIClient 43 | 44 | .. autoclass:: DandiAPIClient 45 | :show-inheritance: 46 | 47 | Dandisets 48 | --------- 49 | 50 | .. autoclass:: RemoteDandiset() 51 | 52 | .. autoclass:: Version() 53 | :inherited-members: BaseModel 54 | :exclude-members: Config, JSON_EXCLUDE 55 | 56 | .. autoclass:: VersionInfo() 57 | :show-inheritance: 58 | 59 | .. autoclass:: RemoteValidationError() 60 | :inherited-members: BaseModel 61 | 62 | Assets 63 | ------ 64 | 65 | .. autoclass:: BaseRemoteAsset() 66 | :inherited-members: BaseModel 67 | :exclude-members: Config, JSON_EXCLUDE 68 | 69 | .. autoclass:: BaseRemoteBlobAsset() 70 | :show-inheritance: 71 | 72 | .. autoclass:: AssetType 73 | 74 | .. autoclass:: RemoteAsset() 75 | :show-inheritance: 76 | :exclude-members: JSON_EXCLUDE 77 | 78 | .. autoclass:: RemoteBlobAsset() 79 | :show-inheritance: 80 | 81 | Zarr Assets 82 | ^^^^^^^^^^^ 83 | 84 | .. autoclass:: BaseRemoteZarrAsset() 85 | :show-inheritance: 86 | 87 | .. autoclass:: RemoteZarrAsset() 88 | :show-inheritance: 89 | 90 | .. autoclass:: RemoteZarrEntry() 91 | :show-inheritance: 92 | 93 | .. 
Excluded from documentation: APIBase, RemoteDandisetData, ZarrEntryServerData 94 | -------------------------------------------------------------------------------- /dandi/cli/tests/test_digest.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | import subprocess 4 | 5 | from click.testing import CliRunner 6 | import numpy as np 7 | import pytest 8 | import zarr 9 | 10 | from ..cmd_digest import digest 11 | 12 | 13 | def test_digest_default(): 14 | runner = CliRunner() 15 | with runner.isolated_filesystem(): 16 | Path("file.txt").write_bytes(b"123") 17 | r = runner.invoke(digest, ["file.txt"]) 18 | assert r.exit_code == 0 19 | assert r.output == "file.txt: d022646351048ac0ba397d12dfafa304-1\n" 20 | 21 | 22 | @pytest.mark.parametrize( 23 | "alg,filehash", 24 | [ 25 | ("md5", "202cb962ac59075b964b07152d234b70"), 26 | ("zarr-checksum", "202cb962ac59075b964b07152d234b70"), 27 | ("sha1", "40bd001563085fc35165329ea1ff5c5ecbdbbeef"), 28 | ("sha256", "a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3"), 29 | ( 30 | "sha512", 31 | "3c9909afec25354d551dae21590bb26e38d53f2173b8d3dc3eee4c047e7a" 32 | "b1c1eb8b85103e3be7ba613b31bb5c9c36214dc9f14a42fd7a2fdb84856b" 33 | "ca5c44c2", 34 | ), 35 | ], 36 | ) 37 | def test_digest(alg, filehash): 38 | runner = CliRunner() 39 | with runner.isolated_filesystem(): 40 | Path("file.txt").write_bytes(b"123") 41 | r = runner.invoke(digest, ["--digest", alg, "file.txt"]) 42 | assert r.exit_code == 0 43 | assert r.output == f"file.txt: {filehash}\n" 44 | 45 | 46 | def test_digest_zarr(): 47 | # This test assumes that the Zarr serialization format never changes 48 | runner = CliRunner() 49 | with runner.isolated_filesystem(): 50 | dt = np.dtype(" None: 60 | runner = CliRunner() 61 | with runner.isolated_filesystem(): 62 | os.mkdir("empty.zarr") 63 | r = runner.invoke(digest, ["--digest", "zarr-checksum", "empty.zarr"]) 64 | assert r.exit_code == 0 65 | assert r.output == "empty.zarr: 481a2f77ab786a0f45aafd5db0971caa-0--0\n" 66 | 67 | 68 | def test_digest_zarr_with_excluded_dotfiles(): 69 | # This test assumes that the Zarr serialization format never changes 70 | runner = CliRunner() 71 | with runner.isolated_filesystem(): 72 | dt = np.dtype(" 2 | 3 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # ============================================================================== 17 | 18 | from __future__ import annotations 19 | 20 | from collections.abc import Callable, Iterable 21 | import logging 22 | import os.path 23 | from pathlib import Path 24 | import threading 25 | from typing import Any 26 | 27 | log = logging.getLogger(__name__) 28 | 29 | 30 | def threaded_walk( 31 | dirpath: str | Path, 32 | func: Callable[[Path], Any] | None = None, 33 | threads: int = 60, 34 | exclude: Callable[[Path], Any] | None = None, 35 | ) -> Iterable[Any]: 36 | if not os.path.isdir(dirpath): 37 | return 38 | lock = threading.Lock() 39 | on_input = threading.Condition(lock) 40 | on_output = threading.Condition(lock) 41 | tasks = 1 42 | paths = [Path(dirpath)] 43 | output: list = [] 44 | 45 | def worker() -> None: 46 | nonlocal tasks 47 | while True: 48 | with lock: 49 | while True: 50 | if not tasks: 51 | output.append(None) 52 | on_output.notify() 53 | return 54 | if not paths: 55 | on_input.wait() 56 | continue 57 | path = paths.pop() 58 | break 59 | try: 60 | for p in path.iterdir(): 61 | if exclude is not None and exclude(p): 62 | log.debug("Excluding %s from traversal", p) 63 | elif p.is_dir(): 64 | with lock: 65 | tasks += 1 66 | paths.append(p) 67 | on_input.notify() 68 | else: 69 | item = func(p) if func is not None else p 70 | with lock: 71 | output.append(item) 72 | on_output.notify() 73 | except Exception: 74 | log.exception("Error scanning directory %s", path) 75 | finally: 76 | with lock: 77 | tasks -= 1 78 | if not tasks: 79 | on_input.notify_all() 80 | 81 | workers = [ 82 | threading.Thread( 83 | target=worker, name=f"threaded_walk {i} {dirpath}", daemon=True 84 | ) 85 | for i in range(threads) 86 | ] 87 | for w in workers: 88 | w.start() 89 | while threads or output: 90 | with lock: 91 | while not output: 92 | on_output.wait() 93 | item = output.pop() 94 | if item: 95 | yield item 96 | else: 97 | threads -= 1 98 | -------------------------------------------------------------------------------- /dandi/pytest_plugin.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from importlib.metadata import PackageNotFoundError, requires, version 4 | 5 | from dandischema.models import DandiBaseModel 6 | from packaging.requirements import Requirement 7 | from pytest import Config, Item, Parser 8 | 9 | from .tests.fixtures import * # noqa: F401, F403 # lgtm [py/polluting-import] 10 | 11 | 12 | def pytest_addoption(parser: Parser) -> None: 13 | parser.addoption( 14 | "--dandi-api", 15 | action="store_true", 16 | default=False, 17 | help="Only run tests of the new Django DANDI API", 18 | ) 19 | parser.addoption( 20 | "--scheduled", 21 | action="store_true", 22 | default=False, 23 | help="Use configuration for a scheduled daily test run", 24 | ) 25 | 26 | 27 | def pytest_configure(config): 28 | markers = [ 29 | "integration", 30 | "obolibrary", 31 | "flaky", 32 | "ai_generated", 33 | ] 34 | for marker in markers: 35 | config.addinivalue_line("markers", marker) 36 | 37 | 38 | def pytest_report_header(config: Config) -> list[str]: 39 | """Add version information for key dependencies to the pytest header.""" 40 | try: 41 | # Extract package names from requirement strings. 
42 | # Format: "package-name >= 1.0" or "package-name ; condition" 43 | # Requirement() parsing keeps just the distribution name, dropping extras (in square brackets, like [test]) and version specifiers 44 | deps = {Requirement(dep).name for dep in (requires("dandi") or [])} 45 | except PackageNotFoundError: 46 | # Use defaults if we didn't get deps from metadata 47 | deps = {"dandischema", "h5py", "hdmf"} 48 | 49 | # Format versions for display (sorted for consistent output) 50 | versions = [] 51 | for pkg in sorted(deps): 52 | try: 53 | version_str = f"-{version(pkg)}" 54 | except PackageNotFoundError: 55 | version_str = " NOT INSTALLED" 56 | versions.append(f"{pkg}{version_str}") 57 | 58 | return [f"dependencies: {', '.join(versions)}"] if versions else [] 59 | 60 | 61 | def pytest_collection_modifyitems(items: list[Item], config: Config) -> None: 62 | # Based on 64 | if config.getoption("--dandi-api"): 65 | selected_items = [] 66 | deselected_items = [] 67 | for item in items: 68 | if "local_dandi_api" in getattr(item, "fixturenames", ()): 69 | selected_items.append(item) 70 | else: 71 | deselected_items.append(item) 72 | config.hook.pytest_deselected(items=deselected_items) 73 | items[:] = selected_items 74 | 75 | 76 | def pytest_assertrepr_compare(op, left, right): 77 | """Custom comparison representation for DandiBaseModel instances.""" 78 | if ( 79 | isinstance(left, DandiBaseModel) 80 | and isinstance(right, DandiBaseModel) 81 | and op == "==" 82 | ): 83 | ldict, rdict = dict(left), dict(right) 84 | if ldict == rdict: 85 | return [ 86 | "dict representations of models are equal, but values aren't!", 87 | f"Left: {left!r}", 88 | f"Right: {right!r}", 89 | ] 90 | else: 91 | # Rely on pytest "recursing" into interpreting the dicts when the comparison fails 92 | # TODO: could be further improved by accounting for ANY values etc 93 | assert ldict == rdict # for easier comprehension of diffs 94 | return None 95 | -------------------------------------------------------------------------------- /dandi/tests/test_pynwb_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from collections.abc import Callable 4 | from datetime import datetime, timezone 5 | from pathlib import Path 6 | import re 7 | from typing import Any, NoReturn 8 | 9 | import numpy as np 10 | from pynwb import NWBHDF5IO, NWBFile, TimeSeries 11 | 12 | from ..pynwb_utils import _sanitize_nwb_version, nwb_has_external_links 13 | 14 | 15 | def test_pynwb_io(simple1_nwb: Path) -> None: 16 | # To verify that our dependencies spec is sufficient to avoid 17 | # stepping into known pynwb/hdmf issues 18 | with NWBHDF5IO(simple1_nwb, "r", load_namespaces=True) as reader: 19 | nwbfile = reader.read() 20 | assert repr(nwbfile) 21 | assert str(nwbfile) 22 | 23 | 24 | def test_sanitize_nwb_version() -> None: 25 | def _nocall(*args: Any) -> NoReturn: 26 | raise AssertionError(f"Should have not been called. 
Was called with {args}") 27 | 28 | def assert_regex(regex: str) -> Callable[[str], None]: 29 | def search(v: str) -> None: 30 | assert re.search(regex, v) 31 | 32 | return search 33 | 34 | assert _sanitize_nwb_version("1.0.0", log=_nocall) == "1.0.0" 35 | assert _sanitize_nwb_version("NWB-1.0.0", log=_nocall) == "1.0.0" 36 | assert _sanitize_nwb_version("NWB-2.0.0", log=_nocall) == "2.0.0" 37 | assert ( 38 | _sanitize_nwb_version( 39 | "NWB-2.1.0", 40 | log=assert_regex("^nwb_version 'NWB-2.1.0' starts with NWB- prefix,"), 41 | ) 42 | == "2.1.0" 43 | ) 44 | assert ( 45 | _sanitize_nwb_version( 46 | "NWB-2.1.0", 47 | filename="/bu", 48 | log=assert_regex( 49 | "^File /bu: nwb_version 'NWB-2.1.0' starts with NWB- prefix," 50 | ), 51 | ) 52 | == "2.1.0" 53 | ) 54 | 55 | 56 | def test_nwb_has_external_links(tmp_path): 57 | # Create the base data 58 | start_time = datetime(2017, 4, 3, 11, tzinfo=timezone.utc) 59 | create_date = datetime(2017, 4, 15, 12, tzinfo=timezone.utc) 60 | data = np.arange(1000).reshape((100, 10)) 61 | timestamps = np.arange(100) 62 | filename1 = tmp_path / "external1_example.nwb" 63 | filename4 = tmp_path / "external_linkdataset_example.nwb" 64 | 65 | # Create the first file 66 | nwbfile1 = NWBFile( 67 | session_description="demonstrate external files", 68 | identifier="NWBE1", 69 | session_start_time=start_time, 70 | file_create_date=create_date, 71 | ) 72 | test_ts1 = TimeSeries( 73 | name="test_timeseries1", data=data, unit="SIunit", timestamps=timestamps 74 | ) 75 | nwbfile1.add_acquisition(test_ts1) 76 | # Write the first file 77 | with NWBHDF5IO(filename1, "w") as io: 78 | io.write(nwbfile1) 79 | 80 | nwbfile4 = NWBFile( 81 | session_description="demonstrate external files", 82 | identifier="NWBE4", 83 | session_start_time=start_time, 84 | file_create_date=create_date, 85 | ) 86 | 87 | # Get the first timeseries 88 | with NWBHDF5IO(filename1, "r") as io1: 89 | nwbfile1 = io1.read() 90 | timeseries_1_data = nwbfile1.get_acquisition("test_timeseries1").data 91 | 92 | # Create a new timeseries that links to our data 93 | test_ts4 = TimeSeries( 94 | name="test_timeseries4", 95 | data=timeseries_1_data, # <------- 96 | unit="SIunit", 97 | timestamps=timestamps, 98 | ) 99 | nwbfile4.add_acquisition(test_ts4) 100 | 101 | with NWBHDF5IO(filename4, "w") as io4: 102 | io4.write(nwbfile4, link_data=True) 103 | 104 | assert not nwb_has_external_links(filename1) 105 | assert nwb_has_external_links(filename4) 106 | -------------------------------------------------------------------------------- /dandi/files/_private.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from collections.abc import Mapping 4 | from dataclasses import dataclass 5 | from enum import Enum 6 | from pathlib import Path 7 | from typing import ClassVar 8 | import weakref 9 | 10 | from dandi.consts import ( 11 | BIDS_DATASET_DESCRIPTION, 12 | VIDEO_FILE_EXTENSIONS, 13 | ZARR_EXTENSIONS, 14 | ) 15 | from dandi.exceptions import UnknownAssetError 16 | 17 | from .bases import DandiFile, GenericAsset, LocalAsset, NWBAsset, VideoAsset 18 | from .bids import ( 19 | BIDSAsset, 20 | BIDSDatasetDescriptionAsset, 21 | GenericBIDSAsset, 22 | NWBBIDSAsset, 23 | ZarrBIDSAsset, 24 | ) 25 | from .zarr import ZarrAsset 26 | 27 | 28 | class DandiFileType(Enum): 29 | """:meta private:""" 30 | 31 | NWB = 1 32 | ZARR = 2 33 | VIDEO = 3 34 | GENERIC = 4 35 | BIDS_DATASET_DESCRIPTION = 5 36 | 37 | @staticmethod 38 | def classify(path: Path) -> 
DandiFileType: 39 | if path.is_dir(): 40 | if path.suffix in ZARR_EXTENSIONS: 41 | if is_empty_zarr(path): 42 | raise UnknownAssetError("Empty directories cannot be Zarr assets") 43 | return DandiFileType.ZARR 44 | raise UnknownAssetError( 45 | f"Directory has unrecognized suffix {path.suffix!r}" 46 | ) 47 | elif path.name == BIDS_DATASET_DESCRIPTION: 48 | return DandiFileType.BIDS_DATASET_DESCRIPTION 49 | elif path.suffix == ".nwb": 50 | return DandiFileType.NWB 51 | elif path.suffix in VIDEO_FILE_EXTENSIONS: 52 | return DandiFileType.VIDEO 53 | else: 54 | return DandiFileType.GENERIC 55 | 56 | 57 | class DandiFileFactory: 58 | """:meta private:""" 59 | 60 | CLASSES: ClassVar[Mapping[DandiFileType, type[LocalAsset]]] = { 61 | DandiFileType.NWB: NWBAsset, 62 | DandiFileType.ZARR: ZarrAsset, 63 | DandiFileType.VIDEO: VideoAsset, 64 | DandiFileType.GENERIC: GenericAsset, 65 | DandiFileType.BIDS_DATASET_DESCRIPTION: BIDSDatasetDescriptionAsset, 66 | } 67 | 68 | def __call__( 69 | self, filepath: Path, path: str, dandiset_path: Path | None 70 | ) -> DandiFile: 71 | return self.CLASSES[DandiFileType.classify(filepath)]( 72 | filepath=filepath, path=path, dandiset_path=dandiset_path 73 | ) 74 | 75 | 76 | @dataclass 77 | class BIDSFileFactory(DandiFileFactory): 78 | """:meta private:""" 79 | 80 | bids_dataset_description: BIDSDatasetDescriptionAsset 81 | 82 | CLASSES: ClassVar[Mapping[DandiFileType, type[BIDSAsset]]] = { 83 | DandiFileType.NWB: NWBBIDSAsset, 84 | DandiFileType.ZARR: ZarrBIDSAsset, 85 | DandiFileType.VIDEO: GenericBIDSAsset, 86 | DandiFileType.GENERIC: GenericBIDSAsset, 87 | } 88 | 89 | def __call__( 90 | self, filepath: Path, path: str, dandiset_path: Path | None 91 | ) -> DandiFile: 92 | ftype = DandiFileType.classify(filepath) 93 | if ftype is DandiFileType.BIDS_DATASET_DESCRIPTION: 94 | if filepath == self.bids_dataset_description.filepath: 95 | return self.bids_dataset_description 96 | else: 97 | ftype = DandiFileType.GENERIC 98 | df = self.CLASSES[ftype]( 99 | filepath=filepath, 100 | path=path, 101 | dandiset_path=dandiset_path, 102 | bids_dataset_description_ref=weakref.ref(self.bids_dataset_description), 103 | ) 104 | self.bids_dataset_description.dataset_files.append(df) 105 | return df 106 | 107 | 108 | def is_empty_zarr(path: Path) -> bool: 109 | """:meta private:""" 110 | zf = ZarrAsset(filepath=path, path=path.name, dandiset_path=None) 111 | return not any(zf.iterfiles()) 112 | -------------------------------------------------------------------------------- /docs/source/cmdline/organize.rst: -------------------------------------------------------------------------------- 1 | .. _dandi_organize: 2 | 3 | :program:`dandi organize` 4 | ========================= 5 | 6 | :: 7 | 8 | dandi [<global options>] organize [<options>] [<path> ...] 9 | 10 | (Re)organize files according to their metadata. 11 | 12 | The purpose of this command is to take advantage of metadata contained in 13 | :file:`*.nwb` files to provide datasets with consistently-named files whose 14 | names reflect the data they contain. 15 | 16 | :file:`*.nwb` files are organized into a hierarchy of subfolders, one per 17 | "subject", e.g. :file:`sub-0001` if an :file:`*.nwb` file contained a Subject 18 | group with ``subject_id=0001``. Each file in a subject-specific subfolder 19 | follows the pattern:: 20 | 21 | sub-<subject_id>[_key-<value>][_mod1+mod2+...].nwb 22 | 23 | where the following keys are considered if present in the data: 24 | 25 | - ``ses`` — ``session_id`` 26 | - ``tis`` — ``tissue_sample_id`` 27 | - ``slice`` — ``slice_id`` 28 | - ``cell`` — ``cell_id`` 29 | 30 | and ``modX`` are "modalities" as identified based on detected neural data types 31 | (such as "ecephys", "icephys") per extensions found in `nwb-schema definitions 32 | <https://github.com/NeurodataWithoutBorders/nwb-schema/tree/dev/core>`_. 33 | 34 | In addition, an "obj" key with a value corresponding to the crc32 checksum of 35 | "object_id" is added if the aforementioned keys and the list of modalities are 36 | not sufficient to disambiguate different files. 37 | 
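For example (with purely hypothetical identifiers), an :file:`*.nwb` file whose
metadata includes ``subject_id=mouse001`` and ``session_id=20210101``, and in
which only extracellular electrophysiology ("ecephys") data was detected, would
be placed at::

    sub-mouse001/sub-mouse001_ses-20210101_ecephys.nwb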
38 | You can visit https://dandiarchive.org for a growing collection of 39 | (re)organized dandisets. 40 | 41 | Options 42 | ------- 43 | 44 | .. option:: -d, --dandiset-path 45 | 46 | The root directory of the Dandiset to organize files under. If not 47 | specified, the Dandiset under the current directory is assumed. For 48 | 'simulate' mode, the target Dandiset/directory must not exist. 49 | 50 | .. option:: -f, --files-mode [dry|simulate|copy|move|hardlink|symlink|auto] 51 | 52 | How to relocate the files. 53 | 54 | - ``auto`` [default] — The first of ``symlink``, ``hardlink``, and ``copy`` 55 | that is supported by the local filesystem 56 | 57 | - ``dry`` — No action is performed, suggested renames are printed 58 | 59 | - ``simulate`` — A hierarchy of empty files at :option:`--dandiset-path` is 60 | created. Note that the previous layout should be removed prior to this 61 | operation. 62 | 63 | .. option:: --invalid [fail|warn] 64 | 65 | What to do if files without sufficient metadata are encountered [default: 66 | ``fail``] 67 | 68 | .. option:: --media-files-mode [copy|move|symlink|hardlink] 69 | 70 | How to relocate video files referenced by NWB files [default: ``symlink``] 71 | 72 | .. option:: --required-field 73 | 74 | Force a given field to be included in the organized filename of any file 75 | for which it is nonempty. Can be specified multiple times. 76 | 77 | The valid field names are: 78 | 79 | - ``subject_id`` (already required by default) 80 | - ``session_id`` 81 | - ``tissue_sample_id`` 82 | - ``slice_id`` 83 | - ``cell_id`` 84 | - ``probe_ids`` 85 | - ``obj_id`` 86 | - ``modalities`` (already required by default) 87 | - ``extension`` (already required by default) 88 | 89 | .. option:: --update-external-file-paths 90 | 91 | Rewrite the ``external_file`` arguments of ImageSeries in NWB files. The 92 | new values will correspond to the new locations of the video files after 93 | being organized. This option requires :option:`--files-mode` to be 94 | "``copy``" or "``move``". 95 | 96 | Development Options 97 | ------------------- 98 | 99 | The following options are intended only for development & testing purposes. 100 | They are only available if the :envvar:`DANDI_DEVEL` environment variable is 101 | set to a nonempty value. 102 | 103 | .. option:: --devel-debug 104 | 105 | Do not use pyout callbacks, do not swallow exceptions, do not parallelize. 
106 | -------------------------------------------------------------------------------- /dandi/cli/cmd_move.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import click 4 | 5 | from .base import devel_debug_option, instance_option, map_to_click_exceptions 6 | from ..move import MoveExisting, MoveWorkOn 7 | 8 | 9 | @click.command() 10 | @click.option( 11 | "-d", "--dandiset", metavar="URL", help="The remote Dandiset to operate on" 12 | ) 13 | @click.option( 14 | "--dry-run", is_flag=True, help="Show what would be moved but do not move anything" 15 | ) 16 | @click.option( 17 | "-e", 18 | "--existing", 19 | type=click.Choice(list(MoveExisting)), 20 | default="error", 21 | help="How to handle assets that would be moved to a destination that already exists", 22 | show_default=True, 23 | ) 24 | @click.option("-J", "--jobs", type=int, help="Number of assets to move in parallel") 25 | @click.option( 26 | "--regex", 27 | is_flag=True, 28 | help="Perform a regex substitution on all asset paths in the directory", 29 | ) 30 | @click.option( 31 | "-w", 32 | "--work-on", 33 | type=click.Choice(list(MoveWorkOn)), 34 | default="auto", 35 | help=( 36 | "Whether to operate on the local Dandiset, remote Dandiset, or both;" 37 | " 'auto' means 'remote' when `--dandiset` is given and 'both' otherwise" 38 | ), 39 | show_default=True, 40 | ) 41 | @click.argument( 42 | "paths", nargs=-1, required=True, type=click.Path(exists=False, dir_okay=True) 43 | ) 44 | @instance_option() 45 | @devel_debug_option() 46 | @map_to_click_exceptions 47 | def move( 48 | paths: tuple[str, ...], 49 | dandiset: str | None, 50 | dry_run: bool, 51 | existing: MoveExisting, 52 | jobs: int | None, 53 | regex: bool, 54 | work_on: MoveWorkOn, 55 | dandi_instance: str, 56 | devel_debug: bool = False, 57 | ) -> None: 58 | """ 59 | Move or rename assets in a local Dandiset and/or on the server. The `dandi 60 | move` command takes one or more source paths of the assets to move, 61 | followed by a destination path indicating where to move them to. Paths 62 | given on the command line must use forward slashes (/) as path separators, 63 | even on Windows. In addition, when running the command inside a 64 | subdirectory of a Dandiset, all paths must be relative to the subdirectory, 65 | even if only operating on the remote Dandiset. (The exception is when the 66 | `--dandiset` option is given in order to operate on an arbitrary remote 67 | Dandiset, in which case any local Dandiset is ignored.) 68 | 69 | If there is more than one source path, or if the destination path either 70 | names an existing directory or ends in a trailing forward slash (/), then 71 | the source assets are placed within the destination directory. Otherwise, 72 | the single source path is renamed to the given destination path. 73 | 74 | Alternatively, if the `--regex` option is given, then there must be exactly 75 | two arguments on the command line: a Python regular expression and a 76 | replacement string, possibly containing regex backreferences. 77 | :program:`dandi move` will then apply the regular expression to the path 78 | of every asset in the current directory recursively (using paths relative 79 | to the current directory, if in a subdirectory of a Dandiset); if a path 80 | matches, the matching portion is replaced with the replacement string, 81 | after expanding any backreferences. 82 | 83 | For more information, including examples, see 84 | . 85 | """ 86 | 87 | from .. 
import move as move_mod 88 | 89 | if len(paths) < 2: 90 | raise ValueError("At least two paths are required") 91 | move_mod.move( 92 | *paths[:-1], 93 | dest=paths[-1], 94 | regex=regex, 95 | existing=existing, 96 | dandi_instance=dandi_instance, 97 | dandiset=dandiset, 98 | work_on=work_on, 99 | devel_debug=devel_debug, 100 | jobs=jobs, 101 | dry_run=dry_run, 102 | ) 103 | -------------------------------------------------------------------------------- /dandi/cli/cmd_upload.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import click 4 | 5 | from .base import ( 6 | IntColonInt, 7 | devel_debug_option, 8 | devel_option, 9 | instance_option, 10 | map_to_click_exceptions, 11 | ) 12 | from ..upload import UploadExisting, UploadValidation 13 | 14 | 15 | @click.command() 16 | @click.option( 17 | "-e", 18 | "--existing", 19 | type=click.Choice(list(UploadExisting)), 20 | help="What to do if a file is found already existing on the server. 'skip' would skip " 21 | "the file, 'force' - force reupload, 'overwrite' - force upload if " 22 | "either size or modification time differs; 'refresh' - upload only if " 23 | "local modification time is ahead of the remote.", 24 | default="refresh", 25 | show_default=True, 26 | ) 27 | @click.option( 28 | "-J", 29 | "--jobs", 30 | "jobs_pair", 31 | type=IntColonInt(), 32 | help=( 33 | "Number of assets to upload in parallel and, optionally, number of" 34 | " upload threads per asset [default: 5:5]" 35 | ), 36 | ) 37 | @click.option( 38 | "--sync", is_flag=True, help="Delete assets on the server that do not exist locally" 39 | ) 40 | @click.option( 41 | "--validation", 42 | help="Controls validation requirements before upload. (Setting this option to a " 43 | "value other than 'require' is highly discouraged.) " 44 | "'require' - data must pass validation before upload; " 45 | "'skip' - no validation is performed on data before upload; " 46 | "'ignore' - data is validated but upload proceeds regardless of validation results.", 47 | type=click.Choice(list(UploadValidation)), 48 | default="require", 49 | show_default=True, 50 | ) 51 | @click.argument("paths", nargs=-1) # , type=click.Path(exists=True, dir_okay=False)) 52 | # & 53 | # Development options: Set DANDI_DEVEL for them to become available 54 | # 55 | # TODO: should always go to dandi for now 56 | @instance_option() 57 | @devel_option( 58 | "--allow-any-path", 59 | help="For development: allow DANDI 'unsupported' file types/paths", 60 | default=False, 61 | is_flag=True, 62 | ) 63 | @devel_option( 64 | "--upload-dandiset-metadata", 65 | help="For development: do upload dandiset metadata", 66 | default=False, 67 | is_flag=True, 68 | ) 69 | @devel_debug_option() 70 | @map_to_click_exceptions 71 | def upload( 72 | paths: tuple[str, ...], 73 | jobs_pair: tuple[int, int] | None, 74 | sync: bool, 75 | dandi_instance: str, 76 | existing: UploadExisting, 77 | validation: UploadValidation, 78 | # Development options should come as kwargs 79 | allow_any_path: bool = False, 80 | upload_dandiset_metadata: bool = False, 81 | devel_debug: bool = False, 82 | ) -> None: 83 | """ 84 | Upload Dandiset files to DANDI Archive. 85 | 86 | The target Dandiset to upload to must already be registered in the archive, 87 | and a `dandiset.yaml` file must exist in the common ancestor of the given 88 | paths (or the current directory, if no paths are specified) or a parent 89 | directory thereof. 90 | 91 | The local Dandiset should pass validation. 
For that, the assets should first 92 | be organized using the `dandi organize` command. 93 | 94 | By default, all .nwb, .zarr, and .ngff assets in the Dandiset (ignoring 95 | directories starting with a period) will be considered for the upload. You 96 | can point to specific files you would like to validate and have uploaded. 97 | """ 98 | # Avoid heavy imports by importing within the function: 99 | from ..upload import upload 100 | 101 | if jobs_pair is None: 102 | jobs = None 103 | jobs_per_file = None 104 | else: 105 | jobs, jobs_per_file = jobs_pair 106 | 107 | upload( 108 | paths, 109 | existing=existing, 110 | validation=validation, 111 | dandi_instance=dandi_instance, 112 | allow_any_path=allow_any_path, 113 | upload_dandiset_metadata=upload_dandiset_metadata, 114 | devel_debug=devel_debug, 115 | jobs=jobs, 116 | jobs_per_file=jobs_per_file, 117 | sync=sync, 118 | ) 119 | -------------------------------------------------------------------------------- /docs/source/ref/urls.rst: -------------------------------------------------------------------------------- 1 | .. currentmodule:: dandi.dandiarchive 2 | 3 | .. _resource_ids: 4 | 5 | Resource Identifiers 6 | ==================== 7 | 8 | ``dandi`` commands and Python functions accept URLs and URL-like identifiers in 9 | the following formats for identifying Dandisets, assets, and asset collections. 10 | 11 | Text in [brackets] is optional. A ``server`` field is a base API or GUI URL 12 | for a DANDI Archive instance. If an optional ``version`` field is omitted from 13 | a URL, the given Dandiset's most recent published version will be used if it 14 | has one, and its draft version will be used otherwise. 15 | 16 | - :samp:`https://identifiers.org/DANDI:{dandiset-id}[/{version}]` 17 | (case insensitive; ``version`` cannot be "draft") when it redirects 18 | to one of the other URL formats 19 | 20 | - :samp:`{instance-name}:{dandiset-id}[/{version}]` (case insensitive, 21 | where ``instance-name`` is a known DANDI instance such as ``DANDI``, 22 | ``DANDI-SANDBOX``, ``LINC``, ``EMBER``, etc.) 23 | — Refers to a Dandiset on the specified DANDI Archive instance. 24 | `parse_dandi_url()` converts this format to a `DandisetURL`. 25 | 26 | - Any ``https://gui.dandiarchive.org/`` or 27 | ``https://*dandiarchive-org.netlify.app/`` URL which redirects to 28 | one of the other URL formats 29 | 30 | - :samp:`https://{server}[/api]/[#/]dandiset/{dandiset-id}[/{version}][/files]` 31 | — Refers to a Dandiset. `parse_dandi_url()` converts this format to a 32 | `DandisetURL`. 33 | 34 | - :samp:`https://{server}[/api]/[#/]dandiset/{dandiset-id}[/{version}]/files?location={path}` 35 | 36 | - If the ``glob``/``--path-type glob`` option is in effect, the URL refers to 37 | a collection of assets whose paths match the glob pattern ``path``, and 38 | `parse_dandi_url()` will convert the URL to an `AssetGlobURL`. 39 | 40 | - If the ``glob``/``--path-type glob`` option is not in effect, the URL 41 | refers to an asset folder by path, and `parse_dandi_url()` will convert the 42 | URL to an `AssetFolderURL`. 43 | 44 | - :samp:`https://{server}[/api]/dandisets/{dandiset-id}[/versions[/{version}]]` 45 | — Refers to a Dandiset. `parse_dandi_url()` converts this format to a 46 | `DandisetURL`. 47 | 48 | - :samp:`https://{server}[/api]/assets/{asset-id}[/download]` — Refers to a 49 | single asset by identifier. `parse_dandi_url()` converts this format to a 50 | `BaseAssetIDURL`. 
51 | 52 | - :samp:`https://{server}[/api]/dandisets/{dandiset-id}/versions/{version}/assets/{asset-id}[/download]` 53 | — Refers to a single asset by identifier. `parse_dandi_url()` converts this 54 | format to an `AssetIDURL`. 55 | 56 | - :samp:`https://{server}[/api]/dandisets/{dandiset-id}/versions/{version}/assets/?path={path}` 57 | — Refers to all assets in the given Dandiset whose paths begin with the 58 | prefix ``path``. `parse_dandi_url()` converts this format to an 59 | `AssetPathPrefixURL`. 60 | 61 | - :samp:`https://{server}[/api]/dandisets/{dandiset-id}/versions/{version}/assets/?glob={path}` 62 | — Refers to all assets in the given Dandiset whose paths match the glob 63 | pattern ``path``. `parse_dandi_url()` converts this format to an 64 | `AssetGlobURL`. 65 | 66 | - :samp:`dandi://{instance-name}/{dandiset-id}[@{version}]` (where 67 | ``instance-name`` is the name of a registered DANDI instance) — 68 | Refers to a Dandiset. `parse_dandi_url()` converts this format to a 69 | `DandisetURL`. 70 | 71 | - :samp:`dandi://{instance-name}/{dandiset-id}[@{version}]/{path}` (where 72 | ``instance-name`` is the name of a registered DANDI instance) 73 | 74 | - If the ``glob``/``--path-type glob`` option is in effect, the URL refers to 75 | a collection of assets whose paths match the glob pattern ``path``, and 76 | `parse_dandi_url()` will convert the URL to an `AssetGlobURL`. 77 | 78 | - If the ``glob``/``--path-type glob`` option is not in effect and ``path`` 79 | ends with a trailing slash, the URL refers to an asset folder by path, and 80 | `parse_dandi_url()` will convert the URL to an `AssetFolderURL`. 81 | 82 | - If the ``glob``/``--path-type glob`` option is not in effect and ``path`` 83 | does not end with a trailing slash, the URL refers to a single asset by 84 | path, and `parse_dandi_url()` will convert the URL to an `AssetItemURL`. 85 | 86 | - Any other HTTPS URL that redirects to one of the above 87 | -------------------------------------------------------------------------------- /dandi/tests/data/dandiarchive-docker/docker-compose.yml: -------------------------------------------------------------------------------- 1 | # Based on 2 | # , 3 | # , and 4 | # , 5 | # but using images uploaded to Docker Hub instead of building them locally. 6 | 7 | services: 8 | django: 9 | image: dandiarchive/dandiarchive-api 10 | command: ["./manage.py", "runserver", "--nothreading", "0.0.0.0:8000"] 11 | # Log printing via Rich is enhanced by a TTY 12 | tty: true 13 | depends_on: 14 | minio: 15 | condition: service_healthy 16 | postgres: 17 | condition: service_healthy 18 | rabbitmq: 19 | condition: service_started 20 | environment: &django_env 21 | DJANGO_SETTINGS_MODULE: dandiapi.settings.development 22 | DJANGO_DATABASE_URL: postgres://postgres:postgres@postgres:5432/django 23 | DJANGO_CELERY_BROKER_URL: amqp://rabbitmq:5672/ 24 | # The Minio URL needs to use 127.0.0.1 instead of localhost so that blob 25 | # assets' "S3 URLs" will use 127.0.0.1, and thus tests that try to open 26 | # these URLs via fsspec will not fail on systems where localhost is both 27 | # 127.0.0.1 and ::1. 28 | DJANGO_MINIO_STORAGE_URL: http://minioAccessKey:minioSecretKey@minio:9000/dandi-dandisets 29 | DJANGO_MINIO_STORAGE_MEDIA_URL: http://127.0.0.1:9000/dandi-dandisets 30 | # When in Docker, the bridge network sends requests from the host machine exclusively via a 31 | # dedicated IP address. 
Since there's no way to determine the real origin address, 32 | # consider any IP address (though actually this will only be the single dedicated address) to 33 | # be internal. This relies on the host to set up appropriate firewalls for Docker, to prevent 34 | # access from non-internal addresses. 35 | DJANGO_INTERNAL_IPS: 0.0.0.0/0 36 | DJANGO_DANDI_WEB_APP_URL: http://localhost:8085 37 | DJANGO_DANDI_API_URL: http://localhost:8000 38 | DJANGO_DANDI_JUPYTERHUB_URL: https://hub.dandiarchive.org 39 | DJANGO_DANDI_INSTANCE_NAME: ${DANDI_TESTS_INSTANCE_NAME:-DANDI} 40 | DJANGO_DANDI_INSTANCE_IDENTIFIER: ${DANDI_TESTS_INSTANCE_IDENTIFIER:-RRID:ABC_123456} 41 | DJANGO_DANDI_DOI_API_PREFIX: ${DANDI_TESTS_DOI_PREFIX:-10.80507} 42 | ports: 43 | - "127.0.0.1:8000:8000" 44 | 45 | celery: 46 | image: dandiarchive/dandiarchive-api 47 | command: [ 48 | "uv", 49 | "run", 50 | "celery", 51 | "--app", "dandiapi.celery", 52 | "worker", 53 | "--loglevel", "INFO", 54 | "--without-heartbeat", 55 | "-Q","celery,calculate_sha256,ingest_zarr_archive,manifest-worker", 56 | "-c","1", 57 | "-B" 58 | ] 59 | # Docker Compose does not set the TTY width, which causes Celery errors 60 | tty: false 61 | depends_on: 62 | minio: 63 | condition: service_healthy 64 | postgres: 65 | condition: service_healthy 66 | rabbitmq: 67 | condition: service_started 68 | environment: 69 | << : *django_env 70 | DJANGO_DANDI_VALIDATION_JOB_INTERVAL: "5" 71 | ulimits: 72 | # https://github.com/celery/billiard/pull/417 73 | nofile: 74 | soft: 1000 75 | hard: 3000 76 | 77 | minio: 78 | image: minio/minio:latest 79 | # When run with a TTY, minio prints credentials on startup 80 | tty: true 81 | command: ["server", "/data"] 82 | ports: 83 | - "127.0.0.1:9000:9000" 84 | environment: 85 | MINIO_ROOT_USER: minioAccessKey 86 | MINIO_ROOT_PASSWORD: minioSecretKey 87 | healthcheck: 88 | test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] 89 | interval: 7s 90 | timeout: 3s 91 | retries: 5 92 | 93 | postgres: 94 | environment: 95 | POSTGRES_DB: django 96 | POSTGRES_PASSWORD: postgres 97 | image: postgres 98 | command: postgres -c log_lock_waits=on -c log_min_duration_statement=100 99 | expose: 100 | - "5432" 101 | healthcheck: 102 | test: ["CMD", "pg_isready", "-U", "postgres"] 103 | interval: 7s 104 | timeout: 3s 105 | retries: 5 106 | 107 | rabbitmq: 108 | image: rabbitmq:management 109 | expose: 110 | - "5672" 111 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DANDI Client 2 | 3 | [![Tests](https://github.com/dandi/dandi-cli/workflows/Tests/badge.svg)](https://github.com/dandi/dandi-cli/actions?query=workflow%3ATests) 4 | [![codecov.io](https://codecov.io/github/dandi/dandi-cli/coverage.svg?branch=master)](https://codecov.io/github/dandi/dandi-cli?branch=master) 5 | [![Conda](https://anaconda.org/conda-forge/dandi/badges/version.svg)](https://anaconda.org/conda-forge/dandi) 6 | [![Gentoo (::science)](https://repology.org/badge/version-for-repo/gentoo_ovl_science/dandi-cli.svg?header=Gentoo%20%28%3A%3Ascience%29)](https://repology.org/project/dandi-cli/versions) 7 | [![GitHub release](https://img.shields.io/github/release/dandi/dandi-cli.svg)](https://GitHub.com/dandi/dandi-cli/releases/) 8 | [![PyPI version fury.io](https://badge.fury.io/py/dandi.svg)](https://pypi.python.org/pypi/dandi/) 9 | [![Documentation 
Status](https://readthedocs.org/projects/dandi/badge/?version=latest)](https://dandi.readthedocs.io/en/latest/?badge=latest) 10 | 11 | The [DANDI Python client](https://pypi.org/project/dandi/) allows you to: 12 | 13 | * Download `Dandisets` and individual subject folders or files 14 | * Validate data locally for conformance to standards 15 | * Organize your data locally before upload 16 | * Upload `Dandisets` 17 | * Interact with the DANDI instance's web API from Python 18 | * Delete data in the DANDI instance 19 | * Perform other auxiliary operations with data on the DANDI instance 20 | 21 | **Note**: This project is under active development. See [the issues log](https://github.com/dandi/dandi-cli/issues) or 22 | [Work-in-Progress (WiP)](https://github.com/dandi/dandi-cli/pulls). 23 | 24 | ## Installation 25 | 26 | DANDI Client releases are [available from PyPI](https://pypi.org/project/dandi) 27 | and [conda-forge](https://anaconda.org/conda-forge/dandi). Install them in your Python (native, virtualenv, or 28 | conda) environment via 29 | 30 | pip install dandi 31 | 32 | or 33 | 34 | conda install -c conda-forge dandi 35 | 36 | 37 | ## CLI Tool 38 | 39 | This package provides a command line utility with a basic interface 40 | to help you prepare and upload your data to, or obtain data from, a DANDI instance such as the [DANDI Archive](http://dandiarchive.org). 41 | 42 | 43 | ```bash 44 | $> dandi 45 | Usage: dandi [OPTIONS] COMMAND [ARGS]... 46 | 47 | A client to support interactions with DANDI instances, such as the DANDI 48 | Archive (http://dandiarchive.org). 49 | 50 | To see help for a specific command, run 51 | 52 | dandi COMMAND --help 53 | 54 | e.g. dandi upload --help 55 | 56 | Options: 57 | --version 58 | -l, --log-level [DEBUG|INFO|WARNING|ERROR|CRITICAL] 59 | Log level (case insensitive). May be 60 | specified as an integer. [default: INFO] 61 | --pdb Fall into pdb if errors out 62 | --help Show this message and exit. 63 | 64 | Commands: 65 | delete Delete dandisets and assets from the server. 66 | digest Calculate file digests 67 | download Download files or entire folders from DANDI. 68 | instances List known DANDI instances that the CLI can interact... 69 | ls List .nwb files and dandisets metadata. 70 | move Move or rename assets in a local Dandiset and/or on... 71 | organize (Re)organize NWB files according to their metadata. 72 | service-scripts Various utility operations 73 | shell-completion Emit shell script for enabling command completion. 74 | upload Upload Dandiset files to DANDI Archive. 75 | validate Validate files for data standards compliance. 76 | validate-bids Validate BIDS paths. 77 | ``` 78 | Run `dandi --help` or `dandi COMMAND --help` (e.g. `dandi upload --help`) to see manual pages. 79 | 80 | ## Resources 81 | 82 | * To learn how to interact with the DANDI Archive and for examples on how to use the DANDI Client in various use cases, 83 | see the [DANDI Docs](https://docs.dandiarchive.org) 84 | (specifically the sections on using the CLI to 85 | [download](https://docs.dandiarchive.org/12_download/) and 86 | [upload](https://docs.dandiarchive.org/13_upload/) `Dandisets`). 87 | 88 | * To get help: 89 | - ask a question: https://github.com/dandi/helpdesk/discussions 90 | - file a feature request or bug report: https://github.com/dandi/helpdesk/issues/new/choose 91 | - contact the DANDI team: help@dandiarchive.org 92 | 93 | * To understand how to contribute to the dandi-cli repository, see the [DEVELOPMENT.md](./DEVELOPMENT.md) file. 
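## Python API

The web API can also be used programmatically via the `dandi.dandiapi` module. A minimal, illustrative sketch (the Dandiset ID `000027` is just an example; anonymous access suffices for read-only operations, while uploads and other write operations additionally require an API key):

```python
from dandi.dandiapi import DandiAPIClient

# Read-only access to the main DANDI instance
with DandiAPIClient.for_dandi_instance("dandi") as client:
    dandiset = client.get_dandiset("000027")
    for asset in dandiset.get_assets():
        print(asset.path, asset.size)
```

See the [dandi documentation](https://dandi.readthedocs.io/en/latest/) for the full API reference.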
94 | -------------------------------------------------------------------------------- /dandi/cli/tests/data/update_dandiset_from_doi/nature.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "DANDI:000005/draft", 3 | "url": "http://localhost:8085/dandiset/000005/draft", 4 | "name": "Coregistration of heading to visual cues in retrosplenial cortex", 5 | "access": [ 6 | { 7 | "status": "dandi:OpenAccess", 8 | "schemaKey": "AccessRequirements" 9 | } 10 | ], 11 | "license": [ 12 | "spdx:CC0-1.0" 13 | ], 14 | "version": "draft", 15 | "@context": "https://raw.githubusercontent.com/dandi/schema/master/releases/0.6.4/context.json", 16 | "citation": "Sit, Kevin K.; Goard, Michael J. (2023) Coregistration of heading to visual cues in retrosplenial cortex (Version draft) [Data set]. DANDI Archive. http://localhost:8085/dandiset/000005/draft", 17 | "schemaKey": "Dandiset", 18 | "identifier": "DANDI:000005", 19 | "repository": "http://localhost:8085", 20 | "contributor": [ 21 | { 22 | "name": "Tests, DANDI-Cli", 23 | "email": "nemo@example.com", 24 | "roleName": [ 25 | "dcite:Author", 26 | "dcite:ContactPerson" 27 | ], 28 | "schemaKey": "Person" 29 | }, 30 | { 31 | "name": "Sit, Kevin K.", 32 | "roleName": [ 33 | "dcite:Author" 34 | ], 35 | "schemaKey": "Person", 36 | "affiliation": [], 37 | "includeInCitation": true 38 | }, 39 | { 40 | "name": "Goard, Michael J.", 41 | "roleName": [ 42 | "dcite:Author" 43 | ], 44 | "schemaKey": "Person", 45 | "identifier": "0000-0002-5366-8501", 46 | "affiliation": [], 47 | "includeInCitation": true 48 | } 49 | ], 50 | "dateCreated": "2023-04-25T16:28:31.601155Z", 51 | "description": "AbstractSpatial cognition depends on an accurate representation of orientation within an environment. Head direction cells in distributed brain regions receive a range of sensory inputs, but visual input is particularly important for aligning their responses to environmental landmarks. To investigate how population-level heading responses are aligned to visual input, we recorded from retrosplenial cortex (RSC) of head-fixed mice in a moving environment using two-photon calcium imaging. We show that RSC neurons are tuned to the animal\u2019s relative orientation in the environment, even in the absence of head movement. Next, we found that RSC receives functionally distinct projections from visual and thalamic areas and contains several functional classes of neurons. While some functional classes mirror RSC inputs, a newly discovered class coregisters visual and thalamic signals. Finally, decoding analyses reveal unique contributions to heading from each class. 
Our results suggest an RSC circuit for anchoring heading representations to environmental visual landmarks.", 52 | "assetsSummary": { 53 | "schemaKey": "AssetsSummary", 54 | "numberOfBytes": 0, 55 | "numberOfFiles": 0 56 | }, 57 | "schemaVersion": "0.6.4", 58 | "wasGeneratedBy": [ 59 | { 60 | "id": "urn:uuid:71209ec0-1378-4b50-a3d8-73d0a7d7315e", 61 | "name": "Metadata extraction from DOI", 62 | "endDate": "2023-04-25 12:28:32.455952-04:00", 63 | "schemaKey": "Activity", 64 | "startDate": "2023-04-25 12:28:32.087930-04:00", 65 | "description": "Metadata (contributor, name, description, relatedResource) was enhanced with data from DOI 10.1038/s41467-023-37704-5 by DANDI cli", 66 | "wasAssociatedWith": [ 67 | { 68 | "url": "https://github.com/dandi/dandi-cli", 69 | "name": "DANDI Command Line Interface", 70 | "version": "0.53.0+16.g9e9bc9c.dirty", 71 | "schemaKey": "Software", 72 | "identifier": "RRID:SCR_019009" 73 | } 74 | ] 75 | } 76 | ], 77 | "relatedResource": [ 78 | { 79 | "url": "https://doi.org/10.1038/s41467-023-37704-5", 80 | "name": "Coregistration of heading to visual cues in retrosplenial cortex", 81 | "relation": "dcite:IsDescribedBy", 82 | "schemaKey": "Resource", 83 | "identifier": "10.1038/s41467-023-37704-5" 84 | } 85 | ], 86 | "manifestLocation": [ 87 | "http://localhost:8000/api/dandisets/000005/versions/draft/assets/" 88 | ] 89 | } 90 | -------------------------------------------------------------------------------- /dandi/support/iterators.py: -------------------------------------------------------------------------------- 1 | """Various helpful iterators""" 2 | 3 | from queue import Empty, Queue 4 | from threading import Thread 5 | 6 | 7 | class IteratorWithAggregation: 8 | """ 9 | An iterable over an iterable which also makes an aggregate of the values available asap 10 | 11 | It iterates over the iterable in a separate thread. 12 | 13 | A use case is a generator which collects information about resources, 14 | which might be relatively fast but still take time. While we are iterating over it, 15 | we could perform other operations on yielded records, but we would also like to have access to 16 | the "summary" object as soon as that iterator completes but while we might still be 17 | iterating over items in the outside loop. 18 | 19 | Use case: iterate over remote resource for downloads, and get "Total" size/number as 20 | soon as it becomes known inside the underlying iterator. 21 | 22 | TODO: probably could be more elegant etc if implemented via async/coroutines. 23 | 24 | Attributes 25 | ---------- 26 | .total: 27 | Aggregated value as known to the moment. None if nothing was aggregated. 28 | It is a final value if `finished` is True. 
29 | .finished: bool 30 | Set to True upon completion of iteration 31 | .exc: BaseException or None 32 | If not None -- the exception which was raised 33 | 34 | Example 35 | ------- 36 | 37 | A very simplistic example, since typically (unlike with a plain range) the 38 | nested iteration would take some time:: 39 | 40 | it = IteratorWithAggregation(range(3), lambda v, t=0: v+t) 41 | for v in it: 42 | print(it.total, it.finished, v) 43 | sleep(0.02) # doing something heavy, but we would know .total as soon as it is known 44 | 45 | would produce (3 is known right away here, since it is just a range) 46 | 47 | 3 True 0 48 | 3 True 1 49 | 3 True 2 50 | 51 | """ 52 | 53 | def __init__(self, gen, agg, reraise_immediately=False): 54 | """ 55 | 56 | Parameters 57 | ---------- 58 | gen: iterable 59 | Generator (or any other iterable, though a non-generator would not make much sense) 60 | to yield from 61 | agg: callable 62 | A callable with two args: new_value[, total=None] which should return adjusted 63 | total. Upon first iteration, no prior `total` is provided 64 | reraise_immediately: bool, optional 65 | If True, it would stop yielding values as soon as it detects that some 66 | exception has occurred (although there might still be values in the queue to be yielded 67 | which were collected before the exception was raised) 68 | """ 69 | self.gen = gen 70 | self.agg = agg 71 | self.reraise_immediately = reraise_immediately 72 | 73 | self.total = None 74 | self.finished = None 75 | self._exc = None 76 | 77 | def __iter__(self): 78 | yield from self.feed(self.gen) 79 | 80 | def feed(self, iterator): 81 | self.finished = False 82 | self._exc = None 83 | 84 | queue = Queue() 85 | 86 | def worker(): 87 | """That is the one which interrogates gen and places total 88 | into queue_total upon completion""" 89 | total = None 90 | try: 91 | for value in iterator: 92 | queue.put(value) 93 | self.total = total = ( 94 | self.agg(value, total) if total is not None else self.agg(value) 95 | ) 96 | except BaseException as e: # lgtm [py/catch-base-exception] 97 | self._exc = e 98 | finally: 99 | self.finished = True 100 | 101 | t = Thread(target=worker) 102 | t.start() 103 | 104 | # yield from the queue (.total and .finished could be accessed meanwhile) 105 | while True: 106 | if self.reraise_immediately and self._exc is not None: 107 | break 108 | 109 | # race condition HERE between checking self.finished and queue.empty() below 110 | if self.finished and queue.empty(): 111 | break 112 | # in general the queue should not be empty, but it could be, e.g. due to a race 113 | # condition with the above check 114 | try: 115 | yield queue.get(timeout=0.001) 116 | except Empty: 117 | continue 118 | t.join() 119 | if self._exc is not None: 120 | raise self._exc # lgtm [py/illegal-raise] 121 | -------------------------------------------------------------------------------- /dandi/cli/base.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | import os 3 | 4 | import click 5 | 6 | from .. 
import get_logger 7 | 8 | lgr = get_logger() 9 | 10 | # Aux common functionality 11 | 12 | 13 | class IntColonInt(click.ParamType): 14 | name = "int:int" 15 | 16 | def convert(self, value, param, ctx): 17 | if isinstance(value, str): 18 | v1, colon, v2 = value.partition(":") 19 | try: 20 | v1 = int(v1) 21 | v2 = int(v2) if colon else None 22 | except ValueError: 23 | self.fail("Value must be of the form `N[:M]`", param, ctx) 24 | return (v1, v2) 25 | else: 26 | return value 27 | 28 | def get_metavar(self, param, ctx=None): 29 | return "N[:M]" 30 | 31 | 32 | class ChoiceList(click.ParamType): 33 | name = "choice-list" 34 | 35 | def __init__(self, values): 36 | self.values = set(values) 37 | 38 | def convert(self, value, param, ctx): 39 | if value is None or isinstance(value, set): 40 | return value 41 | selected = set() 42 | for v in value.split(","): 43 | if v == "all": 44 | selected = self.values.copy() 45 | elif v in self.values: 46 | selected.add(v) 47 | else: 48 | must_be = ", ".join(sorted(self.values)) + ", all" 49 | self.fail( 50 | f"{v!r}: invalid value; must be one of: {must_be}", param, ctx 51 | ) 52 | return selected 53 | 54 | def get_metavar(self, param, ctx=None): 55 | return "[" + ",".join(self.values) + ",all]" 56 | 57 | 58 | # ???: could make them always available but hidden 59 | # via hidden=True. 60 | def devel_option(*args, **kwargs): 61 | """A helper to make command line options useful for development (only) 62 | 63 | They will become available only if the DANDI_DEVEL environment variable is set to a nonempty value.""" 64 | 65 | def wrapper(f): 66 | if not os.environ.get("DANDI_DEVEL", None): 67 | return f 68 | else: 69 | return click.option(*args, **kwargs)(f) 70 | 71 | return wrapper 72 | 73 | 74 | # 75 | # Common options to reuse 76 | # 77 | # Functions to provide customizations where needed 78 | def _updated_option(*args, **kwargs): 79 | args, d = args[:-1], args[-1] 80 | kwargs.update(d) 81 | return click.option(*args, **kwargs) 82 | 83 | 84 | def dandiset_path_option(**kwargs): 85 | return _updated_option( 86 | "-d", 87 | "--dandiset-path", 88 | kwargs, 89 | help="Top directory (local) of the dandiset.", 90 | type=click.Path(exists=True, dir_okay=True, file_okay=False), 91 | ) 92 | 93 | 94 | def instance_option(**kwargs): 95 | params = { 96 | "help": "DANDI instance to use", 97 | "default": "dandi", 98 | "show_default": True, 99 | "envvar": "DANDI_INSTANCE", 100 | "show_envvar": True, 101 | } 102 | params.update(kwargs) 103 | return click.option("-i", "--dandi-instance", **params) 104 | 105 | 106 | def devel_debug_option(): 107 | return devel_option( 108 | "--devel-debug", 109 | help="For development: do not use pyout callbacks, do not swallow" 110 | " exceptions, do not parallelize", 111 | default=False, 112 | is_flag=True, 113 | ) 114 | 115 | 116 | def map_to_click_exceptions(f): 117 | """Catch all exceptions and re-raise as click exceptions. 118 | 119 | Will be active only if DANDI_DEVEL is not set and --pdb is not given 120 | """ 121 | 122 | @click.pass_obj 123 | @wraps(f) 124 | def wrapper(obj, *args, **kwargs): 125 | try: 126 | return f(*args, **kwargs) 127 | # Raising click.UsageError would print the global Usage:, which is useless in 
        # It seems better to use it with some ctx, so it would hint in some
        # cases at the help of a specific command
        # except ValueError as e:
        #     raise click.UsageError(str(e))
        except Exception as e:
            e_str = str(e)
            lgr.debug("Caught exception %s", e_str, exc_info=True)
            if not map_to_click_exceptions._do_map:
                raise
            raise click.ClickException(e_str)
        finally:
            if obj is not None:
                # obj is None when invoking a subcommand directly (as is done
                # during testing) instead of via the `main` command.
                lgr.info("Logs saved in %s", obj.logfile)

    return wrapper


map_to_click_exceptions._do_map = not bool(  # type: ignore[attr-defined]
    os.environ.get("DANDI_DEVEL", None)
)

--------------------------------------------------------------------------------
/dandi/cli/cmd_organize.py:
--------------------------------------------------------------------------------
from __future__ import annotations

import click

from .base import dandiset_path_option, devel_debug_option, map_to_click_exceptions
from ..consts import dandi_layout_fields
from ..organize import CopyMode, FileOperationMode, OrganizeInvalid


@click.command()
@dandiset_path_option(
    help="The root directory of the Dandiset to organize files under. "
    "If not specified, the Dandiset under the current directory is assumed. "
    "For 'simulate' mode, the target Dandiset/directory must not exist.",
    type=click.Path(dir_okay=True, file_okay=False),
)
@click.option(
    "--invalid",
    help="What to do if files without sufficient metadata are encountered.",
    type=click.Choice(list(OrganizeInvalid)),
    default="fail",
    show_default=True,
)
@click.option(
    "-f",
    "--files-mode",
    help="If 'dry' - no action is performed, suggested renames are printed. "
    "If 'simulate' - a hierarchy of empty files at --dandiset-path is created. "
    "Note that the previous layout should be removed prior to this operation. "
    "If 'auto' - whichever of symlink, hardlink, copy is allowed by the system. "
    "The other modes (copy, move, symlink, hardlink) define how data files "
    "should be made available.",
    type=click.Choice(list(FileOperationMode)),
    default="auto",
    show_default=True,
)
@click.option(
    "--update-external-file-paths",
    is_flag=True,
    default=False,
    help="Rewrite the 'external_file' arguments of ImageSeries in NWB files. "
    "The new values will correspond to the new locations of the video files "
    "after being organized. "
    "This option requires --files-mode to be 'copy' or 'move'",
)
@click.option(
    "--media-files-mode",
    type=click.Choice(list(CopyMode)),
    default=None,
    help="How to relocate video files referenced by NWB files",
)
@click.option(
    "--required-field",
    "required_fields",
    type=click.Choice(list(dandi_layout_fields)),
    multiple=True,
    help=(
        "Force a given field to be included in the organized filename of any"
        " file for which it is nonempty. Can be specified multiple times."
    ),
)
@click.argument("paths", nargs=-1, type=click.Path(exists=True))
@click.option("-J", "--jobs", type=int, help="Number of jobs during organization")
@devel_debug_option()
@map_to_click_exceptions
def organize(
    paths: tuple[str, ...],
    required_fields: tuple[str, ...],
    dandiset_path: str | None,
    invalid: OrganizeInvalid,
    files_mode: FileOperationMode,
    media_files_mode: CopyMode | None,
    update_external_file_paths: bool,
    jobs: int | None,
    devel_debug: bool = False,
) -> None:
    """(Re)organize NWB files according to their metadata.

    The purpose of this command is to take advantage of metadata contained in
    .nwb files to provide datasets with consistently-named files whose names
    reflect the data they contain.

    .nwb files are organized into a hierarchy of subfolders, one per "subject",
    e.g., `sub-0001` if an .nwb file contained a Subject group with
    `subject_id=0001`. Each file in a subject-specific subfolder follows the
    pattern:

        sub-<subject_id>[_key-<value>][_mod1+mod2+...].nwb

    where the following keys are considered if present in the data:

    \b
    ses -- session_id
    tis -- tissue_sample_id
    slice -- slice_id
    cell -- cell_id

    and `modX` are "modalities" as identified based on detected neural data
    types (such as "ecephys", "icephys") per extensions found in nwb-schema
    definitions:
    https://github.com/NeurodataWithoutBorders/nwb-schema/tree/dev/core

    In addition, an "obj" key with a value corresponding to the crc32 checksum
    of "object_id" is added if the aforementioned keys and the list of
    modalities are not sufficient to disambiguate different files.

    You can visit https://dandiarchive.org for a growing collection of
    (re)organized dandisets.
    """
    from ..organize import organize

    organize(
        paths,
        dandiset_path=dandiset_path,
        invalid=invalid,
        files_mode=files_mode,
        devel_debug=devel_debug,
        update_external_file_paths=update_external_file_paths,
        media_files_mode=media_files_mode,
        required_fields=required_fields,
        jobs=jobs,
    )

--------------------------------------------------------------------------------
/dandi/cli/tests/data/update_dandiset_from_doi/jneurosci.json:
--------------------------------------------------------------------------------
{
  "id": "DANDI:000002/draft",
  "url": "http://localhost:8085/dandiset/000002/draft",
  "name": "Validation of Decision-Making Models and Analysis of Decision Variables in the Rat Basal Ganglia",
  "access": [
    {
      "status": "dandi:OpenAccess",
      "schemaKey": "AccessRequirements"
    }
  ],
  "license": [
    "spdx:CC0-1.0"
  ],
  "version": "draft",
  "@context": "https://raw.githubusercontent.com/dandi/schema/master/releases/0.6.4/context.json",
  "citation": "Ito, Makoto; Doya, Kenji (2023) Validation of Decision-Making Models and Analysis of Decision Variables in the Rat Basal Ganglia (Version draft) [Data set]. DANDI Archive. http://localhost:8085/dandiset/000002/draft",
  "schemaKey": "Dandiset",
  "identifier": "DANDI:000002",
  "repository": "http://localhost:8085",
  "contributor": [
    {
      "name": "Tests, DANDI-Cli",
      "email": "nemo@example.com",
      "roleName": [
        "dcite:Author",
        "dcite:ContactPerson"
      ],
      "schemaKey": "Person"
    },
    {
      "name": "Ito, Makoto",
      "roleName": [
        "dcite:Author"
      ],
      "schemaKey": "Person",
      "affiliation": [],
      "includeInCitation": true
    },
    {
      "name": "Doya, Kenji",
      "roleName": [
        "dcite:Author"
      ],
      "schemaKey": "Person",
      "affiliation": [],
      "includeInCitation": true
    }
  ],
  "dateCreated": "2023-04-25T16:28:28.308094Z",
  "description": "Reinforcement learning theory plays a key role in understanding the behavioral and neural mechanisms of choice behavior in animals and humans. Especially, intermediate variables of learning models estimated from behavioral data, such as the expectation of reward for each candidate choice (action value), have been used in searches for the neural correlates of computational elements in learning and decision making. The aims of the present study are as follows: (1) to test which computational model best captures the choice learning process in animals and (2) to elucidate how action values are represented in different parts of the corticobasal ganglia circuit. We compared different behavioral learning algorithms to predict the choice sequences generated by rats during a free-choice task and analyzed associated neural activity in the nucleus accumbens (NAc) and ventral pallidum (VP). The major findings of this study were as follows: (1) modified versions of an action\u2013value learning model captured a variety of choice strategies of rats, including win-stay\u2013lose-switch and persevering behavior, and predicted rats' choice sequences better than the best multistep Markov model; and (2) information about action values and future actions was coded in both the NAc and VP, but was less dominant than information about trial types, selected actions, and reward outcome. The results of our model-based analysis suggest that the primary role of the NAc and VP is to monitor information important for updating choice behaviors. Information represented in the NAc and VP might contribute to a choice mechanism that is situated elsewhere.",
  "assetsSummary": {
    "schemaKey": "AssetsSummary",
    "numberOfBytes": 0,
    "numberOfFiles": 0
  },
  "schemaVersion": "0.6.4",
  "wasGeneratedBy": [
    {
      "id": "urn:uuid:30f8a68d-cc2a-4a51-b505-e445750a6d87",
      "name": "Metadata extraction from DOI",
      "endDate": "2023-04-25 12:28:29.203890-04:00",
      "schemaKey": "Activity",
      "startDate": "2023-04-25 12:28:28.833737-04:00",
      "description": "Metadata (contributor, name, description, relatedResource) was enhanced with data from DOI 10.1523/JNEUROSCI.6157-08.2009 by DANDI cli",
      "wasAssociatedWith": [
        {
          "url": "https://github.com/dandi/dandi-cli",
          "name": "DANDI Command Line Interface",
          "version": "0.53.0+16.g9e9bc9c.dirty",
          "schemaKey": "Software",
          "identifier": "RRID:SCR_019009"
        }
      ]
    }
  ],
  "relatedResource": [
    {
      "url": "https://doi.org/10.1523/JNEUROSCI.6157-08.2009",
      "name": "Validation of Decision-Making Models and Analysis of Decision Variables in the Rat Basal Ganglia",
      "relation": "dcite:IsDescribedBy",
      "schemaKey": "Resource",
      "identifier": "10.1523/JNEUROSCI.6157-08.2009"
    }
  ],
  "manifestLocation": [
    "http://localhost:8000/api/dandisets/000002/versions/draft/assets/"
  ]
}

--------------------------------------------------------------------------------
/tools/update-assets-on-server:
--------------------------------------------------------------------------------
#!/usr/bin/env python
"""Helper to re-extract metadata and update (mint new) assets with that metadata

Composed by Satra (with only minor changes by yoh).
Initially based on the asset-updating code in dandisets' backups2datalad.py,
but since it interfered with that script's updates to datalad, it was
extracted into a separate script.
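
Example (with a hypothetical Dandiset ID; the API key is taken from
-a/--api_key or the DANDI_API_KEY environment variable, falling back to an
interactive prompt):

    update-assets-on-server -d 000003 --update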
8 | """ 9 | 10 | from getpass import getpass 11 | import logging 12 | import os 13 | import sys 14 | 15 | import click 16 | from dandischema.consts import DANDI_SCHEMA_VERSION 17 | import requests 18 | 19 | from dandi.dandiapi import DandiAPIClient 20 | from dandi.metadata import get_default_metadata, nwb2asset 21 | from dandi.misctypes import Digest 22 | from dandi.support.digests import get_digest 23 | 24 | logging.basicConfig( 25 | stream=sys.stdout, 26 | level=logging.INFO, 27 | format="[%(asctime)s] %(levelname)s - %(message)s", 28 | ) 29 | 30 | ul = logging.getLogger("UL") 31 | 32 | # location on drogon 33 | 34 | blobdir = "/mnt/backup/dandi/dandiarchive-s3-backup/blobs" 35 | 36 | 37 | def get_meta(path, digest=None): 38 | try: 39 | if digest is None: 40 | digest = get_digest(path, digest="dandi-etag") 41 | localmeta = nwb2asset(path, digest=Digest.dandi_etag(digest)) 42 | except Exception as e: 43 | ul.error(f"Error {e} getting {path}") 44 | localmeta = get_default_metadata(path, digest=Digest.dandi_etag(digest)) 45 | return localmeta.json_dict() 46 | 47 | 48 | @click.command(help="""Update assetmetadata""") 49 | @click.option( 50 | "-d", 51 | "--dandiset", 52 | type=str, 53 | help="Dandiset to update", 54 | ) 55 | @click.option( 56 | "-u", "--update", default=False, is_flag=True, help="Whether to actually update" 57 | ) 58 | @click.option( 59 | "-a", 60 | "--api_key", 61 | type=str, 62 | default=os.environ.get("DANDI_API_KEY", ""), 63 | help="API key to use", 64 | ) 65 | def process_dandiset(dandiset, update, api_key): 66 | api_key = api_key or getpass("API KEY: ") 67 | 68 | dapi = DandiAPIClient() 69 | 70 | url = "https://api.dandiarchive.org/api/blobs/digest/" 71 | headers = {"Accept": "application/json", "Content-Type": "application/json"} 72 | 73 | ds = dapi.get_dandiset(dandiset, "draft") 74 | ul.info(f"processing dandiset: {dandiset}") 75 | 76 | assets = [] 77 | for ra in ds.get_assets(): 78 | asset = ra.get_raw_metadata() 79 | asset.update(**ra.json_dict()) 80 | payload = { 81 | "algorithm": "dandi:dandi-etag", 82 | "value": asset["digest"]["dandi:dandi-etag"], 83 | } 84 | response = requests.request("POST", url, json=payload, headers=headers) 85 | blob_info = response.json() 86 | assert asset["contentSize"] == blob_info["size"] 87 | asset.update(**blob_info) 88 | assets.append(asset) 89 | ul.info(f"Total assets: {len(assets)}") 90 | 91 | missing = [] 92 | couldnotupdate = [] 93 | 94 | for asset in assets: 95 | blob_id = asset["blob_id"] 96 | localpath = f"{blobdir}/{blob_id[:3]}/{blob_id[3:6]}/{blob_id}" 97 | if not os.path.exists(localpath): 98 | missing.append(asset) 99 | if missing: 100 | ul.error(f"could not find these blobs locally: {missing}") 101 | ul.error(len(missing)) 102 | else: 103 | for asset in assets: 104 | blob_id = asset["blob_id"] 105 | localpath = f"{blobdir}/{blob_id[:3]}/{blob_id[3:6]}/{blob_id}" 106 | if DANDI_SCHEMA_VERSION != asset.get("schemaVersion", ""): 107 | ul.info("Getting metadata") 108 | localmeta = get_meta(localpath, asset["digest"]["dandi:dandi-etag"]) 109 | ul.info("Finished getting metadata") 110 | localmeta["path"] = asset["path"] 111 | localmeta["blobDateModified"] = asset["blobDateModified"] 112 | url = ( 113 | f"https://api.dandiarchive.org/api/dandisets/{dandiset}/" 114 | f"versions/draft/assets/{asset['asset_id']}/" 115 | ) 116 | payload = {"metadata": localmeta, "blob_id": asset["blob_id"]} 117 | if update: 118 | ul.info(f"updating: {asset['path']}") 119 | # print(url, payload) 120 | headers["Authorization"] = f"token {api_key}" 
                    response = requests.request(
                        "PUT", url, json=payload, headers=headers
                    )
                    ul.info("Finished updating")
                    if not response.ok:
                        ul.error("%s: %s", url, response.text)
                        couldnotupdate.append(asset)
        ul.error(f"couldnotupdate: {couldnotupdate}")

    newassets = []
    for ra in ds.get_assets():
        asset = ra.get_raw_metadata()
        asset.update(**ra.json_dict())
        newassets.append(asset)

    ul.info(f"New assets: {len(newassets)}")


if __name__ == "__main__":
    process_dandiset()

--------------------------------------------------------------------------------
/dandi/cli/tests/test_cmd_ls.py:
--------------------------------------------------------------------------------
from __future__ import annotations

import json
from pathlib import Path
from typing import Any
from unittest.mock import ANY

from click.testing import CliRunner
from dandischema.consts import DANDI_SCHEMA_VERSION
import pytest

from dandi.tests.skip import mark

from ..cmd_ls import ls
from ...utils import yaml_load


@pytest.mark.parametrize(
    "format", ("auto", "json", "json_pp", "json_lines", "yaml", "pyout")
)
def test_smoke(
    simple1_nwb_metadata: dict[str, Any], simple1_nwb: Path, format: str
) -> None:
    runner = CliRunner()
    r = runner.invoke(ls, ["-f", format, str(simple1_nwb)])
    assert r.exit_code == 0, f"Exited abnormally. out={r.stdout}"
    # we would need to redirect pyout for its analysis
    out = r.stdout

    if format == "json_lines":

        def load(s: str) -> Any:
            return json.loads(s)

    elif format.startswith("json"):

        def load(s: str) -> Any:
            obj = json.loads(s)
            assert len(obj) == 1  # will be a list with a single elem
            return obj[0]

    elif format == "yaml":

        def load(s: str) -> Any:
            obj = yaml_load(s, typ="base")
            assert len(obj) == 1  # will be a list with a single elem
            return obj[0]

    else:
        return

    metadata = load(out)
    assert metadata
    # check a few fields
    assert metadata.pop("nwb_version").startswith("2.")
    for f in ["session_id", "experiment_description"]:
        assert metadata[f] == simple1_nwb_metadata[f]


def test_ls_nwb_file(simple2_nwb: Path) -> None:
    bids_file_path = simple2_nwb / "simple2.nwb"
    r = CliRunner().invoke(ls, ["-f", "yaml", str(bids_file_path)])
    assert r.exit_code == 0, r.output
    data = yaml_load(r.stdout, "safe")
    assert len(data) == 1


@mark.skipif_no_network
def test_ls_bids_file(bids_examples: Path) -> None:
    bids_file_path = (
        bids_examples / "asl003" / "sub-Sub1" / "anat" / "sub-Sub1_T1w.nii.gz"
    )
    r = CliRunner().invoke(
        ls,
        ["-f", "yaml", str(bids_file_path)],
        # workaround since bst manages to log to stdout
        # https://github.com/bids-standard/bids-specification/pull/2085
        env={"BIDS_SCHEMA_LOG_LEVEL": "CRITICAL"},
    )
    assert r.exit_code == 0, r.output
    data = yaml_load(r.stdout, "safe")
    assert len(data) == 1
    assert data[0]["identifier"] == "Sub1"


@mark.skipif_no_network
def test_ls_zarrbids_file(bids_examples: Path) -> None:
    bids_file_path = (
        bids_examples
        / "micr_SEMzarr"
        / "sub-01"
        / "ses-01"
        / "micr"
        / "sub-01_ses-01_sample-A_SPIM.ome.zarr"
    )
    r = CliRunner().invoke(ls, ["-f", "yaml", str(bids_file_path)])
    assert r.exit_code == 0, r.output
    data = yaml_load(r.stdout, "safe")
    assert len(data) == 1
    assert data[0]["identifier"] == "01"


@mark.skipif_no_network
def test_ls_dandiset_url() -> None:
    r = CliRunner().invoke(
        ls, ["-f", "yaml", "https://api.dandiarchive.org/api/dandisets/000027"]
    )
    assert r.exit_code == 0, r.output
    data = yaml_load(r.stdout, "safe")
    assert len(data) == 1
    assert data[0]["path"] == "000027"


@mark.skipif_no_network
def test_ls_dandiset_url_recursive() -> None:
    r = CliRunner().invoke(
        ls, ["-f", "yaml", "-r", "https://api.dandiarchive.org/api/dandisets/000027"]
    )
    assert r.exit_code == 0, r.output
    data = yaml_load(r.stdout, "safe")
    assert len(data) == 2
    assert data[0]["path"] == "000027"
    assert data[1]["path"] == "sub-RAT123/sub-RAT123.nwb"


@mark.skipif_no_network
def test_ls_path_url() -> None:
    r = CliRunner().invoke(
        ls,
        [
            "-f",
            "yaml",
            (
                "https://api.dandiarchive.org/api/dandisets/000027/versions/draft"
                "/assets/?path=sub-RAT123/"
            ),
        ],
    )
    assert r.exit_code == 0, r.output
    data = yaml_load(r.stdout, "safe")
    assert len(data) == 1
    assert data[0]["path"] == "sub-RAT123/sub-RAT123.nwb"


def test_smoke_local_schema(simple1_nwb: Path) -> None:
    runner = CliRunner()
    r = runner.invoke(
        ls,
        [
            "-f",
            "json",
            "--schema",
            DANDI_SCHEMA_VERSION,
            str(simple1_nwb),
        ],
    )
    assert r.exit_code == 0, f"Exited abnormally. out={r.stdout}"
    out = r.stdout
    metadata = json.loads(out)
    assert len(metadata) == 1
    assert metadata[0]["digest"] == {"dandi:dandi-etag": ANY}

--------------------------------------------------------------------------------
/dandi/cli/tests/test_service_scripts.py:
--------------------------------------------------------------------------------
from __future__ import annotations

from contextlib import nullcontext
import json
import os
from pathlib import Path
import re
import sys

import anys
from click.testing import CliRunner
from dandischema.consts import DANDI_SCHEMA_VERSION
from dandischema.models import ID_PATTERN
import pytest

from dandi import __version__
from dandi.tests.fixtures import SampleDandiset

from ..cmd_service_scripts import service_scripts

DATA_DIR = Path(__file__).with_name("data")


@pytest.mark.xfail(
    "nfsmount" in os.environ.get("TMPDIR", ""),
    reason="https://github.com/dandi/dandi-cli/issues/1507",
)
def test_reextract_metadata(
    monkeypatch: pytest.MonkeyPatch, nwb_dandiset: SampleDandiset
) -> None:
    pytest.importorskip("fsspec")
    asset_id = nwb_dandiset.dandiset.get_asset_by_path(
        "sub-mouse001/sub-mouse001.nwb"
    ).identifier
    nwb_dandiset.api.monkeypatch_set_api_key_env(monkeypatch)
    r = CliRunner().invoke(
        service_scripts,
        ["reextract-metadata", "--when=always", nwb_dandiset.dandiset.version_api_url],
    )
    assert r.exit_code == 0
    asset_id2 = nwb_dandiset.dandiset.get_asset_by_path(
        "sub-mouse001/sub-mouse001.nwb"
    ).identifier
    assert asset_id2 != asset_id


def record_only_doi_requests(request):
    if request.host in ("doi.org", "api.crossref.org"):
        # We need to capture api.crossref.org requests as doi.org redirects
        # there.
        return request
    else:
        return None


@pytest.mark.xfail(
    sys.version_info < (3, 10),
    reason="Some difference in VCR tape: https://github.com/dandi/dandi-cli/pull/1337",
)
@pytest.mark.parametrize(
    "doi,name",
    [
        ("10.1101/2020.01.17.909838", "biorxiv"),
        ("10.1523/JNEUROSCI.6157-08.2009", "jneurosci"),
        ("10.1016/j.neuron.2019.10.012", "neuron"),
        ("10.7554/eLife.48198", "elife"),
        ("10.1038/s41467-023-37704-5", "nature"),
    ],
)
def test_update_dandiset_from_doi(
    doi: str,
    name: str,
    new_dandiset: SampleDandiset,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    dandiset_id = new_dandiset.dandiset_id
    repository = new_dandiset.api.instance.gui
    new_dandiset.api.monkeypatch_set_api_key_env(monkeypatch)
    if os.environ.get("DANDI_TESTS_NO_VCR", "") or sys.version_info <= (3, 10):
        # Older vcrpy has an issue with Python 3.9 and newer urllib2 >= 2.
        # But we require newer urllib2 for more correct operation, and
        # do still support 3.9. Remove when 3.9 support is dropped.
        ctx = nullcontext()
    else:
        import vcr

        ctx = vcr.use_cassette(
            str(DATA_DIR / "update_dandiset_from_doi" / f"{name}.vcr.yaml"),
            before_record_request=record_only_doi_requests,
        )
    with ctx:
        r = CliRunner().invoke(
            service_scripts,
            [
                "update-dandiset-from-doi",
                "--dandiset",
                dandiset_id,
                "--dandi-instance",
                new_dandiset.api.instance_id,
                "--existing=overwrite",
                "--yes",
                doi,
            ],
        )
    assert r.exit_code == 0
    metadata = new_dandiset.dandiset.get_raw_metadata()
    with (DATA_DIR / "update_dandiset_from_doi" / f"{name}.json").open() as fp:
        expected = json.load(fp)
    expected["id"] = anys.AnyFullmatch(rf"{ID_PATTERN}:{dandiset_id}/draft")
    expected["url"] = f"{repository}/dandiset/{dandiset_id}/draft"
    expected["@context"] = (
        "https://raw.githubusercontent.com/dandi/schema/master/releases"
        f"/{DANDI_SCHEMA_VERSION}/context.json"
    )
    expected["identifier"] = anys.AnyFullmatch(rf"{ID_PATTERN}:{dandiset_id}")
    expected["repository"] = repository
    expected["dateCreated"] = anys.ANY_AWARE_DATETIME_STR
    expected["schemaVersion"] = DANDI_SCHEMA_VERSION
    expected["wasGeneratedBy"][0]["id"] = anys.AnyFullmatch(
        r"urn:uuid:[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
    )
    expected["wasGeneratedBy"][0]["endDate"] = anys.ANY_AWARE_DATETIME_STR
    expected["wasGeneratedBy"][0]["startDate"] = anys.ANY_AWARE_DATETIME_STR
    expected["wasGeneratedBy"][0]["wasAssociatedWith"][0]["version"] = __version__
    expected["manifestLocation"][
        0
    ] = f"{new_dandiset.api.api_url}/dandisets/{dandiset_id}/versions/draft/assets/"
    citation = re.sub(
        r"\S+\Z",
        f"{repository}/dandiset/{dandiset_id}/draft",
        expected["citation"],
    )
    if m := re.search(r"\(\d{4}\)", citation):
        citation_rgx = (
            re.escape(citation[: m.start()])
            + r"\(\d{4}\)"
            + re.escape(citation[m.end() :])
        )
        expected["citation"] = anys.AnyFullmatch(citation_rgx)
    else:
        expected["citation"] = citation
    assert metadata == expected

--------------------------------------------------------------------------------
/docs/source/cmdline/move.rst:
--------------------------------------------------------------------------------
:program:`dandi move`
=====================

::

    dandi [<global options>] move [<options>] <source> ... <destination>
    dandi [<global options>] move --regex [<options>] <regex> <replacement>

Move or rename assets in a local Dandiset and/or on the server. The
:program:`dandi move` command takes one or more source paths of the assets to
move, followed by a destination path indicating where to move them to. Paths
given on the command line must use forward slashes (``/``) as path separators,
even on Windows. In addition, when running the command inside a subdirectory
of a Dandiset, all paths must be relative to the subdirectory, even if only
operating on the remote Dandiset. (The exception is when the ``--dandiset``
option is given in order to operate on an arbitrary remote Dandiset, in which
case any local Dandiset is ignored and paths are interpreted relative to the
root of the remote Dandiset.)

If there is more than one source path, or if the destination path either names
an existing directory or ends in a trailing forward slash (``/``), then the
source assets are placed within the destination directory. Otherwise, the
single source path is renamed to the given destination path.

Alternatively, if the ``--regex`` option is given, then there must be exactly
two arguments on the command line: a `Python regular expression`_ and a
replacement string, possibly containing regex backreferences. :program:`dandi
move` will then apply the regular expression to the path of every asset in the
current directory recursively (using paths relative to the current directory,
if in a subdirectory of a Dandiset); if a path matches, the matching portion is
replaced with the replacement string, after expanding any backreferences.

.. _Python regular expression: https://docs.python.org/3/library/re.html
   #regular-expression-syntax


Options
-------

.. option:: -i, --dandi-instance

    DANDI instance (either a base URL or a known instance name) containing the
    remote Dandiset corresponding to the local Dandiset in the current
    directory [default: ``dandi``]

.. option:: -d, --dandiset

    A :ref:`resource identifier <resource_ids>` pointing to a Dandiset on a
    remote instance whose assets you wish to move/rename

.. option:: --dry-run

    Show what would be moved but do not move anything

.. option:: -e, --existing [error|skip|overwrite]

    How to handle assets that would be moved to a destination where an asset
    already exists:

    - ``error`` [default] — raise an error
    - ``skip`` — skip the move
    - ``overwrite`` — delete the asset already at the destination

.. option:: -J, --jobs

    Number of assets to move in parallel; the default value is determined by
    the number of CPU cores on your machine

.. option:: --regex

    Treat the command-line arguments as a regular expression and a replacement
    string, and perform the given substitution on all asset paths in the
    current directory recursively.

.. option:: -w, --work-on [auto|both|local|remote]

    Whether to operate on the local Dandiset in the current directory, a
    remote Dandiset (either one specified by the ``--dandiset`` option or else
    the one corresponding to the local Dandiset), or both at once. If ``auto``
    (the default) is given, it is treated the same as ``remote`` if a
    ``--dandiset`` option is given and as ``both`` otherwise.


Development Options
-------------------

The following options are intended only for development & testing purposes.
They are only available if the :envvar:`DANDI_DEVEL` environment variable is
set to a nonempty value.

.. option:: --devel-debug

    Do not use pyout callbacks, do not swallow exceptions, do not parallelize.


Examples
--------

- When working in a local clone of a Dandiset, a file
  :file:`sub-01/sub-01_blah.nii.gz` can be renamed to
  :file:`sub-02/sub-02_useful.nii.gz` in both the local clone and on the
  server with::

      dandi move sub-01/sub-01_blah.nii.gz sub-02/sub-02_useful.nii.gz

  To rename the file only in the local or remote instance, insert ``--work-on
  local`` or ``--work-on remote`` after ``move``.

- When not working in a local clone of a Dandiset, a file can be renamed in a
  remote Dandiset on a server by providing a resource identifier for the
  Dandiset to the ``--dandiset`` option. For example, in order to operate on
  Dandiset 123456 on the main ``dandi`` instance, use::

      dandi move --dandiset DANDI:123456 sub-01/sub-01_blah.nii.gz sub-02/sub-02_useful.nii.gz

  To operate on Dandiset 123456 on ``dandi-sandbox``, you can use::

      dandi move --dandiset https://sandbox.dandiarchive.org/dandiset/123456 sub-01/sub-01_blah.nii.gz sub-02/sub-02_useful.nii.gz

- To move the contents of a folder :file:`rawdata/` to the top level of a
  Dandiset, you can use the ``--regex`` option to strip the ``rawdata/``
  prefix from the beginning of all matching asset paths::

      dandi move --regex "^rawdata/" ""

--------------------------------------------------------------------------------
/dandi/keyring_utils.py:
--------------------------------------------------------------------------------
from __future__ import annotations

from collections.abc import Callable
import os.path as op
from pathlib import Path
from typing import TypeVar

import click
from keyring.backend import KeyringBackend, get_all_keyring
from keyring.core import get_keyring, load_config, load_env
from keyring.errors import KeyringError
from keyring.util.platform_ import config_root
from keyrings.alt.file import EncryptedKeyring

from . import get_logger

lgr = get_logger()

T = TypeVar("T")


def keyring_lookup(
    service_name: str, username: str
) -> tuple[KeyringBackend, str | None]:
    """
    Returns an appropriate keyring backend and the password it holds (if any)
    for the given service and username.
    """
    return keyring_op(lambda kb: kb.get_password(service_name, username))


def keyring_save(service_name: str, username: str, password: str) -> None:
    """
    Save a service name, username, and password in an appropriate keyring
    backend.
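
    If the chosen backend is a newly configured ``EncryptedKeyring`` (in which
    case `keyring_op` returns `None` in place of the operation result), the
    save is retried directly on that backend.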
36 | """ 37 | 38 | def save(kb: KeyringBackend) -> tuple[()]: 39 | kb.set_password(service_name, username, password) 40 | return () 41 | 42 | kb, r = keyring_op(save) 43 | if r is None: 44 | save(kb) 45 | 46 | 47 | def keyring_op(func: Callable[[KeyringBackend], T]) -> tuple[KeyringBackend, T | None]: 48 | """ 49 | Determine a keyring backend to use for storing & retrieving credentials, 50 | perform an operation on the backend, and return the backend and the results 51 | of the operation. 52 | 53 | The backend determination is made as follows: 54 | 55 | - If the user has specified a backend explicitly via the 56 | ``PYTHON_KEYRING_BACKEND`` environment variable or a ``keyringrc.cfg`` 57 | file, that backend is used without checking whether it's usable (If it's 58 | not, the user messed up). 59 | 60 | - Otherwise, perform the operation on the default backend (which is 61 | guaranteed to already have the requisite dependencies installed). If 62 | this completes without error, return the backend and the operation 63 | results. 64 | 65 | - If the query fails (e.g., because a GUI is required but the session is in 66 | a plain terminal), try using the ``EncryptedKeyring`` backend. 67 | 68 | - If the default backend *was* the ``EncryptedKeyring`` backend, error. 69 | 70 | - If the ``EncryptedKeyring`` backend is not in the list of available 71 | backends (likely because its dependencies are not installed, though 72 | that shouldn't happen if dandi was installed properly), error. 73 | 74 | - If ``EncryptedKeyring``'s data file already exists, perform the 75 | operation on the backend, and return the backend and the operation 76 | results. 77 | 78 | - If ``EncryptedKeyring``'s data file does not already exist, ask the 79 | user whether they want to start using ``EncryptedKeyring``: 80 | 81 | - If yes, then set ``keyringrc.cfg`` (if it does not already exist) to 82 | specify it as the default backend. The backend is then returned 83 | *without* performing the operation (with `None` returned in place of 84 | the operation results), in order to provide an improved UX when 85 | trying to retrieve a password from the backend (in which case the 86 | user shouldn't be asked for an encryption password to a file that 87 | doesn't yet exist and thus won't contain the password we're after). 88 | 89 | - If no, error. 90 | """ 91 | 92 | kb = load_env() or load_config() 93 | if kb: 94 | return (kb, func(kb)) 95 | kb = get_keyring() 96 | try: 97 | return (kb, func(kb)) 98 | except KeyringError as e: 99 | lgr.info("Default keyring errors on query: %s", e) 100 | if isinstance(kb, EncryptedKeyring): 101 | lgr.info( 102 | "Default keyring is EncryptedKeyring; abandoning keyring procedure" 103 | ) 104 | raise 105 | # Use `type(..) is` instead of `isinstance()` to weed out subclasses 106 | kbs = [k for k in get_all_keyring() if type(k) is EncryptedKeyring] 107 | assert ( 108 | len(kbs) == 1 109 | ), "EncryptedKeyring not available; is pycryptodomex installed?" 
        kb = kbs[0]
        assert isinstance(kb, EncryptedKeyring)
        if op.exists(kb.file_path):
            lgr.info("EncryptedKeyring file exists; using as keyring backend")
            return (kb, func(kb))
        lgr.info("EncryptedKeyring file does not exist")
        if click.confirm(
            "Would you like to establish an encrypted keyring?", default=True
        ):
            keyring_cfg = keyringrc_file()
            if keyring_cfg.exists():
                lgr.info("%s exists; refusing to overwrite", keyring_cfg)
            else:
                lgr.info(
                    "Configuring %s to use EncryptedKeyring as default backend",
                    keyring_cfg,
                )
                keyring_cfg.parent.mkdir(parents=True, exist_ok=True)
                keyring_cfg.write_text(
                    "[backend]\n"
                    "default-keyring = keyrings.alt.file.EncryptedKeyring\n"
                )
            return (kb, None)
        raise


def keyringrc_file() -> Path:
    return Path(config_root(), "keyringrc.cfg")

--------------------------------------------------------------------------------
/dandi/tests/test_validate_types.py:
--------------------------------------------------------------------------------
from __future__ import annotations

import json
from typing import Any

from pydantic import ValidationError
import pytest

from dandi.validate_types import (
    Origin,
    OriginType,
    Scope,
    Severity,
    ValidationResult,
    Validator,
)

FOO_ORIGIN = Origin(
    type=OriginType.INTERNAL,
    validator=Validator.dandi,
    validator_version="123",
)


class TestValidationResult:
    @pytest.mark.parametrize(
        ("severity", "expected_json"),
        [
            (Severity.INFO, "INFO"),
            (Severity.WARNING, "WARNING"),
            (Severity.ERROR, "ERROR"),
            (None, None),
        ],
    )
    def test_severity_serialization(
        self, severity: Severity | None, expected_json: str | None
    ) -> None:
        """
        Test serialization of `Severity`
        """
        r = ValidationResult(
            id="foo",
            origin=FOO_ORIGIN,
            scope=Scope.FILE,
            severity=severity,
        )

        # Dump into JSON serializable dict
        json_dump = r.model_dump(mode="json")
        assert json_dump["severity"] == expected_json

        # Dump into JSON string
        json_dumps = r.model_dump_json(indent=2)
        json_dict = json.loads(json_dumps)
        assert json_dict["severity"] == expected_json

        # Dump into a Python dict
        # (severity should remain the same in this serialized form)
        python_dump = r.model_dump(mode="python")
        assert python_dump["severity"] is severity

    @pytest.mark.parametrize("severity", list(Severity.__members__.values()) + [None])
    def test_severity_round_trip(self, severity: Severity | None) -> None:
        """
        Test round trip serializing and deserializing `Severity` within a
        `ValidationResult` object
        """
        r = ValidationResult(
            id="foo",
            origin=FOO_ORIGIN,
            scope=Scope.FILE,
            severity=severity,
        )

        # Dump into JSON serializable dict
        json_dump = r.model_dump(mode="json")

        # Dump into JSON string
        json_dumps = r.model_dump_json(indent=2)

        # Dump into a Python dict
        # (severity should remain the same in this serialized form)
        python_dump = r.model_dump(mode="python")

        for dump in (json_dump, python_dump):
            # Reconstitute from dict
            r_reconstituted = ValidationResult.model_validate(dump)
            assert r == r_reconstituted

            # Reconstitute from JSON string
            r_reconstituted = ValidationResult.model_validate_json(json_dumps)
            assert r == r_reconstituted

    @pytest.mark.parametrize("severity", Severity.__members__.values())
    def test_severity_validation_from_int(self, severity: Severity) -> None:
        """
        Test validation of `Severity` from an integer in a `ValidationResult` object
        """
        r = ValidationResult(
            id="foo",
            origin=FOO_ORIGIN,
            scope=Scope.FILE,
            severity=severity,
        )

        # Dump into JSON serializable dict
        json_dump = r.model_dump(mode="json")
        json_dump["severity"] = severity.value  # Modify severity into its int value

        # Dump into JSON string
        json_dumps = json.dumps(json_dump, indent=2)

        # Reconstitute from JSON serializable dict
        r_reconstituted = ValidationResult.model_validate(json_dump)
        assert r == r_reconstituted

        # Reconstitute from JSON string
        r_reconstituted = ValidationResult.model_validate_json(json_dumps)
        assert r == r_reconstituted

    @pytest.mark.parametrize("invalid_severity", ["foo", 42, True])
    def test_invalid_severity_validation(self, invalid_severity: Any) -> None:
        """
        Test validation of `Severity` from an invalid value in a `ValidationResult`
        object
        """
        r = ValidationResult(
            id="foo",
            origin=FOO_ORIGIN,
            scope=Scope.FILE,
            severity=None,
        )

        # Dump into JSON serializable dict
        json_dump = r.model_dump(mode="json")
        json_dump["severity"] = invalid_severity

        # Dump into JSON string
        json_dumps = json.dumps(json_dump, indent=2)

        # Dump into a Python dict
        python_dump = r.model_dump(mode="python")
        python_dump["severity"] = invalid_severity

        for dump in (json_dump, python_dump):
            with pytest.raises(ValidationError) as excinfo:
                # Reconstitute from dict
                ValidationResult.model_validate(dump)

            assert excinfo.value.error_count() == 1
            assert excinfo.value.errors()[0]["loc"][-1] == "severity"

        with pytest.raises(ValidationError) as excinfo:
            # Reconstitute from JSON string
            ValidationResult.model_validate_json(json_dumps)

        assert excinfo.value.error_count() == 1
        assert excinfo.value.errors()[0]["loc"][-1] == "severity"

--------------------------------------------------------------------------------