├── .dockerignore ├── .github └── workflows │ ├── python-package.yml │ └── python-publish.yml ├── .gitignore ├── CITATION.cff ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── examples ├── fastapi │ ├── main.py │ └── requirements.txt └── read_test_metadata.py ├── notebooks ├── ExploreSchemaOrg.ipynb ├── README.md ├── ROcrate-linked-data.ipynb ├── ROcrate-validation.ipynb ├── requirements.txt └── sample_data │ ├── clinvap │ └── ro-crate-metadata.jsonld │ └── methylseq │ └── ro-crate-metadata.jsonld ├── pyproject.toml ├── requirements.txt ├── rocrate ├── __init__.py ├── _version.py ├── cli.py ├── data │ ├── ro-crate.jsonld │ ├── schema.jsonld │ └── update.sh ├── memory_buffer.py ├── metadata.py ├── model │ ├── __init__.py │ ├── computationalworkflow.py │ ├── computerlanguage.py │ ├── contextentity.py │ ├── creativework.py │ ├── data_entity.py │ ├── dataset.py │ ├── entity.py │ ├── file.py │ ├── file_or_dir.py │ ├── metadata.py │ ├── person.py │ ├── preview.py │ ├── root_dataset.py │ ├── softwareapplication.py │ ├── testdefinition.py │ ├── testinstance.py │ ├── testservice.py │ └── testsuite.py ├── rocrate.py ├── templates │ └── preview_template.html.j2 ├── utils.py └── vocabs.py ├── setup.cfg ├── setup.py ├── test ├── __init__.py ├── conftest.py ├── test-data │ ├── empty_file_crate │ │ ├── empty.txt │ │ └── folder │ │ │ └── empty_not_listed.txt │ ├── read_crate │ │ ├── a b │ │ │ └── c d.txt │ │ ├── abstract_wf.cwl │ │ ├── examples │ │ │ └── README.txt │ │ ├── ro-crate-metadata.json │ │ ├── ro-crate-metadata.jsonld │ │ ├── ro-crate-preview.html │ │ ├── test │ │ │ └── test-metadata.json │ │ ├── test_file_galaxy.txt │ │ ├── test_galaxy_wf.ga │ │ └── with space.txt │ ├── read_extra │ │ ├── listed.txt │ │ ├── listed │ │ │ ├── listed.txt │ │ │ └── not_listed.txt │ │ ├── not_listed.txt │ │ ├── not_listed │ │ │ └── not_listed.txt │ │ └── ro-crate-metadata.json │ ├── ro-crate-galaxy-sortchangecase │ │ ├── LICENSE │ │ ├── README.md │ 
│ ├── ro-crate-metadata.json │ │ ├── sort-and-change-case.ga │ │ └── test │ │ │ └── test1 │ │ │ ├── input.bed │ │ │ ├── output_exp.bed │ │ │ └── sort-and-change-case-test.yml │ ├── sample_cwl_wf.cwl │ ├── sample_file.txt │ ├── test_add_dir │ │ └── sample_file_subdir.txt │ ├── test_file_galaxy.txt │ ├── test_file_galaxy2.txt │ └── test_galaxy_wf.ga ├── test_cli.py ├── test_jsonld.py ├── test_metadata.py ├── test_model.py ├── test_read.py ├── test_readwrite.py ├── test_test_metadata.py ├── test_utils.py ├── test_workflow_ro_crate.py ├── test_write.py └── test_wrroc.py └── tools └── add_boilerplate.py /.dockerignore: -------------------------------------------------------------------------------- 1 | .* 2 | Dockerfile* 3 | venv 4 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: 9 | - master 10 | - dev 11 | - windows 12 | pull_request: 13 | branches: 14 | - master 15 | - dev 16 | 17 | jobs: 18 | build: 19 | strategy: 20 | matrix: 21 | os: ['ubuntu-latest', 'macos-latest', 'windows-latest'] 22 | python-version: ['3.9', '3.10', '3.11', '3.12'] 23 | 24 | runs-on: ${{ matrix.os }} 25 | 26 | steps: 27 | - uses: actions/checkout@v4 28 | - name: Set up Python ${{ matrix.python-version }} 29 | uses: actions/setup-python@v5 30 | with: 31 | python-version: ${{ matrix.python-version }} 32 | - name: Install 33 | run: | 34 | python -m pip install --upgrade pip 35 | pip install -e . 36 | pip install flake8 pytest 37 | - name: Lint with flake8 38 | run: | 39 | flake8 -v . 
40 | - name: Test 41 | run: | 42 | pytest -v test 43 | check-ga2cwl: 44 | runs-on: ubuntu-latest 45 | steps: 46 | - uses: actions/checkout@v4 47 | - name: Set up Python 3.12 48 | uses: actions/setup-python@v5 49 | with: 50 | python-version: '3.12' 51 | - name: Install 52 | run: | 53 | python -m pip install --upgrade pip 54 | pip install -e .[ga2cwl] 55 | pip install pytest 56 | - name: Test 57 | run: | 58 | pytest -v test 59 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Upload Python Package 5 | 6 | on: 7 | release: 8 | types: [created, released] 9 | 10 | jobs: 11 | deploy: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Set up Python 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: '3.x' 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install setuptools wheel twine pytest 25 | pip install -r requirements.txt 26 | - name: Check tag name matches setup.py version 27 | run: | 28 | ver=`python setup.py --version` 29 | tag="refs/tags/$ver" 30 | if [[ "$tag" != $GITHUB_REF ]] 31 | then echo Tag $GITHUB_REF does not match $ver from setup.py >&2 32 | echo Please update rocrate/_version.py 33 | false 34 | fi 35 | - name: Test 36 | run: | 37 | pytest -v test 38 | - name: Build and publish 39 | if: "${{ github.event.action == 'released' }}" 40 | env: 41 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 42 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 43 | run: | 44 | python setup.py sdist bdist_wheel 45 | twine upload dist/* 46 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | .pytest_cache 103 | 104 | # vim 105 | *.swp 106 | 107 | # other 108 | .DS_Store 109 | /.vscode 110 | -------------------------------------------------------------------------------- 
/CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.1.0 2 | message: "Cite as" 3 | author: 4 | - family-names: Bauer 5 | given-names: Daniel 6 | orcid: https://orcid.org/0000-0001-9447-460X 7 | - family-names: Chadwick 8 | given-names: Eli 9 | orcid: https://orcid.org/0000-0002-0035-6475 10 | - family-names: De Geest 11 | given-names: Paul 12 | orcid: https://orcid.org/0000-0002-8940-4946 13 | - family-names: Droesbeke 14 | given-names: Bert 15 | orcid: https://orcid.org/0000-0003-0522-5674 16 | - family-names: Eguinoa 17 | given-names: Ignacio 18 | orcid: https://orcid.org/0000-0002-6190-122X 19 | - family-names: Gaignard 20 | given-names: Alban 21 | orcid: https://orcid.org/0000-0002-3597-8557 22 | - family-names: Hiraki 23 | given-names: Toshiyuki 24 | orcid: https://orcid.org/0000-0001-6712-6335 25 | - family-names: Hörtenhuber 26 | given-names: Matthias 27 | orcid: https://orcid.org/0000-0002-5599-5565 28 | - family-names: Huber 29 | given-names: Sebastiaan 30 | orcid: https://orcid.org/0000-0001-5845-8880 31 | - family-names: Kinoshita 32 | given-names: Bruno 33 | orcid: https://orcid.org/0000-0001-8250-4074 34 | - family-names: Leo 35 | given-names: Simone 36 | orcid: https://orcid.org/0000-0001-8271-5429 37 | - family-names: Pireddu 38 | given-names: Luca 39 | orcid: https://orcid.org/0000-0002-4663-5613 40 | - family-names: Rodríguez-Navas 41 | given-names: Laura 42 | orcid: https://orcid.org/0000-0003-4929-1219 43 | - family-names: Sirvent 44 | given-names: Raül 45 | orcid: https://orcid.org/0000-0003-0606-2512 46 | - family-names: Soiland-Reyes 47 | given-names: Stian 48 | orcid: https://orcid.org/0000-0001-9842-9718 49 | - family-names: Thomas 50 | given-names: Laurent 51 | orcid: https://orcid.org/0000-0001-7686-3249 52 | 53 | title: "ro-crate-py" 54 | version: 0.13.0 55 | doi: 10.5281/zenodo.3956493 56 | date-released: 2024-12-20 57 | 
-------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to this repository 2 | 3 | ro-crate-py is open source software distributed under the Apache License, Version 2.0. Contributions are welcome, but please read this guide first. Submitted contributions are assumed to be covered by section 5 of the [license](LICENSE). 4 | 5 | 6 | ## Before you begin 7 | 8 | [Set up Git](https://docs.github.com/en/github/getting-started-with-github/set-up-git) on your local machine, then [fork](https://docs.github.com/en/github/getting-started-with-github/fork-a-repo) this repository on GitHub and [create a local clone of your fork](https://docs.github.com/en/github/getting-started-with-github/fork-a-repo#step-2-create-a-local-clone-of-your-fork). 9 | 10 | For instance, if your GitHub user name is `simleo`, you can get a local clone as follows: 11 | 12 | ``` 13 | $ git clone https://github.com/simleo/ro-crate-py 14 | ``` 15 | 16 | You can see that the local clone is pointing to your remote fork: 17 | 18 | ``` 19 | $ cd ro-crate-py 20 | $ git remote -v 21 | origin https://github.com/simleo/ro-crate-py (fetch) 22 | origin https://github.com/simleo/ro-crate-py (push) 23 | ``` 24 | 25 | To keep a reference to the original (upstream) repository, you can add a remote for it: 26 | 27 | ``` 28 | $ git remote add upstream https://github.com/researchobject/ro-crate-py 29 | $ git fetch upstream 30 | ``` 31 | 32 | Among other things, this makes it easy to keep your fork in sync with the upstream repository over time. For instance, to sync your `master` branch: 33 | 34 | ``` 35 | $ git checkout master 36 | $ git fetch -p upstream 37 | $ git merge --ff-only upstream/master 38 | $ git push origin master 39 | ``` 40 | 41 | If you need help with Git and GitHub, head over to the [GitHub docs](https://docs.github.com/en/github). 
In particular, you should be familiar with [issues and pull requests](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests). 42 | 43 | 44 | ## Making a contribution 45 | 46 | Contributions can range from fixing a broken link or a typo in the documentation to fixing a bug or adding a new feature to the software. Ideally, contributions (unless trivial) should be related to an [open issue](https://github.com/researchobject/ro-crate-py/issues). If there is no existing issue or [pull request](https://github.com/researchobject/ro-crate-py/pulls) related to the changes you wish to make, you can open a new one. 47 | 48 | Make your changes on a branch in your fork, then open a pull request (PR). Please take some time to summarize the proposed changes in the PR's description, especially if they're not obvious. If the PR addresses an open issue, you should [link it to the issue](https://docs.github.com/en/github/managing-your-work-on-github/linking-a-pull-request-to-an-issue). 49 | 50 | 51 | ## Contributing documentation 52 | 53 | Currently, documentation consists of a few [Markdown](http://daringfireball.net/projects/markdown) files such as this one. Read the [Mastering Markdown](https://guides.github.com/features/mastering-markdown) guide for a quick introduction to the format. Before opening the PR, you can check that the document renders as expected by looking at the corresponding page on the relevant branch in your fork. 54 | 55 | 56 | ## Contributing software 57 | 58 | ro-crate-py is written in [Python](https://www.python.org). 
To isolate your development environment from the underlying system, you can use a [virtual environment](https://docs.python.org/3.8/library/venv.html): 59 | 60 | ``` 61 | $ python3 -m venv venv 62 | $ source venv/bin/activate 63 | $ pip install --upgrade pip 64 | $ pip install -r requirements.txt 65 | ``` 66 | 67 | For development, it's recommended to install ro-crate-py in [editable mode](https://setuptools.pypa.io/en/latest/userguide/development_mode.html): 68 | 69 | ``` 70 | pip install -e . 71 | ``` 72 | 73 | In this way, any changes to the code will be picked up immediately, without the need to reinstall the package. 74 | 75 | When you're done with your work, you can deactivate the virtual environment by typing `deactivate` on your shell. 76 | 77 | Before pushing any changes, make sure everything is fine by running the linting and testing commands as explained below. 78 | 79 | ### Linting 80 | 81 | ro-crate-py uses [Flake8](https://github.com/PyCQA/flake8) for linting. The configuration is in `setup.cfg` and it's picked up automatically. If you have a `venv` directory or any other directory you don't want to be checked by Flake8, use the `--exclude` option. 82 | 83 | ``` 84 | pip install flake8 85 | flake8 --exclude venv ./ 86 | ``` 87 | 88 | ### Testing 89 | 90 | Testing is done with [pytest](https://pytest.org): 91 | 92 | ``` 93 | pip install pytest 94 | pytest test 95 | ``` 96 | 97 | If a test is failing, you can get more information by enabling verbose mode and stdout/stderr dump. For instance: 98 | 99 | ``` 100 | pytest -sv test/test_write.py::test_remote_uri_exceptions 101 | ``` 102 | 103 | Ideally, every code contribution should come with new unit tests that add coverage for the bug fix or new feature. 104 | 105 | ### Using the Docker image for development 106 | 107 | ro-crate-py is currently a fairly simple library that does not require any special infrastructure setup, so virtual environments should be enough for development. 
However, if you want a higher degree of isolation, you can build the [Docker](https://www.docker.com/) image with: 108 | 109 | ``` 110 | docker build -t ro-crate-py . 111 | ``` 112 | 113 | And then run it interactively with: 114 | 115 | ``` 116 | docker run --rm -it --name ro-crate-py ro-crate-py bash 117 | ``` 118 | 119 | 120 | ## Tidying up after PR merge 121 | 122 | After your PR has been merged, you can delete the branch used for your changes. You can delete the remote branch from GitHub, by clicking on "Delete branch" in the PR's page. To resync everything, run: 123 | 124 | ``` 125 | git checkout master 126 | git fetch -p upstream 127 | git merge --ff-only upstream/master 128 | git push origin master 129 | git branch -d <name-of-your-branch> 130 | ``` 131 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.12 2 | 3 | COPY ./ /ro-crate-py 4 | WORKDIR /ro-crate-py 5 | 6 | RUN pip install --no-cache-dir -r requirements.txt && \ 7 | python setup.py install 8 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt CITATION.cff LICENSE 2 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .DEFAULT_GOAL := install 2 | 3 | SHELL := /bin/bash 4 | PYTHON ?= python3 5 | VENV_DIR ?= venv 6 | IN_VENV = [ -f $(VENV_DIR)/bin/activate ] && . 
$(VENV_DIR)/bin/activate; 7 | 8 | 9 | $(VENV_DIR): 10 | $(PYTHON) -m venv $(VENV_DIR) 11 | $(IN_VENV) pip install --upgrade pip 12 | 13 | $(VENV_DIR)/bin/flake8: $(VENV_DIR) 14 | $(IN_VENV) pip install flake8 15 | 16 | $(VENV_DIR)/bin/pytest: $(VENV_DIR) 17 | $(IN_VENV) pip install pytest 18 | 19 | init-venv: $(VENV_DIR) 20 | 21 | install: $(VENV_DIR) 22 | $(IN_VENV) pip install -r requirements.txt 23 | $(IN_VENV) pip install ./ 24 | 25 | lint: $(VENV_DIR)/bin/flake8 26 | $(IN_VENV) flake8 --exclude $(VENV_DIR) ./ 27 | 28 | test: $(VENV_DIR)/bin/pytest install 29 | $(IN_VENV) pytest test 30 | 31 | # WARNING: removes ALL untracked files 32 | clean: 33 | git clean -fdx -e $(VENV_DIR) 34 | 35 | .PHONY: init-venv install lint test clean $(VENV_DIR) 36 | -------------------------------------------------------------------------------- /examples/fastapi/main.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2025 The University of Manchester, UK 2 | # Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE 3 | # Copyright 2020-2025 Barcelona Supercomputing Center (BSC), ES 4 | # Copyright 2020-2025 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT 5 | # Copyright 2022-2025 École Polytechnique Fédérale de Lausanne, CH 6 | # Copyright 2024-2025 Data Centre, SciLifeLab, SE 7 | # Copyright 2024-2025 National Institute of Informatics (NII), JP 8 | # Copyright 2025 Senckenberg Society for Nature Research (SGN), DE 9 | # Copyright 2025 European Molecular Biology Laboratory (EMBL), Heidelberg, DE 10 | # 11 | # Licensed under the Apache License, Version 2.0 (the "License"); 12 | # you may not use this file except in compliance with the License. 
@app.get("/crate")
async def get():
    """Assemble a two-file RO-Crate and stream it to the client as a zip.

    The crate holds one remote file (added with ``fetch_remote=True``) and
    one file whose content comes from an in-memory ``StringIO``. The
    response body is whatever ``crate.stream_zip()`` yields.
    """
    crate = ROCrate()

    # Remote payload: registered by URL, with fetching enabled.
    remote_url = (
        "https://raw.githubusercontent.com/ResearchObject/ro-crate-py/refs/heads/master/test/test-data/sample_file.txt"
    )
    crate.add_file(remote_url, fetch_remote=True)

    # In-memory payload: the text is wrapped in a StringIO source.
    greeting = StringIO("Hello, World!")
    crate.add_file(source=greeting, dest_path="test-data/hello.txt")

    # Ask the browser to save the stream as "crate.zip".
    download_headers = {
        "Content-Disposition": "attachment; filename=crate.zip",
    }
    zip_stream = crate.stream_zip()
    return StreamingResponse(
        zip_stream,
        media_type="application/rocrate+zip",
        headers=download_headers,
    )
-------------------------------------------------------------------------------- /examples/read_test_metadata.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2025 The University of Manchester, UK 2 | # Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE 3 | # Copyright 2020-2025 Barcelona Supercomputing Center (BSC), ES 4 | # Copyright 2020-2025 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT 5 | # Copyright 2022-2025 École Polytechnique Fédérale de Lausanne, CH 6 | # Copyright 2024-2025 Data Centre, SciLifeLab, SE 7 | # Copyright 2024-2025 National Institute of Informatics (NII), JP 8 | # Copyright 2025 Senckenberg Society for Nature Research (SGN), DE 9 | # Copyright 2025 European Molecular Biology Laboratory (EMBL), Heidelberg, DE 10 | # 11 | # Licensed under the Apache License, Version 2.0 (the "License"); 12 | # you may not use this file except in compliance with the License. 13 | # You may obtain a copy of the License at 14 | # 15 | # http://www.apache.org/licenses/LICENSE-2.0 16 | # 17 | # Unless required by applicable law or agreed to in writing, software 18 | # distributed under the License is distributed on an "AS IS" BASIS, 19 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20 | # See the License for the specific language governing permissions and 21 | # limitations under the License. 22 | 23 | """\ 24 | Read test metadata from an RO-Crate and print the information to the console. 25 | If Planemo is installed (pip install planemo), also run a test. 
# Docker image passed to Planemo via --docker_galaxy_image.
GALAXY_IMG = "bgruening/galaxy-stable:20.05"
THIS_DIR = pathlib.Path(__file__).absolute().parent
REPO_DIR = THIS_DIR.parent
# Sample crate shipped with the repository's test data.
RO_CRATE_DIR = REPO_DIR / "test/test-data/ro-crate-galaxy-sortchangecase"
# Engine identifier the test definition must carry before Planemo is run.
PLANEMO = "https://w3id.org/ro/terms/test#PlanemoEngine"


def print_suites(crate):
    """Print every test suite of *crate* to stdout.

    For each suite this prints its id, the id of its ``mainEntity``
    workflow, each instance (id, service name, url, resource) and the test
    definition (id, engine name, engine version).
    """
    print("test suites:")
    for suite in crate.test_suites:
        print(" ", suite.id)
        print(" workflow:", suite["mainEntity"].id)
        print(" instances:")
        for inst in suite.instance:
            print(" ", inst.id)
            print(" service:", inst.service.name)
            print(" url:", inst.url)
            print(" resource:", inst.resource)
        print(" definition:")
        print(" id:", suite.definition.id)
        engine = suite.definition.engine
        print(" engine:", engine.name)
        print(" engine version:", suite.definition.engineVersion)


def main():
    """Copy the sample crate to a scratch dir, describe it, and run its test.

    The crate is copied so that the workflow file Planemo needs next to the
    test definition is never written into the repository checkout. If no
    ``planemo`` executable is on ``PATH`` the function prints a notice and
    returns after listing the suites.

    Raises:
        AssertionError: if the first suite's definition engine is not Planemo.
        subprocess.CalledProcessError: if the Planemo run exits non-zero.
    """
    # TemporaryDirectory removes the scratch copy on every exit path: the
    # early return below, a failing Planemo run, or normal completion.
    with tempfile.TemporaryDirectory(prefix="ro_crate_py_") as tmp_dir:
        wd = pathlib.Path(tmp_dir)
        crate_dir = wd / RO_CRATE_DIR.name
        shutil.copytree(RO_CRATE_DIR, crate_dir)
        crate = ROCrate(crate_dir)
        print_suites(crate)

        main_workflow = crate.root_dataset["mainEntity"]
        print("main workflow:", main_workflow.id)

        # shutil.which does the PATH lookup without spawning a shell.
        exe = shutil.which("planemo")
        if exe is None:
            print("planemo executable not found, won't try to run tests")
            return
        print("planemo executable:", exe)

        # run a test suite
        suite = crate.test_suites[0]
        def_path = crate_dir / suite.definition.id
        workflow = suite["mainEntity"]
        workflow_path = crate_dir / workflow.id

        print("running suite:", suite.id)
        print("definition path:", def_path)
        print("workflow:", workflow.id)
        assert suite.definition.engine.id == PLANEMO
        new_workflow_path = str(def_path.parent / workflow_path.name)
        # Planemo expects the test definition in the same dir as the workflow file
        shutil.copy2(workflow_path, new_workflow_path)
        cmd = ["planemo", "test", "--engine", "docker_galaxy",
               "--docker_galaxy_image", GALAXY_IMG, new_workflow_path]
        print("Running Planemo (this may take a while)")
        # check=True raises CalledProcessError on a non-zero exit status.
        subprocess.run(cmd, check=True)
        print("OK")


if __name__ == "__main__":
    main()
"https://github.com/nf-core/clinvap", 25 | "keywords": [ 26 | "nf-core, clinical, variant-annotation, annotation" 27 | ], 28 | "mainEntity": { 29 | "@id": "main.nf" 30 | }, 31 | "hasPart": [ 32 | { 33 | "@id": "main.nf" 34 | }, 35 | { 36 | "@id": "bin/" 37 | }, 38 | { 39 | "@id": "assets/" 40 | }, 41 | { 42 | "@id": "nextflow.config" 43 | }, 44 | { 45 | "@id": "Dockerfile" 46 | }, 47 | { 48 | "@id": "conf/" 49 | }, 50 | { 51 | "@id": "LICENSE" 52 | }, 53 | { 54 | "@id": "docs/" 55 | }, 56 | { 57 | "@id": "Singularity" 58 | }, 59 | { 60 | "@id": "README.md" 61 | }, 62 | { 63 | "@id": "CHANGELOG.md" 64 | }, 65 | { 66 | "@id": "environment.yml" 67 | }, 68 | { 69 | "@id": "CODE_OF_CONDUCT.md" 70 | } 71 | ] 72 | }, 73 | { 74 | "@id": "main.nf", 75 | "@type": [ 76 | "File", 77 | "SoftwareSourceCode", 78 | "Workflow" 79 | ], 80 | "programmingLanguage": { 81 | "@id": "#nextflow" 82 | }, 83 | "contentSize": 13300 84 | }, 85 | { 86 | "@id": "bin/", 87 | "@type": "Dataset" 88 | }, 89 | { 90 | "@id": "assets/", 91 | "@type": "Dataset" 92 | }, 93 | { 94 | "@id": "nextflow.config", 95 | "@type": "File", 96 | "contentSize": 3602 97 | }, 98 | { 99 | "@id": "Dockerfile", 100 | "@type": "File", 101 | "contentSize": 280 102 | }, 103 | { 104 | "@id": "conf/", 105 | "@type": "Dataset" 106 | }, 107 | { 108 | "@id": "LICENSE", 109 | "@type": "File", 110 | "contentSize": 1065 111 | }, 112 | { 113 | "@id": "docs/", 114 | "@type": "Dataset" 115 | }, 116 | { 117 | "@id": "Singularity", 118 | "@type": "File", 119 | "contentSize": 401 120 | }, 121 | { 122 | "@id": "README.md", 123 | "@type": "File", 124 | "contentSize": 1603 125 | }, 126 | { 127 | "@id": "CHANGELOG.md", 128 | "@type": "File", 129 | "contentSize": 141 130 | }, 131 | { 132 | "@id": "environment.yml", 133 | "@type": "File", 134 | "contentSize": 380 135 | }, 136 | { 137 | "@id": "CODE_OF_CONDUCT.md", 138 | "@type": "File", 139 | "contentSize": 3250 140 | }, 141 | { 142 | "@id": "#nextflow", 143 | "@type": "ComputerLanguage", 144 | 
"name": "Nextflow", 145 | "identifier": { 146 | "@id": "https://www.nextflow.io/" 147 | }, 148 | "url": { 149 | "@id": "https://www.nextflow.io/" 150 | } 151 | } 152 | ] 153 | } -------------------------------------------------------------------------------- /notebooks/sample_data/methylseq/ro-crate-metadata.jsonld: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "https://w3id.org/ro/crate/1.0/context", 3 | "@graph": [ 4 | { 5 | "@id": "ro-crate-metadata.jsonld", 6 | "@type": "CreativeWork", 7 | "about": { 8 | "@id": "./" 9 | } 10 | }, 11 | { 12 | "@id": "ro-crate-preview.html", 13 | "@type": "CreativeWork", 14 | "about": { 15 | "@id": "./" 16 | } 17 | }, 18 | { 19 | "@id": "./", 20 | "@type": "Dataset", 21 | "name": "nf-core/methylseq", 22 | "description": "Methylation (Bisulfite-Sequencing) Best Practice analysis pipeline, part of the nf-core community.", 23 | "author": "Phil Ewels", 24 | "url": "https://github.com/nf-core/methylseq", 25 | "keywords": [ 26 | "nf-core, bisulfite-sequencing, dna-methylation, methyl-seq" 27 | ], 28 | "mainEntity": { 29 | "@id": "main.nf" 30 | }, 31 | "hasPart": [ 32 | { 33 | "@id": "main.nf" 34 | }, 35 | { 36 | "@id": "bin/" 37 | }, 38 | { 39 | "@id": "assets/" 40 | }, 41 | { 42 | "@id": "parameters.settings.json" 43 | }, 44 | { 45 | "@id": "nextflow.config" 46 | }, 47 | { 48 | "@id": "Dockerfile" 49 | }, 50 | { 51 | "@id": "conf/" 52 | }, 53 | { 54 | "@id": "LICENSE" 55 | }, 56 | { 57 | "@id": "docs/" 58 | }, 59 | { 60 | "@id": "README.md" 61 | }, 62 | { 63 | "@id": "CHANGELOG.md" 64 | }, 65 | { 66 | "@id": "environment.yml" 67 | }, 68 | { 69 | "@id": "CODE_OF_CONDUCT.md" 70 | } 71 | ] 72 | }, 73 | { 74 | "@id": "main.nf", 75 | "@type": [ 76 | "File", 77 | "SoftwareSourceCode", 78 | "Workflow" 79 | ], 80 | "programmingLanguage": { 81 | "@id": "#nextflow" 82 | }, 83 | "contentSize": 49166 84 | }, 85 | { 86 | "@id": "bin/", 87 | "@type": "Dataset" 88 | }, 89 | { 90 | "@id": 
"assets/", 91 | "@type": "Dataset" 92 | }, 93 | { 94 | "@id": "parameters.settings.json", 95 | "@type": "File", 96 | "contentSize": 19923 97 | }, 98 | { 99 | "@id": "nextflow.config", 100 | "@type": "File", 101 | "contentSize": 5320 102 | }, 103 | { 104 | "@id": "Dockerfile", 105 | "@type": "File", 106 | "contentSize": 373 107 | }, 108 | { 109 | "@id": "conf/", 110 | "@type": "Dataset" 111 | }, 112 | { 113 | "@id": "LICENSE", 114 | "@type": "File", 115 | "contentSize": 1062 116 | }, 117 | { 118 | "@id": "docs/", 119 | "@type": "Dataset" 120 | }, 121 | { 122 | "@id": "README.md", 123 | "@type": "File", 124 | "contentSize": 5733 125 | }, 126 | { 127 | "@id": "CHANGELOG.md", 128 | "@type": "File", 129 | "contentSize": 6070 130 | }, 131 | { 132 | "@id": "environment.yml", 133 | "@type": "File", 134 | "contentSize": 633 135 | }, 136 | { 137 | "@id": "CODE_OF_CONDUCT.md", 138 | "@type": "File", 139 | "contentSize": 3234 140 | }, 141 | { 142 | "@id": "#nextflow", 143 | "@type": "ComputerLanguage", 144 | "name": "Nextflow", 145 | "identifier": { 146 | "@id": "https://www.nextflow.io/" 147 | }, 148 | "url": { 149 | "@id": "https://www.nextflow.io/" 150 | } 151 | } 152 | ] 153 | } -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools >= 64"] 3 | build-backend = "setuptools.build_meta" 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | arcp==0.2.1 3 | jinja2 4 | python-dateutil 5 | click 6 | -------------------------------------------------------------------------------- /rocrate/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2019-2025 The University of Manchester, UK 
4 | # Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE 5 | # Copyright 2020-2025 Barcelona Supercomputing Center (BSC), ES 6 | # Copyright 2020-2025 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT 7 | # Copyright 2022-2025 École Polytechnique Fédérale de Lausanne, CH 8 | # Copyright 2024-2025 Data Centre, SciLifeLab, SE 9 | # Copyright 2024-2025 National Institute of Informatics (NII), JP 10 | # Copyright 2025 Senckenberg Society for Nature Research (SGN), DE 11 | # Copyright 2025 European Molecular Biology Laboratory (EMBL), Heidelberg, DE 12 | # 13 | # Licensed under the Apache License, Version 2.0 (the "License"); 14 | # you may not use this file except in compliance with the License. 15 | # You may obtain a copy of the License at 16 | # 17 | # http://www.apache.org/licenses/LICENSE-2.0 18 | # 19 | # Unless required by applicable law or agreed to in writing, software 20 | # distributed under the License is distributed on an "AS IS" BASIS, 21 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 22 | # See the License for the specific language governing permissions and 23 | # limitations under the License. 24 | 25 | """ 26 | Create/parse RO-Crate metadata. 27 | 28 | This module intends to help create or parse 29 | RO-Crate metadata, see rocrate_ 30 | 31 | .. 
_rocrate: https://w3id.org/ro/crate/ 32 | """ 33 | 34 | __author__ = ", ".join(( 35 | 'Daniel Bauer', 36 | 'Eli Chadwick', 37 | 'Paul De Geest', 38 | 'Bert Droesbeke', 39 | 'Ignacio Eguinoa', 40 | 'Alban Gaignard', 41 | 'Matthias Hörtenhuber', 42 | 'Sebastiaan Huber', 43 | 'Bruno Kinoshita', 44 | 'Simone Leo', 45 | 'Luca Pireddu', 46 | 'Laura Rodríguez-Navas', 47 | 'Raül Sirvent', 48 | 'Stian Soiland-Reyes', 49 | 'Laurent Thomas' 50 | )) 51 | __copyright__ = """\ 52 | Copyright 2019-2025 The University of Manchester, UK 53 | Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE 54 | Copyright 2020-2025 Barcelona Supercomputing Center (BSC), ES 55 | Copyright 2020-2025 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT 56 | Copyright 2022-2025 École Polytechnique Fédérale de Lausanne, CH 57 | Copyright 2024-2025 Data Centre, SciLifeLab, SE 58 | Copyright 2024-2025 National Institute of Informatics (NII), JP 59 | Copyright 2025 Senckenberg Society for Nature Research (SGN), DE 60 | Copyright 2025 European Molecular Biology Laboratory (EMBL), Heidelberg, DE 61 | """ 62 | __license__ = ("Apache License, version 2.0 " 63 | "") 64 | 65 | # for arcp scheme registration with urllib.parse 66 | import arcp # noqa 67 | 68 | # Convenience export of public functions/types 69 | from .model.metadata import Metadata # noqa 70 | from ._version import __version__ # noqa 71 | -------------------------------------------------------------------------------- /rocrate/_version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.13.0" 2 | -------------------------------------------------------------------------------- /rocrate/cli.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2025 The University of Manchester, UK 2 | # Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE 3 | # Copyright 2020-2025 Barcelona Supercomputing 
Center (BSC), ES 4 | # Copyright 2020-2025 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT 5 | # Copyright 2022-2025 École Polytechnique Fédérale de Lausanne, CH 6 | # Copyright 2024-2025 Data Centre, SciLifeLab, SE 7 | # Copyright 2024-2025 National Institute of Informatics (NII), JP 8 | # Copyright 2025 Senckenberg Society for Nature Research (SGN), DE 9 | # Copyright 2025 European Molecular Biology Laboratory (EMBL), Heidelberg, DE 10 | # 11 | # Licensed under the Apache License, Version 2.0 (the "License"); 12 | # you may not use this file except in compliance with the License. 13 | # You may obtain a copy of the License at 14 | # 15 | # http://www.apache.org/licenses/LICENSE-2.0 16 | # 17 | # Unless required by applicable law or agreed to in writing, software 18 | # distributed under the License is distributed on an "AS IS" BASIS, 19 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20 | # See the License for the specific language governing permissions and 21 | # limitations under the License. 
import os
from pathlib import Path

import click
from .rocrate import ROCrate
from .model.computerlanguage import LANG_MAP
from .model.testservice import SERVICE_MAP
from .model.softwareapplication import APP_MAP


# Accepted values for the --language, --service and --engine options.
LANG_CHOICES = list(LANG_MAP)
SERVICE_CHOICES = list(SERVICE_MAP)
ENGINE_CHOICES = list(APP_MAP)


class CSVParamType(click.ParamType):
    """Click parameter type that turns "a,b,c" into ["a", "b", "c"]."""
    name = "csv"

    def convert(self, value, param, ctx):
        # Values that are already a collection are passed through untouched.
        if isinstance(value, (list, tuple, set, frozenset)):
            return value
        try:
            return value.split(",") if value else []
        except AttributeError:
            self.fail(f"{value!r} is not splittable", param, ctx)


class KeyValueParamType(click.ParamType):
    """Click parameter type that turns "KEY=VALUE" into a (key, value) tuple."""
    name = "key_value"

    def convert(self, value, param, ctx):
        if not value:
            return ()
        try:
            # Split on the first "=" only, so that values may contain "=".
            key, sep, val = value.partition("=")
        except AttributeError:
            self.fail(f"{value!r} is not splittable", param, ctx)
        if not sep:
            # Without this check a bare "KEY" would yield a 1-tuple, which
            # only blows up later when the commands call dict(property).
            self.fail(f"{value!r} is not in KEY=VALUE form", param, ctx)
        return key, val


CSV = CSVParamType()
KeyValue = KeyValueParamType()
# Options shared by several commands.
OPTION_CRATE_PATH = click.option(
    "-c",
    "--crate-dir",
    type=click.Path(),
    default=os.getcwd,
    help="The path to the root data entity of the crate. Defaults to the current working directory.",
)
OPTION_PROPS = click.option(
    "-P",
    "--property",
    type=KeyValue,
    multiple=True,
    metavar="KEY=VALUE",
    help="Add an additional property to the metadata for this entity. Can be used multiple times to set multiple properties.",
)


def _locate_in_crate(path, crate_dir):
    # Return (source, dest_path): the fully resolved ``path`` and the same
    # path relative to the crate directory.  The crate directory is resolved
    # as well: ``source`` is absolute and symlink-free, so comparing it with
    # a relative or symlinked ``crate_dir`` would wrongly report that the
    # path lies outside the crate.
    # Raises ValueError if ``path`` is not inside the crate directory.
    source = Path(path).resolve(strict=True)
    try:
        dest_path = source.relative_to(Path(crate_dir).resolve())
    except ValueError:
        # For now, only support adding content that already is in the crate
        raise ValueError(f"{source} is not in the crate dir {crate_dir}")
    return source, dest_path


# Top-level command group.
@click.group()
def cli():
    pass


# Initialize a crate from the contents of crate_dir and write its metadata
# file (plus the HTML preview, if requested).
@cli.command()
@click.option(
    "--gen-preview", is_flag=True, help="Generate a HTML preview file for the crate."
)
@click.option(
    "-e",
    "--exclude",
    type=CSV,
    metavar="NAME",
    help="Exclude files or directories from the metadata file. NAME may be a single name or a comma-separated list of names.",
)
@OPTION_CRATE_PATH
def init(crate_dir, gen_preview, exclude):
    crate = ROCrate(crate_dir, init=True, gen_preview=gen_preview, exclude=exclude)
    crate.metadata.write(crate_dir)
    if crate.preview:
        crate.preview.write(crate_dir)


# Sub-group for the commands that add entities to an existing crate.
@cli.group()
def add():
    pass


# Register an existing file of the crate directory in the metadata.
@add.command()
@click.argument("path", type=click.Path(exists=True, dir_okay=False))
@OPTION_CRATE_PATH
@OPTION_PROPS
def file(crate_dir, path, property):
    crate = ROCrate(crate_dir, init=False, gen_preview=False)
    source, dest_path = _locate_in_crate(path, crate_dir)
    crate.add_file(source, dest_path, properties=dict(property))
    crate.metadata.write(crate_dir)


# Register an existing directory of the crate directory in the metadata.
@add.command()
@click.argument("path", type=click.Path(exists=True, file_okay=False))
@OPTION_CRATE_PATH
@OPTION_PROPS
def dataset(crate_dir, path, property):
    crate = ROCrate(crate_dir, init=False, gen_preview=False)
    source, dest_path = _locate_in_crate(path, crate_dir)
    crate.add_dataset(source, dest_path, properties=dict(property))
    crate.metadata.write(crate_dir)


# Mark an existing file of the crate directory as the main workflow.
@add.command()
@click.argument("path", type=click.Path(exists=True))
@click.option(
    "-l",
    "--language",
    type=click.Choice(LANG_CHOICES),
    default="cwl",
    help="The workflow language.",
)
@OPTION_CRATE_PATH
@OPTION_PROPS
def workflow(crate_dir, path, language, property):
    crate = ROCrate(crate_dir, init=False, gen_preview=False)
    source, dest_path = _locate_in_crate(path, crate_dir)
    # TODO: add command options for main and gen_cwl
    crate.add_workflow(
        source,
        dest_path,
        main=True,
        lang=language,
        gen_cwl=False,
        properties=dict(property),
    )
    crate.metadata.write(crate_dir)


# Add a test suite to the crate and print its identifier.
@add.command(name="test-suite")
@click.option("-i", "--identifier")
@click.option("-n", "--name")
@click.option("-m", "--main-entity")
@OPTION_CRATE_PATH
@OPTION_PROPS
def suite(crate_dir, identifier, name, main_entity, property):
    crate = ROCrate(crate_dir, init=False, gen_preview=False)
    suite = crate.add_test_suite(
        identifier=identifier,
        name=name,
        main_entity=main_entity,
        properties=dict(property),
    )
    crate.metadata.write(crate_dir)
    print(suite.id)


# Add a test instance to the given suite and print its identifier.
@add.command(name="test-instance")
@click.argument("suite")
@click.argument("url")
@click.option("-r", "--resource", default="")
@click.option("-s", "--service", type=click.Choice(SERVICE_CHOICES), default="jenkins")
@click.option("-i", "--identifier")
@click.option("-n", "--name")
@OPTION_CRATE_PATH
@OPTION_PROPS
def instance(crate_dir, suite, url, resource, service, identifier, name, property):
    crate = ROCrate(crate_dir, init=False, gen_preview=False)
    instance_ = crate.add_test_instance(
        suite,
        url,
        resource=resource,
        service=service,
        identifier=identifier,
        name=name,
        properties=dict(property),
    )
    crate.metadata.write(crate_dir)
    print(instance_.id)


@add.command(name="test-definition") 214 | @click.argument("suite") 215 | @click.argument("path", type=click.Path(exists=True)) 216 | @click.option("-e", "--engine", type=click.Choice(ENGINE_CHOICES), default="planemo") 217 | @click.option("-v", "--engine-version") 218 | @OPTION_CRATE_PATH 219 | @OPTION_PROPS 220 | def definition(crate_dir, suite, path, engine, engine_version, property): 221 | crate = ROCrate(crate_dir, init=False, gen_preview=False) 222 | source = Path(path).resolve(strict=True) 223 | try: 224 | dest_path = source.relative_to(crate_dir) 225 | except ValueError: 226 | # For now, only support marking an existing file as a test definition 227 | raise ValueError(f"{source} is not in the crate dir {crate_dir}") 228 | crate.add_test_definition( 229 | suite, 230 | source=source, 231 | dest_path=dest_path, 232 | engine=engine, 233 | engine_version=engine_version, 234 | properties=dict(property), 235 | ) 236 | crate.metadata.write(crate_dir) 237 | 238 | 239 | @cli.command() 240 | @click.argument("dst", type=click.Path(writable=True)) 241 | @OPTION_CRATE_PATH 242 | def write_zip(crate_dir, dst): 243 | crate = ROCrate(crate_dir, init=False, gen_preview=False) 244 | crate.write_zip(dst) 245 | 246 | 247 | if __name__ == "__main__": 248 | cli() 249 | -------------------------------------------------------------------------------- /rocrate/data/update.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Update version number from https://schema.org/docs/releases.html 4 | curl -L -f -o schema.jsonld https://schema.org/version/10.0/schemaorg-all-http.jsonld 5 | # Apache License 2.0 https://github.com/schemaorg/schemaorg/blob/V10.0-release/LICENSE 6 | # no NOTICE - so just keeping file as-is is sufficient (we are also Apache-2.0) 7 | 8 | curl -L -f -o ro-crate.jsonld https://w3id.org/ro/crate/1.1/context 9 | # CC0 https://creativecommons.org/publicdomain/zero/1.0/ 10 | # so no attribution needed 11 | 
# --------------------------------------------------------------------------------
# /rocrate/memory_buffer.py:
# --------------------------------------------------------------------------------
# Copyright 2019-2025 The University of Manchester, UK
# Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE
# Copyright 2020-2025 Barcelona Supercomputing Center (BSC), ES
# Copyright 2020-2025 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT
# Copyright 2022-2025 École Polytechnique Fédérale de Lausanne, CH
# Copyright 2024-2025 Data Centre, SciLifeLab, SE
# Copyright 2024-2025 National Institute of Informatics (NII), JP
# Copyright 2025 Senckenberg Society for Nature Research (SGN), DE
# Copyright 2025 European Molecular Biology Laboratory (EMBL), Heidelberg, DE
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from io import RawIOBase


class MemoryBuffer(RawIOBase):
    """
    A buffer class that supports reading and writing binary data.
    The buffer automatically resets upon reading to make sure all data is read only once.

    ``write`` appends to the pending data; ``read`` returns pending data and
    removes it from the buffer; ``len(buffer)`` is the number of pending bytes.
    """

    def __init__(self):
        super().__init__()
        # A bytearray grows in place, whereas appending to an immutable
        # bytes object copies the whole pending content on every write.
        self._buffer = bytearray()

    def write(self, data):
        """Append ``data`` (a bytes-like object) and return its length.

        Raises ValueError if the buffer has been closed.
        """
        if self.closed:
            raise ValueError('write to closed file')
        self._buffer += data
        return len(data)

    def read(self, size=-1):
        """Return up to ``size`` pending bytes and drop them from the buffer.

        A negative or ``None`` size returns (and drops) everything that is
        pending. Raises ValueError if the buffer has been closed.
        """
        if self.closed:
            raise ValueError('read from closed file')
        if size is None or size < 0:
            data = bytes(self._buffer)
            self._buffer.clear()
        else:
            data = bytes(self._buffer[:size])
            del self._buffer[:size]
        return data

    def __len__(self):
        return len(self._buffer)


# --------------------------------------------------------------------------------
# /rocrate/metadata.py:
# --------------------------------------------------------------------------------
# Copyright 2019-2025 The University of Manchester, UK
# Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE
# Copyright 2020-2025 Barcelona Supercomputing Center (BSC), ES
# Copyright 2020-2025 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT
# Copyright 2022-2025 École Polytechnique Fédérale de Lausanne, CH
# Copyright 2024-2025 Data Centre, SciLifeLab, SE
# Copyright 2024-2025 National Institute of Informatics (NII), JP
# Copyright 2025 Senckenberg Society for Nature Research (SGN), DE
# Copyright 2025 European Molecular Biology Laboratory (EMBL), Heidelberg, DE
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import warnings

from .model import Metadata, LegacyMetadata


def read_metadata(metadata_path):
    """\
    Read an RO-Crate metadata file.

    ``metadata_path`` is either the path of a JSON file or an already loaded
    dictionary.

    Return a tuple of two elements: the context; a dictionary that maps entity
    ids to the entities themselves.

    Raise ValueError if the metadata lacks "@context" or "@graph".
    """
    if isinstance(metadata_path, dict):
        metadata = metadata_path
    else:
        with open(metadata_path, 'r', encoding='utf-8') as f:
            metadata = json.load(f)
    try:
        context = metadata['@context']
        graph = metadata['@graph']
    except KeyError:
        raise ValueError(f"{metadata_path} must have a @context and a @graph")
    return context, {entity["@id"]: entity for entity in graph}


def _has_type(entity, type_name):
    # True if type_name is the entity's "@type" or one of its "@type" values.
    # JSON-LD allows "@type" to be a single string or a list of strings;
    # a missing "@type" matches nothing.
    types = entity.get("@type")
    if isinstance(types, list):
        return type_name in types
    return types == type_name


def _check_descriptor(descriptor, entities):
    """\
    Validate a metadata file descriptor and the root entity it points to.

    Return the (descriptor id, root id) pair. Raise ValueError if the
    descriptor is not a CreativeWork, if its "about" property does not
    reference an entity in ``entities``, or if the referenced entity does not
    have "Dataset" among its types.
    """
    if not _has_type(descriptor, "CreativeWork"):
        raise ValueError('metadata descriptor must be of type "CreativeWork"')
    try:
        root = entities[descriptor["about"]["@id"]]
    except (KeyError, TypeError):
        raise ValueError("metadata descriptor does not reference the root entity")
    if not _has_type(root, "Dataset"):
        raise ValueError('root entity must have "Dataset" among its types')
    return descriptor["@id"], root["@id"]


def find_root_entity_id(entities):
    """\
    Find metadata file descriptor and root data entity.

    Expects as input a dictionary that maps JSON entity IDs to the entities
    themselves (like the second element returned by read_metadata).

    Return a tuple of the corresponding identifiers (descriptor, root).
    If the entities are not found, raise KeyError. If they are found,
    but they don't satisfy the required constraints, raise ValueError.

    In the general case, the metadata file descriptor id can be an
    absolute URI whose last path segment is "ro-crate-metadata.json[ld]".
    Since there can be more than one such id in the crate, we need to
    choose among the corresponding (descriptor, root) entity pairs. First, we
    exclude those that don't satisfy other constraints, such as the
    descriptor entity being of type CreativeWork, etc.; if this doesn't
    leave us with a single pair, we try to pick one with a
    heuristic. Suppose we are left with the (m1, r1) and (m2, r2) pairs:
    if r1 is the actual root of this crate, then m2 and r2 are regular
    files in it, and as such they must appear in r1's hasPart; r2,
    however, is not required to have a hasPart property listing other
    files. Thus, we look for a pair whose root entity "contains" all
    descriptor entities from other pairs. If there is no such pair, or there
    is more than one, we just return an arbitrary pair.

    """
    # Fast path: a descriptor whose id is exactly the standard file name.
    descriptor = entities.get(Metadata.BASENAME, entities.get(LegacyMetadata.BASENAME))
    if descriptor:
        return _check_descriptor(descriptor, entities)
    # Otherwise collect every valid pair whose descriptor id *ends* with it.
    candidates = []
    for id_, e in entities.items():
        basename = id_.rsplit("/", 1)[-1]
        if basename == Metadata.BASENAME or basename == LegacyMetadata.BASENAME:
            try:
                candidates.append(_check_descriptor(e, entities))
            except ValueError:
                pass
    if not candidates:
        raise KeyError("Metadata file descriptor not found")
    elif len(candidates) == 1:
        return candidates[0]
    else:
        warnings.warn("Multiple metadata file descriptors, will pick one with a heuristic")
        descriptor_ids = {m_id for m_id, _ in candidates}
        for m_id, r_id in candidates:
            try:
                parts = entities[r_id]["hasPart"]
                if isinstance(parts, dict):
                    # a single reference not wrapped in a list
                    parts = [parts]
                part_ids = {part["@id"] for part in parts}
            except (KeyError, TypeError):
                # no hasPart, or hasPart is not a collection of references
                continue
            if part_ids >= descriptor_ids - {m_id}:
                # if True for more than one candidate, this pick is arbitrary
                return m_id, r_id
        return candidates[0]  # fall back to arbitrary pick
# --------------------------------------------------------------------------------
# /rocrate/model/__init__.py:
# --------------------------------------------------------------------------------
# Copyright 2019-2025 The University of Manchester, UK
# Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE
# Copyright 2020-2025 Barcelona Supercomputing Center (BSC), ES
# Copyright 2020-2025 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT
# Copyright 2022-2025 École Polytechnique Fédérale de Lausanne, CH
# Copyright 2024-2025 Data Centre, SciLifeLab, SE
# Copyright 2024-2025 National Institute of Informatics (NII), JP
# Copyright 2025 Senckenberg Society for Nature Research (SGN), DE
# Copyright 2025 European Molecular Biology Laboratory (EMBL), Heidelberg, DE
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Model of RO-Crate metadata.

This module intends to cover each of the data entities and contextual entities
in rocrate_ represented as different Python classes.

.. _rocrate: https://w3id.org/ro/crate/
"""

from .computationalworkflow import ComputationalWorkflow, WorkflowDescription, Workflow
from .computerlanguage import ComputerLanguage
from .contextentity import ContextEntity
from .creativework import CreativeWork
from .data_entity import DataEntity
from .dataset import Dataset
from .entity import Entity
from .file import File
from .file_or_dir import FileOrDir
from .metadata import Metadata, LegacyMetadata
from .person import Person
from .root_dataset import RootDataset
from .softwareapplication import SoftwareApplication
from .testdefinition import TestDefinition
from .testinstance import TestInstance
from .preview import Preview
from .testservice import TestService
from .testsuite import TestSuite

__all__ = [
    "ComputationalWorkflow",
    "ComputerLanguage",
    "ContextEntity",
    "CreativeWork",
    "DataEntity",
    "Dataset",
    "Entity",
    "File",
    "FileOrDir",
    "LegacyMetadata",
    "Metadata",
    "Person",
    "Preview",
    "RootDataset",
    "SoftwareApplication",
    "TestDefinition",
    "TestInstance",
    "TestService",
    "TestSuite",
    "Workflow",
    "WorkflowDescription",
]

# --------------------------------------------------------------------------------
# /rocrate/model/computationalworkflow.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python

# Copyright 2019-2025 The University of Manchester, UK
# Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE
# Copyright 2020-2025 Barcelona Supercomputing Center (BSC), ES
# Copyright 2020-2025 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT
# Copyright 2022-2025 École Polytechnique Fédérale de Lausanne, CH
# Copyright 2024-2025 Data Centre, SciLifeLab, SE
# Copyright 2024-2025 National Institute of Informatics (NII), JP
# Copyright 2025 Senckenberg Society for Nature Research (SGN), DE
# Copyright 2025 European Molecular Biology Laboratory (EMBL), Heidelberg, DE
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import atexit
import os
import tempfile
from contextlib import redirect_stdout

from .file import File


class ComputationalWorkflow(File):
    """\
    A scientific workflow that was used (or can be used) to analyze or
    generate files in the RO-Crate.
    """
    TYPES = ["File", "SoftwareSourceCode", "ComputationalWorkflow"]

    def _empty(self):
        # Initial JSON-LD for the entity: a copy of TYPES (so instances never
        # share the class-level list) and the id without its file extension
        # as the default name.
        return {
            "@id": self.id,
            "@type": self.TYPES[:],
            "name": os.path.splitext(self.id)[0],
        }

    @property
    def programmingLanguage(self):
        return self.get("programmingLanguage")

    @programmingLanguage.setter
    def programmingLanguage(self, programmingLanguage):
        self["programmingLanguage"] = programmingLanguage

    # Shorter aliases of the same property.
    language = lang = programmingLanguage

    @property
    def subjectOf(self):
        return self.get("subjectOf")

    @subjectOf.setter
    def subjectOf(self, subjectOf):
        self["subjectOf"] = subjectOf


class WorkflowDescription(ComputationalWorkflow):
    """\
    Abstract CWL description of the main workflow.
    """
    TYPES = ["File", "SoftwareSourceCode", "HowTo"]


# Legacy
class Workflow(ComputationalWorkflow):

    TYPES = ["File", "SoftwareSourceCode", "Workflow"]


def galaxy_to_abstract_cwl(workflow_path, delete=True):
    """\
    Convert a Galaxy workflow to an abstract CWL description.

    The output of galaxy2cwl is captured into a new temporary ".cwl" file,
    whose path is returned. If ``delete`` is true, the file is scheduled for
    removal at interpreter exit. If the conversion fails, the temporary file
    is removed right away and the error is propagated.

    Raise RuntimeError if the optional galaxy2cwl package is not installed.
    """
    try:
        from galaxy2cwl import get_cwl_interface
    except ImportError:
        raise RuntimeError("conversion to cwl not available: package was not installed with the 'ga2cwl' option")
    f = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix=".cwl")
    try:
        # galaxy2cwl prints the CWL document to stdout
        with f, redirect_stdout(f):
            get_cwl_interface.main(['1', str(workflow_path)])
    except BaseException:
        # delete=False means nobody else cleans up: do not leave a
        # half-written file behind (it is closed at this point)
        os.unlink(f.name)
        raise
    if delete:
        atexit.register(os.unlink, f.name)
    return f.name


# --------------------------------------------------------------------------------
# /rocrate/model/computerlanguage.py:
# --------------------------------------------------------------------------------
# Copyright 2019-2025 The University of Manchester, UK
# Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE
# Copyright 2020-2025 Barcelona Supercomputing Center (BSC), ES
# Copyright 2020-2025 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT
# Copyright 2022-2025 École Polytechnique Fédérale de Lausanne, CH
# Copyright 2024-2025 Data Centre, SciLifeLab, SE
# Copyright 2024-2025 National Institute of Informatics (NII), JP
# Copyright 2025 Senckenberg Society for Nature Research (SGN), DE
# Copyright 2025 European Molecular Biology Laboratory (EMBL), Heidelberg, DE
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
13 | # You may obtain a copy of the License at 14 | # 15 | # http://www.apache.org/licenses/LICENSE-2.0 16 | # 17 | # Unless required by applicable law or agreed to in writing, software 18 | # distributed under the License is distributed on an "AS IS" BASIS, 19 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20 | # See the License for the specific language governing permissions and 21 | # limitations under the License. 22 | 23 | from .contextentity import ContextEntity 24 | 25 | 26 | class ComputerLanguage(ContextEntity): 27 | 28 | def _empty(self): 29 | return { 30 | "@id": self.id, 31 | "@type": 'ComputerLanguage' 32 | } 33 | 34 | @property 35 | def name(self): 36 | return self.get("name") 37 | 38 | @name.setter 39 | def name(self, name): 40 | self["name"] = name 41 | 42 | @property 43 | def alternateName(self): 44 | return self.get("alternateName") 45 | 46 | @alternateName.setter 47 | def alternateName(self, alternateName): 48 | self["alternateName"] = alternateName 49 | 50 | @property 51 | def identifier(self): 52 | return self.get("identifier") 53 | 54 | @identifier.setter 55 | def identifier(self, identifier): 56 | self["identifier"] = identifier 57 | 58 | @property 59 | def url(self): 60 | return self.get("url") 61 | 62 | @url.setter 63 | def url(self, url): 64 | self["url"] = url 65 | 66 | # Not listed as a property in "https://schema.org/ComputerLanguage" 67 | @property 68 | def version(self): 69 | return self.get("version") 70 | 71 | @version.setter 72 | def version(self, version): 73 | self["version"] = version 74 | 75 | 76 | # See https://w3id.org/workflowhub/workflow-ro-crate/1.0 77 | # (note that it does not specify "version") 78 | 79 | 80 | def cwl(crate, version=None): 81 | id_ = "https://w3id.org/workflowhub/workflow-ro-crate#cwl" 82 | identifier = "https://w3id.org/cwl/" 83 | if version: 84 | identifier = f"{identifier}v{version.lstrip('v')}/" 85 | properties = { 86 | "name": "Common Workflow Language", 87 | 
"alternateName": "CWL", 88 | "identifier": { 89 | "@id": identifier 90 | }, 91 | "url": { 92 | "@id": "https://www.commonwl.org/" 93 | }, 94 | } 95 | if version: 96 | properties["version"] = version 97 | return ComputerLanguage(crate, identifier=id_, properties=properties) 98 | 99 | 100 | def galaxy(crate, version=None): 101 | id_ = "https://w3id.org/workflowhub/workflow-ro-crate#galaxy" 102 | properties = { 103 | "name": "Galaxy", 104 | "identifier": { 105 | "@id": "https://galaxyproject.org/" 106 | }, 107 | "url": { 108 | "@id": "https://galaxyproject.org/" 109 | } 110 | } 111 | if version: 112 | properties["version"] = version 113 | return ComputerLanguage(crate, identifier=id_, properties=properties) 114 | 115 | 116 | def knime(crate, version=None): 117 | id_ = "https://w3id.org/workflowhub/workflow-ro-crate#knime" 118 | properties = { 119 | "name": "KNIME", 120 | "identifier": { 121 | "@id": "https://www.knime.com/" 122 | }, 123 | "url": { 124 | "@id": "https://www.knime.com/" 125 | } 126 | } 127 | if version: 128 | properties["version"] = version 129 | return ComputerLanguage(crate, identifier=id_, properties=properties) 130 | 131 | 132 | def nextflow(crate, version=None): 133 | id_ = "https://w3id.org/workflowhub/workflow-ro-crate#nextflow" 134 | properties = { 135 | "name": "Nextflow", 136 | "identifier": { 137 | "@id": "https://www.nextflow.io/" 138 | }, 139 | "url": { 140 | "@id": "https://www.nextflow.io/" 141 | } 142 | } 143 | if version: 144 | properties["version"] = version 145 | return ComputerLanguage(crate, identifier=id_, properties=properties) 146 | 147 | 148 | def snakemake(crate, version=None): 149 | id_ = "https://w3id.org/workflowhub/workflow-ro-crate#snakemake" 150 | properties = { 151 | "name": "Snakemake", 152 | "identifier": { 153 | "@id": "https://doi.org/10.1093/bioinformatics/bts480" 154 | }, 155 | "url": { 156 | "@id": "https://snakemake.readthedocs.io" 157 | } 158 | } 159 | if version: 160 | properties["version"] = version 161 | 
return ComputerLanguage(crate, identifier=id_, properties=properties) 162 | 163 | 164 | def compss(crate, version=None): 165 | properties = { 166 | "name": "COMPSs Programming Model", 167 | "alternateName": "COMPSs", 168 | "url": "http://compss.bsc.es/", 169 | "citation": "https://doi.org/10.1007/s10723-013-9272-5" 170 | } 171 | if version: 172 | properties["version"] = version 173 | return ComputerLanguage(crate, identifier="#compss", properties=properties) 174 | 175 | 176 | def autosubmit(crate, version=None): 177 | properties = { 178 | "name": "Autosubmit", 179 | "alternateName": "AS", 180 | "url": "https://autosubmit.readthedocs.io/", 181 | "citation": "https://doi.org/10.1109/HPCSim.2016.7568429" 182 | } 183 | if version: 184 | properties["version"] = version 185 | return ComputerLanguage(crate, identifier="#autosubmit", properties=properties) 186 | 187 | 188 | LANG_MAP = { 189 | "cwl": cwl, 190 | "galaxy": galaxy, 191 | "knime": knime, 192 | "nextflow": nextflow, 193 | "snakemake": snakemake, 194 | "compss": compss, 195 | "autosubmit": autosubmit, 196 | } 197 | 198 | 199 | def get_lang(crate, name, version=None): 200 | try: 201 | func = LANG_MAP[name.lower()] 202 | except KeyError: 203 | raise ValueError(f"Unknown language: {name}") 204 | return func(crate, version=version) 205 | -------------------------------------------------------------------------------- /rocrate/model/contextentity.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2019-2025 The University of Manchester, UK 4 | # Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE 5 | # Copyright 2020-2025 Barcelona Supercomputing Center (BSC), ES 6 | # Copyright 2020-2025 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT 7 | # Copyright 2022-2025 École Polytechnique Fédérale de Lausanne, CH 8 | # Copyright 2024-2025 Data Centre, SciLifeLab, SE 9 | # Copyright 2024-2025 National 
class ContextEntity(Entity):
    """Entity subclass that adds no state or behavior of its own."""
class CreativeWork(Entity):
    """Entity subclass that adds no state or behavior of its own."""
class DataEntity(Entity):
    """Entity with ``write`` and ``stream`` hooks that do nothing by default."""

    def write(self, base_path):
        """Do nothing; subclasses override this to write under *base_path*."""
        return None

    def stream(self, chunk_size=8192):
        """Stream the data from the source.

        Each chunk of content is yielded as a tuple holding the name of the
        destination file, relative to the crate, and the chunk of data. The
        destination name is required because a DataEntity can be a single
        file or a collection of files (Dataset), and the caller needs to know
        which file a chunk belongs to. For a collection of files the caller
        can assume that files are streamed one after another: once the
        destination name changes, the current file can be closed and the next
        one opened.

        This base implementation yields nothing.
        """
        return
        yield  # unreachable; its presence makes this function a generator
class Dataset(FileOrDir):
    """Directory-like data entity whose ``@type`` is ``Dataset``.

    The source may be a local directory, a URL, or ``None``.
    """

    def _empty(self):
        """Return the initial JSON-LD dict for this entity."""
        val = {
            "@id": self.id,
            "@type": 'Dataset'
        }
        return val

    # SHOULD end with /
    def format_id(self, identifier):
        """Return *identifier* normalized to end with exactly one ``/``."""
        return identifier.rstrip("/") + "/"

    def _write_from_url(self, base_path):
        """Write a URL-backed dataset under *base_path*.

        When only ``validate_url`` is set, the URL is opened once and
        ``sdDatePublished`` is recorded. When ``fetch_remote`` is set, every
        part produced by ``_stream_folder_from_url`` is written to
        ``base_path / <relative path>``.
        """
        if self.validate_url and not self.fetch_remote:
            with urlopen(self.source) as _:
                self._jsonld['sdDatePublished'] = iso_now()
        if self.fetch_remote:
            out_file_path, out_file = None, None
            # try/finally guarantees the currently open output file is closed
            # even if the download, mkdir, open or write fails part-way.
            try:
                for rel_path, chunk in self._stream_folder_from_url():
                    path = base_path / rel_path
                    # A change of destination means the previous file is
                    # complete: close it and open the next one.
                    if path != out_file_path:
                        if out_file:
                            out_file.close()
                            out_file = None
                        out_file_path = Path(path)
                        out_file_path.parent.mkdir(parents=True, exist_ok=True)
                        out_file = open(out_file_path, 'wb')
                    out_file.write(chunk)
            finally:
                if out_file:
                    out_file.close()

    def _copy_folder(self, base_path):
        """Create the output directory for a local (or source-less) dataset.

        Raises FileNotFoundError if a source is given but does not exist.
        In CREATE mode the crate's ``_copy_unlisted`` is invoked with the
        source and output paths (its exact semantics are defined by the
        crate, not here).
        """
        abs_out_path = base_path / unquote(self.id)
        if self.source is None:
            abs_out_path.mkdir(parents=True, exist_ok=True)
        else:
            path = unquote(str(self.source))
            if not Path(path).exists():
                raise FileNotFoundError(
                    errno.ENOENT, os.strerror(errno.ENOENT), path
                )
            abs_out_path.mkdir(parents=True, exist_ok=True)
            if self.crate.mode == Mode.CREATE:
                self.crate._copy_unlisted(path, abs_out_path)

    def write(self, base_path):
        """Write this dataset under *base_path*, from a URL or a local path."""
        base_path = Path(base_path)
        if is_url(str(self.source)):
            self._write_from_url(base_path)
        else:
            self._copy_folder(base_path)

    def stream(self, chunk_size=8192):
        """Yield ``(relative destination, chunk)`` tuples for this dataset.

        Yields nothing when there is no source.
        """
        if self.source is None:
            return
        elif is_url(str(self.source)):
            yield from self._stream_folder_from_url(chunk_size)
        else:
            yield from self._stream_folder_from_path(chunk_size)

    def _stream_folder_from_path(self, chunk_size=8192):
        """Stream every file found under the local source directory.

        Raises FileNotFoundError if the source does not exist. Files are
        only streamed in CREATE mode. Destination names are relative to the
        parent of the source directory.
        """
        path = unquote(str(self.source))
        if not Path(path).exists():
            raise FileNotFoundError(
                errno.ENOENT, os.strerror(errno.ENOENT), str(path)
            )
        if self.crate.mode == Mode.CREATE:
            for root, _, files in os.walk(path):
                root = Path(root)
                for name in files:
                    source = root / name
                    dest = source.relative_to(Path(path).parent)
                    is_empty = True
                    with open(source, 'rb') as f:
                        while chunk := f.read(chunk_size):
                            is_empty = False
                            yield str(dest), chunk

                    # yield once for an empty file
                    if is_empty:
                        yield str(dest), b""

    def _stream_folder_from_url(self, chunk_size=8192):
        """Stream the ``hasPart`` entries of a URL-backed dataset.

        Without ``fetch_remote`` nothing is yielded; if ``validate_url`` is
        set the URL is opened once and ``sdDatePublished`` is recorded.
        With ``fetch_remote`` each part is downloaded from
        ``<source>/<part @id>``. Entries lacking ``@id`` are skipped with a
        warning. RuntimeError is raised for a part that is a URL or starts
        with ``/``.
        """
        if not self.fetch_remote:
            if self.validate_url:
                with urlopen(self.source) as _:
                    self._jsonld['sdDatePublished'] = iso_now()
        else:
            base = self.source.rstrip("/")
            for entry in self._jsonld.get("hasPart", []):
                try:
                    part = entry["@id"]
                    if is_url(part) or part.startswith("/"):
                        raise RuntimeError(f"'{self.source}': part '{part}' is not a relative path")
                    part_uri = f"{base}/{part}"
                    rel_out_path = Path(self.id) / part

                    is_empty = True
                    with urlopen(part_uri) as response:
                        while chunk := response.read(chunk_size):
                            is_empty = False
                            yield str(rel_out_path), chunk

                    # yield once for an empty file
                    if is_empty:
                        yield str(rel_out_path), b""
                except KeyError:
                    warnings.warn(f"'hasPart' entry in {self.id} is missing '@id'. Skipping.")
class Entity(MutableMapping):
    """Mapping-style view over one JSON-LD object belonging to a crate.

    The raw JSON-LD dict lives in ``self._jsonld``. Item access goes through
    ``__getitem__`` / ``__setitem__``, which convert between Entity objects
    and ``{"@id": ...}`` references. Keys starting with ``@`` cannot be set
    or deleted through the mapping interface.
    """

    def __init__(self, crate, identifier=None, properties=None):
        """Create the entity.

        :param crate: the owning crate object
        :param identifier: entity id; passed through ``format_id``. When
            falsy, a random ``#<uuid4>`` id is generated.
        :param properties: optional initial properties. Keys starting with
            ``@`` are stored verbatim; the others go through ``__setitem__``.
        """
        self.crate = crate
        if identifier:
            self.__id = self.format_id(identifier)
        else:
            self.__id = f"#{uuid.uuid4()}"
        self._jsonld = self._empty()
        if properties:
            for name, value in properties.items():
                if name.startswith("@"):
                    self._jsonld[name] = value
                else:
                    self[name] = value

    @property
    def id(self):
        """The entity identifier fixed at construction time."""
        return self.__id

    # Format the given ID with rules appropriate for this type.
    # For example, Dataset (directory) data entities SHOULD end with /
    def format_id(self, identifier):
        return str(identifier)

    def __repr__(self):
        return f"<{self.id} {self.type}>"

    def properties(self):
        """Return the underlying JSON-LD dict (not a copy)."""
        return self._jsonld

    def as_jsonld(self):
        """Return the underlying JSON-LD dict (not a copy)."""
        return self._jsonld

    @property
    def _default_type(self):
        """Class name if it is a key of the RO-Crate context, else "Thing"."""
        clsName = self.__class__.__name__
        if clsName in vocabs.RO_CRATE["@context"]:
            return clsName
        return "Thing"

    def canonical_id(self):
        """Return this entity's id as resolved by the crate."""
        return self.crate.resolve_id(self.id)

    def __hash__(self):
        return hash(self.canonical_id())

    def _empty(self):
        """Return the initial JSON-LD dict holding only ``@id`` and ``@type``."""
        val = {
            "@id": self.id,
            "@type": self._default_type
        }
        return val

    def __getitem__(self, key):
        """Return the value stored under *key*, dereferencing ``@id`` dicts.

        ``None`` values and ``@``-prefixed keys are returned as stored. Each
        dict value is looked up in the crate by its ``@id``; when the crate
        has no such entity the id string itself is returned. A list value
        yields a list; a scalar value yields a scalar.

        Raises KeyError if *key* is absent and ValueError if a dict value
        lacks ``@id``.
        """
        v = self._jsonld[key]
        if v is None or key.startswith("@"):
            return v
        values = v if isinstance(v, list) else [v]
        deref_values = []
        for entry in values:
            if isinstance(entry, dict):
                try:
                    id_ = entry["@id"]
                except KeyError:
                    raise ValueError(f"no @id in {entry}")
                else:
                    deref_values.append(self.crate.get(id_, id_))
            else:
                deref_values.append(entry)
        return deref_values if isinstance(v, list) else deref_values[0]

    def __setitem__(self, key: str, value):
        """Store *value* under *key*, replacing Entity objects by references.

        Raises KeyError for ``@``-prefixed keys and ValueError when a dict
        value (or a dict inside a list value) has no ``@id``.
        """
        if key.startswith("@"):
            raise KeyError(f"cannot set '{key}'")
        values = value if isinstance(value, list) else [value]
        for v in values:
            if isinstance(v, dict) and "@id" not in v:
                raise ValueError(f"no @id in {v}")
        ref_values = [{"@id": _.id} if isinstance(_, Entity) else _ for _ in values]
        self._jsonld[key] = ref_values if isinstance(value, list) else ref_values[0]

    def __delitem__(self, key: str):
        if key.startswith("@"):
            raise KeyError(f"cannot delete '{key}'")
        del self._jsonld[key]

    def popitem(self):
        raise NotImplementedError

    def clear(self):
        raise NotImplementedError

    def update(self, *args, **kwargs):
        # Accept the same call shapes as MutableMapping.update so that every
        # call reports NotImplementedError, not a TypeError about
        # unexpected arguments.
        raise NotImplementedError

    def __iter__(self):
        return iter(self._jsonld)

    def __len__(self):
        return len(self._jsonld)

    def __contains__(self, key):
        return key in self._jsonld

    def __eq__(self, other):
        if not isinstance(other, Entity):
            return NotImplemented
        return self.id == other.id and self._jsonld == other._jsonld

    @property
    def type(self):
        """The value stored under ``@type``."""
        return self._jsonld['@type']

    @property
    def datePublished(self):
        """``datePublished`` parsed with ``isoparse``; falsy values pass through."""
        d = self.get('datePublished')
        return d if not d else isoparse(d)

    @datePublished.setter
    def datePublished(self, value):
        # Accept either an object with ``isoformat()`` or a ready-made value.
        try:
            value = value.isoformat()
        except AttributeError:
            pass
        self['datePublished'] = value

    def delete(self):
        """Ask the owning crate to delete this entity."""
        self.crate.delete(self)

    def append_to(self, key: str, value, compact=False):
        """Append *value* (a single item or a list) to the list under *key*.

        A missing key starts from an empty list and an existing scalar is
        first wrapped in a list. Entity objects are stored as ``{"@id": ...}``
        references. With ``compact=True`` a resulting one-element list is
        stored as a bare scalar.

        Raises KeyError for ``@``-prefixed keys.
        """
        if key.startswith("@"):
            raise KeyError(f"cannot append to '{key}'")
        current_value = self._jsonld.setdefault(key, [])
        if not isinstance(current_value, list):
            current_value = self._jsonld[key] = [current_value]
        if not isinstance(value, list):
            value = [value]
        current_value.extend([{"@id": _.id} if isinstance(_, Entity) else _ for _ in value])
        if compact and len(current_value) == 1:
            self._jsonld[key] = current_value[0]
class File(FileOrDir):
    """Data entity whose ``@type`` is ``File``.

    The source may be an in-memory stream (BytesIO / StringIO), a URL, a
    local path, or ``None``.
    """

    def _empty(self):
        """Return the initial JSON-LD dict for this entity."""
        return {
            "@id": self.id,
            "@type": 'File'
        }

    def _has_writeable_stream(self):
        """Tell whether ``stream()`` is expected to produce content to write."""
        src = self.source
        if isinstance(src, (BytesIO, StringIO)):
            return True
        if is_url(str(src)):
            return self.fetch_remote
        return src is not None

    def _write_from_stream(self, out_file_path):
        """Write the chunks produced by ``stream()`` to *out_file_path*."""
        if self._has_writeable_stream():
            out_file_path.parent.mkdir(parents=True, exist_ok=True)
            with open(out_file_path, 'wb') as sink:
                for _, data in self.stream():
                    sink.write(data)
            return

        # Nothing to write (e.g. a URL with fetch_remote disabled), but the
        # stream is still drained so that its side effects (header
        # inspection, size calculation, ...) take place.
        for _ in self.stream():
            pass

    def _copy_file(self, path, out_file_path):
        """Copy the local file *path* to *out_file_path*.

        The copy is skipped when both paths already refer to the same file.
        When ``record_size`` is set, ``contentSize`` is recorded afterwards.
        """
        src_path = unquote(str(path))
        out_file_path.parent.mkdir(parents=True, exist_ok=True)
        already_in_place = out_file_path.exists() and out_file_path.samefile(src_path)
        if not already_in_place:
            shutil.copy(src_path, out_file_path)
        if self.record_size:
            self._jsonld['contentSize'] = str(out_file_path.stat().st_size)

    def write(self, base_path):
        """Write this file under *base_path*; warn when there is no source."""
        target = Path(base_path) / unquote(self.id)
        src = self.source
        if isinstance(src, (BytesIO, StringIO)) or is_url(str(src)):
            self._write_from_stream(target)
        elif src is None:
            # Allows to record a File entity whose @id does not exist, see #73
            warnings.warn(f"No source for {self.id}")
        else:
            self._copy_file(src, target)

    def _stream_from_stream(self, stream):
        """Yield ``(id, bytes)`` read from an in-memory stream.

        Text read from a StringIO source is encoded as UTF-8.
        """
        total = 0
        while True:
            data = stream.read()
            if isinstance(self.source, StringIO):
                data = data.encode('utf-8')
            if len(data) == 0:
                break
            yield self.id, data
            total += len(data)

        if self.record_size:
            self._jsonld['contentSize'] = str(total)

    def _stream_from_url(self, url, chunk_size=8192):
        """Optionally validate *url* via HEAD, then optionally stream it.

        Nothing is yielded unless ``fetch_remote`` is set.
        """
        if self.validate_url and url.startswith("http"):
            with requests.head(url) as response:
                self._jsonld.update({
                    'contentSize': response.headers.get('Content-Length'),
                    'encodingFormat': response.headers.get('Content-Type')
                })
                if not self.fetch_remote:
                    self._jsonld['sdDatePublished'] = response.headers.get(
                        "Last-Modified", iso_now()
                    )

        if not self.fetch_remote:
            return

        total = 0
        self._jsonld['contentUrl'] = str(url)
        with urllib.request.urlopen(url) as response:
            while data := response.read(chunk_size):
                yield self.id, data
                total += len(data)

        # yield once for an empty file
        if total == 0:
            yield self.id, b""

        if self.record_size:
            self._jsonld['contentSize'] = str(total)

    def _stream_from_file(self, path, chunk_size=8192):
        """Yield ``(id, bytes)`` chunks read from the local file *path*."""
        total = 0
        with open(unquote(str(path)), 'rb') as handle:
            while data := handle.read(chunk_size):
                yield self.id, data
                total += len(data)

        # yield once for an empty file
        if total == 0:
            yield self.id, b""

        if self.record_size:
            self._jsonld['contentSize'] = str(total)

    def stream(self, chunk_size=8192):
        """Yield ``(id, bytes)`` chunks from whichever kind of source is set."""
        src = self.source
        if isinstance(src, (BytesIO, StringIO)):
            yield from self._stream_from_stream(src)
        elif is_url(str(src)):
            yield from self._stream_from_url(src, chunk_size)
        elif src is None:
            # Allows to record a File entity whose @id does not exist, see #73
            warnings.warn(f"No source for {self.id}")
        else:
            yield from self._stream_from_file(src, chunk_size)
class FileOrDir(DataEntity):
    """Common constructor logic for file-like and directory-like entities."""

    def __init__(self, crate, source=None, dest_path=None, fetch_remote=False,
                 validate_url=False, properties=None, record_size=False):
        """Store the source options and derive the entity identifier.

        With *dest_path*, the identifier is its POSIX form (must be
        relative). Without it, *source* must be a str or Path: a URL is used
        whole unless ``fetch_remote`` is set, in which case only its basename
        is used; a local path contributes its basename. Identifiers derived
        from paths are percent-quoted unless the crate is in READ mode.

        Raises ValueError for an absolute *dest_path*, or when *dest_path*
        is missing and *source* is neither a str nor a Path.
        """
        self.fetch_remote = fetch_remote
        self.validate_url = validate_url
        self.record_size = record_size
        self.source = source
        props = {} if properties is None else properties

        if dest_path:
            dest = Path(dest_path)
            if dest.is_absolute():
                raise ValueError("if provided, dest_path must be relative")
            identifier = dest.as_posix()
            if crate.mode != Mode.READ:
                identifier = quote(identifier)
        elif not isinstance(source, (str, Path)):
            raise ValueError("dest_path must be provided if source is not a path or URI")
        elif is_url(str(source)):
            identifier = os.path.basename(source) if fetch_remote else source
        else:
            identifier = os.path.basename(str(source).rstrip("/"))
            if crate.mode != Mode.READ:
                identifier = quote(identifier)

        super().__init__(crate, identifier, props)
WORKFLOW_PROFILE = "https://w3id.org/workflowhub/workflow-ro-crate/1.0"


class Metadata(File):
    """RO-Crate metadata file."""

    BASENAME = "ro-crate-metadata.json"
    PROFILE = "https://w3id.org/ro/crate/1.1"

    def __init__(self, crate, source=None, dest_path=None, properties=None):
        """Create the descriptor; defaults to BASENAME when given no location."""
        if source is None and dest_path is None:
            dest_path = self.BASENAME
        super().__init__(
            crate,
            source=source,
            dest_path=dest_path,
            fetch_remote=False,
            validate_url=False,
            properties=properties
        )
        # https://www.researchobject.org/ro-crate/1.1/appendix/jsonld.html#extending-ro-crate
        self.extra_contexts = []
        self.extra_terms = {}

    def _empty(self):
        """Return the default properties of the metadata entry."""
        return {
            "@id": self.id,
            "@type": "CreativeWork",
            "conformsTo": {"@id": self.PROFILE},
            "about": {"@id": "./"},
        }

    def generate(self):
        """Build the content of the crate's metadata file.

        Returns a dict with ``@context`` and ``@graph`` keys. ``@context`` is
        a single string when there are no extra contexts or terms, otherwise
        a list.
        """
        graph = [entity.properties() for entity in self.crate.get_entities()]
        context = [f'{self.PROFILE}/context', *self.extra_contexts]
        if self.extra_terms:
            context.append(self.extra_terms)
        return {
            '@context': context[0] if len(context) == 1 else context,
            '@graph': graph,
        }

    def stream(self, chunk_size=8192):
        """Yield the whole serialized metadata as one ``(id, bytes)`` tuple."""
        rendered = json.dumps(self.generate(), indent=4, sort_keys=True)
        yield self.id, rendered.encode('utf-8')

    def _has_writeable_stream(self):
        """The metadata file can always be generated, hence always written."""
        return True

    def write(self, dest_base):
        """Write the generated metadata to ``dest_base / <id>``."""
        super()._write_from_stream(Path(dest_base) / self.id)

    @property
    def root(self) -> Dataset:
        """The crate's root dataset."""
        return self.crate.root_dataset


class LegacyMetadata(Metadata):
    """Metadata descriptor using the 1.0 basename and profile."""

    BASENAME = "ro-crate-metadata.jsonld"
    PROFILE = "https://w3id.org/ro/crate/1.0"


# https://github.com/ResearchObject/ro-terms/tree/master/test
TESTING_EXTRA_TERMS = {
    "TestSuite": "https://w3id.org/ro/terms/test#TestSuite",
    "TestInstance": "https://w3id.org/ro/terms/test#TestInstance",
    "TestService": "https://w3id.org/ro/terms/test#TestService",
    "TestDefinition": "https://w3id.org/ro/terms/test#TestDefinition",
    "PlanemoEngine": "https://w3id.org/ro/terms/test#PlanemoEngine",
    "JenkinsService": "https://w3id.org/ro/terms/test#JenkinsService",
    "TravisService": "https://w3id.org/ro/terms/test#TravisService",
    "GithubService": "https://w3id.org/ro/terms/test#GithubService",
    "instance": "https://w3id.org/ro/terms/test#instance",
    "runsOn": "https://w3id.org/ro/terms/test#runsOn",
    "resource": "https://w3id.org/ro/terms/test#resource",
    "definition": "https://w3id.org/ro/terms/test#definition",
    "engineVersion": "https://w3id.org/ro/terms/test#engineVersion"
}


def metadata_class(descriptor_id):
    """Return the Metadata class whose BASENAME matches *descriptor_id*.

    Only the part after the last ``/`` is compared. Raises ValueError when
    it matches neither Metadata nor LegacyMetadata.
    """
    basename = descriptor_id.rpartition("/")[2]
    for candidate in (Metadata, LegacyMetadata):
        if basename == candidate.BASENAME:
            return candidate
    raise ValueError(f"Invalid metadata descriptor ID: {descriptor_id!r}")
class Person(ContextEntity):
    """Context entity whose ``@type`` is ``Person``."""

    def __init__(self, crate, identifier=None, properties=None):
        """Forward all arguments unchanged to the parent constructor."""
        super().__init__(crate, identifier, properties)

    def _empty(self):
        """Return the initial JSON-LD dict for this entity."""
        return {
            "@id": self.id,
            "@type": 'Person'
        }
import os
from pathlib import Path

from jinja2 import Template
from .file import File


class Preview(File):
    """
    RO-Crate preview file

    This object holds a preview of an RO-Crate in HTML format.
    """
    BASENAME = "ro-crate-preview.html"

    def __init__(self, crate, source=None, properties=None):
        """Create the preview entity; its destination is always ``BASENAME``."""
        super().__init__(crate, source, self.BASENAME, properties=properties)

    def _empty(self):
        """Return the default JSON-LD properties of the preview entry."""
        val = {
            "@id": self.BASENAME,
            "@type": "CreativeWork",
            "about": {"@id": "./"}
        }
        return val

    def generate_html(self):
        """Render the bundled Jinja2 template for this crate.

        Returns:
            The rendered HTML as a string.
        """
        base_path = os.path.abspath(os.path.dirname(__file__))
        template_path = os.path.join(
            base_path, '..', 'templates', 'preview_template.html.j2'
        )
        # Context manager: the handle is released even if Template() raises.
        with open(template_path, 'r', encoding='utf-8') as template:
            src = Template(template.read())

        def template_function(func):
            # Expose ``func`` to the template under its own name.
            src.globals[func.__name__] = func
            return func

        @template_function
        def stringify(a):
            """Turn a property value into display text."""
            if isinstance(a, str):
                return a
            if isinstance(a, list):
                # Items may be entities rather than strings; stringify each
                # one so that str.join never sees a non-string.
                return ', '.join(str(stringify(item)) for item in a)
            # Entity-like object: prefer its "name" property when present.
            jsonld = getattr(a, "_jsonld", None)
            if jsonld and jsonld.get('name'):
                return jsonld['name']
            return str(a)

        @template_function
        def is_object_list(a):
            """Return True if ``a`` is a list with at least one non-string item."""
            # The previous check, ``obj is not str``, compared each item with
            # the ``str`` type itself and was therefore always true.
            return isinstance(a, list) and any(
                not isinstance(obj, str) for obj in a
            )

        context_entities = [
            entity._jsonld for entity in self.crate.contextual_entities
        ]
        data_entities = [entity._jsonld for entity in self.crate.data_entities]
        return src.render(
            crate=self.crate, context=context_entities, data=data_entities
        )

    def stream(self, chunk_size=8192):
        """Yield ``(path, bytes)`` pairs for the preview file.

        When a source is set the parent implementation is used; otherwise the
        HTML is generated and yielded as a single UTF-8 encoded chunk.
        """
        if self.source:
            # NOTE(review): chunk_size is not forwarded to the parent stream();
            # confirm the parent's signature before passing it through.
            yield from super().stream()
        else:
            yield self.id, self.generate_html().encode('utf-8')

    def _has_writeable_stream(self):
        """The preview can always be streamed (from source or generated)."""
        return True

    def write(self, dest_base):
        """Write the preview below ``dest_base`` using the streamed content."""
        write_path = Path(dest_base) / self.id
        super()._write_from_stream(write_path)


# --------------------------------------------------------------------------------
# /rocrate/model/root_dataset.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python

# Copyright 2019-2025 The University of Manchester, UK
# Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE
# Copyright 2020-2025 Barcelona Supercomputing Center (BSC), ES
# Copyright 2020-2025 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT
# Copyright 2022-2025 École Polytechnique Fédérale de Lausanne, CH
# Copyright 2024-2025 Data Centre, SciLifeLab, SE
# Copyright 2024-2025 National Institute of Informatics (NII), JP
# Copyright 2025 Senckenberg Society for Nature Research (SGN), DE
# Copyright 2025 European Molecular Biology Laboratory (EMBL), Heidelberg, DE
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .dataset import Dataset
from ..utils import iso_now


class RootDataset(Dataset):
    """Dataset subclass used for the crate's root dataset."""

    def __init__(self, crate, source=None, dest_path=None, properties=None):
        """Initialize the root dataset.

        When neither ``source`` nor ``dest_path`` is given, the destination
        defaults to ``"./"``. Remote fetching and URL validation are always
        switched off for the parent initializer.
        """
        if dest_path is None and source is None:
            dest_path = "./"
        parent_kwargs = {
            "source": source,
            "dest_path": dest_path,
            "fetch_remote": False,
            "validate_url": False,
            "properties": properties,
        }
        super().__init__(crate, **parent_kwargs)

    def _empty(self):
        """Return the minimal JSON-LD skeleton, stamped with the current time."""
        return {
            "@id": self.id,
            "@type": "Dataset",
            "datePublished": iso_now(),
        }


# --------------------------------------------------------------------------------
# /rocrate/model/softwareapplication.py:
# --------------------------------------------------------------------------------
# Copyright 2019-2025 The University of Manchester, UK
# Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE
# Copyright 2020-2025 Barcelona Supercomputing Center (BSC), ES
# Copyright 2020-2025 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT
# Copyright 2022-2025 École Polytechnique Fédérale de Lausanne, CH
# Copyright 2024-2025 Data Centre, SciLifeLab, SE
# Copyright 2024-2025 National Institute of Informatics (NII), JP
# Copyright 2025 Senckenberg Society for Nature Research (SGN), DE
# Copyright 2025 European Molecular Biology Laboratory (EMBL), Heidelberg, DE
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .contextentity import ContextEntity
from .creativework import CreativeWork


class SoftwareApplication(ContextEntity, CreativeWork):
    """Entity of type ``SoftwareApplication`` with name/url/version accessors."""

    def _empty(self):
        """Return the minimal JSON-LD skeleton (``@id`` and ``@type``)."""
        skeleton = {"@id": self.id, "@type": 'SoftwareApplication'}
        return skeleton

    @property
    def name(self):
        """Value of the ``name`` property, as returned by ``self.get``."""
        return self.get("name")

    @name.setter
    def name(self, value):
        self["name"] = value

    @property
    def url(self):
        """Value of the ``url`` property, as returned by ``self.get``."""
        return self.get("url")

    @url.setter
    def url(self, value):
        self["url"] = value

    @property
    def version(self):
        """Value of the ``version`` property, as returned by ``self.get``."""
        return self.get("version")

    @version.setter
    def version(self, value):
        self["version"] = value


PLANEMO_ID = "https://w3id.org/ro/terms/test#PlanemoEngine"


def planemo(crate):
    """Build the SoftwareApplication entity describing Planemo."""
    planemo_properties = {
        "name": "Planemo",
        "url": {
            "@id": "https://github.com/galaxyproject/planemo"
        }
    }
    return SoftwareApplication(
        crate, identifier=PLANEMO_ID, properties=planemo_properties
    )


# Lower-case application name -> factory taking the crate.
APP_MAP = {
    "planemo": planemo,
}


def get_app(crate, name):
    """Return the known application entity for ``name`` (case-insensitive).

    Raises:
        ValueError: if ``name`` is not a key of ``APP_MAP``.
    """
    key = name.lower()
    try:
        factory = APP_MAP[key]
    except KeyError:
        raise ValueError(f"Unknown application: {name}")
    return factory(crate)


# --------------------------------------------------------------------------------
# /rocrate/model/testdefinition.py:
# --------------------------------------------------------------------------------
# Copyright 2019-2025 The University of Manchester, UK
# Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE
# Copyright 2020-2025 Barcelona Supercomputing Center (BSC), ES
# Copyright 2020-2025 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT
# Copyright 2022-2025 École Polytechnique Fédérale de Lausanne, CH
# Copyright 2024-2025 Data Centre, SciLifeLab, SE
# Copyright 2024-2025 National Institute of Informatics (NII), JP
# Copyright 2025 Senckenberg Society for Nature Research (SGN), DE
# Copyright 2025 European Molecular Biology Laboratory (EMBL), Heidelberg, DE
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .file import File


class TestDefinition(File):
    """File entity additionally typed as ``TestDefinition``."""

    def _empty(self):
        """Return the minimal JSON-LD skeleton with both types."""
        types = ['File', 'TestDefinition']
        return {"@id": self.id, "@type": types}

    @property
    def _default_type(self):
        return "TestDefinition"

    @property
    def engineVersion(self):
        """Value of the ``engineVersion`` property."""
        return self.get("engineVersion")

    @engineVersion.setter
    def engineVersion(self, value):
        self["engineVersion"] = value

    @property
    def conformsTo(self):
        """Value of the ``conformsTo`` property."""
        return self.get("conformsTo")

    @conformsTo.setter
    def conformsTo(self, value):
        self["conformsTo"] = value

    # ``engine`` is an alias: same property object as ``conformsTo``.
    engine = conformsTo


# --------------------------------------------------------------------------------
# /rocrate/model/testinstance.py:
# --------------------------------------------------------------------------------
# Copyright 2019-2025 The University of Manchester, UK
# Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE
# Copyright 2020-2025 Barcelona Supercomputing Center (BSC), ES
# Copyright 2020-2025 Center for Advanced Studies, Research and Development in
# Sardinia (CRS4), IT
# Copyright 2022-2025 École Polytechnique Fédérale de Lausanne, CH
# Copyright 2024-2025 Data Centre, SciLifeLab, SE
# Copyright 2024-2025 National Institute of Informatics (NII), JP
# Copyright 2025 Senckenberg Society for Nature Research (SGN), DE
# Copyright 2025 European Molecular Biology Laboratory (EMBL), Heidelberg, DE
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .contextentity import ContextEntity


class TestInstance(ContextEntity):
    """ContextEntity subclass whose JSON-LD ``@type`` is ``TestInstance``."""

    def _empty(self):
        """Return the minimal JSON-LD skeleton (``@id`` and ``@type``)."""
        skeleton = {"@id": self.id, "@type": 'TestInstance'}
        return skeleton

    @property
    def _default_type(self):
        return "TestInstance"

    @property
    def name(self):
        """Value of the ``name`` property."""
        return self.get("name")

    @name.setter
    def name(self, value):
        self["name"] = value

    @property
    def resource(self):
        """Value of the ``resource`` property."""
        return self.get("resource")

    @resource.setter
    def resource(self, value):
        self["resource"] = value

    @property
    def runsOn(self):
        """Value of the ``runsOn`` property."""
        return self.get("runsOn")

    @runsOn.setter
    def runsOn(self, value):
        self["runsOn"] = value

    @property
    def url(self):
        """Value of the ``url`` property."""
        return self.get("url")

    @url.setter
    def url(self, value):
        self["url"] = value

    # ``service`` is an alias: same property object as ``runsOn``.
    service = runsOn


# --------------------------------------------------------------------------------
/rocrate/model/testservice.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2025 The University of Manchester, UK 2 | # Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE 3 | # Copyright 2020-2025 Barcelona Supercomputing Center (BSC), ES 4 | # Copyright 2020-2025 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT 5 | # Copyright 2022-2025 École Polytechnique Fédérale de Lausanne, CH 6 | # Copyright 2024-2025 Data Centre, SciLifeLab, SE 7 | # Copyright 2024-2025 National Institute of Informatics (NII), JP 8 | # Copyright 2025 Senckenberg Society for Nature Research (SGN), DE 9 | # Copyright 2025 European Molecular Biology Laboratory (EMBL), Heidelberg, DE 10 | # 11 | # Licensed under the Apache License, Version 2.0 (the "License"); 12 | # you may not use this file except in compliance with the License. 13 | # You may obtain a copy of the License at 14 | # 15 | # http://www.apache.org/licenses/LICENSE-2.0 16 | # 17 | # Unless required by applicable law or agreed to in writing, software 18 | # distributed under the License is distributed on an "AS IS" BASIS, 19 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20 | # See the License for the specific language governing permissions and 21 | # limitations under the License. 
from .contextentity import ContextEntity


class TestService(ContextEntity):
    """ContextEntity subclass whose JSON-LD ``@type`` is ``TestService``."""

    def _empty(self):
        """Return the minimal JSON-LD skeleton (``@id`` and ``@type``)."""
        skeleton = {"@id": self.id, "@type": 'TestService'}
        return skeleton

    @property
    def _default_type(self):
        return "TestService"

    @property
    def name(self):
        """Value of the ``name`` property."""
        return self.get("name")

    @name.setter
    def name(self, value):
        self["name"] = value

    @property
    def url(self):
        """Value of the ``url`` property."""
        return self.get("url")

    @url.setter
    def url(self, value):
        self["url"] = value


JENKINS_ID = "https://w3id.org/ro/terms/test#JenkinsService"
TRAVIS_ID = "https://w3id.org/ro/terms/test#TravisService"
GITHUB_ID = "https://w3id.org/ro/terms/test#GithubService"


def _known_service(crate, identifier, name, url):
    # Shared constructor for the predefined services below.
    return TestService(crate, identifier=identifier, properties={
        "name": name,
        "url": {
            "@id": url
        },
    })


def jenkins(crate):
    """Build the TestService entity describing Jenkins."""
    return _known_service(crate, JENKINS_ID, "Jenkins", "https://www.jenkins.io")


def travis(crate):
    """Build the TestService entity describing Travis CI."""
    return _known_service(crate, TRAVIS_ID, "Travis CI", "https://www.travis-ci.com")


def github(crate):
    """Build the TestService entity describing Github Actions."""
    return _known_service(crate, GITHUB_ID, "Github Actions", "https://github.com")


# Lower-case service name -> factory taking the crate.
SERVICE_MAP = {
    "jenkins": jenkins,
    "travis": travis,
    "github": github,
}


def get_service(crate, name):
    """Return the known service entity for ``name`` (case-insensitive).

    Raises:
        ValueError: if ``name`` is not a key of ``SERVICE_MAP``.
    """
    key = name.lower()
    try:
        factory = SERVICE_MAP[key]
    except KeyError:
        raise ValueError(f"Unknown service: {name}")
    return factory(crate)


# --------------------------------------------------------------------------------
# /rocrate/model/testsuite.py:
# --------------------------------------------------------------------------------
# Copyright 2019-2025 The University of Manchester, UK
# Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE
# Copyright 2020-2025
Barcelona Supercomputing Center (BSC), ES 4 | # Copyright 2020-2025 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT 5 | # Copyright 2022-2025 École Polytechnique Fédérale de Lausanne, CH 6 | # Copyright 2024-2025 Data Centre, SciLifeLab, SE 7 | # Copyright 2024-2025 National Institute of Informatics (NII), JP 8 | # Copyright 2025 Senckenberg Society for Nature Research (SGN), DE 9 | # Copyright 2025 European Molecular Biology Laboratory (EMBL), Heidelberg, DE 10 | # 11 | # Licensed under the Apache License, Version 2.0 (the "License"); 12 | # you may not use this file except in compliance with the License. 13 | # You may obtain a copy of the License at 14 | # 15 | # http://www.apache.org/licenses/LICENSE-2.0 16 | # 17 | # Unless required by applicable law or agreed to in writing, software 18 | # distributed under the License is distributed on an "AS IS" BASIS, 19 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20 | # See the License for the specific language governing permissions and 21 | # limitations under the License. 
from .contextentity import ContextEntity


class TestSuite(ContextEntity):
    """ContextEntity subclass whose JSON-LD ``@type`` is ``TestSuite``."""

    def _empty(self):
        """Return the minimal JSON-LD skeleton (``@id`` and ``@type``)."""
        skeleton = {"@id": self.id, "@type": 'TestSuite'}
        return skeleton

    @property
    def _default_type(self):
        return "TestSuite"

    @property
    def name(self):
        """Value of the ``name`` property."""
        return self.get("name")

    @name.setter
    def name(self, value):
        self["name"] = value

    @property
    def instance(self):
        """Value of the ``instance`` property."""
        return self.get("instance")

    @instance.setter
    def instance(self, value):
        self["instance"] = value

    @property
    def definition(self):
        """Value of the ``definition`` property."""
        return self.get("definition")

    @definition.setter
    def definition(self, value):
        self["definition"] = value


# --------------------------------------------------------------------------------
# /rocrate/templates/preview_template.html.j2:
# --------------------------------------------------------------------------------
# {{ crate.name or "New RO Crate" }}
62 | 104 |

{{ crate.name or "New RO Crate" }}

105 |

106 | {% if crate.description %} 107 | {{ crate.description }} 108 | {%endif %} 109 |

110 | 111 | {% if crate.image %} 112 | 113 | {%endif %} 114 | 115 |
116 | {% if crate.creator %} 117 | {% if is_object_list(crate.creator) %} 118 |
Creators
119 | {% for obj in crate.creator %} 120 |
{{ stringify(obj) }}
121 | {% endfor %} 122 | {% else %} 123 |
Creator
124 |
{{ stringify(crate.creator) }}
125 | {%endif %} 126 | {%endif %} 127 | 128 | {% if crate.publisher %} 129 | {% if is_object_list(crate.publisher) %} 130 |
Publishers
131 | {% for obj in crate.publisher %} 132 |
{{ stringify(obj) }}
133 | {% endfor %} 134 | {% else %} 135 |
Publisher
136 |
{{ stringify(crate.publisher) }}
137 | {%endif %} 138 | {%endif %} 139 | 140 | {% if crate.url %} 141 |
URL
142 |
143 | {%endif %} 144 | 145 | {% if crate.license %} 146 |
License
147 |
{{ crate.license }}
148 | {%endif %} 149 | 150 | {% if crate.keywords %} 151 |
Keyword(s)
152 |
{{ stringify(crate.keywords) }}
153 | {%endif %} 154 | 155 | {% if crate.isBasedOn %} 156 |
isBasedOn
157 |
{{ crate.isBasedOn }}
158 | {%endif %} 159 | 160 | {% if crate.datePublished %} 161 |
datePublished
162 |
{{ crate.datePublished }}
163 | {%endif %} 164 | 165 | {% if crate.creativeWorkStatus %} 166 |
creativeWorkStatus
167 |
{{ crate.creativeWorkStatus }}
168 | {%endif %} 169 |
170 | 171 |

Contents

172 |
173 | {% for entry in data %} 174 |
175 | Data entity 176 | {{ entry['@id'] }} 177 |

Type: {{ stringify(entry['@type']) }}

178 | {% if entry['programmingLanguage'] %} 179 |

ProgrammingLanguage: {{ entry['programmingLanguage']['@id'] }}

180 | {% endif %} 181 |
182 | {% endfor %} 183 |
184 |
185 | 186 | 187 | -------------------------------------------------------------------------------- /rocrate/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright 2019-2025 The University of Manchester, UK 4 | # Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE 5 | # Copyright 2020-2025 Barcelona Supercomputing Center (BSC), ES 6 | # Copyright 2020-2025 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT 7 | # Copyright 2022-2025 École Polytechnique Fédérale de Lausanne, CH 8 | # Copyright 2024-2025 Data Centre, SciLifeLab, SE 9 | # Copyright 2024-2025 National Institute of Informatics (NII), JP 10 | # Copyright 2025 Senckenberg Society for Nature Research (SGN), DE 11 | # Copyright 2025 European Molecular Biology Laboratory (EMBL), Heidelberg, DE 12 | # 13 | # Licensed under the Apache License, Version 2.0 (the "License"); 14 | # you may not use this file except in compliance with the License. 15 | # You may obtain a copy of the License at 16 | # 17 | # http://www.apache.org/licenses/LICENSE-2.0 18 | # 19 | # Unless required by applicable law or agreed to in writing, software 20 | # distributed under the License is distributed on an "AS IS" BASIS, 21 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 22 | # See the License for the specific language governing permissions and 23 | # limitations under the License. 
from enum import Enum
import os
from datetime import datetime, timezone
from urllib.parse import urlsplit


def as_list(value):
    """Return ``value`` itself if it is a list, else a one-element list."""
    if isinstance(value, list):
        return value
    return [value]


def is_url(string):
    """Return True if ``string`` parses with a non-empty URL scheme.

    On Windows (``os.name == "nt"``) a one-character scheme is rejected
    (presumably because it is a drive letter such as ``C:``).
    """
    parts = urlsplit(string)
    if os.name == "nt" and len(parts.scheme) == 1:
        return False
    return bool(parts.scheme)


def iso_now():
    """Return the current UTC time in ISO 8601 format, without microseconds."""
    return datetime.now(timezone.utc).replace(microsecond=0).isoformat()


def subclasses(cls):
    """\
    Recursively iterate through all subclasses (direct and indirect) of cls.

    Subclasses appear before their parent classes, but ordering is otherwise
    undefined. For instance, if Cat and Dog are subclasses of Pet and Beagle
    is a subclass of Dog, then Beagle will appear before Dog.
    """
    for direct in cls.__subclasses__():
        yield from subclasses(direct)
        yield direct


def get_norm_value(json_entity, prop):
    """\
    Get a normalized value for a property (always as a list of strings).

    String items are kept as they are; any other item is replaced by its
    "@id". A missing property yields an empty list.

    Raises ValueError if an item is neither a string nor a mapping with "@id".
    """
    value = as_list(json_entity.get(prop, []))
    try:
        return [item if isinstance(item, str) else item["@id"] for item in value]
    except (TypeError, KeyError):
        raise ValueError(f"Malformed value for {prop!r}: {json_entity.get(prop)!r}")


def walk(top, topdown=True, onerror=None, followlinks=False, exclude=None):
    """\
    Like os.walk, but drop directory and file names listed in ``exclude``.

    ``topdown``, ``onerror`` and ``followlinks`` are forwarded to os.walk
    (they used to be accepted but silently ignored). As with os.walk,
    removing a name from ``dirs`` only prevents descending into it when
    ``topdown`` is true; in bottom-up mode excluded directories are removed
    from the yielded lists but are still visited.
    """
    exclude = frozenset(exclude or [])
    for root, dirs, files in os.walk(
        top, topdown=topdown, onerror=onerror, followlinks=followlinks
    ):
        if exclude:
            # In-place assignment so that os.walk sees the pruned list.
            dirs[:] = [name for name in dirs if name not in exclude]
            files[:] = [name for name in files if name not in exclude]
        yield root, dirs, files


class Mode(Enum):
    """Closed set of modes: READ, INIT and CREATE."""
    READ = 1
    INIT = 2
    CREATE = 3


# --------------------------------------------------------------------------------
# /rocrate/vocabs.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python

# Copyright 2019-2025 The University of Manchester, UK
# Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE
# Copyright 2020-2025 Barcelona Supercomputing Center (BSC), ES
# Copyright 2020-2025 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT
# Copyright 2022-2025 École Polytechnique Fédérale de Lausanne, CH
# Copyright 2024-2025 Data Centre, SciLifeLab, SE
# Copyright 2024-2025 National Institute of Informatics (NII), JP
# Copyright 2025 Senckenberg Society for Nature Research (SGN), DE
# Copyright 2025 European Molecular Biology Laboratory (EMBL), Heidelberg, DE
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import json
# Compare the whole version tuple: checking only ``.minor`` ignores the major
# version and would misfire on any future major release with a minor below 9.
if sys.version_info < (3, 9):
    import pkg_resources
else:
    import importlib.resources

# FIXME: Avoid eager loading?
if sys.version_info < (3, 9):
    RO_CRATE = json.loads(pkg_resources.resource_string(
        __name__, "data/ro-crate.jsonld"
    ))
    SCHEMA = json.loads(pkg_resources.resource_string(
        __name__, "data/schema.jsonld"
    ))
else:
    RO_CRATE = json.loads(
        importlib.resources.files(__package__).joinpath("data/ro-crate.jsonld").read_text("utf8")
    )
    SCHEMA = json.loads(
        importlib.resources.files(__package__).joinpath("data/schema.jsonld").read_text("utf8")
    )
# Index of the schema graph by "@id" for direct lookup in schema_doc().
SCHEMA_MAP = {e["@id"]: e for e in SCHEMA["@graph"]}


def term_to_uri(name):
    """Return the URI mapped to term ``name`` in the RO-Crate context.

    Raises KeyError if the term is not in the context.
    """
    # NOTE: Assumes RO-Crate's flat-style context
    return RO_CRATE["@context"][name]


def schema_doc(uri):
    """Return the ``rdfs:comment`` of the schema entry with "@id" ``uri``.

    Returns an empty string if the entry has no comment; raises KeyError if
    ``uri`` is not in the schema graph.
    """
    # NOTE: Ensure rdfs:comment still appears in newer schema.org downloads
    # TODO: Support terms outside schema.org?
    return SCHEMA_MAP[uri].get("rdfs:comment", "")


# --------------------------------------------------------------------------------
# /setup.cfg:
# --------------------------------------------------------------------------------
# [flake8]
# ignore = E226,E741,E402,E129,W503,W504
# max-line-length = 127
#
# [tool:pytest]
# markers =
#     slow: marks tests as slow
#
# --------------------------------------------------------------------------------
# /setup.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python

# Copyright 2019-2025 The University of Manchester, UK
# Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE
# Copyright 2020-2025 Barcelona Supercomputing Center (BSC), ES
# Copyright 2020-2025 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT
# Copyright 2022-2025 École Polytechnique Fédérale de Lausanne, CH
# Copyright 2024-2025 Data Centre, SciLifeLab, SE
# Copyright 2024-2025 National Institute of Informatics (NII), JP
# Copyright 2025 Senckenberg Society for Nature Research (SGN), DE
# Copyright 2025 European Molecular Biology Laboratory (EMBL), Heidelberg, DE
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from setuptools import setup, find_packages
from os import path
import re

# The builtin open() is used below: on Python 3 it accepts ``encoding``
# directly, so shadowing it with the deprecated codecs.open is unnecessary.

# https://www.python.org/dev/peps/pep-0440/#appendix-b-parsing-version-strings-with-regular-expressions  # noqa
PEP440_PATTERN = r"([1-9][0-9]*!)?(0|[1-9][0-9]*)(\.(0|[1-9][0-9]*))*((a|b|rc)(0|[1-9][0-9]*))?(\.post(0|[1-9][0-9]*))?(\.dev(0|[1-9][0-9]*))?"  # noqa


here = path.abspath(path.dirname(__file__))

with open(path.join(here, 'README.md'), encoding='utf-8') as f:
    long_description = f.read()

with open(path.join(here, 'requirements.txt'), encoding='utf-8') as f:
    # One requirement per line; skip blank lines and comment lines.
    required = [
        line.strip() for line in f
        if line.strip() and not line.lstrip().startswith('#')
    ]

with open(path.join(here, 'rocrate', '_version.py'), encoding='utf-8') as f:
    # "parse" rocrate/_version.py which MUST have this pattern
    # __version__ = "0.1.1"
    # see https://www.python.org/dev/peps/pep-0440
    v = f.read().strip()
    m = re.match(r'^__version__ = "(' + PEP440_PATTERN + ')"$', v)
    if not m:
        msg = ('rocrate/_version.py did not match pattern '
               '__version__ = "0.1.2"  (see PEP440):\n') + v
        raise Exception(msg)
    __version__ = m.group(1)


setup(
    name='rocrate',
    # The repository's test package is named "test" (not "tests"); exclude it
    # explicitly so it is not installed as a top-level package.
    packages=find_packages(
        exclude=['contrib', 'docs', 'test', 'test.*', 'tests', 'tests.*']
    ),
    version=__version__,  # update in rocrate/_version.py
    description='RO-Crate metadata generator/parser',
    long_description_content_type='text/markdown',
    long_description=long_description,
    author=", ".join((
        'Daniel Bauer',
        'Eli Chadwick',
        'Paul De Geest',
        'Bert Droesbeke',
        'Ignacio Eguinoa',
        'Alban Gaignard',
        'Matthias Hörtenhuber',
        'Sebastiaan Huber',
        'Bruno Kinoshita',
        'Simone Leo',
        'Luca Pireddu',
        'Laura Rodríguez-Navas',
        'Raül Sirvent',
        'Stian Soiland-Reyes',
        'Laurent Thomas'
    )),
    python_requires='>=3.9',
    author_email='stain@apache.org',
    package_data={'': ['data/*.jsonld', 'templates/*.j2']},
    # SPDX, pending https://github.com/pombredanne/spdx-pypi-pep/pull/2
    license="Apache-2.0",
    url='https://github.com/ResearchObject/ro-crate-py/',
    download_url=('https://github.com/researchobject/ro-crate-py/archive/'
                  f'{__version__}.tar.gz'),
    keywords="researchobject ro-crate ro metadata jsonld",
    # A flat list of requirement strings (previously a list nested in a list).
    install_requires=required,
    extras_require={
        'ga2cwl': ['galaxy2cwl'],
    },
    classifiers=[
        'Operating System :: OS Independent',
        'Development Status :: 3 - Alpha',
        'Intended Audience :: Developers',
        'Intended Audience :: Information Technology',
        'Topic :: Software Development :: Libraries',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Programming Language :: Python :: 3.11',
        'Programming Language :: Python :: 3.12',
        'Topic :: Internet',
        'Topic :: Internet :: WWW/HTTP',
        'Topic :: System :: Archiving',
        'Topic :: System :: Archiving :: Packaging',
    ],
    entry_points={
        "console_scripts": ["rocrate=rocrate.cli:cli"],
    },
)

# --------------------------------------------------------------------------------
# /test/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/ResearchObject/ro-crate-py/c9ccbd86bcbc3f1429d2a45881e3ec248adf2284/test/__init__.py
# --------------------------------------------------------------------------------
# /test/conftest.py:
# --------------------------------------------------------------------------------
# Copyright 2019-2025 The University of Manchester, UK
# Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE
# Copyright 2020-2025 Barcelona Supercomputing Center (BSC), ES
# Copyright 2020-2025 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT
# Copyright 2022-2025 École
# Polytechnique Fédérale de Lausanne, CH
# Copyright 2024-2025 Data Centre, SciLifeLab, SE
# Copyright 2024-2025 National Institute of Informatics (NII), JP
# Copyright 2025 Senckenberg Society for Nature Research (SGN), DE
# Copyright 2025 European Molecular Biology Laboratory (EMBL), Heidelberg, DE
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import pathlib
import shutil

import pytest
from rocrate.utils import get_norm_value


THIS_DIR = pathlib.Path(__file__).absolute().parent
TEST_DATA_NAME = 'test-data'
BASE_URL = 'https://w3id.org/ro/crate'
VERSION = '1.1'
LEGACY_VERSION = '1.0'


class Helpers:
    """Constants and assertion helpers shared by the tests."""

    PROFILE = f"{BASE_URL}/{VERSION}"
    LEGACY_PROFILE = f"{BASE_URL}/{LEGACY_VERSION}"
    WORKFLOW_PROFILE = "https://w3id.org/workflowhub/workflow-ro-crate/1.0"
    METADATA_FILE_NAME = 'ro-crate-metadata.json'
    LEGACY_METADATA_FILE_NAME = 'ro-crate-metadata.jsonld'
    WORKFLOW_TYPES = {"File", "SoftwareSourceCode", "ComputationalWorkflow"}
    WORKFLOW_DESC_TYPES = {"File", "SoftwareSourceCode", "HowTo"}
    LEGACY_WORKFLOW_TYPES = {"File", "SoftwareSourceCode", "Workflow"}
    PREVIEW_FILE_NAME = "ro-crate-preview.html"

    @classmethod
    def read_json_entities(cls, crate_base_path):
        """Load the crate's metadata file and index its "@graph" by "@id"."""
        metadata_path = pathlib.Path(crate_base_path) / cls.METADATA_FILE_NAME
        # Explicit UTF-8: do not depend on the platform's default encoding.
        with open(metadata_path, "rt", encoding="utf-8") as f:
            json_data = json.load(f)
        return {_["@id"]: _ for _ in json_data["@graph"]}

    @classmethod
    def check_crate(cls, json_entities, root_id="./", data_entity_ids=None):
        """Assert the basic structure of a crate's JSON entities.

        Checks the root dataset, the metadata descriptor and, when
        ``data_entity_ids`` is given, that those ids are present both as
        entities and in the root's "hasPart".
        """
        assert root_id in json_entities
        root = json_entities[root_id]
        assert root["@type"] == "Dataset"
        assert cls.METADATA_FILE_NAME in json_entities
        metadata = json_entities[cls.METADATA_FILE_NAME]
        assert metadata["@type"] == "CreativeWork"
        assert cls.PROFILE in get_norm_value(metadata, "conformsTo")
        assert metadata["about"] == {"@id": root_id}
        if data_entity_ids:
            data_entity_ids = set(data_entity_ids)
            assert data_entity_ids.issubset(json_entities)
            assert "hasPart" in root
            assert data_entity_ids.issubset([_["@id"] for _ in root["hasPart"]])

    @classmethod
    def check_wf_crate(cls, json_entities, wf_file_name, root_id="./"):
        """Assert the structure of a workflow crate with main entity ``wf_file_name``."""
        cls.check_crate(json_entities, root_id=root_id)
        assert json_entities[root_id]["mainEntity"]["@id"] == wf_file_name
        assert wf_file_name in json_entities
        wf_entity = json_entities[wf_file_name]
        assert isinstance(wf_entity["@type"], list)
        assert cls.WORKFLOW_TYPES.issubset(wf_entity["@type"])
        assert "programmingLanguage" in wf_entity
        metadata = json_entities[cls.METADATA_FILE_NAME]
        assert cls.WORKFLOW_PROFILE in get_norm_value(metadata, "conformsTo")


@pytest.fixture
def helpers():
    """Expose the Helpers class to tests."""
    return Helpers


# pytest's default tmpdir returns a py.path object
@pytest.fixture
def tmpdir(tmpdir):
    """Wrap pytest's tmpdir in a pathlib.Path."""
    return pathlib.Path(tmpdir)


@pytest.fixture
def test_data_dir(tmpdir):
    """Copy the test data tree into the temporary directory and return the copy."""
    d = tmpdir / TEST_DATA_NAME
    shutil.copytree(THIS_DIR / TEST_DATA_NAME, d)
    return d


# --------------------------------------------------------------------------------
# /test/test-data/empty_file_crate/empty.txt:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ResearchObject/ro-crate-py/c9ccbd86bcbc3f1429d2a45881e3ec248adf2284/test/test-data/empty_file_crate/empty.txt -------------------------------------------------------------------------------- /test/test-data/empty_file_crate/folder/empty_not_listed.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ResearchObject/ro-crate-py/c9ccbd86bcbc3f1429d2a45881e3ec248adf2284/test/test-data/empty_file_crate/folder/empty_not_listed.txt -------------------------------------------------------------------------------- /test/test-data/read_crate/a b/c d.txt: -------------------------------------------------------------------------------- 1 | C D 2 | -------------------------------------------------------------------------------- /test/test-data/read_crate/abstract_wf.cwl: -------------------------------------------------------------------------------- 1 | class: Workflow 2 | cwlVersion: v1.2.0-dev2 3 | doc: 'Abstract CWL Automatically generated from the Galaxy workflow file: COVID-19: 4 | PE Variation' 5 | inputs: 6 | 'GenBank file ': 7 | format: data 8 | type: File 9 | Paired Collection (fastqsanger): 10 | format: data 11 | type: File 12 | outputs: {} 13 | steps: 14 | 10_Realign reads: 15 | in: 16 | reads: 8_MarkDuplicates/outFile 17 | reference_source|ref: 2_SnpEff build/output_fasta 18 | out: 19 | - realigned 20 | run: 21 | class: Operation 22 | id: toolshed_g2_bx_psu_edu_repos_iuc_lofreq_viterbi_lofreq_viterbi_2_1_3_1+galaxy1 23 | inputs: 24 | reads: 25 | format: Any 26 | type: File 27 | reference_source|ref: 28 | format: Any 29 | type: File 30 | outputs: 31 | realigned: 32 | doc: bam 33 | type: File 34 | 11_MultiQC: 35 | in: 36 | results_0|software_cond|output_0|input: 8_MarkDuplicates/metrics_file 37 | out: 38 | - stats 39 | - plots 40 | - html_report 41 | run: 42 | class: Operation 43 | id: 
toolshed_g2_bx_psu_edu_repos_iuc_multiqc_multiqc_1_7_1 44 | inputs: 45 | results_0|software_cond|output_0|input: 46 | format: Any 47 | type: File 48 | outputs: 49 | html_report: 50 | doc: html 51 | type: File 52 | plots: 53 | doc: input 54 | type: File 55 | stats: 56 | doc: input 57 | type: File 58 | 12_Call variants: 59 | in: 60 | reads: 10_Realign reads/realigned 61 | reference_source|ref: 2_SnpEff build/output_fasta 62 | out: 63 | - variants 64 | run: 65 | class: Operation 66 | id: toolshed_g2_bx_psu_edu_repos_iuc_lofreq_call_lofreq_call_2_1_3_1+galaxy0 67 | inputs: 68 | reads: 69 | format: Any 70 | type: File 71 | reference_source|ref: 72 | format: Any 73 | type: File 74 | outputs: 75 | variants: 76 | doc: vcf 77 | type: File 78 | 13_SnpEff eff: 79 | in: 80 | input: 12_Call variants/variants 81 | snpDb|snpeff_db: 2_SnpEff build/snpeff_output 82 | out: 83 | - snpeff_output 84 | - statsFile 85 | run: 86 | class: Operation 87 | id: toolshed_g2_bx_psu_edu_repos_iuc_snpeff_snpEff_4_3+T_galaxy1 88 | inputs: 89 | input: 90 | format: Any 91 | type: File 92 | snpDb|snpeff_db: 93 | format: Any 94 | type: File 95 | outputs: 96 | snpeff_output: 97 | doc: vcf 98 | type: File 99 | statsFile: 100 | doc: html 101 | type: File 102 | 14_SnpSift Extract Fields: 103 | in: 104 | input: 13_SnpEff eff/snpeff_output 105 | out: 106 | - output 107 | run: 108 | class: Operation 109 | id: toolshed_g2_bx_psu_edu_repos_iuc_snpsift_snpSift_extractFields_4_3+t_galaxy0 110 | inputs: 111 | input: 112 | format: Any 113 | type: File 114 | outputs: 115 | output: 116 | doc: tabular 117 | type: File 118 | 15_Convert VCF to VCF_BGZIP: 119 | in: 120 | input1: 13_SnpEff eff/snpeff_output 121 | out: 122 | - output1 123 | run: 124 | class: Operation 125 | id: CONVERTER_vcf_to_vcf_bgzip_0 126 | inputs: 127 | input1: 128 | format: Any 129 | type: File 130 | outputs: 131 | output1: 132 | doc: vcf_bgzip 133 | type: File 134 | 16_Collapse Collection: 135 | in: 136 | input_list: 14_SnpSift Extract 
Fields/output 137 | out: 138 | - output 139 | run: 140 | class: Operation 141 | id: toolshed_g2_bx_psu_edu_repos_nml_collapse_collections_collapse_dataset_4_1 142 | inputs: 143 | input_list: 144 | format: Any 145 | type: File 146 | outputs: 147 | output: 148 | doc: input 149 | type: File 150 | 2_SnpEff build: 151 | in: 152 | input_type|input_gbk: 'GenBank file ' 153 | out: 154 | - snpeff_output 155 | - output_fasta 156 | run: 157 | class: Operation 158 | id: toolshed_g2_bx_psu_edu_repos_iuc_snpeff_snpEff_build_gb_4_3+T_galaxy4 159 | inputs: 160 | input_type|input_gbk: 161 | format: Any 162 | type: File 163 | outputs: 164 | output_fasta: 165 | doc: fasta 166 | type: File 167 | snpeff_output: 168 | doc: snpeffdb 169 | type: File 170 | 3_fastp: 171 | in: 172 | single_paired|paired_input: Paired Collection (fastqsanger) 173 | out: 174 | - output_paired_coll 175 | - report_html 176 | - report_json 177 | run: 178 | class: Operation 179 | id: toolshed_g2_bx_psu_edu_repos_iuc_fastp_fastp_0_19_5+galaxy1 180 | inputs: 181 | single_paired|paired_input: 182 | format: Any 183 | type: File 184 | outputs: 185 | output_paired_coll: 186 | doc: input 187 | type: File 188 | report_html: 189 | doc: html 190 | type: File 191 | report_json: 192 | doc: json 193 | type: File 194 | 4_Map with BWA-MEM: 195 | in: 196 | fastq_input|fastq_input1: 3_fastp/output_paired_coll 197 | reference_source|ref_file: 2_SnpEff build/output_fasta 198 | out: 199 | - bam_output 200 | run: 201 | class: Operation 202 | id: toolshed_g2_bx_psu_edu_repos_devteam_bwa_bwa_mem_0_7_17_1 203 | inputs: 204 | fastq_input|fastq_input1: 205 | format: Any 206 | type: File 207 | reference_source|ref_file: 208 | format: Any 209 | type: File 210 | outputs: 211 | bam_output: 212 | doc: bam 213 | type: File 214 | 5_MultiQC: 215 | in: 216 | results_0|software_cond|input: 3_fastp/report_json 217 | out: 218 | - stats 219 | - plots 220 | - html_report 221 | run: 222 | class: Operation 223 | id: 
toolshed_g2_bx_psu_edu_repos_iuc_multiqc_multiqc_1_7_1 224 | inputs: 225 | results_0|software_cond|input: 226 | format: Any 227 | type: File 228 | outputs: 229 | html_report: 230 | doc: html 231 | type: File 232 | plots: 233 | doc: input 234 | type: File 235 | stats: 236 | doc: input 237 | type: File 238 | 6_Filter SAM or BAM, output SAM or BAM: 239 | in: 240 | input1: 4_Map with BWA-MEM/bam_output 241 | out: 242 | - output1 243 | run: 244 | class: Operation 245 | id: toolshed_g2_bx_psu_edu_repos_devteam_samtool_filter2_samtool_filter2_1_8+galaxy1 246 | inputs: 247 | input1: 248 | format: Any 249 | type: File 250 | outputs: 251 | output1: 252 | doc: sam 253 | type: File 254 | 7_Samtools stats: 255 | in: 256 | input: 6_Filter SAM or BAM, output SAM or BAM/output1 257 | out: 258 | - output 259 | run: 260 | class: Operation 261 | id: toolshed_g2_bx_psu_edu_repos_devteam_samtools_stats_samtools_stats_2_0_2+galaxy2 262 | inputs: 263 | input: 264 | format: Any 265 | type: File 266 | outputs: 267 | output: 268 | doc: tabular 269 | type: File 270 | 8_MarkDuplicates: 271 | in: 272 | inputFile: 6_Filter SAM or BAM, output SAM or BAM/output1 273 | out: 274 | - metrics_file 275 | - outFile 276 | run: 277 | class: Operation 278 | id: toolshed_g2_bx_psu_edu_repos_devteam_picard_picard_MarkDuplicates_2_18_2_2 279 | inputs: 280 | inputFile: 281 | format: Any 282 | type: File 283 | outputs: 284 | metrics_file: 285 | doc: txt 286 | type: File 287 | outFile: 288 | doc: bam 289 | type: File 290 | 9_MultiQC: 291 | in: 292 | results_0|software_cond|output_0|type|input: 7_Samtools stats/output 293 | out: 294 | - stats 295 | - plots 296 | - html_report 297 | run: 298 | class: Operation 299 | id: toolshed_g2_bx_psu_edu_repos_iuc_multiqc_multiqc_1_7_1 300 | inputs: 301 | results_0|software_cond|output_0|type|input: 302 | format: Any 303 | type: File 304 | outputs: 305 | html_report: 306 | doc: html 307 | type: File 308 | plots: 309 | doc: input 310 | type: File 311 | stats: 312 | doc: input 
313 | type: File 314 | 315 | -------------------------------------------------------------------------------- /test/test-data/read_crate/examples/README.txt: -------------------------------------------------------------------------------- 1 | Examples directory 2 | -------------------------------------------------------------------------------- /test/test-data/read_crate/ro-crate-metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "https://w3id.org/ro/crate/1.1/context", 3 | "@graph": [ 4 | { 5 | "@id": "ro-crate-metadata.json", 6 | "@type": "CreativeWork", 7 | "about": { 8 | "@id": "./" 9 | }, 10 | "conformsTo": { 11 | "@id": "https://w3id.org/ro/crate/1.1" 12 | } 13 | }, 14 | { 15 | "@id": "./", 16 | "@type": "Dataset", 17 | "datePublished": "2020-06-25 17:03:04.098286", 18 | "hasPart": [ 19 | { 20 | "@id": "test_galaxy_wf.ga" 21 | }, 22 | { 23 | "@id": "abstract_wf.cwl" 24 | }, 25 | { 26 | "@id": "test_file_galaxy.txt" 27 | }, 28 | { 29 | "@id": "https://raw.githubusercontent.com/ResearchObject/ro-crate-py/master/test/test-data/sample_file.txt" 30 | }, 31 | { 32 | "@id": "examples/" 33 | }, 34 | { 35 | "@id": "test/" 36 | }, 37 | { 38 | "@id": "with%20space.txt" 39 | }, 40 | { 41 | "@id": "a%20b/" 42 | } 43 | ], 44 | "mainEntity": { 45 | "@id": "test_galaxy_wf.ga" 46 | } 47 | }, 48 | { 49 | "@id": "ro-crate-preview.html", 50 | "@type": "CreativeWork", 51 | "about": { 52 | "@id": "./" 53 | } 54 | }, 55 | { 56 | "@id": "test_galaxy_wf.ga", 57 | "@type": [ 58 | "File", 59 | "ComputationalWorkflow", 60 | "SoftwareSourceCode" 61 | ], 62 | "programmingLanguage": { 63 | "@id": "https://galaxyproject.org" 64 | }, 65 | "subjectOf": { 66 | "@id": "abstract_wf.cwl" 67 | } 68 | }, 69 | { 70 | "@id": "abstract_wf.cwl", 71 | "@type": [ 72 | "File", 73 | "SoftwareSourceCode", 74 | "ComputationalWorkflow" 75 | ] 76 | }, 77 | { 78 | "@id": "test_file_galaxy.txt", 79 | "@type": "File" 80 | }, 81 | { 82 | "@id": 
"#joe", 83 | "@type": "Person", 84 | "name": "Joe Bloggs" 85 | }, 86 | { 87 | "@id": "https://raw.githubusercontent.com/ResearchObject/ro-crate-py/master/test/test-data/sample_file.txt", 88 | "@type": "File" 89 | }, 90 | { 91 | "@id": "examples/", 92 | "@type": "Dataset" 93 | }, 94 | { 95 | "@id": "test/", 96 | "@type": "Dataset" 97 | }, 98 | { 99 | "@id": "with%20space.txt", 100 | "@type": "File" 101 | }, 102 | { 103 | "@id": "a%20b/", 104 | "@type": "Dataset" 105 | } 106 | ] 107 | } 108 | -------------------------------------------------------------------------------- /test/test-data/read_crate/ro-crate-metadata.jsonld: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "https://w3id.org/ro/crate/1.0/context", 3 | "@graph": [ 4 | { 5 | "@id": "ro-crate-metadata.jsonld", 6 | "@type": "CreativeWork", 7 | "about": { 8 | "@id": "./" 9 | }, 10 | "conformsTo": { 11 | "@id": "https://w3id.org/ro/crate/1.0" 12 | } 13 | }, 14 | { 15 | "@id": "./", 16 | "@type": "Dataset", 17 | "datePublished": "2020-06-25 17:03:04.098286", 18 | "hasPart": [ 19 | { 20 | "@id": "test_galaxy_wf.ga" 21 | }, 22 | { 23 | "@id": "abstract_wf.cwl" 24 | }, 25 | { 26 | "@id": "test_file_galaxy.txt" 27 | }, 28 | { 29 | "@id": "https://raw.githubusercontent.com/ResearchObject/ro-crate-py/master/test/test-data/sample_file.txt" 30 | }, 31 | { 32 | "@id": "examples/" 33 | }, 34 | { 35 | "@id": "test/" 36 | } 37 | ], 38 | "mainEntity": { 39 | "@id": "test_galaxy_wf.ga" 40 | } 41 | }, 42 | { 43 | "@id": "ro-crate-preview.html", 44 | "@type": "CreativeWork", 45 | "about": { 46 | "@id": "./" 47 | } 48 | }, 49 | { 50 | "@id": "test_galaxy_wf.ga", 51 | "@type": [ 52 | "File", 53 | "Workflow", 54 | "SoftwareSourceCode" 55 | ], 56 | "programmingLanguage": { 57 | "@id": "https://galaxyproject.org" 58 | }, 59 | "subjectOf": { 60 | "@id": "abstract_wf.cwl" 61 | } 62 | }, 63 | { 64 | "@id": "abstract_wf.cwl", 65 | "@type": [ 66 | "File", 67 | "SoftwareSourceCode", 
68 | "Workflow" 69 | ] 70 | }, 71 | { 72 | "@id": "test_file_galaxy.txt", 73 | "@type": "File" 74 | }, 75 | { 76 | "@id": "#joe", 77 | "@type": "Person", 78 | "name": "Joe Bloggs" 79 | }, 80 | { 81 | "@id": "https://raw.githubusercontent.com/ResearchObject/ro-crate-py/master/test/test-data/sample_file.txt", 82 | "@type": "File" 83 | }, 84 | { 85 | "@id": "examples/", 86 | "@type": "Dataset" 87 | }, 88 | { 89 | "@id": "test/", 90 | "@type": "Dataset" 91 | } 92 | ] 93 | } 94 | -------------------------------------------------------------------------------- /test/test-data/read_crate/ro-crate-preview.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | test_read_crate 5 | 6 | 7 |

Dummy preview

8 | 9 | 10 | -------------------------------------------------------------------------------- /test/test-data/read_crate/test/test-metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "tmpformat": "ro/workflow/test-metadata/0.1", 3 | "@id": "test-metadata.json", 4 | "test": [ 5 | { 6 | "name": "test1", 7 | "instance": [ 8 | { 9 | "name": "example_jenkins", 10 | "service": { 11 | "type": "jenkins", 12 | "url": "http://example.org/jenkins", 13 | "resource": "job/tests/" 14 | } 15 | } 16 | ] 17 | } 18 | ] 19 | } 20 | -------------------------------------------------------------------------------- /test/test-data/read_crate/with space.txt: -------------------------------------------------------------------------------- 1 | Test handling of special character (un)escaping 2 | -------------------------------------------------------------------------------- /test/test-data/read_extra/listed.txt: -------------------------------------------------------------------------------- 1 | LISTED 2 | -------------------------------------------------------------------------------- /test/test-data/read_extra/listed/listed.txt: -------------------------------------------------------------------------------- 1 | LISTED 2 | -------------------------------------------------------------------------------- /test/test-data/read_extra/listed/not_listed.txt: -------------------------------------------------------------------------------- 1 | NOT_LISTED 2 | -------------------------------------------------------------------------------- /test/test-data/read_extra/not_listed.txt: -------------------------------------------------------------------------------- 1 | NOT_LISTED 2 | -------------------------------------------------------------------------------- /test/test-data/read_extra/not_listed/not_listed.txt: -------------------------------------------------------------------------------- 1 | NOT_LISTED 2 | 
-------------------------------------------------------------------------------- /test/test-data/read_extra/ro-crate-metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "https://w3id.org/ro/crate/1.1/context", 3 | "@graph": [ 4 | { 5 | "@id": "./", 6 | "@type": "Dataset", 7 | "datePublished": "2021-02-26T09:46:41.236862", 8 | "hasPart": [ 9 | { 10 | "@id": "listed/" 11 | }, 12 | { 13 | "@id": "listed.txt" 14 | }, 15 | { 16 | "@id": "listed/listed.txt" 17 | } 18 | ] 19 | }, 20 | { 21 | "@id": "ro-crate-metadata.json", 22 | "@type": "CreativeWork", 23 | "about": { 24 | "@id": "./" 25 | }, 26 | "conformsTo": { 27 | "@id": "https://w3id.org/ro/crate/1.1" 28 | } 29 | }, 30 | { 31 | "@id": "listed/", 32 | "@type": "Dataset" 33 | }, 34 | { 35 | "@id": "listed.txt", 36 | "@type": "File" 37 | }, 38 | { 39 | "@id": "listed/listed.txt", 40 | "@type": "File" 41 | } 42 | ] 43 | } -------------------------------------------------------------------------------- /test/test-data/ro-crate-galaxy-sortchangecase/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | -------------------------------------------------------------------------------- /test/test-data/ro-crate-galaxy-sortchangecase/README.md: -------------------------------------------------------------------------------- 1 | # Galaxy mini workflow 2 | 3 | A tiny Galaxy workflow that sorts tabular lines according to the first column 4 | and changes the text to upper case. 
5 | 6 | This RO-Crate is based on an [example from the Life Monitor repository](https://github.com/crs4/life_monitor/tree/8437be177822d0e2b3fed30db094b91c2fc14391/interaction_experiments/data/crates/ro-crate-galaxy-sortchangecase). 7 | -------------------------------------------------------------------------------- /test/test-data/ro-crate-galaxy-sortchangecase/ro-crate-metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": [ 3 | "https://w3id.org/ro/crate/1.1/context", 4 | { 5 | "TestSuite": "https://w3id.org/ro/terms/test#TestSuite", 6 | "TestInstance": "https://w3id.org/ro/terms/test#TestInstance", 7 | "TestService": "https://w3id.org/ro/terms/test#TestService", 8 | "TestDefinition": "https://w3id.org/ro/terms/test#TestDefinition", 9 | "PlanemoEngine": "https://w3id.org/ro/terms/test#PlanemoEngine", 10 | "JenkinsService": "https://w3id.org/ro/terms/test#JenkinsService", 11 | "TravisService": "https://w3id.org/ro/terms/test#TravisService", 12 | "GithubService": "https://w3id.org/ro/terms/test#GithubService", 13 | "instance": "https://w3id.org/ro/terms/test#instance", 14 | "runsOn": "https://w3id.org/ro/terms/test#runsOn", 15 | "resource": "https://w3id.org/ro/terms/test#resource", 16 | "definition": "https://w3id.org/ro/terms/test#definition", 17 | "engineVersion": "https://w3id.org/ro/terms/test#engineVersion" 18 | } 19 | ], 20 | "@graph": [ 21 | { 22 | "@id": "ro-crate-metadata.json", 23 | "@type": "CreativeWork", 24 | "about": { 25 | "@id": "./" 26 | }, 27 | "conformsTo": { 28 | "@id": "https://w3id.org/ro/crate/1.1" 29 | } 30 | }, 31 | { 32 | "@id": "./", 33 | "@type": "Dataset", 34 | "name": "sort-and-change-case", 35 | "description": "sort lines and change text to upper case", 36 | "license": "Apache-2.0", 37 | "mainEntity": { 38 | "@id": "sort-and-change-case.ga" 39 | }, 40 | "hasPart": [ 41 | { 42 | "@id": "sort-and-change-case.ga" 43 | }, 44 | { 45 | "@id": "LICENSE" 46 | }, 47 | { 48 | 
"@id": "README.md" 49 | }, 50 | { 51 | "@id": "test/test1/sort-and-change-case-test.yml" 52 | } 53 | ], 54 | "mentions": [ 55 | { 56 | "@id": "#test1" 57 | } 58 | ] 59 | }, 60 | { 61 | "@id": "sort-and-change-case.ga", 62 | "@type": [ 63 | "File", 64 | "SoftwareSourceCode", 65 | "ComputationalWorkflow" 66 | ], 67 | "programmingLanguage": { 68 | "@id": "#galaxy" 69 | }, 70 | "name": "sort-and-change-case" 71 | }, 72 | { 73 | "@id": "LICENSE", 74 | "@type": "File" 75 | }, 76 | { 77 | "@id": "README.md", 78 | "@type": "File" 79 | }, 80 | { 81 | "@id": "#galaxy", 82 | "@type": "ComputerLanguage", 83 | "name": "Galaxy", 84 | "identifier": { 85 | "@id": "https://galaxyproject.org/" 86 | }, 87 | "url": { 88 | "@id": "https://galaxyproject.org/" 89 | } 90 | }, 91 | { 92 | "@id": "#test1", 93 | "name": "test1", 94 | "@type": "TestSuite", 95 | "mainEntity": { 96 | "@id": "sort-and-change-case.ga" 97 | }, 98 | "instance": [ 99 | {"@id": "#test1_1"} 100 | ], 101 | "definition": {"@id": "test/test1/sort-and-change-case-test.yml"} 102 | }, 103 | { 104 | "@id": "#test1_1", 105 | "name": "test1_1", 106 | "@type": "TestInstance", 107 | "runsOn": {"@id": "https://w3id.org/ro/terms/test#JenkinsService"}, 108 | "url": "http://example.org/jenkins", 109 | "resource": "job/tests/" 110 | }, 111 | { 112 | "@id": "test/test1/sort-and-change-case-test.yml", 113 | "@type": [ 114 | "File", 115 | "TestDefinition" 116 | ], 117 | "conformsTo": {"@id": "https://w3id.org/ro/terms/test#PlanemoEngine"}, 118 | "engineVersion": ">=0.70" 119 | }, 120 | { 121 | "@id": "https://w3id.org/ro/terms/test#JenkinsService", 122 | "@type": "TestService", 123 | "name": "Jenkins", 124 | "url": {"@id": "https://www.jenkins.io"} 125 | }, 126 | { 127 | "@id": "https://w3id.org/ro/terms/test#PlanemoEngine", 128 | "@type": "SoftwareApplication", 129 | "name": "Planemo", 130 | "url": {"@id": "https://github.com/galaxyproject/planemo"} 131 | } 132 | ] 133 | } 134 | 
-------------------------------------------------------------------------------- /test/test-data/ro-crate-galaxy-sortchangecase/sort-and-change-case.ga: -------------------------------------------------------------------------------- 1 | { 2 | "uuid": "e2a8566c-c025-4181-9e90-7ed29d4e4df1", 3 | "tags": [], 4 | "format-version": "0.1", 5 | "name": "sort-and-change-case", 6 | "version": 0, 7 | "steps": { 8 | "0": { 9 | "tool_id": null, 10 | "tool_version": null, 11 | "outputs": [], 12 | "workflow_outputs": [], 13 | "input_connections": {}, 14 | "tool_state": "{}", 15 | "id": 0, 16 | "uuid": "5a36fad2-66c7-4b9e-8759-0fbcae9b8541", 17 | "errors": null, 18 | "name": "Input dataset", 19 | "label": "bed_input", 20 | "inputs": [], 21 | "position": { 22 | "top": 200, 23 | "left": 200 24 | }, 25 | "annotation": "", 26 | "content_id": null, 27 | "type": "data_input" 28 | }, 29 | "1": { 30 | "tool_id": "sort1", 31 | "tool_version": "1.1.0", 32 | "outputs": [ 33 | { 34 | "type": "input", 35 | "name": "out_file1" 36 | } 37 | ], 38 | "workflow_outputs": [ 39 | { 40 | "output_name": "out_file1", 41 | "uuid": "8237f71a-bc2a-494e-a63c-09c1e65ef7c8", 42 | "label": "sorted_bed" 43 | } 44 | ], 45 | "input_connections": { 46 | "input": { 47 | "output_name": "output", 48 | "id": 0 49 | } 50 | }, 51 | "tool_state": "{\"__page__\": null, \"style\": \"\\\"alpha\\\"\", \"column\": \"\\\"1\\\"\", \"__rerun_remap_job_id__\": null, \"column_set\": \"[]\", \"input\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"header_lines\": \"\\\"0\\\"\", \"order\": \"\\\"ASC\\\"\"}", 52 | "id": 1, 53 | "uuid": "0b6b3cda-c75f-452b-85b1-8ae4f3302ba4", 54 | "errors": null, 55 | "name": "Sort", 56 | "post_job_actions": {}, 57 | "label": "sort", 58 | "inputs": [ 59 | { 60 | "name": "input", 61 | "description": "runtime parameter for tool Sort" 62 | } 63 | ], 64 | "position": { 65 | "top": 200, 66 | "left": 420 67 | }, 68 | "annotation": "", 69 | "content_id": "sort1", 70 | "type": "tool" 71 | }, 72 | "2": { 
73 | "tool_id": "ChangeCase", 74 | "tool_version": "1.0.0", 75 | "outputs": [ 76 | { 77 | "type": "tabular", 78 | "name": "out_file1" 79 | } 80 | ], 81 | "workflow_outputs": [ 82 | { 83 | "output_name": "out_file1", 84 | "uuid": "c31cd733-dab6-4d50-9fec-b644d162397b", 85 | "label": "uppercase_bed" 86 | } 87 | ], 88 | "input_connections": { 89 | "input": { 90 | "output_name": "out_file1", 91 | "id": 1 92 | } 93 | }, 94 | "tool_state": "{\"__page__\": null, \"casing\": \"\\\"up\\\"\", \"__rerun_remap_job_id__\": null, \"cols\": \"\\\"c1\\\"\", \"delimiter\": \"\\\"TAB\\\"\", \"input\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\"}", 95 | "id": 2, 96 | "uuid": "9698bcde-0729-48fe-b88d-ccfb6f6153b4", 97 | "errors": null, 98 | "name": "Change Case", 99 | "post_job_actions": {}, 100 | "label": "change_case", 101 | "inputs": [ 102 | { 103 | "name": "input", 104 | "description": "runtime parameter for tool Change Case" 105 | } 106 | ], 107 | "position": { 108 | "top": 200, 109 | "left": 640 110 | }, 111 | "annotation": "", 112 | "content_id": "ChangeCase", 113 | "type": "tool" 114 | } 115 | }, 116 | "annotation": "", 117 | "a_galaxy_workflow": "true" 118 | } 119 | -------------------------------------------------------------------------------- /test/test-data/ro-crate-galaxy-sortchangecase/test/test1/input.bed: -------------------------------------------------------------------------------- 1 | chr1 66999824 67210768 2 | chr7 48998526 50489626 3 | chr1 66874821 66010368 4 | -------------------------------------------------------------------------------- /test/test-data/ro-crate-galaxy-sortchangecase/test/test1/output_exp.bed: -------------------------------------------------------------------------------- 1 | CHR1 66874821 66010368 2 | CHR1 66999824 67210768 3 | CHR7 48998526 50489626 4 | -------------------------------------------------------------------------------- /test/test-data/ro-crate-galaxy-sortchangecase/test/test1/sort-and-change-case-test.yml: 
-------------------------------------------------------------------------------- 1 | - doc: test with a small input 2 | job: 3 | bed_input: 4 | class: File 5 | path: input.bed 6 | outputs: 7 | uppercase_bed: 8 | path: output_exp.bed 9 | -------------------------------------------------------------------------------- /test/test-data/sample_cwl_wf.cwl: -------------------------------------------------------------------------------- 1 | class: Workflow 2 | cwlVersion: v1.2.0-dev2 3 | doc: 'Abstract CWL Automatically generated from the Galaxy workflow file: COVID-19: 4 | PE Variation' 5 | inputs: 6 | 'GenBank file ': 7 | format: data 8 | type: File 9 | Paired Collection (fastqsanger): 10 | format: data 11 | type: File 12 | outputs: {} 13 | steps: 14 | 10_Realign reads: 15 | in: 16 | reads: 8_MarkDuplicates/outFile 17 | reference_source|ref: 2_SnpEff build/output_fasta 18 | out: 19 | - realigned 20 | run: 21 | class: Operation 22 | id: toolshed_g2_bx_psu_edu_repos_iuc_lofreq_viterbi_lofreq_viterbi_2_1_3_1+galaxy1 23 | inputs: 24 | reads: 25 | format: Any 26 | type: File 27 | reference_source|ref: 28 | format: Any 29 | type: File 30 | outputs: 31 | realigned: 32 | doc: bam 33 | type: File 34 | 11_MultiQC: 35 | in: 36 | results_0|software_cond|output_0|input: 8_MarkDuplicates/metrics_file 37 | out: 38 | - stats 39 | - plots 40 | - html_report 41 | run: 42 | class: Operation 43 | id: toolshed_g2_bx_psu_edu_repos_iuc_multiqc_multiqc_1_7_1 44 | inputs: 45 | results_0|software_cond|output_0|input: 46 | format: Any 47 | type: File 48 | outputs: 49 | html_report: 50 | doc: html 51 | type: File 52 | plots: 53 | doc: input 54 | type: File 55 | stats: 56 | doc: input 57 | type: File 58 | 12_Call variants: 59 | in: 60 | reads: 10_Realign reads/realigned 61 | reference_source|ref: 2_SnpEff build/output_fasta 62 | out: 63 | - variants 64 | run: 65 | class: Operation 66 | id: toolshed_g2_bx_psu_edu_repos_iuc_lofreq_call_lofreq_call_2_1_3_1+galaxy0 67 | inputs: 68 | reads: 69 | format: 
Any 70 | type: File 71 | reference_source|ref: 72 | format: Any 73 | type: File 74 | outputs: 75 | variants: 76 | doc: vcf 77 | type: File 78 | 13_SnpEff eff: 79 | in: 80 | input: 12_Call variants/variants 81 | snpDb|snpeff_db: 2_SnpEff build/snpeff_output 82 | out: 83 | - snpeff_output 84 | - statsFile 85 | run: 86 | class: Operation 87 | id: toolshed_g2_bx_psu_edu_repos_iuc_snpeff_snpEff_4_3+T_galaxy1 88 | inputs: 89 | input: 90 | format: Any 91 | type: File 92 | snpDb|snpeff_db: 93 | format: Any 94 | type: File 95 | outputs: 96 | snpeff_output: 97 | doc: vcf 98 | type: File 99 | statsFile: 100 | doc: html 101 | type: File 102 | 14_SnpSift Extract Fields: 103 | in: 104 | input: 13_SnpEff eff/snpeff_output 105 | out: 106 | - output 107 | run: 108 | class: Operation 109 | id: toolshed_g2_bx_psu_edu_repos_iuc_snpsift_snpSift_extractFields_4_3+t_galaxy0 110 | inputs: 111 | input: 112 | format: Any 113 | type: File 114 | outputs: 115 | output: 116 | doc: tabular 117 | type: File 118 | 15_Convert VCF to VCF_BGZIP: 119 | in: 120 | input1: 13_SnpEff eff/snpeff_output 121 | out: 122 | - output1 123 | run: 124 | class: Operation 125 | id: CONVERTER_vcf_to_vcf_bgzip_0 126 | inputs: 127 | input1: 128 | format: Any 129 | type: File 130 | outputs: 131 | output1: 132 | doc: vcf_bgzip 133 | type: File 134 | 16_Collapse Collection: 135 | in: 136 | input_list: 14_SnpSift Extract Fields/output 137 | out: 138 | - output 139 | run: 140 | class: Operation 141 | id: toolshed_g2_bx_psu_edu_repos_nml_collapse_collections_collapse_dataset_4_1 142 | inputs: 143 | input_list: 144 | format: Any 145 | type: File 146 | outputs: 147 | output: 148 | doc: input 149 | type: File 150 | 2_SnpEff build: 151 | in: 152 | input_type|input_gbk: 'GenBank file ' 153 | out: 154 | - snpeff_output 155 | - output_fasta 156 | run: 157 | class: Operation 158 | id: toolshed_g2_bx_psu_edu_repos_iuc_snpeff_snpEff_build_gb_4_3+T_galaxy4 159 | inputs: 160 | input_type|input_gbk: 161 | format: Any 162 | type: File 163 
| outputs: 164 | output_fasta: 165 | doc: fasta 166 | type: File 167 | snpeff_output: 168 | doc: snpeffdb 169 | type: File 170 | 3_fastp: 171 | in: 172 | single_paired|paired_input: Paired Collection (fastqsanger) 173 | out: 174 | - output_paired_coll 175 | - report_html 176 | - report_json 177 | run: 178 | class: Operation 179 | id: toolshed_g2_bx_psu_edu_repos_iuc_fastp_fastp_0_19_5+galaxy1 180 | inputs: 181 | single_paired|paired_input: 182 | format: Any 183 | type: File 184 | outputs: 185 | output_paired_coll: 186 | doc: input 187 | type: File 188 | report_html: 189 | doc: html 190 | type: File 191 | report_json: 192 | doc: json 193 | type: File 194 | 4_Map with BWA-MEM: 195 | in: 196 | fastq_input|fastq_input1: 3_fastp/output_paired_coll 197 | reference_source|ref_file: 2_SnpEff build/output_fasta 198 | out: 199 | - bam_output 200 | run: 201 | class: Operation 202 | id: toolshed_g2_bx_psu_edu_repos_devteam_bwa_bwa_mem_0_7_17_1 203 | inputs: 204 | fastq_input|fastq_input1: 205 | format: Any 206 | type: File 207 | reference_source|ref_file: 208 | format: Any 209 | type: File 210 | outputs: 211 | bam_output: 212 | doc: bam 213 | type: File 214 | 5_MultiQC: 215 | in: 216 | results_0|software_cond|input: 3_fastp/report_json 217 | out: 218 | - stats 219 | - plots 220 | - html_report 221 | run: 222 | class: Operation 223 | id: toolshed_g2_bx_psu_edu_repos_iuc_multiqc_multiqc_1_7_1 224 | inputs: 225 | results_0|software_cond|input: 226 | format: Any 227 | type: File 228 | outputs: 229 | html_report: 230 | doc: html 231 | type: File 232 | plots: 233 | doc: input 234 | type: File 235 | stats: 236 | doc: input 237 | type: File 238 | 6_Filter SAM or BAM, output SAM or BAM: 239 | in: 240 | input1: 4_Map with BWA-MEM/bam_output 241 | out: 242 | - output1 243 | run: 244 | class: Operation 245 | id: toolshed_g2_bx_psu_edu_repos_devteam_samtool_filter2_samtool_filter2_1_8+galaxy1 246 | inputs: 247 | input1: 248 | format: Any 249 | type: File 250 | outputs: 251 | output1: 252 | 
doc: sam 253 | type: File 254 | 7_Samtools stats: 255 | in: 256 | input: 6_Filter SAM or BAM, output SAM or BAM/output1 257 | out: 258 | - output 259 | run: 260 | class: Operation 261 | id: toolshed_g2_bx_psu_edu_repos_devteam_samtools_stats_samtools_stats_2_0_2+galaxy2 262 | inputs: 263 | input: 264 | format: Any 265 | type: File 266 | outputs: 267 | output: 268 | doc: tabular 269 | type: File 270 | 8_MarkDuplicates: 271 | in: 272 | inputFile: 6_Filter SAM or BAM, output SAM or BAM/output1 273 | out: 274 | - metrics_file 275 | - outFile 276 | run: 277 | class: Operation 278 | id: toolshed_g2_bx_psu_edu_repos_devteam_picard_picard_MarkDuplicates_2_18_2_2 279 | inputs: 280 | inputFile: 281 | format: Any 282 | type: File 283 | outputs: 284 | metrics_file: 285 | doc: txt 286 | type: File 287 | outFile: 288 | doc: bam 289 | type: File 290 | 9_MultiQC: 291 | in: 292 | results_0|software_cond|output_0|type|input: 7_Samtools stats/output 293 | out: 294 | - stats 295 | - plots 296 | - html_report 297 | run: 298 | class: Operation 299 | id: toolshed_g2_bx_psu_edu_repos_iuc_multiqc_multiqc_1_7_1 300 | inputs: 301 | results_0|software_cond|output_0|type|input: 302 | format: Any 303 | type: File 304 | outputs: 305 | html_report: 306 | doc: html 307 | type: File 308 | plots: 309 | doc: input 310 | type: File 311 | stats: 312 | doc: input 313 | type: File 314 | 315 | -------------------------------------------------------------------------------- /test/test-data/sample_file.txt: -------------------------------------------------------------------------------- 1 | line1 2 | line2 3 | -------------------------------------------------------------------------------- /test/test-data/test_add_dir/sample_file_subdir.txt: -------------------------------------------------------------------------------- 1 | line1 2 | line2 3 | line3 4 | -------------------------------------------------------------------------------- /test/test_jsonld.py: 
-------------------------------------------------------------------------------- 1 | # Copyright 2019-2025 The University of Manchester, UK 2 | # Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE 3 | # Copyright 2020-2025 Barcelona Supercomputing Center (BSC), ES 4 | # Copyright 2020-2025 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT 5 | # Copyright 2022-2025 École Polytechnique Fédérale de Lausanne, CH 6 | # Copyright 2024-2025 Data Centre, SciLifeLab, SE 7 | # Copyright 2024-2025 National Institute of Informatics (NII), JP 8 | # Copyright 2025 Senckenberg Society for Nature Research (SGN), DE 9 | # Copyright 2025 European Molecular Biology Laboratory (EMBL), Heidelberg, DE 10 | # 11 | # Licensed under the Apache License, Version 2.0 (the "License"); 12 | # you may not use this file except in compliance with the License. 13 | # You may obtain a copy of the License at 14 | # 15 | # http://www.apache.org/licenses/LICENSE-2.0 16 | # 17 | # Unless required by applicable law or agreed to in writing, software 18 | # distributed under the License is distributed on an "AS IS" BASIS, 19 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20 | # See the License for the specific language governing permissions and 21 | # limitations under the License. 
"""Tests for the JSON-LD methods of the ROCrate object."""

from uuid import uuid4

import pytest

from rocrate.rocrate import ROCrate


def _open_read_crate(test_data_dir):
    """Load the ``read_crate`` fixture crate found under *test_data_dir*."""
    return ROCrate(test_data_dir / 'read_crate')


def _random_fragment_id():
    """Return a newly generated identifier: ``#`` followed by a UUID4."""
    return f'#{uuid4()}'


# --- add


def test_add_jsonld_raises_json_is_none():
    """``add_jsonld(None)`` raises the "non-empty JSON-LD" ValueError."""
    empty_crate = ROCrate()

    with pytest.raises(ValueError, match='.*non-empty JSON-LD.*'):
        empty_crate.add_jsonld(None)


def test_add_jsonld_raises_json_is_empty():
    """``add_jsonld({})`` raises the "non-empty JSON-LD" ValueError."""
    empty_crate = ROCrate()
    payload = {}

    with pytest.raises(ValueError, match='.*non-empty JSON-LD.*'):
        empty_crate.add_jsonld(payload)


def test_add_jsonld_raises_json_missing_id():
    """A payload without ``@id`` raises the "non-empty JSON-LD" ValueError."""
    empty_crate = ROCrate()
    payload = {'@type': 'CreativeWork'}

    with pytest.raises(ValueError, match='.*non-empty JSON-LD.*'):
        empty_crate.add_jsonld(payload)


def test_add_jsonld_raises_json_missing_type(test_data_dir):
    """A payload without ``@type`` raises a ValueError mentioning ``@type``."""
    loaded = _open_read_crate(test_data_dir)
    payload = {'@id': './', 'license': 'NA'}

    with pytest.raises(ValueError, match='.*@type.*'):
        loaded.add_jsonld(payload)


def test_add_jsonld_raises_json_duplicate_id(test_data_dir):
    """Adding an ``@id`` already present in the crate raises "already exists"."""
    loaded = _open_read_crate(test_data_dir)
    payload = {'@id': './', '@type': 'Dataset', 'license': 'NA'}

    with pytest.raises(ValueError, match='.*already exists.*'):
        loaded.add_jsonld(payload)


def test_add_jsonld(test_data_dir):
    """A well-formed payload is added and can be fetched back by its id."""
    loaded = _open_read_crate(test_data_dir)
    entity_id = _random_fragment_id()
    payload = {
        '@id': entity_id,
        '@type': 'CreativeWork',
        'name': 'A test entity',
    }

    loaded.add_jsonld(payload)

    fetched = loaded.get(entity_id)
    assert fetched.type == 'CreativeWork'
    assert fetched["name"] == 'A test entity'


# --- update


def test_update_jsonld_raises_json_is_none():
    """``update_jsonld(None)`` raises the "non-empty JSON-LD" ValueError."""
    empty_crate = ROCrate()

    with pytest.raises(ValueError, match='.*non-empty JSON-LD.*'):
        empty_crate.update_jsonld(None)


def test_update_jsonld_raises_json_is_empty():
    """``update_jsonld({})`` raises the "non-empty JSON-LD" ValueError."""
    empty_crate = ROCrate()
    payload = {}

    with pytest.raises(ValueError, match='.*non-empty JSON-LD.*'):
        empty_crate.update_jsonld(payload)


def test_update_jsonld_raises_json_missing_id():
    """An update payload without ``@id`` raises the "non-empty JSON-LD" error."""
    empty_crate = ROCrate()
    payload = {'@type': 'CreativeWork'}

    with pytest.raises(ValueError, match='.*non-empty JSON-LD.*'):
        empty_crate.update_jsonld(payload)


def test_update_jsonld_raises_id_not_found(test_data_dir):
    """Updating an ``@id`` that is not in the crate raises "does not exist"."""
    loaded = _open_read_crate(test_data_dir)
    unknown_id = _random_fragment_id()
    payload = {
        '@id': unknown_id,
        '@type': 'CreativeWork',
        'name': 'This entity does not exist in the RO-Crate',
    }

    with pytest.raises(ValueError, match='.*does not exist.*'):
        loaded.update_jsonld(payload)


def test_update_jsonld(test_data_dir):
    """An update changes regular properties but leaves ``@type`` as it was."""
    loaded = _open_read_crate(test_data_dir)
    entity_id = _random_fragment_id()

    loaded.add_jsonld({
        '@id': entity_id,
        '@type': 'CreativeWork',
        'name': 'A test entity',
    })

    before = loaded.get(entity_id)
    assert before.type == 'CreativeWork'
    assert before['name'] == 'A test entity'

    before['name'] = 'No potatoes today'
    # N.B.: Properties that start with @ are ignored when updating.
    changes = before._jsonld.copy()
    changes['@type'] = 'Dataset'
    loaded.update_jsonld(changes)

    after = loaded.get(entity_id)
    assert before.id == after.id
    assert after.type == 'CreativeWork'
    assert after['name'] == 'No potatoes today'

    # The dictionary handed to update_jsonld must still carry its '@type' key.
    assert '@type' in changes


# --- add or update


def test_add_or_update_jsonld_raises_json_is_none():
    """``add_or_update_jsonld(None)`` raises the "non-empty JSON-LD" error."""
    empty_crate = ROCrate()

    with pytest.raises(ValueError, match='.*non-empty JSON-LD.*'):
        empty_crate.add_or_update_jsonld(None)


def test_add_or_update_jsonld_raises_json_is_empty():
    """``add_or_update_jsonld({})`` raises the "non-empty JSON-LD" error."""
    empty_crate = ROCrate()
    payload = {}

    with pytest.raises(ValueError, match='.*non-empty JSON-LD.*'):
        empty_crate.add_or_update_jsonld(payload)


def test_add_or_update_jsonld_raises_json_missing_id():
    """A payload without ``@id`` raises the "non-empty JSON-LD" error."""
    empty_crate = ROCrate()
    payload = {'@type': 'CreativeWork'}

    with pytest.raises(ValueError, match='.*non-empty JSON-LD.*'):
        empty_crate.add_or_update_jsonld(payload)


def test_add_or_update_add_jsonld(test_data_dir):
    """With an unknown ``@id``, ``add_or_update_jsonld`` adds the entity."""
    loaded = _open_read_crate(test_data_dir)
    entity_id = _random_fragment_id()
    payload = {
        '@id': entity_id,
        '@type': 'CreativeWork',
        'name': 'A test entity',
    }

    loaded.add_or_update_jsonld(payload)

    fetched = loaded.get(entity_id)
    assert fetched.type == 'CreativeWork'
    assert fetched["name"] == 'A test entity'


def test_add_or_update_update_jsonld(test_data_dir):
    """With a known ``@id``, ``add_or_update_jsonld`` updates the entity."""
    loaded = _open_read_crate(test_data_dir)
    entity_id = _random_fragment_id()

    loaded.add_jsonld({
        '@id': entity_id,
        '@type': 'CreativeWork',
        'name': 'A test entity',
    })

    before = loaded.get(entity_id)
    assert before.type == 'CreativeWork'
    assert before['name'] == 'A test entity'

    before['name'] = 'No potatoes today'
    changes = before._jsonld.copy()
    loaded.add_or_update_jsonld(changes)

    after = loaded.get(entity_id)
    assert before.id == after.id
    assert before.type == after.type
    assert after['name'] == 'No potatoes today'

# --------------------------------------------------------------------------------
# /test/test_metadata.py:
# --------------------------------------------------------------------------------
# Copyright 2019-2025 The University of Manchester, UK
# Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE
# Copyright 2020-2025 Barcelona Supercomputing Center (BSC), ES
# Copyright 2020-2025 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT
# Copyright 2022-2025 École Polytechnique Fédérale de Lausanne, CH
# Copyright 2024-2025 Data Centre, SciLifeLab, SE
# Copyright 2024-2025 National Institute of Informatics (NII), JP
# Copyright 2025 Senckenberg Society for Nature Research (SGN), DE
# Copyright 2025 European Molecular Biology Laboratory (EMBL), Heidelberg, DE
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from copy import deepcopy

import pytest

from rocrate.metadata import find_root_entity_id


@pytest.mark.parametrize("root,basename", [
    ("", "ro-crate-metadata.json"),
    ("", "ro-crate-metadata.jsonld"),
    ("https://example.org/crate/", "ro-crate-metadata.json"),
    ("https://example.org/crate/", "ro-crate-metadata.jsonld"),
    ("", "bad-name.json"),
])
def test_find_root(root, basename):
    """Descriptor/root ids are found for standard basenames; others raise KeyError."""
    metadata_id = root + basename
    root_id = root or "./"
    descriptor = {
        "@id": metadata_id,
        "@type": "CreativeWork",
        "about": {"@id": root_id},
        "conformsTo": [
            {"@id": "https://w3id.org/ro/crate/1.1"},
            {"@id": "https://example.org/fancy-ro-crate/1.0"},
        ],
    }
    root_entity = {
        "@id": root_id,
        "@type": "Dataset",
    }
    entities = {metadata_id: descriptor, root_id: root_entity}

    standard_basenames = {"ro-crate-metadata.json", "ro-crate-metadata.jsonld"}
    if basename in standard_basenames:
        assert find_root_entity_id(entities) == (metadata_id, root_id)
        return
    with pytest.raises(KeyError):
        find_root_entity_id(entities)


def test_find_root_bad_entities():
    """Each malformed descriptor/root combination raises a matching ValueError."""
    descriptor_id = "ro-crate-metadata.json"
    pristine = {
        descriptor_id: {
            "@id": "ro-crate-metadata.json",
            "@type": "CreativeWork",
            "about": {"@id": "./"},
            "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"},
        },
        "./": {
            "@id": "./",
            "@type": "Dataset",
        },
    }

    def expect_value_error(candidate, pattern):
        # Run the lookup and require a ValueError whose message matches pattern.
        with pytest.raises(ValueError, match=pattern):
            find_root_entity_id(candidate)

    # missing "about"
    broken = deepcopy(pristine)
    broken[descriptor_id].pop("about")
    expect_value_error(broken, "does not reference")

    # "about" does not reference the root entity
    broken = deepcopy(pristine)
    for bad_about in ("http://example.org", {"@id": "http://example.org"}):
        broken[descriptor_id]["about"] = bad_about
        expect_value_error(broken, "does not reference")

    # metadata type is not CreativeWork
    broken = deepcopy(pristine)
    broken[descriptor_id]["@type"] = "Thing"
    expect_value_error(broken, "must be of type")

    # root type is not Dataset
    broken = deepcopy(pristine)
    broken["./"]["@type"] = "Thing"
    expect_value_error(broken, "must have")


@pytest.mark.filterwarnings("ignore")
def test_find_root_multiple_entries():
    """With two descriptor candidates, check which (descriptor, root) pair is returned."""
    org_descriptor = "http://example.org/ro-crate-metadata.json"
    org_root = "http://example.org/"
    com_descriptor = "http://example.com/ro-crate-metadata.json"
    com_root = "http://example.com/"

    baseline = {
        org_descriptor: {
            "@id": "http://example.org/ro-crate-metadata.json",
            "@type": "CreativeWork",
            "about": {"@id": "http://example.org/"},
            "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"},
        },
        org_root: {
            "@id": "http://example.org/",
            "@type": "Dataset",
            "hasPart": [
                {"@id": "http://example.com/"},
                {"@id": "http://example.com/ro-crate-metadata.json"},
            ],
        },
        com_descriptor: {
            "@id": "http://example.com/ro-crate-metadata.json",
            "@type": "CreativeWork",
            "about": {"@id": "http://example.com/"},
            "conformsTo": {"@id": "https://w3id.com/ro/crate/1.1"},
        },
        com_root: {
            "@id": "http://example.com/",
            "@type": "Dataset",
        },
    }

    def must_pick_org(candidate):
        # The example.org pair has to be the one selected.
        found_descriptor, found_root = find_root_entity_id(candidate)
        assert found_descriptor == "http://example.org/ro-crate-metadata.json"
        assert found_root == "http://example.org/"

    def may_pick_either(candidate):
        # Either candidate is acceptable for the descriptor and for the root.
        found_descriptor, found_root = find_root_entity_id(candidate)
        assert found_descriptor in [org_descriptor, com_descriptor]
        assert found_root in [org_root, com_root]

    must_pick_org(baseline)

    # no root candidate contains the other one
    variant = deepcopy(baseline)
    variant[org_root].pop("hasPart")
    may_pick_either(variant)

    # each root candidate contains the other one
    variant = deepcopy(baseline)
    variant[com_root]["hasPart"] = [
        {"@id": "http://example.org/"},
        {"@id": "http://example.org/ro-crate-metadata.json"},
    ]
    may_pick_either(variant)

    # "about" does not reference the root entity
    variant = deepcopy(baseline)
    for bad_about in ("http://google.com", {"@id": "http://google.com"}):
        variant[com_descriptor]["about"] = bad_about
        must_pick_org(variant)

    # metadata type is not CreativeWork
    variant = deepcopy(baseline)
    variant[com_descriptor]["@type"] = "Thing"
    must_pick_org(variant)

    # root type is not Dataset
    variant = deepcopy(baseline)
    variant[com_root]["@type"] = "Thing"
    must_pick_org(variant)


def test_find_root_multiple_types():
    """A root whose ``@type`` list includes "Dataset" is accepted; otherwise ValueError."""
    descriptor = {
        "@id": "ro-crate-metadata.json",
        "@type": "CreativeWork",
        "about": {"@id": "./"},
        "conformsTo": {"@id": "https://w3id.org/ro/crate/1.1"},
    }
    root_entity = {
        "@id": "./",
        "@type": ["Dataset", "RepositoryCollection"],
    }
    entities = {descriptor["@id"]: descriptor, root_entity["@id"]: root_entity}

    found_descriptor, found_root = find_root_entity_id(entities)
    assert found_descriptor == "ro-crate-metadata.json"
    assert found_root == "./"

    # "Dataset" not included
    entities["./"]["@type"].pop(0)
    with pytest.raises(ValueError):
        find_root_entity_id(entities)

    # Check we're not trying to be too clever
    entities["./"]["@type"] = "NotADataset"
    with pytest.raises(ValueError):
        find_root_entity_id(entities)

# --------------------------------------------------------------------------------
# /test/test_readwrite.py:
-------------------------------------------------------------------------------- 1 | # Copyright 2019-2025 The University of Manchester, UK 2 | # Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE 3 | # Copyright 2020-2025 Barcelona Supercomputing Center (BSC), ES 4 | # Copyright 2020-2025 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT 5 | # Copyright 2022-2025 École Polytechnique Fédérale de Lausanne, CH 6 | # Copyright 2024-2025 Data Centre, SciLifeLab, SE 7 | # Copyright 2024-2025 National Institute of Informatics (NII), JP 8 | # Copyright 2025 Senckenberg Society for Nature Research (SGN), DE 9 | # Copyright 2025 European Molecular Biology Laboratory (EMBL), Heidelberg, DE 10 | # 11 | # Licensed under the Apache License, Version 2.0 (the "License"); 12 | # you may not use this file except in compliance with the License. 13 | # You may obtain a copy of the License at 14 | # 15 | # http://www.apache.org/licenses/LICENSE-2.0 16 | # 17 | # Unless required by applicable law or agreed to in writing, software 18 | # distributed under the License is distributed on an "AS IS" BASIS, 19 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20 | # See the License for the specific language governing permissions and 21 | # limitations under the License. 
import os
from pathlib import Path

from rocrate.rocrate import ROCrate


def _snapshot_payload(top):
    """Return ``{path: bytes}`` for every file under *top* not named ``ro-crate*``.

    Files whose name starts with ``ro-crate`` are skipped, so only the
    remaining files of the crate directory are compared.
    """
    snapshot = {}
    for root, _dirs, files in os.walk(top):
        root = Path(root)
        for name in files:
            if not name.startswith("ro-crate"):
                path = root / name
                snapshot[path] = path.read_bytes()
    return snapshot


def test_crate_update(test_data_dir, tmpdir, helpers):
    """Write a modified crate back onto its own directory and verify the files.

    One existing file is replaced, one new file is added, and the crate is
    written to the directory it was read from. Afterwards every expected
    file must be present with the expected content. The check is made in
    both directions so that a file that was never written (or was lost) is
    reported instead of being silently skipped.
    """
    crate_dir = test_data_dir / 'read_crate'
    crate = ROCrate(crate_dir)

    content_map = _snapshot_payload(crate_dir)

    # update an existing file
    upd_file_id = "test_file_galaxy.txt"
    upd_file = crate.dereference(upd_file_id)
    with open(upd_file.source, "rb") as f:
        content = f.read()
    upd_content = content + b"foobar\n"
    upd_source = tmpdir / "upd_source.txt"
    with open(upd_source, "wb") as f:
        f.write(upd_content)
    crate.delete(upd_file)
    crate.add_file(upd_source, upd_file_id)

    # add a new file
    new_file_id = "spam.txt"
    new_content = b"enlarge your crate\n"
    new_source = tmpdir / "new_source.txt"
    with open(new_source, "wb") as f:
        f.write(new_content)
    crate.add_file(new_source, new_file_id)

    crate.write(crate_dir)

    # Expected state: the original files, with the updated and new ones applied.
    expected = dict(content_map)
    expected[crate_dir / upd_file_id] = upd_content
    expected[crate_dir / new_file_id] = new_content

    actual = _snapshot_payload(crate_dir)

    # Iterating only over what is on disk would pass vacuously if a file was
    # never written, so check for missing files explicitly first.
    missing = sorted(str(p) for p in set(expected) - set(actual))
    assert not missing, f"files missing after write: {missing}"
    for path, content in actual.items():
        assert path in expected, f"unexpected file after write: {path}"
        assert content == expected[path]

# --------------------------------------------------------------------------------
# /test/test_utils.py:
# --------------------------------------------------------------------------------
# Copyright 2019-2025 The University of Manchester, UK
# Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE
# Copyright 2020-2025 Barcelona
Supercomputing Center (BSC), ES 4 | # Copyright 2020-2025 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT 5 | # Copyright 2022-2025 École Polytechnique Fédérale de Lausanne, CH 6 | # Copyright 2024-2025 Data Centre, SciLifeLab, SE 7 | # Copyright 2024-2025 National Institute of Informatics (NII), JP 8 | # Copyright 2025 Senckenberg Society for Nature Research (SGN), DE 9 | # Copyright 2025 European Molecular Biology Laboratory (EMBL), Heidelberg, DE 10 | # 11 | # Licensed under the Apache License, Version 2.0 (the "License"); 12 | # you may not use this file except in compliance with the License. 13 | # You may obtain a copy of the License at 14 | # 15 | # http://www.apache.org/licenses/LICENSE-2.0 16 | # 17 | # Unless required by applicable law or agreed to in writing, software 18 | # distributed under the License is distributed on an "AS IS" BASIS, 19 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20 | # See the License for the specific language governing permissions and 21 | # limitations under the License. 
import pytest

from rocrate.utils import subclasses, get_norm_value, is_url


class Pet:
    """Root of the small class hierarchy used by ``test_subclasses``."""


class Cat(Pet):
    """Direct subclass of ``Pet``."""


class Dog(Pet):
    """Direct subclass of ``Pet``; parent of ``Beagle``."""


class Beagle(Dog):
    """Subclass of ``Dog`` (second level below ``Pet``)."""


def test_subclasses():
    """``subclasses(Pet)`` yields all descendants, with Beagle ahead of Dog."""
    found = list(subclasses(Pet))
    assert set(found) == {Cat, Dog, Beagle}
    dog_position = found.index(Dog)
    beagle_position = found.index(Beagle)
    assert dog_position > beagle_position


def test_get_norm_value():
    """``get_norm_value`` flattens strings and ``@id`` references into a list."""
    single_forms = ({"@id": "foo"}, "foo", ["foo"], [{"@id": "foo"}])
    for raw in single_forms:
        record = {"@id": "#xyz", "name": raw}
        assert get_norm_value(record, "name") == ["foo"]

    mixed_forms = ([{"@id": "foo"}, "bar"], ["foo", {"@id": "bar"}])
    for raw in mixed_forms:
        record = {"@id": "#xyz", "name": raw}
        assert get_norm_value(record, "name") == ["foo", "bar"]

    # A missing property normalizes to an empty list.
    assert get_norm_value({"@id": "#xyz"}, "name") == []

    # A nested list is rejected.
    nested = {"@id": "#xyz", "name": [["foo"]]}
    with pytest.raises(ValueError):
        get_norm_value(nested, "name")


def test_is_url():
    """HTTP locations count as URLs; absolute filesystem paths do not."""
    for candidate in (
        "http://example.com/index.html",
        "http://example.com/",
        "http://example.com",
    ):
        assert is_url(candidate)
    for candidate in ("/etc/", "/etc", "/"):
        assert not is_url(candidate)

# --------------------------------------------------------------------------------
# /test/test_workflow_ro_crate.py:
# --------------------------------------------------------------------------------
# Copyright 2019-2025 The University of Manchester, UK
# Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE
# Copyright 2020-2025 Barcelona Supercomputing Center (BSC), ES
# Copyright 2020-2025 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT
# Copyright 2022-2025 École Polytechnique Fédérale de Lausanne, CH
# Copyright 2024-2025 Data Centre, SciLifeLab, SE
# Copyright 2024-2025 National Institute of Informatics (NII), JP
# Copyright 2025 Senckenberg
Society for Nature Research (SGN), DE 9 | # Copyright 2025 European Molecular Biology Laboratory (EMBL), Heidelberg, DE 10 | # 11 | # Licensed under the Apache License, Version 2.0 (the "License"); 12 | # you may not use this file except in compliance with the License. 13 | # You may obtain a copy of the License at 14 | # 15 | # http://www.apache.org/licenses/LICENSE-2.0 16 | # 17 | # Unless required by applicable law or agreed to in writing, software 18 | # distributed under the License is distributed on an "AS IS" BASIS, 19 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20 | # See the License for the specific language governing permissions and 21 | # limitations under the License. 22 | 23 | import pytest 24 | 25 | from rocrate.rocrate import ROCrate, make_workflow_rocrate 26 | try: 27 | import galaxy2cwl # noqa: F401 28 | except ImportError: 29 | CAN_CONVERT_TO_CWL = False 30 | else: 31 | CAN_CONVERT_TO_CWL = True 32 | 33 | WF_CRATE = "https://w3id.org/workflowhub/workflow-ro-crate" 34 | 35 | 36 | @pytest.mark.skipif(not CAN_CONVERT_TO_CWL, reason="cwl gen not enabled") 37 | def test_galaxy_wf_crate(test_data_dir, tmpdir, helpers): 38 | wf_id = 'test_galaxy_wf.ga' 39 | wf_path = test_data_dir / wf_id 40 | wf_crate = make_workflow_rocrate(wf_path, wf_type='Galaxy') 41 | assert isinstance(wf_crate, ROCrate) 42 | 43 | wf = wf_crate.dereference(wf_id) 44 | assert wf._default_type == "ComputationalWorkflow" 45 | assert wf_crate.mainEntity is wf 46 | lang = wf_crate.dereference(f"{WF_CRATE}#galaxy") 47 | assert hasattr(lang, "name") 48 | assert "version" not in lang 49 | assert wf.get("programmingLanguage") is lang 50 | assert wf.get("subjectOf") is not None 51 | assert helpers.WORKFLOW_DESC_TYPES.issubset(wf["subjectOf"].type) 52 | 53 | out_path = tmpdir / 'ro_crate_out' 54 | out_path.mkdir() 55 | wf_crate.write(out_path) 56 | json_entities = helpers.read_json_entities(out_path) 57 | helpers.check_wf_crate(json_entities, wf_id) 58 | 
wf_entity = json_entities[wf_id] 59 | assert "subjectOf" in wf_entity 60 | abstract_wf_id = wf_entity["subjectOf"]["@id"] 61 | abstract_wf_entity = json_entities[abstract_wf_id] 62 | assert helpers.WORKFLOW_DESC_TYPES.issubset(abstract_wf_entity["@type"]) 63 | 64 | wf_out_path = out_path / wf_id 65 | assert wf_out_path.exists() 66 | with open(wf_path) as f1, open(wf_out_path) as f2: 67 | assert f1.read() == f2.read() 68 | 69 | abstract_wf_out_path = out_path / abstract_wf_id 70 | assert abstract_wf_out_path.exists() 71 | 72 | 73 | def test_cwl_wf_crate(test_data_dir, tmpdir, helpers): 74 | wf_id = 'sample_cwl_wf.cwl' 75 | wf_path = test_data_dir / wf_id 76 | wf_crate = make_workflow_rocrate(wf_path, wf_type='CWL') 77 | assert isinstance(wf_crate, ROCrate) 78 | 79 | wf = wf_crate.dereference(wf_id) 80 | assert wf_crate.mainEntity is wf 81 | lang = wf_crate.dereference(f"{WF_CRATE}#cwl") 82 | assert hasattr(lang, "name") 83 | assert "version" not in lang 84 | assert wf.get("programmingLanguage") is lang 85 | assert "subjectOf" not in wf 86 | 87 | out_path = tmpdir / 'ro_crate_out' 88 | out_path.mkdir() 89 | wf_crate.write(out_path) 90 | json_entities = helpers.read_json_entities(out_path) 91 | helpers.check_wf_crate(json_entities, wf_id) 92 | 93 | wf_out_path = out_path / wf_id 94 | assert wf_out_path.exists() 95 | with open(wf_path) as f1, open(wf_out_path) as f2: 96 | assert f1.read() == f2.read() 97 | 98 | 99 | @pytest.mark.skipif(not CAN_CONVERT_TO_CWL, reason="cwl gen not enabled") 100 | def test_create_wf_include(test_data_dir, tmpdir, helpers): 101 | wf_id = 'test_galaxy_wf.ga' 102 | wf_path = test_data_dir / wf_id 103 | extra_file1 = test_data_dir / 'test_file_galaxy.txt' 104 | extra_file2 = test_data_dir / 'test_file_galaxy2.txt' 105 | files_list = [extra_file1, extra_file2] 106 | wf_crate = make_workflow_rocrate( 107 | wf_path, wf_type='Galaxy', include_files=files_list 108 | ) 109 | assert isinstance(wf_crate, ROCrate) 110 | 111 | wf = 
wf_crate.dereference(wf_id) 112 | assert wf_crate.mainEntity is wf 113 | lang = wf_crate.dereference(f"{WF_CRATE}#galaxy") 114 | assert hasattr(lang, "name") 115 | assert "version" not in lang 116 | assert wf.get("programmingLanguage") is lang 117 | assert wf.get("subjectOf") is not None 118 | assert helpers.WORKFLOW_DESC_TYPES.issubset(wf["subjectOf"].type) 119 | assert wf_crate.dereference(extra_file1.name) is not None 120 | assert wf_crate.dereference(extra_file2.name) is not None 121 | 122 | out_path = tmpdir / 'ro_crate_out' 123 | out_path.mkdir() 124 | wf_crate.write(out_path) 125 | json_entities = helpers.read_json_entities(out_path) 126 | helpers.check_wf_crate(json_entities, wf_id) 127 | 128 | wf_out_path = out_path / wf_id 129 | file1 = out_path / extra_file1.name 130 | file2 = out_path / extra_file2.name 131 | assert wf_out_path.exists() 132 | with open(wf_path) as f1, open(wf_out_path) as f2: 133 | assert f1.read() == f2.read() 134 | assert file1.exists() 135 | with open(extra_file1) as f1, open(file1) as f2: 136 | assert f1.read() == f2.read() 137 | assert file2.exists() 138 | with open(extra_file2) as f1, open(file2) as f2: 139 | assert f1.read() == f2.read() 140 | 141 | 142 | @pytest.mark.parametrize("lang_version", [None, "1.2", "v1.2"]) 143 | def test_cwl_lang_version(test_data_dir, lang_version): 144 | wf_id = 'sample_cwl_wf.cwl' 145 | wf_path = test_data_dir / wf_id 146 | crate = ROCrate() 147 | workflow = crate.add_workflow(wf_path, wf_id, lang_version=lang_version) 148 | lang = workflow["programmingLanguage"] 149 | lang_id = lang["identifier"] 150 | if lang_version is None: 151 | assert lang_id == "https://w3id.org/cwl/" 152 | assert "version" not in lang 153 | elif lang_version == "1.2": 154 | assert lang_id == "https://w3id.org/cwl/v1.2/" 155 | assert lang["version"] == "1.2" 156 | else: 157 | assert lang_id == "https://w3id.org/cwl/v1.2/" 158 | assert lang["version"] == "v1.2" 159 | 
-------------------------------------------------------------------------------- /test/test_wrroc.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2025 The University of Manchester, UK 2 | # Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE 3 | # Copyright 2020-2025 Barcelona Supercomputing Center (BSC), ES 4 | # Copyright 2020-2025 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT 5 | # Copyright 2022-2025 École Polytechnique Fédérale de Lausanne, CH 6 | # Copyright 2024-2025 Data Centre, SciLifeLab, SE 7 | # Copyright 2024-2025 National Institute of Informatics (NII), JP 8 | # Copyright 2025 Senckenberg Society for Nature Research (SGN), DE 9 | # Copyright 2025 European Molecular Biology Laboratory (EMBL), Heidelberg, DE 10 | # 11 | # Licensed under the Apache License, Version 2.0 (the "License"); 12 | # you may not use this file except in compliance with the License. 13 | # You may obtain a copy of the License at 14 | # 15 | # http://www.apache.org/licenses/LICENSE-2.0 16 | # 17 | # Unless required by applicable law or agreed to in writing, software 18 | # distributed under the License is distributed on an "AS IS" BASIS, 19 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20 | # See the License for the specific language governing permissions and 21 | # limitations under the License. 
22 | 23 | from rocrate.rocrate import ROCrate 24 | from rocrate.model import ContextEntity, SoftwareApplication 25 | 26 | 27 | def test_add_action(tmpdir): 28 | crate = ROCrate() 29 | instrument = SoftwareApplication(crate) 30 | crate.add(instrument) 31 | f_in_name = "f_in" 32 | f_in_path = tmpdir / f_in_name 33 | with open(f_in_path, "wt") as f: 34 | f.write("IN\n") 35 | f_in = crate.add_file(f_in_path) 36 | f_out_name = "f_out" 37 | f_out_path = tmpdir / f_out_name 38 | with open(f_out_path, "wt") as f: 39 | f.write("OUT\n") 40 | f_out = crate.add_file(f_out_path) 41 | param = crate.add(ContextEntity(crate, "#param", properties={ 42 | "@type": "PropertyValue", 43 | "name": "param_name", 44 | "value": "param_value", 45 | })) 46 | create_action = crate.add_action( 47 | instrument, 48 | object=[f_in, param], 49 | result=[f_out], 50 | properties={ 51 | "name": f"Run 1 of {instrument.id}", 52 | "startTime": "2018-10-25T15:46:35.211153", 53 | "endTime": "2018-10-25T15:46:43.020168", 54 | } 55 | ) 56 | assert create_action.type == "CreateAction" 57 | create_actions = crate.get_by_type("CreateAction") 58 | assert crate.root_dataset.get("mentions") == create_actions 59 | assert create_actions == [create_action] 60 | assert create_action.get("instrument") is instrument 61 | assert create_action.get("object") == [f_in, param] 62 | assert create_action.get("result") == [f_out] 63 | assert create_action.get("name") == f"Run 1 of {instrument.id}" 64 | assert create_action.get("startTime") == "2018-10-25T15:46:35.211153" 65 | assert create_action.get("endTime") == "2018-10-25T15:46:43.020168" 66 | 67 | activate_action = crate.add_action( 68 | instrument, 69 | object=[f_out], 70 | properties={ 71 | "@type": "ActivateAction", 72 | "name": f"Run 2 of {instrument.id}", 73 | "endTime": "2018-10-25T16:48:41.021563", 74 | } 75 | ) 76 | assert activate_action.type == "ActivateAction" 77 | assert crate.root_dataset.get("mentions") == [create_action, activate_action] 78 | assert 
activate_action.get("instrument") is instrument 79 | assert activate_action.get("object") == [f_out] 80 | assert "result" not in activate_action 81 | assert activate_action.get("name") == f"Run 2 of {instrument.id}" 82 | assert activate_action.get("endTime") == "2018-10-25T16:48:41.021563" 83 | 84 | 85 | def test_add_formal_parameter(): 86 | crate = ROCrate() 87 | name = "test_json_param" 88 | additionalType = "File" 89 | encodingFormat = "application/json" 90 | fp = crate.add_formal_parameter( 91 | name=name, 92 | additionalType=additionalType, 93 | properties={"encodingFormat": encodingFormat} 94 | ) 95 | assert fp.type == "FormalParameter" 96 | assert fp.get("name") == name 97 | assert fp.get("additionalType") == additionalType 98 | assert not fp.get("valueRequired") 99 | assert "defaultValue" not in fp 100 | assert fp.get("encodingFormat") == encodingFormat 101 | assert fp.get("conformsTo") == "https://bioschemas.org/profiles/FormalParameter/1.0-RELEASE" 102 | 103 | list_formal_parameter = crate.get_by_type("FormalParameter") 104 | assert list_formal_parameter == [fp] 105 | 106 | # Test with defaultValue and valueRequired 107 | defaultValue = "default" 108 | fp2 = crate.add_formal_parameter( 109 | name="param_string", 110 | additionalType="Text", 111 | defaultValue=defaultValue, 112 | valueRequired=True 113 | ) 114 | assert fp2.get("defaultValue") == defaultValue 115 | assert fp2.get("valueRequired") 116 | -------------------------------------------------------------------------------- /tools/add_boilerplate.py: -------------------------------------------------------------------------------- 1 | # Copyright 2019-2025 The University of Manchester, UK 2 | # Copyright 2020-2025 Vlaams Instituut voor Biotechnologie (VIB), BE 3 | # Copyright 2020-2025 Barcelona Supercomputing Center (BSC), ES 4 | # Copyright 2020-2025 Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT 5 | # Copyright 2022-2025 École Polytechnique Fédérale de Lausanne, CH 6 
# Copyright 2024-2025 Data Centre, SciLifeLab, SE
# Copyright 2024-2025 National Institute of Informatics (NII), JP
# Copyright 2025 Senckenberg Society for Nature Research (SGN), DE
# Copyright 2025 European Molecular Biology Laboratory (EMBL), Heidelberg, DE
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""\
Add or update Apache 2.0 boilerplate notice.
"""

import io
import datetime
import os
import re

THIS_DIR = os.path.dirname(os.path.abspath(__file__))
TOP_DIR = os.path.dirname(THIS_DIR)
LICENSE = os.path.join(TOP_DIR, "LICENSE")

# All files are read and written as UTF-8 regardless of the locale: the
# copyright owners below contain non-ASCII characters, which a non-UTF-8
# default encoding would corrupt or refuse to write.
ENCODING = "utf-8"

START_YEAR_MAP = {
    "The University of Manchester, UK": "2019",
    "Vlaams Instituut voor Biotechnologie (VIB), BE": "2020",
    "Barcelona Supercomputing Center (BSC), ES": "2020",
    "Center for Advanced Studies, Research and Development in Sardinia (CRS4), IT": "2020",
    "École Polytechnique Fédérale de Lausanne, CH": "2022",
    "Data Centre, SciLifeLab, SE": "2024",
    "National Institute of Informatics (NII), JP": "2024",
    "Senckenberg Society for Nature Research (SGN), DE": "2025",
    "European Molecular Biology Laboratory (EMBL), Heidelberg, DE": "2025",
}
THIS_YEAR = str(datetime.date.today().year)
BOILERPLATE_START = "Copyright [yyyy] [name of copyright owner]"
COPYRIGHT_PATTERN = re.compile(r"#\s*Copyright\s+(\d+)(-\d+)?\s+(.*)")
EXCLUDE_DIRS = {"build", "dist", "venv"}
EXCLUDE_FILES = {"_version.py"}


def copyright_lines():
    """Return one ``Copyright <years> <owner>`` line per START_YEAR_MAP entry.

    The year span is ``<start>-<current year>``, or just ``<start>`` when the
    owner's start year is the current year. Lines follow the map's order.
    """
    lines = []
    for owner, start_year in START_YEAR_MAP.items():
        span = start_year if start_year == THIS_YEAR else f"{start_year}-{THIS_YEAR}"
        lines.append(f"Copyright {span} {owner}")
    return lines


def get_boilerplate(license_path=LICENSE):
    """Return the boilerplate notice with the copyright placeholder filled in.

    The notice is the part of the license file that starts at
    BOILERPLATE_START; that placeholder is replaced by the lines from
    ``copyright_lines()``.

    :param license_path: path of the license file (defaults to LICENSE).
    :raises ValueError: if the placeholder is not found in the file.
    """
    with io.open(license_path, "rt", encoding=ENCODING) as f:
        license_text = f.read()
    start = license_text.find(BOILERPLATE_START)
    if start < 0:
        # Without this check the slice below would start at index -1 and
        # silently return the last character of the license.
        raise ValueError(
            f"{license_path}: boilerplate marker {BOILERPLATE_START!r} not found"
        )
    template = license_text[start:]
    return template.replace(BOILERPLATE_START, "\n".join(copyright_lines()))


def comment(text, char="#"):
    """Turn ``text`` into a comment block.

    Each line is stripped of surrounding whitespace and prefixed with
    ``char`` and a space; blank lines become ``char`` alone. The result
    always ends with a newline.
    """
    out_lines = []
    for line in text.splitlines():
        line = line.strip()
        out_lines.append(char if not line else f"{char} {line}")
    return "\n".join(out_lines) + "\n"


def _updated_stream(stream):
    """Yield the lines of ``stream`` with the copyright notices refreshed.

    The first line that matches COPYRIGHT_PATTERN is replaced by the current
    ``copyright_lines()``; every other matching line is dropped. All
    remaining lines are passed through unchanged.
    """
    updated = False
    for line in stream:
        if COPYRIGHT_PATTERN.match(line):
            if not updated:
                for notice in copyright_lines():
                    yield f"# {notice}\n"
                updated = True
        else:
            yield line


def add_boilerplate(fn):
    """Add the boilerplate notice to file ``fn``, or update an existing one.

    Empty files are left alone. If the file already contains a copyright
    comment, its copyright lines are refreshed in place. Otherwise the full
    notice is inserted at the top, after the shebang line if there is one.
    Files whose shebang does not mention ``python`` are left alone.
    """
    with io.open(fn, "rt", encoding=ENCODING) as f:
        text = f.read()
    if not text:
        return
    if COPYRIGHT_PATTERN.search(text):
        # update existing: build the new content before opening the file for
        # writing, so a failure cannot leave it truncated
        updated = "".join(_updated_stream(text.splitlines(True)))
        with io.open(fn, "wt", encoding=ENCODING) as f:
            f.write(updated)
        return
    # add new
    if text.startswith("#!"):
        # partition (unlike split) also handles a shebang with no newline
        head, _sep, tail = text.partition("\n")
        if "python" not in head:
            return
        head += "\n\n"
    else:
        head, tail = "", text
    boilerplate = comment(get_boilerplate())
    if not tail.startswith("\n"):
        boilerplate += "\n"
    with io.open(fn, "wt", encoding=ENCODING) as f:
        f.write(f"{head}{boilerplate}{tail}")


def main():
    """Process every ``.py`` file under TOP_DIR, printing each path.

    Hidden directories and those in EXCLUDE_DIRS are not visited; files
    named in EXCLUDE_FILES are skipped.
    """
    for root, dirs, files in os.walk(TOP_DIR):
        # In-place assignment so that os.walk does not descend into them.
        dirs[:] = [d for d in dirs if not d.startswith(".") and d not in EXCLUDE_DIRS]
        for name in files:
            if not name.endswith(".py") or name in EXCLUDE_FILES:
                continue
            path = os.path.join(root, name)
            print(path)
            add_boilerplate(path)


if __name__ == "__main__":
    main()