├── .github └── workflows │ ├── publish.yaml │ └── test_workflow.yaml ├── .gitignore ├── CITATION.cff ├── CONTRIBUTING.md ├── LICENSE ├── README.rst ├── docs ├── .readthedocs.yaml ├── Makefile ├── _static │ └── css │ │ └── custom.css ├── api │ ├── fairly.client.rst │ ├── fairly.dataset.rst │ ├── fairly.file.rst │ └── fairly.rst ├── conf.py ├── img │ ├── add-filles.png │ ├── clone1.png │ ├── clone2.png │ ├── contex-menu.png │ ├── create-dataset1.png │ ├── create-dataset2.png │ ├── create-directory.png │ ├── labs-home.png │ ├── labs-start.png │ ├── my-dataset.png │ ├── open-metadata.png │ ├── osf-banner2023.png │ ├── push-confirm.png │ ├── push-menu.png │ ├── start-jupyterlab.png │ ├── zenodo-cli-upload.png │ ├── zenodo-token.png │ └── zenodo-upload.png ├── index.rst ├── installation.rst ├── make.bat ├── modules.rst ├── package │ ├── account-datasets.ipynb │ ├── account-token.rst │ ├── archiving-datasets.ipynb │ ├── demo-4tu.ipynb │ └── demo-zenodo.ipynb ├── requirements.txt └── tutorials │ ├── cli.rst │ ├── jupyterlab.rst │ ├── python-api.ipynb │ └── workshop.rst ├── pyproject.toml ├── src └── fairly │ ├── __init__.py │ ├── cli │ ├── __init__.py │ ├── config.py │ └── dataset.py │ ├── client │ ├── __init__.py │ ├── dataverse.py │ ├── djehuty.py │ ├── figshare.py │ ├── invenio.py │ └── zenodo.py │ ├── data │ ├── config.json │ ├── languages │ │ ├── ISO-639-2_8859-1.tab │ │ ├── ISO-639-2_UTF-8.tab │ │ ├── ISO-639-3_8859-1.tab │ │ └── ISO-639-3_UTF-8.tab │ └── templates │ │ ├── default.yaml │ │ ├── figshare.yaml │ │ └── zenodo.yaml │ ├── dataset │ ├── __init__.py │ ├── local.py │ └── remote.py │ ├── diff.py │ ├── file │ ├── __init__.py │ ├── local.py │ └── remote.py │ ├── metadata.py │ └── person.py └── tests ├── __init__.py ├── conftest.py ├── test_cli.py └── test_fairly.py /.github/workflows/publish.yaml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. 2 | # They are provided by a third-party and are governed by 3 | # separate terms of service, privacy policy, and support 4 | # documentation. 5 | 6 | # GitHub recommends pinning actions to a commit SHA. 7 | # To get a newer version, you will need to update the SHA. 8 | # You can also reference a tag or branch, but the action may change without warning. 
9 | 10 | name: Publish 11 | 12 | on: 13 | release: 14 | types: [published] 15 | 16 | jobs: 17 | deploy: 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: actions/checkout@v4 21 | - name: Set up Python 22 | uses: actions/setup-python@v5 23 | with: 24 | python-version: '3.x' 25 | - name: Install dependencies 26 | run: | 27 | python -m pip install --upgrade pip 28 | pip install build hatch 29 | - name: Build package 30 | run: python -m build 31 | - name: Publish package 32 | uses: pypa/gh-action-pypi-publish@v1.8.14 33 | with: 34 | user: __token__ 35 | password: ${{ secrets.PYPI_API_TOKEN }} 36 | -------------------------------------------------------------------------------- /.github/workflows/test_workflow.yaml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | timeout-minutes: 10 9 | env: 10 | FAIRLY_FIGSHARE_TOKEN: ${{ secrets.FIGSHARE_TOKEN }} 11 | FAIRLY_ZENODO_TOKEN: ${{ secrets.ZENODO_TOKEN }} 12 | FAIRLY_4TU_TOKEN: ${{ secrets.FOURTU_TOKEN }} 13 | 14 | strategy: 15 | matrix: 16 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | - name: Set up Python ${{ matrix.python-version }} 21 | uses: actions/setup-python@v5 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | - name: Install dependencies 25 | run: | 26 | python -m pip install --upgrade pip 27 | pip install -e .[dev] 28 | - name: Test with pytest 29 | run: | 30 | pytest --cov=fairly tests/ 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | *.dataset 36 | tests/fixtures/vcr_cassettes*/ 37 | 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | vcr_cassettes/ 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | 135 | # experiments 136 | experiments 137 | 138 | # ignore bench-test in tests 139 | tests/notes.py 140 | _no_* 141 | 142 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | # This CITATION.cff file was generated with cffinit. 2 | # Visit https://bit.ly/cffinit to generate yours today! 3 | 4 | cff-version: 1.2.0 5 | title: Fairly 6 | message: Please cite this software using these metadata. 7 | type: software 8 | authors: 9 | - given-names: Serkan 10 | family-names: Girgin 11 | email: s.girgin@utwente.nl 12 | affiliation: University of Twente 13 | orcid: 'https://orcid.org/0000-0002-0156-185X' 14 | - given-names: Manuel 15 | family-names: Garcia Alvarez 16 | email: m.g.garciaalvarez@tudelft.nl 17 | affiliation: Delft University of Technology 18 | orcid: 'https://orcid.org/0000-0003-1579-9989' 19 | - given-names: Jose 20 | family-names: Urra Llanusa 21 | email: j.c.urrallanusa@tudelft.nl 22 | affiliation: Delft University of Technology 23 | orcid: 'https://orcid.org/0000-0002-9334-3998' 24 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | As an open source software project, we welcome contributions. Please read these guidelines before attempting to contribute to this project. 4 | 5 | ## Types of Contributions 6 | A contribution can be one of the following cases: 7 | 8 | 1. you have a question; 9 | 2. you think you may have found a bug (including unexpected behaviour); 10 | 3. you want to make some changes to the code base (e.g. to fix a bug, to add a new feature, to update documentation). 11 | 12 | ## Questions 13 | 14 | 1. use the search functionality [here](https://github.com/ITC-CRIB/fairly/issues) to see if someone already filed the same issue or question; 15 | 2. if your issue search did not yield any relevant results, make a new issue; 16 | 3. apply the "Question" label; apply other labels when relevant. 17 | 18 | ## Reporting Bugs 19 | 20 | If you think you may have found a bug: 21 | 22 | 1. use the search functionality [here](https://github.com/ITC-CRIB/fairly/issues) to see if someone already filed the same issue; 23 | 2. if your issue search did not yield any relevant results, make a new issue, making sure to provide enough information to the rest of the community to understand the cause and context of the problem.
Depending on the issue, you may want to include: 24 | - the [SHA hashcode](https://help.github.com/articles/autolinked-references-and-urls/#commit-shas) of the commit that is causing your problem; 25 | - some identifying information (name and version number) for dependencies you're using; 26 | - information about the operating system; 27 | - detailed steps to reproduce the bug. 28 | 3. apply relevant labels to the newly created issue. 29 | 30 | ## Changes to Source Code: fix bugs and add features 31 | 32 | 1. (important) announce your plan to the rest of the community before you start working. This announcement should be in the form of a (new) issue; 33 | 2. (important) wait until some consensus is reached about whether your idea is a good idea; 34 | 3. if needed, fork the repository to your own GitHub profile and create your feature branch out of the latest master commit. While working on your feature branch, make sure to stay up to date with the master branch by pulling in changes; 35 | 4. make sure the existing tests still work; 36 | 5. add your tests (if applicable); 37 | 6. update or expand the documentation; 38 | 7. push your feature branch to (your fork of) this repository on GitHub; 39 | 8. create the pull request, e.g. following the instructions [here](https://docs.github.com/en/github/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request). 40 | 41 | > If you feel like you have a valuable contribution to make, but you don't know how to write or run tests for it or create the documentation, don't let this discourage you from making the pull request; we can help you! Just go ahead and submit the pull request, but keep in mind that you might be asked to append additional commits to your pull request. 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 JupyterFAIR Team 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. list-table:: 2 | :widths: 25 25 3 | :header-rows: 1 4 | 5 | * - `fair-software.nl <https://fair-software.nl>`_ recommendations 6 | - Badges 7 | * - \1. Code repository 8 | - |GitHub Badge| 9 | * - \2. License 10 | - |License Badge| 11 | * - \3. 
Community Registry 12 | - |PyPI Badge| 13 | * - \4. Enable Citation 14 | - |Zenodo Badge| 15 | * - **Other best practices** 16 | - 17 | * - Continuous integration 18 | - |Python Build| |Python Publish| 19 | * - Documentation 20 | - |Documentation Status| 21 | * - Anaconda package 22 | - |Anaconda| |Anaconda Downloads| 23 | 24 | .. |Anaconda| image:: https://anaconda.org/conda-forge/fairly/badges/version.svg 25 | :target: https://anaconda.org/conda-forge/fairly 26 | :alt: Anaconda Package 27 | 28 | .. |Anaconda Downloads| image:: https://anaconda.org/conda-forge/fairly/badges/downloads.svg 29 | :target: https://anaconda.org/conda-forge/fairly 30 | :alt: Anaconda Downloads 31 | 32 | .. |GitHub Badge| image:: https://img.shields.io/github/v/release/ITC-CRIB/fairly 33 | :target: https://github.com/ITC-CRIB/fairly 34 | :alt: GitHub Badge 35 | 36 | .. |License Badge| image:: https://img.shields.io/badge/License-MIT-yellow.svg 37 | :target: https://opensource.org/licenses/MIT 38 | :alt: License Badge 39 | 40 | .. |PyPI Badge| image:: https://img.shields.io/pypi/v/fairly?colorB=blue 41 | :target: https://pypi.org/project/fairly/ 42 | :alt: PyPI Badge 43 | 44 | .. |Zenodo Badge| image:: https://zenodo.org/badge/DOI/10.5281/zenodo.7759648.svg 45 | :target: https://doi.org/10.5281/zenodo.7759648 46 | :alt: Zenodo Badge 47 | 48 | .. |Python Build| image:: https://github.com/ITC-CRIB/fairly/actions/workflows/test_workflow.yaml/badge.svg 49 | :target: https://github.com/ITC-CRIB/fairly/actions/workflows/test_workflow.yaml 50 | :alt: Python Build 51 | 52 | .. |Python Publish| image:: https://github.com/ITC-CRIB/fairly/actions/workflows/publish.yaml/badge.svg 53 | :target: https://github.com/ITC-CRIB/fairly/actions/workflows/publish.yaml 54 | :alt: Python Publish 55 | 56 | .. |Documentation Status| image:: https://readthedocs.org/projects/fairly/badge/?version=latest 57 | :target: https://fairly.readthedocs.io/en/latest/ 58 | :alt: Documentation Status 59 | 60 | 61 | fairly 62 | ====== 63 | 64 | A package to create, publish and clone research datasets. 65 | 66 | |License: MIT| 67 | 68 | Installation 69 | ------------ 70 | 71 | *fairly* requires Python 3.8 or later, and `ruamel.yaml` version *0.17.26* or later. It can be installed directly 72 | from **PyPI** or **conda-forge**. 73 | 74 | .. code:: shell 75 | 76 | # Using pip 77 | pip install fairly 78 | 79 | .. code:: shell 80 | 81 | # using anaconda or miniconda 82 | conda install conda-forge::fairly 83 | 84 | 85 | Installing from source 86 | ~~~~~~~~~~~~~~~~~~~~~~ 87 | 88 | 1. Clone or download the `source 89 | code <https://github.com/ITC-CRIB/fairly.git>`__: 90 | 91 | .. code:: shell 92 | 93 | git clone https://github.com/ITC-CRIB/fairly.git 94 | 95 | 2. Go to the root directory: 96 | 97 | .. code:: shell 98 | 99 | cd fairly/ 100 | 101 | 3. Install using pip: 102 | 103 | .. code:: shell 104 | 105 | pip install . 106 | 107 | Usage 108 | ----- 109 | 110 | Basic example to create a local research dataset and deposit it to a 111 | repository: 112 | 113 | .. code:: python 114 | 115 | import fairly 116 | 117 | # Initialize a local dataset 118 | dataset = fairly.init_dataset('/path/dataset') 119 | 120 | # Set metadata 121 | dataset.metadata['license'] = 'MIT' 122 | dataset.set_metadata( 123 | title='My dataset', 124 | keywords=['FAIR', 'research', 'data'], 125 | authors=[ 126 | '0000-0002-0156-185X', 127 | {'name': 'John', 'surname': 'Doe'} 128 | ] 129 | ) 130 | 131 | # Add data files 132 | dataset.includes.extend([ 133 | 'README.txt', 134 | '*.csv', 135 | 'train/*.jpg' 136 | ]) 137 | 138 | # Save dataset 139 | dataset.save() 140 | 141 | # Upload to a data repository 142 | remote_dataset = dataset.upload('zenodo') 143 | 
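A dataset saved this way can be reopened later from its directory to continue working on it. A minimal sketch using the same API (the path and title are the ones from the example above):

.. code:: python

    import fairly

    # Reopen the previously saved local dataset
    dataset = fairly.dataset('/path/dataset')

    # Metadata set before saving is available again
    dataset.metadata['title']
    >>> 'My dataset'
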
144 | Basic example to access a remote dataset and store it locally: 145 | 146 | .. code:: python 147 | 148 | import fairly 149 | 150 | # Open a remote dataset 151 | dataset = fairly.dataset('doi:10.4121/21588096.v1') 152 | 153 | # Get dataset information 154 | dataset.id 155 | >>> {'id': '21588096', 'version': '1'} 156 | 157 | dataset.url 158 | >>> 'https://data.4tu.nl/articles/dataset/.../21588096/1' 159 | 160 | dataset.size 161 | >>> 33339 162 | 163 | len(dataset.files) 164 | >>> 6 165 | 166 | dataset.metadata 167 | >>> Metadata({'keywords': ['Earthquakes', 'precursor', ...], ...}) 168 | 169 | # Update metadata 170 | dataset.metadata['keywords'] = ['Landslides', 'precursor'] 171 | dataset.save_metadata() 172 | 173 | # Store dataset to a local directory (i.e. clone dataset) 174 | local_dataset = dataset.store('/path/dataset') 175 | 176 | Currently, the package supports the following research data management 177 | platforms: 178 | 179 | - `Invenio <https://inveniosoftware.org>`__ 180 | - `Figshare <https://figshare.com>`__ 181 | - `Djehuty <https://github.com/4TUResearchData/djehuty>`__ 182 | (experimental) 183 | 184 | All research data repositories based on the listed platforms are 185 | supported. 186 | 187 | For more details and examples, consult the `package 188 | documentation <https://fairly.readthedocs.io/en/latest/>`__. 189 | 190 | Testing 191 | ------- 192 | 193 | Unit tests can be run with the ``pytest`` command in the root directory. 194 | 195 | Contributions 196 | ------------- 197 | 198 | Read the `guidelines <CONTRIBUTING.md>`__ to know how you can be part of 199 | this open source project. 200 | 201 | JupyterLab Extension 202 | -------------------- 203 | 204 | An extension for JupyterLab is being developed in a `different 205 | repository <https://github.com/ITC-CRIB/jupyter-fairly>`__. 206 | 207 | Citation 208 | -------- 209 | 210 | Please cite this software as follows: 211 | 212 | *Girgin, S., Garcia Alvarez, M., & Urra Llanusa, J., fairly: a package 213 | to create, publish and clone research datasets [Computer software]* 214 | 215 | Acknowledgements 216 | ---------------- 217 | 218 | This research is funded by the `Dutch Research Council (NWO) Open 219 | Science 220 | Fund `__, 221 | File No. 203.001.114. 222 | 223 | Project members: 224 | 225 | - `Center of Expertise in Big Geodata Science, University of Twente, 226 | Faculty ITC `__ 227 | - `Digital Competence Centre, TU Delft `__ 228 | - `4TU.ResearchData <https://data.4tu.nl>`__ 229 | 230 | .. 
|License: MIT| image:: https://img.shields.io/badge/License-MIT-yellow.svg 231 | :target: https://opensource.org/licenses/MIT 232 | -------------------------------------------------------------------------------- /docs/.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the version of Python and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.11" 13 | # You can also specify versions for other tools like so: 14 | # nodejs: "16" 15 | # rust: "1.55" 16 | # golang: "1.17" 17 | 18 | # Build documentation in the docs/ directory with Sphinx 19 | sphinx: 20 | configuration: docs/conf.py 21 | 22 | # If using Sphinx, optionally build your docs in additional formats such as PDF 23 | formats: 24 | - pdf 25 | 26 | # Optionally declare the Python requirements required to build your docs 27 | python: 28 | install: 29 | - requirements: docs/requirements.txt -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | /* Prevent stacking properties */ 2 | /* https://github.com/readthedocs/sphinx_rtd_theme/issues/1301 */ 3 | dl.property { 4 | display: block !important; 5 | width: 100%; 6 | } 7 | /* Prevent double colon for roles */ 8 | /* https://github.com/sphinx-doc/sphinx/issues/10594 */ 9 | dl.field-list .colon { 10 | display: none; 11 | } -------------------------------------------------------------------------------- /docs/api/fairly.client.rst: -------------------------------------------------------------------------------- 1 | fairly.client package 2 | ===================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | fairly.client.djehuty module 8 | ---------------------------- 9 | 10 | .. automodule:: fairly.client.djehuty 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | fairly.client.figshare module 16 | ----------------------------- 17 | 18 | .. automodule:: fairly.client.figshare 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | fairly.client.invenio module 24 | ---------------------------- 25 | 26 | .. automodule:: fairly.client.invenio 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. 
automodule:: fairly.client 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | :noindex: 39 | -------------------------------------------------------------------------------- /docs/api/fairly.dataset.rst: -------------------------------------------------------------------------------- 1 | fairly.dataset package 2 | ====================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | fairly.dataset.local module 8 | --------------------------- 9 | 10 | .. automodule:: fairly.dataset.local 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | fairly.dataset.remote module 16 | ---------------------------- 17 | 18 | .. automodule:: fairly.dataset.remote 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: fairly.dataset 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | :noindex: 31 | -------------------------------------------------------------------------------- /docs/api/fairly.file.rst: -------------------------------------------------------------------------------- 1 | fairly.file package 2 | =================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | fairly.file.local module 8 | ------------------------ 9 | 10 | .. automodule:: fairly.file.local 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | fairly.file.remote module 16 | ------------------------- 17 | 18 | .. automodule:: fairly.file.remote 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: fairly.file 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/api/fairly.rst: -------------------------------------------------------------------------------- 1 | .. _`appi`: 2 | 3 | fairly package 4 | ============== 5 | 6 | Subpackages 7 | ----------- 8 | 9 | .. toctree:: 10 | :maxdepth: 4 11 | 12 | fairly.client 13 | fairly.dataset 14 | fairly.file 15 | 16 | Submodules 17 | ---------- 18 | 19 | fairly.diff module 20 | ------------------ 21 | 22 | .. automodule:: fairly.diff 23 | :members: 24 | :undoc-members: 25 | :show-inheritance: 26 | 27 | fairly.metadata module 28 | ---------------------- 29 | 30 | .. automodule:: fairly.metadata 31 | :members: 32 | :undoc-members: 33 | :show-inheritance: 34 | 35 | fairly.person module 36 | -------------------- 37 | 38 | .. automodule:: fairly.person 39 | :members: 40 | :undoc-members: 41 | :show-inheritance: 42 | 43 | Module contents 44 | --------------- 45 | 46 | .. automodule:: fairly 47 | :members: 48 | :undoc-members: 49 | :show-inheritance: 50 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 
2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | 9 | project = 'Fairly Toolset' 10 | copyright = '2023, Serkan Girgin, Manuel Garcia Alvarez, Jose Urra Llanusa' 11 | author = 'Serkan Girgin, Manuel Garcia Alvarez, Jose Urra Llanusa' 12 | release = '1.0.0' 13 | 14 | # -- General configuration --------------------------------------------------- 15 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 16 | 17 | extensions = [ 18 | 'sphinx.ext.autodoc', 19 | 'sphinx.ext.viewcode', 20 | 'sphinx.ext.napoleon', 21 | 'sphinx_rtd_theme', 22 | 'nbsphinx', 23 | 'sphinx_copybutton', 24 | ] 25 | 26 | templates_path = ['_templates'] 27 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 28 | 29 | 30 | 31 | # -- Options for HTML output ------------------------------------------------- 32 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 33 | 34 | html_theme = 'sphinx_rtd_theme' 35 | html_static_path = ['_static'] 36 | html_css_files = [ 37 | 'css/custom.css', 38 | ] 39 | -------------------------------------------------------------------------------- /docs/img/add-filles.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ITC-CRIB/fairly/7fdb1327c35daffc491e22e72855814883880f5a/docs/img/add-filles.png -------------------------------------------------------------------------------- /docs/img/clone1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ITC-CRIB/fairly/7fdb1327c35daffc491e22e72855814883880f5a/docs/img/clone1.png -------------------------------------------------------------------------------- /docs/img/clone2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ITC-CRIB/fairly/7fdb1327c35daffc491e22e72855814883880f5a/docs/img/clone2.png -------------------------------------------------------------------------------- /docs/img/contex-menu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ITC-CRIB/fairly/7fdb1327c35daffc491e22e72855814883880f5a/docs/img/contex-menu.png -------------------------------------------------------------------------------- /docs/img/create-dataset1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ITC-CRIB/fairly/7fdb1327c35daffc491e22e72855814883880f5a/docs/img/create-dataset1.png -------------------------------------------------------------------------------- /docs/img/create-dataset2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ITC-CRIB/fairly/7fdb1327c35daffc491e22e72855814883880f5a/docs/img/create-dataset2.png -------------------------------------------------------------------------------- /docs/img/create-directory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ITC-CRIB/fairly/7fdb1327c35daffc491e22e72855814883880f5a/docs/img/create-directory.png 
-------------------------------------------------------------------------------- /docs/img/labs-home.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ITC-CRIB/fairly/7fdb1327c35daffc491e22e72855814883880f5a/docs/img/labs-home.png -------------------------------------------------------------------------------- /docs/img/labs-start.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ITC-CRIB/fairly/7fdb1327c35daffc491e22e72855814883880f5a/docs/img/labs-start.png -------------------------------------------------------------------------------- /docs/img/my-dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ITC-CRIB/fairly/7fdb1327c35daffc491e22e72855814883880f5a/docs/img/my-dataset.png -------------------------------------------------------------------------------- /docs/img/open-metadata.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ITC-CRIB/fairly/7fdb1327c35daffc491e22e72855814883880f5a/docs/img/open-metadata.png -------------------------------------------------------------------------------- /docs/img/osf-banner2023.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ITC-CRIB/fairly/7fdb1327c35daffc491e22e72855814883880f5a/docs/img/osf-banner2023.png -------------------------------------------------------------------------------- /docs/img/push-confirm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ITC-CRIB/fairly/7fdb1327c35daffc491e22e72855814883880f5a/docs/img/push-confirm.png -------------------------------------------------------------------------------- /docs/img/push-menu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ITC-CRIB/fairly/7fdb1327c35daffc491e22e72855814883880f5a/docs/img/push-menu.png -------------------------------------------------------------------------------- /docs/img/start-jupyterlab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ITC-CRIB/fairly/7fdb1327c35daffc491e22e72855814883880f5a/docs/img/start-jupyterlab.png -------------------------------------------------------------------------------- /docs/img/zenodo-cli-upload.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ITC-CRIB/fairly/7fdb1327c35daffc491e22e72855814883880f5a/docs/img/zenodo-cli-upload.png -------------------------------------------------------------------------------- /docs/img/zenodo-token.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ITC-CRIB/fairly/7fdb1327c35daffc491e22e72855814883880f5a/docs/img/zenodo-token.png -------------------------------------------------------------------------------- /docs/img/zenodo-upload.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ITC-CRIB/fairly/7fdb1327c35daffc491e22e72855814883880f5a/docs/img/zenodo-upload.png -------------------------------------------------------------------------------- /docs/index.rst: 
-------------------------------------------------------------------------------- 1 | .. fairly documentation master file, created by 2 | sphinx-quickstart on Mon Oct 3 21:00:21 2022. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Fairly Toolset Documentation 7 | ================================== 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Fairly Package 12 | 13 | installation 14 | 15 | .. toctree:: 16 | :maxdepth: 1 17 | :caption: Tutorials 18 | 19 | tutorials/jupyterlab 20 | tutorials/cli 21 | tutorials/python-api 22 | 23 | .. toctree:: 24 | :maxdepth: 1 25 | :caption: Fairly API 26 | 27 | modules 28 | api/fairly.client 29 | api/fairly.dataset 30 | api/fairly.file 31 | 32 | 33 | Indices and tables 34 | ================== 35 | 36 | * :ref:`genindex` 37 | * :ref:`modindex` 38 | * :ref:`search` 39 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | .. _installation: 2 | 3 | Installation 4 | ================ 5 | 6 | The *Fairly Toolset* provides functionality for the core tasks of preparing, uploading and downloading datasets from research data repositories. The toolset currently provides integration with data repositories based on `Invenio <https://inveniosoftware.org>`_ and `Figshare <https://figshare.com>`_. 7 | 8 | **What's Included:** 9 | 10 | * fairly Python package 11 | * Command Line Interface (CLI) 12 | * JupyterLab extension 13 | 14 | **Requirements:** 15 | 16 | * Python 3.8 or higher 17 | * pip 20.0 or higher 18 | * ruamel.yaml 0.17.26 or higher 19 | * JupyterLab 3.x 20 | 21 | Installing the Toolset 22 | ------------------------ 23 | 24 | You can install the *full toolset* by installing the JupyterLab extension from PyPI. The fairly package and CLI will be installed automatically. 25 | 26 | Linux / MacOS 27 | ''''''''''''''''''' 28 | 29 | Install the toolset using `pip`: 30 | 31 | .. code-block:: shell 32 | 33 | pip install jupyter-fairly 34 | 35 | 36 | Windows 37 | ''''''''''''''''''' 38 | 39 | 1. Download the ZIP file with the `latest release `_ of the JupyterLab extension to a directory. 40 | 2. Unzip the content. 41 | 3. Using the **terminal**, go to the directory where the ZIP file is located and then to the `jupyter_fairly` sub-directory. 42 | 4. Type and run the following command. You need to add Python to the system PATH for this to work. 43 | 44 | .. code-block:: shell 45 | 46 | python -m pip install . 47 | 48 | .. warning:: 49 | For the above to work, you need Python in the PATH environment variable on Windows. If you are not sure that is the case, open the shell and type :code:`python --version`. You should see the version of Python on the screen; if you do not, follow these steps to `add Python to the PATH on Windows `_ 50 | 51 | Installing Python Package Only 52 | -------------------------------- 53 | 54 | If all you need is the *fairly* Python package and the CLI, you can install them as follows. 55 | 56 | Linux / MacOS 57 | ''''''''''''''''''' 58 | 59 | On the terminal, type the following for PyPI: 60 | 61 | .. code-block:: shell 62 | 63 | pip install fairly 64 | 65 | Or, if using Anaconda or Miniconda: 66 | 67 | .. code-block:: shell 68 | 69 | conda install conda-forge::fairly 70 | 71 | 
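A quick way to verify the installation is to import the package and create a client for one of the supported repositories (a minimal check; no access token is needed for reading public datasets):

.. code-block:: python

   import fairly

   # If this runs without errors, the package is installed correctly
   zenodo = fairly.client("zenodo")
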
72 | Installing from Source 73 | ''''''''''''''''''''''''' 74 | 75 | Installing *fairly* from source requires `setuptools` version 49.0 or later and `pip`. 76 | 77 | 1. Clone or download the `source code <https://github.com/ITC-CRIB/fairly.git>`_: 78 | 79 | .. code-block:: shell 80 | 81 | git clone https://github.com/ITC-CRIB/fairly.git 82 | 83 | 84 | 2. Unzip if necessary, and go to the `fairly` directory: 85 | 86 | .. code-block:: shell 87 | 88 | cd fairly/ 89 | 90 | 91 | 3. Install the package: 92 | 93 | .. code-block:: shell 94 | 95 | pip install . 96 | 97 | .. important:: 98 | Currently, the toolset only supports data repositories based on `Invenio <https://inveniosoftware.org>`_ and `Figshare <https://figshare.com>`_. For examples on how to use the toolset, read the `Tutorials `_. 99 | 100 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/modules.rst: -------------------------------------------------------------------------------- 1 | fairly 2 | ====== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | api/fairly 8 | -------------------------------------------------------------------------------- /docs/package/account-datasets.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Access Account Datasets\n", 8 | "\n", 9 | "With *fairly*, you can access the datasets in a repository's user account. This tutorial shows you how to do it for the case of 4TU.ResearchData. The procedure is the same for Zenodo.\n", 10 | "\n", 11 | "**Requirements:**\n", 12 | "\n", 13 | "* A 4TU.ResearchData account\n", 14 | "* A personal access token. See [configuring access token](https://fairly.readthedocs.io/en/latest/package/account-token.html) if you don't have one." 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "## 1. Connect to an account\n", 22 | "\n", 23 | "To connect to a repository's account, we need to pass a personal token when creating a client, or we can store tokens in a configuration file at `~/.fairly/config.json`" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 3, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "# Passing a token directly\n", 33 | "import fairly\n", 34 | "\n", 35 | "fourtu = fairly.client(\"figshare\", token=\"<4tu-token>\")" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "> To store your tokens, create a JSON file like the one below and store it at `~/.fairly/config.json`. 
You can store tokens for other repositories by adding them to this file as `\"\": {\"token\": \"\"}`\n", 43 | "\n", 44 | "```json\n", 45 | "\n", 46 | "{\n", 47 | " \"4tu\": {\n", 48 | "\t\t\"token\": \"<4tu-token>\"\n", 49 | "\t}\n", 50 | "}\n", 51 | "```" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "## 2. Retrieve account datasets\n", 59 | "\n", 60 | "You can see the datasets in an account by calling the `get_account_datasets()` method of a client. This retrieves the list of datasets in the account. Then, you can use the `id` and `metadata` properties of a *dataset* to find more details." 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 4, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "name": "stdout", 70 | "output_type": "stream", 71 | "text": [ 72 | "There are 2 datasets in this account\n" 73 | ] 74 | } 75 | ], 76 | "source": [ 77 | "# Retrieve the datasets in the account\n", 78 | "my_datasets = fourtu.get_account_datasets()\n", 79 | "\n", 80 | "# Display the number of datasets\n", 81 | "print(\"There are\", len(my_datasets), \"datasets in this account\")\n" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 5, 87 | "metadata": {}, 88 | "outputs": [ 89 | { 90 | "name": "stdout", 91 | "output_type": "stream", 92 | "text": [ 93 | "Dataset Ids:\n", 94 | "[{'id': '20758348', 'version': None}, {'id': '20752675', 'version': None}]\n" 95 | ] 96 | } 97 | ], 98 | "source": [ 99 | "# Display the dataset IDs \n", 100 | "print(\"Dataset Ids:\")\n", 101 | "print([dataset.id for dataset in my_datasets])" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 6, 107 | "metadata": {}, 108 | "outputs": [ 109 | { 110 | "data": { 111 | "text/plain": [ 112 | "Metadata({'authors': [Person({'fullname': 'Manuel Garcia Alvarez', 'figshare_id': 10645703})], 'license': 'CC BY 4.0', 'title': 'test-dataset', 'type': 'software', 'access_type': 'open', 'custom_fields': {'Publisher': '4TU.ResearchData', 'Language': '', 'Time coverage': '', 'Geolocation': '', 'Geolocation Longitude': '', 'Geolocation Latitude': '', 'Format': '', 'Data Link': [], 'Derived From': [], 'Same As': [], 'Organizations': ''}, 'embargo_type': 'file', 'categories': ['Communications Technologies'], 'figshare_id': {'id': '20758348', 'version': None}})" 113 | ] 114 | }, 115 | "execution_count": 6, 116 | "metadata": {}, 117 | "output_type": "execute_result" 118 | } 119 | ], 120 | "source": [ 121 | "# Metadata of the first dataset\n", 122 | "my_datasets[0].metadata" 123 | ] 124 | } 125 | ], 126 | "metadata": { 127 | "kernelspec": { 128 | "display_name": "Python 3.10.4 64-bit", 129 | "language": "python", 130 | "name": "python3" 131 | }, 132 | "language_info": { 133 | "codemirror_mode": { 134 | "name": "ipython", 135 | "version": 3 136 | }, 137 | "file_extension": ".py", 138 | "mimetype": "text/x-python", 139 | "name": "python", 140 | "nbconvert_exporter": "python", 141 | "pygments_lexer": "ipython3", 142 | "version": "3.10.4" 143 | }, 144 | "vscode": { 145 | "interpreter": { 146 | "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" 147 | } 148 | } 149 | }, 150 | "nbformat": 4, 151 | "nbformat_minor": 4 152 | } 153 | -------------------------------------------------------------------------------- /docs/package/account-token.rst: -------------------------------------------------------------------------------- 1 | .. 
_access token: 2 | 3 | Configuring Access Token 4 | ########################### 5 | 6 | 7 | *fairly* can be used to access datasets owned by a user of a data repository. For 4TU.ResearchData and Zenodo, we can do that by configuring access tokens. 8 | 9 | Creating a personal access token 10 | ===================================== 11 | 12 | A personal access token allows you to connect to a user account remotely without the need for a *username* and *password*. 13 | 14 | Zenodo 15 | ------------- 16 | 17 | 1. Register for a Zenodo account if you do not already have one. 18 | #. Go to your :guilabel:`Applications`, and click on :guilabel:`New token` under **Personal access tokens**. 19 | #. Enter a name for your token. 20 | #. Select the OAuth scopes you need (:guilabel:`deposit:write` and :guilabel:`deposit:actions`). 21 | #. Click :guilabel:`Create` 22 | #. An access token will be shown, copy it and store it. **The token will only be shown once.** 23 | #. Click on :guilabel:`Save` 24 | 25 | 26 | 4TU.ResearchData 27 | ------------------- 28 | 29 | 1. Register for a 4TU.ResearchData account if you do not already have one. 30 | #. Go to your :guilabel:`Applications`, and click on :guilabel:`Create Personal Token`. 31 | #. Enter a short description for your token, for example a name, and click on :guilabel:`Save` 32 | #. An access token will be shown, copy it and store it. **The token will only be shown once.** 33 | #. Click on :guilabel:`Done` 34 | 35 | Connecting to an Account 36 | ============================ 37 | 38 | Connecting to an account is as simple as passing a token when creating a 4TU.ResearchData or Zenodo client. 39 | 40 | .. code-block:: python 41 | 42 | import fairly 43 | 44 | # For 4TU.ResearchData 45 | fourtu = fairly.client("figshare", token="<4tu-token>") 46 | 47 | # For Zenodo 48 | zenodo = fairly.client("zenodo", token="<zenodo-token>") 49 | 50 | Storing Tokens 51 | ================ 52 | 53 | To store your tokens, create a JSON file like the one below and store it at `~/.fairly/config.json`. You can store tokens for other repositories by adding them to this file as `"<repository-id>": {"token": "<token>"}` 54 | 55 | .. code-block:: json 56 | 57 | { 58 | "4tu": { 59 | "token": "<4tu-token>" 60 | } 61 | } 62 | 63 | 
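Once the configuration file is in place, tokens are picked up automatically, and clients can be created without passing a token explicitly. A minimal sketch, assuming the configuration file above:

.. code-block:: python

    import fairly

    # The token is read from ~/.fairly/config.json
    fourtu = fairly.client("4tu")

    # Account-level calls now authenticate with the stored token
    my_datasets = fourtu.get_account_datasets()
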
-------------------------------------------------------------------------------- /docs/package/archiving-datasets.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Archiving Dataset\n", 8 | "\n", 9 | "With **fairly**, we can remotely archive and edit datasets in a user account. Users can prepare a dataset for archiving by editing metadata, defining which files are part of a dataset, and uploading them to a data repository. One of the purposes of **fairly** is to *remove the need to prepare metadata and data for every repository to which a dataset will be archived*, thereby saving time and effort and lowering the barriers to practicing Open Science.\n", 10 | "This tutorial shows what is possible by using the 4TU.ResearchData repository. The procedure is similar for Zenodo.\n", 11 | "\n", 12 | "**Requirements:**\n", 13 | "\n", 14 | "* A 4TU.ResearchData account\n", 15 | "* A personal access token. See [configuring access token](https://fairly.readthedocs.io/en/latest/package/account-token.html) if you don't have one.\n", 16 | "* Files to be archived. We will use a hypothetical case in this tutorial.\n", 17 | "\n", 18 | "> For this tutorial, we assume that our goal is to archive a dataset in 4TU.ResearchData that we previously archived in Zenodo. We will use the dataset [Quality and timing of crowd-based water level class observations](https://zenodo.org/records/3929547) as an example.\n", 19 | " " 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "## 1. Download the Zenodo dataset\n", 27 | "\n", 28 | "First, we need to download the [Quality and timing of crowd-based water level class observations](https://zenodo.org/records/3929547) dataset, using its URL. If you did this already in the tutorial on *downloading datasets from Zenodo*, you can skip this step." 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 3, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "import fairly\n", 38 | "\n", 39 | "# Create a Zenodo client\n", 40 | "zenodo = fairly.client(\"zenodo\")\n", 41 | "\n", 42 | "# Connect and download a dataset\n", 43 | "source_dataset = zenodo.get_dataset(\"https://zenodo.org/records/3929547\") \n", 44 | "source_dataset.store(\"./quality/\") " 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "## 2. Editing Metadata\n", 52 | "\n", 53 | "Now we can load the downloaded dataset and edit its metadata. For example, we can add a few more *keywords* and edit the *license*." 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 4, 59 | "metadata": {}, 60 | "outputs": [ 61 | { 62 | "name": "stdout", 63 | "output_type": "stream", 64 | "text": [ 65 | "{'access_type': 'open', 'authors': [Person({'fullname': 'Etter, Simon', 'institution': 'University of Zurich, Department of Geography', 'name': 'Simon', 'orcid_id': '0000-0002-7553-9102', 'surname': 'Etter'}), Person({'fullname': 'Strobl, Barbara', 'institution': 'University of Zurich, Department of Geography', 'name': 'Barbara', 'orcid_id': '0000-0001-5530-4632', 'surname': 'Strobl'}), Person({'fullname': 'Seibert, Jan', 'institution': 'University of Zurich, Department of Geography', 'name': 'Jan', 'orcid_id': '0000-0002-6314-2124', 'surname': 'Seibert'}), Person({'fullname': 'van Meerveld, Ilja (H.J.)', 'institution': 'University of Zurich, Department of Geography', 'name': 'Ilja (H.J.)', 'orcid_id': '0000-0002-7547-3270', 'surname': 'van Meerveld'})], 'description': '
This are the data and the R-scripts used for the manuscript "Quality and timing of crowd-based water level class observations" accepted for publication in the journal Hydrological Processes in July 2020 as a Scientific Briefing. To run the code, just run the R-script with the name "RunThisForResults.R". Results will be written to the "Figures" and the "Results" folder.
', 'doi': '10.5281/zenodo.3929547', 'grants': ['10.13039/501100001711::200021_163008'], 'keywords': ['CrowdWater', 'Hydrology'], 'language': 'eng', 'license': 'CC-BY-4.0', 'prereserve_doi': {'doi': '10.5281/zenodo.3929547', 'recid': 3929547}, 'publication_date': '2020-02-20', 'related_identifiers': [{'identifier': '10.5281/zenodo.3676350', 'relation': 'isVersionOf', 'scheme': 'doi'}], 'title': 'Data and R-Scripts for \"Quality and timing of crowd-based water level class observations\"', 'type': 'dataset', 'version': '2', 'zenodo_id': {'id': '3929547'}}\n" 66 | ] 67 | } 68 | ], 69 | "source": [ 70 | "import fairly\n", 71 | "\n", 72 | "# Load a previously downloaded dataset by passing its path\n", 73 | "local_dataset = fairly.dataset(\"./quality/\")\n", 74 | "\n", 75 | "# Display the metadata\n", 76 | "print(local_dataset.metadata)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 5, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "# Edit keywords\n", 86 | "local_dataset.metadata[\"keywords\"] = [\"CrowdWater\", \"Hydrology\", \"made by fairly\"]\n", 87 | "\n", 88 | "# Edit the license name to match what is required by 4TU.ResearchData\n", 89 | "local_dataset.metadata[\"license\"] = \"CC BY 4.0\"" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "## 3. Archive to 4TU.ResearchData\n", 97 | "Now we can create a new dataset in a 4TU.ResearchData account. We assume a **personal access token** has already been added to `~/.fairly/config.json` " 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 6, 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | "name": "stdout", 107 | "output_type": "stream", 108 | "text": [ 109 | "DataForUploadToZenodo.zip, 26765942/10485760\n", 110 | "DataForUploadToZenodo.zip, 26765942/20971520\n", 111 | "DataForUploadToZenodo.zip, 26765942/26765942\n" 112 | ] 113 | }, 114 | { 115 | "data": { 116 | "text/plain": [ 117 | "" 118 | ] 119 | }, 120 | "execution_count": 6, 121 | "metadata": {}, 122 | "output_type": "execute_result" 123 | } 124 | ], 125 | "source": [ 126 | "local_dataset.upload(\"figshare\", notify=fairly.notify)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "> We could continue uploading files or editing the metadata in a similar way. For now, **publishing** the dataset should be done via the web interface of 4TU.ResearchData." 
134 | ] 135 | } 136 | ], 137 | "metadata": { 138 | "kernelspec": { 139 | "display_name": "Python 3 (ipykernel)", 140 | "language": "python", 141 | "name": "python3" 142 | }, 143 | "language_info": { 144 | "codemirror_mode": { 145 | "name": "ipython", 146 | "version": 3 147 | }, 148 | "file_extension": ".py", 149 | "mimetype": "text/x-python", 150 | "name": "python", 151 | "nbconvert_exporter": "python", 152 | "pygments_lexer": "ipython3", 153 | "version": "3.10.4" 154 | }, 155 | "vscode": { 156 | "interpreter": { 157 | "hash": "262683ceb590c1664a72ae4b5fb24aafe692d2539af9aafb5e1323673742110e" 158 | } 159 | } 160 | }, 161 | "nbformat": 4, 162 | "nbformat_minor": 4 163 | } 164 | -------------------------------------------------------------------------------- /docs/package/demo-4tu.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "441a22a6-7527-48c3-951e-7cab0937707c", 6 | "metadata": {}, 7 | "source": [ 8 | "# Download Datasets from 4TU.ResearchData\n", 9 | "\n", 10 | "**fairly** can download public datasets from 4TU.ResearchData.\n", 11 | "The *4TU.ResearchData* repository uses Figshare as a platform for managing research datasets. For this example, we will use the dataset [EDoM measurement campaign](https://data.4tu.nl/articles/dataset/EDoM_measurement_campaign_full_data_from_the_lower_Ems_River/20308263). This dataset contains 28 files of different types (`.txt`, `.pdf`), and it is about `278 MB` in size. \n", 12 | "\n", 13 | "We can fetch a dataset using either its ID or its URL. For 4TU.ResearchData, the dataset ID is the last part of the URL that appears in the web browser. The dataset has ID: `20308263`\n" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "id": "9b1a66e5", 19 | "metadata": {}, 20 | "source": [ 21 | "## 1. Connect to 4TU.ResearchData\n", 22 | "To connect to data repositories, we use clients. A client manages the connection to a specific data repository. We can create a client to connect to 4TU.ResearchData as follows:" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 4, 28 | "id": "3ddbd026-62e2-4a2c-a62e-127f06a4b0f3", 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "import fairly \n", 33 | "\n", 34 | "fourtu = fairly.client(\"4tu\") " 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "id": "f088481e", 40 | "metadata": {}, 41 | "source": [ 42 | "## 2. Connect to a dataset\n", 43 | "\n", 44 | "Now, we can connect to a *public* dataset by calling the `get_dataset()` method and using the dataset ID, its URL, or its DOI." 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 5, 50 | "id": "075a2d23-85ee-4415-bd53-888e11627f61", 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# Using ID\n", 55 | "# dataset = fourtu.get_dataset(\"20308263\") \n", 56 | "\n", 57 | "# Using URL address\n", 58 | "dataset = fourtu.get_dataset(\"https://data.4tu.nl/articles/dataset/EDoM_measurement_campaign_full_data_from_the_lower_Ems_River/20308263\") \n", 59 | "\n", 60 | "# Using DOI\n", 61 | "# fairly has a convenience method that infers the client from DOI\n", 62 | "# dataset = fairly.dataset(\"https://doi.org/10.4121/19519618.v1\")" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "id": "59c971ed", 68 | "metadata": {}, 69 | "source": [ 70 | "## 3. 
Explore dataset's metadata\n", 71 | "\n", 72 | "Once we have made a connection to a dataset, we can access its metadata as stored in the data repository by using the `metadata` property." 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 3, 78 | "id": "30023980", 79 | "metadata": {}, 80 | "outputs": [ 81 | { 82 | "data": { 83 | "text/plain": [ 84 | "Metadata({'authors': [Person({'fullname': 'Bas van Maren', 'orcid_id': '0000-0001-5820-3212', 'figshare_id': 11844539}), Person({'fullname': 'Andreas Engels', 'figshare_id': 12901508})], 'keywords': ['Hydrodynamics', 'Sediment dynamics', 'Collection: The Ems-Dollard Measurement (EDoM) campaign'], 'description': '
A large amount of long term monitoring data collected during the Edom measurement campaign has been published in Net CDF as part of the collection \\'Edom measurements campaign: data from long-term monitoring\\' ( https://doi.org/10.4121/19519618.v1). This dataset provides the full subset of the long term mooring data (including oxygen and flow velocities) in ASCII text format, and only for the lower Ems River
', 'license': 'CC BY-NC-SA 4.0', 'title': 'EDoM measurement campaign: full data from the lower Ems River', 'doi': '10.4121/20308263.v1', 'type': 'dataset', 'access_type': 'open', 'custom_fields': {'Publisher': '4TU.ResearchData', 'Language': '', 'Time coverage': '2017-2019', 'Geolocation': 'Ems estuary', 'Geolocation Longitude': '7.04', 'Geolocation Latitude': '53.30', 'Format': 'ASCII text', 'Data Link': [], 'Derived From': [], 'Same As': [], 'Organizations': 'Niedersächsischer Landesbetrieb für Wasserwirtschaft Küsten (NLWKN);'}, 'embargo_type': 'file', 'categories': ['Physical Geography and Environmental Geoscience'], 'online_date': '2022-07-14T10:56:04', '4tu_id': {'id': '20308263', 'version': None}})" 85 | ] 86 | }, 87 | "execution_count": 3, 88 | "metadata": {}, 89 | "output_type": "execute_result" 90 | } 91 | ], 92 | "source": [ 93 | "# Retrieve metadata from the data repository\n", 94 | "dataset.metadata" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "id": "2523d219", 100 | "metadata": {}, 101 | "source": [ 102 | "## 4. List dataset's files\n", 103 | "\n", 104 | "We can list the files of a dataset using the `files` property. The result is a Python dictionary where names of files become keys of the dictionary." 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 4, 110 | "id": "f9f51002", 111 | "metadata": {}, 112 | "outputs": [ 113 | { 114 | "name": "stdout", 115 | "output_type": "stream", 116 | "text": [ 117 | "There are 28 files in this dataset\n", 118 | "{'CsEmspier_01052017-01052019_from_NLWKN.txt': 'CsEmspier_01052017-01052019_from_NLWKN.txt', 'CsGandesum_01052017-01052019_from_NLWKN.txt': 'CsGandesum_01052017-01052019_from_NLWKN.txt', 'CsKnock_01052017-01052019_from_NLWKN.txt': 'CsKnock_01052017-01052019_from_NLWKN.txt', 'CsMP1_01052017-01052019_from_WSV.txt': 'CsMP1_01052017-01052019_from_WSV.txt', 'CsPogum_01052017-01052019_from_NLWKN.txt': 'CsPogum_01052017-01052019_from_NLWKN.txt', 'CsTerborg_01052017-01052019_from_NLWKN.txt': 'CsTerborg_01052017-01052019_from_NLWKN.txt', 'Messung_Gewaesserguete_EMS_NLWKN.pdf': 'Messung_Gewaesserguete_EMS_NLWKN.pdf', 'O2Emspier_01052017-01052019_from_NLWKN.txt': 'O2Emspier_01052017-01052019_from_NLWKN.txt', 'O2Gandersum_01052017-01052019_from_NLWKN.txt': 'O2Gandersum_01052017-01052019_from_NLWKN.txt', 'O2Knock_01052017-01052019_from_NLWKN.txt': 'O2Knock_01052017-01052019_from_NLWKN.txt', 'O2MP1_01052017-01052019_from_WSV.txt': 'O2MP1_01052017-01052019_from_WSV.txt', 'O2Pogum_01052017-01052019_from_NLWKN.txt': 'O2Pogum_01052017-01052019_from_NLWKN.txt', 'O2Terborg_01052017-01052019_from_NLWKN.txt': 'O2Terborg_01052017-01052019_from_NLWKN.txt', 'Q_Versen_052017-052019.txt': 'Q_Versen_052017-052019.txt', 'readme.txt': 'readme.txt', 'SpEmspier_01052017-01052019_from_NLWKN.txt': 'SpEmspier_01052017-01052019_from_NLWKN.txt', 'SpGandersum_01052017-01052019_from_NLWKN.txt': 'SpGandersum_01052017-01052019_from_NLWKN.txt', 'SpKnock_01052017-01052019_from_NLWKN.txt': 'SpKnock_01052017-01052019_from_NLWKN.txt', 'SpMP1_01052017-01052019_from_WSV.txt': 'SpMP1_01052017-01052019_from_WSV.txt', 'SpPogum_01052017-01052019_from_NLWKN.txt': 'SpPogum_01052017-01052019_from_NLWKN.txt', 'SpTerborg_01052017-01052019_from_NLWKN.txt': 'SpTerborg_01052017-01052019_from_NLWKN.txt', 'U_Emden_01052017-01052019_from_WSA_Emden.txt': 'U_Emden_01052017-01052019_from_WSA_Emden.txt', 'U_Knock_01052017-01052019_from_WSA_Emden.txt': 'U_Knock_01052017-01052019_from_WSA_Emden.txt', 'U_MP1_01052017-01052019_from_WSV.txt': 
'U_MP1_01052017-01052019_from_WSV.txt', 'U_Terborg_01052017-01052019_from_WSA_Emden.txt': 'U_Terborg_01052017-01052019_from_WSA_Emden.txt', 'WL_Emden_01052017-01052019_from_WSA_Emden.txt': 'WL_Emden_01052017-01052019_from_WSA_Emden.txt', 'WL_Knock_01052017-01052019_from_WSA_Emden.txt': 'WL_Knock_01052017-01052019_from_WSA_Emden.txt', 'WL_Terborg_01052017-01052019_from_WSA_Emden.txt': 'WL_Terborg_01052017-01052019_from_WSA_Emden.txt'}\n" 119 | ] 120 | } 121 | ], 122 | "source": [ 123 | "# List files associated with the dataset\n", 124 | "files = dataset.files\n", 125 | "\n", 126 | "print(\"There are\", len(files), \"files in this dataset\")\n", 127 | "\n", 128 | "print(files)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "id": "39c30dce-9c82-4d7f-9151-8c65ef4fadfd", 134 | "metadata": {}, 135 | "source": [ 136 | "## 5. Download a file\n", 137 | "\n", 138 | "We can download a single file in a dataset by using its name. For example, this dataset contains a file with the name `'CsEmspier_01052017-01052019_from_NLWKN.txt'`. \n", 139 | "\n", 140 | "> The `path` parameter can be used to define where to store the file; otherwise, the file will be stored in the working directory.\n" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 5, 146 | "id": "71a25448", 147 | "metadata": {}, 148 | "outputs": [ 149 | { 150 | "data": { 151 | "text/plain": [ 152 | "'CsEmspier_01052017-01052019_from_NLWKN.txt'" 153 | ] 154 | }, 155 | "execution_count": 5, 156 | "metadata": {}, 157 | "output_type": "execute_result" 158 | } 159 | ], 160 | "source": [ 161 | "# Select a file from the dataset\n", 162 | "single_file = dataset.files['CsEmspier_01052017-01052019_from_NLWKN.txt']\n", 163 | "\n", 164 | "# Download the file\n", 165 | "fourtu.download_file(single_file)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "id": "d7351589", 171 | "metadata": {}, 172 | "source": [ 173 | "## 6. Download a dataset\n", 174 | "\n", 175 | "We can download all files and metadata of a dataset using the `store()` function. We need to provide a `path` to a directory to store the dataset. If the directory does not exist, it will be created."
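,
    "\n",
    "> As a follow-up (a minimal sketch; `./demo` is the path used in the code cell below), `store()` returns a local dataset object, so the stored copy can later be reopened as a local dataset:\n",
    "\n",
    "```python\n",
    "# Sketch: reopen the stored copy as a local dataset\n",
    "local_dataset = fairly.dataset(\"./demo\")\n",
    "```\n"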
176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 6, 181 | "id": "506d536b-53f0-482a-95e3-1d37eb5c3676", 182 | "metadata": {}, 183 | "outputs": [ 184 | { 185 | "data": { 186 | "text/plain": [ 187 | "" 188 | ] 189 | }, 190 | "execution_count": 6, 191 | "metadata": {}, 192 | "output_type": "execute_result" 193 | } 194 | ], 195 | "source": [ 196 | "# This will download about 278 MB\n", 197 | "dataset.store(\"./demo\")" 198 | ] 199 | } 200 | ], 201 | "metadata": { 202 | "kernelspec": { 203 | "display_name": "Python 3.10.4 ('venv': venv)", 204 | "language": "python", 205 | "name": "python3" 206 | }, 207 | "language_info": { 208 | "codemirror_mode": { 209 | "name": "ipython", 210 | "version": 3 211 | }, 212 | "file_extension": ".py", 213 | "mimetype": "text/x-python", 214 | "name": "python", 215 | "nbconvert_exporter": "python", 216 | "pygments_lexer": "ipython3", 217 | "version": "3.10.4" 218 | }, 219 | "vscode": { 220 | "interpreter": { 221 | "hash": "7d5fcea36288094484ea4026c704bd47a44b80f0a87a063450ae6a7b7e01ed32" 222 | } 223 | } 224 | }, 225 | "nbformat": 4, 226 | "nbformat_minor": 5 227 | } 228 | -------------------------------------------------------------------------------- /docs/package/demo-zenodo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "441a22a6-7527-48c3-951e-7cab0937707c", 6 | "metadata": {}, 7 | "source": [ 8 | "# Download Datasets from Zenodo\n", 9 | "\n", 10 | "*fairly* can also download public datasets from Zenodo.\n", 11 | "The *Zenodo* repository is a platform for managing research datasets. For this example, we will use the dataset [Quality and timing of crowd-based water level class observations](https://zenodo.org/records/3929547). This dataset is a single compressed file of type `.zip`, which contains several other files and directories, and it is about `27 MB` in size. \n", 12 | "\n", 13 | "In Zenodo, the ID of a dataset can be found by looking at its DOI; the ID is the last part of the DOI (a number). For example, the DOI for the second version of the dataset is `10.5281/zenodo.3929547`, therefore its ID is `3929547`. We can fetch a dataset using either its ID or its URL.\n", 14 | "\n" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "id": "a88e8a7d", 20 | "metadata": {}, 21 | "source": [ 22 | "## 1. Connect to Zenodo\n", 23 | "To connect to data repositories we use clients. A client manages the connection to a specific data repository. We can create a client to connect to Zenodo as follows:" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 7, 29 | "id": "3ddbd026-62e2-4a2c-a62e-127f06a4b0f3", 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "import fairly\n", 34 | "\n", 35 | "zenodo = fairly.client(id=\"zenodo\")" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "id": "f088481e", 41 | "metadata": {}, 42 | "source": [ 43 | "## 2. Connect to a dataset\n", 44 | "Now, we can connect to a *public* dataset by calling the `get_dataset()` method and using either the dataset ID or its URL."
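,
    "\n",
    "> As an aside — a sketch based on the Python API tutorial elsewhere in these docs, so treat the DOI form for Zenodo as an assumption — a dataset can also be opened directly from its DOI with the top-level helper:\n",
    "\n",
    "```python\n",
    "import fairly\n",
    "\n",
    "# Sketch: open the dataset via its DOI instead of a client\n",
    "dataset = fairly.dataset(\"doi:10.5281/zenodo.3929547\")\n",
    "```\n"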
45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 8, 50 | "id": "075a2d23-85ee-4415-bd53-888e11627f61", 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# USING ID\n", 55 | "dataset = zenodo.get_dataset(\"3929547\") \n", 56 | "\n", 57 | "# USING URL\n", 58 | "dataset = zenodo.get_dataset(\"https://zenodo.org/records/3929547\") " 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "id": "59c971ed", 64 | "metadata": {}, 65 | "source": [ 66 | "## 3. Explore dataset's metadata\n", 67 | "\n", 68 | "Once we have made a connection to a dataset, we can access its metadata (as stored in the data repository) by calling the `metadata` property of a dataset. " 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 9, 74 | "id": "30023980", 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "data": { 79 | "text/plain": [ 80 | "Metadata({'type': 'dataset', 'publication_date': '2020-02-20', 'title': 'Data and R-Scripts for \"Quality and timing of crowd-based water level class observations\"', 'authors': [Person({'fullname': 'Etter, Simon', 'institution': 'University of Zurich, Department of Geography', 'orcid_id': '0000-0002-7553-9102', 'name': 'Simon', 'surname': 'Etter'}), Person({'fullname': 'Strobl, Barbara', 'institution': 'University of Zurich, Department of Geography', 'orcid_id': '0000-0001-5530-4632', 'name': 'Barbara', 'surname': 'Strobl'}), Person({'fullname': 'Seibert, Jan', 'institution': 'University of Zurich, Department of Geography', 'orcid_id': '0000-0002-6314-2124', 'name': 'Jan', 'surname': 'Seibert'}), Person({'fullname': 'van Meerveld, Ilja (H.J.)', 'institution': 'University of Zurich, Department of Geography', 'orcid_id': '0000-0002-7547-3270', 'name': 'Ilja (H.J.)', 'surname': 'van Meerveld'})], 'description': '

This are the data and the R-scripts used for the manuscript "Quality and timing of crowd-based water level class observations" accepted for publication in the journal Hydrological Processes in July 2020 as a Scientific Briefing. To run the code, just run the R-script with the name "RunThisForResults.R". Results will be written to the "Figures" and the "Results" folder.

', 'access_type': 'open', 'license': 'CC-BY-4.0', 'doi': '10.5281/zenodo.3929547', 'keywords': ['CrowdWater', 'Hydrology'], 'zenodo_id': {'id': '3929547'}, 'prereserve_doi': {'doi': '10.5281/zenodo.3929547', 'recid': 3929547}, 'related_identifiers': [{'identifier': '10.5281/zenodo.3676350', 'relation': 'isVersionOf', 'scheme': 'doi'}], 'grants': ['10.13039/501100001711::200021_163008'], 'version': '2', 'language': 'eng'})" 81 | ] 82 | }, 83 | "execution_count": 9, 84 | "metadata": {}, 85 | "output_type": "execute_result" 86 | } 87 | ], 88 | "source": [ 89 | "# Retrieve metadata from the data repository\n", 90 | "dataset.metadata" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "id": "2523d219", 96 | "metadata": {}, 97 | "source": [ 98 | "## 4. List dataset's files\n", 99 | "\n", 100 | "We can list the files of a dataset using the `files` property. The result is a Python dictionary where the names of files are the keys. In this case, the dataset contains only one file." 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 10, 106 | "id": "f9f51002", 107 | "metadata": {}, 108 | "outputs": [ 109 | { 110 | "name": "stdout", 111 | "output_type": "stream", 112 | "text": [ 113 | "There are 1 files in this dataset\n", 114 | "{'DataForUploadToZenodo.zip': 'DataForUploadToZenodo.zip'}\n" 115 | ] 116 | } 117 | ], 118 | "source": [ 119 | "# List files (data) associated with the dataset\n", 120 | "files = dataset.files\n", 121 | "\n", 122 | "print(\"There are\", len(files), \"files in this dataset\")\n", 123 | "\n", 124 | "print(files)" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "id": "5fed2262", 130 | "metadata": {}, 131 | "source": [ 132 | "## 5. Download a file\n", 133 | "\n", 134 | "We can download a file in the dataset by using its name, for example `'DataForUploadToZenodo.zip'`. \n", 135 | "\n", 136 | "> The `path` parameter can be used to define where to store the file; otherwise, the file will be stored in the working directory.\n" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 5, 142 | "id": "71a25448", 143 | "metadata": {}, 144 | "outputs": [ 145 | { 146 | "data": { 147 | "text/plain": [ 148 | "'DataForUploadToZenodo.zip'" 149 | ] 150 | }, 151 | "execution_count": 5, 152 | "metadata": {}, 153 | "output_type": "execute_result" 154 | } 155 | ], 156 | "source": [ 157 | "# Select a file to download from the dataset\n", 158 | "single_file = dataset.files['DataForUploadToZenodo.zip']\n", 159 | "\n", 160 | "# Download the file\n", 161 | "zenodo.download_file(single_file, path=\"./from-zenodo\")" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "id": "d7351589", 167 | "metadata": {}, 168 | "source": [ 169 | "## 6. Download a dataset\n", 170 | "\n", 171 | "We can also download all files and metadata of a dataset using the `store()` function. We need to provide a path to a directory to store the dataset. If the directory does not exist, it will be created."
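,
    "\n",
    "> Since this dataset is a single `.zip` archive, the `extract` option mentioned in the code cell below can be used to unzip it after downloading — a minimal sketch:\n",
    "\n",
    "```python\n",
    "# Sketch: store the dataset and extract the downloaded archive\n",
    "local_dataset = dataset.store(\"./quality\", extract=True)\n",
    "```\n"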
172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 11, 177 | "id": "506d536b-53f0-482a-95e3-1d37eb5c3676", 178 | "metadata": {}, 179 | "outputs": [ 180 | { 181 | "data": { 182 | "text/plain": [ 183 | "" 184 | ] 185 | }, 186 | "execution_count": 11, 187 | "metadata": {}, 188 | "output_type": "execute_result" 189 | } 190 | ], 191 | "source": [ 192 | "# This will download about 27 MB\n", 193 | "dataset.store(\"./quality\") # use extract=True for unzipping\n" 194 | ] 195 | } 196 | ], 197 | "metadata": { 198 | "kernelspec": { 199 | "display_name": "Python 3.10.4 ('venv': venv)", 200 | "language": "python", 201 | "name": "python3" 202 | }, 203 | "language_info": { 204 | "codemirror_mode": { 205 | "name": "ipython", 206 | "version": 3 207 | }, 208 | "file_extension": ".py", 209 | "mimetype": "text/x-python", 210 | "name": "python", 211 | "nbconvert_exporter": "python", 212 | "pygments_lexer": "ipython3", 213 | "version": "3.10.4" 214 | }, 215 | "vscode": { 216 | "interpreter": { 217 | "hash": "7d5fcea36288094484ea4026c704bd47a44b80f0a87a063450ae6a7b7e01ed32" 218 | } 219 | } 220 | }, 221 | "nbformat": 4, 222 | "nbformat_minor": 5 223 | } 224 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx-copybutton==0.5.0 2 | fairly 3 | nbsphinx>=0.9.3 4 | sphinx-rtd-theme==1.3.0 -------------------------------------------------------------------------------- /docs/tutorials/cli.rst: -------------------------------------------------------------------------------- 1 | Using the CLI 2 | ===================== 3 | 4 | This tutorial shows how to use the *fairly* Command Line Interface (CLI) to clone and create datasets, and to edit their metadata. 5 | 6 | .. important:: 7 | **Windows Users.** For the following to work, you need Python in the PATH environment variable on Windows. If you're not sure that is the case, open the shell and type :code:`python --version`. You should see the version of Python on the screen. If you see otherwise, follow these steps to `add Python to the PATH on Windows `_ 8 | 9 | 1. Open a *Terminal* or *Shell* 10 | 11 | 2. Test that the *fairly* CLI is accessible in your terminal by calling the help command: 13 | 14 | .. code:: shell 15 | 16 | fairly --help 17 | 18 | You should see the following: 19 | 20 | .. code:: shell 21 | 22 | Usage: fairly [OPTIONS] COMMAND [ARGS]... 23 | 24 | ╭─ Options ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ 25 | │ --install-completion [bash|zsh|fish|powershell|pwsh] Install completion for the specified shell. [default: None] │ 26 | │ --show-completion [bash|zsh|fish|powershell|pwsh] Show completion for the specified shell, to copy it or customize │ 27 | │ the installation. │ 28 | │ [default: None] │ 29 | │ --help Show this message and exit. │ 30 | ╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ 31 | ╭─ Commands ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ 32 | │ config │ 33 | │ dataset │ 34 | │ list-repos List all repositories supported by fairly │ 35 | │ list-user-datasets List all datasets in the specified repository by doi, title, and publication_date │ 36 | ╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ 37 | 38 | Cloning a Dataset 39 | -------------------- 40 | 41 | 1. Create a new directory and subdirectory :code:`workshop/clone` 42 | 43 | .. code:: shell 44 | 45 | # On Windows 46 | mkdir workshop 47 | mkdir workshop\clone 48 | 49 | # On Linux/MacOS 50 | mkdir -p workshop/clone 51 | 52 | 2. Go to the :code:`clone` directory 53 | 54 | .. code:: shell 55 | 56 | # On Windows 57 | cd workshop\clone 58 | 59 | # On Linux/MacOS 60 | cd workshop/clone 61 | 62 | 3. Clone this `Zenodo dataset `_, using its URL: 63 | 64 | .. code:: shell 65 | 66 | fairly dataset clone --url https://zenodo.org/records/7748718#.ZBo1SNLMJhF 67 | 68 | 4. Explore the content of the dataset; notice that the file(s) of the dataset have been downloaded and that its metadata is in the :code:`manifest.yaml` file. 69 | 70 | .. code:: shell 71 | 72 | manifest.yaml Trixi.jl-v0.5.14.zip 73 | 74 | 75 | Creating a Local fairly Dataset 76 | -------------------------------------- 77 | 78 | We can use the CLI to initialize a new dataset. 79 | 80 | 1. Create a new directory called :code:`mydataset-cli` inside the *workshop* directory. Then move into the directory 81 | 82 | .. code:: shell 83 | 84 | # On Windows/Linux/MacOS 85 | mkdir mydataset-cli 86 | cd mydataset-cli 87 | 88 | 2. Create a local dataset using the Zenodo metadata template, as follows 89 | 90 | .. code:: shell 91 | 92 | fairly dataset create zenodo 93 | 94 | 95 | Include Files in your Dataset 96 | '''''''''''''''''''''''''''''''' 97 | 98 | Add some folders and files to the :code:`mydataset-cli` directory. You can do this using the file explorer/browser. You can add files of your own, but be careful not to include anything that you want to keep confidential. Also consider the total size of the files you will add; the larger the size, the longer the upload will take. Also remember that for the current Zenodo API each file should be :code:`100MB` or smaller; this will change in the future. 99 | 100 | If you do not want to use files of your own, you can download and use the `dummy-data `_ 101 | 102 | Editing the Manifest 103 | '''''''''''''''''''''' 104 | 105 | The :code:`manifest.yaml` file contains several sections to describe the metadata of a dataset. Some of the sections and fields are compulsory (they are required by the data repository), others are optional. In this example, you started a *fairly* dataset using the template for the Zenodo repository, but you could also do so for 4TU.ResearchData. 106 | 107 | However, if you are not sure which repository you will use to publish a dataset, use the :guilabel:`default` option. This template contains the most common sections and fields for the repositories supported by *fairly*. 108 | 109 | .. tip:: 110 | Independently of which template you use to start a dataset, the :code:`manifest.yaml` file is interoperable between data repositories, with very few exceptions.
This means that you can use the same manifest file for various data repositories. Different templates are provided only as a guide to indicate what metadata is more relevant for each data repository. 111 | 112 | 113 | 1. Open the :code:`manifest.yaml` using a text editor. On Linux/MacOS you can use **nano** or **vim**. On Windows, use **Notepad** 114 | 115 | 2. Substitute the content of the :code:`manifest.yaml` with the text below. *Here, we use only a small set of fields that are possible for Zenodo.* 116 | 117 | .. code-block:: yaml 118 | 119 | metadata: 120 | type: dataset 121 | publication_date: '2023-03-22' 122 | title: My Title CLI 123 | authors: 124 | - fullname: Surname, FirstName 125 | affiliation: Your institution 126 | description: A dataset from the Fairly Toolset workshop 127 | access_type: open 128 | license: CC0-1.0 129 | doi: '' 130 | prereserve_doi: 131 | keywords: 132 | - workshop 133 | - dummy data 134 | notes: '' 135 | related_identifiers: [] 136 | communities: [] 137 | grants: [] 138 | subjects: [] 139 | version: 1.0.0 140 | language: eng 141 | template: zenodo 142 | files: 143 | includes: 144 | - ARP1_.info 145 | - ARP1_d01.zip 146 | - my_code.py 147 | - Survey_AI.csv 148 | - wind-mill.jpg 149 | excludes: [] 150 | 151 | 152 | 3. Edit the dataset metadata by typing the information you want to add. For example, you can change the title, authors, description, etc. Save the file when you are done. 153 | 154 | .. important:: 155 | * The :code:`includes` field must list the files and directories (folders) you want to include as part of the dataset. *Included files and directories will be uploaded to the data repository* 156 | * The :code:`excludes` field can be used for explicitly indicating what files or directories you **don't want to be part of the dataset**, for example, files that contain sensitive information. Excluded files and directories will never be uploaded to the data repository. 157 | * Files and directories that are not listed in either :code:`includes` or :code:`excludes` will be ignored by *fairly*. 158 | 159 | 160 | Upload Dataset to Data Repository 161 | ----------------------------------- 162 | 163 | Here, we explain how to upload a dataset to an existing account in Zenodo. If you do not have an account yet, you can `sign up in this webpage. `_ 164 | 165 | For this, you first need to :ref:`create-token` and register it manually or :ref:`via JupyterLab `. 166 | 167 | Upload Dataset 168 | '''''''''''''''' 169 | 170 | 1. On the terminal or command prompt, type: 171 | 172 | .. code:: shell 173 | 174 | fairly dataset upload zenodo 175 | 176 | 2. Go to your Zenodo account and click on :guilabel:`Upload`. The `My Title CLI` dataset should be there. 177 | 178 | .. image:: ../img/zenodo-cli-upload.png 179 | 180 | 181 | Explore the dataset and notice that all the files and metadata you added have been automatically added to the new dataset. You should also notice that the dataset is not **published**; this is on purpose. This gives you the opportunity to review the dataset before deciding to publish it, and to make changes if necessary. In this way, we also prevent users from publishing datasets by mistake. 182 | 183 | .. note:: 184 | If you try to upload the dataset again, you will get an error message. This is because the dataset already exists in Zenodo. You can see this reflected in the :code:`manifest.yaml` file; the section :code:`remotes:` is added to the file after successfully uploading a dataset. It lists the names and ids of the repositories where the dataset has been uploaded.
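    As a purely illustrative sketch (the exact layout and identifier are assumptions; the id is assigned by the repository and will differ for your dataset), the added section may look roughly like:

    .. code-block:: yaml

       remotes:
         zenodo: '1234567'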
185 | In the future, we will add a feature to allow users to update and sync datasets between repositories. 186 | -------------------------------------------------------------------------------- /docs/tutorials/jupyterlab.rst: -------------------------------------------------------------------------------- 1 | Using the JupyterLab Extension 2 | ============================== 3 | 4 | This tutorial shows how to use the JupyterLab extension to clone and create research datasets using the graphical interface of JupyterLab, and how to upload datasets to popular research data repositories. 5 | 6 | If you have not done so, :ref:`install the full toolset. ` 7 | 8 | Start JupyterLab 9 | ------------------ 10 | 11 | Start JupyterLab with the **jupyter-fairly** extension. This will start JupyterLab in your browser. 12 | 13 | Windows 14 | '''''''' 15 | 16 | You will use the Shell Terminal to start JupyterLab. 17 | 18 | .. important:: 19 | For the following to work, you need Python in the PATH environment variable on Windows. If you are not sure that is the case, open the shell and type :code:`python --version`. You should see the version of Python on the screen. If you see otherwise, follow these steps to `add Python to the PATH on Windows `_ 20 | 21 | On the shell, type the following and press `Enter`: 22 | 23 | .. code-block:: shell 24 | 25 | jupyter lab 26 | 27 | Linux / MacOS 28 | '''''''''''''''' 29 | 30 | From the terminal, run: 31 | 32 | .. code-block:: shell 33 | 34 | jupyter lab 35 | 36 | 37 | JupyterLab should automatically start in your browser. 38 | 39 | .. image:: ../img/start-jupyterlab.png 40 | 41 | 42 | Part 1: Cloning Datasets 43 | ---------------------------- 44 | 45 | Public research datasets can be cloned (copied and downloaded) directly to an empty directory, using the dataset's **URL** or **DOI**. We will use `this dataset `_ from 4TU.ResearchData as an example. 46 | 47 | These are other datasets that you can try: 48 | 49 | * https://zenodo.org/records/4302600 50 | * https://zenodo.org/records/8273524 51 | 52 | 53 | Using the JupyterLab interface, create a new directory called :code:`workshop`. *Notice that the content of your main directory may be different.* 54 | 55 | .. image:: ../img/create-directory.png 56 | 57 | 1. Inside the workshop directory, create a new directory called :code:`clone` 58 | 2. Right-click on the left panel to open the context menu 59 | 3. Click on :guilabel:`Clone Dataset` 60 | 4. Copy and paste the URL for the example dataset on the dialog window 61 | 5. Click :guilabel:`Clone` 62 | 63 | .. image:: ../img/clone1.png 64 | 65 | .. image:: ../img/clone2.png 66 | 67 | A notification on the bottom-right corner will let you know when the *cloning* is complete, and you should see a list of files on JupyterLab. All the files, except for :code:`manifest.yaml`, are files that belong to the dataset in the research repository. The file :code:`manifest.yaml` is automatically created by the Fairly Toolset, and it contains metadata from the research data repository, such as: 68 | 69 | - Authors 70 | - Keywords 71 | - License 72 | - DOI 73 | - Files in the dataset 74 | - etc. 75 | 76 | Part 2: Create a Fairly Dataset 77 | --------------------------------------------- 78 | 79 | Here, we show you how you can create and prepare your own dataset using the JupyterLab extension of *fairly*. 80 | 81 | 1. Create a new directory called :code:`mydataset` inside the *workshop directory*. 82 | 2. Inside :code:`workshop/mydataset/`, open the context menu and click on :guilabel:`Create Fairly Dataset` 83 | 3. Select :guilabel:`Zenodo` as the template from the drop-down list. *Notice that there are templates for other data repositories*. 84 | 4. Click :guilabel:`Create`. A :code:`manifest.yaml` file will be added to the :code:`mydataset` directory. This file contains a list of fields that you can edit to add metadata to your dataset. 85 | 86 | .. image:: ../img/create-dataset1.png 87 | .. image:: ../img/create-dataset2.png 88 | 89 | 90 | Include Files in your Dataset 91 | '''''''''''''''''''''''''''''''' 92 | 93 | Add some files to the :code:`mydataset` directory. You can add files of your own, but be careful not to include anything that you want to keep confidential. Also consider the size of the files you will add; the larger the size, the longer the upload will take. Also remember that for the current Zenodo API each file should be :code:`100MB` or smaller; this will change in the future. 94 | 95 | If you do not want to use your own files, you can download and use the `dummy-data `_ 96 | 97 | After you have added some files and/or folders to :code:`mydataset`, JupyterLab should look something like this: 98 | 99 | .. image:: ../img/my-dataset.png 100 | 101 | Editing the Manifest 102 | '''''''''''''''''''''' 103 | 104 | The :code:`manifest.yaml` file contains several sections to describe the metadata of a dataset. Some of the sections and fields are compulsory (they are required by the research data repository), others are optional. In this example, you started a *fairly* dataset using the template for the Zenodo repository, but you could also do so for 4TU.ResearchData. 105 | 106 | However, if you are not sure which repository you will use to publish a dataset, use the :guilabel:`Default` template. This template contains the most common sections and fields for the repositories supported by the Fairly Toolset. 107 | 108 | .. tip:: 109 | Independently of which template you use to start a dataset, the :code:`manifest.yaml` file is interoperable between data repositories, with very few exceptions. This means that you can use the same manifest file for various data repositories. Different templates are provided only as a guide to indicate what metadata is more relevant for each data repository. 110 | 111 | 1. Open the :code:`manifest.yaml` file using the context menu, or by double-clicking on the file 112 | 113 | .. image:: ../img/open-metadata.png 114 | 115 | 2. Substitute the content of the :code:`manifest.yaml` with the text below. *Here, we use only a small set of fields that are possible for Zenodo.* 116 | 117 | .. code-block:: yaml 118 | 119 | metadata: 120 | type: dataset 121 | publication_date: '2023-08-31' 122 | title: My Title 123 | authors: 124 | - fullname: Surname, FirstName 125 | affiliation: Your institution 126 | description: A dataset from the Fairly Toolset workshop 127 | access_type: open 128 | license: CC0-1.0 129 | doi: '' 130 | prereserve_doi: 131 | keywords: 132 | - fairly-toolset 133 | - tutorial 134 | - dummy data 135 | notes: '' 136 | related_identifiers: [] 137 | communities: [] 138 | grants: [] 139 | subjects: [] 140 | version: 1.0.0 141 | language: eng 142 | template: zenodo 143 | files: 144 | includes: 145 | - ARP1_.info 146 | - ARP1_d01.zip 147 | - my_code.py 148 | - Survey_AI.csv 149 | - wind-mill.jpg 150 | excludes: [] 151 | 152 | 3. Edit the dataset metadata by typing the information you want to add. For example, you can change the title, authors, description, etc.
Save the file when you are done. 153 | 154 | .. important:: 155 | * The :code:`includes` field must list the files and directories (folders) you want to include as part of the dataset. *Included files and directories will be uploaded to the data repository* 156 | * The :code:`excludes` field can be used for explicitly indicating what files or directories you **don't want to be part of the dataset**, for example, files that contain sensitive information. Excluded files and directories will never be uploaded to the data repository. 157 | * Files and directories that are not listed in either :code:`includes` or :code:`excludes` will be ignored by *fairly*. 158 | 159 | Part 3: Upload Dataset to Repository 160 | ------------------------------------- 161 | 162 | This part explains how to upload a dataset to an existing account in Zenodo. If you do not have an account yet, you can `sign up in this webpage. `_ 163 | 164 | .. _create-token: 165 | 166 | Create Personal Token 167 | '''''''''''''''''''''' 168 | 169 | A personal token is a way in which data repositories identify a user. We need to register a personal token for creating datasets in the repository and uploading files to a specific account. 170 | 171 | 1. Sign in to Zenodo. 172 | 2. On the top-right corner, click on the drop-down arrow, then :guilabel:`Applications`. 173 | 3. On the section :guilabel:`Personal access tokens`, click the :guilabel:`New token` button. 174 | 4. Enter a name for your token, for example: :code:`workshop` 175 | 5. For scopes, check all three boxes, and click :guilabel:`Create` 176 | 6. Copy the token (list of characters in red) to somewhere secure. **You will only see the token once.** 177 | 7. Under :guilabel:`Scopes`, check all three boxes once more. Then click :guilabel:`Save` 178 | 179 | .. image:: ../img/zenodo-token.png 180 | 181 | .. _configuring-fairly: 182 | 183 | Register Personal Token 184 | '''''''''''''''''''''''''''''''' 185 | 186 | To register a personal token with the Fairly Toolset, do the following in JupyterLab: 187 | 188 | 1. Open the :guilabel:`Fairly` menu on the top menu bar, and click on :guilabel:`Add Repository Token` 189 | 2. Select :guilabel:`Zenodo` from the drop-down list. 190 | 3. Paste the token you copied from Zenodo in the previous step. 191 | 4. Click :guilabel:`Add Token` 192 | 193 | .. important:: 194 | * You can register tokens for other repositories supported by *fairly* in the same way. Tokens added in this way are global, and will be used by the JupyterLab extension, the Python package and the CLI. 195 | * Tokens are stored in a file called :code:`config.json` in your user home directory. This file is created automatically by *fairly* when you register a token. For Windows the file is located in :code:`C:\\Users\\\\.fairly\\config.json`, and for Linux/MacOS in :code:`~/.fairly/config.json`. 196 | * To **update a token**, simply register a new token with the same name. The old token will be replaced by the new one. To **remove a token**, simply repeat the process, but type a random character in the token field. 197 |
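For reference, the token store is plain JSON. Judging from how the CLI reads it in :code:`src/fairly/cli/config.py` (it looks up :code:`config[id]["token"]`), a minimal sketch of the file might look like this — the token value below is a placeholder:

.. code-block:: json

   {
       "zenodo": {
           "token": "your-personal-token"
       }
   }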
198 | .. warning:: 199 | If you are using the Fairly Toolset on a shared computer, make sure that you **remove your tokens** from the JupyterLab extension. Otherwise, other users of the computer will be able to use your token to create datasets in your account. 200 | 201 | .. note:: 202 | Windows users might need to re-start JupyterLab for the tokens to work correctly when uploading datasets. 203 | 204 | Upload Dataset 205 | '''''''''''''''' 206 | 207 | 1. Right-click on the left panel, then click :guilabel:`Upload Dataset` 208 | 2. Select Zenodo from the drop-down list, and click :guilabel:`Continue` 209 | 3. Confirm that you want to upload the dataset to Zenodo by ticking the checkbox. 210 | 4. Click :guilabel:`OK`. A notification on the bottom-right corner will let you know that the upload is in progress and when it is complete. 211 | 5. Go to your Zenodo account and click on :guilabel:`Upload`. The `my dataset` dataset should be there. 212 | 213 | .. image:: ../img/zenodo-upload.png 214 | 215 | Explore the dataset and notice that all the files and metadata you added in JupyterLab have been automatically added to the new dataset. You should also notice that the dataset is not **published**; this is on purpose. This gives you the opportunity to review the dataset before deciding to publish it, and to make changes if necessary. In this way, we also prevent users from publishing datasets by mistake. 216 | 217 | .. note:: 218 | If you try to upload the dataset again, you will get an error message. This is because the dataset already exists in Zenodo. You can see this reflected in the :code:`manifest.yaml` file; the section :code:`remotes:` is added to the file after successfully uploading a dataset. It lists the names and ids of the repositories where the dataset has been uploaded. 219 | In the future, we will add a feature to allow users to update and sync datasets between repositories. 220 | 221 | 222 | Part 4: Pushing Changes to Data Repository 223 | -------------------------------------------- 224 | 225 | In the last part of this tutorial, we will show you how to push changes to a dataset that has already been uploaded to a data repository. For this, we will use the dataset we created in the previous part. 226 | 227 | 228 | .. attention:: 229 | 230 | To be able to push updates to an existing dataset in a repository, you need to have write access to the dataset. For most of the repositories this requires you to be the **owner** of the dataset. Most data repositories prevent updates if a dataset is "published" (i.e. editing is limited to datasets that are not yet published). 231 | 232 | You can make changes to the files in a local dataset as you would normally do. For example, you can add new files, edit existing files, or delete files. You can also edit the :code:`manifest.yaml` file to update the metadata of the dataset. 233 | If file inclusion or exclusion rules are defined using patterns (e.g. `'*.txt'`), then the extension automatically identifies added, removed, or modified files. 234 | Otherwise, you need to explicitly indicate what needs to be *included* or *excluded* by updating the :code:`includes` and :code:`excludes` fields in the :code:`manifest.yaml` file. 235 | 236 | 237 | .. image:: ../img/add-filles.png 238 | 239 | Once you have made and **saved** the changes, you can do the following to upload the changes to the data repository. 240 | 241 | 1. Right-click on the left panel, 242 | 2. click the :guilabel:`Push` option from the list, 243 | 3. confirm that you want to push the changes and click the :guilabel:`Push` button. A notification on the bottom-right corner will let you know that changes are in progress and when they are completed. 244 | 245 | .. image:: ../img/push-menu.png 246 | 247 | .. image:: ../img/push-confirm.png 248 | 249 |
250 | .. tip:: 251 | 252 | To push changes to a dataset that you own but did not create using the Fairly Toolset, all you have to do is clone it first using the :guilabel:`Clone Dataset` option from the context menu. 253 | -------------------------------------------------------------------------------- /docs/tutorials/python-api.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "attachments": {}, 5 | "cell_type": "markdown", 6 | "metadata": {}, 7 | "source": [ 8 | "# Using the Python API\n", 9 | "\n", 10 | "In this tutorial you will learn how to use *fairly* as a Python package to clone, create and upload datasets to research data repositories.\n", 11 | "\n", 12 | "If you have not done so, [install the fairly package.](../installation.rst)\n", 13 | "\n", 14 | "## Cloning a dataset\n", 15 | "\n", 16 | "The Python API provides the flexibility to explore the metadata of a `remote dataset` before downloading it. A `remote` dataset is any dataset which is not stored locally. \n", 17 | "\n", 18 | "1. In a Python script, import the `fairly` package and open a remote dataset:" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import fairly\n", 28 | "\n", 29 | "# Open a remote dataset\n", 30 | "dataset = fairly.dataset(\"doi:10.4121/21588096.v1\")\n" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "2. You can now explore the metadata of the dataset as follows:" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "data": { 47 | "text/plain": [ 48 | "{'id': '21588096', 'version': '1'}" 49 | ] 50 | }, 51 | "execution_count": 2, 52 | "metadata": {}, 53 | "output_type": "execute_result" 54 | } 55 | ], 56 | "source": [ 57 | "dataset.id" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 3, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "data": { 67 | "text/plain": [ 68 | "'https://data.4tu.nl/datasets/a37120e2-96db-48e4-bd65-a54b970bc4fe/1'" 69 | ] 70 | }, 71 | "execution_count": 3, 72 | "metadata": {}, 73 | "output_type": "execute_result" 74 | } 75 | ], 76 | "source": [ 77 | "dataset.url" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 5, 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "name": "stdout", 87 | "output_type": "stream", 88 | "text": [ 89 | "33339\n" 90 | ] 91 | }, 92 | { 93 | "data": { 94 | "text/plain": [ 95 | "6" 96 | ] 97 | }, 98 | "execution_count": 5, 99 | "metadata": {}, 100 | "output_type": "execute_result" 101 | } 102 | ], 103 | "source": [ 104 | "print(dataset.size)\n", 105 | "\n", 106 | "# number of files\n", 107 | "len(dataset.files)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 6, 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "data": { 117 | "text/plain": [ 118 | "Metadata({'authors': [Person({'fullname': 'Stefan Nielsen', 'orcid_id': '0000-0002-9214-2932', 'figshare_id': 12882551})], 'keywords': ['Earthquakes', 'artificial neural network', 'precursor'], 'description': '

These are the accuracy results for the whole dataset A and B together. This is a second batch (2/2) of cycles where network was trained, tested and verified 50 times with different combinations of test, train and verification groups. There is a first batch of 50 in a separate file

', 'license': {'id': 2, 'name': 'CC0', 'url': 'https://creativecommons.org/publicdomain/zero/1.0/'}, 'title': 'Earthquake Precursors detected by convolutional neural network', 'doi': '10.4121/21588096.v1', 'type': 'dataset', 'access_type': 'open', 'custom_fields': {'Time coverage': '2012-2022', 'Publisher': '4TU.ResearchData', 'Organizations': 'University of Durham, Department of Earth Sciences.', 'Geolocation Longitude': '138.204', 'Geolocation Latitude': '36.546', 'Geolocation': 'Japan and surrounding area', 'Format': '*.py, *.csv, *.txt'}, 'categories': [13555], 'online_date': '2022-11-24T07:50:39'})" 119 | ] 120 | }, 121 | "execution_count": 6, 122 | "metadata": {}, 123 | "output_type": "execute_result" 124 | } 125 | ], 126 | "source": [ 127 | "# complete metadata\n", 128 | "dataset.metadata" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "3. You can store the dataset's files and metadata in a local directory (i.e. clone the dataset) as follows. The directory will be created if it does not exist." 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 7, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "# store dataset locally (i.e. clone dataset)\n", 145 | "local_dataset = dataset.store(\"./cloned-dataset\")" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "## Creating a local dataset\n", 153 | "\n", 154 | "A `local dataset` is a dataset which is stored locally. When creating our own dataset, we work with a local dataset.\n", 155 | "\n", 156 | "1. Initialize a new dataset: " 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 2, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "import fairly\n", 166 | "\n", 167 | "# Initialize a local dataset\n", 168 | "dataset = fairly.init_dataset(\"./local-dataset\") # path is created if it does not exist" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "2. Set the dataset's metadata attributes by passing a list of attribute names and values to a local dataset:" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 9, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "dataset.set_metadata(\n", 185 | "    title=\"My first dataset\",\n", 186 | "    keywords=[ \"fairly\", \"python\", \"api\" ],\n", 187 | "    authors=[ \"0000-0002-0516-185X\",\n", 188 | "        { \"name\": \"Jane\", \"surname\": \"Doe\" }\n", 189 | "    ],\n", 190 | ")" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 10, 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "# Metadata attributes can also be set one by one as follows\n", 200 | "dataset.metadata[\"license\"] = \"CC-BY-4.0\"" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": {}, 206 | "source": [ 207 | "3. Add files and folders to the dataset:" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 11, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "dataset.includes.extend([ \n", 217 | "    \"README\", \n", 218 | "    \"*.csv\",\n", 219 | "    \"train/*.jpg\" \n", 220 | "])" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "4. To save the dataset's attributes to the `manifest.yaml` file, we must call the `save()` method:" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 12, 233 | "metadata": {}, 234 | "outputs": [], 235 | "source": [ 236 | "# Save changes and update manifest.yaml\n", 237 | "dataset.save()" 238 | ] 239 | }, 240 | { 241 | "cell_type": "markdown", 242 | "metadata": {}, 243 | "source": [ 244 | "## Uploading a dataset\n", 245 | "\n", 246 | "To upload a dataset to a research data repository, we must first register an access token for an account in the data repository. Check the tutorial on the [JupyterLab extension](./jupyterlab.rst) to learn how to register an access token.\n", 247 | "\n", 248 | "Once you have registered an access token, you can upload a dataset with a single command:" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "metadata": {}, 255 | "outputs": [], 256 | "source": [ 257 | "# Upload dataset to data repository\n", 258 | "remote_dataset = dataset.upload('zenodo')" 259 | ] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "metadata": {}, 264 | "source": [ 265 | "## Pushing changes to a data repository\n", 266 | "\n", 267 | "After uploading a dataset to a data repository, you can use the `push` command to push changes to the dataset's metadata and files and update the data repository. The `push` method automatically finds the remote version of a dataset from the information available in the *manifest* file. It also updates the remote metadata, if any metadata fields are modified locally.\n", 268 | "\n", 269 | "> To be able to push updates to an existing dataset in a repository, you need to have write access to the dataset. For most of the repositories this requires you to be the owner of the dataset.\n", 270 | "> Most data repositories prevent updates if a dataset is \"published\" (i.e. editing is limited to datasets that are not yet published).\n", 271 | "\n", 272 | "### Changing metadata in a dataset\n", 273 | "\n", 274 | "For example, to update the *title* of a dataset for which you have a local copy, you can do the following:" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": 4, 280 | "metadata": {}, 281 | "outputs": [], 282 | "source": [ 283 | "ds = fairly.dataset(\"./local-dataset\")\n", 284 | "ds.metadata[\"title\"] = \"New title\"\n", 285 | "ds.save_metadata() # save changes to manifest.yaml\n", 286 | "\n", 287 | "ds.push() # push changes to data repository to update an existing dataset" 288 | ] 289 | }, 290 | { 291 | "cell_type": "markdown", 292 | "metadata": {}, 293 | "source": [ 294 | "### Changing files in a dataset\n", 295 | "\n", 296 | "You can add, remove, or modify files in a local dataset as you wish. If file inclusion or exclusion rules are defined using patterns (e.g. `'*.txt'`), then fairly automatically identifies added, removed, or modified files. Otherwise, you need to explicitly indicate what needs to be *included* or *excluded*. Use the `includes.append` and `excludes.append` methods to do so."
297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 6, 302 | "metadata": {}, 303 | "outputs": [], 304 | "source": [ 305 | "# include a new file or directory\n", 306 | "ds.includes.append(\"new file.txt\")\n", 307 | "\n", 308 | "# remove a file or directory\n", 309 | "ds.excludes.append(\"old file.txt\")\n", 310 | "\n", 311 | "ds.save() # save changes to manifest.yaml" 312 | ] 313 | }, 314 | { 315 | "cell_type": "markdown", 316 | "metadata": {}, 317 | "source": [ 318 | "Once the changes are saved to the *manifest file*, the remote version can be updated by calling the `push` method:" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [ 327 | "ds.push() # push changes to data repository " 328 | ] 329 | }, 330 | { 331 | "cell_type": "markdown", 332 | "metadata": {}, 333 | "source": [ 334 | "To learn more about the Fairly Python API, check the [API reference](../api/fairly.rst)." 335 | ] 336 | } 337 | ], 338 | "metadata": { 339 | "kernelspec": { 340 | "display_name": "venv", 341 | "language": "python", 342 | "name": "python3" 343 | }, 344 | "language_info": { 345 | "codemirror_mode": { 346 | "name": "ipython", 347 | "version": 3 348 | }, 349 | "file_extension": ".py", 350 | "mimetype": "text/x-python", 351 | "name": "python", 352 | "nbconvert_exporter": "python", 353 | "pygments_lexer": "ipython3", 354 | "version": "3.10.12" 355 | }, 356 | "orig_nbformat": 4 357 | }, 358 | "nbformat": 4, 359 | "nbformat_minor": 2 360 | } 361 | -------------------------------------------------------------------------------- /docs/tutorials/workshop.rst: -------------------------------------------------------------------------------- 1 | Fairly Toolset Workshop 2 | """""""""""""""""""""""" 3 | 4 | Go to `Resources for the workshop `_ 5 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "fairly" 7 | version = "1.0.1" 8 | description = "A package to create, publish, and download research datasets" 9 | readme = "README.rst" 10 | license = { file="LICENSE" } 11 | requires-python = ">=3.8" 12 | authors = [ 13 | { name="Serkan Girgin", email="s.girgin@utwente.nl" }, 14 | { name="Manuel Garcia Alvarez", email="m.g.garciaalvarez@tudelft.nl" }, 15 | { name="Jose Urra Llanusa", email="j.c.urrallanusa@tudelft.nl" }, 16 | ] 17 | classifiers = [ 18 | "Programming Language :: Python :: 3", 19 | "License :: OSI Approved :: MIT License", 20 | "Operating System :: OS Independent", ] 21 | dependencies = [ 22 | "python-dateutil", 23 | "requests", 24 | "requests_toolbelt>=1.0.0", 25 | "ruamel.yaml>=0.17.26", 26 | "typer>=0.9.0", 27 | "rich" 28 | ] 29 | keywords = ["fairly", "open science", "research data", "data management"] 30 | 31 | [project.optional-dependencies] 32 | dev = [ 33 | "pytest", 34 | "pytest-cov", 35 | "python-dotenv", 36 | "build", 37 | "hatch" 38 | ] 39 | 40 | [project.urls] 41 | "Homepage" = "https://github.com/ITC-CRIB/fairly" 42 | "Bug Tracker" = "https://github.com/ITC-CRIB/fairly/issues" 43 | "Documentation" = "https://fairly.readthedocs.io" 44 | "Funding" = "https://nwo.nl/en/researchprogrammes/open-science/open-science-fund" 45 | 46 | [project.scripts] 47 | fairly = "fairly.cli:app" 48 | 
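# Note: the entry point above exposes the Typer application defined in
# src/fairly/cli/__init__.py as the `fairly` console command after installation.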
-------------------------------------------------------------------------------- /src/fairly/cli/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | from ruamel.yaml import YAML 4 | import typer 5 | import fairly 6 | from fairly.cli import dataset 7 | from fairly.cli import config 8 | 9 | app = typer.Typer() 10 | app.add_typer(dataset.app, name="dataset") 11 | app.add_typer(config.app, name="config") 12 | 13 | @app.command() 14 | def list_repos(): 15 | '''List all repositories supported by fairly''' 16 | repositories = fairly.get_repositories() 17 | 18 | print("List of repositories to use with fairly:") 19 | 20 | for key in repositories: 21 | print("- " + key) 22 | 23 | @app.command() 25 | def list_user_datasets( 26 | repository: str = typer.Argument("", help="Repository name"), 27 | ) -> None: 28 | '''List all datasets in the specified repository by doi, title, and publication_date''' 29 | yaml = YAML() 30 | # Test the connection to the repository by listing account datasets 31 | try: 32 | client = fairly.client(repository) 33 | # Retrieve account datasets and print their publication date, title, and doi 34 | datasets = client.get_account_datasets() 35 | if len(datasets) == 0: 36 | print("There are no datasets under this account") 37 | else: 38 | print("\n") 39 | for ds in datasets: 40 | # get the dataset metadata 41 | metadata = ds.metadata 42 | item = {} 43 | for i in metadata: 44 | if i == "publication_date": item[i] = metadata[i] 45 | if i == 'title': item[i] = metadata[i] 46 | if i == 'doi': item[i] = metadata[i] 47 | 48 | # pretty print the list of datasets with yaml format 49 | yaml.dump(item, sys.stdout) 50 | print("------------------") 51 | 52 | except Exception as e: 53 | print(e) 54 | print("Please specify a repository name that is valid") 55 | return None 56 | 57 | if __name__ == "__main__": 58 | app() -------------------------------------------------------------------------------- /src/fairly/cli/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import pprint 4 | import json 5 | 6 | from ruamel.yaml import YAML 7 | 8 | import typer 9 | import fairly 10 | 11 | pp = pprint.PrettyPrinter(indent=4) 12 | 13 | app = typer.Typer() 14 | 15 | CONFIG_FILE = os.path.expanduser("~/.fairly/config.json") 16 | 17 | 18 | # @app.command() 19 | def add( 20 | id: str = typer.Argument("", help="Repository ID"), 21 | ): 22 | '''Add a repository to the config file, 23 | 24 | fairly repository add --id --name --api-url --token 25 | 26 | Notice that this should only be allowed once there is a corresponding module 27 | for the repository. 28 | ''' 29 | raise NotImplementedError 30 | 31 | @app.command() 32 | def show( 33 | 34 | ): 35 | '''Show config details''' 36 | yaml = YAML() 37 | # expand user path 38 | print(f"You can edit the config file located at: {CONFIG_FILE}") 39 | 40 | print("FAIRLY CONFIG") 41 | print("--------------------") 42 | 43 | repos = fairly.get_repositories() 44 | yaml.dump(repos, sys.stdout) 45 | 46 | 47 | @app.command() 48 | def update_token( 49 | id: str = typer.Argument("", help="Repository ID"), 50 | token: str = typer.Argument("", help="Repository token") 51 | ): 52 | '''Update a repository token in the config file (~/.fairly/config.json)''' 53 | config = {} 54 | try: 55 | with open(CONFIG_FILE, 'r', encoding='utf-8') as f: 56 | config = json.loads(f.read()) 57 | 58 | # guard against repositories that are not present in the config file 59 | if id not in config: 60 | print(f"Repository {id} not found in config file") 61 | return 62 | 63 | # check if token is already set with the same value 64 | if config[id]["token"] == token: 65 | print(f"Token for repository {id} is already set to {token}") 66 | return 67 | 68 | else: config[id]["token"] = token 69 | 70 | with open(CONFIG_FILE, 'w', encoding='utf-8') as f: 71 | f.write(json.dumps(config, indent=4)) 72 | 73 | except FileNotFoundError: 74 | print(f"Config file not found at {CONFIG_FILE}") 75 | return 76 | 77 | 78 | # @app.command() 79 | def remove(): 80 | '''fairly repository remove ''' 81 | raise NotImplementedError 82 | 83 | if __name__ == "__main__": 84 | app() 85 | 86 | -------------------------------------------------------------------------------- /src/fairly/cli/dataset.py: -------------------------------------------------------------------------------- 1 | import typer 2 | 3 | from rich.progress import Progress, SpinnerColumn, TextColumn 4 | 5 | import fairly 6 | 7 | 8 | app = typer.Typer(pretty_exceptions_show_locals=False) 9 | 10 | @app.command() 11 | def create( 12 | path: str = typer.Argument(help="Path where the dataset will be created"), 13 | template: str = typer.Option("default", help="Metadata template to be used for the dataset"), 14 | ) -> None: 15 | '''Create a local dataset under path with default template\n 16 | 17 | fairly dataset create \n 18 | 19 | Create a local dataset under path with the specified template\n 20 |