├── .binder ├── postBuild ├── requirements.txt └── runtime.txt ├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md └── workflows │ ├── build.yml │ ├── test-examples-subprocess.yml │ └── towncrier-changelog.yml ├── .gitignore ├── .readthedocs.yaml ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── CONTRIBUTORS.md ├── LICENSE ├── Makefile ├── README.md ├── Substra-logo-colour.svg ├── Substra-logo-white.svg ├── changes └── .gitkeep ├── charts └── substra-documentation-examples │ ├── Chart.yaml │ ├── templates │ ├── _helpers.tpl │ └── deployment.yaml │ └── values.yaml ├── docker └── substra-documentation-examples │ └── Dockerfile ├── docs ├── Makefile ├── doc-requirements.txt ├── environment.yml └── source │ ├── _ext │ └── compatibilitytable.py │ ├── _static │ ├── Gattica-Bold100.otf │ ├── Gattica-Light100.otf │ ├── Gattica-Medium100.otf │ ├── Gattica-Regular100.otf │ ├── example_thumbnail │ │ ├── cyclic-mnist.png │ │ ├── diabetes.png │ │ ├── iris.jpg │ │ ├── mnist.png │ │ └── titanic.jpg │ ├── favicon.png │ ├── images │ │ ├── substra-0.26-task-duration.png │ │ └── workflow_0.27.0.png │ ├── logo.svg │ ├── my-nbsphinx-gallery.css │ ├── owkin.css │ └── schemes │ │ ├── centralized-orc.svg │ │ ├── channel_schema.png │ │ ├── computeplan.state.svg │ │ ├── computetask.state.svg │ │ ├── function.state.svg │ │ ├── index-generator-scheme.svg │ │ ├── introductory-scheme.svg │ │ └── stack-technical-scheme.svg │ ├── additional │ ├── community.rst │ ├── faq.rst │ ├── glossary.rst │ ├── privacy-strategy.rst │ ├── release.rst │ └── releases.yaml │ ├── banner.jpg │ ├── conf.py │ ├── contributing │ ├── code-of-conduct.rst │ └── contributing-guide.rst │ ├── documentation │ ├── api_reference.rst │ ├── backend │ │ └── index.rst │ ├── components.rst │ ├── concepts.rst │ ├── frontend │ │ └── index.rst │ ├── images │ │ ├── copy_token.png │ │ ├── find_token_management_page.png │ │ ├── generate_new_token.png │ │ └── token_management_page.png │ 
├── orchestrator │ │ └── index.rst │ └── substra_tools.rst │ ├── examples │ ├── substra_core │ │ ├── diabetes_example │ │ │ ├── assets │ │ │ │ ├── dataset │ │ │ │ │ ├── diabetes_dataset.py │ │ │ │ │ └── diabetes_opener.py │ │ │ │ ├── functions │ │ │ │ │ ├── aggregation │ │ │ │ │ │ └── Dockerfile │ │ │ │ │ ├── description.md │ │ │ │ │ ├── federated_analytics_functions.py │ │ │ │ │ ├── local_first_order_computation │ │ │ │ │ │ └── Dockerfile │ │ │ │ │ └── local_second_order_computation │ │ │ │ │ │ └── Dockerfile │ │ │ │ └── requirements.txt │ │ │ └── run_diabetes.ipynb │ │ ├── index.rst │ │ └── titanic_example │ │ │ ├── assets │ │ │ ├── dataset │ │ │ │ ├── description.md │ │ │ │ └── titanic_opener.py │ │ │ ├── function_random_forest │ │ │ │ ├── description.md │ │ │ │ ├── predict │ │ │ │ │ └── Dockerfile │ │ │ │ ├── titanic_function_rf.py │ │ │ │ └── train │ │ │ │ │ └── Dockerfile │ │ │ ├── metric │ │ │ │ ├── Dockerfile │ │ │ │ ├── description.md │ │ │ │ └── titanic_metrics.py │ │ │ ├── requirements.txt │ │ │ ├── test_data_samples │ │ │ │ ├── data_sample_0 │ │ │ │ │ └── data_sample_0.csv │ │ │ │ └── data_sample_1 │ │ │ │ │ └── data_sample_1.csv │ │ │ └── train_data_samples │ │ │ │ ├── data_sample_0 │ │ │ │ └── data_sample_0.csv │ │ │ │ ├── data_sample_1 │ │ │ │ └── data_sample_1.csv │ │ │ │ ├── data_sample_2 │ │ │ │ └── data_sample_2.csv │ │ │ │ ├── data_sample_3 │ │ │ │ └── data_sample_3.csv │ │ │ │ ├── data_sample_4 │ │ │ │ └── data_sample_4.csv │ │ │ │ ├── data_sample_5 │ │ │ │ └── data_sample_5.csv │ │ │ │ ├── data_sample_6 │ │ │ │ └── data_sample_6.csv │ │ │ │ ├── data_sample_7 │ │ │ │ └── data_sample_7.csv │ │ │ │ ├── data_sample_8 │ │ │ │ └── data_sample_8.csv │ │ │ │ └── data_sample_9 │ │ │ │ └── data_sample_9.csv │ │ │ └── run_titanic.ipynb │ └── substrafl │ │ ├── get_started │ │ ├── run_mnist_torch.ipynb │ │ └── torch_fedavg_assets │ │ │ ├── dataset │ │ │ ├── description.md │ │ │ ├── mnist_dataset.py │ │ │ └── mnist_opener.py │ │ │ └── requirements.txt │ │ 
├── go_further │ │ ├── diabetes_substrafl_assets │ │ │ ├── dataset │ │ │ │ ├── diabetes_substrafl_dataset.py │ │ │ │ └── diabetes_substrafl_opener.py │ │ │ └── requirements.txt │ │ ├── run_diabetes_substrafl.ipynb │ │ ├── run_iris_sklearn.ipynb │ │ ├── run_mnist_cyclic.ipynb │ │ ├── sklearn_fedavg_assets │ │ │ ├── dataset │ │ │ │ ├── description.md │ │ │ │ ├── iris_dataset.py │ │ │ │ └── iris_opener.py │ │ │ └── requirements.txt │ │ └── torch_cyclic_assets │ │ │ ├── dataset │ │ │ ├── cyclic_mnist_dataset.py │ │ │ ├── cyclic_mnist_opener.py │ │ │ └── description.md │ │ │ └── requirements.txt │ │ └── index.rst │ ├── how-to │ ├── deploying-substra │ │ ├── howto │ │ │ ├── customize-compute-pod-node.rst │ │ │ ├── existing-volumes.rst │ │ │ ├── external-database.rst │ │ │ └── sso-oidc.rst │ │ ├── index.rst │ │ ├── upgrade-notes.rst │ │ ├── walkthrough.rst │ │ └── walkthrough │ │ │ ├── 10-prerequisites.rst │ │ │ ├── 20-orchestrator-deployment.rst │ │ │ ├── 30-backend-deployment.rst │ │ │ ├── 40-connect-organizations.rst │ │ │ ├── 50-frontend-deployment.rst │ │ │ └── 60-mtls-setup.rst │ ├── developing-substra │ │ ├── harbor.rst │ │ ├── index.rst │ │ ├── local-deployment.rst │ │ └── local-deployment │ │ │ └── k3-create.sh │ └── using-substra │ │ ├── api_tokens_generation.rst │ │ ├── client_configuration.rst │ │ ├── debug.rst │ │ ├── get_performances.rst │ │ ├── gpu.rst │ │ ├── index.rst │ │ └── r_scripts.rst │ ├── index.rst │ ├── reference │ ├── index.rst │ ├── netpol.rst │ ├── pss.rst │ └── volumes.rst │ ├── substrafl_doc │ └── substrafl_overview.rst │ └── templates │ └── breadcrumbs.html ├── examples_requirements.txt ├── requirements.txt ├── skaffold.yaml └── towncrier.toml /.binder/postBuild: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # binder post build script 3 | set -ex 4 | -------------------------------------------------------------------------------- /.binder/requirements.txt: 
-------------------------------------------------------------------------------- 1 | -r ../examples_requirements.txt 2 | -------------------------------------------------------------------------------- /.binder/runtime.txt: -------------------------------------------------------------------------------- 1 | python-3.10 2 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | /.github @Substra/code-owners 2 | /examples @Substra/code-owners @RomainGoussault 3 | /substrafl_examples @Substra/code-owners @RomainGoussault 4 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG]" 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Describe the bug 11 | 12 | > Give a clear and concise description of what the bug is. 13 | 14 | ## How To Reproduce 15 | 16 | Steps to reproduce the behavior: 17 | 18 | > 1. Go to '...' 19 | > 2. Click on '....' 20 | > 3. Scroll down to '....' 21 | > 4. See error 22 | 23 | ## Expected behavior 24 | 25 | > Give a clear and concise description of what you expected to happen. 26 | 27 | ## Screenshots 28 | 29 | > If applicable, add screenshots to help explain your problem. 30 | 31 | ## Environment 32 | 33 | - Version, branch (branch and commit number) 34 | - OS and version: [e.g. macOS Mojave version 10.14.4] 35 | 36 | > Get it with `uname -a` on Linux and `system_profiler SPSoftwareDataType` on Mac 37 | 38 | - Browser and version if relevant [e.g. Firefox, Chrome, Safari] 39 | - Any other relevant information [e.g. dependencies, version of Python, version of Go, ...] 40 | 41 | ## Additional context 42 | 43 | > Add any other context about the problem here. 
44 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Submit a request for a feature you would love to see implemented in Substra 4 | & help us improve! 5 | title: "[FEATURE_REQUEST]" 6 | labels: feature_request 7 | assignees: '' 8 | 9 | --- 10 | 11 | ## Introduction 12 | 13 | > Please describe the type of feature you would like to discuss (one-shot or series of features) & introduce yourself so we can follow up with you. 14 | 15 | ## Describe the ideal feature 16 | 17 | > Give a clear and concise description of what this feature is expected to do. 18 | 19 | ## Outcome 20 | 21 | > Please, try to describe the expected outcome of such a feature, for users and software engineers, the problem it would solve for you, the impact it would have for you. 22 | 23 | ## Is there any already existing similar feature? 24 | 25 | > Do you know some analog feature already existing somewhere else? 26 | 27 | ## Expected behavior 28 | 29 | > Give a clear and concise description of what you would expect after the feature is implemented. 30 | 31 | ## Screenshots 32 | 33 | > If applicable, add screenshots, schemas or any relevant material to help explain your feature request. 34 | 35 | ## Additional context 36 | 37 | > Add any other context about the feature request, any evidence that you have on the need for this request or if you already have an idea on how we may solve this! 38 | > Please try to be specific about the reach of the underneath problem, the "cost" of not having this feature, how is it urgent for you? 
39 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Sphinx Build in CI 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - "main" 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | with: 14 | ref: ${{ github.event.pull_request.head.sha }} 15 | - name: Set up python 16 | uses: actions/setup-python@v2 17 | with: 18 | python-version: "3.10" 19 | 20 | - name: Clone substra-tools 21 | uses: actions/checkout@v2 22 | with: 23 | repository: substra/substra-tools 24 | path: substra-tools 25 | ref: main 26 | 27 | - name: Clone substra 28 | uses: actions/checkout@v2 29 | with: 30 | repository: substra/substra 31 | path: substra 32 | ref: main 33 | 34 | - name: Clone substrafl 35 | uses: actions/checkout@v2 36 | with: 37 | repository: substra/substrafl 38 | path: substrafl 39 | ref: main 40 | 41 | - name: Install substra, substra-tools and substrafl 42 | run: | 43 | pip install -e ./substrafl 44 | pip install -e ./substra 45 | pip install -e ./substra-tools 46 | 47 | - name: Copy substra and substrafl api doc in the doc 48 | run: | 49 | cp -r substra/references docs/source/documentation/references 50 | cp -r substrafl/docs/api docs/source/substrafl_doc/ 51 | 52 | - name: Install Pandoc 53 | run: | 54 | sudo wget https://github.com/jgm/pandoc/releases/download/3.1.11.1/pandoc-3.1.11.1-1-amd64.deb 55 | sudo dpkg -i pandoc-3.1.11.1-1-amd64.deb 56 | 57 | - name: Install requirements 58 | run: | 59 | pip install -r requirements.txt 60 | 61 | - name: Sphinx make 62 | working-directory: ./docs 63 | run: make clean html 64 | -------------------------------------------------------------------------------- /.github/workflows/test-examples-subprocess.yml: -------------------------------------------------------------------------------- 1 | name: Documentation examples - subprocess 2 | 
on: 3 | pull_request: 4 | paths: 5 | - "docs/source/examples/**" 6 | push: 7 | branches: 8 | - main 9 | workflow_dispatch: 10 | 11 | jobs: 12 | pr-validation: 13 | name: test-${{ matrix.os }}-py-${{ matrix.python }} 14 | runs-on: ${{ matrix.os }} 15 | strategy: 16 | matrix: 17 | os: [ubuntu-22.04] 18 | python: ["3.10", "3.11", "3.12"] 19 | 20 | steps: 21 | - uses: actions/checkout@v4 22 | with: 23 | path: substra-documentation 24 | 25 | - uses: actions/setup-python@v5 26 | with: 27 | python-version: ${{ matrix.python }} 28 | 29 | - name: Free disk space 30 | run: | 31 | # Based on https://github.com/actions/runner-images/issues/2840#issuecomment-790492173 32 | sudo rm -rf /usr/share/dotnet 33 | sudo rm -rf /usr/local/lib/android 34 | sudo rm -rf /opt/ghc 35 | sudo rm -rf "/usr/local/share/boost" 36 | sudo rm -rf "$AGENT_TOOLSDIRECTORY" 37 | 38 | - name: Checkout substra-tools on main 39 | uses: actions/checkout@v4 40 | with: 41 | repository: substra/substra-tools 42 | path: substratools 43 | 44 | - name: Checkout substra on main 45 | uses: actions/checkout@v4 46 | with: 47 | repository: substra/substra 48 | path: substra 49 | 50 | - name: Checkout substrafl on main 51 | uses: actions/checkout@v4 52 | with: 53 | repository: substra/substrafl 54 | path: substrafl 55 | 56 | - name: Install package 57 | run: | 58 | pip install --upgrade pip 59 | pip install --upgrade -e substrafl 60 | pip install --upgrade -e substra 61 | pip install --upgrade -e substratools 62 | 63 | - name: Install examples dependencies 64 | run: | 65 | cd substra-documentation 66 | make install-examples-dependencies 67 | 68 | - name: Run examples 69 | env: 70 | SUBSTRA_FORCE_EDITABLE_MODE: True 71 | run: | 72 | cd substra-documentation 73 | make examples 74 | -------------------------------------------------------------------------------- /.github/workflows/towncrier-changelog.yml: -------------------------------------------------------------------------------- 1 | name: Towncrier changelog 2 | 3 | 
on: 4 | workflow_dispatch: 5 | inputs: 6 | app_version: 7 | type: string 8 | description: 'The version of the app' 9 | required: true 10 | branch: 11 | type: string 12 | description: 'The branch to update' 13 | required: true 14 | 15 | jobs: 16 | test-generate-publish: 17 | uses: substra/substra-gha-workflows/.github/workflows/towncrier-changelog.yml@main 18 | secrets: inherit 19 | with: 20 | app_version: ${{ inputs.app_version }} 21 | repo: substra-documentation 22 | branch: ${{ inputs.branch }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | docs/src 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | 132 | # Sphinx build 133 | _build/ 134 | 135 | # Misc build artefacts 136 | tmp/** 137 | 138 | docs/source/tmp/** 139 | docs/source/documentation/references/** 140 | docs/source/substrafl_doc/api 141 | 142 | # Zip files 143 | *.zip 144 | 145 | # Assets built when ran locally 146 | **/local-worker 147 | 148 | # Experiment summaries when run locally 149 | **/experiment_summaries 150 | 151 | # Data folder when run locally 152 | **/data_iris 153 | **/data_mnist 154 | **/data_diabetes 155 | **/data 156 | 157 | # Algo files folder when run locally 158 | **/algo_files 159 | 160 | # Mac spec 161 | *.DS_Store 162 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | build: 9 | os: "ubuntu-22.04" 10 | tools: 11 | python: "miniconda3-4.7" 12 | jobs: 13 | pre_create_environment: 14 | - conda update --yes --quiet --name=base --channel=defaults conda 15 | 16 | # Build documentation in the docs/ directory with Sphinx 17 | sphinx: 18 | configuration: docs/source/conf.py 19 | fail_on_warning: True 20 | 21 | conda: 22 | environment: docs/environment.yml 23 | 
-------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | Substra repositories' code of conduct is available in the Substra documentation [here](https://docs.substra.org/en/stable/contributing/code-of-conduct.html). 2 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Substra repositories' contributing guide is available in the Substra documentation [here](https://docs.substra.org/en/stable/contributing/contributing-guide.html). 2 | -------------------------------------------------------------------------------- /CONTRIBUTORS.md: -------------------------------------------------------------------------------- 1 | This is a list of people who have made significant contributions to the Substra documentation. It is sorted in chronological order. Please include your contribution at the bottom of this document in the following format: name (N), email (E), description of work (W) and date (D). 2 | 3 | To have your contribution listed, your work must meet the minimum [threshold of originality](https://en.wikipedia.org/wiki/Threshold_of_originality), which will be evaluated by the maintainers of the repository. 4 | 5 | Thank you for your contribution; your work is greatly appreciated! 6 | 7 | --- Example --- 8 | 9 | - N: John Doe 10 | - E: john.doe@owkin.com 11 | - W: Integrated new FL strategy 12 | - D: 02/02/2023 13 | 14 | --- 15 | 16 | Copyright (c) 2018-present Owkin Inc. All rights reserved. 17 | 18 | All other contributions: 19 | Copyright (c) 2023 to the respective contributors. 20 | All rights reserved. 
21 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | install-examples-dependencies: 2 | pip3 install -r examples_requirements.txt 3 | 4 | examples: examples-substra examples-substrafl 5 | 6 | examples-substra: example-core-diabetes example-core-titanic 7 | 8 | example-core-diabetes: 9 | cd docs/source/examples/substra_core/diabetes_example/ && ipython -c "%run run_diabetes.ipynb" 10 | example-core-titanic: 11 | cd docs/source/examples/substra_core/titanic_example/ && ipython -c "%run run_titanic.ipynb" 12 | 13 | examples-substrafl: example-fl-mnist example-fl-iris example-fl-cyclic example-fl-diabetes 14 | 15 | example-fl-mnist: 16 | cd docs/source/examples/substrafl/get_started/ && ipython -c "%run run_mnist_torch.ipynb" 17 | example-fl-iris: 18 | cd docs/source/examples/substrafl/go_further/ && ipython -c "%run run_iris_sklearn.ipynb" 19 | example-fl-cyclic: 20 | cd docs/source/examples/substrafl/go_further/ && ipython -c "%run run_mnist_cyclic.ipynb" 21 | example-fl-diabetes: 22 | cd docs/source/examples/substrafl/go_further/ && ipython -c "%run run_diabetes_substrafl.ipynb" -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Substra documentation 2 | 3 |
4 | 5 |

6 | 7 |
8 | 9 | 10 | 11 | 12 | Substra 13 | 14 |
15 |
16 |
17 | 18 | Substra is an open source federated learning (FL) software. This specific repository is the documentation of Substra. 19 | 20 | This documentation is hosted on Read The Docs and can be found [here](https://docs.substra.org/). 21 | 22 | ## Support 23 | 24 | If you need support, please either raise an issue on GitHub or ask on [Slack](https://join.slack.com/t/substra-workspace/shared_invite/zt-1fqnk0nw6-xoPwuLJ8dAPXThfyldX8yA). 25 | 26 | 27 | # Setup 28 | 29 | ## Contributing 30 | 31 | If you would like to contribute to this documentation please clone it locally and make a new branch with the suggested changes. 32 | 33 | You should use Python `3.10`. 34 | 35 | To deploy the documentation locally you need to install all the necessary requirements which you can find in the 'requirements.txt' file of the root of this repository. You can use pip in your terminal to install them: `pip install -r requirements.txt`. 36 | 37 | You also need to manually install [pandoc](https://github.com/jgm/pandoc/releases/tag/3.1.11.1). 38 | 39 | 40 | ### Install substra, substratools and substrafl in editable mode 41 | 42 | :warning: if you have these repositories installed in non-editable mode, it will not work. 43 | 44 | Install the repositories in editable mode: 45 | 46 | ```sh 47 | git clone git@github.com:Substra/substra.git 48 | cd substra && pip install -e . && cd .. 49 | ``` 50 | 51 | ```sh 52 | git clone git@github.com:Substra/substra-tools.git 53 | cd substra-tools && pip install -e . && cd .. 54 | ``` 55 | 56 | ```sh 57 | git clone git@github.com:Substra/substrafl.git 58 | cd substrafl && pip install -e '.[dev]' && cd .. 59 | ``` 60 | 61 | ### Build the documentation locally 62 | 63 | Next, to build the documentation move to the docs directory: `cd docs` 64 | 65 | And then: `make clean html` 66 | 67 | The first time you run it or if you updated the examples library it may take a little longer to build the whole documentation. 
68 | 69 | To see the doc in your browser: `make livehtml` 70 | And then go to http://127.0.0.1:8000 71 | 72 | Once you are happy with your changes push your branch and make a pull request. 73 | 74 | Thank you for helping us improve! 75 | 76 | ### Add a new example 77 | 78 | - Put the example folder in `substra-documentation/examples` if it is a Substra example, `substra-documentation/substrafl_examples` if it is a Substrafl example. 79 | - create a `README.rst` file at the root of the example 80 | - The main file that is executed must match the regex `run_*.py`, e.g. `run_titanic.py` ([source](https://sphinx-gallery.github.io/stable/configuration.html?highlight=examples_dirs#parsing-and-executing-examples-via-matching-patterns)) 81 | - It must also be structured as described in the Sphinx gallery documentation. In particular, the folder containing the `run_*.py` example file **must** contain a `README.rst` file. 82 | - Add the assets: 83 | - use the `zip_dir` function in the `conf.py` file to zip the assets 84 | - add the link to download the assets to the example's docstring: 85 | 86 | ```rst 87 | .. only:: builder_html or readthedocs 88 | 89 | :download:`assets required to run this example <../../ASSET_NAME.zip>` 90 | ``` 91 | - thumbnail: add the path to the image in a comment in a cell of the example 92 | 93 | `# sphinx_gallery_thumbnail_path = 'auto_examples/EXAMPLE_FOLDER_NAME/images/thumb/sphx_glr_plot_thumb.jpg'` 94 | 95 | 96 | ## Releases 97 | 98 | The documentation is released for each Substra release. 99 | When a semver tag is pushed or a release is created, the doc is built and published to ReadTheDocs by the [CI](https://github.com/Substra/substra-documentation/blob/main/.github/workflows/publish_stable.yml). 100 | Then ReadTheDocs automatically activates this version and sets it as default (takes a few minutes). 
101 | You can follow the build on the CI [here](https://github.com/Substra/substra-documentation/actions) and on ReadTheDocs if you have access to the project. 102 | 103 | ## How to generate the changelog 104 | 105 | The changelog is managed with [towncrier](https://towncrier.readthedocs.io/en/stable/index.html). 106 | To add a new entry in the changelog, add a file in the `changes` folder. The file name should have the following structure: 107 | `<unique_id>.<change_type>`. 108 | The `unique_id` is a unique identifier, we currently use the PR number. 109 | The `change_type` can be of the following types: `added`, `changed`, `removed`, `fixed`. 110 | 111 | To generate the changelog (for example during a release), use the following command (you must have the dev dependencies installed): 112 | 113 | ``` 114 | towncrier build --version=<version> 115 | ``` 116 | 117 | You can use the `--draft` option to see what would be generated without actually writing to the changelog (and without removing the fragments). -------------------------------------------------------------------------------- /changes/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/changes/.gitkeep -------------------------------------------------------------------------------- /charts/substra-documentation-examples/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: substra-documentation-examples 3 | description: Remote example for substra documentation 4 | 5 | type: application 6 | 7 | version: 0.1.0 8 | -------------------------------------------------------------------------------- /charts/substra-documentation-examples/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* vim: set filetype=mustache: */}} 2 | {{/* 3 | Expand the name of the chart. 
4 | */}} 5 | {{- define "substra-documentation-examples.name" -}} 6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} 7 | {{- end -}} 8 | 9 | {{/* 10 | Create a default fully qualified app name. 11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 12 | If release name contains chart name it will be used as a full name. 13 | */}} 14 | {{- define "substra-documentation-examples.fullname" -}} 15 | {{- if .Values.fullnameOverride -}} 16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} 17 | {{- else -}} 18 | {{- $name := default .Chart.Name .Values.nameOverride -}} 19 | {{- if contains $name .Release.Name -}} 20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}} 21 | {{- else -}} 22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 23 | {{- end -}} 24 | {{- end -}} 25 | {{- end -}} 26 | 27 | {{/* 28 | Create chart name and version as used by the chart label. 29 | */}} 30 | {{- define "substra-documentation-examples.chart" -}} 31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} 32 | {{- end -}} 33 | 34 | {{/* 35 | Common labels 36 | */}} 37 | {{- define "substra-documentation-examples.labels" -}} 38 | app.kubernetes.io/name: {{ include "substra-documentation-examples.name" . }} 39 | helm.sh/chart: {{ include "substra-documentation-examples.chart" . 
}} 40 | app.kubernetes.io/instance: {{ .Release.Name }} 41 | {{- if .Chart.AppVersion }} 42 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 43 | {{- end }} 44 | app.kubernetes.io/managed-by: {{ .Release.Service }} 45 | {{- end -}} 46 | -------------------------------------------------------------------------------- /charts/substra-documentation-examples/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: {{ include "substra-documentation-examples.fullname" . }} 5 | labels: 6 | {{ include "substra-documentation-examples.labels" . | indent 4 }} 7 | spec: 8 | replicas: 1 9 | selector: 10 | matchLabels: 11 | app.kubernetes.io/name: {{ include "substra-documentation-examples.name" . }} 12 | app.kubernetes.io/instance: {{ .Release.Name }} 13 | template: 14 | metadata: 15 | labels: 16 | app.kubernetes.io/name: {{ include "substra-documentation-examples.name" . }} 17 | app.kubernetes.io/instance: {{ .Release.Name }} 18 | spec: 19 | {{- with .Values.imagePullSecrets }} 20 | imagePullSecrets: 21 | {{- toYaml . 
| nindent 8 }} 22 | {{- end }} 23 | initContainers: 24 | - name: wait-backend-1 25 | image: jwilder/dockerize 26 | command: ['dockerize', 27 | '-wait', 'http://backend-org-1-substra-backend-server.org-1.svc.cluster.local:8000/readiness', 28 | '-wait', 'http://backend-org-2-substra-backend-server.org-2.svc.cluster.local:8000/readiness', 29 | '-timeout', '1200s'] 30 | containers: 31 | - name: {{ .Chart.Name }} 32 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" 33 | imagePullPolicy: {{ .Values.image.pullPolicy }} 34 | command: ["sleep", "infinity"] 35 | resources: 36 | {{- toYaml .Values.resources | nindent 12 }} 37 | env: 38 | - name: DOCKER_HOST 39 | value: tcp://localhost:2376 40 | - name: DOCKER_TLS_VERIFY 41 | value: "1" 42 | - name: DOCKER_CERT_PATH 43 | value: /root/.docker/client 44 | - name: SUBSTRA_FORCE_EDITABLE_MODE 45 | value: "True" 46 | volumeMounts: 47 | - name: dind-certs 48 | mountPath: /root/.docker 49 | - name: tmp 50 | mountPath: /tmp 51 | - name: local-worker 52 | mountPath: /usr/src/app/local-worker/ 53 | - name: dind 54 | image: docker:19.03.12-dind 55 | securityContext: 56 | privileged: true 57 | volumeMounts: 58 | - name: dind-storage 59 | mountPath: /var/lib/docker 60 | - name: dind-certs 61 | mountPath: /root/.docker 62 | - name: tmp 63 | mountPath: /tmp 64 | - name: local-worker 65 | mountPath: /usr/src/app/local-worker/ 66 | env: 67 | - name: DOCKER_TLS_CERTDIR 68 | value: /root/.docker 69 | {{- with .Values.nodeSelector }} 70 | nodeSelector: 71 | {{- toYaml . | nindent 8 }} 72 | {{- end }} 73 | volumes: 74 | - name: dind-storage 75 | emptyDir: {} 76 | - name: dind-certs 77 | emptyDir: {} 78 | - name: tmp 79 | emptyDir: {} 80 | - name: local-worker 81 | emptyDir: {} 82 | {{- with .Values.affinity }} 83 | affinity: 84 | {{- toYaml . | nindent 8 }} 85 | {{- end }} 86 | {{- with .Values.tolerations }} 87 | tolerations: 88 | {{- toYaml . 
| nindent 8 }} 89 | {{- end }} 90 | -------------------------------------------------------------------------------- /charts/substra-documentation-examples/values.yaml: -------------------------------------------------------------------------------- 1 | image: 2 | repository: substrafoundation/substra-documentation-examples 3 | tag: stable 4 | pullPolicy: IfNotPresent 5 | 6 | imagePullSecrets: [] 7 | nameOverride: "" 8 | fullnameOverride: "" 9 | 10 | resources: {} 11 | # We usually recommend not to specify default resources and to leave this as a conscious 12 | # choice for the user. This also increases chances charts run on environments with little 13 | # resources, such as Minikube. If you do want to specify resources, uncomment the following 14 | # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 15 | # limits: 16 | # cpu: 100m 17 | # memory: 128Mi 18 | # requests: 19 | # cpu: 100m 20 | # memory: 128Mi 21 | 22 | nodeSelector: {} 23 | 24 | tolerations: [] 25 | 26 | affinity: {} 27 | -------------------------------------------------------------------------------- /docker/substra-documentation-examples/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11 2 | 3 | WORKDIR /usr/src/app 4 | 5 | ### Dirty build to install docker client ### 6 | # We need to install a docker client because substra debug mode relies on Docker (DinD in a side container) 7 | # and we need a docker client to authenticate this docker against our container registry to download substra-tools. 
8 | RUN apt update && apt install --yes apt-transport-https ca-certificates curl gnupg lsb-release 9 | RUN curl -fsSL https://download.docker.com/linux/debian/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg 10 | RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/debian $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null 11 | RUN apt update && apt install --yes docker-ce-cli 12 | 13 | COPY substra-tools/ substra-tools/ 14 | COPY substra/ substra/ 15 | COPY substrafl/ substrafl/ 16 | 17 | RUN cd substrafl && python -m pip install --no-cache-dir -e . 18 | 19 | RUN cd substra && python -m pip install --no-cache-dir -e . 20 | 21 | RUN cd substra-tools && python -m pip install --no-cache-dir -e . 22 | 23 | COPY substra-documentation/Makefile substra-documentation/ 24 | COPY substra-documentation/examples_requirements.txt substra-documentation/ 25 | COPY substra-documentation/docs/source/examples substra-documentation/docs/source/examples/ 26 | 27 | RUN cd substra-documentation && make install-examples-dependencies 28 | 29 | WORKDIR /usr/src/app/substra-documentation 30 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= -W --keep-going -n 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | SUBSTRADOCDIR = source/documentation/references 11 | SUBSTRAFLDOCDIR = source/substrafl_doc/api 12 | 13 | # Put it first so that "make" without argument is like "make help". 
14 | help: 15 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 16 | 17 | .PHONY: help Makefile clean livehtml livetheme 18 | 19 | # Catch-all target: route all unknown targets to Sphinx using the new 20 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 21 | %: Makefile 22 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 23 | 24 | livehtml: 25 | sphinx-autobuild -b html $(SOURCEDIR) $(BUILDDIR)/html 26 | 27 | livetheme: 28 | sphinx-autobuild -b html -a $(SOURCEDIR) $(BUILDDIR)/html --watch source/_static/ 29 | 30 | clean: 31 | rm -rf $(BUILDDIR) 32 | rm -rf $(SUBSTRADOCDIR) 33 | rm -rf $(SUBSTRAFLDOCDIR) 34 | # Delete the local worker folders in substra-documentation 35 | find .. -type d -name local-worker -prune -exec rm -rf {} \; 36 | # Delete the tmp folders in substra-documentation 37 | find .. -type d -name tmp -prune -exec rm -rf {} \; 38 | -------------------------------------------------------------------------------- /docs/doc-requirements.txt: -------------------------------------------------------------------------------- 1 | recommonmark==0.6.0 2 | sphinx==7.2.6 3 | sphinx-markdown-tables==0.0.17 4 | sphinx-rtd-theme==2.0.0 5 | sphinx-autobuild==2024.2.4 6 | sphinx_click==5.1.0 7 | click==8.1 8 | texttable==1.7.0 9 | myst-parser==2.0.0 10 | docutils==0.20.1 11 | sphinx-fontawesome==0.0.6 12 | sphinx-copybutton==0.5.2 13 | pyyaml==6.0 14 | nbsphinx==0.9.3 15 | pandoc==2.3 16 | git-python==1.0.3 17 | nbconvert<7.14 -------------------------------------------------------------------------------- /docs/environment.yml: -------------------------------------------------------------------------------- 1 | name: rtd 2 | channels: 3 | - defaults 4 | - conda-forge 5 | dependencies: 6 | - python=3.10 7 | - pandoc=3.1 8 | - pip 9 | - pip: 10 | - -r ../requirements.txt 11 | -------------------------------------------------------------------------------- /docs/source/_ext/compatibilitytable.py: 
-------------------------------------------------------------------------------- 1 | import os.path 2 | 3 | import yaml 4 | from docutils import nodes 5 | from docutils.parsers.rst import Directive 6 | 7 | 8 | def has_helm_chart(table: dict, component_name: str) -> bool: 9 | for release in table["releases"]: 10 | if component_name in release["components"]: 11 | if "helm" in release["components"][component_name]: 12 | return True 13 | return False 14 | 15 | 16 | class CompatibilityTable(Directive): 17 | required_arguments = 1 18 | 19 | def run(self): 20 | # "documentation": 21 | # https://docutils.sourceforge.io/docs/ref/doctree.html#table 22 | # https://github.com/docutils/docutils/blob/173189b4c1c095a43c9388f4edd9bf1ff5d5b49d/docutils/docutils/parsers/rst/states.py#L1793 23 | # https://github.com/docutils/docutils/blob/173189b4c1c095a43c9388f4edd9bf1ff5d5b49d/docutils/docutils/nodes.py#L439 24 | 25 | # documentation says nodes can be constructed by passing their children to the constructor 26 | # for instance nodes.entry(nodes.Text("lol")) should work 27 | # but it doesn't 28 | # this leads to needing to first create the node and then attach children to it 29 | 30 | source_file, _ = self.state_machine.get_source_and_line() 31 | with open(os.path.join(os.path.dirname(source_file), self.arguments[0])) as f: 32 | releases = yaml.safe_load(f) 33 | 34 | table = nodes.table() 35 | tgroup = nodes.tgroup() 36 | for _ in range((len(releases["components"]) + 1) * 2): 37 | colspec = nodes.colspec(colwidth=1) 38 | tgroup.append(colspec) 39 | table += tgroup 40 | 41 | thead = nodes.thead() 42 | tgroup += thead 43 | component_row = nodes.row() 44 | helm_row = nodes.row() 45 | 46 | for component_name in ["release"] + releases["components"]: 47 | if not has_helm_chart(releases, component_name): 48 | name_entry = nodes.entry(morerows=1, morecols=1) 49 | else: 50 | name_entry = nodes.entry(morecols=1) 51 | helm_row += [nodes.entry(), nodes.entry()] 52 | helm_row[-2] += 
nodes.paragraph(text="app") 53 | helm_row[-1] += nodes.emphasis(text="helm") 54 | 55 | name_entry += nodes.paragraph(text=component_name) 56 | component_row += name_entry 57 | 58 | thead.append(component_row) 59 | thead.append(helm_row) 60 | 61 | tbody = nodes.tbody() 62 | for release in releases["releases"]: 63 | row = nodes.row() 64 | row += nodes.entry(morecols=1) 65 | row[0] += nodes.strong(text=release["version"]) 66 | for component_name in releases["components"]: 67 | component = release["components"][component_name] 68 | app_para = nodes.paragraph() 69 | app_para += nodes.reference( 70 | text=component["version"], refuri=component["link"], internal=False 71 | ) 72 | if "helm" in component: 73 | row += nodes.entry() 74 | row[-1] += app_para 75 | para = nodes.emphasis() 76 | para += nodes.reference( 77 | text=component["helm"]["version"], 78 | refuri=component["helm"]["link"], 79 | internal=False, 80 | ) 81 | row += nodes.entry() 82 | row[-1] += para 83 | else: 84 | row += nodes.entry(morecols=1) 85 | row[-1] += app_para 86 | 87 | tbody += row 88 | tgroup += tbody 89 | 90 | return [table] 91 | 92 | 93 | def setup(app): 94 | app.add_directive("compatibilitytable", CompatibilityTable) 95 | 96 | return { 97 | "version": "0.1", 98 | "parallel_read_safe": True, 99 | "parallel_write_safe": True, 100 | } 101 | -------------------------------------------------------------------------------- /docs/source/_static/Gattica-Bold100.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/_static/Gattica-Bold100.otf -------------------------------------------------------------------------------- /docs/source/_static/Gattica-Light100.otf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/_static/Gattica-Light100.otf -------------------------------------------------------------------------------- /docs/source/_static/Gattica-Medium100.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/_static/Gattica-Medium100.otf -------------------------------------------------------------------------------- /docs/source/_static/Gattica-Regular100.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/_static/Gattica-Regular100.otf -------------------------------------------------------------------------------- /docs/source/_static/example_thumbnail/cyclic-mnist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/_static/example_thumbnail/cyclic-mnist.png -------------------------------------------------------------------------------- /docs/source/_static/example_thumbnail/diabetes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/_static/example_thumbnail/diabetes.png -------------------------------------------------------------------------------- /docs/source/_static/example_thumbnail/iris.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/_static/example_thumbnail/iris.jpg 
-------------------------------------------------------------------------------- /docs/source/_static/example_thumbnail/mnist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/_static/example_thumbnail/mnist.png -------------------------------------------------------------------------------- /docs/source/_static/example_thumbnail/titanic.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/_static/example_thumbnail/titanic.jpg -------------------------------------------------------------------------------- /docs/source/_static/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/_static/favicon.png -------------------------------------------------------------------------------- /docs/source/_static/images/substra-0.26-task-duration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/_static/images/substra-0.26-task-duration.png -------------------------------------------------------------------------------- /docs/source/_static/images/workflow_0.27.0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/_static/images/workflow_0.27.0.png -------------------------------------------------------------------------------- /docs/source/_static/my-nbsphinx-gallery.css: 
-------------------------------------------------------------------------------- 1 | .nbsphinx-gallery { 2 | display: grid; 3 | grid-template-columns: repeat(auto-fill, 180px); 4 | gap: 5px; 5 | margin-top: 1em; 6 | margin-bottom: 1em; 7 | } 8 | 9 | .nbsphinx-gallery>a { 10 | background-image: none; 11 | border: solid #fff 1px; 12 | background-color: #fff; 13 | box-shadow: 0 0 15px rgba(142, 176, 202, 0.2); 14 | border-radius: 5px; 15 | min-height: 230px; 16 | min-width: 180px; 17 | padding: 10px 24px; 18 | text-decoration: none; 19 | color: var(--color-primary-500); 20 | transition: transform 0.2s ease; 21 | } 22 | 23 | .nbsphinx-gallery>a:hover { 24 | border: solid var(--color-primary-500) 1px; 25 | box-shadow: 0 0 15px rgba(142, 176, 202, 0.5); 26 | transform: scale(1.05); 27 | } 28 | 29 | .nbsphinx-gallery img { 30 | max-width: 100%; 31 | max-height: 100%; 32 | } 33 | 34 | .nbsphinx-gallery>a>div:first-child { 35 | display: flex; 36 | align-items: start; 37 | justify-content: center; 38 | height: 120px; 39 | margin-bottom: 5px; 40 | } -------------------------------------------------------------------------------- /docs/source/_static/schemes/channel_schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/_static/schemes/channel_schema.png -------------------------------------------------------------------------------- /docs/source/additional/community.rst: -------------------------------------------------------------------------------- 1 | Community 2 | ========= 3 | 4 | .. _community: 5 | 6 | Github 7 | ^^^^^^ 8 | Anyone can open an `issue `_ on the Substra GitHub repository. Please submit bug reports so we can continually improve the software. 9 | 10 | Slack 11 | ^^^^^ 12 | A real-time chat room to ask questions, give feedback and chat about anything related to Substra. Please `join us here `_. 
13 | 14 | Newsletter 15 | ^^^^^^^^^^ 16 | `Subscribe here `_ to join our newsletter. -------------------------------------------------------------------------------- /docs/source/additional/faq.rst: -------------------------------------------------------------------------------- 1 | FAQ 2 | === 3 | 4 | .. _faq: 5 | 6 | What is Substra? 7 | ^^^^^^^^^^^^^^^^ 8 | Substra is an open source federated learning (FL) software that enables machine learning on distributed datasets. It provides a flexible Python interface and a web app to perform federated machine learning at scale. 9 | 10 | Substra is the most proven software for federated learning in healthcare and has already been deployed in real production environments by hospitals and biotech companies (see the `MELLODDY `_ project for instance). Substra can also be used on a single machine on a virtually split dataset to perform FL simulations and debug code before launching experiments on a real network. 11 | 12 | Who owns Substra? 13 | ^^^^^^^^^^^^^^^^^ 14 | Substra is open source software operated under an Apache 2.0 License. Substra is hosted by the `Linux Foundation for AI and Data `_. Substra was initially developed by engineers at `Owkin `_, a BioTech company that continues to play a big role in its development. 15 | 16 | What kinds of data does Substra support? 17 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 18 | Substra can run tasks on any type of data: tabular data, images, videos, audio, time series, etc. 19 | 20 | What kind of machine learning model can I use with Substra? 21 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 22 | Substra is fully compatible with machine learning models written in any Python library (PyTorch, Tensorflow, Sklearn, etc). However, a specific interface has been developed to use PyTorch in Substra, which makes writing PyTorch code simpler than using other frameworks. 23 | 24 | Is Substra limited to medical and biotech applications? 
25 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 26 | Although Substra has been designed to work especially well in healthcare settings, it can work on any kind of data with any Python library to perform computation or analysis using distributed data. 27 | 28 | How can I be sure Substra is secure enough to be used with my private data? 29 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 30 | Substra is regularly audited with rigorous security standards (both source code audits and penetration tests). On top of that, by design, private data is never shared between different organizations. The software also provides full traceability on which functions were used on each dataset. 31 | 32 | What is the roadmap for Substra? 33 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 34 | 35 | The roadmap for Substra is primarily decided by product managers and engineers at Owkin. These decisions are based on needs from active and potential FL projects and users that may or may not involve Owkin. You will find below a list of needs that we plan to address in the near future. Please know however that this is not a very strict roadmap and the direction of the product can change at any moment. 36 | 37 | * **Better support for Federated Analytics:** The Substra library does support Federated Analytics currently but one of our goals is to make this more user friendly and accessible. 38 | * **Introduce more FL Strategies:** Substra aims to be a complete FL framework and one way we hope to facilitate FL projects is by adding more strategies. We hope that by implementing these strategies within the library, we can encourage more experimentation by data scientists. We would also be interested in allowing users to define their own FL strategies. 39 | * **Usability Improvements:** We intend to make Substra easier to deploy and use. 
This will come from improving the deployment documentation, simplifying the data concepts and merging Substra and Substrafl into one unified library. 40 | 41 | These are some of the main features to be developed in Substra for the coming months. We want to actively make an effort to help our users, so please do not hesitate to reach out if you have a feature request or an idea. Feedback is always welcome! -------------------------------------------------------------------------------- /docs/source/additional/glossary.rst: -------------------------------------------------------------------------------- 1 | .. _glossary-label: 2 | 3 | Glossary 4 | ======== 5 | 6 | 7 | .. glossary:: 8 | 9 | Organization 10 | An organization represents an independent partner in the network. It has its own computing and storage resources. 11 | 12 | Channel 13 | A channel is a group of Substra :term:`Organizations` which operate on a common set of assets. Several channels can be built on top of a Substra network. 
14 | 15 | -------------------------------------------------------------------------------- /docs/source/banner.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/banner.jpg -------------------------------------------------------------------------------- /docs/source/contributing/code-of-conduct.rst: -------------------------------------------------------------------------------- 1 | *************** 2 | Code of Conduct 3 | *************** 4 | 5 | Our Pledge 6 | ========== 7 | 8 | In the interest of fostering an open and welcoming environment, we as 9 | contributors and maintainers pledge to make participation in our project and 10 | our community a harassment-free experience for everyone, regardless of age, body 11 | size, disability, ethnicity, gender identity and expression, level of experience, 12 | nationality, personal appearance, race, religion, or sexual identity and 13 | orientation. 
14 | 15 | Our Standards 16 | ============= 17 | 18 | Examples of behavior that contributes to creating a positive environment 19 | include: 20 | 21 | * Using welcoming and inclusive language 22 | * Being respectful of differing viewpoints and experiences 23 | * Gracefully accepting constructive criticism 24 | * Focusing on what is best for the community 25 | * Showing empathy towards other community members 26 | 27 | Examples of unacceptable behavior by participants include: 28 | 29 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 30 | * Trolling, insulting/derogatory comments, and personal or political attacks 31 | * Public or private harassment 32 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 33 | * Other conduct which could reasonably be considered inappropriate in a professional setting 34 | 35 | Our Responsibilities 36 | ==================== 37 | 38 | Project maintainers are responsible for clarifying the standards of acceptable 39 | behavior and are expected to take appropriate and fair corrective action in 40 | response to any instances of unacceptable behavior. 41 | 42 | Project maintainers have the right and responsibility to remove, edit, or 43 | reject comments, commits, code, wiki edits, issues, and other contributions 44 | that are not aligned to this Code of Conduct, or to ban temporarily or 45 | permanently any contributor for other behaviors that they deem inappropriate, 46 | threatening, offensive, or harmful. 47 | 48 | Scope 49 | ===== 50 | 51 | This Code of Conduct applies both within project spaces and in public spaces 52 | when an individual is representing the project or its community. Examples of 53 | representing a project or community include using an official project e-mail 54 | address, posting via an official social media account, or acting as an appointed 55 | representative at an online or offline event. 
Representation of a project may be 56 | further defined and clarified by project maintainers. 57 | 58 | Enforcement 59 | =========== 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported by contacting community@substra.org, which is a shared team inbox. If the incident involves someone who receives that shared inbox, you can contact an individual maintainer. All 63 | complaints will be reviewed and investigated and will result in a response that 64 | is deemed necessary and appropriate to the circumstances. The project team is 65 | obligated to maintain confidentiality with regard to the reporter of an incident. 66 | Further details of specific enforcement policies may be posted separately. 67 | 68 | Project maintainers who do not follow or enforce the Code of Conduct in good 69 | faith may face temporary or permanent repercussions as determined by other 70 | members of the project's leadership. 71 | 72 | Attribution 73 | =========== 74 | 75 | This Code of Conduct is adapted from the `Contributor Covenant `_, version 1.4. 76 | -------------------------------------------------------------------------------- /docs/source/contributing/contributing-guide.rst: -------------------------------------------------------------------------------- 1 | ****************** 2 | Contributing Guide 3 | ****************** 4 | 5 | Thanks for checking out the contributing guide. Substra warmly welcomes contributions! 6 | 7 | Ground rules & expectations 8 | =========================== 9 | 10 | Be kind and thoughtful in your conversations around this project. We all come from different backgrounds and projects, which means we likely have different perspectives on how things should be done. Try to listen to others rather than convince them that your way is correct. 11 | 12 | Substra has a :doc:`Contributor Code of Conduct `. By participating in this project, you agree to abide by its terms. 13 | 14 | Who are contributors? 
15 | ====================== 16 | 17 | A contributor is anyone who has contributed to the code. It does not matter whether it's a typo fix or 10k lines of code. Making a contribution, however, does not automatically entitle you to copyright over that code. For copyright the contribution must be significant enough to meet the `threshold of originality `_, which basically means that your code is somewhat unique and non-generic. Fixing a typo does not give you access to copyright over that word or sentence. 18 | 19 | How to contribute 20 | ================= 21 | 22 | You should usually open a pull request in the following situations: 23 | 24 | * Submit trivial fixes (for example, a typo, a broken link or an obvious error) 25 | * Start work on a contribution that was already asked for, or that you've already discussed, in an issue 26 | 27 | A pull request doesn't have to represent finished work. You can open a pull request early on, so others can watch or give feedback on your progress. Just open it as a "draft". You can always add more commits later. 28 | 29 | Here's how to submit a pull request: 30 | 31 | * `Fork the repository `_ and clone it locally. Connect your local clone to the original "upstream" repository by adding it as a remote. Pull in changes from "upstream" often so that you stay up to date and merge conflicts are less likely when you submit your pull request. (See more detailed instructions `here `_). 32 | * `Create a branch `_ for your edits. 33 | * **Sign off** your commits. 34 | * **Test your changes.** Please ensure that your contribution passes all tests if you open a pull request. If there are test failures, you will need to address them before we can merge your contribution. When adding or changing functionality, please include new tests for it as part of your contribution. 35 | * **Contribute in the style of the project** to the best of your abilities. 
This may mean using indents, semi-colons or comments differently than you would in your own repository, but makes it easier for us to merge, others to understand and maintain in the future. Most of the project's repositories have a :code:`.pre-commit-config.yaml` file. Run :code:`pre-commit install` to automatically match some of the style rules of the project when committing your changes. 36 | * **Add yourself to the contributors**. If you made a significant contribution, don't forget to add yourself to the CONTRIBUTORS.md file of the repo by putting your name and a small description of your work. 37 | 38 | Vulnerabilities 39 | =============== 40 | 41 | Please reach out to support@substra.org immediately if you believe you have found a vulnerability. 42 | 43 | Due to the privacy-preserving nature of Substra, we take vulnerabilities very seriously. The core of Federated Learning is security and therefore we take various steps such as auditing and automated testing to ensure that our code base remains secure. All pull requests go through a thorough review. 44 | 45 | If a vulnerability is found, a triage process begins within one working day to determine the severity of the vulnerability and the next steps to consider. 46 | 47 | Sign Off 48 | ======== 49 | 50 | For compliance purposes, `Developer Certificate of Origin (DCO) on Pull Requests `_ is activated on the repo. 51 | 52 | In practice, you must add a ``Signed-off-by:`` message at the end of every commit: 53 | 54 | .. code-block:: bash 55 | 56 | This is my commit message 57 | Signed-off-by: Random J Developer 58 | 59 | Add the ``-s`` flag to add it automatically: ``git commit -s -m 'This is my commit message'``. 60 | 61 | :doc:`Community ` 62 | ======================================== 63 | 64 | Discussions about Substra take place on the repositories' Issues and Pull Requests sections and on Slack. Anybody is welcome to join these conversations. 
65 | 66 | Wherever possible, do not take these conversations to private channels, including contacting the maintainers directly. Keeping communication public means everybody can benefit and learn from the conversation. 67 | 68 | Attribution 69 | =========== 70 | 71 | This guide follows guidelines from `opensource.guide `_ -------------------------------------------------------------------------------- /docs/source/documentation/api_reference.rst: -------------------------------------------------------------------------------- 1 | Substra API reference 2 | ===================== 3 | 4 | `substra` version: |substra_version| 5 | 6 | SDK Reference 7 | ------------- 8 | .. toctree:: 9 | :maxdepth: 2 10 | 11 | references/sdk.md 12 | 13 | 14 | Models 15 | ^^^^^^ 16 | 17 | Models describe the objects returned by the platform. 18 | 19 | .. toctree:: 20 | :maxdepth: 2 21 | 22 | references/sdk_models.md 23 | 24 | Schemas 25 | ^^^^^^^ 26 | Specs are the specifications to register the assets on the platform. 27 | 28 | .. toctree:: 29 | :maxdepth: 2 30 | 31 | references/sdk_schemas.md 32 | -------------------------------------------------------------------------------- /docs/source/documentation/backend/index.rst: -------------------------------------------------------------------------------- 1 | ******* 2 | Backend 3 | ******* 4 | 5 | The backend exposes the REST API for an :term:`Organization` and executes compute tasks (in a subsystem we call *compute engine*). 6 | 7 | .. _backend_kubernetes_pods: 8 | 9 | Kubernetes pods 10 | =============== 11 | 12 | docker-registry 13 | We use this service to store images built from user-provided :ref:`Functions`. 14 | Make sure to assign a large enough volume to avoid rebuilding images over and over due to eviction. 15 | registry-prepopulate 16 | This Pod is managed by a Job running on chart installation or update. 17 | It uploads container Images to the docker-registry to make them available for future use in :ref:`Functions`. 
18 | minio 19 | `MinIO`_ is an object storage service and stores all assets registered on the :term:`Organization`. 20 | You should back up the data of this Pod. 21 | postgresql 22 | This is the database supporting the backend. 23 | You should back up the data of this Pod. 24 | redis 25 | This is an organization-specific message broker to support `Celery`_ tasks. 26 | backend-events 27 | This component will consume events from the orchestrator. 28 | It should be able to access the orchestrator over gRPC. 29 | It handles events and triggers appropriate responses such as starting compute tasks. 30 | On startup, it will also register the Organization on the orchestrator. 31 | migrations 32 | This Pod is managed by a Job running on chart installation or update to deal with database schema changes. 33 | This Pod also performs user creation. 34 | scheduler, scheduler-worker 35 | Those are `Celery`_ components, handling scheduled tasks. 36 | server 37 | This is a Django application exposing the REST API through which users interact with Substra. 38 | worker 39 | This is the service processing `Celery`_ tasks. 40 | It handles :ref:`Function` images builds and running compute tasks. 41 | This is where you will find logs related to task processing. 42 | 43 | .. _Celery: https://docs.celeryq.dev/en/latest/index.html 44 | .. _MinIO: https://min.io/ 45 | 46 | .. _backend_communication: 47 | 48 | Communication 49 | ============= 50 | 51 | The backend should be able to reach its orchestrator. 52 | If :term:`Organizations` share :ref:`Models`, involved backends must be able to communicate with each other. 53 | 54 | Helm chart 55 | ========== 56 | 57 | We use Helm charts as a way to package our application deployments. 58 | If you want to deploy the backend you can use the `Helm chart substra-backend`_. 59 | 60 | .. 
_Helm chart substra-backend: https://artifacthub.io/packages/helm/substra/substra-backend 61 | -------------------------------------------------------------------------------- /docs/source/documentation/components.rst: -------------------------------------------------------------------------------- 1 | Components 2 | ========== 3 | 4 | We distinguish two major components, the orchestrator and the backend. 5 | Although they are independent, their versions must match a tested release as referenced in the :ref:`compatibility table `. 6 | 7 | .. image:: ../_static/schemes/stack-technical-scheme.svg 8 | :width: 800 9 | :align: center 10 | :alt: Substra Components Scheme 11 | 12 | .. toctree:: 13 | :glob: 14 | :titlesonly: 15 | :maxdepth: 1 16 | :caption: Components documentation 17 | 18 | backend/* 19 | frontend/* 20 | orchestrator/* 21 | Substra python library 22 | SubstraFL python library <../substrafl_doc/substrafl_overview> 23 | 24 | 25 | -------------------------------------------------------------------------------- /docs/source/documentation/frontend/index.rst: -------------------------------------------------------------------------------- 1 | ******** 2 | Frontend 3 | ******** 4 | 5 | The frontend (also named web application for the end-users) allows you to monitor your assets (compute plans, tasks, datasets, functions) easily through a user interface. It is mainly a read-only interface: you will need to use the Python library to register data or to launch computation. However there are a few actions that are doable with the frontend, for instance: cancelling compute plans, managing users and creating API tokens. 6 | 7 | .. _frontend_kubernetes_pods: 8 | 9 | Kubernetes pods 10 | =============== 11 | 12 | frontend 13 | A single pod managing the frontend. 14 | 15 | .. _frontend_communication: 16 | 17 | Communication 18 | ============= 19 | 20 | The frontend should be able to reach its backend through the REST API. 
21 | The access to the API is secured through the use of JSON Web Tokens (JWT), which are stored through cookies. Each backend server pod has its own token, so when working with different backends or restarting pods, it might be necessary to delete related cookies (namely signature, refresh and header.payload) so a new JWT can be created. Otherwise this could block you from logging into the frontend. 22 | 23 | Helm chart 24 | ========== 25 | 26 | We use Helm charts as a way to package our application deployments. 27 | If you want to deploy the frontend you can use the `Helm chart substra-frontend`_. 28 | 29 | .. _Helm chart substra-frontend: https://artifacthub.io/packages/helm/substra/substra-frontend -------------------------------------------------------------------------------- /docs/source/documentation/images/copy_token.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/documentation/images/copy_token.png -------------------------------------------------------------------------------- /docs/source/documentation/images/find_token_management_page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/documentation/images/find_token_management_page.png -------------------------------------------------------------------------------- /docs/source/documentation/images/generate_new_token.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/documentation/images/generate_new_token.png -------------------------------------------------------------------------------- /docs/source/documentation/images/token_management_page.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/documentation/images/token_management_page.png -------------------------------------------------------------------------------- /docs/source/documentation/orchestrator/index.rst: -------------------------------------------------------------------------------- 1 | ************ 2 | Orchestrator 3 | ************ 4 | 5 | Performing a Federated Learning experiment implies a lot of different compute tasks: local training, aggregation, testing on different organizations, etc. The role of the orchestrator is to distribute ML tasks among organizations, while ensuring complete traceability of operations. 6 | 7 | The orchestrator registers the status of tasks; when a task is done (status ``Done``), it evaluates if some remaining tasks (status ``Waiting``) are now unblocked, and if it's the case, the status of those tasks is changed to ``To do``. The new status is sent to all the backends, who store the new tasks ``To do`` in the task queue (Celery). Then, the task queue will assign the task to one of the workers (if multiple) and handle retries if needed. 8 | 9 | In case of failure, it will store failure reports and change the status of the faulty task to ``Failed``. 10 | In case of manual cancellation, it will change the status of the tasks to ``Cancelled`` on different backends. 11 | 12 | Orchestration 13 | ============= 14 | 15 | Orchestration is hosted by a central Postgres database: 16 | 17 | .. image:: /_static/schemes/centralized-orc.svg 18 | 19 | Orchestration stores only non-sensitive metadata of the Substra assets, making it possible to verify the integrity of the assets and ensures that the permissions on the assets are respected. 20 | 21 | It therefore requires trusting whomever is operating the orchestrator DB not to tamper with it. 22 | 23 | .. 
note:: 24 | 25 | Orchestration was available in a **distributed** mode until `v0.34.0 `__ 26 | 27 | .. _orc_kubernetes_pods: 28 | 29 | Kubernetes pods 30 | =============== 31 | 32 | postgresql 33 | This is the database supporting the ledger. 34 | You should back up the data of this Pod. 35 | orchestrator-server 36 | This is the actual orchestration service, accessed over gRPC. 37 | migrations 38 | This Pod is managed by a Job running on Helm chart installation or update. 39 | It deals with database schema changes. 40 | 41 | .. _orc_communication: 42 | 43 | Communication 44 | ============= 45 | 46 | The orchestrator is a central component. 47 | All backends from each :term:`Organization` must have access to the orchestrator over gRPC for command/queries and event subsription. 48 | 49 | The orchestrator authenticates clients with their TLS certificates. 50 | As a consequence, the Kubernetes Ingress must do SSL passthrough. 51 | 52 | Storage 53 | ======= 54 | 55 | The orchestrator stores its data in a PostgreSQL database. 56 | Migrations are executed using a Kubernetes Job on installation and update (this relies on a Helm hook). 57 | 58 | Helm chart 59 | ========== 60 | 61 | We use Helm charts as a way to package our application deployments. 62 | If you want to deploy the orchestrator you can use the `Helm chart orchestrator`_. 63 | 64 | .. _Helm chart orchestrator: https://artifacthub.io/packages/helm/substra/orchestrator 65 | -------------------------------------------------------------------------------- /docs/source/documentation/substra_tools.rst: -------------------------------------------------------------------------------- 1 | Substra Tools 2 | ============= 3 | 4 | In Substra, users create tasks that are registered to the platform, then executed in a containerised environment. 5 | 6 | A task needs a valid Dockerfile to create a container and expose a command line interface. The execution of the command creates the expected output files. 
def setup_diabetes(data_path: pathlib.Path):
    """Write the scikit-learn diabetes dataset to disk, split between two organisations.

    The following files are created under ``data_path``:

    - ``description.md``: the dataset description shipped with scikit-learn,
    - ``org_1/data.csv``: the first two thirds of the rows,
    - ``org_2/data.csv``: the remaining rows.

    Args:
        data_path: Folder receiving the files. It is created (with its parents)
            when it does not exist yet. A plain string path is accepted as well.
    """
    data_path = pathlib.Path(data_path)
    # Without this, writing the description fails on a folder that does not exist yet.
    data_path.mkdir(parents=True, exist_ok=True)

    raw_data = load_diabetes(scaled=False)

    # write_text creates the file itself, so no prior touch() is needed
    (data_path / "description.md").write_text(raw_data.DESCR)

    dataset = pd.DataFrame(data=raw_data.data, columns=raw_data.feature_names)
    # map the "sex" column to categorical data
    dataset["sex"] = dataset["sex"].replace({1: "M", 2: "F"}).astype("category")

    # Split the dataset in two uneven parts, one per organisation
    split_index = int(len(dataset) * 2 / 3)
    org_datasets = {
        "org_1": dataset.iloc[:split_index],
        "org_2": dataset.iloc[split_index:],
    }
    for org_name, org_dataset in org_datasets.items():
        org_folder = data_path / org_name
        org_folder.mkdir(exist_ok=True)
        org_dataset.to_csv(org_folder / "data.csv", index=False)
9 | 10 | # define how script is run 11 | ENTRYPOINT ["python3", "federated_analytics_functions.py", "--function-name", "aggregation"] 12 | -------------------------------------------------------------------------------- /docs/source/examples/substra_core/diabetes_example/assets/functions/description.md: -------------------------------------------------------------------------------- 1 | # Federated Analytics with Substra 2 | 3 | This folder contains the Python module and associated Dockerfile to perform Federated Analytics with the Substra library. 4 | -------------------------------------------------------------------------------- /docs/source/examples/substra_core/diabetes_example/assets/functions/federated_analytics_functions.py: -------------------------------------------------------------------------------- 1 | import json 2 | from collections import defaultdict 3 | 4 | import pickle 5 | 6 | import numpy as np 7 | import pandas as pd 8 | import substratools as tools 9 | 10 | 11 | # We are using helper decorators from the substratools library to avoid rewriting boilerplate code. 12 | # The function to be registered takes an `inputs` parameter, which will be matched to the list of 13 | # `FunctionInputSpec` provided in the `FunctionSpec` definition. 14 | # In a similar way, the parameter `outputs` will be matched to the `FunctionOutputSpec`. 15 | # The parameter `task_properties` contains if needed additional values that can be used by the function without being persisted. 
@tools.register
def local_first_order_computation(inputs, outputs, task_properties):
    """Compute the first-order local statistics of one organisation.

    The saved state is a dict of plain Python containers:

    - ``n_samples``: number of rows of the local data,
    - ``means``: per numerical column, the *sum* of the values (the aggregation
      step divides it by the total number of samples),
    - ``counts``: per categorical column, the number of rows of each modality.

    The state is written to ``outputs["local_analytics_first_moments"]``.
    """
    df = inputs["datasamples"]
    states = {
        "n_samples": len(df),
        "means": df.select_dtypes(include=np.number).sum().to_dict(),
        "counts": {
            name: series.value_counts().to_dict()
            for name, series in df.select_dtypes(include="category").items()
        },
    }
    save_states(states, outputs["local_analytics_first_moments"])


@tools.register
def local_second_order_computation(inputs, outputs, task_properties):
    """Compute the second-order local statistics of one organisation.

    Reads the shared means from ``inputs["shared_states"]`` and saves, per
    numerical column, the sum of the squared deviations from these means under
    the ``std`` key (the aggregation step divides it by the total number of
    samples). The state is written to ``outputs["local_analytics_second_moments"]``.
    """
    df = inputs["datasamples"]
    shared_states = load_states(inputs["shared_states"])
    means = pd.Series(shared_states["means"])
    squared_deviations = np.power(df.select_dtypes(include=np.number) - means, 2).sum()
    states = {
        "n_samples": len(df),
        # Stored as a plain dict, like the first-order states, so that the shared
        # payload does not embed a pandas object.
        "std": squared_deviations.to_dict(),
    }
    save_states(states, outputs["local_analytics_second_moments"])


@tools.register
def aggregation(inputs, outputs, task_properties):
    """Aggregate the local states of all organisations into shared statistics.

    Every value found in the local states (except ``n_samples``) is divided by
    the total number of samples and summed over the organisations. Values that
    are dicts are treated as categorical columns and aggregated per modality.
    The result is written to ``outputs["shared_states"]``.
    """
    shared_states = [load_states(path) for path in inputs["local_analytics_list"]]

    total_len = sum(state["n_samples"] for state in shared_states)

    aggregated_values = defaultdict(dict)
    for state in shared_states:
        for analytics_name, col_dict in state.items():
            if analytics_name == "n_samples":
                # already aggregated in total_len
                continue
            for col_name, v in col_dict.items():
                # looked up here so that an analytics without any column stays absent
                columns = aggregated_values[analytics_name]
                if isinstance(v, dict):
                    # this column is categorical and v is a dict over the different modalities
                    modalities = columns.setdefault(col_name, {})
                    for modality, vv in v.items():
                        modalities[modality] = modalities.get(modality, 0.0) + vv / total_len
                else:
                    # this is a numerical column and v is numerical
                    columns[col_name] = columns.get(col_name, 0.0) + v / total_len

    # JSON round trip: turns the defaultdict into regular dicts of built-in types
    aggregated_values = json.loads(json.dumps(aggregated_values))

    save_states(aggregated_values, outputs["shared_states"])


def load_states(path):
    """Load a pickled state from ``path``."""
    # NOTE(security): pickle.load can execute arbitrary code. These states may come
    # from other organisations, so only load them from trusted sources.
    with open(path, "rb") as f:
        return pickle.load(f)


def save_states(states, path):
    """Pickle ``states`` to ``path``."""
    with open(path, "wb") as f:
        pickle.dump(states, f)


# The Dockerfile uses this entrypoint at run time to execute the function whose name is passed as parameters,
# providing it with the proper arguments as defined at registration time by Substra Specs.
if __name__ == "__main__":
    tools.execute()
9 | 10 | # define how script is run 11 | ENTRYPOINT ["python3", "federated_analytics_functions.py", "--function-name", "local_second_order_computation"] 12 | -------------------------------------------------------------------------------- /docs/source/examples/substra_core/diabetes_example/assets/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib==3.9.2 2 | scikit-learn==1.5.2 3 | pandas==2.2.2 4 | substra 5 | substratools 6 | -------------------------------------------------------------------------------- /docs/source/examples/substra_core/index.rst: -------------------------------------------------------------------------------- 1 | Substra examples 2 | ================ 3 | 4 | The examples below are compatible with Substra |substra_version|. 5 | 6 | 7 | Examples to get started 8 | ^^^^^^^^^^^^^^^^^^^^^^^ 9 | 10 | .. nbgallery:: 11 | ../../../examples/substra_core/titanic_example/run_titanic.ipynb 12 | 13 | Examples to go further 14 | ^^^^^^^^^^^^^^^^^^^^^^ 15 | 16 | .. nbgallery:: 17 | ../../../examples/substra_core/diabetes_example/run_diabetes.ipynb -------------------------------------------------------------------------------- /docs/source/examples/substra_core/titanic_example/assets/dataset/description.md: -------------------------------------------------------------------------------- 1 | # Titanic 2 | 3 | This dataset comes from Kaggle's ["Titanic: Machine Learning from Disaster" challenge](https://www.kaggle.com/c/titanic/data). 4 | 5 | ## Test and train data samples 6 | 7 | Since Kaggle doesn't provide the ground truth for its test set, all data samples attached to this dataset are extracted from Kaggle's train set. 8 | 9 | Out of the 891 records of the train set: 10 | 11 | - 20% were kept aside as the test data sample 12 | - the remaining 80% were split among 4 train data samples 13 | 14 | This way it is possible to demonstrate cross-validation strategies using these assets. 
15 | 16 | These splits were generated using the following code: 17 | 18 | ```python 19 | import os 20 | import pandas as pd 21 | from sklearn.model_selection import KFold, train_test_split 22 | 23 | data = pd.read_csv('train.csv') 24 | 25 | # generate splits 26 | train_data, test_data_sample = train_test_split(data, test_size=0.2) 27 | kf = KFold(n_splits=4) 28 | splits = kf.split(train_data) 29 | train_data_samples = [] 30 | for train_index, test_index in splits: 31 | train_data_samples.append(train_data.iloc[test_index]) 32 | 33 | # save splits 34 | DATA_SAMPLES_ROOT = '../assets' 35 | 36 | filename = os.path.join(DATA_SAMPLES_ROOT, 'test_data_sample/test.csv') 37 | os.makedirs(os.path.dirname(filename)) 38 | with open(filename, 'w') as f: 39 | test_data_sample.to_csv(f) 40 | 41 | for i, train_data_sample in enumerate(train_data_samples): 42 | filename = os.path.join(DATA_SAMPLES_ROOT, f'../assets/train_data_samples/train{i}/train{i}.csv') 43 | os.makedirs(os.path.dirname(filename)) 44 | with open(filename, 'w') as f: 45 | train_data_sample.to_csv(f) 46 | ``` 47 | 48 | ## Data samples structure 49 | 50 | All data samples have the same exact structure. They all contain a single CSV files with the following fields (description are extracted from Kaggle): 51 | 52 | | Field | Type | Description | Values | 53 | | ------------- | ------- | ------------------------------------------ | ------------------------------------------------------------------- | 54 | | `PassengerId` | integer | Type should be integers | `1`, `2`, `3`... | 55 | | `Survived` | bool | Survived or not |  either `0` or `1` | 56 | | `Pclass` | integer | Class of Travel | either `1`, `2` or `3` | 57 | | `Name` | string | Name of Passenger | `Braund, Mr. 
class TitanicOpener(tools.Opener):
    """Opener exposing the Titanic data samples as pandas DataFrames."""

    def get_data(self, folders):
        """Return the concatenation of every ``.csv`` file found in ``folders``."""
        # find csv files
        csv_paths = []
        for folder in folders:
            for file_name in os.listdir(folder):
                if file_name.endswith(".csv"):
                    csv_paths.append(os.path.join(folder, file_name))

        # load data
        frames = [pd.read_csv(csv_path) for csv_path in csv_paths]
        return pd.concat(frames)

    def fake_data(self, n_samples=None):
        """Return a DataFrame of random rows with the same columns as the real data.

        ``n_samples`` rows are generated when it is set and at most 100,
        otherwise 100 rows are generated.
        """
        count = n_samples if n_samples and n_samples <= 100 else 100
        rows = range(count)

        def random_word(length):
            # `length` distinct ASCII letters in random order
            return "".join(random.sample(string.ascii_letters, length))

        # Columns are generated one after the other, in the order of the real CSV files.
        columns = {}
        columns["PassengerId"] = list(rows)
        columns["Survived"] = [random.choice([True, False]) for _ in rows]
        columns["Pclass"] = [random.choice([1, 2, 3]) for _ in rows]
        columns["Name"] = [random_word(10) for _ in rows]
        columns["Sex"] = [random.choice(["male", "female"]) for _ in rows]
        columns["Age"] = [random.choice(range(7, 77)) for _ in rows]
        columns["SibSp"] = [random.choice(range(4)) for _ in rows]
        columns["Parch"] = [random.choice(range(4)) for _ in rows]
        columns["Ticket"] = [random_word(10) for _ in rows]
        columns["Fare"] = [random.choice(np.arange(15, 150, 0.01)) for _ in rows]
        columns["Cabin"] = [random_word(3) for _ in rows]
        columns["Embarked"] = [random.choice(["C", "S", "Q"]) for _ in rows]
        return pd.DataFrame(columns)
@tools.register
def train(inputs, outputs, task_properties):
    """Fit a random forest on the data samples and save it to ``outputs["model"]``."""
    samples = inputs["datasamples"]
    features = samples.drop(columns="Survived")
    labels = samples.Survived
    features = _normalize_X(features)

    # These hyperparameters come from a grid search (GridSearchCV) run over:
    #   criterion:         ["gini", "entropy"]
    #   min_samples_leaf:  [1, 5, 10, 25, 50, 70]
    #   min_samples_split: [2, 4, 10, 12, 16, 18, 25, 35]
    #   n_estimators:      [100, 400, 700, 1000, 1500]
    hyperparameters = {
        "criterion": "gini",
        "min_samples_leaf": 1,
        "min_samples_split": 10,
        "n_estimators": 100,
        "oob_score": True,
        "random_state": 1,
        "n_jobs": -1,
    }

    # Random Forest
    classifier = RandomForestClassifier(**hyperparameters)
    classifier.fit(features, labels.values.ravel())

    save_model(classifier, outputs["model"])


@tools.register
def predict(inputs, outputs, task_properties):
    """Predict survival for the data samples and save it to ``outputs["predictions"]``."""
    features = inputs["datasamples"].drop(columns="Survived")
    classifier = load_model(inputs["models"])
    features = _normalize_X(features)

    predictions = _predict_pandas(classifier, features)
    save_predictions(predictions, outputs["predictions"])
pd.DataFrame(columns=["Survived"], data=y_pred) 53 | 54 | 55 | def load_model(path): 56 | with open(path, "rb") as f: 57 | return pickle.load(f) 58 | 59 | 60 | def save_model(model, path): 61 | with open(path, "wb") as f: 62 | pickle.dump(model, f) 63 | 64 | 65 | def save_predictions(y_pred, path): 66 | y_pred.to_csv(path, index=False) 67 | 68 | 69 | def _normalize_X(X): 70 | # Relatives 71 | X["relatives"] = X["SibSp"] + X["Parch"] 72 | X.loc[X["relatives"] > 0, "not_alone"] = 0 73 | X.loc[X["relatives"] == 0, "not_alone"] = 1 74 | X["not_alone"] = X["not_alone"].astype(int) 75 | 76 | # Passenger ID 77 | X = X.drop(["PassengerId"], axis=1) 78 | 79 | # Cabin 80 | deck = {"A": 1, "B": 2, "C": 3, "D": 4, "E": 5, "F": 6, "G": 7, "U": 8} 81 | X["Cabin"] = X["Cabin"].fillna("U0") 82 | X["Deck"] = X["Cabin"].map(lambda x: re.compile("([a-zA-Z]+)").search(x).group()) 83 | X["Deck"] = X["Deck"].map(deck) 84 | X["Deck"] = X["Deck"].fillna(0) 85 | X["Deck"] = X["Deck"].astype(int) 86 | X = X.drop(["Cabin"], axis=1) 87 | 88 | # Age 89 | mean = X["Age"].mean() 90 | std = X["Age"].std() 91 | is_null = X["Age"].isnull().sum() 92 | 93 | # fill NaN values in Age column with mean 94 | age_slice = X["Age"].copy() 95 | age_slice[np.isnan(age_slice)] = mean 96 | X["Age"] = age_slice 97 | X["Age"] = X["Age"].astype(int) 98 | # make Age into a category 99 | X["Age"] = X["Age"].astype(int) 100 | X.loc[X["Age"] <= 11, "Age"] = 0 101 | X.loc[(X["Age"] > 11) & (X["Age"] <= 18), "Age"] = 1 102 | X.loc[(X["Age"] > 18) & (X["Age"] <= 22), "Age"] = 2 103 | X.loc[(X["Age"] > 22) & (X["Age"] <= 27), "Age"] = 3 104 | X.loc[(X["Age"] > 27) & (X["Age"] <= 33), "Age"] = 4 105 | X.loc[(X["Age"] > 33) & (X["Age"] <= 40), "Age"] = 5 106 | X.loc[(X["Age"] > 40) & (X["Age"] <= 66), "Age"] = 6 107 | X.loc[X["Age"] > 66, "Age"] = 6 108 | # create Age_Class feature 109 | X["Age_Class"] = X["Age"] * X["Pclass"] 110 | 111 | # Embarked 112 | ports = {"S": 0, "C": 1, "Q": 2} 113 | X["Embarked"] = 
X["Embarked"].fillna("S") 114 | X["Embarked"] = X["Embarked"].map(ports) 115 | 116 | # Fare 117 | X["Fare"] = X["Fare"].fillna(0) 118 | X["Fare"] = X["Fare"].astype(int) 119 | # make Fare into a category 120 | X.loc[X["Fare"] <= 7.91, "Fare"] = 0 121 | X.loc[(X["Fare"] > 7.91) & (X["Fare"] <= 14.454), "Fare"] = 1 122 | X.loc[(X["Fare"] > 14.454) & (X["Fare"] <= 31), "Fare"] = 2 123 | X.loc[(X["Fare"] > 31) & (X["Fare"] <= 99), "Fare"] = 3 124 | X.loc[(X["Fare"] > 99) & (X["Fare"] <= 250), "Fare"] = 4 125 | X.loc[X["Fare"] > 250, "Fare"] = 5 126 | X["Fare"] = X["Fare"].astype(int) 127 | # create Fare_Per_Person feature 128 | X["Fare_Per_Person"] = X["Fare"] / (X["relatives"] + 1) 129 | X["Fare_Per_Person"] = X["Fare_Per_Person"].astype(int) 130 | 131 | # Name 132 | titles = {"Mr": 1, "Miss": 2, "Mrs": 3, "Master": 4, "Rare": 5} 133 | # extract titles 134 | X["Title"] = X.Name.str.extract(r" ([A-Za-z]+)\.", expand=False) 135 | # replace titles with a more common title or as Rare 136 | X["Title"] = X["Title"].replace( 137 | [ 138 | "Lady", 139 | "Countess", 140 | "Capt", 141 | "Col", 142 | "Don", 143 | "Dr", 144 | "Major", 145 | "Rev", 146 | "Sir", 147 | "Jonkheer", 148 | "Dona", 149 | ], 150 | "Rare", 151 | ) 152 | X["Title"] = X["Title"].replace("Mlle", "Miss") 153 | X["Title"] = X["Title"].replace("Ms", "Miss") 154 | X["Title"] = X["Title"].replace("Mme", "Mrs") 155 | # convert titles into numbers 156 | X["Title"] = X["Title"].map(titles) 157 | # filling NaN with 0, to get safe 158 | X["Title"] = X["Title"].fillna(0) 159 | X = X.drop(["Name"], axis=1) 160 | 161 | # Sex 162 | genders = {"male": 0, "female": 1} 163 | X["Sex"] = X["Sex"].map(genders) 164 | 165 | # Ticket 166 | X = X.drop(["Ticket"], axis=1) 167 | 168 | # Drop non relevant features 169 | X = X.drop("not_alone", axis=1) 170 | X = X.drop("Parch", axis=1) 171 | 172 | return X 173 | 174 | 175 | if __name__ == "__main__": 176 | tools.execute() 177 | 
-------------------------------------------------------------------------------- /docs/source/examples/substra_core/titanic_example/assets/function_random_forest/train/Dockerfile: -------------------------------------------------------------------------------- 1 | # this base image works in both CPU and GPU enabled environments 2 | FROM python:3.12-slim 3 | 4 | # install dependencies 5 | RUN pip3 install pandas numpy 'scikit-learn==1.5.2' substratools 6 | 7 | # add your function script to docker image 8 | ADD titanic_function_rf.py . 9 | 10 | # define how script is run 11 | ENTRYPOINT ["python3", "titanic_function_rf.py", "--function-name", "train"] 12 | -------------------------------------------------------------------------------- /docs/source/examples/substra_core/titanic_example/assets/metric/Dockerfile: -------------------------------------------------------------------------------- 1 | # this base image works in both CPU and GPU enabled environments 2 | FROM python:3.12-slim 3 | 4 | # install dependencies 5 | RUN pip3 install pandas numpy 'scikit-learn==1.5.2' substratools 6 | 7 | # add your metrics script to docker image 8 | ADD titanic_metrics.py . 9 | 10 | # define how script is run 11 | ENTRYPOINT ["python3", "titanic_metrics.py", "--function-name", "score"] 12 | -------------------------------------------------------------------------------- /docs/source/examples/substra_core/titanic_example/assets/metric/description.md: -------------------------------------------------------------------------------- 1 | # Titanic: Machine Learning From disaster 2 | 3 | *Taken from [the similarly named Kaggle challenge](https://www.kaggle.com/c/titanic/overview)* 4 | 5 | The sinking of the RMS Titanic is one of the most infamous shipwrecks in history. On April 15, 1912, during her maiden voyage, the Titanic sank after colliding with an iceberg, killing 1502 out of 2224 passengers and crew. 
@tools.register
def score(inputs, outputs, task_properties):
    """Compute the accuracy of the predictions and save it to ``outputs["performance"]``."""
    expected = inputs["datasamples"].Survived.values
    predicted = load_predictions(inputs["predictions"])

    accuracy = accuracy_score(expected, predicted)
    tools.save_performance(accuracy, outputs["performance"])


def load_predictions(path):
    """Read the predictions CSV file stored at ``path`` into a DataFrame."""
    predictions = pd.read_csv(path)
    return predictions


if __name__ == "__main__":
    tools.execute()
-------------------------------------------------------------------------------- 1 | ,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked 2 | 171,172,0,3,"Rice, Master. Arthur",male,4.0,4,1,382652,29.125,,Q 3 | 690,691,1,1,"Dick, Mr. Albert Adrian",male,31.0,1,0,17474,57.0,B20,S 4 | 225,226,0,3,"Berglund, Mr. Karl Ivar Sven",male,22.0,0,0,PP 4348,9.35,,S 5 | 170,171,0,1,"Van der hoef, Mr. Wyckoff",male,61.0,0,0,111240,33.5,B19,S 6 | 528,529,0,3,"Salonen, Mr. Johan Werner",male,39.0,0,0,3101296,7.925,,S 7 | 50,51,0,3,"Panula, Master. Juha Niilo",male,7.0,4,1,3101295,39.6875,,S 8 | 435,436,1,1,"Carter, Miss. Lucile Polk",female,14.0,1,2,113760,120.0,B96 B98,S 9 | 483,484,1,3,"Turkula, Mrs. (Hedwig)",female,63.0,0,0,4134,9.5875,,S 10 | 817,818,0,2,"Mallet, Mr. Albert",male,31.0,1,1,S.C./PARIS 2079,37.0042,,C 11 | 592,593,0,3,"Elsbury, Mr. William James",male,47.0,0,0,A/5 3902,7.25,,S 12 | 637,638,0,2,"Collyer, Mr. Harvey",male,31.0,1,1,C.A. 31921,26.25,,S 13 | 840,841,0,3,"Alhomaki, Mr. Ilmari Rudolf",male,20.0,0,0,SOTON/O2 3101287,7.925,,S 14 | 206,207,0,3,"Backstrom, Mr. Karl Alfred",male,32.0,1,0,3101278,15.85,,S 15 | 390,391,1,1,"Carter, Mr. William Ernest",male,36.0,1,2,113760,120.0,B96 B98,S 16 | 299,300,1,1,"Baxter, Mrs. James (Helene DeLaudeniere Chaput)",female,50.0,0,1,PC 17558,247.5208,B58 B60,C 17 | 551,552,0,2,"Sharp, Mr. Percival James R",male,27.0,0,0,244358,26.0,,S 18 | 416,417,1,2,"Drew, Mrs. James Vivian (Lulu Thorne Christian)",female,34.0,1,1,28220,32.5,,S 19 | 575,576,0,3,"Patchett, Mr. George",male,19.0,0,0,358585,14.5,,S 20 | 544,545,0,1,"Douglas, Mr. Walter Donald",male,50.0,1,0,PC 17761,106.425,C86,C 21 | 576,577,1,2,"Garside, Miss. Ethel",female,34.0,0,0,243880,13.0,,S 22 | 670,671,1,2,"Brown, Mrs. Thomas William Solomon (Elizabeth Catherine Ford)",female,40.0,1,1,29750,39.0,,S 23 | 238,239,0,2,"Pengelly, Mr. Frederick William",male,19.0,0,0,28665,10.5,,S 24 | 761,762,0,3,"Nirva, Mr. 
Iisakki Antino Aijo",male,41.0,0,0,SOTON/O2 3101272,7.125,,S 25 | 526,527,1,2,"Ridsdale, Miss. Lucy",female,50.0,0,0,W./C. 14258,10.5,,S 26 | 60,61,0,3,"Sirayanian, Mr. Orsen",male,22.0,0,0,2669,7.2292,,C 27 | 609,610,1,1,"Shutes, Miss. Elizabeth W",female,40.0,0,0,PC 17582,153.4625,C125,S 28 | 661,662,0,3,"Badt, Mr. Mohamed",male,40.0,0,0,2623,7.225,,C 29 | 257,258,1,1,"Cherry, Miss. Gladys",female,30.0,0,0,110152,86.5,B77,S 30 | 533,534,1,3,"Peter, Mrs. Catherine (Catherine Rizk)",female,,0,2,2668,22.3583,,C 31 | 545,546,0,1,"Nicholson, Mr. Arthur Ernest",male,64.0,0,0,693,26.0,,S 32 | 791,792,0,2,"Gaskell, Mr. Alfred",male,16.0,0,0,239865,26.0,,S 33 | 720,721,1,2,"Harper, Miss. Annie Jessie ""Nina""",female,6.0,0,1,248727,33.0,,S 34 | 129,130,0,3,"Ekstrom, Mr. Johan",male,45.0,0,0,347061,6.975,,S 35 | 391,392,1,3,"Jansson, Mr. Carl Olof",male,21.0,0,0,350034,7.7958,,S 36 | 767,768,0,3,"Mangan, Miss. Mary",female,30.5,0,0,364850,7.75,,Q 37 | 782,783,0,1,"Long, Mr. Milton Clyde",male,29.0,0,0,113501,30.0,D6,S 38 | 598,599,0,3,"Boulos, Mr. Hanna",male,,0,0,2664,7.225,,C 39 | 345,346,1,2,"Brown, Miss. Amelia ""Mildred""",female,24.0,0,0,248733,13.0,F33,S 40 | 81,82,1,3,"Sheerlinck, Mr. Jan Baptist",male,29.0,0,0,345779,9.5,,S 41 | 718,719,0,3,"McEvoy, Mr. Michael",male,,0,0,36568,15.5,,Q 42 | 638,639,0,3,"Panula, Mrs. Juha (Maria Emilia Ojala)",female,41.0,0,5,3101295,39.6875,,S 43 | 92,93,0,1,"Chaffee, Mr. Herbert Fuller",male,46.0,1,0,W.E.P. 5734,61.175,E31,S 44 | 374,375,0,3,"Palsson, Miss. Stina Viola",female,3.0,3,1,349909,21.075,,S 45 | 853,854,1,1,"Lines, Miss. Mary Conover",female,16.0,0,1,PC 17592,39.4,D28,S 46 | 622,623,1,3,"Nakid, Mr. Sahid",male,20.0,1,1,2653,15.7417,,C 47 | 133,134,1,2,"Weisz, Mrs. Leopold (Mathilde Francoise Pede)",female,29.0,1,0,228414,26.0,,S 48 | 362,363,0,3,"Barbara, Mrs. (Catherine David)",female,45.0,0,1,2691,14.4542,,C 49 | 815,816,0,1,"Fry, Mr. Richard",male,,0,0,112058,0.0,B102,S 50 | 784,785,0,3,"Ali, Mr. 
William",male,25.0,0,0,SOTON/O.Q. 3101312,7.05,,S 51 | 368,369,1,3,"Jermyn, Miss. Annie",female,,0,0,14313,7.75,,Q 52 | 20,21,0,2,"Fynney, Mr. Joseph J",male,35.0,0,0,239865,26.0,,S 53 | 687,688,0,3,"Dakic, Mr. Branko",male,19.0,0,0,349228,10.1708,,S 54 | 641,642,1,1,"Sagesser, Mlle. Emma",female,24.0,0,0,PC 17477,69.3,B35,C 55 | 613,614,0,3,"Horgan, Mr. John",male,,0,0,370377,7.75,,Q 56 | 626,627,0,2,"Kirkland, Rev. Charles Leonard",male,57.0,0,0,219533,12.35,,Q 57 | 799,800,0,3,"Van Impe, Mrs. Jean Baptiste (Rosalie Paula Govaert)",female,30.0,1,1,345773,24.15,,S 58 | 515,516,0,1,"Walker, Mr. William Anderson",male,47.0,0,0,36967,34.0208,D46,S 59 | 870,871,0,3,"Balkic, Mr. Cerin",male,26.0,0,0,349248,7.8958,,S 60 | 356,357,1,1,"Bowerman, Miss. Elsie Edith",female,22.0,0,1,113505,55.0,E33,S 61 | 252,253,0,1,"Stead, Mr. William Thomas",male,62.0,0,0,113514,26.55,C87,S 62 | 130,131,0,3,"Drazenoic, Mr. Jozef",male,33.0,0,0,349241,7.8958,,C 63 | 166,167,1,1,"Chibnall, Mrs. (Edith Martha Bowerman)",female,,0,1,113505,55.0,E33,S 64 | 583,584,0,1,"Ross, Mr. John Hugo",male,36.0,0,0,13049,40.125,A10,C 65 | 721,722,0,3,"Jensen, Mr. Svend Lauritz",male,17.0,1,0,350048,7.0542,,S 66 | 542,543,0,3,"Andersson, Miss. Sigrid Elisabeth",female,11.0,4,2,347082,31.275,,S 67 | 707,708,1,1,"Calderhead, Mr. Edward Pennington",male,42.0,0,0,PC 17476,26.2875,E24,S 68 | 742,743,1,1,"Ryerson, Miss. Susan Parker ""Suzette""",female,21.0,2,2,PC 17608,262.375,B57 B59 B63 B66,C 69 | 260,261,0,3,"Smith, Mr. Thomas",male,,0,0,384461,7.75,,Q 70 | 22,23,1,3,"McGowan, Miss. Anna ""Annie""",female,15.0,0,0,330923,8.0292,,Q 71 | 694,695,0,1,"Weir, Col. John",male,60.0,0,0,113800,26.55,,S 72 | 773,774,0,3,"Elias, Mr. Dibo",male,,0,0,2674,7.225,,C 73 | 70,71,0,2,"Jenkin, Mr. Stephen Curnow",male,32.0,0,0,C.A. 
33111,10.5,,S 74 | -------------------------------------------------------------------------------- /docs/source/examples/substra_core/titanic_example/assets/train_data_samples/data_sample_1/data_sample_1.csv: -------------------------------------------------------------------------------- 1 | ,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked 2 | 352,353,0,3,"Elias, Mr. Tannous",male,15.0,1,1,2695,7.2292,,C 3 | 703,704,0,3,"Gallagher, Mr. Martin",male,25.0,0,0,36864,7.7417,,Q 4 | 210,211,0,3,"Ali, Mr. Ahmed",male,24.0,0,0,SOTON/O.Q. 3101311,7.05,,S 5 | 222,223,0,3,"Green, Mr. George Henry",male,51.0,0,0,21440,8.05,,S 6 | 213,214,0,2,"Givard, Mr. Hans Kristensen",male,30.0,0,0,250646,13.0,,S 7 | 7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S 8 | 367,368,1,3,"Moussa, Mrs. (Mantoura Boulos)",female,,0,0,2626,7.2292,,C 9 | 647,648,1,1,"Simonius-Blumer, Col. Oberst Alfons",male,56.0,0,0,13213,35.5,A26,C 10 | 875,876,1,3,"Najib, Miss. Adele Kiamie ""Jane""",female,15.0,0,0,2667,7.225,,C 11 | 442,443,0,3,"Petterson, Mr. Johan Emil",male,25.0,1,0,347076,7.775,,S 12 | 652,653,0,3,"Kalvik, Mr. Johannes Halvorsen",male,21.0,0,0,8475,8.4333,,S 13 | 109,110,1,3,"Moran, Miss. Bertha",female,,1,0,371110,24.15,,Q 14 | 307,308,1,1,"Penasco y Castellana, Mrs. Victor de Satode (Maria Josefa Perez de Soto y Vallejo)",female,17.0,1,0,PC 17758,108.9,C65,C 15 | 254,255,0,3,"Rosblom, Mrs. Viktor (Helena Wilhelmina)",female,41.0,0,2,370129,20.2125,,S 16 | 143,144,0,3,"Burke, Mr. Jeremiah",male,19.0,0,0,365222,6.75,,Q 17 | 104,105,0,3,"Gustafsson, Mr. Anders Vilhelm",male,37.0,2,0,3101276,7.925,,S 18 | 388,389,0,3,"Sadlier, Mr. Matthew",male,,0,0,367655,7.7292,,Q 19 | 754,755,1,2,"Herman, Mrs. Samuel (Jane Laver)",female,48.0,1,2,220845,65.0,,S 20 | 475,476,0,1,"Clifford, Mr. George Quincy",male,,0,0,110465,52.0,A14,S 21 | 455,456,1,3,"Jalsevac, Mr. Ivan",male,29.0,0,0,349240,7.8958,,C 22 | 655,656,0,2,"Hickman, Mr. 
Leonard Mark",male,24.0,2,0,S.O.C. 14879,73.5,,S 23 | 35,36,0,1,"Holverson, Mr. Alexander Oskar",male,42.0,1,0,113789,52.0,,S 24 | 566,567,0,3,"Stoytcheff, Mr. Ilia",male,19.0,0,0,349205,7.8958,,S 25 | 804,805,1,3,"Hedman, Mr. Oskar Arvid",male,27.0,0,0,347089,6.975,,S 26 | 867,868,0,1,"Roebling, Mr. Washington Augustus II",male,31.0,0,0,PC 17590,50.4958,A24,S 27 | 822,823,0,1,"Reuchlin, Jonkheer. John George",male,38.0,0,0,19972,0.0,,S 28 | 141,142,1,3,"Nysten, Miss. Anna Sofia",female,22.0,0,0,347081,7.75,,S 29 | 800,801,0,2,"Ponesell, Mr. Martin",male,34.0,0,0,250647,13.0,,S 30 | 632,633,1,1,"Stahelin-Maeglin, Dr. Max",male,32.0,0,0,13214,30.5,B50,C 31 | 806,807,0,1,"Andrews, Mr. Thomas Jr",male,39.0,0,0,112050,0.0,A36,S 32 | 812,813,0,2,"Slemen, Mr. Richard James",male,35.0,0,0,28206,10.5,,S 33 | 525,526,0,3,"Farrell, Mr. James",male,40.5,0,0,367232,7.75,,Q 34 | 737,738,1,1,"Lesurer, Mr. Gustave J",male,35.0,0,0,PC 17755,512.3292,B101,C 35 | 508,509,0,3,"Olsen, Mr. Henry Margido",male,28.0,0,0,C 4001,22.525,,S 36 | 527,528,0,1,"Farthing, Mr. John",male,,0,0,PC 17483,221.7792,C95,S 37 | 46,47,0,3,"Lennon, Mr. Denis",male,,1,0,370371,15.5,,Q 38 | 406,407,0,3,"Widegren, Mr. Carl/Charles Peter",male,51.0,0,0,347064,7.75,,S 39 | 777,778,1,3,"Emanuel, Miss. Virginia Ethel",female,5.0,0,0,364516,12.475,,S 40 | 836,837,0,3,"Pasic, Mr. Jakob",male,21.0,0,0,315097,8.6625,,S 41 | 265,266,0,2,"Reeves, Mr. David",male,36.0,0,0,C.A. 17248,10.5,,S 42 | 608,609,1,2,"Laroche, Mrs. Joseph (Juliette Marie Louise Lafargue)",female,22.0,1,2,SC/Paris 2123,41.5792,,C 43 | 335,336,0,3,"Denkoff, Mr. Mitto",male,,0,0,349225,7.8958,,S 44 | 634,635,0,3,"Skoog, Miss. Mabel",female,9.0,3,2,347088,27.9,,S 45 | 195,196,1,1,"Lurette, Miss. Elise",female,58.0,0,0,PC 17569,146.5208,B80,C 46 | 127,128,1,3,"Madsen, Mr. Fridtjof Arne",male,24.0,0,0,C 17369,7.1417,,S 47 | 855,856,1,3,"Aks, Mrs. Sam (Leah Rosen)",female,18.0,0,1,392091,9.35,,S 48 | 712,713,1,1,"Taylor, Mr. 
Elmer Zebley",male,48.0,1,0,19996,52.0,C126,S 49 | 219,220,0,2,"Harris, Mr. Walter",male,30.0,0,0,W/C 14208,10.5,,S 50 | 700,701,1,1,"Astor, Mrs. John Jacob (Madeleine Talmadge Force)",female,18.0,1,0,PC 17757,227.525,C62 C64,C 51 | 364,365,0,3,"O'Brien, Mr. Thomas",male,,1,0,370365,15.5,,Q 52 | 665,666,0,2,"Hickman, Mr. Lewis",male,32.0,2,0,S.O.C. 14879,73.5,,S 53 | 102,103,0,1,"White, Mr. Richard Frasar",male,21.0,0,1,35281,77.2875,D26,S 54 | 305,306,1,1,"Allison, Master. Hudson Trevor",male,0.92,1,2,113781,151.55,C22 C26,S 55 | 743,744,0,3,"McNamee, Mr. Neal",male,24.0,1,0,376566,16.1,,S 56 | 752,753,0,3,"Vande Velde, Mr. Johannes Joseph",male,33.0,0,0,345780,9.5,,S 57 | 878,879,0,3,"Laleff, Mr. Kristo",male,,0,0,349217,7.8958,,S 58 | 763,764,1,1,"Carter, Mrs. William Ernest (Lucile Polk)",female,36.0,1,2,113760,120.0,B96 B98,S 59 | 798,799,0,3,"Ibrahim Shawah, Mr. Yousseff",male,30.0,0,0,2685,7.2292,,C 60 | 883,884,0,2,"Banfield, Mr. Frederick James",male,28.0,0,0,C.A./SOTON 34068,10.5,,S 61 | 823,824,1,3,"Moor, Mrs. (Beila)",female,27.0,0,1,392096,12.475,E121,S 62 | 667,668,0,3,"Rommetvedt, Mr. Knud Paust",male,,0,0,312993,7.775,,S 63 | 611,612,0,3,"Jardin, Mr. Jose Neto",male,,0,0,SOTON/O.Q. 3101305,7.05,,S 64 | 594,595,0,2,"Chapman, Mr. John Henry",male,37.0,1,0,SC/AH 29037,26.0,,S 65 | 491,492,0,3,"Windelov, Mr. Einar",male,21.0,0,0,SOTON/OQ 3101317,7.25,,S 66 | 248,249,1,1,"Beckwith, Mr. Richard Leonard",male,37.0,1,1,11751,52.5542,D35,S 67 | 772,773,0,2,"Mack, Mrs. (Mary)",female,57.0,0,0,S.O./P.P. 3,10.5,E77,S 68 | 563,564,0,3,"Simmons, Mr. John",male,,0,0,SOTON/OQ 392082,8.05,,S 69 | 550,551,1,1,"Thayer, Mr. John Borland Jr",male,17.0,0,2,17421,110.8833,C70,C 70 | 32,33,1,3,"Glynn, Miss. Mary Agatha",female,,0,0,335677,7.75,,Q 71 | 580,581,1,2,"Christy, Miss. Julie Rachel",female,25.0,1,1,237789,30.0,,S 72 | 708,709,1,1,"Cleaver, Miss. Alice",female,22.0,0,0,113781,151.55,,S 73 | 274,275,1,3,"Healy, Miss. 
Hanora ""Nora""",female,,0,0,370375,7.75,,Q 74 | -------------------------------------------------------------------------------- /docs/source/examples/substra_core/titanic_example/assets/train_data_samples/data_sample_2/data_sample_2.csv: -------------------------------------------------------------------------------- 1 | ,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked 2 | 567,568,0,3,"Palsson, Mrs. Nils (Alma Cornelia Berglund)",female,29.0,0,4,349909,21.075,,S 3 | 556,557,1,1,"Duff Gordon, Lady. (Lucille Christiana Sutherland) (""Mrs Morgan"")",female,48.0,1,0,11755,39.6,A16,C 4 | 383,384,1,1,"Holverson, Mrs. Alexander Oskar (Mary Aline Towner)",female,35.0,1,0,113789,52.0,,S 5 | 23,24,1,1,"Sloper, Mr. William Thompson",male,28.0,0,0,113788,35.5,A6,S 6 | 645,646,1,1,"Harper, Mr. Henry Sleeper",male,48.0,1,0,PC 17572,76.7292,D33,C 7 | 753,754,0,3,"Jonkoff, Mr. Lalio",male,23.0,0,0,349204,7.8958,,S 8 | 321,322,0,3,"Danoff, Mr. Yoto",male,27.0,0,0,349219,7.8958,,S 9 | 775,776,0,3,"Myhrman, Mr. Pehr Fabian Oliver Malkolm",male,18.0,0,0,347078,7.75,,S 10 | 876,877,0,3,"Gustafsson, Mr. Alfred Ossian",male,20.0,0,0,7534,9.8458,,S 11 | 434,435,0,1,"Silvey, Mr. William Baird",male,50.0,1,0,13507,55.9,E44,S 12 | 239,240,0,2,"Hunt, Mr. George Henry",male,33.0,0,0,SCO/W 1585,12.275,,S 13 | 885,886,0,3,"Rice, Mrs. William (Margaret Norton)",female,39.0,0,5,382652,29.125,,Q 14 | 860,861,0,3,"Hansen, Mr. Claus Peter",male,41.0,2,0,350026,14.1083,,S 15 | 709,710,1,3,"Moubarek, Master. Halim Gonios (""William George"")",male,,1,1,2661,15.2458,,C 16 | 227,228,0,3,"Lovell, Mr. John Hall (""Henry"")",male,20.5,0,0,A/5 21173,7.25,,S 17 | 317,318,0,2,"Moraweck, Dr. Ernest",male,54.0,0,0,29011,14.0,,S 18 | 408,409,0,3,"Birkeland, Mr. Hans Martin Monsen",male,21.0,0,0,312992,7.775,,S 19 | 204,205,1,3,"Cohen, Mr. Gurshon ""Gus""",male,18.0,0,0,A/5 3540,8.05,,S 20 | 557,558,0,1,"Robbins, Mr. 
Victor",male,,0,0,PC 17757,227.525,,C 21 | 232,233,0,2,"Sjostedt, Mr. Ernst Adolf",male,59.0,0,0,237442,13.5,,S 22 | 453,454,1,1,"Goldenberg, Mr. Samuel L",male,49.0,1,0,17453,89.1042,C92,C 23 | 181,182,0,2,"Pernot, Mr. Rene",male,,0,0,SC/PARIS 2131,15.05,,C 24 | 180,181,0,3,"Sage, Miss. Constance Gladys",female,,8,2,CA. 2343,69.55,,S 25 | 618,619,1,2,"Becker, Miss. Marion Louise",female,4.0,2,1,230136,39.0,F4,S 26 | 319,320,1,1,"Spedden, Mrs. Frederic Oakley (Margaretta Corning Stone)",female,40.0,1,1,16966,134.5,E34,C 27 | 602,603,0,1,"Harrington, Mr. Charles H",male,,0,0,113796,42.4,,S 28 | 478,479,0,3,"Karlsson, Mr. Nils August",male,22.0,0,0,350060,7.5208,,S 29 | 228,229,0,2,"Fahlstrom, Mr. Arne Jonas",male,18.0,0,0,236171,13.0,,S 30 | 63,64,0,3,"Skoog, Master. Harald",male,4.0,3,2,347088,27.9,,S 31 | 851,852,0,3,"Svensson, Mr. Johan",male,74.0,0,0,347060,7.775,,S 32 | 220,221,1,3,"Sunderland, Mr. Victor Francis",male,16.0,0,0,SOTON/OQ 392089,8.05,,S 33 | 736,737,0,3,"Ford, Mrs. Edward (Margaret Ann Watson)",female,48.0,1,3,W./C. 6608,34.375,,S 34 | 486,487,1,1,"Hoyt, Mrs. Frederick Maxfield (Jane Anne Forby)",female,35.0,1,0,19943,90.0,C93,S 35 | 535,536,1,2,"Hart, Miss. Eva Miriam",female,7.0,0,2,F.C.C. 13529,26.25,,S 36 | 643,644,1,3,"Foo, Mr. Choong",male,,0,0,1601,56.4958,,S 37 | 571,572,1,1,"Appleton, Mrs. Edward Dale (Charlotte Lamson)",female,53.0,2,0,11769,51.4792,C101,S 38 | 421,422,0,3,"Charters, Mr. David",male,21.0,0,0,A/5. 13032,7.7333,,Q 39 | 287,288,0,3,"Naidenoff, Mr. Penko",male,22.0,0,0,349206,7.8958,,S 40 | 859,860,0,3,"Razi, Mr. Raihed",male,,0,0,2629,7.2292,,C 41 | 283,284,1,3,"Dorking, Mr. Edward Arthur",male,19.0,0,0,A/5. 10482,8.05,,S 42 | 262,263,0,1,"Taussig, Mr. Emil",male,52.0,1,1,110413,79.65,E67,S 43 | 874,875,1,2,"Abelson, Mrs. Samuel (Hannah Wizosky)",female,28.0,1,0,P/PP 3381,24.0,,C 44 | 193,194,1,2,"Navratil, Master. Michel M",male,3.0,1,1,230080,26.0,F2,S 45 | 351,352,0,1,"Williams-Lambert, Mr. 
Fletcher Fellows",male,,0,0,113510,35.0,C128,S 46 | 485,486,0,3,"Lefebre, Miss. Jeannie",female,,3,1,4133,25.4667,,S 47 | 69,70,0,3,"Kink, Mr. Vincenz",male,26.0,2,0,315151,8.6625,,S 48 | 67,68,0,3,"Crease, Mr. Ernest James",male,19.0,0,0,S.P. 3464,8.1583,,S 49 | 182,183,0,3,"Asplund, Master. Clarence Gustaf Hugo",male,9.0,4,2,347077,31.3875,,S 50 | 58,59,1,2,"West, Miss. Constance Mirium",female,5.0,1,2,C.A. 34651,27.75,,S 51 | 95,96,0,3,"Shorney, Mr. Charles Joseph",male,,0,0,374910,8.05,,S 52 | 835,836,1,1,"Compton, Miss. Sara Rebecca",female,39.0,1,1,PC 17756,83.1583,E49,C 53 | 236,237,0,2,"Hold, Mr. Stephen",male,44.0,1,0,26707,26.0,,S 54 | 425,426,0,3,"Wiseman, Mr. Phillippe",male,,0,0,A/4. 34244,7.25,,S 55 | 482,483,0,3,"Rouse, Mr. Richard Henry",male,50.0,0,0,A/5 3594,8.05,,S 56 | 108,109,0,3,"Rekic, Mr. Tido",male,38.0,0,0,349249,7.8958,,S 57 | 243,244,0,3,"Maenpaa, Mr. Matti Alexanteri",male,22.0,0,0,STON/O 2. 3101275,7.125,,S 58 | 560,561,0,3,"Morrow, Mr. Thomas Rowan",male,,0,0,372622,7.75,,Q 59 | 403,404,0,3,"Hakkarainen, Mr. Pekka Pietari",male,28.0,1,0,STON/O2. 3101279,15.85,,S 60 | 43,44,1,2,"Laroche, Miss. Simonne Marie Anne Andree",female,3.0,1,2,SC/Paris 2123,41.5792,,C 61 | 436,437,0,3,"Ford, Miss. Doolina Margaret ""Daisy""",female,21.0,2,2,W./C. 6608,34.375,,S 62 | 234,235,0,2,"Leyson, Mr. Robert William Norman",male,24.0,0,0,C.A. 29566,10.5,,S 63 | 251,252,0,3,"Strom, Mrs. Wilhelm (Elna Matilda Persson)",female,29.0,1,1,347054,10.4625,G6,S 64 | 201,202,0,3,"Sage, Mr. Frederick",male,,8,2,CA. 2343,69.55,,S 65 | 26,27,0,3,"Emir, Mr. Farred Chehab",male,,0,0,2631,7.225,,C 66 | 329,330,1,1,"Hippach, Miss. Jean Gertrude",female,16.0,0,1,111361,57.9792,B18,C 67 | 623,624,0,3,"Hansen, Mr. Henry Damsgaard",male,21.0,0,0,350029,7.8542,,S 68 | 214,215,0,3,"Kiernan, Mr. Philip",male,,1,0,367229,7.75,,Q 69 | 829,830,1,1,"Stone, Mrs. George Nelson (Martha Evelyn)",female,62.0,0,0,113572,80.0,B28, 70 | 197,198,0,3,"Olsen, Mr. 
Karl Siegwart Andreas",male,42.0,0,1,4579,8.4042,,S 71 | 468,469,0,3,"Scanlan, Mr. James",male,,0,0,36209,7.725,,Q 72 | 372,373,0,3,"Beavan, Mr. William Thomas",male,19.0,0,0,323951,8.05,,S 73 | -------------------------------------------------------------------------------- /docs/source/examples/substra_core/titanic_example/assets/train_data_samples/data_sample_3/data_sample_3.csv: -------------------------------------------------------------------------------- 1 | ,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked 2 | 505,506,0,1,"Penasco y Castellana, Mr. Victor de Satode",male,18.0,1,0,PC 17758,108.9,C65,C 3 | 411,412,0,3,"Hart, Mr. Henry",male,,0,0,394140,6.8583,,Q 4 | 820,821,1,1,"Hays, Mrs. Charles Melville (Clara Jennings Gregg)",female,52.0,1,1,12749,93.5,B69,S 5 | 500,501,0,3,"Calic, Mr. Petar",male,17.0,0,0,315086,8.6625,,S 6 | 366,367,1,1,"Warren, Mrs. Frank Manley (Anna Sophia Atkinson)",female,60.0,1,0,110813,75.25,D37,C 7 | 619,620,0,2,"Gavey, Mr. Lawrence",male,26.0,0,0,31028,10.5,,S 8 | 101,102,0,3,"Petroff, Mr. Pastcho (""Pentcho"")",male,,0,0,349215,7.8958,,S 9 | 534,535,0,3,"Cacic, Miss. Marija",female,30.0,0,0,315084,8.6625,,S 10 | 168,169,0,1,"Baumann, Mr. John D",male,,0,0,PC 17318,25.925,,S 11 | 834,835,0,3,"Allum, Mr. Owen George",male,18.0,0,0,2223,8.3,,S 12 | 18,19,0,3,"Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)",female,31.0,1,0,345763,18.0,,S 13 | 449,450,1,1,"Peuchen, Major. Arthur Godfrey",male,52.0,0,0,113786,30.5,C104,S 14 | 651,652,1,2,"Doling, Miss. Elsie",female,18.0,0,1,231919,23.0,,S 15 | 509,510,1,3,"Lang, Mr. Fang",male,26.0,0,0,1601,56.4958,,S 16 | 471,472,0,3,"Cacic, Mr. Luka",male,38.0,0,0,315089,8.6625,,S 17 | 524,525,0,3,"Kassem, Mr. Fared",male,,0,0,2700,7.2292,,C 18 | 400,401,1,3,"Niskanen, Mr. Juha",male,39.0,0,0,STON/O 2. 3101289,7.925,,S 19 | 126,127,0,3,"McMahon, Mr. Martin",male,,0,0,370372,7.75,,Q 20 | 470,471,0,3,"Keefe, Mr. 
Arthur",male,,0,0,323592,7.25,,S 21 | 324,325,0,3,"Sage, Mr. George John Jr",male,,8,2,CA. 2343,69.55,,S 22 | 614,615,0,3,"Brocklebank, Mr. William Alfred",male,35.0,0,0,364512,8.05,,S 23 | 740,741,1,1,"Hawksford, Mr. Walter James",male,,0,0,16988,30.0,D45,S 24 | 591,592,1,1,"Stephenson, Mrs. Walter Bertram (Martha Eustis)",female,52.0,1,0,36947,78.2667,D20,C 25 | 543,544,1,2,"Beane, Mr. Edward",male,32.0,1,0,2908,26.0,,S 26 | 409,410,0,3,"Lefebre, Miss. Ida",female,,3,1,4133,25.4667,,S 27 | 202,203,0,3,"Johanson, Mr. Jakob Alfred",male,34.0,0,0,3101264,6.4958,,S 28 | 386,387,0,3,"Goodwin, Master. Sidney Leonard",male,1.0,5,2,CA 2144,46.9,,S 29 | 805,806,0,3,"Johansson, Mr. Karl Johan",male,31.0,0,0,347063,7.775,,S 30 | 29,30,0,3,"Todoroff, Mr. Lalio",male,,0,0,349216,7.8958,,S 31 | 457,458,1,1,"Kenyon, Mrs. Frederick R (Marion)",female,,1,0,17464,51.8625,D21,S 32 | 504,505,1,1,"Maioni, Miss. Roberta",female,16.0,0,0,110152,86.5,B79,S 33 | 56,57,1,2,"Rugg, Miss. Emily",female,21.0,0,0,C.A. 31026,10.5,,S 34 | 640,641,0,3,"Jensen, Mr. Hans Peder",male,20.0,0,0,350050,7.8542,,S 35 | 738,739,0,3,"Ivanoff, Mr. Kanio",male,,0,0,349201,7.8958,,S 36 | 833,834,0,3,"Augustsson, Mr. Albert",male,23.0,0,0,347468,7.8542,,S 37 | 801,802,1,2,"Collyer, Mrs. Harvey (Charlotte Annie Tate)",female,31.0,1,1,C.A. 31921,26.25,,S 38 | 270,271,0,1,"Cairns, Mr. Alexander",male,,0,0,113798,31.0,,S 39 | 221,222,0,2,"Bracken, Mr. James H",male,27.0,0,0,220367,13.0,,S 40 | 607,608,1,1,"Daniel, Mr. Robert Williams",male,27.0,0,0,113804,30.5,,S 41 | 477,478,0,3,"Braund, Mr. Lewis Richard",male,29.0,1,0,3460,7.0458,,S 42 | 589,590,0,3,"Murdlin, Mr. Joseph",male,,0,0,A./5. 3235,8.05,,S 43 | 476,477,0,2,"Renouf, Mr. Peter Henry",male,34.0,1,0,31027,21.0,,S 44 | 17,18,1,2,"Williams, Mr. Charles Eugene",male,,0,0,244373,13.0,,S 45 | 856,857,1,1,"Wick, Mrs. George Dennick (Mary Hitchcock)",female,45.0,1,1,36928,164.8667,,S 46 | 14,15,0,3,"Vestrom, Miss. 
Hulda Amanda Adolfina",female,14.0,0,0,350406,7.8542,,S 47 | 418,419,0,2,"Matthews, Mr. William John",male,30.0,0,0,28228,13.0,,S 48 | 624,625,0,3,"Bowen, Mr. David John ""Dai""",male,21.0,0,0,54636,16.1,,S 49 | 669,670,1,1,"Taylor, Mrs. Elmer Zebley (Juliet Cummins Wright)",female,,1,0,19996,52.0,C126,S 50 | 813,814,0,3,"Andersson, Miss. Ebba Iris Alfrida",female,6.0,4,2,347082,31.275,,S 51 | 697,698,1,3,"Mullens, Miss. Katherine ""Katie""",female,,0,0,35852,7.7333,,Q 52 | 178,179,0,2,"Hale, Mr. Reginald",male,30.0,0,0,250653,13.0,,S 53 | 446,447,1,2,"Mellinger, Miss. Madeleine Violet",female,13.0,0,1,250644,19.5,,S 54 | 890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q 55 | 348,349,1,3,"Coutts, Master. William Loch ""William""",male,3.0,1,1,C.A. 37671,15.9,,S 56 | 460,461,1,1,"Anderson, Mr. Harry",male,48.0,0,0,19952,26.55,E12,S 57 | 864,865,0,2,"Gill, Mr. John William",male,24.0,0,0,233866,13.0,,S 58 | 207,208,1,3,"Albimona, Mr. Nassef Cassem",male,26.0,0,0,2699,18.7875,,C 59 | 282,283,0,3,"de Pelsmaeker, Mr. Alfons",male,16.0,0,0,345778,9.5,,S 60 | 185,186,0,1,"Rood, Mr. Hugh Roscoe",male,,0,0,113767,50.0,A32,S 61 | 499,500,0,3,"Svensson, Mr. Olof",male,24.0,0,0,350035,7.7958,,S 62 | 746,747,0,3,"Abbott, Mr. Rossmore Edward",male,16.0,1,1,C.A. 2673,20.25,,S 63 | 451,452,0,3,"Hagland, Mr. Ingvald Olai Olsen",male,,1,0,65303,19.9667,,S 64 | 188,189,0,3,"Bourke, Mr. John",male,40.0,1,1,364849,15.5,,Q 65 | 531,532,0,3,"Toufik, Mr. Nakli",male,,0,0,2641,7.2292,,C 66 | 438,439,0,1,"Fortune, Mr. Mark",male,64.0,1,4,19950,263.0,C23 C25 C27,S 67 | 674,675,0,2,"Watson, Mr. Ennis Hastings",male,,0,0,239856,0.0,,S 68 | 518,519,1,2,"Angle, Mrs. William A (Florence ""Mary"" Agnes Hughes)",female,36.0,1,0,226875,26.0,,S 69 | 103,104,0,3,"Johansson, Mr. Gustaf Joel",male,33.0,0,0,7540,8.6542,,S 70 | 759,760,1,1,"Rothes, the Countess. of (Lucy Noel Martha Dyer-Edwards)",female,33.0,0,0,110152,86.5,B77,S 71 | 779,780,1,1,"Robert, Mrs. 
Edward Scott (Elisabeth Walton McMillan)",female,43.0,0,1,24160,211.3375,B3,S 72 | 354,355,0,3,"Yousif, Mr. Wazli",male,,0,0,2647,7.225,,C 73 | -------------------------------------------------------------------------------- /docs/source/examples/substra_core/titanic_example/assets/train_data_samples/data_sample_4/data_sample_4.csv: -------------------------------------------------------------------------------- 1 | ,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked 2 | 393,394,1,1,"Newell, Miss. Marjorie",female,23.0,1,0,35273,113.275,D36,C 3 | 606,607,0,3,"Karaic, Mr. Milan",male,30.0,0,0,349246,7.8958,,S 4 | 384,385,0,3,"Plotcharsky, Mr. Vasil",male,,0,0,349227,7.8958,,S 5 | 520,521,1,1,"Perreault, Miss. Anne",female,30.0,0,0,12749,93.5,B73,S 6 | 136,137,1,1,"Newsom, Miss. Helen Monypeny",female,19.0,0,2,11752,26.2833,D47,S 7 | 679,680,1,1,"Cardeza, Mr. Thomas Drake Martinez",male,36.0,0,1,PC 17755,512.3292,B51 B53 B55,C 8 | 816,817,0,3,"Heininen, Miss. Wendla Maria",female,23.0,0,0,STON/O2. 3101290,7.925,,S 9 | 830,831,1,3,"Yasbeck, Mrs. Antoni (Selini Alexander)",female,15.0,1,0,2659,14.4542,,C 10 | 858,859,1,3,"Baclini, Mrs. Solomon (Latifa Qurban)",female,24.0,0,3,2666,19.2583,,C 11 | 44,45,1,3,"Devaney, Miss. Margaret Delia",female,19.0,0,0,330958,7.8792,,Q 12 | 240,241,0,3,"Zabour, Miss. Thamine",female,,1,0,2665,14.4542,,C 13 | 64,65,0,1,"Stewart, Mr. Albert A",male,,0,0,PC 17605,27.7208,,C 14 | 343,344,0,2,"Sedgwick, Mr. Charles Frederick Waddington",male,25.0,0,0,244361,13.0,,S 15 | 552,553,0,3,"O'Brien, Mr. Timothy",male,,0,0,330979,7.8292,,Q 16 | 140,141,0,3,"Boulos, Mrs. Joseph (Sultana)",female,,0,2,2678,15.2458,,C 17 | 377,378,0,1,"Widener, Mr. Harry Elkins",male,27.0,0,2,113503,211.5,C82,C 18 | 124,125,0,1,"White, Mr. Percival Wayland",male,54.0,0,1,35281,77.2875,D26,S 19 | 392,393,0,3,"Gustafsson, Mr. Johan Birger",male,28.0,2,0,3101277,7.925,,S 20 | 229,230,0,3,"Lefebre, Miss. 
Mathilde",female,,3,1,4133,25.4667,,S 21 | 330,331,1,3,"McCoy, Miss. Agnes",female,,2,0,367226,23.25,,Q 22 | 114,115,0,3,"Attalah, Miss. Malake",female,17.0,0,0,2627,14.4583,,C 23 | 749,750,0,3,"Connaghton, Mr. Michael",male,31.0,0,0,335097,7.75,,Q 24 | 347,348,1,3,"Davison, Mrs. Thomas Henry (Mary E Finck)",female,,1,0,386525,16.1,,S 25 | 532,533,0,3,"Elias, Mr. Joseph Jr",male,17.0,1,1,2690,7.2292,,C 26 | 111,112,0,3,"Zabour, Miss. Hileni",female,14.5,1,0,2665,14.4542,,C 27 | 41,42,0,2,"Turpin, Mrs. William John Robert (Dorothy Ann Wonnacott)",female,27.0,1,0,11668,21.0,,S 28 | 497,498,0,3,"Shellard, Mr. Frederick William",male,,0,0,C.A. 6212,15.1,,S 29 | 790,791,0,3,"Keane, Mr. Andrew ""Andy""",male,,0,0,12460,7.75,,Q 30 | 871,872,1,1,"Beckwith, Mrs. Richard Leonard (Sallie Monypeny)",female,47.0,1,1,11751,52.5542,D35,S 31 | 285,286,0,3,"Stankovic, Mr. Ivan",male,33.0,0,0,349239,8.6625,,C 32 | 291,292,1,1,"Bishop, Mrs. Dickinson H (Helen Walton)",female,19.0,1,0,11967,91.0792,B49,C 33 | 555,556,0,1,"Wright, Mr. George",male,62.0,0,0,113807,26.55,,S 34 | 121,122,0,3,"Moore, Mr. Leonard Charles",male,,0,0,A4. 54510,8.05,,S 35 | 271,272,1,3,"Tornquist, Mr. William Henry",male,25.0,0,0,LINE,0.0,,S 36 | 75,76,0,3,"Moen, Mr. Sigurd Hansen",male,25.0,0,0,348123,7.65,F G73,S 37 | 267,268,1,3,"Persson, Mr. Ernst Ulrik",male,25.0,1,0,347083,7.775,,S 38 | 780,781,1,3,"Ayoub, Miss. Banoura",female,13.0,0,0,2687,7.2292,,C 39 | 781,782,1,1,"Dick, Mrs. Albert Adrian (Vera Gillespie)",female,17.0,1,0,17474,57.0,B20,S 40 | 811,812,0,3,"Lester, Mr. James",male,39.0,0,0,A/4 48871,24.15,,S 41 | 733,734,0,2,"Berriman, Mr. William John",male,23.0,0,0,28425,13.0,,S 42 | 91,92,0,3,"Andreasson, Mr. Paul Edvin",male,20.0,0,0,347466,7.8542,,S 43 | 151,152,1,1,"Pears, Mrs. Thomas (Edith Wearne)",female,22.0,1,0,113776,66.6,C2,S 44 | 496,497,1,1,"Eustis, Miss. Elizabeth Mussey",female,54.0,1,0,36947,78.2667,D20,C 45 | 125,126,1,3,"Nicola-Yarred, Master. 
Elias",male,12.0,1,0,2651,11.2417,,C 46 | 861,862,0,2,"Giles, Mr. Frederick Edward",male,21.0,1,0,28134,11.5,,S 47 | 839,840,1,1,"Marechal, Mr. Pierre",male,,0,0,11774,29.7,C47,C 48 | 123,124,1,2,"Webber, Miss. Susan",female,32.5,0,0,27267,13.0,E101,S 49 | 776,777,0,3,"Tobin, Mr. Roger",male,,0,0,383121,7.75,F38,Q 50 | 573,574,1,3,"Kelly, Miss. Mary",female,,0,0,14312,7.75,,Q 51 | 272,273,1,2,"Mellinger, Mrs. (Elizabeth Anne Maidment)",female,41.0,0,1,250644,19.5,,S 52 | 596,597,1,2,"Leitch, Miss. Jessie Wills",female,,0,0,248727,33.0,,S 53 | 730,731,1,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B5,S 54 | 0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S 55 | 660,661,1,1,"Frauenthal, Dr. Henry William",male,50.0,2,0,PC 17611,133.65,,S 56 | 223,224,0,3,"Nenkoff, Mr. Christo",male,,0,0,349234,7.8958,,S 57 | 666,667,0,2,"Butler, Mr. Reginald Fenton",male,25.0,0,0,234686,13.0,,S 58 | 273,274,0,1,"Natsch, Mr. Charles H",male,37.0,0,1,PC 17596,29.7,C118,C 59 | 786,787,1,3,"Sjoblom, Miss. Anna Sofia",female,18.0,0,0,3101265,7.4958,,S 60 | 702,703,0,3,"Barbara, Miss. Saiide",female,18.0,0,1,2691,14.4542,,C 61 | 417,418,1,2,"Silven, Miss. Lyyli Karoliina",female,18.0,0,2,250652,13.0,,S 62 | 615,616,1,2,"Herman, Miss. Alice",female,24.0,1,2,220845,65.0,,S 63 | 11,12,1,1,"Bonnell, Miss. Elizabeth",female,58.0,0,0,113783,26.55,C103,S 64 | 788,789,1,3,"Dean, Master. Bertram Vere",male,1.0,1,2,C.A. 2315,20.575,,S 65 | 484,485,1,1,"Bishop, Mr. Dickinson H",male,25.0,1,0,11967,91.0792,B49,C 66 | 160,161,0,3,"Cribb, Mr. John Hatfield",male,44.0,0,1,371362,16.1,,S 67 | 128,129,1,3,"Peter, Miss. Anna",female,,1,1,2668,22.3583,F E69,C 68 | 137,138,0,1,"Futrelle, Mr. Jacques Heath",male,37.0,1,0,113803,53.1,C123,S 69 | 841,842,0,2,"Mudd, Mr. Thomas Charles",male,16.0,0,0,S.O./P.P. 3,10.5,,S 70 | 331,332,0,1,"Partner, Mr. Austen",male,45.5,0,0,113043,28.5,C124,S 71 | 766,767,0,1,"Brewe, Dr. 
Arthur Jackson",male,,0,0,112379,39.6,,C 72 | 769,770,0,3,"Gronnestad, Mr. Daniel Danielsen",male,32.0,0,0,8471,8.3625,,S 73 | -------------------------------------------------------------------------------- /docs/source/examples/substra_core/titanic_example/assets/train_data_samples/data_sample_5/data_sample_5.csv: -------------------------------------------------------------------------------- 1 | ,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked 2 | 691,692,1,3,"Karun, Miss. Manca",female,4.0,0,1,349256,13.4167,,C 3 | 292,293,0,2,"Levy, Mr. Rene Jacques",male,36.0,0,0,SC/Paris 2163,12.875,D,C 4 | 657,658,0,3,"Bourke, Mrs. John (Catherine)",female,32.0,1,1,364849,15.5,,Q 5 | 621,622,1,1,"Kimball, Mr. Edwin Nelson Jr",male,42.0,1,0,11753,52.5542,D19,S 6 | 293,294,0,3,"Haas, Miss. Aloisia",female,24.0,0,0,349236,8.85,,S 7 | 760,761,0,3,"Garfirth, Mr. John",male,,0,0,358585,14.5,,S 8 | 714,715,0,2,"Greenberg, Mr. Samuel",male,52.0,0,0,250647,13.0,,S 9 | 888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S 10 | 680,681,0,3,"Peters, Miss. Katie",female,,0,0,330935,8.1375,,Q 11 | 422,423,0,3,"Zimmerman, Mr. Leo",male,29.0,0,0,315082,7.875,,S 12 | 159,160,0,3,"Sage, Master. Thomas Henry",male,,8,2,CA. 2343,69.55,,S 13 | 793,794,0,1,"Hoyt, Mr. William Fisher",male,,0,0,PC 17600,30.6958,,C 14 | 845,846,0,3,"Abbing, Mr. Anthony",male,42.0,0,0,C.A. 5547,7.55,,S 15 | 24,25,0,3,"Palsson, Miss. Torborg Danira",female,8.0,3,1,349909,21.075,,S 16 | 821,822,1,3,"Lulic, Mr. Nikola",male,27.0,0,0,315098,8.6625,,S 17 | 578,579,0,3,"Caram, Mrs. Joseph (Maria Elias)",female,,1,0,2689,14.4583,,C 18 | 439,440,0,2,"Kvillner, Mr. Johan Henrik Johannesson",male,31.0,0,0,C.A. 18723,10.5,,S 19 | 172,173,1,3,"Johnson, Miss. Eleanor Ileen",female,1.0,1,1,347742,11.1333,,S 20 | 33,34,0,2,"Wheadon, Mr. Edward H",male,66.0,0,0,C.A. 24579,10.5,,S 21 | 419,420,0,3,"Van Impe, Miss. 
Catharina",female,10.0,0,2,345773,24.15,,S 22 | 346,347,1,2,"Smith, Miss. Marion Elsie",female,40.0,0,0,31418,13.0,,S 23 | 868,869,0,3,"van Melkebeke, Mr. Philemon",male,,0,0,345777,9.5,,S 24 | 138,139,0,3,"Osen, Mr. Olaf Elon",male,16.0,0,0,7534,9.2167,,S 25 | 581,582,1,1,"Thayer, Mrs. John Borland (Marian Longstreth Morris)",female,39.0,1,1,17421,110.8833,C68,C 26 | 279,280,1,3,"Abbott, Mrs. Stanton (Rosa Hunt)",female,35.0,1,1,C.A. 2673,20.25,,S 27 | 218,219,1,1,"Bazzani, Miss. Albina",female,32.0,0,0,11813,76.2917,D15,C 28 | 203,204,0,3,"Youseff, Mr. Gerious",male,45.5,0,0,2628,7.225,,C 29 | 258,259,1,1,"Ward, Miss. Anna",female,35.0,0,0,PC 17755,512.3292,,C 30 | 522,523,0,3,"Lahoud, Mr. Sarkis",male,,0,0,2624,7.225,,C 31 | 76,77,0,3,"Staneff, Mr. Ivan",male,,0,0,349208,7.8958,,S 32 | 514,515,0,3,"Coleff, Mr. Satio",male,24.0,0,0,349209,7.4958,,S 33 | 832,833,0,3,"Saad, Mr. Amin",male,,0,0,2671,7.2292,,C 34 | 429,430,1,3,"Pickard, Mr. Berk (Berk Trembisky)",male,32.0,0,0,SOTON/O.Q. 392078,8.05,E10,S 35 | 686,687,0,3,"Panula, Mr. Jaako Arnold",male,14.0,4,1,3101295,39.6875,,S 36 | 169,170,0,3,"Ling, Mr. Lee",male,28.0,0,0,1601,56.4958,,S 37 | 844,845,0,3,"Culumovic, Mr. Jeso",male,17.0,0,0,315090,8.6625,,S 38 | 231,232,0,3,"Larsson, Mr. Bengt Edvin",male,29.0,0,0,347067,7.775,,S 39 | 456,457,0,1,"Millet, Mr. Francis Davis",male,65.0,0,0,13509,26.55,E38,S 40 | 837,838,0,3,"Sirota, Mr. Maurice",male,,0,0,392092,8.05,,S 41 | 448,449,1,3,"Baclini, Miss. Marie Catherine",female,5.0,2,1,2666,19.2583,,C 42 | 604,605,1,1,"Homer, Mr. Harry (""Mr E Haven"")",male,35.0,0,0,111426,26.55,,C 43 | 704,705,0,3,"Hansen, Mr. Henrik Juul",male,26.0,1,0,350025,7.8542,,S 44 | 212,213,0,3,"Perkin, Mr. John Henry",male,22.0,0,0,A/5 21174,7.25,,S 45 | 247,248,1,2,"Hamalainen, Mrs. William (Anna)",female,24.0,0,2,250649,14.5,,S 46 | 382,383,0,3,"Tikkanen, Mr. Juho",male,32.0,0,0,STON/O 2. 3101293,7.925,,S 47 | 802,803,1,1,"Carter, Master. 
William Thornton II",male,11.0,1,2,113760,120.0,B96 B98,S 48 | 90,91,0,3,"Christmann, Mr. Emil",male,29.0,0,0,343276,8.05,,S 49 | 306,307,1,1,"Fleming, Miss. Margaret",female,,0,0,17421,110.8833,,C 50 | 394,395,1,3,"Sandstrom, Mrs. Hjalmar (Agnes Charlotta Bengtsson)",female,24.0,0,2,PP 9549,16.7,G6,S 51 | 318,319,1,1,"Wick, Miss. Mary Natalie",female,31.0,0,2,36928,164.8667,C7,S 52 | 472,473,1,2,"West, Mrs. Edwy Arthur (Ada Mary Worth)",female,33.0,1,2,C.A. 34651,27.75,,S 53 | 629,630,0,3,"O'Connell, Mr. Patrick D",male,,0,0,334912,7.7333,,Q 54 | 177,178,0,1,"Isham, Miss. Ann Elizabeth",female,50.0,0,0,PC 17595,28.7125,C49,C 55 | 847,848,0,3,"Markoff, Mr. Marin",male,35.0,0,0,349213,7.8958,,C 56 | 51,52,0,3,"Nosworthy, Mr. Richard Cater",male,21.0,0,0,A/4. 39886,7.8,,S 57 | 734,735,0,2,"Troupiansky, Mr. Moses Aaron",male,23.0,0,0,233639,13.0,,S 58 | 810,811,0,3,"Alexander, Mr. William",male,26.0,0,0,3474,7.8875,,S 59 | 25,26,1,3,"Asplund, Mrs. Carl Oscar (Selma Augusta Emilia Johansson)",female,38.0,1,5,347077,31.3875,,S 60 | 280,281,0,3,"Duane, Mr. Frank",male,65.0,0,0,336439,7.75,,Q 61 | 664,665,1,3,"Lindqvist, Mr. Eino William",male,20.0,1,0,STON/O 2. 3101285,7.925,,S 62 | 646,647,0,3,"Cor, Mr. Liudevit",male,19.0,0,0,349231,7.8958,,S 63 | 857,858,1,1,"Daly, Mr. Peter Denis ",male,51.0,0,0,113055,26.55,E17,S 64 | 200,201,0,3,"Vande Walle, Mr. Nestor Cyriel",male,28.0,0,0,345770,9.5,,S 65 | 96,97,0,1,"Goldschmidt, Mr. George B",male,71.0,0,0,PC 17754,34.6542,A5,C 66 | 269,270,1,1,"Bissette, Miss. Amelia",female,35.0,0,0,PC 17760,135.6333,C99,S 67 | 636,637,0,3,"Leinonen, Mr. Antti Gustaf",male,32.0,0,0,STON/O 2. 3101292,7.925,,S 68 | 365,366,0,3,"Adahl, Mr. Mauritz Nils Martin",male,30.0,0,0,C 7076,7.25,,S 69 | 4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S 70 | 378,379,0,3,"Betros, Mr. Tannous",male,20.0,0,0,2648,4.0125,,C 71 | 454,455,0,3,"Peduzzi, Mr. Joseph",male,,0,0,A/5 2817,8.05,,S 72 | 807,808,0,3,"Pettersson, Miss. 
Ellen Natalia",female,18.0,0,0,347087,7.775,,S 73 | -------------------------------------------------------------------------------- /docs/source/examples/substra_core/titanic_example/assets/train_data_samples/data_sample_6/data_sample_6.csv: -------------------------------------------------------------------------------- 1 | ,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked 2 | 826,827,0,3,"Lam, Mr. Len",male,,0,0,1601,56.4958,,S 3 | 739,740,0,3,"Nankoff, Mr. Minko",male,,0,0,349218,7.8958,,S 4 | 717,718,1,2,"Troutt, Miss. Edwina Celia ""Winnie""",female,27.0,0,0,34218,10.5,E101,S 5 | 506,507,1,2,"Quick, Mrs. Frederick Charles (Jane Richards)",female,33.0,0,2,26360,26.0,,S 6 | 684,685,0,2,"Brown, Mr. Thomas William Solomon",male,60.0,1,1,29750,39.0,,S 7 | 765,766,1,1,"Hogeboom, Mrs. John C (Anna Andrews)",female,51.0,1,0,13502,77.9583,D11,S 8 | 650,651,0,3,"Mitkoff, Mr. Mito",male,,0,0,349221,7.8958,,S 9 | 161,162,1,2,"Watt, Mrs. James (Elizabeth ""Bessie"" Inglis Milne)",female,40.0,0,0,C.A. 33595,15.75,,S 10 | 412,413,1,1,"Minahan, Miss. Daisy E",female,33.0,1,0,19928,90.0,C78,Q 11 | 838,839,1,3,"Chip, Mr. Chang",male,32.0,0,0,1601,56.4958,,S 12 | 568,569,0,3,"Doharr, Mr. Tannous",male,,0,0,2686,7.2292,,C 13 | 77,78,0,3,"Moutal, Mr. Rahamin Haim",male,,0,0,374746,8.05,,S 14 | 889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C 15 | 464,465,0,3,"Maisner, Mr. Simon",male,,0,0,A/S 2816,8.05,,S 16 | 373,374,0,1,"Ringhini, Mr. Sante",male,22.0,0,0,PC 17760,135.6333,,C 17 | 689,690,1,1,"Madill, Miss. Georgette Alexandra",female,15.0,0,1,24160,211.3375,B5,S 18 | 332,333,0,1,"Graham, Mr. George Edward",male,38.0,0,1,PC 17582,153.4625,C91,S 19 | 685,686,0,2,"Laroche, Mr. Joseph Philippe Lemercier",male,25.0,1,2,SC/Paris 2123,41.5792,,C 20 | 110,111,0,1,"Porter, Mr. Walter Chamberlain",male,47.0,0,0,110465,52.0,C110,S 21 | 487,488,0,1,"Kent, Mr. Edward Austin",male,58.0,0,0,11771,29.7,B37,C 22 | 530,531,1,2,"Quick, Miss. 
Phyllis May",female,2.0,1,1,26360,26.0,,S 23 | 173,174,0,3,"Sivola, Mr. Antti Wilhelm",male,21.0,0,0,STON/O 2. 3101280,7.925,,S 24 | 144,145,0,2,"Andrew, Mr. Edgardo Samuel",male,18.0,0,0,231945,11.5,,S 25 | 711,712,0,1,"Klaber, Mr. Herman",male,,0,0,113028,26.55,C124,S 26 | 135,136,0,2,"Richard, Mr. Emile",male,23.0,0,0,SC/PARIS 2133,15.0458,,C 27 | 148,149,0,2,"Navratil, Mr. Michel (""Louis M Hoffman"")",male,36.5,0,2,230080,26.0,F2,S 28 | 423,424,0,3,"Danbom, Mrs. Ernst Gilbert (Anna Sigrid Maria Brogren)",female,28.0,1,1,347080,14.4,,S 29 | 163,164,0,3,"Calic, Mr. Jovo",male,17.0,0,0,315093,8.6625,,S 30 | 395,396,0,3,"Johansson, Mr. Erik",male,22.0,0,0,350052,7.7958,,S 31 | 290,291,1,1,"Barber, Miss. Ellen ""Nellie""",female,26.0,0,0,19877,78.85,,S 32 | 561,562,0,3,"Sivic, Mr. Husein",male,40.0,0,0,349251,7.8958,,S 33 | 398,399,0,2,"Pain, Dr. Alfred",male,23.0,0,0,244278,10.5,,S 34 | 474,475,0,3,"Strandberg, Miss. Ida Sofia",female,22.0,0,0,7553,9.8375,,S 35 | 397,398,0,2,"McKane, Mr. Peter David",male,46.0,0,0,28403,26.0,,S 36 | 735,736,0,3,"Williams, Mr. Leslie",male,28.5,0,0,54636,16.1,,S 37 | 308,309,0,2,"Abelson, Mr. Samuel",male,30.0,1,0,P/PP 3381,24.0,,C 38 | 627,628,1,1,"Longley, Miss. Gretchen Fiske",female,21.0,0,0,13502,77.9583,D9,S 39 | 174,175,0,1,"Smith, Mr. James Clinch",male,56.0,0,0,17764,30.6958,A7,C 40 | 87,88,0,3,"Slocovski, Mr. Selman Francis",male,,0,0,SOTON/OQ 392086,8.05,,S 41 | 750,751,1,2,"Wells, Miss. Joan",female,4.0,1,1,29103,23.0,,S 42 | 156,157,1,3,"Gilnagh, Miss. Katherine ""Katie""",female,16.0,0,0,35851,7.7333,,Q 43 | 338,339,1,3,"Dahl, Mr. Karl Edwart",male,45.0,0,0,7598,8.05,,S 44 | 167,168,0,3,"Skoog, Mrs. William (Anna Bernhardina Karlsson)",female,45.0,1,4,347088,27.9,,S 45 | 827,828,1,2,"Mallet, Master. Andre",male,1.0,0,2,S.C./PARIS 2079,37.0042,,C 46 | 315,316,1,3,"Nilsson, Miss. Helmina Josefina",female,26.0,0,0,347470,7.8542,,S 47 | 873,874,0,3,"Vander Cruyssen, Mr. 
Victor",male,47.0,0,0,345765,9.0,,S 48 | 501,502,0,3,"Canavan, Miss. Mary",female,21.0,0,0,364846,7.75,,Q 49 | 6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S 50 | 323,324,1,2,"Caldwell, Mrs. Albert Francis (Sylvia Mae Harbaugh)",female,22.0,1,1,248738,29.0,,S 51 | 183,184,1,2,"Becker, Master. Richard F",male,1.0,2,1,230136,39.0,F4,S 52 | 131,132,0,3,"Coelho, Mr. Domingos Fernandeo",male,20.0,0,0,SOTON/O.Q. 3101307,7.05,,S 53 | 493,494,0,1,"Artagaveytia, Mr. Ramon",male,71.0,0,0,PC 17609,49.5042,,C 54 | 863,864,0,3,"Sage, Miss. Dorothy Edith ""Dolly""",female,,8,2,CA. 2343,69.55,,S 55 | 681,682,1,1,"Hassab, Mr. Hammad",male,27.0,0,0,PC 17572,76.7292,D49,C 56 | 369,370,1,1,"Aubart, Mme. Leontine Pauline",female,24.0,0,0,PC 17477,69.3,B35,C 57 | 447,448,1,1,"Seward, Mr. Frederic Kimber",male,34.0,0,0,113794,26.55,,S 58 | 728,729,0,2,"Bryhl, Mr. Kurt Arnold Gottfrid",male,25.0,1,0,236853,26.0,,S 59 | 399,400,1,2,"Trout, Mrs. William H (Jessie L)",female,28.0,0,0,240929,12.65,,S 60 | 341,342,1,1,"Fortune, Miss. Alice Elizabeth",female,24.0,3,2,19950,263.0,C23 C25 C27,S 61 | 120,121,0,2,"Hickman, Mr. Stanley George",male,21.0,2,0,S.O.C. 14879,73.5,,S 62 | 380,381,1,1,"Bidois, Miss. Rosalie",female,42.0,0,0,PC 17757,227.525,,C 63 | 433,434,0,3,"Kallio, Mr. Nikolai Erland",male,17.0,0,0,STON/O 2. 3101274,7.125,,S 64 | 441,442,0,3,"Hampe, Mr. Leon",male,20.0,0,0,345769,9.5,,S 65 | 672,673,0,2,"Mitchell, Mr. Henry Michael",male,70.0,0,0,C.A. 24580,10.5,,S 66 | 756,757,0,3,"Carlsson, Mr. August Sigfrid",male,28.0,0,0,350042,7.7958,,S 67 | 326,327,0,3,"Nysveen, Mr. Johan Hansen",male,61.0,0,0,345364,6.2375,,S 68 | 139,140,0,1,"Giglio, Mr. Victor",male,24.0,0,0,PC 17593,79.2,B86,C 69 | 371,372,0,3,"Wiklund, Mr. Jakob Alfred",male,18.0,1,0,3101267,6.4958,,S 70 | 605,606,0,3,"Lindell, Mr. Edvard Bengtsson",male,36.0,1,0,349910,15.55,,S 71 | 86,87,0,3,"Ford, Mr. William Neal",male,16.0,1,3,W./C. 6608,34.375,,S 72 | 357,358,0,2,"Funk, Miss. 
Annie Clemmer",female,38.0,0,0,237671,13.0,,S 73 | -------------------------------------------------------------------------------- /docs/source/examples/substra_core/titanic_example/assets/train_data_samples/data_sample_7/data_sample_7.csv: -------------------------------------------------------------------------------- 1 | ,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked 2 | 671,672,0,1,"Davidson, Mr. Thornton",male,31.0,1,0,F.C. 12750,52.0,B71,S 3 | 94,95,0,3,"Coxon, Mr. Daniel",male,59.0,0,0,364500,7.25,,S 4 | 255,256,1,3,"Touma, Mrs. Darwis (Hanne Youssef Razi)",female,29.0,0,2,2650,15.2458,,C 5 | 770,771,0,3,"Lievens, Mr. Rene Aime",male,24.0,0,0,345781,9.5,,S 6 | 381,382,1,3,"Nakid, Miss. Maria (""Mary"")",female,1.0,0,2,2653,15.7417,,C 7 | 510,511,1,3,"Daly, Mr. Eugene Patrick",male,29.0,0,0,382651,7.75,,Q 8 | 344,345,0,2,"Fox, Mr. Stanley Hubert",male,36.0,0,0,229236,13.0,,S 9 | 502,503,0,3,"O'Sullivan, Miss. Bridget Mary",female,,0,0,330909,7.6292,,Q 10 | 432,433,1,2,"Louch, Mrs. Charles Alexander (Alice Adelaide Slow)",female,42.0,1,0,SC/AH 3085,26.0,,S 11 | 74,75,1,3,"Bing, Mr. Lee",male,32.0,0,0,1601,56.4958,,S 12 | 426,427,1,2,"Clarke, Mrs. Charles V (Ada Maria Winfield)",female,28.0,1,0,2003,26.0,,S 13 | 748,749,0,1,"Marvin, Mr. Daniel Warner",male,19.0,1,0,113773,53.1,D30,S 14 | 115,116,0,3,"Pekoniemi, Mr. Edvard",male,21.0,0,0,STON/O 2. 3101294,7.925,,S 15 | 569,570,1,3,"Jonsson, Mr. Carl",male,32.0,0,0,350417,7.8542,,S 16 | 884,885,0,3,"Sutehall, Mr. Henry Jr",male,25.0,0,0,SOTON/OQ 392076,7.05,,S 17 | 513,514,1,1,"Rothschild, Mrs. Martin (Elizabeth L. Barrett)",female,54.0,1,0,PC 17603,59.4,,C 18 | 88,89,1,1,"Fortune, Miss. Mabel Helen",female,23.0,3,2,19950,263.0,C23 C25 C27,S 19 | 517,518,0,3,"Ryan, Mr. Patrick",male,,0,0,371110,24.15,,Q 20 | 62,63,0,1,"Harris, Mr. Henry Birkhardt",male,45.0,1,0,36973,83.475,C83,S 21 | 673,674,1,2,"Wilhelms, Mr. 
Charles",male,31.0,0,0,244270,13.0,,S 22 | 701,702,1,1,"Silverthorne, Mr. Spencer Victor",male,35.0,0,0,PC 17475,26.2875,E24,S 23 | 459,460,0,3,"O'Connor, Mr. Maurice",male,,0,0,371060,7.75,,Q 24 | 538,539,0,3,"Risien, Mr. Samuel Beard",male,,0,0,364498,14.5,,S 25 | 751,752,1,3,"Moor, Master. Meier",male,6.0,0,1,392096,12.475,E121,S 26 | 277,278,0,2,"Parkes, Mr. Francis ""Frank""",male,,0,0,239853,0.0,,S 27 | 831,832,1,2,"Richards, Master. George Sibley",male,0.83,1,1,29106,18.75,,S 28 | 414,415,1,3,"Sundman, Mr. Johan Julian",male,44.0,0,0,STON/O 2. 3101269,7.925,,S 29 | 241,242,1,3,"Murphy, Miss. Katherine ""Kate""",female,,1,0,367230,15.5,,Q 30 | 536,537,0,1,"Butt, Major. Archibald Willingham",male,45.0,0,0,113050,26.55,B38,S 31 | 803,804,1,3,"Thomas, Master. Assad Alexander",male,0.42,0,1,2625,8.5167,,C 32 | 631,632,0,3,"Lundahl, Mr. Johan Svensson",male,51.0,0,0,347743,7.0542,,S 33 | 516,517,1,2,"Lemore, Mrs. (Amelia Milley)",female,34.0,0,0,C.A. 34260,10.5,F33,S 34 | 158,159,0,3,"Smiljanic, Mr. Mile",male,,0,0,315037,8.6625,,S 35 | 119,120,0,3,"Andersson, Miss. Ellis Anna Maria",female,2.0,4,2,347082,31.275,,S 36 | 599,600,1,1,"Duff Gordon, Sir. Cosmo Edmund (""Mr Morgan"")",male,49.0,1,0,PC 17485,56.9292,A20,C 37 | 28,29,1,3,"O'Dwyer, Miss. Ellen ""Nellie""",female,,0,0,330959,7.8792,,Q 38 | 342,343,0,2,"Collander, Mr. Erik Gustaf",male,28.0,0,0,248740,13.0,,S 39 | 100,101,0,3,"Petranec, Miss. Matilda",female,28.0,0,0,349245,7.8958,,S 40 | 116,117,0,3,"Connors, Mr. Patrick",male,70.5,0,0,370369,7.75,,Q 41 | 582,583,0,2,"Downton, Mr. William James",male,54.0,0,0,28403,26.0,,S 42 | 625,626,0,1,"Sutton, Mr. Frederick",male,61.0,0,0,36963,32.3208,D50,S 43 | 370,371,1,1,"Harder, Mr. George Achilles",male,25.0,1,0,11765,55.4417,E50,C 44 | 431,432,1,3,"Thorneycroft, Mrs. Percival (Florence Kate White)",female,,1,0,376564,16.1,,S 45 | 89,90,0,3,"Celotti, Mr. Francesco",male,24.0,0,0,343275,8.05,,S 46 | 676,677,0,3,"Sawyer, Mr. 
Frederick Charles",male,24.5,0,0,342826,8.05,,S 47 | 706,707,1,2,"Kelly, Mrs. Florence ""Fannie""",female,45.0,0,0,223596,13.5,,S 48 | 233,234,1,3,"Asplund, Miss. Lillian Gertrud",female,5.0,4,2,347077,31.3875,,S 49 | 620,621,0,3,"Yasbeck, Mr. Antoni",male,27.0,1,0,2659,14.4542,,C 50 | 696,697,0,3,"Kelly, Mr. James",male,44.0,0,0,363592,8.05,,S 51 | 413,414,0,2,"Cunningham, Mr. Alfred Fleming",male,,0,0,239853,0.0,,S 52 | 99,100,0,2,"Kantor, Mr. Sinai",male,34.0,1,0,244367,26.0,,S 53 | 275,276,1,1,"Andrews, Miss. Kornelia Theodosia",female,63.0,1,0,13502,77.9583,D7,S 54 | 658,659,0,2,"Eitemiller, Mr. George Floyd",male,23.0,0,0,29751,13.0,,S 55 | 250,251,0,3,"Reed, Mr. James George",male,,0,0,362316,7.25,,S 56 | 678,679,0,3,"Goodwin, Mrs. Frederick (Augusta Tyler)",female,43.0,1,6,CA 2144,46.9,,S 57 | 113,114,0,3,"Jussila, Miss. Katriina",female,20.0,1,0,4136,9.825,,S 58 | 297,298,0,1,"Allison, Miss. Helen Loraine",female,2.0,1,2,113781,151.55,C22 C26,S 59 | 849,850,1,1,"Goldenberg, Mrs. Samuel L (Edwiga Grabowska)",female,,1,0,17453,89.1042,C92,C 60 | 179,180,0,3,"Leonard, Mr. Lionel",male,36.0,0,0,LINE,0.0,,S 61 | 466,467,0,2,"Campbell, Mr. William",male,,0,0,239853,0.0,,S 62 | 774,775,1,2,"Hocking, Mrs. Elizabeth (Eliza Needs)",female,54.0,1,3,29105,23.0,,S 63 | 437,438,1,2,"Richards, Mrs. Sidney (Emily Hocking)",female,24.0,2,3,29106,18.75,,S 64 | 865,866,1,2,"Bystrom, Mrs. (Karolina)",female,42.0,0,0,236852,13.0,,S 65 | 73,74,0,3,"Chronopoulos, Mr. Apostolos",male,26.0,1,0,2680,14.4542,,C 66 | 385,386,0,2,"Davies, Mr. Charles Henry",male,18.0,0,0,S.O.C. 14879,73.5,,S 67 | 235,236,0,3,"Harknett, Miss. Alice Phoebe",female,,0,0,W./C. 6609,7.55,,S 68 | 211,212,1,2,"Cameron, Miss. Clear Annie",female,35.0,0,0,F.C.C. 13528,21.0,,S 69 | 713,714,0,3,"Larsson, Mr. August Viktor",male,29.0,0,0,7545,9.4833,,S 70 | 572,573,1,1,"Flynn, Mr. John Irwin (""Irving"")",male,36.0,0,0,PC 17474,26.3875,E25,S 71 | 722,723,0,2,"Gillespie, Mr. 
William Henry",male,34.0,0,0,12233,13.0,,S 72 | 355,356,0,3,"Vanden Steen, Mr. Leo Peter",male,28.0,0,0,345783,9.5,,S 73 | -------------------------------------------------------------------------------- /docs/source/examples/substra_core/titanic_example/assets/train_data_samples/data_sample_8/data_sample_8.csv: -------------------------------------------------------------------------------- 1 | ,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked 2 | 862,863,1,1,"Swift, Mrs. Frederick Joel (Margaret Welles Barron)",female,48.0,0,0,17466,25.9292,D17,S 3 | 825,826,0,3,"Flynn, Mr. John",male,,0,0,368323,6.95,,Q 4 | 886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0,,S 5 | 444,445,1,3,"Johannesen-Bratthammer, Mr. Bernt",male,,0,0,65306,8.1125,,S 6 | 42,43,0,3,"Kraeff, Mr. Theodor",male,,0,0,349253,7.8958,,C 7 | 288,289,1,2,"Hosono, Mr. Masabumi",male,42.0,0,0,237798,13.0,,S 8 | 541,542,0,3,"Andersson, Miss. Ingeborg Constanzia",female,9.0,4,2,347082,31.275,,S 9 | 5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q 10 | 724,725,1,1,"Chambers, Mr. Norman Campbell",male,27.0,1,0,113806,53.1,E8,S 11 | 276,277,0,3,"Lindblom, Miss. Augusta Charlotta",female,45.0,0,0,347073,7.75,,S 12 | 877,878,0,3,"Petroff, Mr. Nedelio",male,19.0,0,0,349212,7.8958,,S 13 | 539,540,1,1,"Frolicher, Miss. Hedwig Margaritha",female,22.0,0,2,13568,49.5,B39,C 14 | 597,598,0,3,"Johnson, Mr. Alfred",male,49.0,0,0,LINE,0.0,,S 15 | 649,650,1,3,"Stanley, Miss. Amy Zillah Elsie",female,23.0,0,0,CA. 2314,7.55,,S 16 | 570,571,1,2,"Harris, Mr. George",male,62.0,0,0,S.W./PP 752,10.5,,S 17 | 850,851,0,3,"Andersson, Master. Sigvard Harald Elias",male,4.0,4,2,347082,31.275,,S 18 | 249,250,0,2,"Carter, Rev. Ernest Courtenay",male,54.0,1,0,244252,26.0,,S 19 | 217,218,0,2,"Jacobsohn, Mr. Sidney Samuel",male,42.0,1,0,243847,27.0,,S 20 | 601,602,0,3,"Slabenoff, Mr. Petco",male,,0,0,349214,7.8958,,S 21 | 199,200,0,2,"Yrois, Miss. 
Henriette (""Mrs Harbeck"")",female,24.0,0,0,248747,13.0,,S 22 | 843,844,0,3,"Lemberopolous, Mr. Peter L",male,34.5,0,0,2683,6.4375,,C 23 | 512,513,1,1,"McGough, Mr. James Robert",male,36.0,0,0,PC 17473,26.2875,E25,S 24 | 662,663,0,1,"Colley, Mr. Edward Pomeroy",male,47.0,0,0,5727,25.5875,E58,S 25 | 783,784,0,3,"Johnston, Mr. Andrew G",male,,1,2,W./C. 6607,23.45,,S 26 | 410,411,0,3,"Sdycoff, Mr. Todor",male,,0,0,349222,7.8958,,S 27 | 693,694,0,3,"Saad, Mr. Khalil",male,25.0,0,0,2672,7.225,,C 28 | 208,209,1,3,"Carr, Miss. Helen ""Ellen""",female,16.0,0,0,367231,7.75,,Q 29 | 142,143,1,3,"Hakkarainen, Mrs. Pekka Pietari (Elin Matilda Dolck)",female,24.0,1,0,STON/O2. 3101279,15.85,,S 30 | 230,231,1,1,"Harris, Mrs. Henry Birkhardt (Irene Wallach)",female,35.0,1,0,36973,83.475,C83,S 31 | 145,146,0,2,"Nicholls, Mr. Joseph Charles",male,19.0,1,1,C.A. 33112,36.75,,S 32 | 430,431,1,1,"Bjornstrom-Steffansson, Mr. Mauritz Hakan",male,28.0,0,0,110564,26.55,C52,S 33 | 311,312,1,1,"Ryerson, Miss. Emily Borie",female,18.0,2,2,PC 17608,262.375,B57 B59 B63 B66,C 34 | 55,56,1,1,"Woolner, Mr. Hugh",male,,0,0,19947,35.5,C52,S 35 | 153,154,0,3,"van Billiard, Mr. Austin Blyler",male,40.5,0,2,A/5. 851,14.5,,S 36 | 656,657,0,3,"Radeff, Mr. Alexander",male,,0,0,349223,7.8958,,S 37 | 445,446,1,1,"Dodge, Master. Washington",male,4.0,0,2,33638,81.8583,A34,S 38 | 48,49,0,3,"Samaan, Mr. Youssef",male,,2,0,2662,21.6792,,C 39 | 745,746,0,1,"Crosby, Capt. Edward Gifford",male,70.0,1,1,WE/P 5735,71.0,B22,S 40 | 587,588,1,1,"Frolicher-Stehli, Mr. Maxmillian",male,60.0,1,1,13567,79.2,B41,C 41 | 147,148,0,3,"Ford, Miss. Robina Maggie ""Ruby""",female,9.0,2,2,W./C. 6608,34.375,,S 42 | 688,689,0,3,"Fischer, Mr. Eberhard Thelander",male,18.0,0,0,350036,7.7958,,S 43 | 309,310,1,1,"Francatelli, Miss. Laura Mabel",female,30.0,0,0,PC 17485,56.9292,E36,C 44 | 479,480,1,3,"Hirvonen, Miss. Hildur E",female,2.0,0,1,3101298,12.2875,,S 45 | 302,303,0,3,"Johnson, Mr. 
William Cahoone Jr",male,19.0,0,0,LINE,0.0,,S 46 | 253,254,0,3,"Lobb, Mr. William Arthur",male,30.0,1,0,A/5. 3336,16.1,,S 47 | 149,150,0,2,"Byles, Rev. Thomas Roussel Davids",male,42.0,0,0,244310,13.0,,S 48 | 313,314,0,3,"Hendekovic, Mr. Ignjac",male,28.0,0,0,349243,7.8958,,S 49 | 549,550,1,2,"Davies, Master. John Morgan Jr",male,8.0,1,1,C.A. 33112,36.75,,S 50 | 339,340,0,1,"Blackwell, Mr. Stephen Weart",male,45.0,0,0,113784,35.5,T,S 51 | 47,48,1,3,"O'Driscoll, Miss. Bridget",female,,0,0,14311,7.75,,Q 52 | 747,748,1,2,"Sinkkonen, Miss. Anna",female,30.0,0,0,250648,13.0,,S 53 | 21,22,1,2,"Beesley, Mr. Lawrence",male,34.0,0,0,248698,13.0,D56,S 54 | 879,880,1,1,"Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)",female,56.0,0,1,11767,83.1583,C50,C 55 | 635,636,1,2,"Davis, Miss. Mary",female,28.0,0,0,237668,13.0,,S 56 | 52,53,1,1,"Harper, Mrs. Henry Sleeper (Myna Haxtun)",female,49.0,1,0,PC 17572,76.7292,D33,C 57 | 705,706,0,2,"Morley, Mr. Henry Samuel (""Mr Henry Marshall"")",male,39.0,0,0,250655,26.0,,S 58 | 559,560,1,3,"de Messemaeker, Mrs. Guillaume Joseph (Emma)",female,36.0,1,0,345572,17.4,,S 59 | 854,855,0,2,"Carter, Mrs. Ernest Courtenay (Lilian Hughes)",female,44.0,1,0,244252,26.0,,S 60 | 648,649,0,3,"Willey, Mr. Edward",male,,0,0,S.O./P.P. 751,7.55,,S 61 | 358,359,1,3,"McGovern, Miss. Mary",female,,0,0,330931,7.8792,,Q 62 | 134,135,0,2,"Sobey, Mr. Samuel James Hayden",male,25.0,0,0,C.A. 29178,13.0,,S 63 | 303,304,1,2,"Keane, Miss. Nora A",female,,0,0,226593,12.35,E101,Q 64 | 146,147,1,3,"Andersson, Mr. August Edvard (""Wennerstrom"")",male,27.0,0,0,350043,7.7958,,S 65 | 316,317,1,2,"Kantor, Mrs. Sinai (Miriam Sternin)",female,24.0,1,0,244367,26.0,,S 66 | 565,566,0,3,"Davies, Mr. Alfred J",male,24.0,2,0,A/4 48871,24.15,,S 67 | 846,847,0,3,"Sage, Mr. Douglas Bullen",male,,8,2,CA. 2343,69.55,,S 68 | 420,421,0,3,"Gheorgheff, Mr. Stanio",male,,0,0,349254,7.8958,,C 69 | 284,285,0,1,"Smith, Mr. 
Richard William",male,,0,0,113056,26.0,A19,S 70 | 376,377,1,3,"Landergren, Miss. Aurora Adelia",female,22.0,0,0,C 7077,7.25,,S 71 | 320,321,0,3,"Dennis, Mr. Samuel",male,22.0,0,0,A/5 21172,7.25,,S 72 | 379,380,0,3,"Gustafsson, Mr. Karl Gideon",male,19.0,0,0,347069,7.775,,S 73 | -------------------------------------------------------------------------------- /docs/source/examples/substra_core/titanic_example/assets/train_data_samples/data_sample_9/data_sample_9.csv: -------------------------------------------------------------------------------- 1 | ,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked 2 | 107,108,1,3,"Moss, Mr. Albert Johan",male,,0,0,312991,7.775,,S 3 | 19,20,1,3,"Masselmani, Mrs. Fatima",female,,0,0,2649,7.225,,C 4 | 187,188,1,1,"Romaine, Mr. Charles Hallace (""Mr C Rolmane"")",male,45.0,0,0,111428,26.55,,S 5 | 298,299,1,1,"Saalfeld, Mr. Adolphe",male,,0,0,19988,30.5,C106,S 6 | 469,470,1,3,"Baclini, Miss. Helene Barbara",female,0.75,2,1,2666,19.2583,,C 7 | 53,54,1,2,"Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkinson)",female,29.0,1,0,2926,26.0,,S 8 | 523,524,1,1,"Hippach, Mrs. Louis Albert (Ida Sophia Fischer)",female,44.0,0,1,111361,57.9792,B18,C 9 | 83,84,0,1,"Carrau, Mr. Francisco M",male,28.0,0,0,113059,47.1,,S 10 | 881,882,0,3,"Markun, Mr. Johann",male,33.0,0,0,349257,7.8958,,S 11 | 328,329,1,3,"Goldsmith, Mrs. Frank John (Emily Alice Brown)",female,31.0,1,1,363291,20.525,,S 12 | 404,405,0,3,"Oreskovic, Miss. Marija",female,20.0,0,0,315096,8.6625,,S 13 | 726,727,1,2,"Renouf, Mrs. Peter Henry (Lillian Jefferys)",female,30.0,3,0,31027,21.0,,S 14 | 184,185,1,3,"Kink-Heilmann, Miss. Luise Gretchen",female,4.0,0,2,315153,22.025,,S 15 | 164,165,0,3,"Panula, Master. Eino Viljami",male,1.0,4,1,3101295,39.6875,,S 16 | 278,279,0,3,"Rice, Master. Eric",male,7.0,4,1,382652,29.125,,Q 17 | 480,481,0,3,"Goodwin, Master. Harold Victor",male,9.0,5,2,CA 2144,46.9,,S 18 | 617,618,0,3,"Lobb, Mrs. 
William Arthur (Cordelia K Stanlick)",female,26.0,1,0,A/5. 3336,16.1,,S 19 | 428,429,0,3,"Flynn, Mr. James",male,,0,0,364851,7.75,,Q 20 | 628,629,0,3,"Bostandyeff, Mr. Guentcho",male,26.0,0,0,349224,7.8958,,S 21 | 37,38,0,3,"Cann, Mr. Ernest Charles",male,21.0,0,0,A./5. 2152,8.05,,S 22 | 584,585,0,3,"Paulner, Mr. Uscher",male,,0,0,3411,8.7125,,C 23 | 548,549,0,3,"Goldsmith, Mr. Frank John",male,33.0,1,1,363291,20.525,,S 24 | 2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S 25 | 268,269,1,1,"Graham, Mrs. William Thompson (Edith Junkins)",female,58.0,0,1,PC 17582,153.4625,C125,S 26 | 244,245,0,3,"Attalah, Mr. Sleiman",male,30.0,0,0,2694,7.225,,C 27 | 695,696,0,2,"Chapman, Mr. Charles Henry",male,52.0,0,0,248731,13.5,,S 28 | 795,796,0,2,"Otter, Mr. Richard",male,39.0,0,0,28213,13.0,,S 29 | 797,798,1,3,"Osman, Mrs. Mara",female,31.0,0,0,349244,8.6833,,S 30 | 209,210,1,1,"Blank, Mr. Henry",male,40.0,0,0,112277,31.0,A31,C 31 | 337,338,1,1,"Burns, Miss. Elizabeth Margaret",female,41.0,0,0,16966,134.5,E40,C 32 | 547,548,1,2,"Padro y Manent, Mr. Julian",male,,0,0,SC/PARIS 2146,13.8625,,C 33 | 574,575,0,3,"Rush, Mr. Alfred George John",male,16.0,0,0,A/4. 20589,8.05,,S 34 | 85,86,1,3,"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gustafsson)",female,33.0,3,0,3101278,15.85,,S 35 | 511,512,0,3,"Webber, Mr. James",male,,0,0,SOTON/OQ 3101316,8.05,,S 36 | 603,604,0,3,"Torber, Mr. Ernst William",male,44.0,0,0,364511,8.05,,S 37 | 558,559,1,1,"Taussig, Mrs. Emil (Tillie Mandelbaum)",female,39.0,1,1,110413,79.65,E67,S 38 | 256,257,1,1,"Thorne, Mrs. Gertrude Maybelle",female,,0,0,PC 17585,79.2,,C 39 | 590,591,0,3,"Rintamaki, Mr. Matti",male,35.0,0,0,STON/O 2. 3101273,7.125,,S 40 | 353,354,0,3,"Arnold-Franchi, Mr. Josef",male,25.0,1,0,349237,17.8,,S 41 | 401,402,0,3,"Adams, Mr. John",male,26.0,0,0,341826,8.05,,S 42 | 80,81,0,3,"Waelens, Mr. Achille",male,22.0,0,0,345767,9.0,,S 43 | 809,810,1,1,"Chambers, Mrs. 
Norman Campbell (Bertha Griggs)",female,33.0,1,0,113806,53.1,E8,S 44 | 757,758,0,2,"Bailey, Mr. Percy Andrew",male,18.0,0,0,29108,11.5,,S 45 | 644,645,1,3,"Baclini, Miss. Eugenie",female,0.75,2,1,2666,19.2583,,C 46 | 727,728,1,3,"Mannion, Miss. Margareth",female,,0,0,36866,7.7375,,Q 47 | 154,155,0,3,"Olsen, Mr. Ole Martin",male,,0,0,Fa 265302,7.3125,,S 48 | 216,217,1,3,"Honkanen, Miss. Eliina",female,27.0,0,0,STON/O2. 3101283,7.925,,S 49 | 118,119,0,1,"Baxter, Mr. Quigg Edmond",male,24.0,0,1,PC 17558,247.5208,B58 B60,C 50 | 263,264,0,1,"Harrison, Mr. William",male,40.0,0,0,112059,0.0,B94,S 51 | 463,464,0,2,"Milling, Mr. Jacob Christian",male,48.0,0,0,234360,13.0,,S 52 | 741,742,0,1,"Cavendish, Mr. Tyrell William",male,36.0,1,0,19877,78.85,C46,S 53 | 677,678,1,3,"Turja, Miss. Anna Sofia",female,18.0,0,0,4138,9.8417,,S 54 | 564,565,0,3,"Meanwell, Miss. (Marion Ogden)",female,,0,0,SOTON/O.Q. 392087,8.05,,S 55 | 333,334,0,3,"Vander Planke, Mr. Leo Edmondus",male,16.0,2,0,345764,18.0,,S 56 | 198,199,1,3,"Madigan, Miss. Margaret ""Maggie""",female,,0,0,370370,7.75,,Q 57 | 887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0,B42,S 58 | 600,601,1,2,"Jacobsohn, Mrs. Sidney Samuel (Amy Frances Christy)",female,24.0,2,1,243847,27.0,,S 59 | 189,190,0,3,"Turcin, Mr. Stjepan",male,36.0,0,0,349247,7.8958,,S 60 | 31,32,1,1,"Spencer, Mrs. William Augustus (Marie Eugenie)",female,,1,0,PC 17569,146.5208,B78,C 61 | 521,522,0,3,"Vovk, Mr. Janko",male,22.0,0,0,349252,7.8958,,S 62 | 852,853,0,3,"Boulos, Miss. Nourelain",female,9.0,1,1,2678,15.2458,,C 63 | 610,611,0,3,"Andersson, Mrs. Anders Johan (Alfrida Konstantia Brogren)",female,39.0,1,5,347082,31.275,,S 64 | 45,46,0,3,"Rogers, Mr. William John",male,,0,0,S.C./A.4. 23567,8.05,,S 65 | 546,547,1,2,"Beane, Mrs. Edward (Ethel Clarke)",female,19.0,1,0,2908,26.0,,S 66 | 117,118,0,2,"Turpin, Mr. William John Robert",male,29.0,1,0,11668,21.0,,S 67 | 577,578,1,1,"Silvey, Mrs. 
def get_int(b: bytes) -> int:
    """Decode ``b`` as an unsigned big-endian integer.

    Args:
        b: Raw bytes, most significant byte first.

    Returns:
        The decoded integer. An empty byte string decodes to 0.
    """
    return int.from_bytes(b, "big")


def MNISTraw2numpy(path: str, strict: bool = True) -> np.ndarray:
    """Load an MNIST raw file into a ``uint8`` NumPy array.

    The file starts with a 4-byte big-endian magic number whose lowest byte
    is the number of dimensions, followed by one 4-byte big-endian size per
    dimension. Everything after that header is read as the payload.

    Args:
        path: Location of the raw file on disk.
        strict: When true, assert that the payload holds exactly as many
            values as the product of the header dimensions.

    Returns:
        The payload reshaped to the dimensions declared in the header.

    Raises:
        AssertionError: If the header declares fewer than 1 or more than 3
            dimensions, or if ``strict`` is true and the payload size does
            not match the header.
    """
    # read
    with open(path, "rb") as f:
        data = f.read()

    # parse
    magic = get_int(data[0:4])
    nd = magic % 256
    assert 1 <= nd <= 3
    # NOTE(review): the other bytes of the magic number are not inspected;
    # values are always interpreted as uint8.
    numpy_type = np.uint8
    s = [get_int(data[4 * (i + 1) : 4 * (i + 2)]) for i in range(nd)]

    num_bytes_per_value = np.iinfo(numpy_type).bits // 8
    # The MNIST format uses the big endian byte order. If the system uses little endian byte order by default,
    # multi-byte values must be byte-swapped after np.frombuffer() reads them in native order.
    needs_byte_reversal = sys.byteorder == "little" and num_bytes_per_value > 1
    parsed = np.frombuffer(bytearray(data), dtype=numpy_type, offset=(4 * (nd + 1)))
    if needs_byte_reversal:
        # ndarray has no ``flip`` method (that is the torch API this code was
        # adapted from); ``byteswap`` is the NumPy way to reverse the bytes
        # of every element. Unreachable while numpy_type is uint8.
        parsed = parsed.byteswap()

    assert parsed.shape[0] == np.prod(s) or not strict
    return parsed.reshape(*s)
def setup_mnist(data_path, N_CLIENTS):
    """Download MNIST and write one train/test shard per organization.

    The raw files are expected under ``<data_path>/MNIST/raw`` after the
    download. Each of the four arrays is cut into ``N_CLIENTS`` equal parts
    and part ``i`` is saved under ``<data_path>/org_<i+1>/{train,test}/``.

    Args:
        data_path: Root folder for the download and the per-organization
            folders. May be a ``str`` or a ``pathlib.Path``.
        N_CLIENTS: Number of organizations to split the data between.

    Raises:
        ValueError: From ``np.split`` when an array cannot be divided into
            ``N_CLIENTS`` equal parts.
    """
    # Normalise once: previously only raw_path was wrapped in Path, so a str
    # data_path failed later on ``data_path / f"org_..."``.
    data_path = pathlib.Path(data_path)
    raw_path = data_path / "MNIST" / "raw"

    # Download the dataset
    MNIST(data_path, download=True)

    raw_files = (
        ("train", "images", "train-images-idx3-ubyte"),
        ("train", "labels", "train-labels-idx1-ubyte"),
        ("test", "images", "t10k-images-idx3-ubyte"),
        ("test", "labels", "t10k-labels-idx1-ubyte"),
    )

    # Extract numpy arrays from raw data and split them into the number of
    # organizations. All splits happen before anything is written, so an
    # uneven split fails without leaving partial output behind.
    folds = {}
    for split, kind, filename in raw_files:
        array = MNISTraw2numpy(str(raw_path / filename))
        folds[(split, kind)] = np.split(array, N_CLIENTS)

    # Save splits in different folders to simulate the different organizations
    for i in range(N_CLIENTS):
        for split in ("train", "test"):
            org_dir = data_path / f"org_{i+1}" / split
            org_dir.mkdir(parents=True, exist_ok=True)
            for kind in ("images", "labels"):
                np.save(str(org_dir / f"{split}_{kind}.npy"), folds[(split, kind)][i])
class MnistOpener(tools.Opener):
    """Opener exposing MNIST shards stored as ``*_images.npy`` / ``*_labels.npy``."""

    def fake_data(self, n_samples=None):
        """Return random images and labels with the same keys as ``get_data``.

        Args:
            n_samples: Requested number of samples. Values that are falsy or
                greater than 100 fall back to 100.

        Returns:
            A dict with ``"images"`` (random integers below 256, shape
            ``(n, 28, 28)``) and ``"labels"`` (random integers below 10,
            shape ``(n,)``).
        """
        N_SAMPLES = n_samples if n_samples and n_samples <= 100 else 100

        fake_images = np.random.randint(256, size=(N_SAMPLES, 28, 28))
        fake_labels = np.random.randint(10, size=N_SAMPLES)

        return {"images": fake_images, "labels": fake_labels}

    def get_data(self, folders):
        """Load the images and labels arrays found in the first folder.

        Args:
            folders: Sequence of folder paths; only ``folders[0]`` is read.

        Returns:
            A dict with the loaded ``"images"`` and ``"labels"`` arrays.

        Raises:
            IndexError: If the folder holds no ``*_images.npy`` or no
                ``*_labels.npy`` file.
        """
        # get npy files
        p = pathlib.Path(folders[0])
        # glob() already yields paths that include ``p``; joining them onto
        # ``p`` again duplicated the prefix whenever the folder was relative.
        images_data_path = list(p.glob("*_images.npy"))[0]
        labels_data_path = list(p.glob("*_labels.npy"))[0]

        # load data
        return {
            "images": np.load(images_data_path),
            "labels": np.load(labels_data_path),
        }
def setup_diabetes(data_path: pathlib.Path):
    """Write the scikit-learn diabetes dataset as two per-organization CSV files.

    The dataset description goes to ``description.md``. The first two thirds
    of the rows are saved to ``org_1/data.csv`` and the remainder to
    ``org_2/data.csv``, both without the index column.

    Args:
        data_path: Existing folder that receives the description file and the
            ``org_1`` / ``org_2`` sub-folders.
    """
    bunch = load_diabetes(scaled=False)

    readme = data_path / "description.md"
    readme.touch()
    readme.write_text(bunch.DESCR)

    frame = pd.DataFrame(data=bunch.data, columns=bunch.feature_names)
    # Turn the numeric "sex" codes into a categorical column.
    sex_labels = {1: "M", 2: "F"}
    frame["sex"] = frame["sex"].replace(sex_labels).astype("category")

    # One folder per organisation, created before any CSV is written.
    org_dirs = [data_path / "org_1", data_path / "org_2"]
    for org_dir in org_dirs:
        org_dir.mkdir(exist_ok=True)

    # Uneven split: two thirds / one third.
    cut = int(len(frame) * 2 / 3)
    shards = (frame.iloc[:cut], frame.iloc[cut:])
    for org_dir, shard in zip(org_dirs, shards):
        shard.to_csv(org_dir / "data.csv", index=False)


class DiabetesOpener(tools.Opener):
    """Opener reading the diabetes CSV written for one organization."""

    def fake_data(self, n_samples=None):
        """Return a frame of random floats with the dataset's column names.

        Args:
            n_samples: Requested number of rows. Values that are falsy or
                greater than 100 fall back to 100.
        """
        if n_samples and n_samples <= 100:
            row_count = n_samples
        else:
            row_count = 100

        columns = ["age", "sex", "bmi", "bp", "s1", "s2", "s3", "s4", "s5", "s6"]
        values = np.random.random((row_count, len(columns)))
        return pd.DataFrame(data=values, columns=columns)

    def get_data(self, folders):
        """Read the first ``*.csv`` found in ``folders[0]``, with "sex" as a category."""
        folder = pathlib.Path(folders[0])
        csv_path = next(folder.glob("*.csv"))
        return pd.read_csv(csv_path, dtype={"sex": "category"})
def setup_iris(data_path: os.PathLike, n_client: int):
    """Shuffle the iris dataset, split it 80/20 and shard it per organization.

    Shard ``i`` of the train and test parts is saved under
    ``<data_path>/org_<i+1>/{train,test}/`` as ``*_data.npy`` and
    ``*_targets.npy``.

    Args:
        data_path: Root folder for the per-organization folders. Any
            ``str`` or ``os.PathLike`` is accepted.
        n_client: Number of organizations to split the data between.

    Raises:
        ValueError: From ``np.split`` when the train or test part cannot be
            divided into ``n_client`` equal parts.
    """
    # The annotation promises os.PathLike, which does not define ``/``;
    # go through os.fspath + os.path.join so str and Path both work.
    root = os.fspath(data_path)

    iris = datasets.load_iris()
    features = np.array(iris.data)
    targets = np.array(iris.target)

    order = np.arange(len(iris.data))
    np.random.shuffle(order)
    cut = int(0.8 * len(iris.data))
    train_index, test_index = order[:cut], order[cut:]

    # Split array into the number of organization. Every split is done
    # before the first file is written.
    folds = {
        "train": (
            np.split(features[train_index], n_client),
            np.split(targets[train_index], n_client),
        ),
        "test": (
            np.split(features[test_index], n_client),
            np.split(targets[test_index], n_client),
        ),
    }

    # Save splits in different folders to simulate the different organization
    for i in range(n_client):
        for split, (data_folds, target_folds) in folds.items():
            out_dir = os.path.join(root, f"org_{i+1}", split)
            os.makedirs(out_dir, exist_ok=True)
            np.save(os.path.join(out_dir, f"{split}_data.npy"), data_folds[i])
            np.save(os.path.join(out_dir, f"{split}_targets.npy"), target_folds[i])
np.array(iris.data)[test_index] 21 | test_targets = np.array(iris.target)[test_index] 22 | 23 | # Split array into the number of organization 24 | train_data_folds = np.split(train_data, n_client) 25 | train_targets_folds = np.split(train_targets, n_client) 26 | test_data_folds = np.split(test_data, n_client) 27 | test_targets_folds = np.split(test_targets, n_client) 28 | 29 | # Save splits in different folders to simulate the different organization 30 | for i in range(n_client): 31 | 32 | # Save train dataset on each org 33 | os.makedirs(str(data_path / f"org_{i+1}/train"), exist_ok=True) 34 | filename = data_path / f"org_{i+1}/train/train_data.npy" 35 | np.save(str(filename), train_data_folds[i]) 36 | filename = data_path / f"org_{i+1}/train/train_targets.npy" 37 | np.save(str(filename), train_targets_folds[i]) 38 | 39 | # Save test dataset on each org 40 | os.makedirs(str(data_path / f"org_{i+1}/test"), exist_ok=True) 41 | filename = data_path / f"org_{i+1}/test/test_data.npy" 42 | np.save(str(filename), test_data_folds[i]) 43 | filename = data_path / f"org_{i+1}/test/test_targets.npy" 44 | np.save(str(filename), test_targets_folds[i]) 45 | -------------------------------------------------------------------------------- /docs/source/examples/substrafl/go_further/sklearn_fedavg_assets/dataset/iris_opener.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import numpy as np 3 | import substratools as tools 4 | 5 | 6 | class IrisOpener(tools.Opener): 7 | def fake_data(self, n_samples=None): 8 | N_SAMPLES = n_samples if n_samples and n_samples <= 100 else 100 9 | 10 | fake_data = np.random.rand(8, size=(N_SAMPLES, 4)) 11 | 12 | fake_targets = np.random.randint(3, size=N_SAMPLES) 13 | 14 | data = {"images": fake_data, "labels": fake_targets} 15 | 16 | return data 17 | 18 | def get_data(self, folders): 19 | # get npy files 20 | p = pathlib.Path(folders[0]) 21 | images_data_path = p / 
list(p.glob("*_data.npy"))[0] 22 | labels_data_path = p / list(p.glob("*_targets.npy"))[0] 23 | 24 | # load data 25 | data = {"data": np.load(images_data_path), "targets": np.load(labels_data_path)} 26 | 27 | return data 28 | -------------------------------------------------------------------------------- /docs/source/examples/substrafl/go_further/sklearn_fedavg_assets/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib==3.9.2 2 | numpy==2.1.1 3 | pandas==2.2.2 4 | scikit-learn==1.5.2 5 | substrafl -------------------------------------------------------------------------------- /docs/source/examples/substrafl/go_further/torch_cyclic_assets/dataset/cyclic_mnist_dataset.py: -------------------------------------------------------------------------------- 1 | import codecs 2 | import os 3 | import sys 4 | import pathlib 5 | 6 | import numpy as np 7 | from torchvision.datasets import MNIST 8 | 9 | 10 | def get_int(b: bytes) -> int: 11 | return int(codecs.encode(b, "hex"), 16) 12 | 13 | 14 | def MNISTraw2numpy(path: str, strict: bool = True) -> np.array: 15 | # read 16 | with open(path, "rb") as f: 17 | data = f.read() 18 | # parse 19 | magic = get_int(data[0:4]) 20 | nd = magic % 256 21 | assert 1 <= nd <= 3 22 | numpy_type = np.uint8 23 | s = [get_int(data[4 * (i + 1) : 4 * (i + 2)]) for i in range(nd)] 24 | 25 | num_bytes_per_value = np.iinfo(numpy_type).bits // 8 26 | # The MNIST format uses the big endian byte order. If the system uses little endian byte order by default, 27 | # we need to reverse the bytes before we can read them with np.frombuffer(). 
28 | needs_byte_reversal = sys.byteorder == "little" and num_bytes_per_value > 1 29 | parsed = np.frombuffer(bytearray(data), dtype=numpy_type, offset=(4 * (nd + 1))) 30 | if needs_byte_reversal: 31 | parsed = parsed.flip(0) 32 | 33 | assert parsed.shape[0] == np.prod(s) or not strict 34 | return parsed.reshape(*s) 35 | 36 | 37 | def setup_mnist(data_path, N_CLIENTS): 38 | raw_path = pathlib.Path(data_path) / "MNIST" / "raw" 39 | 40 | # Download the dataset 41 | MNIST(data_path, download=True) 42 | 43 | # Extract numpy arrays from raw data 44 | train_images = MNISTraw2numpy(str(raw_path / "train-images-idx3-ubyte")) 45 | train_labels = MNISTraw2numpy(str(raw_path / "train-labels-idx1-ubyte")) 46 | test_images = MNISTraw2numpy(str(raw_path / "t10k-images-idx3-ubyte")) 47 | test_labels = MNISTraw2numpy(str(raw_path / "t10k-labels-idx1-ubyte")) 48 | 49 | # Split arrays into the number of organizations 50 | train_images_folds = np.array_split(train_images, N_CLIENTS) 51 | train_labels_folds = np.array_split(train_labels, N_CLIENTS) 52 | test_images_folds = np.array_split(test_images, N_CLIENTS) 53 | test_labels_folds = np.array_split(test_labels, N_CLIENTS) 54 | 55 | # Save splits in different folders to simulate the different organizations 56 | for i in range(N_CLIENTS): 57 | # Save train dataset on each org 58 | os.makedirs(str(data_path / f"org_{i+1}/train"), exist_ok=True) 59 | filename = data_path / f"org_{i+1}/train/train_images.npy" 60 | np.save(str(filename), train_images_folds[i]) 61 | filename = data_path / f"org_{i+1}/train/train_labels.npy" 62 | np.save(str(filename), train_labels_folds[i]) 63 | 64 | # Save test dataset on each org 65 | os.makedirs(str(data_path / f"org_{i+1}/test"), exist_ok=True) 66 | filename = data_path / f"org_{i+1}/test/test_images.npy" 67 | np.save(str(filename), test_images_folds[i]) 68 | filename = data_path / f"org_{i+1}/test/test_labels.npy" 69 | np.save(str(filename), test_labels_folds[i]) 70 | 
-------------------------------------------------------------------------------- /docs/source/examples/substrafl/go_further/torch_cyclic_assets/dataset/cyclic_mnist_opener.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import numpy as np 3 | import substratools as tools 4 | 5 | 6 | class MnistOpener(tools.Opener): 7 | def fake_data(self, n_samples=None): 8 | N_SAMPLES = n_samples if n_samples and n_samples <= 100 else 100 9 | 10 | fake_images = np.random.randint(256, size=(N_SAMPLES, 28, 28)) 11 | 12 | fake_labels = np.random.randint(10, size=N_SAMPLES) 13 | 14 | data = {"images": fake_images, "labels": fake_labels} 15 | 16 | return data 17 | 18 | def get_data(self, folders): 19 | # get npy files 20 | p = pathlib.Path(folders[0]) 21 | images_data_path = p / list(p.glob("*_images.npy"))[0] 22 | labels_data_path = p / list(p.glob("*_labels.npy"))[0] 23 | 24 | # load data 25 | data = { 26 | "images": np.load(images_data_path), 27 | "labels": np.load(labels_data_path), 28 | } 29 | 30 | return data 31 | -------------------------------------------------------------------------------- /docs/source/examples/substrafl/go_further/torch_cyclic_assets/dataset/description.md: -------------------------------------------------------------------------------- 1 | # Mnist 2 | 3 | This dataset is [THE MNIST DATABASE of handwritten digits](http://yann.lecun.com/exdb/mnist/). It is downloaded from torchvision. 4 | 5 | The target is the number (0 -> 9) represented by the pixels.
6 | 7 | ## Data repartition 8 | 9 | ### Train and test 10 | 11 | ### Split data between organizations 12 | 13 | ## Opener usage 14 | 15 | The opener exposes 2 methods: 16 | 17 | - `get_data` returns a dictionary containing the images and the labels as numpy arrays 18 | - `fake_data` returns a fake data sample of images and labels in a dict 19 | -------------------------------------------------------------------------------- /docs/source/examples/substrafl/go_further/torch_cyclic_assets/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib==3.9.2 2 | numpy==2.1.1 3 | pandas==2.2.2 4 | scikit-learn==1.5.2 5 | substrafl 6 | torch==2.4.1 7 | torchvision==0.19.1 8 | -------------------------------------------------------------------------------- /docs/source/examples/substrafl/index.rst: -------------------------------------------------------------------------------- 1 | SubstraFL examples 2 | ================== 3 | 4 | The examples below are compatible with SubstraFL |substrafl_version|. 5 | 6 | 7 | Example to get started using the PyTorch interface 8 | ************************************************** 9 | 10 | .. nbgallery:: 11 | get_started/run_mnist_torch.ipynb 12 | 13 | Example to go further 14 | ********************* 15 | 16 | .. 
nbgallery:: 17 | go_further/run_iris_sklearn.ipynb 18 | go_further/run_diabetes_substrafl.ipynb 19 | go_further/run_mnist_cyclic.ipynb 20 | -------------------------------------------------------------------------------- /docs/source/how-to/deploying-substra/howto/existing-volumes.rst: -------------------------------------------------------------------------------- 1 | *************************** 2 | How-to use existing volumes 3 | *************************** 4 | 5 | By default, Substra instantiates PersistentVolumeClaims (PVCs) on the fly, which is generally convenient; but this might not suit your needs, in which case Substra can also use preexisting PVCs rather than make new ones. 6 | 7 | For example, you could make a copy of each volume from a Substra deployment and then create a new one configured to use the copies -- thus making a clone of the original instance. 8 | 9 | .. note:: 10 | Substra will still instantiate PVCs on the fly! But this is only for moving data around during jobs, so the instantiated PVCs can generally be ignored and should not be backed up. 11 | 12 | Backend values: 13 | 14 | .. code-block:: yaml 15 | 16 | server: 17 | persistence: 18 | servermedias: 19 | existingClaim: "serverPVC" 20 | postgresql: 21 | primary: 22 | persistence: 23 | existingClaim: "psqlPVC" 24 | redis: 25 | master: 26 | persistence: 27 | existingClaim: "redisPVC" 28 | docker-registry: 29 | persistence: 30 | existingClaim: "registryPVC" 31 | minio: 32 | persistence: 33 | existingClaim: "minioPVC" 34 | 35 | Orchestrator values (in standalone mode, which is the default): 36 | 37 | .. code-block:: yaml 38 | 39 | postgresql: 40 | primary: 41 | persistence: 42 | existingClaim: "orcpsqlPVC" -------------------------------------------------------------------------------- /docs/source/how-to/deploying-substra/howto/external-database.rst: -------------------------------------------------------------------------------- 1 | ..
_ops howto external database: 2 | 3 | ******************************* 4 | How-to use an external database 5 | ******************************* 6 | 7 | By default, Substra components use their own integrated postgres databases (one per backend, and one for the orchestrator in centralized mode). 8 | 9 | They can be pointed to any PostgreSQL instance (version 11 or better). 10 | 11 | The backend and orchestrator use the same structure in their values: 12 | 13 | .. code-block:: yaml 14 | 15 | database: 16 | host: my.db.com 17 | port: 5432 18 | 19 | auth: 20 | username: my-username 21 | password: my-password 22 | database: my-substra-db 23 | 24 | Or, for improved security, you can create a secret with your database credentials, under the ``DATABASE_PASSWORD`` and ``DATABASE_USERNAME`` keys. Secrets can be `made very secure `_ but this is the basic example: 25 | 26 | .. code-block:: yaml 27 | 28 | apiVersion: v1 29 | kind: Secret 30 | metadata: 31 | name: my-db-secret 32 | stringData: 33 | DATABASE_PASSWORD: my-password 34 | DATABASE_USERNAME: my-username 35 | 36 | And then point to it in the values, instead of using username & password: 37 | 38 | .. code-block:: yaml 39 | 40 | database: 41 | host: my.db.com 42 | port: 5432 43 | 44 | auth: 45 | database: my-substra-db 46 | credentialsSecretName: my-db-secret -------------------------------------------------------------------------------- /docs/source/how-to/deploying-substra/howto/sso-oidc.rst: -------------------------------------------------------------------------------- 1 | ************************************* 2 | How-to set up SSO with OpenID Connect 3 | ************************************* 4 | 5 | Substra supports external user management through OpenID Connect (OIDC). It is done per-backend, so each organization can use their own user provider and cohabit on the same network. 
6 | 7 | OIDC users cannot be created as admins: you'll still need to create at least one admin account as normal, through ``addAccountOperator.users``. OIDC users will all be assigned to a single channel. 8 | 9 | 10 | Set up the provider 11 | =================== 12 | 13 | First, set up an OIDC client at an identity provider (IP -- or OpenID provider, OP). 14 | 15 | The only claims Substra needs are ``openid email``, which any provider should be able to support. Allow ``/oidc/callback`` as a redirect URI. 16 | 17 | Get your **provider URL**. Appending ``/.well-known/openid-configuration`` to this URL should return a JSON description of the provider's capabilities, which Substra will use for much of the configuration. Otherwise, you can set endpoints by hand under ``oidc.provider.endpoints``. 18 | 19 | The provider will give you a **client id** and a **client secret**. Deploy them on the cluster in a secret: 20 | 21 | .. code-block:: yaml 22 | 23 | apiVersion: v1 24 | kind: Secret 25 | metadata: 26 | name: oidc-secret 27 | stringData: 28 | OIDC_RP_CLIENT_ID: "CLIENT_ID" 29 | OIDC_RP_CLIENT_SECRET: "CLIENT_SECRET" 30 | 31 | 32 | Set up user creation 33 | ==================== 34 | 35 | When a user first logs in through OIDC, they are assigned a username based on their email address. The ``oidc.users.appendDomain`` flag controls whether email domain is included. 36 | 37 | You must choose one user creation process: 38 | 39 | * Set up a default channel by setting ``oidc.users.channel`` to the name of an existing channel (see the value of ``orchestrator.channels``). OIDC users will be able to use the platform right away. 40 | * Alternatively, set ``oidc.users.requireApproval`` to ``true``: after their first login, OIDC users will have to wait for manual approval from an administrator (on the web frontend). 41 | 42 | .. 
admonition:: Note on user validity 43 | 44 | Substra OIDC user accounts will remain valid for a while after the corresponding account at the provider has been disabled; this can be an issue if, for instance, an employee has been recently terminated but still has access to the Substra instance. 45 | 46 | This can be mitigated through ``oidc.users.loginValidityDuration``: accounts that have not logged in within this amount of time (seconds) are disabled until the user logs in again. The API tokens associated with their account stop working as well, but will work again when they refresh their login. 47 | 48 | To avoid irritating users with frequent login prompts, Substra will attempt to do this in the background, making all this invisible to users. However, this requires the provider to support offline access and refresh tokens -- not all do, and implementations vary. 49 | 50 | Automated login refresh is enabled by default through the setting ``oidc.users.useRefreshToken``, but Substra will disable it and fall back to the manual mode (actual login prompts) if it can't detect provider support. 51 | 52 | If you are using automated login refresh, you can set ``oidc.users.loginValidityDuration`` to a low value to slightly increase security at a small cost in server load. Otherwise, it is a balance of security versus user convenience. 53 | 54 | 55 | Other settings 56 | ============== 57 | 58 | If OIDC users will be using the Substra API (for instance if they are data scientists running Python scripts), they'll need to generate API tokens on the web frontend and use those in their scripts. 59 | 60 | Having to generate new tokens all the time is a hindrance for the users: you can increase their lifetime through ``config.EXPIRY_TOKEN_LIFETIME`` in the backend values. 61 | 62 | 63 | Putting it all together 64 | ======================= 65 | 66 | Example of a minimal working configuration in the backend values: 67 | 68 | ..
code-block:: yaml 69 | 70 | config: 71 | EXPIRY_TOKEN_LIFETIME: "10080" # one week, in minutes 72 | oidc: 73 | enabled: true 74 | clientSecretName: oidc-secret # set earlier 75 | provider: 76 | url: "PROVIDER_URL" 77 | displayName: "PROVIDER_NAME" # will be displayed on the login page 78 | users: 79 | channel: "CHANNEL_ID" 80 | -------------------------------------------------------------------------------- /docs/source/how-to/deploying-substra/index.rst: -------------------------------------------------------------------------------- 1 | How-to guides for deploying Substra 2 | =================================== 3 | 4 | This section is relevant if you are **deploying Substra in production**. 5 | 6 | Familiarity with infrastructure, and Kubernetes in particular, is recommended. 7 | 8 | 9 | :ref:`The walkthrough guide <ops walkthrough>` takes you step by step through deploying a production environment. 10 | More specific how-to guides cover additional points. 11 | 12 | :ref:`ops upgrade notes` cover relevant changes when upgrading from one version to the next. 13 | 14 | The :ref:`compatibility table` contains a reference of Substra versions compatible with one another. 15 | 16 | .. toctree:: 17 | :maxdepth: 2 18 | :hidden: 19 | 20 | walkthrough.rst 21 | howto/customize-compute-pod-node.rst 22 | howto/existing-volumes.rst 23 | howto/external-database.rst 24 | howto/sso-oidc.rst 25 | upgrade-notes.rst 26 | 27 | 28 | Substra is meant to be deployed as part of a federated learning network. Each participant *organization* will set up their own *Substra node*, from which their users can connect to the network and run machine learning algorithms on the data registered by each participant on their own node. 29 | 30 | .. image:: ../../_static/schemes/stack-technical-scheme.svg 31 | :width: 800 32 | :align: center 33 | :alt: Substra Components Scheme 34 | 35 | The terms *Substra node* and *Substra organization* are practically interchangeable.
36 | 37 | Substra is distributed as Helm charts, running on Kubernetes 1.19 and up. Each component has their Helm chart, which are hosted at https://substra.github.io/charts. 38 | 39 | 40 | Hardware requirements 41 | --------------------- 42 | 43 | Each backend needs the following resources to run Substra: 44 | 45 | * 8 CPU 46 | * 30 GB of RAM 47 | * 300 GB of storage 48 | 49 | In addition, you need to consider the resources required by the compute tasks. For example, if each task needs 10 GB of RAM and you have two tasks running in parallel for a single backend, you will need a total of 50 GB of RAM (30 GB + 2*10 GB). The same applies to CPU usage and storage requirements (datasets and models). 50 | 51 | The orchestrator needs the following resources: 52 | 53 | * 4 CPU 54 | * 16 GB of RAM 55 | * 100 GB of storage 56 | 57 | -------------------------------------------------------------------------------- /docs/source/how-to/deploying-substra/walkthrough.rst: -------------------------------------------------------------------------------- 1 | .. _ops walkthrough: 2 | 3 | ********************* 4 | How-to deploy Substra 5 | ********************* 6 | 7 | This section will guide you through a production deployment with two Substra nodes. 8 | 9 | We will deploy an orchestrator and two Substra nodes (for two organizations, called ``ingen`` and ``biotechnica``) communicating over TLS on the internet. This is how the application is configured for running on actual healthcare data. 10 | 11 | .. 
toctree:: 12 | :glob: 13 | :titlesonly: 14 | :numbered: 15 | 16 | walkthrough/* 17 | 18 | -------------------------------------------------------------------------------- /docs/source/how-to/deploying-substra/walkthrough/10-prerequisites.rst: -------------------------------------------------------------------------------- 1 | ******************* 2 | Check Prerequisites 3 | ******************* 4 | 5 | Substra version 6 | =============== 7 | 8 | Substra is a set of microservices which are together issued a version number; but, since we are installing the services one by one, we need to know the actual version of each one. 9 | 10 | Check the :ref:`compatibility table` for the Helm chart version needed for the orchestrator, backend and frontend. The corresponding Docker app version is already configured in there, so it's all you need. 11 | 12 | Local tools 13 | =========== 14 | 15 | Install: 16 | - kubectl 17 | - helm 18 | 19 | Add the Substra helm repository: 20 | .. code-block:: shell 21 | 22 | helm repo add substra https://substra.github.io/charts/ 23 | helm repo update 24 | 25 | .. Leaving kubectl and helm purposefully unlinked since they are part of the basics for this kind of work 26 | 27 | Also install: 28 | - ``curl`` for making sure the HTTP endpoints work 29 | - `gRPCurl `_ for making sure the gRPC endpoint works 30 | 31 | 32 | Infrastructure 33 | ============== 34 | 35 | Substra is a federated learning tool and as such it makes little sense to have only one node running, or nodes running on the same cluster merely separated by a namespace. 36 | 37 | Therefore, in this guide we are deploying on two separate Kubernetes clusters, connecting them through the internet. 38 | 39 | Throughout the guide we are giving hostnames to endpoints. On the internet, this means owning a domain name and setting up DNS -- **everytime you see** ``DOMAIN``, **it means your own domain** you are setting this up under. 
40 | 41 | Exposing on the internet also means dealing with a certificate authority -- here we're using `Let's Encrypt `__. 42 | 43 | .. note:: 44 | It is entirely possible to host multiple Substra nodes on the same cluster, and/or to have them communicate on a private network with a private CA, and/or to attribute hostnames differently. 45 | 46 | 47 | In practice 48 | ----------- 49 | 50 | Clusters 51 | ^^^^^^^^ 52 | 53 | Set up two clusters -- they have to support allocating PVCs on the fly and opening ingresses to the Internet. For this, we'd recommend using a managed Kubernetes service such as `Google GKE `__, `Azure AKS `__, or `Amazon EKS `__. 54 | 55 | **We'll henceforth refer to the clusters we have set up as** ``cluster-1`` **and** ``cluster-2`` **.** 56 | 57 | We also need some software for routing (ingress-nginx) and certificate management (cert-manager); install both on each cluster (insert your email address in place of ``YOUR_EMAIL_HERE``): 58 | 59 | .. code-block:: shell 60 | :emphasize-lines: 20,35 61 | 62 | helm upgrade --install ingress-nginx ingress-nginx \ 63 | --repo https://kubernetes.github.io/ingress-nginx \ 64 | --namespace ingress-nginx --create-namespace 65 | 66 | helm upgrade --install \ 67 | cert-manager cert-manager \ 68 | --repo https://charts.jetstack.io \ 69 | --namespace cert-manager \ 70 | --create-namespace \ 71 | --set installCRDs=true 72 | 73 | kubectl apply -f - << "EOF" 74 | apiVersion: cert-manager.io/v1 75 | kind: ClusterIssuer 76 | metadata: 77 | name: letsencrypt-staging 78 | spec: 79 | acme: 80 | server: https://acme-staging-v02.api.letsencrypt.org/directory 81 | email: YOUR_EMAIL_HERE 82 | privateKeySecretRef: 83 | name: letsencrypt-staging 84 | solvers: 85 | - http01: 86 | ingress: 87 | class: nginx 88 | --- 89 | apiVersion: cert-manager.io/v1 90 | kind: ClusterIssuer 91 | metadata: 92 | name: letsencrypt-prod 93 | spec: 94 | acme: 95 | server: https://acme-v02.api.letsencrypt.org/directory 96 | email: YOUR_EMAIL_HERE 
97 | privateKeySecretRef: 98 | name: letsencrypt-prod 99 | solvers: 100 | - http01: 101 | ingress: 102 | class: nginx 103 | EOF 104 | 105 | This also sets up ``letsencrypt-prod`` as an issuer of certificates (for endpoints exposed on the internet) and ``letsencrypt-staging`` to issue development certificates. 106 | 107 | DNS 108 | ^^^ 109 | 110 | Probably the most convenient way to handle DNS is to set a wildcard record for each cluster and forget about it. Once you have installed nginx-ingress-controller, the corresponding service should have received an IP address you can then set in the DNS: 111 | 112 | .. code-block:: 113 | :caption: DNS zone file for ``DOMAIN`` 114 | 115 | *.cluster-1 300 IN A NGINX_1_IP 116 | *.cluster-2 300 IN A NGINX_2_IP 117 | 118 | This way, any hostname such as ``whatever.cluster-1.DOMAIN`` directs to the same endpoint, which itself directs the traffic to the correct service based on hostname (this is what the Ingress objects are for). 119 | 120 | -------------------------------------------------------------------------------- /docs/source/how-to/deploying-substra/walkthrough/30-backend-deployment.rst: -------------------------------------------------------------------------------- 1 | ****************** 2 | Deploy the backend 3 | ****************** 4 | 5 | This section details deploying a node for the ``ingen`` organization onto ``cluster-1``. 6 | 7 | You will need to repeat this for ``biotechnica`` onto ``cluster-2``, with the appropriate values changed. 8 | 9 | The backend depends on the orchestrator and will fail to run if the orchestrator is not available and operational. 10 | 11 | Prepare your Helm values 12 | ======================== 13 | 14 | .. seealso:: 15 | Full reference on `Artifact Hub `_. 16 | 17 | To configure your values: 18 | 19 | #. Create a Helm values file named ``backend-ingen-values.yaml`` with the following content: 20 | 21 | .. code-block:: yaml 22 | 23 | organizationName: ingen 24 | 25 | 26 | #. 
Configure your Substra backend Ingress. In the ``backend-ingen-values.yaml`` file add the following content: 27 | 28 | .. code-block:: yaml 29 | 30 | config: 31 | ALLOWED_HOSTS: '[".cluster-1.DOMAIN"]' 32 | 33 | server: 34 | defaultDomain: https://api.cluster-1.DOMAIN:443 35 | commonHostDomain: cluster-1.DOMAIN 36 | 37 | ingress: 38 | enabled: true 39 | hostname: api.cluster-1.DOMAIN 40 | 41 | .. caution:: 42 | For ``ALLOWED_HOSTS``, note that the leading dot is important. 43 | 44 | #. Configure your connection to the orchestrator. In the ``backend-ingen-values.yaml`` file add the following content: 45 | 46 | .. code-block:: yaml 47 | 48 | orchestrator: 49 | host: ORCHESTRATOR_HOSTNAME 50 | port: ORCHESTRATOR_PORT 51 | mspID: ingen 52 | sameCluster: ORCHESTRATOR_SAME_CLUSTER 53 | 54 | 55 | | ``ORCHESTRATOR_HOSTNAME`` should be ``orchestrator.cluster-1.DOMAIN`` if you are *outside* the cluster, but if we are working on ``cluster-1`` we should use its local name ``orchestrator-server.orchestrator`` (following the ``service-name.namespace`` convention). 56 | | ``ORCHESTRATOR_PORT`` should be ``443`` if TLS is enabled, otherwise ``80``. 57 | | ``ORCHESTRATOR_SAME_CLUSTER`` should be ``true`` if the backend is in the same cluster as the orchestrator, otherwise ``false``. 58 | 59 | .. _backend-channel-config: 60 | 61 | #. Configure your :term:`Substra Channels `. 62 | In the ``backend-ingen-values.yaml`` file, add the following content under the ``orchestrator`` key: 63 | 64 | .. code-block:: yaml 65 | 66 | channels: 67 | - our-channel: 68 | restricted: false 69 | model_export_enabled: true 70 | chaincode: 71 | name: mycc 72 | 73 | | The channel name is ``our-channel``, as configured in :ref:`Orchestrator Substra Channels `. 74 | | ``restricted`` would prevent other organizations from joining the channel. 75 | | ``model_export_enabled`` allows users from this channel to download models produced by the platform. 76 | 77 | #.
Optional: If your orchestrator has TLS enabled: 78 | 79 | #. Retrieve the CA certificate from your orchestrator: 80 | 81 | The CA certificate is the ``orchestrator-ca.crt`` file generated at the :ref:`Generate your Certificate Authority certificate ` step of the orchestrator deployment. 82 | If a public Certificate Authority was used to generate the orchestrator certificate, you need to fetch the certificate of the Certificate Authority. 83 | 84 | #. Create a ConfigMap containing the CA certificate: 85 | 86 | .. code-block:: bash 87 | 88 | kubectl create configmap orchestrator-cacert --from-file=ca.crt=orchestrator-ca.crt 89 | 90 | #. Configure your backend to enable orchestrator TLS. In the ``backend-ingen-values.yaml`` file add the following content under the ``orchestrator`` key: 91 | 92 | .. code-block:: yaml 93 | 94 | tls: 95 | enabled: true 96 | cacert: orchestrator-cacert 97 | 98 | #. Add users to your backend. In the ``backend-ingen-values.yaml`` file add the following content: 99 | 100 | .. code-block:: yaml 101 | 102 | addAccountOperator: 103 | users: 104 | - name: admin 105 | secret: an3xtr4lengthyp@ssword 106 | channel: our-channel 107 | 108 | | The password must be at least 20 characters long. 109 | 110 | 111 | Deploy the Chart 112 | ================ 113 | 114 | #. Deploy the backend Helm chart: 115 | 116 | .. code-block:: bash 117 | 118 | helm install backend substra/substra-backend --version VERSION --values backend-ingen-values.yaml --namespace ingen --create-namespace 119 | 120 | | Replace ``VERSION`` with the version of the Substra backend helm chart you want to deploy. 121 | 122 | #. Validate: 123 | 124 | .. code-block:: shell 125 | 126 | curl -kL api.cluster-1.DOMAIN 127 | 128 | Should return a ``401`` with the message: 129 | 130 | ..
code-block:: javascript 131 | 132 | {"detail":"Authentication credentials were not provided."} 133 | 134 | Execution Problems 135 | ================== 136 | 137 | Once everything is deployed, if there are execution problems when adding a function to Substra, they may be related to the network policies. 138 | 139 | #. Check the logs of the pod ``backend-substra-backend-builder-0``: 140 | 141 | .. code-block:: bash 142 | 143 | kubectl logs backend-substra-backend-builder-0 -n ingen 144 | 145 | #. If there is a ``HTTPSConnectionPool(host='10.43.0.1', port=443)`` error, modify the network policies as follows: 146 | 147 | Remove all the network policies except the ``substra-backend-internet-egress`` network policy. 148 | 149 | Add the following lines inside the ``to`` section of the ``substra-backend-api-server-egress`` network policy: 150 | 151 | .. code-block:: yaml 152 | 153 | - to: 154 | - ipBlock: 155 | cidr: 0.0.0.0/0 156 | 157 | -------------------------------------------------------------------------------- /docs/source/how-to/deploying-substra/walkthrough/50-frontend-deployment.rst: -------------------------------------------------------------------------------- 1 | ******************* 2 | Deploy the frontend 3 | ******************* 4 | 5 | The Substra frontend is a standalone Helm chart that only needs to be told under what URL the backend API is to be contacted. 6 | 7 | This section details setting up the ``ingen`` frontend on ``cluster-1``, making it available at ``substra.cluster-1.DOMAIN``. 8 | 9 | Naturally this could be repeated for ``biotechnica`` onto ``cluster-2``, with the appropriate values changed. 10 | 11 | Update the backend values 12 | ========================= 13 | 14 | Tell the backend to set the proper headers for cross-origin resources, by adding new values in ``backend-ingen-values.yaml``: 15 | 16 | ..
code-block:: yaml 17 | 18 | config: 19 | CORS_ORIGIN_WHITELIST: '["https://substra.cluster-1.DOMAIN"]' # this is a string parsed as a JSON list 20 | CSRF_TRUSTED_ORIGINS: '["https://substra.cluster-1.DOMAIN"]' 21 | CORS_ALLOW_CREDENTIALS: 'true' 22 | # you should already have ALLOWED_HOSTS under "config" 23 | 24 | Prepare your Helm values 25 | ======================== 26 | 27 | .. seealso:: 28 | Full reference on `Artifact Hub `_. 29 | 30 | Create a file for your values, say ``frontend-ingen-values.yaml``. 31 | 32 | Tell the frontend the backend API url: 33 | 34 | .. code-block:: yaml 35 | 36 | api: 37 | url: "https://api.cluster-1.DOMAIN" 38 | 39 | Expose the service with the included ingress: 40 | 41 | .. code-block:: yaml 42 | 43 | ingress: 44 | hosts: 45 | - host: substra.cluster-1.DOMAIN 46 | paths: ['/'] 47 | tls: 48 | - hosts: 49 | - substra.cluster-1.DOMAIN 50 | secretName: substra-frontend-tls 51 | 52 | Deploy the Chart 53 | ================ 54 | 55 | Deploy with Helm, like the backend: 56 | 57 | .. code-block:: shell 58 | 59 | helm install frontend substra/substra-frontend --version VERSION --values frontend-ingen-values.yaml --namespace ingen 60 | 61 | Validate with a web browser; you can log in as ``admin`` with the password ``an3xtr4lengthyp@ssword``, which we set up in the backend values earlier. -------------------------------------------------------------------------------- /docs/source/how-to/deploying-substra/walkthrough/60-mtls-setup.rst: -------------------------------------------------------------------------------- 1 | .. _ops set up mutual TLS: 2 | 3 | ***************** 4 | Set up mutual TLS 5 | ***************** 6 | 7 | This section details setting up mTLS communication between the backends and the orchestrator. 8 | 9 | In this scenario, the orchestrator acts as the certificate authority checking the certificates. 10 | These instructions have to be repeated for each backend. 
11 | 12 | This guide assumes that you already have followed the instructions to :ref:`ops set up TLS`. 13 | 14 | Generate backend Certificate Signing Request and signing key 15 | ============================================================ 16 | 17 | The first step is to generate the Certificate Signing Request and a signing key for the :term:`organization`. 18 | 19 | .. code:: bash 20 | 21 | openssl req -newkey rsa:2048 -nodes -keyout ORGNAME.key -subj "/O=ORGNAME/CN=HOSTNAME" -out ORGNAME.csr 22 | 23 | | Replace ``ORGNAME`` with your :term:`organization` name. 24 | It should be the same as the value you put in your ``values.yaml`` file for the key ``orchestrator.mspID``. 25 | | Replace ``HOSTNAME`` with the hostname of your substra backend. 26 | 27 | Then send the file named ``ORGNAME.csr`` to the organization managing the orchestrator for them to sign your certificate. 28 | 29 | Sign the Substra backend certificate 30 | ==================================== 31 | 32 | Now that you have the Certificate Signing Request from your backend in your orchestrator, you can sign it with the orchestrator certificate authority. 33 | 34 | Navigate to the directory where the files ``orchestrator-ca.crt`` and ``orchestrator-ca.key`` are located (created during :ref:`ops set up TLS`), and sign the certificates: 35 | 36 | .. code:: bash 37 | 38 | openssl x509 -req -days 365 -in ORGNAME.csr -CA orchestrator-ca.crt -CAkey orchestrator-ca.key -CAcreateserial -out ORGNAME.crt -sha256 39 | 40 | | Replace ``ORGNAME`` with the :term:`organization` name. 41 | 42 | .. caution:: 43 | We don't recommend having your certificate valid for a year (``365`` days in the previous command), you should change this value based on your company policy. 44 | 45 | Then send the file named ``ORGNAME.crt`` back to the organization managing the Substra backend. You don't need to keep a copy of this certificate. 
46 | 47 | Update backend configuration 48 | ============================ 49 | 50 | Once you have received the certificate (named ``ORGNAME.crt``), you can create a secret in the Kubernetes cluster containing this file and the file ``ORGNAME.key``: 51 | 52 | .. code-block:: bash 53 | 54 | kubectl create secret tls orchestrator-client-cert --cert=ORGNAME.crt --key=ORGNAME.key 55 | 56 | To use this certificate, update or create the backend ``backend-values.yaml`` config file and add the following lines: 57 | 58 | .. code-block:: yaml 59 | 60 | orchestrator: 61 | tls: 62 | enabled: true 63 | cacert: orchestrator-cacert 64 | mtls: 65 | enabled: true 66 | clientCertificate: orchestrator-client-cert 67 | 68 | Note that you need to have the orchestrator TLS enabled for this to work. 69 | 70 | If your backend is already running, apply the changes (this will restart it): 71 | 72 | .. code-block:: bash 73 | 74 | helm upgrade RELEASE-NAME substra/substra-backend --version VERSION --values backend-values.yaml 75 | 76 | | Replace ``RELEASE-NAME`` with the name of your substra backend release. You can retrieve it with ``helm list``. 77 | | Replace ``VERSION`` with the version of the substra backend helm chart you want to deploy. 78 | If you don't want to change version you can retrieve your currently deployed version with ``helm list``. 79 | 80 | Update orchestrator configuration 81 | ================================= 82 | 83 | Finally, create or update the orchestrator values ``orchestrator-values.yaml`` config file with the following values: 84 | 85 | .. code-block:: yaml 86 | 87 | orchestrator: 88 | tls: 89 | enabled: true 90 | mtls: 91 | enabled: true 92 | clientCACerts: 93 | orchestrator: 94 | - orchestrator-tls-cacert 95 | 96 | Here we just put the orchestrator CA cert as a validation certificate. 97 | 98 | If your client certs were signed by another authority that you trust you would need to add them as configmaps to your cluster and reference them here. 
99 | With the key ``orchestrator`` in our example being the name of the organization that depends on this CA (it can be any arbitrary name). 100 | The items represent the names of the configmaps you wish to load; note that the object in the configmap should be named ``ca.crt``. 101 | 102 | If your orchestrator is already running, apply the changes (this will restart it): 103 | 104 | .. code-block:: bash 105 | 106 | helm upgrade RELEASE-NAME substra/orchestrator --version VERSION --values orchestrator-values.yaml 107 | 108 | | Replace ``RELEASE-NAME`` with the name of your orchestrator release. You can retrieve it with ``helm list``. 109 | | Replace ``VERSION`` with the version of your orchestrator. You can retrieve the currently deployed version with ``helm list``. 110 | -------------------------------------------------------------------------------- /docs/source/how-to/developing-substra/harbor.rst: -------------------------------------------------------------------------------- 1 | ************************** 2 | Harbor in local deployment 3 | ************************** 4 | 5 | 6 | 7 | This page describes how to use Harbor in our development setup, using skaffold. It is divided into three sections: the first one describes how to use a harbor registry inside the ``k3d`` cluster, the second describes how to use a harbor registry outside of the cluster, and the third describes the set-up that will be needed. 8 | 9 | 10 | 11 | Use in-cluster Harbor 12 | ===================== 13 | 14 | First-time set-up (specific to in-cluster) 15 | ------------------------------------------ 16 | 17 | As ``sudo``, add ``127.0.0.1 registry.org-2.com`` to ``/etc/hosts`` 18 | 19 | .. code:: bash 20 | 21 | echo "127.0.0.1 registry.org-2.com" | sudo tee -a /etc/hosts 22 | 23 | Use 24 | --- 25 | 26 | 1. Set ``HARBOR_CERT_PATH`` to point to the absolute path to ``orchestrator/examples/tools/ca.crt`` 27 | 28 | .. 
code-block:: bash 29 | 30 | export HARBOR_CERT_PATH=/orchestrator/examples/tools/ca.crt 31 | 32 | 2. Re-create the cluster and launch skaffold on the orchestrator 33 | 34 | .. code-block:: bash 35 | 36 | ./k3-create.sh 37 | cd orchestrator 38 | skaffold run 39 | 40 | 3. Start ``substra-backend`` with profile ``org-2-harbor`` 41 | 42 | 4. Activate port-forward (port ``30046``) on 43 | ``harbor-nginx-xxxxxxxxxx-xxxxx`` (referred to as ````), 44 | depending on your tool: 45 | 46 | - ``kubectl``: 47 | 48 | .. code:: bash 49 | 50 | kubectl port-forward -n harbor deployments/harbor-nginx 30046:https 51 | 52 | - ``k9s``: 53 | 54 | 1. Hover pod ```` 55 | 2. Press ``+F`` 56 | 3. Replace ``nginx::30000`` by ``nginx::30046`` (the 57 | ``Local port`` should be replaced by ``30046``) 58 | 59 | 5. Follow the instructions in Harbor-set-up with the following information: 60 | 61 | - URL: https://registry.org-2.com:30046 62 | - Identifier: ``admin`` 63 | - Password: ``harborP@ssword2403`` 64 | 65 | Use external Harbor 66 | =================== 67 | 68 | 1. Follow the Harbor-set-up 69 | 70 | 2. Create ``docker-config`` secret 71 | 72 | .. code:: bash 73 | 74 | kubectl create secret docker-registry docker-config -n org-2 --docker-server= --docker-username= 75 | 76 | 3. Update your value file (you can use ``backend-org-2-harbor.yaml`` as a model) 77 | 78 | .. code:: yaml 79 | 80 | docker-registry: 81 | enabled: false 82 | 83 | containerRegistry: 84 | local: false 85 | scheme: 86 | host: # The host, without the port, as it is defined in the field port 87 | pullDomain: # The harbor host, with the port as it won't use the port field 88 | port: 443 89 | 90 | kaniko: 91 | dockerConfigSecretName: docker-config # Equals to the name given to the secret at the previous step 92 | 93 | 94 | Harbor-set-up 95 | ============= 96 | 97 | 1. In the frontend, create project ``substra`` 98 | 99 | 2. Set the variable ``HARBOR_REGISTRY_DOMAIN`` to your registry URL 100 | 101 | .. 
code-block:: bash 102 | 103 | export HARBOR_REGISTRY_DOMAIN= 104 | 105 | 3. Login to the registry 106 | 107 | .. code-block:: bash 108 | 109 | docker login $HARBOR_REGISTRY_DOMAIN 110 | 111 | 4. Manually add base image 112 | 113 | 1. Pull image 114 | 115 | .. code:: bash 116 | 117 | docker pull ghcr.io/substra/substra-tools:latest 118 | 119 | 2. Tag it 120 | 121 | .. code:: bash 122 | 123 | docker tag ghcr.io/substra/substra-tools:latest $HARBOR_REGISTRY_DOMAIN/substra/substra-tools:latest 124 | 125 | 3. Push to repository 126 | 127 | .. code:: bash 128 | 129 | docker push $HARBOR_REGISTRY_DOMAIN/substra/substra-tools:latest -------------------------------------------------------------------------------- /docs/source/how-to/developing-substra/index.rst: -------------------------------------------------------------------------------- 1 | How-to guides for developing Substra 2 | ==================================== 3 | 4 | The following guides might be of interest if you are interested in contributing to Substra. 5 | 6 | .. 
toctree:: 7 | :maxdepth: 1 8 | 9 | local-deployment.rst 10 | harbor.rst -------------------------------------------------------------------------------- /docs/source/how-to/developing-substra/local-deployment/k3-create.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | # Requires bash ([[ ]] and arrays are used below). Select the sed executable: gsed on macOS (darwin), sed elsewhere. 4 | if [[ "$OSTYPE" == "darwin"* ]]; then 5 | SED_EXEC="gsed" 6 | else 7 | SED_EXEC="sed" 8 | fi 9 | # Remove any existing cluster, then create the per-organization host directories mounted into the new one. 10 | k3d cluster delete || echo 'No cluster' 11 | mkdir -p /tmp/org-1 12 | mkdir -p /tmp/org-2 13 | mkdir -p /tmp/org-3 14 | args=() 15 | # Optional: expose the Harbor certificate as /etc/ssl/certs/harbor.crt when HARBOR_CERT_PATH is set. 16 | if [ "$HARBOR_CERT_PATH" ]; then 17 | args+=("--volume" "${HARBOR_CERT_PATH}:/etc/ssl/certs/harbor.crt") 18 | fi 19 | 20 | k3d cluster create --api-port 127.0.0.1:6443 -p 80:80@loadbalancer -p 443:443@loadbalancer --k3s-arg "--disable=traefik,metrics-server@server:*" --volume /tmp/org-1:/tmp/org-1 --volume /tmp/org-2:/tmp/org-2 --volume /tmp/org-3:/tmp/org-3 "${args[@]}" 21 | 22 | # Patch and install nginx-ingress 23 | curl https://raw.githubusercontent.com/kubernetes/ingress-nginx/master/deploy/static/provider/kind/deploy.yaml > /tmp/deploy.yaml 24 | $SED_EXEC -i 's/ - --publish-status-address=localhost/ - --publish-status-address=localhost\n - --enable-ssl-passthrough/g' /tmp/deploy.yaml 25 | $SED_EXEC -i "/ingress-ready: \"true\"/d" /tmp/deploy.yaml 26 | kubectl apply -f /tmp/deploy.yaml 27 | kubectl create ns orderer 28 | 29 | # Create namespaces and apply PSA baseline label 30 | for org_index in 1 2 3 31 | do 32 | org_name="org-${org_index}" 33 | kubectl create ns ${org_name} 34 | kubectl label ns ${org_name} pod-security.kubernetes.io/enforce=baseline 35 | done 36 | -------------------------------------------------------------------------------- /docs/source/how-to/using-substra/api_tokens_generation.rst: -------------------------------------------------------------------------------- 1 | How-to use new API tokens for login 2 | =================================== 3 | 4 | This short guide 
explains how to manage API tokens in the web application, and use them in the Substra SDK. 5 | 6 | .. admonition:: Why generate API tokens? 7 | 8 | The Substra SDK provides a way to log in using username and password (see `substra.Client `_). 9 | 10 | It is safe, but should be used with caution: 11 | 12 | * It doesn't allow for a precise lifetime or separating concerns by creating one token per purpose. 13 | 14 | * It may surprise or limit you through its underlying automated session management. 15 | 16 | * It can encourage using cleartext passwords, which can end up shared in version control. 17 | 18 | For these reasons, it is possible for Substra node administrators (via `chart options `_) to disable "implicit login" and force users to generate tokens in the web app. 19 | 20 | Whatever the situation, you should use a mechanism to ensure credentials are kept out of view, for instance by reading secret files or environment variables at runtime (see :ref:`client configuration howto`). 21 | 22 | 23 | .. warning:: 24 | API tokens are node-specific: if your script connects to multiple nodes, generate a token for each of them. 25 | 26 | Generating new API tokens 27 | ------------------------- 28 | 29 | To do so you need to go to the API tokens management page on the web application following this link ``/manage_tokens``. 30 | You will see a list of your current tokens as well as an option to generate new ones. 31 | 32 | You can also navigate to the page using the user menu: 33 | 34 | 35 | .. image:: /documentation/images/find_token_management_page.png 36 | 37 | 38 | Clicking ``Generate new`` opens a menu allowing you to pick a name and an expiration date for 39 | your new token. 40 | 41 | 42 | .. image:: /documentation/images/generate_new_token.png 43 | 44 | 45 | Afterward your token will be shown only once. Do copy it somewhere safe before proceeding with your work. 46 | 47 | 48 | .. 
image:: /documentation/images/copy_token.png 49 | 50 | Using API tokens 51 | ---------------- 52 | 53 | Pass tokens to the `substra.Client `_ constructor: 54 | 55 | .. code-block:: Python 56 | :caption: Example of client configuration in code 57 | 58 | client_1 = substra.Client( 59 | backend_type="remote", 60 | url="https://org-1.com", 61 | token="dad943c684f65633635f005b2522a6452d20", 62 | ) 63 | 64 | See :ref:`client configuration howto` for other options. 65 | 66 | Deleting API tokens 67 | ------------------- 68 | 69 | Tokens can be deleted using the web application. Be careful, token deletion is irreversible. 70 | 71 | If you have scripts using a deleted token, they will no longer execute. -------------------------------------------------------------------------------- /docs/source/how-to/using-substra/client_configuration.rst: -------------------------------------------------------------------------------- 1 | .. _client configuration howto: 2 | 3 | How-to configure Substra clients 4 | ================================ 5 | 6 | Whether you are using SubstraFL or directly the Substra SDK, you need to configure one ``Client`` by organisation, 7 | in order to register the datasets and the functions you want to use. 8 | 9 | This how-to guide exposes the different options you have to configure your clients. It targets both first-time and 10 | advanced Substra users. 11 | 12 | Parameters passed directly in the code always override parameters from other sources (environment variables and 13 | configuration files). Parameters set through environment variables override parameters read from the configuration file. 14 | 15 | Configuration from the code 16 | --------------------------- 17 | The first option to configure a ``Client`` is to configure it directly in your code. 18 | 19 | .. 
code-block:: Python 20 | :caption: Example of client configuration in code 21 | 22 | client_1 = substra.Client( 23 | backend_type="remote", 24 | url="https://org-1.com", 25 | username="user1", 26 | password="secret_password", 27 | ) 28 | client_2 = substra.Client( 29 | backend_type="remote", 30 | url="https://org-2.com", 31 | token="18ccd8c2-ea85-403f-aac3-972d97f3759b" 32 | ) 33 | 34 | You can find details about the parameters in the `API reference `_. 35 | 36 | Any parameter defined in the code will override other configuration options. 37 | 38 | This option is good for debugging, but not for production, as you should not store sensitive information such as 39 | passwords or tokens directly in your code. 40 | 41 | 42 | Configuration using environment variables 43 | ----------------------------------------- 44 | The second option is to configure your clients using environment variables. 45 | That way, sensitive information will not be accidentally committed to a Git repository. 46 | 47 | If a parameter is not defined in the code, Substra will check whether a matching environment variable is defined. 48 | You need to pass the name of the client in the parameter ``client_name``. This name will be used to match environment 49 | variables with the right client, as you typically define a client to interact with each organization. 50 | 51 | The environment variable name is defined as follows: ``SUBSTRA_{CLIENT_NAME}_{PARAMETER_NAME}``. 52 | For example, if the ``client_name`` is ``"org-1"``, you can set the value of ``password`` by setting the value of 53 | ``SUBSTRA_ORG_1_PASSWORD``. 54 | 55 | You can use environment variables to partially configure your clients, and configure the rest directly in the code 56 | (or in a configuration file as explained in the next section). 57 | 58 | .. 
code-block:: bash 59 | :caption: Setting environment variables 60 | 61 | export SUBSTRA_ORG_1_USERNAME="user1" 62 | export SUBSTRA_ORG_1_PASSWORD="secret_password" 63 | export SUBSTRA_ORG_2_TOKEN="18ccd8c2-ea85-403f-aac3-972d97f3759b" 64 | 65 | 66 | 67 | .. code-block:: Python 68 | :caption: Example of client configuration using environment variables 69 | 70 | client_1 = substra.Client( 71 | client_name="org-1", 72 | backend_type="remote", 73 | url="https://org-1.com", 74 | ) 75 | client_2 = substra.Client( 76 | client_name="org-2", 77 | backend_type="remote", 78 | url="https://org-2.com", 79 | ) 80 | 81 | 82 | Configuration using a configuration file 83 | ---------------------------------------- 84 | The last possibility for configuring a Substra client is to use a configuration YAML file. 85 | 86 | The configuration file contains information for each client you want to configure. 87 | Values read from the configuration file have the lowest priority: they are overridden by environment variables and values 88 | set in the code. 89 | 90 | It is recommended to store non-sensitive parameter values, such as URLs, in a configuration file, and sensitive parameters, 91 | such as passwords or tokens, in environment variables. 92 | 93 | .. code-block:: YAML 94 | :caption: config.yaml 95 | 96 | org-1: 97 | backend_type: remote 98 | url: https://org-1.com 99 | username: user1 100 | retry_timeout: 60 101 | org-2: 102 | backend_type: remote 103 | url: https://org-2.com 104 | 105 | 106 | 107 | .. 
code-block:: Python 108 | :caption: Example of client configuration using a configuration file 109 | 110 | client_1 = substra.Client( 111 | client_name="org-1", 112 | configuration_file="config.yaml", 113 | ) 114 | client_2 = substra.Client( 115 | client_name="org-2", 116 | configuration_file="config.yaml", 117 | ) -------------------------------------------------------------------------------- /docs/source/how-to/using-substra/get_performances.rst: -------------------------------------------------------------------------------- 1 | How to monitor performance in local mode 2 | ======================================== 3 | 4 | .. warning:: 5 | The last time this page was tested was with **Substra 0.36.0**. Some changes might be needed if you are using a more recent Substra version. 6 | 7 | Performances of a compute plan can be retrieved 8 | - with the :code:`get_performances(CP_KEY)` function of the `Substra Python library `_ 9 | - on the Substra GUI when using the `remote mode `_. 10 | However, in the `local mode `_, there is no GUI. This page explains how to use `MLFlow `_ to perform live monitoring of the compute plan performances in local mode. 11 | 12 | Performance monitoring using MLFlow 13 | ----------------------------------- 14 | 15 | During a `compute plan `_ in local mode, the performances of your testing tasks are saved in a :code:`performances.json` file as soon as the task is done. This json file is stored in your :code:`.../local-worker/live_performances/compute_plan_key` folder. 16 | 17 | The Python script below reads the json file and plots the live metrics results into an MLflow server, creating a plot for each metric in your compute plan. 18 | 19 | To run it, update :code:`CP_KEY` in the script below, run the Python script, and launch the :code:`mlflow ui` command in a dedicated terminal. 20 | Your metric results appear and are updated live at the given url in your terminal. 
21 | 22 | This script will automatically end if the :code:`performances.json` file has not been updated in the last minute. For some compute plans, this timeout (:code:`TIMEOUT` in the script) should be adjusted to the time needed to perform each round. 23 | 24 | .. code-block:: python 25 | :caption: mlflow_live_performances.py 26 | 27 | import pandas as pd 28 | import json 29 | from pathlib import Path 30 | from mlflow import log_metric 31 | import time 32 | import os 33 | 34 | TIMEOUT = 60 # Number of seconds to stop the script after the last update of the json file 35 | CP_KEY = "..." # Compute plan key 36 | POLLING_FREQUENCY = 10 # Try to read the updates in the file every 10 seconds 37 | 38 | path_to_json = Path("local-worker") / "live_performances" / CP_KEY / "performances.json" 39 | 40 | # Wait for the file to be found 41 | start = time.time() 42 | while not path_to_json.exists(): 43 | time.sleep(POLLING_FREQUENCY) 44 | if time.time() - start >= TIMEOUT: 45 | raise TimeoutError("The performance file does not exist, maybe no test task has been executed yet.") 46 | 47 | 48 | logged_rows = [] 49 | last_update = time.time() 50 | 51 | while (time.time() - last_update) <= TIMEOUT: 52 | 53 | if last_update == os.path.getmtime(str(path_to_json)): 54 | time.sleep(POLLING_FREQUENCY) 55 | continue 56 | 57 | last_update = os.path.getmtime(str(path_to_json)) 58 | 59 | time.sleep(1) # Waiting for the json to be fully written 60 | dict_perf = json.load(path_to_json.open()) 61 | 62 | df = pd.DataFrame(dict_perf) 63 | 64 | for _, row in df.iterrows(): 65 | if row["task_key"] in logged_rows: 66 | continue 67 | 68 | logged_rows.append(row["task_key"]) 69 | 70 | step = int(row["round_idx"]) if row["round_idx"] is not None else int(row["task_rank"]) 71 | 72 | log_metric(f"{row['identifier']}_{row['worker']}", row["performance"], step) 73 | -------------------------------------------------------------------------------- /docs/source/how-to/using-substra/gpu.rst: 
-------------------------------------------------------------------------------- 1 | How to leverage GPU 2 | =================== 3 | 4 | Substra can leverage GPU to speed up the training of machine learning models. Find below how to configure Substra to make sure your code can run on GPU. 5 | 6 | 7 | For Substra 8 | ^^^^^^^^^^^ 9 | A Substra task can run on a given GPU if the Docker image used does contain the CUDA drivers needed by this GPU. 10 | 11 | For Torch use cases in SubstraFL 12 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 13 | By default everything runs on CPU. 14 | 15 | If you want to make your Torch model run on GPU, you have to put the model and your data in the GPU memory. SubstraFL does it for you if you set ``use_gpu=True`` in your :ref:`Torch Algorithm`. -------------------------------------------------------------------------------- /docs/source/how-to/using-substra/index.rst: -------------------------------------------------------------------------------- 1 | How-to guides for using Substra 2 | =============================== 3 | 4 | The following guides might be of interest if you are a data scientist using Substra. 5 | 6 | These are made to help on specific points, if you wish to have a general walkthrough on how to work with Substra, 7 | you can check the :doc:`tutorials `. 8 | 9 | .. toctree:: 10 | :maxdepth: 1 11 | 12 | 13 | debug.rst 14 | get_performances.rst 15 | client_configuration.rst 16 | api_tokens_generation.rst 17 | gpu.rst 18 | r_scripts.rst 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /docs/source/how-to/using-substra/r_scripts.rst: -------------------------------------------------------------------------------- 1 | How-to use R scripts with Substra 2 | ================================= 3 | 4 | The high-level SubstraFL library is made for working in Python, but the lower-level library Substra is flexible enough 5 | to accommodate running tasks in other programming languages. 
6 | This how-to guide explains how to run scripts written in R with Substra. 7 | This uses the low-level interface of Substra and requires writing more boilerplate code than using the high-level interface of SubstraFL. 8 | If you are not familiar with the Substra low-level library, you should read the 9 | :doc:`Substra introductory example ` first. 10 | 11 | .. caution:: This guide provides an easy way to run some scripts in another language. 12 | The scripts are wrapped up in a Python process, so performance might be limited. 13 | In particular, multithreading is not supported. 14 | 15 | Preparing the R script 16 | ---------------------- 17 | The inputs of your script are passed as arguments in the command line. This includes parameters (int, float or str) and 18 | (relative) file paths to data. 19 | 20 | The outputs of the scripts are written to stdout, and will be parsed later by the Python script. 21 | Below is an example of what your file should look like: 22 | 23 | .. code-block:: R 24 | :caption: my_script.R 25 | 26 | #!/usr/bin/env Rscript 27 | args <- commandArgs() 28 | # your script here 29 | ... 30 | write(outputs, "") 31 | 32 | 33 | Calling the R script from Python 34 | -------------------------------- 35 | The Python script passed to Substra wraps the R script, so that it can be executed as a Python subprocess. 36 | The Python script reads the inputs defined as Substra ``FunctionInputSpec``, converts everything to string, 37 | appends all parameters in a command (``subprocess.run`` expects a list of str) and launches the subprocess. 38 | After the subprocess has finished, the output is cleaned. 39 | Everything printed to stdout in the R script is available in the Python code through ``raw_output.stdout``, which is a ``bytes`` object because ``subprocess.run`` is called without ``text=True``. 40 | Depending on the type of output, additional cleaning steps might be required. 41 | Finally, the output is saved as a pickle file, to be shared with other organisations. 42 | 43 | .. 
code-block:: Python 44 | :caption: python_wrapper.py 45 | 46 | import pickle 47 | import subprocess 48 | import substratools as tools 49 | 50 | 51 | @tools.register 52 | def run_script(inputs, outputs, task_properties): 53 | data_file = inputs["data_file_path"] 54 | param1 = str(inputs["param1"]) 55 | param2 = str(inputs["param2"]) 56 | raw_output = subprocess.run(['Rscript', 'my_script.R', data_file, param1, param2], capture_output=True) 57 | model = int(raw_output.stdout.strip()) 58 | save_model(model, outputs["model"]) 59 | 60 | 61 | def save_model(model, path): 62 | with open(path, "wb") as f: 63 | pickle.dump(model, f) 64 | 65 | 66 | if __name__ == "__main__": 67 | tools.execute() 68 | 69 | Adapting the opener 70 | ------------------- 71 | When using Substra with Python, the ``Opener`` object is used to load the data in memory. 72 | When using R, we don't need to load the data as Python objects in memory, so the opener simply returns the file path (or paths). 73 | 74 | .. code-block:: Python 75 | :caption: opener.py 76 | 77 | import pathlib 78 | import substratools as tools 79 | 80 | import os 81 | 82 | class StubOpener(tools.Opener): 83 | def fake_data(self, n_samples=None): 84 | return "" 85 | 86 | def get_data(self, folders): 87 | return list(pathlib.Path(folders[0]).glob("*.csv")) 88 | 89 | 90 | Writing the Dockerfile 91 | ---------------------- 92 | We modify the Dockerfile to install R in the container, and copy both R and Python scripts. 93 | 94 | 95 | .. code-block:: Dockerfile 96 | :caption: Dockerfile 97 | 98 | # this base image works in both CPU and GPU enabled environments 99 | FROM python:3.12-slim 100 | 101 | # install R 102 | RUN apt-get update \ 103 | && apt-get -y install r-base 104 | 105 | # add your algorithm scripts to docker image 106 | ADD python_wrapper.py . 107 | ADD my_script.R . 
108 | 109 | # define how script is run 110 | ENTRYPOINT ["python3", "python_wrapper.py", "--function-name", "run_script"] 111 | 112 | Wrapping up 113 | ----------- 114 | That's it, you're all set up! 115 | 116 | You can now define your computation graph as you would normally in Substra, and everything should run fine. 117 | 118 | You can have a different R script for each step; just write a different Python wrapper to call each of them. 119 | Don't forget the ``@tools.register`` decorator on each of your Python wrappers! 120 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. Substra documentation master file, created by 2 | sphinx-quickstart on Mon Aug 30 14:12:40 2021. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Substra documentation 7 | ===================== 8 | 9 | Substra is open source federated learning (FL) software. It provides a flexible Python library and a web application to run federated learning training at scale. 10 | 11 | 12 | .. image:: banner.jpg 13 | :width: 800 14 | :align: center 15 | :alt: Substra 16 | 17 | Substra's main usage is in production environments. It has already been deployed and used by hospitals and biotech companies: see the `MELLODDY `_ and the `HealthChain `_ projects. 18 | 19 | The key Substra differentiators are: 20 | 21 | * **Framework agnostic** — Any Python library can be used: PyTorch, TensorFlow, sklearn, etc. 22 | * **Flexible** — Any kind of computation can be run: machine learning, analytics, etc. 23 | * **Scalable** — Support for vertical scaling (several trainings on one machine) and horizontal scaling (training on several machines). 24 | * **Traceable** — All machine learning operations are logged in an auditable read-only database. 
25 | * **Web application** — A web application to monitor long-running computations and explore model performance. 26 | * **Production ready** — Packaged in Kubernetes and regularly audited. 27 | * **Debugging made easy** — Remote error logs are accessible to data scientists. The same code can be run in a deployed production environment or on a single machine to debug. 28 | 29 | Substra was created by `Owkin `_ and is now hosted by the `Linux Foundation for AI and Data `_. 30 | 31 | How does it work? 32 | ^^^^^^^^^^^^^^^^^ 33 | 34 | .. image:: _static/schemes/introductory-scheme.svg 35 | :width: 800 36 | :align: center 37 | :alt: Substra Network 38 | 39 | Interfaces 40 | ^^^^^^^^^^ 41 | 42 | 43 | Substra has three user interfaces: 44 | * **Substra**: a low-level Python library (also called SDK). Substra is used to create datasets, functions and machine learning tasks on the platform. 45 | * **SubstraFL**: a high-level federated learning Python library based on Substra. SubstraFL is used to run complex federated learning experiments at scale. 46 | * A **web application** used to monitor experiment training and explore the results. 47 | 48 | 49 | 50 | Installation 51 | ^^^^^^^^^^^^ 52 | 53 | **Client side**: Install the Substra and SubstraFL Python libraries with the following command: ``pip install substrafl``. The Substra Python library is a dependency of SubstraFL, so it will be automatically installed. More information on the installation :ref:`can be found here `. 54 | 55 | **Server side**: There are 2 options to deploy the server side of Substra (backend, frontend and orchestrator): 56 | 57 | * :ref:`Local deployment `: to deploy locally on a single machine. Useful for quick tests and for development. 58 | * :doc:`Production deployment `: for real deployments. 59 | 60 | .. note:: 61 | You can start doing local FL experiments with Substra by installing only the **client side**. 
62 | 63 | Links 64 | ^^^^^ 65 | 66 | Some quick links: 67 | 68 | * :ref:`MNIST federated learning example ` 69 | * :doc:`SubstraFL overview ` 70 | * :ref:`Compatibility table ` 71 | * :doc:`How to deploy Substra for Site Reliability Engineers ` 72 | * :ref:`Community ` 73 | * `Subscribe to our newsletter `_ 74 | 75 | 76 | .. toctree:: 77 | :glob: 78 | :maxdepth: 2 79 | :caption: What is Substra 80 | :hidden: 81 | 82 | 83 | substrafl_doc/substrafl_overview 84 | documentation/concepts 85 | documentation/components 86 | additional/privacy-strategy.rst 87 | 88 | 89 | 90 | .. toctree:: 91 | :glob: 92 | :maxdepth: 2 93 | :caption: Tutorials 94 | :hidden: 95 | 96 | examples/substrafl/index.rst 97 | examples/substra_core/index.rst 98 | 99 | 100 | .. toctree:: 101 | :glob: 102 | :maxdepth: 2 103 | :caption: Reference guides 104 | :hidden: 105 | 106 | substrafl_doc/api/index.rst 107 | documentation/api_reference.rst 108 | reference/index.rst 109 | 110 | 111 | .. toctree:: 112 | :glob: 113 | :maxdepth: 1 114 | :caption: How-to guides 115 | :hidden: 116 | 117 | how-to/using-substra/index.rst 118 | how-to/deploying-substra/index.rst 119 | how-to/developing-substra/index.rst 120 | 121 | 122 | .. toctree:: 123 | :glob: 124 | :maxdepth: 1 125 | :caption: Contributing guide 126 | :hidden: 127 | 128 | contributing/contributing-guide.rst 129 | contributing/code-of-conduct.rst 130 | 131 | .. 
toctree:: 132 | :glob: 133 | :maxdepth: 2 134 | :caption: Additional Information 135 | :hidden: 136 | 137 | additional/community.rst 138 | additional/release.rst 139 | additional/faq.rst 140 | additional/glossary.rst 141 | -------------------------------------------------------------------------------- /docs/source/reference/index.rst: -------------------------------------------------------------------------------- 1 | Kubernetes deployment reference 2 | =============================== 3 | 4 | For a step-by-step guide on how to deploy the application on Kubernetes, please refer to 5 | `the deployment how-to `_. 6 | 7 | This section provides a reference for what the various Kubernetes resources should look like once deployed with the default settings. 8 | Substra is distributed as Helm charts, and most values can be customized to accommodate the requirements of your environment. 9 | 10 | .. include:: pss.rst 11 | .. include:: netpol.rst 12 | .. include:: volumes.rst 13 | -------------------------------------------------------------------------------- /docs/source/reference/pss.rst: -------------------------------------------------------------------------------- 1 | Pod Security Standards 2 | ---------------------- 3 | 4 | All pods in a Substra deployment are compliant with the *baseline* policy of the 5 | `Pod Security Standards `_. 6 | 7 | All pods can run as non-root, with two exceptions: 8 | 9 | * If the builder feature is enabled (at least one backend per network must have the ability to build images), Kaniko pods used for building images run as root. 10 | * If the private CA feature is used, the initContainer ``add-cert`` runs as root. 11 | 12 | We are working on ensuring that all pods except the two listed above are compliant with the *restricted* policy.
13 | -------------------------------------------------------------------------------- /docs/source/templates/breadcrumbs.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/templates/breadcrumbs.html -------------------------------------------------------------------------------- /examples_requirements.txt: -------------------------------------------------------------------------------- 1 | ipython==8.21.0 2 | nbformat==5.9.2 3 | tqdm>=4.66.2 4 | -r docs/source/examples/substra_core/diabetes_example/assets/requirements.txt 5 | -r docs/source/examples/substra_core/titanic_example/assets/requirements.txt 6 | -r docs/source/examples/substrafl/get_started/torch_fedavg_assets/requirements.txt 7 | -r docs/source/examples/substrafl/go_further/sklearn_fedavg_assets/requirements.txt 8 | -r docs/source/examples/substrafl/go_further/torch_cyclic_assets/requirements.txt 9 | -r docs/source/examples/substrafl/go_further/diabetes_substrafl_assets/requirements.txt -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -r docs/doc-requirements.txt 2 | -r examples_requirements.txt -------------------------------------------------------------------------------- /skaffold.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: skaffold/v1beta13 2 | kind: Config 3 | build: 4 | artifacts: 5 | - image: substra/substra-documentation-examples 6 | context: . 
7 | docker: 8 | dockerfile: docker/substra-documentation-examples/Dockerfile 9 | deploy: 10 | statusCheckDeadlineSeconds: 300 11 | helm: 12 | releases: 13 | - name: substra-documentation-examples 14 | chartPath: charts/substra-documentation-examples 15 | namespace: substra-tests 16 | imageStrategy: 17 | helm: {} 18 | values: 19 | image: substra/substra-documentation-examples 20 | flags: 21 | install: ["--create-namespace"] 22 | -------------------------------------------------------------------------------- /towncrier.toml: -------------------------------------------------------------------------------- 1 | [tool.towncrier] 2 | directory = "changes" 3 | filename = "CHANGELOG.md" 4 | start_string = "\n" 5 | underlines = ["", "", ""] 6 | title_format = "## [{version}](https://github.com/Substra/substra-documentation/releases/tag/{version}) - {project_date}" 7 | issue_format = "[#{issue}](https://github.com/Substra/substra-documentation/pull/{issue})" 8 | [tool.towncrier.fragment.added] 9 | [tool.towncrier.fragment.removed] 10 | [tool.towncrier.fragment.changed] 11 | [tool.towncrier.fragment.fixed] 12 | --------------------------------------------------------------------------------