├── .binder
├── postBuild
├── requirements.txt
└── runtime.txt
├── .github
├── CODEOWNERS
├── ISSUE_TEMPLATE
│ ├── bug_report.md
│ └── feature_request.md
└── workflows
│ ├── build.yml
│ ├── test-examples-subprocess.yml
│ └── towncrier-changelog.yml
├── .gitignore
├── .readthedocs.yaml
├── CHANGELOG.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── CONTRIBUTORS.md
├── LICENSE
├── Makefile
├── README.md
├── Substra-logo-colour.svg
├── Substra-logo-white.svg
├── changes
└── .gitkeep
├── charts
└── substra-documentation-examples
│ ├── Chart.yaml
│ ├── templates
│ ├── _helpers.tpl
│ └── deployment.yaml
│ └── values.yaml
├── docker
└── substra-documentation-examples
│ └── Dockerfile
├── docs
├── Makefile
├── doc-requirements.txt
├── environment.yml
└── source
│ ├── _ext
│ └── compatibilitytable.py
│ ├── _static
│ ├── Gattica-Bold100.otf
│ ├── Gattica-Light100.otf
│ ├── Gattica-Medium100.otf
│ ├── Gattica-Regular100.otf
│ ├── example_thumbnail
│ │ ├── cyclic-mnist.png
│ │ ├── diabetes.png
│ │ ├── iris.jpg
│ │ ├── mnist.png
│ │ └── titanic.jpg
│ ├── favicon.png
│ ├── images
│ │ ├── substra-0.26-task-duration.png
│ │ └── workflow_0.27.0.png
│ ├── logo.svg
│ ├── my-nbsphinx-gallery.css
│ ├── owkin.css
│ └── schemes
│ │ ├── centralized-orc.svg
│ │ ├── channel_schema.png
│ │ ├── computeplan.state.svg
│ │ ├── computetask.state.svg
│ │ ├── function.state.svg
│ │ ├── index-generator-scheme.svg
│ │ ├── introductory-scheme.svg
│ │ └── stack-technical-scheme.svg
│ ├── additional
│ ├── community.rst
│ ├── faq.rst
│ ├── glossary.rst
│ ├── privacy-strategy.rst
│ ├── release.rst
│ └── releases.yaml
│ ├── banner.jpg
│ ├── conf.py
│ ├── contributing
│ ├── code-of-conduct.rst
│ └── contributing-guide.rst
│ ├── documentation
│ ├── api_reference.rst
│ ├── backend
│ │ └── index.rst
│ ├── components.rst
│ ├── concepts.rst
│ ├── frontend
│ │ └── index.rst
│ ├── images
│ │ ├── copy_token.png
│ │ ├── find_token_management_page.png
│ │ ├── generate_new_token.png
│ │ └── token_management_page.png
│ ├── orchestrator
│ │ └── index.rst
│ └── substra_tools.rst
│ ├── examples
│ ├── substra_core
│ │ ├── diabetes_example
│ │ │ ├── assets
│ │ │ │ ├── dataset
│ │ │ │ │ ├── diabetes_dataset.py
│ │ │ │ │ └── diabetes_opener.py
│ │ │ │ ├── functions
│ │ │ │ │ ├── aggregation
│ │ │ │ │ │ └── Dockerfile
│ │ │ │ │ ├── description.md
│ │ │ │ │ ├── federated_analytics_functions.py
│ │ │ │ │ ├── local_first_order_computation
│ │ │ │ │ │ └── Dockerfile
│ │ │ │ │ └── local_second_order_computation
│ │ │ │ │ │ └── Dockerfile
│ │ │ │ └── requirements.txt
│ │ │ └── run_diabetes.ipynb
│ │ ├── index.rst
│ │ └── titanic_example
│ │ │ ├── assets
│ │ │ ├── dataset
│ │ │ │ ├── description.md
│ │ │ │ └── titanic_opener.py
│ │ │ ├── function_random_forest
│ │ │ │ ├── description.md
│ │ │ │ ├── predict
│ │ │ │ │ └── Dockerfile
│ │ │ │ ├── titanic_function_rf.py
│ │ │ │ └── train
│ │ │ │ │ └── Dockerfile
│ │ │ ├── metric
│ │ │ │ ├── Dockerfile
│ │ │ │ ├── description.md
│ │ │ │ └── titanic_metrics.py
│ │ │ ├── requirements.txt
│ │ │ ├── test_data_samples
│ │ │ │ ├── data_sample_0
│ │ │ │ │ └── data_sample_0.csv
│ │ │ │ └── data_sample_1
│ │ │ │ │ └── data_sample_1.csv
│ │ │ └── train_data_samples
│ │ │ │ ├── data_sample_0
│ │ │ │ └── data_sample_0.csv
│ │ │ │ ├── data_sample_1
│ │ │ │ └── data_sample_1.csv
│ │ │ │ ├── data_sample_2
│ │ │ │ └── data_sample_2.csv
│ │ │ │ ├── data_sample_3
│ │ │ │ └── data_sample_3.csv
│ │ │ │ ├── data_sample_4
│ │ │ │ └── data_sample_4.csv
│ │ │ │ ├── data_sample_5
│ │ │ │ └── data_sample_5.csv
│ │ │ │ ├── data_sample_6
│ │ │ │ └── data_sample_6.csv
│ │ │ │ ├── data_sample_7
│ │ │ │ └── data_sample_7.csv
│ │ │ │ ├── data_sample_8
│ │ │ │ └── data_sample_8.csv
│ │ │ │ └── data_sample_9
│ │ │ │ └── data_sample_9.csv
│ │ │ └── run_titanic.ipynb
│ └── substrafl
│ │ ├── get_started
│ │ ├── run_mnist_torch.ipynb
│ │ └── torch_fedavg_assets
│ │ │ ├── dataset
│ │ │ ├── description.md
│ │ │ ├── mnist_dataset.py
│ │ │ └── mnist_opener.py
│ │ │ └── requirements.txt
│ │ ├── go_further
│ │ ├── diabetes_substrafl_assets
│ │ │ ├── dataset
│ │ │ │ ├── diabetes_substrafl_dataset.py
│ │ │ │ └── diabetes_substrafl_opener.py
│ │ │ └── requirements.txt
│ │ ├── run_diabetes_substrafl.ipynb
│ │ ├── run_iris_sklearn.ipynb
│ │ ├── run_mnist_cyclic.ipynb
│ │ ├── sklearn_fedavg_assets
│ │ │ ├── dataset
│ │ │ │ ├── description.md
│ │ │ │ ├── iris_dataset.py
│ │ │ │ └── iris_opener.py
│ │ │ └── requirements.txt
│ │ └── torch_cyclic_assets
│ │ │ ├── dataset
│ │ │ ├── cyclic_mnist_dataset.py
│ │ │ ├── cyclic_mnist_opener.py
│ │ │ └── description.md
│ │ │ └── requirements.txt
│ │ └── index.rst
│ ├── how-to
│ ├── deploying-substra
│ │ ├── howto
│ │ │ ├── customize-compute-pod-node.rst
│ │ │ ├── existing-volumes.rst
│ │ │ ├── external-database.rst
│ │ │ └── sso-oidc.rst
│ │ ├── index.rst
│ │ ├── upgrade-notes.rst
│ │ ├── walkthrough.rst
│ │ └── walkthrough
│ │ │ ├── 10-prerequisites.rst
│ │ │ ├── 20-orchestrator-deployment.rst
│ │ │ ├── 30-backend-deployment.rst
│ │ │ ├── 40-connect-organizations.rst
│ │ │ ├── 50-frontend-deployment.rst
│ │ │ └── 60-mtls-setup.rst
│ ├── developing-substra
│ │ ├── harbor.rst
│ │ ├── index.rst
│ │ ├── local-deployment.rst
│ │ └── local-deployment
│ │ │ └── k3-create.sh
│ └── using-substra
│ │ ├── api_tokens_generation.rst
│ │ ├── client_configuration.rst
│ │ ├── debug.rst
│ │ ├── get_performances.rst
│ │ ├── gpu.rst
│ │ ├── index.rst
│ │ └── r_scripts.rst
│ ├── index.rst
│ ├── reference
│ ├── index.rst
│ ├── netpol.rst
│ ├── pss.rst
│ └── volumes.rst
│ ├── substrafl_doc
│ └── substrafl_overview.rst
│ └── templates
│ └── breadcrumbs.html
├── examples_requirements.txt
├── requirements.txt
├── skaffold.yaml
└── towncrier.toml
/.binder/postBuild:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # binder post build script
3 | set -ex
4 |
--------------------------------------------------------------------------------
/.binder/requirements.txt:
--------------------------------------------------------------------------------
1 | -r ../examples_requirements.txt
2 |
--------------------------------------------------------------------------------
/.binder/runtime.txt:
--------------------------------------------------------------------------------
1 | python-3.10
2 |
--------------------------------------------------------------------------------
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | /.github @Substra/code-owners
2 | /examples @Substra/code-owners @RomainGoussault
3 | /substrafl_examples @Substra/code-owners @RomainGoussault
4 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: "[BUG]"
5 | labels: bug
6 | assignees: ''
7 |
8 | ---
9 |
10 | ## Describe the bug
11 |
12 | > Give a clear and concise description of what the bug is.
13 |
14 | ## How To Reproduce
15 |
16 | Steps to reproduce the behavior:
17 |
18 | > 1. Go to '...'
19 | > 2. Click on '....'
20 | > 3. Scroll down to '....'
21 | > 4. See error
22 |
23 | ## Expected behavior
24 |
25 | > Give a clear and concise description of what you expected to happen.
26 |
27 | ## Screenshots
28 |
29 | > If applicable, add screenshots to help explain your problem.
30 |
31 | ## Environment
32 |
33 | - Version, branch (branch and commit number)
34 | - OS and version: [e.g. macOS Mojave version 10.14.4]
35 |
36 | > Get it with `uname -a` on Linux and `system_profiler SPSoftwareDataType` on Mac
37 |
38 | - Browser and version if relevant [e.g. Firefox, Chrome, Safari]
39 | - Any other relevant information [e.g. dependencies, version of Python, version of Go, ...]
40 |
41 | ## Additional context
42 |
43 | > Add any other context about the problem here.
44 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Submit a request for a feature you would love to see implemented in Substra
4 | & help us improve!
5 | title: "[FEATURE_REQUEST]"
6 | labels: feature_request
7 | assignees: ''
8 |
9 | ---
10 |
11 | ## Introduction
12 |
13 | > Please describe the type of feature you would like to discuss (one-shot or series of features) & introduce yourself so we can follow up with you.
14 |
15 | ## Describe the ideal feature
16 |
17 | > Give a clear and concise description of what this feature is expected to do.
18 |
19 | ## Outcome
20 |
21 | > Please, try to describe the expected outcome of such a feature, for users and software engineers, the problem it would solve for you, the impact it would have for you.
22 |
23 | ## Is there any already existing similar feature?
24 |
25 | > Do you know of an analogous feature that already exists somewhere else?
26 |
27 | ## Expected behavior
28 |
29 | > Give a clear and concise description of what you would expect after the feature is implemented.
30 |
31 | ## Screenshots
32 |
33 | > If applicable, add screenshots, schemas or any relevant material to help explain your feature request.
34 |
35 | ## Additional context
36 |
37 | > Add any other context about the feature request, any evidence that you have on the need for this request or if you already have an idea on how we may solve this!
38 | > Please try to be specific about the reach of the underlying problem, the "cost" of not having this feature, and how urgent it is for you.
39 |
--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | name: Sphinx Build in CI
2 |
3 | on:
4 | pull_request:
5 | branches:
6 | - "main"
7 |
8 | jobs:
9 | build:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - uses: actions/checkout@v2
13 | with:
14 | ref: ${{ github.event.pull_request.head.sha }}
15 | - name: Set up python
16 | uses: actions/setup-python@v2
17 | with:
18 | python-version: "3.10"
19 |
20 | - name: Clone substra-tools
21 | uses: actions/checkout@v2
22 | with:
23 | repository: substra/substra-tools
24 | path: substra-tools
25 | ref: main
26 |
27 | - name: Clone substra
28 | uses: actions/checkout@v2
29 | with:
30 | repository: substra/substra
31 | path: substra
32 | ref: main
33 |
34 | - name: Clone substrafl
35 | uses: actions/checkout@v2
36 | with:
37 | repository: substra/substrafl
38 | path: substrafl
39 | ref: main
40 |
41 | - name: Install substra, substra-tools and substrafl
42 | run: |
43 | pip install -e ./substrafl
44 | pip install -e ./substra
45 | pip install -e ./substra-tools
46 |
47 | - name: Copy substra and substrafl api doc in the doc
48 | run: |
49 | cp -r substra/references docs/source/documentation/references
50 | cp -r substrafl/docs/api docs/source/substrafl_doc/
51 |
52 | - name: Install Pandoc
53 | run: |
54 | sudo wget https://github.com/jgm/pandoc/releases/download/3.1.11.1/pandoc-3.1.11.1-1-amd64.deb
55 | sudo dpkg -i pandoc-3.1.11.1-1-amd64.deb
56 |
57 | - name: Install requirements
58 | run: |
59 | pip install -r requirements.txt
60 |
61 | - name: Sphinx make
62 | working-directory: ./docs
63 | run: make clean html
64 |
--------------------------------------------------------------------------------
/.github/workflows/test-examples-subprocess.yml:
--------------------------------------------------------------------------------
1 | name: Documentation examples - subprocess
2 | on:
3 | pull_request:
4 | paths:
5 | - "docs/source/examples/**"
6 | push:
7 | branches:
8 | - main
9 | workflow_dispatch:
10 |
11 | jobs:
12 | pr-validation:
13 | name: test-${{ matrix.os }}-py-${{ matrix.python }}
14 | runs-on: ${{ matrix.os }}
15 | strategy:
16 | matrix:
17 | os: [ubuntu-22.04]
18 | python: ["3.10", "3.11", "3.12"]
19 |
20 | steps:
21 | - uses: actions/checkout@v4
22 | with:
23 | path: substra-documentation
24 |
25 | - uses: actions/setup-python@v5
26 | with:
27 | python-version: ${{ matrix.python }}
28 |
29 | - name: Free disk space
30 | run: |
31 | # Based on https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
32 | sudo rm -rf /usr/share/dotnet
33 | sudo rm -rf /usr/local/lib/android
34 | sudo rm -rf /opt/ghc
35 | sudo rm -rf "/usr/local/share/boost"
36 | sudo rm -rf "$AGENT_TOOLSDIRECTORY"
37 |
38 | - name: Checkout substra-tools on main
39 | uses: actions/checkout@v4
40 | with:
41 | repository: substra/substra-tools
42 | path: substratools
43 |
44 | - name: Checkout substra on main
45 | uses: actions/checkout@v4
46 | with:
47 | repository: substra/substra
48 | path: substra
49 |
50 | - name: Checkout substrafl on main
51 | uses: actions/checkout@v4
52 | with:
53 | repository: substra/substrafl
54 | path: substrafl
55 |
56 | - name: Install package
57 | run: |
58 | pip install --upgrade pip
59 | pip install --upgrade -e substrafl
60 | pip install --upgrade -e substra
61 | pip install --upgrade -e substratools
62 |
63 | - name: Install examples dependencies
64 | run: |
65 | cd substra-documentation
66 | make install-examples-dependencies
67 |
68 | - name: Run examples
69 | env:
70 | SUBSTRA_FORCE_EDITABLE_MODE: True
71 | run: |
72 | cd substra-documentation
73 | make examples
74 |
--------------------------------------------------------------------------------
/.github/workflows/towncrier-changelog.yml:
--------------------------------------------------------------------------------
1 | name: Towncrier changelog
2 |
3 | on:
4 | workflow_dispatch:
5 | inputs:
6 | app_version:
7 | type: string
8 | description: 'The version of the app'
9 | required: true
10 | branch:
11 | type: string
12 | description: 'The branch to update'
13 | required: true
14 |
15 | jobs:
16 | test-generate-publish:
17 | uses: substra/substra-gha-workflows/.github/workflows/towncrier-changelog.yml@main
18 | secrets: inherit
19 | with:
20 | app_version: ${{ inputs.app_version }}
21 | repo: substra-documentation
22 | branch: ${{ inputs.branch }}
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 | docs/src
74 |
75 | # PyBuilder
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | .python-version
87 |
88 | # pipenv
89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
92 | # install all needed dependencies.
93 | #Pipfile.lock
94 |
95 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
96 | __pypackages__/
97 |
98 | # Celery stuff
99 | celerybeat-schedule
100 | celerybeat.pid
101 |
102 | # SageMath parsed files
103 | *.sage.py
104 |
105 | # Environments
106 | .env
107 | .venv
108 | env/
109 | venv/
110 | ENV/
111 | env.bak/
112 | venv.bak/
113 |
114 | # Spyder project settings
115 | .spyderproject
116 | .spyproject
117 |
118 | # Rope project settings
119 | .ropeproject
120 |
121 | # mkdocs documentation
122 | /site
123 |
124 | # mypy
125 | .mypy_cache/
126 | .dmypy.json
127 | dmypy.json
128 |
129 | # Pyre type checker
130 | .pyre/
131 |
132 | # Sphinx build
133 | _build/
134 |
135 | # Misc build artefacts
136 | tmp/**
137 |
138 | docs/source/tmp/**
139 | docs/source/documentation/references/**
140 | docs/source/substrafl_doc/api
141 |
142 | # Zip files
143 | *.zip
144 |
145 | # Assets built when ran locally
146 | **/local-worker
147 |
148 | # Experiment summaries when run locally
149 | **/experiment_summaries
150 |
151 | # Data folder when run locally
152 | **/data_iris
153 | **/data_mnist
154 | **/data_diabetes
155 | **/data
156 |
157 | # Algo files folder when run locally
158 | **/algo_files
159 |
160 | # Mac spec
161 | *.DS_Store
162 |
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | # .readthedocs.yml
2 | # Read the Docs configuration file
3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4 |
5 | # Required
6 | version: 2
7 |
8 | build:
9 | os: "ubuntu-22.04"
10 | tools:
11 | python: "miniconda3-4.7"
12 | jobs:
13 | pre_create_environment:
14 | - conda update --yes --quiet --name=base --channel=defaults conda
15 |
16 | # Build documentation in the docs/ directory with Sphinx
17 | sphinx:
18 | configuration: docs/source/conf.py
19 | fail_on_warning: True
20 |
21 | conda:
22 | environment: docs/environment.yml
23 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | Substra repositories' code of conduct is available in the Substra documentation [here](https://docs.substra.org/en/stable/contributing/code-of-conduct.html).
2 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | Substra repositories' contributing guide is available in the Substra documentation [here](https://docs.substra.org/en/stable/contributing/contributing-guide.html).
2 |
--------------------------------------------------------------------------------
/CONTRIBUTORS.md:
--------------------------------------------------------------------------------
1 | This is a file of people that have made significant contributions to the Substra documentation. It is sorted in chronological order. Please include your contribution at the bottom of this document in the following format: name (N), email (E), description of work (W) and date (D).
2 |
3 | To have your contribution listed, your work must meet the minimum [threshold of originality](https://en.wikipedia.org/wiki/Threshold_of_originality), which will be evaluated by the maintainers of the repository.
4 |
5 | Thank you for your contribution, your work is greatly appreciated!
6 |
7 | --- Example ---
8 |
9 | - N: John Doe
10 | - E: john.doe@owkin.com
11 | - W: Integrated new FL strategy
12 | - D: 02/02/2023
13 |
14 | ---
15 |
16 | Copyright (c) 2018-present Owkin Inc. All rights reserved.
17 |
18 | All other contributions:
19 | Copyright (c) 2023 to the respective contributors.
20 | All rights reserved.
21 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | install-examples-dependencies:
2 | pip3 install -r examples_requirements.txt
3 |
4 | examples: examples-substra examples-substrafl
5 |
6 | examples-substra: example-core-diabetes example-core-titanic
7 |
8 | example-core-diabetes:
9 | cd docs/source/examples/substra_core/diabetes_example/ && ipython -c "%run run_diabetes.ipynb"
10 | example-core-titanic:
11 | cd docs/source/examples/substra_core/titanic_example/ && ipython -c "%run run_titanic.ipynb"
12 |
13 | examples-substrafl: example-fl-mnist example-fl-iris example-fl-cyclic example-fl-diabetes
14 |
15 | example-fl-mnist:
16 | cd docs/source/examples/substrafl/get_started/ && ipython -c "%run run_mnist_torch.ipynb"
17 | example-fl-iris:
18 | cd docs/source/examples/substrafl/go_further/ && ipython -c "%run run_iris_sklearn.ipynb"
19 | example-fl-cyclic:
20 | cd docs/source/examples/substrafl/go_further/ && ipython -c "%run run_mnist_cyclic.ipynb"
21 | example-fl-diabetes:
22 | cd docs/source/examples/substrafl/go_further/ && ipython -c "%run run_diabetes_substrafl.ipynb"
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Substra documentation
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | Substra is an open source federated learning (FL) software. This specific repository is the documentation of Substra.
19 |
20 | This documentation is hosted on Read The Docs and can be found [here](https://docs.substra.org/).
21 |
22 | ## Support
23 |
24 | If you need support, please either raise an issue on GitHub or ask on [Slack](https://join.slack.com/t/substra-workspace/shared_invite/zt-1fqnk0nw6-xoPwuLJ8dAPXThfyldX8yA).
25 |
26 |
27 | # Setup
28 |
29 | ## Contributing
30 |
31 | If you would like to contribute to this documentation please clone it locally and make a new branch with the suggested changes.
32 |
33 | You should use python `3.10`.
34 |
35 | To deploy the documentation locally you need to install all the necessary requirements which you can find in the 'requirements.txt' file of the root of this repository. You can use pip in your terminal to install it: `pip install -r requirements.txt`.
36 |
37 | You also need to manually install [pandoc](https://github.com/jgm/pandoc/releases/tag/3.1.11.1).
38 |
39 |
40 | ### Install substra, substratools and substrafl in editable mode
41 |
42 | :warning: if you have these repositories installed in non-editable mode, it will not work.
43 |
44 | Install the repositories in editable mode:
45 |
46 | ```sh
47 | git clone git@github.com:Substra/substra.git
48 | cd substra && pip install -e . && cd ..
49 | ```
50 |
51 | ```sh
52 | git clone git@github.com:Substra/substra-tools.git
53 | cd substra-tools && pip install -e . && cd ..
54 | ```
55 |
56 | ```sh
57 | git clone git@github.com:Substra/substrafl.git
58 | cd substrafl && pip install -e '.[dev]' && cd ..
59 | ```
60 |
61 | ### Build the documentation locally
62 |
63 | Next, to build the documentation move to the docs directory: `cd docs`
64 |
65 | And then: `make clean html`
66 |
67 | The first time you run it or if you updated the examples library it may take a little longer to build the whole documentation.
68 |
69 | To view the documentation in your browser, run: `make livehtml`
70 | And then go to http://127.0.0.1:8000
71 |
72 | Once you are happy with your changes push your branch and make a pull request.
73 |
74 | Thank you for helping us improve!
75 |
76 | ### Add a new example
77 |
78 | - Put the example folder in `substra-documentation/examples` if it is a Substra example, `substra-documentation/substrafl_examples` if it is a Substrafl example.
79 | - create a `README.rst` file at the root of the example
80 | - The main file that is executed must match the regex `run_*.py`, e.g. `run_titanic.py` ([source](https://sphinx-gallery.github.io/stable/configuration.html?highlight=examples_dirs#parsing-and-executing-examples-via-matching-patterns))
81 | - It must also be structured as described in the Sphinx gallery documentation. In particular, the folder containing the `run_*.py` example file **must** contain a `README.rst` file.
82 | - Add the assets:
83 | - use the `zip_dir` function in the `conf.py` file to zip the assets
84 | - add the link to download the assets to the example's docstring:
85 |
86 | ```rst
87 | .. only:: builder_html or readthedocs
88 |
89 | :download:`assets required to run this example <../../ASSET_NAME.zip>`
90 | ```
91 | - thumbnail: add the path to the image in a comment in a cell of the example
92 |
93 | `# sphinx_gallery_thumbnail_path = 'auto_examples/EXAMPLE_FOLDER_NAME/images/thumb/sphx_glr_plot_thumb.jpg'`
94 |
95 |
96 | ## Releases
97 |
98 | The documentation is released for each Substra release.
99 | When a semver tag is pushed or a release is created, the doc is built and published to ReadTheDocs by the [CI](https://github.com/Substra/substra-documentation/blob/main/.github/workflows/publish_stable.yml).
100 | Then ReadTheDocs automatically activates this version and sets it as default (takes a few minutes).
101 | You can follow the build on the CI [here](https://github.com/Substra/substra-documentation/actions) and on ReadTheDocs if you have access to the project.
102 |
103 | ## How to generate the changelog
104 |
105 | The changelog is managed with [towncrier](https://towncrier.readthedocs.io/en/stable/index.html).
106 | To add a new entry in the changelog, add a file in the `changes` folder. The file name should have the following structure:
107 | `<unique_id>.<change_type>`.
108 | The `unique_id` is a unique identifier, we currently use the PR number.
109 | The `change_type` can be of the following types: `added`, `changed`, `removed`, `fixed`.
110 |
111 | To generate the changelog (for example during a release), use the following command (you must have the dev dependencies installed):
112 |
113 | ```
114 | towncrier build --version=<version>
115 | ```
116 |
117 | You can use the `--draft` option to see what would be generated without actually writing to the changelog (and without removing the fragments).
--------------------------------------------------------------------------------
/changes/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/changes/.gitkeep
--------------------------------------------------------------------------------
/charts/substra-documentation-examples/Chart.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v2
2 | name: substra-documentation-examples
3 | description: Remote example for substra documentation
4 |
5 | type: application
6 |
7 | version: 0.1.0
8 |
--------------------------------------------------------------------------------
/charts/substra-documentation-examples/templates/_helpers.tpl:
--------------------------------------------------------------------------------
1 | {{/* vim: set filetype=mustache: */}}
2 | {{/*
3 | Expand the name of the chart.
4 | */}}
5 | {{- define "substra-documentation-examples.name" -}}
6 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
7 | {{- end -}}
8 |
9 | {{/*
10 | Create a default fully qualified app name.
11 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
12 | If release name contains chart name it will be used as a full name.
13 | */}}
14 | {{- define "substra-documentation-examples.fullname" -}}
15 | {{- if .Values.fullnameOverride -}}
16 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
17 | {{- else -}}
18 | {{- $name := default .Chart.Name .Values.nameOverride -}}
19 | {{- if contains $name .Release.Name -}}
20 | {{- .Release.Name | trunc 63 | trimSuffix "-" -}}
21 | {{- else -}}
22 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
23 | {{- end -}}
24 | {{- end -}}
25 | {{- end -}}
26 |
27 | {{/*
28 | Create chart name and version as used by the chart label.
29 | */}}
30 | {{- define "substra-documentation-examples.chart" -}}
31 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
32 | {{- end -}}
33 |
34 | {{/*
35 | Common labels
36 | */}}
37 | {{- define "substra-documentation-examples.labels" -}}
38 | app.kubernetes.io/name: {{ include "substra-documentation-examples.name" . }}
39 | helm.sh/chart: {{ include "substra-documentation-examples.chart" . }}
40 | app.kubernetes.io/instance: {{ .Release.Name }}
41 | {{- if .Chart.AppVersion }}
42 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
43 | {{- end }}
44 | app.kubernetes.io/managed-by: {{ .Release.Service }}
45 | {{- end -}}
46 |
--------------------------------------------------------------------------------
/charts/substra-documentation-examples/templates/deployment.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: Deployment
3 | metadata:
4 | name: {{ include "substra-documentation-examples.fullname" . }}
5 | labels:
6 | {{ include "substra-documentation-examples.labels" . | indent 4 }}
7 | spec:
8 | replicas: 1
9 | selector:
10 | matchLabels:
11 | app.kubernetes.io/name: {{ include "substra-documentation-examples.name" . }}
12 | app.kubernetes.io/instance: {{ .Release.Name }}
13 | template:
14 | metadata:
15 | labels:
16 | app.kubernetes.io/name: {{ include "substra-documentation-examples.name" . }}
17 | app.kubernetes.io/instance: {{ .Release.Name }}
18 | spec:
19 | {{- with .Values.imagePullSecrets }}
20 | imagePullSecrets:
21 | {{- toYaml . | nindent 8 }}
22 | {{- end }}
23 | initContainers:
24 | - name: wait-backend-1
25 | image: jwilder/dockerize
26 | command: ['dockerize',
27 | '-wait', 'http://backend-org-1-substra-backend-server.org-1.svc.cluster.local:8000/readiness',
28 | '-wait', 'http://backend-org-2-substra-backend-server.org-2.svc.cluster.local:8000/readiness',
29 | '-timeout', '1200s']
30 | containers:
31 | - name: {{ .Chart.Name }}
32 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
33 | imagePullPolicy: {{ .Values.image.pullPolicy }}
34 | command: ["sleep", "infinity"]
35 | resources:
36 | {{- toYaml .Values.resources | nindent 12 }}
37 | env:
38 | - name: DOCKER_HOST
39 | value: tcp://localhost:2376
40 | - name: DOCKER_TLS_VERIFY
41 | value: "1"
42 | - name: DOCKER_CERT_PATH
43 | value: /root/.docker/client
44 | - name: SUBSTRA_FORCE_EDITABLE_MODE
45 | value: "True"
46 | volumeMounts:
47 | - name: dind-certs
48 | mountPath: /root/.docker
49 | - name: tmp
50 | mountPath: /tmp
51 | - name: local-worker
52 | mountPath: /usr/src/app/local-worker/
53 | - name: dind
54 | image: docker:19.03.12-dind
55 | securityContext:
56 | privileged: true
57 | volumeMounts:
58 | - name: dind-storage
59 | mountPath: /var/lib/docker
60 | - name: dind-certs
61 | mountPath: /root/.docker
62 | - name: tmp
63 | mountPath: /tmp
64 | - name: local-worker
65 | mountPath: /usr/src/app/local-worker/
66 | env:
67 | - name: DOCKER_TLS_CERTDIR
68 | value: /root/.docker
69 | {{- with .Values.nodeSelector }}
70 | nodeSelector:
71 | {{- toYaml . | nindent 8 }}
72 | {{- end }}
73 | volumes:
74 | - name: dind-storage
75 | emptyDir: {}
76 | - name: dind-certs
77 | emptyDir: {}
78 | - name: tmp
79 | emptyDir: {}
80 | - name: local-worker
81 | emptyDir: {}
82 | {{- with .Values.affinity }}
83 | affinity:
84 | {{- toYaml . | nindent 8 }}
85 | {{- end }}
86 | {{- with .Values.tolerations }}
87 | tolerations:
88 | {{- toYaml . | nindent 8 }}
89 | {{- end }}
90 |
--------------------------------------------------------------------------------
/charts/substra-documentation-examples/values.yaml:
--------------------------------------------------------------------------------
1 | image:
2 | repository: substrafoundation/substra-documentation-examples
3 | tag: stable
4 | pullPolicy: IfNotPresent
5 |
6 | imagePullSecrets: []
7 | nameOverride: ""
8 | fullnameOverride: ""
9 |
10 | resources: {}
11 | # We usually recommend not to specify default resources and to leave this as a conscious
12 | # choice for the user. This also increases chances charts run on environments with little
13 | # resources, such as Minikube. If you do want to specify resources, uncomment the following
14 | # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
15 | # limits:
16 | # cpu: 100m
17 | # memory: 128Mi
18 | # requests:
19 | # cpu: 100m
20 | # memory: 128Mi
21 |
22 | nodeSelector: {}
23 |
24 | tolerations: []
25 |
26 | affinity: {}
27 |
--------------------------------------------------------------------------------
/docker/substra-documentation-examples/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.11
2 |
3 | WORKDIR /usr/src/app
4 |
5 | ### Dirty build to install docker client ###
6 | # We need to install a docker client because substra debug mode relies on Docker (DinD in a side container)
7 | # and we need a docker client to authenticate this docker against our container registry to download substra-tools.
8 | RUN apt update && apt install --yes apt-transport-https ca-certificates curl gnupg lsb-release
9 | RUN curl -fsSL https://download.docker.com/linux/debian/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
10 | RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/debian $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null
11 | RUN apt update && apt install --yes docker-ce-cli
12 |
13 | COPY substra-tools/ substra-tools/
14 | COPY substra/ substra/
15 | COPY substrafl/ substrafl/
16 |
17 | RUN cd substrafl && python -m pip install --no-cache-dir -e .
18 |
19 | RUN cd substra && python -m pip install --no-cache-dir -e .
20 |
21 | RUN cd substra-tools && python -m pip install --no-cache-dir -e .
22 |
23 | COPY substra-documentation/Makefile substra-documentation/
24 | COPY substra-documentation/examples_requirements.txt substra-documentation/
25 | COPY substra-documentation/docs/source/examples substra-documentation/docs/source/examples/
26 |
27 | RUN cd substra-documentation && make install-examples-dependencies
28 |
29 | WORKDIR /usr/src/app/substra-documentation
30 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?= -W --keep-going -n
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 | SUBSTRADOCDIR = source/documentation/references
11 | SUBSTRAFLDOCDIR = source/substrafl_doc/api
12 |
13 | # Put it first so that "make" without argument is like "make help".
14 | help:
15 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
16 |
17 | .PHONY: help Makefile clean livehtml livetheme
18 |
19 | # Catch-all target: route all unknown targets to Sphinx using the new
20 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
21 | %: Makefile
22 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
23 |
24 | livehtml:
25 | sphinx-autobuild -b html $(SOURCEDIR) $(BUILDDIR)/html
26 |
27 | livetheme:
28 | sphinx-autobuild -b html -a $(SOURCEDIR) $(BUILDDIR)/html --watch source/_static/
29 |
30 | clean:
31 | rm -rf $(BUILDDIR)
32 | rm -rf $(SUBSTRADOCDIR)
33 | rm -rf $(SUBSTRAFLDOCDIR)
34 | # Delete the local worker folders in substra-documentation
35 | find .. -type d -name local-worker -prune -exec rm -rf {} \;
36 | # Delete the tmp folders in substra-documentation
37 | find .. -type d -name tmp -prune -exec rm -rf {} \;
38 |
--------------------------------------------------------------------------------
/docs/doc-requirements.txt:
--------------------------------------------------------------------------------
1 | recommonmark==0.6.0
2 | sphinx==7.2.6
3 | sphinx-markdown-tables==0.0.17
4 | sphinx-rtd-theme==2.0.0
5 | sphinx-autobuild==2024.2.4
6 | sphinx_click==5.1.0
7 | click==8.1
8 | texttable==1.7.0
9 | myst-parser==2.0.0
10 | docutils==0.20.1
11 | sphinx-fontawesome==0.0.6
12 | sphinx-copybutton==0.5.2
13 | pyyaml==6.0
14 | nbsphinx==0.9.3
15 | pandoc==2.3
16 | git-python==1.0.3
17 | nbconvert<7.14
--------------------------------------------------------------------------------
/docs/environment.yml:
--------------------------------------------------------------------------------
1 | name: rtd
2 | channels:
3 | - defaults
4 | - conda-forge
5 | dependencies:
6 | - python=3.10
7 | - pandoc=3.1
8 | - pip
9 | - pip:
10 | - -r ../requirements.txt
11 |
--------------------------------------------------------------------------------
/docs/source/_ext/compatibilitytable.py:
--------------------------------------------------------------------------------
1 | import os.path
2 |
3 | import yaml
4 | from docutils import nodes
5 | from docutils.parsers.rst import Directive
6 |
7 |
def has_helm_chart(table: dict, component_name: str) -> bool:
    """Report whether at least one release lists a Helm chart for a component.

    Args:
        table: Parsed releases document; its ``"releases"`` entry is iterated and
            each release's ``"components"`` mapping is inspected.
        component_name: Name of the component to look up in each release.

    Returns:
        ``True`` as soon as one release contains ``component_name`` with a
        ``"helm"`` key, ``False`` when no release does (including releases that
        do not mention the component at all).
    """
    return any(
        "helm" in entry["components"][component_name]
        for entry in table["releases"]
        if component_name in entry["components"]
    )
14 |
15 |
class CompatibilityTable(Directive):
    """Directive that renders a releases YAML file as a docutils table.

    The single required argument is the path of the YAML file, resolved
    relative to the directory of the document that uses the directive. The
    parsed document is expected to provide:

    * ``components``: list of component names (one column pair per name);
    * ``releases``: list of mappings holding a ``version`` and a ``components``
      mapping that gives, per component, a ``version``, a ``link`` and
      optionally a ``helm`` mapping with its own ``version`` and ``link``.
    """

    required_arguments = 1

    def run(self):
        """Build the table node tree and return it as a one-element list."""
        # "documentation":
        # https://docutils.sourceforge.io/docs/ref/doctree.html#table
        # https://github.com/docutils/docutils/blob/173189b4c1c095a43c9388f4edd9bf1ff5d5b49d/docutils/docutils/parsers/rst/states.py#L1793
        # https://github.com/docutils/docutils/blob/173189b4c1c095a43c9388f4edd9bf1ff5d5b49d/docutils/docutils/nodes.py#L439

        # documentation says nodes can be constructed by passing their children to the constructor
        # for instance nodes.entry(nodes.Text("lol")) should work
        # but it doesn't
        # this leads to needing to first create the node and then attach children to it

        source_file, _ = self.state_machine.get_source_and_line()
        releases_path = os.path.join(os.path.dirname(source_file), self.arguments[0])
        # Explicit encoding so the docs build does not depend on the host locale.
        with open(releases_path, encoding="utf-8") as f:
            releases = yaml.safe_load(f)

        table = nodes.table()
        tgroup = nodes.tgroup()
        # Two columns per component (app + helm), plus two for the release name.
        for _ in range((len(releases["components"]) + 1) * 2):
            colspec = nodes.colspec(colwidth=1)
            tgroup.append(colspec)
        table += tgroup

        # Header: first row holds component names, second row the app/helm labels.
        thead = nodes.thead()
        tgroup += thead
        component_row = nodes.row()
        helm_row = nodes.row()

        for component_name in ["release"] + releases["components"]:
            if not has_helm_chart(releases, component_name):
                # No helm chart anywhere: the name cell covers both header rows.
                name_entry = nodes.entry(morerows=1, morecols=1)
            else:
                name_entry = nodes.entry(morecols=1)
                helm_row += [nodes.entry(), nodes.entry()]
                helm_row[-2] += nodes.paragraph(text="app")
                helm_row[-1] += nodes.emphasis(text="helm")

            name_entry += nodes.paragraph(text=component_name)
            component_row += name_entry

        thead.append(component_row)
        thead.append(helm_row)

        # Body: one row per release, one cell (or app/helm pair) per component.
        tbody = nodes.tbody()
        for release in releases["releases"]:
            row = nodes.row()
            row += nodes.entry(morecols=1)
            row[0] += nodes.strong(text=release["version"])
            for component_name in releases["components"]:
                component = release["components"].get(component_name)
                if component is None:
                    # The release does not ship this component: emit an empty cell
                    # spanning both columns so later columns stay aligned, instead
                    # of failing with a KeyError.
                    row += nodes.entry(morecols=1)
                    continue

                app_para = nodes.paragraph()
                app_para += nodes.reference(
                    text=component["version"], refuri=component["link"], internal=False
                )
                if "helm" in component:
                    row += nodes.entry()
                    row[-1] += app_para
                    para = nodes.emphasis()
                    para += nodes.reference(
                        text=component["helm"]["version"],
                        refuri=component["helm"]["link"],
                        internal=False,
                    )
                    row += nodes.entry()
                    row[-1] += para
                else:
                    # Single app version spanning the app and helm columns.
                    row += nodes.entry(morecols=1)
                    row[-1] += app_para

            tbody += row
        tgroup += tbody

        return [table]
91 |
92 |
def setup(app):
    """Register the ``compatibilitytable`` directive on ``app``.

    Returns:
        The extension metadata dictionary: its version string and the flags
        declaring the extension safe for parallel reading and writing.
    """
    metadata = dict(
        version="0.1",
        parallel_read_safe=True,
        parallel_write_safe=True,
    )
    app.add_directive("compatibilitytable", CompatibilityTable)
    return metadata
101 |
--------------------------------------------------------------------------------
/docs/source/_static/Gattica-Bold100.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/_static/Gattica-Bold100.otf
--------------------------------------------------------------------------------
/docs/source/_static/Gattica-Light100.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/_static/Gattica-Light100.otf
--------------------------------------------------------------------------------
/docs/source/_static/Gattica-Medium100.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/_static/Gattica-Medium100.otf
--------------------------------------------------------------------------------
/docs/source/_static/Gattica-Regular100.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/_static/Gattica-Regular100.otf
--------------------------------------------------------------------------------
/docs/source/_static/example_thumbnail/cyclic-mnist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/_static/example_thumbnail/cyclic-mnist.png
--------------------------------------------------------------------------------
/docs/source/_static/example_thumbnail/diabetes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/_static/example_thumbnail/diabetes.png
--------------------------------------------------------------------------------
/docs/source/_static/example_thumbnail/iris.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/_static/example_thumbnail/iris.jpg
--------------------------------------------------------------------------------
/docs/source/_static/example_thumbnail/mnist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/_static/example_thumbnail/mnist.png
--------------------------------------------------------------------------------
/docs/source/_static/example_thumbnail/titanic.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/_static/example_thumbnail/titanic.jpg
--------------------------------------------------------------------------------
/docs/source/_static/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/_static/favicon.png
--------------------------------------------------------------------------------
/docs/source/_static/images/substra-0.26-task-duration.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/_static/images/substra-0.26-task-duration.png
--------------------------------------------------------------------------------
/docs/source/_static/images/workflow_0.27.0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/_static/images/workflow_0.27.0.png
--------------------------------------------------------------------------------
/docs/source/_static/my-nbsphinx-gallery.css:
--------------------------------------------------------------------------------
1 | .nbsphinx-gallery {
2 | display: grid;
3 | grid-template-columns: repeat(auto-fill, 180px);
4 | gap: 5px;
5 | margin-top: 1em;
6 | margin-bottom: 1em;
7 | }
8 |
9 | .nbsphinx-gallery>a {
10 | background-image: none;
11 | border: solid #fff 1px;
12 | background-color: #fff;
13 | box-shadow: 0 0 15px rgba(142, 176, 202, 0.2);
14 | border-radius: 5px;
15 | min-height: 230px;
16 | min-width: 180px;
17 | padding: 10px 24px;
18 | text-decoration: none;
19 | color: var(--color-primary-500);
20 | transition: transform 0.2s ease;
21 | }
22 |
23 | .nbsphinx-gallery>a:hover {
24 | border: solid var(--color-primary-500) 1px;
25 | box-shadow: 0 0 15px rgba(142, 176, 202, 0.5);
26 | transform: scale(1.05);
27 | }
28 |
29 | .nbsphinx-gallery img {
30 | max-width: 100%;
31 | max-height: 100%;
32 | }
33 |
34 | .nbsphinx-gallery>a>div:first-child {
35 | display: flex;
36 | align-items: start;
37 | justify-content: center;
38 | height: 120px;
39 | margin-bottom: 5px;
40 | }
--------------------------------------------------------------------------------
/docs/source/_static/schemes/channel_schema.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/_static/schemes/channel_schema.png
--------------------------------------------------------------------------------
/docs/source/additional/community.rst:
--------------------------------------------------------------------------------
1 | Community
2 | =========
3 |
4 | .. _community:
5 |
6 | Github
7 | ^^^^^^
8 | Anyone can open an `issue `_ on the Substra GitHub repository. Please submit bug reports so we can continually improve the software.
9 |
10 | Slack
11 | ^^^^^
12 | A real-time chat room to ask questions, give feedback and chat about anything related to Substra. Please `join us here `_.
13 |
14 | Newsletter
15 | ^^^^^^^^^^
16 | `Subscribe here `_ to join our newsletter.
--------------------------------------------------------------------------------
/docs/source/additional/faq.rst:
--------------------------------------------------------------------------------
1 | FAQ
2 | ===
3 |
4 | .. _faq:
5 |
6 | What is Substra?
7 | ^^^^^^^^^^^^^^^^
8 | Substra is an open source federated learning (FL) software that enables machine learning on distributed datasets. It provides a flexible Python interface and a web app to perform federated machine learning at scale.
9 |
10 | Substra is the most proven software for federated learning in healthcare and has already been deployed in real production environments by hospitals and biotech companies (see the `MELLODDY `_ project for instance). Substra can also be used on a single machine on a virtually split dataset to perform FL simulations and debug code before launching experiments on a real network.
11 |
12 | Who owns Substra?
13 | ^^^^^^^^^^^^^^^^^
14 | Substra is open source software operated under an Apache 2.0 License. Substra is hosted by the `Linux Foundation for AI and Data `_. Substra was initially developed by engineers at `Owkin `_, a BioTech company that continues to play a big role in its development.
15 |
16 | What kinds of data does Substra support?
17 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
18 | Substra can run tasks on any type of data: tabular data, images, videos, audio, time series, etc.
19 |
20 | What kind of machine learning model can I use with Substra?
21 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
22 | Substra is fully compatible with machine learning models written in any Python library (PyTorch, Tensorflow, Sklearn, etc). However, a specific interface has been developed to use PyTorch in Substra, which makes writing PyTorch code simpler than using other frameworks.
23 |
24 | Is Substra limited to medical and biotech applications?
25 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
26 | Although Substra has been designed to work especially well in healthcare settings, it can work on any kind of data with any Python library to perform computation or analysis using distributed data.
27 |
28 | How can I be sure Substra is secure enough to be used with my private data?
29 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
30 | Substra is regularly audited with rigorous security standards (both source code audit and penetration tests). On top of that, by design, private data is never shared between different organizations. The software also provides full traceability on which functions were used on each dataset.
31 |
32 | What is the roadmap for Substra?
33 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
34 |
35 | The roadmap for Substra is primarily decided by product managers and engineers at Owkin. These decisions are based on needs from active and potential FL projects and users that may or may not involve Owkin. You will find below a list of needs that we plan to address in the near future. Please know however that this is not a very strict roadmap and the direction of the product can alter at any moment.
36 |
37 | * **Better support for Federated Analytics:** The Substra library does support Federated Analytics currently but one of our goals is to make this more user-friendly and accessible.
38 | * **Introduce more FL Strategies:** Substra aims to be a complete FL framework and one way we hope to facilitate FL projects is by adding more strategies. We hope that by implementing these strategies within the library, we can encourage more experimentation by data scientists. We would also be interested in allowing users to define their own FL strategies.
39 | * **Usability Improvements:** We intend to make Substra easier to deploy and use. This will come from improving the deployment documentation, simplifying the data concepts and merging Substra and Substrafl into one unified library.
40 |
41 | These are some of the main features to be developed in Substra for the coming months. We want to actively make an effort to help our users, so please do not hesitate to reach out if you have a feature request or an idea. Feedback is always welcome!
--------------------------------------------------------------------------------
/docs/source/additional/glossary.rst:
--------------------------------------------------------------------------------
1 | .. _glossary-label:
2 |
3 | Glossary
4 | ========
5 |
6 |
7 | .. glossary::
8 |
9 | Organization
10 | An organization represents an independent partner in the network. It has its own computing and storage resources.
11 |
12 | Channel
13 | A channel is a group of Substra :term:`Organizations` which operate on a common set of assets. Several channels can be built on top of a Substra network.
14 |
15 |
--------------------------------------------------------------------------------
/docs/source/banner.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/banner.jpg
--------------------------------------------------------------------------------
/docs/source/contributing/code-of-conduct.rst:
--------------------------------------------------------------------------------
1 | ***************
2 | Code of Conduct
3 | ***************
4 |
5 | Our Pledge
6 | ==========
7 |
8 | In the interest of fostering an open and welcoming environment, we as
9 | contributors and maintainers pledge to make participation in our project and
10 | our community a harassment-free experience for everyone, regardless of age, body
11 | size, disability, ethnicity, gender identity and expression, level of experience,
12 | nationality, personal appearance, race, religion, or sexual identity and
13 | orientation.
14 |
15 | Our Standards
16 | =============
17 |
18 | Examples of behavior that contributes to creating a positive environment
19 | include:
20 |
21 | * Using welcoming and inclusive language
22 | * Being respectful of differing viewpoints and experiences
23 | * Gracefully accepting constructive criticism
24 | * Focusing on what is best for the community
25 | * Showing empathy towards other community members
26 |
27 | Examples of unacceptable behavior by participants include:
28 |
29 | * The use of sexualized language or imagery and unwelcome sexual attention or advances
30 | * Trolling, insulting/derogatory comments, and personal or political attacks
31 | * Public or private harassment
32 | * Publishing others' private information, such as a physical or electronic address, without explicit permission
33 | * Other conduct which could reasonably be considered inappropriate in a professional setting
34 |
35 | Our Responsibilities
36 | ====================
37 |
38 | Project maintainers are responsible for clarifying the standards of acceptable
39 | behavior and are expected to take appropriate and fair corrective action in
40 | response to any instances of unacceptable behavior.
41 |
42 | Project maintainers have the right and responsibility to remove, edit, or
43 | reject comments, commits, code, wiki edits, issues, and other contributions
44 | that are not aligned to this Code of Conduct, or to ban temporarily or
45 | permanently any contributor for other behaviors that they deem inappropriate,
46 | threatening, offensive, or harmful.
47 |
48 | Scope
49 | =====
50 |
51 | This Code of Conduct applies both within project spaces and in public spaces
52 | when an individual is representing the project or its community. Examples of
53 | representing a project or community include using an official project e-mail
54 | address, posting via an official social media account, or acting as an appointed
55 | representative at an online or offline event. Representation of a project may be
56 | further defined and clarified by project maintainers.
57 |
58 | Enforcement
59 | ===========
60 |
61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
62 | reported by contacting community@substra.org, which is a shared team inbox. If the incident involves someone who receives that shared inbox, you can contact an individual maintainer. All
63 | complaints will be reviewed and investigated and will result in a response that
64 | is deemed necessary and appropriate to the circumstances. The project team is
65 | obligated to maintain confidentiality with regard to the reporter of an incident.
66 | Further details of specific enforcement policies may be posted separately.
67 |
68 | Project maintainers who do not follow or enforce the Code of Conduct in good
69 | faith may face temporary or permanent repercussions as determined by other
70 | members of the project's leadership.
71 |
72 | Attribution
73 | ===========
74 |
75 | This Code of Conduct is adapted from the `Contributor Covenant `_, version 1.4.
76 |
--------------------------------------------------------------------------------
/docs/source/contributing/contributing-guide.rst:
--------------------------------------------------------------------------------
1 | ******************
2 | Contributing Guide
3 | ******************
4 |
5 | Thanks for checking out the contributing guide. Substra warmly welcomes contributions!
6 |
7 | Ground rules & expectations
8 | ===========================
9 |
10 | Be kind and thoughtful in your conversations around this project. We all come from different backgrounds and projects, which means we likely have different perspectives on how things should be done. Try to listen to others rather than convince them that your way is correct.
11 |
12 | Substra has a :doc:`Contributor Code of Conduct `. By participating in this project, you agree to abide by its terms.
13 |
14 | Who are contributors?
15 | ======================
16 |
17 | A contributor is any person who has contributed to the code. It does not matter whether it's a typo fix or 10k lines of code. Making a contribution, however, does not automatically entitle you to copyright over that code. For copyright, the contribution must be significant enough to meet the `threshold of originality `_, which basically means that your code is somewhat unique and non-generic. Fixing a typo does not give you access to copyright over that word or sentence.
18 |
19 | How to contribute
20 | =================
21 |
22 | You should usually open a pull request in the following situations:
23 |
24 | * Submit trivial fixes (for example, a typo, a broken link or an obvious error)
25 | * Start work on a contribution that was already asked for, or that you've already discussed, in an issue
26 |
27 | A pull request doesn't have to represent finished work. You can open a pull request early on, so others can watch or give feedback on your progress. Just open it as a "draft". You can always add more commits later.
28 |
29 | Here's how to submit a pull request:
30 |
31 | * `Fork the repository `_ and clone it locally. Connect your local clone to the original "upstream" repository by adding it as a remote. Pull in changes from "upstream" often so that you stay up to date and merge conflicts are less likely when you submit your pull request. (See more detailed instructions `here `_).
32 | * `Create a branch `_ for your edits.
33 | * **Sign off** your commits.
34 | * **Test your changes.** Please ensure that your contribution passes all tests if you open a pull request. If there are test failures, you will need to address them before we can merge your contribution. When adding or changing functionality, please include new tests for them as part of your contribution.
35 | * **Contribute in the style of the project** to the best of your abilities. This may mean using indents, semi-colons or comments differently than you would in your own repository, but makes it easier for us to merge, others to understand and maintain in the future. Most of the project repositories have a :code:`.pre-commit-config.yaml` file. Run :code:`pre-commit install` to automatically match some of the style rules of the project when committing your changes.
36 | * **Add yourself to the contributors**. If you made a significant contribution, don't forget to add yourself to the CONTRIBUTORS.md file of the repo by putting your name and a small description of your work.
37 |
38 | Vulnerabilities
39 | ===============
40 |
41 | Please reach out to support@substra.org immediately if you believe you have found a vulnerability.
42 |
43 | Due to the privacy preserving nature of Substra, we take vulnerabilities very seriously. The core of Federated Learning is security and therefore we take various steps such as auditing and automated testing to ensure that our code base remains secure. All pull requests go through a thorough review.
44 |
45 | If a vulnerability is found, a triage process is begun within one working day to determine the severity of the vulnerability and the next steps to consider.
46 |
47 | Sign Off
48 | ========
49 |
50 | For compliance purposes, `Developer Certificate of Origin (DCO) on Pull Requests `_ is activated on the repo.
51 |
52 | In practice, you must add a ``Signed-off-by:`` message at the end of every commit:
53 |
54 | .. code-block:: bash
55 |
56 | This is my commit message
57 | Signed-off-by: Random J Developer
58 |
59 | Add ``-s`` flag to add it automatically: ``git commit -s -m 'This is my commit message'``.
60 |
61 | :doc:`Community `
62 | ========================================
63 |
64 | Discussions about Substra take place on the repositories' Issues and Pull Requests sections and on Slack. Anybody is welcome to join these conversations.
65 |
66 | Wherever possible, do not take these conversations to private channels, including contacting the maintainers directly. Keeping communication public means everybody can benefit and learn from the conversation.
67 |
68 | Attribution
69 | ===========
70 |
71 | This guide follows guidelines from `opensource.guide `_
--------------------------------------------------------------------------------
/docs/source/documentation/api_reference.rst:
--------------------------------------------------------------------------------
1 | Substra API reference
2 | =====================
3 |
4 | `substra` version: |substra_version|
5 |
6 | SDK Reference
7 | -------------
8 | .. toctree::
9 | :maxdepth: 2
10 |
11 | references/sdk.md
12 |
13 |
14 | Models
15 | ^^^^^^
16 |
17 | Models describe the objects returned by the platform.
18 |
19 | .. toctree::
20 | :maxdepth: 2
21 |
22 | references/sdk_models.md
23 |
24 | Schemas
25 | ^^^^^^^
26 | Specs are the specifications to register the assets on the platform.
27 |
28 | .. toctree::
29 | :maxdepth: 2
30 |
31 | references/sdk_schemas.md
32 |
--------------------------------------------------------------------------------
/docs/source/documentation/backend/index.rst:
--------------------------------------------------------------------------------
1 | *******
2 | Backend
3 | *******
4 |
5 | The backend exposes the REST API for an :term:`Organization` and executes compute tasks (in a subsystem we call *compute engine*).
6 |
7 | .. _backend_kubernetes_pods:
8 |
9 | Kubernetes pods
10 | ===============
11 |
12 | docker-registry
13 | We use this service to store images built from user-provided :ref:`Functions`.
14 | Make sure to assign a large enough volume to avoid rebuilding images over and over due to eviction.
15 | registry-prepopulate
16 | This Pod is managed by a Job running on chart installation or update.
17 | It uploads container Images to the docker-registry to make them available for future use in :ref:`Functions`.
18 | minio
19 | `MinIO`_ is an object storage service and stores all assets registered on the :term:`Organization`.
20 | You should back up the data of this Pod.
21 | postgresql
22 | This is the database supporting the backend.
23 | You should back up the data of this Pod.
24 | redis
25 | This is an organization-specific message broker to support `Celery`_ tasks.
26 | backend-events
27 | This component will consume events from the orchestrator.
28 | It should be able to access the orchestrator over gRPC.
29 | It handles events and triggers appropriate responses such as starting compute tasks.
30 | On startup, it will also register the Organization on the orchestrator.
31 | migrations
32 | This Pod is managed by a Job running on chart installation or update to deal with database schema changes.
33 | This Pod also performs user creation.
34 | scheduler, scheduler-worker
35 | Those are `Celery`_ components, handling scheduled tasks.
36 | server
37 | This is a Django application exposing the REST API through which users interact with Substra.
38 | worker
39 | This is the service processing `Celery`_ tasks.
40 | It handles :ref:`Function` images builds and running compute tasks.
41 | This is where you will find logs related to task processing.
42 |
43 | .. _Celery: https://docs.celeryq.dev/en/latest/index.html
44 | .. _MinIO: https://min.io/
45 |
46 | .. _backend_communication:
47 |
48 | Communication
49 | =============
50 |
51 | The backend should be able to reach its orchestrator.
52 | If :term:`Organizations` share :ref:`Models`, involved backends must be able to communicate with each other.
53 |
54 | Helm chart
55 | ==========
56 |
57 | We use Helm charts as a way to package our application deployments.
58 | If you want to deploy the backend you can use the `Helm chart substra-backend`_.
59 |
60 | .. _Helm chart substra-backend: https://artifacthub.io/packages/helm/substra/substra-backend
61 |
--------------------------------------------------------------------------------
/docs/source/documentation/components.rst:
--------------------------------------------------------------------------------
1 | Components
2 | ==========
3 |
4 | We distinguish two major components, the orchestrator and the backend.
5 | Although they are independent, their versions must match a tested release as referenced in the :ref:`compatibility table <compatibility table>`.
6 |
7 | .. image:: ../_static/schemes/stack-technical-scheme.svg
8 | :width: 800
9 | :align: center
10 | :alt: Substra Components Scheme
11 |
12 | .. toctree::
13 | :glob:
14 | :titlesonly:
15 | :maxdepth: 1
16 | :caption: Components documentation
17 |
18 | backend/*
19 | frontend/*
20 | orchestrator/*
21 | Substra python library <api_reference>
22 | SubstraFL python library <../substrafl_doc/substrafl_overview>
23 |
24 |
25 |
--------------------------------------------------------------------------------
/docs/source/documentation/frontend/index.rst:
--------------------------------------------------------------------------------
1 | ********
2 | Frontend
3 | ********
4 |
5 | The frontend (also named web application for the end-users) allows you to monitor your assets (compute plans, tasks, datasets, functions) easily through a user interface. It is mainly a read-only interface: you will need to use the Python library to register data or to launch computation. However there are a few actions that are doable with the frontend, for instance: cancelling compute plans, managing users and creating API tokens.
6 |
7 | .. _frontend_kubernetes_pods:
8 |
9 | Kubernetes pods
10 | ===============
11 |
12 | frontend
13 | A single pod managing the frontend.
14 |
15 | .. _frontend_communication:
16 |
17 | Communication
18 | =============
19 |
20 | The frontend should be able to reach its backend through the REST API.
21 | The access to the API is secured through the use of JSON Web Tokens (JWT), which are stored through cookies. Each backend server pod has its own token, so when working with different backends or restarting pods, it might be necessary to delete related cookies (namely signature, refresh and header.payload) so a new JWT can be created. Otherwise this could block you from logging into the frontend.
22 |
23 | Helm chart
24 | ==========
25 |
26 | We use Helm charts as a way to package our application deployments.
27 | If you want to deploy the frontend you can use the `Helm chart substra-frontend`_.
28 |
29 | .. _Helm chart substra-frontend: https://artifacthub.io/packages/helm/substra/substra-frontend
--------------------------------------------------------------------------------
/docs/source/documentation/images/copy_token.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/documentation/images/copy_token.png
--------------------------------------------------------------------------------
/docs/source/documentation/images/find_token_management_page.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/documentation/images/find_token_management_page.png
--------------------------------------------------------------------------------
/docs/source/documentation/images/generate_new_token.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/documentation/images/generate_new_token.png
--------------------------------------------------------------------------------
/docs/source/documentation/images/token_management_page.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/documentation/images/token_management_page.png
--------------------------------------------------------------------------------
/docs/source/documentation/orchestrator/index.rst:
--------------------------------------------------------------------------------
1 | ************
2 | Orchestrator
3 | ************
4 |
5 | Performing a Federated Learning experiment implies a lot of different compute tasks: local training, aggregation, testing on different organizations, etc. The role of the orchestrator is to distribute ML tasks among organizations, while ensuring complete traceability of operations.
6 |
7 | The orchestrator registers the status of tasks; when a task is done (status ``Done``), it evaluates if some remaining tasks (status ``Waiting``) are now unblocked, and if it's the case, the status of those tasks is changed to ``To do``. The new status is sent to all the backends, which store the new tasks ``To do`` in the task queue (Celery). Then, the task queue will assign the task to one of the workers (if multiple) and handle retries if needed.
8 |
9 | In case of failure, it will store failure reports and change the status of the faulty task to ``Failed``.
10 | In case of manual cancellation, it will change the status of the tasks to ``Cancelled`` on different backends.
11 |
12 | Orchestration
13 | =============
14 |
15 | Orchestration is hosted by a central Postgres database:
16 |
17 | .. image:: /_static/schemes/centralized-orc.svg
18 |
19 | Orchestration stores only non-sensitive metadata of the Substra assets, making it possible to verify the integrity of the assets and to ensure that the permissions on the assets are respected.
20 |
21 | It therefore requires trusting whomever is operating the orchestrator DB not to tamper with it.
22 |
23 | .. note::
24 |
25 | Orchestration was available in a **distributed** mode until `v0.34.0 `__
26 |
27 | .. _orc_kubernetes_pods:
28 |
29 | Kubernetes pods
30 | ===============
31 |
32 | postgresql
33 | This is the database supporting the ledger.
34 | You should back up the data of this Pod.
35 | orchestrator-server
36 | This is the actual orchestration service, accessed over gRPC.
37 | migrations
38 | This Pod is managed by a Job running on Helm chart installation or update.
39 | It deals with database schema changes.
40 |
41 | .. _orc_communication:
42 |
43 | Communication
44 | =============
45 |
46 | The orchestrator is a central component.
47 | All backends from each :term:`Organization` must have access to the orchestrator over gRPC for command/queries and event subscription.
48 |
49 | The orchestrator authenticates clients with their TLS certificates.
50 | As a consequence, the Kubernetes Ingress must do SSL passthrough.
51 |
52 | Storage
53 | =======
54 |
55 | The orchestrator stores its data in a PostgreSQL database.
56 | Migrations are executed using a Kubernetes Job on installation and update (this relies on a Helm hook).
57 |
58 | Helm chart
59 | ==========
60 |
61 | We use Helm charts as a way to package our application deployments.
62 | If you want to deploy the orchestrator you can use the `Helm chart orchestrator`_.
63 |
64 | .. _Helm chart orchestrator: https://artifacthub.io/packages/helm/substra/orchestrator
65 |
--------------------------------------------------------------------------------
/docs/source/documentation/substra_tools.rst:
--------------------------------------------------------------------------------
1 | Substra Tools
2 | =============
3 |
4 | In Substra, users create tasks that are registered to the platform, then executed in a containerised environment.
5 |
6 | A task needs a valid Dockerfile to create a container and expose a command line interface. The execution of the command creates the expected output files.
7 |
8 | For example, a function defines a list of inputs and outputs. At task execution, the input files are given to the container, the paths to the files are given as arguments to the command line, and the task is responsible for creating the output files.
9 |
10 | To allow the reproducibility of a task, the task dependencies are defined in the Dockerfile. The code can be written in almost any language, as long as you have the right Docker base image: `R <https://hub.docker.com/_/r-base>`_, `Python <https://hub.docker.com/_/python>`_, `C <https://hub.docker.com/_/gcc>`_ and a lot more.
11 |
12 | `Substra-tools <https://github.com/Substra/substra-tools>`_ is a wrapper for Python code to define valid openers and functions.
13 |
14 |
15 | The substra-tools library, `available on PyPI <https://pypi.org/project/substratools/>`_, provides wrappers to write Python code (it handles the command line interface creation, the data loading using the opener...).
16 |
17 | Thanks to this library, the user can focus on the task function content.
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/diabetes_example/assets/dataset/diabetes_dataset.py:
--------------------------------------------------------------------------------
1 | from sklearn.datasets import load_diabetes
2 | import pandas as pd
3 | import pathlib
4 |
5 |
def setup_diabetes(data_path: pathlib.Path):
    """Fetch the scikit-learn diabetes dataset and split it between two organisations.

    Writes ``description.md`` (the ``DESCR`` text returned by ``load_diabetes``) at the
    root of ``data_path`` and one ``data.csv`` per organisation in the ``org_1`` and
    ``org_2`` sub-folders. The first two thirds of the rows go to ``org_1`` and the
    remaining rows to ``org_2``.

    Args:
        data_path: Root folder receiving the generated files. It is created, together
            with any missing parent folder, when it does not exist yet. Plain strings
            are accepted as well.
    """
    # Make sure the root folder exists before anything is written into it.
    data_path = pathlib.Path(data_path)
    data_path.mkdir(parents=True, exist_ok=True)

    raw_data = load_diabetes(scaled=False)

    # write_text creates the file itself, so no prior touch() is needed
    (data_path / "description.md").write_text(raw_data.DESCR)

    dataset = pd.DataFrame(data=raw_data.data, columns=raw_data.feature_names)
    # map the "sex" column to categorical data
    dataset["sex"] = dataset["sex"].replace({1: "M", 2: "F"}).astype("category")

    # Split the dataset in two uneven parts, one folder per organisation
    split_index = int(len(dataset) * 2 / 3)
    parts = {
        "org_1": dataset.iloc[:split_index],
        "org_2": dataset.iloc[split_index:],
    }
    for org_name, org_data in parts.items():
        org_folder = data_path / org_name
        org_folder.mkdir(exist_ok=True)
        org_data.to_csv(org_folder / "data.csv", index=False)
25 |
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/diabetes_example/assets/dataset/diabetes_opener.py:
--------------------------------------------------------------------------------
1 | import pathlib
2 | import numpy as np
3 | import pandas as pd
4 | import substratools as tools
5 |
6 |
class DiabetesOpener(tools.Opener):
    """Opener reading the diabetes data samples stored as CSV files."""

    def fake_data(self, n_samples=None):
        """Return a DataFrame of uniform random values with the diabetes feature columns.

        The frame has ``n_samples`` rows when that value is set and does not exceed 100,
        and 100 rows otherwise.
        """
        column_names = ["age", "sex", "bmi", "bp", "s1", "s2", "s3", "s4", "s5", "s6"]
        if n_samples and n_samples <= 100:
            n_rows = n_samples
        else:
            n_rows = 100
        values = np.random.random((n_rows, len(column_names)))
        return pd.DataFrame(data=values, columns=column_names)

    def get_data(self, folders):
        """Load the first CSV file found in the first folder, reading ``sex`` as a category."""
        csv_files = pathlib.Path(folders[0]).glob("*.csv")
        first_csv = next(csv_files)
        return pd.read_csv(first_csv, dtype={"sex": "category"})
20 |
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/diabetes_example/assets/functions/aggregation/Dockerfile:
--------------------------------------------------------------------------------
# this base image works in both CPU and GPU enabled environments
FROM python:3.12-slim

# install dependencies (--no-cache-dir keeps pip's download cache out of the image layer)
RUN pip3 install --no-cache-dir pandas numpy substratools

# copy your algorithm script into the docker image (COPY is the recommended instruction for local files)
COPY federated_analytics_functions.py .

# define how script is run
ENTRYPOINT ["python3", "federated_analytics_functions.py", "--function-name", "aggregation"]
12 |
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/diabetes_example/assets/functions/description.md:
--------------------------------------------------------------------------------
1 | # Federated Analytics with Substra
2 |
3 | This folder contains the Python module and associated Dockerfile to perform Federated Analytics with the Substra library.
4 |
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/diabetes_example/assets/functions/federated_analytics_functions.py:
--------------------------------------------------------------------------------
1 | import json
2 | from collections import defaultdict
3 |
4 | import pickle
5 |
6 | import numpy as np
7 | import pandas as pd
8 | import substratools as tools
9 |
10 |
11 | # We are using helper decorators from the substratools library to avoid rewriting boilerplate code.
12 | # The function to be registered takes an `inputs` parameter, which will be matched to the list of
13 | # `FunctionInputSpec` provided in the `FunctionSpec` definition.
14 | # In a similar way, the parameter `outputs` will be matched to the `FunctionOutputSpec`.
15 | # The parameter `task_properties` contains if needed additional values that can be used by the function without being persisted.
@tools.register
def local_first_order_computation(inputs, outputs, task_properties):
    """Compute the local first-order statistics of the organisation's data samples.

    The saved state holds the number of rows (``n_samples``), the per-column sums of
    the numerical columns (stored under the ``means`` key, they are divided by the
    total sample count in ``aggregation``) and the per-modality counts of the
    categorical columns (``counts``).
    """
    samples = inputs["datasamples"]

    numeric_sums = samples.select_dtypes(include=np.number).sum().to_dict()

    modality_counts = {}
    for column_name, column in samples.select_dtypes(include="category").items():
        modality_counts[column_name] = column.value_counts().to_dict()

    local_state = {
        "n_samples": len(samples),
        "means": numeric_sums,
        "counts": modality_counts,
    }
    save_states(local_state, outputs["local_analytics_first_moments"])
28 |
29 |
@tools.register
def local_second_order_computation(inputs, outputs, task_properties):
    """Compute the local second-order statistics around the shared means.

    Loads the aggregated state from ``inputs["shared_states"]``, reads its ``means``
    entry and, for every numerical column, sums the squared deviations of the local
    rows from that mean. The result is stored under the ``std`` key; note that it is a
    sum of squared deviations, which ``aggregation`` later divides by the total
    sample count.
    """
    df = inputs["datasamples"]
    shared_states = load_states(inputs["shared_states"])
    means = pd.Series(shared_states["means"])

    squared_deviations = np.power(df.select_dtypes(include=np.number) - means, 2).sum()

    states = {
        "n_samples": len(df),
        # Stored as a plain dict, like the first-order state, so the pickled payload
        # does not carry a pandas object.
        "std": squared_deviations.to_dict(),
    }
    save_states(states, outputs["local_analytics_second_moments"])
40 |
41 |
@tools.register
def aggregation(inputs, outputs, task_properties):
    """Merge the local analytics of every organisation into one shared state.

    Each local state contributes its values divided by the total number of samples
    over all states. Numerical columns are accumulated as a single number per column;
    categorical columns are accumulated per modality. The ``n_samples`` entry is only
    used to compute the total and is not part of the output.
    """
    local_states = [load_states(state_path) for state_path in inputs["local_analytics_list"]]
    n_total = sum(local_state["n_samples"] for local_state in local_states)

    merged = {}
    for local_state in local_states:
        for metric_name, per_column in local_state.items():
            if metric_name == "n_samples":
                # already accounted for in n_total
                continue
            for column, value in per_column.items():
                metric_acc = merged.setdefault(metric_name, {})
                if isinstance(value, dict):
                    # categorical column: value maps each modality to its local count
                    if not metric_acc.get(column):
                        metric_acc[column] = {}
                    modality_acc = metric_acc[column]
                    for modality, count in value.items():
                        modality_acc[modality] = modality_acc.get(modality, 0.0) + count / n_total
                else:
                    # numerical column: value is a single number
                    metric_acc[column] = metric_acc.get(column, 0.0) + value / n_total

    # JSON round trip kept from the original implementation: the saved state only
    # contains JSON-native containers and values
    merged = json.loads(json.dumps(merged))

    save_states(merged, outputs["shared_states"])
71 |
72 |
def load_states(path):
    """Read back a state object previously pickled at ``path``."""
    # NOTE(review): pickle.load executes arbitrary code from the file; only use it on
    # states produced by trusted tasks.
    with open(path, "rb") as stream:
        states = pickle.load(stream)
    return states
76 |
77 |
def save_states(states, path):
    """Pickle ``states`` into the file located at ``path``."""
    with open(path, "wb") as stream:
        pickle.dump(states, stream)
81 |
82 |
# Script entry point. Each Dockerfile of this example runs this module with
# `--function-name <name>`; the call below hands control to substratools, which is expected
# to execute the registered function with that name, providing it with the arguments
# defined at registration time by the Substra specs.
if __name__ == "__main__":
    tools.execute()
87 |
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/diabetes_example/assets/functions/local_first_order_computation/Dockerfile:
--------------------------------------------------------------------------------
# this base image works in both CPU and GPU enabled environments
FROM python:3.12-slim

# install dependencies (--no-cache-dir keeps pip's download cache out of the image layer)
RUN pip3 install --no-cache-dir pandas numpy substratools

# copy your algorithm script into the docker image (COPY is the recommended instruction for local files)
COPY federated_analytics_functions.py .

# define how script is run
ENTRYPOINT ["python3", "federated_analytics_functions.py", "--function-name", "local_first_order_computation"]
12 |
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/diabetes_example/assets/functions/local_second_order_computation/Dockerfile:
--------------------------------------------------------------------------------
# this base image works in both CPU and GPU enabled environments
FROM python:3.12-slim

# install dependencies (--no-cache-dir keeps pip's download cache out of the image layer)
RUN pip3 install --no-cache-dir pandas numpy substratools

# copy your algorithm script into the docker image (COPY is the recommended instruction for local files)
COPY federated_analytics_functions.py .

# define how script is run
ENTRYPOINT ["python3", "federated_analytics_functions.py", "--function-name", "local_second_order_computation"]
12 |
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/diabetes_example/assets/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib==3.9.2
2 | scikit-learn==1.5.2
3 | pandas==2.2.2
4 | substra
5 | substratools
6 |
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/index.rst:
--------------------------------------------------------------------------------
1 | Substra examples
2 | ================
3 |
4 | The examples below are compatible with Substra |substra_version|.
5 |
6 |
7 | Examples to get started
8 | ^^^^^^^^^^^^^^^^^^^^^^^
9 |
10 | .. nbgallery::
11 | ../../../examples/substra_core/titanic_example/run_titanic.ipynb
12 |
13 | Examples to go further
14 | ^^^^^^^^^^^^^^^^^^^^^^
15 |
16 | .. nbgallery::
17 | ../../../examples/substra_core/diabetes_example/run_diabetes.ipynb
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/titanic_example/assets/dataset/description.md:
--------------------------------------------------------------------------------
1 | # Titanic
2 |
3 | This dataset comes from Kaggle's ["Titanic: Machine Learning from Disaster" challenge](https://www.kaggle.com/c/titanic/data).
4 |
5 | ## Test and train data samples
6 |
7 | Since Kaggle doesn't provide the ground truth for its test set, all data samples attached to this dataset are extracted from Kaggle's train set.
8 |
9 | Out of the 891 records of the train set:
10 |
11 | - 20% were kept aside as the test data sample
12 | - the remaining 80% were split among 4 train data samples
13 |
14 | This way it is possible to demonstrate cross-validation strategies using these assets.
15 |
16 | These splits were generated using the following code:
17 |
18 | ```python
19 | import os
20 | import pandas as pd
21 | from sklearn.model_selection import KFold, train_test_split
22 |
23 | data = pd.read_csv('train.csv')
24 |
25 | # generate splits
26 | train_data, test_data_sample = train_test_split(data, test_size=0.2)
27 | kf = KFold(n_splits=4)
28 | splits = kf.split(train_data)
29 | train_data_samples = []
30 | for train_index, test_index in splits:
31 | train_data_samples.append(train_data.iloc[test_index])
32 |
33 | # save splits
34 | DATA_SAMPLES_ROOT = '../assets'
35 |
36 | filename = os.path.join(DATA_SAMPLES_ROOT, 'test_data_sample/test.csv')
37 | os.makedirs(os.path.dirname(filename))
38 | with open(filename, 'w') as f:
39 | test_data_sample.to_csv(f)
40 |
41 | for i, train_data_sample in enumerate(train_data_samples):
42 | filename = os.path.join(DATA_SAMPLES_ROOT, f'../assets/train_data_samples/train{i}/train{i}.csv')
43 | os.makedirs(os.path.dirname(filename))
44 | with open(filename, 'w') as f:
45 | train_data_sample.to_csv(f)
46 | ```
47 |
48 | ## Data samples structure
49 |
50 | All data samples have the same exact structure. They all contain a single CSV file with the following fields (descriptions are extracted from Kaggle):
51 |
52 | | Field | Type | Description | Values |
53 | | ------------- | ------- | ------------------------------------------ | ------------------------------------------------------------------- |
54 | | `PassengerId` | integer | Unique identifier of the passenger | `1`, `2`, `3`... |
55 | | `Survived` | bool | Survived or not | either `0` or `1` |
56 | | `Pclass` | integer | Class of Travel | either `1`, `2` or `3` |
57 | | `Name` | string | Name of Passenger | `Braund, Mr. Owen Harris` |
58 | | `Sex` | string | Gender | either `male` or `female` |
59 | | `Age` | integer | Age of Passengers | `24` |
60 | | `SibSp` | integer | Number of Sibling/Spouse aboard | `0` |
61 | | `Parch` | integer | Number of Parent/Child aboard | `0` |
62 | | `Ticket` | string | Ticket number | `A/5 21171` |
63 | | `Fare` | float | Price of the ticket | `71.2833` |
64 | | `Cabin` | string | Cabin number | `C85` |
65 | | `Embarked` | string | The port in which a passenger has embarked | either `C` for Cherbourg, `S` for Southampton or `Q` for Queenstown |
66 |
67 | ## Opener usage
68 |
69 | The opener exposes 2 methods:
70 |
71 | - `get_data` returns all data
72 | - `fake_data` returns fake data
73 |
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/titanic_example/assets/dataset/titanic_opener.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import string
4 |
5 | import numpy as np
6 | import pandas as pd
7 | import substratools as tools
8 |
9 |
class TitanicOpener(tools.Opener):
    """Opener reading the Titanic data samples stored as CSV files."""

    def get_data(self, folders):
        """Load and concatenate every CSV file found directly inside ``folders``.

        Args:
            folders: Paths of the data sample folders to read.

        Returns:
            A single DataFrame holding the rows of all CSV files, in folder order and,
            within a folder, in sorted file name order.
        """
        # os.listdir returns entries in arbitrary order; sorting keeps the row order,
        # and therefore anything trained on it, reproducible from one run to the next.
        paths = [
            os.path.join(folder, filename)
            for folder in folders
            for filename in sorted(os.listdir(folder))
            if filename.endswith(".csv")
        ]

        # load data
        return pd.concat([pd.read_csv(path) for path in paths])

    def fake_data(self, n_samples=None):
        """Return a DataFrame of random rows with the same columns as the real data.

        The frame has ``n_samples`` rows when that value is set and does not exceed 100,
        and 100 rows otherwise.
        """
        N_SAMPLES = n_samples if n_samples and n_samples <= 100 else 100

        # Candidate fares, built once instead of once per generated sample.
        fare_values = np.arange(15, 150, 0.01)

        data = {
            "PassengerId": list(range(N_SAMPLES)),
            "Survived": [random.choice([True, False]) for _ in range(N_SAMPLES)],
            "Pclass": [random.choice([1, 2, 3]) for _ in range(N_SAMPLES)],
            "Name": ["".join(random.sample(string.ascii_letters, 10)) for _ in range(N_SAMPLES)],
            "Sex": [random.choice(["male", "female"]) for _ in range(N_SAMPLES)],
            "Age": [random.choice(range(7, 77)) for _ in range(N_SAMPLES)],
            "SibSp": [random.choice(range(4)) for _ in range(N_SAMPLES)],
            "Parch": [random.choice(range(4)) for _ in range(N_SAMPLES)],
            "Ticket": ["".join(random.sample(string.ascii_letters, 10)) for _ in range(N_SAMPLES)],
            "Fare": [random.choice(fare_values) for _ in range(N_SAMPLES)],
            "Cabin": ["".join(random.sample(string.ascii_letters, 3)) for _ in range(N_SAMPLES)],
            "Embarked": [random.choice(["C", "S", "Q"]) for _ in range(N_SAMPLES)],
        }
        return pd.DataFrame(data)
43 |
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/titanic_example/assets/function_random_forest/description.md:
--------------------------------------------------------------------------------
1 | # Titanic random forest
2 |
3 | Better performance through the usage of a Random Forest classifier.
4 |
5 | Based on Niklas Donges' article, [Predicting the Survival of Titanic Passengers](https://towardsdatascience.com/predicting-the-survival-of-titanic-passengers-30870ccc7e8)
6 |
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/titanic_example/assets/function_random_forest/predict/Dockerfile:
--------------------------------------------------------------------------------
# this base image works in both CPU and GPU enabled environments
FROM python:3.12-slim

# install dependencies (--no-cache-dir keeps pip's download cache out of the image layer)
RUN pip3 install --no-cache-dir pandas numpy 'scikit-learn==1.5.2' substratools

# copy your function script into the docker image (COPY is the recommended instruction for local files)
COPY titanic_function_rf.py .

# define how script is run
ENTRYPOINT ["python3", "titanic_function_rf.py", "--function-name", "predict"]
12 |
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/titanic_example/assets/function_random_forest/titanic_function_rf.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import re
3 |
4 | import numpy as np
5 | import pandas as pd
6 | import substratools as tools
7 | from sklearn.ensemble import RandomForestClassifier
8 |
9 |
@tools.register
def train(inputs, outputs, task_properties):
    """Fit a random forest on the Titanic data samples and save it to ``outputs["model"]``.

    The ``Survived`` column of ``inputs["datasamples"]`` is the target; all the other
    columns go through ``_normalize_X`` before fitting.
    """
    samples = inputs["datasamples"]
    labels = samples.Survived
    features = _normalize_X(samples.drop(columns="Survived"))

    # the RFC hyperparameters below were determined using:
    # >>> param_grid = {"criterion": ["gini", "entropy"],
    #                   "min_samples_leaf": [1, 5, 10, 25, 50, 70],
    #                   "min_samples_split": [2, 4, 10, 12, 16, 18, 25, 35],
    #                   "n_estimators": [100, 400, 700, 1000, 1500]}
    # >>> rf = RandomForestClassifier(n_estimators=100, max_features='auto', oob_score=True,
    #                                 random_state=1, n_jobs=-1)
    # >>> clf = GridSearchCV(estimator=rf, param_grid=param_grid, n_jobs=-1)
    hyperparameters = {
        "criterion": "gini",
        "min_samples_leaf": 1,
        "min_samples_split": 10,
        "n_estimators": 100,
        "oob_score": True,
        "random_state": 1,
        "n_jobs": -1,
    }

    # Random Forest
    classifier = RandomForestClassifier(**hyperparameters)
    classifier.fit(features, labels.values.ravel())

    save_model(classifier, outputs["model"])
38 |
39 |
@tools.register
def predict(inputs, outputs, task_properties):
    """Predict survival for the data samples and save the result to ``outputs["predictions"]``.

    The model is loaded from ``inputs["models"]``; the ``Survived`` column is dropped
    from the data samples before they are normalized and fed to the model.
    """
    features = inputs["datasamples"].drop(columns="Survived")
    fitted_model = load_model(inputs["models"])
    predictions = _predict_pandas(fitted_model, _normalize_X(features))

    save_predictions(predictions, outputs["predictions"])
48 |
49 |
50 | def _predict_pandas(model, X):
51 | y_pred = model.predict(X)
52 | return pd.DataFrame(columns=["Survived"], data=y_pred)
53 |
54 |
def load_model(path):
    """Read back a model previously pickled at ``path``."""
    # NOTE(review): pickle.load executes arbitrary code from the file; only use it on
    # models produced by trusted tasks.
    with open(path, "rb") as stream:
        model = pickle.load(stream)
    return model
58 |
59 |
def save_model(model, path):
    """Pickle ``model`` into the file located at ``path``."""
    with open(path, "wb") as stream:
        pickle.dump(model, stream)
63 |
64 |
def save_predictions(y_pred, path):
    """Write the predictions frame ``y_pred`` to ``path`` as CSV, without the index column."""
    y_pred.to_csv(path_or_buf=path, index=False)
67 |
68 |
69 | def _normalize_X(X):
70 | # Relatives
71 | X["relatives"] = X["SibSp"] + X["Parch"]
72 | X.loc[X["relatives"] > 0, "not_alone"] = 0
73 | X.loc[X["relatives"] == 0, "not_alone"] = 1
74 | X["not_alone"] = X["not_alone"].astype(int)
75 |
76 | # Passenger ID
77 | X = X.drop(["PassengerId"], axis=1)
78 |
79 | # Cabin
80 | deck = {"A": 1, "B": 2, "C": 3, "D": 4, "E": 5, "F": 6, "G": 7, "U": 8}
81 | X["Cabin"] = X["Cabin"].fillna("U0")
82 | X["Deck"] = X["Cabin"].map(lambda x: re.compile("([a-zA-Z]+)").search(x).group())
83 | X["Deck"] = X["Deck"].map(deck)
84 | X["Deck"] = X["Deck"].fillna(0)
85 | X["Deck"] = X["Deck"].astype(int)
86 | X = X.drop(["Cabin"], axis=1)
87 |
88 | # Age
89 | mean = X["Age"].mean()
90 | std = X["Age"].std()
91 | is_null = X["Age"].isnull().sum()
92 |
93 | # fill NaN values in Age column with mean
94 | age_slice = X["Age"].copy()
95 | age_slice[np.isnan(age_slice)] = mean
96 | X["Age"] = age_slice
97 | X["Age"] = X["Age"].astype(int)
98 | # make Age into a category
99 | X["Age"] = X["Age"].astype(int)
100 | X.loc[X["Age"] <= 11, "Age"] = 0
101 | X.loc[(X["Age"] > 11) & (X["Age"] <= 18), "Age"] = 1
102 | X.loc[(X["Age"] > 18) & (X["Age"] <= 22), "Age"] = 2
103 | X.loc[(X["Age"] > 22) & (X["Age"] <= 27), "Age"] = 3
104 | X.loc[(X["Age"] > 27) & (X["Age"] <= 33), "Age"] = 4
105 | X.loc[(X["Age"] > 33) & (X["Age"] <= 40), "Age"] = 5
106 | X.loc[(X["Age"] > 40) & (X["Age"] <= 66), "Age"] = 6
107 | X.loc[X["Age"] > 66, "Age"] = 6
108 | # create Age_Class feature
109 | X["Age_Class"] = X["Age"] * X["Pclass"]
110 |
111 | # Embarked
112 | ports = {"S": 0, "C": 1, "Q": 2}
113 | X["Embarked"] = X["Embarked"].fillna("S")
114 | X["Embarked"] = X["Embarked"].map(ports)
115 |
116 | # Fare
117 | X["Fare"] = X["Fare"].fillna(0)
118 | X["Fare"] = X["Fare"].astype(int)
119 | # make Fare into a category
120 | X.loc[X["Fare"] <= 7.91, "Fare"] = 0
121 | X.loc[(X["Fare"] > 7.91) & (X["Fare"] <= 14.454), "Fare"] = 1
122 | X.loc[(X["Fare"] > 14.454) & (X["Fare"] <= 31), "Fare"] = 2
123 | X.loc[(X["Fare"] > 31) & (X["Fare"] <= 99), "Fare"] = 3
124 | X.loc[(X["Fare"] > 99) & (X["Fare"] <= 250), "Fare"] = 4
125 | X.loc[X["Fare"] > 250, "Fare"] = 5
126 | X["Fare"] = X["Fare"].astype(int)
127 | # create Fare_Per_Person feature
128 | X["Fare_Per_Person"] = X["Fare"] / (X["relatives"] + 1)
129 | X["Fare_Per_Person"] = X["Fare_Per_Person"].astype(int)
130 |
131 | # Name
132 | titles = {"Mr": 1, "Miss": 2, "Mrs": 3, "Master": 4, "Rare": 5}
133 | # extract titles
134 | X["Title"] = X.Name.str.extract(r" ([A-Za-z]+)\.", expand=False)
135 | # replace titles with a more common title or as Rare
136 | X["Title"] = X["Title"].replace(
137 | [
138 | "Lady",
139 | "Countess",
140 | "Capt",
141 | "Col",
142 | "Don",
143 | "Dr",
144 | "Major",
145 | "Rev",
146 | "Sir",
147 | "Jonkheer",
148 | "Dona",
149 | ],
150 | "Rare",
151 | )
152 | X["Title"] = X["Title"].replace("Mlle", "Miss")
153 | X["Title"] = X["Title"].replace("Ms", "Miss")
154 | X["Title"] = X["Title"].replace("Mme", "Mrs")
155 | # convert titles into numbers
156 | X["Title"] = X["Title"].map(titles)
157 | # filling NaN with 0, to get safe
158 | X["Title"] = X["Title"].fillna(0)
159 | X = X.drop(["Name"], axis=1)
160 |
161 | # Sex
162 | genders = {"male": 0, "female": 1}
163 | X["Sex"] = X["Sex"].map(genders)
164 |
165 | # Ticket
166 | X = X.drop(["Ticket"], axis=1)
167 |
168 | # Drop non relevant features
169 | X = X.drop("not_alone", axis=1)
170 | X = X.drop("Parch", axis=1)
171 |
172 | return X
173 |
174 |
# Script entry point. The train and predict Dockerfiles run this module with
# `--function-name train` or `--function-name predict`; the call below hands control to
# substratools, which is expected to execute the registered function with that name.
if __name__ == "__main__":
    tools.execute()
177 |
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/titanic_example/assets/function_random_forest/train/Dockerfile:
--------------------------------------------------------------------------------
# this base image works in both CPU and GPU enabled environments
FROM python:3.12-slim

# install dependencies (--no-cache-dir keeps pip's download cache out of the image layer)
RUN pip3 install --no-cache-dir pandas numpy 'scikit-learn==1.5.2' substratools

# copy your function script into the docker image (COPY is the recommended instruction for local files)
COPY titanic_function_rf.py .

# define how script is run
ENTRYPOINT ["python3", "titanic_function_rf.py", "--function-name", "train"]
12 |
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/titanic_example/assets/metric/Dockerfile:
--------------------------------------------------------------------------------
# this base image works in both CPU and GPU enabled environments
FROM python:3.12-slim

# install dependencies (--no-cache-dir keeps pip's download cache out of the image layer)
RUN pip3 install --no-cache-dir pandas numpy 'scikit-learn==1.5.2' substratools

# copy your metrics script into the docker image (COPY is the recommended instruction for local files)
COPY titanic_metrics.py .

# define how script is run
ENTRYPOINT ["python3", "titanic_metrics.py", "--function-name", "score"]
12 |
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/titanic_example/assets/metric/description.md:
--------------------------------------------------------------------------------
1 | # Titanic: Machine Learning From disaster
2 |
3 | *Taken from [the similarly named Kaggle challenge](https://www.kaggle.com/c/titanic/overview)*
4 |
5 | The sinking of the RMS Titanic is one of the most infamous shipwrecks in history. On April 15, 1912, during her maiden voyage, the Titanic sank after colliding with an iceberg, killing 1502 out of 2224 passengers and crew. This sensational tragedy shocked the international community and led to better safety regulations for ships.
6 |
7 | One of the reasons that the shipwreck led to such loss of life was that there were not enough lifeboats for the passengers and crew. Although there was some element of luck involved in surviving the sinking, some groups of people were more likely to survive than others, such as women, children, and the upper-class.
8 |
9 | In this challenge, we ask you to complete the analysis of what sorts of people were likely to survive. In particular, we ask you to apply the tools of machine learning to predict which passengers survived the tragedy.
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/titanic_example/assets/metric/titanic_metrics.py:
--------------------------------------------------------------------------------
1 | import substratools as tools
2 | from sklearn.metrics import accuracy_score
3 | import pandas as pd
4 |
5 |
6 | # Metric function: computes the accuracy of the predictions against the
7 | # "Survived" values of the data samples and saves it as the task performance.
8 | #
9 | # inputs: mapping read with the keys "datasamples" and "predictions".
10 | # outputs: mapping read with the key "performance".
11 | # task_properties: not used by this function.
12 | @tools.register
13 | def score(inputs, outputs, task_properties):
14 | # ground-truth labels; assumes inputs["datasamples"] is a pandas DataFrame
15 | # exposing a "Survived" column — TODO confirm against the data opener
16 | y_true = inputs["datasamples"].Survived.values
17 | # inputs["predictions"] is handed to pd.read_csv, so it must be a CSV path/buffer
18 | y_pred = load_predictions(inputs["predictions"])
19 |
20 | perf = accuracy_score(y_true, y_pred)
21 | tools.save_performance(perf, outputs["performance"])
22 |
23 |
24 | # Reads the predictions CSV at `path` and returns it as a pandas DataFrame.
25 | # NOTE(review): the whole frame is passed to accuracy_score, so the file is
26 | # presumably a single prediction column written without an index — confirm
27 | # against the function that saves the predictions.
28 | def load_predictions(path):
29 | return pd.read_csv(path)
30 |
31 |
32 | # Script entry point: only runs when the file is executed directly.
33 | # NOTE(review): tools.execute() presumably runs the registered function named
34 | # by the --function-name argument (the metric Dockerfile passes "score") — confirm.
35 | if __name__ == "__main__":
36 | tools.execute()
37 |
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/titanic_example/assets/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib==3.9.2
2 | scikit-learn==1.5.2
3 | pandas==2.2.2
4 | substra
5 | substratools
6 |
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/titanic_example/assets/train_data_samples/data_sample_0/data_sample_0.csv:
--------------------------------------------------------------------------------
1 | ,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
2 | 171,172,0,3,"Rice, Master. Arthur",male,4.0,4,1,382652,29.125,,Q
3 | 690,691,1,1,"Dick, Mr. Albert Adrian",male,31.0,1,0,17474,57.0,B20,S
4 | 225,226,0,3,"Berglund, Mr. Karl Ivar Sven",male,22.0,0,0,PP 4348,9.35,,S
5 | 170,171,0,1,"Van der hoef, Mr. Wyckoff",male,61.0,0,0,111240,33.5,B19,S
6 | 528,529,0,3,"Salonen, Mr. Johan Werner",male,39.0,0,0,3101296,7.925,,S
7 | 50,51,0,3,"Panula, Master. Juha Niilo",male,7.0,4,1,3101295,39.6875,,S
8 | 435,436,1,1,"Carter, Miss. Lucile Polk",female,14.0,1,2,113760,120.0,B96 B98,S
9 | 483,484,1,3,"Turkula, Mrs. (Hedwig)",female,63.0,0,0,4134,9.5875,,S
10 | 817,818,0,2,"Mallet, Mr. Albert",male,31.0,1,1,S.C./PARIS 2079,37.0042,,C
11 | 592,593,0,3,"Elsbury, Mr. William James",male,47.0,0,0,A/5 3902,7.25,,S
12 | 637,638,0,2,"Collyer, Mr. Harvey",male,31.0,1,1,C.A. 31921,26.25,,S
13 | 840,841,0,3,"Alhomaki, Mr. Ilmari Rudolf",male,20.0,0,0,SOTON/O2 3101287,7.925,,S
14 | 206,207,0,3,"Backstrom, Mr. Karl Alfred",male,32.0,1,0,3101278,15.85,,S
15 | 390,391,1,1,"Carter, Mr. William Ernest",male,36.0,1,2,113760,120.0,B96 B98,S
16 | 299,300,1,1,"Baxter, Mrs. James (Helene DeLaudeniere Chaput)",female,50.0,0,1,PC 17558,247.5208,B58 B60,C
17 | 551,552,0,2,"Sharp, Mr. Percival James R",male,27.0,0,0,244358,26.0,,S
18 | 416,417,1,2,"Drew, Mrs. James Vivian (Lulu Thorne Christian)",female,34.0,1,1,28220,32.5,,S
19 | 575,576,0,3,"Patchett, Mr. George",male,19.0,0,0,358585,14.5,,S
20 | 544,545,0,1,"Douglas, Mr. Walter Donald",male,50.0,1,0,PC 17761,106.425,C86,C
21 | 576,577,1,2,"Garside, Miss. Ethel",female,34.0,0,0,243880,13.0,,S
22 | 670,671,1,2,"Brown, Mrs. Thomas William Solomon (Elizabeth Catherine Ford)",female,40.0,1,1,29750,39.0,,S
23 | 238,239,0,2,"Pengelly, Mr. Frederick William",male,19.0,0,0,28665,10.5,,S
24 | 761,762,0,3,"Nirva, Mr. Iisakki Antino Aijo",male,41.0,0,0,SOTON/O2 3101272,7.125,,S
25 | 526,527,1,2,"Ridsdale, Miss. Lucy",female,50.0,0,0,W./C. 14258,10.5,,S
26 | 60,61,0,3,"Sirayanian, Mr. Orsen",male,22.0,0,0,2669,7.2292,,C
27 | 609,610,1,1,"Shutes, Miss. Elizabeth W",female,40.0,0,0,PC 17582,153.4625,C125,S
28 | 661,662,0,3,"Badt, Mr. Mohamed",male,40.0,0,0,2623,7.225,,C
29 | 257,258,1,1,"Cherry, Miss. Gladys",female,30.0,0,0,110152,86.5,B77,S
30 | 533,534,1,3,"Peter, Mrs. Catherine (Catherine Rizk)",female,,0,2,2668,22.3583,,C
31 | 545,546,0,1,"Nicholson, Mr. Arthur Ernest",male,64.0,0,0,693,26.0,,S
32 | 791,792,0,2,"Gaskell, Mr. Alfred",male,16.0,0,0,239865,26.0,,S
33 | 720,721,1,2,"Harper, Miss. Annie Jessie ""Nina""",female,6.0,0,1,248727,33.0,,S
34 | 129,130,0,3,"Ekstrom, Mr. Johan",male,45.0,0,0,347061,6.975,,S
35 | 391,392,1,3,"Jansson, Mr. Carl Olof",male,21.0,0,0,350034,7.7958,,S
36 | 767,768,0,3,"Mangan, Miss. Mary",female,30.5,0,0,364850,7.75,,Q
37 | 782,783,0,1,"Long, Mr. Milton Clyde",male,29.0,0,0,113501,30.0,D6,S
38 | 598,599,0,3,"Boulos, Mr. Hanna",male,,0,0,2664,7.225,,C
39 | 345,346,1,2,"Brown, Miss. Amelia ""Mildred""",female,24.0,0,0,248733,13.0,F33,S
40 | 81,82,1,3,"Sheerlinck, Mr. Jan Baptist",male,29.0,0,0,345779,9.5,,S
41 | 718,719,0,3,"McEvoy, Mr. Michael",male,,0,0,36568,15.5,,Q
42 | 638,639,0,3,"Panula, Mrs. Juha (Maria Emilia Ojala)",female,41.0,0,5,3101295,39.6875,,S
43 | 92,93,0,1,"Chaffee, Mr. Herbert Fuller",male,46.0,1,0,W.E.P. 5734,61.175,E31,S
44 | 374,375,0,3,"Palsson, Miss. Stina Viola",female,3.0,3,1,349909,21.075,,S
45 | 853,854,1,1,"Lines, Miss. Mary Conover",female,16.0,0,1,PC 17592,39.4,D28,S
46 | 622,623,1,3,"Nakid, Mr. Sahid",male,20.0,1,1,2653,15.7417,,C
47 | 133,134,1,2,"Weisz, Mrs. Leopold (Mathilde Francoise Pede)",female,29.0,1,0,228414,26.0,,S
48 | 362,363,0,3,"Barbara, Mrs. (Catherine David)",female,45.0,0,1,2691,14.4542,,C
49 | 815,816,0,1,"Fry, Mr. Richard",male,,0,0,112058,0.0,B102,S
50 | 784,785,0,3,"Ali, Mr. William",male,25.0,0,0,SOTON/O.Q. 3101312,7.05,,S
51 | 368,369,1,3,"Jermyn, Miss. Annie",female,,0,0,14313,7.75,,Q
52 | 20,21,0,2,"Fynney, Mr. Joseph J",male,35.0,0,0,239865,26.0,,S
53 | 687,688,0,3,"Dakic, Mr. Branko",male,19.0,0,0,349228,10.1708,,S
54 | 641,642,1,1,"Sagesser, Mlle. Emma",female,24.0,0,0,PC 17477,69.3,B35,C
55 | 613,614,0,3,"Horgan, Mr. John",male,,0,0,370377,7.75,,Q
56 | 626,627,0,2,"Kirkland, Rev. Charles Leonard",male,57.0,0,0,219533,12.35,,Q
57 | 799,800,0,3,"Van Impe, Mrs. Jean Baptiste (Rosalie Paula Govaert)",female,30.0,1,1,345773,24.15,,S
58 | 515,516,0,1,"Walker, Mr. William Anderson",male,47.0,0,0,36967,34.0208,D46,S
59 | 870,871,0,3,"Balkic, Mr. Cerin",male,26.0,0,0,349248,7.8958,,S
60 | 356,357,1,1,"Bowerman, Miss. Elsie Edith",female,22.0,0,1,113505,55.0,E33,S
61 | 252,253,0,1,"Stead, Mr. William Thomas",male,62.0,0,0,113514,26.55,C87,S
62 | 130,131,0,3,"Drazenoic, Mr. Jozef",male,33.0,0,0,349241,7.8958,,C
63 | 166,167,1,1,"Chibnall, Mrs. (Edith Martha Bowerman)",female,,0,1,113505,55.0,E33,S
64 | 583,584,0,1,"Ross, Mr. John Hugo",male,36.0,0,0,13049,40.125,A10,C
65 | 721,722,0,3,"Jensen, Mr. Svend Lauritz",male,17.0,1,0,350048,7.0542,,S
66 | 542,543,0,3,"Andersson, Miss. Sigrid Elisabeth",female,11.0,4,2,347082,31.275,,S
67 | 707,708,1,1,"Calderhead, Mr. Edward Pennington",male,42.0,0,0,PC 17476,26.2875,E24,S
68 | 742,743,1,1,"Ryerson, Miss. Susan Parker ""Suzette""",female,21.0,2,2,PC 17608,262.375,B57 B59 B63 B66,C
69 | 260,261,0,3,"Smith, Mr. Thomas",male,,0,0,384461,7.75,,Q
70 | 22,23,1,3,"McGowan, Miss. Anna ""Annie""",female,15.0,0,0,330923,8.0292,,Q
71 | 694,695,0,1,"Weir, Col. John",male,60.0,0,0,113800,26.55,,S
72 | 773,774,0,3,"Elias, Mr. Dibo",male,,0,0,2674,7.225,,C
73 | 70,71,0,2,"Jenkin, Mr. Stephen Curnow",male,32.0,0,0,C.A. 33111,10.5,,S
74 |
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/titanic_example/assets/train_data_samples/data_sample_1/data_sample_1.csv:
--------------------------------------------------------------------------------
1 | ,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
2 | 352,353,0,3,"Elias, Mr. Tannous",male,15.0,1,1,2695,7.2292,,C
3 | 703,704,0,3,"Gallagher, Mr. Martin",male,25.0,0,0,36864,7.7417,,Q
4 | 210,211,0,3,"Ali, Mr. Ahmed",male,24.0,0,0,SOTON/O.Q. 3101311,7.05,,S
5 | 222,223,0,3,"Green, Mr. George Henry",male,51.0,0,0,21440,8.05,,S
6 | 213,214,0,2,"Givard, Mr. Hans Kristensen",male,30.0,0,0,250646,13.0,,S
7 | 7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S
8 | 367,368,1,3,"Moussa, Mrs. (Mantoura Boulos)",female,,0,0,2626,7.2292,,C
9 | 647,648,1,1,"Simonius-Blumer, Col. Oberst Alfons",male,56.0,0,0,13213,35.5,A26,C
10 | 875,876,1,3,"Najib, Miss. Adele Kiamie ""Jane""",female,15.0,0,0,2667,7.225,,C
11 | 442,443,0,3,"Petterson, Mr. Johan Emil",male,25.0,1,0,347076,7.775,,S
12 | 652,653,0,3,"Kalvik, Mr. Johannes Halvorsen",male,21.0,0,0,8475,8.4333,,S
13 | 109,110,1,3,"Moran, Miss. Bertha",female,,1,0,371110,24.15,,Q
14 | 307,308,1,1,"Penasco y Castellana, Mrs. Victor de Satode (Maria Josefa Perez de Soto y Vallejo)",female,17.0,1,0,PC 17758,108.9,C65,C
15 | 254,255,0,3,"Rosblom, Mrs. Viktor (Helena Wilhelmina)",female,41.0,0,2,370129,20.2125,,S
16 | 143,144,0,3,"Burke, Mr. Jeremiah",male,19.0,0,0,365222,6.75,,Q
17 | 104,105,0,3,"Gustafsson, Mr. Anders Vilhelm",male,37.0,2,0,3101276,7.925,,S
18 | 388,389,0,3,"Sadlier, Mr. Matthew",male,,0,0,367655,7.7292,,Q
19 | 754,755,1,2,"Herman, Mrs. Samuel (Jane Laver)",female,48.0,1,2,220845,65.0,,S
20 | 475,476,0,1,"Clifford, Mr. George Quincy",male,,0,0,110465,52.0,A14,S
21 | 455,456,1,3,"Jalsevac, Mr. Ivan",male,29.0,0,0,349240,7.8958,,C
22 | 655,656,0,2,"Hickman, Mr. Leonard Mark",male,24.0,2,0,S.O.C. 14879,73.5,,S
23 | 35,36,0,1,"Holverson, Mr. Alexander Oskar",male,42.0,1,0,113789,52.0,,S
24 | 566,567,0,3,"Stoytcheff, Mr. Ilia",male,19.0,0,0,349205,7.8958,,S
25 | 804,805,1,3,"Hedman, Mr. Oskar Arvid",male,27.0,0,0,347089,6.975,,S
26 | 867,868,0,1,"Roebling, Mr. Washington Augustus II",male,31.0,0,0,PC 17590,50.4958,A24,S
27 | 822,823,0,1,"Reuchlin, Jonkheer. John George",male,38.0,0,0,19972,0.0,,S
28 | 141,142,1,3,"Nysten, Miss. Anna Sofia",female,22.0,0,0,347081,7.75,,S
29 | 800,801,0,2,"Ponesell, Mr. Martin",male,34.0,0,0,250647,13.0,,S
30 | 632,633,1,1,"Stahelin-Maeglin, Dr. Max",male,32.0,0,0,13214,30.5,B50,C
31 | 806,807,0,1,"Andrews, Mr. Thomas Jr",male,39.0,0,0,112050,0.0,A36,S
32 | 812,813,0,2,"Slemen, Mr. Richard James",male,35.0,0,0,28206,10.5,,S
33 | 525,526,0,3,"Farrell, Mr. James",male,40.5,0,0,367232,7.75,,Q
34 | 737,738,1,1,"Lesurer, Mr. Gustave J",male,35.0,0,0,PC 17755,512.3292,B101,C
35 | 508,509,0,3,"Olsen, Mr. Henry Margido",male,28.0,0,0,C 4001,22.525,,S
36 | 527,528,0,1,"Farthing, Mr. John",male,,0,0,PC 17483,221.7792,C95,S
37 | 46,47,0,3,"Lennon, Mr. Denis",male,,1,0,370371,15.5,,Q
38 | 406,407,0,3,"Widegren, Mr. Carl/Charles Peter",male,51.0,0,0,347064,7.75,,S
39 | 777,778,1,3,"Emanuel, Miss. Virginia Ethel",female,5.0,0,0,364516,12.475,,S
40 | 836,837,0,3,"Pasic, Mr. Jakob",male,21.0,0,0,315097,8.6625,,S
41 | 265,266,0,2,"Reeves, Mr. David",male,36.0,0,0,C.A. 17248,10.5,,S
42 | 608,609,1,2,"Laroche, Mrs. Joseph (Juliette Marie Louise Lafargue)",female,22.0,1,2,SC/Paris 2123,41.5792,,C
43 | 335,336,0,3,"Denkoff, Mr. Mitto",male,,0,0,349225,7.8958,,S
44 | 634,635,0,3,"Skoog, Miss. Mabel",female,9.0,3,2,347088,27.9,,S
45 | 195,196,1,1,"Lurette, Miss. Elise",female,58.0,0,0,PC 17569,146.5208,B80,C
46 | 127,128,1,3,"Madsen, Mr. Fridtjof Arne",male,24.0,0,0,C 17369,7.1417,,S
47 | 855,856,1,3,"Aks, Mrs. Sam (Leah Rosen)",female,18.0,0,1,392091,9.35,,S
48 | 712,713,1,1,"Taylor, Mr. Elmer Zebley",male,48.0,1,0,19996,52.0,C126,S
49 | 219,220,0,2,"Harris, Mr. Walter",male,30.0,0,0,W/C 14208,10.5,,S
50 | 700,701,1,1,"Astor, Mrs. John Jacob (Madeleine Talmadge Force)",female,18.0,1,0,PC 17757,227.525,C62 C64,C
51 | 364,365,0,3,"O'Brien, Mr. Thomas",male,,1,0,370365,15.5,,Q
52 | 665,666,0,2,"Hickman, Mr. Lewis",male,32.0,2,0,S.O.C. 14879,73.5,,S
53 | 102,103,0,1,"White, Mr. Richard Frasar",male,21.0,0,1,35281,77.2875,D26,S
54 | 305,306,1,1,"Allison, Master. Hudson Trevor",male,0.92,1,2,113781,151.55,C22 C26,S
55 | 743,744,0,3,"McNamee, Mr. Neal",male,24.0,1,0,376566,16.1,,S
56 | 752,753,0,3,"Vande Velde, Mr. Johannes Joseph",male,33.0,0,0,345780,9.5,,S
57 | 878,879,0,3,"Laleff, Mr. Kristo",male,,0,0,349217,7.8958,,S
58 | 763,764,1,1,"Carter, Mrs. William Ernest (Lucile Polk)",female,36.0,1,2,113760,120.0,B96 B98,S
59 | 798,799,0,3,"Ibrahim Shawah, Mr. Yousseff",male,30.0,0,0,2685,7.2292,,C
60 | 883,884,0,2,"Banfield, Mr. Frederick James",male,28.0,0,0,C.A./SOTON 34068,10.5,,S
61 | 823,824,1,3,"Moor, Mrs. (Beila)",female,27.0,0,1,392096,12.475,E121,S
62 | 667,668,0,3,"Rommetvedt, Mr. Knud Paust",male,,0,0,312993,7.775,,S
63 | 611,612,0,3,"Jardin, Mr. Jose Neto",male,,0,0,SOTON/O.Q. 3101305,7.05,,S
64 | 594,595,0,2,"Chapman, Mr. John Henry",male,37.0,1,0,SC/AH 29037,26.0,,S
65 | 491,492,0,3,"Windelov, Mr. Einar",male,21.0,0,0,SOTON/OQ 3101317,7.25,,S
66 | 248,249,1,1,"Beckwith, Mr. Richard Leonard",male,37.0,1,1,11751,52.5542,D35,S
67 | 772,773,0,2,"Mack, Mrs. (Mary)",female,57.0,0,0,S.O./P.P. 3,10.5,E77,S
68 | 563,564,0,3,"Simmons, Mr. John",male,,0,0,SOTON/OQ 392082,8.05,,S
69 | 550,551,1,1,"Thayer, Mr. John Borland Jr",male,17.0,0,2,17421,110.8833,C70,C
70 | 32,33,1,3,"Glynn, Miss. Mary Agatha",female,,0,0,335677,7.75,,Q
71 | 580,581,1,2,"Christy, Miss. Julie Rachel",female,25.0,1,1,237789,30.0,,S
72 | 708,709,1,1,"Cleaver, Miss. Alice",female,22.0,0,0,113781,151.55,,S
73 | 274,275,1,3,"Healy, Miss. Hanora ""Nora""",female,,0,0,370375,7.75,,Q
74 |
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/titanic_example/assets/train_data_samples/data_sample_2/data_sample_2.csv:
--------------------------------------------------------------------------------
1 | ,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
2 | 567,568,0,3,"Palsson, Mrs. Nils (Alma Cornelia Berglund)",female,29.0,0,4,349909,21.075,,S
3 | 556,557,1,1,"Duff Gordon, Lady. (Lucille Christiana Sutherland) (""Mrs Morgan"")",female,48.0,1,0,11755,39.6,A16,C
4 | 383,384,1,1,"Holverson, Mrs. Alexander Oskar (Mary Aline Towner)",female,35.0,1,0,113789,52.0,,S
5 | 23,24,1,1,"Sloper, Mr. William Thompson",male,28.0,0,0,113788,35.5,A6,S
6 | 645,646,1,1,"Harper, Mr. Henry Sleeper",male,48.0,1,0,PC 17572,76.7292,D33,C
7 | 753,754,0,3,"Jonkoff, Mr. Lalio",male,23.0,0,0,349204,7.8958,,S
8 | 321,322,0,3,"Danoff, Mr. Yoto",male,27.0,0,0,349219,7.8958,,S
9 | 775,776,0,3,"Myhrman, Mr. Pehr Fabian Oliver Malkolm",male,18.0,0,0,347078,7.75,,S
10 | 876,877,0,3,"Gustafsson, Mr. Alfred Ossian",male,20.0,0,0,7534,9.8458,,S
11 | 434,435,0,1,"Silvey, Mr. William Baird",male,50.0,1,0,13507,55.9,E44,S
12 | 239,240,0,2,"Hunt, Mr. George Henry",male,33.0,0,0,SCO/W 1585,12.275,,S
13 | 885,886,0,3,"Rice, Mrs. William (Margaret Norton)",female,39.0,0,5,382652,29.125,,Q
14 | 860,861,0,3,"Hansen, Mr. Claus Peter",male,41.0,2,0,350026,14.1083,,S
15 | 709,710,1,3,"Moubarek, Master. Halim Gonios (""William George"")",male,,1,1,2661,15.2458,,C
16 | 227,228,0,3,"Lovell, Mr. John Hall (""Henry"")",male,20.5,0,0,A/5 21173,7.25,,S
17 | 317,318,0,2,"Moraweck, Dr. Ernest",male,54.0,0,0,29011,14.0,,S
18 | 408,409,0,3,"Birkeland, Mr. Hans Martin Monsen",male,21.0,0,0,312992,7.775,,S
19 | 204,205,1,3,"Cohen, Mr. Gurshon ""Gus""",male,18.0,0,0,A/5 3540,8.05,,S
20 | 557,558,0,1,"Robbins, Mr. Victor",male,,0,0,PC 17757,227.525,,C
21 | 232,233,0,2,"Sjostedt, Mr. Ernst Adolf",male,59.0,0,0,237442,13.5,,S
22 | 453,454,1,1,"Goldenberg, Mr. Samuel L",male,49.0,1,0,17453,89.1042,C92,C
23 | 181,182,0,2,"Pernot, Mr. Rene",male,,0,0,SC/PARIS 2131,15.05,,C
24 | 180,181,0,3,"Sage, Miss. Constance Gladys",female,,8,2,CA. 2343,69.55,,S
25 | 618,619,1,2,"Becker, Miss. Marion Louise",female,4.0,2,1,230136,39.0,F4,S
26 | 319,320,1,1,"Spedden, Mrs. Frederic Oakley (Margaretta Corning Stone)",female,40.0,1,1,16966,134.5,E34,C
27 | 602,603,0,1,"Harrington, Mr. Charles H",male,,0,0,113796,42.4,,S
28 | 478,479,0,3,"Karlsson, Mr. Nils August",male,22.0,0,0,350060,7.5208,,S
29 | 228,229,0,2,"Fahlstrom, Mr. Arne Jonas",male,18.0,0,0,236171,13.0,,S
30 | 63,64,0,3,"Skoog, Master. Harald",male,4.0,3,2,347088,27.9,,S
31 | 851,852,0,3,"Svensson, Mr. Johan",male,74.0,0,0,347060,7.775,,S
32 | 220,221,1,3,"Sunderland, Mr. Victor Francis",male,16.0,0,0,SOTON/OQ 392089,8.05,,S
33 | 736,737,0,3,"Ford, Mrs. Edward (Margaret Ann Watson)",female,48.0,1,3,W./C. 6608,34.375,,S
34 | 486,487,1,1,"Hoyt, Mrs. Frederick Maxfield (Jane Anne Forby)",female,35.0,1,0,19943,90.0,C93,S
35 | 535,536,1,2,"Hart, Miss. Eva Miriam",female,7.0,0,2,F.C.C. 13529,26.25,,S
36 | 643,644,1,3,"Foo, Mr. Choong",male,,0,0,1601,56.4958,,S
37 | 571,572,1,1,"Appleton, Mrs. Edward Dale (Charlotte Lamson)",female,53.0,2,0,11769,51.4792,C101,S
38 | 421,422,0,3,"Charters, Mr. David",male,21.0,0,0,A/5. 13032,7.7333,,Q
39 | 287,288,0,3,"Naidenoff, Mr. Penko",male,22.0,0,0,349206,7.8958,,S
40 | 859,860,0,3,"Razi, Mr. Raihed",male,,0,0,2629,7.2292,,C
41 | 283,284,1,3,"Dorking, Mr. Edward Arthur",male,19.0,0,0,A/5. 10482,8.05,,S
42 | 262,263,0,1,"Taussig, Mr. Emil",male,52.0,1,1,110413,79.65,E67,S
43 | 874,875,1,2,"Abelson, Mrs. Samuel (Hannah Wizosky)",female,28.0,1,0,P/PP 3381,24.0,,C
44 | 193,194,1,2,"Navratil, Master. Michel M",male,3.0,1,1,230080,26.0,F2,S
45 | 351,352,0,1,"Williams-Lambert, Mr. Fletcher Fellows",male,,0,0,113510,35.0,C128,S
46 | 485,486,0,3,"Lefebre, Miss. Jeannie",female,,3,1,4133,25.4667,,S
47 | 69,70,0,3,"Kink, Mr. Vincenz",male,26.0,2,0,315151,8.6625,,S
48 | 67,68,0,3,"Crease, Mr. Ernest James",male,19.0,0,0,S.P. 3464,8.1583,,S
49 | 182,183,0,3,"Asplund, Master. Clarence Gustaf Hugo",male,9.0,4,2,347077,31.3875,,S
50 | 58,59,1,2,"West, Miss. Constance Mirium",female,5.0,1,2,C.A. 34651,27.75,,S
51 | 95,96,0,3,"Shorney, Mr. Charles Joseph",male,,0,0,374910,8.05,,S
52 | 835,836,1,1,"Compton, Miss. Sara Rebecca",female,39.0,1,1,PC 17756,83.1583,E49,C
53 | 236,237,0,2,"Hold, Mr. Stephen",male,44.0,1,0,26707,26.0,,S
54 | 425,426,0,3,"Wiseman, Mr. Phillippe",male,,0,0,A/4. 34244,7.25,,S
55 | 482,483,0,3,"Rouse, Mr. Richard Henry",male,50.0,0,0,A/5 3594,8.05,,S
56 | 108,109,0,3,"Rekic, Mr. Tido",male,38.0,0,0,349249,7.8958,,S
57 | 243,244,0,3,"Maenpaa, Mr. Matti Alexanteri",male,22.0,0,0,STON/O 2. 3101275,7.125,,S
58 | 560,561,0,3,"Morrow, Mr. Thomas Rowan",male,,0,0,372622,7.75,,Q
59 | 403,404,0,3,"Hakkarainen, Mr. Pekka Pietari",male,28.0,1,0,STON/O2. 3101279,15.85,,S
60 | 43,44,1,2,"Laroche, Miss. Simonne Marie Anne Andree",female,3.0,1,2,SC/Paris 2123,41.5792,,C
61 | 436,437,0,3,"Ford, Miss. Doolina Margaret ""Daisy""",female,21.0,2,2,W./C. 6608,34.375,,S
62 | 234,235,0,2,"Leyson, Mr. Robert William Norman",male,24.0,0,0,C.A. 29566,10.5,,S
63 | 251,252,0,3,"Strom, Mrs. Wilhelm (Elna Matilda Persson)",female,29.0,1,1,347054,10.4625,G6,S
64 | 201,202,0,3,"Sage, Mr. Frederick",male,,8,2,CA. 2343,69.55,,S
65 | 26,27,0,3,"Emir, Mr. Farred Chehab",male,,0,0,2631,7.225,,C
66 | 329,330,1,1,"Hippach, Miss. Jean Gertrude",female,16.0,0,1,111361,57.9792,B18,C
67 | 623,624,0,3,"Hansen, Mr. Henry Damsgaard",male,21.0,0,0,350029,7.8542,,S
68 | 214,215,0,3,"Kiernan, Mr. Philip",male,,1,0,367229,7.75,,Q
69 | 829,830,1,1,"Stone, Mrs. George Nelson (Martha Evelyn)",female,62.0,0,0,113572,80.0,B28,
70 | 197,198,0,3,"Olsen, Mr. Karl Siegwart Andreas",male,42.0,0,1,4579,8.4042,,S
71 | 468,469,0,3,"Scanlan, Mr. James",male,,0,0,36209,7.725,,Q
72 | 372,373,0,3,"Beavan, Mr. William Thomas",male,19.0,0,0,323951,8.05,,S
73 |
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/titanic_example/assets/train_data_samples/data_sample_3/data_sample_3.csv:
--------------------------------------------------------------------------------
1 | ,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
2 | 505,506,0,1,"Penasco y Castellana, Mr. Victor de Satode",male,18.0,1,0,PC 17758,108.9,C65,C
3 | 411,412,0,3,"Hart, Mr. Henry",male,,0,0,394140,6.8583,,Q
4 | 820,821,1,1,"Hays, Mrs. Charles Melville (Clara Jennings Gregg)",female,52.0,1,1,12749,93.5,B69,S
5 | 500,501,0,3,"Calic, Mr. Petar",male,17.0,0,0,315086,8.6625,,S
6 | 366,367,1,1,"Warren, Mrs. Frank Manley (Anna Sophia Atkinson)",female,60.0,1,0,110813,75.25,D37,C
7 | 619,620,0,2,"Gavey, Mr. Lawrence",male,26.0,0,0,31028,10.5,,S
8 | 101,102,0,3,"Petroff, Mr. Pastcho (""Pentcho"")",male,,0,0,349215,7.8958,,S
9 | 534,535,0,3,"Cacic, Miss. Marija",female,30.0,0,0,315084,8.6625,,S
10 | 168,169,0,1,"Baumann, Mr. John D",male,,0,0,PC 17318,25.925,,S
11 | 834,835,0,3,"Allum, Mr. Owen George",male,18.0,0,0,2223,8.3,,S
12 | 18,19,0,3,"Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)",female,31.0,1,0,345763,18.0,,S
13 | 449,450,1,1,"Peuchen, Major. Arthur Godfrey",male,52.0,0,0,113786,30.5,C104,S
14 | 651,652,1,2,"Doling, Miss. Elsie",female,18.0,0,1,231919,23.0,,S
15 | 509,510,1,3,"Lang, Mr. Fang",male,26.0,0,0,1601,56.4958,,S
16 | 471,472,0,3,"Cacic, Mr. Luka",male,38.0,0,0,315089,8.6625,,S
17 | 524,525,0,3,"Kassem, Mr. Fared",male,,0,0,2700,7.2292,,C
18 | 400,401,1,3,"Niskanen, Mr. Juha",male,39.0,0,0,STON/O 2. 3101289,7.925,,S
19 | 126,127,0,3,"McMahon, Mr. Martin",male,,0,0,370372,7.75,,Q
20 | 470,471,0,3,"Keefe, Mr. Arthur",male,,0,0,323592,7.25,,S
21 | 324,325,0,3,"Sage, Mr. George John Jr",male,,8,2,CA. 2343,69.55,,S
22 | 614,615,0,3,"Brocklebank, Mr. William Alfred",male,35.0,0,0,364512,8.05,,S
23 | 740,741,1,1,"Hawksford, Mr. Walter James",male,,0,0,16988,30.0,D45,S
24 | 591,592,1,1,"Stephenson, Mrs. Walter Bertram (Martha Eustis)",female,52.0,1,0,36947,78.2667,D20,C
25 | 543,544,1,2,"Beane, Mr. Edward",male,32.0,1,0,2908,26.0,,S
26 | 409,410,0,3,"Lefebre, Miss. Ida",female,,3,1,4133,25.4667,,S
27 | 202,203,0,3,"Johanson, Mr. Jakob Alfred",male,34.0,0,0,3101264,6.4958,,S
28 | 386,387,0,3,"Goodwin, Master. Sidney Leonard",male,1.0,5,2,CA 2144,46.9,,S
29 | 805,806,0,3,"Johansson, Mr. Karl Johan",male,31.0,0,0,347063,7.775,,S
30 | 29,30,0,3,"Todoroff, Mr. Lalio",male,,0,0,349216,7.8958,,S
31 | 457,458,1,1,"Kenyon, Mrs. Frederick R (Marion)",female,,1,0,17464,51.8625,D21,S
32 | 504,505,1,1,"Maioni, Miss. Roberta",female,16.0,0,0,110152,86.5,B79,S
33 | 56,57,1,2,"Rugg, Miss. Emily",female,21.0,0,0,C.A. 31026,10.5,,S
34 | 640,641,0,3,"Jensen, Mr. Hans Peder",male,20.0,0,0,350050,7.8542,,S
35 | 738,739,0,3,"Ivanoff, Mr. Kanio",male,,0,0,349201,7.8958,,S
36 | 833,834,0,3,"Augustsson, Mr. Albert",male,23.0,0,0,347468,7.8542,,S
37 | 801,802,1,2,"Collyer, Mrs. Harvey (Charlotte Annie Tate)",female,31.0,1,1,C.A. 31921,26.25,,S
38 | 270,271,0,1,"Cairns, Mr. Alexander",male,,0,0,113798,31.0,,S
39 | 221,222,0,2,"Bracken, Mr. James H",male,27.0,0,0,220367,13.0,,S
40 | 607,608,1,1,"Daniel, Mr. Robert Williams",male,27.0,0,0,113804,30.5,,S
41 | 477,478,0,3,"Braund, Mr. Lewis Richard",male,29.0,1,0,3460,7.0458,,S
42 | 589,590,0,3,"Murdlin, Mr. Joseph",male,,0,0,A./5. 3235,8.05,,S
43 | 476,477,0,2,"Renouf, Mr. Peter Henry",male,34.0,1,0,31027,21.0,,S
44 | 17,18,1,2,"Williams, Mr. Charles Eugene",male,,0,0,244373,13.0,,S
45 | 856,857,1,1,"Wick, Mrs. George Dennick (Mary Hitchcock)",female,45.0,1,1,36928,164.8667,,S
46 | 14,15,0,3,"Vestrom, Miss. Hulda Amanda Adolfina",female,14.0,0,0,350406,7.8542,,S
47 | 418,419,0,2,"Matthews, Mr. William John",male,30.0,0,0,28228,13.0,,S
48 | 624,625,0,3,"Bowen, Mr. David John ""Dai""",male,21.0,0,0,54636,16.1,,S
49 | 669,670,1,1,"Taylor, Mrs. Elmer Zebley (Juliet Cummins Wright)",female,,1,0,19996,52.0,C126,S
50 | 813,814,0,3,"Andersson, Miss. Ebba Iris Alfrida",female,6.0,4,2,347082,31.275,,S
51 | 697,698,1,3,"Mullens, Miss. Katherine ""Katie""",female,,0,0,35852,7.7333,,Q
52 | 178,179,0,2,"Hale, Mr. Reginald",male,30.0,0,0,250653,13.0,,S
53 | 446,447,1,2,"Mellinger, Miss. Madeleine Violet",female,13.0,0,1,250644,19.5,,S
54 | 890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q
55 | 348,349,1,3,"Coutts, Master. William Loch ""William""",male,3.0,1,1,C.A. 37671,15.9,,S
56 | 460,461,1,1,"Anderson, Mr. Harry",male,48.0,0,0,19952,26.55,E12,S
57 | 864,865,0,2,"Gill, Mr. John William",male,24.0,0,0,233866,13.0,,S
58 | 207,208,1,3,"Albimona, Mr. Nassef Cassem",male,26.0,0,0,2699,18.7875,,C
59 | 282,283,0,3,"de Pelsmaeker, Mr. Alfons",male,16.0,0,0,345778,9.5,,S
60 | 185,186,0,1,"Rood, Mr. Hugh Roscoe",male,,0,0,113767,50.0,A32,S
61 | 499,500,0,3,"Svensson, Mr. Olof",male,24.0,0,0,350035,7.7958,,S
62 | 746,747,0,3,"Abbott, Mr. Rossmore Edward",male,16.0,1,1,C.A. 2673,20.25,,S
63 | 451,452,0,3,"Hagland, Mr. Ingvald Olai Olsen",male,,1,0,65303,19.9667,,S
64 | 188,189,0,3,"Bourke, Mr. John",male,40.0,1,1,364849,15.5,,Q
65 | 531,532,0,3,"Toufik, Mr. Nakli",male,,0,0,2641,7.2292,,C
66 | 438,439,0,1,"Fortune, Mr. Mark",male,64.0,1,4,19950,263.0,C23 C25 C27,S
67 | 674,675,0,2,"Watson, Mr. Ennis Hastings",male,,0,0,239856,0.0,,S
68 | 518,519,1,2,"Angle, Mrs. William A (Florence ""Mary"" Agnes Hughes)",female,36.0,1,0,226875,26.0,,S
69 | 103,104,0,3,"Johansson, Mr. Gustaf Joel",male,33.0,0,0,7540,8.6542,,S
70 | 759,760,1,1,"Rothes, the Countess. of (Lucy Noel Martha Dyer-Edwards)",female,33.0,0,0,110152,86.5,B77,S
71 | 779,780,1,1,"Robert, Mrs. Edward Scott (Elisabeth Walton McMillan)",female,43.0,0,1,24160,211.3375,B3,S
72 | 354,355,0,3,"Yousif, Mr. Wazli",male,,0,0,2647,7.225,,C
73 |
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/titanic_example/assets/train_data_samples/data_sample_4/data_sample_4.csv:
--------------------------------------------------------------------------------
1 | ,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
2 | 393,394,1,1,"Newell, Miss. Marjorie",female,23.0,1,0,35273,113.275,D36,C
3 | 606,607,0,3,"Karaic, Mr. Milan",male,30.0,0,0,349246,7.8958,,S
4 | 384,385,0,3,"Plotcharsky, Mr. Vasil",male,,0,0,349227,7.8958,,S
5 | 520,521,1,1,"Perreault, Miss. Anne",female,30.0,0,0,12749,93.5,B73,S
6 | 136,137,1,1,"Newsom, Miss. Helen Monypeny",female,19.0,0,2,11752,26.2833,D47,S
7 | 679,680,1,1,"Cardeza, Mr. Thomas Drake Martinez",male,36.0,0,1,PC 17755,512.3292,B51 B53 B55,C
8 | 816,817,0,3,"Heininen, Miss. Wendla Maria",female,23.0,0,0,STON/O2. 3101290,7.925,,S
9 | 830,831,1,3,"Yasbeck, Mrs. Antoni (Selini Alexander)",female,15.0,1,0,2659,14.4542,,C
10 | 858,859,1,3,"Baclini, Mrs. Solomon (Latifa Qurban)",female,24.0,0,3,2666,19.2583,,C
11 | 44,45,1,3,"Devaney, Miss. Margaret Delia",female,19.0,0,0,330958,7.8792,,Q
12 | 240,241,0,3,"Zabour, Miss. Thamine",female,,1,0,2665,14.4542,,C
13 | 64,65,0,1,"Stewart, Mr. Albert A",male,,0,0,PC 17605,27.7208,,C
14 | 343,344,0,2,"Sedgwick, Mr. Charles Frederick Waddington",male,25.0,0,0,244361,13.0,,S
15 | 552,553,0,3,"O'Brien, Mr. Timothy",male,,0,0,330979,7.8292,,Q
16 | 140,141,0,3,"Boulos, Mrs. Joseph (Sultana)",female,,0,2,2678,15.2458,,C
17 | 377,378,0,1,"Widener, Mr. Harry Elkins",male,27.0,0,2,113503,211.5,C82,C
18 | 124,125,0,1,"White, Mr. Percival Wayland",male,54.0,0,1,35281,77.2875,D26,S
19 | 392,393,0,3,"Gustafsson, Mr. Johan Birger",male,28.0,2,0,3101277,7.925,,S
20 | 229,230,0,3,"Lefebre, Miss. Mathilde",female,,3,1,4133,25.4667,,S
21 | 330,331,1,3,"McCoy, Miss. Agnes",female,,2,0,367226,23.25,,Q
22 | 114,115,0,3,"Attalah, Miss. Malake",female,17.0,0,0,2627,14.4583,,C
23 | 749,750,0,3,"Connaghton, Mr. Michael",male,31.0,0,0,335097,7.75,,Q
24 | 347,348,1,3,"Davison, Mrs. Thomas Henry (Mary E Finck)",female,,1,0,386525,16.1,,S
25 | 532,533,0,3,"Elias, Mr. Joseph Jr",male,17.0,1,1,2690,7.2292,,C
26 | 111,112,0,3,"Zabour, Miss. Hileni",female,14.5,1,0,2665,14.4542,,C
27 | 41,42,0,2,"Turpin, Mrs. William John Robert (Dorothy Ann Wonnacott)",female,27.0,1,0,11668,21.0,,S
28 | 497,498,0,3,"Shellard, Mr. Frederick William",male,,0,0,C.A. 6212,15.1,,S
29 | 790,791,0,3,"Keane, Mr. Andrew ""Andy""",male,,0,0,12460,7.75,,Q
30 | 871,872,1,1,"Beckwith, Mrs. Richard Leonard (Sallie Monypeny)",female,47.0,1,1,11751,52.5542,D35,S
31 | 285,286,0,3,"Stankovic, Mr. Ivan",male,33.0,0,0,349239,8.6625,,C
32 | 291,292,1,1,"Bishop, Mrs. Dickinson H (Helen Walton)",female,19.0,1,0,11967,91.0792,B49,C
33 | 555,556,0,1,"Wright, Mr. George",male,62.0,0,0,113807,26.55,,S
34 | 121,122,0,3,"Moore, Mr. Leonard Charles",male,,0,0,A4. 54510,8.05,,S
35 | 271,272,1,3,"Tornquist, Mr. William Henry",male,25.0,0,0,LINE,0.0,,S
36 | 75,76,0,3,"Moen, Mr. Sigurd Hansen",male,25.0,0,0,348123,7.65,F G73,S
37 | 267,268,1,3,"Persson, Mr. Ernst Ulrik",male,25.0,1,0,347083,7.775,,S
38 | 780,781,1,3,"Ayoub, Miss. Banoura",female,13.0,0,0,2687,7.2292,,C
39 | 781,782,1,1,"Dick, Mrs. Albert Adrian (Vera Gillespie)",female,17.0,1,0,17474,57.0,B20,S
40 | 811,812,0,3,"Lester, Mr. James",male,39.0,0,0,A/4 48871,24.15,,S
41 | 733,734,0,2,"Berriman, Mr. William John",male,23.0,0,0,28425,13.0,,S
42 | 91,92,0,3,"Andreasson, Mr. Paul Edvin",male,20.0,0,0,347466,7.8542,,S
43 | 151,152,1,1,"Pears, Mrs. Thomas (Edith Wearne)",female,22.0,1,0,113776,66.6,C2,S
44 | 496,497,1,1,"Eustis, Miss. Elizabeth Mussey",female,54.0,1,0,36947,78.2667,D20,C
45 | 125,126,1,3,"Nicola-Yarred, Master. Elias",male,12.0,1,0,2651,11.2417,,C
46 | 861,862,0,2,"Giles, Mr. Frederick Edward",male,21.0,1,0,28134,11.5,,S
47 | 839,840,1,1,"Marechal, Mr. Pierre",male,,0,0,11774,29.7,C47,C
48 | 123,124,1,2,"Webber, Miss. Susan",female,32.5,0,0,27267,13.0,E101,S
49 | 776,777,0,3,"Tobin, Mr. Roger",male,,0,0,383121,7.75,F38,Q
50 | 573,574,1,3,"Kelly, Miss. Mary",female,,0,0,14312,7.75,,Q
51 | 272,273,1,2,"Mellinger, Mrs. (Elizabeth Anne Maidment)",female,41.0,0,1,250644,19.5,,S
52 | 596,597,1,2,"Leitch, Miss. Jessie Wills",female,,0,0,248727,33.0,,S
53 | 730,731,1,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B5,S
54 | 0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
55 | 660,661,1,1,"Frauenthal, Dr. Henry William",male,50.0,2,0,PC 17611,133.65,,S
56 | 223,224,0,3,"Nenkoff, Mr. Christo",male,,0,0,349234,7.8958,,S
57 | 666,667,0,2,"Butler, Mr. Reginald Fenton",male,25.0,0,0,234686,13.0,,S
58 | 273,274,0,1,"Natsch, Mr. Charles H",male,37.0,0,1,PC 17596,29.7,C118,C
59 | 786,787,1,3,"Sjoblom, Miss. Anna Sofia",female,18.0,0,0,3101265,7.4958,,S
60 | 702,703,0,3,"Barbara, Miss. Saiide",female,18.0,0,1,2691,14.4542,,C
61 | 417,418,1,2,"Silven, Miss. Lyyli Karoliina",female,18.0,0,2,250652,13.0,,S
62 | 615,616,1,2,"Herman, Miss. Alice",female,24.0,1,2,220845,65.0,,S
63 | 11,12,1,1,"Bonnell, Miss. Elizabeth",female,58.0,0,0,113783,26.55,C103,S
64 | 788,789,1,3,"Dean, Master. Bertram Vere",male,1.0,1,2,C.A. 2315,20.575,,S
65 | 484,485,1,1,"Bishop, Mr. Dickinson H",male,25.0,1,0,11967,91.0792,B49,C
66 | 160,161,0,3,"Cribb, Mr. John Hatfield",male,44.0,0,1,371362,16.1,,S
67 | 128,129,1,3,"Peter, Miss. Anna",female,,1,1,2668,22.3583,F E69,C
68 | 137,138,0,1,"Futrelle, Mr. Jacques Heath",male,37.0,1,0,113803,53.1,C123,S
69 | 841,842,0,2,"Mudd, Mr. Thomas Charles",male,16.0,0,0,S.O./P.P. 3,10.5,,S
70 | 331,332,0,1,"Partner, Mr. Austen",male,45.5,0,0,113043,28.5,C124,S
71 | 766,767,0,1,"Brewe, Dr. Arthur Jackson",male,,0,0,112379,39.6,,C
72 | 769,770,0,3,"Gronnestad, Mr. Daniel Danielsen",male,32.0,0,0,8471,8.3625,,S
73 |
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/titanic_example/assets/train_data_samples/data_sample_5/data_sample_5.csv:
--------------------------------------------------------------------------------
1 | ,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
2 | 691,692,1,3,"Karun, Miss. Manca",female,4.0,0,1,349256,13.4167,,C
3 | 292,293,0,2,"Levy, Mr. Rene Jacques",male,36.0,0,0,SC/Paris 2163,12.875,D,C
4 | 657,658,0,3,"Bourke, Mrs. John (Catherine)",female,32.0,1,1,364849,15.5,,Q
5 | 621,622,1,1,"Kimball, Mr. Edwin Nelson Jr",male,42.0,1,0,11753,52.5542,D19,S
6 | 293,294,0,3,"Haas, Miss. Aloisia",female,24.0,0,0,349236,8.85,,S
7 | 760,761,0,3,"Garfirth, Mr. John",male,,0,0,358585,14.5,,S
8 | 714,715,0,2,"Greenberg, Mr. Samuel",male,52.0,0,0,250647,13.0,,S
9 | 888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S
10 | 680,681,0,3,"Peters, Miss. Katie",female,,0,0,330935,8.1375,,Q
11 | 422,423,0,3,"Zimmerman, Mr. Leo",male,29.0,0,0,315082,7.875,,S
12 | 159,160,0,3,"Sage, Master. Thomas Henry",male,,8,2,CA. 2343,69.55,,S
13 | 793,794,0,1,"Hoyt, Mr. William Fisher",male,,0,0,PC 17600,30.6958,,C
14 | 845,846,0,3,"Abbing, Mr. Anthony",male,42.0,0,0,C.A. 5547,7.55,,S
15 | 24,25,0,3,"Palsson, Miss. Torborg Danira",female,8.0,3,1,349909,21.075,,S
16 | 821,822,1,3,"Lulic, Mr. Nikola",male,27.0,0,0,315098,8.6625,,S
17 | 578,579,0,3,"Caram, Mrs. Joseph (Maria Elias)",female,,1,0,2689,14.4583,,C
18 | 439,440,0,2,"Kvillner, Mr. Johan Henrik Johannesson",male,31.0,0,0,C.A. 18723,10.5,,S
19 | 172,173,1,3,"Johnson, Miss. Eleanor Ileen",female,1.0,1,1,347742,11.1333,,S
20 | 33,34,0,2,"Wheadon, Mr. Edward H",male,66.0,0,0,C.A. 24579,10.5,,S
21 | 419,420,0,3,"Van Impe, Miss. Catharina",female,10.0,0,2,345773,24.15,,S
22 | 346,347,1,2,"Smith, Miss. Marion Elsie",female,40.0,0,0,31418,13.0,,S
23 | 868,869,0,3,"van Melkebeke, Mr. Philemon",male,,0,0,345777,9.5,,S
24 | 138,139,0,3,"Osen, Mr. Olaf Elon",male,16.0,0,0,7534,9.2167,,S
25 | 581,582,1,1,"Thayer, Mrs. John Borland (Marian Longstreth Morris)",female,39.0,1,1,17421,110.8833,C68,C
26 | 279,280,1,3,"Abbott, Mrs. Stanton (Rosa Hunt)",female,35.0,1,1,C.A. 2673,20.25,,S
27 | 218,219,1,1,"Bazzani, Miss. Albina",female,32.0,0,0,11813,76.2917,D15,C
28 | 203,204,0,3,"Youseff, Mr. Gerious",male,45.5,0,0,2628,7.225,,C
29 | 258,259,1,1,"Ward, Miss. Anna",female,35.0,0,0,PC 17755,512.3292,,C
30 | 522,523,0,3,"Lahoud, Mr. Sarkis",male,,0,0,2624,7.225,,C
31 | 76,77,0,3,"Staneff, Mr. Ivan",male,,0,0,349208,7.8958,,S
32 | 514,515,0,3,"Coleff, Mr. Satio",male,24.0,0,0,349209,7.4958,,S
33 | 832,833,0,3,"Saad, Mr. Amin",male,,0,0,2671,7.2292,,C
34 | 429,430,1,3,"Pickard, Mr. Berk (Berk Trembisky)",male,32.0,0,0,SOTON/O.Q. 392078,8.05,E10,S
35 | 686,687,0,3,"Panula, Mr. Jaako Arnold",male,14.0,4,1,3101295,39.6875,,S
36 | 169,170,0,3,"Ling, Mr. Lee",male,28.0,0,0,1601,56.4958,,S
37 | 844,845,0,3,"Culumovic, Mr. Jeso",male,17.0,0,0,315090,8.6625,,S
38 | 231,232,0,3,"Larsson, Mr. Bengt Edvin",male,29.0,0,0,347067,7.775,,S
39 | 456,457,0,1,"Millet, Mr. Francis Davis",male,65.0,0,0,13509,26.55,E38,S
40 | 837,838,0,3,"Sirota, Mr. Maurice",male,,0,0,392092,8.05,,S
41 | 448,449,1,3,"Baclini, Miss. Marie Catherine",female,5.0,2,1,2666,19.2583,,C
42 | 604,605,1,1,"Homer, Mr. Harry (""Mr E Haven"")",male,35.0,0,0,111426,26.55,,C
43 | 704,705,0,3,"Hansen, Mr. Henrik Juul",male,26.0,1,0,350025,7.8542,,S
44 | 212,213,0,3,"Perkin, Mr. John Henry",male,22.0,0,0,A/5 21174,7.25,,S
45 | 247,248,1,2,"Hamalainen, Mrs. William (Anna)",female,24.0,0,2,250649,14.5,,S
46 | 382,383,0,3,"Tikkanen, Mr. Juho",male,32.0,0,0,STON/O 2. 3101293,7.925,,S
47 | 802,803,1,1,"Carter, Master. William Thornton II",male,11.0,1,2,113760,120.0,B96 B98,S
48 | 90,91,0,3,"Christmann, Mr. Emil",male,29.0,0,0,343276,8.05,,S
49 | 306,307,1,1,"Fleming, Miss. Margaret",female,,0,0,17421,110.8833,,C
50 | 394,395,1,3,"Sandstrom, Mrs. Hjalmar (Agnes Charlotta Bengtsson)",female,24.0,0,2,PP 9549,16.7,G6,S
51 | 318,319,1,1,"Wick, Miss. Mary Natalie",female,31.0,0,2,36928,164.8667,C7,S
52 | 472,473,1,2,"West, Mrs. Edwy Arthur (Ada Mary Worth)",female,33.0,1,2,C.A. 34651,27.75,,S
53 | 629,630,0,3,"O'Connell, Mr. Patrick D",male,,0,0,334912,7.7333,,Q
54 | 177,178,0,1,"Isham, Miss. Ann Elizabeth",female,50.0,0,0,PC 17595,28.7125,C49,C
55 | 847,848,0,3,"Markoff, Mr. Marin",male,35.0,0,0,349213,7.8958,,C
56 | 51,52,0,3,"Nosworthy, Mr. Richard Cater",male,21.0,0,0,A/4. 39886,7.8,,S
57 | 734,735,0,2,"Troupiansky, Mr. Moses Aaron",male,23.0,0,0,233639,13.0,,S
58 | 810,811,0,3,"Alexander, Mr. William",male,26.0,0,0,3474,7.8875,,S
59 | 25,26,1,3,"Asplund, Mrs. Carl Oscar (Selma Augusta Emilia Johansson)",female,38.0,1,5,347077,31.3875,,S
60 | 280,281,0,3,"Duane, Mr. Frank",male,65.0,0,0,336439,7.75,,Q
61 | 664,665,1,3,"Lindqvist, Mr. Eino William",male,20.0,1,0,STON/O 2. 3101285,7.925,,S
62 | 646,647,0,3,"Cor, Mr. Liudevit",male,19.0,0,0,349231,7.8958,,S
63 | 857,858,1,1,"Daly, Mr. Peter Denis ",male,51.0,0,0,113055,26.55,E17,S
64 | 200,201,0,3,"Vande Walle, Mr. Nestor Cyriel",male,28.0,0,0,345770,9.5,,S
65 | 96,97,0,1,"Goldschmidt, Mr. George B",male,71.0,0,0,PC 17754,34.6542,A5,C
66 | 269,270,1,1,"Bissette, Miss. Amelia",female,35.0,0,0,PC 17760,135.6333,C99,S
67 | 636,637,0,3,"Leinonen, Mr. Antti Gustaf",male,32.0,0,0,STON/O 2. 3101292,7.925,,S
68 | 365,366,0,3,"Adahl, Mr. Mauritz Nils Martin",male,30.0,0,0,C 7076,7.25,,S
69 | 4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
70 | 378,379,0,3,"Betros, Mr. Tannous",male,20.0,0,0,2648,4.0125,,C
71 | 454,455,0,3,"Peduzzi, Mr. Joseph",male,,0,0,A/5 2817,8.05,,S
72 | 807,808,0,3,"Pettersson, Miss. Ellen Natalia",female,18.0,0,0,347087,7.775,,S
73 |
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/titanic_example/assets/train_data_samples/data_sample_6/data_sample_6.csv:
--------------------------------------------------------------------------------
1 | ,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
2 | 826,827,0,3,"Lam, Mr. Len",male,,0,0,1601,56.4958,,S
3 | 739,740,0,3,"Nankoff, Mr. Minko",male,,0,0,349218,7.8958,,S
4 | 717,718,1,2,"Troutt, Miss. Edwina Celia ""Winnie""",female,27.0,0,0,34218,10.5,E101,S
5 | 506,507,1,2,"Quick, Mrs. Frederick Charles (Jane Richards)",female,33.0,0,2,26360,26.0,,S
6 | 684,685,0,2,"Brown, Mr. Thomas William Solomon",male,60.0,1,1,29750,39.0,,S
7 | 765,766,1,1,"Hogeboom, Mrs. John C (Anna Andrews)",female,51.0,1,0,13502,77.9583,D11,S
8 | 650,651,0,3,"Mitkoff, Mr. Mito",male,,0,0,349221,7.8958,,S
9 | 161,162,1,2,"Watt, Mrs. James (Elizabeth ""Bessie"" Inglis Milne)",female,40.0,0,0,C.A. 33595,15.75,,S
10 | 412,413,1,1,"Minahan, Miss. Daisy E",female,33.0,1,0,19928,90.0,C78,Q
11 | 838,839,1,3,"Chip, Mr. Chang",male,32.0,0,0,1601,56.4958,,S
12 | 568,569,0,3,"Doharr, Mr. Tannous",male,,0,0,2686,7.2292,,C
13 | 77,78,0,3,"Moutal, Mr. Rahamin Haim",male,,0,0,374746,8.05,,S
14 | 889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C
15 | 464,465,0,3,"Maisner, Mr. Simon",male,,0,0,A/S 2816,8.05,,S
16 | 373,374,0,1,"Ringhini, Mr. Sante",male,22.0,0,0,PC 17760,135.6333,,C
17 | 689,690,1,1,"Madill, Miss. Georgette Alexandra",female,15.0,0,1,24160,211.3375,B5,S
18 | 332,333,0,1,"Graham, Mr. George Edward",male,38.0,0,1,PC 17582,153.4625,C91,S
19 | 685,686,0,2,"Laroche, Mr. Joseph Philippe Lemercier",male,25.0,1,2,SC/Paris 2123,41.5792,,C
20 | 110,111,0,1,"Porter, Mr. Walter Chamberlain",male,47.0,0,0,110465,52.0,C110,S
21 | 487,488,0,1,"Kent, Mr. Edward Austin",male,58.0,0,0,11771,29.7,B37,C
22 | 530,531,1,2,"Quick, Miss. Phyllis May",female,2.0,1,1,26360,26.0,,S
23 | 173,174,0,3,"Sivola, Mr. Antti Wilhelm",male,21.0,0,0,STON/O 2. 3101280,7.925,,S
24 | 144,145,0,2,"Andrew, Mr. Edgardo Samuel",male,18.0,0,0,231945,11.5,,S
25 | 711,712,0,1,"Klaber, Mr. Herman",male,,0,0,113028,26.55,C124,S
26 | 135,136,0,2,"Richard, Mr. Emile",male,23.0,0,0,SC/PARIS 2133,15.0458,,C
27 | 148,149,0,2,"Navratil, Mr. Michel (""Louis M Hoffman"")",male,36.5,0,2,230080,26.0,F2,S
28 | 423,424,0,3,"Danbom, Mrs. Ernst Gilbert (Anna Sigrid Maria Brogren)",female,28.0,1,1,347080,14.4,,S
29 | 163,164,0,3,"Calic, Mr. Jovo",male,17.0,0,0,315093,8.6625,,S
30 | 395,396,0,3,"Johansson, Mr. Erik",male,22.0,0,0,350052,7.7958,,S
31 | 290,291,1,1,"Barber, Miss. Ellen ""Nellie""",female,26.0,0,0,19877,78.85,,S
32 | 561,562,0,3,"Sivic, Mr. Husein",male,40.0,0,0,349251,7.8958,,S
33 | 398,399,0,2,"Pain, Dr. Alfred",male,23.0,0,0,244278,10.5,,S
34 | 474,475,0,3,"Strandberg, Miss. Ida Sofia",female,22.0,0,0,7553,9.8375,,S
35 | 397,398,0,2,"McKane, Mr. Peter David",male,46.0,0,0,28403,26.0,,S
36 | 735,736,0,3,"Williams, Mr. Leslie",male,28.5,0,0,54636,16.1,,S
37 | 308,309,0,2,"Abelson, Mr. Samuel",male,30.0,1,0,P/PP 3381,24.0,,C
38 | 627,628,1,1,"Longley, Miss. Gretchen Fiske",female,21.0,0,0,13502,77.9583,D9,S
39 | 174,175,0,1,"Smith, Mr. James Clinch",male,56.0,0,0,17764,30.6958,A7,C
40 | 87,88,0,3,"Slocovski, Mr. Selman Francis",male,,0,0,SOTON/OQ 392086,8.05,,S
41 | 750,751,1,2,"Wells, Miss. Joan",female,4.0,1,1,29103,23.0,,S
42 | 156,157,1,3,"Gilnagh, Miss. Katherine ""Katie""",female,16.0,0,0,35851,7.7333,,Q
43 | 338,339,1,3,"Dahl, Mr. Karl Edwart",male,45.0,0,0,7598,8.05,,S
44 | 167,168,0,3,"Skoog, Mrs. William (Anna Bernhardina Karlsson)",female,45.0,1,4,347088,27.9,,S
45 | 827,828,1,2,"Mallet, Master. Andre",male,1.0,0,2,S.C./PARIS 2079,37.0042,,C
46 | 315,316,1,3,"Nilsson, Miss. Helmina Josefina",female,26.0,0,0,347470,7.8542,,S
47 | 873,874,0,3,"Vander Cruyssen, Mr. Victor",male,47.0,0,0,345765,9.0,,S
48 | 501,502,0,3,"Canavan, Miss. Mary",female,21.0,0,0,364846,7.75,,Q
49 | 6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
50 | 323,324,1,2,"Caldwell, Mrs. Albert Francis (Sylvia Mae Harbaugh)",female,22.0,1,1,248738,29.0,,S
51 | 183,184,1,2,"Becker, Master. Richard F",male,1.0,2,1,230136,39.0,F4,S
52 | 131,132,0,3,"Coelho, Mr. Domingos Fernandeo",male,20.0,0,0,SOTON/O.Q. 3101307,7.05,,S
53 | 493,494,0,1,"Artagaveytia, Mr. Ramon",male,71.0,0,0,PC 17609,49.5042,,C
54 | 863,864,0,3,"Sage, Miss. Dorothy Edith ""Dolly""",female,,8,2,CA. 2343,69.55,,S
55 | 681,682,1,1,"Hassab, Mr. Hammad",male,27.0,0,0,PC 17572,76.7292,D49,C
56 | 369,370,1,1,"Aubart, Mme. Leontine Pauline",female,24.0,0,0,PC 17477,69.3,B35,C
57 | 447,448,1,1,"Seward, Mr. Frederic Kimber",male,34.0,0,0,113794,26.55,,S
58 | 728,729,0,2,"Bryhl, Mr. Kurt Arnold Gottfrid",male,25.0,1,0,236853,26.0,,S
59 | 399,400,1,2,"Trout, Mrs. William H (Jessie L)",female,28.0,0,0,240929,12.65,,S
60 | 341,342,1,1,"Fortune, Miss. Alice Elizabeth",female,24.0,3,2,19950,263.0,C23 C25 C27,S
61 | 120,121,0,2,"Hickman, Mr. Stanley George",male,21.0,2,0,S.O.C. 14879,73.5,,S
62 | 380,381,1,1,"Bidois, Miss. Rosalie",female,42.0,0,0,PC 17757,227.525,,C
63 | 433,434,0,3,"Kallio, Mr. Nikolai Erland",male,17.0,0,0,STON/O 2. 3101274,7.125,,S
64 | 441,442,0,3,"Hampe, Mr. Leon",male,20.0,0,0,345769,9.5,,S
65 | 672,673,0,2,"Mitchell, Mr. Henry Michael",male,70.0,0,0,C.A. 24580,10.5,,S
66 | 756,757,0,3,"Carlsson, Mr. August Sigfrid",male,28.0,0,0,350042,7.7958,,S
67 | 326,327,0,3,"Nysveen, Mr. Johan Hansen",male,61.0,0,0,345364,6.2375,,S
68 | 139,140,0,1,"Giglio, Mr. Victor",male,24.0,0,0,PC 17593,79.2,B86,C
69 | 371,372,0,3,"Wiklund, Mr. Jakob Alfred",male,18.0,1,0,3101267,6.4958,,S
70 | 605,606,0,3,"Lindell, Mr. Edvard Bengtsson",male,36.0,1,0,349910,15.55,,S
71 | 86,87,0,3,"Ford, Mr. William Neal",male,16.0,1,3,W./C. 6608,34.375,,S
72 | 357,358,0,2,"Funk, Miss. Annie Clemmer",female,38.0,0,0,237671,13.0,,S
73 |
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/titanic_example/assets/train_data_samples/data_sample_7/data_sample_7.csv:
--------------------------------------------------------------------------------
1 | ,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
2 | 671,672,0,1,"Davidson, Mr. Thornton",male,31.0,1,0,F.C. 12750,52.0,B71,S
3 | 94,95,0,3,"Coxon, Mr. Daniel",male,59.0,0,0,364500,7.25,,S
4 | 255,256,1,3,"Touma, Mrs. Darwis (Hanne Youssef Razi)",female,29.0,0,2,2650,15.2458,,C
5 | 770,771,0,3,"Lievens, Mr. Rene Aime",male,24.0,0,0,345781,9.5,,S
6 | 381,382,1,3,"Nakid, Miss. Maria (""Mary"")",female,1.0,0,2,2653,15.7417,,C
7 | 510,511,1,3,"Daly, Mr. Eugene Patrick",male,29.0,0,0,382651,7.75,,Q
8 | 344,345,0,2,"Fox, Mr. Stanley Hubert",male,36.0,0,0,229236,13.0,,S
9 | 502,503,0,3,"O'Sullivan, Miss. Bridget Mary",female,,0,0,330909,7.6292,,Q
10 | 432,433,1,2,"Louch, Mrs. Charles Alexander (Alice Adelaide Slow)",female,42.0,1,0,SC/AH 3085,26.0,,S
11 | 74,75,1,3,"Bing, Mr. Lee",male,32.0,0,0,1601,56.4958,,S
12 | 426,427,1,2,"Clarke, Mrs. Charles V (Ada Maria Winfield)",female,28.0,1,0,2003,26.0,,S
13 | 748,749,0,1,"Marvin, Mr. Daniel Warner",male,19.0,1,0,113773,53.1,D30,S
14 | 115,116,0,3,"Pekoniemi, Mr. Edvard",male,21.0,0,0,STON/O 2. 3101294,7.925,,S
15 | 569,570,1,3,"Jonsson, Mr. Carl",male,32.0,0,0,350417,7.8542,,S
16 | 884,885,0,3,"Sutehall, Mr. Henry Jr",male,25.0,0,0,SOTON/OQ 392076,7.05,,S
17 | 513,514,1,1,"Rothschild, Mrs. Martin (Elizabeth L. Barrett)",female,54.0,1,0,PC 17603,59.4,,C
18 | 88,89,1,1,"Fortune, Miss. Mabel Helen",female,23.0,3,2,19950,263.0,C23 C25 C27,S
19 | 517,518,0,3,"Ryan, Mr. Patrick",male,,0,0,371110,24.15,,Q
20 | 62,63,0,1,"Harris, Mr. Henry Birkhardt",male,45.0,1,0,36973,83.475,C83,S
21 | 673,674,1,2,"Wilhelms, Mr. Charles",male,31.0,0,0,244270,13.0,,S
22 | 701,702,1,1,"Silverthorne, Mr. Spencer Victor",male,35.0,0,0,PC 17475,26.2875,E24,S
23 | 459,460,0,3,"O'Connor, Mr. Maurice",male,,0,0,371060,7.75,,Q
24 | 538,539,0,3,"Risien, Mr. Samuel Beard",male,,0,0,364498,14.5,,S
25 | 751,752,1,3,"Moor, Master. Meier",male,6.0,0,1,392096,12.475,E121,S
26 | 277,278,0,2,"Parkes, Mr. Francis ""Frank""",male,,0,0,239853,0.0,,S
27 | 831,832,1,2,"Richards, Master. George Sibley",male,0.83,1,1,29106,18.75,,S
28 | 414,415,1,3,"Sundman, Mr. Johan Julian",male,44.0,0,0,STON/O 2. 3101269,7.925,,S
29 | 241,242,1,3,"Murphy, Miss. Katherine ""Kate""",female,,1,0,367230,15.5,,Q
30 | 536,537,0,1,"Butt, Major. Archibald Willingham",male,45.0,0,0,113050,26.55,B38,S
31 | 803,804,1,3,"Thomas, Master. Assad Alexander",male,0.42,0,1,2625,8.5167,,C
32 | 631,632,0,3,"Lundahl, Mr. Johan Svensson",male,51.0,0,0,347743,7.0542,,S
33 | 516,517,1,2,"Lemore, Mrs. (Amelia Milley)",female,34.0,0,0,C.A. 34260,10.5,F33,S
34 | 158,159,0,3,"Smiljanic, Mr. Mile",male,,0,0,315037,8.6625,,S
35 | 119,120,0,3,"Andersson, Miss. Ellis Anna Maria",female,2.0,4,2,347082,31.275,,S
36 | 599,600,1,1,"Duff Gordon, Sir. Cosmo Edmund (""Mr Morgan"")",male,49.0,1,0,PC 17485,56.9292,A20,C
37 | 28,29,1,3,"O'Dwyer, Miss. Ellen ""Nellie""",female,,0,0,330959,7.8792,,Q
38 | 342,343,0,2,"Collander, Mr. Erik Gustaf",male,28.0,0,0,248740,13.0,,S
39 | 100,101,0,3,"Petranec, Miss. Matilda",female,28.0,0,0,349245,7.8958,,S
40 | 116,117,0,3,"Connors, Mr. Patrick",male,70.5,0,0,370369,7.75,,Q
41 | 582,583,0,2,"Downton, Mr. William James",male,54.0,0,0,28403,26.0,,S
42 | 625,626,0,1,"Sutton, Mr. Frederick",male,61.0,0,0,36963,32.3208,D50,S
43 | 370,371,1,1,"Harder, Mr. George Achilles",male,25.0,1,0,11765,55.4417,E50,C
44 | 431,432,1,3,"Thorneycroft, Mrs. Percival (Florence Kate White)",female,,1,0,376564,16.1,,S
45 | 89,90,0,3,"Celotti, Mr. Francesco",male,24.0,0,0,343275,8.05,,S
46 | 676,677,0,3,"Sawyer, Mr. Frederick Charles",male,24.5,0,0,342826,8.05,,S
47 | 706,707,1,2,"Kelly, Mrs. Florence ""Fannie""",female,45.0,0,0,223596,13.5,,S
48 | 233,234,1,3,"Asplund, Miss. Lillian Gertrud",female,5.0,4,2,347077,31.3875,,S
49 | 620,621,0,3,"Yasbeck, Mr. Antoni",male,27.0,1,0,2659,14.4542,,C
50 | 696,697,0,3,"Kelly, Mr. James",male,44.0,0,0,363592,8.05,,S
51 | 413,414,0,2,"Cunningham, Mr. Alfred Fleming",male,,0,0,239853,0.0,,S
52 | 99,100,0,2,"Kantor, Mr. Sinai",male,34.0,1,0,244367,26.0,,S
53 | 275,276,1,1,"Andrews, Miss. Kornelia Theodosia",female,63.0,1,0,13502,77.9583,D7,S
54 | 658,659,0,2,"Eitemiller, Mr. George Floyd",male,23.0,0,0,29751,13.0,,S
55 | 250,251,0,3,"Reed, Mr. James George",male,,0,0,362316,7.25,,S
56 | 678,679,0,3,"Goodwin, Mrs. Frederick (Augusta Tyler)",female,43.0,1,6,CA 2144,46.9,,S
57 | 113,114,0,3,"Jussila, Miss. Katriina",female,20.0,1,0,4136,9.825,,S
58 | 297,298,0,1,"Allison, Miss. Helen Loraine",female,2.0,1,2,113781,151.55,C22 C26,S
59 | 849,850,1,1,"Goldenberg, Mrs. Samuel L (Edwiga Grabowska)",female,,1,0,17453,89.1042,C92,C
60 | 179,180,0,3,"Leonard, Mr. Lionel",male,36.0,0,0,LINE,0.0,,S
61 | 466,467,0,2,"Campbell, Mr. William",male,,0,0,239853,0.0,,S
62 | 774,775,1,2,"Hocking, Mrs. Elizabeth (Eliza Needs)",female,54.0,1,3,29105,23.0,,S
63 | 437,438,1,2,"Richards, Mrs. Sidney (Emily Hocking)",female,24.0,2,3,29106,18.75,,S
64 | 865,866,1,2,"Bystrom, Mrs. (Karolina)",female,42.0,0,0,236852,13.0,,S
65 | 73,74,0,3,"Chronopoulos, Mr. Apostolos",male,26.0,1,0,2680,14.4542,,C
66 | 385,386,0,2,"Davies, Mr. Charles Henry",male,18.0,0,0,S.O.C. 14879,73.5,,S
67 | 235,236,0,3,"Harknett, Miss. Alice Phoebe",female,,0,0,W./C. 6609,7.55,,S
68 | 211,212,1,2,"Cameron, Miss. Clear Annie",female,35.0,0,0,F.C.C. 13528,21.0,,S
69 | 713,714,0,3,"Larsson, Mr. August Viktor",male,29.0,0,0,7545,9.4833,,S
70 | 572,573,1,1,"Flynn, Mr. John Irwin (""Irving"")",male,36.0,0,0,PC 17474,26.3875,E25,S
71 | 722,723,0,2,"Gillespie, Mr. William Henry",male,34.0,0,0,12233,13.0,,S
72 | 355,356,0,3,"Vanden Steen, Mr. Leo Peter",male,28.0,0,0,345783,9.5,,S
73 |
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/titanic_example/assets/train_data_samples/data_sample_8/data_sample_8.csv:
--------------------------------------------------------------------------------
1 | ,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
2 | 862,863,1,1,"Swift, Mrs. Frederick Joel (Margaret Welles Barron)",female,48.0,0,0,17466,25.9292,D17,S
3 | 825,826,0,3,"Flynn, Mr. John",male,,0,0,368323,6.95,,Q
4 | 886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0,,S
5 | 444,445,1,3,"Johannesen-Bratthammer, Mr. Bernt",male,,0,0,65306,8.1125,,S
6 | 42,43,0,3,"Kraeff, Mr. Theodor",male,,0,0,349253,7.8958,,C
7 | 288,289,1,2,"Hosono, Mr. Masabumi",male,42.0,0,0,237798,13.0,,S
8 | 541,542,0,3,"Andersson, Miss. Ingeborg Constanzia",female,9.0,4,2,347082,31.275,,S
9 | 5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
10 | 724,725,1,1,"Chambers, Mr. Norman Campbell",male,27.0,1,0,113806,53.1,E8,S
11 | 276,277,0,3,"Lindblom, Miss. Augusta Charlotta",female,45.0,0,0,347073,7.75,,S
12 | 877,878,0,3,"Petroff, Mr. Nedelio",male,19.0,0,0,349212,7.8958,,S
13 | 539,540,1,1,"Frolicher, Miss. Hedwig Margaritha",female,22.0,0,2,13568,49.5,B39,C
14 | 597,598,0,3,"Johnson, Mr. Alfred",male,49.0,0,0,LINE,0.0,,S
15 | 649,650,1,3,"Stanley, Miss. Amy Zillah Elsie",female,23.0,0,0,CA. 2314,7.55,,S
16 | 570,571,1,2,"Harris, Mr. George",male,62.0,0,0,S.W./PP 752,10.5,,S
17 | 850,851,0,3,"Andersson, Master. Sigvard Harald Elias",male,4.0,4,2,347082,31.275,,S
18 | 249,250,0,2,"Carter, Rev. Ernest Courtenay",male,54.0,1,0,244252,26.0,,S
19 | 217,218,0,2,"Jacobsohn, Mr. Sidney Samuel",male,42.0,1,0,243847,27.0,,S
20 | 601,602,0,3,"Slabenoff, Mr. Petco",male,,0,0,349214,7.8958,,S
21 | 199,200,0,2,"Yrois, Miss. Henriette (""Mrs Harbeck"")",female,24.0,0,0,248747,13.0,,S
22 | 843,844,0,3,"Lemberopolous, Mr. Peter L",male,34.5,0,0,2683,6.4375,,C
23 | 512,513,1,1,"McGough, Mr. James Robert",male,36.0,0,0,PC 17473,26.2875,E25,S
24 | 662,663,0,1,"Colley, Mr. Edward Pomeroy",male,47.0,0,0,5727,25.5875,E58,S
25 | 783,784,0,3,"Johnston, Mr. Andrew G",male,,1,2,W./C. 6607,23.45,,S
26 | 410,411,0,3,"Sdycoff, Mr. Todor",male,,0,0,349222,7.8958,,S
27 | 693,694,0,3,"Saad, Mr. Khalil",male,25.0,0,0,2672,7.225,,C
28 | 208,209,1,3,"Carr, Miss. Helen ""Ellen""",female,16.0,0,0,367231,7.75,,Q
29 | 142,143,1,3,"Hakkarainen, Mrs. Pekka Pietari (Elin Matilda Dolck)",female,24.0,1,0,STON/O2. 3101279,15.85,,S
30 | 230,231,1,1,"Harris, Mrs. Henry Birkhardt (Irene Wallach)",female,35.0,1,0,36973,83.475,C83,S
31 | 145,146,0,2,"Nicholls, Mr. Joseph Charles",male,19.0,1,1,C.A. 33112,36.75,,S
32 | 430,431,1,1,"Bjornstrom-Steffansson, Mr. Mauritz Hakan",male,28.0,0,0,110564,26.55,C52,S
33 | 311,312,1,1,"Ryerson, Miss. Emily Borie",female,18.0,2,2,PC 17608,262.375,B57 B59 B63 B66,C
34 | 55,56,1,1,"Woolner, Mr. Hugh",male,,0,0,19947,35.5,C52,S
35 | 153,154,0,3,"van Billiard, Mr. Austin Blyler",male,40.5,0,2,A/5. 851,14.5,,S
36 | 656,657,0,3,"Radeff, Mr. Alexander",male,,0,0,349223,7.8958,,S
37 | 445,446,1,1,"Dodge, Master. Washington",male,4.0,0,2,33638,81.8583,A34,S
38 | 48,49,0,3,"Samaan, Mr. Youssef",male,,2,0,2662,21.6792,,C
39 | 745,746,0,1,"Crosby, Capt. Edward Gifford",male,70.0,1,1,WE/P 5735,71.0,B22,S
40 | 587,588,1,1,"Frolicher-Stehli, Mr. Maxmillian",male,60.0,1,1,13567,79.2,B41,C
41 | 147,148,0,3,"Ford, Miss. Robina Maggie ""Ruby""",female,9.0,2,2,W./C. 6608,34.375,,S
42 | 688,689,0,3,"Fischer, Mr. Eberhard Thelander",male,18.0,0,0,350036,7.7958,,S
43 | 309,310,1,1,"Francatelli, Miss. Laura Mabel",female,30.0,0,0,PC 17485,56.9292,E36,C
44 | 479,480,1,3,"Hirvonen, Miss. Hildur E",female,2.0,0,1,3101298,12.2875,,S
45 | 302,303,0,3,"Johnson, Mr. William Cahoone Jr",male,19.0,0,0,LINE,0.0,,S
46 | 253,254,0,3,"Lobb, Mr. William Arthur",male,30.0,1,0,A/5. 3336,16.1,,S
47 | 149,150,0,2,"Byles, Rev. Thomas Roussel Davids",male,42.0,0,0,244310,13.0,,S
48 | 313,314,0,3,"Hendekovic, Mr. Ignjac",male,28.0,0,0,349243,7.8958,,S
49 | 549,550,1,2,"Davies, Master. John Morgan Jr",male,8.0,1,1,C.A. 33112,36.75,,S
50 | 339,340,0,1,"Blackwell, Mr. Stephen Weart",male,45.0,0,0,113784,35.5,T,S
51 | 47,48,1,3,"O'Driscoll, Miss. Bridget",female,,0,0,14311,7.75,,Q
52 | 747,748,1,2,"Sinkkonen, Miss. Anna",female,30.0,0,0,250648,13.0,,S
53 | 21,22,1,2,"Beesley, Mr. Lawrence",male,34.0,0,0,248698,13.0,D56,S
54 | 879,880,1,1,"Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)",female,56.0,0,1,11767,83.1583,C50,C
55 | 635,636,1,2,"Davis, Miss. Mary",female,28.0,0,0,237668,13.0,,S
56 | 52,53,1,1,"Harper, Mrs. Henry Sleeper (Myna Haxtun)",female,49.0,1,0,PC 17572,76.7292,D33,C
57 | 705,706,0,2,"Morley, Mr. Henry Samuel (""Mr Henry Marshall"")",male,39.0,0,0,250655,26.0,,S
58 | 559,560,1,3,"de Messemaeker, Mrs. Guillaume Joseph (Emma)",female,36.0,1,0,345572,17.4,,S
59 | 854,855,0,2,"Carter, Mrs. Ernest Courtenay (Lilian Hughes)",female,44.0,1,0,244252,26.0,,S
60 | 648,649,0,3,"Willey, Mr. Edward",male,,0,0,S.O./P.P. 751,7.55,,S
61 | 358,359,1,3,"McGovern, Miss. Mary",female,,0,0,330931,7.8792,,Q
62 | 134,135,0,2,"Sobey, Mr. Samuel James Hayden",male,25.0,0,0,C.A. 29178,13.0,,S
63 | 303,304,1,2,"Keane, Miss. Nora A",female,,0,0,226593,12.35,E101,Q
64 | 146,147,1,3,"Andersson, Mr. August Edvard (""Wennerstrom"")",male,27.0,0,0,350043,7.7958,,S
65 | 316,317,1,2,"Kantor, Mrs. Sinai (Miriam Sternin)",female,24.0,1,0,244367,26.0,,S
66 | 565,566,0,3,"Davies, Mr. Alfred J",male,24.0,2,0,A/4 48871,24.15,,S
67 | 846,847,0,3,"Sage, Mr. Douglas Bullen",male,,8,2,CA. 2343,69.55,,S
68 | 420,421,0,3,"Gheorgheff, Mr. Stanio",male,,0,0,349254,7.8958,,C
69 | 284,285,0,1,"Smith, Mr. Richard William",male,,0,0,113056,26.0,A19,S
70 | 376,377,1,3,"Landergren, Miss. Aurora Adelia",female,22.0,0,0,C 7077,7.25,,S
71 | 320,321,0,3,"Dennis, Mr. Samuel",male,22.0,0,0,A/5 21172,7.25,,S
72 | 379,380,0,3,"Gustafsson, Mr. Karl Gideon",male,19.0,0,0,347069,7.775,,S
73 |
--------------------------------------------------------------------------------
/docs/source/examples/substra_core/titanic_example/assets/train_data_samples/data_sample_9/data_sample_9.csv:
--------------------------------------------------------------------------------
1 | ,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
2 | 107,108,1,3,"Moss, Mr. Albert Johan",male,,0,0,312991,7.775,,S
3 | 19,20,1,3,"Masselmani, Mrs. Fatima",female,,0,0,2649,7.225,,C
4 | 187,188,1,1,"Romaine, Mr. Charles Hallace (""Mr C Rolmane"")",male,45.0,0,0,111428,26.55,,S
5 | 298,299,1,1,"Saalfeld, Mr. Adolphe",male,,0,0,19988,30.5,C106,S
6 | 469,470,1,3,"Baclini, Miss. Helene Barbara",female,0.75,2,1,2666,19.2583,,C
7 | 53,54,1,2,"Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkinson)",female,29.0,1,0,2926,26.0,,S
8 | 523,524,1,1,"Hippach, Mrs. Louis Albert (Ida Sophia Fischer)",female,44.0,0,1,111361,57.9792,B18,C
9 | 83,84,0,1,"Carrau, Mr. Francisco M",male,28.0,0,0,113059,47.1,,S
10 | 881,882,0,3,"Markun, Mr. Johann",male,33.0,0,0,349257,7.8958,,S
11 | 328,329,1,3,"Goldsmith, Mrs. Frank John (Emily Alice Brown)",female,31.0,1,1,363291,20.525,,S
12 | 404,405,0,3,"Oreskovic, Miss. Marija",female,20.0,0,0,315096,8.6625,,S
13 | 726,727,1,2,"Renouf, Mrs. Peter Henry (Lillian Jefferys)",female,30.0,3,0,31027,21.0,,S
14 | 184,185,1,3,"Kink-Heilmann, Miss. Luise Gretchen",female,4.0,0,2,315153,22.025,,S
15 | 164,165,0,3,"Panula, Master. Eino Viljami",male,1.0,4,1,3101295,39.6875,,S
16 | 278,279,0,3,"Rice, Master. Eric",male,7.0,4,1,382652,29.125,,Q
17 | 480,481,0,3,"Goodwin, Master. Harold Victor",male,9.0,5,2,CA 2144,46.9,,S
18 | 617,618,0,3,"Lobb, Mrs. William Arthur (Cordelia K Stanlick)",female,26.0,1,0,A/5. 3336,16.1,,S
19 | 428,429,0,3,"Flynn, Mr. James",male,,0,0,364851,7.75,,Q
20 | 628,629,0,3,"Bostandyeff, Mr. Guentcho",male,26.0,0,0,349224,7.8958,,S
21 | 37,38,0,3,"Cann, Mr. Ernest Charles",male,21.0,0,0,A./5. 2152,8.05,,S
22 | 584,585,0,3,"Paulner, Mr. Uscher",male,,0,0,3411,8.7125,,C
23 | 548,549,0,3,"Goldsmith, Mr. Frank John",male,33.0,1,1,363291,20.525,,S
24 | 2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
25 | 268,269,1,1,"Graham, Mrs. William Thompson (Edith Junkins)",female,58.0,0,1,PC 17582,153.4625,C125,S
26 | 244,245,0,3,"Attalah, Mr. Sleiman",male,30.0,0,0,2694,7.225,,C
27 | 695,696,0,2,"Chapman, Mr. Charles Henry",male,52.0,0,0,248731,13.5,,S
28 | 795,796,0,2,"Otter, Mr. Richard",male,39.0,0,0,28213,13.0,,S
29 | 797,798,1,3,"Osman, Mrs. Mara",female,31.0,0,0,349244,8.6833,,S
30 | 209,210,1,1,"Blank, Mr. Henry",male,40.0,0,0,112277,31.0,A31,C
31 | 337,338,1,1,"Burns, Miss. Elizabeth Margaret",female,41.0,0,0,16966,134.5,E40,C
32 | 547,548,1,2,"Padro y Manent, Mr. Julian",male,,0,0,SC/PARIS 2146,13.8625,,C
33 | 574,575,0,3,"Rush, Mr. Alfred George John",male,16.0,0,0,A/4. 20589,8.05,,S
34 | 85,86,1,3,"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gustafsson)",female,33.0,3,0,3101278,15.85,,S
35 | 511,512,0,3,"Webber, Mr. James",male,,0,0,SOTON/OQ 3101316,8.05,,S
36 | 603,604,0,3,"Torber, Mr. Ernst William",male,44.0,0,0,364511,8.05,,S
37 | 558,559,1,1,"Taussig, Mrs. Emil (Tillie Mandelbaum)",female,39.0,1,1,110413,79.65,E67,S
38 | 256,257,1,1,"Thorne, Mrs. Gertrude Maybelle",female,,0,0,PC 17585,79.2,,C
39 | 590,591,0,3,"Rintamaki, Mr. Matti",male,35.0,0,0,STON/O 2. 3101273,7.125,,S
40 | 353,354,0,3,"Arnold-Franchi, Mr. Josef",male,25.0,1,0,349237,17.8,,S
41 | 401,402,0,3,"Adams, Mr. John",male,26.0,0,0,341826,8.05,,S
42 | 80,81,0,3,"Waelens, Mr. Achille",male,22.0,0,0,345767,9.0,,S
43 | 809,810,1,1,"Chambers, Mrs. Norman Campbell (Bertha Griggs)",female,33.0,1,0,113806,53.1,E8,S
44 | 757,758,0,2,"Bailey, Mr. Percy Andrew",male,18.0,0,0,29108,11.5,,S
45 | 644,645,1,3,"Baclini, Miss. Eugenie",female,0.75,2,1,2666,19.2583,,C
46 | 727,728,1,3,"Mannion, Miss. Margareth",female,,0,0,36866,7.7375,,Q
47 | 154,155,0,3,"Olsen, Mr. Ole Martin",male,,0,0,Fa 265302,7.3125,,S
48 | 216,217,1,3,"Honkanen, Miss. Eliina",female,27.0,0,0,STON/O2. 3101283,7.925,,S
49 | 118,119,0,1,"Baxter, Mr. Quigg Edmond",male,24.0,0,1,PC 17558,247.5208,B58 B60,C
50 | 263,264,0,1,"Harrison, Mr. William",male,40.0,0,0,112059,0.0,B94,S
51 | 463,464,0,2,"Milling, Mr. Jacob Christian",male,48.0,0,0,234360,13.0,,S
52 | 741,742,0,1,"Cavendish, Mr. Tyrell William",male,36.0,1,0,19877,78.85,C46,S
53 | 677,678,1,3,"Turja, Miss. Anna Sofia",female,18.0,0,0,4138,9.8417,,S
54 | 564,565,0,3,"Meanwell, Miss. (Marion Ogden)",female,,0,0,SOTON/O.Q. 392087,8.05,,S
55 | 333,334,0,3,"Vander Planke, Mr. Leo Edmondus",male,16.0,2,0,345764,18.0,,S
56 | 198,199,1,3,"Madigan, Miss. Margaret ""Maggie""",female,,0,0,370370,7.75,,Q
57 | 887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0,B42,S
58 | 600,601,1,2,"Jacobsohn, Mrs. Sidney Samuel (Amy Frances Christy)",female,24.0,2,1,243847,27.0,,S
59 | 189,190,0,3,"Turcin, Mr. Stjepan",male,36.0,0,0,349247,7.8958,,S
60 | 31,32,1,1,"Spencer, Mrs. William Augustus (Marie Eugenie)",female,,1,0,PC 17569,146.5208,B78,C
61 | 521,522,0,3,"Vovk, Mr. Janko",male,22.0,0,0,349252,7.8958,,S
62 | 852,853,0,3,"Boulos, Miss. Nourelain",female,9.0,1,1,2678,15.2458,,C
63 | 610,611,0,3,"Andersson, Mrs. Anders Johan (Alfrida Konstantia Brogren)",female,39.0,1,5,347082,31.275,,S
64 | 45,46,0,3,"Rogers, Mr. William John",male,,0,0,S.C./A.4. 23567,8.05,,S
65 | 546,547,1,2,"Beane, Mrs. Edward (Ethel Clarke)",female,19.0,1,0,2908,26.0,,S
66 | 117,118,0,2,"Turpin, Mr. William John Robert",male,29.0,1,0,11668,21.0,,S
67 | 577,578,1,1,"Silvey, Mrs. William Baird (Alice Munger)",female,39.0,1,0,13507,55.9,E44,S
68 | 732,733,0,2,"Knight, Mr. Robert J",male,,0,0,239855,0.0,,S
69 | 65,66,1,3,"Moubarek, Master. Gerios",male,,1,1,2661,15.2458,,C
70 | 768,769,0,3,"Moran, Mr. Daniel J",male,,1,0,371110,24.15,,Q
71 | 259,260,1,2,"Parrish, Mrs. (Lutie Davis)",female,50.0,0,1,230433,26.0,,S
72 | 360,361,0,3,"Skoog, Mr. Wilhelm",male,40.0,1,4,347088,27.9,,S
73 |
--------------------------------------------------------------------------------
/docs/source/examples/substrafl/get_started/torch_fedavg_assets/dataset/description.md:
--------------------------------------------------------------------------------
1 | # Mnist
2 |
3 | This dataset is [THE MNIST DATABASE of handwritten digits](http://yann.lecun.com/exdb/mnist/). It is downloaded from torchvision.
4 |
5 | The target is the number (0 -> 9) represented by the pixels.
6 |
7 | ## Data repartition
8 |
9 | ### Train and test
10 |
11 | ### Split data between organizations
12 |
13 | ## Opener usage
14 |
15 | The opener exposes 2 methods:
16 |
17 | - `get_data` returns a dictionary containing the images and the labels as numpy arrays
18 | - `fake_data` returns a fake data sample of images and labels in a dict
19 |
--------------------------------------------------------------------------------
/docs/source/examples/substrafl/get_started/torch_fedavg_assets/dataset/mnist_dataset.py:
--------------------------------------------------------------------------------
1 | import codecs
2 | import os
3 | import sys
4 | import pathlib
5 |
6 | import numpy as np
7 | from torchvision.datasets import MNIST
8 |
9 |
def get_int(b: bytes) -> int:
    """Interpret ``b`` as an unsigned big-endian integer.

    Args:
        b: Raw bytes, most significant byte first.

    Returns:
        The integer value encoded by ``b``.
    """
    # Go through the hexadecimal text form of the bytes, then parse it in base 16.
    hex_digits = codecs.encode(b, "hex")
    return int(hex_digits, base=16)
12 |
13 |
def MNISTraw2numpy(path: str, strict: bool = True) -> np.ndarray:
    """Read an IDX file (the raw MNIST format) into a numpy array.

    The file starts with a 4-byte big-endian magic number whose last byte is the
    number of dimensions and whose preceding byte is the element type code. It is
    followed by one 4-byte big-endian size per dimension, then the elements.

    Args:
        path: Path of the IDX file to read.
        strict: When True, assert that the number of elements read equals the
            product of the dimension sizes declared in the header.

    Returns:
        The file content as an array shaped like the header declares, stored in
        the native byte order of the machine.

    Raises:
        AssertionError: If the header declares fewer than 1 or more than 3
            dimensions, if the element type code is not supported, or (with
            ``strict``) if the element count does not match the header.
    """
    # IDX element type code -> numpy dtype. Multi-byte types are stored big endian.
    idx_dtypes = {
        0x08: np.dtype(np.uint8),
        0x09: np.dtype(np.int8),
        0x0B: np.dtype(">i2"),
        0x0C: np.dtype(">i4"),
        0x0D: np.dtype(">f4"),
        0x0E: np.dtype(">f8"),
    }

    # read
    with open(path, "rb") as f:
        data = f.read()

    # parse the header
    magic = get_int(data[0:4])
    nd = magic % 256
    type_code = magic // 256
    assert 1 <= nd <= 3
    assert type_code in idx_dtypes
    stored_dtype = idx_dtypes[type_code]
    s = [get_int(data[4 * (i + 1) : 4 * (i + 2)]) for i in range(nd)]

    # bytearray() gives np.frombuffer a mutable buffer, so the result is writable.
    parsed = np.frombuffer(bytearray(data), dtype=stored_dtype, offset=(4 * (nd + 1)))
    # The IDX format is big endian: convert multi-byte elements to the native byte
    # order. This is a no-op (no copy) for single-byte types such as MNIST's uint8.
    parsed = parsed.astype(stored_dtype.newbyteorder("="), copy=False)

    assert parsed.shape[0] == np.prod(s) or not strict
    return parsed.reshape(*s)
35 |
36 |
def setup_mnist(data_path, N_CLIENTS):
    """Download MNIST and write one train/test split per organization.

    The raw files are downloaded with torchvision under ``data_path``, converted
    to numpy arrays, cut into ``N_CLIENTS`` equal parts and saved as::

        <data_path>/org_<k>/train/train_images.npy
        <data_path>/org_<k>/train/train_labels.npy
        <data_path>/org_<k>/test/test_images.npy
        <data_path>/org_<k>/test/test_labels.npy

    for ``k`` from 1 to ``N_CLIENTS``.

    Args:
        data_path: Root folder for the download and the generated splits, as a
            ``str`` or a ``pathlib.Path``.
        N_CLIENTS: Number of organizations to split the data between.

    Raises:
        ValueError: Raised by ``np.split`` when an array cannot be divided into
            ``N_CLIENTS`` parts of equal size.
    """
    # Normalise once so that a plain string works with the "/" joins below.
    root = pathlib.Path(data_path)
    raw_path = root / "MNIST" / "raw"

    # Download the dataset
    MNIST(data_path, download=True)

    # Raw (images, labels) file names for each split
    raw_files = {
        "train": ("train-images-idx3-ubyte", "train-labels-idx1-ubyte"),
        "test": ("t10k-images-idx3-ubyte", "t10k-labels-idx1-ubyte"),
    }

    # Extract numpy arrays from raw data and split them into the number of organizations
    folds = {}
    for split, (images_file, labels_file) in raw_files.items():
        images = MNISTraw2numpy(str(raw_path / images_file))
        labels = MNISTraw2numpy(str(raw_path / labels_file))
        folds[split] = (np.split(images, N_CLIENTS), np.split(labels, N_CLIENTS))

    # Save splits in different folders to simulate the different organizations
    for i in range(N_CLIENTS):
        for split, (image_folds, label_folds) in folds.items():
            split_dir = root / f"org_{i+1}" / split
            split_dir.mkdir(parents=True, exist_ok=True)
            np.save(str(split_dir / f"{split}_images.npy"), image_folds[i])
            np.save(str(split_dir / f"{split}_labels.npy"), label_folds[i])
71 |
--------------------------------------------------------------------------------
/docs/source/examples/substrafl/get_started/torch_fedavg_assets/dataset/mnist_opener.py:
--------------------------------------------------------------------------------
1 | import pathlib
2 | import numpy as np
3 | import substratools as tools
4 |
5 |
class MnistOpener(tools.Opener):
    """Opener for MNIST shards stored as ``*_images.npy`` / ``*_labels.npy`` files."""

    def fake_data(self, n_samples=None):
        """Return randomly generated images and labels.

        Args:
            n_samples: Requested number of samples. Values that are falsy or
                greater than 100 fall back to 100.

        Returns:
            A dict with ``"images"`` (random integers in ``[0, 256)``, shape
            ``(N, 28, 28)``) and ``"labels"`` (random integers in ``[0, 10)``,
            shape ``(N,)``).
        """
        N_SAMPLES = n_samples if n_samples and n_samples <= 100 else 100

        fake_images = np.random.randint(256, size=(N_SAMPLES, 28, 28))

        fake_labels = np.random.randint(10, size=N_SAMPLES)

        data = {"images": fake_images, "labels": fake_labels}

        return data

    def get_data(self, folders):
        """Load the images and labels arrays found in the first folder.

        Args:
            folders: Sequence of folder paths; only ``folders[0]`` is read.

        Returns:
            A dict with the arrays loaded from the first ``*_images.npy`` and
            ``*_labels.npy`` files, under the keys ``"images"`` and ``"labels"``.

        Raises:
            IndexError: If no matching file exists in the folder.
        """
        # get npy files
        p = pathlib.Path(folders[0])
        # glob() already yields paths rooted at p; joining them onto p again
        # would duplicate the prefix whenever p is a relative path.
        images_data_path = list(p.glob("*_images.npy"))[0]
        labels_data_path = list(p.glob("*_labels.npy"))[0]

        # load data
        data = {
            "images": np.load(images_data_path),
            "labels": np.load(labels_data_path),
        }

        return data
31 |
--------------------------------------------------------------------------------
/docs/source/examples/substrafl/get_started/torch_fedavg_assets/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib==3.9.2
2 | numpy==2.1.1
3 | pandas==2.2.2
4 | scikit-learn==1.5.2
5 | substrafl
6 | torch==2.4.1
7 | torchvision==0.19.1
8 |
--------------------------------------------------------------------------------
/docs/source/examples/substrafl/go_further/diabetes_substrafl_assets/dataset/diabetes_substrafl_dataset.py:
--------------------------------------------------------------------------------
1 | from sklearn.datasets import load_diabetes
2 | import pandas as pd
3 | import pathlib
4 |
5 |
def setup_diabetes(data_path: pathlib.Path):
    """Write the scikit-learn diabetes dataset as two per-organisation CSV files.

    Creates ``description.md`` (the dataset description text), plus
    ``org_1/data.csv`` and ``org_2/data.csv`` under ``data_path``. The first
    organisation receives the first two thirds of the rows, the second one
    the remainder.

    Args:
        data_path: Existing folder in which the files are written.
    """
    bunch = load_diabetes(scaled=False)

    readme = data_path / "description.md"
    readme.touch()
    readme.write_text(bunch.DESCR)

    frame = pd.DataFrame(data=bunch.data, columns=bunch.feature_names)
    # Turn the numeric "sex" codes into a categorical column
    sex_labels = {1: "M", 2: "F"}
    frame["sex"] = frame["sex"].replace(sex_labels).astype("category")

    # Uneven split: rows before the cut go to org_1, the rest to org_2
    cut = int(len(frame) * 2 / 3)
    shards = {"org_1": frame.iloc[:cut], "org_2": frame.iloc[cut:]}

    # Create every organisation folder first, then write the data
    for org_name in shards:
        (data_path / org_name).mkdir(exist_ok=True)
    for org_name, shard in shards.items():
        shard.to_csv(data_path / org_name / "data.csv", index=False)
25 |
--------------------------------------------------------------------------------
/docs/source/examples/substrafl/go_further/diabetes_substrafl_assets/dataset/diabetes_substrafl_opener.py:
--------------------------------------------------------------------------------
1 | import pathlib
2 | import numpy as np
3 | import pandas as pd
4 | import substratools as tools
5 |
6 |
class DiabetesOpener(tools.Opener):
    """Opener reading the diabetes CSV file of one organisation."""

    def fake_data(self, n_samples=None):
        """Return a DataFrame of uniform random values with the dataset's columns.

        ``n_samples`` is honoured when it is truthy and at most 100; otherwise
        100 rows are generated.
        """
        columns = ["age", "sex", "bmi", "bp", "s1", "s2", "s3", "s4", "s5", "s6"]
        if n_samples and n_samples <= 100:
            n_rows = n_samples
        else:
            n_rows = 100
        values = np.random.random((n_rows, len(columns)))
        return pd.DataFrame(data=values, columns=columns)

    def get_data(self, folders):
        """Load the first ``*.csv`` file of ``folders[0]``, with ``sex`` as a category."""
        folder = pathlib.Path(folders[0])
        csv_path = next(folder.glob("*.csv"))
        return pd.read_csv(csv_path, dtype={"sex": "category"})
16 |
--------------------------------------------------------------------------------
/docs/source/examples/substrafl/go_further/diabetes_substrafl_assets/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib==3.9.2
2 | scikit-learn==1.5.2
3 | numpy==2.1.1
4 | pandas==2.2.2
5 | substrafl
6 |
--------------------------------------------------------------------------------
/docs/source/examples/substrafl/go_further/sklearn_fedavg_assets/dataset/description.md:
--------------------------------------------------------------------------------
1 | # Iris
2 |
3 | The [IRIS dataset](https://archive.ics.uci.edu/ml/datasets/iris) is perhaps the best known database to be found in the pattern recognition literature. Fisher's paper is a classic in the field and is referenced frequently to this day. (See Duda & Hart, for example.) The data set contains 3 classes of 50 instances each, where each class refers to a type of iris plant. One class is linearly separable from the other 2; the latter are NOT linearly separable from each other.
4 |
5 | It is downloaded using scikit-learn.
6 |
7 | ## Opener usage
8 |
9 | The opener exposes 2 methods:
10 |
11 | - `get_data` returns a dictionary containing the data and the targets as numpy arrays
12 | - `fake_data` returns a dictionary containing randomly generated fake data and targets
13 |
--------------------------------------------------------------------------------
/docs/source/examples/substrafl/go_further/sklearn_fedavg_assets/dataset/iris_dataset.py:
--------------------------------------------------------------------------------
1 | from sklearn import datasets
2 | import numpy as np
3 | import os
4 |
5 |
def setup_iris(data_path: os.PathLike, n_client: int):
    """Shuffle the Iris dataset and write one train/test shard per organization.

    The samples are shuffled with ``np.random.shuffle``; the first 80% become
    the train set and the rest the test set. For every ``i`` in
    ``1..n_client`` the following files are created under ``data_path``:

    - ``org_{i}/train/train_data.npy`` and ``org_{i}/train/train_targets.npy``
    - ``org_{i}/test/test_data.npy`` and ``org_{i}/test/test_targets.npy``

    Args:
        data_path: Root output folder (``str`` or any ``os.PathLike``).
        n_client: Number of organizations to split the data between. The
            split uses ``np.array_split``, so folds may differ in size by one
            sample when a set is not a multiple of ``n_client``.
    """
    # Local import: this module does not import pathlib at the top level.
    import pathlib

    # The "/" joins below only work on pathlib.Path, not on str or a generic
    # os.PathLike, so normalise the argument once.
    data_path = pathlib.Path(data_path)

    iris = datasets.load_iris()

    len_iris = len(iris.data)
    index_iris = np.arange(len_iris)
    np.random.shuffle(index_iris)

    n_train = int(0.8 * len_iris)
    train_index = index_iris[:n_train]
    test_index = index_iris[n_train:]

    all_data = np.array(iris.data)
    all_targets = np.array(iris.target)
    arrays = {
        ("train", "data"): all_data[train_index],
        ("train", "targets"): all_targets[train_index],
        ("test", "data"): all_data[test_index],
        ("test", "targets"): all_targets[test_index],
    }

    # Split arrays into the number of organizations
    folds = {key: np.array_split(array, n_client) for key, array in arrays.items()}

    # Save splits in different folders to simulate the different organizations
    for i in range(n_client):
        for split in ("train", "test"):
            split_dir = data_path / f"org_{i+1}" / split
            os.makedirs(str(split_dir), exist_ok=True)
            for kind in ("data", "targets"):
                np.save(str(split_dir / f"{split}_{kind}.npy"), folds[(split, kind)][i])
45 |
--------------------------------------------------------------------------------
/docs/source/examples/substrafl/go_further/sklearn_fedavg_assets/dataset/iris_opener.py:
--------------------------------------------------------------------------------
1 | import pathlib
2 | import numpy as np
3 | import substratools as tools
4 |
5 |
class IrisOpener(tools.Opener):
    """Opener for Iris shards stored as ``*_data.npy`` / ``*_targets.npy`` files."""

    def fake_data(self, n_samples=None):
        """Return randomly generated features and targets.

        Args:
            n_samples: Requested number of samples. Values that are falsy or
                greater than 100 fall back to 100.

        Returns:
            A dict with the same keys as ``get_data``: ``"data"`` (uniform
            random floats in ``[0, 8)``, shape ``(N, 4)``) and ``"targets"``
            (random integers in ``[0, 3)``, shape ``(N,)``).
        """
        N_SAMPLES = n_samples if n_samples and n_samples <= 100 else 100

        # np.random.rand() takes the dimensions positionally and has no
        # ``size`` keyword, so the previous call always raised TypeError.
        fake_data = np.random.uniform(0, 8, size=(N_SAMPLES, 4))

        fake_targets = np.random.randint(3, size=N_SAMPLES)

        data = {"data": fake_data, "targets": fake_targets}

        return data

    def get_data(self, folders):
        """Load the data and targets arrays found in the first folder.

        Args:
            folders: Sequence of folder paths; only ``folders[0]`` is read.

        Returns:
            A dict with the arrays loaded from the first ``*_data.npy`` and
            ``*_targets.npy`` files, under the keys ``"data"`` and ``"targets"``.

        Raises:
            IndexError: If no matching file exists in the folder.
        """
        # get npy files
        p = pathlib.Path(folders[0])
        # glob() already yields paths rooted at p; joining them onto p again
        # would duplicate the prefix whenever p is a relative path.
        features_path = list(p.glob("*_data.npy"))[0]
        targets_path = list(p.glob("*_targets.npy"))[0]

        # load data
        data = {"data": np.load(features_path), "targets": np.load(targets_path)}

        return data
28 |
--------------------------------------------------------------------------------
/docs/source/examples/substrafl/go_further/sklearn_fedavg_assets/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib==3.9.2
2 | numpy==2.1.1
3 | pandas==2.2.2
4 | scikit-learn==1.5.2
5 | substrafl
--------------------------------------------------------------------------------
/docs/source/examples/substrafl/go_further/torch_cyclic_assets/dataset/cyclic_mnist_dataset.py:
--------------------------------------------------------------------------------
1 | import codecs
2 | import os
3 | import sys
4 | import pathlib
5 |
6 | import numpy as np
7 | from torchvision.datasets import MNIST
8 |
9 |
def get_int(b: bytes) -> int:
    """Decode ``b`` as a big-endian unsigned integer."""
    return int.from_bytes(b, "big")


def MNISTraw2numpy(path: str, strict: bool = True) -> np.ndarray:
    """Parse a raw MNIST IDX file into a ``uint8`` numpy array.

    The header is read as a 4-byte big-endian magic number whose lowest byte
    is the number of dimensions, followed by one 4-byte big-endian size per
    dimension. Everything after the header is interpreted as unsigned bytes.

    Args:
        path: Location of the uncompressed IDX file.
        strict: When True, assert that the payload length matches the
            product of the sizes declared in the header.

    Returns:
        The payload reshaped to the dimensions declared in the header.

    Raises:
        AssertionError: If the header declares fewer than 1 or more than 3
            dimensions, or (with ``strict``) if the payload length does not
            match the declared shape.
    """
    # read
    with open(path, "rb") as f:
        data = f.read()

    # parse the header
    magic = get_int(data[0:4])
    nd = magic % 256
    assert 1 <= nd <= 3, f"unsupported number of dimensions in IDX header: {nd}"
    s = [get_int(data[4 * (i + 1) : 4 * (i + 2)]) for i in range(nd)]

    # Values are single bytes, so byte order is irrelevant and no reversal is
    # needed (the previous reversal branch was unreachable and called a
    # method that numpy arrays do not have).
    # bytearray() keeps the resulting array writable.
    parsed = np.frombuffer(bytearray(data), dtype=np.uint8, offset=4 * (nd + 1))

    assert parsed.shape[0] == np.prod(s) or not strict, (
        f"payload holds {parsed.shape[0]} values but the header declares shape {s}"
    )
    return parsed.reshape(*s)
35 |
36 |
def setup_mnist(data_path, N_CLIENTS):
    """Download MNIST and write one train/test shard per organization.

    For every ``i`` in ``1..N_CLIENTS`` the following files are created under
    ``data_path``:

    - ``org_{i}/train/train_images.npy`` and ``org_{i}/train/train_labels.npy``
    - ``org_{i}/test/test_images.npy`` and ``org_{i}/test/test_labels.npy``

    Args:
        data_path: Root folder (``str`` or ``pathlib.Path``) used both for the
            torchvision download and for the per-organization output.
        N_CLIENTS: Number of organizations to split the data between. The
            split uses ``np.array_split``, so the folds may differ in size by
            one sample when the dataset size is not a multiple of it.
    """
    # Normalise once: the "/" joins below require a pathlib.Path, and callers
    # may pass a plain string.
    data_path = pathlib.Path(data_path)
    raw_path = data_path / "MNIST" / "raw"

    # Download the dataset
    MNIST(data_path, download=True)

    # Extract numpy arrays from raw data
    arrays = {
        ("train", "images"): MNISTraw2numpy(str(raw_path / "train-images-idx3-ubyte")),
        ("train", "labels"): MNISTraw2numpy(str(raw_path / "train-labels-idx1-ubyte")),
        ("test", "images"): MNISTraw2numpy(str(raw_path / "t10k-images-idx3-ubyte")),
        ("test", "labels"): MNISTraw2numpy(str(raw_path / "t10k-labels-idx1-ubyte")),
    }

    # Split arrays into the number of organizations
    folds = {key: np.array_split(array, N_CLIENTS) for key, array in arrays.items()}

    # Save splits in different folders to simulate the different organizations
    for i in range(N_CLIENTS):
        for split in ("train", "test"):
            split_dir = data_path / f"org_{i+1}" / split
            os.makedirs(str(split_dir), exist_ok=True)
            for kind in ("images", "labels"):
                np.save(str(split_dir / f"{split}_{kind}.npy"), folds[(split, kind)][i])
70 |
--------------------------------------------------------------------------------
/docs/source/examples/substrafl/go_further/torch_cyclic_assets/dataset/cyclic_mnist_opener.py:
--------------------------------------------------------------------------------
1 | import pathlib
2 | import numpy as np
3 | import substratools as tools
4 |
5 |
class MnistOpener(tools.Opener):
    """Opener for MNIST shards stored as ``*_images.npy`` / ``*_labels.npy`` files."""

    def fake_data(self, n_samples=None):
        """Return randomly generated images and labels.

        Args:
            n_samples: Requested number of samples. Values that are falsy or
                greater than 100 fall back to 100.

        Returns:
            A dict with ``"images"`` (random integers in ``[0, 256)``, shape
            ``(N, 28, 28)``) and ``"labels"`` (random integers in ``[0, 10)``,
            shape ``(N,)``).
        """
        N_SAMPLES = n_samples if n_samples and n_samples <= 100 else 100

        fake_images = np.random.randint(256, size=(N_SAMPLES, 28, 28))

        fake_labels = np.random.randint(10, size=N_SAMPLES)

        data = {"images": fake_images, "labels": fake_labels}

        return data

    def get_data(self, folders):
        """Load the images and labels arrays found in the first folder.

        Args:
            folders: Sequence of folder paths; only ``folders[0]`` is read.

        Returns:
            A dict with the arrays loaded from the first ``*_images.npy`` and
            ``*_labels.npy`` files, under the keys ``"images"`` and ``"labels"``.

        Raises:
            IndexError: If no matching file exists in the folder.
        """
        # get npy files
        p = pathlib.Path(folders[0])
        # glob() already yields paths rooted at p; joining them onto p again
        # would duplicate the prefix whenever p is a relative path.
        images_data_path = list(p.glob("*_images.npy"))[0]
        labels_data_path = list(p.glob("*_labels.npy"))[0]

        # load data
        data = {
            "images": np.load(images_data_path),
            "labels": np.load(labels_data_path),
        }

        return data
31 |
--------------------------------------------------------------------------------
/docs/source/examples/substrafl/go_further/torch_cyclic_assets/dataset/description.md:
--------------------------------------------------------------------------------
1 | # Mnist
2 |
3 | This dataset is [THE MNIST DATABASE of handwritten digits](http://yann.lecun.com/exdb/mnist/). It is downloaded using torchvision.
4 |
5 | The target is the number (0 -> 9) represented by the pixels.
6 |
7 | ## Data repartition
8 |
9 | ### Train and test
10 |
11 | ### Split data between organizations
12 |
13 | ## Opener usage
14 |
15 | The opener exposes 2 methods:
16 |
17 | - `get_data` returns a dictionary containing the images and the labels as numpy arrays
18 | - `fake_data` returns a fake data sample of images and labels in a dict
19 |
--------------------------------------------------------------------------------
/docs/source/examples/substrafl/go_further/torch_cyclic_assets/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib==3.9.2
2 | numpy==2.1.1
3 | pandas==2.2.2
4 | scikit-learn==1.5.2
5 | substrafl
6 | torch==2.4.1
7 | torchvision==0.19.1
8 |
--------------------------------------------------------------------------------
/docs/source/examples/substrafl/index.rst:
--------------------------------------------------------------------------------
1 | SubstraFL examples
2 | ==================
3 |
4 | The examples below are compatible with SubstraFL |substrafl_version|.
5 |
6 |
7 | Example to get started using the PyTorch interface
8 | **************************************************
9 |
10 | .. nbgallery::
11 | get_started/run_mnist_torch.ipynb
12 |
13 | Example to go further
14 | *********************
15 |
16 | .. nbgallery::
17 | go_further/run_iris_sklearn.ipynb
18 | go_further/run_diabetes_substrafl.ipynb
19 | go_further/run_mnist_cyclic.ipynb
20 |
--------------------------------------------------------------------------------
/docs/source/how-to/deploying-substra/howto/existing-volumes.rst:
--------------------------------------------------------------------------------
1 | ***************************
2 | How-to use existing volumes
3 | ***************************
4 |
5 | By default, Substra instantiates PersistentVolumeClaims (PVCs) on the fly, which is generally convenient; but this might not suit your needs, in which case Substra can also use preexisting PVCs rather than make new ones.
6 |
7 | For example, you could make a copy of each volume from a Substra deployment and then create a new one configured to use the copies -- thus making a clone of the original instance.
8 |
9 | .. note::
10 | Substra will still instantiate PVCs on the fly! But this is only for moving data around during jobs, so the instantiated PVCs can generally be ignored and should not be backed up.
11 |
12 | Backend values:
13 |
14 | .. code-block:: yaml
15 |
16 | server:
17 | persistence:
18 | servermedias:
19 | existingClaim: "serverPVC"
20 | postgresql:
21 | primary:
22 | persistence:
23 | existingClaim: "psqlPVC"
24 | redis:
25 | master:
26 | persistence:
27 | existingClaim: "redisPVC"
28 | docker-registry:
29 | persistence:
30 | existingClaim: "registryPVC"
31 | minio:
32 | persistence:
33 | existingClaim: "minioPVC"
34 |
35 | Orchestrator values (in standalone mode, which is the default):
36 |
37 | .. code-block:: yaml
38 |
39 | postgresql:
40 | primary:
41 | persistence:
42 | existingClaim: "orcpsqlPVC"
--------------------------------------------------------------------------------
/docs/source/how-to/deploying-substra/howto/external-database.rst:
--------------------------------------------------------------------------------
1 | .. _ops howto external database:
2 |
3 | *******************************
4 | How-to use an external database
5 | *******************************
6 |
7 | By default, Substra components use their own integrated postgres databases (one per backend, and one for the orchestrator in centralized mode).
8 |
9 | They can be pointed to any PostgreSQL instance (version 11 or better).
10 |
11 | The backend and orchestrator use the same structure in their values:
12 |
13 | .. code-block:: yaml
14 |
15 | database:
16 | host: my.db.com
17 | port: 5432
18 |
19 | auth:
20 | username: my-username
21 | password: my-password
22 | database: my-substra-db
23 |
24 | Or, for improved security, you can create a secret with your database credentials, under the ``DATABASE_PASSWORD`` and ``DATABASE_USERNAME`` keys. Secrets can be `made very secure `_ but this is the basic example:
25 |
26 | .. code-block:: yaml
27 |
28 | apiVersion: v1
29 | kind: Secret
30 | metadata:
31 | name: my-db-secret
32 | stringData:
33 | DATABASE_PASSWORD: my-password
34 | DATABASE_USERNAME: my-username
35 |
36 | And then point to it in the values, instead of using username & password:
37 |
38 | .. code-block:: yaml
39 |
40 | database:
41 | host: my.db.com
42 | port: 5432
43 |
44 | auth:
45 | database: my-substra-db
46 | credentialsSecretName: my-db-secret
--------------------------------------------------------------------------------
/docs/source/how-to/deploying-substra/howto/sso-oidc.rst:
--------------------------------------------------------------------------------
1 | *************************************
2 | How-to set up SSO with OpenID Connect
3 | *************************************
4 |
5 | Substra supports external user management through OpenID Connect (OIDC). It is done per-backend, so each organization can use their own user provider and cohabit on the same network.
6 |
7 | OIDC users cannot be created as admins: you'll still need to create at least one admin account as normal, through ``addAccountOperator.users``. OIDC users will all be assigned to a single channel.
8 |
9 |
10 | Set up the provider
11 | ===================
12 |
13 | First, set up an OIDC client at an identity provider (IP -- or OpenID provider, OP).
14 |
15 | The only claims Substra needs are ``openid email``, which any provider should be able to support. Allow ``/oidc/callback`` as a redirect URI.
16 |
17 | Get your **provider URL**. Appending ``/.well-known/openid-configuration`` to this URL should return a JSON description of the provider's capabilities, which Substra will use for much of the configuration. Otherwise, you can set endpoints by hand under ``oidc.provider.endpoints``.
18 |
19 | The provider will give you a **client id** and a **client secret**. Deploy them on the cluster in a secret:
20 |
21 | .. code-block:: yaml
22 |
23 | apiVersion: v1
24 | kind: Secret
25 | metadata:
26 | name: oidc-secret
27 | stringData:
28 | OIDC_RP_CLIENT_ID: "CLIENT_ID"
29 | OIDC_RP_CLIENT_SECRET: "CLIENT_SECRET"
30 |
31 |
32 | Set up user creation
33 | ====================
34 |
35 | When a user first logs in through OIDC, they are assigned a username based on their email address. The ``oidc.users.appendDomain`` flag controls whether email domain is included.
36 |
37 | You must choose one user creation process:
38 |
39 | * Set up a default channel by setting ``oidc.users.channel`` to the name of an existing channel (see the value of ``orchestrator.channels``). OIDC users will be able to use the platform right away.
40 | * Alternatively, set ``oidc.users.requireApproval`` to ``true``: after their first login, OIDC users will have to wait for manual approval from an administrator (on the web frontend).
41 |
42 | .. admonition:: Note on user validity
43 |
44 | Substra OIDC user accounts will remain valid for a while after the corresponding account at the provider has been disabled; this can be an issue if, for instance, an employee has been recently terminated but still has access to the Substra instance.
45 |
46 | This can be mitigated through ``oidc.users.loginValidityDuration``: accounts that have not logged in in this amount of time (seconds) are disabled until the user logs in again. The API tokens associated with their account stop working as well, but will work again when they refresh their login.
47 |
48 | To avoid irritating users with frequent login prompts, Substra will attempt to do this in the background, making all this invisible to users. However this requires the provider to support offline access and refresh tokens -- not all do, and implementations vary.
49 |
50 | Automated login refresh is enabled by default through the setting ``oidc.users.useRefreshToken``, but Substra will disable it and fall back to the manual mode (actual login prompts) if it can't detect provider support.
51 |
52 | If you are using automated login refresh, you can set ``oidc.users.loginValidityDuration`` to a low value to slightly increase security at a small cost in server load. Otherwise, it is a balance of security versus user convenience.
53 |
54 |
55 | Other settings
56 | ==============
57 |
58 | If OIDC users will be using the Substra API (for instance if they are data scientists running Python scripts), they'll need to generate API tokens on the web frontend and use those in their scripts.
59 |
60 | Having to generate new tokens all the time is a hindrance for the users: you can increase their lifetime through ``config.EXPIRY_TOKEN_LIFETIME`` in the backend values.
61 |
62 |
63 | Putting it all together
64 | =======================
65 |
66 | Example of a minimal working configuration in the backend values:
67 |
68 | .. code-block:: yaml
69 |
70 | config:
71 | EXPIRY_TOKEN_LIFETIME: "10080" # one week, in minutes
72 | oidc:
73 | enabled: true
74 | clientSecretName: oidc-secret # set earlier
75 | provider:
76 | url: "PROVIDER_URL"
77 | displayName: "PROVIDER_NAME" # will be displayed on the login page
78 | users:
79 | channel: "CHANNEL_ID"
80 |
--------------------------------------------------------------------------------
/docs/source/how-to/deploying-substra/index.rst:
--------------------------------------------------------------------------------
1 | How-to guides for deploying Substra
2 | ===================================
3 |
4 | This section is of concern if you are **Deploying Substra in production**.
5 |
6 | Familiarity with infrastructure, and Kubernetes in particular, is recommended.
7 |
8 |
9 | :ref:`The walkthrough guide <ops walkthrough>` takes you step by step through deploying a production environment.
10 | More specific how-to guides cover additional points.
11 |
12 | :ref:`ops upgrade notes` cover relevant changes when upgrading from one version to the next.
13 |
14 | The :ref:`compatibility table` contains a reference of Substra versions compatible with one another.
15 |
16 | .. toctree::
17 | :maxdepth: 2
18 | :hidden:
19 |
20 | walkthrough.rst
21 | howto/customize-compute-pod-node.rst
22 | howto/existing-volumes.rst
23 | howto/external-database.rst
24 | howto/sso-oidc.rst
25 | upgrade-notes.rst
26 |
27 |
28 | Substra is meant to be deployed as part of a federated learning network. Each participant *organization* will set up their own *Substra node*, from which their users can connect to the network and run machine learning algorithms on the data registered by each participant on its own node.
29 |
30 | .. image:: ../../_static/schemes/stack-technical-scheme.svg
31 | :width: 800
32 | :align: center
33 | :alt: Substra Components Scheme
34 |
35 | The terms *Substra node* and *Substra organization* are practically interchangeable.
36 |
37 | Substra is distributed as Helm charts, running on Kubernetes 1.19 and up. Each component has its own Helm chart; the charts are hosted at https://substra.github.io/charts.
38 |
39 |
40 | Hardware requirements
41 | ---------------------
42 |
43 | Each backend needs the following resources to run Substra:
44 |
45 | * 8 CPU
46 | * 30 GB of RAM
47 | * 300 GB of storage
48 |
49 | In addition, you need to consider the resources required by the compute tasks. For example, if each task needs 10 GB of RAM and you have two tasks running in parallel for a single backend, you will need a total of 50 GB of RAM (30 GB + 2*10 GB). The same applies to CPU usage and storage requirements (datasets and models).
50 |
51 | The orchestrator needs the following resources:
52 |
53 | * 4 CPU
54 | * 16 GB of RAM
55 | * 100 GB of storage
56 |
57 |
--------------------------------------------------------------------------------
/docs/source/how-to/deploying-substra/walkthrough.rst:
--------------------------------------------------------------------------------
1 | .. _ops walkthrough:
2 |
3 | *********************
4 | How-to deploy Substra
5 | *********************
6 |
7 | This section will guide you through a production deployment with two Substra nodes.
8 |
9 | We will deploy an orchestrator and two Substra nodes (for two organizations, called ``ingen`` and ``biotechnica``) communicating over TLS on the internet. This is how the application is configured for running on actual healthcare data.
10 |
11 | .. toctree::
12 | :glob:
13 | :titlesonly:
14 | :numbered:
15 |
16 | walkthrough/*
17 |
18 |
--------------------------------------------------------------------------------
/docs/source/how-to/deploying-substra/walkthrough/10-prerequisites.rst:
--------------------------------------------------------------------------------
1 | *******************
2 | Check Prerequisites
3 | *******************
4 |
5 | Substra version
6 | ===============
7 |
8 | Substra is a set of microservices which are together issued a version number; but, since we are installing the services one by one, we need to know the actual version of each one.
9 |
10 | Check the :ref:`compatibility table` for the Helm chart version needed for the orchestrator, backend and frontend. The corresponding Docker app version is already configured in there, so it's all you need.
11 |
12 | Local tools
13 | ===========
14 |
15 | Install:
16 | - kubectl
17 | - helm
18 |
19 | Add the Substra helm repository:
20 | .. code-block:: shell
21 |
22 | helm repo add substra https://substra.github.io/charts/
23 | helm repo update
24 |
25 | .. Leaving kubectl and helm purposefully unlinked since they are part of the basics for this kind of work
26 |
27 | Also install:
28 | - ``curl`` for making sure the HTTP endpoints work
29 | - `gRPCurl `_ for making sure the gRPC endpoint works
30 |
31 |
32 | Infrastructure
33 | ==============
34 |
35 | Substra is a federated learning tool and as such it makes little sense to have only one node running, or nodes running on the same cluster merely separated by a namespace.
36 |
37 | Therefore, in this guide we are deploying on two separate Kubernetes clusters, connecting them through the internet.
38 |
39 | Throughout the guide we are giving hostnames to endpoints. On the internet, this means owning a domain name and setting up DNS -- **every time you see** ``DOMAIN``, **it means your own domain** you are setting this up under.
40 |
41 | Exposing on the internet also means dealing with a certificate authority -- here we're using `Let's Encrypt `__.
42 |
43 | .. note::
44 | It is entirely possible to host multiple Substra nodes on the same cluster, and/or to have them communicate on a private network with a private CA, and/or to attribute hostnames differently.
45 |
46 |
47 | In practice
48 | -----------
49 |
50 | Clusters
51 | ^^^^^^^^
52 |
53 | Set up two clusters -- they have to support allocating PVCs on the fly and opening ingresses to the Internet. For this, we'd recommend using a managed Kubernetes service such as `Google GKE `__, `Azure AKS `__, or `Amazon EKS `__.
54 |
55 | **We'll henceforth refer to the clusters we have set up as** ``cluster-1`` **and** ``cluster-2`` **.**
56 |
57 | We also need some software for routing (ingress-nginx) and certificate management (cert-manager); install both on each cluster (insert your email address in place of ``YOUR_EMAIL_HERE``):
58 |
59 | .. code-block:: shell
60 | :emphasize-lines: 20,35
61 |
62 | helm upgrade --install ingress-nginx ingress-nginx \
63 | --repo https://kubernetes.github.io/ingress-nginx \
64 | --namespace ingress-nginx --create-namespace
65 |
66 | helm upgrade --install \
67 | cert-manager cert-manager \
68 | --repo https://charts.jetstack.io \
69 | --namespace cert-manager \
70 | --create-namespace \
71 | --set installCRDs=true
72 |
73 | kubectl apply -f - << "EOF"
74 | apiVersion: cert-manager.io/v1
75 | kind: ClusterIssuer
76 | metadata:
77 | name: letsencrypt-staging
78 | spec:
79 | acme:
80 | server: https://acme-staging-v02.api.letsencrypt.org/directory
81 | email: YOUR_EMAIL_HERE
82 | privateKeySecretRef:
83 | name: letsencrypt-staging
84 | solvers:
85 | - http01:
86 | ingress:
87 | class: nginx
88 | ---
89 | apiVersion: cert-manager.io/v1
90 | kind: ClusterIssuer
91 | metadata:
92 | name: letsencrypt-prod
93 | spec:
94 | acme:
95 | server: https://acme-v02.api.letsencrypt.org/directory
96 | email: YOUR_EMAIL_HERE
97 | privateKeySecretRef:
98 | name: letsencrypt-prod
99 | solvers:
100 | - http01:
101 | ingress:
102 | class: nginx
103 | EOF
104 |
105 | This also sets up ``letsencrypt-prod`` as an issuer of certificates (for endpoints exposed on the internet) and ``letsencrypt-staging`` to issue development certificates.
106 |
107 | DNS
108 | ^^^
109 |
110 | Probably the most convenient way to handle DNS is to set a wildcard record for each cluster and forget about it. Once you have installed nginx-ingress-controller, the corresponding service should have received an IP address you can then set in the DNS:
111 |
112 | .. code-block::
113 | :caption: DNS zone file for ``DOMAIN``
114 |
115 | *.cluster-1 300 IN A NGINX_1_IP
116 | *.cluster-2 300 IN A NGINX_2_IP
117 |
118 | This way, any hostname such as ``whatever.cluster-1.DOMAIN`` directs to the same endpoint, which itself directs the traffic to the correct service based on hostname (this is what the Ingress objects are for).
119 |
120 |
--------------------------------------------------------------------------------
/docs/source/how-to/deploying-substra/walkthrough/30-backend-deployment.rst:
--------------------------------------------------------------------------------
1 | ******************
2 | Deploy the backend
3 | ******************
4 |
5 | This section details deploying a node for the ``ingen`` organization onto ``cluster-1``.
6 |
7 | You will need to repeat this for ``biotechnica`` onto ``cluster-2``, with the appropriate values changed.
8 |
9 | The backend depends on the orchestrator and will fail to run if the orchestrator is not available and operational.
10 |
11 | Prepare your Helm values
12 | ========================
13 |
14 | .. seealso::
15 | Full reference on `Artifact Hub `_.
16 |
17 | To configure your values:
18 |
19 | #. Create a Helm values file named ``backend-ingen-values.yaml`` with the following content:
20 |
21 | .. code-block:: yaml
22 |
23 | organizationName: ingen
24 |
25 |
26 | #. Configure your Substra backend Ingress. In the ``backend-ingen-values.yaml`` file add the following content:
27 |
28 | .. code-block:: yaml
29 |
30 | config:
31 | ALLOWED_HOSTS: '[".cluster-1.DOMAIN"]'
32 |
33 | server:
34 | defaultDomain: https://api.cluster-1.DOMAIN:443
35 | commonHostDomain: cluster-1.DOMAIN
36 |
37 | ingress:
38 | enabled: true
39 | hostname: api.cluster-1.DOMAIN
40 |
41 | .. caution::
42 | For ``ALLOWED_HOSTS``, note that the leading dot is important.
43 |
44 | #. Configure your connection to the orchestrator. In the ``backend-ingen-values.yaml`` file add the following content:
45 |
46 | .. code-block:: yaml
47 |
48 | orchestrator:
49 | host: ORCHESTRATOR_HOSTNAME
50 | port: ORCHESTRATOR_PORT
51 | mspID: ingen
52 | sameCluster: ORCHESTRATOR_SAME_CLUSTER
53 |
54 |
55 | | ``ORCHESTRATOR_HOSTNAME`` should be ``orchestrator.cluster-1.DOMAIN`` if you are *outside* the cluster, but if you are working on ``cluster-1`` you should use its local name ``orchestrator-server.orchestrator`` (following the ``service-name.namespace`` convention).
56 | | ``ORCHESTRATOR_PORT`` should be ``443`` if TLS is enabled, otherwise ``80``.
57 | | ``ORCHESTRATOR_SAME_CLUSTER`` should be ``true`` if the backend is in the same cluster as the orchestrator, otherwise ``false``.
58 |
59 | .. _backend-channel-config:
60 |
61 | #. Configure your :term:`Substra Channels `.
62 | In the ``backend-ingen-values.yaml`` file, add the following content under the ``orchestrator`` key:
63 |
64 | .. code-block:: yaml
65 |
66 | channels:
67 | - our-channel:
68 | restricted: false
69 | model_export_enabled: true
70 | chaincode:
71 | name: mycc
72 |
73 | | The channel name is ``our-channel``, as configured in :ref:`Orchestrator Substra Channels `.
74 | | ``restricted`` would prevent other organizations from joining the channel
75 | | ``model_export_enabled`` allows users from this channel to download models produced by the platform
76 |
77 | #. Optional: If your orchestrator has TLS enabled:
78 |
79 | #. Retrieve the CA certificate from your orchestrator:
80 |
81 | The CA certificate is the ``orchestrator-ca.crt`` file generated at the :ref:`Generate your Certificate Authority certificate ` step of the orchestrator deployment.
82 | If a public Certificate Authority was used to generate the orchestrator certificate, you need to fetch the certificate of the Certificate Authority.
83 |
84 | #. Create a ConfigMap containing the CA certificate:
85 |
86 | .. code-block:: bash
87 |
88 | kubectl create configmap orchestrator-cacert --from-file=ca.crt=orchestrator-ca.crt
89 |
90 | #. Configure your backend to enable orchestrator TLS. In the ``backend-ingen-values.yaml`` file add the following content under the ``orchestrator`` key:
91 |
92 | .. code-block:: yaml
93 |
94 | tls:
95 | enabled: true
96 | cacert: orchestrator-cacert
97 |
98 | #. Add users to your backend. In the ``backend-ingen-values.yaml`` file add the following content:
99 |
100 | .. code-block:: yaml
101 |
102 | addAccountOperator:
103 | users:
104 | - name: admin
105 | secret: an3xtr4lengthyp@ssword
106 | channel: our-channel
107 |
108 | | The password must be at least 20 characters long.
109 |
110 |
111 | Deploy the Chart
112 | ================
113 |
114 | #. Deploy the backend Helm chart:
115 |
116 | .. code-block:: bash
117 |
118 | helm install backend substra/substra-backend --version VERSION --values backend-ingen-values.yaml --namespace ingen --create-namespace
119 |
120 | | Replace ``VERSION`` with the version of the Substra backend helm chart you want to deploy.
121 |
122 | #. Validate:
123 |
124 | .. code-block:: shell
125 |
126 | curl -kL api.cluster-1.DOMAIN
127 |
128 | Should return a ``401`` with the message:
129 |
130 | .. code-block:: javascript
131 |
132 | {"detail":"Authentication credentials were not provided."}
133 |
134 | Execution Problems
135 | ==================
136 |
137 | Once everything is deployed, if there are execution problems when adding a function to Substra, they may be related to the network policies.
138 |
139 | #. Check the log of the pod ``backend-substra-backend-builder-0``
140 |
141 | .. code-block:: bash
142 |
143 | kubectl logs backend-substra-backend-builder-0 -n ingen
144 |
145 | #. If there is an ``HTTPSConnectionPool(host='10.43.0.1', port=443)`` error, modify the following network policies:
146 |
147 | Remove all the network policies except the ``substra-backend-internet-egress`` network policy.
148 |
149 | Add the following lines inside the ``to`` section of the ``substra-backend-api-server-egress`` network policy:
150 |
151 | .. code-block:: yaml
152 |
153 | - to:
154 | - ipBlock:
155 | cidr: 0.0.0.0/0
156 |
157 |
--------------------------------------------------------------------------------
/docs/source/how-to/deploying-substra/walkthrough/50-frontend-deployment.rst:
--------------------------------------------------------------------------------
1 | *******************
2 | Deploy the frontend
3 | *******************
4 |
5 | The Substra frontend is a standalone Helm chart that only needs to be told under what URL the backend API is to be contacted.
6 |
7 | This section details setting up the ``ingen`` frontend on ``cluster-1``, making it available at ``substra.cluster-1.DOMAIN``.
8 |
9 | Naturally this could be repeated for ``biotechnica`` onto ``cluster-2``, with the appropriate values changed.
10 |
11 | Update the backend values
12 | =========================
13 |
14 | Tell the backend to set the proper headers for cross-origin resources, by adding new values in ``backend-ingen-values.yaml``:
15 |
16 | .. code-block:: yaml
17 |
18 | config:
19 | CORS_ORIGIN_WHITELIST: '["https://substra.cluster-1.DOMAIN"]' # this is a string parsed as a JSON list
20 | CSRF_TRUSTED_ORIGINS: '["https://substra.cluster-1.DOMAIN"]'
21 | CORS_ALLOW_CREDENTIALS: 'true'
22 | # you should already have ALLOWED_HOSTS under "config"
23 |
24 | Prepare your Helm values
25 | ========================
26 |
27 | .. seealso::
28 | Full reference on `Artifact Hub `_.
29 |
30 | Create a file for your values, say ``frontend-ingen-values.yaml``.
31 |
32 | Tell the frontend the backend API url:
33 |
34 | .. code-block:: yaml
35 |
36 | api:
37 | url: "https://api.cluster-1.DOMAIN"
38 |
39 | Expose the service with the included ingress:
40 |
41 | .. code-block:: yaml
42 |
43 | ingress:
44 | hosts:
45 | - host: substra.cluster-1.DOMAIN
46 | paths: ['/']
47 | tls:
48 | - hosts:
49 | - substra.cluster-1.DOMAIN
50 | secretName: substra-frontend-tls
51 |
52 | Deploy the Chart
53 | ================
54 |
55 | Deploy with Helm, like the backend:
56 |
57 | .. code-block:: shell
58 |
59 | helm install frontend substra/substra-frontend --version VERSION --values frontend-ingen-values.yaml --namespace ingen
60 |
61 | Validate with a web browser; you can log in as ``admin`` with the password ``an3xtr4lengthyp@ssword``, which we set up in the backend values earlier.
--------------------------------------------------------------------------------
/docs/source/how-to/deploying-substra/walkthrough/60-mtls-setup.rst:
--------------------------------------------------------------------------------
1 | .. _ops set up mutual TLS:
2 |
3 | *****************
4 | Set up mutual TLS
5 | *****************
6 |
7 | This section details setting up mTLS communication between the backends and the orchestrator.
8 |
9 | In this scenario, the orchestrator acts as the certificate authority checking the certificates.
10 | These instructions have to be repeated for each backend.
11 |
12 | This guide assumes that you already have followed the instructions to :ref:`ops set up TLS`.
13 |
14 | Generate backend Certificate Signing Request and signing key
15 | ============================================================
16 |
17 | The first step is to generate the Certificate Signing Request and a signing key for the :term:`organization`.
18 |
19 | .. code:: bash
20 |
21 | openssl req -newkey rsa:2048 -nodes -keyout ORGNAME.key -subj "/O=ORGNAME/CN=HOSTNAME" -out ORGNAME.csr
22 |
23 | | Replace ``ORGNAME`` with your :term:`organization` name.
24 | It should be the same as the value you put in your ``values.yaml`` file for the key ``orchestrator.mspID``.
25 | | Replace ``HOSTNAME`` with the hostname of your substra backend.
26 |
27 | Then send the file named ``ORGNAME.csr`` to the organization managing the orchestrator for them to sign your certificate.
28 |
29 | Sign the Substra backend certificate
30 | ====================================
31 |
32 | Now that you have the Certificate Signing Request from your backend in your orchestrator, you can sign it with the orchestrator certificate authority.
33 |
34 | Navigate to the directory where the files ``orchestrator-ca.crt`` and ``orchestrator-ca.key`` are located (created during :ref:`ops set up TLS`), and sign the certificates:
35 |
36 | .. code:: bash
37 |
38 | openssl x509 -req -days 365 -in ORGNAME.csr -CA orchestrator-ca.crt -CAkey orchestrator-ca.key -CAcreateserial -out ORGNAME.crt -sha256
39 |
40 | | Replace ``ORGNAME`` with the :term:`organization` name.
41 |
42 | .. caution::
43 | We don't recommend keeping your certificate valid for a year (``365`` days in the previous command); you should change this value based on your company policy.
44 |
45 | Then send the file named ``ORGNAME.crt`` back to the organization managing the Substra backend. You don't need to keep a copy of this certificate.
46 |
47 | Update backend configuration
48 | ============================
49 |
50 | Once you received the certificate (named ``ORGNAME.crt``), you can create a secret in the Kubernetes cluster containing this file and the file ``ORGNAME.key``:
51 |
52 | .. code-block:: bash
53 |
54 | kubectl create secret tls orchestrator-client-cert --cert=ORGNAME.crt --key=ORGNAME.key
55 |
56 | To use this certificate, update or create the backend ``backend-values.yaml`` config file and add the following lines:
57 |
58 | .. code-block:: yaml
59 |
60 | orchestrator:
61 | tls:
62 | enabled: true
63 | cacert: orchestrator-cacert
64 | mtls:
65 | enabled: true
66 | clientCertificate: orchestrator-client-cert
67 |
68 | Note that you need to have the orchestrator TLS enabled for this to work.
69 |
70 | If your backend is already running, apply the changes (this will restart it):
71 |
72 | .. code-block:: bash
73 |
74 | helm upgrade RELEASE-NAME substra/substra-backend --version VERSION --values backend-values.yaml
75 |
76 | | Replace ``RELEASE-NAME`` with the name of your substra backend release. You can retrieve it with ``helm list``.
77 | | Replace ``VERSION`` with the version of the substra backend helm chart you want to deploy.
78 | If you don't want to change version you can retrieve your currently deployed version with ``helm list``.
79 |
80 | Update orchestrator configuration
81 | =================================
82 |
83 | Finally, create or update the orchestrator values ``orchestrator-values.yaml`` config file with the following values:
84 |
85 | .. code-block:: yaml
86 |
87 | orchestrator:
88 | tls:
89 | enabled: true
90 | mtls:
91 | enabled: true
92 | clientCACerts:
93 | orchestrator:
94 | - orchestrator-tls-cacert
95 |
96 | Here we just put the orchestrator CA cert as a validation certificate.
97 |
98 | If your client certs were signed by another authority that you trust you would need to add them as configmaps to your cluster and reference them here.
99 | In our example, the key ``orchestrator`` is the name of the organization that depends on this CA (it can be any arbitrary name).
100 | The items are the names of the configmaps you wish to load; note that the object in each configmap should be named ``ca.crt``.
101 |
102 | If your orchestrator is already running, apply the changes (this will restart it):
103 |
104 | .. code-block:: bash
105 |
106 | helm upgrade RELEASE-NAME substra/orchestrator --version VERSION --values orchestrator-values.yaml
107 |
108 | | Replace ``RELEASE-NAME`` with the name of your orchestrator release. You can retrieve it with ``helm list``.
109 | | Replace ``VERSION`` with the version of your orchestrator. You can retrieve the currently deployed version with ``helm list``.
110 |
--------------------------------------------------------------------------------
/docs/source/how-to/developing-substra/harbor.rst:
--------------------------------------------------------------------------------
1 | **************************
2 | Harbor in local deployment
3 | **************************
4 |
5 |
6 |
7 | This page describes how to use Harbor in our development setup, using skaffold. It is divided into three sections: the first describes how to use a Harbor registry inside the ``k3d`` cluster, the second describes how to use a Harbor registry outside of the cluster, and the third describes the set-up that is needed in both cases.
8 |
9 |
10 |
11 | Use in-cluster Harbor
12 | =====================
13 |
14 | First-time set-up (specific to in-cluster)
15 | ------------------------------------------
16 |
17 | As ``sudo``, add ``127.0.0.1 registry.org-2.com`` to ``/etc/hosts``
18 |
19 | .. code:: bash
20 |
21 | echo "127.0.0.1 registry.org-2.com" | sudo tee -a /etc/hosts
22 |
23 | Use
24 | ---
25 |
26 | 1. Set ``HARBOR_CERT_PATH`` to point to the absolute path to ``orchestrator/examples/tools/ca.crt``
27 |
28 | .. code-block:: bash
29 |
30 | export HARBOR_CERT_PATH=/orchestrator/examples/tools/ca.crt
31 |
32 | 2. Re-create the cluster and launch skaffold on the orchestrator
33 |
34 | .. code-block:: bash
35 |
36 | ./k3-create.sh
37 | cd orchestrator
38 | skaffold run
39 |
40 | 3. Start ``substra-backend`` with profile ``org-2-harbor``
41 |
42 | 4. Activate port-forward (port ``30046``) on
43 | ``harbor-nginx-xxxxxxxxxx-xxxxx`` (referred to as ``<harbor-nginx-pod>``),
44 | depending on your tool:
45 |
46 | - ``kubectl``:
47 |
48 | .. code:: bash
49 |
50 | kubectl port-forward -n harbor deployments/harbor-nginx 30046:https
51 |
52 | - ``k9s``:
53 |
54 | 1. Hover pod ``<harbor-nginx-pod>``
55 | 2. Press ``<shift>+F``
56 | 3. Replace ``nginx::30000`` by ``nginx::30046`` (the
57 | ``Local port`` should be replaced by ``30046``)
58 |
59 | 5. Follow the instructions in Harbor-set-up with the following information:
60 |
61 | - URL: https://registry.org-2.com:30046
62 | - Identifier: ``admin``
63 | - Password: ``harborP@ssword2403``
64 |
65 | Use external Harbor
66 | ===================
67 |
68 | 1. Follow the Harbor-set-up
69 |
70 | 2. Create ``docker-config`` secret
71 |
72 | .. code:: bash
73 |
74 | kubectl create secret docker-registry docker-config -n org-2 --docker-server= --docker-username=
75 |
76 | 3. Update your value file (you can use ``backend-org-2-harbor.yaml`` as a model)
77 |
78 | .. code:: yaml
79 |
80 | docker-registry:
81 | enabled: false
82 |
83 | containerRegistry:
84 | local: false
85 | scheme:
86 | host: # The host, without the port, as it is defined in the field port
87 | pullDomain: # The harbor host, with the port as it won't use the port field
88 | port: 443
89 |
90 | kaniko:
91 | dockerConfigSecretName: docker-config # Equals to the name given to the secret at the previous step
92 |
93 |
94 | Harbor-set-up
95 | =============
96 |
97 | 1. In the frontend, create project ``substra``
98 |
99 | 2. Set the variable ``HARBOR_REGISTRY_DOMAIN`` to your registry URL
100 |
101 | .. code-block:: bash
102 |
103 | export HARBOR_REGISTRY_DOMAIN=
104 |
105 | 3. Login to the registry
106 |
107 | .. code-block:: bash
108 |
109 | docker login $HARBOR_REGISTRY_DOMAIN
110 |
111 | 4. Manually add base image
112 |
113 | 1. Pull image
114 |
115 | .. code:: bash
116 |
117 | docker pull ghcr.io/substra/substra-tools:latest
118 |
119 | 2. Tag it
120 |
121 | .. code:: bash
122 |
123 | docker tag ghcr.io/substra/substra-tools:latest $HARBOR_REGISTRY_DOMAIN/substra/substra-tools:latest
124 |
125 | 3. Push to repository
126 |
127 | .. code:: bash
128 |
129 | docker push $HARBOR_REGISTRY_DOMAIN/substra/substra-tools:latest
--------------------------------------------------------------------------------
/docs/source/how-to/developing-substra/index.rst:
--------------------------------------------------------------------------------
1 | How-to guides for developing Substra
2 | ====================================
3 |
4 | The following guides might be of interest if you are interested in contributing to Substra.
5 |
6 | .. toctree::
7 | :maxdepth: 1
8 |
9 | local-deployment.rst
10 | harbor.rst
--------------------------------------------------------------------------------
/docs/source/how-to/developing-substra/local-deployment/k3-create.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | set -e
3 |
4 | if [[ "$OSTYPE" == "darwin"* ]]; then
5 | SED_EXEC="gsed"
6 | else
7 | SED_EXEC="sed"
8 | fi
9 |
10 | k3d cluster delete || echo 'No cluster'
11 | mkdir -p /tmp/org-1
12 | mkdir -p /tmp/org-2
13 | mkdir -p /tmp/org-3
14 | args=()
15 |
16 | if [ "$HARBOR_CERT_PATH" ]; then
17 | args+=("--volume" "${HARBOR_CERT_PATH}:/etc/ssl/certs/harbor.crt")
18 | fi
19 |
20 | k3d cluster create --api-port 127.0.0.1:6443 -p 80:80@loadbalancer -p 443:443@loadbalancer --k3s-arg "--disable=traefik,metrics-server@server:*" --volume /tmp/org-1:/tmp/org-1 --volume /tmp/org-2:/tmp/org-2 --volume /tmp/org-3:/tmp/org-3 "${args[@]}"
21 |
22 | # Patch and install nginx-ingress
23 | curl https://raw.githubusercontent.com/kubernetes/ingress-nginx/master/deploy/static/provider/kind/deploy.yaml > /tmp/deploy.yaml
24 | $SED_EXEC -i 's/ - --publish-status-address=localhost/ - --publish-status-address=localhost\n - --enable-ssl-passthrough/g' /tmp/deploy.yaml
25 | $SED_EXEC -i "/ingress-ready: \"true\"/d" /tmp/deploy.yaml
26 | kubectl apply -f /tmp/deploy.yaml
27 | kubectl create ns orderer
28 |
29 | # Create namespaces and apply PSA baseline label
30 | for org_index in 1 2 3
31 | do
32 | org_name="org-${org_index}"
33 | kubectl create ns ${org_name}
34 | kubectl label ns ${org_name} pod-security.kubernetes.io/enforce=baseline
35 | done
36 |
--------------------------------------------------------------------------------
/docs/source/how-to/using-substra/api_tokens_generation.rst:
--------------------------------------------------------------------------------
1 | How-to use new API tokens for login
2 | ===================================
3 |
4 | This short guide explains how to manage API tokens in the web application, and use them in the Substra SDK.
5 |
6 | .. admonition:: Why generate API tokens?
7 |
8 | The Substra SDK provides a way to log in using username and password (see `substra.Client `_).
9 |
10 | It is safe, but should be used with caution:
11 |
12 | * It doesn't allow for a precise lifetime or separating concerns by creating one token per purpose.
13 |
14 | * It may surprise or limit you through its underlying automated session management.
15 |
16 | * It can encourage using cleartext passwords, which can end up shared in version control.
17 |
18 | For these reasons, it is possible for Substra node administrators (via `chart options `_) to disable "implicit login" and force users to generate tokens in the web app.
19 |
20 | Whatever the situation, you should use a mechanism to ensure credentials are kept out of view, for instance by reading secret files or environment variables at runtime (see :ref:`client configuration howto`).
21 |
22 |
23 | .. warning::
24 | API tokens are node-specific: if your script connects to multiple nodes, generate a token for each of them.
25 |
26 | Generating new API tokens
27 | -------------------------
28 |
29 | To do so you need to go to the API tokens management page on the web application following this link ``/manage_tokens``.
30 | You will see a list of your current tokens as well as an option to generate new ones.
31 |
32 | You can also navigate to the page using the user menu:
33 |
34 |
35 | .. image:: /documentation/images/find_token_management_page.png
36 |
37 |
38 | Clicking ``Generate new`` opens a menu allowing you to pick a name and an expiration date for
39 | your new token.
40 |
41 |
42 | .. image:: /documentation/images/generate_new_token.png
43 |
44 |
45 | Afterward your token will be shown only once. Do copy it somewhere safe before proceeding with your work.
46 |
47 |
48 | .. image:: /documentation/images/copy_token.png
49 |
50 | Using API tokens
51 | ----------------
52 |
53 | Pass tokens to the `substra.Client `_ constructor:
54 |
55 | .. code-block:: Python
56 | :caption: Example of client configuration in code
57 |
58 | client_1 = substra.Client(
59 | backend_type="remote",
60 | url="https://org-1.com",
61 | token="dad943c684f65633635f005b2522a6452d20",
62 | )
63 |
64 | See :ref:`client configuration howto` for other options.
65 |
66 | Deleting API tokens
67 | -------------------
68 |
69 | Tokens can be deleted using the web application. Be careful, token deletion is irreversible.
70 |
71 | If you have scripts using a deleted token, they will no longer execute.
--------------------------------------------------------------------------------
/docs/source/how-to/using-substra/client_configuration.rst:
--------------------------------------------------------------------------------
1 | .. _client configuration howto:
2 |
3 | How-to configure Substra clients
4 | ================================
5 |
6 | Whether you are using SubstraFL or directly the Substra SDK, you need to configure one ``Client`` per organisation,
7 | in order to register the datasets and the functions you want to use.
8 |
9 | This how-to guide exposes the different options you have to configure your clients. It targets both first-time and
10 | advanced Substra users.
11 |
12 | Parameters passed directly in the code always override parameters from other sources (environment variables and
13 | configuration files). Parameters set through environment variables override parameters read from the configuration file.
14 |
15 | Configuration from the code
16 | ---------------------------
17 | The first option to configure a ``Client`` is to configure it directly in your code.
18 |
19 | .. code-block:: Python
20 | :caption: Example of client configuration in code
21 |
22 | client_1 = substra.Client(
23 | backend_type="remote",
24 | url="https://org-1.com",
25 | username="user1",
26 | password="secret_password",
27 | )
28 | client_2 = substra.Client(
29 | backend_type="remote",
30 | url="https://org-2.com",
31 | token="18ccd8c2-ea85-403f-aac3-972d97f3759b"
32 | )
33 |
34 | You can find details about the parameters in the `API reference `_.
35 |
36 | Any parameter defined in the code will override other configuration options.
37 |
38 | This option is good for debugging, but not for production, as you should not store sensitive information such as
39 | passwords or tokens directly in your code.
40 |
41 |
42 | Configuration using environment variables
43 | -----------------------------------------
44 | The second option is to configure your clients using environment variables.
45 | That way, sensitive information will not be accidentally committed to a Git repository.
46 |
47 | If a parameter is not defined in the code, Substra will look if a matching environment variable is defined.
48 | You need to pass the name of the client in the parameter ``client_name``. This name will be used to match environment
49 | variables with the right client, as you typically define a client to interact with each organization.
50 |
51 | The environment variable name is defined as follows: ``SUBSTRA_{CLIENT_NAME}_{PARAMETER_NAME}``.
52 | For example, if the ``client_name`` is ``"org-1"``, you can set the value of ``password`` by setting the value of
53 | ``SUBSTRA_ORG_1_PASSWORD``.
54 |
55 | You can use environment variables to configure partially your clients, and configure the rest directly in the code
56 | (or in a configuration file as explained in the next section).
57 |
58 | .. code-block:: bash
59 | :caption: Setting environment variables
60 |
61 | export SUBSTRA_ORG_1_USERNAME="user1"
62 | export SUBSTRA_ORG_1_PASSWORD="secret_password"
63 | export SUBSTRA_ORG_2_TOKEN="18ccd8c2-ea85-403f-aac3-972d97f3759b"
64 |
65 |
66 |
67 | .. code-block:: Python
68 | :caption: Example of client configuration using environment variables
69 |
70 | client_1 = substra.Client(
71 | client_name="org-1",
72 | backend_type="remote",
73 | url="https://org-1.com",
74 | )
75 | client_2 = substra.Client(
76 | client_name="org-2",
77 | backend_type="remote",
78 | url="https://org-2.com",
79 | )
80 |
81 |
82 | Configuration using a configuration file
83 | ----------------------------------------
84 | The last possibility for configuring a Substra client is to use a configuration YAML file.
85 |
86 | The configuration file contains information for each client you want to configure.
87 | Values read from the configuration file have the lowest priority: they are overridden by environment variables and values
88 | set in the code.
89 |
90 | It is recommended to store non-sensitive parameter values, such as URLs, in a configuration file, and sensitive parameters,
91 | such as passwords or tokens in environment variables.
92 |
93 | .. code-block:: YAML
94 | :caption: config.yaml
95 |
96 | org-1:
97 | backend_type: remote
98 | url: https://org-1.com
99 | username: user1
100 | retry_timeout: 60
101 | org-2:
102 | backend_type: remote
103 | url: https://org-2.com
104 |
105 |
106 |
107 | .. code-block:: Python
108 | :caption: Example of client configuration using a configuration file
109 |
110 | client_1 = substra.Client(
111 | client_name="org-1",
112 | configuration_file="config.yaml",
113 | )
114 | client_2 = substra.Client(
115 | client_name="org-2",
116 | configuration_file="config.yaml",
117 | )
--------------------------------------------------------------------------------
/docs/source/how-to/using-substra/get_performances.rst:
--------------------------------------------------------------------------------
1 | How to monitor performance in local mode
2 | ========================================
3 |
4 | .. warning::
5 | The last time this page was tested was with **Substra 0.36.0**. Some changes might be needed if you are using a more recent Substra version.
6 |
7 | Performances of a compute plan can be retrieved
8 | - with the :code:`get_performances(CP_KEY)` function of the `Substra Python library `_
9 | - on the Substra GUI when using the `remote mode `_.
10 | However, in the `local mode `_, there is no GUI. This page explains how to use `MLFlow `_ to perform live monitoring of the compute plan performances in local mode.
11 |
12 | Performance monitoring using MLFlow
13 | -----------------------------------
14 |
15 | During a `compute plan `_ in local mode, the performances of your testing tasks are saved in a :code:`performances.json` file as soon as the task is done. This json file is stored in your :code:`.../local-worker/live_performances/compute_plan_key` folder.
16 |
17 | The Python script below reads the json file and plots the live metrics results into an MLflow server, creating a plot for each metric in your compute plan.
18 |
19 | To run it, update :code:`CP_KEY` in the script below, run the Python script, and launch the :code:`mlflow ui` command in a dedicated terminal.
20 | Your metric results appear and are updated live at the given url in your terminal.
21 |
22 | This script will automatically end if the :code:`performances.json` file has not been updated in the last minute. For some compute plans, the :code:`TIMEOUT` value should be adjusted according to the time needed to perform each round.
23 |
24 | .. code-block:: python
25 | :caption: mlflow_live_performances.py
26 |
27 | import pandas as pd
28 | import json
29 | from pathlib import Path
30 | from mlflow import log_metric
31 | import time
32 | import os
33 |
34 | TIMEOUT = 60 # Number of seconds to stop the script after the last update of the json file
35 | CP_KEY = "..." # Compute plan key
36 | POLLING_FREQUENCY = 10 # Try to read the updates in the file every 10 seconds
37 |
38 | path_to_json = Path("local-worker") / "live_performances" / CP_KEY / "performances.json"
39 |
40 | # Wait for the file to be found
41 | start = time.time()
42 | while not path_to_json.exists():
43 | time.sleep(POLLING_FREQUENCY)
44 | if time.time() - start >= TIMEOUT:
45 | raise TimeoutError("The performance file does not exist, maybe no test task has been executed yet.")
46 |
47 |
48 | logged_rows = []
49 | last_update = time.time()
50 |
51 | while (time.time() - last_update) <= TIMEOUT:
52 |
53 | if last_update == os.path.getmtime(str(path_to_json)):
54 | time.sleep(POLLING_FREQUENCY)
55 | continue
56 |
57 | last_update = os.path.getmtime(str(path_to_json))
58 |
59 | time.sleep(1) # Waiting for the json to be fully written
60 | dict_perf = json.load(path_to_json.open())
61 |
62 | df = pd.DataFrame(dict_perf)
63 |
64 | for _, row in df.iterrows():
65 | if row["task_key"] in logged_rows:
66 | continue
67 |
68 | logged_rows.append(row["task_key"])
69 |
70 | step = int(row["round_idx"]) if row["round_idx"] is not None else int(row["task_rank"])
71 |
72 | log_metric(f"{row['identifier']}_{row['worker']}", row["performance"], step)
73 |
--------------------------------------------------------------------------------
/docs/source/how-to/using-substra/gpu.rst:
--------------------------------------------------------------------------------
1 | How to leverage GPU
2 | ===================
3 |
4 | Substra can leverage GPU to speed up the training of machine learning models. Find below how to configure Substra to make sure your code can run on GPU.
5 |
6 |
7 | For Substra
8 | ^^^^^^^^^^^
9 | A Substra task can run on a given GPU if the Docker image used does contain the CUDA drivers needed by this GPU.
10 |
11 | For Torch use cases in SubstraFL
12 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
13 | By default everything runs on CPU.
14 |
15 | If you want to make your Torch model run on GPU, you have to put the model and your data in the GPU memory. SubstraFL does it for you if you set ``use_gpu=True`` in your :ref:`Torch Algorithm`.
--------------------------------------------------------------------------------
/docs/source/how-to/using-substra/index.rst:
--------------------------------------------------------------------------------
1 | How-to guides for using Substra
2 | ===============================
3 |
4 | The following guides might be of interest if you are a data scientist using Substra.
5 |
6 | These are made to help on specific points, if you wish to have a general walkthrough on how to work with Substra,
7 | you can check the :doc:`tutorials `.
8 |
9 | .. toctree::
10 | :maxdepth: 1
11 |
12 |
13 | debug.rst
14 | get_performances.rst
15 | client_configuration.rst
16 | api_tokens_generation.rst
17 | gpu.rst
18 | r_scripts.rst
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/docs/source/how-to/using-substra/r_scripts.rst:
--------------------------------------------------------------------------------
1 | How-to use R scripts with Substra
2 | =================================
3 |
4 | The high-level SubstraFL library is made for working in Python, but the lower-level library Substra is flexible enough
5 | to accommodate running tasks in other programming languages.
6 | This how-to guide explains how to run scripts written in R with Substra.
7 | This uses the low-level interface of Substra and requires writing more boilerplate code than using the high-level interface of SubstraFL.
8 | If you are not familiar with the Substra low-level library, you should read the
9 | :doc:`Substra introductory example ` first.
10 |
11 | .. caution:: This guide provides an easy way to run some scripts in another language.
12 | The scripts are wrapped up in a Python process, so performances might be limited.
13 | In particular, multithreading is not supported.
14 |
15 | Preparing the R script
16 | ----------------------
17 | The inputs of your script are passed as arguments in the command line. This includes parameters (int, float or str) and
18 | (relative) file paths to data.
19 |
20 | The outputs of the scripts are written to stdout, and will be parsed later by the Python script.
21 | Below is an example of what your file should look like:
22 |
23 | .. code-block:: R
24 | :caption: my_script.R
25 |
26 | #!/usr/bin/env Rscript
27 | args <- commandArgs()
28 | # your script here
29 | ...
30 | write(outputs, "")
31 |
32 |
33 | Calling the R script from Python
34 | --------------------------------
35 | The Python script passed to Substra wraps the R script, so that it can be executed as a Python subprocess.
36 | The Python script reads the inputs defined as Substra ``FunctionInputSpec``, converts everything to string,
37 | appends all parameters in a command (``subprocess.run`` expects a list of str) and launches the subprocess.
38 | After the subprocess has finished, the output is cleaned.
39 | Everything printed to stdout in the R script is available in the Python code through ``raw_output.stdout`` (a ``bytes`` object, since ``text=True`` is not passed to ``subprocess.run``).
40 | Depending on the type of output, additional cleaning steps might be required.
41 | Finally, the output is saved as a pickle file, to be shared with other organisations.
42 |
43 | .. code-block:: Python
44 | :caption: python_wrapper.py
45 |
46 | import pickle
47 | import subprocess
48 | import substratools as tools
49 |
50 |
51 | @tools.register
52 | def run_script(inputs, outputs, task_properties):
53 | data_file = inputs["data_file_path"]
54 | param1 = str(inputs["param1"])
55 | param2 = str(inputs["param2"])
56 | raw_output = subprocess.run(['Rscript', 'my_script.R', data_file, param1, param2], capture_output=True)
57 | model = int(raw_output.stdout.strip())
58 | save_model(model, outputs["model"])
59 |
60 |
61 | def save_model(model, path):
62 | with open(path, "wb") as f:
63 | pickle.dump(model, f)
64 |
65 |
66 | if __name__ == "__main__":
67 | tools.execute()
68 |
69 | Adapting the opener
70 | -------------------
71 | When using Substra with Python, the ``Opener`` object is used to load the data in memory.
72 | When using R, we don't need to load the data as Python objects in memory, so the opener simply returns the file path (or paths).
73 |
74 | .. code-block:: Python
75 | :caption: opener.py
76 |
77 | import pathlib
78 | import substratools as tools
79 |
80 | import os
81 |
82 | class StubOpener(tools.Opener):
83 | def fake_data(self, n_samples=None):
84 | return ""
85 |
86 | def get_data(self, folders):
87 | return list(pathlib.Path(folders[0]).glob("*.csv"))
88 |
89 |
90 | Writing the Dockerfile
91 | ----------------------
92 | We modify the Dockerfile to install R in the container, and copy both R and Python scripts.
93 |
94 |
95 | .. code-block:: Dockerfile
96 | :caption: Dockerfile
97 |
98 | # this base image works in both CPU and GPU enabled environments
99 | FROM python:3.12-slim
100 |
101 | # install R
102 | RUN apt-get update \
103 | && apt-get -y install r-base
104 |
105 | # add your algorithm scripts to docker image
106 | ADD python_wrapper.py .
107 | ADD my_script.R .
108 |
109 | # define how script is run
110 | ENTRYPOINT ["python3", "python_wrapper.py", "--function-name", "run_script"]
111 |
112 | Wrapping up
113 | -----------
114 | That's it, you're all set up!
115 |
116 | You can now define your computation graph as you would normally in Substra, and everything should run fine.
117 |
118 | You can have a different R script for each step, just write a different Python wrapper to call each of them.
119 | Don't forget the ``@tools.register`` decorator on each of your Python wrappers!
120 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | .. Substra documentation master file, created by
2 | sphinx-quickstart on Mon Aug 30 14:12:40 2021.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Substra documentation
7 | =====================
8 |
9 | Substra is open source federated learning (FL) software. It provides a flexible Python library and a web application to run federated learning training at scale.
10 |
11 |
12 | .. image:: banner.jpg
13 | :width: 800
14 | :align: center
15 | :alt: Substra
16 |
17 | Substra's main usage is in production environments. It has already been deployed and used by hospitals and biotech companies: see the `MELLODDY `_ and the `HealthChain `_ projects.
18 |
19 | The key Substra differentiators are:
20 |
21 | * **Framework agnostic** — Any Python library can be used: PyTorch, TensorFlow, sklearn, etc.
22 | * **Flexible** — Any kind of computation can be run: machine learning, analytics, etc.
23 | * **Scalable** — Support for vertical scaling (several trainings on one machine) and horizontal scaling (training on several machines).
24 | * **Traceable** — All machine learning operations are logged in an auditable read-only database.
25 | * **Web application** — A web application to monitor long-running computations and explore model performance.
26 | * **Production ready** — Packaged in Kubernetes and regularly audited.
27 | * **Debugging made easy** — Remote error logs are accessible to data scientists. The same code can be run in a deployed production environment or on a single machine to debug.
28 |
29 | Substra was created by `Owkin `_ and is now hosted by the `Linux Foundation for AI and Data `_.
30 |
31 | How does it work?
32 | ^^^^^^^^^^^^^^^^^
33 |
34 | .. image:: _static/schemes/introductory-scheme.svg
35 | :width: 800
36 | :align: center
37 | :alt: Substra Network
38 |
39 | Interfaces
40 | ^^^^^^^^^^
41 |
42 | Substra has three user interfaces:
43 |
44 | * **Substra**: a low-level Python library (also called SDK). Substra is used to create datasets, functions and machine learning tasks on the platform.
45 | * **SubstraFL**: a high-level federated learning Python library based on Substra. SubstraFL is used to run complex federated learning experiments at scale.
46 | * A **web application** used to monitor experiment training and explore the results.
47 |
48 |
49 |
50 | Installation
51 | ^^^^^^^^^^^^
52 |
53 | **Client side**: Install Substra and SubstraFL python libraries with the following command: ``pip install substrafl``. Substra python library is a dependency of SubstraFL, so it will be automatically installed. More information on the installation :ref:`can be found here `.
54 |
55 | **Server side**: There are 2 options to deploy the server side of Substra (backend, frontend and orchestrator):
56 |
57 | * :ref:`Local deployment `: to deploy locally on a single machine. Useful for quick tests and for development.
58 | * :doc:`Production deployment `: for real deployments.
59 |
60 | .. note::
61 | You can start doing local FL experiments with Substra by installing only the **client side**.
62 |
63 | Links
64 | ^^^^^
65 |
66 | Some quick links:
67 |
68 | * :ref:`MNIST federated learning example `
69 | * :doc:`SubstraFL overview `
70 | * :ref:`Compatibility table `
71 | * :doc:`How to deploy Substra for Site Reliability Engineers `
72 | * :ref:`Community `
73 | * `Subscribe to our newsletter `_
74 |
75 |
76 | .. toctree::
77 | :glob:
78 | :maxdepth: 2
79 | :caption: What is Substra
80 | :hidden:
81 |
82 |
83 | substrafl_doc/substrafl_overview
84 | documentation/concepts
85 | documentation/components
86 | additional/privacy-strategy.rst
87 |
88 |
89 |
90 | .. toctree::
91 | :glob:
92 | :maxdepth: 2
93 | :caption: Tutorials
94 | :hidden:
95 |
96 | examples/substrafl/index.rst
97 | examples/substra_core/index.rst
98 |
99 |
100 | .. toctree::
101 | :glob:
102 | :maxdepth: 2
103 | :caption: Reference guides
104 | :hidden:
105 |
106 | substrafl_doc/api/index.rst
107 | documentation/api_reference.rst
108 | reference/index.rst
109 |
110 |
111 | .. toctree::
112 | :glob:
113 | :maxdepth: 1
114 | :caption: How-to guides
115 | :hidden:
116 |
117 | how-to/using-substra/index.rst
118 | how-to/deploying-substra/index.rst
119 | how-to/developing-substra/index.rst
120 |
121 |
122 | .. toctree::
123 | :glob:
124 | :maxdepth: 1
125 | :caption: Contributing guide
126 | :hidden:
127 |
128 | contributing/contributing-guide.rst
129 | contributing/code-of-conduct.rst
130 |
131 | .. toctree::
132 | :glob:
133 | :maxdepth: 2
134 | :caption: Additional Information
135 | :hidden:
136 |
137 | additional/community.rst
138 | additional/release.rst
139 | additional/faq.rst
140 | additional/glossary.rst
141 |
--------------------------------------------------------------------------------
/docs/source/reference/index.rst:
--------------------------------------------------------------------------------
1 | Kubernetes deployment reference
2 | ===============================
3 |
4 | For a step-by-step guide on how to deploy the application on Kubernetes, please refer to
5 | `the deployment how-to `_.
6 |
7 | This section provides a reference of what various Kubernetes resources should look like once deployed with the default settings.
8 | Substra is distributed as Helm charts, and most values can be customized to accommodate the requirements of your environment.
9 |
10 | .. include:: pss.rst
11 | .. include:: netpol.rst
12 | .. include:: volumes.rst
13 |
--------------------------------------------------------------------------------
/docs/source/reference/pss.rst:
--------------------------------------------------------------------------------
1 | Pod Security Standards
2 | ----------------------
3 |
4 | All pods in a Substra deployment are compliant with the *baseline* policy of the
5 | `Pod Security Standards `_.
6 |
7 | All pods can run as non-root, with two exceptions:
8 |
9 | * If the builder feature is enabled (at least one backend per network must have the ability to build images), Kaniko pods used for building images run as root.
10 | * If the private CA feature is used, the initContainer ``add-cert`` runs as root.
11 |
12 | We are working on ensuring that all pods except the two listed above are compliant with the *restricted* policy.
13 |
--------------------------------------------------------------------------------
/docs/source/templates/breadcrumbs.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Substra/substra-documentation/7e2366aa8fd2c8d667a5e400dadf365caeff5c84/docs/source/templates/breadcrumbs.html
--------------------------------------------------------------------------------
/examples_requirements.txt:
--------------------------------------------------------------------------------
1 | ipython==8.21.0
2 | nbformat==5.9.2
3 | tqdm>=4.66.2
4 | -r docs/source/examples/substra_core/diabetes_example/assets/requirements.txt
5 | -r docs/source/examples/substra_core/titanic_example/assets/requirements.txt
6 | -r docs/source/examples/substrafl/get_started/torch_fedavg_assets/requirements.txt
7 | -r docs/source/examples/substrafl/go_further/sklearn_fedavg_assets/requirements.txt
8 | -r docs/source/examples/substrafl/go_further/torch_cyclic_assets/requirements.txt
9 | -r docs/source/examples/substrafl/go_further/diabetes_substrafl_assets/requirements.txt
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | -r docs/doc-requirements.txt
2 | -r examples_requirements.txt
--------------------------------------------------------------------------------
/skaffold.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: skaffold/v1beta13
2 | kind: Config
3 | build:
4 | artifacts:
5 | - image: substra/substra-documentation-examples
6 | context: .
7 | docker:
8 | dockerfile: docker/substra-documentation-examples/Dockerfile
9 | deploy:
10 | statusCheckDeadlineSeconds: 300
11 | helm:
12 | releases:
13 | - name: substra-documentation-examples
14 | chartPath: charts/substra-documentation-examples
15 | namespace: substra-tests
16 | imageStrategy:
17 | helm: {}
18 | values:
19 | image: substra/substra-documentation-examples
20 | flags:
21 | install: ["--create-namespace"]
22 |
--------------------------------------------------------------------------------
/towncrier.toml:
--------------------------------------------------------------------------------
1 | [tool.towncrier]
2 | directory = "changes"
3 | filename = "CHANGELOG.md"
4 | start_string = "\n"
5 | underlines = ["", "", ""]
6 | title_format = "## [{version}](https://github.com/Substra/substra-documentation/releases/tag/{version}) - {project_date}"
7 | issue_format = "[#{issue}](https://github.com/Substra/substra-documentation/pull/{issue})"
8 | [tool.towncrier.fragment.added]
9 | [tool.towncrier.fragment.removed]
10 | [tool.towncrier.fragment.changed]
11 | [tool.towncrier.fragment.fixed]
12 |
--------------------------------------------------------------------------------