├── requirements.txt
├── docs
│   ├── _config.yml
│   ├── images
│   │   └── mnist_plot.png
│   ├── css
│   │   └── my-styles.css
│   ├── dev_notes.md
│   ├── grpc_examples.md
│   ├── mli_factory.md
│   ├── installation.md
│   ├── examples.md
│   ├── python_src
│   │   ├── mnist_keras.py
│   │   └── mnist_pytorch.py
│   ├── index.md
│   ├── differential_privacy.md
│   ├── demo.md
│   ├── intro_tutorial_keras.md
│   ├── intro_tutorial_pytorch.md
│   ├── technical
│   │   └── AXIM-160-docu.md
│   └── about.md
├── .dockerignore
├── setup.cfg
├── colearn_grpc
│   ├── proto
│   │   ├── compile.sh
│   │   └── interface.proto
│   ├── __init__.py
│   ├── scripts
│   │   ├── probe_grpc_server.py
│   │   ├── run_grpc_server.py
│   │   └── run_n_grpc_servers.py
│   ├── logging.py
│   ├── mli_factory_interface.py
│   ├── factory_registry.py
│   ├── example_mli_factory.py
│   ├── test_example_mli_factory.py
│   ├── grpc_server.py
│   └── test_grpc_utils.py
├── scripts
│   └── entrypoint.sh
├── .github
│   ├── workflows
│   │   ├── docker-push.yml
│   │   ├── update-docs.yml
│   │   └── python-app.yml
│   ├── ISSUE_TEMPLATE
│   │   ├── feature_request.md
│   │   └── bug_report.md
│   └── dependabot.yml
├── docker
│   ├── ml.Dockerfile
│   └── build.py
├── .gitignore
├── tests
│   ├── __init__.py
│   ├── plus_one_learner
│   │   ├── test_plus_one_learner.py
│   │   ├── plus_one_runner.py
│   │   └── plus_one_learner.py
│   ├── test_colearn_utils_data.py
│   └── check_copyright_notice.py
├── colearn
│   ├── __init__.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── results.py
│   │   └── data.py
│   ├── standalone_driver.py
│   ├── training.py
│   └── ml_interface.py
├── colearn_keras
│   ├── __init__.py
│   ├── utils.py
│   ├── test_keras_scania.py
│   └── test_keras_learner.py
├── colearn_other
│   ├── __init__.py
│   └── demo_utils.py
├── colearn_pytorch
│   ├── __init__.py
│   ├── utils.py
│   └── test_pytorch_learner.py
├── mkdocs.yml
├── .pylintrc
├── README.md
├── tox.ini
├── setup.py
└── colearn_examples
    ├── grpc
    │   └── run_grpc_demo.py
    └── ml_interface
        ├── keras_mnist.py
        ├── keras_fraud.py
        ├── keras_cifar.py
        ├── pytorch_mnist.py
        ├── keras_mnist_diffpriv.py
        ├── pytorch_mnist_diffpriv.py
        └── pytorch_cifar.py

/requirements.txt:
--------------------------------------------------------------------------------
1 | -e .
2 | -e .[all] -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-minimal -------------------------------------------------------------------------------- /docs/images/mnist_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fetchai/colearn/HEAD/docs/images/mnist_plot.png -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .git/ 2 | .github/ 3 | .idea/ 4 | .mypy_cache/ 5 | .pytest_cache/ 6 | .tox/ 7 | build/ 8 | colearn.egg-info/ 9 | dist/ 10 | docs/ 11 | site/ 12 | tests/ 13 | 14 | *.py[cgo] -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 88 3 | select = C,E,F,I,W,B901,B902,B903,B301,B302,B303,B304,B305,B306,B001,B002,B003,B004,B005,B006,B007,B008,B010,B011,B012,B013 4 | ignore = E203,E501,W503,D202,B009,B014 5 | application-import-names = colearn_interface colearn tests colearn_examples 6 | 7 | [mypy] 8 | ignore_missing_imports = True 9 | 10 | 11 | -------------------------------------------------------------------------------- /colearn_grpc/proto/compile.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | rm -r ./generated/* 4 | 5 | python3 -m grpc_tools.protoc \ 6 | -I . \ 7 | --python_out=./generated \ 8 | --grpc_python_out=./generated \ 9 | *.proto 10 | 11 | # protoc uses implicit relative imports which are not allowed in python3. This converts implicit imports of the 12 | # form "import .*_pb2" to explicit relative imports ("from . import") 13 | sed -i.bak '/^import\ .*_pb2/s/^/from \. 
/' ./generated/*.py
--------------------------------------------------------------------------------
/scripts/entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | if test "$#" -eq 1; then
4 |     DOMAIN=$1
5 |     echo "Setting up self signed certificate for domain $DOMAIN"
6 | 
7 |     cd /app/colearn && openssl req -newkey rsa:2048 -nodes -keyout server.key -x509 -days 365 -out server.crt -subj "/C=GB/ST=Cambridge/L=Cambridge/O=None/OU=None Department/CN=$DOMAIN"
8 | fi
9 | 
10 | echo "Running python3 /app/run_grpc_server.py"
11 | python3 -u /app/run_grpc_server.py --enable_encryption --server_key /app/colearn/server.key --server_crt /app/colearn/server.crt
12 | 
--------------------------------------------------------------------------------
/docs/css/my-styles.css:
--------------------------------------------------------------------------------
1 | pre {
2 |     background-color: #f8f8f7;
3 | }
4 | 
5 | code {
6 |     background-color: #0083fb;
7 | }
8 | 
9 | /* Katharine's css additions */
10 | .md-header,
11 | .md-tabs,
12 | .md-footer-meta,
13 | .md-footer-nav,
14 | .md-footer-nav__inner {
15 |     background-color: #172b6e;
16 | }
17 | 
18 | .md-nav__title {
19 |     color: #172b6e;
20 | }
21 | 
22 | .md-icon {
23 |     background-image: url("./assets/images/favicon.ico"); /* the original rule was a bare file path, which is not valid CSS; assuming the favicon was meant to be used as the icon image */
24 | }
25 | 
26 | /* Needed so that Mermaid UML diagrams don't end up being massively tall */
27 | svg {
28 |     height: auto;
29 | 
30 | }
31 | 
--------------------------------------------------------------------------------
/docs/dev_notes.md:
--------------------------------------------------------------------------------
1 | # Developer Notes
2 | 
3 | These are some notes for developers working on the colearn code repo.
4 | 
5 | ## Google Cloud Storage
6 | 
7 | To access Google Cloud Storage you need to set up your Google authentication and
8 | have the `$GOOGLE_APPLICATION_CREDENTIALS` environment variable set correctly, for example as sketched below.
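A minimal sketch of that setup, assuming you have downloaded a service-account JSON key (the key path below is hypothetical):

```
# point Google client libraries at your service-account key
export GOOGLE_APPLICATION_CREDENTIALS="$HOME/keys/colearn-gcs-key.json"

# quick check that the credentials are picked up (needs the google-cloud-storage package)
python3 -c "from google.cloud import storage; print(storage.Client().project)"
```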
9 | For more details, ask the team or see the contract-learn documentation.
10 | 
11 | ## Build image
12 | 
13 | To build the ML server image and push it to Google Cloud, use the following commands:
14 | 
15 | ```
16 | cd docker
17 | python3 ./build.py --publish --allow_dirty
18 | # Check this worked correctly
19 | docker images
20 | ```
21 | 
--------------------------------------------------------------------------------
/docs/grpc_examples.md:
--------------------------------------------------------------------------------
1 | # MNIST gRPC Example
2 | 
3 | To run the Keras MNIST gRPC example, run:
4 | 
5 | ```bash
6 | python -m colearn_examples.grpc.run_grpc_demo --n_learners 5 --dataloader_tag KERAS_MNIST --model_tag KERAS_MNIST \
7 |     --data_locations /tmp/mnist/0,/tmp/mnist/1,/tmp/mnist/2,/tmp/mnist/3,/tmp/mnist/4
8 | ```
9 | 
10 | !!! note
11 |     This requires `colearn[keras]`
12 | 
13 | You can verify that the example is working correctly by running the probe:
14 | 
15 | ```bash
16 | python -m colearn_grpc.scripts.probe_grpc_server --port 9995
17 | ```
18 | 
19 | For more about the gRPC components of Colearn see the [gRPC Tutorial](grpc_tutorial.md)
20 | 
--------------------------------------------------------------------------------
/.github/workflows/docker-push.yml:
--------------------------------------------------------------------------------
1 | name: Container Publish
2 | on:
3 |   push:
4 |     tags:
5 |       - '*'
6 | 
7 | jobs:
8 |   build-and-push:
9 |     name: Dockerhub Push
10 |     runs-on: ubuntu-20.04
11 | 
12 |     steps:
13 |       - uses: actions/checkout@v2
14 | 
15 |       - name: Login to Docker Hub
16 |         run: |
17 |           docker login --username ${{ secrets.DOCKERHUB_USERNAME }} --password ${{ secrets.DOCKERHUB_PASSWORD }}
18 | 
19 |       - name: Build and Publish Public Images
20 |         uses: ejfitzgerald/action-docker-tag@v0.2.0
21 |         with:
22 |           repo: fetchai/colearn
23 |           dockerfile: docker/ml.Dockerfile
24 | 
--------------------------------------------------------------------------------
/docker/ml.Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.7.9-slim-buster as base
2 | 
3 | USER root
4 | 
5 | WORKDIR /app
6 | 
7 | RUN apt update && \
8 |     apt upgrade -y --no-install-recommends && \
9 |     apt autoremove -y && \
10 |     rm -rf /var/lib/apt/lists/*
11 | 
12 | COPY ./requirements.txt ./
13 | COPY ./setup.py ./
14 | 
15 | RUN pip3 install --no-cache-dir --upgrade pip && \
16 |     pip3 install --no-cache-dir -r requirements.txt
17 | 
18 | COPY ./ ./colearn
19 | 
20 | RUN cd ./colearn && \
21 |     pip3 install --no-cache-dir --upgrade pip && \
22 |     pip3 install --no-cache-dir -e .[all]
23 | 
24 | COPY colearn_grpc/scripts/run_grpc_server.py ./
25 | 
26 | COPY scripts/entrypoint.sh ./
27 | 
28 | EXPOSE 9995
29 | EXPOSE 9091
30 | # any non-empty value enables unbuffered output; 1 is the conventional setting (the original 0 also enabled it but reads as "off")
31 | ENV PYTHONUNBUFFERED 1
--------------------------------------------------------------------------------
/.github/workflows/update-docs.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, build and push the docs
2 | 
3 | name: Update Docs
4 | 
5 | on:
6 |   push:
7 |     branches: [ master ]
8 | 
9 | jobs:
10 |   update_docs:
11 |     continue-on-error: False
12 |     runs-on: ubuntu-latest
13 |     timeout-minutes: 5
14 | 
15 |     steps:
16 |       - uses: actions/checkout@v2
17 |       - name: Set up Python 3.7
18 |         uses: actions/setup-python@v2
19 |         with:
20 |           python-version: 3.7
21 |       - name: Install dependencies
22 |         run: |
23 |           python -m pip install --upgrade pip
24 |           pip install wheel
25 |           pip install .[docs]
26 |       - name: Push
docs 27 | run: | 28 | git fetch origin gh-pages:gh-pages 29 | mkdocs gh-deploy -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # Distribution / packaging 7 | .Python 8 | build/ 9 | develop-eggs/ 10 | dist/ 11 | downloads/ 12 | eggs/ 13 | .eggs/ 14 | lib/ 15 | lib64/ 16 | parts/ 17 | sdist/ 18 | var/ 19 | wheels/ 20 | *.egg-info/ 21 | .installed.cfg 22 | *.egg 23 | MANIFEST 24 | Pipfile.lock 25 | 26 | # Installer logs 27 | pip-log.txt 28 | pip-delete-this-directory.txt 29 | 30 | # Unit test / coverage reports 31 | htmlcov/ 32 | .tox/ 33 | .coverage 34 | .coverage.* 35 | .cache 36 | nosetests.xml 37 | coverage.xml 38 | *.cover 39 | .hypothesis/ 40 | .pytest_cache/ 41 | 42 | # pyenv 43 | .python-version 44 | venv/ 45 | 46 | # mypy 47 | .mypy_cache/ 48 | 49 | .DS_Store 50 | */.DS_Store 51 | .idea/ 52 | .*.swp 53 | *.db 54 | 55 | # mkdocs 56 | site/ 57 | 58 | Pipfile 59 | *.bak -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | Thanks for taking the time to let us know about a feature that you would like! 11 | Please try to fill in as much as possible of this form so that we can help. 12 | 13 | **Is your feature request related to a problem? Please describe.** 14 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 15 | 16 | **Describe the solution you'd like** 17 | A clear and concise description of what you want to happen. 18 | 19 | **Describe alternatives you've considered** 20 | A clear and concise description of any alternative solutions or features you've considered. 21 | 22 | **Additional context** 23 | Add any other context or screenshots about the feature request here. 24 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | # ------------------------------------------------------------------------------ 18 | -------------------------------------------------------------------------------- /colearn/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | # ------------------------------------------------------------------------------ 18 | -------------------------------------------------------------------------------- /colearn/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | # ------------------------------------------------------------------------------ 18 | -------------------------------------------------------------------------------- /colearn_keras/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | # ------------------------------------------------------------------------------ 18 | -------------------------------------------------------------------------------- /colearn_other/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | # ------------------------------------------------------------------------------ 18 | -------------------------------------------------------------------------------- /colearn_pytorch/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | # ------------------------------------------------------------------------------ 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | Thanks for taking the time to let us know about a bug! 11 | Please try to fill in as much as possible of this form so that we can fix it. 12 | 13 | **Describe the bug** 14 | A clear and concise description of what the bug is. 15 | 16 | **To Reproduce** 17 | Steps to reproduce the behavior: 18 | 1. Go to '...' 19 | 2. Click on '....' 20 | 3. Scroll down to '....' 21 | 4. See error 22 | 23 | **Expected behavior** 24 | A clear and concise description of what you expected to happen. 25 | 26 | **Screenshots** 27 | If applicable, add screenshots to help explain your problem. 28 | 29 | **Desktop (please complete the following information):** 30 | - OS: [e.g. iOS] 31 | - Version [e.g. 22] 32 | 33 | **Additional context** 34 | Add any other context about the problem here. 
35 | -------------------------------------------------------------------------------- /colearn_grpc/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | # ------------------------------------------------------------------------------ 18 | from pkg_resources import get_distribution 19 | 20 | __version__ = get_distribution('colearn').version 21 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "daily" 12 | target-branch: "master" 13 | labels: 14 | - "dependencies" 15 | open-pull-requests-limit: 5 16 | 17 | - package-ecosystem: "github-actions" 18 | directory: "/" 19 | schedule: 20 | interval: "daily" 21 | target-branch: "master" 22 | labels: 23 | - "dependencies" 24 | open-pull-requests-limit: 2 25 | 26 | - package-ecosystem: "docker" 27 | directory: "/docker" 28 | schedule: 29 | interval: "daily" 30 | target-branch: "master" 31 | labels: 32 | - "dependencies" 33 | open-pull-requests-limit: 2 34 | 35 | -------------------------------------------------------------------------------- /tests/plus_one_learner/test_plus_one_learner.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | #
17 | # ------------------------------------------------------------------------------
18 | from tests.plus_one_learner.plus_one_learner import PlusOneLearner
19 | 
20 | 
21 | def test_init():
22 |     learner = PlusOneLearner(0)
23 |     assert learner.current_value == 0
24 |     learner_2 = PlusOneLearner(2)
25 |     assert learner_2.current_value == 2
26 | 
--------------------------------------------------------------------------------
/docs/mli_factory.md:
--------------------------------------------------------------------------------
1 | # MLI Factory
2 | 
3 | The machine learning interface factory defines the minimum set of methods a client needs to implement
4 | to work with the gRPC server (and become a Learner).
5 | 
6 | There are two main types of functions:
7 | 
8 | - Supported Systems (get_models, get_dataloaders, get_compatibilities)
9 | - Get a MachineLearningInterface (get_mli)
10 | 
11 | When the gRPC server is connected to the Orchestrator, it queries the supported-systems
12 | functions to find out what the MLI factory can serve.
13 | 
14 | Later, when the Orchestrator wants to run something on this Learner, it calls get_mli
15 | with a model_arch_name, a dataloader_name, and further parameters for both.
16 | The returned object is then used to run the experiment through the MLI.
17 | 
18 | ### Supported Systems
19 | 
20 | The supported-systems functions get_models and get_dataloaders should return the set of
21 | supported names, which will be stored (not currently implemented)
22 | in the API database. The idea is that the user can change these values in the
23 | UI while preparing to start or join an experiment.
24 | 
25 | ### ExampleMliFactory
26 | 
27 | An example MLI factory that implements all the tasks in run_demo.
28 | This is the one used by contract-learn.
29 | 
--------------------------------------------------------------------------------
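A minimal sketch of how these pieces fit together, assuming the four method names listed above (the exact signatures are defined in colearn_grpc/mli_factory_interface.py; the parameter names and JSON shapes here are illustrative):

```python
import json

from colearn_grpc.example_mli_factory import ExampleMliFactory

factory = ExampleMliFactory()

# Supported-systems queries: the gRPC server forwards these answers to the Orchestrator
print(factory.get_models())           # names of model architectures this factory can build
print(factory.get_dataloaders())      # names of dataloaders it can construct
print(factory.get_compatibilities())  # which dataloaders work with which models

# The Orchestrator later asks for a concrete learner for one model/dataloader pair
mli = factory.get_mli(
    model_name="KERAS_MNIST",                                 # assumed tag, as used in the gRPC demo
    model_params=json.dumps({}),
    dataloader_name="KERAS_MNIST",
    dataset_params=json.dumps({"location": "/tmp/mnist/0"}),  # illustrative location
)
new_weights = mli.mli_propose_weights()  # the returned object implements the MachineLearningInterface
```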
/tests/plus_one_learner/plus_one_runner.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | #
3 | # Copyright 2021 Fetch.AI Limited
4 | #
5 | # Licensed under the Creative Commons Attribution-NonCommercial International
6 | # License, Version 4.0 (the "License"); you may not use this file except in
7 | # compliance with the License. You may obtain a copy of the License at
8 | #
9 | #   http://creativecommons.org/licenses/by-nc/4.0/legalcode
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | # ------------------------------------------------------------------------------
18 | from colearn.standalone_driver import run
19 | 
20 | from tests.plus_one_learner.plus_one_learner import PlusOneLearner
21 | 
22 | 
23 | def run_experiment(n_learners):
24 |     learners = [PlusOneLearner(0) for _ in range(n_learners)]  # independent learners; '[PlusOneLearner(0)] * n' would alias a single instance
25 | 
26 |     for ln in learners:
27 |         print(ln.current_value)
28 | 
29 |     run(10, learners)
30 | 
31 |     for ln in learners:
32 |         print(ln.current_value)
33 | 
34 | 
35 | if __name__ == "__main__":
36 |     # execute only if run as a script
37 |     run_experiment(5)
38 | 
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: colearn
2 | site_url: https://docs.fetch.ai/
3 | site_description: Everything you need to know about Collective Learning.
4 | repo_url: https://github.com/fetchai/colearn/
5 | site_author: developer@fetch.ai
6 | 
7 | theme:
8 |   name: 'material'
9 |   feature:
10 |     tabs: true
11 | 
12 | strict: true
13 | 
14 | nav:
15 |   - Colearn: 'index.md'
16 |   - Collective Learning Protocol: 'about.md'
17 |   - Getting Started:
18 |       - Installation: 'installation.md'
19 |       - Keras: 'intro_tutorial_keras.md'
20 |       - PyTorch: 'intro_tutorial_pytorch.md'
21 |       - The MachineLearningInterface: 'intro_tutorial_mli.md'
22 |       - Differential Privacy: 'differential_privacy.md'
23 |       - gRPC server: 'grpc_tutorial.md'
24 |   - Examples:
25 |       - Demo: 'demo.md'
26 |       - Standalone examples: 'examples.md'
27 |       - gRPC example: 'grpc_examples.md'
28 | 
29 | extra_css:
30 |   - css/my-styles.css
31 | 
32 | plugins:
33 |   - search
34 |   - macros  # use variables, e.g. {{ repo_root }}
35 | 
36 | markdown_extensions:
37 |   - toc:  # table of contents
38 |       permalink: true
39 |   - markdown_include.include:
40 |       base_path: docs
41 |   - admonition  # provides notes, syntax is !!!\n\tnote
42 |   - codehilite  # code highlighting
43 |   - extra
44 |   - pymdownx.superfences:  # "fenced" code blocks
45 |       custom_fences:
46 |         - name: mermaid
47 |           class: mermaid
48 |           format: !!python/name:pymdownx.superfences.fence_div_format ''
49 |   - pymdownx.tabbed  # provides tabs
50 | 
51 | extra:
52 |   repo_root: https://github.com/fetchai/colearn/tree/master/
--------------------------------------------------------------------------------
/colearn_grpc/scripts/probe_grpc_server.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | #
3 | # Copyright 2021 Fetch.AI Limited
4 | #
5 | # Licensed under the Creative Commons Attribution-NonCommercial International
6 | # License, Version 4.0 (the "License"); you may not use this file except in
7 | # compliance with the License. You may obtain a copy of the License at
8 | #
9 | #   http://creativecommons.org/licenses/by-nc/4.0/legalcode
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | # ------------------------------------------------------------------------------
18 | import argparse
19 | from pprint import pprint
20 | 
21 | from colearn_grpc.example_grpc_learner_client import ExampleGRPCLearnerClient
22 | from colearn_grpc.logging import set_log_levels
23 | 
24 | cli_args = argparse.ArgumentParser(description='Probe a GRPC learner server')
25 | cli_args.add_argument('-p', '--port', type=int, default=9995, help='server port')
26 | cli_args.add_argument('-i', '--ip_addr', type=str, default="127.0.0.1", help='IP address or hostname')
27 | 
28 | 
29 | args = cli_args.parse_args()
30 | 
31 | # Now make a grpc client
32 | log_levels = {"default": "INFO"}
33 | set_log_levels(log_levels)
34 | port = args.port
35 | ml_system = ExampleGRPCLearnerClient("probing client", f"{args.ip_addr}:{port}")
36 | ml_system.start()
37 | 
38 | # get info about client
39 | ml_info = ml_system.get_supported_system()
40 | pprint(ml_info)
41 | ml_system.stop()
42 | 
--------------------------------------------------------------------------------
/.github/workflows/python-app.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 | 
4 | name: PR Checks
5 | 
6 | on:
7 |   push:
8 |     branches: [ master ]
9 |   pull_request:
10 |     branches: [ master ]
11 | 
12 | jobs:
13 |   code_quality_checks:
14 | 
15 |     continue-on-error: False
16 |     runs-on: ubuntu-latest
17 |     timeout-minutes: 11
18 | 
19 |     steps:
20 |       - uses: actions/checkout@v2
21 |       - name: Set up Python 3.7
22 |         uses: actions/setup-python@v2
23 |         with:
24 |           python-version: 3.7
25 |       - name: Install dependencies
26 |         run: |
27 |           python -m pip install --upgrade pip
28 |           pip install wheel
29 |           pip install tox
30 |       - name: Style checks
31 |         run: tox -e flake8,pylint  # one -e flag with a comma-separated list; repeating -e keeps only the last environment
32 |       - name: Type checking
33 |         run: tox -e mypy
34 |       - name: Build docs
35 |         run: tox -e docs
36 |       - name: Check copyright notice
37 |         run: tox -e copyright_check
38 | 
39 |   code_pytests_multiversion:
40 | 
41 |     continue-on-error: False
42 |     runs-on: self-hosted
43 |     timeout-minutes: 30
44 | 
45 |     strategy:
46 |       matrix:
47 |         python-version: [3.7, 3.8]
48 |     env:
49 |       GITHUB_ACTION: true
50 | 
51 |     steps:
52 |       - uses: actions/checkout@v2
53 |       - name: Set up Python ${{ matrix.python-version }}
54 |         uses: actions/setup-python@v2
55 |         with:
56 |           python-version: ${{ matrix.python-version }}
57 |       - name: Install dependencies
58 |         run: |
59 |           python -m pip install --upgrade pip
60 |           pip install wheel
61 |           pip install tox
62 |       - name: Short pytests
63 |         run: tox -e pytest${{ matrix.python-version }}
64 |       - name: Long pytests
65 |         run: tox -e pytest-slow${{ matrix.python-version }}
66 | 
67 | 
--------------------------------------------------------------------------------
/colearn/standalone_driver.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | #
3 | # Copyright 2021 Fetch.AI Limited
4 | #
5 | # Licensed under the Creative Commons Attribution-NonCommercial International
6 | # License, Version 4.0 (the "License"); you may not use this file except in
7 | # compliance with the License.
You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | # ------------------------------------------------------------------------------ 18 | from typing import List, Sequence 19 | 20 | from colearn.ml_interface import MachineLearningInterface 21 | 22 | 23 | def run(n_rounds: int, learners: List[MachineLearningInterface]): 24 | for i in range(n_rounds): 25 | run_one_round(i, learners) 26 | 27 | 28 | def run_one_round(round_index: int, learners: Sequence[MachineLearningInterface], 29 | vote_threshold=0.5): 30 | 31 | # Get weights from proposer 32 | proposer = round_index % len(learners) 33 | new_weights = learners[proposer].mli_propose_weights() 34 | 35 | prop_weights_list = [ln.mli_test_weights(new_weights) for ln in learners] 36 | 37 | # Invalidate vote on self since not allowed 38 | prop_weights_list[proposer].vote = None 39 | 40 | approves = sum(1 if v.vote else 0 for v in prop_weights_list) 41 | 42 | vote = False 43 | if approves >= (len(prop_weights_list) - 1) * vote_threshold: 44 | vote = True 45 | # Set all learners to new weights 46 | for learner in learners: 47 | learner.mli_accept_weights(new_weights) 48 | 49 | return prop_weights_list, vote 50 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | The core package, `colearn`, contains only the [MachineLearningInterface](about.md) and a simple driver that 4 | implements the Collective Learning Protocol. 5 | To install only the core package: 6 | 7 | ``` 8 | pip install colearn 9 | ``` 10 | 11 | To make collective learning easier to use we have defined extra packages with helpers 12 | for model development in Keras and Pytorch. 13 | 14 | To install with Keras/Pytorch extras: 15 | 16 | ``` 17 | pip install colearn[keras] 18 | pip install colearn[pytorch] 19 | ``` 20 | 21 | To install both the Keras and Pytorch extras use: 22 | 23 | ``` 24 | pip install colearn[all] 25 | ``` 26 | 27 | To run stand-alone examples: 28 | 29 | ```bash 30 | python -m colearn_examples.ml_interface.run_demo 31 | ``` 32 | 33 | For more examples see the [Examples Page](examples.md) 34 | 35 | ## Installing From Source 36 | 37 | Alternatively, to install the latest code from the repo: 38 | 39 | 1. Download the source code from github: 40 | 41 | ```bash 42 | git clone https://github.com/fetchai/colearn.git && cd colearn 43 | ``` 44 | 45 | 1. Create and launch a clean virtual environment with Python 3.7. 46 | (This library has currently only been tested with Python 3.7). 47 | 48 | ```bash 49 | pipenv --python 3.7 && pipenv shell 50 | ``` 51 | 52 | 2. Install the package from source: 53 | 54 | ```bash 55 | pip install -e .[all] 56 | ``` 57 | 58 | 3. 
Run one of the examples:
59 | 
60 | ```bash
61 | python colearn_examples/ml_interface/pytorch_mnist.py
62 | ```
63 | 
64 | If you are developing the colearn library then install it in editable mode so that new
65 | changes are effective immediately:
66 | 
67 | ```
68 | pip install -e .[all]
69 | ```
70 | 
71 | ### Running the tests
72 | 
73 | Tests can be run with:
74 | 
75 | ```
76 | tox
77 | ```
78 | 
79 | ## Documentation
80 | 
81 | To run the documentation, first install [mkdocs](https://www.mkdocs.org) and plugins:
82 | 
83 | ```bash
84 | pip install .[docs]
85 | ```
86 | 
87 | Then run:
88 | 
89 | ```
90 | mkdocs serve
91 | ```
92 | 
--------------------------------------------------------------------------------
/colearn_keras/utils.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | #
3 | # Copyright 2021 Fetch.AI Limited
4 | #
5 | # Licensed under the Creative Commons Attribution-NonCommercial International
6 | # License, Version 4.0 (the "License"); you may not use this file except in
7 | # compliance with the License. You may obtain a copy of the License at
8 | #
9 | #   http://creativecommons.org/licenses/by-nc/4.0/legalcode
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | # ------------------------------------------------------------------------------
18 | import numpy as np
19 | import tensorflow as tf
20 | from tensorflow.python.data.ops.dataset_ops import PrefetchDataset
21 | 
22 | 
23 | def normalize_img(image, label):
24 |     """Normalizes images: `uint8` -> `float32`."""
25 |     return tf.cast(image, tf.float32) / 255., label
26 | 
27 | 
28 | def _make_loader(images: np.ndarray,
29 |                  labels: np.ndarray,
30 |                  batch_size: int = 32,
31 |                  dp_enabled: bool = False) -> PrefetchDataset:
32 |     """
33 |     Converts array of images and labels to Tensorflow dataset
34 |     :param images: Numpy array of input data
35 |     :param labels: Numpy array of output labels
36 |     :param batch_size: Batch size
37 |     :param dp_enabled: Whether differential privacy is enabled; if True, the last incomplete batch is dropped
38 |     :return: Shuffled Tensorflow prefetch dataset holding images and labels
39 |     """
40 |     dataset = tf.data.Dataset.from_tensor_slices((images, labels))
41 |     n_datapoints = images.shape[0]
42 | 
43 |     dataset = dataset.cache()
44 |     dataset = dataset.shuffle(n_datapoints)
45 |     # tf privacy expects fixed batch sizes, thus drop_remainder=True when dp is enabled
46 |     dataset = dataset.batch(batch_size, drop_remainder=dp_enabled)
47 |     dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
48 | 
49 |     return dataset
50 | 
--------------------------------------------------------------------------------
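A quick usage sketch for the loader above (`_make_loader` is a private helper, so this is illustrative only; the shapes are arbitrary dummy data):

```python
import numpy as np

from colearn_keras.utils import _make_loader

# 100 fake 28x28 single-channel images with integer class labels
images = np.random.rand(100, 28, 28, 1).astype(np.float32)
labels = np.random.randint(0, 10, size=100)

dataset = _make_loader(images, labels, batch_size=32)
# with dp_enabled=True the incomplete final batch (100 % 32 = 4 items) is dropped,
# because TF Privacy requires fixed batch sizes
dp_dataset = _make_loader(images, labels, batch_size=32, dp_enabled=True)

for batch_images, batch_labels in dataset.take(1):
    print(batch_images.shape, batch_labels.shape)  # (32, 28, 28, 1) (32,)
```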
/tests/test_colearn_utils_data.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | #
3 | # Copyright 2021 Fetch.AI Limited
4 | #
5 | # Licensed under the Creative Commons Attribution-NonCommercial International
6 | # License, Version 4.0 (the "License"); you may not use this file except in
7 | # compliance with the License. You may obtain a copy of the License at
8 | #
9 | #   http://creativecommons.org/licenses/by-nc/4.0/legalcode
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | # ------------------------------------------------------------------------------
18 | import numpy as np
19 | import pytest
20 | 
21 | from colearn.utils.data import split_list_into_fractions
22 | 
23 | 
24 | def test_split_list_into_fractions():
25 |     original_list = list(range(100))
26 |     fractions = [1 / 5] * 5
27 |     split_data = split_list_into_fractions(original_list, fractions)
28 |     assert split_data == [list(range(0, 20)),
29 |                           list(range(20, 40)),
30 |                           list(range(40, 60)),
31 |                           list(range(60, 80)),
32 |                           list(range(80, 100))]
33 | 
34 | 
35 | def test_split_list_into_fractions_ndarray():
36 |     original_list = np.array(range(100))
37 |     fractions = [1 / 5] * 5
38 |     split_data = split_list_into_fractions(original_list, fractions)
39 |     ground_truth = [np.array(range(0, 20)),
40 |                     np.array(range(20, 40)),
41 |                     np.array(range(40, 60)),
42 |                     np.array(range(60, 80)),
43 |                     np.array(range(80, 100))]
44 | 
45 |     for sd, gt in zip(split_data, ground_truth):
46 |         assert np.all(sd == gt)
47 | 
48 | 
49 | def test_split_list_into_fractions_minsize():
50 |     original_list = list(range(100))
51 |     fractions = [1 / 5] * 5
52 |     with pytest.raises(Exception):
53 |         split_list_into_fractions(original_list, fractions, min_part_size=30)
54 | 
--------------------------------------------------------------------------------
/tests/plus_one_learner/plus_one_learner.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | #
3 | # Copyright 2021 Fetch.AI Limited
4 | #
5 | # Licensed under the Creative Commons Attribution-NonCommercial International
6 | # License, Version 4.0 (the "License"); you may not use this file except in
7 | # compliance with the License. You may obtain a copy of the License at
8 | #
9 | #   http://creativecommons.org/licenses/by-nc/4.0/legalcode
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 | # ------------------------------------------------------------------------------
18 | from colearn.ml_interface import MachineLearningInterface, ProposedWeights, \
19 |     Weights, ColearnModel
20 | 
21 | 
22 | class PlusOneLearner(MachineLearningInterface):
23 |     def __init__(self, start_value):
24 |         self.current_value = start_value
25 | 
26 |     def mli_propose_weights(self):
27 |         self.current_value += 1
28 |         return Weights(weights=self.current_value)
29 | 
30 |     def mli_test_weights(self, weights) -> ProposedWeights:
31 |         if weights.weights > self.current_value:
32 |             test_score = 1.0
33 |             vote_score = 1.0
34 |             vote = True
35 |         elif weights.weights == self.current_value:  # compare the wrapped value, not the Weights object itself
36 |             test_score = 0.5
37 |             vote_score = 0.5
38 |             vote = False
39 |         else:
40 |             test_score = 0.0
41 |             vote_score = 0.0
42 |             vote = False
43 | 
44 |         result = ProposedWeights(weights=weights,
45 |                                  vote_score=vote_score,
46 |                                  test_score=test_score,
47 |                                  vote=vote
48 |                                  )
49 | 
50 |         return result
51 | 
52 |     def mli_accept_weights(self, weights: Weights):
53 |         self.current_value = weights.weights
54 | 
55 |     def mli_get_current_weights(self) -> Weights:
56 |         return Weights(weights=self.current_value)
57 | 
58 |     def mli_get_current_model(self) -> ColearnModel:
59 |         """
60 |         :return: The current model and its format - not relevant here
61 |         """
62 | 
63 |         return ColearnModel()
64 | 
--------------------------------------------------------------------------------
/colearn_other/demo_utils.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | #
3 | # Copyright 2021 Fetch.AI Limited
4 | #
5 | # Licensed under the Creative Commons Attribution-NonCommercial International
6 | # License, Version 4.0 (the "License"); you may not use this file except in
7 | # compliance with the License. You may obtain a copy of the License at
8 | #
9 | #   http://creativecommons.org/licenses/by-nc/4.0/legalcode
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | # 17 | # ------------------------------------------------------------------------------ 18 | from typing import Callable 19 | 20 | 21 | def get_split_to_folders(dataloader_name: str) -> Callable: 22 | # pylint: disable=C0415 23 | if dataloader_name == "PYTORCH_XRAY": 24 | # noinspection PyUnresolvedReferences 25 | from colearn_pytorch.pytorch_xray import split_to_folders # type: ignore[no-redef] 26 | 27 | elif dataloader_name == "KERAS_MNIST": 28 | # noinspection PyUnresolvedReferences 29 | from colearn_keras.keras_mnist import split_to_folders # type: ignore[no-redef] 30 | 31 | elif dataloader_name == "KERAS_CIFAR10": 32 | # noinspection PyUnresolvedReferences 33 | from colearn_keras.keras_cifar10 import split_to_folders # type: ignore[no-redef] 34 | 35 | elif dataloader_name == "PYTORCH_COVID_XRAY": 36 | # noinspection PyUnresolvedReferences 37 | from colearn_pytorch.pytorch_covid_xray import split_to_folders # type: ignore[no-redef] 38 | 39 | elif dataloader_name == "FRAUD": 40 | # noinspection PyUnresolvedReferences 41 | from colearn_other.fraud_dataset import split_to_folders # type: ignore[no-redef] 42 | else: 43 | raise NotImplementedError("Split not defined for dataloader %s" % dataloader_name) 44 | 45 | return split_to_folders 46 | 47 | 48 | def get_score_name(model_name: str) -> str: 49 | if model_name == "PYTORCH_XRAY": 50 | score_name = "auc" 51 | elif model_name in ["KERAS_MNIST", "KERAS_MNIST_RESNET", "KERAS_CIFAR10", "PYTORCH_COVID_XRAY"]: 52 | score_name = "categorical_accuracy" 53 | elif model_name == "FRAUD": 54 | score_name = "accuracy" 55 | else: 56 | score_name = "loss" 57 | return score_name 58 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | ignore-patterns=serialization.py,message.py,__main__.py,.*_pb2.py,launch.py,transaction.py 3 | 4 | [MESSAGES CONTROL] 5 | disable=C0103,C0201,C0330,C0301,C0302,W1202,W1203,W0511,W0107,W0105,W0621,W0235,W0613,W0221, 6 | R0902,R0913,R0914,R1720,R1705,R0801,R0904,R0903,R0911,R0912,R0901,R1704,R0916,R1702, 7 | R0915,R1710,R1703,R0401,C0114,C0115,C0116 8 | 9 | ENABLED: 10 | # W0703: broad-except 11 | # W0212: protected-access 12 | # W0706: try-except-raise 13 | # W0108: unnecessary-lambda 14 | # W0622: redefined-builtin 15 | # W0163: unused-argument 16 | # W0201: attribute-defined-outside-init 17 | # W0222: signature-differs 18 | # W0223: abstract-method 19 | # W0611: unused-import 20 | # W0612: unused-variable 21 | # W1505: deprecated-method 22 | # W0106: expression-not-assigned 23 | # R0201: no-self-use 24 | # R0205: useless-object-inheritance 25 | # R1723: no-else-break 26 | # R1721: unnecessary-comprehension 27 | # R1718: consider-using-set-comprehension 28 | # R1716: chained-comparison 29 | # R1714: consider-using-in 30 | # R0123: literal-comparison 31 | # R1711: useless-return 32 | # R1722: consider-using-sys-exit 33 | 34 | ## Resolve these: 35 | # R0401: cyclic-import 36 | # W0221: arguments-differ 37 | # R0902: too-many-instance-attributes 38 | # R0913: too-many-arguments 39 | # R0914: too-many-locals 40 | # R1720: no-else-raise 41 | # R1705: no-else-return 42 | # R0904: too-many-public-methods 43 | # R0903: too-few-public-methods 44 | # R0911: too-many-return-statements 45 | # R0912: too-many-branches 46 | # R0901: too-many-ancestors 47 | # R1704: redefined-argument-from-local 48 | # R0916: too-many-boolean-expressions 49 | # R1702: too-many-nested-blocks 50 | # R0915: 
too-many-statements
51 | # R1710: inconsistent-return-statements
52 | # R1703: simplifiable-if-statement
53 | 
54 | ## Keep the following:
55 | # C0103: invalid-name
56 | # C0201: consider-iterating-dictionary
57 | # C0330: Wrong hanging indentation
58 | # http://pylint-messages.wikidot.com/messages:c0301 > Line too long (%s/%s)
59 | # http://pylint-messages.wikidot.com/messages:c0302 > Too many lines in module (%s)
60 | # W1202: logging-format-interpolation
61 | # W1203: logging-fstring-interpolation
62 | # W0511: fixme
63 | # W0107: unnecessary-pass
64 | # W0105: pointless-string-statement
65 | # W0621: redefined-outer-name
66 | # W0235: useless-super-delegation
67 | # R0801: similar lines
68 | 
69 | [IMPORTS]
70 | ignored-modules=click,google,grpc,matplotlib,numpy,opacus,onnx,onnxmltools,pandas,PIL,prometheus_client,pydantic,pytest,
71 |     tensorflow,tensorflow_core,tensorflow_datasets,tensorflow_privacy,torch,torchsummary,torchvision,typing_extensions,
72 |     scipy,sklearn,xgboost
73 | 
74 | [TYPECHECK]
75 | ignored-classes=ResponseSupportedSystem
76 | 
--------------------------------------------------------------------------------
/docs/examples.md:
--------------------------------------------------------------------------------
1 | # Examples that use Collective Learning
2 | 
3 | This is a list of examples that we've implemented to show you how to use Collective Learning locally. See an example of
4 | the [gRPC server](grpc_examples.md) for the next step towards decentralized Colearn.
5 | 
6 | ### MNIST
7 | 
8 | Uses the standard [MNIST](https://en.wikipedia.org/wiki/MNIST_database) database of handwritten images
9 | 
10 | * [mnist_keras]({{ repo_root }}/colearn_examples/ml_interface/keras_mnist.py).
11 |   Uses the `KerasLearner` helper class.
12 |   Discussed in more detail [here](./intro_tutorial_keras.md).
13 | * [mnist_pytorch]({{ repo_root }}/colearn_examples/ml_interface/pytorch_mnist.py).
14 |   Uses the `PytorchLearner` helper class.
15 |   Discussed in more detail [here](./intro_tutorial_pytorch.md).
16 | 
17 | ### Fraud
18 | 
19 | The fraud dataset consists of information about credit card transactions.
20 | The task is to predict whether transactions are fraudulent or not.
21 | The data needs to be downloaded from [Kaggle](https://www.kaggle.com/c/ieee-fraud-detection),
22 | and the data directory passed in with the flag `--data_dir`.
23 | 
24 | * [fraud_mli]({{ repo_root }}/colearn_examples/ml_interface/mli_fraud.py).
25 |   Uses the `MachineLearningInterface` directly and detects fraud in bank transactions.
26 | * [fraud_keras]({{ repo_root }}/colearn_examples/ml_interface/keras_fraud.py).
27 |   Loads data from numpy arrays and uses `KerasLearner`.
28 | 
29 | ### Cifar10
30 | 
31 | Uses the standard [Cifar10](https://en.wikipedia.org/wiki/CIFAR-10) database of images
32 | 
33 | * [cifar_keras]({{ repo_root }}/colearn_examples/ml_interface/keras_cifar.py).
34 |   Uses the `KerasLearner` helper class.
35 | * [cifar_pytorch]({{ repo_root }}/colearn_examples/ml_interface/pytorch_cifar.py).
36 |   Uses the `PytorchLearner` helper class.
37 | 
38 | ### Xray
39 | 
40 | A binary classification task that requires predicting pneumonia from images of chest X-rays.
41 | The data needs to be downloaded from [Kaggle](https://www.kaggle.com/paultimothymooney/chest-xray-pneumonia),
42 | and the data directory passed in with the flag `--data_dir`
43 | 
44 | * [xray_keras]({{ repo_root }}/colearn_examples/ml_interface/keras_xray.py).
45 |   Uses the `KerasLearner` helper class.
46 | * [xray_pytorch]({{ repo_root }}/colearn_examples/ml_interface/pytorch_xray.py). 47 | Uses the `PytorchLearner` helper class. 48 | 49 | ### Iris 50 | 51 | Uses the standard Iris dataset. 52 | The aim of this task is to classify examples into one of three iris species based on measurements of the flower. 53 | 54 | * [iris_random_forest]({{ repo_root }}/colearn_examples/ml_interface/mli_random_forest_iris.py). 55 | Uses the `MachineLearningInterface` directly and a random forest for classification. 56 | -------------------------------------------------------------------------------- /colearn_keras/test_keras_scania.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | # ------------------------------------------------------------------------------ 18 | import json 19 | import time 20 | from colearn_grpc.example_mli_factory import ExampleMliFactory 21 | from colearn_grpc.grpc_server import GRPCServer 22 | from colearn_grpc.logging import get_logger 23 | from colearn_grpc.example_grpc_learner_client import ExampleGRPCLearnerClient 24 | 25 | # Register scania models and dataloaders in the FactoryRegistry 26 | # pylint: disable=W0611 27 | import colearn_keras.keras_scania # type:ignore # noqa: F401 28 | 29 | 30 | _logger = get_logger(__name__) 31 | 32 | 33 | def test_keras_scania_with_grpc_sever(): 34 | _logger.info("setting up the grpc server ...") 35 | 36 | server_port = 34567 37 | server_key = "" 38 | server_crt = "" 39 | enable_encryption = False 40 | 41 | server = GRPCServer( 42 | mli_factory=ExampleMliFactory(), 43 | port=server_port, 44 | enable_encryption=enable_encryption, 45 | server_key=server_key, 46 | server_crt=server_crt, 47 | ) 48 | 49 | server.run(wait_for_termination=False) 50 | 51 | time.sleep(2) 52 | 53 | client = ExampleGRPCLearnerClient( 54 | "scania_client", f"127.0.0.1:{server_port}", enable_encryption=enable_encryption 55 | ) 56 | 57 | client.start() 58 | 59 | ml = client.get_supported_system() 60 | data_loader = "KERAS_SCANIA" 61 | model_architecture = "KERAS_SCANIA" 62 | assert data_loader in ml["data_loaders"].keys() 63 | assert model_architecture in ml["model_architectures"].keys() 64 | 65 | data_location = "gs://colearn-public/scania/0" 66 | assert client.setup_ml( 67 | data_loader, 68 | json.dumps({"location": data_location}), 69 | model_architecture, 70 | json.dumps({}) 71 | ) 72 | 73 | weights = client.mli_propose_weights() 74 | assert weights.weights is not None 75 | 76 | client.mli_accept_weights(weights) 77 | assert client.mli_get_current_weights().weights == weights.weights 78 | 79 | client.stop() 80 | server.stop() 81 | -------------------------------------------------------------------------------- /colearn_pytorch/utils.py: 
-------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | # ------------------------------------------------------------------------------ 18 | from typing import List 19 | 20 | from sklearn.metrics import roc_auc_score 21 | import torch 22 | 23 | 24 | def binary_accuracy_from_logits(outputs: torch.Tensor, labels: torch.Tensor) -> float: 25 | """ 26 | Function to compute binary classification accuracy based on model output (in logits) and ground truth labels 27 | 28 | :param outputs: Tensor of model output in logits 29 | :param labels: Tensor of ground truth labels 30 | :return: Fraction of correct predictions 31 | """ 32 | outputs = (torch.sigmoid(outputs) > 0.5).float() 33 | correct = (outputs == labels).sum().item() 34 | return correct / labels.shape[0] 35 | 36 | 37 | def auc_from_logits(outputs: torch.Tensor, labels: torch.Tensor) -> float: 38 | """ 39 | Function to compute area under curve based on model outputs (in logits) and ground truth labels 40 | 41 | :param outputs: Tensor of model outputs in logits 42 | :param labels: Tensor of ground truth labels 43 | :return: AUC score 44 | """ 45 | predictions = torch.sigmoid(outputs) 46 | return roc_auc_score(labels.cpu().numpy().astype(int), predictions.cpu().numpy()) 47 | 48 | 49 | def categorical_accuracy(outputs: torch.Tensor, labels: torch.Tensor) -> float: 50 | """ 51 | Function to compute accuracy based on model prediction and ground truth labels 52 | 53 | :param outputs: Tensor of model predictions 54 | :param labels: Tensor of ground truth labels 55 | :return: Fraction of correct predictions 56 | """ 57 | outputs = torch.argmax(outputs, 1).int() 58 | correct = (outputs == labels).sum().item() 59 | return correct / labels.shape[0] 60 | 61 | 62 | def prepare_data_split_list(data, n: int) -> List[int]: 63 | """ 64 | Create list of sizes for splitting 65 | 66 | :param data: dataset 67 | :param n: number of equal parts 68 | :return: list of sizes 69 | """ 70 | 71 | parts = [len(data) // n] * n 72 | if sum(parts) < len(data): 73 | parts[-1] += len(data) - sum(parts) 74 | return parts 75 | -------------------------------------------------------------------------------- /docs/python_src/mnist_keras.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | # ------------------------------------------------------------------------------ 18 | import tensorflow as tf 19 | import tensorflow_datasets as tfds 20 | 21 | from colearn_keras.utils import normalize_img 22 | 23 | n_rounds = 20 24 | width = 28 25 | height = 28 26 | n_classes = 10 27 | l_rate = 0.001 28 | batch_size = 64 29 | 30 | # Load the data 31 | train_dataset, info = tfds.load('mnist', split='train', as_supervised=True, with_info=True) 32 | n_train = info.splits['train'].num_examples 33 | test_dataset = tfds.load('mnist', split='test', as_supervised=True) 34 | 35 | train_dataset = train_dataset.map(normalize_img, 36 | num_parallel_calls=tf.data.experimental.AUTOTUNE) 37 | train_dataset = train_dataset.shuffle(n_train) 38 | train_dataset = train_dataset.batch(batch_size) 39 | 40 | test_dataset = test_dataset.map(normalize_img, 41 | num_parallel_calls=tf.data.experimental.AUTOTUNE) 42 | test_dataset = test_dataset.batch(batch_size) 43 | 44 | # Define the model 45 | input_img = tf.keras.Input(shape=(width, height, 1), name="Input") 46 | x = tf.keras.layers.Conv2D(64, (3, 3), activation="relu", padding="same", name="Conv1_1")(input_img) 47 | x = tf.keras.layers.BatchNormalization(name="bn1")(x) 48 | x = tf.keras.layers.MaxPooling2D((2, 2), name="pool1")(x) 49 | x = tf.keras.layers.Conv2D(128, (3, 3), activation="relu", padding="same", name="Conv2_1")(x) 50 | x = tf.keras.layers.BatchNormalization(name="bn2")(x) 51 | x = tf.keras.layers.MaxPooling2D((2, 2), name="pool2")(x) 52 | x = tf.keras.layers.Flatten(name="flatten")(x) 53 | x = tf.keras.layers.Dense(n_classes, activation="softmax", name="fc1")(x) 54 | model = tf.keras.Model(inputs=input_img, outputs=x) 55 | 56 | opt = tf.keras.optimizers.Adam(learning_rate=l_rate) 57 | model.compile( 58 | loss="sparse_categorical_crossentropy", 59 | metrics=[tf.keras.metrics.SparseCategoricalAccuracy()], 60 | optimizer=opt) 61 | 62 | # Train and evaluate model 63 | for round in range(n_rounds): 64 | model.fit(train_dataset, steps_per_epoch=40) 65 | result = model.evaluate(x=test_dataset, return_dict=True, steps=10) 66 | print(f"Performance at round {round} is {result}") 67 | -------------------------------------------------------------------------------- /colearn_grpc/proto/interface.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | import "google/protobuf/empty.proto"; 4 | 5 | package contract_learn.grpc; 6 | 7 | message RequestMLSetup { 8 | string dataset_loader_name = 1; 9 | string dataset_loader_parameters = 2; 10 | string model_arch_name = 3; 11 | string model_parameters = 4; 12 | }; 13 | 14 | enum MLSetupStatus { 15 | UNDEFINED = 0; 16 | SUCCESS = 1; 17 | ERROR = 2; 18 | }; 19 | 20 | message ResponseMLSetup { 21 | MLSetupStatus status = 1; 22 | string description = 2; 23 | }; 24 | 25 | message DiffPrivBudget { 26 | float target_epsilon = 1; 27 | float target_delta = 2; 28 | float consumed_epsilon = 3; 29 | float consumed_delta = 4; 30 | }; 31 | 32 | message TrainingSummary { 33 | DiffPrivBudget dp_budget = 1; 34 | }; 35 | 36 |
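// Note: serialized model weights can exceed the gRPC message size limit, so
// the service below streams them as a sequence of WeightsPart chunks;
// byte_index and total_bytes let the receiver reassemble the full byte string.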
message WeightsPart { 37 | bytes weights = 1; 38 | uint32 byte_index = 2; 39 | uint64 total_bytes = 3; 40 | 41 | TrainingSummary training_summary = 10; 42 | }; 43 | 44 | message ProposedWeights { 45 | float vote_score = 1; 46 | float test_score = 2; 47 | bool vote = 3; 48 | }; 49 | 50 | message RequestStatus { 51 | }; 52 | 53 | enum SystemStatus { 54 | WORKING = 0; 55 | NO_MODEL = 1; 56 | INTERNAL_ERROR = 2; 57 | UNKNOWN = 3; 58 | } 59 | 60 | message ResponseStatus { 61 | SystemStatus status = 1; 62 | }; 63 | 64 | message DatasetLoaderSpec { 65 | string name = 1; 66 | string default_parameters = 2; // JSON encoded default parameters 67 | }; 68 | 69 | message ModelArchSpec { 70 | string name = 1; 71 | string default_parameters = 2; // JSON encoded default parameters for the model arch. 72 | }; 73 | 74 | message CompatibilitySpec { 75 | string model_architecture = 1; 76 | repeated string dataloaders = 2; 77 | }; 78 | 79 | message ResponseVersion { 80 | string version = 1; 81 | }; 82 | 83 | message ResponseCurrentModel { 84 | uint32 model_format = 1; 85 | string model_file = 2; 86 | bytes model = 3; 87 | }; 88 | 89 | message ResponseSupportedSystem { 90 | repeated DatasetLoaderSpec data_loaders = 1; 91 | repeated ModelArchSpec model_architectures = 2; 92 | repeated CompatibilitySpec compatibilities = 3; 93 | }; 94 | 95 | service GRPCLearner { 96 | rpc QueryVersion(google.protobuf.Empty) returns (ResponseVersion); 97 | rpc QuerySupportedSystem(google.protobuf.Empty) returns (ResponseSupportedSystem); 98 | rpc GetCurrentModel(google.protobuf.Empty) returns (ResponseCurrentModel); 99 | rpc MLSetup(RequestMLSetup) returns (ResponseMLSetup); 100 | rpc ProposeWeights(google.protobuf.Empty) returns (stream WeightsPart); 101 | rpc TestWeights(stream WeightsPart) returns (ProposedWeights); 102 | rpc SetWeights(stream WeightsPart) returns (google.protobuf.Empty); 103 | rpc GetCurrentWeights(google.protobuf.Empty) returns (stream WeightsPart); 104 | rpc StatusStream(stream RequestStatus) returns (stream ResponseStatus); 105 | }; 106 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Welcome to the Fetch.ai Collective Learning Library 2 | 3 | Colearn is a library that enables privacy-preserving decentralized machine learning tasks on the FET network. 4 | 5 | This blockchain-mediated collective learning system enables multiple stakeholders to build a shared machine learning model without needing to rely on a central authority, 6 | and without revealing their dataset to the other stakeholders. This library is currently in development. 7 | 8 | ## How collective learning works 9 | 10 | A group of *learners* comes together, each of whom has their own dataset and wants to collaborate on training a machine learning model over a set number of rounds. We refer 11 | to this as an 'experiment'. 12 | In each round of collective learning: 13 | 14 | 1. One learner is selected to train the model and propose a new set of model weights. 15 | 2. The other learners vote on whether the weights are an improvement. 16 | 3. If the majority vote that the new weights are better than the old ones then the new weights are accepted by all the learners. 17 | Otherwise the new weights are discarded. 18 | 4. The next round begins. 19 | For more information on the Collective Learning Protocol see [here](about.md).
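In code, a single round can be sketched as follows. This is a simplified illustration of the protocol in terms of the `MachineLearningInterface` methods, not the library's exact driver implementation:

```Python
def run_one_round_sketch(learners, round_index, vote_threshold=0.5):
    # One learner is selected (round-robin here) to train and propose weights
    proposer = learners[round_index % len(learners)]
    proposed_weights = proposer.mli_propose_weights()

    # Every learner evaluates the proposal on its own vote set and votes
    votes = [learner.mli_test_weights(proposed_weights).vote
             for learner in learners]

    # If enough learners approve, everyone adopts the new weights;
    # otherwise the proposal is discarded
    if sum(votes) / len(votes) > vote_threshold:
        for learner in learners:
            learner.mli_accept_weights(proposed_weights)
```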
20 | 21 | ### Current Version 22 | 23 | We have released *v0.2.8* of the Colearn Machine Learning Interface, the first version of an interface that allows 24 | developers to define their own model architectures that can then be used in collective learning. 25 | Together with the interface we provide a simple backend for local experiments. This is a prototype backend, with upcoming blockchain-ledger-based backends to follow. 26 | Future releases will use similar interfaces so that learners built with the current system will work on a different backend that integrates a distributed ledger and provides other improvements. 27 | The current framework will then be used mainly for model development and debugging. 28 | We invite all users to experiment with the framework, develop their own models, and provide feedback! 29 | 30 | ## Getting Started 31 | 32 | To use the latest stable release we recommend installing the [package from PyPI](https://pypi.org/project/colearn/) 33 | 34 | To install with support for Keras and Pytorch: 35 | 36 | ```bash 37 | pip install colearn[all] 38 | ``` 39 | 40 | To install with just support for Keras or Pytorch: 41 | 42 | ```bash 43 | pip install colearn[keras] 44 | pip install colearn[pytorch] 45 | ``` 46 | 47 | For more installation options or to get the latest (development) version see [Installation](./installation.md) 48 | 49 | Then run the standalone demo: 50 | 51 | ```bash 52 | python -m colearn_examples.ml_interface.run_demo 53 | ``` 54 | 55 | For plenty of other examples see the [Examples](./examples.md) page. 56 | 57 | ## Writing your own models 58 | 59 | We encourage users to try out the system by writing their own models. 60 | Models need to implement the collective learning interface, which provides functions for training and voting on updates. 61 | More instructions can be found in the Getting Started section. 62 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Welcome to the Fetch.ai Collective Learning 2 | 3 | Colearn is a library that enables privacy-preserving decentralized machine learning tasks on the [FET network](https://fetch.ai/). 4 | 5 | This blockchain-mediated collective learning system enables multiple stakeholders to build a shared 6 | machine learning model without needing to rely on a central authority. 7 | This library is currently in development. 8 | 9 | The collective learning protocol allows learners to collaborate on training a model without requiring trust between the participants. Learners vote on updates to the model, and only updates which pass the quality threshold are accepted. This makes the system robust to attempts to interfere with the model by providing bad updates. For more details on the collective learning system see [here](https://fetchai.github.io/colearn/about/) 10 | 11 | ## Current Version 12 | 13 | We have released *v0.2.8* of the Colearn Machine Learning Interface, the first version of an interface that allows 14 | developers to define their own model architectures that can then be used in collective learning. 15 | Together with the interface we provide a simple backend for local experiments. This is the first backend, with upcoming blockchain-ledger-based backends to follow. 16 | Future releases will use similar interfaces so that learners built with the current system will work on a different backend that integrates a distributed ledger and provides other improvements.
17 | The current framework will then be used mainly for model development and debugging. 18 | We invite all users to experiment with the framework, develop their own models, and provide feedback! 19 | 20 | See the most up-to-date documentation at [fetchai.github.io/colearn](https://fetchai.github.io/colearn/) 21 | or the documentation for the latest release at [docs.fetch.ai/colearn](https://docs.fetch.ai/colearn/). 22 | 23 | ## Installation 24 | 25 | Currently we support only macOS and Unix systems. 26 | 27 | To use the latest stable release we recommend installing the [package from PyPI](https://pypi.org/project/colearn/) 28 | 29 | To install with support for Keras and Pytorch: 30 | 31 | ```bash 32 | pip install colearn[all] 33 | ``` 34 | 35 | To install with just support for Keras or Pytorch: 36 | 37 | ```bash 38 | pip install colearn[keras] 39 | pip install colearn[pytorch] 40 | ``` 41 | 42 | ## Running the examples 43 | 44 | Examples are available in the colearn_examples module. To run the MNIST demo in Keras or Pytorch run: 45 | 46 | ```bash 47 | python -m colearn_examples.ml_interface.keras_mnist 48 | python -m colearn_examples.ml_interface.pytorch_mnist 49 | ``` 50 | 51 | - Or they can be accessed by cloning the colearn repo, where they live in the colearn_examples folder 52 | 53 | Please note that although all the examples are always present, which ones you can run depends on your installation. 54 | If you installed only `colearn[keras]` or `colearn[pytorch]` then only their respective examples will work. 55 | 56 | For more instructions see the documentation at [fetchai.github.io/colearn/installation](https://fetchai.github.io/colearn/installation/) 57 | 58 | After installation we recommend [running a demo](https://fetchai.github.io/colearn/demo/), 59 | or seeing [the examples](https://fetchai.github.io/colearn/examples/) 60 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | ; By default, testenvs are configured to: 2 | ; - don't skip dist (skipsdist = False) 3 | ; - don't skip the package installation (skip_install = False) 4 | ; - don't use source installation (usedevelop = False) 5 | ; where one of those steps is not necessary for the test, 6 | ; we set the associated flag (e.g. for linting we don't need 7 | ; the package installation).
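;
; For example, a single environment from the envlist below can be run on its
; own with "tox -e flake8" (this assumes tox itself is installed).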
8 | 9 | [tox] 10 | envlist = flake8, mypy, pylint, pytest3.7, pytest3.8, docs, copyright_check 11 | 12 | [testenv] 13 | basepython = python3.7 14 | extras = all 15 | allowlist_externals = * 16 | 17 | [testenv:pytest3.7] 18 | deps = 19 | pytest==7.2.1 20 | pytest-cov==2.8.1 21 | pytest-asyncio==0.10.0 22 | pytest-randomly==3.12.0 23 | pytest-rerunfailures==11.0 24 | commands = pytest -m "not slow" -rfE --cov-report=html --cov-report=xml --cov-report=term --cov-report=term-missing 25 | 26 | [testenv:pytest3.8] 27 | basepython = python3.8 28 | deps = 29 | pytest==7.2.1 30 | pytest-cov==2.8.1 31 | pytest-asyncio==0.10.0 32 | pytest-randomly==3.12.0 33 | pytest-rerunfailures==11.0 34 | commands = pytest -m "not slow" -rfE --cov-report=html --cov-report=xml --cov-report=term --cov-report=term-missing 35 | 36 | [testenv:pytest-slow3.7] 37 | deps = 38 | pytest==7.2.1 39 | pytest-cov==2.8.1 40 | pytest-asyncio==0.10.0 41 | pytest-rerunfailures==11.0 42 | passenv = 43 | GITHUB_ACTION 44 | COLEARN_DATA_DIR 45 | TFDS_DATA_DIR 46 | PYTORCH_DATA_DIR 47 | commands = pytest -vv -m slow -rfE --cov-report=html --cov-report=xml --cov-report=term --cov-report=term-missing 48 | 49 | [testenv:pytest-slow3.8] 50 | basepython = python3.8 51 | deps = 52 | pytest==7.2.1 53 | pytest-cov==2.8.1 54 | pytest-asyncio==0.10.0 55 | pytest-rerunfailures==11.0 56 | passenv = 57 | GITHUB_ACTION 58 | COLEARN_DATA_DIR 59 | TFDS_DATA_DIR 60 | PYTORCH_DATA_DIR 61 | commands = pytest -vv -m slow -rfE --cov-report=html --cov-report=xml --cov-report=term --cov-report=term-missing 62 | 63 | [testenv:flake8] 64 | skipsdist = True 65 | skip_install = True 66 | deps = flake8==3.7.9 67 | flake8-bugbear==20.1.4 68 | pydocstyle==3.0.0 69 | commands = flake8 --extend-ignore=F821 --exclude="colearn_grpc/proto/generated" colearn colearn_examples \ 70 | colearn_other colearn_pytorch colearn_keras colearn_grpc grpc_examples docker tests 71 | 72 | [testenv:mypy] 73 | skipsdist = True 74 | deps = mypy==0.761 75 | commands = mypy --show-error-codes colearn colearn_examples/ml_interface colearn_examples/grpc \ 76 | colearn_other colearn_pytorch colearn_keras colearn_grpc docker tests 77 | 78 | [testenv:pylint] 79 | skipsdist = True 80 | skip_install = True 81 | deps = pylint==2.5.2 82 | commands = sh -c "pylint colearn colearn_examples/ml_interface/* colearn_examples/grpc/* \ 83 | colearn_other colearn_pytorch colearn_keras colearn_grpc docker tests" 84 | 85 | [testenv:docs] 86 | skipsdist = True 87 | skip_install = False 88 | description = Build the documentation. 89 | extras = docs 90 | commands = mkdocs build --clean 91 | 92 | [testenv:copyright_check] 93 | skipsdist = True 94 | skip_install = True 95 | deps = 96 | commands = {toxinidir}/tests/check_copyright_notice.py 97 | 98 | [pytest] 99 | markers = 100 | slow: mark test as slow 101 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | # ------------------------------------------------------------------------------ 18 | import setuptools 19 | 20 | keras_deps = [ 21 | 'tensorflow>=2.10', 22 | 'tensorflow_datasets>=4.2,<4.5', 23 | 'tensorflow-privacy>=0.5,<0.8', 24 | ] 25 | other_deps = [ 26 | 'pandas>=1.1,<1.5', 27 | 'scikit-learn>=0.23,<1.1', 28 | ] 29 | pytorch_deps = [ 30 | 'opacus>=1.0.0,<1.1', 31 | 'Pillow>=8.0.1,<9.1.0', 32 | 'scikit-learn>=0.23,<1.1', 33 | 'scipy>=1.5,<1.8', 34 | 'torch>=1.7,<1.11', 35 | 'torchsummary~=1.5.0', 36 | 'torchvision>=0.8,<0.12', 37 | ] 38 | docs_deps = [ 39 | "mkdocs", 40 | "mkdocs-macros-plugin", 41 | "mkdocs-material", 42 | "mkdocs-material-extensions", 43 | "markdown-include", 44 | "jinja2==3.0.3" 45 | ] 46 | 47 | grpc_deps = ['grpcio>=1.35,<1.43', 48 | 'grpcio-tools>=1.35,<1.44', 49 | 'prometheus_client==0.13.1', 50 | 'click' 51 | ] 52 | all_deps = list(set(keras_deps + other_deps + pytorch_deps + grpc_deps)) + ["xgboost"] 53 | 54 | long_description = "" 55 | try: 56 | with open("README.md", "r") as fh: 57 | long_description = fh.read() 58 | except FileNotFoundError: 59 | print("README.md file not found, no long description available") 60 | 61 | setuptools.setup( 62 | name="colearn", 63 | version="0.2.8", 64 | author="Fetch AI", 65 | author_email="developer@fetch.ai", 66 | description="The Standalone Fetch AI Collective Learning Framework", 67 | long_description=long_description, 68 | long_description_content_type="text/markdown", 69 | url="https://github.com/fetchai/colearn", 70 | packages=setuptools.find_namespace_packages(exclude=("tests", "tests.*", "site", "site.*", 71 | "docs", "docs.*", "docker", "scripts", "build", "build.*")), 72 | classifiers=[ 73 | # Need to fill in 74 | "Operating System :: OS Independent", 75 | ], 76 | python_requires='>=3.7, <3.9', 77 | install_requires=[ 78 | 'google-cloud-storage>=1.35,<2.2', 79 | 'matplotlib>=3.3,<3.6', 80 | 'onnx==1.8.1', 81 | 'tf2onnx==1.13.0', 82 | 'onnxmltools==1.10.0', 83 | 'numpy>=1.16,<1.23', 84 | 'pydantic>=1.7,<1.10', 85 | ], 86 | tests_require=["tox>=3.20,<3.25"], 87 | extras_require={ 88 | 'keras': keras_deps, 89 | 'other': other_deps, 90 | 'pytorch': pytorch_deps, 91 | 'docs': docs_deps, 92 | 'all': all_deps, 93 | 'grpc': grpc_deps 94 | }, 95 | ) 96 | -------------------------------------------------------------------------------- /docs/differential_privacy.md: -------------------------------------------------------------------------------- 1 | # What is differential privacy? 2 | 3 | To make a machine learning system that protects privacy we first need to have a definition of what privacy is. 4 | Differential privacy (DP) is one such definition. 5 | First we need to have three concepts: the _database_ is a collection of data about _individuals_ (for example, their medical records), and we want to make a _query_ about that data (for example "How much does smoking increase someone's risk of cancer?"). 6 | DP says that privacy is preserved if the result of the query cannot be used to determine if any particular individual is present in the database. 
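Formally (stating the standard definition here for reference; the notation below is not used elsewhere in this document): a randomised query mechanism $M$ is epsilon-differentially private if, for every pair of databases $D$ and $D'$ that differ in one individual's data, and every set of possible results $S$,

$$\Pr[M(D) \in S] \le e^{\epsilon} \cdot \Pr[M(D') \in S]$$

so the smaller epsilon is, the less the presence or absence of any one individual can change the distribution of query results.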
7 | 8 | So if person A has their medical data in a database, and the query that we want to make on that database is 9 | "How much does smoking increase someone's risk of cancer?" then the result of that query shouldn't disclose whether or not person A's details are in the database. 10 | 11 | From this comes the idea of _sensitivity_ of a query. 12 | The _sensitivity_ of a query determines how much the result of the query depends on an individual's data. 13 | For example, the query "How much does smoking increase the risk of cancer for adults in the UK?" is less sensitive than the query "How much does smoking increase the risk of cancer for men aged 50-55 in Cambridge?" because the second query uses a smaller set of individuals. 14 | 15 | ## Epsilon-differential privacy 16 | 17 | Epsilon-differential privacy (EDP) is a scheme for preserving differential privacy. 18 | In EDP all queries have random noise added to them, so they are no longer deterministic. 19 | So if the query was "What fraction of people in the database are male", and the true result is 0.5 then the results of calling this query three times might be 0.53, 0.49 and 0.51. 20 | This makes it harder to tell if an individual's data is in the database, because the effect of adding a person can't be distinguished from the effect of the random noise. 21 | Intuitively this is a bit like blurring an image: adding noise obscures personal information. 22 | The amount of personal information that is revealed isn't zero, but it is guaranteed to be below a certain threshold. 23 | 24 | The level of privacy that is provided is controlled by the parameter epsilon; the smaller epsilon is, the more noise is added and the more privacy is preserved. 25 | Queries that are more sensitive have more noise added, because they reveal more information about individuals. 26 | It is important to add as little noise as possible, because adding more noise obscures the patterns that you want to extract from the data. 27 | 28 | ## Differential privacy when training neural networks 29 | 30 | Each training step for a neural network can be thought of as a complicated query on a database of training data. 31 | Differential privacy mechanisms tell you how much noise you need to add to guarantee a certain level of privacy. 32 | The `opacus` and `tensorflow-privacy` libraries implement epsilon-differential privacy for training neural networks for Pytorch and Keras respectively. 33 | 34 | # How to use differential privacy with colearn 35 | 36 | By using `opacus` and `tensorflow-privacy` we can make collective learning use differential privacy. 37 | The learner that is proposing weights does so using a DP-enabled optimiser. 38 | 39 | To see an example of using this see [dp_pytorch]({{ repo_root }}/colearn_examples/ml_interface/pytorch_mnist_diffpriv.py) 40 | and [dp_keras]({{ repo_root }}/colearn_examples/ml_interface/keras_mnist_diffpriv.py). 41 | -------------------------------------------------------------------------------- /colearn/training.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License.
You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | # ------------------------------------------------------------------------------ 18 | from typing import Sequence 19 | 20 | from colearn.ml_interface import ProposedWeights, MachineLearningInterface 21 | from colearn.standalone_driver import run_one_round 22 | from colearn.utils.results import Result 23 | 24 | 25 | def set_equal_weights(learners: Sequence[MachineLearningInterface]): 26 | first_learner_weights = learners[0].mli_get_current_weights() 27 | 28 | for learner in learners[1:]: 29 | learner.mli_accept_weights(first_learner_weights) 30 | 31 | 32 | def initial_result(learners: Sequence[MachineLearningInterface]): 33 | result = Result() 34 | for learner in learners: 35 | proposed_weights = learner.mli_test_weights(learner.mli_get_current_weights()) # type: ProposedWeights 36 | result.test_scores.append(proposed_weights.test_score) 37 | result.vote_scores.append(proposed_weights.vote_score) 38 | result.votes.append(True) 39 | return result 40 | 41 | 42 | def collective_learning_round(learners: Sequence[MachineLearningInterface], vote_threshold, 43 | round_index): 44 | print("Doing collective learning round") 45 | result = Result() 46 | 47 | proposed_weights_list, vote = run_one_round(round_index, learners, 48 | vote_threshold) 49 | result.vote = vote 50 | result.votes = [pw.vote for pw in proposed_weights_list] 51 | result.vote_scores = [pw.vote_score for pw in 52 | proposed_weights_list] 53 | result.test_scores = [pw.test_score for pw in proposed_weights_list] 54 | result.training_summaries = [ 55 | l.mli_get_current_weights().training_summary 56 | for l in learners 57 | if l.mli_get_current_weights().training_summary is not None 58 | ] 59 | result.block_proposer = round_index % len(learners) 60 | 61 | return result 62 | 63 | 64 | def individual_training_round(learners: Sequence[MachineLearningInterface], round_index): 65 | print("Doing individual training pass") 66 | result = Result() 67 | 68 | # train all models 69 | for i, learner in enumerate(learners): 70 | print(f"Training learner #{i} round index {round_index}") 71 | weights = learner.mli_propose_weights() 72 | proposed_weights = learner.mli_test_weights(weights) 73 | learner.mli_accept_weights(weights) 74 | 75 | result.votes.append(True) 76 | result.vote_scores.append(proposed_weights.vote_score) 77 | result.test_scores.append(proposed_weights.test_score) 78 | 79 | return result 80 | -------------------------------------------------------------------------------- /colearn_grpc/logging.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | # ------------------------------------------------------------------------------ 18 | import logging 19 | import sys 20 | import click 21 | 22 | 23 | """ logging helpers, inspired from https://github.com/fetchai/agents-aea/blob/master/aea/cli/utils/loggers.py """ 24 | 25 | 26 | class ColorFormatter(logging.Formatter): 27 | """The default formatter for cli output.""" 28 | 29 | colors = { 30 | "error": dict(fg="red"), 31 | "exception": dict(fg="red"), 32 | "critical": dict(fg="red"), 33 | "debug": dict(fg="blue"), 34 | "info": dict(fg="green"), 35 | "warning": dict(fg="yellow"), 36 | } 37 | 38 | def format(self, record): 39 | """Format the log message.""" 40 | if not record.exc_info: 41 | level = record.levelname.lower() 42 | msg = record.getMessage() 43 | if level in self.colors: 44 | prefix = click.style("{}: ".format(level), **self.colors[level]) 45 | msg = "\n".join(prefix + x for x in msg.splitlines()) 46 | return msg 47 | return logging.Formatter.format(self, record) # pragma: no cover 48 | 49 | 50 | def default_logging_config(logger): # pylint: disable=redefined-outer-name 51 | """Set up the default handler and formatter on the given logger.""" 52 | default_handler = logging.StreamHandler(stream=sys.stdout) 53 | default_handler.formatter = ColorFormatter() 54 | logger.handlers = [default_handler] 55 | logger.propagate = True 56 | return logger 57 | 58 | 59 | _log_levels = {} # type: ignore # pylint: disable=W0603 60 | _loggers = {} # type: ignore # pylint: disable=W0603 61 | 62 | 63 | def _set_logger_level(logger, log_level): 64 | level = logging.getLevelName(log_level.upper()) 65 | logger.setLevel(level) 66 | 67 | 68 | def _update_log_level(logger_name, logger): 69 | if logger_name in _log_levels: 70 | _set_logger_level(logger, _log_levels[logger_name]) 71 | elif "default" in _log_levels: 72 | _set_logger_level(logger, _log_levels["default"]) 73 | 74 | 75 | def get_logger(name, name_length=1): 76 | global _loggers # pylint: disable=W0603 77 | splitted = name.split(".") 78 | logger_name = ".".join(splitted[-name_length:]) 79 | logger = logging.getLogger(logger_name) 80 | logger = default_logging_config(logger) 81 | _update_log_level(logger_name, logger) 82 | _loggers[logger_name] = logger 83 | return logger 84 | 85 | 86 | def set_log_levels(config): 87 | global _log_levels, _loggers # pylint: disable=W0603 88 | _log_levels = {**config} 89 | for name, logger in _loggers.items(): 90 | _update_log_level(name, logger) 91 | -------------------------------------------------------------------------------- /colearn/utils/results.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | # ------------------------------------------------------------------------------ 18 | from typing import List 19 | import numpy as np 20 | 21 | 22 | class Result: 23 | def __init__(self): 24 | self.vote = False 25 | self.votes = [] 26 | self.test_scores = [] 27 | self.vote_scores = [] 28 | self.training_summaries = [] 29 | self.block_proposer = None 30 | 31 | 32 | class Results: 33 | def __init__(self): 34 | self.data = [] # type: List[Result] 35 | 36 | # Data for plots and statistics 37 | self.h_test_scores = [] 38 | self.h_vote_scores = [] 39 | 40 | self.mean_test_scores = [] 41 | self.mean_vote_scores = [] 42 | 43 | def process_statistics(self): 44 | self.h_test_scores = [] 45 | self.h_vote_scores = [] 46 | 47 | n_rounds = len(self.data) 48 | self.mean_test_scores = [ 49 | np.mean(np.array(self.data[r].test_scores)) for r in range(n_rounds) 50 | ] 51 | self.mean_vote_scores = [ 52 | np.mean(np.array(self.data[r].vote_scores)) for r in range(n_rounds) 53 | ] 54 | 55 | # gather individual scores 56 | n_learners = len(self.data[0].vote_scores) 57 | for i in range(n_learners): 58 | self.h_test_scores.append( 59 | [self.data[r].test_scores[i] for r in range(n_rounds)] 60 | ) 61 | self.h_vote_scores.append( 62 | [self.data[r].vote_scores[i] for r in range(n_rounds)] 63 | ) 64 | 65 | 66 | def print_results(results: Results): 67 | last_result = results.data[-1] 68 | print("--------------- LATEST ROUND RESULTS -------------") 69 | print("Selected proposer:\t", last_result.block_proposer) 70 | print("New model accepted:\t", last_result.vote) 71 | print("--------------------------------------------------") 72 | print("learner id\t\tvote\ttest score\t\tvote score") 73 | for i in range(len(last_result.votes)): 74 | print( 75 | "{id}\t\t\t\t{vote}\t{test_score:.3f}\t\t\t{vote_score:.3f}".format( 76 | id=i, 77 | vote=last_result.votes[i], 78 | test_score=last_result.test_scores[i], 79 | vote_score=last_result.vote_scores[i], 80 | ) 81 | ) 82 | print("--------------------------------------------------") 83 | if len(last_result.training_summaries) != 0: 84 | print("learner id\t\ttarget privacy budget\t\tconsumed") 85 | for i, summary in enumerate(last_result.training_summaries): 86 | print( 87 | f"{i}\t\t\t\t{summary.dp_budget.target_epsilon}" 88 | f"\t\t\t\t{summary.dp_budget.consumed_epsilon}" 89 | ) 90 | print("--------------------------------------------------") 91 | -------------------------------------------------------------------------------- /colearn_examples/grpc/run_grpc_demo.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | # ------------------------------------------------------------------------------ 18 | import argparse 19 | import json 20 | 21 | from colearn.training import set_equal_weights, initial_result, collective_learning_round 22 | from colearn.utils.plot import ColearnPlot 23 | from colearn.utils.results import Results, print_results 24 | from colearn_grpc.example_grpc_learner_client import ExampleGRPCLearnerClient 25 | 26 | cli_args = argparse.ArgumentParser(description='Run a collective learning demo against running GRPC learner servers') 27 | cli_args.add_argument('-p', '--port', type=int, default=9995, help='first server port') 28 | cli_args.add_argument('-n', '--n_learners', type=int, default=5, help='number of learners') 29 | cli_args.add_argument('-d', '--dataloader_tag', type=str, help='dataloader tag') 30 | cli_args.add_argument('-m', '--model_tag', type=str, help='model tag') 31 | cli_args.add_argument('-l', '--data_locations', type=str, 32 | help='A comma-separated list of folders where the data is located. If the list has only one ' 'item then all the learners will use the same location.') 33 | cli_args.add_argument('-r', "--n_rounds", default=15, type=int, help="Number of training rounds") 34 | 35 | args = cli_args.parse_args() 36 | 37 | 38 | data_folders = args.data_locations.split(",") 39 | if len(data_folders) == 1: 40 | data_folders = data_folders * args.n_learners 41 | elif len(data_folders) != args.n_learners: 42 | raise Exception(f"Number of data locations given ({len(data_folders)}) does not match the number of learners ({args.n_learners}): {data_folders}") 43 | 44 | # Now make the corresponding grpc clients 45 | all_learner_models = [] 46 | for i in range(args.n_learners): 47 | port = args.port + i 48 | ml_system = ExampleGRPCLearnerClient(f"client {i}", f"127.0.0.1:{port}") 49 | ml_system.start() 50 | dataloader_params = {"location": data_folders[i]} 51 | ml_system.setup_ml(dataset_loader_name=args.dataloader_tag, 52 | dataset_loader_parameters=json.dumps(dataloader_params), 53 | model_arch_name=args.model_tag, 54 | model_parameters=json.dumps({})) 55 | all_learner_models.append(ml_system) 56 | 57 | # now colearn as usual!
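# (These gRPC clients satisfy the same MachineLearningInterface as local
# learners, so the standard training loop below works on them unchanged.)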
58 | set_equal_weights(all_learner_models) 59 | 60 | # Train the model using Collective Learning 61 | results = Results() 62 | results.data.append(initial_result(all_learner_models)) 63 | 64 | plot = ColearnPlot(score_name="accuracy") 65 | 66 | n_rounds = args.n_rounds 67 | vote_threshold = 0.5 68 | for round_index in range(n_rounds): 69 | results.data.append( 70 | collective_learning_round(all_learner_models, 71 | vote_threshold, round_index) 72 | ) 73 | 74 | print_results(results) 75 | plot.plot_results_and_votes(results) 76 | 77 | plot.block() 78 | 79 | print("Colearn Example Finished!") 80 | 81 | for model in all_learner_models: 82 | model.stop() 83 | -------------------------------------------------------------------------------- /colearn_grpc/scripts/run_grpc_server.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | # ------------------------------------------------------------------------------ 18 | import argparse 19 | from pathlib import Path 20 | import signal 21 | import sys 22 | from prometheus_client import start_http_server 23 | 24 | from colearn_grpc.example_mli_factory import ExampleMliFactory 25 | from colearn_grpc.grpc_server import GRPCServer 26 | from colearn_grpc.logging import set_log_levels, get_logger 27 | 28 | # These are imported so that they are registered in the FactoryRegistry 29 | # pylint: disable=W0611 30 | import colearn_keras.keras_mnist # type:ignore # noqa: F401 31 | import colearn_keras.keras_cifar10 # type:ignore # noqa: F401 32 | import colearn_keras.keras_scania # type:ignore # noqa: F401 33 | import colearn_pytorch.pytorch_xray # type:ignore # noqa: F401 34 | import colearn_pytorch.pytorch_covid_xray # type:ignore # noqa: F401 35 | import colearn_other.fraud_dataset # type:ignore # noqa: F401 36 | 37 | _logger = get_logger(__name__) 38 | 39 | REPO_ROOT = Path(__file__).absolute().parent.parent 40 | 41 | 42 | def create_signal_handler(server): 43 | def signal_handler(sig, frame): 44 | _logger.info('You pressed Ctrl+C! 
Killing server...') 45 | server.stop() 46 | _logger.info("...done") 47 | sys.exit(0) 48 | 49 | return signal_handler 50 | 51 | 52 | def main(): 53 | cli_args = argparse.ArgumentParser(description='Start GRPC learner server') 54 | cli_args.add_argument('-p', '--port', type=int, default=9995, help='server port') 55 | cli_args.add_argument('--metrics_port', type=int, default=9091, help='prometheus metrics webserver port') 56 | cli_args.add_argument('--enable_encryption', action="store_true", 57 | help='enable encryption on grpc channel between server and orchestrator') 58 | cli_args.add_argument('--server_key', type=str, default=REPO_ROOT / "server.key", 59 | help='path to server key for encryption (if enabled)') 60 | cli_args.add_argument('--server_crt', type=str, default=REPO_ROOT / "server.crt", 61 | help='path to server certificate for encryption (if enabled)') 62 | args = cli_args.parse_args() 63 | 64 | log_levels = { 65 | "default": "INFO" 66 | } 67 | 68 | set_log_levels(log_levels) 69 | 70 | try: 71 | start_http_server(args.metrics_port) 72 | except Exception as e: # pylint: disable=W0703 73 | _logger.warning(f"Could not start the Prometheus metrics server: {e}") 74 | 75 | server = GRPCServer(mli_factory=ExampleMliFactory(), 76 | port=args.port, enable_encryption=args.enable_encryption, 77 | server_key=args.server_key, server_crt=args.server_crt 78 | ) 79 | 80 | signal.signal(signal.SIGINT, create_signal_handler(server)) 81 | 82 | server.run() 83 | 84 | 85 | if __name__ == "__main__": 86 | main() 87 | -------------------------------------------------------------------------------- /colearn_grpc/mli_factory_interface.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License.
16 | # 17 | # ------------------------------------------------------------------------------ 18 | import abc 19 | from typing import Dict, Set, Any 20 | import os.path 21 | from pkg_resources import get_distribution, DistributionNotFound 22 | 23 | from colearn.ml_interface import MachineLearningInterface 24 | 25 | 26 | class MliFactory(abc.ABC): 27 | """ 28 | Interface a class must implement to be used as a factory by the GRPC Server 29 | """ 30 | _version = "0.0.0" 31 | 32 | # https://stackoverflow.com/questions/17583443 33 | try: 34 | _dist = get_distribution('colearn') 35 | # Normalize case for Windows systems 36 | dist_loc = os.path.normcase(_dist.location) 37 | here = os.path.normcase(__file__) 38 | if not here.startswith(os.path.join(dist_loc, 'colearn')): 39 | # not installed, but there is another version that *is* 40 | raise DistributionNotFound 41 | except DistributionNotFound: 42 | pass 43 | else: 44 | _version = _dist.version 45 | 46 | def get_version(self) -> str: 47 | """ 48 | Returns the version of this library. 49 | """ 50 | return self._version 51 | 52 | @abc.abstractmethod 53 | def get_models(self) -> Dict[str, Dict[str, Any]]: 54 | """ 55 | Returns the models this factory produces. 56 | The key is the name of the model and the values are their default parameters 57 | """ 58 | pass 59 | 60 | @abc.abstractmethod 61 | def get_dataloaders(self) -> Dict[str, Dict[str, Any]]: 62 | """ 63 | Returns the dataloaders this factory produces. 64 | The key is the name of the dataloader and the values are their default parameters 65 | """ 66 | pass 67 | 68 | @abc.abstractmethod 69 | def get_compatibilities(self) -> Dict[str, Set[str]]: 70 | """ 71 | A model is compatible with a dataloader if they can be used together to 72 | construct a MachineLearningInterface with the get_MLI function. 73 | 74 | Returns a dictionary that defines which model is compatible 75 | with which dataloader. 76 | """ 77 | pass 78 | 79 | @abc.abstractmethod 80 | def get_mli(self, 81 | model_name: str, model_params: str, 82 | dataloader_name: str, dataset_params: str) -> MachineLearningInterface: 83 | """ 84 | @param model_name: name of a model, must be in the set returned by get_models 85 | @param model_params: user defined parameters for the model 86 | @param dataloader_name: name of a dataloader to be used: 87 | - must be in the set returned by get_dataloaders 88 | - must be compatible with model_name as defined by get_compatibilities 89 | @param dataset_params: user defined parameters for the dataset 90 | @return: Instance of MachineLearningInterface 91 | Constructs an object that implements MachineLearningInterface whose 92 | underlying model is model_name and dataset is loaded by dataloader_name. 93 | """ 94 | pass 95 | -------------------------------------------------------------------------------- /docs/demo.md: -------------------------------------------------------------------------------- 1 | # How to run the demo 2 | 3 | You can try collective learning for yourself using the simple demo in [run_demo]({{ repo_root }}/colearn_examples/ml_interface/run_demo.py). 4 | This demo creates n learners for one of six learning tasks and co-ordinates the collective learning between them.
5 | 6 | There are six potential models for the demo: 7 | 8 | * KERAS_MNIST is the Tensorflow implementation of a small model for the standard handwritten digits recognition dataset 9 | * KERAS_MNIST_RESNET is the Tensorflow implementation of a Resnet model for the standard handwritten digits recognition dataset 10 | * KERAS_CIFAR10 is the Tensorflow implementation of a model for the classical image recognition dataset 11 | * PYTORCH_XRAY is a Pytorch implementation of a binary classification task that requires predicting pneumonia from images of chest X-rays. 12 | The data need to be downloaded from [Kaggle](https://www.kaggle.com/paultimothymooney/chest-xray-pneumonia) 13 | * PYTORCH_COVID_XRAY is a Pytorch implementation of a 3-class classification task that requires predicting no finding, covid or pneumonia from images of chest X-rays. 14 | This dataset is not currently publicly available. 15 | * FRAUD is a model for the fraud dataset, which consists of information about credit card transactions; the task is to predict whether 16 | transactions are fraudulent or not. 17 | The data need to be downloaded from [Kaggle](https://www.kaggle.com/c/ieee-fraud-detection) 18 | 19 | Use the -h flag to see the options: 20 | 21 | ```bash 22 | python -m colearn_examples.ml_interface.run_demo -h 23 | ``` 24 | 25 | Arguments to run the demo: 26 | 27 | ``` 28 | --data_dir: Directory containing training data, not required for MNIST and CIFAR10 29 | --test_dir: Optional directory containing test data. A fraction of the training set will be used as a test set when not specified 30 | --model: Model to train, options are KERAS_MNIST KERAS_MNIST_RESNET KERAS_CIFAR10 PYTORCH_XRAY PYTORCH_COVID_XRAY FRAUD 31 | --n_learners: Number of individual learners 32 | --n_rounds: Number of training rounds 33 | --vote_threshold: Minimum fraction of positive votes to accept the new model 34 | --train_ratio: Fraction of training dataset to be used as test-set when no test-set is specified 35 | --seed: Seed for initialising model and shuffling datasets 36 | --learning_rate: Learning rate for optimiser 37 | --batch_size: Size of training batch 38 | ``` 39 | 40 | ## Running MNIST 41 | 42 | The simplest task to run is MNIST because the data are downloaded automatically from `tensorflow_datasets`. 43 | The command below runs the MNIST task with five learners for 15 rounds. 44 | 45 | ```bash 46 | python -m colearn_examples.ml_interface.run_demo --model KERAS_MNIST --n_learners 5 --n_rounds 15 47 | ``` 48 | 49 | You should see a graph of the vote score and the test score (the score used here is categorical accuracy). 50 | The new model is accepted if the fraction of positive votes (green colour) is higher than 0.5. 51 | The new model is rejected if the fraction of negative votes (red colour) is higher than 0.5. 52 | 53 | ![Alt text](images/mnist_plot.png?raw=true "Collective learning graph") 54 | 55 | As you can see, there are five learners, and initially they perform poorly. 56 | In round one, learner 0 is selected to propose a new set of weights. 57 | 58 | ## Other datasets 59 | 60 | To run the CIFAR10 dataset: 61 | 62 | ```bash 63 | python -m colearn_examples.ml_interface.run_demo --model KERAS_CIFAR10 --n_learners 5 --n_rounds 15 64 | ``` 65 | 66 | The Fraud and X-ray datasets need to be downloaded from Kaggle (this requires a Kaggle account).
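If you use the Kaggle command-line tool (an assumption; it is not a colearn dependency and must be installed and authenticated separately), the two datasets can be fetched with:

```bash
kaggle competitions download -c ieee-fraud-detection
kaggle datasets download -d paultimothymooney/chest-xray-pneumonia
```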
67 | To run the fraud dataset: 68 | 69 | ```bash 70 | python -m colearn_examples.ml_interface.run_demo --model FRAUD --n_learners 5 --n_rounds 15 --data_dir ./data/fraud 71 | ``` 72 | 73 | To run the X-ray dataset: 74 | 75 | ```bash 76 | python -m colearn_examples.ml_interface.run_demo --model PYTORCH_XRAY --n_learners 5 --n_rounds 15 --data_dir ./data/xray 77 | ``` 78 | -------------------------------------------------------------------------------- /docs/python_src/mnist_pytorch.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | # ------------------------------------------------------------------------------ 18 | from torchsummary import summary 19 | from torchvision import transforms, datasets 20 | import torch.utils.data 21 | 22 | import torch.nn as nn 23 | import torch.nn.functional as nn_func 24 | 25 | # define some constants 26 | batch_size = 64 27 | seed = 42 28 | n_rounds = 20 29 | train_fraction = 0.9 30 | learning_rate = 0.001 31 | height = 28 32 | width = 28 33 | n_classes = 10 34 | num_test_batches = 10 35 | 36 | no_cuda = False 37 | cuda = not no_cuda and torch.cuda.is_available() 38 | device = torch.device("cuda" if cuda else "cpu") 39 | kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {} 40 | 41 | # Load the data 42 | data = datasets.MNIST('/tmp/mnist', transform=transforms.ToTensor(), download=True) 43 | n_train = int(train_fraction * len(data)) 44 | n_test = len(data) - n_train 45 | train_data, test_data = torch.utils.data.random_split(data, [n_train, n_test]) 46 | 47 | train_dataloader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True, **kwargs) 48 | test_dataloader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=True, **kwargs) 49 | 50 | 51 | # Define the model 52 | class Net(nn.Module): 53 | def __init__(self): 54 | super(Net, self).__init__() 55 | self.conv1 = nn.Conv2d(1, 20, 5, 1) 56 | self.conv2 = nn.Conv2d(20, 50, 5, 1) 57 | self.fc1 = nn.Linear(4 * 4 * 50, 500) 58 | self.fc2 = nn.Linear(500, n_classes) 59 | 60 | def forward(self, x): 61 | x = nn_func.relu(self.conv1(x.view(-1, 1, height, width))) 62 | x = nn_func.max_pool2d(x, 2, 2) 63 | x = nn_func.relu(self.conv2(x)) 64 | x = nn_func.max_pool2d(x, 2, 2) 65 | x = x.view(-1, 4 * 4 * 50) 66 | x = nn_func.relu(self.fc1(x)) 67 | x = self.fc2(x) 68 | return nn_func.log_softmax(x, dim=1) 69 | 70 | 71 | model = Net() 72 | opt = torch.optim.Adam(model.parameters(), lr=learning_rate) 73 | criterion = torch.nn.NLLLoss() 74 | 75 | # Train and evaluate the model 76 | for round in range(n_rounds): 77 | # train model 78 | model.train() 79 | 80 | for batch_idx, (data, labels) in enumerate(train_dataloader): 81 | opt.zero_grad() 82 | 83 | # Data 
needs to be on same device as model 84 | data = data.to(device) 85 | labels = labels.to(device) 86 | 87 | output = model(data) 88 | 89 | loss = criterion(output, labels) 90 | loss.backward() 91 | opt.step() 92 | 93 | # evaluate model 94 | model.eval() 95 | total_score = 0 96 | all_labels = [] 97 | all_outputs = [] 98 | with torch.no_grad(): 99 | for batch_idx, (data, labels) in enumerate(test_dataloader): 100 | if batch_idx == num_test_batches: 101 | break 102 | data = data.to(device) 103 | labels = labels.to(device) 104 | output = model(data) 105 | total_score += criterion(output, labels) 106 | avg_loss = float(total_score / num_test_batches)  # criterion already averages over each batch 107 | print(f"Average loss at round {round} is {avg_loss}") 108 | -------------------------------------------------------------------------------- /docs/intro_tutorial_keras.md: -------------------------------------------------------------------------------- 1 | # Using collective learning with keras 2 | 3 | This tutorial is a simple guide to trying out the collective learning protocol with your 4 | own machine learning code. Everything runs locally. 5 | 6 | The most flexible way to use the collective learning backends is to make a class that implements 7 | the Collective Learning `MachineLearningInterface` defined in [ml_interface.py]({{ repo_root }}/colearn/ml_interface.py). 8 | For more details on how to use the `MachineLearningInterface` see [here](./intro_tutorial_mli.md) 9 | 10 | However, the simpler way is to use one of the helper classes that we have provided that implement 11 | most of the interface for popular ML libraries. 12 | In this tutorial we are going to walk through using the `KerasLearner`. 13 | First we are going to define the model architecture, then 14 | we are going to load the data and configure the model, and then we will run Collective Learning. 15 | 16 | A standard script for machine learning with Keras looks like the one below 17 | 18 | ```Python 19 | {!python_src/mnist_keras.py!} 20 | ``` 21 | 22 | There are three steps: 23 | 24 | 1. Load the data 25 | 2. Define the model 26 | 3. Train the model 27 | 28 | In this tutorial we are going to see how to modify each step to use collective learning. 29 | We'll end up with code like this: 30 | 31 | ```Python 32 | {!../colearn_examples/ml_interface/keras_mnist.py!} 33 | ``` 34 | 35 | The first thing is to modify the data loading code. 36 | Each learner needs to have their own training and testing set from the data. 37 | This is easy to do with keras: 38 | 39 | ```Python 40 | train_datasets = [train_dataset.shard(num_shards=n_learners, index=i) for i in range(n_learners)] 41 | ``` 42 | 43 | The model definition is very similar too, except that each learner will need its own copy of the model, 44 | so we've moved it into a function. 45 | 46 | To use collective learning, we need to create an object that implements the MachineLearningInterface. 47 | To make it easier to use the `MachineLearningInterface` with keras, we've defined `KerasLearner`. 48 | `KerasLearner` implements standard training and evaluation routines as well as the MachineLearningInterface methods.
49 | 50 | ```Python 51 | {!../colearn_keras/keras_learner.py!} 52 | ``` 53 | 54 | We create a set of KerasLearners by passing in the model and the datasets: 55 | 56 | ```Python 57 | all_learner_models = [] 58 | for i in range(n_learners): 59 | all_learner_models.append(KerasLearner( 60 | model=get_model(), 61 | train_loader=train_datasets[i], 62 | vote_loader=vote_datasets[i], 63 | test_loader=test_datasets[i], 64 | criterion="sparse_categorical_accuracy", 65 | minimise_criterion=False, 66 | model_evaluate_kwargs={"steps": vote_batches}, 67 | )) 68 | ``` 69 | 70 | Then we give all the models the same weights to start off with: 71 | 72 | ```Python 73 | set_equal_weights(all_learner_models) 74 | ``` 75 | 76 | And then we can move on to the final stage, which is training with Collective Learning. 77 | The function `collective_learning_round` performs one round of collective learning. 78 | One learner is selected to train and propose an update. 79 | The other learners vote on the update, and if the vote passes then the update is accepted. 80 | Then a new round begins. 81 | 82 | ```Python 83 | # Train the model using Collective Learning 84 | results = Results() 85 | results.data.append(initial_result(all_learner_models)) 86 | 87 | for round in range(n_rounds): 88 | results.data.append( 89 | collective_learning_round(all_learner_models, 90 | vote_threshold, round) 91 | ) 92 | 93 | plot_results(results, n_learners, block=False, 94 | score_name=all_learner_models[0].criterion) 95 | plot_votes(results, block=False) 96 | 97 | plot_results(results, n_learners, block=False, 98 | score_name=all_learner_models[0].criterion) 99 | plot_votes(results, block=True) 100 | ``` 101 | -------------------------------------------------------------------------------- /tests/check_copyright_notice.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # ------------------------------------------------------------------------------ 3 | # 4 | # Copyright 2021 Fetch.AI Limited 5 | # 6 | # Licensed under the Creative Commons Attribution-NonCommercial International 7 | # License, Version 4.0 (the "License"); you may not use this file except in 8 | # compliance with the License. You may obtain a copy of the License at 9 | # 10 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | # ------------------------------------------------------------------------------ 19 | """ 20 | This script checks that all the Python files of the repository have: 21 | 22 | - (optional) the Python shebang 23 | - the encoding header; 24 | - the copyright notice; 25 | 26 | It is assumed the script is run from the repository root. 27 | """ 28 | 29 | import itertools 30 | import re 31 | import sys 32 | from pathlib import Path 33 | 34 | 35 | HEADER_REGEX = r"""(#!/usr/bin/env python3 36 | )?# ------------------------------------------------------------------------------ 37 | # 38 | # (Copyright 2021 Fetch.AI Limited|Copyright [0-9]{4}(-[0-9]{4})? 
[a-zA-Z_]+) 39 | # 40 | # Licensed under the Creative Commons Attribution-NonCommercial International 41 | # License, Version 4\.0 \(the \"License\"\); you may not use this file except in 42 | # compliance with the License\. You may obtain a copy of the License at 43 | # 44 | # http://creativecommons\.org/licenses/by-nc/4\.0/legalcode 45 | # 46 | # Unless required by applicable law or agreed to in writing, software 47 | # distributed under the License is distributed on an \"AS IS\" BASIS, 48 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied\. 49 | # See the License for the specific language governing permissions and 50 | # limitations under the License\. 51 | # 52 | # ------------------------------------------------------------------------------ 53 | """ 54 | 55 | 56 | def check_copyright(file: Path) -> bool: 57 | """ 58 | Given a file, check that the expected header is in place. 59 | 60 | Return True if the file has the encoding header and the copyright notice, 61 | optionally prefixed by the shebang. Return False otherwise. 62 | 63 | :param file: the file to check. 64 | :return: True if the file is compliant with the checks, False otherwise. 65 | """ 66 | content = file.read_text() 67 | header_regex = re.compile(HEADER_REGEX, re.MULTILINE) 68 | return re.match(header_regex, content) is not None 69 | 70 | 71 | if __name__ == "__main__": 72 | python_files = itertools.chain( 73 | Path("colearn").glob("**/*.py"), 74 | Path("colearn_grpc").glob("*.py"), 75 | Path("colearn_keras").glob("**/*.py"), 76 | Path("colearn_other").glob("**/*.py"), 77 | Path("colearn_pytorch").glob("**/*.py"), 78 | Path("docker").glob("**/*.py"), 79 | Path("docs").glob("**/*.py"), 80 | Path("colearn_examples").glob("**/*.py"), 81 | Path("grpc").glob("**/*.py"), 82 | Path("tests").glob("**/*.py"), 83 | [Path("setup.py")], 84 | ) 85 | 86 | # filter out protobuf files (*_pb2.py) 87 | python_files_filtered = filter( 88 | lambda x: not str(x).endswith("_pb2.py"), python_files 89 | ) 90 | 91 | bad_files = [ 92 | filepath for filepath in python_files_filtered if not check_copyright(filepath) 93 | ] 94 | 95 | if len(bad_files) > 0: 96 | print("The following files are not well formatted:") 97 | print("\n".join(map(str, bad_files))) 98 | sys.exit(1) 99 | else: 100 | print("OK") 101 | sys.exit(0) 102 | -------------------------------------------------------------------------------- /colearn_grpc/factory_registry.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License.
16 | # 17 | # ------------------------------------------------------------------------------ 18 | from inspect import signature 19 | from typing import Callable, Dict, Any, List, NamedTuple 20 | 21 | 22 | class RegistryException(Exception): 23 | pass 24 | 25 | 26 | def _get_defaults(to_call: Callable) -> Dict[str, Any]: 27 | return {param.name: param.default 28 | for param in signature(to_call).parameters.values() 29 | if param.default != param.empty} 30 | 31 | 32 | def check_dataloader_callable(to_call: Callable): 33 | sig = signature(to_call) 34 | if "location" not in sig.parameters: 35 | raise RegistryException("dataloader must accept a 'location' parameter") 36 | 37 | 38 | class FactoryRegistry: 39 | class DataloaderDef(NamedTuple): 40 | callable: Callable 41 | default_parameters: Dict[str, Any] 42 | 43 | dataloaders: Dict[str, DataloaderDef] = {} 44 | 45 | class ModelArchitectureDef(NamedTuple): 46 | callable: Callable 47 | default_parameters: Dict[str, Any] 48 | compatibilities: List[str] 49 | 50 | model_architectures: Dict[str, ModelArchitectureDef] = {} 51 | 52 | @classmethod 53 | def register_dataloader(cls, name: str): 54 | def wrap(dataloader: Callable): 55 | check_dataloader_callable(dataloader) 56 | if name in cls.dataloaders: 57 | print(f"Warning: {name} already registered. Replacing with {dataloader.__name__}") 58 | cls.dataloaders[name] = cls.DataloaderDef( 59 | callable=dataloader, 60 | default_parameters=_get_defaults(dataloader)) 61 | return dataloader 62 | 63 | return wrap 64 | 65 | @classmethod 66 | def register_model_architecture(cls, name: str, compatibilities: List[str]): 67 | def wrap(model_arch_creator: Callable): 68 | cls.check_model_callable(model_arch_creator, compatibilities) 69 | if name in cls.model_architectures: 70 | print(f"Warning: {name} already registered. Replacing with {model_arch_creator.__name__}") 71 | cls.model_architectures[name] = cls.ModelArchitectureDef( 72 | callable=model_arch_creator, 73 | default_parameters=_get_defaults(model_arch_creator), 74 | compatibilities=compatibilities) 75 | 76 | return model_arch_creator 77 | 78 | return wrap 79 | 80 | @classmethod 81 | def check_model_callable(cls, to_call: Callable, compatibilities: List[str]): 82 | sig = signature(to_call) 83 | if "data_loaders" not in sig.parameters: 84 | raise RegistryException("model must accept a 'data_loaders' parameter") 85 | model_dl_type = sig.parameters["data_loaders"].annotation 86 | for dl in compatibilities: 87 | if dl not in cls.dataloaders: 88 | raise RegistryException(f"Compatible dataloader {dl} is not registered. The dataloader needs to be " 89 | "registered before the model that references it.") 90 | dl_type = signature(cls.dataloaders[dl].callable).return_annotation 91 | if not dl_type == model_dl_type: 92 | raise RegistryException(f"Compatible dataloader {dl} has return type {dl_type}" 93 | f" but model data_loaders expects type {model_dl_type}") 94 | -------------------------------------------------------------------------------- /docs/intro_tutorial_pytorch.md: -------------------------------------------------------------------------------- 1 | # Using collective learning with pytorch 2 | 3 | This tutorial is a simple guide to trying out the collective learning protocol with your 4 | own machine learning code. Everything runs locally. 
5 | 6 | The most flexible way to use the collective learning backends is to make a class that implements 7 | the Collective Learning `MachineLearningInterface` defined in [ml_interface.py]({{ repo_root }}/colearn/ml_interface.py). 8 | For more details on how to use the `MachineLearningInterface` see [here](./intro_tutorial_mli.md) 9 | 10 | However, the simpler way is to use one of the helper classes that we have provided that implement 11 | most of the interface for popular ML libraries. 12 | In this tutorial we are going to walk through using the `PytorchLearner`. 13 | First we are going to define the model architecture, then 14 | we are going to load the data and configure the model, and then we will run Collective Learning. 15 | 16 | A standard script for machine learning with Pytorch looks like the one below 17 | 18 | ```Python 19 | {!python_src/mnist_pytorch.py!} 20 | ``` 21 | 22 | There are three steps: 23 | 24 | 1. Load the data 25 | 2. Define the model 26 | 3. Train the model 27 | 28 | In this tutorial we are going to see how to modify each step to use collective learning. 29 | We'll end up with code like this: 30 | 31 | ```Python 32 | {!../colearn_examples/ml_interface/pytorch_mnist.py!} 33 | ``` 34 | 35 | The first thing is to modify the data loading code. 36 | Each learner needs to have their own training and testing set from the data. 37 | This is easy to do with the pytorch random_split utility: 38 | 39 | ```Python 40 | data_split = [len(test_data) // n_learners] * n_learners 41 | learner_test_data = torch.utils.data.random_split(test_data, data_split) 42 | ``` 43 | 44 | The model definition is the same as before. 45 | To use collective learning, we need to create an object that implements the MachineLearningInterface. 46 | To make it easier to use the `MachineLearningInterface` with pytorch, we've defined `PytorchLearner`. 47 | `PytorchLearner` implements standard training and evaluation routines as well as the MachineLearningInterface methods. 48 | 49 | ```Python 50 | {!../colearn_pytorch/pytorch_learner.py!} 51 | ``` 52 | 53 | We create a set of PytorchLearners by passing in the model and the datasets: 54 | 55 | ```Python 56 | all_learner_models = [] 57 | for i in range(n_learners): 58 | model = Net() 59 | opt = torch.optim.Adam(model.parameters(), lr=learning_rate) 60 | learner = PytorchLearner( 61 | model=model, 62 | train_loader=learner_train_dataloaders[i], 63 | vote_loader=learner_vote_dataloaders[i], 64 | test_loader=learner_test_dataloaders[i], 65 | device=device, 66 | optimizer=opt, 67 | criterion=torch.nn.NLLLoss(), 68 | num_test_batches=vote_batches, 69 | vote_criterion=categorical_accuracy, 70 | minimise_criterion=False 71 | ) 72 | 73 | all_learner_models.append(learner) 74 | ``` 75 | 76 | Then we give all the models the same weights to start off with: 77 | 78 | ```Python 79 | set_equal_weights(all_learner_models) 80 | ``` 81 | 82 | And then we can move on to the final stage, which is training with Collective Learning. 83 | The function `collective_learning_round` performs one round of collective learning. 84 | One learner is selected to train and propose an update. 85 | The other learners vote on the update, and if the vote passes then the update is accepted. 86 | Then a new round begins. 
87 | 88 | ```Python 89 | # Train the model using Collective Learning 90 | results = Results() 91 | results.data.append(initial_result(all_learner_models)) 92 | 93 | for round in range(n_rounds): 94 | results.data.append( 95 | collective_learning_round(all_learner_models, 96 | vote_threshold, round) 97 | ) 98 | 99 | plot_results(results, n_learners, score_name=score_name) 100 | plot_votes(results) 101 | 102 | # Plot the final result with votes 103 | plot_results(results, n_learners, score_name=score_name) 104 | plot_votes(results, block=True) 105 | 106 | ``` 107 | -------------------------------------------------------------------------------- /colearn/utils/data.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | # ------------------------------------------------------------------------------ 18 | import os 19 | from pathlib import Path 20 | from typing import List, Union 21 | 22 | from google.cloud import storage 23 | import numpy as np 24 | 25 | GAUTH_ENV_VAR_NAME = "GOOGLE_APPLICATION_CREDENTIALS" 26 | 27 | 28 | def split_list_into_fractions(input_list: Union[List, np.ndarray], 29 | fractions_list: List, 30 | min_part_size=1): 31 | split_list = [] 32 | start_index = 0 33 | n_indices = len(input_list) 34 | for frac in fractions_list: 35 | end_index = start_index + int(n_indices * frac) 36 | if end_index >= n_indices: 37 | end_index = n_indices 38 | 39 | if end_index - start_index < min_part_size: 40 | raise Exception("Insufficient data in this part") 41 | 42 | split_list.append(input_list[start_index: end_index]) 43 | start_index = end_index 44 | 45 | return split_list 46 | 47 | 48 | def get_data(data_dir: str, download_to='/tmp/data_download'): 49 | """ 50 | Gets data, either from local filesystem or a google cloud bucket 51 | 52 | @param data_dir: path to data. If prefix is "gs://" data will be downloaded. If 53 | it is "file://" then it will be stripped off. 
54 | @param download_to: if data is downloaded where it will be downloaded to/is 55 | @return: Full path to either local data or to the downloaded data 56 | 57 | For more information on how to setup the google cloud bucket see the dev notes 58 | """ 59 | 60 | if str(data_dir).startswith("gs://"): 61 | return _download_data_from_gcloud(data_dir, download_to) 62 | 63 | if str(data_dir).startswith("file://"): 64 | return str(data_dir).split("file://")[1] 65 | 66 | return data_dir 67 | 68 | 69 | def _download_data_from_gcloud(cloud_data_dir, local_data_dir): 70 | """ 71 | Downloads data from a gcloud bucket to local filesystem 72 | 73 | @param cloud_data_dir: path in google cloud bucket 74 | @param local_data_dir: path to where the data will be downloaded 75 | @return: Full path to downloaded data 76 | """ 77 | bucket_name = cloud_data_dir.replace('gs://', '') 78 | bucket_name, prefix = bucket_name.split('/', 1) 79 | print(f"Downloading data from google cloud: Bucket {bucket_name}, prefix {prefix}") 80 | 81 | if len(os.getenv(GAUTH_ENV_VAR_NAME, "")) > 0: 82 | storage_client = storage.Client() 83 | else: 84 | storage_client = storage.client.Client.create_anonymous_client() 85 | bucket = storage_client.bucket(bucket_name=bucket_name) 86 | blobs = bucket.list_blobs(prefix=prefix) # Get list of files 87 | 88 | local_full_path = Path(local_data_dir) / prefix 89 | file_counter = 0 90 | for blob in blobs: 91 | filename = blob.name 92 | 93 | if blob.size == 0: 94 | print(f"Skipping empty file {filename}") 95 | continue 96 | 97 | local_filename = Path(local_data_dir) / filename 98 | os.makedirs(local_filename.parent, exist_ok=True) 99 | 100 | blob.download_to_filename(local_filename) # Download 101 | file_counter += 1 102 | 103 | if file_counter == 0: 104 | raise Exception("No data in folder: " + cloud_data_dir) 105 | 106 | return local_full_path 107 | -------------------------------------------------------------------------------- /colearn_grpc/scripts/run_n_grpc_servers.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | # ------------------------------------------------------------------------------ 18 | import argparse 19 | import os 20 | import signal 21 | import sys 22 | from multiprocessing.context import Process 23 | 24 | from prometheus_client import start_http_server 25 | 26 | from colearn_grpc.example_mli_factory import ExampleMliFactory 27 | from colearn_grpc.grpc_server import GRPCServer 28 | from colearn_grpc.logging import set_log_levels, get_logger 29 | 30 | # to run tensorflow in multiple processes on the same machine, GPU must be switched off 31 | os.environ["CUDA_VISIBLE_DEVICES"] = "-1" 32 | 33 | # These are imported so that they are registered in the FactoryRegistry 34 | # pylint: disable=W0611 35 | # pylint: disable=C0413 36 | import colearn_keras.keras_mnist  # type:ignore # noqa: F401 # pylint: disable=C0413 37 | import colearn_keras.keras_cifar10  # type:ignore # noqa: F401 # pylint: disable=C0413 38 | import colearn_keras.keras_scania  # type:ignore # noqa: F401 # pylint: disable=C0413 39 | import colearn_pytorch.pytorch_xray  # type:ignore # noqa: F401 # pylint: disable=C0413 40 | import colearn_pytorch.pytorch_covid_xray  # type:ignore # noqa: F401 # pylint: disable=C0413 41 | import colearn_other.fraud_dataset  # type:ignore # noqa: F401 # pylint: disable=C0413 42 | 43 | _logger = get_logger(__name__) 44 | 45 | 46 | def run_grpc_server(grpc_server, metrics_port): 47 | # this function runs in a new process and starts the grpc server and monitoring 48 | if metrics_port is not None: 49 | start_http_server(metrics_port) 50 | 51 | def signal_handler(sig, frame): 52 | _logger.info('Received sigterm. Killing server...') 53 | grpc_server.stop() 54 | _logger.info("...done") 55 | sys.exit(0) 56 | 57 | signal.signal(signal.SIGTERM, signal_handler) 58 | grpc_server.run()  # run the server passed to this process, not the module-level global 59 | 60 | 61 | if __name__ == "__main__": 62 | cli_args = argparse.ArgumentParser(description='Start multiple GRPC learner servers') 63 | cli_args.add_argument('-p', '--port', type=int, default=9995, help='first server port') 64 | cli_args.add_argument('-m', '--metrics_port', type=int, default=0, 65 | help='first prometheus metrics webserver port. 0 means no metrics server.') 66 | cli_args.add_argument('-n', '--n_learners', type=int, default=5, help='number of learners') 67 | 68 | args = cli_args.parse_args() 69 | 70 | log_levels = {"default": "INFO"} 71 | set_log_levels(log_levels) 72 | 73 | child_processes = [] 74 | for i in range(args.n_learners): 75 | port = args.port + i 76 | if args.metrics_port != 0: 77 | metrics_port = args.metrics_port + i 78 | else: 79 | metrics_port = None 80 | server = GRPCServer(mli_factory=ExampleMliFactory(), 81 | port=port) 82 | server_process = Process(target=run_grpc_server, 83 | kwargs={"grpc_server": server, "metrics_port": metrics_port}) 84 | 85 | print("starting server", i) 86 | server_process.start() 87 | child_processes.append(server_process) 88 | 89 | def signal_handler(sig, frame): 90 | _logger.info('You pressed Ctrl+C!
Killing child servers.') 91 | for child in child_processes: 92 | child.terminate() 93 | _logger.info("...done") 94 | sys.exit(0) 95 | 96 | signal.signal(signal.SIGINT, signal_handler) 97 | signal.signal(signal.SIGTERM, signal_handler) 98 | -------------------------------------------------------------------------------- /colearn/ml_interface.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | # ------------------------------------------------------------------------------ 18 | import abc 19 | from enum import Enum 20 | from typing import Any, Optional 21 | 22 | import onnx 23 | import onnxmltools 24 | import sklearn 25 | import tensorflow as tf 26 | import torch 27 | from pydantic import BaseModel 28 | from tensorflow import keras 29 | 30 | model_classes_keras = (tf.keras.Model, keras.Model, tf.estimator.Estimator) 31 | model_classes_pytorch = (torch.nn.Module,) 32 | model_classes_sklearn = (sklearn.base.ClassifierMixin,) 33 | 34 | 35 | def convert_model_to_onnx(model: Any): 36 | """ 37 | Helper function to convert an ML model to onnx format 38 | """ 39 | if isinstance(model, model_classes_keras): 40 | return onnxmltools.convert_keras(model) 41 | if isinstance(model, model_classes_sklearn): 42 | return onnxmltools.convert_sklearn(model) 43 | if 'xgboost' in model.__repr__(): 44 | return onnxmltools.convert_sklearn(model) 45 | if isinstance(model, model_classes_pytorch): 46 | raise Exception("Pytorch models cannot yet be converted to onnx") 47 | else: 48 | raise Exception(f"Attempt to convert unsupported model to onnx: {model}") 49 | 50 | 51 | class DiffPrivBudget(BaseModel): 52 | target_epsilon: float 53 | target_delta: float 54 | consumed_epsilon: float 55 | consumed_delta: float 56 | 57 | 58 | class ErrorCodes(Enum): 59 | DP_BUDGET_EXCEEDED = 1 60 | 61 | 62 | class TrainingSummary(BaseModel): 63 | dp_budget: Optional[DiffPrivBudget] 64 | error_code: Optional[ErrorCodes] 65 | 66 | 67 | class Weights(BaseModel): 68 | weights: Any 69 | training_summary: Optional[TrainingSummary] 70 | 71 | 72 | class DiffPrivConfig(BaseModel): 73 | target_epsilon: float 74 | target_delta: float 75 | max_grad_norm: float 76 | noise_multiplier: float 77 | 78 | 79 | class ProposedWeights(BaseModel): 80 | weights: Weights 81 | vote_score: float 82 | test_score: float 83 | vote: Optional[bool] 84 | 85 | 86 | class ModelFormat(Enum): 87 | PICKLE_WEIGHTS_ONLY = 1 88 | ONNX = 2 89 | 90 | 91 | class ColearnModel(BaseModel): 92 | model_format: ModelFormat 93 | model_file: Optional[str] 94 | model: Optional[Any] 95 | 96 | 97 | def deser_model(model: Any) -> onnx.ModelProto: 98 | """ 99 | Helper function to recover an onnx model from its serialized form 100 | """ 101 | return onnx.load_model_from_string(model) 102 |
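# A rough usage sketch for the two onnx helpers above (not part of the original
# module; `some_keras_model` stands in for any compiled tf.keras model):
#
#     onnx_model = convert_model_to_onnx(some_keras_model)  # model -> onnx.ModelProto
#     model_bytes = onnx_model.SerializeToString()          # protobuf serialisation
#     recovered = deser_model(model_bytes)                  # bytes -> onnx.ModelProto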
103 | 104 | class MachineLearningInterface(abc.ABC): 105 | @abc.abstractmethod 106 | def mli_propose_weights(self) -> Weights: 107 | """ 108 | Trains the model. Returns new weights. Does not change the current weights of the model. 109 | """ 110 | pass 111 | 112 | @abc.abstractmethod 113 | def mli_test_weights(self, weights: Weights) -> ProposedWeights: 114 | """ 115 | Tests the proposed weights and fills in the rest of the fields 116 | """ 117 | 118 | @abc.abstractmethod 119 | def mli_accept_weights(self, weights: Weights): 120 | """ 121 | Updates the model with the proposed set of weights 122 | :param weights: The new weights 123 | """ 124 | pass 125 | 126 | @abc.abstractmethod 127 | def mli_get_current_weights(self) -> Weights: 128 | """ 129 | Returns the current weights of the model 130 | """ 131 | pass 132 | 133 | @abc.abstractmethod 134 | def mli_get_current_model(self) -> ColearnModel: 135 | """ 136 | Returns the current model 137 | """ 138 | pass 139 | -------------------------------------------------------------------------------- /colearn_grpc/example_mli_factory.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | # ------------------------------------------------------------------------------ 18 | import copy 19 | import json 20 | from typing import Set, Dict, Any 21 | 22 | from colearn.ml_interface import DiffPrivConfig, MachineLearningInterface 23 | from colearn_grpc.mli_factory_interface import MliFactory 24 | from colearn_grpc.factory_registry import FactoryRegistry 25 | from colearn_grpc.logging import get_logger 26 | 27 | _logger = get_logger(__name__) 28 | 29 | 30 | class ExampleMliFactory(MliFactory): 31 | 32 | def __init__(self): 33 | self.models = {name: config.default_parameters for name, config 34 | in FactoryRegistry.model_architectures.items()} 35 | self.dataloaders = {name: config.default_parameters for name, config 36 | in FactoryRegistry.dataloaders.items()} 37 | 38 | self.compatibilities = {name: config.compatibilities for name, config 39 | in FactoryRegistry.model_architectures.items()} 40 | 41 | def get_models(self) -> Dict[str, Dict[str, Any]]: 42 | return copy.deepcopy(self.models) 43 | 44 | def get_dataloaders(self) -> Dict[str, Dict[str, Any]]: 45 | return copy.deepcopy(self.dataloaders) 46 | 47 | def get_compatibilities(self) -> Dict[str, Set[str]]: 48 | return self.compatibilities 49 | 50 | def get_mli(self, model_name: str, model_params: str, dataloader_name: str, 51 | dataset_params: str) -> MachineLearningInterface: 52 | 53 | print("Call to get_mli") 54 | print(f"model_name {model_name} -> params: {model_params}") 55 | print(f"dataloader_name {dataloader_name} -> params: {dataset_params}") 56 | 57 | if model_name not in self.models: 58 | raise Exception(f"Model {model_name} is not a valid model. " 59 | f"Available models are: {self.models}") 60 | if dataloader_name not in self.dataloaders: 61 | raise Exception(f"Dataloader {dataloader_name} is not a valid dataloader. " 62 | f"Available dataloaders are: {self.dataloaders}") 63 | if dataloader_name not in self.compatibilities[model_name]: 64 | raise Exception(f"Dataloader {dataloader_name} is not compatible with {model_name}. "
65 | f"Compatible dataloaders are: {self.compatibilities[model_name]}") 66 | 67 | dataloader_config = copy.deepcopy(self.dataloaders[dataloader_name]) # Default parameters 68 | dataloader_new_config = json.loads(dataset_params) 69 | for key in dataloader_new_config.keys(): 70 | if key in dataloader_config or key == "location": 71 | dataloader_config[key] = dataloader_new_config[key] 72 | else: 73 | _logger.warning(f"Key {key} was included in the dataloader params but this dataloader " 74 | f"({dataloader_name}) does not accept it.") 75 | 76 | prepare_data_loaders = FactoryRegistry.dataloaders[dataloader_name][0] 77 | data_loaders = prepare_data_loaders(**dataloader_config) 78 | 79 | model_config = copy.deepcopy(self.models[model_name]) # Default parameters 80 | model_new_config = json.loads(model_params) 81 | for key in model_new_config.keys(): 82 | if key in model_config: 83 | model_config[key] = model_new_config[key] 84 | else: 85 | _logger.warning(f"Key {key} was included in the model params but this model ({model_name}) does not " 86 | "accept it.") 87 | if "diff_priv_config" in model_config: 88 | c = model_config["diff_priv_config"] 89 | if c is not None: 90 | model_config["diff_priv_config"] = DiffPrivConfig(**c) 91 | prepare_learner = FactoryRegistry.model_architectures[model_name][0] 92 | 93 | return prepare_learner(data_loaders=data_loaders, **model_config) 94 | -------------------------------------------------------------------------------- /colearn_keras/test_keras_learner.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | # ------------------------------------------------------------------------------ 18 | from unittest.mock import Mock, create_autospec 19 | 20 | import pytest 21 | import tensorflow as tf 22 | from tensorflow import keras 23 | 24 | from colearn.ml_interface import Weights, DiffPrivConfig 25 | from colearn_keras.keras_learner import KerasLearner 26 | 27 | 28 | def get_mock_model() -> Mock: 29 | model = create_autospec(keras.Sequential, instance=True) 30 | model.evaluate.return_value = {"loss": 1, 31 | "accuracy": 3} 32 | model.get_weights.return_value = "all the weights" 33 | model.optimizer = create_autospec(keras.optimizers.Optimizer, instance=True) 34 | model.optimizer.get_config.return_value = {"name": "Adam"} 35 | # these are needed for the DP optimizers, but do no harm for the non-DP tests 36 | model.optimizer._noise_multiplier = 2  # pylint: disable=protected-access 37 | model.optimizer._l2_norm_clip = 2  # pylint: disable=protected-access 38 | model.optimizer._num_microbatches = 2  # pylint: disable=protected-access 39 | 40 | model._get_compile_args.return_value = {}  # pylint: disable=protected-access 41 | return model 42 | 43 | 44 | def get_mock_dataloader() -> Mock: 45 | dl = tf.data.Dataset.range(42) 46 | dl._batch_size = 42  # pylint: disable=protected-access 47 | return dl 48 | 49 | 50 | @pytest.fixture 51 | def nkl(): 52 | """Returns a KerasLearner""" 53 | model = get_mock_model() 54 | dl = get_mock_dataloader() 55 | vote_dl = get_mock_dataloader() 56 | nkl = KerasLearner(model, dl, vote_dl, diff_priv_config=DiffPrivConfig( 57 | target_epsilon=5, 58 | target_delta=1e-5, 59 | max_grad_norm=2, 60 | noise_multiplier=3 61 | )) 62 | 63 | return nkl 64 | 65 | 66 | def test_vote(nkl): 67 | assert nkl.vote_score == get_mock_model().evaluate.return_value["loss"] 68 | 69 | assert nkl.vote(1.1) is False 70 | assert nkl.vote(1) is False 71 | assert nkl.vote(0.9) is True 72 | 73 | 74 | def test_minimise_criterion(nkl): 75 | nkl.minimise_criterion = False 76 | 77 | assert nkl.vote(1.1) is True 78 | assert nkl.vote(1) is False 79 | assert nkl.vote(0.9) is False 80 | 81 | 82 | def test_criterion(nkl): 83 | nkl.criterion = "accuracy" 84 | nkl.mli_accept_weights(Weights(weights="foo")) 85 | assert nkl.vote_score == get_mock_model().evaluate.return_value["accuracy"] 86 | 87 | 88 | def test_propose_weights(nkl): 89 | weights = nkl.mli_propose_weights() 90 | assert isinstance(weights, Weights) 91 | assert weights.weights == get_mock_model().get_weights.return_value 92 | 93 | 94 | def test_get_current_weights(nkl): 95 | weights = nkl.mli_get_current_weights() 96 | assert isinstance(weights, Weights) 97 | assert weights.weights == get_mock_model().get_weights.return_value 98 | 99 | 100 | def test_privacy_calculation(nkl): 101 | epsilon = nkl.get_privacy_budget() 102 | assert nkl.diff_priv_budget.consumed_epsilon < epsilon 103 | 104 | 105 | def test_privacy_training(nkl): 106 | # no training when budget is overconsumed 107 | nkl.diff_priv_budget.target_epsilon = 0 108 | w = nkl.mli_propose_weights() 109 | assert w.training_summary.error_code.name == 'DP_BUDGET_EXCEEDED' 110 | 111 | # do training when budget is not overconsumed 112 | nkl.diff_priv_budget.target_epsilon = 9999999 113 | w = nkl.mli_propose_weights() 114 | assert w.training_summary.error_code is None 115 | 116 | 117 | def test_reset_optimizer(nkl): 118 | # without privacy 119 | nkl.diff_priv_config = None 120 | nkl.reset_optimizer() 121 | 122 | # with privacy 123 | nkl.diff_priv_config = DiffPrivConfig( 124 | target_epsilon=5,
125 | target_delta=1e-5, 126 | max_grad_norm=2, 127 | noise_multiplier=3 128 | ) 129 | nkl.reset_optimizer() 130 | -------------------------------------------------------------------------------- /docker/build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # ------------------------------------------------------------------------------ 3 | # 4 | # Copyright 2021 Fetch.AI Limited 5 | # 6 | # Licensed under the Creative Commons Attribution-NonCommercial International 7 | # License, Version 4.0 (the "License"); you may not use this file except in 8 | # compliance with the License. You may obtain a copy of the License at 9 | # 10 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | # 18 | # ------------------------------------------------------------------------------ 19 | import os 20 | import subprocess 21 | import argparse 22 | 23 | PROJECT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) # pylint: disable=W0603 24 | ML_LEARNER_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), 'ml.Dockerfile')) # pylint: disable=W0603 25 | DOCKER_REGISTRY_URL = 'gcr.io/fetch-ai-sandbox' # pylint: disable=W0603 26 | 27 | 28 | def parse_commandline(): 29 | parser = argparse.ArgumentParser() 30 | parser.add_argument('--publish', action='store_true', dest='publish', 31 | help='Publish image to {}'.format(DOCKER_REGISTRY_URL)) 32 | parser.add_argument('--allow_dirty', action='store_true', dest='allow_dirty', help='Allow building/pushing dirty images') 33 | parser.add_argument('--rebuild', action='store_true', dest='no_cache', help='Build image from scratch') 34 | parser.add_argument('--tag', type=str, help='Tag to use rather than git commit') 35 | return parser.parse_args() 36 | 37 | 38 | def check_project_path(): 39 | tests = [ 40 | os.path.isdir(PROJECT_PATH), 41 | os.path.isdir(os.path.join(PROJECT_PATH, 'docker')), 42 | os.path.isfile(os.path.join(PROJECT_PATH, 'requirements.txt')), 43 | os.path.isfile(os.path.join(PROJECT_PATH, 'setup.py')), 44 | ] 45 | 46 | if not all(tests): 47 | raise RuntimeError('Failed to detect project layout') 48 | 49 | 50 | def get_project_version(): 51 | return subprocess.check_output(['git', 'describe', '--dirty=-dirty', '--always'], cwd=PROJECT_PATH).decode().strip() 52 | 53 | 54 | def docker_build_multistage(dockerfile, stage, image_tag, no_cache=False, cache_from=None, publish=False): 55 | print('Building docker stage {} from {}...'.format(stage, dockerfile)) 56 | cmd = [ 57 | 'docker', 58 | 'build', 59 | '--ssh', 'default', 60 | '--target', stage, 61 | ] 62 | 63 | if cache_from is not None: 64 | cmd += [ 65 | '--cache-from', cache_from, 66 | ] 67 | 68 | if no_cache is True: 69 | cmd += [ 70 | '--no-cache', 71 | ] 72 | 73 | cmd += [ 74 | '-t', image_tag, 75 | '-f', dockerfile, 76 | '.', 77 | ] 78 | 79 | print(cmd) 80 | subprocess.check_call(cmd, cwd=PROJECT_PATH, env=dict(os.environ, DOCKER_BUILDKIT='1')) 81 | 82 | # tag the image as latest 83 | image_tag_latest = image_tag.split(':')[0] + ':latest' 84 | cmd = [ 85 | 'docker', 86 | 'tag', 87 | image_tag, 88 | image_tag_latest, 89 | ] 90 | print(cmd) 91 | 
subprocess.check_call(cmd) 92 | print('Building docker image {} ...complete'.format(image_tag)) 93 | 94 | if publish: 95 | image_tag_remote = '{}/{}'.format(DOCKER_REGISTRY_URL, image_tag) 96 | cmd = [ 97 | 'docker', 98 | 'tag', 99 | image_tag, 100 | image_tag_remote, 101 | ] 102 | print(cmd) 103 | subprocess.check_call(cmd) 104 | cmd = [ 105 | 'docker', 106 | 'push', 107 | image_tag_remote 108 | ] 109 | print(cmd) 110 | subprocess.check_call(cmd) 111 | print('Publishing docker image {} ...'.format(image_tag_remote)) 112 | print('Publishing docker image {} ...complete'.format(image_tag_remote)) 113 | 114 | 115 | def main(): 116 | args = parse_commandline() 117 | 118 | # auto detect the project path 119 | check_project_path() 120 | 121 | version = get_project_version() 122 | 123 | if args.tag: 124 | version = args.tag 125 | 126 | docker_build_multistage(dockerfile=ML_LEARNER_PATH, stage='base', image_tag=f"ml-learner:{version}", 127 | no_cache=args.no_cache, publish=args.publish) 128 | 129 | 130 | if __name__ == '__main__': 131 | main() 132 | -------------------------------------------------------------------------------- /colearn_pytorch/test_pytorch_learner.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | # ------------------------------------------------------------------------------ 18 | from unittest.mock import Mock, create_autospec 19 | from collections import OrderedDict 20 | 21 | import pytest 22 | import torch 23 | import torch.utils.data 24 | from torch.nn.modules.loss import _Loss 25 | 26 | from colearn.ml_interface import Weights 27 | from colearn_pytorch.pytorch_learner import PytorchLearner 28 | 29 | # torch does not correctly type-hint its tensor class so pylint fails 30 | MODEL_PARAMETERS = OrderedDict({'param1': torch.tensor([3, 3]), 'param2': torch.tensor([4, 4])}) # pylint: disable=not-callable 31 | MODEL_PARAMETERS2 = OrderedDict({'param1': torch.tensor([5, 5]), 'param2': torch.tensor([6, 6])}) # pylint: disable=not-callable 32 | BATCH_SIZE = 2 33 | TRAIN_BATCHES = 1 34 | TEST_BATCHES = 1 35 | LOSS = 12 36 | 37 | 38 | def get_mock_model() -> Mock: 39 | model = create_autospec(torch.nn.Module, instance=True, spec_set=True) 40 | model.state_dict.return_value = MODEL_PARAMETERS 41 | model.to.return_value = model 42 | return model 43 | 44 | 45 | def get_mock_dataloader() -> Mock: 46 | dl = create_autospec(torch.utils.data.DataLoader, instance=True) 47 | dl.__len__ = Mock(return_value=100) 48 | # pylint: disable=not-callable 49 | dl.__iter__.return_value = [(torch.tensor([0, 0]), 50 | torch.tensor([0])), 51 | (torch.tensor([1, 1]), 52 | torch.tensor([1]))] 53 | dl.batch_size = BATCH_SIZE 54 | return dl 55 | 56 | 57 | def get_mock_optimiser() -> Mock: 58 | opt = Mock() 59 | opt.__setstate__ = Mock() 60 | return opt 61 | 62 | 63 | def get_mock_criterion() -> Mock: 64 | crit = create_autospec(_Loss, instance=True) 65 | 66 | # pylint: disable=not-callable 67 | crit.return_value = torch.tensor(LOSS) 68 | crit.return_value.backward = Mock() # type: ignore[assignment] 69 | 70 | return crit 71 | 72 | 73 | @pytest.fixture 74 | def nkl(): 75 | """Returns a Pytorchlearner""" 76 | model = get_mock_model() 77 | dl = get_mock_dataloader() 78 | vote_dl = get_mock_dataloader() 79 | opt = get_mock_optimiser() 80 | crit = get_mock_criterion() 81 | nkl = PytorchLearner(model=model, train_loader=dl, vote_loader=vote_dl, 82 | optimizer=opt, criterion=crit, 83 | num_train_batches=1, 84 | num_test_batches=1) 85 | return nkl 86 | 87 | 88 | def test_setup(nkl): 89 | assert str(MODEL_PARAMETERS) == str(nkl.mli_get_current_weights().weights) 90 | vote_score = LOSS / (TEST_BATCHES * BATCH_SIZE) 91 | assert nkl.vote_score == vote_score 92 | 93 | 94 | def test_vote(nkl): 95 | vote_score = LOSS / (TEST_BATCHES * BATCH_SIZE) 96 | assert nkl.vote_score == vote_score 97 | 98 | assert nkl.minimise_criterion is True 99 | assert nkl.vote(vote_score + 0.1) is False 100 | assert nkl.vote(vote_score) is False 101 | assert nkl.vote(vote_score - 0.1) is True 102 | 103 | 104 | def test_vote_minimise_criterion(nkl): 105 | vote_score = LOSS / (TEST_BATCHES * BATCH_SIZE) 106 | assert nkl.vote_score == vote_score 107 | 108 | nkl.minimise_criterion = False 109 | 110 | assert nkl.vote(vote_score + 0.1) is True 111 | assert nkl.vote(vote_score) is False 112 | assert nkl.vote(vote_score - 0.1) is False 113 | 114 | 115 | def test_propose_weights(nkl): 116 | current_weights = nkl.mli_get_current_weights() 117 | proposed_weights = nkl.mli_propose_weights() 118 | assert isinstance(proposed_weights, Weights) 119 | # current weights should not change 120 | assert str(current_weights) == str(proposed_weights) 121 | # proposed_weights should be different from current_weights, but I cannot 122 | # find a way to test this! 
123 | 124 | 125 | def test_get_current_weights(nkl): 126 | weights = nkl.mli_get_current_weights() 127 | assert isinstance(weights, Weights) 128 | assert str(weights.weights) == str(MODEL_PARAMETERS) 129 | -------------------------------------------------------------------------------- /docs/technical/AXIM-160-docu.md: -------------------------------------------------------------------------------- 1 | 2 | # Axim Issue No 160 - Explore a set of model metrics that are common to most ml models 3 | 4 | Table of contents: 5 | 1. [Easily includable metrics](#easily-includable-metrics) 6 | 2. [More time-consuming metrics](#more-time-consuming-metrics) 7 | 3. [Possible approach](#possible-approach) 8 | 9 | ## Easily includable metrics 10 | 11 | A list of metrics that can be included with ease. Essentially all metrics from [keras metrics](https://keras.io/api/metrics/) can be used, except the ones listed under `Classification metrics based on True/False positives & negatives`. 12 | Most of the metrics in this list, however, are more relevant for regression use cases than for Scania or Mnist: 13 | * Accuracy 14 |     * Calculates how often predictions equal labels. 15 | * TopKCategoricalAccuracy 16 |     * Computes how often targets are in the top K predictions. 17 | * MSE 18 |     * Computes the mean squared error between y_true and y_pred. 19 | * MAE 20 |     * Computes the mean absolute error between the labels and predictions. 21 | * MAPE 22 |     * Computes the mean absolute percentage error between y_true and y_pred. 23 | * MSLE 24 |     * Computes the mean squared logarithmic error between y_true and y_pred. 25 | 26 | ## More time-consuming metrics 27 | 28 | This is a list of the metrics more relevant for a classification task such as Scania or Mnist. To use them, the shape of the input data and the loss function need to be changed. 29 | Here are some of the most common ones: 30 | * Precision 31 |     * Computes the precision of the predictions with respect to the labels. The metric creates two local variables, true_positives and false_positives that are used to compute the precision. This value is ultimately returned as precision, an idempotent operation that simply divides true_positives by the sum of true_positives and false_positives. 32 | * Recall 33 |     * Computes the recall of the predictions with respect to the labels. This metric creates two local variables, true_positives and false_negatives, that are used to compute the recall. This value is ultimately returned as recall, an idempotent operation that simply divides true_positives by the sum of true_positives and false_negatives. 34 | * ROC AUC 35 |     * Approximates the AUC (Area under the curve) of the ROC curve. The AUC (Area under the curve) of the ROC (Receiver operating characteristic; default) or PR (Precision Recall) curves are quality measures of binary classifiers. Unlike the accuracy, and like cross-entropy losses, ROC-AUC evaluates all the operational points of a model. 36 | 37 | The F1 score is currently only available in the nightly build of Tensorflow, so we would need to add it to the model manually. The F1 score definition is the following: 38 | ``` 39 | The F1 score is defined as the harmonic mean of precision and recall. As a short reminder, the harmonic mean is an alternative metric for the more common arithmetic mean. It is often useful when computing an average rate. 40 | ``` 41 | 42 | Like the F1 score, any other metric that is not part of the Keras library can be written as a function manually and then added to the metrics list.
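As a rough sketch of such a hand-written metric (not code from this repo; it assumes binary 0/1 labels and probability outputs, as in a Scania-style binary classifier), an F1 function built from standard Keras backend ops:

```python
from tensorflow.keras import backend as K

def f1_score(y_true, y_pred):
    # Cast labels and round predicted probabilities to hard 0/1 predictions.
    y_true = K.cast(y_true, "float32")
    y_pred = K.round(y_pred)
    tp = K.sum(y_true * y_pred)
    fp = K.sum((1 - y_true) * y_pred)
    fn = K.sum(y_true * (1 - y_pred))
    precision = tp / (tp + fp + K.epsilon())
    recall = tp / (tp + fn + K.epsilon())
    # Harmonic mean of precision and recall.
    return 2 * precision * recall / (precision + recall + K.epsilon())

# Passed to compile like any built-in metric:
# model.compile(loss=..., optimizer=..., metrics=[f1_score])
```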
43 | 44 | ## Possible approach 45 | 46 | Assuming we want to add metrics from the easier category, one would need to: 47 | 48 | * Add the metrics to the list where the model is compiled when registering a new model, e.g. in the keras_scania.py file. 49 | * If necessary, adapt the input shape and/or the loss function to match the metrics. 50 | * Instead of using one metric as the criterion, we would need to switch the criterion to the loss function (tested briefly: one learner using the loss function and one using accuracy both seemed to find their optimum). 51 |     * This means rewriting the voting so that we minimise the loss function instead of maximising a metric, as is done at the moment. 52 |     * This can simply be done by using the default variables of keras_learner.py. 53 | * Rewrite the test function in e.g. keras_learner.py Line 277 to also return all the metrics and not only the criterion (loss). 54 | * Add a new variable to ProposedWeights (ml_interface.py) that includes all metrics as a dict. 55 | * In grpc_learner_server.py, include the metrics dict under `TestWeights`. 56 | * Adapt the orchestrator and frontend to forward the metric dicts and display them in the frontend. 57 | 58 | An alternative approach would be to use the first metric as the deciding score for voting and add the others alongside it. 59 | This would mean changing the vote_score variable from a float to a dict or a list. 60 | 61 | 62 | Remarks: 63 | * Tested it briefly locally on Mnist. Although the loss function seemed to work, it is not clear why in the end it did not approve the other participants' suggestions. I believe that the data set used for the vote score and the one used for the graph's test score are different. If that is true it can be confusing for the user and needs to be discussed. 64 | * There are both test_score and vote_score, which are two different scores on two different data sets. -------------------------------------------------------------------------------- /colearn_grpc/test_example_mli_factory.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License.
16 | # 17 | # ------------------------------------------------------------------------------ 18 | import json 19 | import pytest 20 | 21 | from colearn_keras.keras_mnist import split_to_folders 22 | from colearn_keras.keras_learner import KerasLearner 23 | 24 | # These are imported here so that they are registered in the FactoryRegistry 25 | # pylint: disable=W0611 26 | import colearn_keras.keras_mnist # type:ignore # noqa: F401 27 | import colearn_keras.keras_cifar10 # type:ignore # noqa: F401 28 | import colearn_keras.keras_scania # type:ignore # noqa: F401 29 | import colearn_pytorch.pytorch_xray # type:ignore # noqa: F401 30 | import colearn_pytorch.pytorch_covid_xray # type:ignore # noqa: F401 31 | import colearn_other.fraud_dataset # type:ignore # noqa: F401 32 | 33 | from colearn_grpc.example_mli_factory import ExampleMliFactory 34 | 35 | DATALOADER_NAMES = {"PYTORCH_XRAY", "KERAS_MNIST", "KERAS_MNIST_WITH_DP", "KERAS_CIFAR10", "KERAS_SCANIA", "PYTORCH_COVID_XRAY", "FRAUD"} 36 | MODEL_NAMES = {"PYTORCH_XRAY", "KERAS_MNIST", "KERAS_MNIST_RESNET", "KERAS_CIFAR10", "KERAS_SCANIA", "PYTORCH_COVID_XRAY", "FRAUD"} 37 | 38 | 39 | @pytest.fixture 40 | def factory() -> ExampleMliFactory: 41 | """Returns an ExampleMLIFactory""" 42 | return ExampleMliFactory() 43 | 44 | 45 | def test_setup(factory): 46 | assert len(factory.get_models()) > 0 47 | assert len(factory.get_dataloaders()) > 0 48 | assert len(factory.get_compatibilities()) > 0 49 | 50 | 51 | def test_model_names(factory): 52 | for model in MODEL_NAMES: 53 | assert model in factory.get_models().keys() 54 | print(factory.get_models()) 55 | 56 | 57 | def test_dataloader_names(factory): 58 | for dl in DATALOADER_NAMES: 59 | assert dl in factory.get_dataloaders().keys() 60 | 61 | assert len(factory.get_dataloaders()["KERAS_MNIST"]) > 0 62 | 63 | 64 | def test_compatibilities(factory): 65 | for model in MODEL_NAMES: 66 | assert model in factory.get_models().keys() 67 | for dl in factory.get_compatibilities()[model]: 68 | assert dl in DATALOADER_NAMES 69 | 70 | 71 | @pytest.fixture() 72 | def mnist_config(): 73 | folders = split_to_folders(10) 74 | 75 | return { 76 | 'model_name': "KERAS_MNIST", 77 | 'dataloader_name': "KERAS_MNIST", 78 | 'location': folders[0], 79 | } 80 | 81 | 82 | def test_get_mnist(factory, mnist_config): 83 | model_params = json.dumps({"steps_per_epoch": 20}) 84 | 85 | dataset_params = json.dumps( 86 | {'location': mnist_config['location'], 87 | }) 88 | 89 | mli = factory.get_mli( 90 | model_name=mnist_config['model_name'], 91 | model_params=model_params, 92 | dataloader_name=mnist_config['dataloader_name'], 93 | dataset_params=dataset_params) 94 | 95 | assert isinstance(mli, KerasLearner) 96 | assert mli.model_fit_kwargs["steps_per_epoch"] == 20 97 | 98 | 99 | def test_triple_mnist(factory, mnist_config): 100 | default_params = json.dumps({}) 101 | 102 | dataset_params = json.dumps( 103 | {'location': mnist_config['location'], 104 | }) 105 | 106 | mli = factory.get_mli( 107 | model_name=mnist_config['model_name'], 108 | model_params=default_params, 109 | dataloader_name=mnist_config['dataloader_name'], 110 | dataset_params=dataset_params) 111 | 112 | assert isinstance(mli, KerasLearner) 113 | default_steps = mli.model_fit_kwargs["steps_per_epoch"] 114 | 115 | model_params = json.dumps({"steps_per_epoch": 40}) 116 | 117 | mli = factory.get_mli( 118 | model_name=mnist_config['model_name'], 119 | model_params=model_params, 120 | dataloader_name=mnist_config['dataloader_name'], 121 | dataset_params=dataset_params) 122 | 
123 | assert isinstance(mli, KerasLearner) 124 | assert mli.model_fit_kwargs["steps_per_epoch"] == 40 125 | 126 | mli = factory.get_mli( 127 | model_name=mnist_config['model_name'], 128 | model_params=default_params, 129 | dataloader_name=mnist_config['dataloader_name'], 130 | dataset_params=dataset_params) 131 | 132 | assert isinstance(mli, KerasLearner) 133 | assert mli.model_fit_kwargs["steps_per_epoch"] == default_steps 134 | -------------------------------------------------------------------------------- /colearn_grpc/grpc_server.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | # ------------------------------------------------------------------------------ 18 | from concurrent import futures 19 | import os 20 | import grpc 21 | 22 | from colearn_grpc.mli_factory_interface import MliFactory 23 | 24 | from colearn_grpc.grpc_learner_server import GRPCLearnerServer 25 | import colearn_grpc.proto.generated.interface_pb2_grpc as ipb2_grpc 26 | 27 | from colearn_grpc.logging import get_logger 28 | 29 | 30 | _logger = get_logger(__name__) 31 | 32 | 33 | class GRPCServer: 34 | """ 35 | This is a wrapper class that simplifies the usage of GRPCLearnerServer. 36 | It takes an mli_factory and a port, builds a GRPCLearnerServer object from them, 37 | and creates the GRPC listener server, which can be started using the run method.
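    For example (mirroring scripts/run_n_grpc_servers.py; any MliFactory
    implementation can stand in for ExampleMliFactory):

        server = GRPCServer(mli_factory=ExampleMliFactory(), port=9995)
        server.run()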
38 | """ 39 | 40 | def __init__(self, mli_factory: MliFactory, port=None, max_workers=5, 41 | enable_encryption=False, server_key="", server_crt=""): 42 | """ 43 | @param mli_factory is a factory object that produces MachineLearningInterface objects 44 | @param port is the port where the server will listen 45 | @param max_workers is how many worker threads will be available in the thread pool 46 | @param enable_encryption if True the server will try to enable encryption 47 | @param server_key path to server key for encryption (if enabled) 48 | @param server_crt path to server certificate for encryption (if enabled) 49 | """ 50 | self.port = port 51 | self.server = None 52 | self.service = GRPCLearnerServer(mli_factory) 53 | self.thread_pool = None 54 | self.max_workers = max_workers 55 | self.enable_encryption = enable_encryption 56 | self.server_key = server_key 57 | self.server_crt = server_crt 58 | 59 | def run(self, wait_for_termination=True): 60 | if self.server: 61 | raise ValueError("GRPC server is already running") 62 | 63 | address = "0.0.0.0:{}".format(self.port) 64 | 65 | _logger.info(f"Starting GRPC server on {address}...") 66 | 67 | encrypted_connection = False 68 | if self.enable_encryption: 69 | # There needs to be a certificate and private key available to enable encryption - 70 | # if these are not available, fall back to no encryption. 71 | encrypted_connection = True 72 | 73 | if not os.path.isfile(self.server_crt): 74 | _logger.error(f"Failed to find file {self.server_crt} needed for encrypted grpc connection - not enabling") 75 | encrypted_connection = False 76 | 77 | if not os.path.isfile(self.server_key): 78 | _logger.error(f"Failed to find file {self.server_key} needed for encrypted grpc connection - not enabling") 79 | encrypted_connection = False 80 | 81 | self.thread_pool = futures.ThreadPoolExecutor( 82 | max_workers=self.max_workers, thread_name_prefix="GRPCLearnerServer-poolworker-") 83 | 84 | self.server = grpc.server(self.thread_pool) 85 | ipb2_grpc.add_GRPCLearnerServicer_to_server(self.service, self.server) 86 | 87 | if encrypted_connection: 88 | # read in key and certificate 89 | with open(self.server_key, 'rb') as f: 90 | private_key = f.read() 91 | with open(self.server_crt, 'rb') as f: 92 | certificate_chain = f.read() 93 | 94 | # create server credentials 95 | server_credentials = grpc.ssl_server_credentials(((private_key, certificate_chain,),)) 96 | self.server.add_secure_port(address, server_credentials) 97 | else: 98 | self.server.add_insecure_port(address) 99 | 100 | self.server.start() 101 | 102 | if wait_for_termination: 103 | _logger.info("GRPC server started. Waiting for termination...") 104 | self.server.wait_for_termination() 105 | else: 106 | _logger.info("GRPC server started.") 107 | 108 | def stop(self): 109 | _logger.info("Stopping GRPC server...") 110 | if self.server: 111 | self.server.stop(2).wait() 112 | self.server = None 113 | 114 | if self.thread_pool: 115 | self.thread_pool.shutdown(wait=True) 116 | self.thread_pool = None 117 | _logger.info("server stopped") 118 | -------------------------------------------------------------------------------- /docs/about.md: -------------------------------------------------------------------------------- 1 | # How collective learning works 2 | 3 | A Colearn experiment begins when a group of entities, referred to as *learners*, decide on a model architecture and 4 | begin learning. Together they will train a single global model.
The goal is to train a model that performs better 5 | than any of the learners can produce by training on their private data set. 6 | 7 | ### How Training Works 8 | 9 | Training occurs in rounds; during each round the learners attempt to improve the performance of the global shared 10 | model. 11 | To do so each round an **update** of the global model (for example new set of weights in a neural network) is proposed. 12 | The learners then **validate** the update and decide if the new model is better than the current global model. 13 | If enough learners *approve* the update then the global model is updated. After an update is approved or rejected a 14 | new round begins. 15 | 16 | The detailed steps of a round updating a global model *M* are as follows: 17 | 18 | 1. One of the learners is selected and proposes a new updated model *M'* 19 | 2. The rest of the learners **validate** *M'* 20 | - If *M'* has better performance than *M* against their private data set then the learner votes to approve 21 | - If not, the learner votes to reject 22 | 3. The total votes are tallied 23 | - If more than some threshold (typically 50%) of learners approve then *M'* becomes the new global model. If not, 24 | *M* continues to be the global model 25 | 4. A new round begins. 26 | 27 | By using a decentralized ledger (a blockchain) this learning process can be run in a completely decentralized, 28 | secure and auditable way. Further security can be provided by using 29 | [differential privacy](https://en.wikipedia.org/wiki/Differential_privacy) to avoid exposing your private data 30 | set when generating an update. 31 | 32 | ## Learning algorithms that work for collective learning 33 | 34 | Collective learning is not just for neural networks; any learning algorithm that can be trained on subsets of the 35 | data and which can use the results of previous training rounds as the basis for subsequent rounds can be used. 36 | Neural networks fit both these constraints: training can be done on mini-batches of data and each training step uses 37 | the weights of the previous training step as its starting point. 38 | More generally, any model that is trained using mini-batch stochastic gradient descent is fine. 39 | Other algorithms can be made to work with collective learning as well. 40 | For example, a random forest can be trained iteratively by having each learner add new trees 41 | (see example in [mli_random_forest_iris.py]({{ repo_root }}/examples/mli_random_forest_iris.py)). 42 | For more discussion, see [here](./intro_tutorial_mli.md). 43 | 44 | ## The driver 45 | 46 | The driver implements the voting protocol, so it handles selecting a learner to train, 47 | sending the update out for voting, calculating the vote and accepting or declining the update. 48 | Here we have a very minimal driver that doesn't use networking or a blockchain. Eventually the driver will be a 49 | smart contract. 
50 | This is the code that implements one round of voting: 51 | 52 | ```python 53 | def run_one_round(round_index: int, learners: Sequence[MachineLearningInterface], 54 | vote_threshold=0.5): 55 | proposer = round_index % len(learners) 56 | new_weights = learners[proposer].mli_propose_weights() 57 | 58 | prop_weights_list = [ln.mli_test_weights(new_weights) for ln in learners] 59 | approves = sum(1 if v.vote else 0 for v in prop_weights_list) 60 | 61 | vote = False 62 | if approves >= len(learners) * vote_threshold: 63 | vote = True 64 | for learner in learners: 65 | learner.mli_accept_weights(new_weights) 66 | 67 | return prop_weights_list, vote 68 | ``` 69 | 70 | The driver has a list of learners, and each round it selects one learner to be the proposer. 71 | The proposer does some training and proposes an updated set of weights. 72 | The driver then sends the proposed weights to each of the learners, and they each vote on whether this is 73 | an improvement. 74 | If the fraction of approving votes meets the vote threshold the proposed weights are accepted, and if not 75 | they're rejected. 76 | 77 | ## The Machine Learning Interface 78 | 79 | ```Python 80 | {!../colearn/ml_interface.py!} 81 | ``` 82 | 83 | There are four methods that need to be implemented: 84 | 85 | 1. `propose_weights` causes the model to do some training and then return a 86 | new set of weights that are proposed to the other learners. 87 | This method shouldn't change the current weights of the model - that 88 | only happens when `accept_weights` is called. 89 | 2. `test_weights` - the model takes some new weights and returns a vote on whether the new weights are an improvement. 90 | As with propose_weights, this shouldn't change the current weights of the model - 91 | that only happens when `accept_weights` is called. 92 | 3. `accept_weights` - the model accepts some weights that have been voted on and approved by the set of learners. 93 | The old weights of the model are discarded and replaced by the new weights. 94 | 4. `current_weights` should return the current weights of the model. 95 | 96 | For more details about directly implementing the machine learning interface 97 | see the tutorial [here](./intro_tutorial_mli.md). 98 | -------------------------------------------------------------------------------- /colearn_examples/ml_interface/keras_mnist.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License.
16 | # 17 | # ------------------------------------------------------------------------------ 18 | import os 19 | 20 | import tensorflow as tf 21 | import tensorflow_datasets as tfds 22 | 23 | from colearn.training import initial_result, collective_learning_round, set_equal_weights 24 | from colearn.utils.plot import ColearnPlot 25 | from colearn.utils.results import Results, print_results 26 | from colearn_keras.keras_learner import KerasLearner 27 | from colearn_keras.utils import normalize_img 28 | 29 | """ 30 | MNIST training example using Keras 31 | 32 | Used dataset: 33 | - MNIST is a set of 60 000 black and white handwritten digit images of size 28x28x1 in 10 classes 34 | 35 | What script does: 36 | - Loads the MNIST dataset from tensorflow_datasets 37 | - Sets up a Keras learner 38 | - Randomly splits the dataset between multiple learners 39 | - Runs multiple rounds of the learning process and displays a plot of the results 40 | """ 41 | 42 | n_learners = 5 43 | vote_threshold = 0.5 44 | vote_batches = 2 45 | 46 | testing_mode = bool(os.getenv("COLEARN_EXAMPLES_TEST", "")) # for testing 47 | n_rounds = 20 if not testing_mode else 1 48 | width = 28 49 | height = 28 50 | n_classes = 10 51 | l_rate = 0.001 52 | batch_size = 64 53 | 54 | # Load data for each learner 55 | train_dataset, info = tfds.load('mnist', split='train', as_supervised=True, with_info=True) 56 | n_datapoints = info.splits['train'].num_examples 57 | 58 | train_datasets = [train_dataset.shard(num_shards=n_learners, index=i) for i in range(n_learners)] 59 | 60 | test_dataset = tfds.load('mnist', split='test', as_supervised=True) 61 | vote_datasets = [test_dataset.shard(num_shards=2 * n_learners, index=i) for i in range(n_learners)] 62 | test_datasets = [test_dataset.shard(num_shards=2 * n_learners, index=i) for i in range(n_learners, 2 * n_learners)] 63 | 64 | 65 | for i in range(n_learners): 66 | train_datasets[i] = train_datasets[i].map( 67 | normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE) 68 | train_datasets[i] = train_datasets[i].shuffle(n_datapoints // n_learners) 69 | train_datasets[i] = train_datasets[i].batch(batch_size) 70 | 71 | vote_datasets[i] = vote_datasets[i].map( 72 | normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE) 73 | vote_datasets[i] = vote_datasets[i].batch(batch_size) 74 | 75 | test_datasets[i] = test_datasets[i].map( 76 | normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE) 77 | test_datasets[i] = test_datasets[i].batch(batch_size) 78 | 79 | 80 | # Define model 81 | def get_model(): 82 | input_img = tf.keras.Input( 83 | shape=(width, height, 1), name="Input" 84 | ) 85 | x = tf.keras.layers.Conv2D( 86 | 64, (3, 3), activation="relu", padding="same", name="Conv1_1" 87 | )(input_img) 88 | x = tf.keras.layers.BatchNormalization(name="bn1")(x) 89 | x = tf.keras.layers.MaxPooling2D((2, 2), name="pool1")(x) 90 | x = tf.keras.layers.Conv2D( 91 | 128, (3, 3), activation="relu", padding="same", name="Conv2_1" 92 | )(x) 93 | x = tf.keras.layers.BatchNormalization(name="bn4")(x) 94 | x = tf.keras.layers.MaxPooling2D((2, 2), name="pool2")(x) 95 | x = tf.keras.layers.Flatten(name="flatten")(x) 96 | x = tf.keras.layers.Dense( 97 | n_classes, activation="softmax", name="fc1" 98 | )(x) 99 | model = tf.keras.Model(inputs=input_img, outputs=x) 100 | 101 | opt = tf.keras.optimizers.Adam(learning_rate=l_rate) 102 | model.compile( 103 | loss="sparse_categorical_crossentropy", 104 | metrics=[tf.keras.metrics.SparseCategoricalAccuracy()], 105 | optimizer=opt) 106 | return model 107 | 108 | 109 | all_learner_models =
[] 110 | for i in range(n_learners): 111 | all_learner_models.append(KerasLearner( 112 | model=get_model(), 113 | train_loader=train_datasets[i], 114 | vote_loader=vote_datasets[i], 115 | test_loader=test_datasets[i], 116 | criterion="sparse_categorical_accuracy", 117 | minimise_criterion=False, 118 | model_evaluate_kwargs={"steps": vote_batches}, 119 | )) 120 | 121 | set_equal_weights(all_learner_models) 122 | 123 | # Train the model using Collective Learning 124 | results = Results() 125 | results.data.append(initial_result(all_learner_models)) 126 | 127 | plot = ColearnPlot(score_name=all_learner_models[0].criterion) 128 | 129 | for round_index in range(n_rounds): 130 | results.data.append( 131 | collective_learning_round(all_learner_models, 132 | vote_threshold, round_index) 133 | ) 134 | 135 | print_results(results) 136 | plot.plot_results_and_votes(results) 137 | 138 | plot.block() 139 | 140 | print("Colearn Example Finished!") 141 | -------------------------------------------------------------------------------- /colearn_examples/ml_interface/keras_fraud.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | # ------------------------------------------------------------------------------ 18 | import argparse 19 | import os 20 | import sys 21 | from pathlib import Path 22 | 23 | import numpy as np 24 | import tensorflow as tf 25 | 26 | from colearn.training import set_equal_weights, initial_result, collective_learning_round 27 | from colearn.utils.plot import ColearnPlot 28 | from colearn.utils.results import Results, print_results 29 | from colearn_keras.keras_learner import KerasLearner 30 | from colearn_other.fraud_dataset import fraud_preprocessing 31 | 32 | """ 33 | Fraud training example using Tensorflow Keras 34 | 35 | Used dataset: 36 | - Fraud, download from kaggle: https://www.kaggle.com/c/ieee-fraud-detection 37 | 38 | What script does: 39 | - Sets up the Keras model and some configuration parameters 40 | - Randomly splits the dataset between multiple learners 41 | - Runs multiple rounds of the learning process and displays a plot of the results 42 | """ 43 | 44 | input_classes = 431 45 | n_classes = 1 46 | loss = "binary_crossentropy" 47 | optimizer = tf.keras.optimizers.Adam 48 | l_rate = 0.0001 49 | batch_size = 10000 50 | vote_batches = 1 51 | 52 | 53 | def get_model(): 54 | model_input = tf.keras.Input(shape=(input_classes,), name="Input") 55 | 56 | x = tf.keras.layers.Dense(512, activation="relu")(model_input) 57 | x = tf.keras.layers.BatchNormalization()(x) 58 | x = tf.keras.layers.Dense(512, activation="relu")(x) 59 | x = tf.keras.layers.BatchNormalization()(x) 60 | x = tf.keras.layers.Dense(512, activation="relu")(x) 61 | x = tf.keras.layers.BatchNormalization()(x) 62 | 63 | x = tf.keras.layers.Dense( 64 | n_classes, activation="sigmoid", name="fc1" 65 | )(x) 66 | 67 | model = tf.keras.Model(inputs=model_input, outputs=x) 68 | 69 | opt = optimizer(learning_rate=l_rate) 70 | model.compile( 71 | loss=loss, 72 | metrics=[tf.keras.metrics.BinaryAccuracy()], 73 | optimizer=opt) 74 | return model 75 | 76 | 77 | parser = argparse.ArgumentParser() 78 | parser.add_argument("data_dir", help="Path to data directory", type=str) 79 | parser.add_argument("--use_cache", help="Use cached preprocessed data", type=lambda x: str(x).lower() not in ("false", "0", "no"), default=True)  # plain type=bool would treat any non-empty string as True 80 | 81 | args = parser.parse_args() 82 | 83 | if not Path.is_dir(Path(args.data_dir)): 84 | sys.exit(f"Data path provided: {args.data_dir} is not a valid path or not a directory") 85 | 86 | data_dir = args.data_dir 87 | train_fraction = 0.9 88 | vote_fraction = 0.05 89 | n_learners = 5 90 | 91 | testing_mode = bool(os.getenv("COLEARN_EXAMPLES_TEST", "")) # for testing 92 | n_rounds = 7 if not testing_mode else 1 93 | 94 | vote_threshold = 0.5 95 | steps_per_epoch = 1 96 | 97 | fraud_data, labels = fraud_preprocessing(data_dir, use_cache=args.use_cache) 98 | 99 | n_datapoints = fraud_data.shape[0] 100 | random_indices = np.random.permutation(np.arange(n_datapoints)) 101 | n_train = int(n_datapoints * train_fraction) 102 | n_vote = int(n_datapoints * vote_fraction) 103 | train_data = fraud_data[random_indices[:n_train]] 104 | train_labels = labels[random_indices[:n_train]] 105 | vote_data = fraud_data[random_indices[n_train: n_train + n_vote]] 106 | vote_labels = labels[random_indices[n_train:n_train + n_vote]] 107 | test_data = fraud_data[random_indices[n_train + n_vote:]] 108 | test_labels = labels[random_indices[n_train + n_vote:]] 109 | 110 | # make a tensorflow dataloader out of np arrays 111 | train_dataset = tf.data.Dataset.from_tensor_slices((train_data, train_labels)) 112 | vote_dataset = tf.data.Dataset.from_tensor_slices((vote_data, vote_labels)) 113 |
test_dataset = tf.data.Dataset.from_tensor_slices((test_data, test_labels)) 114 | 115 | # shard the dataset into n_learners pieces and add batching 116 | train_datasets = [train_dataset.shard(num_shards=n_learners, index=i).batch(batch_size) for i in range(n_learners)] 117 | vote_datasets = [vote_dataset.shard(num_shards=n_learners, index=i).batch(batch_size) for i in range(n_learners)] 118 | test_datasets = [test_dataset.shard(num_shards=n_learners, index=i).batch(batch_size) for i in range(n_learners)] 119 | 120 | all_learner_models = [] 121 | for i in range(n_learners): 122 | model = get_model() 123 | all_learner_models.append( 124 | KerasLearner( 125 | model=model, 126 | train_loader=train_datasets[i], 127 | vote_loader=vote_datasets[i], 128 | test_loader=test_datasets[i], 129 | model_fit_kwargs={"steps_per_epoch": steps_per_epoch}, 130 | model_evaluate_kwargs={"steps": vote_batches}, 131 | )) 132 | 133 | set_equal_weights(all_learner_models) 134 | 135 | results = Results() 136 | # Get initial score 137 | results.data.append(initial_result(all_learner_models)) 138 | 139 | plot = ColearnPlot(score_name="loss") 140 | 141 | for round_index in range(n_rounds): 142 | results.data.append( 143 | collective_learning_round(all_learner_models, 144 | vote_threshold, round_index) 145 | ) 146 | print_results(results) 147 | 148 | # then make an updating graph 149 | plot.plot_results_and_votes(results) 150 | 151 | plot.block() 152 | 153 | print("Colearn Example Finished!") 154 | -------------------------------------------------------------------------------- /colearn_examples/ml_interface/keras_cifar.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | # ------------------------------------------------------------------------------ 18 | import os 19 | 20 | import tensorflow as tf 21 | import tensorflow_datasets as tfds 22 | 23 | from colearn.training import initial_result, collective_learning_round, set_equal_weights 24 | from colearn.utils.plot import ColearnPlot 25 | from colearn.utils.results import Results, print_results 26 | from colearn_keras.keras_learner import KerasLearner 27 | from colearn_keras.utils import normalize_img 28 | 29 | """ 30 | CIFAR10 training example using Tensorflow Keras 31 | 32 | Used dataset: 33 | - CIFAR10 is a set of 60 000 colour images of size 32x32x3 in 10 classes 34 | 35 | What script does: 36 | - Loads the CIFAR10 dataset from tensorflow_datasets 37 | - Randomly splits the dataset between multiple learners 38 | - Runs multiple rounds of the learning process and displays a plot of the results 39 | """ 40 | 41 | n_learners = 5 42 | testing_mode = bool(os.getenv("COLEARN_EXAMPLES_TEST", "")) # for testing 43 | n_rounds = 20 if not testing_mode else 1 44 | 45 | make_plot = True 46 | vote_threshold = 0.5 47 | 48 | width = 32 49 | height = 32 50 | n_classes = 10 51 | 52 | optimizer = tf.keras.optimizers.Adam 53 | l_rate = 0.001 54 | batch_size = 64 55 | loss = "sparse_categorical_crossentropy" 56 | vote_batches = 2 57 | 58 | train_datasets, info = tfds.load('cifar10', 59 | split=tfds.even_splits('train', n=n_learners), 60 | as_supervised=True, with_info=True) 61 | n_datapoints = info.splits['train'].num_examples 62 | 63 | all_test_datasets = tfds.load('cifar10', 64 | split=tfds.even_splits('test', n=2 * n_learners), 65 | as_supervised=True) 66 | 67 | vote_datasets = all_test_datasets[0:n_learners] 68 | test_datasets = all_test_datasets[n_learners:] 69 | 70 | for i in range(n_learners): 71 | ds_train = train_datasets[i].map( 72 | normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE) 73 | ds_train = ds_train.cache() 74 | ds_train = ds_train.shuffle(n_datapoints // n_learners) 75 | ds_train = ds_train.batch(batch_size) 76 | train_datasets[i] = ds_train.prefetch(tf.data.experimental.AUTOTUNE) 77 | 78 | ds_vote = vote_datasets[i].map( 79 | normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE) 80 | ds_vote = ds_vote.cache() 81 | ds_vote = ds_vote.shuffle(n_datapoints // n_learners) 82 | ds_vote = ds_vote.batch(batch_size) 83 | vote_datasets[i] = ds_vote.prefetch(tf.data.experimental.AUTOTUNE) 84 | 85 | ds_test = test_datasets[i].map( 86 | normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE) 87 | ds_test = ds_test.batch(batch_size) 88 | ds_test = ds_test.cache() 89 | ds_test = ds_test.prefetch(tf.data.experimental.AUTOTUNE) 90 | test_datasets[i] = ds_test 91 | 92 | 93 | def get_model(): 94 | input_img = tf.keras.Input( 95 | shape=(width, height, 3), name="Input" 96 | ) 97 | x = tf.keras.layers.Conv2D( 98 | 32, (5, 5), activation="relu", padding="same", name="Conv1_1" 99 | )(input_img) 100 | x = tf.keras.layers.MaxPooling2D((2, 2), name="pool1")(x) 101 | x = tf.keras.layers.Conv2D( 102 | 32, (5, 5), activation="relu", padding="same", name="Conv2_1" 103 | )(x) 104 | x = tf.keras.layers.MaxPooling2D((2, 2), name="pool2")(x) 105 | x = tf.keras.layers.Conv2D( 106 | 64, (5, 5), activation="relu", padding="same", name="Conv3_1" 107 | )(x) 108 | x = tf.keras.layers.MaxPooling2D((2, 2), name="pool3")(x) 109 | x = tf.keras.layers.Flatten(name="flatten")(x) 110 | x = tf.keras.layers.Dense( 111 | 64, activation="relu", name="fc1" 112 | )(x) 113 | x = tf.keras.layers.Dense( 114 | n_classes,
activation="softmax", name="fc2" 115 | )(x) 116 | model = tf.keras.Model(inputs=input_img, outputs=x) 117 | 118 | opt = optimizer( 119 | learning_rate=l_rate 120 | ) 121 | model.compile( 122 | loss=loss, 123 | metrics=[tf.keras.metrics.SparseCategoricalAccuracy()], 124 | optimizer=opt) 125 | return model 126 | 127 | 128 | all_learner_models = [] 129 | for i in range(n_learners): 130 | all_learner_models.append(KerasLearner( 131 | model=get_model(), 132 | train_loader=train_datasets[i], 133 | vote_loader=vote_datasets[i], 134 | test_loader=test_datasets[i], 135 | criterion="sparse_categorical_accuracy", 136 | minimise_criterion=False, 137 | model_fit_kwargs={"steps_per_epoch": 100}, 138 | model_evaluate_kwargs={"steps": vote_batches} 139 | )) 140 | 141 | set_equal_weights(all_learner_models) 142 | 143 | results = Results() 144 | # Get initial score 145 | results.data.append(initial_result(all_learner_models)) 146 | 147 | plot = ColearnPlot(score_name=all_learner_models[0].criterion) 148 | 149 | for round_index in range(n_rounds): 150 | results.data.append( 151 | collective_learning_round(all_learner_models, 152 | vote_threshold, round_index) 153 | ) 154 | 155 | print_results(results) 156 | if make_plot: 157 | # then make an updating graph 158 | plot.plot_results_and_votes(results) 159 | 160 | if make_plot: 161 | plot.block() 162 | 163 | print("Colearn Example Finished!") 164 | -------------------------------------------------------------------------------- /colearn_examples/ml_interface/pytorch_mnist.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License.
16 | # 17 | # ------------------------------------------------------------------------------ 18 | import os 19 | 20 | from typing_extensions import TypedDict 21 | import torch.nn as nn 22 | import torch.nn.functional as nn_func 23 | import torch.utils.data 24 | from torchsummary import summary 25 | from torchvision import transforms, datasets 26 | 27 | from colearn.training import initial_result, collective_learning_round, set_equal_weights 28 | from colearn.utils.plot import ColearnPlot 29 | from colearn.utils.results import Results, print_results 30 | from colearn_pytorch.utils import categorical_accuracy 31 | from colearn_pytorch.pytorch_learner import PytorchLearner 32 | 33 | """ 34 | MNIST training example using PyTorch 35 | 36 | Used dataset: 37 | - MNIST is a set of 60 000 black and white handwritten digit images of size 28x28x1 in 10 classes 38 | 39 | What script does: 40 | - Loads the MNIST dataset from torchvision.datasets 41 | - Randomly splits the dataset between multiple learners 42 | - Runs multiple rounds of the learning process and displays a plot of the results 43 | """ 44 | 45 | # define some constants 46 | n_learners = 5 47 | batch_size = 64 48 | 49 | testing_mode = bool(os.getenv("COLEARN_EXAMPLES_TEST", "")) # for testing 50 | n_rounds = 20 if not testing_mode else 1 51 | vote_threshold = 0.5 52 | train_fraction = 0.9 53 | vote_fraction = 0.05 54 | learning_rate = 0.001 55 | height = 28 56 | width = 28 57 | n_classes = 10 58 | vote_batches = 2 59 | score_name = "categorical accuracy" 60 | 61 | no_cuda = False 62 | cuda = not no_cuda and torch.cuda.is_available() 63 | device = torch.device("cuda" if cuda else "cpu") 64 | DataloaderKwargs = TypedDict('DataloaderKwargs', {'num_workers': int, 'pin_memory': bool}, total=False) 65 | kwargs: DataloaderKwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {} 66 | 67 | # Load the data and split for each learner.
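# The dataset is divided into train/vote/test fractions, and each fraction is then split evenly
# across the learners with random_split (which requires the given lengths to sum to the dataset
# length, so each fraction must divide exactly by n_learners).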
68 | DATA_DIR = os.environ.get('PYTORCH_DATA_DIR', 69 | os.path.expanduser(os.path.join('~', 'pytorch_datasets'))) 70 | data = datasets.MNIST(DATA_DIR, transform=transforms.ToTensor(), download=True) 71 | n_train = int(train_fraction * len(data)) 72 | n_vote = int(vote_fraction * len(data)) 73 | n_test = len(data) - n_train - n_vote 74 | train_data, vote_data, test_data = torch.utils.data.random_split(data, [n_train, n_vote, n_test]) 75 | 76 | data_split = [len(train_data) // n_learners] * n_learners 77 | learner_train_data = torch.utils.data.random_split(train_data, data_split) 78 | learner_train_dataloaders = [torch.utils.data.DataLoader( 79 | ds, 80 | batch_size=batch_size, shuffle=True, **kwargs) for ds in learner_train_data] 81 | 82 | data_split = [len(vote_data) // n_learners] * n_learners 83 | learner_vote_data = torch.utils.data.random_split(vote_data, data_split) 84 | learner_vote_dataloaders = [torch.utils.data.DataLoader( 85 | ds, 86 | batch_size=batch_size, shuffle=True, **kwargs) for ds in learner_vote_data] 87 | 88 | data_split = [len(test_data) // n_learners] * n_learners 89 | learner_test_data = torch.utils.data.random_split(test_data, data_split) 90 | learner_test_dataloaders = [torch.utils.data.DataLoader( 91 | ds, 92 | batch_size=batch_size, shuffle=True, **kwargs) for ds in learner_test_data] 93 | 94 | 95 | # Define the model 96 | class Net(nn.Module): 97 | def __init__(self): 98 | super(Net, self).__init__() 99 | self.conv1 = nn.Conv2d(1, 20, 5, 1) 100 | self.conv2 = nn.Conv2d(20, 50, 5, 1) 101 | self.fc1 = nn.Linear(4 * 4 * 50, 500) 102 | self.fc2 = nn.Linear(500, n_classes) 103 | 104 | def forward(self, x): 105 | x = nn_func.relu(self.conv1(x.view(-1, 1, height, width))) 106 | x = nn_func.max_pool2d(x, 2, 2) 107 | x = nn_func.relu(self.conv2(x)) 108 | x = nn_func.max_pool2d(x, 2, 2) 109 | x = x.view(-1, 4 * 4 * 50) 110 | x = nn_func.relu(self.fc1(x)) 111 | x = self.fc2(x) 112 | return nn_func.log_softmax(x, dim=1) 113 | 114 | 115 | # Make n instances of PytorchLearner with model and torch dataloaders 116 | all_learner_models = [] 117 | for i in range(n_learners): 118 | model = Net().to(device) 119 | opt = torch.optim.Adam(model.parameters(), lr=learning_rate) 120 | learner = PytorchLearner( 121 | model=model, 122 | train_loader=learner_train_dataloaders[i], 123 | vote_loader=learner_vote_dataloaders[i], 124 | test_loader=learner_test_dataloaders[i], 125 | device=device, 126 | optimizer=opt, 127 | criterion=torch.nn.NLLLoss(), 128 | num_test_batches=vote_batches, 129 | vote_criterion=categorical_accuracy, 130 | minimise_criterion=False 131 | ) 132 | 133 | all_learner_models.append(learner) 134 | 135 | # Ensure all learners start with exactly the same weights 136 | set_equal_weights(all_learner_models) 137 | 138 | summary(all_learner_models[0].model, input_size=(width, height), device=str(device)) 139 | 140 | # Train the model using Collective Learning 141 | results = Results() 142 | results.data.append(initial_result(all_learner_models)) 143 | 144 | plot = ColearnPlot(score_name=score_name) 145 | 146 | for round_index in range(n_rounds): 147 | results.data.append( 148 | collective_learning_round(all_learner_models, 149 | vote_threshold, round_index) 150 | ) 151 | print_results(results) 152 | 153 | plot.plot_results_and_votes(results) 154 | 155 | plot.block() 156 | 157 | print("Colearn Example Finished!") 158 | -------------------------------------------------------------------------------- /colearn_examples/ml_interface/keras_mnist_diffpriv.py:
-------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | # ------------------------------------------------------------------------------ 18 | import os 19 | 20 | import tensorflow as tf 21 | import tensorflow_datasets as tfds 22 | from tensorflow_privacy.privacy.optimizers.dp_optimizer_keras import DPKerasAdamOptimizer 23 | 24 | from colearn.training import initial_result, collective_learning_round, set_equal_weights 25 | from colearn.utils.plot import ColearnPlot 26 | from colearn.utils.results import Results, print_results 27 | from colearn.ml_interface import DiffPrivConfig 28 | from colearn_keras.keras_learner import KerasLearner 29 | from colearn_keras.utils import normalize_img 30 | 31 | n_learners = 5 32 | 33 | testing_mode = bool(os.getenv("COLEARN_EXAMPLES_TEST", "")) # for testing 34 | n_rounds = 20 if not testing_mode else 1 35 | vote_threshold = 0.5 36 | 37 | width = 28 38 | height = 28 39 | n_classes = 10 40 | 41 | l_rate = 0.001 42 | batch_size = 64 43 | vote_batches = 2 44 | 45 | # Differential privacy parameters 46 | num_microbatches = 4 # how many microbatches to split each batch into 47 | diff_priv_config = DiffPrivConfig( 48 | target_epsilon=1.0, # epsilon budget for the epsilon-delta DP 49 | target_delta=1e-5, # delta budget for the epsilon-delta DP 50 | max_grad_norm=1.5, 51 | noise_multiplier=1.3 # more noise -> more privacy, less utility 52 | ) 53 | 54 | 55 | train_datasets, info = tfds.load('mnist', 56 | split=tfds.even_splits('train', n=n_learners), 57 | as_supervised=True, with_info=True) 58 | n_datapoints = info.splits['train'].num_examples 59 | 60 | test_dataset = tfds.load('mnist', split='test', as_supervised=True) 61 | vote_datasets = [test_dataset.shard(num_shards=2 * n_learners, index=i) for i in range(n_learners)] 62 | test_datasets = [test_dataset.shard(num_shards=2 * n_learners, index=i) for i in range(n_learners, 2 * n_learners)] 63 | 64 | for i in range(n_learners): 65 | ds_train = train_datasets[i].map( 66 | normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE) 67 | ds_train = ds_train.shuffle(n_datapoints // n_learners).cache() 68 | # tf privacy expects fixed batch sizes, thus drop_remainder=True 69 | ds_train = ds_train.batch(batch_size, drop_remainder=True) 70 | train_datasets[i] = ds_train.prefetch(tf.data.experimental.AUTOTUNE) 71 | 72 | ds_vote = vote_datasets[i].map( 73 | normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE) 74 | ds_vote = ds_vote.batch(batch_size) 75 | ds_vote = ds_vote.prefetch(tf.data.experimental.AUTOTUNE).cache() 76 | vote_datasets[i] = ds_vote 77 | 78 | ds_test = test_datasets[i].map( 79 | normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE) 80 | ds_test = ds_test.batch(batch_size) 81 |
ds_test = ds_test.prefetch(tf.data.experimental.AUTOTUNE).cache() 82 | test_datasets[i] = ds_test 83 | 84 | 85 | def get_model(): 86 | input_img = tf.keras.Input( 87 | shape=(width, height, 1), name="Input" 88 | ) 89 | x = tf.keras.layers.Conv2D( 90 | 64, (3, 3), activation="relu", padding="same", name="Conv1_1" 91 | )(input_img) 92 | x = tf.keras.layers.BatchNormalization(name="bn1")(x) 93 | x = tf.keras.layers.MaxPooling2D((2, 2), name="pool1")(x) 94 | x = tf.keras.layers.Conv2D( 95 | 128, (3, 3), activation="relu", padding="same", name="Conv2_1" 96 | )(x) 97 | x = tf.keras.layers.BatchNormalization(name="bn4")(x) 98 | x = tf.keras.layers.MaxPooling2D((2, 2), name="pool2")(x) 99 | x = tf.keras.layers.Flatten(name="flatten")(x) 100 | x = tf.keras.layers.Dense( 101 | n_classes, activation="softmax", name="fc1" 102 | )(x) 103 | model = tf.keras.Model(inputs=input_img, outputs=x) 104 | 105 | opt = DPKerasAdamOptimizer( 106 | l2_norm_clip=diff_priv_config.max_grad_norm, 107 | noise_multiplier=diff_priv_config.noise_multiplier, 108 | num_microbatches=num_microbatches, 109 | learning_rate=l_rate) 110 | 111 | model.compile( 112 | loss=tf.keras.losses.SparseCategoricalCrossentropy( 113 | # need to calculate the loss per sample for the 114 | # per sample / per microbatch gradient clipping 115 | reduction=tf.losses.Reduction.NONE 116 | ), 117 | metrics=[tf.keras.metrics.SparseCategoricalAccuracy()], 118 | optimizer=opt) 119 | return model 120 | 121 | 122 | all_learner_models = [] 123 | for i in range(n_learners): 124 | all_learner_models.append(KerasLearner( 125 | model=get_model(), 126 | train_loader=train_datasets[i], 127 | vote_loader=vote_datasets[i], 128 | test_loader=test_datasets[i], 129 | criterion="sparse_categorical_accuracy", 130 | minimise_criterion=False, 131 | model_evaluate_kwargs={"steps": vote_batches}, 132 | diff_priv_config=diff_priv_config 133 | )) 134 | 135 | set_equal_weights(all_learner_models) 136 | 137 | results = Results() 138 | # Get initial score 139 | results.data.append(initial_result(all_learner_models)) 140 | 141 | plot = ColearnPlot(score_name=all_learner_models[0].criterion) 142 | 143 | for round_index in range(n_rounds): 144 | results.data.append( 145 | collective_learning_round(all_learner_models, 146 | vote_threshold, round_index) 147 | ) 148 | print_results(results) 149 | 150 | plot.plot_results_and_votes(results) 151 | 152 | plot.block() 153 | 154 | print("Colearn Example Finished!") 155 | -------------------------------------------------------------------------------- /colearn_examples/ml_interface/pytorch_mnist_diffpriv.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License.
16 | # 17 | # ------------------------------------------------------------------------------ 18 | import os 19 | 20 | from typing_extensions import TypedDict 21 | import torch.nn as nn 22 | import torch.nn.functional as nn_func 23 | import torch.utils.data 24 | from torchsummary import summary 25 | from torchvision import transforms, datasets 26 | 27 | from colearn.training import initial_result, collective_learning_round 28 | from colearn.utils.plot import ColearnPlot 29 | from colearn.utils.results import Results, print_results 30 | from colearn_pytorch.pytorch_learner import PytorchLearner, DiffPrivConfig 31 | 32 | # define some constants 33 | n_learners = 5 34 | batch_size = 64 35 | seed = 42 36 | 37 | testing_mode = bool(os.getenv("COLEARN_EXAMPLES_TEST", "")) # for testing 38 | n_rounds = 10 if not testing_mode else 1 39 | vote_threshold = 0.5 40 | train_fraction = 0.9 41 | vote_fraction = 0.05 42 | learning_rate = 0.001 43 | height = 28 44 | width = 28 45 | n_classes = 10 46 | vote_batches = 2 47 | 48 | # Differential Privacy parameters 49 | noise_multiplier = 1.3 50 | max_grad_norm = 1.2 51 | target_epsilon = 10.0 52 | target_delta = 1.01e-5 53 | 54 | no_cuda = False 55 | cuda = not no_cuda and torch.cuda.is_available() # boring torch stuff 56 | device = torch.device("cuda" if cuda else "cpu") 57 | DataloaderKwargs = TypedDict('DataloaderKwargs', {'num_workers': int, 'pin_memory': bool}, total=False) 58 | kwargs: DataloaderKwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {} 59 | 60 | # Load the data and split for each learner. 61 | # Using a torch-native dataloader makes this much easier 62 | transform = transforms.Compose([ 63 | transforms.ToTensor()]) 64 | DATA_DIR = os.environ.get('PYTORCH_DATA_DIR', 65 | os.path.expanduser(os.path.join('~', 'pytorch_datasets'))) 66 | data = datasets.MNIST(DATA_DIR, transform=transform, download=True, 67 | target_transform=int) 68 | n_train = int(train_fraction * len(data)) 69 | n_vote = int(vote_fraction * len(data)) 70 | n_test = len(data) - n_train - n_vote 71 | train_data, vote_data, test_data = torch.utils.data.random_split(data, [n_train, n_vote, n_test]) 72 | 73 | data_split = [len(train_data) // n_learners] * n_learners 74 | learner_train_data = torch.utils.data.random_split(train_data, data_split) 75 | learner_train_dataloaders = [torch.utils.data.DataLoader( 76 | ds, 77 | batch_size=batch_size, shuffle=True, **kwargs) for ds in learner_train_data] 78 | 79 | data_split = [len(vote_data) // n_learners] * n_learners 80 | learner_vote_data = torch.utils.data.random_split(vote_data, data_split) 81 | learner_vote_dataloaders = [torch.utils.data.DataLoader( 82 | ds, 83 | batch_size=batch_size, shuffle=True, **kwargs) for ds in learner_vote_data] 84 | 85 | data_split = [len(test_data) // n_learners] * n_learners 86 | learner_test_data = torch.utils.data.random_split(test_data, data_split) 87 | learner_test_dataloaders = [torch.utils.data.DataLoader( 88 | ds, 89 | batch_size=batch_size, shuffle=True, drop_last=True, **kwargs) for ds in learner_test_data] 90 | 91 | 92 | # define the neural net architecture in Pytorch 93 | class Net(nn.Module): 94 | def __init__(self): 95 | super(Net, self).__init__() 96 | self.conv1 = nn.Conv2d(1, 20, 5, 1) 97 | self.conv2 = nn.Conv2d(20, 50, 5, 1) 98 | self.fc1 = nn.Linear(4 * 4 * 50, 500) 99 | self.fc2 = nn.Linear(500, n_classes) 100 | 101 | def forward(self, x): 102 | x = nn_func.relu(self.conv1(x.view(-1, 1, height, width))) 103 | x = nn_func.max_pool2d(x, 2, 2) 104 | x = 
nn_func.relu(self.conv2(x)) 105 | x = nn_func.max_pool2d(x, 2, 2) 106 | x = x.view(-1, 4 * 4 * 50) 107 | x = nn_func.relu(self.fc1(x)) 108 | x = self.fc2(x) 109 | return nn_func.log_softmax(x, dim=1) 110 | 111 | 112 | # Make n instances of PytorchLearner with model and torch dataloaders 113 | all_learner = [] 114 | for i in range(n_learners): 115 | model = Net().to(device) 116 | opt = torch.optim.Adam(model.parameters(), lr=learning_rate) 117 | diff_priv_config = DiffPrivConfig( 118 | target_epsilon=target_epsilon, 119 | target_delta=target_delta, 120 | max_grad_norm=max_grad_norm, 121 | noise_multiplier=noise_multiplier, 122 | ) 123 | learner = PytorchLearner( 124 | model=model, 125 | train_loader=learner_train_dataloaders[i], 126 | vote_loader=learner_vote_dataloaders[i], 127 | test_loader=learner_test_dataloaders[i], 128 | device=device, 129 | optimizer=opt, 130 | criterion=torch.nn.NLLLoss(), 131 | num_test_batches=vote_batches, 132 | diff_priv_config=diff_priv_config, 133 | ) 134 | 135 | all_learner.append(learner) 136 | 137 | # print a summary of the model architecture 138 | summary(all_learner[0].model, input_size=(width, height), device=str(device)) 139 | 140 | # Now we're ready to start collective learning 141 | # Get initial accuracy 142 | results = Results() 143 | results.data.append(initial_result(all_learner)) 144 | 145 | plot = ColearnPlot(score_name="loss") 146 | 147 | score_name = "loss" 148 | for round_index in range(n_rounds): 149 | results.data.append( 150 | collective_learning_round(all_learner, 151 | vote_threshold, round_index) 152 | ) 153 | print_results(results) 154 | 155 | plot.plot_results_and_votes(results) 156 | 157 | plot.block() 158 | 159 | print("Colearn Example Finished!") 160 | -------------------------------------------------------------------------------- /colearn_grpc/test_grpc_utils.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | # ------------------------------------------------------------------------------ 18 | 19 | import asyncio 20 | from colearn.ml_interface import Weights 21 | from colearn_grpc.proto.generated.interface_pb2 import WeightsPart 22 | 23 | from colearn_grpc.utils import encode_weights, decode_weights, \ 24 | iterator_to_weights, iterator_to_weights_async, weights_to_iterator, WEIGHTS_PART_SIZE_BYTES 25 | 26 | 27 | def asyncio_run_synchronously(coroutine_to_run): 28 | return asyncio.get_event_loop().run_until_complete(coroutine_to_run) 29 | 30 | 31 | def test_encode_decode(): 32 | test_weights = "weights" 33 | weights = Weights(weights=test_weights) 34 | 35 | encoded = encode_weights(weights) 36 | decoded = decode_weights(encoded) 37 | 38 | assert decoded == weights 39 | assert weights.weights == test_weights 40 | 41 | encoded2 = encode_weights(decoded) 42 | assert encoded == encoded2 43 | 44 | 45 | def test_in_order_iterator_to_weights(): 46 | 47 | test_weights = b"abc" 48 | parts = [WeightsPart( 49 | weights=test_weights[i:i + 1], 50 | byte_index=i, 51 | total_bytes=len(test_weights)) 52 | for i in range(len(test_weights))] 53 | 54 | result = iterator_to_weights(request_iterator=iter(parts), decode=False) 55 | 56 | assert result.weights == test_weights 57 | 58 | 59 | # An alternate way to reconstruct weights is async with an async generator 60 | def test_in_order_iterator_to_weights_async(): 61 | 62 | # Create async generator 63 | async def weights_async_gen(parts): 64 | for i in parts: 65 | yield i 66 | 67 | test_weights = b"abc" 68 | parts = [WeightsPart( 69 | weights=test_weights[i:i + 1], 70 | byte_index=i, 71 | total_bytes=len(test_weights)) 72 | for i in range(len(test_weights))] 73 | 74 | # Easy way to call async coroutine from sync context 75 | result = asyncio_run_synchronously(iterator_to_weights_async(request_iterator=weights_async_gen(parts), decode=False)) 76 | 77 | assert result.weights == test_weights 78 | 79 | 80 | def test_all_order_iterator_to_weights(): 81 | 82 | test_weights = b"abcd" 83 | parts = [WeightsPart( 84 | weights=test_weights[i:i + 1], 85 | byte_index=i, 86 | total_bytes=len(test_weights)) 87 | for i in range(len(test_weights))] 88 | 89 | for _ in range(len(test_weights)): 90 | result = iterator_to_weights(request_iterator=iter(parts), decode=False) 91 | assert result.weights == test_weights 92 | parts = parts[1:] + parts[:1] 93 | 94 | 95 | def test_weights_to_iterator_small(): 96 | part_a = bytes(b"a") 97 | test_weights = part_a 98 | weights = Weights(weights=test_weights) 99 | 100 | iterator = weights_to_iterator(input_weights=weights, encode=False) 101 | 102 | val = next(iterator, b"") 103 | assert isinstance(val, WeightsPart) 104 | assert val.total_bytes == 1 105 | assert val.byte_index == 0 106 | assert bytes(val.weights) == part_a 107 | 108 | val = next(iterator, b"") 109 | assert val == b"" 110 | 111 | 112 | def test_weights_to_iterator_small_limit(): 113 | part_a = bytes(b"a" * WEIGHTS_PART_SIZE_BYTES) 114 | test_weights = part_a 115 | weights = Weights(weights=test_weights) 116 | 117 | iterator = weights_to_iterator(input_weights=weights, encode=False) 118 | 119 | val = next(iterator, b"") 120 | assert isinstance(val, WeightsPart) 121 | assert val.total_bytes == WEIGHTS_PART_SIZE_BYTES 122 | assert val.byte_index == 0 123 | assert bytes(val.weights) == part_a 124 | 125 | val = next(iterator, b"") 126 | assert val == b"" 127 | 128 | 129 | def test_weights_to_iterator_small_limit_plus_one(): 130 | part_a = bytes(b"a" * WEIGHTS_PART_SIZE_BYTES) 
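# One byte more than a single chunk: the iterator should yield a full-sized first part
# followed by a second part carrying just the overflow byte.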
131 | part_b = bytes(b"b") 132 | test_weights = part_a + part_b 133 | weights = Weights(weights=test_weights) 134 | 135 | iterator = weights_to_iterator(input_weights=weights, encode=False) 136 | 137 | val = next(iterator, b"") 138 | assert isinstance(val, WeightsPart) 139 | assert val.total_bytes == WEIGHTS_PART_SIZE_BYTES + 1 140 | assert val.byte_index == 0 141 | assert bytes(val.weights) == part_a 142 | 143 | val = next(iterator, b"") 144 | assert isinstance(val, WeightsPart) 145 | assert val.total_bytes == WEIGHTS_PART_SIZE_BYTES + 1 146 | assert val.byte_index == WEIGHTS_PART_SIZE_BYTES 147 | assert bytes(val.weights) == part_b 148 | 149 | val = next(iterator, b"") 150 | assert val == b"" 151 | 152 | 153 | def test_weights_to_iterator(): 154 | part_a = bytes(b"a" * WEIGHTS_PART_SIZE_BYTES) 155 | part_b = bytes(b"b" * (WEIGHTS_PART_SIZE_BYTES - 2)) 156 | test_weights = part_a + part_b 157 | weights = Weights(weights=test_weights) 158 | 159 | iterator = weights_to_iterator(input_weights=weights, encode=False) 160 | 161 | val = next(iterator, b"") 162 | assert isinstance(val, WeightsPart) 163 | assert val.total_bytes == 2 * WEIGHTS_PART_SIZE_BYTES - 2 164 | assert val.byte_index == 0 165 | assert bytes(val.weights) == part_a 166 | 167 | val = next(iterator, b"") 168 | assert isinstance(val, WeightsPart) 169 | assert val.total_bytes == 2 * WEIGHTS_PART_SIZE_BYTES - 2 170 | assert val.byte_index == WEIGHTS_PART_SIZE_BYTES 171 | assert bytes(val.weights) == part_b 172 | 173 | val = next(iterator, b"") 174 | assert val == b"" 175 | 176 | 177 | def test_iterator_and_back(): 178 | 179 | part_a = bytes(b"a" * WEIGHTS_PART_SIZE_BYTES) 180 | part_b = bytes(b"b" * (WEIGHTS_PART_SIZE_BYTES - 2)) 181 | test_weights = part_a + part_b 182 | weights = Weights(weights=test_weights) 183 | 184 | iterator = weights_to_iterator(input_weights=weights, encode=False) 185 | 186 | result = iterator_to_weights(request_iterator=iterator, decode=False) 187 | 188 | assert result == weights 189 | -------------------------------------------------------------------------------- /colearn_examples/ml_interface/pytorch_cifar.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # 3 | # Copyright 2021 Fetch.AI Limited 4 | # 5 | # Licensed under the Creative Commons Attribution-NonCommercial International 6 | # License, Version 4.0 (the "License"); you may not use this file except in 7 | # compliance with the License. You may obtain a copy of the License at 8 | # 9 | # http://creativecommons.org/licenses/by-nc/4.0/legalcode 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | # ------------------------------------------------------------------------------ 18 | import os 19 | 20 | from typing_extensions import TypedDict 21 | import torch.nn as nn 22 | import torch.nn.functional as nn_func 23 | import torch.utils.data 24 | from torchsummary import summary 25 | from torchvision import transforms, datasets 26 | 27 | from colearn.training import initial_result, collective_learning_round, set_equal_weights 28 | from colearn.utils.plot import ColearnPlot 29 | from colearn.utils.results import Results, print_results 30 | from colearn_pytorch.utils import categorical_accuracy 31 | from colearn_pytorch.pytorch_learner import PytorchLearner 32 | 33 | """ 34 | CIFAR10 training example using PyTorch 35 | 36 | Used dataset: 37 | - CIFAR10 is a set of 60 000 colour images of size 32x32x3 in 10 classes 38 | 39 | What script does: 40 | - Loads the CIFAR10 dataset from torchvision.datasets 41 | - Randomly splits the dataset between multiple learners 42 | - Runs multiple rounds of the learning process and displays a plot of the results 43 | """ 44 | 45 | # define some constants 46 | n_learners = 5 47 | batch_size = 64 48 | seed = 42 49 | testing_mode = bool(os.getenv("COLEARN_EXAMPLES_TEST", "")) # for testing 50 | n_rounds = 20 if not testing_mode else 1 51 | vote_threshold = 0.5 52 | train_fraction = 0.9 53 | vote_fraction = 0.05 54 | learning_rate = 0.001 55 | height = 32 56 | width = 32 57 | channels = 3 58 | n_classes = 10 59 | vote_batches = 2 60 | vote_on_accuracy = True # False means vote on loss 61 | 62 | no_cuda = False 63 | cuda = not no_cuda and torch.cuda.is_available() 64 | device = torch.device("cuda" if cuda else "cpu") 65 | DataloaderKwargs = TypedDict('DataloaderKwargs', {'num_workers': int, 'pin_memory': bool}, total=False) 66 | kwargs: DataloaderKwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {} 67 | 68 | # Load the data and split for each learner.
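# CIFAR10's 50,000 training images are divided into train/vote/test fractions and each fraction is
# then sharded evenly across the learners.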
69 | # Using a torch-native dataloader makes this much easier 70 | transform = transforms.Compose([ 71 | transforms.ToTensor()]) 72 | DATA_DIR = os.environ.get('PYTORCH_DATA_DIR', 73 | os.path.expanduser(os.path.join('~', 'pytorch_datasets'))) 74 | data = datasets.CIFAR10(DATA_DIR, transform=transform, download=True) 75 | n_train = int(train_fraction * len(data)) 76 | n_vote = int(vote_fraction * len(data)) 77 | n_test = len(data) - n_train - n_vote 78 | train_data, vote_data, test_data = torch.utils.data.random_split(data, [n_train, n_vote, n_test]) 79 | 80 | data_split = [len(train_data) // n_learners] * n_learners 81 | learner_train_data = torch.utils.data.random_split(train_data, data_split) 82 | learner_train_dataloaders = [torch.utils.data.DataLoader( 83 | ds, 84 | batch_size=batch_size, shuffle=True, **kwargs) for ds in learner_train_data] 85 | 86 | data_split = [len(vote_data) // n_learners] * n_learners 87 | learner_vote_data = torch.utils.data.random_split(vote_data, data_split) 88 | learner_vote_dataloaders = [torch.utils.data.DataLoader( 89 | ds, 90 | batch_size=batch_size, shuffle=True, **kwargs) for ds in learner_vote_data] 91 | 92 | data_split = [len(test_data) // n_learners] * n_learners 93 | learner_test_data = torch.utils.data.random_split(test_data, data_split) 94 | learner_test_dataloaders = [torch.utils.data.DataLoader( 95 | ds, 96 | batch_size=batch_size, shuffle=True, **kwargs) for ds in learner_test_data] 97 | 98 | 99 | # define the neural net architecture in Pytorch 100 | class Net(nn.Module): 101 | def __init__(self): 102 | super(Net, self).__init__() 103 | self.conv1 = nn.Conv2d(channels, 32, 5, 1, padding=2) 104 | self.conv2 = nn.Conv2d(32, 32, 5, 1, padding=2) 105 | self.conv3 = nn.Conv2d(32, 64, 5, 1, padding=2) 106 | self.fc1 = nn.Linear(4 * 4 * 64, 64) 107 | self.fc2 = nn.Linear(64, n_classes) 108 | 109 | def forward(self, x): 110 | x = nn_func.relu(self.conv1(x.view(-1, channels, height, width))) 111 | x = nn_func.max_pool2d(x, 2, 2) 112 | x = nn_func.relu(self.conv2(x)) 113 | x = nn_func.max_pool2d(x, 2, 2) 114 | x = nn_func.relu(self.conv3(x)) 115 | x = nn_func.max_pool2d(x, 2, 2) 116 | x = x.view(-1, 4 * 4 * 64) 117 | x = nn_func.relu(self.fc1(x)) 118 | x = self.fc2(x) 119 | 120 | return nn_func.log_softmax(x, dim=1) 121 | 122 | 123 | if vote_on_accuracy: 124 | learner_vote_kwargs = dict( 125 | vote_criterion=categorical_accuracy, 126 | minimise_criterion=False) 127 | score_name = "Categorical accuracy" 128 | else: 129 | learner_vote_kwargs = {} 130 | score_name = "loss" 131 | 132 | # Make n instances of PytorchLearner with model and torch dataloaders 133 | all_learner_models = [] 134 | for i in range(n_learners): 135 | model = Net() 136 | opt = torch.optim.Adam(model.parameters(), lr=learning_rate) 137 | learner = PytorchLearner( 138 | model=model, 139 | train_loader=learner_train_dataloaders[i], 140 | vote_loader=learner_vote_dataloaders[i], 141 | test_loader=learner_test_dataloaders[i], 142 | device=device, 143 | optimizer=opt, 144 | criterion=torch.nn.NLLLoss(), 145 | num_test_batches=vote_batches, 146 | **learner_vote_kwargs # type: ignore[arg-type] 147 | ) 148 | 149 | all_learner_models.append(learner) 150 | 151 | # Ensure all learners start with exactly the same weights 152 | set_equal_weights(all_learner_models) 153 | 154 | # print a summary of the model architecture 155 | summary(all_learner_models[0].model, input_size=(channels, width, height), device=str(device)) 156 | 157 | # Now we're ready to start collective learning 158 | # Get initial accuracy 159
| results = Results() 160 | results.data.append(initial_result(all_learner_models)) 161 | 162 | plot = ColearnPlot(score_name=score_name) 163 | 164 | # Do the training 165 | for round_index in range(n_rounds): 166 | results.data.append( 167 | collective_learning_round(all_learner_models, 168 | vote_threshold, round_index) 169 | ) 170 | print_results(results) 171 | 172 | plot.plot_results_and_votes(results) 173 | 174 | plot.block() 175 | 176 | print("Colearn Example Finished!") 177 | --------------------------------------------------------------------------------