├── .coveragerc ├── .github ├── CODE_OF_CONDUCT.md ├── ISSUE_TEMPLATE.md ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md ├── actions │ ├── bicep-to-arm-template-diff │ │ └── action.yaml │ ├── open-provision-resources │ │ └── action.yaml │ ├── run-shared-unit-tests │ │ └── action.yaml │ ├── submit-aml-literal-pipeline │ │ └── action.yaml │ ├── submit-aml-scatter-gather-pipeline │ │ └── action.yaml │ ├── submit-example-pipeline │ │ └── action.yaml │ ├── submit-multiply-data-pipeline │ │ └── action.yaml │ ├── submit-upload-data-pipeline │ │ └── action.yaml │ └── vnet-provision-resources │ │ └── action.yaml ├── scripts │ └── delete-run-history.sh └── workflows │ ├── build-test.yaml │ ├── clear-pipeline-run-history.yaml │ ├── pipeline-e2e-test.yaml │ └── release-branch-test.yaml ├── .gitignore ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE.md ├── README.md ├── docs ├── README.md ├── concepts │ ├── benchmarking.md │ ├── confidentiality.md │ ├── glossary.md │ ├── guide.md │ ├── mlops_for_fl.md │ ├── plan-your-fl-project.md │ └── vertical-fl.md ├── frameworks │ ├── flower.md │ └── nvflare.md ├── pics │ ├── ccfraud_acc.jpg │ ├── ccfraud_ddp.jpg │ ├── ccfraud_time.jpg │ ├── combined-losses-silos.PNG │ ├── diagram.png │ ├── fl_fig.png │ ├── fldatatypes.png │ ├── industry-bank-marketing.png │ ├── industry-fraud-detection.png │ ├── industry-medical-imaging.png │ ├── industry-ner.png │ ├── metrics.PNG │ ├── ner_acc.jpg │ ├── ner_ddp.jpg │ ├── ner_time.jpg │ ├── pipeline-aml.PNG │ ├── pneumonia_acc.jpg │ ├── pneumonia_ddp.jpg │ ├── pneumonia_ddp_1tb.jpg │ ├── pneumonia_time.jpg │ ├── sandboxes_confidential.png │ ├── sandboxes_eyesoff.png │ ├── sandboxes_eyeson.png │ ├── sandboxes_private.png │ ├── vfltrainingloop.png │ └── vnet_silo_provisioning.png ├── provisioning │ ├── README.md │ ├── external-silos.md │ ├── jumpbox_cc.md │ ├── orchestrator_open.md │ ├── orchestrator_vnet.md │ ├── sandboxes.md │ ├── silo_open.md │ ├── silo_open_aks_with_cc.md │ ├── silo_vnet_existingstorage.md │ └── silo_vnet_newstorage.md ├── quickstart.md ├── real-world-examples │ ├── bank-marketing-vertical.md │ ├── ccfraud-horizontal.md │ ├── ccfraud-vertical.md │ ├── ccfraud-vetical-fedonce.md │ ├── ner-horizontal.md │ └── pneumonia-horizontal.md ├── troubleshoot.md └── tutorials │ ├── add-kaggle-credentials.md │ ├── dp-for-cross-silo-horizontal-fl.md │ ├── e2e-fl-on-cc.md │ ├── literal-scatter-gather-tutorial.md │ ├── read-local-data-in-k8s-silo.md │ └── update-local-data-to-silo-storage-account.md ├── examples ├── .gitignore ├── cli-jobs │ └── upload-local-data │ │ ├── confidential_io.py │ │ ├── job.yml │ │ └── run.py ├── components │ ├── BANK_MARKETING_VERTICAL │ │ ├── traininsilo │ │ │ ├── aml_comm.py │ │ │ ├── aml_smpc.py │ │ │ ├── conda.yaml │ │ │ ├── contributor.py │ │ │ ├── contributor_spec.yaml │ │ │ ├── datasets.py │ │ │ ├── host.py │ │ │ ├── host_spec.yaml │ │ │ ├── models.py │ │ │ └── samplers.py │ │ └── upload_data │ │ │ ├── conda.yaml │ │ │ ├── run.py │ │ │ └── spec.yaml │ ├── CCFRAUD │ │ ├── preprocessing │ │ │ ├── conda.yaml │ │ │ ├── confidential_io.py │ │ │ ├── run.py │ │ │ └── spec.yaml │ │ ├── traininsilo │ │ │ ├── conda.yaml │ │ │ ├── confidential_io.py │ │ │ ├── datasets.py │ │ │ ├── models.py │ │ │ ├── run.py │ │ │ └── spec.yaml │ │ └── upload_data │ │ │ ├── conda.yaml │ │ │ ├── confidential_io.py │ │ │ ├── run.py │ │ │ ├── spec.yaml │ │ │ └── us_regions.csv │ ├── CCFRAUD_VERTICAL │ │ ├── preprocessing │ │ │ ├── run.py │ │ │ └── spec.yaml │ │ ├── psi │ │ │ ├── 
aml_comm.py │ │ │ ├── aml_smpc.py │ │ │ ├── context │ │ │ │ ├── Dockerfile │ │ │ │ ├── SymmetricPSI │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ └── psi.cpp │ │ │ │ └── vcpkg.json │ │ │ ├── run.py │ │ │ └── spec.yaml │ │ ├── traininsilo │ │ │ ├── aml_comm.py │ │ │ ├── aml_smpc.py │ │ │ ├── conda.yaml │ │ │ ├── contributor.py │ │ │ ├── contributor_spec.yaml │ │ │ ├── datasets.py │ │ │ ├── host.py │ │ │ ├── host_spec.yaml │ │ │ ├── models.py │ │ │ └── samplers.py │ │ └── upload_data │ │ │ ├── conda.yaml │ │ │ ├── run.py │ │ │ ├── spec.yaml │ │ │ └── us_regions.csv │ ├── CCFRAUD_VERTICAL_FEDONCE │ │ ├── preprocessing │ │ │ ├── run.py │ │ │ └── spec.yaml │ │ ├── pretraining │ │ │ ├── conda.yaml │ │ │ ├── datasets.py │ │ │ ├── models.py │ │ │ ├── run.py │ │ │ └── spec.yaml │ │ ├── traininsilo │ │ │ ├── conda.yaml │ │ │ ├── datasets.py │ │ │ ├── models.py │ │ │ ├── run.py │ │ │ └── spec.yaml │ │ └── upload_data │ │ │ ├── conda.yaml │ │ │ ├── run.py │ │ │ ├── spec.yaml │ │ │ └── us_regions.csv │ ├── FLWR │ │ ├── client │ │ │ ├── pneumonia_network.py │ │ │ ├── run.py │ │ │ └── spec.yaml │ │ ├── flower_pytorch_env │ │ │ ├── context │ │ │ │ ├── Dockerfile │ │ │ │ └── requirements.txt │ │ │ └── env.yml │ │ └── server │ │ │ ├── run.py │ │ │ └── spec.yaml │ ├── HELLOWORLD │ │ ├── aggregatemodelweights │ │ │ ├── run.py │ │ │ └── spec.yaml │ │ ├── preprocessing │ │ │ ├── run.py │ │ │ └── spec.yaml │ │ └── traininsilo │ │ │ ├── run.py │ │ │ └── spec.yaml │ ├── MNIST │ │ ├── preprocessing │ │ │ ├── conda.yaml │ │ │ ├── run.py │ │ │ └── spec.yaml │ │ └── traininsilo │ │ │ ├── conda.yaml │ │ │ ├── run.py │ │ │ └── spec.yaml │ ├── MNIST_VERTICAL │ │ ├── traininsilo │ │ │ ├── aml_comm.py │ │ │ ├── aml_smpc.py │ │ │ ├── conda.yaml │ │ │ ├── contributor.py │ │ │ ├── contributor_spec.yaml │ │ │ ├── host.py │ │ │ ├── host_spec.yaml │ │ │ └── samplers.py │ │ └── upload_data │ │ │ ├── conda.yaml │ │ │ ├── run.py │ │ │ └── spec.yaml │ ├── NER │ │ ├── preprocessing │ │ │ ├── conda.yaml │ │ │ ├── run.py │ │ │ └── spec.yaml │ │ ├── traininsilo │ │ │ ├── conda.yaml │ │ │ ├── labels.json │ │ │ ├── run.py │ │ │ └── spec.yaml │ │ └── upload_data │ │ │ ├── conda.yaml │ │ │ ├── run.py │ │ │ └── spec.yaml │ ├── NVFLARE │ │ ├── client │ │ │ ├── environment │ │ │ │ ├── context │ │ │ │ │ └── Dockerfile │ │ │ │ └── env.yml │ │ │ ├── run.py │ │ │ └── spec.yaml │ │ ├── provision │ │ │ ├── environment │ │ │ │ ├── context │ │ │ │ │ └── Dockerfile │ │ │ │ └── env.yml │ │ │ └── spec.yaml │ │ └── server │ │ │ ├── environment │ │ │ ├── context │ │ │ │ └── Dockerfile │ │ │ └── env.yml │ │ │ ├── run.py │ │ │ └── spec.yaml │ ├── PNEUMONIA │ │ ├── traininsilo │ │ │ ├── conda.yaml │ │ │ ├── pneumonia_network.py │ │ │ ├── run.py │ │ │ └── spec.yaml │ │ └── upload_data │ │ │ ├── conda.yaml │ │ │ ├── run.py │ │ │ └── spec.yaml │ ├── shared │ │ ├── aml_comm.py │ │ ├── aml_smpc.py │ │ ├── confidential_io.py │ │ └── samplers.py │ └── utils │ │ ├── aggregatemodelweights │ │ ├── conda.yaml │ │ ├── run.py │ │ └── spec.yaml │ │ ├── data_analysis │ │ ├── run.py │ │ └── spec.yaml │ │ └── multiply_data_files │ │ ├── conda.yaml │ │ ├── run.py │ │ └── spec.yaml └── pipelines │ ├── bank_marketing_vertical │ ├── config.yaml │ └── submit.py │ ├── ccfraud │ ├── config.yaml │ └── submit.py │ ├── ccfraud_vertical │ ├── config.yaml │ └── submit.py │ ├── ccfraud_vertical_fedonce │ ├── config.yaml │ └── submit.py │ ├── environment.yml │ ├── fl_cross_silo_literal │ ├── config.yaml │ └── submit.py │ ├── fl_cross_silo_scatter_gather │ ├── config.yaml │ ├── fl_helper.py │ └── 
submit.py │ ├── mnist_vertical │ ├── config.yaml │ └── submit.py │ ├── ner │ ├── config.yaml │ └── submit.py │ ├── pneumonia │ ├── config.yaml │ └── submit.py │ ├── pneumonia_flwr │ ├── config.yaml │ └── submit.py │ ├── pneumonia_nvflare │ ├── pneumonia_federated │ │ ├── config │ │ │ ├── config_fed_client.json │ │ │ └── config_fed_server.json │ │ └── custom │ │ │ ├── mlflow_receiver.py │ │ │ ├── pneumonia_network.py │ │ │ ├── pt_constants.py │ │ │ └── pt_learner.py │ ├── project.yaml │ └── submit.py │ ├── requirements.txt │ └── utils │ ├── multiply_data_files │ ├── config.yaml │ └── submit.py │ └── upload_data │ ├── config.yaml │ └── submit.py ├── mlops ├── arm │ ├── README.md │ ├── jumpbox_cc.json │ ├── open_aks_with_confcomp_storage_pair.json │ ├── open_compute_storage_pair.json │ ├── sandbox_fl_confidential.json │ ├── sandbox_fl_eyesoff_cpu.json │ ├── sandbox_fl_eyesoff_cpu_gpu.json │ ├── sandbox_fl_eyesoff_gpu.json │ ├── sandbox_fl_eyeson_cpu.json │ ├── sandbox_fl_eyeson_cpu_gpu.json │ ├── sandbox_fl_eyeson_gpu.json │ ├── sandbox_fl_private_cpu.json │ ├── sandbox_fl_private_cpu_gpu.json │ ├── sandbox_fl_private_gpu.json │ ├── sandbox_minimal.json │ ├── vnet_compute_existing_storage.json │ ├── vnet_compute_storage_pair.json │ ├── vnet_private_sandbox_setup.json │ ├── vnet_publicip_sandbox_aks_confcomp_setup.json │ └── vnet_publicip_sandbox_setup.json ├── bicep │ ├── modules │ │ ├── azureml │ │ │ ├── attach_aks_training_to_azureml.bicep │ │ │ ├── azureml_resources_ples.bicep │ │ │ ├── deploy_aks_azureml_extension.bicep │ │ │ ├── deploy_aks_azureml_extension_via_script.bicep │ │ │ ├── open_azureml_workspace.bicep │ │ │ └── private_azureml_workspace.bicep │ │ ├── computes │ │ │ ├── open_new_aks_with_confcomp.bicep │ │ │ ├── open_new_aml_compute.bicep │ │ │ ├── vnet_new_aks_with_confcomp.bicep │ │ │ └── vnet_new_aml_compute.bicep │ │ ├── fl_pairs │ │ │ ├── open_aks_with_confcomp_storage_pair.bicep │ │ │ ├── open_compute_storage_pair.bicep │ │ │ ├── vnet_aks_storage_pair.bicep │ │ │ ├── vnet_compute_existing_storage.bicep │ │ │ └── vnet_compute_storage_pair.bicep │ │ ├── networking │ │ │ ├── azureml_capable_nsg.bicep │ │ │ ├── private_dns_zone.bicep │ │ │ ├── private_endpoint.bicep │ │ │ ├── vnet.bicep │ │ │ └── vnet_peering.bicep │ │ ├── permissions │ │ │ └── msi_storage_rw.bicep │ │ ├── resources │ │ │ ├── confidentiality_keyvault.bicep │ │ │ ├── jumpbox_cc.bicep │ │ │ ├── private_acr.bicep │ │ │ ├── private_appinsights.bicep │ │ │ ├── private_keyvault.bicep │ │ │ └── private_storage.bicep │ │ └── storages │ │ │ ├── existing_blob_storage_datastore.bicep │ │ │ └── new_blob_storage_datastore.bicep │ ├── sandbox_fl_confidential.bicep │ ├── sandbox_fl_eyesoff_cpu.bicep │ ├── sandbox_fl_eyesoff_cpu_gpu.bicep │ ├── sandbox_fl_eyesoff_gpu.bicep │ ├── sandbox_fl_eyeson_cpu.bicep │ ├── sandbox_fl_eyeson_cpu_gpu.bicep │ ├── sandbox_fl_eyeson_gpu.bicep │ ├── sandbox_fl_private_cpu.bicep │ ├── sandbox_fl_private_cpu_gpu.bicep │ ├── sandbox_fl_private_gpu.bicep │ ├── sandbox_minimal.bicep │ ├── vnet_private_sandbox_setup.bicep │ ├── vnet_publicip_sandbox_aks_confcomp_setup.bicep │ └── vnet_publicip_sandbox_setup.bicep └── k8s_templates │ ├── README.md │ ├── deploy_pvc.yaml │ ├── instance-type.yaml │ ├── k8s_config.yaml │ ├── pv.yaml │ └── pvc.yaml └── tests ├── examples └── components │ └── shared │ ├── test_aml_comm.py │ ├── test_aml_smpc.py │ ├── test_samplers.py │ └── utils.py └── requirements.txt /.coveragerc: -------------------------------------------------------------------------------- 1 | 
[run] 2 | concurrency=multiprocessing 3 | omit = 4 | tests/* -------------------------------------------------------------------------------- /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 4 | > Please provide us with the following information: 5 | > --------------------------------------------------------------- 6 | 7 | ### This issue is for a: (mark with an `x`) 8 | ``` 9 | - [ ] bug report -> please search issues before submitting 10 | - [ ] feature request 11 | - [ ] documentation issue or request 12 | - [ ] regression (a behavior that used to work and stopped in a new release) 13 | ``` 14 | 15 | ### Minimal steps to reproduce 16 | > 17 | 18 | ### Any log messages given by the failure 19 | > 20 | 21 | ### Expected/desired behavior 22 | > 23 | 24 | ### OS and Version? 25 | > Windows 7, 8 or 10. Linux (which distribution). macOS (Yosemite? El Capitan? Sierra?) 26 | 27 | ### Versions 28 | > 29 | 30 | ### Mention any other details that might be useful 31 | 32 | > --------------------------------------------------------------- 33 | > Thanks! We'll be in touch soon. 34 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Additional context** 27 | Add any other context about the problem here. 28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 
18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Purpose 2 | 3 | * ... 4 | 5 | ## What is the expected review turnaround time? 6 | 7 | Urgency: 8 | - [ ] High (needs review today) 9 | - [ ] Medium (needs review within a few days - most common) 10 | - [ ] Low (can wait a week) 11 | 12 | ## Does this introduce a breaking change? 13 | 14 | ``` 15 | [ ] Yes 16 | [ ] No 17 | ``` 18 | 19 | ## Pull Request Type 20 | What kind of change does this Pull Request introduce? 21 | 22 | 23 | ``` 24 | [ ] Bugfix 25 | [ ] Feature 26 | [ ] Code style update (formatting, local variables) 27 | [ ] Refactoring (no functional changes, no api changes) 28 | [ ] Documentation content changes 29 | [ ] Other... Please describe: 30 | ``` 31 | 32 | ## How to Test 33 | * Get the code 34 | 35 | ``` 36 | git clone [repo-address] 37 | cd [repo-name] 38 | git checkout [branch-name] 39 | npm install 40 | ``` 41 | 42 | * Test the code 43 | 44 | ``` 45 | ``` 46 | 47 | ## What to Check 48 | Verify that the following are valid 49 | * ... 50 | 51 | ## Other Information 52 | -------------------------------------------------------------------------------- /.github/actions/bicep-to-arm-template-diff/action.yaml: -------------------------------------------------------------------------------- 1 | name: Bicep to ARM template diff 2 | description: Bicep build to ARM template diff 3 | inputs: 4 | source-file-path: 5 | description: Bicep script path 6 | required: true 7 | target-file-path: 8 | description: Arm template file path 9 | required: true 10 | 11 | runs: 12 | using: composite 13 | steps: 14 | - name: Build bicep into arm 15 | shell: bash 16 | run: | 17 | az config set bicep.use_binary_from_path=False 18 | az bicep install --version v0.14.85 19 | az bicep build --file ${{ inputs.source-file-path }} --stdout | jq -S . > source.json 20 | 21 | - name: Refactor ARM template file 22 | shell: bash 23 | run: jq -S . 
${{ inputs.target-file-path }} > target.json 24 | 25 | 26 | - name: Source and Target file diff 27 | shell: bash 28 | run: | 29 | if cmp -s ./source.json ./target.json; then 30 | printf 'Rebuilding bicep "%s" produces exact match with target file "%s"\n' ${{ inputs.source-file-path }} ${{ inputs.target-file-path }} 31 | else 32 | printf 'Diff between "%s" build and "%s":\n' ${{ inputs.source-file-path }} ${{ inputs.target-file-path }} 33 | diff source.json target.json 34 | exit 1 35 | fi 36 | -------------------------------------------------------------------------------- /.github/actions/run-shared-unit-tests/action.yaml: -------------------------------------------------------------------------------- 1 | name: Run unit tests for shared component files 2 | description: Run unit tests for shared components files and provide coverage 3 | 4 | runs: 5 | using: composite 6 | steps: 7 | - name: Install python dependencies 8 | shell: bash 9 | run: pip install -r tests/requirements.txt 10 | 11 | - name: Run unit tests 12 | shell: bash 13 | run: | 14 | coverage run --source=examples/components/shared -m unittest discover -s tests/examples/components/shared -v 15 | 16 | - name: Run coverage 17 | shell: bash 18 | run: | 19 | coverage combine 20 | coverage report -m -------------------------------------------------------------------------------- /.github/actions/submit-aml-literal-pipeline/action.yaml: -------------------------------------------------------------------------------- 1 | name: Submit example literal pipeline 2 | description: Submit example literal pipeline in AML 3 | inputs: 4 | client-id: 5 | description: Client ID of the service principal 6 | required: true 7 | tenant-id: 8 | description: Tenant ID of the service principal 9 | required: true 10 | subscription-id: 11 | description: Subscription to use for resources 12 | required: true 13 | resource-group: 14 | description: Resource group of the AML workspace 15 | required: true 16 | workspace-name: 17 | description: Workspace name 18 | required: true 19 | example: 20 | description: Example pipline to run 21 | required: true 22 | 23 | runs: 24 | using: composite 25 | steps: 26 | - name: Setup python 27 | uses: actions/setup-python@v2.2.1 28 | with: 29 | python-version: 3.8 30 | 31 | - name: Azure login 32 | uses: azure/login@v1 33 | with: 34 | client-id: ${{ inputs.client-id }} 35 | tenant-id: ${{ inputs.tenant-id }} 36 | subscription-id: ${{ inputs.subscription-id }} 37 | 38 | - name: Install azure ml latest extension 39 | shell: bash 40 | run: | 41 | az extension remove -n azure-cli-ml || echo "azure-cli-ml extension is not installed." 42 | az extension remove -n ml || echo "ml extension is not installed." 43 | az extension add -n ml -y 44 | 45 | - name: Install python dependencies 46 | shell: bash 47 | run: pip install -r examples/pipelines/requirements.txt 48 | 49 | - name: Submit fl_cross_silo_literal pipeline 50 | shell: bash 51 | run: python examples/pipelines/fl_cross_silo_literal/submit.py --subscription_id ${{ inputs.subscription-id }} --resource_group ${{ inputs.resource-group }} --workspace_name ${{ inputs.workspace-name }} --example ${{ inputs.example }} --wait || [ $? 
== 5 ] 52 | 53 | -------------------------------------------------------------------------------- /.github/actions/submit-aml-scatter-gather-pipeline/action.yaml: -------------------------------------------------------------------------------- 1 | name: Submit example scatter-gather pipeline 2 | description: Submit example scatter-gather pipeline in AML 3 | inputs: 4 | client-id: 5 | description: Client ID of the service principal 6 | required: true 7 | tenant-id: 8 | description: Tenant ID of the service principal 9 | required: true 10 | subscription-id: 11 | description: Subscription to use for resources 12 | required: true 13 | resource-group: 14 | description: Resource group of the AML workspace 15 | required: true 16 | workspace-name: 17 | description: Workspace name 18 | required: true 19 | example: 20 | description: Example pipline to run 21 | required: true 22 | 23 | runs: 24 | using: composite 25 | steps: 26 | - name: Setup python 27 | uses: actions/setup-python@v2.2.1 28 | with: 29 | python-version: 3.8 30 | 31 | - name: Azure login 32 | uses: azure/login@v1 33 | with: 34 | client-id: ${{ inputs.client-id }} 35 | tenant-id: ${{ inputs.tenant-id }} 36 | subscription-id: ${{ inputs.subscription-id }} 37 | 38 | - name: Install azure ml latest extension 39 | shell: bash 40 | run: | 41 | az extension remove -n azure-cli-ml || echo "azure-cli-ml extension is not installed." 42 | az extension remove -n ml || echo "ml extension is not installed." 43 | az extension add -n ml -y 44 | 45 | - name: Install python dependencies 46 | shell: bash 47 | run: pip install -r examples/pipelines/requirements.txt 48 | 49 | - name: Submit fl_cross_silo_scatter_gather pipeline 50 | shell: bash 51 | run: python examples/pipelines/fl_cross_silo_scatter_gather/submit.py --subscription_id ${{ inputs.subscription-id }} --resource_group ${{ inputs.resource-group }} --workspace_name ${{ inputs.workspace-name }} --example ${{ inputs.example }} --ignore_validation --wait || [ $? == 5 ] 52 | -------------------------------------------------------------------------------- /.github/actions/submit-example-pipeline/action.yaml: -------------------------------------------------------------------------------- 1 | name: Submit example pipeline 2 | description: Submit example pipeline in AML 3 | inputs: 4 | client-id: 5 | description: Client ID of the service principal 6 | required: true 7 | tenant-id: 8 | description: Tenant ID of the service principal 9 | required: true 10 | subscription-id: 11 | description: Subscription to use for resources 12 | required: true 13 | resource-group: 14 | description: Resource group of the AML workspace 15 | required: true 16 | workspace-name: 17 | description: Workspace name 18 | required: true 19 | example: 20 | description: Example pipeline to run 21 | required: true 22 | 23 | runs: 24 | using: composite 25 | steps: 26 | - name: Setup python 27 | uses: actions/setup-python@v2.2.1 28 | with: 29 | python-version: 3.8 30 | 31 | - name: Azure login 32 | uses: azure/login@v1 33 | with: 34 | client-id: ${{ inputs.client-id }} 35 | tenant-id: ${{ inputs.tenant-id }} 36 | subscription-id: ${{ inputs.subscription-id }} 37 | 38 | - name: Install azure ml latest extension 39 | shell: bash 40 | run: | 41 | az extension remove -n azure-cli-ml || echo "azure-cli-ml extension is not installed." 42 | az extension remove -n ml || echo "ml extension is not installed." 
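# the removals above are best-effort ('|| echo' keeps the step from failing when an extension is not installed) so the latest ml extension can be added cleanly below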
43 | az extension add -n ml -y 44 | 45 | - name: Install python dependencies 46 | shell: bash 47 | run: pip install -r examples/pipelines/requirements.txt 48 | 49 | - name: Submit example pipeline 50 | shell: bash 51 | run: python examples/pipelines/${{ inputs.example }}/submit.py --subscription_id ${{ inputs.subscription-id }} --resource_group ${{ inputs.resource-group }} --workspace_name ${{ inputs.workspace-name }} --wait || [ $? == 5 ] 52 | 53 | -------------------------------------------------------------------------------- /.github/actions/submit-multiply-data-pipeline/action.yaml: -------------------------------------------------------------------------------- 1 | name: Submit multiply data pipeline 2 | description: Submit multiply data pipeline in AML 3 | inputs: 4 | client-id: 5 | description: Client ID of the service principal 6 | required: true 7 | tenant-id: 8 | description: Tenant ID of the service principal 9 | required: true 10 | subscription-id: 11 | description: Subscription to use for resources 12 | required: true 13 | resource-group: 14 | description: Resource group of the AML workspace 15 | required: true 16 | workspace-name: 17 | description: Workspace name 18 | required: true 19 | 20 | runs: 21 | using: composite 22 | steps: 23 | - name: Setup python 24 | uses: actions/setup-python@v2.2.1 25 | with: 26 | python-version: 3.8 27 | 28 | - name: Azure login 29 | uses: azure/login@v1 30 | with: 31 | client-id: ${{ inputs.client-id }} 32 | tenant-id: ${{ inputs.tenant-id }} 33 | subscription-id: ${{ inputs.subscription-id }} 34 | 35 | - name: Install azure ml latest extension 36 | shell: bash 37 | run: | 38 | az extension remove -n azure-cli-ml || echo "azure-cli-ml extension is not installed." 39 | az extension remove -n ml || echo "ml extension is not installed." 40 | az extension add -n ml -y 41 | 42 | - name: Install python dependencies 43 | shell: bash 44 | run: pip install -r examples/pipelines/requirements.txt 45 | 46 | - name: Submit example pipeline 47 | shell: bash 48 | run: python examples/pipelines/utils/multiply_data_files/submit.py --subscription_id ${{ inputs.subscription-id }} --resource_group ${{ inputs.resource-group }} --workspace_name ${{ inputs.workspace-name }} --submit --wait || [ $? 
== 5 ] 49 | 50 | -------------------------------------------------------------------------------- /.github/actions/submit-upload-data-pipeline/action.yaml: -------------------------------------------------------------------------------- 1 | name: Submit upload data pipeline 2 | description: Submit upload data pipeline in AML 3 | inputs: 4 | client-id: 5 | description: Client ID of the service principal 6 | required: true 7 | tenant-id: 8 | description: Tenant ID of the service principal 9 | required: true 10 | subscription-id: 11 | description: Subscription to use for resources 12 | required: true 13 | resource-group: 14 | description: Resource group of the AML workspace 15 | required: true 16 | workspace-name: 17 | description: Workspace name 18 | required: true 19 | example: 20 | description: Example upload data pipeline to run 21 | required: true 22 | 23 | runs: 24 | using: composite 25 | steps: 26 | - name: Setup python 27 | uses: actions/setup-python@v2.2.1 28 | with: 29 | python-version: 3.8 30 | 31 | - name: Azure login 32 | uses: azure/login@v1 33 | with: 34 | client-id: ${{ inputs.client-id }} 35 | tenant-id: ${{ inputs.tenant-id }} 36 | subscription-id: ${{ inputs.subscription-id }} 37 | 38 | - name: Install azure ml latest extension 39 | shell: bash 40 | run: | 41 | az extension remove -n azure-cli-ml || echo "azure-cli-ml extension is not installed." 42 | az extension remove -n ml || echo "ml extension is not installed." 43 | az extension add -n ml -y 44 | 45 | - name: Install python dependencies 46 | shell: bash 47 | run: pip install -r examples/pipelines/requirements.txt 48 | 49 | - name: Submit example pipeline 50 | shell: bash 51 | run: python examples/pipelines/utils/upload_data/submit.py --subscription_id ${{ inputs.subscription-id }} --resource_group ${{ inputs.resource-group }} --workspace_name ${{ inputs.workspace-name }} --wait --example ${{ inputs.example }} || [ $? == 5 ] 52 | 53 | -------------------------------------------------------------------------------- /.github/scripts/delete-run-history.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | job_name_with_created_date=$(az ml job list -g $1 -w $2 --all-results true | jq ".[] | .name, .creation_context.created_at") 4 | echo $job_name_with_created_date 5 | job_name="" 6 | i=1 7 | for item in $job_name_with_created_date; do 8 | item=`sed -e 's/^"//' -e 's/"$//' <<< "$item"` 9 | if [[ $i%2 -eq 1 ]]; then 10 | job_name=$item 11 | i=$((i+1)) 12 | continue 13 | fi 14 | 15 | num_of_days=$((($(date +%s) - $(date -d $item +%s)) / (60 * 60 * 24) )) 16 | echo Job name: $job_name, Number of days: $num_of_days 17 | 18 | # Archive jobs that are older than 60 days 19 | if [[ $num_of_days -gt 60 ]]; then 20 | az ml job archive -g $1 -w $2 -n $job_name 21 | else 22 | echo "Number of days are less than 60." 23 | fi 24 | i=$((i+1)) 25 | done 26 | 27 | 28 | -------------------------------------------------------------------------------- /.github/workflows/build-test.yaml: -------------------------------------------------------------------------------- 1 | name: Pipeline-validation 2 | 3 | on: 4 | push: 5 | branches: 6 | - "*" 7 | pull_request: 8 | branches: 9 | - "*" 10 | 11 | jobs: 12 | build: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v2 16 | 17 | - uses: actions/setup-python@v2.2.1 18 | with: 19 | python-version: 3.8 20 | 21 | - run: pip install black 22 | 23 | - run: black --check . 
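# github.base_ref is only populated on pull_request events, so the black formatting check below runs for PRs only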
24 | if: github.base_ref 25 | 26 | - name: Intall python dependencies 27 | run: pip install -r examples/pipelines/requirements.txt 28 | 29 | - name: Validate fl_cross_silo_literal pipeline 30 | run: python examples/pipelines/fl_cross_silo_literal/submit.py --offline 31 | 32 | - name: Run unit tests for shared component files 33 | uses: ./.github/actions/run-shared-unit-tests 34 | -------------------------------------------------------------------------------- /.github/workflows/clear-pipeline-run-history.yaml: -------------------------------------------------------------------------------- 1 | name: Delete run history 2 | on: 3 | schedule: 4 | - cron: "0 0 * * *" 5 | 6 | jobs: 7 | delete-history: 8 | runs-on: ubuntu-latest 9 | permissions: 10 | id-token: write 11 | steps: 12 | - uses: actions/checkout@v2 13 | 14 | - name: Azure login 15 | uses: azure/login@v1 16 | with: 17 | client-id: ${{ secrets.AZURE_CLIENT_ID }} 18 | tenant-id: ${{ secrets.AZURE_TENANT_ID }} 19 | subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} 20 | 21 | - name: Install azure ml latest extension 22 | shell: bash 23 | run: | 24 | az extension remove -n azure-cli-ml || echo "azure-cli-ml extension is not installed." 25 | az extension remove -n ml || echo "ml extension is not installed." 26 | az extension add -n ml -y 27 | 28 | - name: Delete run history 29 | shell: bash 30 | run: ./.github/scripts/delete-run-history.sh ${{ secrets.RESOURCE_GROUP }} ${{ secrets.AML_WORKSPACE_NAME }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # if you use an AzureML config file locally 132 | config.json 133 | 134 | # for ignoring test jobs 135 | /examples/pipelines/test* 136 | 137 | # for ignoring local sandbox files for debugging/testing 138 | /sandbox/* 139 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE -------------------------------------------------------------------------------- /docs/concepts/confidentiality.md: -------------------------------------------------------------------------------- 1 | # Confidentiality and Federated Learning 2 | 3 | :construction: This page is under construction :construction: 4 | 5 | In Azure, there are several ways your data (customer data, models, etc) are protected. Azure services provide a flexible and comprehensive set of tools to help you meet your compliance and security requirements. This page describes a technique to ensure data encryption at rest from within your training/preprocessing code. Combined with confidential computing, it can allow you to maximize security and minimize the risk of data leakage. 6 | 7 | ## Use private storage, with service-side encryption 8 | 9 | :construction: 10 | 11 | ## Use confidential computing 12 | 13 | :construction: 14 | 15 | ## Use encryption as rest in your code 16 | 17 | :construction: 18 | -------------------------------------------------------------------------------- /docs/concepts/glossary.md: -------------------------------------------------------------------------------- 1 | # Glossary 2 | 3 | ## Data 4 | 5 | Any file or collection of files. Data will be described in terms of classification. 6 | Only three classifications are required for the context of this document. 
"Sensitive" (cannot be moved or even looked at), "intermediate" (can be moved around, but looser restrictions on visibility), and "eyes-on" (can be moved freely and seen by everyone participating in the federated training). 7 | 8 | ## Storage 9 | 10 | Wherever data is stored. In this file, storage is assumed to live in Azure. It may exist in locked-down virtual networks. 11 | 12 | ## Compute 13 | 14 | Anything that can run "code" (deliberately vague). In this file, compute is assumed to live in Azure. 15 | 16 | ## Job 17 | 18 | Execute code (a collection of files) in an environment (a Docker image) against data (from storage). A job can consume data from multiple storage instances and write back to multiple instances. 19 | 20 | ## Approval 21 | 22 | REST endpoint to which the platform "asks permission" before running any job. The platform sends the approval endpoint information including: 23 | 24 | 1. Input and output storage 25 | 2. Which compute the job wishes to run in 26 | 3. The author of the code the job is running 27 | 4. Whether or not the job has been code-signed by the configured policies 28 | 29 | The approval endpoint can either approve / reject the job based on checked-in configuration (e.g., of which storage accounts are associated with which silo) or pass this information on for manual approval. 30 | 31 | :exclamation: Note that the approval endpoints do not support 3P-facing AML yet. 32 | 33 | ## Silo 34 | 35 | Isolated collection of storage and compute. Here, "isolated" means that the platform guarantees: 36 | 37 | - Only compute within the silo can "touch" storage within the silo. 38 | - Only data of intermediate or eyes-on classification can be moved outside the silo. 39 | - Only "approved" jobs can change the classification of data or move it outside the silo. 40 | 41 | Silos are expected to be reliable (i.e., no concerns around network connectivity or uptime). 42 | 43 | :exclamation: Note that we assume a hard cap of ≤ 100 silos at current stage. 44 | 45 | ## Orchestrator 46 | 47 | Collection of storage and compute. The storage is for model parameters, rather than the actual data. A task orchestrator broadcasts the FL task, sends the current model to each silo, and aggregates the gradients from the silos. In this file, orchestrator is assumed to live in an AML workspace. 48 | 49 | ## Internal Silos 50 | 51 | Collection of silos belong to the same Azure tenant. 52 | 53 | ## External Silos 54 | 55 | Collection of silos that resides in either different Azure tenant or different cloud provider. 
56 | -------------------------------------------------------------------------------- /docs/pics/ccfraud_acc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/azure-ml-federated-learning/d72148a08394978db29277b922817cbe871fd2e5/docs/pics/ccfraud_acc.jpg -------------------------------------------------------------------------------- /docs/pics/ccfraud_ddp.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/azure-ml-federated-learning/d72148a08394978db29277b922817cbe871fd2e5/docs/pics/ccfraud_ddp.jpg -------------------------------------------------------------------------------- /docs/pics/ccfraud_time.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/azure-ml-federated-learning/d72148a08394978db29277b922817cbe871fd2e5/docs/pics/ccfraud_time.jpg -------------------------------------------------------------------------------- /docs/pics/combined-losses-silos.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/azure-ml-federated-learning/d72148a08394978db29277b922817cbe871fd2e5/docs/pics/combined-losses-silos.PNG -------------------------------------------------------------------------------- /docs/pics/diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/azure-ml-federated-learning/d72148a08394978db29277b922817cbe871fd2e5/docs/pics/diagram.png -------------------------------------------------------------------------------- /docs/pics/fl_fig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/azure-ml-federated-learning/d72148a08394978db29277b922817cbe871fd2e5/docs/pics/fl_fig.png -------------------------------------------------------------------------------- /docs/pics/fldatatypes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/azure-ml-federated-learning/d72148a08394978db29277b922817cbe871fd2e5/docs/pics/fldatatypes.png -------------------------------------------------------------------------------- /docs/pics/industry-bank-marketing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/azure-ml-federated-learning/d72148a08394978db29277b922817cbe871fd2e5/docs/pics/industry-bank-marketing.png -------------------------------------------------------------------------------- /docs/pics/industry-fraud-detection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/azure-ml-federated-learning/d72148a08394978db29277b922817cbe871fd2e5/docs/pics/industry-fraud-detection.png -------------------------------------------------------------------------------- /docs/pics/industry-medical-imaging.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/azure-ml-federated-learning/d72148a08394978db29277b922817cbe871fd2e5/docs/pics/industry-medical-imaging.png -------------------------------------------------------------------------------- /docs/pics/industry-ner.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/azure-ml-federated-learning/d72148a08394978db29277b922817cbe871fd2e5/docs/pics/industry-ner.png -------------------------------------------------------------------------------- /docs/pics/metrics.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/azure-ml-federated-learning/d72148a08394978db29277b922817cbe871fd2e5/docs/pics/metrics.PNG -------------------------------------------------------------------------------- /docs/pics/ner_acc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/azure-ml-federated-learning/d72148a08394978db29277b922817cbe871fd2e5/docs/pics/ner_acc.jpg -------------------------------------------------------------------------------- /docs/pics/ner_ddp.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/azure-ml-federated-learning/d72148a08394978db29277b922817cbe871fd2e5/docs/pics/ner_ddp.jpg -------------------------------------------------------------------------------- /docs/pics/ner_time.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/azure-ml-federated-learning/d72148a08394978db29277b922817cbe871fd2e5/docs/pics/ner_time.jpg -------------------------------------------------------------------------------- /docs/pics/pipeline-aml.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/azure-ml-federated-learning/d72148a08394978db29277b922817cbe871fd2e5/docs/pics/pipeline-aml.PNG -------------------------------------------------------------------------------- /docs/pics/pneumonia_acc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/azure-ml-federated-learning/d72148a08394978db29277b922817cbe871fd2e5/docs/pics/pneumonia_acc.jpg -------------------------------------------------------------------------------- /docs/pics/pneumonia_ddp.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/azure-ml-federated-learning/d72148a08394978db29277b922817cbe871fd2e5/docs/pics/pneumonia_ddp.jpg -------------------------------------------------------------------------------- /docs/pics/pneumonia_ddp_1tb.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/azure-ml-federated-learning/d72148a08394978db29277b922817cbe871fd2e5/docs/pics/pneumonia_ddp_1tb.jpg -------------------------------------------------------------------------------- /docs/pics/pneumonia_time.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/azure-ml-federated-learning/d72148a08394978db29277b922817cbe871fd2e5/docs/pics/pneumonia_time.jpg -------------------------------------------------------------------------------- /docs/pics/sandboxes_confidential.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Azure-Samples/azure-ml-federated-learning/d72148a08394978db29277b922817cbe871fd2e5/docs/pics/sandboxes_confidential.png -------------------------------------------------------------------------------- /docs/pics/sandboxes_eyesoff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/azure-ml-federated-learning/d72148a08394978db29277b922817cbe871fd2e5/docs/pics/sandboxes_eyesoff.png -------------------------------------------------------------------------------- /docs/pics/sandboxes_eyeson.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/azure-ml-federated-learning/d72148a08394978db29277b922817cbe871fd2e5/docs/pics/sandboxes_eyeson.png -------------------------------------------------------------------------------- /docs/pics/sandboxes_private.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/azure-ml-federated-learning/d72148a08394978db29277b922817cbe871fd2e5/docs/pics/sandboxes_private.png -------------------------------------------------------------------------------- /docs/pics/vfltrainingloop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/azure-ml-federated-learning/d72148a08394978db29277b922817cbe871fd2e5/docs/pics/vfltrainingloop.png -------------------------------------------------------------------------------- /docs/pics/vnet_silo_provisioning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/azure-ml-federated-learning/d72148a08394978db29277b922817cbe871fd2e5/docs/pics/vnet_silo_provisioning.png -------------------------------------------------------------------------------- /docs/provisioning/jumpbox_cc.md: -------------------------------------------------------------------------------- 1 | # Create a confidential compute jumpbox VM inside a vnet 2 | 3 | This tutorial will let you create a jumpbox VM inside a vnet, optionally by using Azure Bastion to connect via HTTPS. 4 | 5 | :warning: This should be used for **development purpose only**. 6 | 7 | ## Prerequisites 8 | 9 | To enjoy these quickstart, you will need to: 10 | 11 | - have an active [Azure subscription](https://azure.microsoft.com) that you can use for development purposes, 12 | - have permissions to create resources, set permissions, and create identities in this subscription (or at least in one resource group), 13 | - Note that to set permissions, you typically need _Owner_ role in the subscription or resource group - _Contributor_ role is not enough. This is key for being able to _secure_ the setup. 14 | - [install the Azure CLI](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli). 15 | 16 | ## Deploy a confidential compute VM inside a vNet 17 | 18 | > Check availability of [confidential compute VMS in your region.](https://azure.microsoft.com/en-us/explore/global-infrastructure/products-by-region/?products=virtual-machines®ions=all). 19 | 20 | ### Option 1 : one click deployment 21 | 22 | 1. Click on [![Deploy to Azure](https://aka.ms/deploytoazurebutton)](https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2FAzure-Samples%2Fazure-ml-federated-learning%2Fmain%2Fmlops%2Farm%2Fjumpbox_cc.json) 23 | 24 | 2. 
Adjust parameters, in particular: 25 | 26 | - vnetName: name of the vNet to join. 27 | - subnetName: name of the subnet inside the vNet. 28 | - nsgName: name of the existing security group applying to the VM. 29 | 30 | ### Option 2 : deployment using az cli 31 | 32 | In the resource group of your AzureML workspace, use the following command with parameters corresponding to your setup: 33 | 34 | ```bash 35 | az deployment group create --template-file ./mlops/bicep/modules/resources/jumpbox_cc.bicep --resource-group --parameters vnetName="..." subnetName="..." nsgName="..." jumpboxOs="linux" 36 | ``` 37 | -------------------------------------------------------------------------------- /docs/provisioning/orchestrator_open.md: -------------------------------------------------------------------------------- 1 | # Create an open sandbox orchestrator 2 | 3 | :warning: This should be used for **development purpose only**. 4 | 5 | ## Prerequisites 6 | 7 | To run these deployment options, you first need: 8 | 9 | - an existing Azure ML workspace (see [cookbook](README.md#create-an-azure-ml-workspace)) 10 | - have permissions to create resources, set permissions, and create identities in this subscription (or at least in one resource group), 11 | - Note that to set permissions, you typically need _Owner_ role in the subscription or resource group - _Contributor_ role is not enough. This is key for being able to _secure_ the setup. 12 | - Optional: [install the Azure CLI](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli). 13 | 14 | ## Create a compute and storage pair for the orchestrator 15 | 16 | > Note: both orchestrator and [silo](./silo_open.md) can be deployed using the same arm/bicep script, changing **Pair Base Name** accordingly. 17 | 18 | ### Option 1 : one click deployment 19 | 20 | 1. Click on [![Deploy to Azure](https://aka.ms/deploytoazurebutton)](https://portal.azure.com/#create/Microsoft.Template/uri/https%3A%2F%2Fraw.githubusercontent.com%2FAzure-Samples%2Fazure-ml-federated-learning%2Fmain%2Fmlops%2Farm%2Fopen_compute_storage_pair.json) 21 | 22 | 2. Adjust parameters, in particular: 23 | 24 | - Region: this will be set by Azure to the region of your resource group. 25 | - Machine Learning Name: need to match the name of the AzureML workspace in the resource group. 26 | - Machine Learning Region: the region in which the AzureML workspace was deployed (default: same as resource group). 27 | - Pair Region: the region where the compute and storage will be deployed (default: same as resource group). 28 | - Pair Base Name: a unique name for the **orchestrator**, example `orch`. This will be used to create all other resources (storage name, compute name, etc.). 
29 | 30 | ### Option 2 : deployment using az cli 31 | 32 | In the resource group of your AzureML workspace, use the following command with parameters corresponding to your setup: 33 | 34 | ```bash 35 | az deployment group create --template-file ./mlops/bicep/modules/fl_pairs/open_compute_storage_pair.bicep --resource-group <resource_group> --parameters pairBaseName="orch" pairRegion="eastus" machineLearningName="aml-fldemo" machineLearningRegion="eastus" 36 | ``` 37 | -------------------------------------------------------------------------------- /docs/tutorials/add-kaggle-credentials.md: -------------------------------------------------------------------------------- 1 | # Add Kaggle credentials to your FL sandbox 2 | 3 | Your Azure ML workspace has an attached key vault that can be used to store workspace-level secrets (users of the workspace will have access to it). We can use it to store the Kaggle API key so that jobs can download data from Kaggle. 4 | 5 | This tutorial shows you how to add your Kaggle credentials to your FL sandbox. 6 | 7 | ### Locate your workspace attached key vault 8 | 9 | You first need to locate your workspace key vault. It is provisioned by default in our [sandboxes](../provisioning/sandboxes.md) and is named `ws-shkv-<base-name>`. You can find the name of your workspace in the Azure portal. 10 | 11 | ### Option 1: using Azure CLI 12 | 13 | 1. Let's first obtain your AAD identifier (object id) by running the following command. We'll use it in the next step. 14 | 15 | ```bash 16 | az ad signed-in-user show --query id 17 | ``` 18 | 19 | 2. Create a new key vault policy for yourself, and grant permissions to list, set & delete secrets. 20 | 21 | ```bash 22 | az keyvault set-policy -n <key-vault-name> --secret-permissions list set delete --object-id <object-id> 23 | ``` 24 | 25 | > Note: The AML workspace you created with the aforementioned script contains the name of the key vault. Default is `ws-shkv-fldemo`. 26 | 27 | 3. With your newly created permissions, you can now create a secret to store the `kaggleusername`. 28 | 29 | ```bash 30 | az keyvault secret set --name kaggleusername --vault-name <key-vault-name> --value <kaggle-username> 31 | ``` 32 | 33 | > Make sure to provide your *Kaggle Username*. 34 | 35 | 4. Create a secret to store the `kagglekey`. 36 | 37 | ```bash 38 | az keyvault secret set --name kagglekey --vault-name <key-vault-name> --value <kaggle-api-token> 39 | ``` 40 | 41 | > Make sure to provide the *[Kaggle API Token](https://www.kaggle.com/docs/api#authentication)*. 42 | 43 | ### Option 2: using Azure UI 44 | 45 | 1. In your resource group (provisioned in the previous step), open the "Access Policies" tab in the newly created key vault and click "Create". 46 | 47 | 2. Select *List, Set & Delete* right under "Secret Management Operations" and press "Next". 48 | 49 | 3. Look up the currently logged-in user (using user id or email), select it, and press "Next". 50 | 51 | 4. Press "Next" and "Create" in the next screens. 52 | 53 | We are now able to create a secret in the key vault. 54 | 55 | 5. Open the "Secrets" tab. Create two plain text secrets: 56 | 57 | - **kaggleusername** - specifies your Kaggle user name 58 | - **kagglekey** - this is the API key that can be obtained from your account page on the Kaggle website. 
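Once both secrets exist, jobs submitted to the workspace can read them at run time. The snippet below is only a minimal sketch (it is not one of this repo's components) showing how a script running as an AzureML job could fetch the secrets and hand them to the Kaggle client. It assumes the secret names `kaggleusername` and `kagglekey` created above, and that the job environment includes the `azureml-core` and `kaggle` packages.

```python
# Minimal sketch: read the Kaggle secrets from the workspace key vault inside an AzureML job.
# Assumes the secret names "kaggleusername" and "kagglekey" created in this tutorial.
import os

from azureml.core import Run

run = Run.get_context()

# Run.get_secret retrieves a secret from the key vault attached to the workspace.
os.environ["KAGGLE_USERNAME"] = run.get_secret(name="kaggleusername")
os.environ["KAGGLE_KEY"] = run.get_secret(name="kagglekey")

# Import the Kaggle client only after the environment variables are set,
# because the kaggle package authenticates at import time.
from kaggle.api.kaggle_api_extended import KaggleApi

api = KaggleApi()
api.authenticate()
api.dataset_download_files("mlg-ulb/creditcardfraud", path="./raw", unzip=True)  # example dataset id
```

Note that `run.get_secret` goes through the submitted run's context, so it works for jobs running in the workspace rather than for plain local scripts; this is why the credentials live in the workspace key vault instead of being hard-coded in the components.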
59 | -------------------------------------------------------------------------------- /docs/tutorials/literal-scatter-gather-tutorial.md: -------------------------------------------------------------------------------- 1 | # Adapt our sample "literal" code to your needs 2 | 3 | IMPORTANT: the "literal" code available in this repo has been intentionally designed to: 4 | 5 | - provide an effortless setup to get started. 6 | - rely only on features that are currently generally available in AzureML SDK v2. 7 | 8 | This tutorial addresses the following scenarios: 9 | 10 | - To add/remove a silo: 11 | - You just need to make the changes in the "`federated_learning/silos`" section of the `examples/pipelines/fl_cross_silo_literal/config.yaml` file. 12 | 13 | - To change training hyper-parameters: 14 | - Adjust the parameters in the "`training_parameters`" section of the `examples/pipelines/fl_cross_silo_literal/config.yaml` file. 15 | 16 | ## Tutorial on how to adapt the "scatter-gather" code 17 | 18 | Please read the following points to have a better understanding of the "scatter-gather" code: 19 | 20 | - It has a `set_orchestrator` method that you can leverage to add an orchestrator to your pipeline. 21 | - The `add_silo` method lets you add `n` number of silos to the pipeline and you don't have to worry about the configuration. 22 | - It has a soft validation component that ensures that the appropriate permissions are granted for your assets. That being said, a compute `a` should not have access to dataset `b` and so on. 23 | - You can bypass the validation if you have your own custom rules. 24 | - Enabling type-check, ensures that no data is being saved and only model weights are kept in the datastore. 25 | 26 | This tutorial addresses the following scenarios: 27 | 28 | - To add/remove a silo: 29 | - You just need to make the changes in the "`strategy/horizontal`" section of the `examples/pipelines/fl_cross_silo_scatter_gather/config.yaml` file. 30 | 31 | - To change the training hyper-parameters: 32 | - Adjust the parameters in the "`inputs`" section of the `examples/pipelines/fl_cross_silo_scatter_gather/config.yaml` file. 33 | 34 | - To edit the flow of your training pipeline: 35 | - Pass your custom subgraph as a parameter to the `scatter_gather` method in the `examples/pipelines/fl_cross_silo_scatter_gather/submit.py` file. 36 | 37 | - To bypass the soft validation: 38 | - Use `--ignore_validation` argument while executing the `examples/pipelines/fl_cross_silo_scatter_gather/submit.py` file. 39 | 40 | - To enable multiple computes(CPU for preprocessing & GPU for training): 41 | - Set the `compute2` parameter to `true` while [provisioning](../quickstart.md#deploy-demo-resources-in-azure) the resources.(No further changes are required) 42 | -------------------------------------------------------------------------------- /examples/.gitignore: -------------------------------------------------------------------------------- 1 | # Hydra outputs. 2 | outputs/ 3 | -------------------------------------------------------------------------------- /examples/cli-jobs/upload-local-data/job.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json 2 | 3 | code: . 
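# '.' uploads this folder (run.py and confidential_io.py) as the job's code snapshot; the command below runs against it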
4 | 5 | command: > 6 | python run.py 7 | --local_data_folder ${{inputs.local_data_folder}} 8 | --destination_folder ${{outputs.destination_folder}} 9 | --method ${{inputs.method}} 10 | 11 | inputs: 12 | local_data_folder: 13 | type: uri_folder 14 | path: /path/to/local/data/folder # replace '/path/to/local/data/folder' by the actual path to the folder whose contents you want to upload 15 | 16 | method: 'copy' # just copy local to remote 17 | # method: 'encrypt' # to enable encryption of the outputs using your encryption keys 18 | 19 | outputs: 20 | destination_folder: 21 | type: uri_folder 22 | mode: upload 23 | path: azureml://datastores/<datastore-name>/paths/<path> # replace '<datastore-name>' by the actual datastore name for your silo, and '<path>' by the path you want to use in the silo storage account 24 | 25 | environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest 26 | 27 | environment_variables: 28 | # used only if method='encrypt' 29 | CONFIDENTIALITY_KEYVAULT: https://<keyvault-name>.vault.azure.net # url of the keyvault 30 | CONFIDENTIALITY_KEY_NAME: dev-rsa-key # name of the secret containing your encryption public key 31 | 32 | compute: azureml:<compute-name> # replace '<compute-name>' by the actual compute name for your silo 33 | -------------------------------------------------------------------------------- /examples/components/BANK_MARKETING_VERTICAL/traininsilo/conda.yaml: -------------------------------------------------------------------------------- 1 | name: bank_marketing_vertical_train_conda_env 2 | channels: 3 | - defaults 4 | - pytorch 5 | - nvidia 6 | dependencies: 7 | - python=3.8 8 | - pip=22.3.1 9 | - pytorch=1.13.1 10 | - pytorch-cuda=11.6 11 | - pip: 12 | - azureml-core==1.47.0 13 | - azure-keyvault==4.2.0 14 | - azureml-mlflow==1.48.0 15 | - pandas==1.5.2 16 | - torchmetrics==0.10.3 17 | - redis==4.5.1 18 | -------------------------------------------------------------------------------- /examples/components/BANK_MARKETING_VERTICAL/traininsilo/datasets.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset 3 | 4 | 5 | class BankMarketingDataset(Dataset): 6 | """BankMarketingDataset Dataset - combination of features and labels 7 | 8 | Args: 9 | df: Pandas dataframe containing features and/or labels 10 | (a "label" column, if present, is used as the target) 11 | 12 | Returns: 13 | None 14 | """ 15 | 16 | def __init__(self, df): 17 | if "label" in df.columns: 18 | if len(df.columns) > 1: 19 | self.X = torch.tensor( 20 | df.loc[:, df.columns != "label"].values, dtype=torch.float 21 | ) 22 | else: 23 | self.X = None 24 | self.Y = torch.tensor(df.loc[:, "label"].values, dtype=torch.int) 25 | else: 26 | self.X = torch.tensor(df.values, dtype=torch.float) 27 | self.Y = None 28 | 29 | def __len__(self): 30 | if self.Y is None: 31 | return len(self.X) 32 | else: 33 | return len(self.Y) 34 | 35 | def features_count(self): 36 | if self.X is not None: 37 | return self.X.shape[1] 38 | return None 39 | 40 | def __getitem__(self, idx): 41 | if self.Y is None: 42 | return self.X[idx] 43 | elif self.X is None: 44 | return self.Y[idx] 45 | else: 46 | return self.X[idx], self.Y[idx] 47 | -------------------------------------------------------------------------------- /examples/components/BANK_MARKETING_VERTICAL/traininsilo/models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class SimpleLinearBottom(nn.Module): 6 | """Bottom (Contributor) part of the model composed of only Linear model interleaved
with ReLU activations 7 | 8 | Args: 9 | input_dim (int): 10 | number of features to be consumed by the model 11 | """ 12 | 13 | def __init__(self, input_dim) -> None: 14 | super().__init__() 15 | 16 | self.input_dim = input_dim 17 | self.model = nn.Sequential( 18 | nn.Linear(input_dim, 512), 19 | nn.ReLU(), 20 | nn.Linear(512, 256), 21 | nn.ReLU(), 22 | nn.Linear(256, 128), 23 | nn.ReLU(), 24 | nn.Linear(128, 64), 25 | ) 26 | self._init_weights() 27 | 28 | def _init_weights(self): 29 | for m in self.modules(): 30 | if isinstance(m, nn.Embedding): 31 | torch.nn.init.uniform_(m.weight, -0.001, 0.001) 32 | elif isinstance(m, nn.Linear): 33 | torch.nn.init.xavier_uniform_(m.weight) 34 | m.bias.data.fill_(0.01) 35 | 36 | def forward(self, x): 37 | return self.model(x) 38 | 39 | 40 | class SimpleLinearTop(nn.Module): 41 | """Top (Host) part of the model composed of only Linear model interleaved with ReLU activations""" 42 | 43 | def __init__(self, world_size) -> None: 44 | super().__init__() 45 | 46 | self._world_size = world_size 47 | self.contributor_weights = torch.nn.ModuleList( 48 | [nn.Linear(64, 64) for _ in range(self._world_size)] 49 | ) 50 | 51 | self.model = nn.Sequential( 52 | nn.Linear(64, 1), 53 | nn.Sigmoid(), 54 | ) 55 | self._init_weights() 56 | 57 | def _init_weights(self): 58 | for m in self.modules(): 59 | if isinstance(m, nn.Embedding): 60 | torch.nn.init.uniform_(m.weight, -0.001, 0.001) 61 | elif isinstance(m, nn.Linear): 62 | torch.nn.init.xavier_uniform_(m.weight) 63 | m.bias.data.fill_(0.01) 64 | 65 | def forward(self, x): 66 | agg_x = self.contributor_weights[0](x[0]) 67 | for i in range(1, self._world_size - 1): 68 | agg_x += self.contributor_weights[i](x[i]) 69 | 70 | return self.model(agg_x).squeeze() 71 | -------------------------------------------------------------------------------- /examples/components/BANK_MARKETING_VERTICAL/traininsilo/samplers.py: -------------------------------------------------------------------------------- 1 | ########################################################################################## 2 | # WARNING # 3 | ########################################################################################## 4 | # Should this file change please update all copies of samplers.py file in the repository # 5 | ########################################################################################## 6 | 7 | import math 8 | import torch 9 | from torch.utils.data import Sampler 10 | 11 | 12 | class VerticallyDistributedBatchSampler(Sampler): 13 | """Batch sampler that uses a distributed communication backend to distribute samples indexes to each worker.""" 14 | 15 | def __init__(self, data_source, batch_size, comm, rank, world_size, shuffle=False): 16 | """Initializes the batch sampler. 17 | 18 | Args: 19 | data_source (torch.utils.data.Dataset): The dataset to sample from. 20 | batch_size (int): The size of the batch to sample. 21 | comm (AMLComm): The communicator to use for communication. 22 | rank (int): The rank of the current worker. 23 | world_size (int): The total number of workers. 24 | shuffle (bool, optional): Whether to shuffle the indices. Defaults to False. 
25 | """ 26 | self.data_source = data_source 27 | self.batch_size = batch_size 28 | self.shuffle = shuffle 29 | self.rank = rank 30 | self.world_size = world_size 31 | self.comm = comm 32 | 33 | def __iter__(self): 34 | if self.rank == 0: 35 | if self.shuffle: 36 | indices = torch.randperm(len(self.data_source)) 37 | else: 38 | indices = torch.arange(len(self.data_source)) 39 | 40 | # Split the indices into batches 41 | batches = [ 42 | indices[i : i + self.batch_size] 43 | for i in range(0, len(indices), self.batch_size) 44 | ] 45 | 46 | for batch in batches: 47 | for i in range(1, self.world_size): 48 | # Send the batch to contributor i 49 | self.comm.send(batch, i) 50 | 51 | yield batch 52 | else: 53 | for i in range(0, len(self.data_source), self.batch_size): 54 | # Receive the batch from host 55 | batch = self.comm.recv(0) 56 | yield batch 57 | 58 | def __len__(self): 59 | return math.ceil(len(self.data_source) / self.batch_size) 60 | -------------------------------------------------------------------------------- /examples/components/BANK_MARKETING_VERTICAL/upload_data/conda.yaml: -------------------------------------------------------------------------------- 1 | name: bank_marketing_vertical_upload_data_conda_env 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.8 6 | - pip=22.2.2 7 | - pip: 8 | - azure-identity==1.12.0 9 | - azure-keyvault==4.2.0 10 | - azureml-core==1.47.0 11 | - kaggle==1.5.12 12 | - scikit-learn==1.1.3 13 | - numpy==1.23.5 14 | - pandas==1.3.5 15 | -------------------------------------------------------------------------------- /examples/components/BANK_MARKETING_VERTICAL/upload_data/spec.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: msft_fl_bank_marketing_upload_data 3 | version: 0.0.1 4 | display_name: Download Bank Marketing data from Kaggle and upload to silo storage 5 | type: command 6 | is_deterministic: true 7 | 8 | inputs: 9 | silo_count: 10 | type: number 11 | optional: false 12 | silo_index: 13 | type: number 14 | optional: false 15 | 16 | outputs: 17 | raw_train_data: 18 | type: uri_folder 19 | description: the output Bank Marketing raw training data for a given silo 20 | raw_test_data: 21 | type: uri_folder 22 | description: the output Bank Marketing raw testing data for a given silo 23 | 24 | code: . 
25 | 26 | command: >- 27 | python run.py --silo_count ${{inputs.silo_count}} --silo_index ${{inputs.silo_index}} --raw_train_data ${{outputs.raw_train_data}} --raw_test_data ${{outputs.raw_test_data}} 28 | 29 | environment: 30 | conda_file: ./conda.yaml 31 | image: mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04 32 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD/preprocessing/conda.yaml: -------------------------------------------------------------------------------- 1 | name: ccfraud_preprocessing_conda_env 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.8 6 | - pip=22.3.1 7 | - pip: 8 | - azure-identity==1.12.0 9 | - azure-keyvault==4.2.0 10 | - azureml-mlflow==1.48.0 11 | - scikit-learn==1.2.2 12 | - numpy==1.24.2 13 | - pandas==1.5.3 14 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD/preprocessing/spec.yaml: -------------------------------------------------------------------------------- 1 | 2 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 3 | name: msft_fl_ccfraud_preprocessing_in_silo 4 | version: 0.3.0 5 | display_name: CC Fraud Pre-Processing (in silo) 6 | type: command 7 | description: Component for preprocessing raw data from silo's blob storage 8 | is_deterministic: true 9 | 10 | inputs: 11 | raw_training_data: 12 | type: uri_file 13 | description: the raw training data in a given silo 14 | raw_testing_data: 15 | type: uri_file 16 | description: the raw testing data in a given silo 17 | metrics_prefix: 18 | type: string 19 | description: Metrics prefix 20 | default: Default-prefix 21 | optional: true 22 | 23 | 24 | outputs: 25 | processed_train_data: 26 | type: uri_folder 27 | description: the output training data after preprocessing 28 | processed_test_data: 29 | type: uri_folder 30 | description: the output testing data after preprocessing 31 | 32 | code: . 
33 | 34 | command: >- 35 | python run.py --raw_training_data ${{inputs.raw_training_data}} --raw_testing_data ${{inputs.raw_testing_data}} --train_output ${{outputs.processed_train_data}} --test_output ${{outputs.processed_test_data}} $[[--metrics_prefix ${{inputs.metrics_prefix}}]] 36 | 37 | # NOTE: using one of Azure ML's curated environments 38 | # which has all the dependencies needed for this job 39 | environment: 40 | conda_file: ./conda.yaml 41 | image: mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04 42 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD/traininsilo/conda.yaml: -------------------------------------------------------------------------------- 1 | name: ccfraud_train_conda_env 2 | channels: 3 | - defaults 4 | - pytorch 5 | - nvidia 6 | dependencies: 7 | - python=3.8 8 | - pip=22.3.1 9 | - pytorch=1.13.1 10 | - pytorch-cuda=11.6 11 | - pip: 12 | - azure-identity==1.12.0 13 | - azure-keyvault==4.2.0 14 | - azureml-mlflow==1.48.0 15 | - pandas==1.5.3 16 | - torchmetrics==0.10.3 17 | - opacus==1.3.0 18 | - tqdm==4.64.1 19 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD/traininsilo/datasets.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset 3 | 4 | 5 | class FraudDataset(Dataset): 6 | """FraudDataset Dataset - combination of features and labels 7 | 8 | Args: 9 | feature: Transaction detail tensors 10 | target: Tensor of labels corresponding to features 11 | 12 | Returns: 13 | None 14 | """ 15 | 16 | def __init__(self, df): 17 | self.X = torch.tensor( 18 | df.loc[:, df.columns != "is_fraud"].values, dtype=torch.float 19 | ) 20 | self.Y = torch.tensor(df.loc[:, "is_fraud"].values, dtype=torch.int) 21 | 22 | def __len__(self): 23 | return len(self.X) 24 | 25 | def __getitem__(self, idx): 26 | if self.Y is None: 27 | return [self.X[idx]] 28 | return self.X[idx], self.Y[idx] 29 | 30 | 31 | class FraudTimeDataset(Dataset): 32 | """FraudTimeDataset Dataset - combination of features and labels retrieved sequentially 33 | 34 | Args: 35 | feature: Transaction detail tensors 36 | target: Tensor of labels corresponding to features 37 | 38 | Returns: 39 | None 40 | """ 41 | 42 | def __init__(self, df, time_steps=100): 43 | self.X = torch.tensor( 44 | df.loc[:, df.columns != "is_fraud"].values, dtype=torch.float 45 | ) 46 | self.Y = torch.tensor(df.loc[:, "is_fraud"].values, dtype=torch.int) 47 | 48 | assert time_steps >= 10 49 | 50 | self._time_steps = time_steps 51 | self._time_step_overlaps = time_steps // 5 52 | 53 | def __len__(self): 54 | return ( 55 | len(self.X) // (self._time_steps // self._time_step_overlaps) 56 | - self._time_step_overlaps 57 | ) + 1 58 | 59 | def __getitem__(self, idx): 60 | if self.Y is None: 61 | return ( 62 | self.X[ 63 | idx 64 | * (self._time_steps // self._time_step_overlaps) : idx 65 | * (self._time_steps // self._time_step_overlaps) 66 | + self._time_steps 67 | ], 68 | ) 69 | return ( 70 | self.X[ 71 | idx 72 | * (self._time_steps // self._time_step_overlaps) : idx 73 | * (self._time_steps // self._time_step_overlaps) 74 | + self._time_steps 75 | ], 76 | self.Y[ 77 | idx 78 | * (self._time_steps // self._time_step_overlaps) : idx 79 | * (self._time_steps // self._time_step_overlaps) 80 | + self._time_steps 81 | ], 82 | ) 83 | -------------------------------------------------------------------------------- 
/examples/components/CCFRAUD/upload_data/conda.yaml: -------------------------------------------------------------------------------- 1 | name: ccfraud_upload_data_conda_env 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.8 6 | - pip=22.3.1 7 | - pip: 8 | - azure-identity==1.12.0 9 | - azure-keyvault==4.2.0 10 | - azureml-core==1.47.0 11 | - kaggle==1.5.12 12 | - scikit-learn==1.1.3 13 | - numpy==1.23.5 14 | - pandas==1.3.5 15 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD/upload_data/spec.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: msft_fl_ccfraud_upload_data 3 | version: 0.3.0 4 | display_name: Download CC Fraud data from Kaggle and upload to silo storage 5 | type: command 6 | is_deterministic: true 7 | 8 | inputs: 9 | silo_count: 10 | type: number 11 | optional: false 12 | silo_index: 13 | type: number 14 | optional: false 15 | 16 | outputs: 17 | raw_train_data: 18 | type: uri_folder 19 | description: the output CC Fraud raw training data for a given silo 20 | raw_test_data: 21 | type: uri_folder 22 | description: the output CC Fraud raw testing data for a given silo 23 | 24 | code: . 25 | 26 | command: >- 27 | python run.py 28 | --silo_count ${{inputs.silo_count}} 29 | --silo_index ${{inputs.silo_index}} 30 | --raw_train_data ${{outputs.raw_train_data}} 31 | --raw_test_data ${{outputs.raw_test_data}} 32 | 33 | environment: 34 | conda_file: ./conda.yaml 35 | image: mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04 36 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD/upload_data/us_regions.csv: -------------------------------------------------------------------------------- 1 | State,StateCode,Region,Division 2 | Alaska,AK,West,Pacific 3 | Alabama,AL,South,East South Central 4 | Arkansas,AR,South,West South Central 5 | Arizona,AZ,West,Mountain 6 | California,CA,West,Pacific 7 | Colorado,CO,West,Mountain 8 | Connecticut,CT,Northeast,New England 9 | District of Columbia,DC,South,South Atlantic 10 | Delaware,DE,South,South Atlantic 11 | Florida,FL,South,South Atlantic 12 | Georgia,GA,South,South Atlantic 13 | Hawaii,HI,West,Pacific 14 | Iowa,IA,Midwest,West North Central 15 | Idaho,ID,West,Mountain 16 | Illinois,IL,Midwest,East North Central 17 | Indiana,IN,Midwest,East North Central 18 | Kansas,KS,Midwest,West North Central 19 | Kentucky,KY,South,East South Central 20 | Louisiana,LA,South,West South Central 21 | Massachusetts,MA,Northeast,New England 22 | Maryland,MD,South,South Atlantic 23 | Maine,ME,Northeast,New England 24 | Michigan,MI,Midwest,East North Central 25 | Minnesota,MN,Midwest,West North Central 26 | Missouri,MO,Midwest,West North Central 27 | Mississippi,MS,South,East South Central 28 | Montana,MT,West,Mountain 29 | North Carolina,NC,South,South Atlantic 30 | North Dakota,ND,Midwest,West North Central 31 | Nebraska,NE,Midwest,West North Central 32 | New Hampshire,NH,Northeast,New England 33 | New Jersey,NJ,Northeast,Middle Atlantic 34 | New Mexico,NM,West,Mountain 35 | Nevada,NV,West,Mountain 36 | New York,NY,Northeast,Middle Atlantic 37 | Ohio,OH,Midwest,East North Central 38 | Oklahoma,OK,South,West South Central 39 | Oregon,OR,West,Pacific 40 | Pennsylvania,PA,Northeast,Middle Atlantic 41 | Rhode Island,RI,Northeast,New England 42 | South Carolina,SC,South,South Atlantic 43 | South 
Dakota,SD,Midwest,West North Central 44 | Tennessee,TN,South,East South Central 45 | Texas,TX,South,West South Central 46 | Utah,UT,West,Mountain 47 | Virginia,VA,South,South Atlantic 48 | Vermont,VT,Northeast,New England 49 | Washington,WA,West,Pacific 50 | Wisconsin,WI,Midwest,East North Central 51 | West Virginia,WV,South,South Atlantic 52 | Wyoming,WY,West,Mountain 53 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD_VERTICAL/preprocessing/spec.yaml: -------------------------------------------------------------------------------- 1 | 2 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 3 | name: msft_fl_ccfraud_vertical_preprocessing_in_silo 4 | version: 0.0.1 5 | display_name: CC Fraud Pre-Processing (in silo) 6 | type: command 7 | description: Component for preprocessing raw data from silo's blob storage 8 | is_deterministic: true 9 | 10 | inputs: 11 | raw_training_data: 12 | type: uri_file 13 | description: the raw training data in a given silo 14 | raw_testing_data: 15 | type: uri_file 16 | description: the raw testing data in a given silo 17 | metrics_prefix: 18 | type: string 19 | description: Metrics prefix 20 | default: Default-prefix 21 | optional: true 22 | 23 | 24 | outputs: 25 | processed_train_data: 26 | type: uri_folder 27 | description: the output training data after preprocessing 28 | processed_test_data: 29 | type: uri_folder 30 | description: the output testing data after preprocessing 31 | 32 | code: . 33 | 34 | command: >- 35 | python run.py --raw_training_data ${{inputs.raw_training_data}} --raw_testing_data ${{inputs.raw_testing_data}} --train_output ${{outputs.processed_train_data}} --test_output ${{outputs.processed_test_data}} $[[--metrics_prefix ${{inputs.metrics_prefix}}]] 36 | 37 | # NOTE: using one of Azure ML's curated environments 38 | # which has all the dependencies needed for this job 39 | environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest 40 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD_VERTICAL/psi/context/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20230412.v1 2 | 3 | COPY vcpkg.json . 4 | COPY SymmetricPSI /SymmetricPSI 5 | 6 | # Update python and install dependencies 7 | RUN conda update conda && conda install python=3.10.0 8 | RUN pip install azureml-core==1.47.0 \ 9 | azure-keyvault==4.2.0 \ 10 | azureml-mlflow==1.48.0 \ 11 | pandas==1.5.2 \ 12 | redis==4.5.1 \ 13 | numpy==1.24.2 14 | 15 | # Install vcpkg and dependencies 16 | RUN apt-get update && apt-get install -y zip pkg-config build-essential cmake 17 | RUN git clone https://github.com/microsoft/vcpkg.git /vcpkg &&\ 18 | chmod a+x /vcpkg/bootstrap-vcpkg.sh && /bin/bash /vcpkg/bootstrap-vcpkg.sh &&\ 19 | /vcpkg/vcpkg install --triplet=x64-linux --x-buildtrees-root=/vcpkg/buildtrees --x-install-root=/vcpkg/installed --x-packages-root=/vcpkg/packages 20 | 21 | # Install APSI 22 | RUN git clone https://github.com/microsoft/APSI.git /APSI &&\ 23 | cd /APSI && mkdir build &&\ 24 | cd /APSI/build &&\ 25 | cmake .. -DAPSI_USE_ASM=OFF -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake &&\ 26 | make -j$(nproc) &&\ 27 | make install 28 | 29 | # Install SymmetricPSI and create Python bindings 30 | RUN cd SymmetricPSI && mkdir build && cd build &&\ 31 | cmake .. 
-DCMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake -DAPSI_ROOT=/APSI/build -DVCPKG_TARGET_TRIPLET=x64-linux -DCMAKE_BUILD_TYPE=Release &&\ 32 | make -j$(nproc) 33 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD_VERTICAL/psi/context/SymmetricPSI/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.13) 2 | 3 | if(NOT CMAKE_BUILD_TYPE) 4 | set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build type" FORCE) 5 | set_property(CACHE CMAKE_BUILD_TYPE PROPERTY 6 | STRINGS "Release" "Debug" "MinSizeRel" "RelWithDebInfo") 7 | endif() 8 | message(STATUS "Build type (CMAKE_BUILD_TYPE): ${CMAKE_BUILD_TYPE}") 9 | 10 | project(SymmetricPSI VERSION 1.0.0 LANGUAGES CXX C) 11 | find_package(Python3 COMPONENTS Interpreter Development REQUIRED) 12 | find_package(pybind11 CONFIG REQUIRED) 13 | find_package(APSI CONFIG REQUIRED) 14 | 15 | pybind11_add_module(SymmetricPSI psi.cpp) 16 | target_link_libraries(SymmetricPSI PRIVATE APSI::apsi Python3::Python pybind11::lto pybind11::embed pybind11::module pybind11::headers) 17 | set_target_properties(SymmetricPSI PROPERTIES PREFIX "" SUFFIX ".so" OUTPUT_NAME "SymmetricPSI" POSITION_INDEPENDENT_CODE TRUE) 18 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD_VERTICAL/psi/context/vcpkg.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": [ 3 | { 4 | "name": "seal", 5 | "features": ["no-throw-tran"] 6 | }, 7 | "kuku", 8 | "log4cplus", 9 | "cppzmq", 10 | "flatbuffers", 11 | "jsoncpp", 12 | "tclap", 13 | "pybind11", 14 | "python3" 15 | ], 16 | "overrides": [ 17 | { 18 | "name": "seal", 19 | "version": "4.1.1" 20 | }, 21 | { 22 | "name": "kuku", 23 | "version": "2.1" 24 | }, 25 | { 26 | "name": "python3", 27 | "version": "3.10.0" 28 | } 29 | ], 30 | "builtin-baseline": "a325228200d7f229f3337e612e0077f2a5307090" 31 | } -------------------------------------------------------------------------------- /examples/components/CCFRAUD_VERTICAL/psi/spec.yaml: -------------------------------------------------------------------------------- 1 | 2 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 3 | name: mnist_vertical_psi_in_silo 4 | version: 0.0.3 5 | display_name: CC Fraud PSI (in silo) 6 | type: command 7 | description: Component for private set intersection over data in silo's blob storage 8 | is_deterministic: false 9 | 10 | inputs: 11 | train_data: 12 | type: uri_file 13 | description: the training data in a given silo 14 | test_data: 15 | type: uri_file 16 | description: the testing data in a given silo 17 | global_size: 18 | type: number 19 | optional: false 20 | global_rank: 21 | type: number 22 | optional: false 23 | communication_backend: 24 | type: string 25 | enum: 26 | - socket 27 | - redis 28 | default: socket 29 | optional: false 30 | communication_encrypted: 31 | type: boolean 32 | description: Encrypt messages exchanged between the nodes 33 | optional: false 34 | metrics_prefix: 35 | type: string 36 | description: Metrics prefix 37 | default: Default-prefix 38 | optional: true 39 | 40 | 41 | outputs: 42 | matched_train_data: 43 | type: uri_folder 44 | description: the output training data after preprocessing 45 | matched_test_data: 46 | type: uri_folder 47 | description: the output testing data after preprocessing 48 | 49 | code: . 
50 | 51 | command: >- 52 | cp /SymmetricPSI/build/SymmetricPSI.so . && python run.py 53 | --raw_training_data ${{inputs.train_data}} 54 | --raw_testing_data ${{inputs.test_data}} 55 | --train_output ${{outputs.matched_train_data}} 56 | --test_output ${{outputs.matched_test_data}} 57 | --global_size ${{inputs.global_size}} 58 | --global_rank ${{inputs.global_rank}} 59 | --communication_backend ${{inputs.communication_backend}} 60 | --communication_encrypted ${{inputs.communication_encrypted}} 61 | $[[--metrics_prefix=${{inputs.metrics_prefix}}]] 62 | 63 | environment: 64 | build: 65 | path: ./context 66 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD_VERTICAL/traininsilo/conda.yaml: -------------------------------------------------------------------------------- 1 | name: ccfraud_vertical_train_conda_env 2 | channels: 3 | - defaults 4 | - pytorch 5 | - nvidia 6 | dependencies: 7 | - python=3.8 8 | - pip=22.3.1 9 | - pytorch=1.13.1 10 | - pytorch-cuda=11.6 11 | - pip: 12 | - azureml-core==1.47.0 13 | - azure-keyvault==4.2.0 14 | - azureml-mlflow==1.48.0 15 | - pandas==1.5.2 16 | - torchmetrics==0.10.3 17 | - redis==4.5.1 18 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD_VERTICAL/traininsilo/contributor_spec.yaml: -------------------------------------------------------------------------------- 1 | 2 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 3 | name: msft_fl_ccfraud_vertical_train_in_silo_contributor 4 | version: 0.0.1 5 | display_name: CC Fraud Train (in silo) 6 | type: command 7 | description: Component to train a model to classify CC Fraud. 8 | is_deterministic: false 9 | 10 | inputs: 11 | train_data: 12 | type: uri_folder 13 | description: the input training data (preprocessed) 14 | test_data: 15 | type: uri_folder 16 | description: the input testing data (preprocessed) 17 | metrics_prefix: 18 | type: string 19 | description: Metrics prefix 20 | default: Default-prefix 21 | optional: true 22 | checkpoint: 23 | type: uri_folder 24 | description: a given pre-existing checkpoint 25 | optional: true 26 | lr: 27 | type: number 28 | description: learning rate 29 | default: 1e-3 30 | optional: true 31 | epochs: 32 | type: integer 33 | description: total number of epochs for local training 34 | default: 10 35 | optional: true 36 | batch_size: 37 | type: integer 38 | description: batch size 39 | default: 100 40 | optional: true 41 | runtime_args: 42 | type: string 43 | description: stringified json config for a silo 44 | optional: true 45 | global_size: 46 | type: number 47 | optional: false 48 | global_rank: 49 | type: number 50 | optional: false 51 | communication_backend: 52 | type: string 53 | enum: 54 | - socket 55 | - redis 56 | default: socket 57 | optional: false 58 | communication_encrypted: 59 | type: boolean 60 | description: Encrypt messages exchanged between the nodes 61 | default: false 62 | optional: false 63 | 64 | outputs: 65 | model: 66 | type: uri_folder 67 | description: the output checkpoint 68 | 69 | code: . 
70 | 71 | command: >- 72 | python contributor.py 73 | --train_data ${{inputs.train_data}} 74 | --test_data ${{inputs.test_data}} 75 | --model_path ${{outputs.model}} 76 | --global_size ${{inputs.global_size}} 77 | --global_rank ${{inputs.global_rank}} 78 | $[[--metrics_prefix ${{inputs.metrics_prefix}}]] 79 | $[[--checkpoint ${{inputs.checkpoint}}]] 80 | $[[--lr ${{inputs.lr}}]] 81 | $[[--epochs ${{inputs.epochs}}]] 82 | $[[--batch_size ${{inputs.batch_size}}]] 83 | --communication_backend ${{inputs.communication_backend}} 84 | --communication_encrypted ${{inputs.communication_encrypted}} 85 | 86 | environment: 87 | conda_file: ./conda.yaml 88 | image: mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04 89 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD_VERTICAL/traininsilo/datasets.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset 3 | 4 | 5 | class FraudDataset(Dataset): 6 | """FraudDataset Dataset - combination of features and labels 7 | 8 | Args: 9 | df: Pandas dataframe containing features and/or labels 10 | 11 | Returns: 12 | None 13 | """ 14 | 15 | def __init__(self, df): 16 | if "is_fraud" in df.columns: 17 | if len(df.columns) > 1: 18 | self.X = df.loc[:, df.columns != "is_fraud"].values 19 | else: 20 | self.X = None 21 | self.Y = df.loc[:, "is_fraud"].values 22 | else: 23 | self.X = df.values 24 | self.Y = None 25 | 26 | if self.X is not None: 27 | self.X = torch.tensor(self.X, dtype=torch.float) 28 | if self.Y is not None: 29 | self.Y = torch.tensor(self.Y, dtype=torch.int) 30 | 31 | def __len__(self): 32 | if self.Y is None: 33 | return len(self.X) 34 | else: 35 | return len(self.Y) 36 | 37 | def features_count(self): 38 | if self.X is not None: 39 | return self.X.shape[1] 40 | return None 41 | 42 | def __getitem__(self, idx): 43 | if self.Y is None: 44 | return self.X[idx] 45 | elif self.X is None: 46 | return self.Y[idx] 47 | else: 48 | return self.X[idx], self.Y[idx] 49 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD_VERTICAL/traininsilo/host_spec.yaml: -------------------------------------------------------------------------------- 1 | 2 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 3 | name: msft_fl_ccfraud_vertical_train_in_silo_host 4 | version: 0.0.1 5 | display_name: CC Fraud Train (in silo) 6 | type: command 7 | description: Component to train a model to classify CC Fraud. 
8 | is_deterministic: false 9 | 10 | inputs: 11 | train_data: 12 | type: uri_folder 13 | description: the input training data (preprocessed) 14 | test_data: 15 | type: uri_folder 16 | description: the input testing data (preprocessed) 17 | metrics_prefix: 18 | type: string 19 | description: Metrics prefix 20 | default: Default-prefix 21 | optional: true 22 | checkpoint: 23 | type: uri_folder 24 | description: a given pre-existing checkpoint 25 | optional: true 26 | lr: 27 | type: number 28 | description: learning rate 29 | default: 1e-3 30 | optional: true 31 | epochs: 32 | type: integer 33 | description: total number of epochs for local training 34 | default: 10 35 | optional: true 36 | batch_size: 37 | type: integer 38 | description: batch size 39 | default: 100 40 | optional: true 41 | global_size: 42 | type: number 43 | optional: false 44 | global_rank: 45 | type: number 46 | optional: false 47 | communication_backend: 48 | type: string 49 | enum: 50 | - socket 51 | - redis 52 | default: socket 53 | optional: false 54 | communication_encrypted: 55 | type: boolean 56 | description: Encrypt messages exchanged between the nodes 57 | optional: false 58 | 59 | 60 | outputs: 61 | model: 62 | type: uri_folder 63 | description: the output checkpoint 64 | 65 | code: . 66 | 67 | command: >- 68 | python host.py 69 | --train_data ${{inputs.train_data}} 70 | --test_data ${{inputs.test_data}} 71 | --model_path ${{outputs.model}} 72 | --global_size ${{inputs.global_size}} 73 | --global_rank ${{inputs.global_rank}} 74 | $[[--metrics_prefix ${{inputs.metrics_prefix}}]] 75 | $[[--checkpoint ${{inputs.checkpoint}}]] 76 | $[[--lr ${{inputs.lr}}]] 77 | $[[--epochs ${{inputs.epochs}}]] 78 | $[[--batch_size ${{inputs.batch_size}}]] 79 | --communication_backend ${{inputs.communication_backend}} 80 | --communication_encrypted ${{inputs.communication_encrypted}} 81 | 82 | environment: 83 | conda_file: ./conda.yaml 84 | image: mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04 85 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD_VERTICAL/traininsilo/models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class SimpleLinearBottom(nn.Module): 7 | """Bottom (Contributor) part of the model composed of only Linear model interleaved with ReLU activations 8 | 9 | Args: 10 | input_dim (int): 11 | number of features to be consumed by the model 12 | """ 13 | 14 | def __init__(self, input_dim, latent_dim=4, hidden_dim=128, layers=4) -> None: 15 | super().__init__() 16 | 17 | self.input_dim = input_dim 18 | self.latent_dim = latent_dim 19 | self.layers = nn.ModuleList( 20 | [ 21 | nn.Linear(input_dim, hidden_dim) 22 | if i == 0 23 | else ( 24 | nn.Linear(hidden_dim, latent_dim) 25 | if i == layers - 1 26 | else nn.Linear(hidden_dim, hidden_dim) 27 | ) 28 | for i in range(layers) 29 | ] 30 | ) 31 | self._init_weights() 32 | 33 | def _init_weights(self): 34 | for m in self.modules(): 35 | if isinstance(m, nn.Embedding): 36 | torch.nn.init.uniform_(m.weight, -0.001, 0.001) 37 | elif isinstance(m, nn.Linear): 38 | torch.nn.init.xavier_uniform_(m.weight) 39 | m.bias.data.fill_(0.01) 40 | 41 | def forward(self, x): 42 | for i, layer in enumerate(self.layers): 43 | if i == len(self.layers) - 1: 44 | x = layer(x) 45 | else: 46 | x = F.relu(layer(x)) 47 | return x 48 | 49 | 50 | class SimpleLinearTop(nn.Module): 51 | """Top (Host) part of the 
model composed of only Linear model interleaved with ReLU activations""" 52 | 53 | def __init__(self, latent_dim) -> None: 54 | super().__init__() 55 | 56 | self.model = nn.Sequential( 57 | nn.Linear(latent_dim, 1), 58 | nn.Sigmoid(), 59 | ) 60 | self._init_weights() 61 | 62 | def _init_weights(self): 63 | for m in self.modules(): 64 | if isinstance(m, nn.Embedding): 65 | torch.nn.init.uniform_(m.weight, -0.001, 0.001) 66 | elif isinstance(m, nn.Linear): 67 | torch.nn.init.xavier_uniform_(m.weight) 68 | m.bias.data.fill_(0.01) 69 | 70 | def forward(self, x): 71 | return self.model(x).squeeze() 72 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD_VERTICAL/traininsilo/samplers.py: -------------------------------------------------------------------------------- 1 | ########################################################################################## 2 | # WARNING # 3 | ########################################################################################## 4 | # Should this file change please update all copies of samplers.py file in the repository # 5 | ########################################################################################## 6 | 7 | import math 8 | import torch 9 | from torch.utils.data import Sampler 10 | 11 | 12 | class VerticallyDistributedBatchSampler(Sampler): 13 | """Batch sampler that uses a distributed communication backend to distribute samples indexes to each worker.""" 14 | 15 | def __init__(self, data_source, batch_size, comm, rank, world_size, shuffle=False): 16 | """Initializes the batch sampler. 17 | 18 | Args: 19 | data_source (torch.utils.data.Dataset): The dataset to sample from. 20 | batch_size (int): The size of the batch to sample. 21 | comm (AMLComm): The communicator to use for communication. 22 | rank (int): The rank of the current worker. 23 | world_size (int): The total number of workers. 24 | shuffle (bool, optional): Whether to shuffle the indices. Defaults to False. 
25 | """ 26 | self.data_source = data_source 27 | self.batch_size = batch_size 28 | self.shuffle = shuffle 29 | self.rank = rank 30 | self.world_size = world_size 31 | self.comm = comm 32 | 33 | def __iter__(self): 34 | if self.rank == 0: 35 | if self.shuffle: 36 | indices = torch.randperm(len(self.data_source)) 37 | else: 38 | indices = torch.arange(len(self.data_source)) 39 | 40 | # Split the indices into batches 41 | batches = [ 42 | indices[i : i + self.batch_size] 43 | for i in range(0, len(indices), self.batch_size) 44 | ] 45 | 46 | for batch in batches: 47 | for i in range(1, self.world_size): 48 | # Send the batch to contributor i 49 | self.comm.send(batch, i) 50 | 51 | yield batch 52 | else: 53 | for i in range(0, len(self.data_source), self.batch_size): 54 | # Receive the batch from host 55 | batch = self.comm.recv(0) 56 | yield batch 57 | 58 | def __len__(self): 59 | return math.ceil(len(self.data_source) / self.batch_size) 60 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD_VERTICAL/upload_data/conda.yaml: -------------------------------------------------------------------------------- 1 | name: ccfraud_vertical_upload_data_conda_env 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.8 6 | - pip=22.2.2 7 | - pip: 8 | - azure-identity==1.12.0 9 | - azure-keyvault==4.2.0 10 | - azureml-core==1.47.0 11 | - kaggle==1.5.12 12 | - scikit-learn==1.1.3 13 | - numpy==1.23.5 14 | - pandas==1.3.5 15 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD_VERTICAL/upload_data/spec.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: msft_fl_ccfraud_vertical_upload_data 3 | version: 0.0.1 4 | display_name: Download CC Fraud data from Kaggle and upload to silo storage 5 | type: command 6 | is_deterministic: true 7 | 8 | inputs: 9 | silo_count: 10 | type: number 11 | optional: false 12 | silo_index: 13 | type: number 14 | optional: false 15 | 16 | outputs: 17 | raw_train_data: 18 | type: uri_folder 19 | description: the output CC Fraud raw training data for a given silo 20 | raw_test_data: 21 | type: uri_folder 22 | description: the output CC Fraud raw testing data for a given silo 23 | 24 | code: . 
25 | 26 | command: >- 27 | python run.py --silo_count ${{inputs.silo_count}} --silo_index ${{inputs.silo_index}} --raw_train_data ${{outputs.raw_train_data}} --raw_test_data ${{outputs.raw_test_data}} 28 | 29 | environment: 30 | conda_file: ./conda.yaml 31 | image: mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04 32 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD_VERTICAL/upload_data/us_regions.csv: -------------------------------------------------------------------------------- 1 | State,StateCode,Region,Division 2 | Alaska,AK,West,Pacific 3 | Alabama,AL,South,East South Central 4 | Arkansas,AR,South,West South Central 5 | Arizona,AZ,West,Mountain 6 | California,CA,West,Pacific 7 | Colorado,CO,West,Mountain 8 | Connecticut,CT,Northeast,New England 9 | District of Columbia,DC,South,South Atlantic 10 | Delaware,DE,South,South Atlantic 11 | Florida,FL,South,South Atlantic 12 | Georgia,GA,South,South Atlantic 13 | Hawaii,HI,West,Pacific 14 | Iowa,IA,Midwest,West North Central 15 | Idaho,ID,West,Mountain 16 | Illinois,IL,Midwest,East North Central 17 | Indiana,IN,Midwest,East North Central 18 | Kansas,KS,Midwest,West North Central 19 | Kentucky,KY,South,East South Central 20 | Louisiana,LA,South,West South Central 21 | Massachusetts,MA,Northeast,New England 22 | Maryland,MD,South,South Atlantic 23 | Maine,ME,Northeast,New England 24 | Michigan,MI,Midwest,East North Central 25 | Minnesota,MN,Midwest,West North Central 26 | Missouri,MO,Midwest,West North Central 27 | Mississippi,MS,South,East South Central 28 | Montana,MT,West,Mountain 29 | North Carolina,NC,South,South Atlantic 30 | North Dakota,ND,Midwest,West North Central 31 | Nebraska,NE,Midwest,West North Central 32 | New Hampshire,NH,Northeast,New England 33 | New Jersey,NJ,Northeast,Middle Atlantic 34 | New Mexico,NM,West,Mountain 35 | Nevada,NV,West,Mountain 36 | New York,NY,Northeast,Middle Atlantic 37 | Ohio,OH,Midwest,East North Central 38 | Oklahoma,OK,South,West South Central 39 | Oregon,OR,West,Pacific 40 | Pennsylvania,PA,Northeast,Middle Atlantic 41 | Rhode Island,RI,Northeast,New England 42 | South Carolina,SC,South,South Atlantic 43 | South Dakota,SD,Midwest,West North Central 44 | Tennessee,TN,South,East South Central 45 | Texas,TX,South,West South Central 46 | Utah,UT,West,Mountain 47 | Virginia,VA,South,South Atlantic 48 | Vermont,VT,Northeast,New England 49 | Washington,WA,West,Pacific 50 | Wisconsin,WI,Midwest,East North Central 51 | West Virginia,WV,South,South Atlantic 52 | Wyoming,WY,West,Mountain 53 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD_VERTICAL_FEDONCE/preprocessing/spec.yaml: -------------------------------------------------------------------------------- 1 | 2 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 3 | name: msft_fl_ccfraud_vertical_fedonce_preprocessing_in_silo 4 | version: 0.0.1 5 | display_name: CC Fraud Pre-Processing (in silo) 6 | type: command 7 | description: Component for preprocessing raw data from silo's blob storage 8 | is_deterministic: true 9 | 10 | inputs: 11 | raw_training_data: 12 | type: uri_file 13 | description: the raw training data in a given silo 14 | raw_testing_data: 15 | type: uri_file 16 | description: the raw testing data in a given silo 17 | metrics_prefix: 18 | type: string 19 | description: Metrics prefix 20 | default: Default-prefix 21 | optional: true 22 | 23 | 24 | outputs: 25 | processed_train_data: 
26 | type: uri_folder 27 | description: the output training data after preprocessing 28 | processed_test_data: 29 | type: uri_folder 30 | description: the output testing data after preprocessing 31 | 32 | code: . 33 | 34 | command: >- 35 | python run.py --raw_training_data ${{inputs.raw_training_data}} --raw_testing_data ${{inputs.raw_testing_data}} --train_output ${{outputs.processed_train_data}} --test_output ${{outputs.processed_test_data}} $[[--metrics_prefix ${{inputs.metrics_prefix}}]] 36 | 37 | # NOTE: using one of Azure ML's curated environments 38 | # which has all the dependencies needed for this job 39 | environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest 40 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD_VERTICAL_FEDONCE/pretraining/conda.yaml: -------------------------------------------------------------------------------- 1 | name: ccfraud_vertical_pretrain_conda_env 2 | channels: 3 | - defaults 4 | - pytorch 5 | - nvidia 6 | dependencies: 7 | - python=3.8 8 | - pip=22.3.1 9 | - pytorch=1.13.1 10 | - pytorch-cuda=11.6 11 | - pip: 12 | - azureml-mlflow==1.48.0 13 | - pandas==1.5.2 14 | - tqdm==4.64.1 15 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD_VERTICAL_FEDONCE/pretraining/datasets.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset 3 | 4 | 5 | class FraudDataset(Dataset): 6 | """FraudDataset Dataset - combination of features and labels 7 | 8 | Args: 9 | df: Pandas dataframe containing features and/or labels 10 | 11 | Returns: 12 | None 13 | """ 14 | 15 | def __init__(self, df): 16 | if "is_fraud" in df.columns: 17 | if len(df.columns) > 1: 18 | self.X = torch.tensor( 19 | df.loc[:, df.columns != "is_fraud"].values, dtype=torch.float 20 | ) 21 | else: 22 | self.X = None 23 | self.Y = torch.tensor(df.loc[:, "is_fraud"].values, dtype=torch.int) 24 | else: 25 | self.X = torch.tensor(df.values, dtype=torch.float) 26 | self.Y = None 27 | 28 | def __len__(self): 29 | if self.Y is None: 30 | return len(self.X) 31 | else: 32 | return len(self.Y) 33 | 34 | def features_count(self): 35 | if self.X is not None: 36 | return self.X.shape[1] 37 | return None 38 | 39 | def __getitem__(self, idx): 40 | if self.Y is None: 41 | return self.X[idx] 42 | elif self.X is None: 43 | return self.Y[idx] 44 | else: 45 | return self.X[idx], self.Y[idx] 46 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD_VERTICAL_FEDONCE/pretraining/spec.yaml: -------------------------------------------------------------------------------- 1 | 2 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 3 | name: msft_fl_ccfraud_vertical_fedonce_pretrain_contributor 4 | version: 0.0.1 5 | display_name: CC Fraud Pre-Train (in silo) 6 | type: command 7 | description: Component to train a model to generate embeddings representative of contributor samples. 
8 | is_deterministic: false 9 | 10 | inputs: 11 | train_data: 12 | type: uri_folder 13 | description: the input training data (preprocessed) 14 | test_data: 15 | type: uri_folder 16 | description: the input testing data (preprocessed) 17 | metrics_prefix: 18 | type: string 19 | description: Metrics prefix 20 | default: Default-prefix 21 | optional: true 22 | checkpoint: 23 | type: uri_folder 24 | description: a given pre-existing checkpoint 25 | optional: true 26 | lr: 27 | type: number 28 | description: learning rate 29 | default: 1e-3 30 | optional: true 31 | epochs: 32 | type: integer 33 | description: total number of epochs for local training 34 | default: 10 35 | optional: true 36 | batch_size: 37 | type: integer 38 | description: batch size 39 | default: 100 40 | optional: true 41 | 42 | outputs: 43 | model: 44 | type: uri_folder 45 | description: the output checkpoint 46 | embeddings: 47 | type: uri_folder 48 | description: the output embeddings 49 | 50 | code: . 51 | 52 | command: >- 53 | python run.py 54 | --train_data ${{inputs.train_data}} 55 | --test_data ${{inputs.test_data}} 56 | --model_path ${{outputs.model}} 57 | --embeddings_path ${{outputs.embeddings}} 58 | $[[--metrics_prefix ${{inputs.metrics_prefix}}]] 59 | $[[--checkpoint ${{inputs.checkpoint}}]] 60 | $[[--lr ${{inputs.lr}}]] 61 | $[[--epochs ${{inputs.epochs}}]] 62 | $[[--batch_size ${{inputs.batch_size}}]] 63 | environment: 64 | conda_file: ./conda.yaml 65 | image: mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04 66 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD_VERTICAL_FEDONCE/traininsilo/conda.yaml: -------------------------------------------------------------------------------- 1 | name: ccfraud_vertical_train_conda_env 2 | channels: 3 | - defaults 4 | - pytorch 5 | - nvidia 6 | dependencies: 7 | - python=3.8 8 | - pip=22.3.1 9 | - pytorch=1.13.1 10 | - pytorch-cuda=11.6 11 | - pip: 12 | - azureml-core==1.47.0 13 | - azure-keyvault==4.2.0 14 | - azureml-mlflow==1.48.0 15 | - pandas==1.5.2 16 | - torchmetrics==0.10.3 17 | - redis==4.5.1 18 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD_VERTICAL_FEDONCE/traininsilo/datasets.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.utils.data import Dataset 4 | 5 | 6 | class FraudDataset(Dataset): 7 | """FraudDataset Dataset - combination of features and labels 8 | 9 | Args: 10 | df: Pandas dataframe containing features and labels 11 | kwargs: 12 | embeddings: list of embeddings to be concatenated to features 13 | 14 | Returns: 15 | None 16 | """ 17 | 18 | def __init__(self, df, **kwargs): 19 | if "is_fraud" in df.columns: 20 | if len(df.columns) > 1: 21 | self.X = df.loc[:, df.columns != "is_fraud"].values 22 | else: 23 | self.X = None 24 | self.Y = df.loc[:, "is_fraud"].values 25 | else: 26 | self.X = df.values 27 | self.Y = None 28 | 29 | if "embeddings" in kwargs and len(kwargs["embeddings"]) > 0: 30 | self.X = np.load(kwargs["embeddings"][0]) 31 | for embedding in kwargs["embeddings"][1:]: 32 | np_embeddings = np.load(embedding) 33 | self.X = np.concatenate([self.X, np_embeddings], axis=1) 34 | 35 | if self.X is not None: 36 | self.X = torch.tensor(self.X, dtype=torch.float) 37 | if self.Y is not None: 38 | self.Y = torch.tensor(self.Y, dtype=torch.int) 39 | 40 | def __len__(self): 41 | if self.Y is None: 42 | return len(self.X) 43 | else: 44 | 
return len(self.Y) 45 | 46 | def features_count(self): 47 | if self.X is not None: 48 | return self.X.shape[1] 49 | return None 50 | 51 | def __getitem__(self, idx): 52 | if self.Y is None: 53 | return self.X[idx] 54 | elif self.X is None: 55 | return self.Y[idx] 56 | else: 57 | return self.X[idx], self.Y[idx] 58 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD_VERTICAL_FEDONCE/traininsilo/models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class SimpleVAETop(nn.Module): 6 | """Top (Host) part of the LSTM based VAE with head composed of Linear layers interleaved by ReLU activations""" 7 | 8 | def __init__(self, latent_dim, hidden_dim=32) -> None: 9 | super().__init__() 10 | self._latent_dim = latent_dim 11 | self._hidden_dim = hidden_dim 12 | 13 | self.seq = torch.nn.Sequential( 14 | nn.Linear(in_features=self._latent_dim, out_features=self._hidden_dim), 15 | nn.ReLU(), 16 | nn.Linear(in_features=self._hidden_dim, out_features=1), 17 | nn.Sigmoid(), 18 | ) 19 | self._init_weights() 20 | 21 | def _init_weights(self): 22 | for m in self.modules(): 23 | if isinstance(m, nn.Linear): 24 | torch.nn.init.xavier_uniform_(m.weight) 25 | m.bias.data.fill_(0.01) 26 | 27 | def forward(self, x): 28 | x = self.seq(x).squeeze() 29 | return x 30 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD_VERTICAL_FEDONCE/traininsilo/spec.yaml: -------------------------------------------------------------------------------- 1 | 2 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 3 | name: msft_fl_ccfraud_vertical_fedonce_train_in_silo_host 4 | version: 0.0.1 5 | display_name: CC Fraud Train (in silo) 6 | type: command 7 | description: Component to train a model to classify CC Fraud. 8 | is_deterministic: false 9 | 10 | inputs: 11 | train_data: 12 | type: uri_folder 13 | description: the input training data (preprocessed) 14 | test_data: 15 | type: uri_folder 16 | description: the input testing data (preprocessed) 17 | metrics_prefix: 18 | type: string 19 | description: Metrics prefix 20 | default: Default-prefix 21 | optional: true 22 | checkpoint: 23 | type: uri_folder 24 | description: a given pre-existing checkpoint 25 | optional: true 26 | contributor_1_embeddings: 27 | type: uri_folder 28 | description: path to embeddings extracted by contributor 29 | optional: true 30 | contributor_2_embeddings: 31 | type: uri_folder 32 | description: path to embeddings extracted by contributor 33 | optional: true 34 | contributor_3_embeddings: 35 | type: uri_folder 36 | description: path to embeddings extracted by contributor 37 | optional: true 38 | lr: 39 | type: number 40 | description: learning rate 41 | default: 1e-3 42 | optional: true 43 | epochs: 44 | type: integer 45 | description: total number of epochs for local training 46 | default: 10 47 | optional: true 48 | batch_size: 49 | type: integer 50 | description: batch size 51 | default: 100 52 | optional: true 53 | 54 | outputs: 55 | model: 56 | type: uri_folder 57 | description: the output checkpoint 58 | 59 | code: . 
60 | 61 | command: >- 62 | python run.py 63 | --train_data ${{inputs.train_data}} 64 | --test_data ${{inputs.test_data}} 65 | --model_path ${{outputs.model}} 66 | $[[--contributor_1_embeddings ${{inputs.contributor_1_embeddings}}]] 67 | $[[--contributor_2_embeddings ${{inputs.contributor_2_embeddings}}]] 68 | $[[--contributor_3_embeddings ${{inputs.contributor_3_embeddings}}]] 69 | $[[--metrics_prefix ${{inputs.metrics_prefix}}]] 70 | $[[--checkpoint ${{inputs.checkpoint}}]] 71 | $[[--lr ${{inputs.lr}}]] 72 | $[[--epochs ${{inputs.epochs}}]] 73 | $[[--batch_size ${{inputs.batch_size}}]] 74 | 75 | environment: 76 | conda_file: ./conda.yaml 77 | image: mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04 78 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD_VERTICAL_FEDONCE/upload_data/conda.yaml: -------------------------------------------------------------------------------- 1 | name: ccfraud_vertical_upload_data_conda_env 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.8 6 | - pip=22.2.2 7 | - pip: 8 | - azure-identity==1.12.0 9 | - azure-keyvault==4.2.0 10 | - azureml-core==1.47.0 11 | - kaggle==1.5.12 12 | - scikit-learn==1.1.3 13 | - numpy==1.23.5 14 | - pandas==1.3.5 15 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD_VERTICAL_FEDONCE/upload_data/spec.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: msft_fl_ccfraud_vertical_fedonce_upload_data 3 | version: 0.0.1 4 | display_name: Download CC Fraud data from Kaggle and upload to silo storage 5 | type: command 6 | is_deterministic: true 7 | 8 | inputs: 9 | silo_count: 10 | type: number 11 | optional: false 12 | silo_index: 13 | type: number 14 | optional: false 15 | 16 | outputs: 17 | raw_train_data: 18 | type: uri_folder 19 | description: the output CC Fraud raw training data for a given silo 20 | raw_test_data: 21 | type: uri_folder 22 | description: the output CC Fraud raw testing data for a given silo 23 | 24 | code: . 
25 | 26 | command: >- 27 | python run.py --silo_count ${{inputs.silo_count}} --silo_index ${{inputs.silo_index}} --raw_train_data ${{outputs.raw_train_data}} --raw_test_data ${{outputs.raw_test_data}} 28 | 29 | environment: 30 | conda_file: ./conda.yaml 31 | image: mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04 32 | -------------------------------------------------------------------------------- /examples/components/CCFRAUD_VERTICAL_FEDONCE/upload_data/us_regions.csv: -------------------------------------------------------------------------------- 1 | State,StateCode,Region,Division 2 | Alaska,AK,West,Pacific 3 | Alabama,AL,South,East South Central 4 | Arkansas,AR,South,West South Central 5 | Arizona,AZ,West,Mountain 6 | California,CA,West,Pacific 7 | Colorado,CO,West,Mountain 8 | Connecticut,CT,Northeast,New England 9 | District of Columbia,DC,South,South Atlantic 10 | Delaware,DE,South,South Atlantic 11 | Florida,FL,South,South Atlantic 12 | Georgia,GA,South,South Atlantic 13 | Hawaii,HI,West,Pacific 14 | Iowa,IA,Midwest,West North Central 15 | Idaho,ID,West,Mountain 16 | Illinois,IL,Midwest,East North Central 17 | Indiana,IN,Midwest,East North Central 18 | Kansas,KS,Midwest,West North Central 19 | Kentucky,KY,South,East South Central 20 | Louisiana,LA,South,West South Central 21 | Massachusetts,MA,Northeast,New England 22 | Maryland,MD,South,South Atlantic 23 | Maine,ME,Northeast,New England 24 | Michigan,MI,Midwest,East North Central 25 | Minnesota,MN,Midwest,West North Central 26 | Missouri,MO,Midwest,West North Central 27 | Mississippi,MS,South,East South Central 28 | Montana,MT,West,Mountain 29 | North Carolina,NC,South,South Atlantic 30 | North Dakota,ND,Midwest,West North Central 31 | Nebraska,NE,Midwest,West North Central 32 | New Hampshire,NH,Northeast,New England 33 | New Jersey,NJ,Northeast,Middle Atlantic 34 | New Mexico,NM,West,Mountain 35 | Nevada,NV,West,Mountain 36 | New York,NY,Northeast,Middle Atlantic 37 | Ohio,OH,Midwest,East North Central 38 | Oklahoma,OK,South,West South Central 39 | Oregon,OR,West,Pacific 40 | Pennsylvania,PA,Northeast,Middle Atlantic 41 | Rhode Island,RI,Northeast,New England 42 | South Carolina,SC,South,South Atlantic 43 | South Dakota,SD,Midwest,West North Central 44 | Tennessee,TN,South,East South Central 45 | Texas,TX,South,West South Central 46 | Utah,UT,West,Mountain 47 | Virginia,VA,South,South Atlantic 48 | Vermont,VT,Northeast,New England 49 | Washington,WA,West,Pacific 50 | Wisconsin,WI,Midwest,East North Central 51 | West Virginia,WV,South,South Atlantic 52 | Wyoming,WY,West,Mountain 53 | -------------------------------------------------------------------------------- /examples/components/FLWR/client/pneumonia_network.py: -------------------------------------------------------------------------------- 1 | # This file defining the model was taken as-is from https://github.com/Azure/medical-imaging/blob/main/federated-learning/pneumonia-federated/custom/pneumonia_network.py. 
2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class PneumoniaNetwork(nn.Module): 8 | def __init__(self): 9 | super(PneumoniaNetwork, self).__init__() 10 | dropout = 0.2 11 | 12 | self.conv1 = nn.Conv2d( 13 | in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1 14 | ) 15 | self.conv2 = nn.Conv2d( 16 | in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1 17 | ) 18 | self.conv3 = nn.Conv2d( 19 | in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1 20 | ) 21 | 22 | self.dropout1 = nn.Dropout(dropout) 23 | self.dropout2 = nn.Dropout(dropout) 24 | 25 | self.fc1 = nn.Linear(28 * 28 * 128, 256) 26 | self.fc2 = nn.Linear(256, 2) 27 | 28 | def forward(self, x): 29 | x = F.relu(self.conv1(x)) # 224 x 224 x 32 30 | x = F.max_pool2d(x, 2, 2) # 112 x 112 x 32 31 | x = F.relu(self.conv2(x)) # 112 x 112 x 64 32 | x = F.max_pool2d(x, 2, 2) # 56 x 56 x 64 33 | x = self.dropout1(x) 34 | x = F.relu(self.conv3(x)) # 56 x 56 x 128 35 | x = F.max_pool2d(x, 2, 2) # 28 x 28 x 128 36 | x = self.dropout2(x) 37 | x = x.view(-1, 28 * 28 * 128) # 100.352 38 | x = F.relu(self.fc1(x)) 39 | x = self.fc2(x) 40 | return x 41 | -------------------------------------------------------------------------------- /examples/components/FLWR/client/spec.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: msft_fl_flower_client 3 | version: 0.0.5 4 | display_name: Flower client 5 | type: command 6 | description: This component runs a Flower client inside an AzureML job. 7 | is_deterministic: true 8 | tags: 9 | flower: 1.2.0 10 | url: https://github.com/Azure-Samples/azure-ml-federated-learning 11 | 12 | inputs: 13 | federation_identifier: 14 | type: string 15 | client_data: 16 | type: uri_folder 17 | optional: true 18 | description: "an optional folder containing data for the client to use" 19 | lr: 20 | type: number 21 | description: learning rate 22 | default: 0.01 23 | optional: true 24 | epochs: 25 | type: integer 26 | description: total number of epochs for local training 27 | default: 3 28 | optional: true 29 | checkpoint: 30 | type: uri_folder 31 | description: a given pre-existing model checkpoint 32 | optional: true 33 | metrics_prefix: 34 | type: string 35 | description: Metrics prefix 36 | default: Default-prefix 37 | optional: true 38 | 39 | code: "." 
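# The $[[ ... ]] wrappers below mark optional arguments: AzureML only renders the
# bracketed segment when the corresponding optional input is provided, otherwise
# the flag is dropped from the command line entirely.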
40 | 41 | command: >- 42 | python run.py 43 | --federation_identifier ${{inputs.federation_identifier}} 44 | $[[--client_data ${{inputs.client_data}}]] 45 | $[[--checkpoint ${{inputs.checkpoint}}]] 46 | $[[--lr ${{inputs.lr}}]] 47 | $[[--epochs ${{inputs.epochs}}]] 48 | $[[--metrics_prefix ${{inputs.metrics_prefix}}]] 49 | 50 | environment: 51 | build: 52 | path: ../flower_pytorch_env/context/ 53 | -------------------------------------------------------------------------------- /examples/components/FLWR/flower_pytorch_env/context/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:22.09-py3 2 | FROM ${PYTORCH_IMAGE} 3 | 4 | RUN python3 -m pip install -U pip 5 | RUN python3 -m pip install -U setuptools 6 | 7 | # Install dependencies missing in this container 8 | ADD requirements.txt /tmp/requirements.txt 9 | RUN python3 -m pip install -r /tmp/requirements.txt 10 | -------------------------------------------------------------------------------- /examples/components/FLWR/flower_pytorch_env/context/requirements.txt: -------------------------------------------------------------------------------- 1 | azureml-mlflow==1.48.0 2 | flwr==1.2.0 3 | -------------------------------------------------------------------------------- /examples/components/FLWR/flower_pytorch_env/env.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json 2 | name: flower-pt 3 | version: 1.2.0-pytorch 4 | build: 5 | path: ./context/ 6 | -------------------------------------------------------------------------------- /examples/components/FLWR/server/spec.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: msft_fl_flower_server 3 | version: 0.0.5 4 | display_name: Flower server 5 | type: command 6 | description: This component runs a Flower server inside an AzureML job. 7 | is_deterministic: true 8 | tags: 9 | flower: 1.2.0 10 | url: https://github.com/Azure-Samples/azure-ml-federated-learning 11 | 12 | inputs: 13 | federation_identifier: 14 | type: string 15 | expected_clients: 16 | type: integer 17 | wait_for_clients_timeout: 18 | type: integer 19 | default: 600 20 | 21 | outputs: 22 | job_artefacts: 23 | type: uri_folder 24 | 25 | code: "." 26 | 27 | command: >- 28 | python run.py 29 | --federation_identifier ${{inputs.federation_identifier}} 30 | --expected_clients ${{inputs.expected_clients}} 31 | --output_dir ${{outputs.job_artefacts}} 32 | --wait_for_clients_timeout ${{inputs.wait_for_clients_timeout}} 33 | 34 | environment: 35 | build: 36 | path: ../flower_pytorch_env/context/ 37 | -------------------------------------------------------------------------------- /examples/components/HELLOWORLD/aggregatemodelweights/run.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import logging 4 | import sys 5 | import glob 6 | 7 | 8 | def get_arg_parser(parser=None): 9 | """Parse the command line arguments for merge using argparse. 
10 | 11 | Args: 12 | parser (argparse.ArgumentParser or CompliantArgumentParser): 13 | an argument parser instance 14 | 15 | Returns: 16 | ArgumentParser: the argument parser instance 17 | 18 | Notes: 19 | if parser is None, creates a new parser instance 20 | """ 21 | # add arguments that are specific to the component 22 | if parser is None: 23 | parser = argparse.ArgumentParser(description=__doc__) 24 | 25 | parser.add_argument("--input_silo_1", type=str, required=True, help="") 26 | parser.add_argument("--input_silo_2", type=str, required=False, help="") 27 | parser.add_argument("--input_silo_3", type=str, required=False, help="") 28 | parser.add_argument("--aggregated_output", type=str, required=True, help="") 29 | return parser 30 | 31 | 32 | def test_input(path): 33 | file_list = glob.glob(os.path.join(path, "*.*"), recursive=True) 34 | print(f"Found {len(file_list)} files in {path}") 35 | 36 | print(f"Reading files from {path}") 37 | for file in file_list: 38 | print(f" -- Reading {file}") 39 | with open(file, "r") as f: 40 | f.read() 41 | 42 | 43 | def test_output(path): 44 | print(f"Writing output to {path}/aggregate.txt") 45 | with open(os.path.join(path, "aggregate.txt"), "w") as f: 46 | f.write("Hello World!") 47 | 48 | 49 | def main(cli_args=None): 50 | """Component main function. 51 | 52 | It parses arguments and executes run() with the right arguments. 53 | 54 | Args: 55 | cli_args (List[str], optional): list of args to feed script, useful for debugging. Defaults to None. 56 | """ 57 | # build an arg parser 58 | parser = get_arg_parser() 59 | 60 | # run the parser on cli args 61 | args = parser.parse_args(cli_args) 62 | 63 | print(f"Running script with arguments: {args}") 64 | test_input(args.input_silo_1) 65 | if args.input_silo_2: 66 | test_input(args.input_silo_2) 67 | if args.input_silo_3: 68 | test_input(args.input_silo_3) 69 | test_output(args.aggregated_output) 70 | 71 | 72 | if __name__ == "__main__": 73 | # Set logging to sys.out 74 | logger = logging.getLogger(__name__) 75 | logger.setLevel(logging.DEBUG) 76 | log_format = logging.Formatter("[%(asctime)s] [%(levelname)s] - %(message)s") 77 | handler = logging.StreamHandler(sys.stdout) 78 | handler.setLevel(logging.DEBUG) 79 | handler.setFormatter(log_format) 80 | logger.addHandler(handler) 81 | 82 | main() 83 | -------------------------------------------------------------------------------- /examples/components/HELLOWORLD/aggregatemodelweights/spec.yaml: -------------------------------------------------------------------------------- 1 | 2 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 3 | name: msft_fl_helloworld_aggregate_model_weights 4 | version: 0.3.0 5 | display_name: Aggregate Model Weights (from all silos) 6 | type: command 7 | description: Component for aggreating model weights. 8 | is_deterministic: true 9 | 10 | inputs: 11 | input_silo_1: 12 | type: uri_folder 13 | description: input from silo 1 (e.g., model weights, or gradient updates) 14 | optional: false 15 | input_silo_2: 16 | type: uri_folder 17 | description: input from silo 2 (e.g., model weights, or gradient updates) 18 | optional: true 19 | input_silo_3: 20 | type: uri_folder 21 | description: input from silo 3 (e.g., model weights, or gradient updates) 22 | optional: true 23 | 24 | outputs: 25 | aggregated_output: 26 | type: uri_folder 27 | description: the aggregated model or gradiants, residing in the orchestrator compute. 28 | 29 | code: . 
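# For illustration, when only two silos are wired up (input_silo_3 left unset) the
# command below resolves to roughly:
#   python run.py --aggregated_output <path> --input_silo_1 <path> --input_silo_2 <path>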
30 | 31 | command: >- 32 | python run.py --aggregated_output ${{outputs.aggregated_output}} 33 | --input_silo_1 ${{inputs.input_silo_1}} 34 | $[[--input_silo_2 ${{inputs.input_silo_2}}]] 35 | $[[--input_silo_3 ${{inputs.input_silo_3}}]] 36 | 37 | environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu:30 38 | -------------------------------------------------------------------------------- /examples/components/HELLOWORLD/preprocessing/run.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import logging 4 | import sys 5 | import glob 6 | 7 | 8 | def get_arg_parser(parser=None): 9 | """Parse the command line arguments for merge using argparse. 10 | 11 | Args: 12 | parser (argparse.ArgumentParser or CompliantArgumentParser): 13 | an argument parser instance 14 | 15 | Returns: 16 | ArgumentParser: the argument parser instance 17 | 18 | Notes: 19 | if parser is None, creates a new parser instance 20 | """ 21 | # add arguments that are specific to the component 22 | if parser is None: 23 | parser = argparse.ArgumentParser(description=__doc__) 24 | 25 | parser.add_argument("--raw_training_data", type=str, required=True, help="") 26 | parser.add_argument("--raw_testing_data", type=str, required=True, help="") 27 | parser.add_argument("--train_output", type=str, required=True, help="") 28 | parser.add_argument("--test_output", type=str, required=True, help="") 29 | parser.add_argument( 30 | "--metrics_prefix", type=str, required=False, help="Metrics prefix" 31 | ) 32 | return parser 33 | 34 | 35 | def test_input(path): 36 | file_list = glob.glob(os.path.join(path, "*.*"), recursive=True) 37 | print(f"Found {len(file_list)} files in {path}") 38 | 39 | print(f"Reading files from {path}") 40 | for file in file_list: 41 | print(f" -- Reading {file}") 42 | with open(file, "r") as f: 43 | f.read() 44 | 45 | 46 | def test_output(path): 47 | with open(os.path.join(path, "output.txt"), "w") as f: 48 | f.write("Hello World!") 49 | 50 | 51 | def main(cli_args=None): 52 | """Component main function. 53 | 54 | It parses arguments and executes run() with the right arguments. 55 | 56 | Args: 57 | cli_args (List[str], optional): list of args to feed script, useful for debugging. Defaults to None. 
58 | """ 59 | # build an arg parser 60 | parser = get_arg_parser() 61 | 62 | # run the parser on cli args 63 | args = parser.parse_args(cli_args) 64 | 65 | print(f"Running script with arguments: {args}") 66 | test_input(args.raw_training_data) 67 | test_input(args.raw_testing_data) 68 | test_output(args.train_output) 69 | test_output(args.test_output) 70 | 71 | 72 | if __name__ == "__main__": 73 | # Set logging to sys.out 74 | logger = logging.getLogger(__name__) 75 | logger.setLevel(logging.DEBUG) 76 | log_format = logging.Formatter("[%(asctime)s] [%(levelname)s] - %(message)s") 77 | handler = logging.StreamHandler(sys.stdout) 78 | handler.setLevel(logging.DEBUG) 79 | handler.setFormatter(log_format) 80 | logger.addHandler(handler) 81 | 82 | main() 83 | -------------------------------------------------------------------------------- /examples/components/HELLOWORLD/preprocessing/spec.yaml: -------------------------------------------------------------------------------- 1 | 2 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 3 | name: msft_fl_helloworld_preprocessing_in_silo 4 | version: 0.3.0 5 | display_name: Pre-Processing (in silo) 6 | type: command 7 | description: Component for preprocessing raw data in a given silo. The images are transformed using random affine keeping the center invariant, then normalized. 8 | is_deterministic: true 9 | 10 | inputs: 11 | raw_training_data: 12 | type: uri_file 13 | description: the raw training data in a given silo 14 | raw_testing_data: 15 | type: uri_file 16 | description: the raw testing data in a given silo 17 | metrics_prefix: 18 | type: string 19 | description: Metrics prefix 20 | default: Default-prefix 21 | optional: true 22 | 23 | 24 | outputs: 25 | processed_train_data: 26 | type: uri_folder 27 | description: the output training data after preprocessing 28 | processed_test_data: 29 | type: uri_folder 30 | description: the output testing data after preprocessing 31 | 32 | code: . 33 | 34 | command: >- 35 | python run.py --raw_training_data ${{inputs.raw_training_data}} --raw_testing_data ${{inputs.raw_testing_data}} --train_output ${{outputs.processed_train_data}} --test_output ${{outputs.processed_test_data}} $[[--metrics_prefix ${{inputs.metrics_prefix}}]] 36 | 37 | environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu:30 38 | -------------------------------------------------------------------------------- /examples/components/HELLOWORLD/traininsilo/spec.yaml: -------------------------------------------------------------------------------- 1 | 2 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 3 | name: msft_fl_helloworld_train_in_silo 4 | version: 0.3.0 5 | display_name: Train (in silo) 6 | type: command 7 | description: Component to train a model within a FL silo. 
8 | is_deterministic: true 9 | 10 | inputs: 11 | train_data: 12 | type: uri_folder 13 | description: the input training data 14 | test_data: 15 | type: uri_folder 16 | description: the input testing data 17 | metrics_prefix: 18 | type: string 19 | description: Metrics prefix 20 | default: Default-prefix 21 | optional: true 22 | iteration_num: 23 | type: integer 24 | description: Iteration number 25 | default: 1 26 | optional: true 27 | checkpoint: 28 | type: uri_folder 29 | description: a given pre-existing checkpoint 30 | optional: true 31 | lr: 32 | type: number 33 | description: learning rate 34 | default: 0.01 35 | optional: true 36 | epochs: 37 | type: integer 38 | description: total number of epochs for local training 39 | default: 3 40 | optional: true 41 | batch_size: 42 | type: integer 43 | description: batch size 44 | default: 64 45 | optional: true 46 | dp: 47 | type: boolean 48 | description: differential privacy 49 | default: false 50 | optional: true 51 | dp_target_epsilon: 52 | type: number 53 | description: DP target epsilon 54 | default: 50.0 55 | optional: true 56 | dp_target_delta: 57 | type: number 58 | description: DP target delta 59 | default: 1e-5 60 | optional: true 61 | dp_max_grad_norm: 62 | type: number 63 | description: DP max gradient norm 64 | default: 1.0 65 | optional: true 66 | total_num_of_iterations: 67 | type: integer 68 | description: Total num of iterations 69 | default: 1 70 | optional: true 71 | 72 | outputs: 73 | model: 74 | type: uri_folder 75 | description: the output checkpoint 76 | 77 | code: . 78 | 79 | command: >- 80 | python run.py --train_data ${{inputs.train_data}} --test_data ${{inputs.test_data}} $[[--metrics_prefix ${{inputs.metrics_prefix}}]] $[[--iteration_num ${{inputs.iteration_num}}]] $[[--checkpoint ${{inputs.checkpoint}}]] --model ${{outputs.model}} $[[--lr ${{inputs.lr}}]] $[[--epochs ${{inputs.epochs}}]] $[[--batch_size ${{inputs.batch_size}}]] $[[--dp ${{inputs.dp}}]] $[[--total_num_of_iterations ${{inputs.total_num_of_iterations}}]] $[[--dp_target_epsilon ${{inputs.dp_target_epsilon}}]] $[[--dp_target_delta ${{inputs.dp_target_delta}}]] $[[--dp_max_grad_norm ${{inputs.dp_max_grad_norm}}]] 81 | 82 | environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu:30 83 | -------------------------------------------------------------------------------- /examples/components/MNIST/preprocessing/conda.yaml: -------------------------------------------------------------------------------- 1 | name: mnist_conda_env 2 | channels: 3 | - defaults 4 | - pytorch 5 | dependencies: 6 | - python=3.8 7 | - pip=22.3.1 8 | - pytorch=1.12.1 9 | - torchvision=0.13.1 10 | - cudatoolkit=11.3 11 | - pip: 12 | - azureml-mlflow==1.48.0 13 | - pandas==1.5.2 14 | -------------------------------------------------------------------------------- /examples/components/MNIST/preprocessing/spec.yaml: -------------------------------------------------------------------------------- 1 | 2 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 3 | name: msft_fl_mnist_preprocessing_in_silo 4 | version: 0.3.0 5 | display_name: MNIST Pre-Processing (in silo) 6 | type: command 7 | description: Component for preprocessing MNIST data in a given silo. The images are transformed using random affine keeping the center invariant, then normalized. 
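# A minimal sketch of the transform described above (the exact parameters live in this
# component's run.py and are assumptions here):
#   transforms.Compose([
#       transforms.RandomAffine(degrees=..., translate=...),  # affine, center kept invariant
#       transforms.ToTensor(),
#       transforms.Normalize(mean, std),
#   ])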
8 | is_deterministic: true 9 | 10 | inputs: 11 | raw_training_data: 12 | type: uri_file 13 | description: the raw training data in a given silo 14 | raw_testing_data: 15 | type: uri_file 16 | description: the raw testing data in a given silo 17 | metrics_prefix: 18 | type: string 19 | description: Metrics prefix 20 | default: Default-prefix 21 | optional: true 22 | 23 | 24 | outputs: 25 | processed_train_data: 26 | type: uri_folder 27 | description: the output training data after preprocessing 28 | processed_test_data: 29 | type: uri_folder 30 | description: the output testing data after preprocessing 31 | 32 | code: . 33 | 34 | command: >- 35 | python run.py --raw_training_data ${{inputs.raw_training_data}} --raw_testing_data ${{inputs.raw_testing_data}} --train_output ${{outputs.processed_train_data}} --test_output ${{outputs.processed_test_data}} $[[--metrics_prefix ${{inputs.metrics_prefix}}]] 36 | 37 | environment: 38 | conda_file: ./conda.yaml 39 | image: mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04 40 | -------------------------------------------------------------------------------- /examples/components/MNIST/traininsilo/conda.yaml: -------------------------------------------------------------------------------- 1 | name: mnist_conda_env 2 | channels: 3 | - defaults 4 | - pytorch 5 | - nvidia 6 | dependencies: 7 | - python=3.8 8 | - pip=22.3.1 9 | - pytorch=2.0.0 10 | - pytorch-cuda=11.6 11 | - pip: 12 | - azureml-mlflow==1.48.0 13 | - pandas==1.5.2 14 | - opacus==1.3.0 15 | - tqdm==4.64.1 16 | - torchvision==0.15.1 17 | -------------------------------------------------------------------------------- /examples/components/MNIST_VERTICAL/traininsilo/conda.yaml: -------------------------------------------------------------------------------- 1 | name: mnist_vertical_train_conda_env 2 | channels: 3 | - defaults 4 | - pytorch 5 | - nvidia 6 | dependencies: 7 | - python=3.8 8 | - pip=22.3.1 9 | - pytorch=1.13.1 10 | - torchvision=0.13.1 11 | - pytorch-cuda=11.6 12 | - pip: 13 | - azureml-core==1.47.0 14 | - azure-keyvault==4.2.0 15 | - azureml-mlflow==1.48.0 16 | - pandas==1.5.2 17 | - redis==4.5.1 18 | -------------------------------------------------------------------------------- /examples/components/MNIST_VERTICAL/traininsilo/contributor_spec.yaml: -------------------------------------------------------------------------------- 1 | 2 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 3 | name: msft_fl_mnist_vertical_train_in_silo_contributor 4 | version: 0.0.1 5 | display_name: MNIST Train (in silo) 6 | type: command 7 | description: Component to train a model on MNIST dataset. 
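# Vertical-FL note: each silo holds a different slice of the features for the same
# samples; contributor.py identifies itself by global_rank (out of global_size parties)
# and talks to the host over the chosen communication_backend (socket or redis),
# typically exchanging intermediate embeddings and gradients rather than raw data.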
8 | is_deterministic: false 9 | 10 | inputs: 11 | train_data: 12 | type: uri_folder 13 | description: the input training data 14 | test_data: 15 | type: uri_folder 16 | description: the input testing data 17 | metrics_prefix: 18 | type: string 19 | description: Metrics prefix 20 | default: Default-prefix 21 | optional: true 22 | checkpoint: 23 | type: uri_folder 24 | description: a given pre-existing checkpoint 25 | optional: true 26 | lr: 27 | type: number 28 | description: learning rate 29 | default: 0.01 30 | optional: true 31 | epochs: 32 | type: integer 33 | description: total number of epochs for local training 34 | default: 3 35 | optional: true 36 | batch_size: 37 | type: integer 38 | description: batch size 39 | default: 64 40 | optional: true 41 | global_size: 42 | type: number 43 | optional: false 44 | global_rank: 45 | type: number 46 | optional: false 47 | communication_backend: 48 | type: string 49 | enum: 50 | - socket 51 | - redis 52 | default: socket 53 | optional: true 54 | communication_encrypted: 55 | type: boolean 56 | description: Encrypt messages exchanged between the nodes 57 | optional: true 58 | 59 | outputs: 60 | model: 61 | type: uri_folder 62 | description: the output checkpoint 63 | 64 | code: . 65 | 66 | command: >- 67 | python contributor.py 68 | --train_data ${{inputs.train_data}} 69 | --test_data ${{inputs.test_data}} 70 | $[[--metrics_prefix ${{inputs.metrics_prefix}}]] 71 | $[[--checkpoint ${{inputs.checkpoint}}]] 72 | --model ${{outputs.model}} 73 | $[[--lr ${{inputs.lr}}]] 74 | $[[--epochs ${{inputs.epochs}}]] 75 | $[[--batch_size ${{inputs.batch_size}}]] 76 | --global_size ${{inputs.global_size}} 77 | --global_rank ${{inputs.global_rank}} 78 | $[[--communication_backend ${{inputs.communication_backend}}]] 79 | $[[--communication_encrypted ${{inputs.communication_encrypted}}]] 80 | 81 | environment: 82 | conda_file: ./conda.yaml 83 | image: mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04 84 | -------------------------------------------------------------------------------- /examples/components/MNIST_VERTICAL/traininsilo/host_spec.yaml: -------------------------------------------------------------------------------- 1 | 2 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 3 | name: msft_fl_mnist_vertical_train_in_silo_host 4 | version: 0.0.1 5 | display_name: MNIST Train (in silo) 6 | type: command 7 | description: Component to train a model on MNIST dataset. 
8 | is_deterministic: false 9 | 10 | inputs: 11 | train_data: 12 | type: uri_folder 13 | description: the input training data 14 | test_data: 15 | type: uri_folder 16 | description: the input testing data 17 | metrics_prefix: 18 | type: string 19 | description: Metrics prefix 20 | default: Default-prefix 21 | optional: true 22 | checkpoint: 23 | type: uri_folder 24 | description: a given pre-existing checkpoint 25 | optional: true 26 | lr: 27 | type: number 28 | description: learning rate 29 | default: 0.01 30 | optional: true 31 | epochs: 32 | type: integer 33 | description: total number of epochs for local training 34 | default: 3 35 | optional: true 36 | batch_size: 37 | type: integer 38 | description: batch size 39 | default: 64 40 | optional: true 41 | global_size: 42 | type: number 43 | optional: false 44 | global_rank: 45 | type: number 46 | optional: false 47 | communication_backend: 48 | type: string 49 | enum: 50 | - socket 51 | - redis 52 | default: socket 53 | optional: true 54 | communication_encrypted: 55 | type: boolean 56 | description: Encrypt messages exchanged between the nodes 57 | optional: true 58 | 59 | outputs: 60 | model: 61 | type: uri_folder 62 | description: the output checkpoint 63 | 64 | code: . 65 | 66 | command: >- 67 | python host.py 68 | --train_data ${{inputs.train_data}} 69 | --test_data ${{inputs.test_data}} 70 | $[[--metrics_prefix ${{inputs.metrics_prefix}}]] 71 | $[[--checkpoint ${{inputs.checkpoint}}]] 72 | --model ${{outputs.model}} 73 | $[[--lr ${{inputs.lr}}]] 74 | $[[--epochs ${{inputs.epochs}}]] 75 | $[[--batch_size ${{inputs.batch_size}}]] 76 | --global_size ${{inputs.global_size}} 77 | --global_rank ${{inputs.global_rank}} 78 | $[[--communication_backend ${{inputs.communication_backend}}]] 79 | $[[--communication_encrypted ${{inputs.communication_encrypted}}]] 80 | 81 | environment: 82 | conda_file: ./conda.yaml 83 | image: mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04 84 | -------------------------------------------------------------------------------- /examples/components/MNIST_VERTICAL/traininsilo/samplers.py: -------------------------------------------------------------------------------- 1 | ########################################################################################## 2 | # WARNING # 3 | ########################################################################################## 4 | # Should this file change please update all copies of samplers.py file in the repository # 5 | ########################################################################################## 6 | 7 | import math 8 | import torch 9 | from torch.utils.data import Sampler 10 | 11 | 12 | class VerticallyDistributedBatchSampler(Sampler): 13 | """Batch sampler that uses a distributed communication backend to distribute samples indexes to each worker.""" 14 | 15 | def __init__(self, data_source, batch_size, comm, rank, world_size, shuffle=False): 16 | """Initializes the batch sampler. 17 | 18 | Args: 19 | data_source (torch.utils.data.Dataset): The dataset to sample from. 20 | batch_size (int): The size of the batch to sample. 21 | comm (AMLComm): The communicator to use for communication. 22 | rank (int): The rank of the current worker. 23 | world_size (int): The total number of workers. 24 | shuffle (bool, optional): Whether to shuffle the indices. Defaults to False. 
25 | """ 26 | self.data_source = data_source 27 | self.batch_size = batch_size 28 | self.shuffle = shuffle 29 | self.rank = rank 30 | self.world_size = world_size 31 | self.comm = comm 32 | 33 | def __iter__(self): 34 | if self.rank == 0: 35 | if self.shuffle: 36 | indices = torch.randperm(len(self.data_source)) 37 | else: 38 | indices = torch.arange(len(self.data_source)) 39 | 40 | # Split the indices into batches 41 | batches = [ 42 | indices[i : i + self.batch_size] 43 | for i in range(0, len(indices), self.batch_size) 44 | ] 45 | 46 | for batch in batches: 47 | for i in range(1, self.world_size): 48 | # Send the batch to contributor i 49 | self.comm.send(batch, i) 50 | 51 | yield batch 52 | else: 53 | for i in range(0, len(self.data_source), self.batch_size): 54 | # Receive the batch from host 55 | batch = self.comm.recv(0) 56 | yield batch 57 | 58 | def __len__(self): 59 | return math.ceil(len(self.data_source) / self.batch_size) 60 | -------------------------------------------------------------------------------- /examples/components/MNIST_VERTICAL/upload_data/conda.yaml: -------------------------------------------------------------------------------- 1 | name: mnist_vertical_upload_data_conda_env 2 | channels: 3 | - defaults 4 | - pytorch 5 | dependencies: 6 | - python=3.8 7 | - pip=22.1.2 8 | - pytorch=1.12.1 9 | - torchvision=0.13.1 10 | - cudatoolkit=11.3 11 | - pip: 12 | - azureml-mlflow==1.48.0 13 | - pandas==1.5.2 14 | - tqdm==4.64.1 15 | -------------------------------------------------------------------------------- /examples/components/MNIST_VERTICAL/upload_data/spec.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: msft_fl_mnist_vertical_upload_data 3 | version: 0.0.1 4 | display_name: Download MNIST data and upload to silo storage partitioning vertically 5 | type: command 6 | is_deterministic: true 7 | 8 | inputs: 9 | silo_count: 10 | type: number 11 | optional: false 12 | silo_index: 13 | type: number 14 | optional: false 15 | 16 | outputs: 17 | raw_train_data: 18 | type: uri_folder 19 | description: the output CC Fraud raw training data for a given silo 20 | raw_test_data: 21 | type: uri_folder 22 | description: the output CC Fraud raw testing data for a given silo 23 | 24 | code: . 
25 | 26 | command: >- 27 | python run.py --silo_count ${{inputs.silo_count}} --silo_index ${{inputs.silo_index}} --raw_train_data ${{outputs.raw_train_data}} --raw_test_data ${{outputs.raw_test_data}} 28 | 29 | environment: 30 | conda_file: ./conda.yaml 31 | image: mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04 32 | -------------------------------------------------------------------------------- /examples/components/NER/preprocessing/conda.yaml: -------------------------------------------------------------------------------- 1 | name: ner_preprocess_conda_env 2 | channels: 3 | - defaults 4 | - pytorch 5 | dependencies: 6 | - python=3.8 7 | - pip=22.3.1 8 | - pytorch=1.12.1 9 | - cudatoolkit=11.3 10 | - pip: 11 | - azureml-mlflow==1.48.0 12 | - pandas==1.5.2 13 | - transformers==4.25.1 14 | - datasets==2.7.1 15 | -------------------------------------------------------------------------------- /examples/components/NER/preprocessing/spec.yaml: -------------------------------------------------------------------------------- 1 | 2 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 3 | name: msft_fl_ner_preprocessing_in_silo 4 | version: 0.3.0 5 | display_name: MultiNERD Pre-Processing (in silo) 6 | type: command 7 | description: Component to preprocess the MultiNERD raw data 8 | is_deterministic: true 9 | 10 | inputs: 11 | raw_training_data: 12 | type: uri_folder 13 | description: the raw MultiNERD training data 14 | raw_testing_data: 15 | type: uri_folder 16 | description: the raw MultiNERD testing data 17 | tokenizer_name: 18 | type: string 19 | description: Tokenizer model name 20 | default: "bert-base-cased" 21 | optional: true 22 | metrics_prefix: 23 | type: string 24 | description: Metrics prefix 25 | default: Default-prefix 26 | optional: true 27 | 28 | outputs: 29 | processed_train_data: 30 | type: uri_folder 31 | description: the output training data after preprocessing 32 | processed_test_data: 33 | type: uri_folder 34 | description: the output testing data after preprocessing 35 | 36 | code: . 
37 | 38 | command: >- 39 | python run.py --raw_training_data ${{inputs.raw_training_data}} --raw_testing_data ${{inputs.raw_testing_data}} --train_output ${{outputs.processed_train_data}} --test_output ${{outputs.processed_test_data}} $[[--tokenizer_name ${{inputs.tokenizer_name}}]] $[[--metrics_prefix ${{inputs.metrics_prefix}}]] 40 | 41 | environment: 42 | conda_file: ./conda.yaml 43 | image: mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04 44 | -------------------------------------------------------------------------------- /examples/components/NER/traininsilo/conda.yaml: -------------------------------------------------------------------------------- 1 | name: ner_train_conda_env 2 | channels: 3 | - defaults 4 | - pytorch 5 | - nvidia 6 | dependencies: 7 | - python=3.8 8 | - pip=22.3.1 9 | - pytorch=1.13.1 10 | - pytorch-cuda=11.6 11 | - pip: 12 | - azureml-mlflow==1.48.0 13 | - pandas==1.5.2 14 | - transformers==4.25.1 15 | - datasets==2.7.1 16 | - evaluate==0.3.0 17 | - numpy==1.23.5 18 | - seqeval==1.2.2 19 | - opacus==1.3.0 20 | - tqdm==4.64.1 21 | -------------------------------------------------------------------------------- /examples/components/NER/traininsilo/labels.json: -------------------------------------------------------------------------------- 1 | { 2 | "O": 0, 3 | "B-PER": 1, 4 | "I-PER": 2, 5 | "B-LOC": 3, 6 | "I-LOC": 4, 7 | "B-ORG": 5, 8 | "I-ORG": 6, 9 | "B-ANIM": 7, 10 | "I-ANIM": 8, 11 | "B-BIO": 9, 12 | "I-BIO": 10, 13 | "B-CEL": 11, 14 | "I-CEL": 12, 15 | "B-DIS": 13, 16 | "I-DIS": 14, 17 | "B-EVE": 15, 18 | "I-EVE": 16, 19 | "B-FOOD": 17, 20 | "I-FOOD": 18, 21 | "B-INST": 19, 22 | "I-INST": 20, 23 | "B-MEDIA": 21, 24 | "I-MEDIA": 22, 25 | "B-PLANT": 23, 26 | "I-PLANT": 24, 27 | "B-MYTH": 25, 28 | "I-MYTH": 26, 29 | "B-TIME": 27, 30 | "I-TIME": 28, 31 | "B-VEHI": 29, 32 | "I-VEHI": 30, 33 | "B-SUPER": 31, 34 | "I-SUPER": 32, 35 | "B-PHY": 33, 36 | "I-PHY": 34 37 | } 38 | -------------------------------------------------------------------------------- /examples/components/NER/upload_data/conda.yaml: -------------------------------------------------------------------------------- 1 | name: ner_upload_data_conda_env 2 | channels: 3 | - defaults 4 | - pytorch 5 | dependencies: 6 | - python=3.8 7 | - pip=22.3.1 8 | - pip: 9 | - datasets==2.7.1 10 | -------------------------------------------------------------------------------- /examples/components/NER/upload_data/spec.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: msft_fl_ner_upload_data 3 | version: 0.3.0 4 | display_name: Download MultiNERD data and upload to silo storage 5 | type: command 6 | is_deterministic: true 7 | 8 | inputs: 9 | silo_count: 10 | type: number 11 | optional: false 12 | silo_index: 13 | type: number 14 | optional: false 15 | 16 | outputs: 17 | raw_train_data: 18 | type: uri_folder 19 | description: the output raw training data for a given silo 20 | raw_test_data: 21 | type: uri_folder 22 | description: the output raw testing data for a given silo 23 | 24 | code: . 
25 | 26 | command: >- 27 | python run.py --silo_count ${{inputs.silo_count}} --silo_index ${{inputs.silo_index}} --raw_train_data ${{outputs.raw_train_data}} --raw_test_data ${{outputs.raw_test_data}} 28 | environment: 29 | conda_file: ./conda.yaml 30 | image: mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04 31 | -------------------------------------------------------------------------------- /examples/components/NVFLARE/client/environment/context/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:22.12-py3 2 | FROM ${PYTORCH_IMAGE} 3 | 4 | RUN python3 -m pip install -U pip 5 | RUN python3 -m pip install -U setuptools 6 | RUN python3 -m pip install nvflare==2.2.3 7 | RUN python3 -m pip install azureml-mlflow==1.48.0 8 | -------------------------------------------------------------------------------- /examples/components/NVFLARE/client/environment/env.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json 2 | name: nvflare-pt 3 | version: 2.2.3 4 | build: 5 | path: ./context/ 6 | -------------------------------------------------------------------------------- /examples/components/NVFLARE/client/spec.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: msft_fl_nvflare_client 3 | version: 0.0.5 4 | display_name: NVFlare client 5 | type: command 6 | description: This component runs an NVFlare client inside an AzureML job. 7 | is_deterministic: true 8 | tags: 9 | nvflare: 2.2.3 10 | url: https://github.com/Azure-Samples/azure-ml-federated-learning 11 | 12 | inputs: 13 | federation_identifier: 14 | type: string 15 | description: "a unique identifier for the group of clients and server to find each other" 16 | default: "fed-0000" 17 | client_config: 18 | type: uri_folder 19 | description: "the NVFlare workspace folder for this client" 20 | client_data: 21 | type: uri_folder 22 | optional: true 23 | description: "an optional folder containing data for the client to use" 24 | client_data_env_var: 25 | type: string 26 | default: CLIENT_DATA_PATH 27 | description: "the name of the env variable to set with the mount path of the client_data folder" 28 | start: 29 | type: uri_file 30 | description: "input to trigger the job to start, not actually used" 31 | 32 | code: "." 
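# Note: when client_data is provided, run.py exposes its mount path through the
# environment variable named by client_data_env_var so the NVFlare app code can find
# the data; the start input only sequences this job after the provision step and is
# otherwise unused.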
33 | 34 | command: >- 35 | python run.py 36 | --federation_identifier ${{inputs.federation_identifier}} 37 | --client_config ${{inputs.client_config}} 38 | $[[--client_data ${{inputs.client_data}}]] 39 | --client_data_env_var ${{inputs.client_data_env_var}} 40 | 41 | environment: 42 | build: 43 | path: ./environment/context/ 44 | -------------------------------------------------------------------------------- /examples/components/NVFLARE/provision/environment/context/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:22.12-py3 2 | FROM ${PYTORCH_IMAGE} 3 | 4 | RUN python3 -m pip install -U pip 5 | RUN python3 -m pip install -U setuptools 6 | RUN python3 -m pip install nvflare==2.2.3 7 | RUN python3 -m pip install azureml-mlflow==1.48.0 8 | -------------------------------------------------------------------------------- /examples/components/NVFLARE/provision/environment/env.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json 2 | name: nvflare-sdk 3 | version: 2.2.3 4 | build: 5 | path: ./context/ 6 | -------------------------------------------------------------------------------- /examples/components/NVFLARE/provision/spec.yaml: -------------------------------------------------------------------------------- 1 | 2 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 3 | name: msft_fl_nvflare_provision 4 | version: 0.0.5 5 | display_name: NVFlare provision 6 | type: command 7 | description: Provision an NVFlare project yaml config 8 | is_deterministic: true 9 | tags: 10 | nvflare: 2.2.3 11 | url: https://github.com/Azure-Samples/azure-ml-federated-learning 12 | 13 | inputs: 14 | project_config: 15 | type: uri_file 16 | 17 | outputs: 18 | workspace: 19 | type: uri_folder 20 | start: 21 | type: uri_file 22 | 23 | command: >- 24 | nvflare provision -p ${{inputs.project_config}} -w ${{outputs.workspace}} && echo "start" >> ${{outputs.start}}/start.txt 25 | 26 | environment: 27 | build: 28 | path: ./environment/context/ 29 | -------------------------------------------------------------------------------- /examples/components/NVFLARE/server/environment/context/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:22.12-py3 2 | FROM ${PYTORCH_IMAGE} 3 | 4 | RUN python3 -m pip install -U pip 5 | RUN python3 -m pip install -U setuptools 6 | RUN python3 -m pip install nvflare==2.2.3 7 | RUN python3 -m pip install azureml-mlflow==1.48.0 8 | -------------------------------------------------------------------------------- /examples/components/NVFLARE/server/environment/env.yml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/environment.schema.json 2 | name: nvflare-pt 3 | version: 2.2.3 4 | build: 5 | path: ./context/ 6 | -------------------------------------------------------------------------------- /examples/components/NVFLARE/server/spec.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: msft_fl_nvflare_server 3 | version: 0.0.5 4 | display_name: NVFlare server 5 | type: command 6 | description: This component runs an NVFlare server inside an AzureML job. 
7 | tags: 8 | nvflare: 2.2.3 9 | url: https://github.com/Azure-Samples/azure-ml-federated-learning 10 | 11 | is_deterministic: true 12 | 13 | inputs: 14 | federation_identifier: 15 | type: string 16 | description: "a unique identifier for the group of clients and server to find each other" 17 | default: "fed-0000" 18 | server_config: 19 | type: uri_folder 20 | description: "the NVFlare workspace folder for this server" 21 | admin_config: 22 | type: uri_folder 23 | description: "the NVFlare workspace admin folder to connect to the server" 24 | app_dir: 25 | type: uri_folder 26 | description: "the NVFlare app code directory" 27 | server_name: 28 | type: string 29 | description: "the name of the server/overseer expected by clients for hostname resolution" 30 | expected_clients: 31 | type: integer 32 | description: "the number of clients expected to connect to the server before training" 33 | start: 34 | type: uri_file 35 | description: "input to trigger the job to start, not actually used" 36 | wait_for_clients_timeout: 37 | type: integer 38 | default: 600 39 | description: "the number of seconds to wait for clients to connect before timing out" 40 | 41 | outputs: 42 | job_artefacts: 43 | type: uri_folder 44 | description: "where the NVFlare job artefacts will be saved upon completion of the job" 45 | 46 | code: "." 47 | 48 | command: >- 49 | python run.py 50 | --federation_identifier ${{inputs.federation_identifier}} 51 | --server_config ${{inputs.server_config}} 52 | --admin_config ${{inputs.admin_config}} 53 | --app_dir ${{inputs.app_dir}} 54 | --server_name ${{inputs.server_name}} 55 | --expected_clients ${{inputs.expected_clients}} 56 | --output_dir ${{outputs.job_artefacts}} 57 | --wait_for_clients_timeout ${{inputs.wait_for_clients_timeout}} 58 | 59 | environment: 60 | build: 61 | path: ./environment/context/ 62 | -------------------------------------------------------------------------------- /examples/components/PNEUMONIA/traininsilo/conda.yaml: -------------------------------------------------------------------------------- 1 | name: pneumonia_train_conda_env 2 | channels: 3 | - defaults 4 | - pytorch 5 | - nvidia 6 | dependencies: 7 | - python=3.8 8 | - pip=22.3.1 9 | - pytorch=2.0.0 10 | - pytorch-cuda=11.6 11 | - pip: 12 | - azureml-mlflow==1.48.0 13 | - opacus==1.3.0 14 | - tqdm==4.64.1 15 | - torchvision==0.15.1 16 | -------------------------------------------------------------------------------- /examples/components/PNEUMONIA/traininsilo/pneumonia_network.py: -------------------------------------------------------------------------------- 1 | # This file defining the model was taken as-is from https://github.com/Azure/medical-imaging/blob/main/federated-learning/pneumonia-federated/custom/pneumonia_network.py. 
2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class PneumoniaNetwork(nn.Module): 8 | def __init__(self): 9 | super(PneumoniaNetwork, self).__init__() 10 | dropout = 0.2 11 | 12 | self.conv1 = nn.Conv2d( 13 | in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1 14 | ) 15 | self.conv2 = nn.Conv2d( 16 | in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1 17 | ) 18 | self.conv3 = nn.Conv2d( 19 | in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1 20 | ) 21 | 22 | self.dropout1 = nn.Dropout(dropout) 23 | self.dropout2 = nn.Dropout(dropout) 24 | 25 | self.fc1 = nn.Linear(28 * 28 * 128, 256) 26 | self.fc2 = nn.Linear(256, 2) 27 | 28 | def forward(self, x): 29 | x = F.relu(self.conv1(x)) # 224 x 224 x 32 30 | x = F.max_pool2d(x, 2, 2) # 112 x 112 x 32 31 | x = F.relu(self.conv2(x)) # 112 x 112 x 64 32 | x = F.max_pool2d(x, 2, 2) # 56 x 56 x 64 33 | x = self.dropout1(x) 34 | x = F.relu(self.conv3(x)) # 56 x 56 x 128 35 | x = F.max_pool2d(x, 2, 2) # 28 x 28 x 128 36 | x = self.dropout2(x) 37 | x = x.view(-1, 28 * 28 * 128) # 100.352 38 | x = F.relu(self.fc1(x)) 39 | x = self.fc2(x) 40 | return x 41 | -------------------------------------------------------------------------------- /examples/components/PNEUMONIA/traininsilo/spec.yaml: -------------------------------------------------------------------------------- 1 | 2 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 3 | name: msft_fl_pneumonia_train_in_silo 4 | version: 0.3.0 5 | display_name: Pneumonia Train (in silo) 6 | type: command 7 | description: Component to train a pneumonia detection model on chest radiographs 8 | is_deterministic: true 9 | 10 | distribution: 11 | type: pytorch 12 | 13 | inputs: 14 | lr: 15 | type: number 16 | description: learning rate 17 | default: 0.01 18 | optional: true 19 | epochs: 20 | type: integer 21 | description: total number of epochs for local training 22 | default: 3 23 | optional: true 24 | batch_size: 25 | type: integer 26 | description: Training batch size 27 | default: 32 28 | optional: true 29 | dp: 30 | type: boolean 31 | description: differential privacy 32 | default: false 33 | optional: true 34 | dp_target_epsilon: 35 | type: number 36 | description: DP target epsilon 37 | default: 50.0 38 | optional: true 39 | dp_target_delta: 40 | type: number 41 | description: DP target delta 42 | default: 1e-5 43 | optional: true 44 | dp_max_grad_norm: 45 | type: number 46 | description: DP max gradient norm 47 | default: 1.0 48 | optional: true 49 | total_num_of_iterations: 50 | type: integer 51 | description: Total num of iterations 52 | default: 1 53 | optional: true 54 | dataset_name: 55 | type: uri_folder 56 | description: the data asset in Azure ML 57 | iteration_num: 58 | type: integer 59 | description: Iteration number 60 | default: 1 61 | optional: true 62 | checkpoint: 63 | type: uri_folder 64 | description: a given pre-existing model checkpoint 65 | optional: true 66 | metrics_prefix: 67 | type: string 68 | description: Metrics prefix 69 | default: Default-prefix 70 | optional: true 71 | 72 | outputs: 73 | model: 74 | type: uri_folder 75 | description: the output checkpoint 76 | 77 | code: . 
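# Because of the distribution: pytorch section above, AzureML launches this command as
# a PyTorch distributed job (per-process launch with rank/world-size environment
# variables set), which is what the DDP variant of this example relies on.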
78 | 79 | command: >- 80 | python run.py --dataset_name ${{inputs.dataset_name}} $[[--iteration_num ${{inputs.iteration_num}}]] $[[--checkpoint ${{inputs.checkpoint}}]] --model ${{outputs.model}} $[[--lr ${{inputs.lr}}]] $[[--epochs ${{inputs.epochs}}]] $[[--batch_size ${{inputs.batch_size}}]] $[[--metrics_prefix ${{inputs.metrics_prefix}}]] $[[--dp ${{inputs.dp}}]] $[[--total_num_of_iterations ${{inputs.total_num_of_iterations}}]] $[[--dp_target_epsilon ${{inputs.dp_target_epsilon}}]] $[[--dp_target_delta ${{inputs.dp_target_delta}}]] $[[--dp_max_grad_norm ${{inputs.dp_max_grad_norm}}]] 81 | 82 | environment: 83 | conda_file: ./conda.yaml 84 | image: mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04 85 | -------------------------------------------------------------------------------- /examples/components/PNEUMONIA/upload_data/conda.yaml: -------------------------------------------------------------------------------- 1 | name: pneumonia_upload_data_conda_env 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.8 6 | - pip=22.3.1 7 | - pip: 8 | - azure-identity==1.12.0 9 | - azure-keyvault==4.2.0 10 | - azureml-core==1.47.0 11 | - kaggle==1.5.12 12 | - split-folders==0.5.1 13 | -------------------------------------------------------------------------------- /examples/components/PNEUMONIA/upload_data/spec.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: msft_fl_pneumonia_upload_data 3 | version: 0.3.0 4 | display_name: Download Chest dataset and upload to silo storage 5 | type: command 6 | description: Component that downloads the pneumonia dataset from Kaggle, partitions it, and then uploads each partition to one of the silos' storages. 7 | is_deterministic: true 8 | 9 | inputs: 10 | silo_count: 11 | type: number 12 | optional: false 13 | silo_index: 14 | type: number 15 | optional: false 16 | 17 | outputs: 18 | raw_data_folder: 19 | type: uri_folder 20 | description: the output folder where the raw data will be written 21 | 22 | code: . 23 | 24 | command: >- 25 | python run.py --silo_count ${{inputs.silo_count}} --silo_index ${{inputs.silo_index}} --raw_data_folder ${{outputs.raw_data_folder}} 26 | 27 | environment: 28 | conda_file: ./conda.yaml 29 | image: mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04 30 | -------------------------------------------------------------------------------- /examples/components/shared/samplers.py: -------------------------------------------------------------------------------- 1 | ########################################################################################## 2 | # WARNING # 3 | ########################################################################################## 4 | # Should this file change please update all copies of samplers.py file in the repository # 5 | ########################################################################################## 6 | 7 | import math 8 | import torch 9 | from torch.utils.data import Sampler 10 | 11 | 12 | class VerticallyDistributedBatchSampler(Sampler): 13 | """Batch sampler that uses a distributed communication backend to distribute samples indexes to each worker.""" 14 | 15 | def __init__(self, data_source, batch_size, comm, rank, world_size, shuffle=False): 16 | """Initializes the batch sampler. 17 | 18 | Args: 19 | data_source (torch.utils.data.Dataset): The dataset to sample from. 20 | batch_size (int): The size of the batch to sample. 
21 | comm (AMLComm): The communicator to use for communication. 22 | rank (int): The rank of the current worker. 23 | world_size (int): The total number of workers. 24 | shuffle (bool, optional): Whether to shuffle the indices. Defaults to False. 25 | """ 26 | self.data_source = data_source 27 | self.batch_size = batch_size 28 | self.shuffle = shuffle 29 | self.rank = rank 30 | self.world_size = world_size 31 | self.comm = comm 32 | 33 | def __iter__(self): 34 | if self.rank == 0: 35 | if self.shuffle: 36 | indices = torch.randperm(len(self.data_source)) 37 | else: 38 | indices = torch.arange(len(self.data_source)) 39 | 40 | # Split the indices into batches 41 | batches = [ 42 | indices[i : i + self.batch_size] 43 | for i in range(0, len(indices), self.batch_size) 44 | ] 45 | 46 | for batch in batches: 47 | for i in range(1, self.world_size): 48 | # Send the batch to contributor i 49 | self.comm.send(batch, i) 50 | 51 | yield batch 52 | else: 53 | for i in range(0, len(self.data_source), self.batch_size): 54 | # Receive the batch from host 55 | batch = self.comm.recv(0) 56 | yield batch 57 | 58 | def __len__(self): 59 | return math.ceil(len(self.data_source) / self.batch_size) 60 | -------------------------------------------------------------------------------- /examples/components/utils/aggregatemodelweights/conda.yaml: -------------------------------------------------------------------------------- 1 | name: agg_conda_env 2 | channels: 3 | - defaults 4 | - pytorch 5 | dependencies: 6 | - python=3.7.11 7 | - pytorch=1.12.1 8 | - torchvision=0.13.1 9 | - cudatoolkit=11.3 10 | -------------------------------------------------------------------------------- /examples/components/utils/aggregatemodelweights/spec.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: msft_fl_aggregate_model_weights 3 | version: 0.3.0 4 | display_name: Aggregate PyTorch Model Weights (from all silos) 5 | type: command 6 | description: Component for aggregating pytorch model weights. 7 | is_deterministic: true 8 | 9 | inputs: 10 | input_silo_1: 11 | type: uri_folder 12 | description: input from silo 1 (e.g., model weights, or gradient updates) 13 | optional: false 14 | input_silo_2: 15 | type: uri_folder 16 | description: input from silo 2 (e.g., model weights, or gradient updates) 17 | optional: true 18 | input_silo_3: 19 | type: uri_folder 20 | description: input from silo 3 (e.g., model weights, or gradient updates) 21 | optional: true 22 | input_silo_4: 23 | type: uri_folder 24 | description: input from silo 4 (e.g., model weights, or gradient updates) 25 | optional: true 26 | input_silo_5: 27 | type: uri_folder 28 | description: input from silo 5 (e.g., model weights, or gradient updates) 29 | optional: true 30 | ancillary_files: 31 | type: boolean 32 | description: Whether ancillary files need to be copied 33 | optional: true 34 | out_checkpoint_name: 35 | type: string 36 | description: the name of the output checkpoint, e.g. model, finetuned_state_dict 37 | optional: true 38 | 39 | 40 | outputs: 41 | aggregated_output: 42 | type: uri_folder 43 | description: the aggregated model or gradiants, residing in the orchestrator compute. 44 | 45 | code: . 
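# Note the pattern below: the optional silo inputs carry no flag of their own and are
# simply appended after --checkpoints when provided, so run.py receives a
# variable-length list of one to five checkpoint folders plus the pt extension hint.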
46 | 47 | command: >- 48 | python run.py --output ${{outputs.aggregated_output}} 49 | --extension pt 50 | --checkpoints ${{inputs.input_silo_1}} 51 | $[[${{inputs.input_silo_2}}]] 52 | $[[${{inputs.input_silo_3}}]] 53 | $[[${{inputs.input_silo_4}}]] 54 | $[[${{inputs.input_silo_5}}]] 55 | $[[--ancillary_files ${{inputs.ancillary_files}}]] 56 | $[[--out_checkpoint_name ${{inputs.out_checkpoint_name}}]] 57 | environment: 58 | conda_file: ./conda.yaml 59 | image: mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04 60 | -------------------------------------------------------------------------------- /examples/components/utils/data_analysis/spec.yaml: -------------------------------------------------------------------------------- 1 | 2 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 3 | name: msft_fl_ccfraud_data_analysis_in_silo 4 | version: 0.0.1 5 | display_name: CC Fraud Data-analysis (in silo) 6 | type: command 7 | description: Component for data-analysis of tabular data from silo's blob storage 8 | is_deterministic: true 9 | 10 | inputs: 11 | training_data: 12 | type: uri_file 13 | description: training data in a given silo 14 | testing_data: 15 | type: uri_file 16 | description: testing data in a given silo 17 | categorical_columns: 18 | type: string 19 | description: Names of categorical columns 20 | optional: true 21 | onehot_columns_prefix: 22 | type: string 23 | description: PRefixes of one-hot encoded columns 24 | optional: true 25 | metrics_prefix: 26 | type: string 27 | description: Metrics prefix 28 | default: Default-prefix 29 | optional: true 30 | silo_index: 31 | type: integer 32 | description: Silo index 33 | optional: false 34 | 35 | code: . 36 | 37 | command: >- 38 | python run.py 39 | --training_data ${{inputs.training_data}} 40 | --testing_data ${{inputs.testing_data}} 41 | --silo_index ${{inputs.silo_index}} 42 | $[[--categorical_columns ${{inputs.categorical_columns}}]] 43 | $[[--onehot_columns_prefix ${{inputs.onehot_columns_prefix}}]] 44 | $[[--metrics_prefix ${{inputs.metrics_prefix}}]] 45 | 46 | # NOTE: using one of Azure ML's curated environments 47 | # which has all the dependencies needed for this job 48 | environment: azureml:AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest 49 | -------------------------------------------------------------------------------- /examples/components/utils/multiply_data_files/conda.yaml: -------------------------------------------------------------------------------- 1 | name: multiply_data_conda_env 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.8 6 | - pip=22.3.1 7 | - pip: 8 | - azureml-mlflow==1.48.0 9 | - tqdm==4.64.1 10 | -------------------------------------------------------------------------------- /examples/components/utils/multiply_data_files/spec.yaml: -------------------------------------------------------------------------------- 1 | $schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json 2 | name: msft_fl_multiply_data_files 3 | version: 0.3.0 4 | display_name: Multiply Data Files 5 | type: command 6 | description: Component that multiplies the dataset on each silo. 
7 | is_deterministic: true 8 | 9 | inputs: 10 | input_folder: 11 | type: uri_folder 12 | description: Input directory path 13 | optional: false 14 | multiply: 15 | type: integer 16 | description: Multiplication factor 17 | default: 10 18 | optional: true 19 | 20 | outputs: 21 | output_folder: # Multiplied data directory path 22 | type: uri_folder 23 | description: Output directory path 24 | 25 | code: . 26 | 27 | command: >- 28 | python run.py --input ${{inputs.input_folder}} --output ${{outputs.output_folder}} $[[--multiply ${{inputs.multiply}}]] 29 | 30 | environment: 31 | conda_file: ./conda.yaml 32 | image: mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04 -------------------------------------------------------------------------------- /examples/pipelines/bank_marketing_vertical/config.yaml: -------------------------------------------------------------------------------- 1 | # example yaml config 2 | 3 | # using this to store references to Azure ML 4 | aml: 5 | # subscription_id: "" 6 | # resource_group_name: "" 7 | # workspace_name: "" 8 | 9 | # federated learning parameters 10 | federated_learning: 11 | communication: 12 | backend: socket 13 | encrypted: false 14 | 15 | host: 16 | compute: orchestrator-01 17 | datastore: datastore_orchestrator 18 | training_data: 19 | type: uri_file 20 | mode: 'download' 21 | path: azureml://datastores/datastore_orchestrator/paths/federated_learning/bank_marketing_vertical/raw_train_data 22 | testing_data: 23 | type: uri_file 24 | mode: 'download' 25 | path: azureml://datastores/datastore_orchestrator/paths/federated_learning/bank_marketing_vertical/raw_test_data 26 | 27 | silos: 28 | - compute: silo0-01 29 | datastore: datastore_silo0 30 | training_data: 31 | type: uri_file 32 | mode: 'download' 33 | path: azureml://datastores/datastore_silo0/paths/federated_learning/bank_marketing_vertical/raw_train_data 34 | testing_data: 35 | type: uri_file 36 | mode: 'download' 37 | path: azureml://datastores/datastore_silo0/paths/federated_learning/bank_marketing_vertical/raw_test_data 38 | - compute: silo1-01 39 | datastore: datastore_silo1 40 | training_data: 41 | type: uri_file 42 | mode: 'download' 43 | path: azureml://datastores/datastore_silo1/paths/federated_learning/bank_marketing_vertical/raw_train_data 44 | testing_data: 45 | type: uri_file 46 | mode: 'download' 47 | path: azureml://datastores/datastore_silo1/paths/federated_learning/bank_marketing_vertical/raw_test_data 48 | - compute: silo2-01 49 | datastore: datastore_silo2 50 | training_data: 51 | type: uri_file 52 | mode: 'download' 53 | path: azureml://datastores/datastore_silo2/paths/federated_learning/bank_marketing_vertical/raw_train_data 54 | testing_data: 55 | type: uri_file 56 | mode: 'download' 57 | path: azureml://datastores/datastore_silo2/paths/federated_learning/bank_marketing_vertical/raw_test_data 58 | 59 | # training parameters 60 | training_parameters: 61 | epochs: 50 # number of epochs per iteration (in-silo training) 62 | lr: 1e-2 # learning rate 63 | batch_size: 1000 # batch size 64 | model_name: SimpleLinear 65 | -------------------------------------------------------------------------------- /examples/pipelines/ccfraud_vertical/config.yaml: -------------------------------------------------------------------------------- 1 | # example yaml config 2 | 3 | # using this to store references to Azure ML 4 | aml: 5 | # subscription_id: "" 6 | # resource_group_name: "" 7 | # workspace_name: "" 8 | 9 | # federated learning parameters 10 | federated_learning: 11 | communication: 12 | 
backend: socket 13 | encrypted: false 14 | 15 | host: 16 | compute: orchestrator-01 17 | datastore: datastore_orchestrator 18 | training_data: 19 | type: uri_file 20 | mode: 'download' 21 | path: azureml://datastores/datastore_orchestrator/paths/federated_learning/ccfraud_vertical/raw_train_data 22 | testing_data: 23 | type: uri_file 24 | mode: 'download' 25 | path: azureml://datastores/datastore_orchestrator/paths/federated_learning/ccfraud_vertical/raw_test_data 26 | 27 | silos: 28 | - compute: silo0-01 29 | datastore: datastore_silo0 30 | training_data: 31 | type: uri_file 32 | mode: 'download' 33 | path: azureml://datastores/datastore_silo0/paths/federated_learning/ccfraud_vertical/raw_train_data 34 | testing_data: 35 | type: uri_file 36 | mode: 'download' 37 | path: azureml://datastores/datastore_silo0/paths/federated_learning/ccfraud_vertical/raw_test_data 38 | - compute: silo1-01 39 | datastore: datastore_silo1 40 | training_data: 41 | type: uri_file 42 | mode: 'download' 43 | path: azureml://datastores/datastore_silo1/paths/federated_learning/ccfraud_vertical/raw_train_data 44 | testing_data: 45 | type: uri_file 46 | mode: 'download' 47 | path: azureml://datastores/datastore_silo1/paths/federated_learning/ccfraud_vertical/raw_test_data 48 | - compute: silo2-01 49 | datastore: datastore_silo2 50 | training_data: 51 | type: uri_file 52 | mode: 'download' 53 | path: azureml://datastores/datastore_silo2/paths/federated_learning/ccfraud_vertical/raw_train_data 54 | testing_data: 55 | type: uri_file 56 | mode: 'download' 57 | path: azureml://datastores/datastore_silo2/paths/federated_learning/ccfraud_vertical/raw_test_data 58 | 59 | # training parameters 60 | training_parameters: 61 | epochs: 10 # number of epochs per iteration (in-silo training) 62 | lr: 1e-2 # learning rate 63 | batch_size: 5000 # batch size 64 | -------------------------------------------------------------------------------- /examples/pipelines/ccfraud_vertical_fedonce/config.yaml: -------------------------------------------------------------------------------- 1 | # example yaml config 2 | 3 | # using this to store references to Azure ML 4 | aml: 5 | # subscription_id: "" 6 | # resource_group_name: "" 7 | # workspace_name: "" 8 | 9 | # federated learning parameters 10 | federated_learning: 11 | host: 12 | compute: orchestrator-01 13 | datastore: datastore_orchestrator 14 | training_data: 15 | type: uri_file 16 | mode: 'download' 17 | path: azureml://datastores/datastore_orchestrator/paths/federated_learning/ccfraud_vertical_fedonce/raw_train_data 18 | testing_data: 19 | type: uri_file 20 | mode: 'download' 21 | path: azureml://datastores/datastore_orchestrator/paths/federated_learning/ccfraud_vertical_fedonce/raw_test_data 22 | 23 | silos: 24 | - compute: silo0-01 25 | datastore: datastore_silo0 26 | training_data: 27 | type: uri_file 28 | mode: 'download' 29 | path: azureml://datastores/datastore_silo0/paths/federated_learning/ccfraud_vertical_fedonce/raw_train_data 30 | testing_data: 31 | type: uri_file 32 | mode: 'download' 33 | path: azureml://datastores/datastore_silo0/paths/federated_learning/ccfraud_vertical_fedonce/raw_test_data 34 | - compute: silo1-01 35 | datastore: datastore_silo1 36 | training_data: 37 | type: uri_file 38 | mode: 'download' 39 | path: azureml://datastores/datastore_silo1/paths/federated_learning/ccfraud_vertical_fedonce/raw_train_data 40 | testing_data: 41 | type: uri_file 42 | mode: 'download' 43 | path: 
azureml://datastores/datastore_silo1/paths/federated_learning/ccfraud_vertical_fedonce/raw_test_data 44 | - compute: silo2-01 45 | datastore: datastore_silo2 46 | training_data: 47 | type: uri_file 48 | mode: 'download' 49 | path: azureml://datastores/datastore_silo2/paths/federated_learning/ccfraud_vertical_fedonce/raw_train_data 50 | testing_data: 51 | type: uri_file 52 | mode: 'download' 53 | path: azureml://datastores/datastore_silo2/paths/federated_learning/ccfraud_vertical_fedonce/raw_test_data 54 | 55 | # training parameters 56 | training_parameters: 57 | epochs: 10 # number of epochs per iteration (in-silo training) 58 | lr: 1e-2 # learning rate 59 | batch_size: 5000 # batch size 60 | -------------------------------------------------------------------------------- /examples/pipelines/environment.yml: -------------------------------------------------------------------------------- 1 | name: fl_experiment_conda_env 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.10.4 6 | - pip=22.3.1 7 | - pip: 8 | - -r requirements.txt -------------------------------------------------------------------------------- /examples/pipelines/mnist_vertical/config.yaml: -------------------------------------------------------------------------------- 1 | # example yaml config 2 | 3 | # using this to store references to Azure ML 4 | aml: 5 | # subscription_id: "" 6 | # resource_group_name: "" 7 | # workspace_name: "" 8 | 9 | # federated learning parameters 10 | federated_learning: 11 | communication: 12 | backend: socket 13 | encrypted: false 14 | 15 | host: 16 | compute: orchestrator-01 17 | datastore: datastore_orchestrator 18 | training_data: 19 | type: uri_file 20 | mode: 'download' 21 | path: azureml://datastores/datastore_orchestrator/paths/federated_learning/mnist_vertical/raw_train_data 22 | testing_data: 23 | type: uri_file 24 | mode: 'download' 25 | path: azureml://datastores/datastore_orchestrator/paths/federated_learning/mnist_vertical/raw_test_data 26 | 27 | silos: 28 | - compute: silo0-01 29 | datastore: datastore_silo0 30 | training_data: 31 | type: uri_file 32 | mode: 'download' 33 | path: azureml://datastores/datastore_silo0/paths/federated_learning/mnist_vertical/raw_train_data 34 | testing_data: 35 | type: uri_file 36 | mode: 'download' 37 | path: azureml://datastores/datastore_silo0/paths/federated_learning/mnist_vertical/raw_test_data 38 | - compute: silo1-01 39 | datastore: datastore_silo1 40 | training_data: 41 | type: uri_file 42 | mode: 'download' 43 | path: azureml://datastores/datastore_silo1/paths/federated_learning/mnist_vertical/raw_train_data 44 | testing_data: 45 | type: uri_file 46 | mode: 'download' 47 | path: azureml://datastores/datastore_silo1/paths/federated_learning/mnist_vertical/raw_test_data 48 | - compute: silo2-01 49 | datastore: datastore_silo2 50 | training_data: 51 | type: uri_file 52 | mode: 'download' 53 | path: azureml://datastores/datastore_silo2/paths/federated_learning/mnist_vertical/raw_train_data 54 | testing_data: 55 | type: uri_file 56 | mode: 'download' 57 | path: azureml://datastores/datastore_silo2/paths/federated_learning/mnist_vertical/raw_test_data 58 | 59 | # training parameters 60 | training_parameters: 61 | epochs: 10 # number of epochs per iteration (in-silo training) 62 | lr: 1e-3 # learning rate 63 | batch_size: 128 # batch size -------------------------------------------------------------------------------- /examples/pipelines/pneumonia/config.yaml: 
-------------------------------------------------------------------------------- 1 | # example yaml config 2 | 3 | # using this to store references to Azure ML 4 | aml: 5 | # subscription_id: "" 6 | # resource_group_name: "" 7 | # workspace_name: "" 8 | 9 | # federated learning parameters 10 | federated_learning: 11 | orchestrator: 12 | compute: "orchestrator-01" 13 | datastore: "datastore_orchestrator" 14 | 15 | silos: 16 | - name: silo0 17 | computes: 18 | - silo0-01 # name of the compute for silo X 19 | datastore: datastore_silo0 20 | silo_data: 21 | type: uri_folder 22 | mode: 'download' 23 | path: azureml://datastores/datastore_silo0/paths/federated_learning/pneumonia 24 | 25 | - name: silo1 26 | computes: 27 | - silo1-01 # we are repeating over the same config for silo 2 28 | datastore: datastore_silo1 29 | silo_data: 30 | type: uri_folder 31 | mode: 'download' 32 | path: azureml://datastores/datastore_silo1/paths/federated_learning/pneumonia 33 | 34 | - name: silo2 35 | computes: 36 | - silo2-01 # we are repeating over the same config for silo 3 37 | datastore: datastore_silo2 38 | silo_data: 39 | type: uri_folder 40 | mode: 'download' 41 | path: azureml://datastores/datastore_silo2/paths/federated_learning/pneumonia 42 | 43 | # training parameters 44 | training_parameters: 45 | num_of_iterations: 2 46 | epochs: 5 47 | lr: 0.01 48 | batch_size: 32 49 | 50 | # Differential privacy 51 | dp: false # Flag to enable/disable differential privacy 52 | dp_target_epsilon: 50.0 # Smaller epsilon means more privacy, more noise (it depends on the size of the training dataset. For more info, please visit https://opacus.ai/docs/faq#what-does-epsilon11-really-mean-how-about-delta ) 53 | dp_target_delta: 1e-5 # The target δ of the (ϵ,δ)-differential privacy guarantee. Generally, it should be set to be less than the inverse of the size of the training dataset. 54 | dp_max_grad_norm: 1.0 # Clip per-sample gradients to this norm (DP) 55 | 56 | # if you want to use the privacy_engine.make_private method, please set the value of dp_noise_multiplier parameter 57 | # dp_noise_multiplier: 1.0 # Noise multiplier - to add noise to gradients (DP) -------------------------------------------------------------------------------- /examples/pipelines/pneumonia_flwr/config.yaml: -------------------------------------------------------------------------------- 1 | # EXAMPLE CONFIG FILE 2 | 3 | # This file is intended to contain all the parameters required 4 | # to orchestrate our sample federated learning experiments. 5 | # It is by no means necessary to run an FL experiment, just helpful. 6 | # See submit.py for details on how to consume this file in Python. 7 | 8 | # This should work out of the box when running an experiment 9 | # on one of our sandbox environments. 10 | 11 | # Follow the instructions in the comments to adapt to your settings.
12 | 13 | # References to Azure ML workspace (use cli args to override) 14 | aml: 15 | # subscription_id: "" 16 | # resource_group_name: "" 17 | # workspace_name: "" 18 | 19 | # Parameters to generate the FL graph 20 | federated_learning: 21 | orchestrator: 22 | # name of compute for orchestrator 23 | compute: "orchestrator-01" 24 | # name of datastore for orchestrator (saving model weights + aggregate) 25 | datastore: "datastore_orchestrator" 26 | 27 | silos: # silos are provided as a list 28 | - name: silo0 29 | computes: 30 | - silo0-01 # name of the compute for silo X 31 | datastore: datastore_silo0 # name of the datastore for silo X 32 | # training inputs are specified below 33 | # NOTE: in this demo, we're using public data from a url instead 34 | silo_data: 35 | type: uri_folder 36 | mode: 'download' 37 | path: azureml://datastores/datastore_silo0/paths/federated_learning/pneumonia 38 | 39 | - name: silo1 40 | computes: 41 | - silo1-01 # name of the compute for silo X 42 | datastore: datastore_silo1 # name of the datastore for silo X 43 | # training inputs are specified below 44 | # NOTE: in this demo, we're using public data from a url instead 45 | silo_data: 46 | type: uri_folder 47 | mode: 'download' 48 | path: azureml://datastores/datastore_silo1/paths/federated_learning/pneumonia 49 | 50 | - name: silo2 51 | computes: 52 | - silo2-01 # name of the compute for silo X 53 | datastore: datastore_silo2 # name of the datastore for silo X 54 | # training inputs are specified below 55 | # NOTE: in this demo, we're using public data from a url instead 56 | silo_data: 57 | type: uri_folder 58 | mode: 'download' 59 | path: azureml://datastores/datastore_silo2/paths/federated_learning/pneumonia 60 | 61 | # Training parameters 62 | training_parameters: 63 | # how many loops of scatter-gather to run 64 | num_of_iterations: 2 65 | 66 | # then typical training parameters 67 | epochs: 3 # number of epochs per iteration (in-silo training) 68 | lr: 0.01 # learning rate 69 | batch_size: 64 # batch size 70 | -------------------------------------------------------------------------------- /examples/pipelines/pneumonia_nvflare/pneumonia_federated/config/config_fed_client.json: -------------------------------------------------------------------------------- 1 | { 2 | "format_version": 2, 3 | "executors": [ 4 | { 5 | "tasks": [ 6 | "train", 7 | "submit_model", 8 | "validate" 9 | ], 10 | "executor": { 11 | "id": "Executor", 12 | "path": "nvflare.app_common.executors.learner_executor.LearnerExecutor", 13 | "args": { 14 | "learner_id": "pt_learner" 15 | } 16 | } 17 | } 18 | ], 19 | "task_result_filters": [], 20 | "task_data_filters": [], 21 | "components": [ 22 | { 23 | "id": "pt_learner", 24 | "path": "pt_learner.PTLearner", 25 | "args": { 26 | "lr": 0.0005, 27 | "epochs": 5, 28 | "dataset_path_env_var": "CLIENT_DATA_PATH", 29 | "analytic_sender_id": "analytic_sender" 30 | } 31 | }, 32 | { 33 | "id": "analytic_sender", 34 | "name": "AnalyticsSender", 35 | "args": {} 36 | }, 37 | { 38 | "id": "event_to_fed", 39 | "name": "ConvertToFedEvent", 40 | "args": { 41 | "events_to_convert": [ 42 | "analytix_log_stats" 43 | ], 44 | "fed_event_prefix": "fed." 
45 | } 46 | } 47 | ] 48 | } 49 | -------------------------------------------------------------------------------- /examples/pipelines/pneumonia_nvflare/pneumonia_federated/config/config_fed_server.json: -------------------------------------------------------------------------------- 1 | { 2 | "format_version": 2, 3 | "server": { 4 | "heart_beat_timeout": 600 5 | }, 6 | "task_data_filters": [], 7 | "task_result_filters": [], 8 | "components": [ 9 | { 10 | "id": "persistor", 11 | "name": "PTFileModelPersistor", 12 | "args": { 13 | "model": { 14 | "path": "pneumonia_network.PneumoniaNetwork" 15 | } 16 | } 17 | }, 18 | { 19 | "id": "shareable_generator", 20 | "path": "nvflare.app_common.shareablegenerators.full_model_shareable_generator.FullModelShareableGenerator", 21 | "args": {} 22 | }, 23 | { 24 | "id": "aggregator", 25 | "path": "nvflare.app_common.aggregators.intime_accumulate_model_aggregator.InTimeAccumulateWeightedAggregator", 26 | "args": { 27 | "expected_data_kind": "WEIGHTS" 28 | } 29 | }, 30 | { 31 | "id": "model_locator", 32 | "path": "nvflare.app_common.pt.pt_file_model_locator.PTFileModelLocator", 33 | "args": { 34 | "pt_persistor_id": "persistor" 35 | } 36 | }, 37 | { 38 | "id": "json_generator", 39 | "path": "nvflare.app_common.widgets.validation_json_generator.ValidationJsonGenerator", 40 | "args": {} 41 | }, 42 | { 43 | "id": "tb_analytics_receiver", 44 | "name": "MLFlowAnalyticsReceiver", 45 | "path": "mlflow_receiver.MLFlowAnalyticsReceiver", 46 | "args": { 47 | "events": [ 48 | "fed.analytix_log_stats" 49 | ] 50 | } 51 | } 52 | ], 53 | "workflows": [ 54 | { 55 | "id": "scatter_and_gather", 56 | "name": "ScatterAndGather", 57 | "args": { 58 | "min_clients": 1, 59 | "num_rounds": 1, 60 | "start_round": 0, 61 | "wait_time_after_min_received": 10, 62 | "aggregator_id": "aggregator", 63 | "persistor_id": "persistor", 64 | "shareable_generator_id": "shareable_generator", 65 | "train_task_name": "train", 66 | "train_timeout": 0 67 | } 68 | }, 69 | { 70 | "id": "cross_site_validate", 71 | "name": "CrossSiteModelEval", 72 | "args": { 73 | "model_locator_id": "model_locator" 74 | } 75 | } 76 | ] 77 | } -------------------------------------------------------------------------------- /examples/pipelines/pneumonia_nvflare/pneumonia_federated/custom/pneumonia_network.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import torch 16 | import torch.nn as nn 17 | import torch.nn.functional as F 18 | 19 | 20 | class PneumoniaNetwork(nn.Module): 21 | def __init__(self): 22 | super(PneumoniaNetwork, self).__init__() 23 | dropout = 0.2 24 | 25 | self.conv1 = nn.Conv2d( 26 | in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1 27 | ) 28 | self.conv2 = nn.Conv2d( 29 | in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1 30 | ) 31 | self.conv3 = nn.Conv2d( 32 | in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1 33 | ) 34 | 35 | self.dropout1 = nn.Dropout(dropout) 36 | self.dropout2 = nn.Dropout(dropout) 37 | 38 | self.fc1 = nn.Linear(28 * 28 * 128, 256) 39 | self.fc2 = nn.Linear(256, 2) 40 | 41 | def forward(self, x): 42 | x = F.relu(self.conv1(x)) # 224 x 224 x 32 43 | x = F.max_pool2d(x, 2, 2) # 112 x 112 x 32 44 | x = F.relu(self.conv2(x)) # 112 x 112 x 64 45 | x = F.max_pool2d(x, 2, 2) # 56 x 56 x 64 46 | x = self.dropout1(x) 47 | x = F.relu(self.conv3(x)) # 56 x 56 x 128 48 | x = F.max_pool2d(x, 2, 2) # 28 x 28 x 128 49 | x = self.dropout2(x) 50 | x = x.view(-1, 28 * 28 * 128) # 100.352 51 | x = F.relu(self.fc1(x)) 52 | x = self.fc2(x) 53 | return x 54 | -------------------------------------------------------------------------------- /examples/pipelines/pneumonia_nvflare/pneumonia_federated/custom/pt_constants.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | class PTConstants: 17 | PTServerName = "server" 18 | PTFileModelName = "FL_global_model.pt" 19 | PTLocalModelName = "local_model.pt" 20 | 21 | PTModelsDir = "models" 22 | CrossValResultsJsonFilename = "cross_val_results.json" 23 | -------------------------------------------------------------------------------- /examples/pipelines/requirements.txt: -------------------------------------------------------------------------------- 1 | azure-identity 2 | azure-ai-ml==1.4.0 3 | omegaconf 4 | -------------------------------------------------------------------------------- /examples/pipelines/utils/multiply_data_files/config.yaml: -------------------------------------------------------------------------------- 1 | # CONFIG FILE FOR MULTIPLYING DATASETS 2 | 3 | # References to Azure ML workspace (use cli args to override) 4 | aml: 5 | # subscription_id: "" 6 | # resource_group_name: "" 7 | # workspace_name: "" 8 | 9 | # Parameters to generate the FL graph 10 | federated_learning: 11 | silos: # silos are provided as a list 12 | - compute: silo0-01 # name of the compute for silo X 13 | datastore: datastore_silo0 # name of the datastore for silo X 14 | input_data: 15 | type: uri_folder 16 | mode: 'download' 17 | path: azureml://datastores/datastore_silo0/paths/federated_learning/pneumonia 18 | output_data: 19 | type: uri_folder 20 | mode: 'upload' 21 | path: azureml://datastores/datastore_silo0/paths/federated_learning/multiplied_data/pneumonia 22 | - compute: silo1-01 # we are repeating over the same config for silo 2 23 | datastore: datastore_silo1 24 | input_data: 25 | type: uri_folder 26 | mode: 'download' 27 | path: azureml://datastores/datastore_silo1/paths/federated_learning/pneumonia 28 | output_data: 29 | type: uri_folder 30 | mode: 'upload' 31 | path: azureml://datastores/datastore_silo1/paths/federated_learning/multiplied_data/pneumonia 32 | - compute: silo2-01 # we are repeating over the same config for silo 3 33 | datastore: datastore_silo2 34 | input_data: 35 | type: uri_folder 36 | mode: 'download' 37 | path: azureml://datastores/datastore_silo2/paths/federated_learning/pneumonia 38 | output_data: 39 | type: uri_folder 40 | mode: 'upload' 41 | path: azureml://datastores/datastore_silo2/paths/federated_learning/multiplied_data/pneumonia 42 | -------------------------------------------------------------------------------- /examples/pipelines/utils/upload_data/config.yaml: -------------------------------------------------------------------------------- 1 | # CONFIG FILE FOR UPLOADING DATASETS TO CORRESPONDING REGIONS 2 | 3 | # References to Azure ML workspace (use cli args to override) 4 | aml: 5 | # subscription_id: "" 6 | # resource_group_name: "" 7 | # workspace_name: "" 8 | 9 | # Parameters to generate the FL graph 10 | federated_learning: 11 | host: 12 | compute: orchestrator-01 13 | datastore: datastore_orchestrator 14 | silos: # silos are provided as a list 15 | - compute: silo0-01 # name of the compute for silo X 16 | datastore: datastore_silo0 # name of the datastore for silo X 17 | 18 | - compute: silo1-01 # we are repeating over the same config for silo 2 19 | datastore: datastore_silo1 20 | 21 | - compute: silo2-01 # we are repeating over the same config for silo 3 22 | datastore: datastore_silo2 23 | 24 | confidentiality: 25 | enable: false 26 | keyvault: https://kv-fldemo.vault.azure.net # url of the keyvault 27 | key_name: dev-rsa-key # name of the secret containing your encryption public key 28 | 
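The pipeline config files above are consumed by the submit scripts that sit next to them (note the "use cli args to override" comments). As a rough, hypothetical illustration only, and not the repository's actual submit.py, a minimal consumer built from the packages pinned in examples/pipelines/requirements.txt (omegaconf, azure-identity, azure-ai-ml) might look like this:

# Hypothetical sketch (not the actual submit.py): load one of the configs above
# with OmegaConf and connect to the Azure ML workspace it references.
import argparse

from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
from omegaconf import OmegaConf

parser = argparse.ArgumentParser()
parser.add_argument("--config", default="config.yaml")
# CLI args take precedence over the (optional) aml section of the config
parser.add_argument("--subscription_id", default=None)
parser.add_argument("--resource_group_name", default=None)
parser.add_argument("--workspace_name", default=None)
args = parser.parse_args()

config = OmegaConf.load(args.config)

ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=args.subscription_id or OmegaConf.select(config, "aml.subscription_id"),
    resource_group_name=args.resource_group_name or OmegaConf.select(config, "aml.resource_group_name"),
    workspace_name=args.workspace_name or OmegaConf.select(config, "aml.workspace_name"),
)

# the silos list drives one in-silo step per entry,
# each pinned to its own compute and datastore
for silo in config.federated_learning.silos:
    print(f"silo compute={silo.compute}, datastore={silo.datastore}")

Keeping the workspace references optional in the config and overridable from the command line is what lets the same file be reused across sandboxes.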
-------------------------------------------------------------------------------- /mlops/arm/README.md: -------------------------------------------------------------------------------- 1 | These ARM templates are generated automatically from the [bicep scripts](../bicep/). Please refer to the bicep scripts for the source of truth. -------------------------------------------------------------------------------- /mlops/bicep/modules/azureml/attach_aks_training_to_azureml.bicep: -------------------------------------------------------------------------------- 1 | // This BICEP script will attach an AKS cluster 2 | // to a given AzureML workspace for training (NOT inferencing). 3 | 4 | // resource group must be specified as scope in az cli or module call 5 | targetScope = 'resourceGroup' 6 | 7 | // required parameters 8 | @description('Name of AzureML workspace to attach compute+storage to.') 9 | param machineLearningName string 10 | 11 | @description('The region of the machine learning workspace') 12 | param machineLearningRegion string = resourceGroup().location 13 | 14 | @description('Resource ID of the AKS cluster.') 15 | param aksResourceId string 16 | 17 | @description('Region of the AKS cluster.') 18 | param aksRegion string 19 | 20 | @description('How to name this compute in Azure ML') 21 | param amlComputeName string 22 | 23 | @description('Name of the existing UAI for the compute cluster.') 24 | param computeUaiName string 25 | 26 | // reference the existing user assigned identity for this silo 27 | resource uai 'Microsoft.ManagedIdentity/userAssignedIdentities@2022-01-31-preview' existing = { 28 | name: computeUaiName 29 | scope: resourceGroup() 30 | } 31 | 32 | var identityPrincipalId = uai.properties.principalId 33 | var userAssignedIdentities = {'/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.ManagedIdentity/userAssignedIdentities/${uai.name}': {}} 34 | 35 | resource workspace 'Microsoft.MachineLearningServices/workspaces@2022-05-01' existing = { 36 | name: machineLearningName 37 | scope: resourceGroup() 38 | } 39 | 40 | // attach the AKS cluster to the workspace 41 | resource aksAzuremlCompute 'Microsoft.MachineLearningServices/workspaces/computes@2021-01-01' = { 42 | name: amlComputeName 43 | parent: workspace 44 | location: machineLearningRegion 45 | identity: { 46 | type: 'UserAssigned' 47 | userAssignedIdentities: userAssignedIdentities 48 | } 49 | properties: { 50 | computeType: 'Kubernetes' 51 | computeLocation: aksRegion 52 | resourceId: aksResourceId 53 | description: 'AKS cluster attached to AzureML workspace' 54 | properties: { 55 | } 56 | } 57 | } 58 | 59 | // output the compute config for next actions (permission model) 60 | output identityPrincipalId string = identityPrincipalId 61 | output compute string = aksAzuremlCompute.name 62 | -------------------------------------------------------------------------------- /mlops/bicep/modules/networking/private_dns_zone.bicep: -------------------------------------------------------------------------------- 1 | // Provision a private DNS Zone 2 | 3 | @description('Name of the private DNS zone') 4 | param name string 5 | 6 | @description('Location of the private DNS zone (default: global)') 7 | param location string = 'global' 8 | 9 | @description('Tags for curation of resources') 10 | param tags object = {} 11 | 12 | @description('Optional: link the private DNS zone to a given virtual network') 13 | param linkToVirtualNetworkId string = '' 14 | 15 | // create the dns zone 16 | resource
privateDnsZone 'Microsoft.Network/privateDnsZones@2020-06-01' = { 17 | name: name 18 | location: location 19 | tags: tags 20 | } 21 | 22 | // create the link 23 | resource privateDnsZoneVnetLink 'Microsoft.Network/privateDnsZones/virtualNetworkLinks@2020-06-01' = if (!empty(linkToVirtualNetworkId)) { 24 | name: uniqueString(linkToVirtualNetworkId, name, location) 25 | parent: privateDnsZone 26 | location: location 27 | properties: { 28 | registrationEnabled: false 29 | virtualNetwork: { 30 | id: linkToVirtualNetworkId 31 | } 32 | } 33 | } 34 | 35 | output name string = privateDnsZone.name 36 | output id string = privateDnsZone.id 37 | -------------------------------------------------------------------------------- /mlops/bicep/modules/networking/vnet.bicep: -------------------------------------------------------------------------------- 1 | // Creates a virtual network 2 | 3 | targetScope = 'resourceGroup' 4 | 5 | @description('Name of the virtual network resource') 6 | param virtualNetworkName string 7 | 8 | @description('Group ID of the network security group') 9 | param networkSecurityGroupId string 10 | 11 | @description('Azure region of the deployment') 12 | param location string = resourceGroup().location 13 | 14 | @description('Virtual network address prefix') 15 | param vnetAddressPrefix string = '10.0.0.0/16' 16 | 17 | @description('Training subnets names and address prefix') 18 | param subnets array = [ 19 | { 20 | name: 'snet-training' 21 | addressPrefix: '10.0.0.0/24' 22 | } 23 | ] 24 | 25 | @description('List of service endpoints expected on this vnet') 26 | param serviceEndpoints array = [ 27 | 'Microsoft.KeyVault' 28 | 'Microsoft.ContainerRegistry' 29 | 'Microsoft.Storage' 30 | ] 31 | 32 | @description('Tags to add to the resources') 33 | param tags object = {} 34 | 35 | var serviceEndpointsDefinition = [for service in serviceEndpoints: { service: service }] 36 | var subnetsDefinition = [for subnet in subnets: { 37 | name: subnet.name 38 | properties: { 39 | addressPrefix: subnet.addressPrefix 40 | privateEndpointNetworkPolicies: 'Disabled' 41 | privateLinkServiceNetworkPolicies: 'Disabled' 42 | serviceEndpoints: serviceEndpointsDefinition 43 | networkSecurityGroup: { 44 | id: networkSecurityGroupId 45 | } 46 | } 47 | }] 48 | 49 | resource virtualNetwork 'Microsoft.Network/virtualNetworks@2022-01-01' = { 50 | name: virtualNetworkName 51 | location: location 52 | tags: tags 53 | properties: { 54 | addressSpace: { 55 | addressPrefixes: [ 56 | vnetAddressPrefix 57 | ] 58 | } 59 | subnets: subnetsDefinition 60 | } 61 | } 62 | 63 | output id string = virtualNetwork.id 64 | output name string = virtualNetwork.name 65 | -------------------------------------------------------------------------------- /mlops/bicep/modules/networking/vnet_peering.bicep: -------------------------------------------------------------------------------- 1 | // Peers two vnet (from different regions) 2 | // see https://learn.microsoft.com/en-us/azure/virtual-network/virtual-network-peering-overview 3 | 4 | targetScope = 'resourceGroup' 5 | 6 | @description('Set the local VNet name') 7 | param existingVirtualNetworkNameSource string 8 | 9 | @description('Set the remote VNet name') 10 | param existingVirtualNetworkNameTarget string 11 | 12 | @description('Sets the remote VNet Resource group') 13 | param existingVirtualNetworkNameTargetResourceGroupName string = resourceGroup().name 14 | 15 | param useGatewayFromSourceToTarget bool = false 16 | param allowVirtualNetworkAccess bool = true 17 | 18 | resource 
_vnet_peering 'Microsoft.Network/virtualNetworks/virtualNetworkPeerings@2022-01-01' = { 19 | name: '${existingVirtualNetworkNameSource}/peering-to-${existingVirtualNetworkNameTarget}' 20 | properties: { 21 | allowVirtualNetworkAccess: allowVirtualNetworkAccess 22 | allowForwardedTraffic: false 23 | allowGatewayTransit: false 24 | useRemoteGateways: useGatewayFromSourceToTarget 25 | remoteVirtualNetwork: { 26 | id: resourceId(existingVirtualNetworkNameTargetResourceGroupName, 'Microsoft.Network/virtualNetworks', existingVirtualNetworkNameTarget) 27 | } 28 | } 29 | } 30 | 31 | resource _vnet_peering_back 'Microsoft.Network/virtualNetworks/virtualNetworkPeerings@2022-01-01' = { 32 | name: '${existingVirtualNetworkNameTarget}/peering-to-${existingVirtualNetworkNameSource}' 33 | properties: { 34 | allowVirtualNetworkAccess: allowVirtualNetworkAccess 35 | allowForwardedTraffic: false 36 | allowGatewayTransit: useGatewayFromSourceToTarget 37 | useRemoteGateways: false 38 | remoteVirtualNetwork: { 39 | id: resourceId(resourceGroup().name, 'Microsoft.Network/virtualNetworks', existingVirtualNetworkNameSource) 40 | } 41 | } 42 | } 43 | 44 | output id string = _vnet_peering.id 45 | -------------------------------------------------------------------------------- /mlops/bicep/modules/permissions/msi_storage_rw.bicep: -------------------------------------------------------------------------------- 1 | // Assigns roles to a given User Assigned Identity 2 | // towards a given storage account 3 | 4 | @description('Full path to storage') 5 | param storageAccountName string 6 | 7 | @description('PrincipalId of the managed identity') 8 | param identityPrincipalId string 9 | 10 | @description('Role definition IDs for the compute towards the internal storage') 11 | param computeToStorageRoles array = [ 12 | // see https://learn.microsoft.com/en-us/azure/role-based-access-control/built-in-roles 13 | 'ba92f5b4-2d11-453d-a403-e96b0029c9fe' // Storage Blob Data Contributor 14 | '81a9662b-bebf-436f-a333-f67b29880f12' // Storage Account Key Operator Service Role 15 | 'c12c1c16-33a1-487b-954d-41c89c60f349' // Reader and Data Access 16 | ] 17 | 18 | resource storage 'Microsoft.Storage/storageAccounts@2022-05-01' existing = { 19 | name: storageAccountName 20 | } 21 | 22 | resource roleAssignments 'Microsoft.Authorization/roleAssignments@2022-04-01' = [ for roleId in computeToStorageRoles: { 23 | scope: storage 24 | name: guid(resourceGroup().id, storage.id, identityPrincipalId, roleId) 25 | properties: { 26 | roleDefinitionId: '/subscriptions/${subscription().subscriptionId}/providers/Microsoft.Authorization/roleDefinitions/${roleId}' 27 | principalId: identityPrincipalId 28 | principalType: 'ServicePrincipal' 29 | } 30 | dependsOn: [ 31 | storage 32 | ] 33 | }] 34 | -------------------------------------------------------------------------------- /mlops/bicep/modules/resources/private_acr.bicep: -------------------------------------------------------------------------------- 1 | // Creates an Azure Container Registry with Azure Private Link endpoint 2 | 3 | // resource group must be specified as scope in az cli or module call 4 | targetScope = 'resourceGroup' 5 | 6 | @description('Azure region of the deployment') 7 | param location string 8 | 9 | @description('Tags to add to the resources') 10 | param tags object = {} 11 | 12 | @description('Container registry name') 13 | param containerRegistryName string 14 | 15 | @description('Resource ID of the subnet') 16 | param subnetId string 17 | 18 | @description('Name of the 
private DNS zone') 19 | param privateDNSZoneName string = 'privatelink${environment().suffixes.acrLoginServer}' 20 | 21 | @description('Optional: static IPs for the 2 PLEs (comma separated)') 22 | param acrPLEStaticIPs string = '' 23 | 24 | var containerRegistryNameCleaned = replace(containerRegistryName, '-', '') 25 | 26 | resource containerRegistry 'Microsoft.ContainerRegistry/registries@2021-09-01' = { 27 | name: containerRegistryNameCleaned 28 | location: location 29 | tags: tags 30 | sku: { 31 | name: 'Premium' 32 | } 33 | properties: { 34 | adminUserEnabled: true 35 | dataEndpointEnabled: false 36 | networkRuleBypassOptions: 'AzureServices' 37 | networkRuleSet: { 38 | defaultAction: 'Deny' 39 | } 40 | policies: { 41 | quarantinePolicy: { 42 | status: 'disabled' 43 | } 44 | retentionPolicy: { 45 | status: 'enabled' 46 | days: 7 47 | } 48 | trustPolicy: { 49 | status: 'disabled' 50 | type: 'Notary' 51 | } 52 | } 53 | publicNetworkAccess: 'Disabled' 54 | zoneRedundancy: 'Disabled' 55 | } 56 | } 57 | 58 | module privateEndpoint '../networking/private_endpoint.bicep' = { 59 | name: '${containerRegistry.name}-endpoint-to-vnet' 60 | scope: resourceGroup() 61 | params: { 62 | tags: tags 63 | location: location 64 | resourceServiceId: containerRegistry.id 65 | pleRootName: 'ple-${containerRegistry.name}' 66 | subnetId: subnetId 67 | privateDNSZoneName: privateDNSZoneName 68 | groupId: 'registry' 69 | memberNames: [ 'registry', 'registry_data_${location}' ] 70 | useStaticIPAddress: !empty(acrPLEStaticIPs) 71 | privateIPAddress: acrPLEStaticIPs 72 | } 73 | } 74 | 75 | output containerRegistryId string = containerRegistry.id 76 | -------------------------------------------------------------------------------- /mlops/bicep/modules/resources/private_appinsights.bicep: -------------------------------------------------------------------------------- 1 | // Creates an Application Insights instance as dependency for Azure ML 2 | 3 | // resource group must be specified as scope in az cli or module call 4 | targetScope = 'resourceGroup' 5 | 6 | @description('Azure region of the deployment') 7 | param location string = resourceGroup().location 8 | 9 | @description('Tags to add to the resources') 10 | param tags object = {} 11 | 12 | @description('Application Insights resource name') 13 | param applicationInsightsName string 14 | 15 | resource applicationInsights 'Microsoft.Insights/components@2020-02-02' = { 16 | name: applicationInsightsName 17 | location: location 18 | tags: tags 19 | kind: 'web' 20 | properties: { 21 | Application_Type: 'web' 22 | DisableIpMasking: false 23 | DisableLocalAuth: false 24 | Flow_Type: 'Bluefield' 25 | ForceCustomerStorageForProfiler: false 26 | ImmediatePurgeDataOn30Days: true 27 | IngestionMode: 'ApplicationInsights' 28 | publicNetworkAccessForIngestion: 'Enabled' 29 | publicNetworkAccessForQuery: 'Disabled' 30 | Request_Source: 'rest' 31 | } 32 | } 33 | 34 | output applicationInsightsId string = applicationInsights.id 35 | -------------------------------------------------------------------------------- /mlops/bicep/modules/resources/private_keyvault.bicep: -------------------------------------------------------------------------------- 1 | // Creates a KeyVault with Private Link Endpoint 2 | 3 | // resource group must be specified as scope in az cli or module call 4 | targetScope = 'resourceGroup' 5 | 6 | @description('The Azure Region to deploy the resources into') 7 | param location string = resourceGroup().location 8 | 9 | @description('Tags to apply to the Key 
Vault Instance') 10 | param tags object = {} 11 | 12 | @description('The name of the Key Vault') 13 | param keyvaultName string 14 | 15 | @description('The Subnet ID where the Key Vault Private Link is to be created') 16 | param subnetId string 17 | 18 | @description('Name of the private DNS zone') 19 | param privateDNSZoneName string = 'privatelink${environment().suffixes.keyvaultDns}' 20 | 21 | resource keyVault 'Microsoft.KeyVault/vaults@2022-07-01' = { 22 | name: keyvaultName 23 | location: location 24 | tags: tags 25 | properties: { 26 | tenantId: subscription().tenantId 27 | createMode: 'default' 28 | sku: { 29 | name: 'standard' 30 | family: 'A' 31 | } 32 | 33 | // usage 34 | enabledForDeployment: false 35 | enabledForDiskEncryption: true 36 | enabledForTemplateDeployment: false 37 | enableRbacAuthorization: true 38 | 39 | // loss protection 40 | enablePurgeProtection: true 41 | enableSoftDelete: true 42 | softDeleteRetentionInDays: 7 43 | 44 | // networking 45 | publicNetworkAccess: 'Disabled' 46 | networkAcls: { 47 | bypass: 'AzureServices' 48 | defaultAction: 'Deny' 49 | } 50 | } 51 | } 52 | 53 | module privateEndpoint '../networking/private_endpoint.bicep' = { 54 | name: '${keyVault.name}-endpoint-to-vnet' 55 | scope: resourceGroup() 56 | params: { 57 | tags: tags 58 | location: keyVault.location 59 | resourceServiceId: keyVault.id 60 | pleRootName: 'ple-${keyVault.name}' 61 | subnetId: subnetId 62 | privateDNSZoneName: privateDNSZoneName 63 | groupId: 'vault' 64 | } 65 | } 66 | 67 | output keyvaultId string = keyVault.id 68 | -------------------------------------------------------------------------------- /mlops/bicep/modules/storages/existing_blob_storage_datastore.bicep: -------------------------------------------------------------------------------- 1 | // Creates a datastore for an existing storage account in the same tenant 2 | @description('Name of AzureML workspace to attach compute+storage to.') 3 | param machineLearningName string 4 | 5 | @description('Existing storage account name to attach to the pair.') 6 | param storageAccountName string 7 | 8 | @description('Azure region of the storage to create') 9 | param storageRegion string 10 | 11 | @description('Resource group of the existing storage account to attach to the pair.') 12 | param storageAccountResourceGroup string = resourceGroup().name 13 | 14 | @description('SubscriptionId of the existing storage account to attach to the pair.') 15 | param storageAccountSubscriptionId string = subscription().subscriptionId 16 | 17 | @description('Name of the storage container resource to create for the pair') 18 | param containerName string = 'private' 19 | 20 | @description('Name of the datastore for attaching the storage to the AzureML workspace.') 21 | param datastoreName string = replace('datastore_${storageAccountName}','-','_') 22 | 23 | @description('Tags to add to the resources') 24 | param tags object = {} 25 | 26 | var storageId = '/subscriptions/${storageAccountSubscriptionId}/resourceGroups/${storageAccountResourceGroup}/providers/Microsoft.Storage/storageAccounts/${storageAccountName}' 27 | 28 | // attach as a datastore in AzureML 29 | resource datastore 'Microsoft.MachineLearningServices/workspaces/datastores@2022-06-01-preview' = { 30 | name: '${machineLearningName}/${datastoreName}' 31 | properties: { 32 | credentials: { 33 | credentialsType: 'None' 34 | } 35 | description: 'Private storage in region ${storageRegion}' 36 | properties: {} 37 | datastoreType: 'AzureBlob' 38 | 39 | accountName: storageAccountName 
40 | containerName: containerName 41 | resourceGroup: storageAccountResourceGroup 42 | subscriptionId: storageAccountSubscriptionId 43 | tags: tags 44 | } 45 | } 46 | 47 | // output storage references 48 | output storageId string = storageId 49 | output storageName string = storageAccountName 50 | output containerName string = containerName 51 | output datastoreName string = datastore.name 52 | -------------------------------------------------------------------------------- /mlops/bicep/sandbox_fl_confidential.bicep: -------------------------------------------------------------------------------- 1 | // This BICEP script will fully provision a federated learning sandbox 2 | // with eyes-off orchestrator and silos storages 3 | // and only confidential compute clusters in orchestrator and silos 4 | 5 | targetScope = 'resourceGroup' 6 | 7 | // please specify the base name for all resources 8 | @description('Base name of the demo, used for creating all resources as prefix') 9 | param demoBaseName string = 'fldemo' 10 | 11 | @description('Region of the orchestrator (workspace, central storage and compute).') 12 | param orchestratorRegion string = 'eastus' 13 | 14 | @description('List of each region in which to create an internal silo.') 15 | param siloRegions array = [ 16 | 'eastus' 17 | 'eastus' 18 | 'eastus' 19 | ] 20 | 21 | @description('The VM used for creating compute clusters in orchestrator and silos.') 22 | param computeSKU string = 'Standard_DC4as_v5' 23 | 24 | @description('Uses public network access for the orchestrator storage, allowing it to be eyes-on.') 25 | param orchestratorEyesOn bool = false 26 | 27 | @description('Apply vnet peering to allow for vertical FL') 28 | param applyVNetPeering bool = true 29 | 30 | @description('Provide your Kaggle API user name to run our samples relying on Kaggle datasets.') 31 | param kaggleUsername string = '' 32 | 33 | @description('Provide your Kaggle API key to run our samples relying on Kaggle datasets.') 34 | @secure() 35 | param kaggleKey string = '' 36 | 37 | 38 | // run the generic sandbox bicep script with proper arguments 39 | module sandbox 'vnet_publicip_sandbox_aks_confcomp_setup.bicep' = { 40 | name: 'sandbox-${demoBaseName}' 41 | params: { 42 | demoBaseName: demoBaseName 43 | orchestratorRegion: orchestratorRegion 44 | siloRegions: siloRegions 45 | 46 | // computes 47 | computeSKU: computeSKU 48 | 49 | // eyes-on/eyes-off settings 50 | orchestratorStorageNetworkAccess: orchestratorEyesOn ? 
'public' : 'private' 51 | siloStorageNetworkAccess: 'private' 52 | 53 | // ready for vertical FL 54 | applyVNetPeering: applyVNetPeering 55 | } 56 | } 57 | 58 | // Add kaggle secrets if given 59 | resource kaggleSecretUsername 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = if (!empty(kaggleUsername)) { 60 | name: 'ws-shkv-${demoBaseName}/kaggleusername' 61 | properties: { 62 | value: kaggleUsername 63 | } 64 | dependsOn: [ 65 | sandbox 66 | ] 67 | } 68 | 69 | resource kaggleSecretKey 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = if (!empty(kaggleUsername)) { 70 | name: 'ws-shkv-${demoBaseName}/kagglekey' 71 | properties: { 72 | value: kaggleKey 73 | } 74 | dependsOn: [ 75 | sandbox 76 | ] 77 | } 78 | -------------------------------------------------------------------------------- /mlops/bicep/sandbox_fl_eyesoff_cpu.bicep: -------------------------------------------------------------------------------- 1 | // This BICEP script will fully provision a federated learning sandbox 2 | // with eyes-off orchestrator and silos storages 3 | // and only one compute (cpu by default) 4 | 5 | targetScope = 'resourceGroup' 6 | 7 | // please specify the base name for all resources 8 | @description('Base name of the demo, used for creating all resources as prefix') 9 | param demoBaseName string = 'fldemo' 10 | 11 | @description('Region of the orchestrator (workspace, central storage and compute).') 12 | param orchestratorRegion string = resourceGroup().location 13 | 14 | @description('List of each region in which to create an internal silo.') 15 | param siloRegions array = [ 16 | 'australiaeast' 17 | 'eastus' 18 | 'westeurope' 19 | ] 20 | 21 | @description('The VM used for creating compute clusters in orchestrator and silos.') 22 | param computeSKU string = 'Standard_DS4_v2' 23 | 24 | @description('Uses public network access for the orchestrator storage, allowing it to be eyes-on.') 25 | param orchestratorEyesOn bool = false 26 | 27 | @description('Apply vnet peering to allow for vertical FL') 28 | param applyVNetPeering bool = true 29 | 30 | @description('Provide your Kaggle API user name to run our samples relying on Kaggle datasets.') 31 | param kaggleUsername string = '' 32 | 33 | @description('Provide your Kaggle API key to run our samples relying on Kaggle datasets.') 34 | @secure() 35 | param kaggleKey string = '' 36 | 37 | 38 | // run the generic sandbox bicep script with proper arguments 39 | module sandbox 'vnet_publicip_sandbox_setup.bicep' = { 40 | name: 'sandbox-${demoBaseName}' 41 | params: { 42 | demoBaseName: demoBaseName 43 | orchestratorRegion: orchestratorRegion 44 | siloRegions: siloRegions 45 | 46 | // computes 47 | compute1SKU: computeSKU 48 | compute2: false 49 | 50 | // eyes-on/eyes-off settings 51 | orchestratorStorageNetworkAccess: orchestratorEyesOn ? 
'public' : 'private' 52 | siloStorageNetworkAccess: 'private' 53 | 54 | // ready for vertical FL 55 | applyVNetPeering: applyVNetPeering 56 | } 57 | } 58 | 59 | // Add kaggle secrets if given 60 | resource kaggleSecretUsername 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = if (!empty(kaggleUsername)) { 61 | name: 'ws-shkv-${demoBaseName}/kaggleusername' 62 | properties: { 63 | value: kaggleUsername 64 | } 65 | dependsOn: [ 66 | sandbox 67 | ] 68 | } 69 | 70 | resource kaggleSecretKey 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = if (!empty(kaggleUsername)) { 71 | name: 'ws-shkv-${demoBaseName}/kagglekey' 72 | properties: { 73 | value: kaggleKey 74 | } 75 | dependsOn: [ 76 | sandbox 77 | ] 78 | } 79 | -------------------------------------------------------------------------------- /mlops/bicep/sandbox_fl_eyesoff_gpu.bicep: -------------------------------------------------------------------------------- 1 | // This BICEP script will fully provision a federated learning sandbox 2 | // with eyes-off orchestrator and silos storages 3 | // and only one compute (gpu by default) 4 | 5 | targetScope = 'resourceGroup' 6 | 7 | // please specify the base name for all resources 8 | @description('Base name of the demo, used for creating all resources as prefix') 9 | param demoBaseName string = 'fldemo' 10 | 11 | @description('Region of the orchestrator (workspace, central storage and compute).') 12 | param orchestratorRegion string = resourceGroup().location 13 | 14 | @description('List of each region in which to create an internal silo.') 15 | param siloRegions array = [ 16 | 'australiaeast' 17 | 'eastus' 18 | 'westeurope' 19 | ] 20 | 21 | @description('The VM used for creating compute clusters in orchestrator and silos.') 22 | param computeSKU string = 'Standard_NC6' 23 | 24 | @description('Uses public network access for the orchestrator storage, allowing it to be eyes-on.') 25 | param orchestratorEyesOn bool = false 26 | 27 | @description('Apply vnet peering to allow for vertical FL') 28 | param applyVNetPeering bool = true 29 | 30 | @description('Provide your Kaggle API user name to run our samples relying on Kaggle datasets.') 31 | param kaggleUsername string = '' 32 | 33 | @description('Provide your Kaggle API key to run our samples relying on Kaggle datasets.') 34 | @secure() 35 | param kaggleKey string = '' 36 | 37 | 38 | // run the generic sandbox bicep script with proper arguments 39 | module sandbox 'vnet_publicip_sandbox_setup.bicep' = { 40 | name: 'sandbox-${demoBaseName}' 41 | params: { 42 | demoBaseName: demoBaseName 43 | orchestratorRegion: orchestratorRegion 44 | siloRegions: siloRegions 45 | 46 | // computes 47 | compute1SKU: computeSKU 48 | compute2: false 49 | 50 | // eyes-on/eyes-off settings 51 | orchestratorStorageNetworkAccess: orchestratorEyesOn ? 
'public' : 'private' 52 | siloStorageNetworkAccess: 'private' 53 | 54 | // ready for vertical FL 55 | applyVNetPeering: applyVNetPeering 56 | } 57 | } 58 | 59 | // Add kaggle secrets if given 60 | resource kaggleSecretUsername 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = if (!empty(kaggleUsername)) { 61 | name: 'ws-shkv-${demoBaseName}/kaggleusername' 62 | properties: { 63 | value: kaggleUsername 64 | } 65 | dependsOn: [ 66 | sandbox 67 | ] 68 | } 69 | 70 | resource kaggleSecretKey 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = if (!empty(kaggleUsername)) { 71 | name: 'ws-shkv-${demoBaseName}/kagglekey' 72 | properties: { 73 | value: kaggleKey 74 | } 75 | dependsOn: [ 76 | sandbox 77 | ] 78 | } 79 | -------------------------------------------------------------------------------- /mlops/bicep/sandbox_fl_eyeson_cpu.bicep: -------------------------------------------------------------------------------- 1 | // This BICEP script will fully provision a federated learning sandbox 2 | // with eyes-on access to the orchestrator and silos. 3 | // and only one compute (cpu by default) 4 | 5 | targetScope = 'resourceGroup' 6 | 7 | // please specify the base name for all resources 8 | @description('Base name of the demo, used for creating all resources as prefix') 9 | param demoBaseName string = 'fldemo' 10 | 11 | @description('Region of the orchestrator (workspace, central storage and compute).') 12 | param orchestratorRegion string = resourceGroup().location 13 | 14 | @description('List of each region in which to create an internal silo.') 15 | param siloRegions array = [ 16 | 'australiaeast' 17 | 'eastus' 18 | 'westeurope' 19 | ] 20 | 21 | @description('The VM used for creating compute clusters in orchestrator and silos.') 22 | param computeSKU string = 'Standard_DS4_v2' 23 | 24 | @description('Apply vnet peering to allow for vertical FL') 25 | param applyVNetPeering bool = true 26 | 27 | @description('Provide your Kaggle API user name to run our samples relying on Kaggle datasets.') 28 | param kaggleUsername string = '' 29 | 30 | @description('Provide your Kaggle API key to run our samples relying on Kaggle datasets.') 31 | @secure() 32 | param kaggleKey string = '' 33 | 34 | 35 | // run the generic sandbox bicep script with proper arguments 36 | module sandbox 'vnet_publicip_sandbox_setup.bicep' = { 37 | name: 'sandbox-${demoBaseName}' 38 | params: { 39 | demoBaseName: demoBaseName 40 | orchestratorRegion: orchestratorRegion 41 | siloRegions: siloRegions 42 | 43 | // computes 44 | compute1SKU: computeSKU 45 | compute2: false 46 | 47 | // eyes-on/eyes-off settings 48 | orchestratorStorageNetworkAccess: 'public' 49 | siloStorageNetworkAccess: 'public' 50 | 51 | // ready for vertical FL 52 | applyVNetPeering: applyVNetPeering 53 | } 54 | } 55 | 56 | // Add kaggle secrets if given 57 | resource kaggleSecretUsername 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = if (!empty(kaggleUsername)) { 58 | name: 'ws-shkv-${demoBaseName}/kaggleusername' 59 | properties: { 60 | value: kaggleUsername 61 | } 62 | dependsOn: [ 63 | sandbox 64 | ] 65 | } 66 | 67 | resource kaggleSecretKey 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = if (!empty(kaggleUsername)) { 68 | name: 'ws-shkv-${demoBaseName}/kagglekey' 69 | properties: { 70 | value: kaggleKey 71 | } 72 | dependsOn: [ 73 | sandbox 74 | ] 75 | } 76 | -------------------------------------------------------------------------------- /mlops/bicep/sandbox_fl_eyeson_cpu_gpu.bicep: 
-------------------------------------------------------------------------------- 1 | // This BICEP script will fully provision a federated learning sandbox 2 | // with eyes-on access to the orchestrator and silos. 3 | // and two computes (cpu and gpu) 4 | 5 | targetScope = 'resourceGroup' 6 | 7 | // please specify the base name for all resources 8 | @description('Base name of the demo, used for creating all resources as prefix') 9 | param demoBaseName string = 'fldemo' 10 | 11 | @description('Region of the orchestrator (workspace, central storage and compute).') 12 | param orchestratorRegion string = resourceGroup().location 13 | 14 | @description('List of each region in which to create an internal silo.') 15 | param siloRegions array = [ 16 | 'australiaeast' 17 | 'eastus' 18 | 'westeurope' 19 | ] 20 | 21 | @description('The VM used for creating compute clusters in orchestrator and silos.') 22 | param primarySKU string = 'Standard_DS4_v2' 23 | 24 | @description('The VM used for creating a second compute cluster in orchestrator and silos.') 25 | param secondarySKU string = 'Standard_NC6' 26 | 27 | @description('Apply vnet peering to allow for vertical FL') 28 | param applyVNetPeering bool = true 29 | 30 | @description('Provide your Kaggle API user name to run our samples relying on Kaggle datasets.') 31 | param kaggleUsername string = '' 32 | 33 | @description('Provide your Kaggle API key to run our samples relying on Kaggle datasets.') 34 | @secure() 35 | param kaggleKey string = '' 36 | 37 | 38 | // run the generic sandbox bicep script with proper arguments 39 | module sandbox 'vnet_publicip_sandbox_setup.bicep' = { 40 | name: 'sandbox-${demoBaseName}' 41 | params: { 42 | demoBaseName: demoBaseName 43 | orchestratorRegion: orchestratorRegion 44 | siloRegions: siloRegions 45 | 46 | // computes 47 | compute1SKU: primarySKU 48 | compute2: true 49 | compute2SKU: secondarySKU 50 | 51 | // eyes-on/eyes-off settings 52 | orchestratorStorageNetworkAccess: 'public' 53 | siloStorageNetworkAccess: 'public' 54 | 55 | // ready for vertical FL 56 | applyVNetPeering: applyVNetPeering 57 | } 58 | } 59 | 60 | // Add kaggle secrets if given 61 | resource kaggleSecretUsername 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = if (!empty(kaggleUsername)) { 62 | name: 'ws-shkv-${demoBaseName}/kaggleusername' 63 | properties: { 64 | value: kaggleUsername 65 | } 66 | dependsOn: [ 67 | sandbox 68 | ] 69 | } 70 | 71 | resource kaggleSecretKey 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = if (!empty(kaggleUsername)) { 72 | name: 'ws-shkv-${demoBaseName}/kagglekey' 73 | properties: { 74 | value: kaggleKey 75 | } 76 | dependsOn: [ 77 | sandbox 78 | ] 79 | } 80 | -------------------------------------------------------------------------------- /mlops/bicep/sandbox_fl_eyeson_gpu.bicep: -------------------------------------------------------------------------------- 1 | // This BICEP script will fully provision a federated learning sandbox 2 | // with eyes-on access to the orchestrator and silos. 
3 | // and only one compute (gpu by default) 4 | 5 | targetScope = 'resourceGroup' 6 | 7 | // please specify the base name for all resources 8 | @description('Base name of the demo, used for creating all resources as prefix') 9 | param demoBaseName string = 'fldemo' 10 | 11 | @description('Region of the orchestrator (workspace, central storage and compute).') 12 | param orchestratorRegion string = resourceGroup().location 13 | 14 | @description('List of each region in which to create an internal silo.') 15 | param siloRegions array = [ 16 | 'australiaeast' 17 | 'eastus' 18 | 'westeurope' 19 | ] 20 | 21 | @description('The VM used for creating compute clusters in orchestrator and silos.') 22 | param computeSKU string = 'Standard_NC6' 23 | 24 | @description('Apply vnet peering to allow for vertical FL') 25 | param applyVNetPeering bool = true 26 | 27 | @description('Provide your Kaggle API user name to run our samples relying on Kaggle datasets.') 28 | param kaggleUsername string = '' 29 | 30 | @description('Provide your Kaggle API key to run our samples relying on Kaggle datasets.') 31 | @secure() 32 | param kaggleKey string = '' 33 | 34 | 35 | // run the generic sandbox bicep script with proper arguments 36 | module sandbox 'vnet_publicip_sandbox_setup.bicep' = { 37 | name: 'sandbox-${demoBaseName}' 38 | params: { 39 | demoBaseName: demoBaseName 40 | orchestratorRegion: orchestratorRegion 41 | siloRegions: siloRegions 42 | 43 | // computes 44 | compute1SKU: computeSKU 45 | compute2: false 46 | 47 | // eyes-on/eyes-off settings 48 | orchestratorStorageNetworkAccess: 'public' 49 | siloStorageNetworkAccess: 'public' 50 | 51 | // ready for vertical FL 52 | applyVNetPeering: applyVNetPeering 53 | } 54 | } 55 | 56 | // Add kaggle secrets if given 57 | resource kaggleSecretUsername 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = if (!empty(kaggleUsername)) { 58 | name: 'ws-shkv-${demoBaseName}/kaggleusername' 59 | properties: { 60 | value: kaggleUsername 61 | } 62 | dependsOn: [ 63 | sandbox 64 | ] 65 | } 66 | 67 | resource kaggleSecretKey 'Microsoft.KeyVault/vaults/secrets@2022-07-01' = if (!empty(kaggleUsername)) { 68 | name: 'ws-shkv-${demoBaseName}/kagglekey' 69 | properties: { 70 | value: kaggleKey 71 | } 72 | dependsOn: [ 73 | sandbox 74 | ] 75 | } 76 | -------------------------------------------------------------------------------- /mlops/k8s_templates/README.md: -------------------------------------------------------------------------------- 1 | # Kubernetes Templates 2 | 3 | ## Contents 4 | This folder contains example yaml templates you can use for creating kubernetes (k8s) clusters meeting different types of requirements. 5 | 6 | > Note: Please keep in mind that for both use cases you need to create an AzureML instance type; the process is documented at the end of the [following document](../../docs/provisioning/silo_open_aks_with_cc.md). 7 | 8 | ## Templates for creating on-premises k8s clusters with access to local data 9 | The use of templates is documented in [this tutorial](../../docs/tutorials/read-local-data-in-k8s-silo.md). 10 | - [k8s_config.yaml](./k8s_config.yaml): for creating a k8s cluster using [kind](https://kind.sigs.k8s.io/). There is an extra mount added to the cluster, which is used to access the local data. 11 | - [pv.yaml](./pv.yaml), [pvc.yaml](./pvc.yaml), [deploy_pvc](./deploy_pvc.yaml): for creating a [Persistent Volume](https://kubernetes.io/docs/concepts/storage/persistent-volumes/), claiming it, and deploying it.
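Once a silo is set up this way, an Azure ML job running on it can read the mounted folder directly. As a purely hypothetical illustration (the mount path is whatever pvc.yaml declares in its ml.azure.com/mountpath annotation, /mnt/localdata in the template below):

# Hypothetical snippet for a component running on such a k8s silo:
# list the files exposed through the persistent volume claim.
import os

LOCAL_DATA_PATH = "/mnt/localdata"  # must match the ml.azure.com/mountpath annotation in pvc.yaml

for root, _, files in os.walk(LOCAL_DATA_PATH):
    for name in files:
        print(os.path.join(root, name))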
12 | 13 | ## Templates for creating k8s clusters using Confidential Compute 14 | The use of these templates is documented in [this tutorial](../../docs/provisioning/silo_open_aks_with_cc.md). 15 | 16 | - ... -------------------------------------------------------------------------------- /mlops/k8s_templates/deploy_pvc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: pvc 5 | namespace: default 6 | labels: 7 | app: demolocaldata 8 | spec: 9 | replicas: 1 10 | selector: 11 | matchLabels: 12 | app: pvc 13 | template: 14 | metadata: 15 | labels: 16 | app: pvc 17 | spec: 18 | containers: 19 | - name: name 20 | image: gcr.io/google-containers/echoserver:1.10 21 | volumeMounts: 22 | - mountPath: /localdata # Path on the docker file system where the local data folder was mounted. 23 | name: localdata 24 | volumes: 25 | - name: localdata 26 | persistentVolumeClaim: 27 | claimName: demolocaldata -------------------------------------------------------------------------------- /mlops/k8s_templates/instance-type.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: amlarc.azureml.com/v1alpha1 2 | kind: InstanceType 3 | metadata: 4 | name: computeinstancetype 5 | spec: 6 | resources: 7 | limits: 8 | cpu: "2" 9 | memory: "8Gi" 10 | requests: 11 | cpu: "1" 12 | memory: "4Gi" -------------------------------------------------------------------------------- /mlops/k8s_templates/k8s_config.yaml: -------------------------------------------------------------------------------- 1 | kind: Cluster 2 | apiVersion: kind.x-k8s.io/v1alpha4 3 | name: my-kind-cluster 4 | nodes: 5 | - role: control-plane 6 | image: kindest/node:v1.24.6@sha256:97e8d00bc37a7598a0b32d1fabd155a96355c49fa0d4d4790aab0f161bf31be1 # We recommend keeping the 1.24.6 k8s version that we used in our tests. 7 | extraMounts: 8 | - hostPath: /path/to/data # The path to the folder you want to expose. If you're using Linux, this is the verbatim path; if you're using Windows and your data are located at C:\path\to\data, the path should be: /run/desktop/mnt/host/c/path/to/data 9 | containerPath: /localdata # Path on the docker file system where to mount the local folder. 10 | -------------------------------------------------------------------------------- /mlops/k8s_templates/pv.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolume 3 | metadata: 4 | name: demolocaldata 5 | labels: 6 | app: demolocaldata 7 | spec: 8 | storageClassName: manual 9 | 10 | capacity: 11 | storage: 1Gi 12 | accessModes: 13 | - ReadWriteMany 14 | claimRef: # the claimRef is here to provide a "connection" between this pv and the pvc. 15 | apiVersion: v1 16 | kind: PersistentVolumeClaim 17 | name: demolocaldata 18 | namespace: default 19 | hostPath: 20 | path: /localdata # the path in the docker file system that we want to expose in k8s -------------------------------------------------------------------------------- /mlops/k8s_templates/pvc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: PersistentVolumeClaim 3 | metadata: 4 | name: demolocaldata 5 | namespace: default 6 | # The label and annotation below are required for the Azure ML job to have access to the mounted folders.
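# (Illustrative aside, not required by the template: with the default mountpath annotation below, code running in the Azure ML job would read the local data under /mnt/localdata, e.g. /mnt/localdata/my_dataset.csv; the file name here is hypothetical.)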
7 | labels: 8 | app: demolocaldata 9 | ml.azure.com/pvc: "true" 10 | annotations: 11 | ml.azure.com/mountpath: "/mnt/localdata" # The path from which the local data will be accessed during the Azure ML job. You can change that to a different path if you want. 12 | spec: 13 | storageClassName: manual 14 | accessModes: 15 | - ReadWriteMany 16 | resources: 17 | requests: 18 | # The amount of the volume's storage to request. 19 | storage: 1Gi -------------------------------------------------------------------------------- /tests/examples/components/shared/utils.py: -------------------------------------------------------------------------------- 1 | import socket 2 | 3 | 4 | def get_free_port(): 5 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 6 | s.bind(("", 0)) 7 | s.listen(1) 8 | port = s.getsockname()[1] 9 | s.close() 10 | return str(port) 11 | -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | --extra-index-url https://download.pytorch.org/whl/cpu 2 | coverage==7.2.2 3 | torch 4 | redis==4.5.4 5 | azure-keyvault==4.2.0 6 | azureml-core==1.47.0 7 | azureml-mlflow==1.48.0 8 | --------------------------------------------------------------------------------