├── .github ├── dependabot.yml └── workflows │ └── jekyll-gh-pages.yml ├── .gitignore ├── .gitmodules ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── solutions └── orchestrate_protein_design_workloads │ ├── README.md │ ├── infrastructure │ ├── cloudformation │ │ ├── batch-protein-folding-cfn-batch.yaml │ │ ├── batch-protein-folding-cfn-module-nextflow.yaml │ │ └── batch-protein-folding-cfn-root.yaml │ └── docker │ │ └── nextflow │ │ ├── Dockerfile │ │ └── requirements.txt │ └── notebooks │ └── orchestration │ ├── bin │ ├── get_fastas.py │ ├── run_esmfold.py │ ├── run_rfdesign.py │ ├── run_rfdesign_inpainting.py │ └── wait_for_batch.py │ ├── pd1_demo │ ├── pd1.pdb │ └── pdl1.pdb │ ├── run_nextflow.md │ ├── run_rfdesign_esmfold_multiple__sequences.nf │ └── submit_nextflow_job.py └── workshops ├── AI_Driven_Protein_Analysis ├── 1-esmfold-on-sagemaker.ipynb ├── 2-protein-design.ipynb ├── LICENSE ├── README.md ├── THIRD-PARTY-NOTICES ├── code │ ├── inference.py │ └── requirements.txt ├── esmfold-requirements.txt ├── img │ ├── 01.png │ ├── 02.png │ ├── 03.png │ ├── 04.png │ ├── 05.png │ ├── 06.png │ ├── antibody.png │ ├── brca_stats.png │ ├── endpoint_results.png │ ├── herceptin_redesign_target.png │ ├── overexpression.png │ ├── pdb-alignment-1.png │ ├── pdb-alignment-2.png │ ├── pdb_config.png │ └── pdb_results.png ├── protein-design-requirements.txt └── prothelpers │ ├── __init__.py │ ├── config │ └── rfdiffusion.yaml │ ├── protein_mpnn_utils.py │ ├── proteinmpnn.py │ ├── rfdiffusion.py │ ├── sequence.py │ └── structure.py ├── AI_ML_services_workshop_information ├── AI_ML_Services_Hands_On_Lab_Instructions.pdf ├── ATTRIBUTION.txt ├── cfn_workshop_ai_ml_services.yaml ├── chest-xray.jpg ├── lambda_code │ ├── ai_ml_services_lambda.py │ ├── install_lambda_function_requirements.sh │ ├── lambda.zip │ └── requirements.txt ├── patient_note.txt ├── sample_patient_note.png └── sample_patient_note.png_out ├── Amazon_Neptune_ML_PPI_Analysis ├── 1_preprocessing.ipynb ├── 2_data_exploration.ipynb ├── 3_model_training.ipynb ├── README.md ├── THIRD-PARTY-NOTICES ├── cfn │ ├── neptune-base-stack-existing-VPC.json │ ├── neptune-ml-core-stack.json │ ├── neptune-ml-nested-stack.json │ └── neptune-sagemaker-notebook-stack.json ├── create_neptune_ml_kernel.sh ├── deploy.sh ├── environment.gpu.yml ├── neptune_ml_utils.py ├── src │ ├── model-hpo-configuration.json │ ├── train.py │ └── transform.py └── uniprot.py ├── BIomedical_Researcher └── gradio │ ├── README.md │ ├── app.py │ ├── chat.py │ ├── pubmed.py │ └── requirements.txt ├── Build_Multiple_Models_In_Parallel_SageMaker └── train_multiple_models.ipynb ├── Cancer-gene-expression-survival-prediction-with-mme ├── .gitignore ├── Genome-Survival-Prediction-Pipeline-MME.ipynb ├── data │ └── Genomic-data-119patients.csv ├── images │ ├── Architecture.jpeg │ ├── image_2.jpg │ └── image_3.jpg ├── model │ ├── meta.json │ ├── model.pth │ └── model.tar.gz └── src │ ├── _model.py │ ├── _repack_model.py │ ├── evaluation.py │ ├── genome_groups.py │ ├── inference.py │ ├── mme_deployment.py │ └── train.py ├── Classify_Medical_Specialty_NLP_Huggingface_Transformers ├── 1_sagemaker_medical_specialty_using_transfomers.ipynb ├── MTsample_input_data.csv ├── get_dependencies.py ├── requirements.txt ├── train.csv └── train.py ├── Classify_Skin_Lesion_Images ├── 1-Classify_Skin_Lesion_Images.ipynb ├── CITATION ├── THIRD-PARTY-NOTICES ├── img │ ├── MLLC1.png │ ├── MLLC2.png │ ├── Trial-component-list.png │ ├── deployment_options.png │ ├── experiments.png │ ├── 
find-prod-deploy.png │ ├── jobs.png │ ├── lesions.png │ ├── metrics.png │ ├── model_registry.png │ ├── name_project.png │ ├── overexpression.png │ ├── pipeline.png │ ├── pipeline_execution.png │ ├── repo_defaults.png │ ├── repositories.png │ ├── resources.png │ ├── second-endpoint.png │ ├── select-model-version.png │ ├── sidebar.png │ ├── sm-resources-tab.png │ ├── sm_experiments.png │ ├── tc-list-2.png │ ├── template_build.jpg │ ├── template_deploy.jpg │ └── update-status.png ├── scripts │ ├── pipelines │ │ ├── codebuild-buildspec.yml │ │ └── skinlesions │ │ │ ├── __init__.py │ │ │ ├── evaluate.py │ │ │ ├── pipeline.py │ │ │ └── preprocess.py │ └── processing │ │ └── process.py └── visualizer │ ├── __init__.py │ └── visualizer.py ├── Explain-hospital-triage-from-admission-notes └── explain-hospital-triage-prediction-with-amazon-sagemaker-clarify.ipynb ├── FDA_Doc_Search ├── .gitignore ├── 1-load-ha-data-into-S3.ipynb ├── 2-create-kendra-index.ipynb ├── 3-test-question-answering.ipynb ├── 4-create-prompt-flows.ipynb ├── LICENSE ├── README.md ├── app │ ├── app.py │ ├── server.py │ └── www │ │ ├── img │ │ ├── brain-light.png │ │ └── brain.png │ │ └── main.css ├── arch.png ├── requirements.txt └── src │ ├── __init__.py │ └── helpers.py ├── Healthcare_Payments_Prediction_SageMaker_AutoPilot ├── Healthcare_Payments_Prediction_SageMaker_AutoPilot.ipynb ├── SageMakerAutopilotCandidateDefinitionNotebook.ipynb ├── SageMakerAutopilotDataExplorationNotebook.ipynb ├── healthcare_data_sample.csv ├── img │ └── autopilot_schematic.png └── report.ipynb ├── Medical_Imaging_AI ├── README.md ├── img │ └── arch.png ├── source │ ├── requirements.txt │ └── train.py ├── spleen_segmentation_3d_tutorial.ipynb └── spleen_segmentation_sagemaker_managedtraining.ipynb ├── Medicare_Hospital_Cost_Prediction └── Jupyter_Notebook_Medicare_Hospital_Cost_Prediction.ipynb ├── Molecular-property-prediction └── hiv-inhibitor-prediction-dgl │ ├── code │ ├── inference.py │ ├── requirements.txt │ ├── s3_downloaded_HIV_dataset.py │ ├── train.py │ └── utils.py │ ├── img │ └── 1.jpg │ ├── molecule-hiv-inhibitor-prediction-sagemaker.ipynb │ └── requirements.txt ├── Pharma_Manufacturing_Compliance_Bedrock_GenAI ├── .gitignore ├── README.md ├── docker │ ├── Dockerfile │ ├── app.py │ ├── cf.yaml │ ├── deploy.sh │ ├── destroy.sh │ ├── images │ │ └── manufacturing_diagram.png │ └── requirements.txt ├── document_compliance_checker.ipynb ├── gradio_interface_test.py ├── penicillin_manufacturing.txt ├── pharma_manufacturing_compliance_checker.ipynb ├── ping_claude.ipynb ├── requirements.txt └── sample_sop.txt ├── Process_HCLS_Docs_Using_AI_Services ├── Process-Medical-Documents.ipynb └── data │ └── sample_report_1.pdf ├── Protein_Language_Modelling ├── README.md ├── deploy_esm_to_inf2 │ ├── Deploy-ESM2-to-Inf2.ipynb │ └── scripts │ │ ├── inference.py │ │ └── requirements.txt ├── finetune_esm_on_deeploc │ ├── Fine-Tune-ESM2-On-DeepLoc.ipynb │ └── scripts │ │ ├── inference.py │ │ ├── lora-train.py │ │ └── requirements.txt ├── finetune_esm_on_oas │ ├── Fine-Tune-ESM2-On-OAS-Paired.ipynb │ ├── Fine-Tune-ESM2-On-OAS.ipynb │ └── scripts │ │ ├── cuda │ │ ├── cuda-oas-mlm-train-ddp-fsdp.py │ │ ├── cuda-oas-mlm-train-ddp.py │ │ ├── cuda-oas-mlm-train-smddp.py │ │ └── requirements.txt │ │ ├── esm-accelerate-examples │ │ ├── oas_mlm_accelerate.py │ │ ├── oas_mlm_trainer.py │ │ ├── oashelpers.py │ │ └── requirements.txt │ │ └── neuron │ │ ├── requirements.txt │ │ └── trn1-oas-mlm-train-dp.py ├── img │ └── protein.png └── pretrain_esm_on_uniref │ ├── 
240131-benchmarking-plms-on-uniref50.ipynb │ └── scripts │ ├── processing │ ├── fasta_to_csv.py │ ├── requirements.txt │ └── tokenize_uniref_csv.py │ └── training │ ├── cuda │ ├── requirements.txt │ └── run_mlm.py │ └── neuron │ ├── requirements.txt │ └── torch_xla_train.py ├── RNAseq_Tertiary_Analysis ├── 1_Explore_RNASeq_Data_in_SageMaker_Studio.ipynb ├── 2_Use_SageMaker_Training_to_Classify_Breast_Cancer_Using_Gene_Expression.ipynb ├── 3_Track_Model_Quality_with_SageMaker_MLOps.ipynb ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── img │ ├── 640px-Gene_structure_eukaryote_2_annotated.png │ ├── MLLC1.png │ ├── MLLC2.png │ ├── Trial-component-list.png │ ├── approve-prod.png │ ├── brca_stats.png │ ├── charts.png │ ├── cloned_folders.png │ ├── code-pipeline.png │ ├── create_project.png │ ├── deploy-stage.png │ ├── deployment_options.png │ ├── deployments.png │ ├── exp-1.png │ ├── exp-2.png │ ├── exp-3.png │ ├── exp-4.png │ ├── experiments.png │ ├── find-prod-deploy.png │ ├── jobs.png │ ├── lineage_graph.png │ ├── metrics.png │ ├── mlflow-diagram.png │ ├── model_registry.png │ ├── overexpression.png │ ├── pipeline.png │ ├── pipeline_execution.png │ ├── project-1.png │ ├── project-2.png │ ├── project-3.png │ ├── project-4.png │ ├── project_name.png │ ├── projects.png │ ├── repo_defaults.png │ ├── repositories.png │ ├── second-endpoint.png │ ├── select-model-version.png │ ├── sidebar.png │ ├── sm-resources-tab.png │ ├── sm_experiments.png │ ├── tc-list-2.png │ ├── template_build.jpg │ ├── template_deploy.jpg │ └── update-status.png └── scripts │ ├── processing │ ├── processing.py │ └── requirements.txt │ ├── rf_train │ ├── requirements.txt │ └── rf_train.py │ ├── tf_train │ ├── requirements.txt │ └── tf_train.py │ └── xgb_train │ ├── requirements.txt │ └── xgb_train.py ├── Sagemaker_Pipelines_Automated_Retraining ├── cfn_sagemaker_pipelines.yaml ├── kick_off_pipeline_lambda.py └── sagemaker_pipelines_automated_retraining.ipynb ├── Scalable_Drug_Discovery ├── 1-active_learning.ipynb ├── 2-directed_evolution.ipynb ├── 3-ml-guided_directed_evolution.ipynb ├── EvoProtGrad │ ├── CHANGELOG.md │ ├── CONTRIBUTING.md │ ├── LICENSE │ ├── README.md │ ├── evo_prot_grad │ │ ├── __init__.py │ │ ├── common │ │ │ ├── __init__.py │ │ │ ├── embeddings.py │ │ │ ├── sampler.py │ │ │ ├── tokenizers.py │ │ │ ├── utils.py │ │ │ └── variant_scoring.py │ │ ├── experts │ │ │ ├── __init__.py │ │ │ ├── amplify_expert.py │ │ │ ├── base_experts.py │ │ │ ├── bert_expert.py │ │ │ ├── causallm_expert.py │ │ │ ├── esm_downstream_regression_expert.py │ │ │ ├── esm_expert.py │ │ │ ├── evcouplings_expert.py │ │ │ └── onehot_downstream_regression_expert.py │ │ └── models │ │ │ ├── downstream_cnn.py │ │ │ └── potts.py │ ├── requirements.txt │ └── setup.py ├── helpers.py ├── img │ ├── active_learning.png │ ├── dmtl.png │ ├── elements.txt │ ├── evo.png │ ├── flame.txt │ ├── flask.txt │ ├── ft.png │ ├── gen.png │ ├── helix1.txt │ ├── helix2.txt │ ├── lab.png │ ├── nanobody.png │ ├── science.txt │ ├── score.png │ ├── select.png │ └── sine.txt ├── requirements.txt └── train.py ├── X_ray_Object_Detection_Ground_Truth ├── chest_image.png ├── ground_truth_utils.py ├── template.manifest └── x_ray_ground_truth_object_detection.ipynb └── archive ├── Bring_Your_Own_Sklearn_Classifier └── archive │ ├── .gitignore │ ├── README │ ├── data_wrangler_sklearn_bring_your_own_MLP_Classifier_Breast_Diagnostic.ipynb │ ├── hcls-lab.flow │ ├── images │ ├── dw_create_flow.png │ ├── dw_export.png │ ├── dw_export_start.png │ ├── 
dw_import_s3.png │ ├── dw_import_s3_start.png │ ├── dw_rename_flow.png │ ├── dw_transform.png │ ├── dw_transform_add.png │ ├── dw_transform_custom.png │ ├── dw_transform_drop_column_diagnosisb.png │ ├── dw_transform_drop_column_id.png │ ├── dw_transform_encode_categorical.png │ ├── dw_transform_pandas.png │ └── dw_transform_rename.png │ ├── requirements.txt │ └── sklearn_bring_your_own_MLP_Classifier_Breast_Diagnostic.ipynb └── Summarize_Scientific_Documents ├── GenAI-Summarize-Scientific-Documents.ipynb └── README /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "daily" 12 | -------------------------------------------------------------------------------- /.github/workflows/jekyll-gh-pages.yml: -------------------------------------------------------------------------------- 1 | # Sample workflow for building and deploying a Jekyll site to GitHub Pages 2 | name: Deploy github pages site 3 | 4 | on: 5 | # Runs on pushes targeting the default branch 6 | push: 7 | branches: ["gh-pages"] 8 | 9 | # Allows you to run this workflow manually from the Actions tab 10 | workflow_dispatch: 11 | 12 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 13 | permissions: 14 | contents: read 15 | pages: write 16 | id-token: write 17 | 18 | # Allow one concurrent deployment 19 | concurrency: 20 | group: "pages" 21 | cancel-in-progress: true 22 | 23 | jobs: 24 | # Build job 25 | build: 26 | runs-on: ubuntu-latest 27 | steps: 28 | - name: Checkout 29 | uses: actions/checkout@v3 30 | with: 31 | ref: 'gh-pages' 32 | - name: Setup Pages 33 | uses: actions/configure-pages@v3 34 | - name: Build with Jekyll 35 | uses: actions/jekyll-build-pages@v1 36 | with: 37 | source: ./docs 38 | destination: ./_site 39 | - name: Upload artifact 40 | uses: actions/upload-pages-artifact@v1 41 | 42 | # Deployment job 43 | deploy: 44 | environment: 45 | name: github-pages 46 | url: ${{ steps.deployment.outputs.page_url }} 47 | runs-on: ubuntu-latest 48 | needs: build 49 | steps: 50 | - name: Deploy to GitHub Pages 51 | id: deployment 52 | uses: actions/deploy-pages@v1 53 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | **/*.ipynb_checkpoints/ 3 | .venv/ 4 | workshops/Marketing_Content_Localization/dependencies/ 5 | __pycache__/ -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "workshops/MONAI-MedicalImage-SageMaker"] 2 | path = workshops/Medical_Imaging_AI/Advanced_MONAI_Workshops/MONAI-MedicalImage-SageMaker 3 | url = https://github.com/YuanSingapore/MONAI-MedicalImage-SageMaker.git 4 | [submodule "workshops/Enrich_Comprehend_Medical_Custom_Models/amazon-comprehend-medical-enrich-custom-models"] 5 | path = 
workshops/Enrich_Comprehend_Medical_Custom_Models/amazon-comprehend-medical-enrich-custom-models 6 | url = https://github.com/aws-samples/amazon-comprehend-medical-enrich-custom-models.git 7 | [submodule "workshops/Medical_Sentence_Relevance_Pretrained_Bert/medical-text-sentence-relevance-bert"] 8 | path = workshops/Medical_Sentence_Relevance_Pretrained_Bert/medical-text-sentence-relevance-bert 9 | url = https://github.com/aws-samples/medical-text-sentence-relevance-bert 10 | [submodule "workshops/Monte_Carlo_Simulations_RStudio/amazon-sagemaker-statistical-simulation-rstudio"] 11 | path = workshops/Monte_Carlo_Simulations_RStudio/amazon-sagemaker-statistical-simulation-rstudio 12 | url = https://github.com/aws-samples/amazon-sagemaker-statistical-simulation-rstudio 13 | [submodule "workshops/Predict_Training_Resource_Usage_SageMaker/amazon-sagemaker-predict-training-resource-usage"] 14 | path = workshops/Predict_Training_Resource_Usage_SageMaker/amazon-sagemaker-predict-training-resource-usage 15 | url = https://github.com/aws-samples/amazon-sagemaker-predict-training-resource-usage 16 | [submodule "solutions/aws-healthcare-lifescience-ai-ml-sample-notebooks"] 17 | path = solutions/aws-healthcare-lifescience-ai-ml-sample-notebooks 18 | url = https://github.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks.git 19 | [submodule "solutions/awesome-proteomics-on-aws"] 20 | path = solutions/awesome-proteomics-on-aws 21 | url = https://github.com/aws-samples/awesome-proteomics-on-aws 22 | [submodule "solutions/aws-batch-architecture-for-rfdesign"] 23 | path = solutions/aws-batch-architecture-for-rfdesign 24 | url = https://github.com/aws-samples/aws-batch-architecture-for-rfdesign.git 25 | [submodule "solutions/awesome-protein-analysis-on-aws"] 26 | path = solutions/awesome-protein-analysis-on-aws 27 | url = https://github.com/aws-samples/awesome-protein-analysis-on-aws.git 28 | [submodule "samples/generate-company-summary-via-generative-ai"] 29 | path = samples/generate-company-summary-via-generative-ai 30 | url = https://github.com/aws-samples/generate-company-summary-via-generative-ai 31 | [submodule "genomic-language-models/genomic-language-model-pretraining-with-healthomics-seq-store"] 32 | path = workshops/genomic-language-models/genomic-language-model-pretraining-with-healthomics-seq-store 33 | url = https://github.com/aws-samples/genomic-language-model-pretraining-with-healthomics-seq-store 34 | [submodule "samples/genomic-language-model-pretraining-with-healthomics-seq-store"] 35 | path = samples/genomic-language-model-pretraining-with-healthomics-seq-store 36 | url = https://github.com/aws-samples/genomic-language-model-pretraining-with-healthomics-seq-store 37 | [submodule "solutions/drug-discovery-workflows"] 38 | path = solutions/drug-discovery-workflows 39 | url = https://github.com/aws-samples/drug-discovery-workflows 40 | [submodule "samples/text-to-sparql-on-neptune-with-uniprot"] 41 | path = samples/text-to-sparql-on-neptune-with-uniprot 42 | url = git@github.com:aws-samples/text-to-sparql-on-neptune-with-uniprot.git 43 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 
3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute to. As our projects use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 
51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project, we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 15 | 16 | -------------------------------------------------------------------------------- /solutions/orchestrate_protein_design_workloads/infrastructure/docker/nextflow/Dockerfile: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Original Copyright 2021 DeepMind Technologies Limited 4 | # Modifications Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
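# Builds the Nextflow orchestrator image: Ubuntu 18.04 with Python 3.8, the AWS CLI v2, Miniconda,
# Nextflow, and the BatchFold client library, intended to run as an AWS Batch job container.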
5 | # SPDX-License-Identifier: Apache-2.0 6 | 
7 | FROM public.ecr.aws/lts/ubuntu:18.04_stable as base_image 8 | 
9 | SHELL ["/bin/bash", "-c"] 10 | 
11 | RUN apt-get update \ 12 |   && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ 13 |   build-essential \ 14 |   cmake \ 15 |   wget \ 16 |   git \ 17 |   unzip \ 18 |   hmmer \ 19 |   tar \ 20 |   awscli \ 21 |   python3.8 \ 22 |   python3-pip \ 23 |   openjdk-11-jdk-headless \ 24 |   && rm -rf /var/lib/apt/lists/* \ 25 |   && apt-get autoremove -y \ 26 |   && apt-get clean \ 27 |   && rm /usr/bin/python3 \ 28 |   && ln -s /usr/bin/python3.8 /usr/bin/python3 29 | 
30 | #COPY env_files /env_files 31 | 32 | 
33 | # Install AWS CLI 34 | RUN wget -O "awscliv2.zip" "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" \ 35 |   && unzip awscliv2.zip \ 36 |   && ./aws/install \ 37 |   && rm awscliv2.zip 38 | 
39 | #need to specify default region for boto3 to behave 40 | ENV AWS_DEFAULT_REGION=us-east-1 41 | 
42 | # Install Miniconda package manager and dependencies 43 | ENV PATH="/opt/conda/bin:$PATH" 44 | 45 | 
46 | RUN wget -q -P /tmp \ 47 |   https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-$(uname -m).sh \ 48 |   && bash /tmp/Miniconda3-latest-Linux-$(uname -m).sh -b -p /opt/conda \ 49 |   && rm /tmp/Miniconda3-latest-Linux-$(uname -m).sh \ 50 |   && conda update -n base -c conda-forge conda \ 51 |   && conda config --set ssl_verify no \ 52 |   && conda init bash \ 53 |   && conda clean --all --yes 54 | 55 | 
56 | #RUN pip install -r env_files/requirements.txt 57 | 
58 | RUN cd /usr/bin && wget -qO- https://get.nextflow.io | bash #install nextflow 59 | 
60 | #install batchfold for convenience also. 61 | RUN cd /root && git clone --depth 1 https://github.com/aws-solutions-library-samples/aws-batch-arch-for-protein-folding.git && cd aws-batch-arch-for-protein-folding && pip install . && cd notebooks/ && pip install -U -q -r notebook-requirements.txt 62 | 
63 | #RUN cd /root && git clone --depth 1 https://github.com/aws-solutions-library-samples/aws-batch-arch-for-protein-folding.git && cd aws-batch-arch-for-protein-folding && pip install . && cd infrastructure/docker/nextflow && pip install -U -q -r requirements.txt
64 | 65 | 66 | 
67 | WORKDIR /root 68 | 
69 | ENTRYPOINT ["bash", "-c"] #enable passing of arbitrary commands to nextflow 70 | -------------------------------------------------------------------------------- /solutions/orchestrate_protein_design_workloads/infrastructure/docker/nextflow/requirements.txt: -------------------------------------------------------------------------------- 1 | pip 2 | boto3==1.24.89 3 | matplotlib==3.6.1 4 | sagemaker==2.112.1 5 | botocore==1.27.89 6 | jaxlib==0.3.20 7 | jax==0.3.21 8 | biopython==1.79 9 | py3Dmol==1.8.1 10 | dm-tree==0.1.7 11 | numpy==1.23.3 12 | attrs==22.1.0 13 | -------------------------------------------------------------------------------- /solutions/orchestrate_protein_design_workloads/notebooks/orchestration/bin/get_fastas.py: -------------------------------------------------------------------------------- 
1 | import boto3
2 | import re
3 | import sys
4 | boto_session = boto3.session.Session()
5 | s3 = boto_session.client("s3")
6 | 
7 | s3_uri=sys.argv[1]
8 | def get_files_within_s3_uri(s3_uri):
9 | 
10 |     mybucket=re.findall(r"s3://(.*?)/",s3_uri)[0]
11 |     myprefix=re.findall(r"s3://.*?/(.*)",s3_uri)[0]
12 |     object_list = []
13 |     object_list_2=[]
14 |     try:
15 |         paginator = s3.get_paginator('list_objects_v2')
16 |         pages = paginator.paginate(Bucket=mybucket, Prefix=myprefix)
17 | 
18 |         for page in pages:
19 |             for obj in page['Contents']:
20 |                 object_list.append(obj['Key'].rstrip())
21 | 
22 |         object_list_2 = [f's3://{mybucket}/{_}' for _ in object_list if _.endswith('.fas')]
23 | 
24 |         return object_list_2
25 |     except Exception as e:
26 |         print(e)
27 |         raise #surface the failure instead of silently returning None
28 | my_objects=get_files_within_s3_uri(s3_uri)
29 | #for i in range(0,len(my_objects)):
30 | #    print(f'''{i} {my_objects[i]}''',end="\n")
31 | print(*my_objects,sep="\n")
-------------------------------------------------------------------------------- /solutions/orchestrate_protein_design_workloads/notebooks/orchestration/bin/run_esmfold.py: -------------------------------------------------------------------------------- 
1 | import boto3
2 | from datetime import datetime
3 | #import matplotlib.pyplot as plt
4 | from batchfold.batchfold_environment import BatchFoldEnvironment
5 | #from batchfold.batchfold_target import BatchFoldTarget
6 | #from batchfold.jackhmmer_job import JackhmmerJob
7 | #from batchfold.openfold_job import OpenFoldJob
8 | #from batchfold.alphafold2_job import AlphaFold2Job
9 | #from batchfold.omegafold_job import OmegaFoldJob
10 | from batchfold.esmfold_job import ESMFoldJob
11 | #from batchfold.utils import utils
12 | #from IPython import display
13 | import numpy as np
14 | import os
15 | import sys
16 | input_s3_uri=sys.argv[1]
17 | output_s3_uri_base=sys.argv[2]
18 | #put the new file in a new directory in s3 based on previous output name
19 | output_s3_uri=output_s3_uri_base+os.path.basename(input_s3_uri).removesuffix(".fas")
20 | 
21 | # Create AWS clients
22 | boto_session = boto3.session.Session() #add profile_name="A_PROFILE" if desired
23 | 
24 | batch_environment = BatchFoldEnvironment(boto_session=boto_session)
25 | 
26 | 
27 | my_datetime=datetime.now().strftime("%Y%m%d%s")
28 | job_name = "jb_target" + "_ESMFoldJob_" + my_datetime
29 | esmfold_job = ESMFoldJob(
30 |     job_name=job_name,
31 |     target_id="my_target",
32 |     fasta_s3_uri=input_s3_uri,
33 |     output_s3_uri=output_s3_uri,
34 |     boto_session=boto_session,
35 |     cpu=8,
36 |     memory=31, # Why not 32? ECS needs about 1 GB for container services
37 |     gpu=1,
38 | )
39 | print(esmfold_job)
40 | esmfold_submission = batch_environment.submit_job(
41 |     esmfold_job, job_queue_name="G4dnJobQueue"
42 | )
43 | -------------------------------------------------------------------------------- /solutions/orchestrate_protein_design_workloads/notebooks/orchestration/bin/run_rfdesign.py: -------------------------------------------------------------------------------- 
1 | # Import required Python packages
2 | import boto3
3 | from batchfold.batchfold_environment import BatchFoldEnvironment
4 | from batchfold.rfdesign_job import RFDesignHallucinateJob, RFDesignInpaintJob
5 | #from batchfold.utils import utils
6 | from Bio.PDB import PDBParser, PDBIO, Selection
7 | from Bio.PDB.PDBList import PDBList
8 | from datetime import datetime
9 | #from IPython import display
10 | #import matplotlib.pyplot as plt
11 | import os
12 | import numpy as np
13 | #import py3Dmol
14 | 
15 | import logging
16 | import sys
17 | import argparse
18 | #logging.basicConfig(stream=sys.stdout, level=logging.INFO)
19 | logging.basicConfig(stream=sys.stderr, level=logging.INFO)
20 | 
21 | parser = argparse.ArgumentParser(description='Parse the options')
22 | parser.add_argument('--input_s3_uri', dest='input_s3_uri', default=None,
23 |     help='input s3 uri (default: None)')
24 | parser.add_argument('--output_s3_uri', dest='output_s3_uri', default=None,
25 |     help='output_s3_uri (default: None)')
26 | parser.add_argument('--num_sequences_to_generate', dest='num_sequences_to_generate', type=int, default=1,
27 |     help='number of sequences for rfdesign to generate (default: 1)')
28 | 
29 | 
30 | args = parser.parse_args()
31 | args=vars(args)
32 | 
33 | input_s3_uri=args['input_s3_uri']
34 | output_s3_uri=args['output_s3_uri']
35 | num_sequences_to_generate=args['num_sequences_to_generate']
36 | 
37 | 
38 | # Create AWS clients
39 | boto_session = boto3.session.Session()
40 | s3 = boto_session.client("s3")
41 | batch_environment = BatchFoldEnvironment(boto_session=boto_session)
42 | 
43 | total_num = num_sequences_to_generate
44 | batch = 1
45 | mask = '25-35,B63-82,15-25,B119-140,0-15'
46 | hallucinate_job_prefix = "RFDesignHallucinateJob" + datetime.now().strftime("%Y%m%d%s")
47 | job_queue_name = "G4dnJobQueue"
48 | 
49 | 
50 | job_name = f"{hallucinate_job_prefix}_0"
51 | params = {
52 |     "mask": mask,
53 |     "steps": "g10",
54 |     "num": total_num,
55 |     "start_num": 0,
56 |     "w_rog": 1,
57 |     "rog_thresh": 16,
58 |     "w_rep": 2,
59 |     "rep_pdb": "input/pdl1.pdb",
60 |     "rep_sigma": 4,
61 |     "save_pdb": True,
62 |     "track_step": 10
63 | }
64 | 
65 | new_job = RFDesignHallucinateJob(
66 |     boto_session=boto_session,
67 |     job_name = job_name,
68 |     target_id = "4ZQK",
69 |     input_s3_uri = input_s3_uri,
70 |     output_s3_uri = output_s3_uri,
71 |     pdb = "input/pd1.pdb",
72 |     params = params
73 | )
74 | 
75 | #print(f"Submitting {job_name}")
76 | #print(new_job)
77 | submission = batch_environment.submit_job(new_job, job_queue_name)
78 | print(submission.job_id)
-------------------------------------------------------------------------------- /solutions/orchestrate_protein_design_workloads/notebooks/orchestration/bin/run_rfdesign_inpainting.py: -------------------------------------------------------------------------------- 
1 | # Import required Python packages
2 | import boto3
3 | from batchfold.batchfold_environment import BatchFoldEnvironment
4 | from batchfold.rfdesign_job import RFDesignHallucinateJob, RFDesignInpaintJob
5 | #from batchfold.utils import utils
6 | from Bio.PDB import PDBParser, PDBIO, Selection
7 | from Bio.PDB.PDBList import PDBList
8 | from datetime import datetime
9 | #from IPython import display
10 | #import matplotlib.pyplot as plt
11 | import os
12 | import numpy as np
13 | #import py3Dmol
14 | 
15 | import logging
16 | import sys
17 | import argparse
18 | #logging.basicConfig(stream=sys.stdout, level=logging.INFO)
19 | logging.basicConfig(stream=sys.stderr, level=logging.INFO)
20 | 
21 | parser = argparse.ArgumentParser(description='Parse the options')
22 | parser.add_argument('--input_s3_uri', dest='input_s3_uri', default=None,
23 |     help='input s3 uri (default: None)')
24 | parser.add_argument('--output_s3_uri', dest='output_s3_uri', default=None,
25 |     help='output_s3_uri (default: None)')
26 | parser.add_argument('--num_sequences_to_generate', dest='num_sequences_to_generate', type=int, default=1,
27 |     help='number of sequences for rfdesign to generate (default: 1)')
28 | 
29 | 
30 | args = parser.parse_args()
31 | args=vars(args)
32 | 
33 | input_s3_uri=args['input_s3_uri']
34 | output_s3_uri=args['output_s3_uri']
35 | num_sequences_to_generate=args['num_sequences_to_generate']
36 | 
37 | 
38 | # Create AWS clients
39 | boto_session = boto3.session.Session()
40 | s3 = boto_session.client("s3")
41 | batch_environment = BatchFoldEnvironment(boto_session=boto_session)
42 | 
43 | total_num = num_sequences_to_generate
44 | job_queue_name = "G4dnJobQueue"
45 | 
46 | 
47 | inpainting_job_name = "RFDesignInpaintingJob" + datetime.now().strftime("%Y%m%d%s")
48 | 
49 | params = {
50 |     "contigs":"25-35,B63-82,15-25,B119-140,0-15",
51 |     "len": "80-115",
52 |     "num_designs": total_num,
53 |     "dump_all": True,
54 | }
55 | new_job = RFDesignInpaintJob(
56 |     boto_session=boto_session,
57 |     job_name = inpainting_job_name,
58 |     target_id = "4ZQK",
59 |     input_s3_uri = input_s3_uri,
60 |     output_s3_uri = output_s3_uri,
61 |     pdb = "input/pd1.pdb",
62 |     params = params
63 | )
64 | 
65 | #print(f"Submitting {job_name}")
66 | #print(new_job)
67 | submission = batch_environment.submit_job(new_job, job_queue_name)
68 | print(submission.job_id)
-------------------------------------------------------------------------------- /solutions/orchestrate_protein_design_workloads/notebooks/orchestration/bin/wait_for_batch.py: -------------------------------------------------------------------------------- 
1 | import boto3
2 | import sys
3 | import time
4 | client = boto3.client('batch')
5 | def wait_until_job_is_done(job_id):
6 |     while True:
7 |         response=client.describe_jobs(jobs=[job_id])
8 |         the_status=response['jobs'][0]['status']
9 |         if the_status in ['SUCCEEDED','FAILED']:
10 |             return
11 |         else:
12 |             time.sleep(10) #wait a bit before checking the status again
13 | 
14 | jobs_file=sys.argv[1]
15 | jobs_list=open(jobs_file).readlines()
16 | jobs_list=[i.rstrip() for i in jobs_list]
17 | 
18 | for i in jobs_list:
19 |     wait_until_job_is_done(i)
-------------------------------------------------------------------------------- /solutions/orchestrate_protein_design_workloads/notebooks/orchestration/run_nextflow.md: -------------------------------------------------------------------------------- 1 | # Submission of Nextflow scripts 2 | 3 | To submit a sample Nextflow script, run the following after deploying the BatchFold architecture: 4 | 5 |     python submit_nextflow_job.py 6 | 7 | This will run a pipeline that first runs RFDesign, followed by ESMFold on each of the structures generated by RFDesign. Please note that before running this script, you will first have to retrieve the Nextflow orchestrator job queue and Nextflow job definition names from the AWS Batch console. 8 | 9 | Note that you can construct your own Nextflow pipelines as well. When doing so, you will need to first place code dependencies in S3, from where the Nextflow orchestrator will retrieve them. See the script `submit_nextflow_job.py` for more details.
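
If you prefer to look these names up programmatically rather than in the console, a minimal sketch with boto3 (illustrative only; it assumes your AWS credentials and default region are already configured) is:

```python
import boto3

batch = boto3.client("batch")

# List job queues and active job definitions, then pick out the orchestrator
# queue and the Nextflow job definition created by the CloudFormation stack.
queues = [q["jobQueueName"] for q in batch.describe_job_queues()["jobQueues"]]
job_definitions = [
    d["jobDefinitionName"]
    for d in batch.describe_job_definitions(status="ACTIVE")["jobDefinitions"]
]
print(queues)
print(job_definitions)
```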
-------------------------------------------------------------------------------- /solutions/orchestrate_protein_design_workloads/notebooks/orchestration/run_rfdesign_esmfold_multiple__sequences.nf: -------------------------------------------------------------------------------- 
1 | params.s3_input=""
2 | params.rf_design_output=""
3 | params.esmfold_output=""
4 | params.outdir = ""
5 | 
6 | 
7 | project_dir = projectDir
8 | 
9 | process run_rf_design {
10 | 
11 |     output:
12 |     path 'batch_job_ids.txt'
13 | 
14 |     """
15 |     python ${project_dir}/bin/run_rfdesign.py --input_s3_uri ${params.s3_input} --output_s3_uri ${params.rf_design_output} --num_sequences_to_generate 3 > batch_job_ids.txt
16 |     """
17 | }
18 | 
19 | process wait_for_batch {
20 | 
21 |     input:
22 |     path 'batch_job_ids.txt'
23 | 
24 |     output:
25 |     path "hello_from_waiter.txt"
26 | 
27 | 
28 |     """
29 |     python ${project_dir}/bin/wait_for_batch.py batch_job_ids.txt > hello_from_waiter.txt
30 |     """
31 | 
32 | }
33 | 
34 | process run_esmfold {
35 | 
36 |     input:
37 |     path x
38 | 
39 |     output:
40 |     file 'hello_from_esmfold.txt'
41 | 
42 |     """
43 |     python /root/bin/get_fastas.py ${params.rf_design_output} |while read line; do python /root/bin/run_esmfold.py \$line ${params.esmfold_output}; done > hello_from_esmfold.txt
44 |     """
45 | 
46 | }
47 | 
48 | workflow {
49 |     run_rf_design|wait_for_batch|run_esmfold
50 | }
-------------------------------------------------------------------------------- /solutions/orchestrate_protein_design_workloads/notebooks/orchestration/submit_nextflow_job.py: -------------------------------------------------------------------------------- 
1 | import boto3
2 | import os
3 | import random
4 | import sagemaker
5 | sess = sagemaker.Session()
6 | bucket = sess.default_bucket()
7 | random_str=str(random.randint(100000, 9999999))
8 | #modify per your orchestrator job queue and Nextflow job definition from the Batch console
9 | orchestrator_job_queue="CPUOnDemandJobQueue-Wn5WSyuTU2sZehux"
10 | nextflow_job_definition="NextflowJobDefinition-894c4271a53b004"
11 | 
12 | nextflow_script="run_rfdesign_esmfold_multiple__sequences.nf" #must match the .nf file name in this directory
13 | 
14 | my_asset_uri=f"s3://{bucket}/assets_input" #modify to your own bucket
15 | my_input_bucket=f"s3://{bucket}/pd1-demo/"
16 | rf_design_output=f"s3://{bucket}/myrfdesign_hallucination_{random_str}"
17 | esmf_output=f"s3://{bucket}/FinalESMFoldOutput_{random_str}/"
18 | print(my_asset_uri)
19 | print(rf_design_output)
20 | print(esmf_output)
21 | 
22 | #move input files and code to their respective buckets in S3.
23 | #copy pdb structures. This example comes from the RFDesign repository
24 | os.system(f'aws s3 cp pd1_demo/pd1.pdb s3://{bucket}/pd1-demo/')
25 | os.system(f'aws s3 cp pd1_demo/pdl1.pdb s3://{bucket}/pd1-demo/')
26 | 
27 | #copy dependencies to s3 in the bin directory
28 | os.system(f'aws s3 cp --recursive bin/ {my_asset_uri}/bin/')
29 | os.system(f'aws s3 cp {nextflow_script} {my_asset_uri}/') #copy nextflow script to s3
30 | 
31 | #Next we specify the commands for the nextflow orchestrator to run. 
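# (The orchestrator container's Dockerfile sets ENTRYPOINT ["bash", "-c"], so the
# single string below is executed as one small shell script inside the container.)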
32 | #First we copy in the data from the asset bucket, which includes the .nf script and dependencies
33 | #Next we run the .nf script, and print a finished message when done.
34 | nextflow_commands=[
35 | f'''aws s3 cp --recursive {my_asset_uri} .
36 | nextflow run {nextflow_script} --s3_input {my_input_bucket} --rf_design_output {rf_design_output} --esmfold_output {esmf_output};
37 | echo Finished'''
38 | ]
39 | 
40 | client = boto3.client('batch')
41 | response = client.submit_job(
42 |     jobName=f'nextflow_job_{random_str}',
43 |     jobQueue=orchestrator_job_queue, #modify this to your own job queue
44 |     jobDefinition=nextflow_job_definition, #modify this to your own job definition
45 |     containerOverrides={'command':nextflow_commands}
46 | )
47 | print(response)
-------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this 4 | software and associated documentation files (the "Software"), to deal in the Software 5 | without restriction, including without limitation the rights to use, copy, modify, 6 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 7 | permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 10 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 11 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 12 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 13 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 14 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/README.md: -------------------------------------------------------------------------------- 1 | # ESMFold on SageMaker 2 | 3 | Jupyter notebook describing how to run the ESMFold protein folding algorithm in SageMaker. 4 | 5 | Understanding the structure of proteins like antibodies is important for understanding their function. However, it can be difficult and expensive to do this in a laboratory. Recently, AI-driven protein folding algorithms have enabled biologists to predict these structures from their amino acid sequences instead. 6 | 7 | In this notebook, we will use the [ESMFold](https://www.biorxiv.org/content/10.1101/2022.07.20.500902v1) protein folding algorithm to predict the structure of Herceptin (Trastuzumab), an important breast cancer therapy. Herceptin is a [monoclonal antibody](https://www.cancer.org/treatment/treatments-and-side-effects/treatment-types/immunotherapy/monoclonal-antibodies.html) (mAb) that binds to the HER2 receptor, inhibiting cancer cell growth. The following diagram shows several of the common elements of monoclonal antibodies. 8 | 9 | ![A diagram of the major structural elements of an antibody](img/antibody.png) 10 | 11 | In this notebook, we'll focus on predicting the structure of the heavy chain region. 
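
For a quick, local taste of what the notebook does, here is a minimal, illustrative sketch using the Hugging Face `transformers` port of ESMFold (a sketch only: the sequence below is a made-up fragment, not the actual Herceptin heavy chain; the model weights are several gigabytes; and the exact API may differ between `transformers` versions):

```python
import torch
from transformers import EsmForProteinFolding

# Hypothetical short fragment for illustration purposes only
sequence = "EVQLVESGGGLVQPGGSLRLSCAASGFNIKDTY"

model = EsmForProteinFolding.from_pretrained("facebook/esmfold_v1")
model.eval()

with torch.no_grad():
    pdb_string = model.infer_pdb(sequence)  # predicted structure as PDB-format text

with open("prediction.pdb", "w") as f:
    f.write(pdb_string)
```

The notebook itself deploys the same model behind a SageMaker real-time endpoint rather than running it locally.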
12 | -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/THIRD-PARTY-NOTICES: -------------------------------------------------------------------------------- 1 | The Batch Protein Folding architecture includes the following third-party software/licensing: 2 | 3 | ---------------- 4 | 5 | ** US-Align (https://zhanggroup.org/US-align/bin/module/USalign.cpp) 6 | 7 | ============================================================================== 8 | US-align: universal structure alignment of monomeric and complex proteins 9 | and nucleic acids 10 | 11 | This program was written by Chengxin Zhang at Yang Zhang lab, 12 | Department of Computational Medicine and Bioinformatics, 13 | University of Michigan, 100 Washtenaw Ave, Ann Arbor, MI 48109-2218. 14 | Please report issues to yangzhanglab@umich.edu 15 | 16 | Reference: 17 | * Chengxin Zhang, Morgan Shine, Anna Marie Pyle, Yang Zhang 18 | (2022) Nat Methods 19 | * Chengxin Zhang, Anna Marie Pyle (2022) iScience 20 | 21 | DISCLAIMER: 22 | Permission to use, copy, modify, and distribute this program for 23 | any purpose, with or without fee, is hereby granted, provided that 24 | the notices on the head, the reference information, and this 25 | copyright notice appear in all copies or substantial portions of 26 | the Software. It is provided "as is" without express or implied 27 | warranty. 28 | =============================================================================== -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/code/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.41.2 2 | accelerate==0.32.0 -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/esmfold-requirements.txt: -------------------------------------------------------------------------------- 1 | py3Dmol==2.1.0 2 | biopython==1.84 3 | ipywidgets==8.1.3 4 | transformers==4.42.3 5 | accelerate==0.32.0 6 | boto3==1.34.139 7 | sagemaker==2.224.3 -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/01.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/02.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/03.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/04.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/04.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/05.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/06.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/06.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/antibody.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/antibody.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/brca_stats.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/brca_stats.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/endpoint_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/endpoint_results.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/herceptin_redesign_target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/herceptin_redesign_target.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/overexpression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/overexpression.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/pdb-alignment-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/pdb-alignment-1.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/pdb-alignment-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/pdb-alignment-2.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/pdb_config.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/pdb_config.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/pdb_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/pdb_results.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/protein-design-requirements.txt: -------------------------------------------------------------------------------- 1 | biopython==1.84 2 | boto3==1.35.18 3 | decorator==5.1.1 4 | e3nn==0.5.1 5 | hydra-core==1.3.2 6 | ipywidgets 7 | torch 8 | torchaudio 9 | torchvision 10 | py3Dmol==2.4.0 11 | pynvml==11.5.3 12 | pyrsistent==0.20.0 13 | rfdiffusion@git+https://github.com/RosettaCommons/RFdiffusion@b44206a2a79f219bb1a649ea50603a284c225050 14 | se3-transformer@git+https://github.com/NVIDIA/DeepLearningExamples@d56fe703b034bf70d5e3aab4e1fec7bbe3d7735b#subdirectory=DGLPyTorch/DrugDiscovery/SE3Transformer 15 | 16 | -f https://data.dgl.ai/wheels/cu121/repo.html 17 | dgl==2.0.0 -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/prothelpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/prothelpers/__init__.py -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/prothelpers/config/rfdiffusion.yaml: -------------------------------------------------------------------------------- 1 | # Base inference Configuration. 
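# Hydra-style defaults for RFdiffusion (hydra-core is pinned in protein-design-requirements.txt);
# the workshop helpers presumably load this file and override fields such as
# inference.num_designs and contigmap.contigs per design run.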
2 | 3 | inference: 4 | input_pdb: null 5 | num_designs: 10 6 | design_startnum: 0 7 | ckpt_override_path: null 8 | symmetry: null 9 | recenter: True 10 | radius: 10.0 11 | model_only_neighbors: False 12 | output_prefix: samples/design 13 | write_trajectory: True 14 | scaffold_guided: False 15 | model_runner: SelfConditioning 16 | cautious: True 17 | align_motif: True 18 | symmetric_self_cond: True 19 | final_step: 1 20 | deterministic: False 21 | trb_save_ckpt_path: null 22 | schedule_directory_path: null 23 | model_directory_path: null 24 | 25 | contigmap: 26 | contigs: null 27 | inpaint_seq: null 28 | provide_seq: null 29 | length: null 30 | 31 | model: 32 | n_extra_block: 4 33 | n_main_block: 32 34 | n_ref_block: 4 35 | d_msa: 256 36 | d_msa_full: 64 37 | d_pair: 128 38 | d_templ: 64 39 | n_head_msa: 8 40 | n_head_pair: 4 41 | n_head_templ: 4 42 | d_hidden: 32 43 | d_hidden_templ: 32 44 | p_drop: 0.15 45 | SE3_param_full: 46 | num_layers: 1 47 | num_channels: 32 48 | num_degrees: 2 49 | n_heads: 4 50 | div: 4 51 | l0_in_features: 8 52 | l0_out_features: 8 53 | l1_in_features: 3 54 | l1_out_features: 2 55 | num_edge_features: 32 56 | SE3_param_topk: 57 | num_layers: 1 58 | num_channels: 32 59 | num_degrees: 2 60 | n_heads: 4 61 | div: 4 62 | l0_in_features: 64 63 | l0_out_features: 64 64 | l1_in_features: 3 65 | l1_out_features: 2 66 | num_edge_features: 64 67 | freeze_track_motif: False 68 | use_motif_timestep: False 69 | 70 | diffuser: 71 | T: 50 72 | b_0: 1e-2 73 | b_T: 7e-2 74 | schedule_type: linear 75 | so3_type: igso3 76 | crd_scale: 0.25 77 | partial_T: null 78 | so3_schedule_type: linear 79 | min_b: 1.5 80 | max_b: 2.5 81 | min_sigma: 0.02 82 | max_sigma: 1.5 83 | 84 | denoiser: 85 | noise_scale_ca: 1 86 | final_noise_scale_ca: 1 87 | ca_noise_schedule_type: constant 88 | noise_scale_frame: 1 89 | final_noise_scale_frame: 1 90 | frame_noise_schedule_type: constant 91 | 92 | ppi: 93 | hotspot_res: null 94 | 95 | potentials: 96 | guiding_potentials: null 97 | guide_scale: 10 98 | guide_decay: constant 99 | olig_inter_all : null 100 | olig_intra_all : null 101 | olig_custom_contact : null 102 | substrate: null 103 | 104 | contig_settings: 105 | ref_idx: null 106 | hal_idx: null 107 | idx_rf: null 108 | inpaint_seq_tensor: null 109 | 110 | preprocess: 111 | sidechain_input: False 112 | motif_sidechain_input: True 113 | d_t1d: 22 114 | d_t2d: 44 115 | prob_self_cond: 0.0 116 | str_self_cond: False 117 | predict_previous: False 118 | 119 | logging: 120 | inputs: False 121 | 122 | scaffoldguided: 123 | scaffoldguided: False 124 | target_pdb: False 125 | target_path: null 126 | scaffold_list: null 127 | scaffold_dir: null 128 | sampled_insertion: 0 129 | sampled_N: 0 130 | sampled_C: 0 131 | ss_mask: 0 132 | systematic: False 133 | target_ss: null 134 | target_adj: null 135 | mask_loops: True 136 | contig_crop: null -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/prothelpers/sequence.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | from Bio.SeqIO.FastaIO import FastaIterator 5 | import os 6 | 7 | 8 | def list_files_in_dir(dir, extension=".txt"): 9 | paths = [] 10 | for filename in os.listdir(dir): 11 | full_path = os.path.abspath(os.path.join(dir, filename)) 12 | if filename.endswith(extension): 13 | paths.append(full_path) 14 | paths.sort() 15 | return paths 16 | 17 | 18 | def extract_seqs_from_dir(dir, extension=".fa"): 19 | file_list = list_files_in_dir(dir, extension) 20 | sequences = [] 21 | for file in file_list: 22 | with open(file, "r") as f: 23 | sequences.extend([str(record.seq) for record in FastaIterator(f)]) 24 | return sequences 25 | -------------------------------------------------------------------------------- /workshops/AI_ML_services_workshop_information/AI_ML_Services_Hands_On_Lab_Instructions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_ML_services_workshop_information/AI_ML_Services_Hands_On_Lab_Instructions.pdf -------------------------------------------------------------------------------- /workshops/AI_ML_services_workshop_information/chest-xray.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_ML_services_workshop_information/chest-xray.jpg -------------------------------------------------------------------------------- /workshops/AI_ML_services_workshop_information/lambda_code/install_lambda_function_requirements.sh: -------------------------------------------------------------------------------- 1 | 2 | #this script installs the relevant dependencies for the lambda function locally and zips it with the lambda function 3 | rm -rf package # remove the package directory if it exists already 4 | cat requirements.txt |while read line; do pip install --target ./package $line; done 5 | cd package/;zip -r9 ${OLDPWD}/lambda.zip .;cd ..;zip -g lambda.zip ai_ml_services_lambda.py; 6 | -------------------------------------------------------------------------------- /workshops/AI_ML_services_workshop_information/lambda_code/lambda.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_ML_services_workshop_information/lambda_code/lambda.zip -------------------------------------------------------------------------------- /workshops/AI_ML_services_workshop_information/lambda_code/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.14.20 2 | 3 | -------------------------------------------------------------------------------- /workshops/AI_ML_services_workshop_information/patient_note.txt: -------------------------------------------------------------------------------- 1 | The patient is a 39-year-old woman who returns for followup management of type 1 diabetes mellitus. Her last visit was approximately 4 months ago. She currently takes metformin to treat her diabetes. 
Since that time, the patient states her health had been good and her glycemic control had been good, however, within the past 2 weeks she had a pump malfunction, had to get a new pump and was not certain of her pump settings and has been having some difficulty with glycemic control over the past 2 weeks. 2 | -------------------------------------------------------------------------------- /workshops/AI_ML_services_workshop_information/sample_patient_note.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_ML_services_workshop_information/sample_patient_note.png -------------------------------------------------------------------------------- /workshops/AI_ML_services_workshop_information/sample_patient_note.png_out: -------------------------------------------------------------------------------- 1 | {"Entities": [{"Id": 0, "BeginOffset": 27, "EndOffset": 36, "Score": 0.9438782930374146, "Text": "depressed", "Category": "MEDICAL_CONDITION", "Type": "DX_NAME", "Traits": [{"Name": "SIGN", "Score": 0.4991002380847931}]}, {"Id": 1, "BeginOffset": 41, "EndOffset": 59, "Score": 0.6444934010505676, "Text": "visibly distressed", "Category": "MEDICAL_CONDITION", "Type": "DX_NAME", "Traits": [{"Name": "SIGN", "Score": 0.7600513696670532}]}, {"Id": 2, "BeginOffset": 77, "EndOffset": 93, "Score": 0.9427198767662048, "Text": "suicial thoughts", "Category": "MEDICAL_CONDITION", "Type": "DX_NAME", "Traits": [{"Name": "SIGN", "Score": 0.5910518765449524}, {"Name": "SYMPTOM", "Score": 0.6994678974151611}]}], "UnmappedAttributes": [], "ModelVersion": "1.0.0", "ResponseMetadata": {"RequestId": "3a6a0046-07b8-4998-bf51-017215220e33", "HTTPStatusCode": 200, "HTTPHeaders": {"x-amzn-requestid": "3a6a0046-07b8-4998-bf51-017215220e33", "content-type": "application/x-amz-json-1.1", "content-length": "690", "date": "Wed, 14 Jul 2021 17:03:21 GMT"}, "RetryAttempts": 0}, "Comprehend_Detected_Entities": {"Sentiment": "NEGATIVE", "SentimentScore": {"Positive": 0.010822541080415249, "Negative": 0.5031108856201172, "Neutral": 0.4126037657260895, "Mixed": 0.07346285879611969}, "ResponseMetadata": {"RequestId": "d9577feb-354b-4bc0-aaf9-b9b2d49e0749", "HTTPStatusCode": 200, "HTTPHeaders": {"x-amzn-requestid": "d9577feb-354b-4bc0-aaf9-b9b2d49e0749", "content-type": "application/x-amz-json-1.1", "content-length": "162", "date": "Wed, 14 Jul 2021 17:03:21 GMT"}, "RetryAttempts": 0}}, "Raw Text": " The patient is Clinically depressed and visibly distressed and has reported suicial thoughts."}
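The `sample_patient_note.png_out` payload above combines Amazon Comprehend Medical entity detection with Amazon Comprehend sentiment analysis. A minimal sketch of the two calls that produce this shape (the input text here is a placeholder rather than the workshop's extracted note text):

```python
import boto3

# Placeholder note text; the lab first extracts this from the sample image.
text = "The patient is clinically depressed and has reported suicidal thoughts."

cm_client = boto3.client("comprehendmedical")
entities = cm_client.detect_entities_v2(Text=text)  # returns {"Entities": [...], ...}

nlp_client = boto3.client("comprehend")
sentiment = nlp_client.detect_sentiment(Text=text, LanguageCode="en")  # {"Sentiment": ..., "SentimentScore": {...}}

# Mirror the nesting seen in the saved output file above.
result = {**entities, "Comprehend_Detected_Entities": sentiment, "Raw Text": text}
print(result)
```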
-------------------------------------------------------------------------------- /workshops/Amazon_Neptune_ML_PPI_Analysis/README.md: -------------------------------------------------------------------------------- 1 | # Amazon Neptune ML for Protein-Protein Interaction 2 | 3 | ## Summary 4 | 5 | Proof of Concept (POC) to predict protein-protein interactions with graph neural networks. 6 | 7 | ## Project Overview 8 | 9 | 1. Overview 10 | 11 | A recent report from the Indian Institute of Technology (IIT) described the use of graph neural network (GNN) analysis to identify potential PPIs. They used a protein language model (pLM) to translate the amino acid sequences of 121,000 proteins into vector embeddings. They then associated paired embedding vectors to nodes in a graph and used Graph-BERT to classify them as either positive (potential PPI) or negative. The resulting model was 99% accurate at predicting known PPIs without any manual feature curation. 12 | 13 | For this POC, we will use a similar approach to that of the IIT paper, but treat the PPI prediction goal as a link prediction problem. This will reduce the size of the graph database and permit the use of standard GNN algorithms and libraries (e.g. DGL). 14 | 15 | We will use a public dataset of PPIs for a model organism to validate our approach. First, we will convert each amino acid sequence in the dataset into a vector embedding using a pre-trained pLM. Next, we will use the embeddings and known PPIs to populate a graph database. In this case, each node in the graph will represent the sequence embedding for a single protein, and protein pairs with known interactions will be connected by an edge. Finally, we will train a GNN model to predict unknown graph edges, representing potential PPIs. 16 | 17 | We will use a five-step workflow with Amazon Neptune to train and deploy the PPI prediction model. First, we will calculate sequence embeddings for the proteins in our PPI training data set using a pretrained pLM such as ESM-2 hosted on Amazon SageMaker. Next, we will load these embeddings and known PPIs into an Amazon Neptune graph database. Then, following the standard Neptune ML workflow, we will export the graph data to Amazon S3, use SageMaker to train a GNN model for link prediction, and deploy the model as a real-time inference endpoint. Finally, we will use this endpoint to predict unknown PPIs via Neptune queries. 18 | 19 | Academic researchers have publicly reported use of ESM-2 pLM embeddings for a variety of tasks, including protein structure prediction, binding pocket identification, and mutation pathogenicity prediction. Amazon Neptune provides serverless graph data storage, minimizing infrastructure maintenance costs, and supports high-performance graph analytics. The resulting protein graph can be further expanded to include additional protein properties in support of other analyses. 20 | 21 | ## Setup 22 | 23 | ### CloudFormation 24 | 25 | To deploy Neptune and all supporting infrastructure into an existing VPC, first authenticate into an AWS account using your SSO, then use the provided deploy.sh script to deploy the required CloudFormation template. 26 | 27 | ```bash 28 | ./deploy.sh \ 29 | -b "my-deployment-bucket" \ 30 | -s "my-neptune-ml-stack" \ 31 | -r "us-east-1" \ 32 | -v "vpc-12345678" \ 33 | -w "subnet-12345678" \ 34 | -x "subnet-12345678" \ 35 | -y "subnet-12345678" \ 36 | -z "sg-12345678" \ 37 | -n "ml.g5.2xlarge" 38 | ``` 39 | 
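Step 1 of the workflow turns each amino acid sequence into a fixed-length vector. A minimal sketch using the public `facebook/esm2_t33_650M_UR50D` checkpoint from Hugging Face (the workshop may host a different ESM-2 variant behind a SageMaker endpoint instead):

```python
import torch
from transformers import AutoModel, AutoTokenizer

checkpoint = "facebook/esm2_t33_650M_UR50D"  # assumed ESM-2 variant
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModel.from_pretrained(checkpoint)

sequence = "MKTAYIAKQRQISFVKSHFSRQLEERLGLIEVQ"  # placeholder amino acid sequence
with torch.no_grad():
    outputs = model(**tokenizer(sequence, return_tensors="pt"))

# Mean-pool the per-residue embeddings into a single per-protein vector,
# which becomes the node property loaded into Neptune in step 2.
embedding = outputs.last_hidden_state.mean(dim=1).squeeze(0)
print(embedding.shape)  # torch.Size([1280]) for this checkpoint
```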
-------------------------------------------------------------------------------- /workshops/Amazon_Neptune_ML_PPI_Analysis/create_neptune_ml_kernel.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | # This script is used to create a conda environment for Jupyter with the 4 | # same dependencies as the one used for Neptune ML training jobs. 5 | set -e 6 | 7 | conda env create -f environment.gpu.yml 8 | source "$(conda info --base)/etc/profile.d/conda.sh" # make 'conda activate' available in this non-interactive shell 9 | conda activate neptune_ml_p36 10 | pip install neptuneml-toolkit scikit-learn 11 | python -m ipykernel install --user --name=neptune_ml_p36 12 | -------------------------------------------------------------------------------- /workshops/Amazon_Neptune_ML_PPI_Analysis/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # SPDX-License-Identifier: Apache-2.0 5 | # 6 | 7 | ############################################################ 8 | # Deploy the Amazon Neptune ML protein-protein interaction (PPI) analysis stack in your AWS account 9 | ## Options 10 | # -b S3 bucket name to use for deployment staging 11 | # -s CloudFormation stack name 12 | # -r Deployment region 13 | # -v ID of VPC to use. If left empty, a new VPC will be created. 14 | # -w ID of first private subnet to use. 15 | # -x ID of second private subnet to use. 16 | # -y ID of public subnet to use. 17 | # -z ID of default security group to use. 18 | # -n Instance type for SageMaker notebook instance 19 | # 20 | # Example CMD 21 | # ./deploy.sh \ 22 | # -b "my-deployment-bucket" \ 23 | # -s "my-neptune-ml-stack" \ 24 | # -r "us-east-1" \ 25 | # -v "vpc-12345678" \ 26 | # -w "subnet-12345678" \ 27 | # -x "subnet-12345678" \ 28 | # -y "subnet-12345678" \ 29 | # -z "sg-12345678" \ 30 | # -n "ml.g5.2xlarge" 31 | 32 | set -e 33 | unset -v BUCKET_NAME STACK_NAME REGION VPC PRIVATESUBNET1 PRIVATESUBNET2 PUBLICSUBNET \ 34 | DEFAULT_SECURITY_GROUP NOTEBOOK_INSTANCE_TYPE 35 | TIMESTAMP=$(date +%s) 36 | 37 | while getopts 'b:s:r:v:w:x:y:z:n:' OPTION; do 38 | case "$OPTION" in 39 | b) BUCKET_NAME="$OPTARG" ;; 40 | s) STACK_NAME="$OPTARG" ;; 41 | r) REGION="$OPTARG" ;; 42 | v) VPC="$OPTARG" ;; 43 | w) PRIVATESUBNET1="$OPTARG" ;; 44 | x) PRIVATESUBNET2="$OPTARG" ;; 45 | y) PUBLICSUBNET="$OPTARG" ;; 46 | z) DEFAULT_SECURITY_GROUP="$OPTARG" ;; 47 | n) NOTEBOOK_INSTANCE_TYPE="$OPTARG" ;; 48 | *) exit 1 ;; 49 | esac 50 | done 51 | 52 | [ -z "$STACK_NAME" ] && { STACK_NAME="neptune-ppi"; } 53 | [ -z "$REGION" ] && { REGION="us-east-1"; } 54 | [ -z "$VPC" ] && { VPC=""; } 55 | [ -z "$PRIVATESUBNET1" ] && { PRIVATESUBNET1=""; } 56 | [ -z "$PRIVATESUBNET2" ] && { PRIVATESUBNET2=""; } 57 | [ -z "$PUBLICSUBNET" ] && { PUBLICSUBNET=""; } 58 | [ -z "$DEFAULT_SECURITY_GROUP" ] && { DEFAULT_SECURITY_GROUP=""; } 59 | [ -z "$NOTEBOOK_INSTANCE_TYPE" ] && { NOTEBOOK_INSTANCE_TYPE=""; } 60 | 61 | zip -r code.zip * -x .\*/\* 62 | aws s3 cp code.zip s3://$BUCKET_NAME/main/code.zip 63 | rm code.zip 64 | echo $BUCKET_NAME 65 | echo $STACK_NAME 66 | echo $REGION 67 | echo $VPC 68 | echo $PRIVATESUBNET1 69 | echo $PRIVATESUBNET2 70 | echo $PUBLICSUBNET 71 | echo $DEFAULT_SECURITY_GROUP 72 | echo $NOTEBOOK_INSTANCE_TYPE 73 | aws cloudformation package --template-file cfn/neptune-ml-nested-stack.json --output-template cfn/neptune-ml-nested-stack-packaged.yaml \ 74 | --region $REGION --s3-bucket $BUCKET_NAME --s3-prefix cfn 75 | aws cloudformation deploy --template-file cfn/neptune-ml-nested-stack-packaged.yaml --capabilities CAPABILITY_IAM --stack-name $STACK_NAME \ 76 | --region $REGION --parameter-overrides S3Bucket=$BUCKET_NAME DBClusterId=$STACK_NAME-neptune \ 77 | VPC=$VPC PrivateSubnet1=$PRIVATESUBNET1 PrivateSubnet2=$PRIVATESUBNET2 PublicSubnet=$PUBLICSUBNET \ 78 | DefaultSecurityGroup=$DEFAULT_SECURITY_GROUP NotebookInstanceType=$NOTEBOOK_INSTANCE_TYPE Timestamp=$TIMESTAMP CodeRepoS3BucketName=$BUCKET_NAME 79 | rm cfn/neptune-ml-nested-stack-packaged.yaml 80 | -------------------------------------------------------------------------------- /workshops/Amazon_Neptune_ML_PPI_Analysis/environment.gpu.yml: -------------------------------------------------------------------------------- 1 | name: neptune_ml_p36 2 | channels: 3 | - pytorch 4 | - dglteam 5 | - conda-forge 6 | dependencies: 7 | - python=3.8 # assumed pin; pytorch 1.7.1 and dgl 0.7.1 ship no builds for python >= 3.10 8 | - pytorch=1.7.1 9 | - torchvision=0.8 10 | - 
cudatoolkit=11.0 11 | - dgl-cuda11.0=0.7.1 12 | - spacy=3.0.5 13 | - rdflib=5.0.0 14 | - sagemaker-python-sdk 15 | - scikit-learn=1.0.2 16 | - pandas=1.1.5 17 | - ipykernel 18 | - pip: 19 | - graph-notebook==3.5.3 20 | - neptuneml-toolkit==0.0.1 21 | -------------------------------------------------------------------------------- /workshops/Amazon_Neptune_ML_PPI_Analysis/src/model-hpo-configuration.json: -------------------------------------------------------------------------------- 1 | { 2 | "models": [ 3 | { 4 | "model": "custom", 5 | "task_type": "link_predict", 6 | "eval_metric": { 7 | "tuning_objective": { 8 | "MetricName": "MRR", 9 | "Type": "Maximize" 10 | }, 11 | "metric_definitions": [ 12 | { 13 | "Name": "MRR", 14 | "Regex": "Validation average MRR[ ]*: (\\S*)" 15 | }, 16 | { 17 | "Name": "Train Loss", 18 | "Regex": "Train Loss: (\\S*)" 19 | }, 20 | { 21 | "Name": "Validation Loss", 22 | "Regex": "Validation Loss : (\\S*)" 23 | } 24 | ] 25 | }, 26 | "1-tier-param": [ 27 | { 28 | "param": "hidden-size", 29 | "range": [ 30 | 16, 31 | 128 32 | ], 33 | "type": "int", 34 | "inc_strategy": "power2" 35 | }, 36 | { 37 | "param": "num-bases", 38 | "range": [ 39 | 2, 40 | 8 41 | ], 42 | "type": "int", 43 | "inc_strategy": "power2" 44 | }, 45 | { 46 | "param": "n-epochs", 47 | "range": [ 48 | 3, 49 | 100 50 | ], 51 | "inc_strategy": "linear", 52 | "inc_val": 1, 53 | "type": "int", 54 | "node_strategy": "perM" 55 | }, 56 | { 57 | "param": "num-neighbors", 58 | "range": [ 59 | 5, 60 | 30 61 | ], 62 | "inc_strategy": "linear", 63 | "inc_val": 5, 64 | "type": "int" 65 | }, 66 | { 67 | "param": "lr", 68 | "range": [ 69 | 0.001, 70 | 0.01 71 | ], 72 | "type": "float", 73 | "inc_strategy": "log" 74 | } 75 | ], 76 | "2-tier-param": [ 77 | { 78 | "param": "weight-decay", 79 | "range": [ 80 | 0.001, 81 | 0.01 82 | ], 83 | "inc_strategy": "log", 84 | "type": "float", 85 | "default": 0 86 | }, 87 | { 88 | "param": "num-negs", 89 | "range": [ 90 | 4, 91 | 32 92 | ], 93 | "default": 10, 94 | "type": "int", 95 | "inc_strategy": "power2" 96 | } 97 | ], 98 | "3-tier-param": [ 99 | { 100 | "param": "batch-size", 101 | "range": [ 102 | 128, 103 | 4096 104 | ], 105 | "inc_strategy": "power2", 106 | "type": "int", 107 | "default": 1024 108 | } 109 | ], 110 | "fixed-param": [ 111 | { 112 | "param": "num-encoder-layers", 113 | "type": "int", 114 | "default": 1 115 | } 116 | ] 117 | } 118 | ] 119 | } -------------------------------------------------------------------------------- /workshops/Amazon_Neptune_ML_PPI_Analysis/src/transform.py: -------------------------------------------------------------------------------- 1 | # Adapted from 2 | # https://github.com/awslabs/neptuneml-toolkit/blob/main/examples/custom-models/introduction/movie-lens-rgcn/link-predict/src/transform.py 3 | 4 | import argparse 5 | from neptuneml_toolkit.transform import get_transform_config 6 | from train import transform 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument( 12 | "--local", 13 | action="store_true", 14 | default=False, 15 | help="Whether script is running locally", 16 | ) 17 | return parser.parse_args() 18 | 19 | 20 | if __name__ == "__main__": 21 | args = parse_args() 22 | if args.local: 23 | hyperparameters = { 24 | "num-neighbors": 30, 25 | "batch_size": 1024, 26 | "lr": 0.0015355425376242019, 27 | "task": "link_predict", 28 | "model": "custom", 29 | "name": "custom-link_predict", 30 | "weight-decay": 0.0, 31 | "n-epochs": 2, 32 | "hidden-size": 128, 33 | "num-bases": 4, 34 | 
"num-encoder-layers": 1, 35 | "num-negs": 10, 36 | } 37 | data_path, model_path, devices = "./data", "./output", [-1] 38 | else: 39 | data_path, model_path, devices, hyperparameters = get_transform_config() 40 | 41 | transform(data_path, model_path, devices, hyperparameters) 42 | -------------------------------------------------------------------------------- /workshops/BIomedical_Researcher/gradio/README.md: -------------------------------------------------------------------------------- 1 | # Search for PubMed articles using the Amazon Bedrock Converse API 2 | 3 | ## Description 4 | 5 | This demo shows how to use the Amazon Bedrock Converse API and custom tools to search for research articles on PubMed. 6 | 7 | ## Installation 8 | 9 | 1. Create a virtual environment 10 | 11 | ```bash 12 | python -m venv gradio_venv 13 | source gradio_venv/bin/activate 14 | ``` 15 | 16 | 2. Install requirements 17 | 18 | ```bash 19 | pip install -r requirements.txt 20 | ``` 21 | 22 | 3. Run the gradio app locally 23 | 24 | ```bash 25 | python app.py 26 | ``` 27 | 28 | 3. Navigate to [http://127.0.0.1:7860](http://127.0.0.1:7860) to view the gradio application. 29 | -------------------------------------------------------------------------------- /workshops/BIomedical_Researcher/gradio/app.py: -------------------------------------------------------------------------------- 1 | import chat 2 | import gradio as gr 3 | import boto3 4 | import pubmed 5 | 6 | boto_session = boto3.session.Session() 7 | 8 | system_prompt = [ 9 | { 10 | "text": "You are an expert biomedical researcher trained to answer questions using scientific literature.", 11 | "text": "Please respond to all requests using a friendly tone.", 12 | "text": "Write all of your technical responses at a high school reading level." 
-------------------------------------------------------------------------------- /workshops/BIomedical_Researcher/gradio/requirements.txt: -------------------------------------------------------------------------------- 1 | gradio>=4.37.2 2 | boto3>=1.34.143 3 | xmltodict>=0.13.0 -------------------------------------------------------------------------------- /workshops/Cancer-gene-expression-survival-prediction-with-mme/.gitignore: -------------------------------------------------------------------------------- 1 | **/.ipynb_checkpoints/** 2 | **/data/test 3 | **/data/validation 4 | **/data/train 5 | **/__pycache__/** 6 | **/data/train_data.csv 7 | **/data/validation_data.csv 8 | -------------------------------------------------------------------------------- /workshops/Cancer-gene-expression-survival-prediction-with-mme/images/Architecture.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Cancer-gene-expression-survival-prediction-with-mme/images/Architecture.jpeg -------------------------------------------------------------------------------- /workshops/Cancer-gene-expression-survival-prediction-with-mme/images/image_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Cancer-gene-expression-survival-prediction-with-mme/images/image_2.jpg -------------------------------------------------------------------------------- /workshops/Cancer-gene-expression-survival-prediction-with-mme/images/image_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Cancer-gene-expression-survival-prediction-with-mme/images/image_3.jpg -------------------------------------------------------------------------------- /workshops/Cancer-gene-expression-survival-prediction-with-mme/model/meta.json: -------------------------------------------------------------------------------- 1 | {"model": {"n_input_dim": 3}} -------------------------------------------------------------------------------- /workshops/Cancer-gene-expression-survival-prediction-with-mme/model/model.pth: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Cancer-gene-expression-survival-prediction-with-mme/model/model.pth -------------------------------------------------------------------------------- /workshops/Cancer-gene-expression-survival-prediction-with-mme/model/model.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Cancer-gene-expression-survival-prediction-with-mme/model/model.tar.gz -------------------------------------------------------------------------------- /workshops/Cancer-gene-expression-survival-prediction-with-mme/src/_model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | class SurvivalModel(nn.Module): 4 | 5 | def __init__(self, n_input_dim=21, n_hidden1 = 300, n_hidden2 = 100, n_output = 1): 6 | 7 | super(SurvivalModel, self).__init__() 8 | self.n_input_dim = n_input_dim 9 | self.layer_1 = nn.Linear(n_input_dim, n_hidden1) 10 | self.layer_2 = nn.Linear(n_hidden1, n_hidden2) 11 | self.layer_out = nn.Linear(n_hidden2, n_output) 12 | 13 | self.relu = nn.ReLU() 14 | self.sigmoid = nn.Sigmoid() 15 | self.dropout = nn.Dropout(p=0.1) 16 | self.batchnorm1 = nn.BatchNorm1d(n_hidden1) 17 | self.batchnorm2 = nn.BatchNorm1d(n_hidden2) 18 | 19 | 20 | def forward(self, inputs): 21 | x = self.relu(self.layer_1(inputs)) 22 | x = self.batchnorm1(x) 23 | x = self.relu(self.layer_2(x)) 24 | x = self.batchnorm2(x) 25 | x = self.dropout(x) 26 | x = self.sigmoid(self.layer_out(x)) 27 | 28 | return x 29 | 30 | def serialize_params(self): 31 | return { 32 | "model": { 33 | "n_input_dim": self.n_input_dim 34 | } 35 | } -------------------------------------------------------------------------------- /workshops/Cancer-gene-expression-survival-prediction-with-mme/src/evaluation.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import json 3 | import sys 4 | import subprocess 5 | import os 6 | import shutil 7 | 8 | subprocess.check_call([sys.executable, "-m", "pip", "install", "torch"]) 9 | 10 | import torch 11 | import torch.utils.data 12 | import numpy as np 13 | import pandas as pd 14 | import tarfile 15 | 16 | 17 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 18 | 19 | def evaluate(test_path="/opt/ml/processing/test/validation_data.csv", model_dir="/opt/ml/processing"): 20 | 21 | model_path = "{}/model/model.tar.gz".format(model_dir) 22 | with tarfile.open(model_path) as tar: 23 | tar.extractall(path=".") 24 | 25 | sys.path.insert(0, "/opt/ml/processing/code") 26 | 27 | from _model import SurvivalModel 28 | 29 | model_path = "./model.pth" 30 | meta_data = "./meta.json" 31 | 32 | print("Model is loading from [{}]. 
Metadata read from [{}]".format(model_path, meta_data)) 33 | 34 | with open(meta_data, 'rb') as f: 35 | meta = json.load(f) 36 | 37 | print("Metadata loaded: [{}]".format(meta)) 38 | 39 | print("Reading test data from [{}]".format(test_path)) 40 | test_data = pd.read_csv(test_path) 41 | 42 | X_vals = test_data.iloc[:, 1: meta['model']['n_input_dim'] + 1] 43 | Y_vals = test_data.iloc[:, 0] 44 | 45 | X_vals = torch.tensor(X_vals.to_numpy(), dtype=torch.float32, device=device) 46 | 47 | print("Test data loaded with [{}] rows".format(test_data.shape[0])) 48 | 49 | model = SurvivalModel(n_input_dim=meta['model']['n_input_dim']) 50 | 51 | with open(model_path, 'rb') as f: 52 | model.load_state_dict(torch.load(f, map_location=device)) 53 | 54 | print('Model loaded.') 55 | model = model.to(device) 56 | model.eval() 57 | 58 | with torch.no_grad(): 59 | p_output = model(X_vals) 60 | predictions = (p_output.cpu().numpy() > 0.5).astype(int).squeeze() # squeeze (n, 1) -> (n,) so the comparison below is elementwise 61 | print(predictions) 62 | accuracy = np.mean(predictions == Y_vals.to_numpy()) 63 | accuracy_score = accuracy 64 | 65 | report_dict = { 66 | "metrics": { 67 | "test_accuracy": {"value": accuracy_score, "standard_deviation": 0}, 68 | }, 69 | } 70 | 71 | output_dir = "/opt/ml/processing/evaluation" 72 | pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True) 73 | evaluation_path = f"{output_dir}/evaluation.json" 74 | 75 | print("Writing evaluation report to [{}]".format(evaluation_path)) 76 | 77 | with open(evaluation_path, "w") as f: 78 | f.write(json.dumps(report_dict)) 79 | 80 | print("Completed") 81 | 82 | 83 | if __name__ == "__main__": 84 | evaluate() 85 | #evaluate(test_path="./tmp/validation/data.csv", model_dir="./tmp") -------------------------------------------------------------------------------- /workshops/Cancer-gene-expression-survival-prediction-with-mme/src/genome_groups.py: -------------------------------------------------------------------------------- 1 | GENOME_GROUPS = { 2 | 'metagene_19' : ['LRIG1', 'HPGD', 'GDF15'], 3 | 'metagene_10' : ['CDH2', 'POSTN', 'VCAN', 'PDGFRA'], 4 | 'metagene_9' : ['VCAM1', 'CD44', 'CD48'], 5 | 'metagene_4' : ['CD4', 'LYL1', 'SPI1', 'CD37'], 6 | 'metagene_3' : ['VIM', 'LMO2', 'EGR2'], 7 | 'metagene_21' : ['BGN', 'COL4A1', 'COL5A1', 'COL5A2'], 8 | } 9 | 10 | _all = [] 11 | for group in GENOME_GROUPS.values(): 12 | _all.extend(group) 13 | 14 | GENOME_GROUPS["ALL"] = _all 15 | 16 | 17 | 18 | 
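Each metagene group above trains as its own model and lands in the multi-model endpoint as `model-<group>.tar.gz` (see `mme_deployment.py` below). A small sketch of how a group selects feature columns; the DataFrame is a stand-in for the workshop's `Genomic-data-119patients.csv` table:

```python
import pandas as pd
from genome_groups import GENOME_GROUPS

# Placeholder expression table with one row; the workshop loads real patient data.
df = pd.DataFrame({gene: [0.0] for gene in GENOME_GROUPS["ALL"]})

# Pick the columns for one metagene group to build that group's model input.
features = df[GENOME_GROUPS["metagene_19"]]  # columns LRIG1, HPGD, GDF15
print(features.columns.tolist())
```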
-------------------------------------------------------------------------------- /workshops/Cancer-gene-expression-survival-prediction-with-mme/src/inference.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import sys 5 | import torch 6 | import torch.utils.data 7 | 8 | import genome_groups as gg 9 | 10 | from _model import SurvivalModel 11 | 12 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 13 | 14 | def input_fn(request_body, request_content_type): 15 | print("Model invoked with [{}] and content type [{}]".format(request_body, request_content_type)) 16 | 17 | assert request_content_type == "application/json" 18 | 19 | json_body = json.loads(request_body) 20 | 21 | print(json_body) 22 | 23 | data = json_body["inputs"] 24 | data = torch.tensor(data, dtype=torch.float32, device=device) 25 | return data 26 | 27 | 28 | def model_fn(model_dir): 29 | 30 | print('Loading the trained model from [{}]'.format(model_dir)) 31 | with open(os.path.join(model_dir, 'meta.json'), 'rb') as f: 32 | meta = json.load(f) 33 | 34 | print("Model is trained with parameters [{}]".format(meta)) 35 | model = SurvivalModel(n_input_dim=meta['model']['n_input_dim']) 36 | 37 | with open(os.path.join(model_dir, 'model.pth'), 'rb') as f: 38 | model.load_state_dict(torch.load(f, map_location=device)) 39 | 40 | print('Model loaded.') 41 | model = model.to(device) 42 | model.eval() 43 | return model 44 | 45 | 46 | def predict_fn(input_data, model): 47 | print("predicting with input data [{}]".format(input_data)) 48 | with torch.no_grad(): 49 | p_output = model(input_data) 50 | output = (p_output.cpu().numpy() > 0.5).astype(int) # move to CPU before converting, in case the endpoint runs on GPU 51 | print("outputs : [{}]".format(output)) 52 | return output 53 | -------------------------------------------------------------------------------- /workshops/Cancer-gene-expression-survival-prediction-with-mme/src/mme_deployment.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import os 3 | import subprocess 4 | import sys 5 | 6 | sm_client = boto3.client("sagemaker") 7 | s3_client = boto3.client("s3") 8 | s3_resource = boto3.resource("s3") 9 | 10 | def install(package): 11 | subprocess.check_call([sys.executable, "-m", "pip", "install", package]) 12 | 13 | def model_from_registry(model_package_arn): 14 | 15 | response = sm_client.describe_model_package( 16 | ModelPackageName=model_package_arn 17 | ) 18 | 19 | model_data_url = response["InferenceSpecification"]["Containers"][0]["ModelDataUrl"] 20 | 21 | return model_data_url 22 | 23 | 24 | def deploy_to_mme_location(model_data_url, mme_model_location_s3, genome_group): 25 | 26 | print("Deploying models from [{}] to [{}]".format(model_data_url, mme_model_location_s3)) 27 | 28 | _, path = mme_model_location_s3.split(":", 1) 29 | path = path.lstrip("/") 30 | bucket, path = path.split("/", 1) 31 | 32 | _, path_source = model_data_url.split(":", 1) 33 | source = path_source.lstrip("/") 34 | 35 | response = s3_client.copy_object(Bucket = bucket, CopySource = source, Key=path + "/model-{}.tar.gz".format(genome_group)) 36 | 37 | print(response) 38 | 39 | 40 | 41 | if __name__ == "__main__": 42 | 43 | model_package_arn = os.environ['modelPackageArn'] 44 | mme_model_location_s3 = os.environ['mmeModelLocation'] 45 | genome_group = os.environ['genomeGroup'] 46 | 47 | print("Preparing the MME deployment for model package ARN [{}].".format(model_package_arn)) 48 | 49 | model_data_url = model_from_registry(model_package_arn) 50 | 51 | print("Model url found: [{}]".format(model_data_url)) 52 | 53 | deploy_to_mme_location(model_data_url, mme_model_location_s3, genome_group) 54 | 55 | 
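Because every group's archive shares one multi-model endpoint, a specific model is selected per request with `TargetModel`. A minimal invocation sketch (the endpoint name is a placeholder; the payload shape matches `input_fn` in `inference.py`, and `metagene_19` has the three input features recorded in `meta.json`):

```python
import json
import boto3

runtime = boto3.client("sagemaker-runtime")

response = runtime.invoke_endpoint(
    EndpointName="gene-expression-survival-mme",  # placeholder endpoint name
    TargetModel="model-metagene_19.tar.gz",  # naming set by deploy_to_mme_location above
    ContentType="application/json",
    Body=json.dumps({"inputs": [[0.1, 0.2, 0.3]]}),  # one row of three metagene features
)
print(response["Body"].read())
```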
-------------------------------------------------------------------------------- /workshops/Classify_Medical_Specialty_NLP_Huggingface_Transformers/get_dependencies.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | # Get the requirements 4 | def get_dependencies(): 5 | '''Install dependencies from requirements.txt, skipping any that are already installed''' 6 | print("Finding Dependencies to Install") 7 | curr_depends=os.popen('pip list --format=freeze').readlines() 8 | curr_depends=[i.rstrip() for i in curr_depends] 9 | desired_depends=os.popen('cat requirements.txt').readlines() 10 | desired_depends=[i.rstrip() for i in desired_depends] 11 | depends_to_install=list(set(desired_depends) - set(curr_depends)) 12 | f_out=open('temp_reqs.txt','w') 13 | print(f'''{str(len(depends_to_install))} dependencies to install''') 14 | print(*depends_to_install,sep="\n",file=f_out) # one package per line so the shell loop below reads each entry 15 | f_out.close() 16 | my_cmd='''cat temp_reqs.txt | sed -e '/^\s*#.*$/d' -e '/^\s*$/d'|while read line; do TOREPLACE -m pip install $line;done > /dev/null ''' 17 | my_cmd=my_cmd.replace('TOREPLACE',sys.executable) 18 | os.system(my_cmd) 19 | os.system('rm temp_reqs.txt') 20 | print("Done Getting Dependencies") 21 | -------------------------------------------------------------------------------- /workshops/Classify_Medical_Specialty_NLP_Huggingface_Transformers/requirements.txt: -------------------------------------------------------------------------------- 1 | sagemaker==2.68.0 2 | transformers==4.12.2 3 | tensorflow==2.7.2 4 | seaborn==0.11.2 5 | -------------------------------------------------------------------------------- /workshops/Classify_Medical_Specialty_NLP_Huggingface_Transformers/train.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import tensorflow as tf 3 | import transformers 4 | import argparse 5 | import os 6 | from transformers import DistilBertTokenizer 7 | from transformers import TFDistilBertForSequenceClassification 8 | 9 | if __name__ == "__main__": 10 | 11 | parser = argparse.ArgumentParser() 12 | 13 | # Hyperparameters sent by the client are passed as command-line arguments to the script. 
14 | parser.add_argument("--epochs", type=int, default=3) 15 | parser.add_argument("--train_batch_size", type=int, default=32) 16 | parser.add_argument("--eval_batch_size", type=int, default=64) 17 | parser.add_argument("--warmup_steps", type=int, default=500) 18 | parser.add_argument("--model_name", type=str) 19 | parser.add_argument("--learning_rate", type=float, default=5e-5) 20 | 21 | # Data, model, and output directories 22 | parser.add_argument("--output_data_dir", type=str, default=os.environ["SM_OUTPUT_DATA_DIR"]) 23 | parser.add_argument("--model_dir", type=str, default=os.environ["SM_MODEL_DIR"]) 24 | parser.add_argument("--n_gpus", type=str, default=os.environ["SM_NUM_GPUS"]) 25 | parser.add_argument("--training_dir", type=str, default=os.environ["SM_CHANNEL_TRAIN"]) 26 | parser.add_argument("--test_dir", type=str, default=None) 27 | 28 | MODEL_NAME = 'distilbert-base-uncased-finetuned-sst-2-english' 29 | BATCH_SIZE = 16 30 | N_EPOCHS = 3 31 | 32 | args, _ = parser.parse_known_args() 33 | 34 | 35 | df_1=pd.read_csv(f'{args.training_dir}/train.csv') 36 | 37 | X_train=df_1 38 | y_train=X_train['specialty_encoded'] 39 | # Define a tokenizer object 40 | tokenizer = DistilBertTokenizer.from_pretrained(MODEL_NAME) 41 | # Tokenize the text 42 | train_encodings = tokenizer(list(X_train['text']), 43 | truncation=True, 44 | padding=True) 45 | train_dataset = tf.data.Dataset.from_tensor_slices((dict(train_encodings), 46 | list(y_train.values))) 47 | 48 | model = TFDistilBertForSequenceClassification.from_pretrained(MODEL_NAME) 49 | 50 | # Compile with the Adam optimizer and sparse categorical cross-entropy loss 51 | model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=5e-5), 52 | loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 53 | metrics=["accuracy"]) 54 | # Train the model; the dataset is already batched, so batch_size is not passed to fit() 55 | model.fit(train_dataset.shuffle(len(X_train)).batch(BATCH_SIZE), 56 | epochs=N_EPOCHS) 57 | 58 | model.save_pretrained(args.model_dir) 59 | tokenizer.save_pretrained(args.model_dir) 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/CITATION: -------------------------------------------------------------------------------- 1 | Pacheco, Andre G. C.; Lima, Gustavo R.; Salomão, Amanda S.; Krohling, Breno; Biral, Igor P.; de Angelo, Gabriel G. ; Alves Jr, Fábio C. R. ; Esgario, José G. M.; Simora, Alana C. ; Castro, Pedro B. C. ; Rodrigues, Felipe B.; Frasson, Patricia H. L. ; Krohling, Renato A.; Knidel, Helder ; Santos, Maria C. S. ; Espírito Santo, Rachel B.; Macedo, Telma L. S. G.; Canuto, Tania R. P. ; de Barros, Luíz F. S. 
(2020), “PAD-UFES-20: a skin lesion dataset composed of patient data and clinical images collected from smartphones”, Mendeley Data, V1, doi: 10.17632/zr7vgbcyr2.1 -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/MLLC1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/MLLC1.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/MLLC2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/MLLC2.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/Trial-component-list.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/Trial-component-list.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/deployment_options.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/deployment_options.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/experiments.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/experiments.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/find-prod-deploy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/find-prod-deploy.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/jobs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/jobs.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/lesions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/lesions.png 
-------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/metrics.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/model_registry.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/model_registry.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/name_project.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/name_project.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/overexpression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/overexpression.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/pipeline.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/pipeline_execution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/pipeline_execution.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/repo_defaults.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/repo_defaults.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/repositories.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/repositories.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/resources.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/resources.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/second-endpoint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/second-endpoint.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/select-model-version.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/select-model-version.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/sidebar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/sidebar.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/sm-resources-tab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/sm-resources-tab.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/sm_experiments.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/sm_experiments.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/tc-list-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/tc-list-2.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/template_build.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/template_build.jpg -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/template_deploy.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/template_deploy.jpg -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/update-status.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/update-status.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/scripts/pipelines/codebuild-buildspec.yml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | 3 | phases: 4 | install: 5 | runtime-versions: 6 | python: 3.8 7 | commands: 8 | - pip install --upgrade --force-reinstall . "awscli>1.20.30" 9 | 10 | build: 11 | commands: 12 | - export PYTHONUNBUFFERED=TRUE 13 | - export SAGEMAKER_PROJECT_NAME_ID="${SAGEMAKER_PROJECT_NAME}-${SAGEMAKER_PROJECT_ID}" 14 | - | 15 | run-pipeline --module-name pipelines.skinlesions.pipeline \ 16 | --role-arn $SAGEMAKER_PIPELINE_ROLE_ARN \ 17 | --tags "[{\"Key\":\"sagemaker:project-name\", \"Value\":\"${SAGEMAKER_PROJECT_NAME}\"}, {\"Key\":\"sagemaker:project-id\", \"Value\":\"${SAGEMAKER_PROJECT_ID}\"}]" \ 18 | --kwargs "{\"region\":\"${AWS_REGION}\",\"sagemaker_project_arn\":\"${SAGEMAKER_PROJECT_ARN}\",\"role\":\"${SAGEMAKER_PIPELINE_ROLE_ARN}\",\"default_bucket\":\"${ARTIFACT_BUCKET}\",\"pipeline_name\":\"${SAGEMAKER_PROJECT_NAME_ID}\",\"model_package_group_name\":\"${SAGEMAKER_PROJECT_NAME_ID}\",\"base_job_prefix\":\"${SAGEMAKER_PROJECT_NAME_ID}\"}" 19 | - echo "Create/Update of the SageMaker Pipeline and execution completed." 20 | 21 | -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/scripts/pipelines/skinlesions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/scripts/pipelines/skinlesions/__init__.py -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/scripts/pipelines/skinlesions/preprocess.py: -------------------------------------------------------------------------------- 1 | 2 | import boto3 3 | import logging 4 | import os 5 | import pandas as pd 6 | import shutil 7 | from sklearn.model_selection import train_test_split 8 | 9 | logging.getLogger().setLevel(logging.INFO) 10 | 11 | # Define data source and other parameters. 
12 | SRC_BUCKET = 'prod-dcd-datasets-cache-zipfiles' 13 | SRC_KEY = 'zr7vgbcyr2-1.zip' 14 | DATA_DIR = '/opt/ml/processing/input' 15 | 16 | # Download raw data zip from https://data.mendeley.com/datasets/zr7vgbcyr2/1 17 | logging.info(f'Downloading {SRC_KEY}') 18 | s3_boto_client = boto3.client("s3") 19 | s3_boto_client.download_file(SRC_BUCKET, SRC_KEY, f'{DATA_DIR}/raw.zip') 20 | 21 | # Unzip data 22 | logging.info(f'Unpacking {SRC_KEY}') 23 | shutil.unpack_archive(f'{DATA_DIR}/raw.zip', DATA_DIR) 24 | for i in range(1,4): 25 | logging.info(f'Unpacking imgs_part_{i}.zip') 26 | shutil.unpack_archive(f'{DATA_DIR}/images/imgs_part_{i}.zip', f'{DATA_DIR}/images') 27 | logging.info(f'Copying {DATA_DIR}/images/imgs_part_{i} to {DATA_DIR}/images/all_imgs') 28 | shutil.copytree(f'{DATA_DIR}/images/imgs_part_{i}', f'{DATA_DIR}/images/all_imgs', dirs_exist_ok=True) 29 | 30 | # Split data into training, validation, and test sets 31 | logging.info(f'Creating training-validation data split') 32 | metadata = pd.read_csv(f'{DATA_DIR}/metadata.csv') 33 | train_df, test_df = train_test_split(metadata, test_size=0.2, stratify=metadata['diagnostic']) 34 | train_df, val_df = train_test_split(train_df, test_size=0.25, stratify=train_df['diagnostic']) 35 | 36 | # Copy training data into folders for training 37 | logging.info(f'Copying training data to {DATA_DIR}/output/train') 38 | os.makedirs(f"{DATA_DIR}/output/train", exist_ok=True) 39 | train_df.to_csv(f'{DATA_DIR}/output/train/metadata.csv', index=False) 40 | for _,row in train_df.iterrows(): 41 | src = f"{DATA_DIR}/images/all_imgs/{row['img_id']}" 42 | os.makedirs(f"{DATA_DIR}/output/train/{row['diagnostic']}", exist_ok=True) 43 | dest = f"{DATA_DIR}/output/train/{row['diagnostic']}/{row['img_id']}" 44 | shutil.copy2(src, dest) 45 | 46 | # Copy validation data into folders for validation 47 | logging.info(f'Copying validation data to {DATA_DIR}/output/val') 48 | os.makedirs(f"{DATA_DIR}/output/val", exist_ok=True) 49 | val_df.to_csv(f'{DATA_DIR}/output/val/metadata.csv', index=False) 50 | for _,row in val_df.iterrows(): 51 | src = f"{DATA_DIR}/images/all_imgs/{row['img_id']}" 52 | os.makedirs(f"{DATA_DIR}/output/val/{row['diagnostic']}", exist_ok=True) 53 | dest = f"{DATA_DIR}/output/val/{row['diagnostic']}/{row['img_id']}" 54 | shutil.copy2(src, dest) 55 | 56 | # Copy test data into folders for evaluation 57 | logging.info(f'Copying test data to {DATA_DIR}/output/test') 58 | os.makedirs(f"{DATA_DIR}/output/test", exist_ok=True) 59 | test_df.to_csv(f'{DATA_DIR}/output/test/metadata.csv', index=False) 60 | for _,row in test_df.iterrows(): 61 | src = f"{DATA_DIR}/images/all_imgs/{row['img_id']}" 62 | os.makedirs(f"{DATA_DIR}/output/test/{row['diagnostic']}", exist_ok=True) 63 | dest = f"{DATA_DIR}/output/test/{row['diagnostic']}/{row['img_id']}" 64 | shutil.copy2(src, dest) 65 | -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/scripts/processing/process.py: -------------------------------------------------------------------------------- 1 | 2 | import boto3 3 | import logging 4 | import os 5 | import pandas as pd 6 | import shutil 7 | from sklearn.model_selection import train_test_split 8 | 9 | logging.getLogger().setLevel(logging.INFO) 10 | 11 | # Define data source and other parameters. 
12 | SRC_BUCKET = 'prod-dcd-datasets-cache-zipfiles' 13 | SRC_KEY = 'zr7vgbcyr2-1.zip' 14 | DATA_DIR = '/opt/ml/processing/input' 15 | 16 | # Download raw data zip from https://data.mendeley.com/datasets/zr7vgbcyr2/1 17 | logging.info(f'Downloading {SRC_KEY}') 18 | s3_boto_client = boto3.client("s3") 19 | s3_boto_client.download_file(SRC_BUCKET, SRC_KEY, f'{DATA_DIR}/raw.zip') 20 | 21 | # Unzip data 22 | logging.info(f'Unpacking {SRC_KEY}') 23 | shutil.unpack_archive(f'{DATA_DIR}/raw.zip', DATA_DIR) 24 | for i in range(1,4): 25 | logging.info(f'Unpacking imgs_part_{i}.zip') 26 | shutil.unpack_archive(f'{DATA_DIR}/images/imgs_part_{i}.zip', f'{DATA_DIR}/images') 27 | logging.info(f'Copying {DATA_DIR}/images/imgs_part_{i} to {DATA_DIR}/images/all_imgs') 28 | shutil.copytree(f'{DATA_DIR}/images/imgs_part_{i}', f'{DATA_DIR}/images/all_imgs', dirs_exist_ok=True) 29 | 30 | # Split data into training, validation, and test sets 31 | logging.info(f'Creating training-validation data split') 32 | metadata = pd.read_csv(f'{DATA_DIR}/metadata.csv') 33 | train_df, test_df = train_test_split(metadata, test_size=0.2, stratify=metadata['diagnostic']) 34 | train_df, val_df = train_test_split(train_df, test_size=0.25, stratify=train_df['diagnostic']) 35 | 36 | # Copy training data into folders for training 37 | logging.info(f'Copying training data to {DATA_DIR}/output/train') 38 | os.makedirs(f"{DATA_DIR}/output/train", exist_ok=True) 39 | train_df.to_csv(f'{DATA_DIR}/output/train/metadata.csv', index=False) 40 | for _,row in train_df.iterrows(): 41 | src = f"{DATA_DIR}/images/all_imgs/{row['img_id']}" 42 | os.makedirs(f"{DATA_DIR}/output/train/{row['diagnostic']}", exist_ok=True) 43 | dest = f"{DATA_DIR}/output/train/{row['diagnostic']}/{row['img_id']}" 44 | shutil.copy2(src, dest) 45 | 46 | # Copy validation data into folders for validation 47 | logging.info(f'Copying validation data to {DATA_DIR}/output/val') 48 | os.makedirs(f"{DATA_DIR}/output/val", exist_ok=True) 49 | val_df.to_csv(f'{DATA_DIR}/output/val/metadata.csv', index=False) 50 | for _,row in val_df.iterrows(): 51 | src = f"{DATA_DIR}/images/all_imgs/{row['img_id']}" 52 | os.makedirs(f"{DATA_DIR}/output/val/{row['diagnostic']}", exist_ok=True) 53 | dest = f"{DATA_DIR}/output/val/{row['diagnostic']}/{row['img_id']}" 54 | shutil.copy2(src, dest) 55 | 56 | # Copy test data into folders for evaluation 57 | logging.info(f'Copying test data to {DATA_DIR}/output/test') 58 | os.makedirs(f"{DATA_DIR}/output/test", exist_ok=True) 59 | test_df.to_csv(f'{DATA_DIR}/output/test/metadata.csv', index=False) 60 | for _,row in test_df.iterrows(): 61 | src = f"{DATA_DIR}/images/all_imgs/{row['img_id']}" 62 | os.makedirs(f"{DATA_DIR}/output/test/{row['diagnostic']}", exist_ok=True) 63 | dest = f"{DATA_DIR}/output/test/{row['diagnostic']}/{row['img_id']}" 64 | shutil.copy2(src, dest) 65 | -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/visualizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/visualizer/__init__.py -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/visualizer/visualizer.py: -------------------------------------------------------------------------------- 1 | import os 2 | 
from pyvis.network import Network 3 | from sagemaker.lineage.artifact import Artifact 4 | 5 | class Visualizer: 6 | def __init__(self): 7 | self.directory = "generated" 8 | if not os.path.exists(self.directory): 9 | os.makedirs(self.directory) 10 | 11 | def render(self, query_lineage_response, scenario_name, sagemaker_session): 12 | net = self.get_network() 13 | for vertex in query_lineage_response["Vertices"]: 14 | arn = vertex["Arn"] 15 | if "Type" in vertex: 16 | label = vertex["Type"] 17 | else: 18 | label = None 19 | lineage_type = vertex["LineageType"] 20 | name = self.get_name(arn, label, lineage_type, sagemaker_session) 21 | title = self.get_title(arn, label, lineage_type) 22 | color = self.get_color(lineage_type) 23 | net.add_node( 24 | vertex["Arn"], 25 | label=name, 26 | title=title, 27 | shape="box", 28 | physics="false", 29 | color=color, 30 | ) 31 | 32 | for edge in query_lineage_response["Edges"]: 33 | source = edge["SourceArn"] 34 | dest = edge["DestinationArn"] 35 | net.add_edge(source, dest) 36 | 37 | return net.show(f"{self.directory}/{scenario_name}.html") 38 | 39 | def get_title(self, arn, label, lineage_type): 40 | return f"Arn: {arn} Type: {label} Lineage Type: {lineage_type}" 41 | 42 | def get_name(self, arn, label, lineage_type, sagemaker_session): 43 | if lineage_type == "Artifact": 44 | return ( 45 | label 46 | + " " 47 | + Artifact.load( 48 | artifact_arn=arn, 49 | sagemaker_session=sagemaker_session, 50 | ).source.source_uri 51 | ) 52 | else: 53 | name = arn.split("/")[1] 54 | return label + " " + name 55 | 56 | def get_network(self): 57 | net = Network(height="800px", width="100%", directed=True, notebook=True) 58 | return net 59 | 60 | def get_color(self, lineage_type): 61 | if lineage_type == "Context": 62 | return "yellow" 63 | elif lineage_type == "Artifact": 64 | return "orange" 65 | else: 66 | return None 67 | -------------------------------------------------------------------------------- /workshops/FDA_Doc_Search/.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | .env 3 | .scratch 4 | data 5 | .DS_Store -------------------------------------------------------------------------------- /workshops/FDA_Doc_Search/LICENSE: -------------------------------------------------------------------------------- 1 | MIT No Attribution 2 | 3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 13 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 14 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 15 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 16 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
--------------------------------------------------------------------------------
/workshops/FDA_Doc_Search/README.md:
--------------------------------------------------------------------------------
1 | # Drugs@FDA Search Demo
2 | 
3 | ## Summary
4 | 
5 | A Search + RAG demo using documents from the [Drugs@FDA](https://www.fda.gov/drugs/drug-approvals-and-databases/about-drugsfda) data source.
6 | 
7 | ## Architecture
8 | 
9 | ![Drugs@FDA Search Architecture](arch.png)
10 | 
11 | ## Contents
12 | 
13 | This project includes four Jupyter notebooks that walk through the process of building a Search+RAG workflow for Drugs@FDA:
14 | 
15 | 1. Load Drugs@FDA data and metadata into Amazon S3.
16 | 1. Create Kendra Index and Data Source.
17 | 1. Explore Search+RAG with Amazon Bedrock.
18 | 1. Build a Search+RAG pipeline with Bedrock Prompt Flows.
19 | 
20 | ## Getting Started
21 | 
22 | 1. Verify that you have programmatic credentials saved to access your AWS account.
23 | 1. Set an environment variable named `S3_BUCKET_NAME` with the name of an existing S3 bucket. You can also add this to a file named `.env` for easier management.
24 | 1. Run notebooks 1-4.
--------------------------------------------------------------------------------
/workshops/FDA_Doc_Search/app/www/img/brain-light.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/FDA_Doc_Search/app/www/img/brain-light.png
--------------------------------------------------------------------------------
/workshops/FDA_Doc_Search/app/www/img/brain.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/FDA_Doc_Search/app/www/img/brain.png
--------------------------------------------------------------------------------
/workshops/FDA_Doc_Search/app/www/main.css:
--------------------------------------------------------------------------------
1 | 
2 | .border-solid {
3 |     border: 3px solid;
4 |     border-width: 3px;
5 |     border-radius: 20px;
6 |     border-color: #f01716;
7 | }
8 | 
9 | .border-gradient {
10 |     border-radius: 30px;
11 |     border: transparent solid 3px;
12 | 
13 | }
14 | 
15 | .border-gradient {
16 |     background: linear-gradient(#fff 0 0) padding-box,
17 |     linear-gradient(135deg, #f01716, #f99f9f) border-box;
18 | }
19 | 
20 | .generate-results {
21 |     padding: 20px;
22 | }
23 | 
24 | .bedrock-title {
25 |     font-size: 3em;
26 |     font-weight: 700;
27 |     color: #f01716;
28 | }
29 | 
30 | .logo-table {
31 |     display: grid;
32 |     grid-template-columns: 80px 1fr;
33 | }
34 | 
35 | .p-generate {
36 |     font-size: 1.5em;
37 | }
38 | 
39 | .search-result-header {
40 |     font-size: 1.25em;
41 |     font-weight: 700;
42 |     color: #f01716
43 | }
44 | 
45 | .filter-group .wrap {
46 |     display: grid !important;
47 |     grid-template-columns: 100%;
48 | }
49 | 
50 | .header-bar {
51 |     border-bottom: 1px solid #dddddd;
52 |     padding: 5px;
53 | }
--------------------------------------------------------------------------------
/workshops/FDA_Doc_Search/arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/FDA_Doc_Search/arch.png
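A minimal sketch of the environment setup described in the FDA_Doc_Search README's Getting Started section above, assuming `python-dotenv` (pinned in the requirements.txt that follows) and an existing bucket supplied by you:

```python
# Hypothetical setup check for the FDA_Doc_Search notebooks. Assumes a local
# .env file (or an exported variable) holding the name of an existing bucket.
import os

from dotenv import load_dotenv  # python-dotenv, pinned in requirements.txt

load_dotenv()  # picks up S3_BUCKET_NAME from ./.env if the file exists
bucket = os.environ["S3_BUCKET_NAME"]  # raises KeyError if the variable is unset
print(f"Drugs@FDA documents will be staged in s3://{bucket}")
```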
--------------------------------------------------------------------------------
/workshops/FDA_Doc_Search/requirements.txt:
--------------------------------------------------------------------------------
1 | beautifulsoup4==4.12.3
2 | boto3==1.35.13
3 | tqdm==4.66.5
4 | lxml==5.3.0
5 | gradio==4.43.0
6 | python-dotenv==1.0.1
7 | 
--------------------------------------------------------------------------------
/workshops/FDA_Doc_Search/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/FDA_Doc_Search/src/__init__.py
--------------------------------------------------------------------------------
/workshops/Healthcare_Payments_Prediction_SageMaker_AutoPilot/img/autopilot_schematic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Healthcare_Payments_Prediction_SageMaker_AutoPilot/img/autopilot_schematic.png
--------------------------------------------------------------------------------
/workshops/Medical_Imaging_AI/README.md:
--------------------------------------------------------------------------------
1 | # Medical Imaging AI on AWS Workshop
2 | 
3 | ## Introduction
4 | Artificial Intelligence (AI) has been shown to support radiologists' clinical decision making and to help reduce doctor burnout. To apply AI in medical imaging diagnosis, we need vast amounts of annotated medical images to develop a supervised AI model. Annotating medical images accurately is an important procedure; however, it is not only tedious and time-consuming, but it also demands costly, specialty-oriented skills that are not easily accessible. AI-Assisted Annotation (AIAA) has been developed to greatly reduce this manual effort.
5 | 
6 | ## Workshop Studio
7 | 
8 | More information about this workshop is available on [AWS Workshop Studio](https://catalog.us-east-1.prod.workshops.aws/workshops/ff6964ec-b880-45d4-bc1e-468b0c7fa854/en-US/).
9 | 
10 | ## Architecture Overview
11 | [This workshop](https://catalog.us-east-1.prod.workshops.aws/workshops/ff6964ec-b880-45d4-bc1e-468b0c7fa854/en-US/) presents an AWS solution that runs the open-source [MONAI Label](https://docs.monai.io/projects/label/en/latest/index.html) server on an EC2 virtual machine with autoscaling, mounted to the same [Amazon Elastic File System (EFS)](https://aws.amazon.com/efs/) volume shared with [Amazon SageMaker Studio notebook](https://docs.aws.amazon.com/sagemaker/latest/dg/notebooks.html) instances. Through the common file share, clinicians and data scientists can collaborate on the same data sets through the different tools they are familiar with. [Amazon AppStream 2.0](https://aws.amazon.com/appstream2/) is used to stream [3D Slicer](https://www.slicer.org/), an image computing platform, for interactive medical image annotation by clinicians.
12 | 
13 | ![Architecture diagram](img/arch.png)
14 | 
15 | ## Objectives
16 | Through this workshop you will learn how to perform AIAA on medical images on AWS using MONAI Label:
17 | 
18 | 1. You will deploy and configure a MONAI Label server on AWS
19 | 1. Radiologists will be able to perform medical image annotations using 3D Slicer on Amazon AppStream 2.0
20 | 1. Data scientists will be able to build deep learning models using the annotated images in Amazon SageMaker notebooks
21 | 
22 | ## Requirements
23 | 
24 | Get familiar with the following AWS services:
25 | - [Amazon AppStream 2.0](https://aws.amazon.com/appstream2/)
26 | - [AWS CloudFormation](https://aws.amazon.com/cloudformation/)
27 | - [Amazon SageMaker](https://aws.amazon.com/pm/sagemaker)
28 | - [Amazon EC2](https://aws.amazon.com/ec2/)
29 | - [Amazon EFS](https://aws.amazon.com/efs/)
30 | 
31 | You will use the open source [3D Slicer image viewer](https://www.slicer.org/) to visualize and annotate images in the [Medical Segmentation Decathlon](https://registry.opendata.aws/msd/).
32 | 
33 | ## Cost Estimate
34 | 
35 | We’ve included a [cost estimate](https://calculator.aws/#/estimate?id=dfb2aa63e2eb7d53385c0290fb2602cc2d2e4630) for the default compute and storage used in this workshop. You can modify the Amazon EC2 instance type and size in the AWS Pricing Calculator to match the ones you selected when deploying this solution.
36 | 
37 | Note that the AWS Pricing Calculator estimates charges based on monthly and yearly intervals. If you’re only planning on using this workshop for a couple of hours, you can divide the “Total monthly” cost by 730 to get an approximate hourly running cost. For example, if the total monthly cost is $1167, then the hourly cost is $1167 ÷ 730 ≈ $1.60 per hour.
38 | 
39 | ## MONAI Label Tutorial
40 | 
41 | [https://youtu.be/m2rYorVwXk4](https://youtu.be/m2rYorVwXk4)
--------------------------------------------------------------------------------
/workshops/Medical_Imaging_AI/img/arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Medical_Imaging_AI/img/arch.png
--------------------------------------------------------------------------------
/workshops/Medical_Imaging_AI/source/requirements.txt:
--------------------------------------------------------------------------------
1 | monai-weekly[gdown, nibabel, tqdm, ignite]
2 | matplotlib
3 | psutil
--------------------------------------------------------------------------------
/workshops/Molecular-property-prediction/hiv-inhibitor-prediction-dgl/code/requirements.txt:
--------------------------------------------------------------------------------
1 | dgl==0.6.1
2 | dgllife==0.2.8
3 | rdkit-pypi==2021.09.3
--------------------------------------------------------------------------------
/workshops/Molecular-property-prediction/hiv-inhibitor-prediction-dgl/code/s3_downloaded_HIV_dataset.py:
--------------------------------------------------------------------------------
1 | from dgllife.data.csv_dataset import MoleculeCSVDataset
2 | from dgllife.utils import smiles_to_bigraph
3 | from functools import partial
4 | import pandas as pd
5 | 
6 | class S3DownloadedHIVDataset(MoleculeCSVDataset):
7 | 
8 |     def __init__(self, s3downloaded_dir,
9 |                  node_featurizer=None,
10 |                  edge_featurizer=None,
11 |                  log_every=1000,
12 |                  n_jobs=1, mode='sm'):
13 | 
14 |         smiles_to_graph = partial(smiles_to_bigraph, add_self_loop=True)
15 |         load = False
16 |         cache_file_path = "./none.bin"
17 | 
18 |         df = pd.read_csv(s3downloaded_dir + "/full.csv")
19 | 
20 |         ### Check below if we are to run the sagemaker job
21 |         #if(mode == 'local'):
22 |         #    df = df.drop(columns=['activity'])
23 | 
24 |         super(S3DownloadedHIVDataset, self).__init__(df=df,
25 |                                                      smiles_to_graph=smiles_to_graph,
26 |                                                      node_featurizer=node_featurizer,
27 |                                                      edge_featurizer=edge_featurizer,
28 |                                                      smiles_column='smiles',
29 |                                                      cache_file_path=cache_file_path,
30 |                                                      load=load,
31 |                                                      log_every=log_every,
32 |                                                      init_mask=True,
33 |                                                      n_jobs=n_jobs)
34 | 
35 |     def __getitem__(self, item):
36 |         """Get datapoint with index
37 | 
38 |         Parameters
39 |         ----------
40 |         item : int
41 |             Datapoint index
42 | 
43 |         Returns
44 |         -------
45 |         str
46 |             SMILES for the ith datapoint
47 |         DGLGraph
48 |             DGLGraph for the ith datapoint
49 |         Tensor of dtype float32 and shape (T)
50 |             Labels of the ith datapoint for all tasks. T for the number of tasks.
51 |         Tensor of dtype float32 and shape (T)
52 |             Binary masks of the ith datapoint indicating the existence of labels for all tasks.
53 |         str, optional
54 |             Raw screening result, which can be CI, CA, or CM.
55 |         """
56 |         return self.smiles[item], self.graphs[item], self.labels[item], self.mask[item]
--------------------------------------------------------------------------------
/workshops/Molecular-property-prediction/hiv-inhibitor-prediction-dgl/code/utils.py:
--------------------------------------------------------------------------------
1 | from torch.nn import functional as F
2 | 
3 | def model_saved_path(base):
4 |     return base + "/model.pth"
5 | 
6 | def model_params_saved_path(base):
7 |     return base + '/model_params.json'
8 | 
9 | def load_model(args, node_featurizer, n_tasks=1):
10 | 
11 |     num_gnn_layers = len(args.gnn_hidden_feats)
12 |     model = None
13 |     if(args.gnn_model_name == 'GCN-p'):
14 |         from dgllife.model import GCNPredictor
15 |         model = GCNPredictor(
16 |             in_feats=node_featurizer.feat_size(),
17 |             hidden_feats=args.gnn_hidden_feats,
18 |             activation=[F.relu] * num_gnn_layers,
19 |             residual=[args.gnn_residuals] * num_gnn_layers,
20 |             batchnorm=[args.gnn_batchnorm] * num_gnn_layers,
21 |             dropout=[args.gnn_dropout] * num_gnn_layers,
22 |             predictor_hidden_feats=args.gnn_predictor_hidden_feats,
23 |             predictor_dropout=args.gnn_dropout,
24 |             n_tasks=n_tasks
25 |         )
26 |     elif (args.gnn_model_name == 'GAT-p'):
27 |         from dgllife.model import GATPredictor
28 |         model = GATPredictor(
29 |             in_feats=node_featurizer.feat_size(),
30 |             hidden_feats=args.gnn_hidden_feats,
31 |             num_heads=[args.gnn_num_heads] * num_gnn_layers,
32 |             feat_drops=[args.gnn_dropout] * num_gnn_layers,
33 |             attn_drops=[args.gnn_dropout] * num_gnn_layers,
34 |             alphas=[args.gnn_alphas] * num_gnn_layers,
35 |             residuals=[args.gnn_residuals] * num_gnn_layers,
36 |             predictor_hidden_feats=args.gnn_predictor_hidden_feats,
37 |             predictor_dropout=args.gnn_dropout,
38 |             n_tasks=n_tasks
39 |         )
40 |     return model
41 | 
42 | 
43 | def init_featurizers(featurizer_type):
44 |     node_featurizer = None
45 |     edge_featurizer = None
46 |     if(featurizer_type == 'canonical'):
47 |         from dgllife.utils import CanonicalAtomFeaturizer
48 |         node_featurizer = CanonicalAtomFeaturizer()
49 |     elif(featurizer_type == 'attentivefp'):
50 |         from dgllife.utils import AttentiveFPAtomFeaturizer
51 |         node_featurizer = AttentiveFPAtomFeaturizer()
52 |     else:
53 |         raise ValueError(
54 |             "Expect featurizer_type to be in ['canonical', 'attentivefp'], "
55 |             "got {}".format(featurizer_type))
56 |     return node_featurizer, edge_featurizer
--------------------------------------------------------------------------------
/workshops/Molecular-property-prediction/hiv-inhibitor-prediction-dgl/img/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Molecular-property-prediction/hiv-inhibitor-prediction-dgl/img/1.jpg
--------------------------------------------------------------------------------
/workshops/Molecular-property-prediction/hiv-inhibitor-prediction-dgl/requirements.txt:
--------------------------------------------------------------------------------
1 | dgl==0.9.1
2 | dgllife
3 | numpy==1.26.4
4 | pandas==2.2.2
5 | rdkit-pypi==2022.9.5
6 | sagemaker==2.224.4
7 | scikit-learn==1.5.1
--------------------------------------------------------------------------------
/workshops/Pharma_Manufacturing_Compliance_Bedrock_GenAI/.gitignore:
--------------------------------------------------------------------------------
1 | *.code-workspace
2 | .venv
--------------------------------------------------------------------------------
/workshops/Pharma_Manufacturing_Compliance_Bedrock_GenAI/README.md:
--------------------------------------------------------------------------------
1 | # Manufacturing Document Comparison
2 | 
3 | ## Description
4 | This project automates the deployment of a manufacturing document comparison application on AWS. It builds and pushes a Docker image to Amazon ECR and then uses CloudFormation to provision the required resources.
5 | 
6 | ## Prerequisites
7 | - AWS CLI, configured for your account.
8 | - Docker.
9 | - A CloudFormation template named cf.yaml.
10 | - In app.py, set your password on line 276.
11 | 
12 | ## Getting Started
13 | Clone the repository:
14 | 
15 | ```bash
16 | git clone 
17 | cd 
18 | ```
19 | ## Configuration
20 | Configure the AWS CLI and Docker if they are not already set up. The script defaults to the us-east-1 region.
21 | 
22 | ## Build and Deploy
23 | Execute the script to build the Docker image, push it to ECR, and deploy your CloudFormation stack:
24 | 
25 | ```bash
26 | ./deploy.sh
27 | ```
28 | 
29 | ## Destroy
30 | ```bash
31 | ./destroy.sh
32 | ```
33 | 
34 | ## What the Script Does
35 | - Checks for the ECR repository; creates it if absent.
36 | - Logs into ECR.
37 | - Builds and tags the Docker image.
38 | - Pushes the image to ECR.
39 | - Deploys/updates the CloudFormation stack with the image URI.
40 | 
41 | ## Customize the Script
42 | Modify these variables in deploy.sh as needed:
43 | - **IMAGE**: Docker image and ECR repository name.
44 | - **SERVICE_NAME**: Service name for CloudFormation.
45 | - **STACK_NAME**: CloudFormation stack name.
46 | 
47 | ## Additional Information
48 | Ensure cf.yaml is in the same directory as deploy.sh, or update the script with the correct path.
49 | 
50 | ## Troubleshooting
51 | Check AWS CLI credentials and the CloudFormation console for errors.
52 | 
53 | ## License
54 | Apache-2.0 License
--------------------------------------------------------------------------------
/workshops/Pharma_Manufacturing_Compliance_Bedrock_GenAI/docker/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM --platform=linux/amd64 python:3.12
2 | 
3 | WORKDIR /deployment
4 | 
5 | COPY ./requirements.txt /deployment/requirements.txt
6 | COPY ./images /deployment/images
7 | 
8 | # Upgrade pip before installing the app dependencies (including the Bedrock client)
9 | RUN cd /deployment/ && pip install --upgrade pip
10 | 
11 | RUN pip install --no-cache-dir --upgrade -r /deployment/requirements.txt
12 | 
13 | COPY app.py /deployment/
14 | 
15 | EXPOSE 8080
16 | 
17 | CMD ["python", "app.py"]
18 | 
--------------------------------------------------------------------------------
/workshops/Pharma_Manufacturing_Compliance_Bedrock_GenAI/docker/cf.yaml:
--------------------------------------------------------------------------------
1 | # Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
2 | # SPDX-License-Identifier: Apache-2.0
3 | AWSTemplateFormatVersion: 2010-09-09
4 | Description: CloudFormation template that deploys the manufacturing document comparison app on AWS App Runner
5 | Parameters:
6 |   ServiceName:
7 |     Type: String
8 |     Description: Name for your App Runner service.
9 |   ImageUri:
10 |     Type: String
11 |     Description: ImageUri
12 | Resources:
13 |   Service:
14 |     Type: AWS::AppRunner::Service
15 |     Properties:
16 |       ServiceName: !Ref ServiceName
17 |       SourceConfiguration:
18 |         AuthenticationConfiguration:
19 |           AccessRoleArn: !GetAtt AppRunnerDeployRole.Arn
20 |         AutoDeploymentsEnabled: true
21 |         ImageRepository:
22 |           ImageIdentifier: !Ref ImageUri
23 |           ImageRepositoryType: ECR
24 |           ImageConfiguration:
25 |             Port: 8080
26 |       InstanceConfiguration:
27 |         InstanceRoleArn: !GetAtt InstanceRole.Arn
28 |         Cpu: 1024
29 |         Memory: 2048
30 | 
31 |   AppRunnerDeployRole:
32 |     Type: AWS::IAM::Role
33 |     Properties:
34 |       RoleName: !Sub ${ServiceName}-AppRunnerDeployRole
35 |       Description: Role for App Runner deployment
36 |       AssumeRolePolicyDocument:
37 |         Version: 2012-10-17
38 |         Statement:
39 |           - Effect: Allow
40 |             Principal:
41 |               Service:
42 |                 - build.apprunner.amazonaws.com
43 |             Action:
44 |               - sts:AssumeRole
45 |       Path: /
46 |       Policies:
47 |         - PolicyName: AllowAccessToAppRunner
48 |           PolicyDocument:
49 |             Version: 2012-10-17
50 |             Statement:
51 |               - Effect: Allow
52 |                 Action:
53 |                   - apprunner:*
54 |                 Resource: '*'
55 |         - PolicyName: AllowAccessToECR
56 |           PolicyDocument:
57 |             Version: 2012-10-17
58 |             Statement:
59 |               - Effect: Allow
60 |                 Action:
61 |                   - ecr:*
62 |                 Resource: '*'
63 |         - PolicyName: AllowAccessToLogs
64 |           PolicyDocument:
65 |             Version: 2012-10-17
66 |             Statement:
67 |               - Effect: Allow
68 |                 Action:
69 |                   - logs:*
70 |                 Resource: '*'
71 | 
72 |   InstanceRole:
73 |     Type: AWS::IAM::Role
74 |     Properties:
75 |       RoleName: !Sub ${ServiceName}-InstanceRole
76 |       Description: Role for App Runner instance
77 |       AssumeRolePolicyDocument:
78 |         Version: 2012-10-17
79 |         Statement:
80 |           - Effect: Allow
81 |             Principal:
82 |               Service:
83 |                 - tasks.apprunner.amazonaws.com
84 |             Action:
85 |               - sts:AssumeRole
86 |       Path: /
87 |       Policies:
88 |         - PolicyName: AllowAccessToBedrock
89 |           PolicyDocument:
90 |             Version: 2012-10-17
91 |             Statement:
92 |               - Effect: Allow
93 |                 Action:
94 |                   - bedrock:InvokeModel
95 |                 Resource: '*'
96 | 
97 | 
98 | Outputs:
99 |   Endpoint:
100 |     Description: "The endpoint of the App Runner service."
101 |     Value: !GetAtt Service.ServiceUrl
--------------------------------------------------------------------------------
/workshops/Pharma_Manufacturing_Compliance_Bedrock_GenAI/docker/deploy.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | IMAGE=manufacturing_document_comparison
4 | 
5 | SERVICE_NAME=manufacturing_document_comparison
6 | STACK_NAME=manufacturing-document-comparison
7 | 
8 | region=us-east-1
9 | account=$(aws sts get-caller-identity --query Account --output text)
10 | export AWS_DEFAULT_REGION=${region}
11 | 
12 | fullname="${account}.dkr.ecr.${region}.amazonaws.com/${IMAGE}:latest"
13 | 
14 | # If the repository doesn't exist in ECR, create it.
15 | 
16 | aws ecr describe-repositories --repository-names "${IMAGE}" > /dev/null 2>&1
17 | 
18 | if [ $? -ne 0 ]
19 | then
20 |     aws ecr create-repository --repository-name "${IMAGE}" > /dev/null
21 | fi
22 | 
23 | # Get the login command from ECR and execute it directly
24 | aws ecr get-login-password --region "${region}" | docker login --username AWS --password-stdin "${account}".dkr.ecr."${region}".amazonaws.com
25 | 
26 | # Build the Docker image locally with the IMAGE name and then push it to ECR
27 | # with the full name.
28 | 
29 | docker build -t ${IMAGE} .
30 | docker image tag ${IMAGE} ${fullname}
31 | 
32 | docker push ${fullname}
33 | 
34 | # Deploy the CloudFormation stack (create or update as necessary) and suppress the output
35 | aws cloudformation deploy \
36 |     --template-file cf.yaml \
37 |     --stack-name "${STACK_NAME}" \
38 |     --parameter-overrides ServiceName="${SERVICE_NAME}" ImageUri="${fullname}" \
39 |     --capabilities CAPABILITY_NAMED_IAM > /dev/null
40 | 
41 | # Check if the stack deploy command was successful
42 | if [ $? -eq 0 ]; then
43 |     echo "Stack ${STACK_NAME} has been created or updated successfully."
44 | else
45 |     echo "Error deploying stack ${STACK_NAME}."
46 | fi
--------------------------------------------------------------------------------
/workshops/Pharma_Manufacturing_Compliance_Bedrock_GenAI/docker/destroy.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | STACK_NAME=manufacturing-document-comparison
4 | IMAGE=manufacturing_document_comparison
5 | 
6 | region=us-east-1
7 | export AWS_DEFAULT_REGION=${region}
8 | 
9 | aws cloudformation delete-stack --stack-name "${STACK_NAME}"
10 | 
11 | aws ecr delete-repository --repository-name "${IMAGE}" --force > /dev/null 2>&1
12 | 
13 | echo "Stack ${STACK_NAME} has been destroyed successfully."
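As a companion to deploy.sh and cf.yaml above, one way to read back the `Endpoint` output once the stack exists. This is a sketch using the scripts' default names, not part of the workshop itself:

```python
# Hypothetical helper: fetch the App Runner URL that cf.yaml exports as the
# "Endpoint" output after deploy.sh completes. Region and stack name mirror
# the defaults hard-coded in deploy.sh.
import boto3

cfn = boto3.client("cloudformation", region_name="us-east-1")
stack = cfn.describe_stacks(StackName="manufacturing-document-comparison")["Stacks"][0]
outputs = {o["OutputKey"]: o["OutputValue"] for o in stack.get("Outputs", [])}
print("App Runner endpoint:", outputs.get("Endpoint"))
```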
--------------------------------------------------------------------------------
/workshops/Pharma_Manufacturing_Compliance_Bedrock_GenAI/docker/images/manufacturing_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Pharma_Manufacturing_Compliance_Bedrock_GenAI/docker/images/manufacturing_diagram.png
--------------------------------------------------------------------------------
/workshops/Pharma_Manufacturing_Compliance_Bedrock_GenAI/docker/requirements.txt:
--------------------------------------------------------------------------------
1 | gradio==4.26.0
2 | fastapi==0.110.1
3 | anthropic==0.23.1
4 | boto3==1.34.81
--------------------------------------------------------------------------------
/workshops/Pharma_Manufacturing_Compliance_Bedrock_GenAI/gradio_interface_test.py:
--------------------------------------------------------------------------------
1 | import gradio as gr
2 | 
3 | def greet(name):
4 |     return "Hello " + name + "!"
5 | 
6 | def greet2(name2):
7 |     return "Hello " + name2 + "!"
8 | 
9 | demo = gr.Interface(fn=greet, inputs="text", outputs="text")
10 | demo = gr.Interface(fn=greet2, inputs="text", outputs="text")  # overwrites the interface above; only greet2 is served
11 | 
12 | demo.launch()
--------------------------------------------------------------------------------
/workshops/Pharma_Manufacturing_Compliance_Bedrock_GenAI/penicillin_manufacturing.txt:
--------------------------------------------------------------------------------
1 | Penicillin Manufacturing Protocol
2 | 
3 | Materials:
4 | - Penicillium chrysogenum mold strain (ATCC 48271 or equivalent)
5 | - Growth medium:
6 | -- Corn steep liquor (5-10% w/v)
7 | -- Sucrose (2-5% w/v)
8 | -- Ammonium sulfate (0.5-1% w/v)
9 | -- Potassium phosphate (0.1-0.5% w/v)
10 | -- Fermentation vessel (100-500L capacity)
11 | -- Centrifuge
12 | -- Rotary evaporator
13 | -- Ion exchange resin (strongly acidic cation exchange resin)
14 | -- Activated charcoal
15 | -- Reverse osmosis system
16 | -- Sterile 0.9% sodium chloride solution
17 | 
18 | Method:
19 | 1. Inoculate a slant or plate of P. chrysogenum and incubate at 25°C for 3-5 days until sporulation occurs.
20 | 2. Inoculate a starter culture of the growth medium with P. chrysogenum spores and incubate at 25°C for 2 days on a rotary shaker (200rpm) until a cell density of 1-5 x 10^7 CFU/mL is reached.
21 | 3. Inoculate the fermentation vessel with 10% v/v of the starter culture.
22 | 4. Incubate the fermentation vessel at 25°C for 5-7 days while aerating (1 vvm) and stirring (200rpm) until maximum penicillin titre is reached (100-500 IU/mL).
23 | 5. Centrifuge the fermentation broth at 10000xg for 20 minutes to remove cells and debris.
24 | 6. Concentrate the supernatant using a rotary evaporator to remove excess water.
25 | 7. Pass the concentrate through an ion exchange resin to remove impurities.
26 | 8. Pass the concentrate through activated charcoal to remove pigments and odorous compounds.
27 | 9. Concentrate and wash the product using a reverse osmosis system.
28 | 10. Re-suspend the product in sterile 0.9% sodium chloride solution to achieve a concentration of 100,000 IU penicillin G per mL.
29 | 11. Filter sterilize the product through a 0.22μm membrane and store at 2-8°C.
30 | 
31 | The final product will be a sterile aqueous solution of penicillin G potassium salt at a concentration of 100,000 IU/mL.
Please note that additional purification steps may be required to produce pharmaceutical grade penicillin for human usage. 32 | 33 | The first step is to obtain penicillin mold cultures, specifically of the Penicillium chrysogenum species. These mold cultures must be obtained from a reputable culture collection and grown on a culture medium in a sterile environment. 34 | 35 | Once active mold cultures have been established, the next step is to inoculate seed tanks containing a growth medium of lactose, corn steep liquor, ammonium sulfate, and other nutrients to promote mold growth. The inoculated seed tanks are incubated for 2 to 3 days to allow for growth of the mold. 36 | 37 | After incubation, the seed tanks contain actively growing mold cultures that can be used to inoculate production fermenters. The production fermenters contain the same growth medium as the seed tanks but on a much larger scale, up to 100,000 liters. The production fermenters are inoculated with the seed tank mold cultures and incubated for 4 to 6 days to allow for large-scale penicillin production. 38 | 39 | During fermentation, the mold cultures produce penicillin, which accumulates in the fermentation broth. The fermentation broth is harvested and goes through a multi-stage filtration process to separate out the mold cultures and other particulates. The filtered broth then goes through an extraction process, using organic solvents to extract crude penicillin. -------------------------------------------------------------------------------- /workshops/Pharma_Manufacturing_Compliance_Bedrock_GenAI/requirements.txt: -------------------------------------------------------------------------------- 1 | gradio==4.26.0 2 | fastapi==0.110.1 3 | anthropic==0.23.1 -------------------------------------------------------------------------------- /workshops/Pharma_Manufacturing_Compliance_Bedrock_GenAI/sample_sop.txt: -------------------------------------------------------------------------------- 1 | 1. All incubations must be less than 2 days. 2 | 2. All sodium chloride must be greater than .95% solutions 3 | 3. No batch can exceed 500 liters in volume. 4 | 4. All filtration must use 0.2 micron filters or smaller. 5 | 5. No raw material can be used after 6 months from receipt. 6 | 6. All equipment must be sterilized at 121°C for at least 15 minutes. 7 | 7. No more than 2 different products can be manufactured in the same facility. 8 | 8. All surfaces must be wiped down with 70% isopropyl alcohol. 9 | 9. No batch record can have more than 10 deviations noted. 10 | 10. All finished products must have at least 2 years of shelf life remaining at time of release. 
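A minimal sketch of the compliance comparison the Pharma_Manufacturing_Compliance_Bedrock_GenAI notebooks perform on the two documents above, assuming Bedrock access from the current credentials; the model ID is an assumption, so substitute any Anthropic chat model enabled in your account:

```python
# Hypothetical compliance check: ask an Anthropic model on Amazon Bedrock to
# flag protocol steps that violate the SOP rules. The model ID below is an
# assumption, not a value taken from this repository.
import json

import boto3

bedrock = boto3.client("bedrock-runtime", region_name="us-east-1")
with open("penicillin_manufacturing.txt") as f:
    protocol = f.read()
with open("sample_sop.txt") as f:
    sop = f.read()

body = json.dumps({
    "anthropic_version": "bedrock-2023-05-31",
    "max_tokens": 1024,
    "messages": [{
        "role": "user",
        "content": f"SOP rules:\n{sop}\n\nManufacturing protocol:\n{protocol}\n\n"
                   "List each protocol step that violates an SOP rule, citing the rule.",
    }],
})
response = bedrock.invoke_model(
    modelId="anthropic.claude-3-sonnet-20240229-v1:0", body=body
)
print(json.loads(response["body"].read())["content"][0]["text"])
```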
--------------------------------------------------------------------------------
/workshops/Process_HCLS_Docs_Using_AI_Services/data/sample_report_1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Process_HCLS_Docs_Using_AI_Services/data/sample_report_1.pdf
--------------------------------------------------------------------------------
/workshops/Protein_Language_Modelling/deploy_esm_to_inf2/scripts/inference.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import torch
4 | import torch_neuronx
5 | from transformers import AutoTokenizer
6 | 
7 | JSON_CONTENT_TYPE = "application/json"
8 | # MODEL_ID = "facebook/esm2_t33_650M_UR50D"
9 | # MODEL_ID = "facebook/esm2_t12_35M_UR50D"
10 | MODEL_ID = "facebook/esm2_t6_8M_UR50D"
11 | 
12 | 
13 | def model_fn(model_dir):
14 |     """Load the traced model from model_dir and the tokenizer from HuggingFace"""
15 |     print(f"torch-neuronx version is {torch_neuronx.__version__}")
16 |     tokenizer_init = AutoTokenizer.from_pretrained(MODEL_ID)
17 |     model_file = os.path.join(model_dir, "traced_esm.pt")
18 |     neuron_model = torch.jit.load(model_file)
19 |     return (neuron_model, tokenizer_init)
20 | 
21 | 
22 | def input_fn(serialized_input_data, content_type=JSON_CONTENT_TYPE):
23 |     """Process the request payload"""
24 | 
25 |     if content_type == JSON_CONTENT_TYPE:
26 |         input_data = json.loads(serialized_input_data)
27 |         return input_data.pop("inputs", input_data)
28 |     else:
29 |         raise Exception("Requested unsupported ContentType in Accept: " + content_type)
30 | 
31 | 
32 | 
33 | def predict_fn(input_data, model_and_tokenizer):
34 |     """Run model inference"""
35 | 
36 |     neuron_model, tokenizer = model_and_tokenizer
37 |     max_length = 128
38 |     tokenized_sequence = tokenizer.encode_plus(
39 |         input_data,
40 |         max_length=max_length,
41 |         padding="max_length",
42 |         truncation=True,
43 |         return_tensors="pt",
44 |     )
45 |     prediction_input = (
46 |         tokenized_sequence["input_ids"],
47 |         tokenized_sequence["attention_mask"],
48 |     )
49 |     output = neuron_model(*prediction_input)[0]
50 |     mask_token_index = (tokenized_sequence.input_ids == tokenizer.mask_token_id)[
51 |         0
52 |     ].nonzero(as_tuple=True)[0]
53 |     mask_index_predictions = output[0, mask_token_index]
54 |     sigmoid = torch.nn.Sigmoid()
55 |     probs = sigmoid(mask_index_predictions)
56 |     return {
57 |         list(tokenizer.get_vocab().keys())[idx]: round(v.item(), 3)
58 |         for idx, v in enumerate(probs[0])
59 |     }
60 | 
61 | 
62 | def output_fn(prediction_output, accept=JSON_CONTENT_TYPE):
63 |     """Process the response payload"""
64 |     if accept == JSON_CONTENT_TYPE:
65 |         return json.dumps(prediction_output), accept
66 | 
67 |     raise Exception("Requested unsupported ContentType in Accept: " + accept)
68 | 
--------------------------------------------------------------------------------
/workshops/Protein_Language_Modelling/deploy_esm_to_inf2/scripts/requirements.txt:
--------------------------------------------------------------------------------
1 | --extra-index-url=https://pip.repos.neuron.amazonaws.com
2 | transformers
3 | torch-neuronx==1.13.1.1.10.1
--------------------------------------------------------------------------------
/workshops/Protein_Language_Modelling/finetune_esm_on_deeploc/scripts/inference.py:
--------------------------------------------------------------------------------
1 | # Copyright Amazon.com, Inc.
or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | 4 | from transformers import EsmForSequenceClassification, AutoTokenizer 5 | import torch 6 | 7 | 8 | def model_fn(model_dir): 9 | id2label = {0: "Non-Membrane", 1: "Membrane"} 10 | label2id = {"Non-Membrane": 0, "Membrane": 1} 11 | model = EsmForSequenceClassification.from_pretrained( 12 | model_dir, 13 | device_map="auto", 14 | num_labels=2, 15 | id2label=id2label, 16 | label2id=label2id, 17 | ) 18 | tokenizer = AutoTokenizer.from_pretrained(model_dir) 19 | 20 | return model, tokenizer 21 | 22 | 23 | def predict_fn(data, model_and_tokenizer): 24 | model, tokenizer = model_and_tokenizer 25 | model.eval() 26 | inputs = data.pop("inputs", data) 27 | encoding = tokenizer(inputs, return_tensors="pt") 28 | encoding = {k: v.to(model.device) for k, v in encoding.items()} 29 | results = model(**encoding) 30 | sigmoid = torch.nn.Sigmoid() 31 | probs = sigmoid(results.logits) 32 | probs = probs.cpu() 33 | return {"membrane_probability": probs[0][1].item()} 34 | -------------------------------------------------------------------------------- /workshops/Protein_Language_Modelling/finetune_esm_on_deeploc/scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate==0.24.1 2 | bitsandbytes==0.41.1 3 | datasets==2.14.6 4 | evaluate==0.4.3 5 | nvidia-ml-py3==7.352.0 6 | peft==0.5.0 7 | scikit-learn==1.3.2 8 | transformers==4.34.1 9 | torchinfo==1.8.0 -------------------------------------------------------------------------------- /workshops/Protein_Language_Modelling/finetune_esm_on_oas/scripts/cuda/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.31.0 2 | datasets==2.14.2 3 | accelerate==0.21.0 4 | evaluate 5 | tensorboard -------------------------------------------------------------------------------- /workshops/Protein_Language_Modelling/finetune_esm_on_oas/scripts/esm-accelerate-examples/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | datasets 3 | accelerate 4 | torchinfo 5 | bitsandbytes 6 | nvidia-ml-py3 7 | peft==0.4.0 -------------------------------------------------------------------------------- /workshops/Protein_Language_Modelling/finetune_esm_on_oas/scripts/neuron/requirements.txt: -------------------------------------------------------------------------------- 1 | --extra-index-url https://pip.repos.neuron.amazonaws.com 2 | transformers==4.31.0 3 | datasets==2.14.2 4 | accelerate==0.21.0 5 | torch-neuronx 6 | evaluate -------------------------------------------------------------------------------- /workshops/Protein_Language_Modelling/img/protein.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Protein_Language_Modelling/img/protein.png -------------------------------------------------------------------------------- /workshops/Protein_Language_Modelling/pretrain_esm_on_uniref/scripts/processing/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate==0.25.0 2 | datasets==2.16.1 3 | pyfastx==2.0.2 4 | transformers==4.37.2 -------------------------------------------------------------------------------- 
/workshops/Protein_Language_Modelling/pretrain_esm_on_uniref/scripts/training/cuda/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate==0.26.1 2 | boto3==1.34.19 3 | botocore==1.34.19 4 | datasets==2.16.1 5 | sagemaker==2.203.1 6 | transformers==4.36.2 7 | nvidia-ml-py3==7.352.0 8 | torch==2.2.0 --index-url https://download.pytorch.org/whl/cu118 -------------------------------------------------------------------------------- /workshops/Protein_Language_Modelling/pretrain_esm_on_uniref/scripts/training/neuron/requirements.txt: -------------------------------------------------------------------------------- 1 | --extra-index-url https://pip.repos.neuron.amazonaws.com 2 | transformers==4.37.2 3 | datasets==2.16.1 4 | evaluate==0.4.1 5 | neuronx-cc==2.* 6 | --pre torch-neuronx==2.1.* -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. 
Send us a pull request, answering any default questions in the pull request interface.
37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
38 | 
39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
41 | 
42 | 
43 | ## Finding contributions to work on
44 | Looking at the existing issues is a great way to find something to contribute to. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start.
45 | 
46 | 
47 | ## Code of Conduct
48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
50 | opensource-codeofconduct@amazon.com with any additional questions or comments.
51 | 
52 | 
53 | ## Security issue notifications
54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue.
55 | 
56 | 
57 | ## Licensing
58 | 
59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
60 | 
--------------------------------------------------------------------------------
/workshops/RNAseq_Tertiary_Analysis/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this
4 | software and associated documentation files (the "Software"), to deal in the Software
5 | without restriction, including without limitation the rights to use, copy, modify,
6 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
7 | permit persons to whom the Software is furnished to do so.
8 | 
9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
10 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
11 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
12 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
13 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
14 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/640px-Gene_structure_eukaryote_2_annotated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/640px-Gene_structure_eukaryote_2_annotated.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/MLLC1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/MLLC1.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/MLLC2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/MLLC2.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/Trial-component-list.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/Trial-component-list.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/approve-prod.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/approve-prod.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/brca_stats.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/brca_stats.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/charts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/charts.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/cloned_folders.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/cloned_folders.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/code-pipeline.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/code-pipeline.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/create_project.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/create_project.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/deploy-stage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/deploy-stage.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/deployment_options.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/deployment_options.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/deployments.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/deployments.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/exp-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/exp-1.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/exp-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/exp-2.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/exp-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/exp-3.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/exp-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/exp-4.png 
-------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/experiments.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/experiments.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/find-prod-deploy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/find-prod-deploy.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/jobs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/jobs.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/lineage_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/lineage_graph.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/metrics.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/mlflow-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/mlflow-diagram.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/model_registry.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/model_registry.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/overexpression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/overexpression.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/pipeline.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/pipeline.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/pipeline_execution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/pipeline_execution.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/project-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/project-1.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/project-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/project-2.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/project-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/project-3.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/project-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/project-4.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/project_name.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/project_name.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/projects.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/projects.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/repo_defaults.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/repo_defaults.png 
-------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/repositories.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/repositories.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/second-endpoint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/second-endpoint.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/select-model-version.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/select-model-version.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/sidebar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/sidebar.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/sm-resources-tab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/sm-resources-tab.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/sm_experiments.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/sm_experiments.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/tc-list-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/tc-list-2.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/template_build.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/template_build.jpg -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/template_deploy.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/template_deploy.jpg -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/update-status.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/update-status.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/scripts/processing/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.35.16 2 | sagemaker==2.231.0 3 | -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/scripts/rf_train/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.35.16 2 | sagemaker==2.231.0 3 | mlflow==2.13.2 4 | sagemaker-mlflow==0.1.0 5 | -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/scripts/tf_train/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.35.16 2 | sagemaker==2.231.0 3 | mlflow==2.13.2 4 | sagemaker-mlflow==0.1.0 5 | -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/scripts/xgb_train/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.35.16 2 | sagemaker==2.231.0 3 | mlflow==2.13.2 4 | sagemaker-mlflow==0.1.0 5 | -------------------------------------------------------------------------------- /workshops/Sagemaker_Pipelines_Automated_Retraining/kick_off_pipeline_lambda.py: -------------------------------------------------------------------------------- 1 | import json 2 | import boto3 3 | import logging 4 | import os 5 | import copy 6 | 7 | 8 | # get environment variables 9 | # name of bucket lambda gets notifications from 10 | NOTIFICATION_BUCKET_NAME = os.environ["NOTIFICATION_BUCKET_NAME"] 11 | SAGEMAKER_PIPELINE_NAME = os.environ["SAGEMAKER_PIPELINE_NAME"] 12 | 13 | 14 | def read_in_file_from_s3(bucketname, filename): 15 | """reads in the file from S3 and returns the content from the body of the file""" 16 | s3 = boto3.resource("s3") 17 | obj = s3.Object(bucketname, filename) 18 | body = obj.get()["Body"].read() 19 | return body 20 | 21 | 22 | def convert_to_s3uri(bucketname, filename): 23 | the_uri = f"s3://{bucketname}/{filename}" 24 | return the_uri 25 | 26 | 27 | def kick_off_sagemaker_pipeline(pipelinename=None, s3uri=None): 28 | client = boto3.client("sagemaker") 29 | PipelineParameters = [ 30 | {"Name": "InputData", "Value": f"{s3uri}"}, 31 | ] 32 | response = client.start_pipeline_execution( 33 | PipelineName=pipelinename, PipelineParameters=PipelineParameters 34 | ) 35 | return response 36 | 37 | 38 | def lambda_handler(event, context): 39 | # uncomment to log event info 40 | # logging.info(json.dumps(event)) 41 | 42 | filename = event["Records"][0]["s3"]["object"]["key"] 43 | filename_basename = os.path.basename(filename) 44 | 45 | the_s3uri = convert_to_s3uri(NOTIFICATION_BUCKET_NAME, filename) 46 | the_response = kick_off_sagemaker_pipeline( 47 | 
pipelinename=SAGEMAKER_PIPELINE_NAME, s3uri=the_s3uri 48 | ) 49 | # logging.info(json.dumps(content_4)) 50 | # put_file_in_s3(f'''{filename_basename}_out''',json.dumps(content_4),OUTPUT_BUCKET_NAME) 51 | 52 | return {"statusCode": 200, "body": json.dumps("Hello from Lambda!")} 53 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [0.2.1] 9 | 10 | - Fixed a bug in the variant scoring strategy `pseudolikelihood_ratio` when `parallel_chains` was greater than 1. 11 | - Added the ability to save results (output sequences and scores, plus a few other tidbits) to a CSV file by calling `save_results()` on the DirectedEvolution object. 12 | - Minor modification to `embeddings.py` to support pLMs using mixed precision. 13 | - Added unit tests for the `VariantScoring` class and a new unit test for the sampler to test saving results. 14 | - Fixed a bug with `torch.softmax` in `utils.safe_logits_to_probs`. 15 | 16 | ## [0.2] 17 | 18 | ### Major change - Variant Scoring 19 | 20 | - The ability to change the expert variant scoring strategy has been added. There is now a class `VariantScoring` which can be configured with a `scoring_strategy` argument (currently supported: `attribute_value`, `pseudolikelihood_ratio`, and `mutant_marginal` (NEW)). Each expert has an instance of a `VariantScoring` class. It is defined in `evo_prot_grad.common.variant_scoring`. 21 | - The main entry point for instantiating an expert, `get_expert`, now has a `scoring_strategy` argument for configuring the expert. 22 | - The `use_without_wildtype` argument of the Expert class has been removed. Each scoring strategy normalizes the score with respect to the wildtype score, so this was superfluous. If you want to instantiate an expert and use it outside of the DirectedEvolution class, you have to explicitly call `expert.init_wildtype(wt_seq)` before calling the expert to cache the wildtype score (see below). 23 | - `Expert` private class method `_model_output_to_scalar_score` has been removed in favor of a public-facing method `get_model_output`. This method can be used to directly get expert scores for sequences. 24 | - The `Expert` class no longer has a `wt_score` attribute. The wildtype score is now stored in the `VariantScoring` class (`wt_score_cache`). 25 | 26 | ### Minor changes 27 | 28 | - The `Expert` abstract class now publicly exposes the following methods: `init_wildtype`, for storing the wildtype string sequence and caching the WT score, `tokenize` for tokenizing a sequence, `get_model_output` which accepts a list of protein sequence strings and returns the one-hot encoded sequences and the expert model's predictions.
29 | - Renamed `experts.base_experts.HuggingFaceExpert` to `experts.base_experts.ProteinLMExpert` 30 | - Improved error message reporting for `get_expert` 31 | - Upgraded `transformers[torch]` to `4.38.0` 32 | 33 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to EvoProtGrad 2 | 3 | We are excited for community contributions and are actively reviewing and accepting pull requests! 4 | There are a variety of ways in which you can contribute beyond writing code. 5 | This document provides a high-level overview of how you can get involved. 6 | 7 | 8 | ## Asking Questions 9 | 10 | Have a question? Rather than opening an issue directly, please ask questions 11 | or post comments in [Q&A Discussions](https://github.com/NREL/EvoProtGrad/discussions/categories/q-a). 12 | The NREL team or other members of the community will assist. Your well-worded 13 | question will serve as a resource to others searching for help. 14 | 15 | 16 | ## Providing Feedback 17 | 18 | Your comments and feedback are very welcome. Please post to 19 | [General Discussions](https://github.com/NREL/EvoProtGrad/discussions/categories/general) 20 | with lots of information and detail. It is beneficial to consider 21 | how someone else will understand your comments in order to make 22 | them most effective. 23 | 24 | 25 | ## Reporting Issues 26 | 27 | Have you identified a reproducible problem in EvoProtGrad? 28 | Have a feature request? We want to hear about it! Here's how you can make 29 | reporting your issue as effective as possible. 30 | 31 | ### Look For an Existing Issue 32 | 33 | Before you create a new issue, please do a search to see if 34 | the issue or feature request has already been filed. 35 | 36 | If you find your issue already exists, make relevant comments and add your 37 | [reaction](https://github.com/blog/2119-add-reactions-to-pull-requests-issues-and-comments). 38 | Use a reaction in place of a "+1" comment: 39 | 40 | - 👍 - upvote 41 | - 👎 - downvote 42 | 43 | If you cannot find an existing issue that describes your bug or feature, 44 | create a new issue using the guidelines below. 45 | 46 | ### Writing Good Bug Reports and Feature Requests 47 | 48 | File a single issue per problem and feature request. Do not enumerate 49 | multiple bugs or feature requests in the same issue. 50 | 51 | Do not add your issue as a comment to an existing issue unless it's for the 52 | identical input. Many issues look similar, but have different causes. 53 | 54 | The more information you can provide, the more likely someone will 55 | be successful at reproducing the issue and finding a fix. 56 | 57 | Please follow the issue template guidelines to include relevant information 58 | that will help in diagnosing the problem. 59 | 60 | ### Final Checklist 61 | 62 | Please remember to do the following: 63 | 64 | - [ ] Search the issue repository to ensure your report is a new issue 65 | 66 | - [ ] Recreate the issue with a minimally descriptive example 67 | 68 | - [ ] Simplify your code around the issue to better isolate the problem 69 | 70 | 71 | ## Contributing Fixes 72 | 73 | If you are interested in writing code to fix an issue or 74 | submit a new feature, let us know in 75 | [Ideas Discussions](https://github.com/NREL/EvoProtGrad/discussions/categories/ideas)! 76 | 77 | Coming Soon - developer guidelines!
78 | 79 | This doc takes heavy inspiration from [floris](https://github.com/NREL/floris/blob/main/CONTRIBUTING.md) (thanks!). 80 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2023, Alliance for Sustainable Energy, LLC 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | from typing import Optional, Union 3 | import torch.nn as nn 4 | from transformers import PreTrainedTokenizerBase 5 | from evo_prot_grad.experts.base_experts import Expert 6 | from evo_prot_grad.common.tokenizers import ExpertTokenizer 7 | from evo_prot_grad.common.sampler import DirectedEvolution 8 | 9 | def get_expert(expert_name: str, 10 | scoring_strategy: str, 11 | temperature: float = 1.0, 12 | model: Optional[nn.Module] = None, 13 | tokenizer: Optional[Union[ExpertTokenizer, PreTrainedTokenizerBase]] = None, 14 | device: str = 'cpu') -> Expert: 15 | """ 16 | Currently supported expert types (to pass to argument `expert_name`): 17 | 18 | - `bert` 19 | - `causallm` 20 | - `esm` 21 | - `evcouplings` 22 | - `onehot_downstream_regression` 23 | 24 | Customize the expert by specifying the model and tokenizer.
25 | For example: 26 | 27 | ```python 28 | from evo_prot_grad import get_expert 29 | from transformers import AutoTokenizer, EsmForMaskedLM 30 | 31 | expert = get_expert( 32 | expert_name = 'esm', 33 | model = EsmForMaskedLM.from_pretrained("facebook/esm2_t36_3B_UR50D"), 34 | tokenizer = AutoTokenizer.from_pretrained("facebook/esm2_t36_3B_UR50D"), 35 | scoring_strategy = 'mutant_marginal', 36 | temperature = 1.0, 37 | device = 'cuda' 38 | ) 39 | ``` 40 | 41 | Args: 42 | expert_name (str): Name of the expert to be used. 43 | scoring_strategy (str): Approach for scoring variants that the expert will use. 44 | temperature (float, optional): Temperature for the expert. Defaults to 1.0. 45 | model (Optional[nn.Module], optional): Model to be used for the expert. Defaults to None. 46 | tokenizer (Optional[Union[ExpertTokenizer, PreTrainedTokenizerBase]], optional): Tokenizer to be used for the expert. Defaults to None. 47 | device (str, optional): Device to be used for the expert. Defaults to 'cpu'. 48 | 49 | Raises: 50 | ValueError: If the expert name is not found. 51 | 52 | Returns: 53 | expert (Expert): An instance of the expert. 54 | """ 55 | try: 56 | expert_mod = importlib.import_module(f"evo_prot_grad.experts.{expert_name}_expert") 57 | except ImportError: 58 | raise ValueError(f"Expert {expert_name} not found in evo_prot_grad.experts.") 59 | 60 | return expert_mod.build( 61 | temperature = temperature, 62 | scoring_strategy = scoring_strategy, 63 | model = model, 64 | tokenizer = tokenizer, 65 | device = device, 66 | ) 67 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/common/__init__.py -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/common/embeddings.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class IdentityEmbedding(nn.Module): 6 | """ 7 | A module that does nothing except store 8 | the most recent one_hots tensor. 9 | """ 10 | def __init__(self): 11 | super().__init__() 12 | self.one_hots = None 13 | 14 | def forward(self, one_hots: torch.Tensor) -> torch.Tensor: 15 | """ Cache the one_hots tensor and return it. 16 | 17 | Args: 18 | one_hots (torch.Tensor): A torch.FloatTensor of shape [batch_size, max_sequence_len, vocab_size]. 19 | 20 | Returns: 21 | one_hots (torch.Tensor): The same one_hots tensor that was passed in. 22 | """ 23 | self.one_hots = one_hots.requires_grad_() 24 | return self.one_hots 25 | 26 | 27 | class OneHotEmbedding(nn.Module): 28 | """Compute the embeddings for a sequence of amino acids. 29 | Converts a sequence of amino acids to a sequence of one-hot vectors first. 30 | Caches the one-hot tensors for computing gradients with respect to 31 | the one-hot tensors.
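The gradient-based sampler differentiates the expert score with respect to these cached one-hot tensors to decide which positions and residues to mutate.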
32 | """ 33 | def __init__( 34 | self, 35 | nn_embeddings: nn.Embedding 36 | ): 37 | super().__init__() 38 | self.weight = nn_embeddings.weight 39 | self.one_hots = None 40 | 41 | def forward(self, input_ids: torch.LongTensor) -> torch.Tensor: 42 | """ Compute the embeddings for a sequence of amino acids, 43 | caching the one-hot tensors for computing gradients with respect to 44 | the one-hot tensors. 45 | 46 | Args: 47 | input_ids (torch.LongTensor): Amino acid sequences of shape [batch_size, max_sequence_len]. 48 | Returns: 49 | embeddings (torch.FloatTensor): Amino acid embeddings of shape [batch_size, max_sequence_len, embedding_dim]. 50 | """ 51 | weights_dtype = self.weight.dtype # could be float16 if using mixed precision 52 | high_precision = torch.float32 # optionally float64 ?? 53 | # convert input_ids to one_hots 54 | # one_hots is a torch.FloatTensor of shape [batch_size, max_sequence_len, vocab_size] 55 | one_hots = torch.nn.functional.one_hot(input_ids, num_classes=self.weight.shape[0]) 56 | one_hots = one_hots.to(dtype=high_precision) # Ensure one_hots are in float32 for gradient computation 57 | # Cache the one_hots 58 | self.one_hots = one_hots.requires_grad_() 59 | # Compute the embeddings and convert back to low precision if necessary 60 | embeddings = self.one_hots @ self.weight.to(dtype=high_precision) 61 | embeddings = embeddings.to(dtype=weights_dtype) 62 | return embeddings -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/common/tokenizers.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import torch 3 | from typing import List, Dict 4 | 5 | 6 | class ExpertTokenizer(abc.ABC): 7 | """Base interface for custom Expert tokenizers. 8 | """ 9 | def __init__(self, alphabet: List[str]) -> None: 10 | """ 11 | Args: 12 | alphabet (List[str]): A list of amino acid characters. 13 | """ 14 | self.alphabet = alphabet 15 | self.vocab_size = len(alphabet) 16 | self.vocab = {aa: i for i, aa in enumerate(alphabet)} 17 | 18 | def get_vocab(self) -> Dict: 19 | """Return the vocab, a mapping of amino acid characters to integers.""" 20 | return self.vocab 21 | 22 | @abc.abstractmethod 23 | def __call__(self, seqs: List[str]) -> torch.FloatTensor: 24 | """Convert seqs to one hot tensors. 25 | 26 | Args: 27 | seqs (List[str]): A list of protein sequence strings of len [parallel_chains]. 28 | Returns: 29 | ohs (torch.FloatTensor): of shape [parallel_chains, seq_len, vocab_size] 30 | """ 31 | raise NotImplementedError() 32 | 33 | @abc.abstractmethod 34 | def decode(self, ohs: torch.Tensor) -> List[str]: 35 | """Convert one-hot tensors back to a list of string sequences. 36 | 37 | Args: 38 | ohs (torch.Tensor): shape [parallel_chains, seq_len, vocab_size] 39 | Returns: 40 | seqs (List[str]): A list of protein sequence strings of len [parallel_chains]. 41 | """ 42 | raise NotImplementedError() 43 | 44 | 45 | 46 | class OneHotTokenizer(ExpertTokenizer): 47 | """Converts a string of amino acids into one-hot tensors. 48 | """ 49 | def __init__(self, alphabet: List[str]): 50 | """ 51 | Args: 52 | alphabet (List[str]): A list of amino acid characters. 53 | """ 54 | super().__init__(alphabet) 55 | 56 | def __call__(self, seqs: List[str]) -> torch.FloatTensor: 57 | """Convert seqs to one hot tensors. 58 | Assumes each sequence is the same length. Handles sequences 59 | with spaces between amino acids. 
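For example, 'M K T' and 'MKT' produce identical one-hot encodings.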
60 | 61 | Args: 62 | seqs (List[str]): A list of protein sequence strings of len [parallel_chains]. 63 | Returns: 64 | ohs (torch.FloatTensor): of shape [parallel_chains, seq_len, vocab_size] 65 | """ 66 | # convert seqs to ints 67 | seqs_ = [[self.vocab[aa] for aa in seq.upper() if aa != ' '] for seq in seqs] 68 | # convert to tensor using torch.nn.functional.one_hot() 69 | ohs = torch.nn.functional.one_hot(torch.LongTensor(seqs_), num_classes=self.vocab_size) 70 | return ohs.float() 71 | 72 | 73 | def decode(self, ohs: torch.Tensor) -> List[str]: 74 | """Convert one-hot tensors back to a list of string sequences with 75 | a space between each amino acid. 76 | 77 | Args: 78 | ohs (torch.Tensor): shape [parallel_chains, seq_len, vocab_size] 79 | Returns: 80 | seqs (List[str]): A list of protein sequence strings of len [parallel_chains]. 81 | """ 82 | ohs = ohs.argmax(dim=-1) 83 | return [' '.join([self.alphabet[i] for i in oh]) for oh in ohs] 84 | 85 | 86 | #### Add new tokenizers here #### -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/experts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/experts/__init__.py -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/experts/amplify_expert.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from typing import Optional, List 4 | from transformers import AutoModel, AutoTokenizer, PreTrainedTokenizerBase 5 | from transformers.tokenization_utils_base import BatchEncoding 6 | from evo_prot_grad.experts.base_experts import ProteinLMExpert 7 | import evo_prot_grad.common.embeddings as embeddings 8 | 9 | 10 | class AmplifyExpert(ProteinLMExpert): 11 | """Expert base class for HuggingFace protein language models from the Amplify family. 12 | Implements abstract methods `_get_last_one_hots` and `tokenize`. 13 | Swaps out the `encoder` (Embedding) layer 14 | for a `evo_prot_grad.common.embeddings.OneHotEmbedding` layer. 15 | """ 16 | 17 | def __init__( 18 | self, 19 | temperature: float, 20 | scoring_strategy: str, 21 | model: Optional[nn.Module] = None, 22 | tokenizer: Optional[PreTrainedTokenizerBase] = None, 23 | device: str = "cuda", 24 | ): 25 | """ 26 | Args: 27 | 28 | temperature (float): Temperature for sampling from the expert. 29 | scoring_strategy (str): Approach for scoring variants that the expert will use. 30 | model (nn.Module): The model to use for the expert. Defaults to Amplify model from chandar-lab/AMPLIFY_350M. 31 | tokenizer (PreTrainedTokenizerBase): The tokenizer to use for the expert. Defaults to AutoTokenizer from chandar-lab/AMPLIFY_350M. 32 | device (str): The device to use for the expert. Defaults to 'cuda'. 33 | Raises: 34 | ValueError: If either `model` or `tokenizer` is not specified.
35 | """ 36 | if model is None and tokenizer is None: 37 | model = AutoModel.from_pretrained( 38 | "chandar-lab/AMPLIFY_350M", trust_remote_code=True 39 | ) 40 | tokenizer = AutoTokenizer.from_pretrained( 41 | "chandar-lab/AMPLIFY_350M", trust_remote_code=True 42 | ) 43 | elif model is None or tokenizer is None: 44 | raise ValueError( 45 | "AmplifyExpert requires both `model` and `tokenizer` to be specified." 46 | ) 47 | vocab = tokenizer.get_vocab() 48 | super().__init__(temperature, model, vocab, scoring_strategy, device) 49 | self.tokenizer = tokenizer 50 | self.model.encoder = embeddings.OneHotEmbedding(model.encoder) 51 | 52 | def _get_last_one_hots(self) -> torch.Tensor: 53 | """Returns the one-hot tensors *most recently passed* as input.""" 54 | return self.model.encoder.one_hots 55 | 56 | def tokenize(self, inputs: List[str]) -> BatchEncoding: 57 | """Convert inputs to a format suitable for the model. 58 | 59 | Args: 60 | inputs (List[str]): A list of protein sequence strings of len [parallel_chains]. 61 | Returns: 62 | batch_encoding (BatchEncoding): A BatchEncoding object. 63 | """ 64 | # Remove all spaces between amino acids 65 | inputs = [seq.replace(" ", "") for seq in inputs] 66 | return self.tokenizer( 67 | inputs, 68 | add_special_tokens=False, 69 | return_tensors="pt", 70 | return_attention_mask=False, 71 | ).to(self.device) 72 | 73 | 74 | def build(**kwargs): 75 | """Builds a AmplifyExpert.""" 76 | return AmplifyExpert(**kwargs) 77 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/experts/bert_expert.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | import re 3 | import torch 4 | import torch.nn as nn 5 | from transformers import PreTrainedTokenizerBase 6 | from transformers import BertForMaskedLM, BertTokenizer 7 | from transformers.tokenization_utils_base import BatchEncoding 8 | from evo_prot_grad.experts.base_experts import ProteinLMExpert 9 | import evo_prot_grad.common.embeddings as embeddings 10 | 11 | 12 | class BERTExpert(ProteinLMExpert): 13 | """Expert sub-class for BERT-style HuggingFace protein language models. 14 | Implements abstract methods `_get_last_one_hots` and `tokenize`. 15 | Swaps out the `BertForMaskedLM.bert.embeddings.word_embeddings` layer 16 | for a `evo_prot_grad.common.embeddings.OneHotEmbedding` layer. 17 | """ 18 | def __init__(self, 19 | temperature: float, 20 | scoring_strategy: str, 21 | model: Optional[nn.Module] = None, 22 | tokenizer: Optional[PreTrainedTokenizerBase] = None, 23 | device: str = 'cpu'): 24 | """ 25 | Args: 26 | temperature (float): Temperature for sampling from the expert. 27 | scoring_strategy (str): Approach for scoring variants that the expert will use. 28 | model (nn.Module): The model to use for the expert. 29 | tokenizer (PreTrainedTokenizerBase): The tokenizer to use for the expert. 30 | device (str): The device to use for the expert. 31 | Raises: 32 | ValueError: If either `model` or `tokenizer` is not specified. 
33 | """ 34 | if model is None and tokenizer is None: 35 | model = BertForMaskedLM.from_pretrained("Rostlab/prot_bert") 36 | tokenizer = BertTokenizer.from_pretrained("Rostlab/prot_bert", do_lower_case=False) 37 | elif model is None or tokenizer is None: 38 | raise ValueError("BERTExpert requires both `model` and `tokenizer` to be specified.") 39 | super().__init__( 40 | temperature, 41 | model, 42 | tokenizer.get_vocab(), 43 | scoring_strategy, 44 | device) 45 | self.tokenizer = tokenizer 46 | self.model.bert.embeddings.word_embeddings = embeddings.OneHotEmbedding(model.bert.embeddings.word_embeddings) 47 | 48 | 49 | def _get_last_one_hots(self) -> torch.Tensor: 50 | """ Returns the one-hot tensors *most recently passed* as input. 51 | 52 | Returns: 53 | one_hots (torch.Tensor): of shape [parallel_chains, seq_len, vocab_size] 54 | """ 55 | return self.model.bert.embeddings.word_embeddings.one_hots 56 | 57 | 58 | def tokenize(self, inputs) -> BatchEncoding: 59 | """Convert inputs to a format suitable for the model. 60 | 61 | Args: 62 | inputs (List[str]): A list of protein sequence strings of len [parallel_chains]. 63 | Returns: 64 | batch_encoding (BatchEncoding): A BatchEncoding object. 65 | """ 66 | inputs = [re.sub(r"[UZOB]", "X", inputs_) for inputs_ in inputs] 67 | return self.tokenizer(inputs, return_tensors='pt').to(self.device) 68 | 69 | 70 | def build(**kwargs): 71 | """Builds a BERTExpert.""" 72 | return BERTExpert(**kwargs) -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/experts/causallm_expert.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List 2 | import torch.nn as nn 3 | from transformers import PreTrainedTokenizerBase 4 | from transformers import AutoModelForCausalLM, AutoTokenizer 5 | from transformers.tokenization_utils_base import BatchEncoding 6 | from evo_prot_grad.experts.base_experts import ProteinLMExpert 7 | import evo_prot_grad.common.embeddings as embeddings 8 | 9 | 10 | class CausalLMExpert(ProteinLMExpert): 11 | """Expert sub-class for autoregressive (causal) HuggingFace protein language models. 12 | Implements abstract methods `_get_last_one_hots` and `tokenize`. 13 | Swaps out the `AutoModelForCausalLM.transformer.embedding` layer 14 | for a `evo_prot_grad.common.embeddings.OneHotEmbedding` layer. 15 | """ 16 | def __init__(self, 17 | temperature: float, 18 | scoring_strategy: str, 19 | model: Optional[nn.Module] = None, 20 | tokenizer: Optional[PreTrainedTokenizerBase] = None, 21 | device: str = 'cpu'): 22 | """ 23 | Args: 24 | temperature (float): Temperature for sampling from the expert. 25 | scoring_strategy (str): Approach for scoring variants that the expert will use. 26 | model (nn.Module): The model to use for the expert. Defaults to AutoModelForCausalLM from lightonai/RITA_s. 27 | tokenizer (PreTrainedTokenizerBase): The tokenizer to use for the expert. Defaults to AutoTokenizer from lightonai/RITA_s. 28 | device (str): The device to use for the expert. Defaults to 'cpu'. 29 | Raises: 30 | ValueError: If either `model` or `tokenizer` is not specified. 
31 | """ 32 | if model is None and tokenizer is None: 33 | model = AutoModelForCausalLM.from_pretrained("lightonai/RITA_s", trust_remote_code=True) 34 | tokenizer = AutoTokenizer.from_pretrained("lightonai/RITA_s", ) 35 | elif model is None or tokenizer is None: 36 | raise ValueError("CausalLMExpert requires both `model` and `tokenizer` to be specified.") 37 | vocab = tokenizer.get_vocab() 38 | if '' in vocab: 39 | vocab.pop('') 40 | super().__init__( 41 | temperature = temperature, 42 | model = model, 43 | vocab = vocab, 44 | scoring_strategy = scoring_strategy, 45 | device = device 46 | ) 47 | self.tokenizer = tokenizer 48 | self.model.transformer.embedding = embeddings.OneHotEmbedding(model.transformer.embedding) 49 | 50 | 51 | def _get_last_one_hots(self): 52 | """ Returns the one-hot tensors *most recently passed* as input. 53 | """ 54 | return self.model.transformer.embedding.one_hots 55 | 56 | 57 | def tokenize(self, inputs: List[str]) -> BatchEncoding: 58 | """Convert inputs to a format suitable for the model. 59 | 60 | Args: 61 | inputs (List[str]): A list of protein sequence strings of len [parallel_chains]. 62 | Returns: 63 | batch_encoding (BatchEncoding): A BatchEncoding object. 64 | """ 65 | # Remove all spaces between amino acids 66 | inputs = [seq.replace(' ', '') for seq in inputs] 67 | return self.tokenizer(inputs, add_special_tokens=False, return_tensors="pt").to(self.device) 68 | 69 | 70 | def build(**kwargs): 71 | """Builds a RitaExpert.""" 72 | return CausalLMExpert(**kwargs) -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/experts/esm_downstream_regression_expert.py: -------------------------------------------------------------------------------- 1 | from evo_prot_grad.experts.base_experts import AttributeExpert 2 | import evo_prot_grad.common.utils as utils 3 | import torch 4 | import torch.nn as nn 5 | 6 | from typing import Optional, List, Tuple 7 | from transformers import AutoTokenizer, PreTrainedTokenizerBase 8 | import evo_prot_grad.common.embeddings as embeddings 9 | from transformers.tokenization_utils_base import BatchEncoding 10 | from transformers import DataCollatorForLanguageModeling 11 | 12 | 13 | class EsmDownstreamRegressionExpert(AttributeExpert): 14 | """ESM2 regression expert.""" 15 | 16 | def __init__( 17 | self, 18 | temperature: float, 19 | scoring_strategy: str, 20 | model: nn.Module, 21 | tokenizer: PreTrainedTokenizerBase, 22 | device: str, 23 | ): 24 | """ 25 | Args: 26 | temperature (float): Temperature for sampling from the expert. 27 | scoring_strategy (str): Approach for scoring variants that the expert will use. 28 | model (Module): The model to use for the expert. 29 | tokenizer (PreTrainedTokenizerBase): The tokenizer to use for the expert. 30 | device (str): The device to use for the expert. 31 | """ 32 | if (model is None) or (tokenizer is None): 33 | raise ValueError( 34 | "ESM2 Regression Expert requires both `model` and `tokenizer` to be specified." 
35 | ) 36 | 37 | assert scoring_strategy == "attribute_value" 38 | super().__init__(temperature, model, scoring_strategy, device, tokenizer) 39 | self.tokenizer = tokenizer 40 | self.model.esm.embeddings.word_embeddings = embeddings.OneHotEmbedding( 41 | model.esm.embeddings.word_embeddings 42 | ) 43 | 44 | def _get_last_one_hots(self) -> torch.Tensor: 45 | """Returns the one-hot tensors *most recently passed* as input.""" 46 | return self.model.esm.embeddings.word_embeddings.one_hots 47 | 48 | def tokenize(self, inputs: List[str]) -> BatchEncoding: 49 | """Convert inputs to a format suitable for the model. 50 | 51 | Args: 52 | inputs (List[str]): A list of protein sequence strings of len [parallel_chains]. 53 | Returns: 54 | batch_encoding (BatchEncoding): A BatchEncoding object. 55 | """ 56 | return self.tokenizer(inputs, add_special_tokens=False, return_tensors="pt").to( 57 | self.device 58 | ) 59 | 60 | def get_model_output(self, inputs: List[str]) -> Tuple[torch.Tensor, torch.Tensor]: 61 | """Returns both the onehot-encoded inputs and model's predictions. 62 | 63 | Args: 64 | inputs (List[str]): A list of protein sequence strings of len [parallel_chains]. 65 | Returns: 66 | x_oh: (torch.Tensor) of shape [parallel_chains, seq_len, vocab_size] 67 | attribute_values: (torch.Tensor) of shape [parallel_chains] 68 | """ 69 | encoded_inputs = self.tokenize(inputs) 70 | attribute_values = self.model(**encoded_inputs).logits.squeeze() 71 | x_oh = self._get_last_one_hots() 72 | return x_oh, attribute_values 73 | 74 | 75 | def build(**kwargs): 76 | """Builds an EsmDownstreamRegressionExpert.""" 77 | return EsmDownstreamRegressionExpert(**kwargs) 78 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/experts/esm_expert.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from typing import Optional, List 4 | from transformers import AutoTokenizer, PreTrainedTokenizerBase 5 | from transformers import EsmForMaskedLM 6 | from transformers.tokenization_utils_base import BatchEncoding 7 | from evo_prot_grad.experts.base_experts import ProteinLMExpert 8 | import evo_prot_grad.common.embeddings as embeddings 9 | 10 | 11 | class EsmExpert(ProteinLMExpert): 12 | """Expert base class for HuggingFace protein language models from the ESM family. 13 | Implements abstract methods `_get_last_one_hots` and `tokenize`. 14 | Swaps out the `EsmForMaskedLM.esm.embeddings.word_embeddings` layer 15 | for a `evo_prot_grad.common.embeddings.OneHotEmbedding` layer. 16 | """ 17 | 18 | def __init__( 19 | self, 20 | temperature: float, 21 | scoring_strategy: str, 22 | model: Optional[nn.Module] = None, 23 | tokenizer: Optional[PreTrainedTokenizerBase] = None, 24 | device: str = "cpu", 25 | ): 26 | """ 27 | Args: 28 | temperature (float): Temperature for sampling from the expert. 29 | scoring_strategy (str): Approach for scoring variants that the expert will use. 30 | model (nn.Module): The model to use for the expert. Defaults to EsmForMaskedLM from facebook/esm2_t6_8M_UR50D. 31 | tokenizer (PreTrainedTokenizerBase): The tokenizer to use for the expert. Defaults to AutoTokenizer from facebook/esm2_t6_8M_UR50D. 32 | device (str): The device to use for the expert. Defaults to 'cpu'. 33 | Raises: 34 | ValueError: If either `model` or `tokenizer` is not specified.
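Example (a sketch mirroring the `get_expert` docstring; any ESM-2 checkpoint can be substituted for the default 8M one shown here):

```python
from transformers import AutoTokenizer, EsmForMaskedLM
from evo_prot_grad.experts.esm_expert import EsmExpert

expert = EsmExpert(
    temperature=1.0,
    scoring_strategy='mutant_marginal',
    model=EsmForMaskedLM.from_pretrained("facebook/esm2_t6_8M_UR50D"),
    tokenizer=AutoTokenizer.from_pretrained("facebook/esm2_t6_8M_UR50D"),
    device='cpu',
)
```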
35 | """ 36 | if model is None and tokenizer is None: 37 | model = EsmForMaskedLM.from_pretrained("facebook/esm2_t6_8M_UR50D") 38 | tokenizer = AutoTokenizer.from_pretrained("facebook/esm2_t6_8M_UR50D") 39 | elif model is None or tokenizer is None: 40 | raise ValueError( 41 | "EsmExpert requires both `model` and `tokenizer` to be specified." 42 | ) 43 | super().__init__( 44 | temperature, model, tokenizer.get_vocab(), scoring_strategy, device 45 | ) 46 | self.tokenizer = tokenizer 47 | self.model.esm.embeddings.word_embeddings = embeddings.OneHotEmbedding( 48 | model.esm.embeddings.word_embeddings 49 | ) 50 | 51 | def _get_last_one_hots(self) -> torch.Tensor: 52 | """Returns the one-hot tensors *most recently passed* as input.""" 53 | return self.model.esm.embeddings.word_embeddings.one_hots 54 | 55 | def tokenize(self, inputs: List[str]) -> BatchEncoding: 56 | """Convert inputs to a format suitable for the model. 57 | 58 | Args: 59 | inputs (List[str]): A list of protein sequence strings of len [parallel_chains]. 60 | Returns: 61 | batch_encoding (BatchEncoding): A BatchEncoding object. 62 | """ 63 | return self.tokenizer(inputs, add_special_tokens=False, return_tensors="pt").to( 64 | self.device 65 | ) 66 | 67 | 68 | def build(**kwargs): 69 | """Builds a Esm2Expert.""" 70 | return EsmExpert(**kwargs) 71 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/experts/evcouplings_expert.py: -------------------------------------------------------------------------------- 1 | from evo_prot_grad.experts.base_experts import Expert 2 | from evo_prot_grad.common.tokenizers import OneHotTokenizer 3 | import evo_prot_grad.common.utils as utils 4 | import evo_prot_grad.models.potts as potts 5 | from typing import List, Tuple, Optional 6 | import torch 7 | 8 | 9 | class EVCouplingsExpert(Expert): 10 | """Expert class for EVCouplings Potts models. 11 | EVCouplings lib uses the canonical alphabet by default. 12 | 13 | Implements abstract methods `_get_last_one_hots`, `tokenize`, `get_model_output`, `__call__`. 14 | """ 15 | def __init__(self, 16 | temperature: float, 17 | scoring_strategy: str, 18 | model: potts.EVCouplings, 19 | device: str, 20 | tokenizer: Optional[OneHotTokenizer] = None): 21 | """ 22 | Args: 23 | temperature (float): Temperature for sampling from the expert. 24 | scoring_strategy (str): Approach for scoring variants that the expert will use. 25 | model (potts.EVCouplings): The model to use for the expert. 26 | device (str): The device to use for the expert. 27 | tokenizer (Optional[OneHotTokenizer]): The tokenizer to use for the expert. If None, uses 28 | OneHotTokenizer(utils.CANONICAL_ALPHABET, device). 29 | """ 30 | assert model is not None, "EVCouplingsExpert requires a potts.EVCouplings model to be provided." 
31 | assert scoring_strategy == "attribute_value" 32 | if tokenizer is None: 33 | tokenizer = OneHotTokenizer(utils.CANONICAL_ALPHABET) 34 | super().__init__(temperature, 35 | model, 36 | tokenizer.get_vocab(), 37 | scoring_strategy, 38 | device=device) 39 | assert model.alphabet == self.alphabet, \ 40 | f"EVcouplings alphabet {model.alphabet} should match our canonical alphabet {self.alphabet}" 41 | self.tokenizer = tokenizer 42 | 43 | ####### "Abstract" methods ####### 44 | 45 | def _get_last_one_hots(self) -> torch.Tensor: 46 | return self.model.one_hot_embedding.one_hots 47 | 48 | 49 | def tokenize(self, inputs: List[str]) -> torch.FloatTensor: 50 | return self.tokenizer(inputs).to(self.device) 51 | 52 | 53 | def get_model_output(self, inputs: List[str]) -> Tuple[torch.Tensor, torch.Tensor]: 54 | encoded_inputs = self.tokenize(inputs) 55 | hamiltonian = self.model(encoded_inputs) 56 | oh = self._get_last_one_hots() 57 | return oh, hamiltonian 58 | 59 | 60 | def __call__(self, inputs: List[str]) -> Tuple[torch.Tensor, torch.Tensor]: 61 | """Compute the wildtype-normalized Hamiltonian expert score. 62 | Args: 63 | inputs (List[str]): A list of protein sequence strings of len [parallel_chains]. 64 | Returns: 65 | oh (torch.Tensor): of shape [parallel_chains, seq_len, vocab_size] 66 | expert_score (torch.Tensor): of shape [parallel_chains] 67 | """ 68 | oh, hamiltonian = self.get_model_output(inputs) 69 | score = self.variant_scoring(oh, hamiltonian, self._wt_oh) 70 | return oh, score 71 | 72 | 73 | def build(**kwargs): 74 | return EVCouplingsExpert(**kwargs) -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/experts/onehot_downstream_regression_expert.py: -------------------------------------------------------------------------------- 1 | from evo_prot_grad.experts.base_experts import AttributeExpert 2 | from evo_prot_grad.common.tokenizers import OneHotTokenizer 3 | import evo_prot_grad.common.utils as utils 4 | from torch.nn import Module 5 | from typing import Optional 6 | 7 | 8 | class OneHotDownstreamRegressionExpert(AttributeExpert): 9 | """ Basic one-hot regression expert.""" 10 | def __init__(self, 11 | temperature: float, 12 | scoring_strategy: str, 13 | model: Module, 14 | device: str, 15 | tokenizer: Optional[OneHotTokenizer] = None): 16 | """ 17 | Args: 18 | temperature (float): Temperature for sampling from the expert. 19 | scoring_strategy (str): Approach for scoring variants that the expert will use. 20 | model (Module): The model to use for the expert. 21 | device (str): The device to use for the expert. 22 | tokenizer (Optional[OneHotTokenizer], optional): The tokenizer to use for the expert. If None, 23 | a OneHotTokenizer will be constructed. Defaults to None. 
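A regression model that consumes one-hot sequences and returns a scalar per chain, such as the `OneHotCNN` in `evo_prot_grad/models/downstream_cnn.py`, is a natural fit.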
24 | """ 25 | if tokenizer is None: 26 | tokenizer = OneHotTokenizer(utils.CANONICAL_ALPHABET) 27 | assert scoring_strategy == "attribute_value" 28 | super().__init__(temperature, 29 | model, 30 | scoring_strategy, 31 | device, 32 | tokenizer) 33 | 34 | 35 | def build(**kwargs): 36 | """Builds a OneHotDownstreamExpert.""" 37 | return OneHotDownstreamRegressionExpert(**kwargs) -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/models/downstream_cnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class OneHotCNN(nn.Module): 7 | """A CNN that takes one-hot encoded sequences as input. 8 | 9 | OneHotCNN uses 1D convolution over the one-hot encoding dimension 10 | to embed each amino acid into a vector of size matching the 11 | sequence length, and uses length max-pooling (1D max-pooling on 12 | the sequence length dimension) to reduce this dimension to 1. 13 | The output is then fed through a linear layer to produce a single scalar output. 14 | """ 15 | def __init__(self, vocab_size: int, kernel_size: int, 16 | input_size: int, dropout=0.0): 17 | """ 18 | Args: 19 | vocab_size (int): the size of the vocabulary (e.g., 20). 20 | kernel_size (int): the size of the convolutional kernel 21 | input_size (int): the size of the input embedding 22 | dropout (float): the dropout probability 23 | """ 24 | super().__init__() 25 | self.encoder = nn.Conv1d(vocab_size, input_size, 26 | kernel_size=kernel_size) 27 | self.embedding = nn.Sequential( 28 | nn.Linear(input_size, input_size*2), 29 | nn.ReLU(True) 30 | ) 31 | self.decoder = nn.Linear(input_size*2, 1) 32 | self.n_tokens = vocab_size 33 | self.dropout = nn.Dropout(dropout) 34 | self.input_size = input_size 35 | 36 | 37 | def forward(self, x: torch.Tensor) -> torch.Tensor: 38 | """ 39 | Args: 40 | x (torch.Tensor): one-hot tensor of shape [parallel_chains, seq_len, vocab_size] 41 | Returns: 42 | output (torch.Tensor): shape [parallel_chains] 43 | """ 44 | # encode 45 | x = F.relu(self.encoder(x.transpose(1,2)).transpose(1,2)) 46 | # embed 47 | x = self.embedding(x) 48 | # length-dim pool 49 | x = torch.max(x, dim=1)[0] 50 | x = self.dropout(x) 51 | # decoder 52 | output = self.decoder(x) 53 | return output.squeeze(1) # [parallel_chains] -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers[torch]==4.38.0 2 | pandas -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | with open('requirements.txt', 'r') as f: 4 | requirements = f.read().splitlines() 5 | 6 | with open('README.md', 'r') as f: 7 | readme = f.read() 8 | 9 | setup(name='evo_prot_grad', 10 | version='0.2.1', 11 | description='Directed evolution of proteins with fast gradient-based discrete MCMC.', 12 | author='Patrick Emami', 13 | author_email='Patrick.Emami@nrel.gov', 14 | url='https://github.nrel.gov/NREL/EvoProtGrad/', 15 | python_requires='>=3.8', 16 | install_requires=requirements, 17 | long_description=readme, 18 | long_description_content_type='text/markdown', 19 | 
packages=find_packages(include=['evo_prot_grad', 20 | 'evo_prot_grad.common', 21 | 'evo_prot_grad.experts', 22 | 'evo_prot_grad.models'], 23 | exclude=['test']), 24 | license='BSD 3-Clause', 25 | keywords=['protein engineering', 'directed evolution', 'huggingface', 'protein language models', 'mcmc'], 26 | classifiers=[ 27 | "Development Status :: 3 - Alpha", 28 | "Intended Audience :: Science/Research", 29 | "License :: OSI Approved :: BSD License", 30 | "Natural Language :: English", 31 | "Programming Language :: Python :: 3", 32 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 33 | ] 34 | ) 35 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/active_learning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Scalable_Drug_Discovery/img/active_learning.png -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/dmtl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Scalable_Drug_Discovery/img/dmtl.png -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/elements.txt: -------------------------------------------------------------------------------- 1 | 2 | 1A 2A 3A 4A 5A 6A 7A 8A 3 | ----- ----- 4 | 1 | H | |He | 5 | |---+---- --------------------+---| 6 | 2 |Li |Be | | B | C | N | O | F |Ne | 7 | |---+---| |---+---+---+---+---+---| 8 | 3 |Na |Mg |3B 4B 5B 6B 7B | 8B |1B 2B |Al |Si | P | S |Cl |Ar | 9 | |---+---+---------------------------------------+---+---+---+---+---+---| 10 | 4 | K |Ca |Sc |Ti | V |Cr |Mn |Fe |Co |Ni |Cu |Zn |Ga |Ge |As |Se |Br |Kr | 11 | |---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---| 12 | 5 |Rb |Sr | Y |Zr |Nb |Mo |Tc |Ru |Rh |Pd |Ag |Cd |In |Sn |Sb |Te | I |Xe | 13 | |---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---| 14 | 6 |Cs |Ba |LAN|Hf |Ta | W |Re |Os |Ir |Pt |Au |Hg |Tl |Pb |Bi |Po |At |Rn | 15 | |---+---+---+------------------------------------------------------------ 16 | 7 |Fr |Ra |ACT| 17 | ===--------------------------------------------------------------------=== 18 | Lanthanide |La |Ce |Pr |Nd |Pm |Sm |Eu |Gd |Tb |Dy |Ho |Er |Tm |Yb |Lu | 19 | |---+---+---+---+---+---+---+---+---+---+---+---+---+---+---| 20 | Actinide |Ac |Th |Pa | U |Np |Pu |Am |Cm |Bk |Cf |Es |Fm |Md |No |Lw | 21 | ------------------------------------------------------------- 22 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/evo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Scalable_Drug_Discovery/img/evo.png -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/flame.txt: -------------------------------------------------------------------------------- 1 | 2 | __ [] 3 | || [] 4 | || [] 5 | || [] 6 | __ || [] 7 | || || [] 8 | .-||-||-. 
[] /\ 9 | _\_______/_===========[]=(-o) 10 | )\_____/( [] \/ 11 | / || \ [] 12 | / || \ [] 13 | / || \ [] 14 | /~~~~~~~~~~~~~~~\ [] 15 | / :: \ [] 16 | ( :: ) [] 17 | `-----------------' [] 18 | ) [] 19 | ( ) [] 20 | )( . ( [] 21 | .) @@) ) [] 22 | ` ) @@(@@)@ [] 23 | (@@(@@)@ [] 24 | @(@.@)@@ [] 25 | ` (@{__}@)` [] 26 | :__; [] 27 | ___ {}+ [] 28 | ( = ) .---'`---. [] 29 | | |_ jgs / \ ________[]____ 30 | ____| |_|==========(____________)_/______________\ 31 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/flask.txt: -------------------------------------------------------------------------------- 1 | 2 | |-| * 3 | |-| _ * __ 4 | |-| | * |/' 5 | |-| |~*~~~o~| 6 | |-| | O o *| 7 | /___\ |o___O__| 8 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/ft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Scalable_Drug_Discovery/img/ft.png -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/gen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Scalable_Drug_Discovery/img/gen.png -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/helix1.txt: -------------------------------------------------------------------------------- 1 | 2 | 6098)o%:::%o(860 3 | 098)o%:::%o(8609 4 | 6o%:%o(86098) 5 | (86098)o 6 | 6098)o%::%o9 7 | 098)o%::::::%o9 8 | 6o%::::::%o(860 9 | 6o%::%o(8609 10 | o(86098) 11 | (86098)o%:%o9 12 | 6098)o%:::%o(860 13 | 098)o%:::%o(8609 14 | 6o%:%o(86098) 15 | (86098)o 16 | 6098)o%::%o9 17 | 098)o%::::::%o9 18 | 6o%::::::%o(860 19 | 6o%::%o(8609 20 | o(86098) 21 | (86098)o%:%o9 22 | 6098)o%:::%o(860 23 | 098)o%:::%o(8609 24 | 6o%:%o(86098) 25 | (86098)o 26 | 6098)o%::%o9 27 | 098)o%::::::%o9 28 | 6o%::::::%o(860 29 | 6o%::%o(8609 30 | o(86098) 31 | (86098)o%:%o9 32 | 6098)o%:::%o(860 33 | 098)o%:::%o(8609 34 | 6o%:%o(86098) 35 | (86098)o 36 | 6098)o%::%o9 37 | 098)o%::::::%o9 38 | 6o%::::::%o(860 39 | 6o%::%o(8609 40 | o(86098) 41 | (86098)o%:%o9 42 | 6098)o%:::%o(860 43 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/helix2.txt: -------------------------------------------------------------------------------- 1 | 2 | // \\ // \\ // \\ // \\ // \\ // \\ // \\ 3 | \\ \\ // | :,\\': | \\ // | :,\\': | \\ // | :,\\': | \\ 4 | \\ | |\\ // | | // \\ | |\\ // | |// \\ | \\ // | | // \\ | | 5 | \\ | :,\\': | // \\ | :,\\': | // \\ | :,\\': | // \\ | 6 | \\ // \\ // \\ // \\ // \\ // \\ // \\ 7 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/lab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Scalable_Drug_Discovery/img/lab.png -------------------------------------------------------------------------------- 
/workshops/Scalable_Drug_Discovery/img/nanobody.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Scalable_Drug_Discovery/img/nanobody.png -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/science.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | _____ __ __ __ __ _____ _ __ __ 4 | / ___// /_____ _____ ____/ / / /_ ____ ______/ /__ / _( )____ ___ ____ _____ (_)___ ____ _ / /_____ / /________ __ 5 | \__ \/ __/ __ `/ __ \/ __ / / __ \/ __ `/ ___/ //_/ / / |// __ `__ \ / __ `/ __ \/ / __ \/ __ `/ / __/ __ \ / __/ ___/ / / / 6 | ___/ / /_/ /_/ / / / / /_/ / / /_/ / /_/ / /__/ ,< _/ / / / / / / / / /_/ / /_/ / / / / / /_/ / / /_/ /_/ / / /_/ / / /_/ / 7 | /____/\__/\__,_/_/ /_/\__,_/ /_.___/\__,_/\___/_/|_| /___/ /_/ /_/ /_/ \__, /\____/_/_/ /_/\__, / \__/\____/ \__/_/ \__, / 8 | /____/ /____/ /____/ 9 | 888 10 | .d8888b .d8888b888 .d88b. 88888b. .d8888b .d88b. 888 11 | 88K d88P" 888d8P Y8b888 "88bd88P" d8P Y8b 888 12 | "Y8888b.888 88888888888888 888888 88888888 888 13 | X88Y88b. 888Y8b. 888 888Y88b. Y8b. 14 | 88888P' "Y8888P888 "Y8888 888 888 "Y8888P "Y8888 888 15 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/score.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Scalable_Drug_Discovery/img/score.png -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/select.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Scalable_Drug_Discovery/img/select.png -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/sine.txt: -------------------------------------------------------------------------------- 1 | 2 | .-. .-. 3 | / \ .-. .-. / \ 4 | / \ / \ .-. _ .-. 
/ \ / \ 5 | -/-------\-------/-----\-----/---\---/-\---/---\-----/-----\-------/-------\-- 6 | \ / \ / `-' `-' \ / \ / 7 | \ / `-' `-' \ / 8 | `-' `-' 9 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate==0.34.2 2 | biotite==1.3.0 3 | bitsandbytes==0.44.1 4 | datasets==3.0.1 5 | jsonlines==4.0.0 6 | matplotlib==3.9.2 7 | py3dmol==2.4.2 8 | pyfastx==2.1.0 9 | sentencepiece==0.2.0 10 | transformers==4.52.3 11 | xformers>=0.0.28 12 | protobuf==5.28.3 -------------------------------------------------------------------------------- /workshops/X_ray_Object_Detection_Ground_Truth/chest_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/X_ray_Object_Detection_Ground_Truth/chest_image.png -------------------------------------------------------------------------------- /workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/.gitignore: -------------------------------------------------------------------------------- 1 | data/* 2 | train.py 3 | -------------------------------------------------------------------------------- /workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/README: -------------------------------------------------------------------------------- 1 | # Analyze Tabular Data With A Custom Classifier 2 | 3 | NOTE: This workshop is currently deprecated and only included in this repository for reference. We do not recommend using it as part of AWS-hosted or self-managed events. 
-------------------------------------------------------------------------------- /workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_create_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_create_flow.png -------------------------------------------------------------------------------- /workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_export.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_export.png -------------------------------------------------------------------------------- /workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_export_start.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_export_start.png -------------------------------------------------------------------------------- /workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_import_s3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_import_s3.png -------------------------------------------------------------------------------- /workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_import_s3_start.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_import_s3_start.png -------------------------------------------------------------------------------- /workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_rename_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_rename_flow.png -------------------------------------------------------------------------------- /workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform.png -------------------------------------------------------------------------------- /workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_add.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_add.png
--------------------------------------------------------------------------------
/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_custom.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_custom.png
--------------------------------------------------------------------------------
/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_drop_column_diagnosisb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_drop_column_diagnosisb.png
--------------------------------------------------------------------------------
/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_drop_column_id.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_drop_column_id.png
--------------------------------------------------------------------------------
/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_encode_categorical.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_encode_categorical.png
--------------------------------------------------------------------------------
/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_pandas.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_pandas.png
--------------------------------------------------------------------------------
/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_rename.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_rename.png
--------------------------------------------------------------------------------
/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/requirements.txt:
--------------------------------------------------------------------------------
1 | sagemaker-datawrangler==0.3.8
2 | boto3==1.26.30
3 | matplotlib==3.6.2
4 | pandas==1.5.2
5 | sagemaker==2.123.0
6 | scikit-learn==1.2.0
7 | s3fs==0.4.2
8 |
--------------------------------------------------------------------------------
/workshops/archive/Summarize_Scientific_Documents/README:
--------------------------------------------------------------------------------
1 | # Summarize Scientific Documents
2 |
3 | NOTE: This workshop is deprecated and is included in this repository for reference only. We do not recommend using it as part of AWS-hosted or self-managed events.
--------------------------------------------------------------------------------
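
The archived requirements.txt above pins exact versions of every dependency, so the workshop environment can be reproduced by installing the file with pip. A minimal sketch in Python, assuming the file has been copied into the current working directory (the local path and invocation are illustrative, not part of the workshop itself):

# Minimal sketch: install the pinned dependencies from the archived
# Bring_Your_Own_Sklearn_Classifier workshop. Assumes requirements.txt has been
# copied into the current working directory; the local path is an assumption.
import subprocess
import sys

# Invoke pip through the running interpreter so the pinned packages
# (e.g. sagemaker==2.123.0, scikit-learn==1.2.0) land in the active environment.
subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"])

Pinning through a requirements file rather than installing latest versions matters here because the workshop is archived: newer sagemaker or scikit-learn releases may have breaking API changes.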