├── .github ├── dependabot.yml └── workflows │ └── jekyll-gh-pages.yml ├── .gitignore ├── .gitmodules ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── solutions └── orchestrate_protein_design_workloads │ ├── README.md │ ├── infrastructure │ ├── cloudformation │ │ ├── batch-protein-folding-cfn-batch.yaml │ │ ├── batch-protein-folding-cfn-module-nextflow.yaml │ │ └── batch-protein-folding-cfn-root.yaml │ └── docker │ │ └── nextflow │ │ ├── Dockerfile │ │ └── requirements.txt │ └── notebooks │ └── orchestration │ ├── bin │ ├── get_fastas.py │ ├── run_esmfold.py │ ├── run_rfdesign.py │ ├── run_rfdesign_inpainting.py │ └── wait_for_batch.py │ ├── pd1_demo │ ├── pd1.pdb │ └── pdl1.pdb │ ├── run_nextflow.md │ ├── run_rfdesign_esmfold_multiple__sequences.nf │ └── submit_nextflow_job.py └── workshops ├── AI_Driven_Protein_Analysis ├── 1-esmfold-on-sagemaker.ipynb ├── 2-protein-design.ipynb ├── LICENSE ├── README.md ├── THIRD-PARTY-NOTICES ├── code │ ├── inference.py │ └── requirements.txt ├── esmfold-requirements.txt ├── img │ ├── 01.png │ ├── 02.png │ ├── 03.png │ ├── 04.png │ ├── 05.png │ ├── 06.png │ ├── antibody.png │ ├── brca_stats.png │ ├── endpoint_results.png │ ├── herceptin_redesign_target.png │ ├── overexpression.png │ ├── pdb-alignment-1.png │ ├── pdb-alignment-2.png │ ├── pdb_config.png │ └── pdb_results.png ├── protein-design-requirements.txt └── prothelpers │ ├── __init__.py │ ├── config │ └── rfdiffusion.yaml │ ├── protein_mpnn_utils.py │ ├── proteinmpnn.py │ ├── rfdiffusion.py │ ├── sequence.py │ └── structure.py ├── AI_ML_services_workshop_information ├── AI_ML_Services_Hands_On_Lab_Instructions.pdf ├── ATTRIBUTION.txt ├── cfn_workshop_ai_ml_services.yaml ├── chest-xray.jpg ├── lambda_code │ ├── ai_ml_services_lambda.py │ ├── install_lambda_function_requirements.sh │ ├── lambda.zip │ └── requirements.txt ├── patient_note.txt ├── sample_patient_note.png └── sample_patient_note.png_out ├── Amazon_Neptune_ML_PPI_Analysis ├── 1_preprocessing.ipynb ├── 2_data_exploration.ipynb ├── 3_model_training.ipynb ├── README.md ├── THIRD-PARTY-NOTICES ├── cfn │ ├── neptune-base-stack-existing-VPC.json │ ├── neptune-ml-core-stack.json │ ├── neptune-ml-nested-stack.json │ └── neptune-sagemaker-notebook-stack.json ├── create_neptune_ml_kernel.sh ├── deploy.sh ├── environment.gpu.yml ├── neptune_ml_utils.py ├── src │ ├── model-hpo-configuration.json │ ├── train.py │ └── transform.py └── uniprot.py ├── BIomedical_Researcher └── gradio │ ├── README.md │ ├── app.py │ ├── chat.py │ ├── pubmed.py │ └── requirements.txt ├── Build_Multiple_Models_In_Parallel_SageMaker └── train_multiple_models.ipynb ├── Cancer-gene-expression-survival-prediction-with-mme ├── .gitignore ├── Genome-Survival-Prediction-Pipeline-MME.ipynb ├── data │ └── Genomic-data-119patients.csv ├── images │ ├── Architecture.jpeg │ ├── image_2.jpg │ └── image_3.jpg ├── model │ ├── meta.json │ ├── model.pth │ └── model.tar.gz └── src │ ├── _model.py │ ├── _repack_model.py │ ├── evaluation.py │ ├── genome_groups.py │ ├── inference.py │ ├── mme_deployment.py │ └── train.py ├── Classify_Medical_Specialty_NLP_Huggingface_Transformers ├── 1_sagemaker_medical_specialty_using_transfomers.ipynb ├── MTsample_input_data.csv ├── get_dependencies.py ├── requirements.txt ├── train.csv └── train.py ├── Classify_Skin_Lesion_Images ├── 1-Classify_Skin_Lesion_Images.ipynb ├── CITATION ├── THIRD-PARTY-NOTICES ├── img │ ├── MLLC1.png │ ├── MLLC2.png │ ├── Trial-component-list.png │ ├── deployment_options.png │ ├── experiments.png │ ├── 
find-prod-deploy.png │ ├── jobs.png │ ├── lesions.png │ ├── metrics.png │ ├── model_registry.png │ ├── name_project.png │ ├── overexpression.png │ ├── pipeline.png │ ├── pipeline_execution.png │ ├── repo_defaults.png │ ├── repositories.png │ ├── resources.png │ ├── second-endpoint.png │ ├── select-model-version.png │ ├── sidebar.png │ ├── sm-resources-tab.png │ ├── sm_experiments.png │ ├── tc-list-2.png │ ├── template_build.jpg │ ├── template_deploy.jpg │ └── update-status.png ├── scripts │ ├── pipelines │ │ ├── codebuild-buildspec.yml │ │ └── skinlesions │ │ │ ├── __init__.py │ │ │ ├── evaluate.py │ │ │ ├── pipeline.py │ │ │ └── preprocess.py │ └── processing │ │ └── process.py └── visualizer │ ├── __init__.py │ └── visualizer.py ├── Explain-hospital-triage-from-admission-notes └── explain-hospital-triage-prediction-with-amazon-sagemaker-clarify.ipynb ├── FDA_Doc_Search ├── .gitignore ├── 1-load-ha-data-into-S3.ipynb ├── 2-create-kendra-index.ipynb ├── 3-test-question-answering.ipynb ├── 4-create-prompt-flows.ipynb ├── LICENSE ├── README.md ├── app │ ├── app.py │ ├── server.py │ └── www │ │ ├── img │ │ ├── brain-light.png │ │ └── brain.png │ │ └── main.css ├── arch.png ├── requirements.txt └── src │ ├── __init__.py │ └── helpers.py ├── Healthcare_Payments_Prediction_SageMaker_AutoPilot ├── Healthcare_Payments_Prediction_SageMaker_AutoPilot.ipynb ├── SageMakerAutopilotCandidateDefinitionNotebook.ipynb ├── SageMakerAutopilotDataExplorationNotebook.ipynb ├── healthcare_data_sample.csv ├── img │ └── autopilot_schematic.png └── report.ipynb ├── Medical_Imaging_AI ├── README.md ├── img │ └── arch.png ├── source │ ├── requirements.txt │ └── train.py ├── spleen_segmentation_3d_tutorial.ipynb └── spleen_segmentation_sagemaker_managedtraining.ipynb ├── Medicare_Hospital_Cost_Prediction └── Jupyter_Notebook_Medicare_Hospital_Cost_Prediction.ipynb ├── Molecular-property-prediction └── hiv-inhibitor-prediction-dgl │ ├── code │ ├── inference.py │ ├── requirements.txt │ ├── s3_downloaded_HIV_dataset.py │ ├── train.py │ └── utils.py │ ├── img │ └── 1.jpg │ ├── molecule-hiv-inhibitor-prediction-sagemaker.ipynb │ └── requirements.txt ├── Pharma_Manufacturing_Compliance_Bedrock_GenAI ├── .gitignore ├── README.md ├── docker │ ├── Dockerfile │ ├── app.py │ ├── cf.yaml │ ├── deploy.sh │ ├── destroy.sh │ ├── images │ │ └── manufacturing_diagram.png │ └── requirements.txt ├── document_compliance_checker.ipynb ├── gradio_interface_test.py ├── penicillin_manufacturing.txt ├── pharma_manufacturing_compliance_checker.ipynb ├── ping_claude.ipynb ├── requirements.txt └── sample_sop.txt ├── Process_HCLS_Docs_Using_AI_Services ├── Process-Medical-Documents.ipynb └── data │ └── sample_report_1.pdf ├── Protein_Language_Modelling ├── README.md ├── deploy_esm_to_inf2 │ ├── Deploy-ESM2-to-Inf2.ipynb │ └── scripts │ │ ├── inference.py │ │ └── requirements.txt ├── finetune_esm_on_deeploc │ ├── Fine-Tune-ESM2-On-DeepLoc.ipynb │ └── scripts │ │ ├── inference.py │ │ ├── lora-train.py │ │ └── requirements.txt ├── finetune_esm_on_oas │ ├── Fine-Tune-ESM2-On-OAS-Paired.ipynb │ ├── Fine-Tune-ESM2-On-OAS.ipynb │ └── scripts │ │ ├── cuda │ │ ├── cuda-oas-mlm-train-ddp-fsdp.py │ │ ├── cuda-oas-mlm-train-ddp.py │ │ ├── cuda-oas-mlm-train-smddp.py │ │ └── requirements.txt │ │ ├── esm-accelerate-examples │ │ ├── oas_mlm_accelerate.py │ │ ├── oas_mlm_trainer.py │ │ ├── oashelpers.py │ │ └── requirements.txt │ │ └── neuron │ │ ├── requirements.txt │ │ └── trn1-oas-mlm-train-dp.py ├── img │ └── protein.png └── pretrain_esm_on_uniref │ ├── 
240131-benchmarking-plms-on-uniref50.ipynb │ └── scripts │ ├── processing │ ├── fasta_to_csv.py │ ├── requirements.txt │ └── tokenize_uniref_csv.py │ └── training │ ├── cuda │ ├── requirements.txt │ └── run_mlm.py │ └── neuron │ ├── requirements.txt │ └── torch_xla_train.py ├── RNAseq_Tertiary_Analysis ├── 1_Explore_RNASeq_Data_in_SageMaker_Studio.ipynb ├── 2_Use_SageMaker_Training_to_Classify_Breast_Cancer_Using_Gene_Expression.ipynb ├── 3_Track_Model_Quality_with_SageMaker_MLOps.ipynb ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── img │ ├── 640px-Gene_structure_eukaryote_2_annotated.png │ ├── MLLC1.png │ ├── MLLC2.png │ ├── Trial-component-list.png │ ├── approve-prod.png │ ├── brca_stats.png │ ├── charts.png │ ├── cloned_folders.png │ ├── code-pipeline.png │ ├── create_project.png │ ├── deploy-stage.png │ ├── deployment_options.png │ ├── deployments.png │ ├── exp-1.png │ ├── exp-2.png │ ├── exp-3.png │ ├── exp-4.png │ ├── experiments.png │ ├── find-prod-deploy.png │ ├── jobs.png │ ├── lineage_graph.png │ ├── metrics.png │ ├── mlflow-diagram.png │ ├── model_registry.png │ ├── overexpression.png │ ├── pipeline.png │ ├── pipeline_execution.png │ ├── project-1.png │ ├── project-2.png │ ├── project-3.png │ ├── project-4.png │ ├── project_name.png │ ├── projects.png │ ├── repo_defaults.png │ ├── repositories.png │ ├── second-endpoint.png │ ├── select-model-version.png │ ├── sidebar.png │ ├── sm-resources-tab.png │ ├── sm_experiments.png │ ├── tc-list-2.png │ ├── template_build.jpg │ ├── template_deploy.jpg │ └── update-status.png └── scripts │ ├── processing │ ├── processing.py │ └── requirements.txt │ ├── rf_train │ ├── requirements.txt │ └── rf_train.py │ ├── tf_train │ ├── requirements.txt │ └── tf_train.py │ └── xgb_train │ ├── requirements.txt │ └── xgb_train.py ├── Sagemaker_Pipelines_Automated_Retraining ├── cfn_sagemaker_pipelines.yaml ├── kick_off_pipeline_lambda.py └── sagemaker_pipelines_automated_retraining.ipynb ├── Scalable_Drug_Discovery ├── 1-active_learning.ipynb ├── 2-directed_evolution.ipynb ├── 3-ml-guided_directed_evolution.ipynb ├── EvoProtGrad │ ├── CHANGELOG.md │ ├── CONTRIBUTING.md │ ├── LICENSE │ ├── README.md │ ├── evo_prot_grad │ │ ├── __init__.py │ │ ├── common │ │ │ ├── __init__.py │ │ │ ├── embeddings.py │ │ │ ├── sampler.py │ │ │ ├── tokenizers.py │ │ │ ├── utils.py │ │ │ └── variant_scoring.py │ │ ├── experts │ │ │ ├── __init__.py │ │ │ ├── amplify_expert.py │ │ │ ├── base_experts.py │ │ │ ├── bert_expert.py │ │ │ ├── causallm_expert.py │ │ │ ├── esm_downstream_regression_expert.py │ │ │ ├── esm_expert.py │ │ │ ├── evcouplings_expert.py │ │ │ └── onehot_downstream_regression_expert.py │ │ └── models │ │ │ ├── downstream_cnn.py │ │ │ └── potts.py │ ├── requirements.txt │ └── setup.py ├── helpers.py ├── img │ ├── active_learning.png │ ├── dmtl.png │ ├── elements.txt │ ├── evo.png │ ├── flame.txt │ ├── flask.txt │ ├── ft.png │ ├── gen.png │ ├── helix1.txt │ ├── helix2.txt │ ├── lab.png │ ├── nanobody.png │ ├── science.txt │ ├── score.png │ ├── select.png │ └── sine.txt ├── requirements.txt └── train.py ├── X_ray_Object_Detection_Ground_Truth ├── chest_image.png ├── ground_truth_utils.py ├── template.manifest └── x_ray_ground_truth_object_detection.ipynb └── archive ├── Bring_Your_Own_Sklearn_Classifier └── archive │ ├── .gitignore │ ├── README │ ├── data_wrangler_sklearn_bring_your_own_MLP_Classifier_Breast_Diagnostic.ipynb │ ├── hcls-lab.flow │ ├── images │ ├── dw_create_flow.png │ ├── dw_export.png │ ├── dw_export_start.png │ ├── 
dw_import_s3.png │ ├── dw_import_s3_start.png │ ├── dw_rename_flow.png │ ├── dw_transform.png │ ├── dw_transform_add.png │ ├── dw_transform_custom.png │ ├── dw_transform_drop_column_diagnosisb.png │ ├── dw_transform_drop_column_id.png │ ├── dw_transform_encode_categorical.png │ ├── dw_transform_pandas.png │ └── dw_transform_rename.png │ ├── requirements.txt │ └── sklearn_bring_your_own_MLP_Classifier_Breast_Diagnostic.ipynb └── Summarize_Scientific_Documents ├── GenAI-Summarize-Scientific-Documents.ipynb └── README /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "daily" 12 | -------------------------------------------------------------------------------- /.github/workflows/jekyll-gh-pages.yml: -------------------------------------------------------------------------------- 1 | # Sample workflow for building and deploying a Jekyll site to GitHub Pages 2 | name: Deploy github pages site 3 | 4 | on: 5 | # Runs on pushes targeting the default branch 6 | push: 7 | branches: ["gh-pages"] 8 | 9 | # Allows you to run this workflow manually from the Actions tab 10 | workflow_dispatch: 11 | 12 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 13 | permissions: 14 | contents: read 15 | pages: write 16 | id-token: write 17 | 18 | # Allow one concurrent deployment 19 | concurrency: 20 | group: "pages" 21 | cancel-in-progress: true 22 | 23 | jobs: 24 | # Build job 25 | build: 26 | runs-on: ubuntu-latest 27 | steps: 28 | - name: Checkout 29 | uses: actions/checkout@v3 30 | with: 31 | ref: 'gh-pages' 32 | - name: Setup Pages 33 | uses: actions/configure-pages@v3 34 | - name: Build with Jekyll 35 | uses: actions/jekyll-build-pages@v1 36 | with: 37 | source: ./docs 38 | destination: ./_site 39 | - name: Upload artifact 40 | uses: actions/upload-pages-artifact@v1 41 | 42 | # Deployment job 43 | deploy: 44 | environment: 45 | name: github-pages 46 | url: ${{ steps.deployment.outputs.page_url }} 47 | runs-on: ubuntu-latest 48 | needs: build 49 | steps: 50 | - name: Deploy to GitHub Pages 51 | id: deployment 52 | uses: actions/deploy-pages@v1 53 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | **/*.ipynb_checkpoints/ 3 | .venv/ 4 | workshops/Marketing_Content_Localization/dependencies/ 5 | __pycache__/ -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "workshops/MONAI-MedicalImage-SageMaker"] 2 | path = workshops/Medical_Imaging_AI/Advanced_MONAI_Workshops/MONAI-MedicalImage-SageMaker 3 | url = https://github.com/YuanSingapore/MONAI-MedicalImage-SageMaker.git 4 | [submodule "workshops/Enrich_Comprehend_Medical_Custom_Models/amazon-comprehend-medical-enrich-custom-models"] 5 | path = 
workshops/Enrich_Comprehend_Medical_Custom_Models/amazon-comprehend-medical-enrich-custom-models 6 | url = https://github.com/aws-samples/amazon-comprehend-medical-enrich-custom-models.git 7 | [submodule "workshops/Medical_Sentence_Relevance_Pretrained_Bert/medical-text-sentence-relevance-bert"] 8 | path = workshops/Medical_Sentence_Relevance_Pretrained_Bert/medical-text-sentence-relevance-bert 9 | url = https://github.com/aws-samples/medical-text-sentence-relevance-bert 10 | [submodule "workshops/Monte_Carlo_Simulations_RStudio/amazon-sagemaker-statistical-simulation-rstudio"] 11 | path = workshops/Monte_Carlo_Simulations_RStudio/amazon-sagemaker-statistical-simulation-rstudio 12 | url = https://github.com/aws-samples/amazon-sagemaker-statistical-simulation-rstudio 13 | [submodule "workshops/Predict_Training_Resource_Usage_SageMaker/amazon-sagemaker-predict-training-resource-usage"] 14 | path = workshops/Predict_Training_Resource_Usage_SageMaker/amazon-sagemaker-predict-training-resource-usage 15 | url = https://github.com/aws-samples/amazon-sagemaker-predict-training-resource-usage 16 | [submodule "solutions/aws-healthcare-lifescience-ai-ml-sample-notebooks"] 17 | path = solutions/aws-healthcare-lifescience-ai-ml-sample-notebooks 18 | url = https://github.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks.git 19 | [submodule "solutions/awesome-proteomics-on-aws"] 20 | path = solutions/awesome-proteomics-on-aws 21 | url = https://github.com/aws-samples/awesome-proteomics-on-aws 22 | [submodule "solutions/aws-batch-architecture-for-rfdesign"] 23 | path = solutions/aws-batch-architecture-for-rfdesign 24 | url = https://github.com/aws-samples/aws-batch-architecture-for-rfdesign.git 25 | [submodule "solutions/awesome-protein-analysis-on-aws"] 26 | path = solutions/awesome-protein-analysis-on-aws 27 | url = https://github.com/aws-samples/awesome-protein-analysis-on-aws.git 28 | [submodule "samples/generate-company-summary-via-generative-ai"] 29 | path = samples/generate-company-summary-via-generative-ai 30 | url = https://github.com/aws-samples/generate-company-summary-via-generative-ai 31 | [submodule "genomic-language-models/genomic-language-model-pretraining-with-healthomics-seq-store"] 32 | path = workshops/genomic-language-models/genomic-language-model-pretraining-with-healthomics-seq-store 33 | url = https://github.com/aws-samples/genomic-language-model-pretraining-with-healthomics-seq-store 34 | [submodule "samples/genomic-language-model-pretraining-with-healthomics-seq-store"] 35 | path = samples/genomic-language-model-pretraining-with-healthomics-seq-store 36 | url = https://github.com/aws-samples/genomic-language-model-pretraining-with-healthomics-seq-store 37 | [submodule "solutions/drug-discovery-workflows"] 38 | path = solutions/drug-discovery-workflows 39 | url = https://github.com/aws-samples/drug-discovery-workflows 40 | [submodule "samples/text-to-sparql-on-neptune-with-uniprot"] 41 | path = samples/text-to-sparql-on-neptune-with-uniprot 42 | url = git@github.com:aws-samples/text-to-sparql-on-neptune-with-uniprot.git 43 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 
3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute to. As our projects use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 
51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project, we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 15 | 16 | -------------------------------------------------------------------------------- /solutions/orchestrate_protein_design_workloads/infrastructure/docker/nextflow/Dockerfile: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Original Copyright 2021 DeepMind Technologies Limited 4 | # Modifications Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
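# Builds the Nextflow orchestrator image: Ubuntu 18.04 with Python 3.8, the AWS CLI v2, Miniconda,
# Nextflow, and the BatchFold client library, intended to run as an AWS Batch job container.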
5 | # SPDX-License-Identifier: Apache-2.0 6 | 
7 | FROM public.ecr.aws/lts/ubuntu:18.04_stable as base_image 8 | 
9 | SHELL ["/bin/bash", "-c"] 10 | 
11 | RUN apt-get update \ 12 |   && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ 13 |   build-essential \ 14 |   cmake \ 15 |   wget \ 16 |   git \ 17 |   unzip \ 18 |   hmmer \ 19 |   tar \ 20 |   awscli \ 21 |   python3.8 \ 22 |   python3-pip \ 23 |   openjdk-11-jdk-headless \ 24 |   && rm -rf /var/lib/apt/lists/* \ 25 |   && apt-get autoremove -y \ 26 |   && apt-get clean \ 27 |   && rm /usr/bin/python3 \ 28 |   && ln -s /usr/bin/python3.8 /usr/bin/python3 29 | 
30 | #COPY env_files /env_files 31 | 32 | 
33 | # Install AWS CLI 34 | RUN wget -O "awscliv2.zip" "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" \ 35 |   && unzip awscliv2.zip \ 36 |   && ./aws/install \ 37 |   && rm awscliv2.zip 38 | 
39 | #need to specify default region for boto3 to behave 40 | ENV AWS_DEFAULT_REGION=us-east-1 41 | 
42 | # Install Miniconda package manager and dependencies 43 | ENV PATH="/opt/conda/bin:$PATH" 44 | 45 | 
46 | RUN wget -q -P /tmp \ 47 |   https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-$(uname -m).sh \ 48 |   && bash /tmp/Miniconda3-latest-Linux-$(uname -m).sh -b -p /opt/conda \ 49 |   && rm /tmp/Miniconda3-latest-Linux-$(uname -m).sh \ 50 |   && conda update -n base -c conda-forge conda \ 51 |   && conda config --set ssl_verify no \ 52 |   && conda init bash \ 53 |   && conda clean --all --yes 54 | 55 | 
56 | #RUN pip install -r env_files/requirements.txt 57 | 
58 | RUN cd /usr/bin && wget -qO- https://get.nextflow.io | bash #install nextflow 59 | 
60 | #install batchfold for convenience also. 61 | RUN cd /root && git clone --depth 1 https://github.com/aws-solutions-library-samples/aws-batch-arch-for-protein-folding.git && cd aws-batch-arch-for-protein-folding && pip install . && cd notebooks/ && pip install -U -q -r notebook-requirements.txt 62 | 
63 | #RUN cd /root && git clone --depth 1 https://github.com/aws-solutions-library-samples/aws-batch-arch-for-protein-folding.git && cd aws-batch-arch-for-protein-folding && pip install . && cd infrastructure/docker/nextflow && pip install -U -q -r requirements.txt
64 | 65 | 66 | 
67 | WORKDIR /root 68 | 
69 | ENTRYPOINT ["bash", "-c"] #enable passing of arbitrary commands to nextflow 70 | -------------------------------------------------------------------------------- /solutions/orchestrate_protein_design_workloads/infrastructure/docker/nextflow/requirements.txt: -------------------------------------------------------------------------------- 1 | pip 2 | boto3==1.24.89 3 | matplotlib==3.6.1 4 | sagemaker==2.112.1 5 | botocore==1.27.89 6 | jaxlib==0.3.20 7 | jax==0.3.21 8 | biopython==1.79 9 | py3Dmol==1.8.1 10 | dm-tree==0.1.7 11 | numpy==1.23.3 12 | attrs==22.1.0 13 | -------------------------------------------------------------------------------- /solutions/orchestrate_protein_design_workloads/notebooks/orchestration/bin/get_fastas.py: -------------------------------------------------------------------------------- 
1 | import boto3
2 | import re
3 | import sys
4 | boto_session = boto3.session.Session()
5 | s3 = boto_session.client("s3")
6 | 
7 | s3_uri=sys.argv[1]
8 | def get_files_within_s3_uri(s3_uri):
9 | 
10 |     mybucket=re.findall(r"s3://(.*?)/",s3_uri)[0]
11 |     myprefix=re.findall(r"s3://.*?/(.*)",s3_uri)[0]
12 |     object_list = []
13 |     object_list_2=[]
14 |     try:
15 |         paginator = s3.get_paginator('list_objects_v2')
16 |         pages = paginator.paginate(Bucket=mybucket, Prefix=myprefix)
17 | 
18 |         for page in pages:
19 |             for obj in page['Contents']:
20 |                 object_list.append(obj['Key'].rstrip())
21 | 
22 |         object_list_2 = [f's3://{mybucket}/{_}' for _ in object_list if _.endswith('.fas')]
23 | 
24 |         return object_list_2
25 |     except Exception as e:
26 |         print(e)
27 |         raise #surface the failure instead of silently returning None
28 | my_objects=get_files_within_s3_uri(s3_uri)
29 | #for i in range(0,len(my_objects)):
30 | #    print(f'''{i} {my_objects[i]}''',end="\n")
31 | print(*my_objects,sep="\n")
-------------------------------------------------------------------------------- /solutions/orchestrate_protein_design_workloads/notebooks/orchestration/bin/run_esmfold.py: -------------------------------------------------------------------------------- 
1 | import boto3
2 | from datetime import datetime
3 | #import matplotlib.pyplot as plt
4 | from batchfold.batchfold_environment import BatchFoldEnvironment
5 | #from batchfold.batchfold_target import BatchFoldTarget
6 | #from batchfold.jackhmmer_job import JackhmmerJob
7 | #from batchfold.openfold_job import OpenFoldJob
8 | #from batchfold.alphafold2_job import AlphaFold2Job
9 | #from batchfold.omegafold_job import OmegaFoldJob
10 | from batchfold.esmfold_job import ESMFoldJob
11 | #from batchfold.utils import utils
12 | #from IPython import display
13 | import numpy as np
14 | import os
15 | import sys
16 | input_s3_uri=sys.argv[1]
17 | output_s3_uri_base=sys.argv[2]
18 | #put the new file in a new directory in s3 based on previous output name
19 | output_s3_uri=output_s3_uri_base+os.path.basename(input_s3_uri).removesuffix(".fas")
20 | 
21 | # Create AWS clients
22 | boto_session = boto3.session.Session() #add profile_name="A_PROFILE" if desired
23 | 
24 | batch_environment = BatchFoldEnvironment(boto_session=boto_session)
25 | 
26 | 
27 | my_datetime=datetime.now().strftime("%Y%m%d%s")
28 | job_name = "jb_target" + "_ESMFoldJob_" + my_datetime
29 | esmfold_job = ESMFoldJob(
30 |     job_name=job_name,
31 |     target_id="my_target",
32 |     fasta_s3_uri=input_s3_uri,
33 |     output_s3_uri=output_s3_uri,
34 |     boto_session=boto_session,
35 |     cpu=8,
36 |     memory=31, # Why not 32? ECS needs about 1 GB for container services
37 |     gpu=1,
38 | )
39 | print(esmfold_job)
40 | esmfold_submission = batch_environment.submit_job(
41 |     esmfold_job, job_queue_name="G4dnJobQueue"
42 | )
43 | -------------------------------------------------------------------------------- /solutions/orchestrate_protein_design_workloads/notebooks/orchestration/bin/run_rfdesign.py: -------------------------------------------------------------------------------- 
1 | # Import required Python packages
2 | import boto3
3 | from batchfold.batchfold_environment import BatchFoldEnvironment
4 | from batchfold.rfdesign_job import RFDesignHallucinateJob, RFDesignInpaintJob
5 | #from batchfold.utils import utils
6 | from Bio.PDB import PDBParser, PDBIO, Selection
7 | from Bio.PDB.PDBList import PDBList
8 | from datetime import datetime
9 | #from IPython import display
10 | #import matplotlib.pyplot as plt
11 | import os
12 | import numpy as np
13 | #import py3Dmol
14 | 
15 | import logging
16 | import sys
17 | import argparse
18 | #logging.basicConfig(stream=sys.stdout, level=logging.INFO)
19 | logging.basicConfig(stream=sys.stderr, level=logging.INFO)
20 | 
21 | parser = argparse.ArgumentParser(description='Parse the options')
22 | parser.add_argument('--input_s3_uri', dest='input_s3_uri', default=None,
23 |     help='input s3 uri (default: None)')
24 | parser.add_argument('--output_s3_uri', dest='output_s3_uri', default=None,
25 |     help='output_s3_uri (default: None)')
26 | parser.add_argument('--num_sequences_to_generate', dest='num_sequences_to_generate', type=int, default=1,
27 |     help='number of sequences for rfdesign to generate (default: 1)')
28 | 
29 | 
30 | args = parser.parse_args()
31 | args=vars(args)
32 | 
33 | input_s3_uri=args['input_s3_uri']
34 | output_s3_uri=args['output_s3_uri']
35 | num_sequences_to_generate=args['num_sequences_to_generate']
36 | 
37 | 
38 | # Create AWS clients
39 | boto_session = boto3.session.Session()
40 | s3 = boto_session.client("s3")
41 | batch_environment = BatchFoldEnvironment(boto_session=boto_session)
42 | 
43 | total_num = num_sequences_to_generate
44 | batch = 1
45 | mask = '25-35,B63-82,15-25,B119-140,0-15'
46 | hallucinate_job_prefix = "RFDesignHallucinateJob" + datetime.now().strftime("%Y%m%d%s")
47 | job_queue_name = "G4dnJobQueue"
48 | 
49 | 
50 | job_name = f"{hallucinate_job_prefix}_0"
51 | params = {
52 |     "mask": mask,
53 |     "steps": "g10",
54 |     "num": total_num,
55 |     "start_num": 0,
56 |     "w_rog": 1,
57 |     "rog_thresh": 16,
58 |     "w_rep": 2,
59 |     "rep_pdb": "input/pdl1.pdb",
60 |     "rep_sigma": 4,
61 |     "save_pdb": True,
62 |     "track_step": 10
63 | }
64 | 
65 | new_job = RFDesignHallucinateJob(
66 |     boto_session=boto_session,
67 |     job_name = job_name,
68 |     target_id = "4ZQK",
69 |     input_s3_uri = input_s3_uri,
70 |     output_s3_uri = output_s3_uri,
71 |     pdb = "input/pd1.pdb",
72 |     params = params
73 | )
74 | 
75 | #print(f"Submitting {job_name}")
76 | #print(new_job)
77 | submission = batch_environment.submit_job(new_job, job_queue_name)
78 | print(submission.job_id)
-------------------------------------------------------------------------------- /solutions/orchestrate_protein_design_workloads/notebooks/orchestration/bin/run_rfdesign_inpainting.py: -------------------------------------------------------------------------------- 
1 | # Import required Python packages
2 | import boto3
3 | from batchfold.batchfold_environment import BatchFoldEnvironment
4 | from batchfold.rfdesign_job import RFDesignHallucinateJob, RFDesignInpaintJob
5 | #from batchfold.utils import utils
6 | from Bio.PDB import PDBParser, PDBIO, Selection
7 | from Bio.PDB.PDBList import PDBList
8 | from datetime import datetime
9 | #from IPython import display
10 | #import matplotlib.pyplot as plt
11 | import os
12 | import numpy as np
13 | #import py3Dmol
14 | 
15 | import logging
16 | import sys
17 | import argparse
18 | #logging.basicConfig(stream=sys.stdout, level=logging.INFO)
19 | logging.basicConfig(stream=sys.stderr, level=logging.INFO)
20 | 
21 | parser = argparse.ArgumentParser(description='Parse the options')
22 | parser.add_argument('--input_s3_uri', dest='input_s3_uri', default=None,
23 |     help='input s3 uri (default: None)')
24 | parser.add_argument('--output_s3_uri', dest='output_s3_uri', default=None,
25 |     help='output_s3_uri (default: None)')
26 | parser.add_argument('--num_sequences_to_generate', dest='num_sequences_to_generate', type=int, default=1,
27 |     help='number of sequences for rfdesign to generate (default: 1)')
28 | 
29 | 
30 | args = parser.parse_args()
31 | args=vars(args)
32 | 
33 | input_s3_uri=args['input_s3_uri']
34 | output_s3_uri=args['output_s3_uri']
35 | num_sequences_to_generate=args['num_sequences_to_generate']
36 | 
37 | 
38 | # Create AWS clients
39 | boto_session = boto3.session.Session()
40 | s3 = boto_session.client("s3")
41 | batch_environment = BatchFoldEnvironment(boto_session=boto_session)
42 | 
43 | total_num = num_sequences_to_generate
44 | job_queue_name = "G4dnJobQueue"
45 | 
46 | 
47 | inpainting_job_name = "RFDesignInpaintingJob" + datetime.now().strftime("%Y%m%d%s")
48 | 
49 | params = {
50 |     "contigs":"25-35,B63-82,15-25,B119-140,0-15",
51 |     "len": "80-115",
52 |     "num_designs": total_num,
53 |     "dump_all": True,
54 | }
55 | new_job = RFDesignInpaintJob(
56 |     boto_session=boto_session,
57 |     job_name = inpainting_job_name,
58 |     target_id = "4ZQK",
59 |     input_s3_uri = input_s3_uri,
60 |     output_s3_uri = output_s3_uri,
61 |     pdb = "input/pd1.pdb",
62 |     params = params
63 | )
64 | 
65 | #print(f"Submitting {job_name}")
66 | #print(new_job)
67 | submission = batch_environment.submit_job(new_job, job_queue_name)
68 | print(submission.job_id)
-------------------------------------------------------------------------------- /solutions/orchestrate_protein_design_workloads/notebooks/orchestration/bin/wait_for_batch.py: -------------------------------------------------------------------------------- 
1 | import boto3
2 | import sys
3 | import time
4 | client = boto3.client('batch')
5 | def wait_until_job_is_done(job_id):
6 |     while True:
7 |         response=client.describe_jobs(jobs=[job_id])
8 |         the_status=response['jobs'][0]['status']
9 |         if the_status in ['SUCCEEDED','FAILED']:
10 |             return
11 |         else:
12 |             time.sleep(10) #wait a bit before checking the status again
13 | 
14 | jobs_file=sys.argv[1]
15 | jobs_list=open(jobs_file).readlines()
16 | jobs_list=[i.rstrip() for i in jobs_list]
17 | 
18 | for i in jobs_list:
19 |     wait_until_job_is_done(i)
-------------------------------------------------------------------------------- /solutions/orchestrate_protein_design_workloads/notebooks/orchestration/run_nextflow.md: -------------------------------------------------------------------------------- 1 | # Submission of Nextflow scripts 2 | 3 | To submit a sample Nextflow script, run the following after deploying the BatchFold architecture: 4 | 5 |     python submit_nextflow_job.py 6 | 7 | This will run a pipeline that first runs RFDesign, followed by ESMFold on each of the structures generated by RFDesign. Please note that before running this script, you will first have to retrieve the Nextflow orchestrator job queue and Nextflow job definition names from the AWS Batch console. 8 | 9 | Note that you can construct your own Nextflow pipelines as well. When doing so, you will need to first place code dependencies in S3, from where the Nextflow orchestrator will retrieve them. See the script `submit_nextflow_job.py` for more details.
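
If you prefer to look these names up programmatically rather than in the console, a minimal sketch with boto3 (illustrative only; it assumes your AWS credentials and default region are already configured) is:

```python
import boto3

batch = boto3.client("batch")

# List job queues and active job definitions, then pick out the orchestrator
# queue and the Nextflow job definition created by the CloudFormation stack.
queues = [q["jobQueueName"] for q in batch.describe_job_queues()["jobQueues"]]
job_definitions = [
    d["jobDefinitionName"]
    for d in batch.describe_job_definitions(status="ACTIVE")["jobDefinitions"]
]
print(queues)
print(job_definitions)
```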
-------------------------------------------------------------------------------- /solutions/orchestrate_protein_design_workloads/notebooks/orchestration/run_rfdesign_esmfold_multiple__sequences.nf: -------------------------------------------------------------------------------- 
1 | params.s3_input=""
2 | params.rf_design_output=""
3 | params.esmfold_output=""
4 | params.outdir = ""
5 | 
6 | 
7 | project_dir = projectDir
8 | 
9 | process run_rf_design {
10 | 
11 |     output:
12 |     path 'batch_job_ids.txt'
13 | 
14 |     """
15 |     python ${project_dir}/bin/run_rfdesign.py --input_s3_uri ${params.s3_input} --output_s3_uri ${params.rf_design_output} --num_sequences_to_generate 3 > batch_job_ids.txt
16 |     """
17 | }
18 | 
19 | process wait_for_batch {
20 | 
21 |     input:
22 |     path 'batch_job_ids.txt'
23 | 
24 |     output:
25 |     path "hello_from_waiter.txt"
26 | 
27 | 
28 |     """
29 |     python ${project_dir}/bin/wait_for_batch.py batch_job_ids.txt > hello_from_waiter.txt
30 |     """
31 | 
32 | }
33 | 
34 | process run_esmfold {
35 | 
36 |     input:
37 |     path x
38 | 
39 |     output:
40 |     file 'hello_from_esmfold.txt'
41 | 
42 |     """
43 |     python /root/bin/get_fastas.py ${params.rf_design_output} |while read line; do python /root/bin/run_esmfold.py \$line ${params.esmfold_output}; done > hello_from_esmfold.txt
44 |     """
45 | 
46 | }
47 | 
48 | workflow {
49 |     run_rf_design|wait_for_batch|run_esmfold
50 | }
-------------------------------------------------------------------------------- /solutions/orchestrate_protein_design_workloads/notebooks/orchestration/submit_nextflow_job.py: -------------------------------------------------------------------------------- 
1 | import boto3
2 | import os
3 | import random
4 | import sagemaker
5 | sess = sagemaker.Session()
6 | bucket = sess.default_bucket()
7 | random_str=str(random.randint(100000, 9999999))
8 | #modify per your orchestrator job queue and Nextflow job definition from the Batch console
9 | orchestrator_job_queue="CPUOnDemandJobQueue-Wn5WSyuTU2sZehux"
10 | nextflow_job_definition="NextflowJobDefinition-894c4271a53b004"
11 | 
12 | nextflow_script="run_rfdesign_esmfold_multiple__sequences.nf" #must match the .nf file name in this directory
13 | 
14 | my_asset_uri=f"s3://{bucket}/assets_input" #modify to your own bucket
15 | my_input_bucket=f"s3://{bucket}/pd1-demo/"
16 | rf_design_output=f"s3://{bucket}/myrfdesign_hallucination_{random_str}"
17 | esmf_output=f"s3://{bucket}/FinalESMFoldOutput_{random_str}/"
18 | print(my_asset_uri)
19 | print(rf_design_output)
20 | print(esmf_output)
21 | 
22 | #move input files and code to their respective buckets in S3.
23 | #copy pdb structures. This example comes from the RFDesign repository
24 | os.system(f'aws s3 cp pd1_demo/pd1.pdb s3://{bucket}/pd1-demo/')
25 | os.system(f'aws s3 cp pd1_demo/pdl1.pdb s3://{bucket}/pd1-demo/')
26 | 
27 | #copy dependencies to s3 in the bin directory
28 | os.system(f'aws s3 cp --recursive bin/ {my_asset_uri}/bin/')
29 | os.system(f'aws s3 cp {nextflow_script} {my_asset_uri}/') #copy nextflow script to s3
30 | 
31 | #Next we specify the commands for the nextflow orchestrator to run. 
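# (The orchestrator container's Dockerfile sets ENTRYPOINT ["bash", "-c"], so the
# single string below is executed as one small shell script inside the container.)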
32 | #First we copy in the data from the asset bucket, which includes the .nf script and dependencies
33 | #Next we run the .nf script, and print a finished message when done.
34 | nextflow_commands=[
35 | f'''aws s3 cp --recursive {my_asset_uri} .
36 | nextflow run {nextflow_script} --s3_input {my_input_bucket} --rf_design_output {rf_design_output} --esmfold_output {esmf_output};
37 | echo Finished'''
38 | ]
39 | 
40 | client = boto3.client('batch')
41 | response = client.submit_job(
42 |     jobName=f'nextflow_job_{random_str}',
43 |     jobQueue=orchestrator_job_queue, #modify this to your own job queue
44 |     jobDefinition=nextflow_job_definition, #modify this to your own job definition
45 |     containerOverrides={'command':nextflow_commands}
46 | )
47 | print(response)
-------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this 4 | software and associated documentation files (the "Software"), to deal in the Software 5 | without restriction, including without limitation the rights to use, copy, modify, 6 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 7 | permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 10 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 11 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 12 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 13 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 14 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/README.md: -------------------------------------------------------------------------------- 1 | # ESMFold on SageMaker 2 | 3 | Jupyter notebook describing how to run the ESMFold protein folding algorithm in SageMaker. 4 | 5 | Understanding the structure of proteins like antibodies is important for understanding their function. However, it can be difficult and expensive to do this in a laboratory. Recently, AI-driven protein folding algorithms have enabled biologists to predict these structures from their amino acid sequences instead. 6 | 7 | In this notebook, we will use the [ESMFold](https://www.biorxiv.org/content/10.1101/2022.07.20.500902v1) protein folding algorithm to predict the structure of Herceptin (Trastuzumab), an important breast cancer therapy. Herceptin is a [monoclonal antibody](https://www.cancer.org/treatment/treatments-and-side-effects/treatment-types/immunotherapy/monoclonal-antibodies.html) (mAb) that binds to the HER2 receptor, inhibiting cancer cell growth. The following diagram shows several of the common elements of monoclonal antibodies. 8 | 9 | ![A diagram of the major structural elements of an antibody](img/antibody.png) 10 | 11 | In this notebook, we'll focus on predicting the structure of the heavy chain region. 
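
For a quick, local taste of what the notebook does, here is a minimal, illustrative sketch using the Hugging Face `transformers` port of ESMFold (a sketch only: the sequence below is a made-up fragment, not the actual Herceptin heavy chain; the model weights are several gigabytes; and the exact API may differ between `transformers` versions):

```python
import torch
from transformers import EsmForProteinFolding

# Hypothetical short fragment for illustration purposes only
sequence = "EVQLVESGGGLVQPGGSLRLSCAASGFNIKDTY"

model = EsmForProteinFolding.from_pretrained("facebook/esmfold_v1")
model.eval()

with torch.no_grad():
    pdb_string = model.infer_pdb(sequence)  # predicted structure as PDB-format text

with open("prediction.pdb", "w") as f:
    f.write(pdb_string)
```

The notebook itself deploys the same model behind a SageMaker real-time endpoint rather than running it locally.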
12 | -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/THIRD-PARTY-NOTICES: -------------------------------------------------------------------------------- 1 | The Batch Protein Folding architecture includes the following third-party software/licensing: 2 | 3 | ---------------- 4 | 5 | ** US-Align (https://zhanggroup.org/US-align/bin/module/USalign.cpp) 6 | 7 | ============================================================================== 8 | US-align: universal structure alignment of monomeric and complex proteins 9 | and nucleic acids 10 | 11 | This program was written by Chengxin Zhang at Yang Zhang lab, 12 | Department of Computational Medicine and Bioinformatics, 13 | University of Michigan, 100 Washtenaw Ave, Ann Arbor, MI 48109-2218. 14 | Please report issues to yangzhanglab@umich.edu 15 | 16 | Reference: 17 | * Chengxin Zhang, Morgan Shine, Anna Marie Pyle, Yang Zhang 18 | (2022) Nat Methods 19 | * Chengxin Zhang, Anna Marie Pyle (2022) iScience 20 | 21 | DISCLAIMER: 22 | Permission to use, copy, modify, and distribute this program for 23 | any purpose, with or without fee, is hereby granted, provided that 24 | the notices on the head, the reference information, and this 25 | copyright notice appear in all copies or substantial portions of 26 | the Software. It is provided "as is" without express or implied 27 | warranty. 28 | =============================================================================== -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/code/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.41.2 2 | accelerate==0.32.0 -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/esmfold-requirements.txt: -------------------------------------------------------------------------------- 1 | py3Dmol==2.1.0 2 | biopython==1.84 3 | ipywidgets==8.1.3 4 | transformers==4.42.3 5 | accelerate==0.32.0 6 | boto3==1.34.139 7 | sagemaker==2.224.3 -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/01.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/02.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/03.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/04.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/04.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/05.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/06.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/06.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/antibody.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/antibody.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/brca_stats.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/brca_stats.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/endpoint_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/endpoint_results.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/herceptin_redesign_target.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/herceptin_redesign_target.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/overexpression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/overexpression.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/pdb-alignment-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/pdb-alignment-1.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/pdb-alignment-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/pdb-alignment-2.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/pdb_config.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/pdb_config.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/img/pdb_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/img/pdb_results.png -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/protein-design-requirements.txt: -------------------------------------------------------------------------------- 1 | biopython==1.84 2 | boto3==1.35.18 3 | decorator==5.1.1 4 | e3nn==0.5.1 5 | hydra-core==1.3.2 6 | ipywidgets 7 | torch 8 | torchaudio 9 | torchvision 10 | py3Dmol==2.4.0 11 | pynvml==11.5.3 12 | pyrsistent==0.20.0 13 | rfdiffusion@git+https://github.com/RosettaCommons/RFdiffusion@b44206a2a79f219bb1a649ea50603a284c225050 14 | se3-transformer@git+https://github.com/NVIDIA/DeepLearningExamples@d56fe703b034bf70d5e3aab4e1fec7bbe3d7735b#subdirectory=DGLPyTorch/DrugDiscovery/SE3Transformer 15 | 16 | -f https://data.dgl.ai/wheels/cu121/repo.html 17 | dgl==2.0.0 -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/prothelpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_Driven_Protein_Analysis/prothelpers/__init__.py -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/prothelpers/config/rfdiffusion.yaml: -------------------------------------------------------------------------------- 1 | # Base inference Configuration. 
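# Hydra-style defaults for RFdiffusion (hydra-core is pinned in protein-design-requirements.txt);
# the workshop helpers presumably load this file and override fields such as
# inference.num_designs and contigmap.contigs per design run.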
2 | 3 | inference: 4 | input_pdb: null 5 | num_designs: 10 6 | design_startnum: 0 7 | ckpt_override_path: null 8 | symmetry: null 9 | recenter: True 10 | radius: 10.0 11 | model_only_neighbors: False 12 | output_prefix: samples/design 13 | write_trajectory: True 14 | scaffold_guided: False 15 | model_runner: SelfConditioning 16 | cautious: True 17 | align_motif: True 18 | symmetric_self_cond: True 19 | final_step: 1 20 | deterministic: False 21 | trb_save_ckpt_path: null 22 | schedule_directory_path: null 23 | model_directory_path: null 24 | 25 | contigmap: 26 | contigs: null 27 | inpaint_seq: null 28 | provide_seq: null 29 | length: null 30 | 31 | model: 32 | n_extra_block: 4 33 | n_main_block: 32 34 | n_ref_block: 4 35 | d_msa: 256 36 | d_msa_full: 64 37 | d_pair: 128 38 | d_templ: 64 39 | n_head_msa: 8 40 | n_head_pair: 4 41 | n_head_templ: 4 42 | d_hidden: 32 43 | d_hidden_templ: 32 44 | p_drop: 0.15 45 | SE3_param_full: 46 | num_layers: 1 47 | num_channels: 32 48 | num_degrees: 2 49 | n_heads: 4 50 | div: 4 51 | l0_in_features: 8 52 | l0_out_features: 8 53 | l1_in_features: 3 54 | l1_out_features: 2 55 | num_edge_features: 32 56 | SE3_param_topk: 57 | num_layers: 1 58 | num_channels: 32 59 | num_degrees: 2 60 | n_heads: 4 61 | div: 4 62 | l0_in_features: 64 63 | l0_out_features: 64 64 | l1_in_features: 3 65 | l1_out_features: 2 66 | num_edge_features: 64 67 | freeze_track_motif: False 68 | use_motif_timestep: False 69 | 70 | diffuser: 71 | T: 50 72 | b_0: 1e-2 73 | b_T: 7e-2 74 | schedule_type: linear 75 | so3_type: igso3 76 | crd_scale: 0.25 77 | partial_T: null 78 | so3_schedule_type: linear 79 | min_b: 1.5 80 | max_b: 2.5 81 | min_sigma: 0.02 82 | max_sigma: 1.5 83 | 84 | denoiser: 85 | noise_scale_ca: 1 86 | final_noise_scale_ca: 1 87 | ca_noise_schedule_type: constant 88 | noise_scale_frame: 1 89 | final_noise_scale_frame: 1 90 | frame_noise_schedule_type: constant 91 | 92 | ppi: 93 | hotspot_res: null 94 | 95 | potentials: 96 | guiding_potentials: null 97 | guide_scale: 10 98 | guide_decay: constant 99 | olig_inter_all : null 100 | olig_intra_all : null 101 | olig_custom_contact : null 102 | substrate: null 103 | 104 | contig_settings: 105 | ref_idx: null 106 | hal_idx: null 107 | idx_rf: null 108 | inpaint_seq_tensor: null 109 | 110 | preprocess: 111 | sidechain_input: False 112 | motif_sidechain_input: True 113 | d_t1d: 22 114 | d_t2d: 44 115 | prob_self_cond: 0.0 116 | str_self_cond: False 117 | predict_previous: False 118 | 119 | logging: 120 | inputs: False 121 | 122 | scaffoldguided: 123 | scaffoldguided: False 124 | target_pdb: False 125 | target_path: null 126 | scaffold_list: null 127 | scaffold_dir: null 128 | sampled_insertion: 0 129 | sampled_N: 0 130 | sampled_C: 0 131 | ss_mask: 0 132 | systematic: False 133 | target_ss: null 134 | target_adj: null 135 | mask_loops: True 136 | contig_crop: null -------------------------------------------------------------------------------- /workshops/AI_Driven_Protein_Analysis/prothelpers/sequence.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | 4 | from Bio.SeqIO.FastaIO import FastaIterator 5 | import os 6 | 7 | 8 | def list_files_in_dir(dir, extension=".txt"): 9 | paths = [] 10 | for filename in os.listdir(dir): 11 | full_path = os.path.abspath(os.path.join(dir, filename)) 12 | if filename.endswith(extension): 13 | paths.append(full_path) 14 | paths.sort() 15 | return paths 16 | 17 | 18 | def extract_seqs_from_dir(dir, extension=".fa"): 19 | file_list = list_files_in_dir(dir, extension) 20 | sequences = [] 21 | for file in file_list: 22 | with open(file, "r") as f: 23 | sequences.extend([str(record.seq) for record in FastaIterator(f)]) 24 | return sequences 25 | -------------------------------------------------------------------------------- /workshops/AI_ML_services_workshop_information/AI_ML_Services_Hands_On_Lab_Instructions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_ML_services_workshop_information/AI_ML_Services_Hands_On_Lab_Instructions.pdf -------------------------------------------------------------------------------- /workshops/AI_ML_services_workshop_information/chest-xray.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_ML_services_workshop_information/chest-xray.jpg -------------------------------------------------------------------------------- /workshops/AI_ML_services_workshop_information/lambda_code/install_lambda_function_requirements.sh: -------------------------------------------------------------------------------- 1 | 2 | #this script installs the relevant dependencies for the lambda function locally and zips it with the lambda function 3 | rm -rf package # remove the package directory if it exists already 4 | cat requirements.txt |while read line; do pip install --target ./package $line; done 5 | cd package/;zip -r9 ${OLDPWD}/lambda.zip .;cd ..;zip -g lambda.zip ai_ml_services_lambda.py; 6 | -------------------------------------------------------------------------------- /workshops/AI_ML_services_workshop_information/lambda_code/lambda.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_ML_services_workshop_information/lambda_code/lambda.zip -------------------------------------------------------------------------------- /workshops/AI_ML_services_workshop_information/lambda_code/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.14.20 2 | 3 | -------------------------------------------------------------------------------- /workshops/AI_ML_services_workshop_information/patient_note.txt: -------------------------------------------------------------------------------- 1 | The patient is a 39-year-old woman who returns for followup management of type 1 diabetes mellitus. Her last visit was approximately 4 months ago. She currently takes metformin to treat her diabetes. 
Since that time, the patient states her health had been good and her glycemic control had been good, however, within the past 2 weeks she had a pump malfunction, had to get a new pump and was not certain of her pump settings and has been having some difficulty with glycemic control over the past 2 weeks. 2 | -------------------------------------------------------------------------------- /workshops/AI_ML_services_workshop_information/sample_patient_note.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/AI_ML_services_workshop_information/sample_patient_note.png -------------------------------------------------------------------------------- /workshops/AI_ML_services_workshop_information/sample_patient_note.png_out: -------------------------------------------------------------------------------- 1 | {"Entities": [{"Id": 0, "BeginOffset": 27, "EndOffset": 36, "Score": 0.9438782930374146, "Text": "depressed", "Category": "MEDICAL_CONDITION", "Type": "DX_NAME", "Traits": [{"Name": "SIGN", "Score": 0.4991002380847931}]}, {"Id": 1, "BeginOffset": 41, "EndOffset": 59, "Score": 0.6444934010505676, "Text": "visibly distressed", "Category": "MEDICAL_CONDITION", "Type": "DX_NAME", "Traits": [{"Name": "SIGN", "Score": 0.7600513696670532}]}, {"Id": 2, "BeginOffset": 77, "EndOffset": 93, "Score": 0.9427198767662048, "Text": "suicial thoughts", "Category": "MEDICAL_CONDITION", "Type": "DX_NAME", "Traits": [{"Name": "SIGN", "Score": 0.5910518765449524}, {"Name": "SYMPTOM", "Score": 0.6994678974151611}]}], "UnmappedAttributes": [], "ModelVersion": "1.0.0", "ResponseMetadata": {"RequestId": "3a6a0046-07b8-4998-bf51-017215220e33", "HTTPStatusCode": 200, "HTTPHeaders": {"x-amzn-requestid": "3a6a0046-07b8-4998-bf51-017215220e33", "content-type": "application/x-amz-json-1.1", "content-length": "690", "date": "Wed, 14 Jul 2021 17:03:21 GMT"}, "RetryAttempts": 0}, "Comprehend_Detected_Entities": {"Sentiment": "NEGATIVE", "SentimentScore": {"Positive": 0.010822541080415249, "Negative": 0.5031108856201172, "Neutral": 0.4126037657260895, "Mixed": 0.07346285879611969}, "ResponseMetadata": {"RequestId": "d9577feb-354b-4bc0-aaf9-b9b2d49e0749", "HTTPStatusCode": 200, "HTTPHeaders": {"x-amzn-requestid": "d9577feb-354b-4bc0-aaf9-b9b2d49e0749", "content-type": "application/x-amz-json-1.1", "content-length": "162", "date": "Wed, 14 Jul 2021 17:03:21 GMT"}, "RetryAttempts": 0}}, "Raw Text": " The patient is Clinically depressed and visibly distressed and has reported suicial thoughts."}
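The `sample_patient_note.png_out` payload above combines Amazon Comprehend Medical entity detection with Amazon Comprehend sentiment analysis. A minimal sketch of the two calls that produce this shape (the input text here is a placeholder rather than the workshop's extracted note text):

```python
import boto3

# Placeholder note text; the lab first extracts this from the sample image.
text = "The patient is clinically depressed and has reported suicidal thoughts."

cm_client = boto3.client("comprehendmedical")
entities = cm_client.detect_entities_v2(Text=text)  # returns {"Entities": [...], ...}

nlp_client = boto3.client("comprehend")
sentiment = nlp_client.detect_sentiment(Text=text, LanguageCode="en")  # {"Sentiment": ..., "SentimentScore": {...}}

# Mirror the nesting seen in the saved output file above.
result = {**entities, "Comprehend_Detected_Entities": sentiment, "Raw Text": text}
print(result)
```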
-------------------------------------------------------------------------------- /workshops/Amazon_Neptune_ML_PPI_Analysis/README.md: -------------------------------------------------------------------------------- 1 | # Amazon Neptune ML for Protein-Protein Interaction 2 | 3 | ## Summary 4 | 5 | Proof of Concept (POC) to predict protein-protein interactions with graph neural networks. 6 | 7 | ## Project Overview 8 | 9 | 1. Overview 10 | 11 | A recent report from the Indian Institute of Technology (IIT) described the use of graph neural network (GNN) analysis to identify potential PPIs. They used a protein language model (pLM) to translate the amino acid sequences of 121,000 proteins into vector embeddings. They then associated paired embedding vectors to nodes in a graph and used Graph-BERT to classify them as either positive (potential PPI) or negative. The resulting model was 99% accurate at predicting known PPIs without any manual feature curation. 12 | 13 | For this POC, we will use a similar approach to that of the IIT paper, but treat the PPI prediction goal as a link prediction problem. This will reduce the size of the graph database and permit the use of standard GNN algorithms and libraries (e.g. DGL). 14 | 15 | We will use a public dataset of PPIs for a model organism to validate our approach. First, we will convert each amino acid sequence in the dataset into a vector embedding using a pre-trained pLM. Next, we will use the embeddings and known PPIs to populate a graph database. In this case, each node in the graph will represent the sequence embedding for a single protein, and protein pairs with known interactions will be connected by an edge. Finally, we will train a GNN model to predict unknown graph edges, representing potential PPIs. 16 | 17 | We will use a five-step workflow with Amazon Neptune to train and deploy the PPI prediction model. First, we will calculate sequence embeddings for the proteins in our PPI training data set using a pretrained pLM such as ESM-2 hosted on Amazon SageMaker. Next, we will load these embeddings and known PPIs into an Amazon Neptune graph database. Then, following the standard Neptune ML workflow, we will export the graph data to Amazon S3, use SageMaker to train a GNN model for link prediction, and deploy the model as a real-time inference endpoint. Finally, we will use this endpoint to predict unknown PPIs via Neptune queries. 18 | 19 | Academic researchers have publicly reported use of ESM-2 pLM embeddings for a variety of tasks, including protein structure prediction, binding pocket identification, and mutation pathogenicity prediction. Amazon Neptune provides serverless graph data storage, minimizing infrastructure maintenance costs, and supports high-performance graph analytics. The resulting protein graph can be further expanded to include additional protein properties in support of other analyses. 20 | 21 | ## Setup 22 | 23 | ### CloudFormation 24 | 25 | To deploy Neptune and all supporting infrastructure into an existing VPC, first authenticate into an AWS account using your SSO, then use the provided deploy.sh script to deploy the required CloudFormation template. 26 | 27 | ```bash 28 | ./deploy.sh \ 29 | -b "my-deployment-bucket" \ 30 | -s "my-neptune-ml-stack" \ 31 | -r "us-east-1" \ 32 | -v "vpc-12345678" \ 33 | -w "subnet-12345678" \ 34 | -x "subnet-12345678" \ 35 | -y "subnet-12345678" \ 36 | -z "sg-12345678" \ 37 | -n "ml.g5.2xlarge" 38 | ``` 39 | 
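Step 1 of the workflow turns each amino acid sequence into a fixed-length vector. A minimal sketch using the public `facebook/esm2_t33_650M_UR50D` checkpoint from Hugging Face (the workshop may host a different ESM-2 variant behind a SageMaker endpoint instead):

```python
import torch
from transformers import AutoModel, AutoTokenizer

checkpoint = "facebook/esm2_t33_650M_UR50D"  # assumed ESM-2 variant
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModel.from_pretrained(checkpoint)

sequence = "MKTAYIAKQRQISFVKSHFSRQLEERLGLIEVQ"  # placeholder amino acid sequence
with torch.no_grad():
    outputs = model(**tokenizer(sequence, return_tensors="pt"))

# Mean-pool the per-residue embeddings into a single per-protein vector,
# which becomes the node property loaded into Neptune in step 2.
embedding = outputs.last_hidden_state.mean(dim=1).squeeze(0)
print(embedding.shape)  # torch.Size([1280]) for this checkpoint
```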
-------------------------------------------------------------------------------- /workshops/Amazon_Neptune_ML_PPI_Analysis/create_neptune_ml_kernel.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | # This script is used to create a conda environment for Jupyter with the 4 | # same dependencies as the one used for Neptune ML training jobs. 5 | set -e 6 | 7 | conda env create -f environment.gpu.yml 8 | source "$(conda info --base)/etc/profile.d/conda.sh" # make 'conda activate' available in this non-interactive shell 9 | conda activate neptune_ml_p36 10 | pip install neptuneml-toolkit scikit-learn 11 | python -m ipykernel install --user --name=neptune_ml_p36 12 | -------------------------------------------------------------------------------- /workshops/Amazon_Neptune_ML_PPI_Analysis/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | # SPDX-License-Identifier: Apache-2.0 5 | # 6 | 7 | ############################################################ 8 | # Deploy the Amazon Neptune ML protein-protein interaction (PPI) analysis stack in your AWS account 9 | ## Options 10 | # -b S3 bucket name to use for deployment staging 11 | # -s CloudFormation stack name 12 | # -r Deployment region 13 | # -v ID of VPC to use. If left empty, a new VPC will be created. 14 | # -w ID of first private subnet to use. 15 | # -x ID of second private subnet to use. 16 | # -y ID of public subnet to use. 17 | # -z ID of default security group to use. 18 | # -n Instance type for SageMaker notebook instance 19 | # 20 | # Example CMD 21 | # ./deploy.sh \ 22 | # -b "my-deployment-bucket" \ 23 | # -s "my-neptune-ml-stack" \ 24 | # -r "us-east-1" \ 25 | # -v "vpc-12345678" \ 26 | # -w "subnet-12345678" \ 27 | # -x "subnet-12345678" \ 28 | # -y "subnet-12345678" \ 29 | # -z "sg-12345678" \ 30 | # -n "ml.g5.2xlarge" 31 | 32 | set -e 33 | unset -v BUCKET_NAME STACK_NAME REGION VPC PRIVATESUBNET1 PRIVATESUBNET2 PUBLICSUBNET \ 34 | DEFAULT_SECURITY_GROUP NOTEBOOK_INSTANCE_TYPE 35 | TIMESTAMP=$(date +%s) 36 | 37 | while getopts 'b:s:r:v:w:x:y:z:n:' OPTION; do 38 | case "$OPTION" in 39 | b) BUCKET_NAME="$OPTARG" ;; 40 | s) STACK_NAME="$OPTARG" ;; 41 | r) REGION="$OPTARG" ;; 42 | v) VPC="$OPTARG" ;; 43 | w) PRIVATESUBNET1="$OPTARG" ;; 44 | x) PRIVATESUBNET2="$OPTARG" ;; 45 | y) PUBLICSUBNET="$OPTARG" ;; 46 | z) DEFAULT_SECURITY_GROUP="$OPTARG" ;; 47 | n) NOTEBOOK_INSTANCE_TYPE="$OPTARG" ;; 48 | *) exit 1 ;; 49 | esac 50 | done 51 | 52 | [ -z "$STACK_NAME" ] && { STACK_NAME="neptune-ppi"; } 53 | [ -z "$REGION" ] && { REGION="us-east-1"; } 54 | [ -z "$VPC" ] && { VPC=""; } 55 | [ -z "$PRIVATESUBNET1" ] && { PRIVATESUBNET1=""; } 56 | [ -z "$PRIVATESUBNET2" ] && { PRIVATESUBNET2=""; } 57 | [ -z "$PUBLICSUBNET" ] && { PUBLICSUBNET=""; } 58 | [ -z "$DEFAULT_SECURITY_GROUP" ] && { DEFAULT_SECURITY_GROUP=""; } 59 | [ -z "$NOTEBOOK_INSTANCE_TYPE" ] && { NOTEBOOK_INSTANCE_TYPE=""; } 60 | 61 | zip -r code.zip * -x .\*/\* 62 | aws s3 cp code.zip s3://$BUCKET_NAME/main/code.zip 63 | rm code.zip 64 | echo $BUCKET_NAME 65 | echo $STACK_NAME 66 | echo $REGION 67 | echo $VPC 68 | echo $PRIVATESUBNET1 69 | echo $PRIVATESUBNET2 70 | echo $PUBLICSUBNET 71 | echo $DEFAULT_SECURITY_GROUP 72 | echo $NOTEBOOK_INSTANCE_TYPE 73 | aws cloudformation package --template-file cfn/neptune-ml-nested-stack.json --output-template cfn/neptune-ml-nested-stack-packaged.yaml \ 74 | --region $REGION --s3-bucket $BUCKET_NAME --s3-prefix cfn 75 | aws cloudformation deploy --template-file cfn/neptune-ml-nested-stack-packaged.yaml --capabilities CAPABILITY_IAM --stack-name $STACK_NAME \ 76 | --region $REGION --parameter-overrides S3Bucket=$BUCKET_NAME DBClusterId=$STACK_NAME-neptune \ 77 | VPC=$VPC PrivateSubnet1=$PRIVATESUBNET1 PrivateSubnet2=$PRIVATESUBNET2 PublicSubnet=$PUBLICSUBNET \ 78 | DefaultSecurityGroup=$DEFAULT_SECURITY_GROUP NotebookInstanceType=$NOTEBOOK_INSTANCE_TYPE Timestamp=$TIMESTAMP CodeRepoS3BucketName=$BUCKET_NAME 79 | rm cfn/neptune-ml-nested-stack-packaged.yaml 80 | -------------------------------------------------------------------------------- /workshops/Amazon_Neptune_ML_PPI_Analysis/environment.gpu.yml: -------------------------------------------------------------------------------- 1 | name: neptune_ml_p36 2 | channels: 3 | - pytorch 4 | - dglteam 5 | - conda-forge 6 | dependencies: 7 | - python=3.8 # assumed pin; pytorch 1.7.1 and dgl 0.7.1 ship no builds for python >= 3.10 8 | - pytorch=1.7.1 9 | - torchvision=0.8 10 | - 
cudatoolkit=11.0 11 | - dgl-cuda11.0=0.7.1 12 | - spacy=3.0.5 13 | - rdflib=5.0.0 14 | - sagemaker-python-sdk 15 | - scikit-learn=1.0.2 16 | - pandas=1.1.5 17 | - ipykernel 18 | - pip: 19 | - graph-notebook==3.5.3 20 | - neptuneml-toolkit==0.0.1 21 | -------------------------------------------------------------------------------- /workshops/Amazon_Neptune_ML_PPI_Analysis/src/model-hpo-configuration.json: -------------------------------------------------------------------------------- 1 | { 2 | "models": [ 3 | { 4 | "model": "custom", 5 | "task_type": "link_predict", 6 | "eval_metric": { 7 | "tuning_objective": { 8 | "MetricName": "MRR", 9 | "Type": "Maximize" 10 | }, 11 | "metric_definitions": [ 12 | { 13 | "Name": "MRR", 14 | "Regex": "Validation average MRR[ ]*: (\\S*)" 15 | }, 16 | { 17 | "Name": "Train Loss", 18 | "Regex": "Train Loss: (\\S*)" 19 | }, 20 | { 21 | "Name": "Validation Loss", 22 | "Regex": "Validation Loss : (\\S*)" 23 | } 24 | ] 25 | }, 26 | "1-tier-param": [ 27 | { 28 | "param": "hidden-size", 29 | "range": [ 30 | 16, 31 | 128 32 | ], 33 | "type": "int", 34 | "inc_strategy": "power2" 35 | }, 36 | { 37 | "param": "num-bases", 38 | "range": [ 39 | 2, 40 | 8 41 | ], 42 | "type": "int", 43 | "inc_strategy": "power2" 44 | }, 45 | { 46 | "param": "n-epochs", 47 | "range": [ 48 | 3, 49 | 100 50 | ], 51 | "inc_strategy": "linear", 52 | "inc_val": 1, 53 | "type": "int", 54 | "node_strategy": "perM" 55 | }, 56 | { 57 | "param": "num-neighbors", 58 | "range": [ 59 | 5, 60 | 30 61 | ], 62 | "inc_strategy": "linear", 63 | "inc_val": 5, 64 | "type": "int" 65 | }, 66 | { 67 | "param": "lr", 68 | "range": [ 69 | 0.001, 70 | 0.01 71 | ], 72 | "type": "float", 73 | "inc_strategy": "log" 74 | } 75 | ], 76 | "2-tier-param": [ 77 | { 78 | "param": "weight-decay", 79 | "range": [ 80 | 0.001, 81 | 0.01 82 | ], 83 | "inc_strategy": "log", 84 | "type": "float", 85 | "default": 0 86 | }, 87 | { 88 | "param": "num-negs", 89 | "range": [ 90 | 4, 91 | 32 92 | ], 93 | "default": 10, 94 | "type": "int", 95 | "inc_strategy": "power2" 96 | } 97 | ], 98 | "3-tier-param": [ 99 | { 100 | "param": "batch-size", 101 | "range": [ 102 | 128, 103 | 4096 104 | ], 105 | "inc_strategy": "power2", 106 | "type": "int", 107 | "default": 1024 108 | } 109 | ], 110 | "fixed-param": [ 111 | { 112 | "param": "num-encoder-layers", 113 | "type": "int", 114 | "default": 1 115 | } 116 | ] 117 | } 118 | ] 119 | } -------------------------------------------------------------------------------- /workshops/Amazon_Neptune_ML_PPI_Analysis/src/transform.py: -------------------------------------------------------------------------------- 1 | # Adapted from 2 | # https://github.com/awslabs/neptuneml-toolkit/blob/main/examples/custom-models/introduction/movie-lens-rgcn/link-predict/src/transform.py 3 | 4 | import argparse 5 | from neptuneml_toolkit.transform import get_transform_config 6 | from train import transform 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument( 12 | "--local", 13 | action="store_true", 14 | default=False, 15 | help="Whether script is running locally", 16 | ) 17 | return parser.parse_args() 18 | 19 | 20 | if __name__ == "__main__": 21 | args = parse_args() 22 | if args.local: 23 | hyperparameters = { 24 | "num-neighbors": 30, 25 | "batch_size": 1024, 26 | "lr": 0.0015355425376242019, 27 | "task": "link_predict", 28 | "model": "custom", 29 | "name": "custom-link_predict", 30 | "weight-decay": 0.0, 31 | "n-epochs": 2, 32 | "hidden-size": 128, 33 | "num-bases": 4, 34 | 
"num-encoder-layers": 1, 35 | "num-negs": 10, 36 | } 37 | data_path, model_path, devices = "./data", "./output", [-1] 38 | else: 39 | data_path, model_path, devices, hyperparameters = get_transform_config() 40 | 41 | transform(data_path, model_path, devices, hyperparameters) 42 | -------------------------------------------------------------------------------- /workshops/BIomedical_Researcher/gradio/README.md: -------------------------------------------------------------------------------- 1 | # Search for PubMed articles using the Amazon Bedrock Converse API 2 | 3 | ## Description 4 | 5 | This demo shows how to use the Amazon Bedrock Converse API and custom tools to search for research articles on PubMed. 6 | 7 | ## Installation 8 | 9 | 1. Create a virtual environment 10 | 11 | ```bash 12 | python -m venv gradio_venv 13 | source gradio_venv/bin/activate 14 | ``` 15 | 16 | 2. Install requirements 17 | 18 | ```bash 19 | pip install -r requirements.txt 20 | ``` 21 | 22 | 3. Run the gradio app locally 23 | 24 | ```bash 25 | python app.py 26 | ``` 27 | 28 | 3. Navigate to [http://127.0.0.1:7860](http://127.0.0.1:7860) to view the gradio application. 29 | -------------------------------------------------------------------------------- /workshops/BIomedical_Researcher/gradio/app.py: -------------------------------------------------------------------------------- 1 | import chat 2 | import gradio as gr 3 | import boto3 4 | import pubmed 5 | 6 | boto_session = boto3.session.Session() 7 | 8 | system_prompt = [ 9 | { 10 | "text": "You are an expert biomedical researcher trained to answer questions using scientific literature.", 11 | "text": "Please respond to all requests using a friendly tone.", 12 | "text": "Write all of your technical responses at a high school reading level." 
-------------------------------------------------------------------------------- /workshops/BIomedical_Researcher/gradio/requirements.txt: -------------------------------------------------------------------------------- 1 | gradio>=4.37.2 2 | boto3>=1.34.143 3 | xmltodict>=0.13.0 -------------------------------------------------------------------------------- /workshops/Cancer-gene-expression-survival-prediction-with-mme/.gitignore: -------------------------------------------------------------------------------- 1 | **/.ipynb_checkpoints/** 2 | **/data/test 3 | **/data/validation 4 | **/data/train 5 | **/__pycache__/** 6 | **/data/train_data.csv 7 | **/data/validation_data.csv 8 | -------------------------------------------------------------------------------- /workshops/Cancer-gene-expression-survival-prediction-with-mme/images/Architecture.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Cancer-gene-expression-survival-prediction-with-mme/images/Architecture.jpeg -------------------------------------------------------------------------------- /workshops/Cancer-gene-expression-survival-prediction-with-mme/images/image_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Cancer-gene-expression-survival-prediction-with-mme/images/image_2.jpg -------------------------------------------------------------------------------- /workshops/Cancer-gene-expression-survival-prediction-with-mme/images/image_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Cancer-gene-expression-survival-prediction-with-mme/images/image_3.jpg -------------------------------------------------------------------------------- /workshops/Cancer-gene-expression-survival-prediction-with-mme/model/meta.json: -------------------------------------------------------------------------------- 1 | {"model": {"n_input_dim": 3}} -------------------------------------------------------------------------------- /workshops/Cancer-gene-expression-survival-prediction-with-mme/model/model.pth: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Cancer-gene-expression-survival-prediction-with-mme/model/model.pth -------------------------------------------------------------------------------- /workshops/Cancer-gene-expression-survival-prediction-with-mme/model/model.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Cancer-gene-expression-survival-prediction-with-mme/model/model.tar.gz -------------------------------------------------------------------------------- /workshops/Cancer-gene-expression-survival-prediction-with-mme/src/_model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | class SurvivalModel(nn.Module): 4 | 5 | def __init__(self, n_input_dim=21, n_hidden1 = 300, n_hidden2 = 100, n_output = 1): 6 | 7 | super(SurvivalModel, self).__init__() 8 | self.n_input_dim = n_input_dim 9 | self.layer_1 = nn.Linear(n_input_dim, n_hidden1) 10 | self.layer_2 = nn.Linear(n_hidden1, n_hidden2) 11 | self.layer_out = nn.Linear(n_hidden2, n_output) 12 | 13 | self.relu = nn.ReLU() 14 | self.sigmoid = nn.Sigmoid() 15 | self.dropout = nn.Dropout(p=0.1) 16 | self.batchnorm1 = nn.BatchNorm1d(n_hidden1) 17 | self.batchnorm2 = nn.BatchNorm1d(n_hidden2) 18 | 19 | 20 | def forward(self, inputs): 21 | x = self.relu(self.layer_1(inputs)) 22 | x = self.batchnorm1(x) 23 | x = self.relu(self.layer_2(x)) 24 | x = self.batchnorm2(x) 25 | x = self.dropout(x) 26 | x = self.sigmoid(self.layer_out(x)) 27 | 28 | return x 29 | 30 | def serialize_params(self): 31 | return { 32 | "model": { 33 | "n_input_dim": self.n_input_dim 34 | } 35 | } -------------------------------------------------------------------------------- /workshops/Cancer-gene-expression-survival-prediction-with-mme/src/evaluation.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import json 3 | import sys 4 | import subprocess 5 | import os 6 | import shutil 7 | 8 | subprocess.check_call([sys.executable, "-m", "pip", "install", "torch"]) 9 | 10 | import torch 11 | import torch.utils.data 12 | import numpy as np 13 | import pandas as pd 14 | import tarfile 15 | 16 | 17 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 18 | 19 | def evaluate(test_path="/opt/ml/processing/test/validation_data.csv", model_dir="/opt/ml/processing"): 20 | 21 | model_path = "{}/model/model.tar.gz".format(model_dir) 22 | with tarfile.open(model_path) as tar: 23 | tar.extractall(path=".") 24 | 25 | sys.path.insert(0, "/opt/ml/processing/code") 26 | 27 | from _model import SurvivalModel 28 | 29 | model_path = "./model.pth" 30 | meta_data = "./meta.json" 31 | 32 | print("Model is loading from [{}]. 
Metadata read from [{}]".format(model_path, meta_data)) 33 | 34 | with open(meta_data, 'rb') as f: 35 | meta = json.load(f) 36 | 37 | print("Metadata loaded: [{}]".format(meta)) 38 | 39 | print("Reading test data from [{}]".format(test_path)) 40 | test_data = pd.read_csv(test_path) 41 | 42 | X_vals = test_data.iloc[:, 1: meta['model']['n_input_dim'] + 1] 43 | Y_vals = test_data.iloc[:, 0] 44 | 45 | X_vals = torch.tensor(X_vals.to_numpy(), dtype=torch.float32, device=device) 46 | 47 | print("Test data loaded with [{}] rows".format(test_data.shape[0])) 48 | 49 | model = SurvivalModel(n_input_dim=meta['model']['n_input_dim']) 50 | 51 | with open(model_path, 'rb') as f: 52 | model.load_state_dict(torch.load(f, map_location=device)) 53 | 54 | print('Model loaded.') 55 | model = model.to(device) 56 | model.eval() 57 | 58 | with torch.no_grad(): 59 | p_output = model(X_vals) 60 | predictions = (p_output.cpu().numpy() > 0.5).astype(int).squeeze() # squeeze (n, 1) -> (n,) so the comparison below is elementwise 61 | print(predictions) 62 | accuracy = np.mean(predictions == Y_vals.to_numpy()) 63 | accuracy_score = accuracy 64 | 65 | report_dict = { 66 | "metrics": { 67 | "test_accuracy": {"value": accuracy_score, "standard_deviation": 0}, 68 | }, 69 | } 70 | 71 | output_dir = "/opt/ml/processing/evaluation" 72 | pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True) 73 | evaluation_path = f"{output_dir}/evaluation.json" 74 | 75 | print("Writing evaluation report to [{}]".format(evaluation_path)) 76 | 77 | with open(evaluation_path, "w") as f: 78 | f.write(json.dumps(report_dict)) 79 | 80 | print("Completed") 81 | 82 | 83 | if __name__ == "__main__": 84 | evaluate() 85 | #evaluate(test_path="./tmp/validation/data.csv", model_dir="./tmp") -------------------------------------------------------------------------------- /workshops/Cancer-gene-expression-survival-prediction-with-mme/src/genome_groups.py: -------------------------------------------------------------------------------- 1 | GENOME_GROUPS = { 2 | 'metagene_19' : ['LRIG1', 'HPGD', 'GDF15'], 3 | 'metagene_10' : ['CDH2', 'POSTN', 'VCAN', 'PDGFRA'], 4 | 'metagene_9' : ['VCAM1', 'CD44', 'CD48'], 5 | 'metagene_4' : ['CD4', 'LYL1', 'SPI1', 'CD37'], 6 | 'metagene_3' : ['VIM', 'LMO2', 'EGR2'], 7 | 'metagene_21' : ['BGN', 'COL4A1', 'COL5A1', 'COL5A2'], 8 | } 9 | 10 | _all = [] 11 | for group in GENOME_GROUPS.values(): 12 | _all.extend(group) 13 | 14 | GENOME_GROUPS["ALL"] = _all 15 | 16 | 17 | 18 | 
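Each metagene group above trains as its own model and lands in the multi-model endpoint as `model-<group>.tar.gz` (see `mme_deployment.py` below). A small sketch of how a group selects feature columns; the DataFrame is a stand-in for the workshop's `Genomic-data-119patients.csv` table:

```python
import pandas as pd
from genome_groups import GENOME_GROUPS

# Placeholder expression table with one row; the workshop loads real patient data.
df = pd.DataFrame({gene: [0.0] for gene in GENOME_GROUPS["ALL"]})

# Pick the columns for one metagene group to build that group's model input.
features = df[GENOME_GROUPS["metagene_19"]]  # columns LRIG1, HPGD, GDF15
print(features.columns.tolist())
```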
-------------------------------------------------------------------------------- /workshops/Cancer-gene-expression-survival-prediction-with-mme/src/inference.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import sys 5 | import torch 6 | import torch.utils.data 7 | 8 | import genome_groups as gg 9 | 10 | from _model import SurvivalModel 11 | 12 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 13 | 14 | def input_fn(request_body, request_content_type): 15 | print("Model invoked with [{}] and content type [{}]".format(request_body, request_content_type)) 16 | 17 | assert request_content_type == "application/json" 18 | 19 | json_body = json.loads(request_body) 20 | 21 | print(json_body) 22 | 23 | data = json_body["inputs"] 24 | data = torch.tensor(data, dtype=torch.float32, device=device) 25 | return data 26 | 27 | 28 | def model_fn(model_dir): 29 | 30 | print('Loading the trained model from [{}]'.format(model_dir)) 31 | with open(os.path.join(model_dir, 'meta.json'), 'rb') as f: 32 | meta = json.load(f) 33 | 34 | print("Model is trained with parameters [{}]".format(meta)) 35 | model = SurvivalModel(n_input_dim=meta['model']['n_input_dim']) 36 | 37 | with open(os.path.join(model_dir, 'model.pth'), 'rb') as f: 38 | model.load_state_dict(torch.load(f, map_location=device)) 39 | 40 | print('Model loaded.') 41 | model = model.to(device) 42 | model.eval() 43 | return model 44 | 45 | 46 | def predict_fn(input_data, model): 47 | print("predicting with input data [{}]".format(input_data)) 48 | with torch.no_grad(): 49 | p_output = model(input_data) 50 | output = (p_output.cpu().numpy() > 0.5).astype(int) # move to CPU before converting, in case the endpoint runs on GPU 51 | print("outputs : [{}]".format(output)) 52 | return output 53 | -------------------------------------------------------------------------------- /workshops/Cancer-gene-expression-survival-prediction-with-mme/src/mme_deployment.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import os 3 | import subprocess 4 | import sys 5 | 6 | sm_client = boto3.client("sagemaker") 7 | s3_client = boto3.client("s3") 8 | s3_resource = boto3.resource("s3") 9 | 10 | def install(package): 11 | subprocess.check_call([sys.executable, "-m", "pip", "install", package]) 12 | 13 | def model_from_registry(model_package_arn): 14 | 15 | response = sm_client.describe_model_package( 16 | ModelPackageName=model_package_arn 17 | ) 18 | 19 | model_data_url = response["InferenceSpecification"]["Containers"][0]["ModelDataUrl"] 20 | 21 | return model_data_url 22 | 23 | 24 | def deploy_to_mme_location(model_data_url, mme_model_location_s3, genome_group): 25 | 26 | print("Deploying models from [{}] to [{}]".format(model_data_url, mme_model_location_s3)) 27 | 28 | _, path = mme_model_location_s3.split(":", 1) 29 | path = path.lstrip("/") 30 | bucket, path = path.split("/", 1) 31 | 32 | _, path_source = model_data_url.split(":", 1) 33 | source = path_source.lstrip("/") 34 | 35 | response = s3_client.copy_object(Bucket = bucket, CopySource = source, Key=path + "/model-{}.tar.gz".format(genome_group)) 36 | 37 | print(response) 38 | 39 | 40 | 41 | if __name__ == "__main__": 42 | 43 | model_package_arn = os.environ['modelPackageArn'] 44 | mme_model_location_s3 = os.environ['mmeModelLocation'] 45 | genome_group = os.environ['genomeGroup'] 46 | 47 | print("Preparing the MME deployment for model package ARN [{}].".format(model_package_arn)) 48 | 49 | model_data_url = model_from_registry(model_package_arn) 50 | 51 | print("Model url found: [{}]".format(model_data_url)) 52 | 53 | deploy_to_mme_location(model_data_url, mme_model_location_s3, genome_group) 54 | 55 | 
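Because every group's archive shares one multi-model endpoint, a specific model is selected per request with `TargetModel`. A minimal invocation sketch (the endpoint name is a placeholder; the payload shape matches `input_fn` in `inference.py`, and `metagene_19` has the three input features recorded in `meta.json`):

```python
import json
import boto3

runtime = boto3.client("sagemaker-runtime")

response = runtime.invoke_endpoint(
    EndpointName="gene-expression-survival-mme",  # placeholder endpoint name
    TargetModel="model-metagene_19.tar.gz",  # naming set by deploy_to_mme_location above
    ContentType="application/json",
    Body=json.dumps({"inputs": [[0.1, 0.2, 0.3]]}),  # one row of three metagene features
)
print(response["Body"].read())
```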
-------------------------------------------------------------------------------- /workshops/Classify_Medical_Specialty_NLP_Huggingface_Transformers/get_dependencies.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | # Get the requirements 4 | def get_dependencies(): 5 | '''Install dependencies from requirements.txt, skipping any that are already installed''' 6 | print("Finding Dependencies to Install") 7 | curr_depends=os.popen('pip list --format=freeze').readlines() 8 | curr_depends=[i.rstrip() for i in curr_depends] 9 | desired_depends=os.popen('cat requirements.txt').readlines() 10 | desired_depends=[i.rstrip() for i in desired_depends] 11 | depends_to_install=list(set(desired_depends) - set(curr_depends)) 12 | f_out=open('temp_reqs.txt','w') 13 | print(f'''{str(len(depends_to_install))} dependencies to install''') 14 | print(*depends_to_install,sep="\n",file=f_out) # one package per line so the shell loop below reads each entry 15 | f_out.close() 16 | my_cmd='''cat temp_reqs.txt | sed -e '/^\s*#.*$/d' -e '/^\s*$/d'|while read line; do TOREPLACE -m pip install $line;done > /dev/null ''' 17 | my_cmd=my_cmd.replace('TOREPLACE',sys.executable) 18 | os.system(my_cmd) 19 | os.system('rm temp_reqs.txt') 20 | print("Done Getting Dependencies") 21 | -------------------------------------------------------------------------------- /workshops/Classify_Medical_Specialty_NLP_Huggingface_Transformers/requirements.txt: -------------------------------------------------------------------------------- 1 | sagemaker==2.68.0 2 | transformers==4.12.2 3 | tensorflow==2.7.2 4 | seaborn==0.11.2 5 | -------------------------------------------------------------------------------- /workshops/Classify_Medical_Specialty_NLP_Huggingface_Transformers/train.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import tensorflow as tf 3 | import transformers 4 | import argparse 5 | import os 6 | from transformers import DistilBertTokenizer 7 | from transformers import TFDistilBertForSequenceClassification 8 | 9 | if __name__ == "__main__": 10 | 11 | parser = argparse.ArgumentParser() 12 | 13 | # Hyperparameters sent by the client are passed as command-line arguments to the script. 
14 | parser.add_argument("--epochs", type=int, default=3) 15 | parser.add_argument("--train_batch_size", type=int, default=32) 16 | parser.add_argument("--eval_batch_size", type=int, default=64) 17 | parser.add_argument("--warmup_steps", type=int, default=500) 18 | parser.add_argument("--model_name", type=str) 19 | parser.add_argument("--learning_rate", type=float, default=5e-5) 20 | 21 | # Data, model, and output directories 22 | parser.add_argument("--output_data_dir", type=str, default=os.environ["SM_OUTPUT_DATA_DIR"]) 23 | parser.add_argument("--model_dir", type=str, default=os.environ["SM_MODEL_DIR"]) 24 | parser.add_argument("--n_gpus", type=str, default=os.environ["SM_NUM_GPUS"]) 25 | parser.add_argument("--training_dir", type=str, default=os.environ["SM_CHANNEL_TRAIN"]) 26 | parser.add_argument("--test_dir", type=str, default=None) 27 | 28 | MODEL_NAME = 'distilbert-base-uncased-finetuned-sst-2-english' 29 | BATCH_SIZE = 16 30 | N_EPOCHS = 3 31 | 32 | args, _ = parser.parse_known_args() 33 | 34 | 35 | df_1=pd.read_csv(f'{args.training_dir}/train.csv') 36 | 37 | X_train=df_1 38 | y_train=X_train['specialty_encoded'] 39 | # Define a tokenizer object 40 | tokenizer = DistilBertTokenizer.from_pretrained(MODEL_NAME) 41 | # Tokenize the text 42 | train_encodings = tokenizer(list(X_train['text']), 43 | truncation=True, 44 | padding=True) 45 | train_dataset = tf.data.Dataset.from_tensor_slices((dict(train_encodings), 46 | list(y_train.values))) 47 | 48 | model = TFDistilBertForSequenceClassification.from_pretrained(MODEL_NAME) 49 | 50 | # Compile with the Adam optimizer and sparse categorical cross-entropy loss 51 | model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=5e-5), 52 | loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 53 | metrics=["accuracy"]) 54 | # Train the model; the dataset is already batched, so batch_size is not passed to fit() 55 | model.fit(train_dataset.shuffle(len(X_train)).batch(BATCH_SIZE), 56 | epochs=N_EPOCHS) 57 | 58 | model.save_pretrained(args.model_dir) 59 | tokenizer.save_pretrained(args.model_dir) 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/CITATION: -------------------------------------------------------------------------------- 1 | Pacheco, Andre G. C.; Lima, Gustavo R.; Salomão, Amanda S.; Krohling, Breno; Biral, Igor P.; de Angelo, Gabriel G. ; Alves Jr, Fábio C. R. ; Esgario, José G. M.; Simora, Alana C. ; Castro, Pedro B. C. ; Rodrigues, Felipe B.; Frasson, Patricia H. L. ; Krohling, Renato A.; Knidel, Helder ; Santos, Maria C. S. ; Espírito Santo, Rachel B.; Macedo, Telma L. S. G.; Canuto, Tania R. P. ; de Barros, Luíz F. S. 
(2020), “PAD-UFES-20: a skin lesion dataset composed of patient data and clinical images collected from smartphones”, Mendeley Data, V1, doi: 10.17632/zr7vgbcyr2.1 -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/MLLC1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/MLLC1.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/MLLC2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/MLLC2.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/Trial-component-list.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/Trial-component-list.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/deployment_options.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/deployment_options.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/experiments.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/experiments.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/find-prod-deploy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/find-prod-deploy.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/jobs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/jobs.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/lesions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/lesions.png 
-------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/metrics.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/model_registry.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/model_registry.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/name_project.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/name_project.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/overexpression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/overexpression.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/pipeline.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/pipeline_execution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/pipeline_execution.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/repo_defaults.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/repo_defaults.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/repositories.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/repositories.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/resources.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/resources.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/second-endpoint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/second-endpoint.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/select-model-version.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/select-model-version.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/sidebar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/sidebar.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/sm-resources-tab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/sm-resources-tab.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/sm_experiments.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/sm_experiments.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/tc-list-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/tc-list-2.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/template_build.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/template_build.jpg -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/template_deploy.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/template_deploy.jpg -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/img/update-status.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/img/update-status.png -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/scripts/pipelines/codebuild-buildspec.yml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | 3 | phases: 4 | install: 5 | runtime-versions: 6 | python: 3.8 7 | commands: 8 | - pip install --upgrade --force-reinstall . "awscli>1.20.30" 9 | 10 | build: 11 | commands: 12 | - export PYTHONUNBUFFERED=TRUE 13 | - export SAGEMAKER_PROJECT_NAME_ID="${SAGEMAKER_PROJECT_NAME}-${SAGEMAKER_PROJECT_ID}" 14 | - | 15 | run-pipeline --module-name pipelines.skinlesions.pipeline \ 16 | --role-arn $SAGEMAKER_PIPELINE_ROLE_ARN \ 17 | --tags "[{\"Key\":\"sagemaker:project-name\", \"Value\":\"${SAGEMAKER_PROJECT_NAME}\"}, {\"Key\":\"sagemaker:project-id\", \"Value\":\"${SAGEMAKER_PROJECT_ID}\"}]" \ 18 | --kwargs "{\"region\":\"${AWS_REGION}\",\"sagemaker_project_arn\":\"${SAGEMAKER_PROJECT_ARN}\",\"role\":\"${SAGEMAKER_PIPELINE_ROLE_ARN}\",\"default_bucket\":\"${ARTIFACT_BUCKET}\",\"pipeline_name\":\"${SAGEMAKER_PROJECT_NAME_ID}\",\"model_package_group_name\":\"${SAGEMAKER_PROJECT_NAME_ID}\",\"base_job_prefix\":\"${SAGEMAKER_PROJECT_NAME_ID}\"}" 19 | - echo "Create/Update of the SageMaker Pipeline and execution completed." 20 | 21 | -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/scripts/pipelines/skinlesions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/scripts/pipelines/skinlesions/__init__.py -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/scripts/pipelines/skinlesions/preprocess.py: -------------------------------------------------------------------------------- 1 | 2 | import boto3 3 | import logging 4 | import os 5 | import pandas as pd 6 | import shutil 7 | from sklearn.model_selection import train_test_split 8 | 9 | logging.getLogger().setLevel(logging.INFO) 10 | 11 | # Define data source and other parameters. 
12 | SRC_BUCKET = 'prod-dcd-datasets-cache-zipfiles' 13 | SRC_KEY = 'zr7vgbcyr2-1.zip' 14 | DATA_DIR = '/opt/ml/processing/input' 15 | 16 | # Download raw data zip from https://data.mendeley.com/datasets/zr7vgbcyr2/1 17 | logging.info(f'Downloading {SRC_KEY}') 18 | s3_boto_client = boto3.client("s3") 19 | s3_boto_client.download_file(SRC_BUCKET, SRC_KEY, f'{DATA_DIR}/raw.zip') 20 | 21 | # Unzip data 22 | logging.info(f'Unpacking {SRC_KEY}') 23 | shutil.unpack_archive(f'{DATA_DIR}/raw.zip', DATA_DIR) 24 | for i in range(1,4): 25 | logging.info(f'Unpacking imgs_part_{i}.zip') 26 | shutil.unpack_archive(f'{DATA_DIR}/images/imgs_part_{i}.zip', f'{DATA_DIR}/images') 27 | logging.info(f'Copying {DATA_DIR}/images/imgs_part_{i} to {DATA_DIR}/images/all_imgs') 28 | shutil.copytree(f'{DATA_DIR}/images/imgs_part_{i}', f'{DATA_DIR}/images/all_imgs', dirs_exist_ok=True) 29 | 30 | # Split data into training, validation, and test sets 31 | logging.info(f'Creating training-validation data split') 32 | metadata = pd.read_csv(f'{DATA_DIR}/metadata.csv') 33 | train_df, test_df = train_test_split(metadata, test_size=0.2, stratify=metadata['diagnostic']) 34 | train_df, val_df = train_test_split(train_df, test_size=0.25, stratify=train_df['diagnostic']) 35 | 36 | # Copy training data into folders for training 37 | logging.info(f'Copying training data to {DATA_DIR}/output/train') 38 | os.makedirs(f"{DATA_DIR}/output/train", exist_ok=True) 39 | train_df.to_csv(f'{DATA_DIR}/output/train/metadata.csv', index=False) 40 | for _,row in train_df.iterrows(): 41 | src = f"{DATA_DIR}/images/all_imgs/{row['img_id']}" 42 | os.makedirs(f"{DATA_DIR}/output/train/{row['diagnostic']}", exist_ok=True) 43 | dest = f"{DATA_DIR}/output/train/{row['diagnostic']}/{row['img_id']}" 44 | shutil.copy2(src, dest) 45 | 46 | # Copy validation data into folders for validation 47 | logging.info(f'Copying validation data to {DATA_DIR}/output/val') 48 | os.makedirs(f"{DATA_DIR}/output/val", exist_ok=True) 49 | val_df.to_csv(f'{DATA_DIR}/output/val/metadata.csv', index=False) 50 | for _,row in val_df.iterrows(): 51 | src = f"{DATA_DIR}/images/all_imgs/{row['img_id']}" 52 | os.makedirs(f"{DATA_DIR}/output/val/{row['diagnostic']}", exist_ok=True) 53 | dest = f"{DATA_DIR}/output/val/{row['diagnostic']}/{row['img_id']}" 54 | shutil.copy2(src, dest) 55 | 56 | # Copy test data into folders for evaluation 57 | logging.info(f'Copying test data to {DATA_DIR}/output/test') 58 | os.makedirs(f"{DATA_DIR}/output/test", exist_ok=True) 59 | test_df.to_csv(f'{DATA_DIR}/output/test/metadata.csv', index=False) 60 | for _,row in test_df.iterrows(): 61 | src = f"{DATA_DIR}/images/all_imgs/{row['img_id']}" 62 | os.makedirs(f"{DATA_DIR}/output/test/{row['diagnostic']}", exist_ok=True) 63 | dest = f"{DATA_DIR}/output/test/{row['diagnostic']}/{row['img_id']}" 64 | shutil.copy2(src, dest) 65 | -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/scripts/processing/process.py: -------------------------------------------------------------------------------- 1 | 2 | import boto3 3 | import logging 4 | import os 5 | import pandas as pd 6 | import shutil 7 | from sklearn.model_selection import train_test_split 8 | 9 | logging.getLogger().setLevel(logging.INFO) 10 | 11 | # Define data source and other parameters. 
12 | SRC_BUCKET = 'prod-dcd-datasets-cache-zipfiles' 13 | SRC_KEY = 'zr7vgbcyr2-1.zip' 14 | DATA_DIR = '/opt/ml/processing/input' 15 | 16 | # Download raw data zip from https://data.mendeley.com/datasets/zr7vgbcyr2/1 17 | logging.info(f'Downloading {SRC_KEY}') 18 | s3_boto_client = boto3.client("s3") 19 | s3_boto_client.download_file(SRC_BUCKET, SRC_KEY, f'{DATA_DIR}/raw.zip') 20 | 21 | # Unzip data 22 | logging.info(f'Unpacking {SRC_KEY}') 23 | shutil.unpack_archive(f'{DATA_DIR}/raw.zip', DATA_DIR) 24 | for i in range(1,4): 25 | logging.info(f'Unpacking imgs_part_{i}.zip') 26 | shutil.unpack_archive(f'{DATA_DIR}/images/imgs_part_{i}.zip', f'{DATA_DIR}/images') 27 | logging.info(f'Copying {DATA_DIR}/images/imgs_part_{i} to {DATA_DIR}/images/all_imgs') 28 | shutil.copytree(f'{DATA_DIR}/images/imgs_part_{i}', f'{DATA_DIR}/images/all_imgs', dirs_exist_ok=True) 29 | 30 | # Split data into training, validation, and test sets 31 | logging.info(f'Creating training-validation data split') 32 | metadata = pd.read_csv(f'{DATA_DIR}/metadata.csv') 33 | train_df, test_df = train_test_split(metadata, test_size=0.2, stratify=metadata['diagnostic']) 34 | train_df, val_df = train_test_split(train_df, test_size=0.25, stratify=train_df['diagnostic']) 35 | 36 | # Copy training data into folders for training 37 | logging.info(f'Copying training data to {DATA_DIR}/output/train') 38 | os.makedirs(f"{DATA_DIR}/output/train", exist_ok=True) 39 | train_df.to_csv(f'{DATA_DIR}/output/train/metadata.csv', index=False) 40 | for _,row in train_df.iterrows(): 41 | src = f"{DATA_DIR}/images/all_imgs/{row['img_id']}" 42 | os.makedirs(f"{DATA_DIR}/output/train/{row['diagnostic']}", exist_ok=True) 43 | dest = f"{DATA_DIR}/output/train/{row['diagnostic']}/{row['img_id']}" 44 | shutil.copy2(src, dest) 45 | 46 | # Copy validation data into folders for validation 47 | logging.info(f'Copying validation data to {DATA_DIR}/output/val') 48 | os.makedirs(f"{DATA_DIR}/output/val", exist_ok=True) 49 | val_df.to_csv(f'{DATA_DIR}/output/val/metadata.csv', index=False) 50 | for _,row in val_df.iterrows(): 51 | src = f"{DATA_DIR}/images/all_imgs/{row['img_id']}" 52 | os.makedirs(f"{DATA_DIR}/output/val/{row['diagnostic']}", exist_ok=True) 53 | dest = f"{DATA_DIR}/output/val/{row['diagnostic']}/{row['img_id']}" 54 | shutil.copy2(src, dest) 55 | 56 | # Copy test data into folders for evaluation 57 | logging.info(f'Copying test data to {DATA_DIR}/output/test') 58 | os.makedirs(f"{DATA_DIR}/output/test", exist_ok=True) 59 | test_df.to_csv(f'{DATA_DIR}/output/test/metadata.csv', index=False) 60 | for _,row in test_df.iterrows(): 61 | src = f"{DATA_DIR}/images/all_imgs/{row['img_id']}" 62 | os.makedirs(f"{DATA_DIR}/output/test/{row['diagnostic']}", exist_ok=True) 63 | dest = f"{DATA_DIR}/output/test/{row['diagnostic']}/{row['img_id']}" 64 | shutil.copy2(src, dest) 65 | -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/visualizer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Classify_Skin_Lesion_Images/visualizer/__init__.py -------------------------------------------------------------------------------- /workshops/Classify_Skin_Lesion_Images/visualizer/visualizer.py: -------------------------------------------------------------------------------- 1 | import os 2 | 
from pyvis.network import Network 3 | from sagemaker.lineage.artifact import Artifact 4 | 5 | class Visualizer: 6 | def __init__(self): 7 | self.directory = "generated" 8 | if not os.path.exists(self.directory): 9 | os.makedirs(self.directory) 10 | 11 | def render(self, query_lineage_response, scenario_name, sagemaker_session): 12 | net = self.get_network() 13 | for vertex in query_lineage_response["Vertices"]: 14 | arn = vertex["Arn"] 15 | if "Type" in vertex: 16 | label = vertex["Type"] 17 | else: 18 | label = None 19 | lineage_type = vertex["LineageType"] 20 | name = self.get_name(arn, label, lineage_type, sagemaker_session) 21 | title = self.get_title(arn, label, lineage_type) 22 | color = self.get_color(lineage_type) 23 | net.add_node( 24 | vertex["Arn"], 25 | label=name, 26 | title=title, 27 | shape="box", 28 | physics="false", 29 | color=color, 30 | ) 31 | 32 | for edge in query_lineage_response["Edges"]: 33 | source = edge["SourceArn"] 34 | dest = edge["DestinationArn"] 35 | net.add_edge(source, dest) 36 | 37 | return net.show(f"{self.directory}/{scenario_name}.html") 38 | 39 | def get_title(self, arn, label, lineage_type): 40 | return f"Arn: {arn} Type: {label} Lineage Type: {lineage_type}" 41 | 42 | def get_name(self, arn, label, lineage_type, sagemaker_session): 43 | if lineage_type == "Artifact": 44 | return ( 45 | label 46 | + " " 47 | + Artifact.load( 48 | artifact_arn=arn, 49 | sagemaker_session=sagemaker_session, 50 | ).source.source_uri 51 | ) 52 | else: 53 | name = arn.split("/")[1] 54 | return label + " " + name 55 | 56 | def get_network(self): 57 | net = Network(height="800px", width="100%", directed=True, notebook=True) 58 | return net 59 | 60 | def get_color(self, lineage_type): 61 | if lineage_type == "Context": 62 | return "yellow" 63 | elif lineage_type == "Artifact": 64 | return "orange" 65 | else: 66 | return None 67 | -------------------------------------------------------------------------------- /workshops/FDA_Doc_Search/.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | .env 3 | .scratch 4 | data 5 | .DS_Store -------------------------------------------------------------------------------- /workshops/FDA_Doc_Search/LICENSE: -------------------------------------------------------------------------------- 1 | MIT No Attribution 2 | 3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 13 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 14 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 15 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 16 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
--------------------------------------------------------------------------------
/workshops/FDA_Doc_Search/README.md:
--------------------------------------------------------------------------------
1 | # Drugs@FDA Search Demo
2 | 
3 | ## Summary
4 | 
5 | A Search + RAG demo using documents from the [Drugs@FDA](https://www.fda.gov/drugs/drug-approvals-and-databases/about-drugsfda) data source.
6 | 
7 | ## Architecture
8 | 
9 | ![Drugs@FDA Search Architecture](arch.png)
10 | 
11 | ## Contents
12 | 
13 | This project includes four Jupyter notebooks that walk through the process of building a Search+RAG workflow for Drugs@FDA:
14 | 
15 | 1. Load Drugs@FDA data and metadata into Amazon S3.
16 | 1. Create Kendra Index and Data Source.
17 | 1. Explore Search+RAG with Amazon Bedrock.
18 | 1. Build a Search+RAG pipeline with Bedrock Prompt Flows.
19 | 
20 | ## Getting Started
21 | 
22 | 1. Verify that you have programmatic credentials saved to access your AWS account.
23 | 1. Set an environment variable named `S3_BUCKET_NAME` with the name of an existing S3 bucket. You can also add this to a file named `.env` for easier management.
24 | 1. Run notebooks 1-4.
--------------------------------------------------------------------------------
/workshops/FDA_Doc_Search/app/www/img/brain-light.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/FDA_Doc_Search/app/www/img/brain-light.png
--------------------------------------------------------------------------------
/workshops/FDA_Doc_Search/app/www/img/brain.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/FDA_Doc_Search/app/www/img/brain.png
--------------------------------------------------------------------------------
/workshops/FDA_Doc_Search/app/www/main.css:
--------------------------------------------------------------------------------
1 | 
2 | .border-solid {
3 |     border: 3px solid;
4 |     border-width: 3px;
5 |     border-radius: 20px;
6 |     border-color: #f01716;
7 | }
8 | 
9 | .border-gradient {
10 |     border-radius: 30px;
11 |     border: transparent solid 3px;
12 | 
13 | }
14 | 
15 | .border-gradient {
16 |     background: linear-gradient(#fff 0 0) padding-box,
17 |     linear-gradient(135deg, #f01716, #f99f9f) border-box;
18 | }
19 | 
20 | .generate-results {
21 |     padding: 20px;
22 | }
23 | 
24 | .bedrock-title {
25 |     font-size: 3em;
26 |     font-weight: 700;
27 |     color: #f01716;
28 | }
29 | 
30 | .logo-table {
31 |     display: grid;
32 |     grid-template-columns: 80px 1fr;
33 | }
34 | 
35 | .p-generate {
36 |     font-size: 1.5em;
37 | }
38 | 
39 | .search-result-header {
40 |     font-size: 1.25em;
41 |     font-weight: 700;
42 |     color: #f01716
43 | }
44 | 
45 | .filter-group .wrap {
46 |     display: grid !important;
47 |     grid-template-columns: 100%;
48 | }
49 | 
50 | .header-bar {
51 |     border-bottom: 1px solid #dddddd;
52 |     padding: 5px;
53 | }
--------------------------------------------------------------------------------
/workshops/FDA_Doc_Search/arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/FDA_Doc_Search/arch.png
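A minimal sketch of the environment setup described in the FDA_Doc_Search README's Getting Started section above, assuming `python-dotenv` (pinned in the requirements.txt that follows) and an existing bucket supplied by you:

```python
# Hypothetical setup check for the FDA_Doc_Search notebooks. Assumes a local
# .env file (or an exported variable) holding the name of an existing bucket.
import os

from dotenv import load_dotenv  # python-dotenv, pinned in requirements.txt

load_dotenv()  # picks up S3_BUCKET_NAME from ./.env if the file exists
bucket = os.environ["S3_BUCKET_NAME"]  # raises KeyError if the variable is unset
print(f"Drugs@FDA documents will be staged in s3://{bucket}")
```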
--------------------------------------------------------------------------------
/workshops/FDA_Doc_Search/requirements.txt:
--------------------------------------------------------------------------------
1 | beautifulsoup4==4.12.3
2 | boto3==1.35.13
3 | tqdm==4.66.5
4 | lxml==5.3.0
5 | gradio==4.43.0
6 | python-dotenv==1.0.1
7 | 
--------------------------------------------------------------------------------
/workshops/FDA_Doc_Search/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/FDA_Doc_Search/src/__init__.py
--------------------------------------------------------------------------------
/workshops/Healthcare_Payments_Prediction_SageMaker_AutoPilot/img/autopilot_schematic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Healthcare_Payments_Prediction_SageMaker_AutoPilot/img/autopilot_schematic.png
--------------------------------------------------------------------------------
/workshops/Medical_Imaging_AI/README.md:
--------------------------------------------------------------------------------
1 | # Medical Imaging AI on AWS Workshop
2 | 
3 | ## Introduction
4 | Artificial Intelligence (AI) has been shown to support radiologists' clinical decision making and to help reduce doctor burnout. To apply AI in medical imaging diagnosis, we need vast amounts of annotated medical images to develop a supervised AI model. Annotating medical images accurately is an important procedure; however, it is not only tedious and time-consuming, but it also demands costly, specialty-oriented skills that are not easily accessible. AI-Assisted Annotation (AIAA) has been developed to greatly reduce this manual effort.
5 | 
6 | ## Workshop Studio
7 | 
8 | More information about this workshop is available on [AWS Workshop Studio](https://catalog.us-east-1.prod.workshops.aws/workshops/ff6964ec-b880-45d4-bc1e-468b0c7fa854/en-US/).
9 | 
10 | ## Architecture Overview
11 | [This workshop](https://catalog.us-east-1.prod.workshops.aws/workshops/ff6964ec-b880-45d4-bc1e-468b0c7fa854/en-US/) presents an AWS solution that runs the open-source [MONAI Label](https://docs.monai.io/projects/label/en/latest/index.html) server on an EC2 virtual machine with autoscaling, mounted to the same [Amazon Elastic File System (EFS)](https://aws.amazon.com/efs/) volume shared with [Amazon SageMaker Studio notebook](https://docs.aws.amazon.com/sagemaker/latest/dg/notebooks.html) instances. Through the common file share, clinicians and data scientists can collaborate on the same data sets through the different tools they are familiar with. [Amazon AppStream 2.0](https://aws.amazon.com/appstream2/) is used to stream [3D Slicer](https://www.slicer.org/), an image computing platform, for interactive medical image annotation by clinicians.
12 | 
13 | ![Architecture diagram](img/arch.png)
14 | 
15 | ## Objectives
16 | Through this workshop you will learn how to perform AIAA on medical images on AWS using MONAI Label:
17 | 
18 | 1. You will deploy and configure a MONAI Label server on AWS
19 | 1. Radiologists will be able to perform medical image annotations using 3D Slicer on Amazon AppStream 2.0
20 | 1. Data scientists will be able to build deep learning models using the annotated images in Amazon SageMaker notebooks
21 | 
22 | ## Requirements
23 | 
24 | Get familiar with the following AWS services:
25 | - [Amazon AppStream 2.0](https://aws.amazon.com/appstream2/)
26 | - [AWS CloudFormation](https://aws.amazon.com/cloudformation/)
27 | - [Amazon SageMaker](https://aws.amazon.com/pm/sagemaker)
28 | - [Amazon EC2](https://aws.amazon.com/ec2/)
29 | - [Amazon EFS](https://aws.amazon.com/efs/)
30 | 
31 | You will use the open source [3D Slicer image viewer](https://www.slicer.org/) to visualize and annotate images in the [Medical Segmentation Decathlon](https://registry.opendata.aws/msd/).
32 | 
33 | ## Cost Estimate
34 | 
35 | We’ve included a [cost estimate](https://calculator.aws/#/estimate?id=dfb2aa63e2eb7d53385c0290fb2602cc2d2e4630) for the default compute and storage used in this workshop. You can modify the Amazon EC2 instance type and size in the AWS Pricing Calculator to match the ones you selected when deploying this solution.
36 | 
37 | Note that the AWS Pricing Calculator estimates charges based on monthly and yearly intervals. If you’re only planning on using this workshop for a couple of hours, you can divide the “Total monthly” cost by 730 to get an approximate hourly running cost. For example, if the total monthly cost is $1167, then the hourly cost is $1167 ÷ 730 ≈ $1.60 per hour.
38 | 
39 | ## MONAI Label Tutorial
40 | 
41 | [https://youtu.be/m2rYorVwXk4](https://youtu.be/m2rYorVwXk4)
--------------------------------------------------------------------------------
/workshops/Medical_Imaging_AI/img/arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Medical_Imaging_AI/img/arch.png
--------------------------------------------------------------------------------
/workshops/Medical_Imaging_AI/source/requirements.txt:
--------------------------------------------------------------------------------
1 | monai-weekly[gdown, nibabel, tqdm, ignite]
2 | matplotlib
3 | psutil
--------------------------------------------------------------------------------
/workshops/Molecular-property-prediction/hiv-inhibitor-prediction-dgl/code/requirements.txt:
--------------------------------------------------------------------------------
1 | dgl==0.6.1
2 | dgllife==0.2.8
3 | rdkit-pypi==2021.09.3
--------------------------------------------------------------------------------
/workshops/Molecular-property-prediction/hiv-inhibitor-prediction-dgl/code/s3_downloaded_HIV_dataset.py:
--------------------------------------------------------------------------------
1 | from dgllife.data.csv_dataset import MoleculeCSVDataset
2 | from dgllife.utils import smiles_to_bigraph
3 | from functools import partial
4 | import pandas as pd
5 | 
6 | class S3DownloadedHIVDataset(MoleculeCSVDataset):
7 | 
8 |     def __init__(self, s3downloaded_dir,
9 |                  node_featurizer=None,
10 |                  edge_featurizer=None,
11 |                  log_every=1000,
12 |                  n_jobs=1, mode='sm'):
13 | 
14 |         smiles_to_graph = partial(smiles_to_bigraph, add_self_loop=True)
15 |         load = False
16 |         cache_file_path = "./none.bin"
17 | 
18 |         df = pd.read_csv(s3downloaded_dir + "/full.csv")
19 | 
20 |         ### Check below if we are to run the sagemaker job
21 |         #if(mode == 'local'):
22 |         #    df = df.drop(columns=['activity'])
23 | 
24 |         super(S3DownloadedHIVDataset, self).__init__(df=df,
25 |                                                      smiles_to_graph=smiles_to_graph,
26 |                                                      node_featurizer=node_featurizer,
27 |                                                      edge_featurizer=edge_featurizer,
28 |                                                      smiles_column='smiles',
29 |                                                      cache_file_path=cache_file_path,
30 |                                                      load=load,
31 |                                                      log_every=log_every,
32 |                                                      init_mask=True,
33 |                                                      n_jobs=n_jobs)
34 | 
35 |     def __getitem__(self, item):
36 |         """Get datapoint with index
37 | 
38 |         Parameters
39 |         ----------
40 |         item : int
41 |             Datapoint index
42 | 
43 |         Returns
44 |         -------
45 |         str
46 |             SMILES for the ith datapoint
47 |         DGLGraph
48 |             DGLGraph for the ith datapoint
49 |         Tensor of dtype float32 and shape (T)
50 |             Labels of the ith datapoint for all tasks. T for the number of tasks.
51 |         Tensor of dtype float32 and shape (T)
52 |             Binary masks of the ith datapoint indicating the existence of labels for all tasks.
53 |         str, optional
54 |             Raw screening result, which can be CI, CA, or CM.
55 |         """
56 |         return self.smiles[item], self.graphs[item], self.labels[item], self.mask[item]
--------------------------------------------------------------------------------
/workshops/Molecular-property-prediction/hiv-inhibitor-prediction-dgl/code/utils.py:
--------------------------------------------------------------------------------
1 | from torch.nn import functional as F
2 | 
3 | def model_saved_path(base):
4 |     return base + "/model.pth"
5 | 
6 | def model_params_saved_path(base):
7 |     return base + '/model_params.json'
8 | 
9 | def load_model(args, node_featurizer, n_tasks=1):
10 | 
11 |     num_gnn_layers = len(args.gnn_hidden_feats)
12 |     model = None
13 |     if(args.gnn_model_name == 'GCN-p'):
14 |         from dgllife.model import GCNPredictor
15 |         model = GCNPredictor(
16 |             in_feats=node_featurizer.feat_size(),
17 |             hidden_feats=args.gnn_hidden_feats,
18 |             activation=[F.relu] * num_gnn_layers,
19 |             residual=[args.gnn_residuals] * num_gnn_layers,
20 |             batchnorm=[args.gnn_batchnorm] * num_gnn_layers,
21 |             dropout=[args.gnn_dropout] * num_gnn_layers,
22 |             predictor_hidden_feats=args.gnn_predictor_hidden_feats,
23 |             predictor_dropout=args.gnn_dropout,
24 |             n_tasks=n_tasks
25 |         )
26 |     elif (args.gnn_model_name == 'GAT-p'):
27 |         from dgllife.model import GATPredictor
28 |         model = GATPredictor(
29 |             in_feats=node_featurizer.feat_size(),
30 |             hidden_feats=args.gnn_hidden_feats,
31 |             num_heads=[args.gnn_num_heads] * num_gnn_layers,
32 |             feat_drops=[args.gnn_dropout] * num_gnn_layers,
33 |             attn_drops=[args.gnn_dropout] * num_gnn_layers,
34 |             alphas=[args.gnn_alphas] * num_gnn_layers,
35 |             residuals=[args.gnn_residuals] * num_gnn_layers,
36 |             predictor_hidden_feats=args.gnn_predictor_hidden_feats,
37 |             predictor_dropout=args.gnn_dropout,
38 |             n_tasks=n_tasks
39 |         )
40 |     return model
41 | 
42 | 
43 | def init_featurizers(featurizer_type):
44 |     node_featurizer = None
45 |     edge_featurizer = None
46 |     if(featurizer_type == 'canonical'):
47 |         from dgllife.utils import CanonicalAtomFeaturizer
48 |         node_featurizer = CanonicalAtomFeaturizer()
49 |     elif(featurizer_type == 'attentivefp'):
50 |         from dgllife.utils import AttentiveFPAtomFeaturizer
51 |         node_featurizer = AttentiveFPAtomFeaturizer()
52 |     else:
53 |         raise ValueError(
54 |             "Expect featurizer_type to be in ['canonical', 'attentivefp'], "
55 |             "got {}".format(featurizer_type))
56 |     return node_featurizer, edge_featurizer
--------------------------------------------------------------------------------
/workshops/Molecular-property-prediction/hiv-inhibitor-prediction-dgl/img/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Molecular-property-prediction/hiv-inhibitor-prediction-dgl/img/1.jpg
--------------------------------------------------------------------------------
/workshops/Molecular-property-prediction/hiv-inhibitor-prediction-dgl/requirements.txt:
--------------------------------------------------------------------------------
1 | dgl==0.9.1
2 | dgllife
3 | numpy==1.26.4
4 | pandas==2.2.2
5 | rdkit-pypi==2022.9.5
6 | sagemaker==2.224.4
7 | scikit-learn==1.5.1
--------------------------------------------------------------------------------
/workshops/Pharma_Manufacturing_Compliance_Bedrock_GenAI/.gitignore:
--------------------------------------------------------------------------------
1 | *.code-workspace
2 | .venv
--------------------------------------------------------------------------------
/workshops/Pharma_Manufacturing_Compliance_Bedrock_GenAI/README.md:
--------------------------------------------------------------------------------
1 | # Manufacturing Document Comparison
2 | 
3 | ## Description
4 | This project automates the deployment of a manufacturing document comparison application on AWS. It builds and pushes a Docker image to Amazon ECR and then uses CloudFormation to provision the required resources.
5 | 
6 | ## Prerequisites
7 | - AWS CLI, configured for your account.
8 | - Docker.
9 | - A CloudFormation template named cf.yaml.
10 | - In app.py, set your password on line 276.
11 | 
12 | ## Getting Started
13 | Clone the repository:
14 | 
15 | ```bash
16 | git clone 
17 | cd 
18 | ```
19 | ## Configuration
20 | Configure the AWS CLI and Docker if they are not already set up. The script defaults to the us-east-1 region.
21 | 
22 | ## Build and Deploy
23 | Execute the script to build the Docker image, push it to ECR, and deploy your CloudFormation stack:
24 | 
25 | ```bash
26 | ./deploy.sh
27 | ```
28 | 
29 | ## Destroy
30 | ```bash
31 | ./destroy.sh
32 | ```
33 | 
34 | ## What the Script Does
35 | - Checks for the ECR repository; creates it if absent.
36 | - Logs into ECR.
37 | - Builds and tags the Docker image.
38 | - Pushes the image to ECR.
39 | - Deploys/updates the CloudFormation stack with the image URI.
40 | 
41 | ## Customize the Script
42 | Modify these variables in deploy.sh as needed:
43 | - **IMAGE**: Docker image and ECR repository name.
44 | - **SERVICE_NAME**: Service name for CloudFormation.
45 | - **STACK_NAME**: CloudFormation stack name.
46 | 
47 | ## Additional Information
48 | Ensure cf.yaml is in the same directory as deploy.sh, or update the script with the correct path.
49 | 
50 | ## Troubleshooting
51 | Check AWS CLI credentials and the CloudFormation console for errors.
52 | 
53 | ## License
54 | Apache-2.0 License
--------------------------------------------------------------------------------
/workshops/Pharma_Manufacturing_Compliance_Bedrock_GenAI/docker/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM --platform=linux/amd64 python:3.12
2 | 
3 | WORKDIR /deployment
4 | 
5 | COPY ./requirements.txt /deployment/requirements.txt
6 | COPY ./images /deployment/images
7 | 
8 | # Upgrade pip before installing the app dependencies (including the Bedrock client)
9 | RUN cd /deployment/ && pip install --upgrade pip
10 | 
11 | RUN pip install --no-cache-dir --upgrade -r /deployment/requirements.txt
12 | 
13 | COPY app.py /deployment/
14 | 
15 | EXPOSE 8080
16 | 
17 | CMD ["python", "app.py"]
18 | 
--------------------------------------------------------------------------------
/workshops/Pharma_Manufacturing_Compliance_Bedrock_GenAI/docker/cf.yaml:
--------------------------------------------------------------------------------
1 | # Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
2 | # SPDX-License-Identifier: Apache-2.0
3 | AWSTemplateFormatVersion: 2010-09-09
4 | Description: CloudFormation template that deploys the manufacturing document comparison app on AWS App Runner
5 | Parameters:
6 |   ServiceName:
7 |     Type: String
8 |     Description: Name for your App Runner service.
9 |   ImageUri:
10 |     Type: String
11 |     Description: ImageUri
12 | Resources:
13 |   Service:
14 |     Type: AWS::AppRunner::Service
15 |     Properties:
16 |       ServiceName: !Ref ServiceName
17 |       SourceConfiguration:
18 |         AuthenticationConfiguration:
19 |           AccessRoleArn: !GetAtt AppRunnerDeployRole.Arn
20 |         AutoDeploymentsEnabled: true
21 |         ImageRepository:
22 |           ImageIdentifier: !Ref ImageUri
23 |           ImageRepositoryType: ECR
24 |           ImageConfiguration:
25 |             Port: 8080
26 |       InstanceConfiguration:
27 |         InstanceRoleArn: !GetAtt InstanceRole.Arn
28 |         Cpu: 1024
29 |         Memory: 2048
30 | 
31 |   AppRunnerDeployRole:
32 |     Type: AWS::IAM::Role
33 |     Properties:
34 |       RoleName: !Sub ${ServiceName}-AppRunnerDeployRole
35 |       Description: Role for App Runner deployment
36 |       AssumeRolePolicyDocument:
37 |         Version: 2012-10-17
38 |         Statement:
39 |           - Effect: Allow
40 |             Principal:
41 |               Service:
42 |                 - build.apprunner.amazonaws.com
43 |             Action:
44 |               - sts:AssumeRole
45 |       Path: /
46 |       Policies:
47 |         - PolicyName: AllowAccessToAppRunner
48 |           PolicyDocument:
49 |             Version: 2012-10-17
50 |             Statement:
51 |               - Effect: Allow
52 |                 Action:
53 |                   - apprunner:*
54 |                 Resource: '*'
55 |         - PolicyName: AllowAccessToECR
56 |           PolicyDocument:
57 |             Version: 2012-10-17
58 |             Statement:
59 |               - Effect: Allow
60 |                 Action:
61 |                   - ecr:*
62 |                 Resource: '*'
63 |         - PolicyName: AllowAccessToLogs
64 |           PolicyDocument:
65 |             Version: 2012-10-17
66 |             Statement:
67 |               - Effect: Allow
68 |                 Action:
69 |                   - logs:*
70 |                 Resource: '*'
71 | 
72 |   InstanceRole:
73 |     Type: AWS::IAM::Role
74 |     Properties:
75 |       RoleName: !Sub ${ServiceName}-InstanceRole
76 |       Description: Role for App Runner instance
77 |       AssumeRolePolicyDocument:
78 |         Version: 2012-10-17
79 |         Statement:
80 |           - Effect: Allow
81 |             Principal:
82 |               Service:
83 |                 - tasks.apprunner.amazonaws.com
84 |             Action:
85 |               - sts:AssumeRole
86 |       Path: /
87 |       Policies:
88 |         - PolicyName: AllowAccessToBedrock
89 |           PolicyDocument:
90 |             Version: 2012-10-17
91 |             Statement:
92 |               - Effect: Allow
93 |                 Action:
94 |                   - bedrock:InvokeModel
95 |                 Resource: '*'
96 | 
97 | 
98 | Outputs:
99 |   Endpoint:
100 |     Description: "The endpoint of the App Runner service."
101 |     Value: !GetAtt Service.ServiceUrl
--------------------------------------------------------------------------------
/workshops/Pharma_Manufacturing_Compliance_Bedrock_GenAI/docker/deploy.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | IMAGE=manufacturing_document_comparison
4 | 
5 | SERVICE_NAME=manufacturing_document_comparison
6 | STACK_NAME=manufacturing-document-comparison
7 | 
8 | region=us-east-1
9 | account=$(aws sts get-caller-identity --query Account --output text)
10 | export AWS_DEFAULT_REGION=${region}
11 | 
12 | fullname="${account}.dkr.ecr.${region}.amazonaws.com/${IMAGE}:latest"
13 | 
14 | # If the repository doesn't exist in ECR, create it.
15 | 
16 | aws ecr describe-repositories --repository-names "${IMAGE}" > /dev/null 2>&1
17 | 
18 | if [ $? -ne 0 ]
19 | then
20 |     aws ecr create-repository --repository-name "${IMAGE}" > /dev/null
21 | fi
22 | 
23 | # Get the login command from ECR and execute it directly
24 | aws ecr get-login-password --region "${region}" | docker login --username AWS --password-stdin "${account}".dkr.ecr."${region}".amazonaws.com
25 | 
26 | # Build the Docker image locally with the IMAGE name and then push it to ECR
27 | # with the full name.
28 | 
29 | docker build -t ${IMAGE} .
30 | docker image tag ${IMAGE} ${fullname}
31 | 
32 | docker push ${fullname}
33 | 
34 | # Deploy the CloudFormation stack (create or update as necessary) and suppress the output
35 | aws cloudformation deploy \
36 |     --template-file cf.yaml \
37 |     --stack-name "${STACK_NAME}" \
38 |     --parameter-overrides ServiceName="${SERVICE_NAME}" ImageUri="${fullname}" \
39 |     --capabilities CAPABILITY_NAMED_IAM > /dev/null
40 | 
41 | # Check if the stack deploy command was successful
42 | if [ $? -eq 0 ]; then
43 |     echo "Stack ${STACK_NAME} has been created or updated successfully."
44 | else
45 |     echo "Error deploying stack ${STACK_NAME}."
46 | fi
--------------------------------------------------------------------------------
/workshops/Pharma_Manufacturing_Compliance_Bedrock_GenAI/docker/destroy.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | STACK_NAME=manufacturing-document-comparison
4 | IMAGE=manufacturing_document_comparison
5 | 
6 | region=us-east-1
7 | export AWS_DEFAULT_REGION=${region}
8 | 
9 | aws cloudformation delete-stack --stack-name "${STACK_NAME}"
10 | 
11 | aws ecr delete-repository --repository-name "${IMAGE}" --force > /dev/null 2>&1
12 | 
13 | echo "Stack ${STACK_NAME} has been destroyed successfully."
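As a companion to deploy.sh and cf.yaml above, one way to read back the `Endpoint` output once the stack exists. This is a sketch using the scripts' default names, not part of the workshop itself:

```python
# Hypothetical helper: fetch the App Runner URL that cf.yaml exports as the
# "Endpoint" output after deploy.sh completes. Region and stack name mirror
# the defaults hard-coded in deploy.sh.
import boto3

cfn = boto3.client("cloudformation", region_name="us-east-1")
stack = cfn.describe_stacks(StackName="manufacturing-document-comparison")["Stacks"][0]
outputs = {o["OutputKey"]: o["OutputValue"] for o in stack.get("Outputs", [])}
print("App Runner endpoint:", outputs.get("Endpoint"))
```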
--------------------------------------------------------------------------------
/workshops/Pharma_Manufacturing_Compliance_Bedrock_GenAI/docker/images/manufacturing_diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Pharma_Manufacturing_Compliance_Bedrock_GenAI/docker/images/manufacturing_diagram.png
--------------------------------------------------------------------------------
/workshops/Pharma_Manufacturing_Compliance_Bedrock_GenAI/docker/requirements.txt:
--------------------------------------------------------------------------------
1 | gradio==4.26.0
2 | fastapi==0.110.1
3 | anthropic==0.23.1
4 | boto3==1.34.81
--------------------------------------------------------------------------------
/workshops/Pharma_Manufacturing_Compliance_Bedrock_GenAI/gradio_interface_test.py:
--------------------------------------------------------------------------------
1 | import gradio as gr
2 | 
3 | def greet(name):
4 |     return "Hello " + name + "!"
5 | 
6 | def greet2(name2):
7 |     return "Hello " + name2 + "!"
8 | 
9 | demo = gr.Interface(fn=greet, inputs="text", outputs="text")
10 | demo = gr.Interface(fn=greet2, inputs="text", outputs="text")  # overwrites the interface above; only greet2 is served
11 | 
12 | demo.launch()
--------------------------------------------------------------------------------
/workshops/Pharma_Manufacturing_Compliance_Bedrock_GenAI/penicillin_manufacturing.txt:
--------------------------------------------------------------------------------
1 | Penicillin Manufacturing Protocol
2 | 
3 | Materials:
4 | - Penicillium chrysogenum mold strain (ATCC 48271 or equivalent)
5 | - Growth medium:
6 | -- Corn steep liquor (5-10% w/v)
7 | -- Sucrose (2-5% w/v)
8 | -- Ammonium sulfate (0.5-1% w/v)
9 | -- Potassium phosphate (0.1-0.5% w/v)
10 | -- Fermentation vessel (100-500L capacity)
11 | -- Centrifuge
12 | -- Rotary evaporator
13 | -- Ion exchange resin (strongly acidic cation exchange resin)
14 | -- Activated charcoal
15 | -- Reverse osmosis system
16 | -- Sterile 0.9% sodium chloride solution
17 | 
18 | Method:
19 | 1. Inoculate a slant or plate of P. chrysogenum and incubate at 25°C for 3-5 days until sporulation occurs.
20 | 2. Inoculate a starter culture of the growth medium with P. chrysogenum spores and incubate at 25°C for 2 days on a rotary shaker (200rpm) until a cell density of 1-5 x 10^7 CFU/mL is reached.
21 | 3. Inoculate the fermentation vessel with 10% v/v of the starter culture.
22 | 4. Incubate the fermentation vessel at 25°C for 5-7 days while aerating (1 vvm) and stirring (200rpm) until maximum penicillin titre is reached (100-500 IU/mL).
23 | 5. Centrifuge the fermentation broth at 10000xg for 20 minutes to remove cells and debris.
24 | 6. Concentrate the supernatant using a rotary evaporator to remove excess water.
25 | 7. Pass the concentrate through an ion exchange resin to remove impurities.
26 | 8. Pass the concentrate through activated charcoal to remove pigments and odorous compounds.
27 | 9. Concentrate and wash the product using a reverse osmosis system.
28 | 10. Re-suspend the product in sterile 0.9% sodium chloride solution to achieve a concentration of 100,000 IU penicillin G per mL.
29 | 11. Filter sterilize the product through a 0.22μm membrane and store at 2-8°C.
30 | 
31 | The final product will be a sterile aqueous solution of penicillin G potassium salt at a concentration of 100,000 IU/mL.
Please note that additional purification steps may be required to produce pharmaceutical grade penicillin for human usage. 32 | 33 | The first step is to obtain penicillin mold cultures, specifically of the Penicillium chrysogenum species. These mold cultures must be obtained from a reputable culture collection and grown on a culture medium in a sterile environment. 34 | 35 | Once active mold cultures have been established, the next step is to inoculate seed tanks containing a growth medium of lactose, corn steep liquor, ammonium sulfate, and other nutrients to promote mold growth. The inoculated seed tanks are incubated for 2 to 3 days to allow for growth of the mold. 36 | 37 | After incubation, the seed tanks contain actively growing mold cultures that can be used to inoculate production fermenters. The production fermenters contain the same growth medium as the seed tanks but on a much larger scale, up to 100,000 liters. The production fermenters are inoculated with the seed tank mold cultures and incubated for 4 to 6 days to allow for large-scale penicillin production. 38 | 39 | During fermentation, the mold cultures produce penicillin, which accumulates in the fermentation broth. The fermentation broth is harvested and goes through a multi-stage filtration process to separate out the mold cultures and other particulates. The filtered broth then goes through an extraction process, using organic solvents to extract crude penicillin. -------------------------------------------------------------------------------- /workshops/Pharma_Manufacturing_Compliance_Bedrock_GenAI/requirements.txt: -------------------------------------------------------------------------------- 1 | gradio==4.26.0 2 | fastapi==0.110.1 3 | anthropic==0.23.1 -------------------------------------------------------------------------------- /workshops/Pharma_Manufacturing_Compliance_Bedrock_GenAI/sample_sop.txt: -------------------------------------------------------------------------------- 1 | 1. All incubations must be less than 2 days. 2 | 2. All sodium chloride must be greater than .95% solutions 3 | 3. No batch can exceed 500 liters in volume. 4 | 4. All filtration must use 0.2 micron filters or smaller. 5 | 5. No raw material can be used after 6 months from receipt. 6 | 6. All equipment must be sterilized at 121°C for at least 15 minutes. 7 | 7. No more than 2 different products can be manufactured in the same facility. 8 | 8. All surfaces must be wiped down with 70% isopropyl alcohol. 9 | 9. No batch record can have more than 10 deviations noted. 10 | 10. All finished products must have at least 2 years of shelf life remaining at time of release. 
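A minimal sketch of the compliance comparison the Pharma_Manufacturing_Compliance_Bedrock_GenAI notebooks perform on the two documents above, assuming Bedrock access from the current credentials; the model ID is an assumption, so substitute any Anthropic chat model enabled in your account:

```python
# Hypothetical compliance check: ask an Anthropic model on Amazon Bedrock to
# flag protocol steps that violate the SOP rules. The model ID below is an
# assumption, not a value taken from this repository.
import json

import boto3

bedrock = boto3.client("bedrock-runtime", region_name="us-east-1")
with open("penicillin_manufacturing.txt") as f:
    protocol = f.read()
with open("sample_sop.txt") as f:
    sop = f.read()

body = json.dumps({
    "anthropic_version": "bedrock-2023-05-31",
    "max_tokens": 1024,
    "messages": [{
        "role": "user",
        "content": f"SOP rules:\n{sop}\n\nManufacturing protocol:\n{protocol}\n\n"
                   "List each protocol step that violates an SOP rule, citing the rule.",
    }],
})
response = bedrock.invoke_model(
    modelId="anthropic.claude-3-sonnet-20240229-v1:0", body=body
)
print(json.loads(response["body"].read())["content"][0]["text"])
```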
--------------------------------------------------------------------------------
/workshops/Process_HCLS_Docs_Using_AI_Services/data/sample_report_1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Process_HCLS_Docs_Using_AI_Services/data/sample_report_1.pdf
--------------------------------------------------------------------------------
/workshops/Protein_Language_Modelling/deploy_esm_to_inf2/scripts/inference.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import torch
4 | import torch_neuronx
5 | from transformers import AutoTokenizer
6 | 
7 | JSON_CONTENT_TYPE = "application/json"
8 | # MODEL_ID = "facebook/esm2_t33_650M_UR50D"
9 | # MODEL_ID = "facebook/esm2_t12_35M_UR50D"
10 | MODEL_ID = "facebook/esm2_t6_8M_UR50D"
11 | 
12 | 
13 | def model_fn(model_dir):
14 |     """Load the traced model from model_dir and the tokenizer from HuggingFace"""
15 |     print(f"torch-neuronx version is {torch_neuronx.__version__}")
16 |     tokenizer_init = AutoTokenizer.from_pretrained(MODEL_ID)
17 |     model_file = os.path.join(model_dir, "traced_esm.pt")
18 |     neuron_model = torch.jit.load(model_file)
19 |     return (neuron_model, tokenizer_init)
20 | 
21 | 
22 | def input_fn(serialized_input_data, content_type=JSON_CONTENT_TYPE):
23 |     """Process the request payload"""
24 | 
25 |     if content_type == JSON_CONTENT_TYPE:
26 |         input_data = json.loads(serialized_input_data)
27 |         return input_data.pop("inputs", input_data)
28 |     else:
29 |         raise Exception("Requested unsupported ContentType in Accept: " + content_type)
30 | 
31 | 
32 | 
33 | def predict_fn(input_data, model_and_tokenizer):
34 |     """Run model inference"""
35 | 
36 |     neuron_model, tokenizer = model_and_tokenizer
37 |     max_length = 128
38 |     tokenized_sequence = tokenizer.encode_plus(
39 |         input_data,
40 |         max_length=max_length,
41 |         padding="max_length",
42 |         truncation=True,
43 |         return_tensors="pt",
44 |     )
45 |     prediction_input = (
46 |         tokenized_sequence["input_ids"],
47 |         tokenized_sequence["attention_mask"],
48 |     )
49 |     output = neuron_model(*prediction_input)[0]
50 |     mask_token_index = (tokenized_sequence.input_ids == tokenizer.mask_token_id)[
51 |         0
52 |     ].nonzero(as_tuple=True)[0]
53 |     mask_index_predictions = output[0, mask_token_index]
54 |     sigmoid = torch.nn.Sigmoid()
55 |     probs = sigmoid(mask_index_predictions)
56 |     return {
57 |         list(tokenizer.get_vocab().keys())[idx]: round(v.item(), 3)
58 |         for idx, v in enumerate(probs[0])
59 |     }
60 | 
61 | 
62 | def output_fn(prediction_output, accept=JSON_CONTENT_TYPE):
63 |     """Process the response payload"""
64 |     if accept == JSON_CONTENT_TYPE:
65 |         return json.dumps(prediction_output), accept
66 | 
67 |     raise Exception("Requested unsupported ContentType in Accept: " + accept)
68 | 
--------------------------------------------------------------------------------
/workshops/Protein_Language_Modelling/deploy_esm_to_inf2/scripts/requirements.txt:
--------------------------------------------------------------------------------
1 | --extra-index-url=https://pip.repos.neuron.amazonaws.com
2 | transformers
3 | torch-neuronx==1.13.1.1.10.1
--------------------------------------------------------------------------------
/workshops/Protein_Language_Modelling/finetune_esm_on_deeploc/scripts/inference.py:
--------------------------------------------------------------------------------
1 | # Copyright Amazon.com, Inc.
or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | 4 | from transformers import EsmForSequenceClassification, AutoTokenizer 5 | import torch 6 | 7 | 8 | def model_fn(model_dir): 9 | id2label = {0: "Non-Membrane", 1: "Membrane"} 10 | label2id = {"Non-Membrane": 0, "Membrane": 1} 11 | model = EsmForSequenceClassification.from_pretrained( 12 | model_dir, 13 | device_map="auto", 14 | num_labels=2, 15 | id2label=id2label, 16 | label2id=label2id, 17 | ) 18 | tokenizer = AutoTokenizer.from_pretrained(model_dir) 19 | 20 | return model, tokenizer 21 | 22 | 23 | def predict_fn(data, model_and_tokenizer): 24 | model, tokenizer = model_and_tokenizer 25 | model.eval() 26 | inputs = data.pop("inputs", data) 27 | encoding = tokenizer(inputs, return_tensors="pt") 28 | encoding = {k: v.to(model.device) for k, v in encoding.items()} 29 | results = model(**encoding) 30 | sigmoid = torch.nn.Sigmoid() 31 | probs = sigmoid(results.logits) 32 | probs = probs.cpu() 33 | return {"membrane_probability": probs[0][1].item()} 34 | -------------------------------------------------------------------------------- /workshops/Protein_Language_Modelling/finetune_esm_on_deeploc/scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate==0.24.1 2 | bitsandbytes==0.41.1 3 | datasets==2.14.6 4 | evaluate==0.4.3 5 | nvidia-ml-py3==7.352.0 6 | peft==0.5.0 7 | scikit-learn==1.3.2 8 | transformers==4.34.1 9 | torchinfo==1.8.0 -------------------------------------------------------------------------------- /workshops/Protein_Language_Modelling/finetune_esm_on_oas/scripts/cuda/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers==4.31.0 2 | datasets==2.14.2 3 | accelerate==0.21.0 4 | evaluate 5 | tensorboard -------------------------------------------------------------------------------- /workshops/Protein_Language_Modelling/finetune_esm_on_oas/scripts/esm-accelerate-examples/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | datasets 3 | accelerate 4 | torchinfo 5 | bitsandbytes 6 | nvidia-ml-py3 7 | peft==0.4.0 -------------------------------------------------------------------------------- /workshops/Protein_Language_Modelling/finetune_esm_on_oas/scripts/neuron/requirements.txt: -------------------------------------------------------------------------------- 1 | --extra-index-url https://pip.repos.neuron.amazonaws.com 2 | transformers==4.31.0 3 | datasets==2.14.2 4 | accelerate==0.21.0 5 | torch-neuronx 6 | evaluate -------------------------------------------------------------------------------- /workshops/Protein_Language_Modelling/img/protein.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Protein_Language_Modelling/img/protein.png -------------------------------------------------------------------------------- /workshops/Protein_Language_Modelling/pretrain_esm_on_uniref/scripts/processing/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate==0.25.0 2 | datasets==2.16.1 3 | pyfastx==2.0.2 4 | transformers==4.37.2 -------------------------------------------------------------------------------- 
/workshops/Protein_Language_Modelling/pretrain_esm_on_uniref/scripts/training/cuda/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate==0.26.1 2 | boto3==1.34.19 3 | botocore==1.34.19 4 | datasets==2.16.1 5 | sagemaker==2.203.1 6 | transformers==4.36.2 7 | nvidia-ml-py3==7.352.0 8 | torch==2.2.0 --index-url https://download.pytorch.org/whl/cu118 -------------------------------------------------------------------------------- /workshops/Protein_Language_Modelling/pretrain_esm_on_uniref/scripts/training/neuron/requirements.txt: -------------------------------------------------------------------------------- 1 | --extra-index-url https://pip.repos.neuron.amazonaws.com 2 | transformers==4.37.2 3 | datasets==2.16.1 4 | evaluate==0.4.1 5 | neuronx-cc==2.* 6 | --pre torch-neuronx==2.1.* -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. 
Send us a pull request, answering any default questions in the pull request interface.
37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
38 | 
39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
41 | 
42 | 
43 | ## Finding contributions to work on
44 | Looking at the existing issues is a great way to find something to contribute to. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start.
45 | 
46 | 
47 | ## Code of Conduct
48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
50 | opensource-codeofconduct@amazon.com with any additional questions or comments.
51 | 
52 | 
53 | ## Security issue notifications
54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue.
55 | 
56 | 
57 | ## Licensing
58 | 
59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
60 | 
--------------------------------------------------------------------------------
/workshops/RNAseq_Tertiary_Analysis/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this
4 | software and associated documentation files (the "Software"), to deal in the Software
5 | without restriction, including without limitation the rights to use, copy, modify,
6 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
7 | permit persons to whom the Software is furnished to do so.
8 | 
9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
10 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
11 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
12 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
13 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
14 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/640px-Gene_structure_eukaryote_2_annotated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/640px-Gene_structure_eukaryote_2_annotated.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/MLLC1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/MLLC1.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/MLLC2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/MLLC2.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/Trial-component-list.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/Trial-component-list.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/approve-prod.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/approve-prod.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/brca_stats.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/brca_stats.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/charts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/charts.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/cloned_folders.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/cloned_folders.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/code-pipeline.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/code-pipeline.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/create_project.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/create_project.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/deploy-stage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/deploy-stage.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/deployment_options.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/deployment_options.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/deployments.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/deployments.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/exp-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/exp-1.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/exp-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/exp-2.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/exp-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/exp-3.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/exp-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/exp-4.png 
-------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/experiments.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/experiments.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/find-prod-deploy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/find-prod-deploy.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/jobs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/jobs.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/lineage_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/lineage_graph.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/metrics.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/mlflow-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/mlflow-diagram.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/model_registry.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/model_registry.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/overexpression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/overexpression.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/pipeline.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/pipeline.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/pipeline_execution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/pipeline_execution.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/project-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/project-1.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/project-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/project-2.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/project-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/project-3.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/project-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/project-4.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/project_name.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/project_name.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/projects.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/projects.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/repo_defaults.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/repo_defaults.png 
-------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/repositories.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/repositories.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/second-endpoint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/second-endpoint.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/select-model-version.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/select-model-version.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/sidebar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/sidebar.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/sm-resources-tab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/sm-resources-tab.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/sm_experiments.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/sm_experiments.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/tc-list-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/tc-list-2.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/template_build.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/template_build.jpg -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/template_deploy.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/template_deploy.jpg -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/img/update-status.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/RNAseq_Tertiary_Analysis/img/update-status.png -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/scripts/processing/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.35.16 2 | sagemaker==2.231.0 3 | -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/scripts/rf_train/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.35.16 2 | sagemaker==2.231.0 3 | mlflow==2.13.2 4 | sagemaker-mlflow==0.1.0 5 | -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/scripts/tf_train/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.35.16 2 | sagemaker==2.231.0 3 | mlflow==2.13.2 4 | sagemaker-mlflow==0.1.0 5 | -------------------------------------------------------------------------------- /workshops/RNAseq_Tertiary_Analysis/scripts/xgb_train/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.35.16 2 | sagemaker==2.231.0 3 | mlflow==2.13.2 4 | sagemaker-mlflow==0.1.0 5 | -------------------------------------------------------------------------------- /workshops/Sagemaker_Pipelines_Automated_Retraining/kick_off_pipeline_lambda.py: -------------------------------------------------------------------------------- 1 | import json 2 | import boto3 3 | import logging 4 | import os 5 | import copy 6 | 7 | 8 | # get environment variables 9 | # name of bucket lambda gets notifications from 10 | NOTIFICATION_BUCKET_NAME = os.environ["NOTIFICATION_BUCKET_NAME"] 11 | SAGEMAKER_PIPELINE_NAME = os.environ["SAGEMAKER_PIPELINE_NAME"] 12 | 13 | 14 | def read_in_file_from_s3(bucketname, filename): 15 | """reads in the file from S3 and returns the content from the body of the file""" 16 | s3 = boto3.resource("s3") 17 | obj = s3.Object(bucketname, filename) 18 | body = obj.get()["Body"].read() 19 | return body 20 | 21 | 22 | def convert_to_s3uri(bucketname, filename): 23 | the_uri = f"s3://{bucketname}/{filename}" 24 | return the_uri 25 | 26 | 27 | def kick_off_sagemaker_pipeline(pipelinename=None, s3uri=None): 28 | client = boto3.client("sagemaker") 29 | PipelineParameters = [ 30 | {"Name": "InputData", "Value": f"{s3uri}"}, 31 | ] 32 | response = client.start_pipeline_execution( 33 | PipelineName=pipelinename, PipelineParameters=PipelineParameters 34 | ) 35 | return response 36 | 37 | 38 | def lambda_handler(event, context): 39 | # uncomment to log event info 40 | # logging.info(json.dumps(event)) 41 | 42 | filename = event["Records"][0]["s3"]["object"]["key"] 43 | filename_basename = os.path.basename(filename) 44 | 45 | the_s3uri = convert_to_s3uri(NOTIFICATION_BUCKET_NAME, filename) 46 | the_response = kick_off_sagemaker_pipeline( 47 | 
pipelinename=SAGEMAKER_PIPELINE_NAME, s3uri=the_s3uri 48 | ) 49 | # logging.info(json.dumps(content_4)) 50 | # put_file_in_s3(f'''{filename_basename}_out''',json.dumps(content_4),OUTPUT_BUCKET_NAME) 51 | 52 | return {"statusCode": 200, "body": json.dumps("Hello from Lambda!")} 53 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [0.2.1] 9 | 10 | - Fixed a bug in the variant scoring strategy `pseudolikelihood_ratio` when `parallel_chains` was greater than 1. 11 | - Added the ability to save results (output sequences and scores, plus a few other tidbits) to a CSV file by calling `save_results()` on the DirectedEvolution object. 12 | - Minor modification to `embeddings.py` to support pLMs using mixed precision. 13 | - Added unit tests for the `VariantScoring` class and a new unit test for the sampler to test saving results. 14 | - Fixed a bug with `torch.softmax` in `utils.safe_logits_to_probs`. 15 | 16 | ## [0.2] 17 | 18 | ### Major change - Variant Scoring 19 | 20 | - The ability to change the expert variant scoring strategy has been added. There is now a class `VariantScoring` which can be configured with a `scoring_strategy` argument (currently supported: `attribute_value`, `pseudolikelihood_ratio`, and `mutant_marginal` (NEW)). Each expert has an instance of a `VariantScoring` class. It is defined in `evo_prot_grad.common.variant_scoring`. 21 | - The main entry point for instantiating an expert, `get_expert`, now has a `scoring_strategy` argument for configuring the expert. 22 | - The `use_without_wildtype` argument of the Expert class has been removed. Each scoring strategy normalizes the score with respect to the wildtype score, so this was superfluous. If you want to instantiate an expert and use it outside of the DirectedEvolution class, you have to explicitly call `expert.init_wildtype(wt_seq)` before calling the expert to cache the wildtype score (see below). 23 | - `Expert` private class method `_model_output_to_scalar_score` has been removed in favor of a public-facing method `get_model_output`. This method can be used to directly get expert scores for sequences. 24 | - The `Expert` class no longer has a `wt_score` attribute. The wildtype score is now stored in the `VariantScoring` class (`wt_score_cache`). 25 | 26 | ### Minor changes 27 | 28 | - The `Expert` abstract class now publicly exposes the following methods: `init_wildtype`, for storing the wildtype string sequence and caching the WT score, `tokenize` for tokenizing a sequence, `get_model_output` which accepts a list of protein sequence strings and returns the one-hot encoded sequences and the expert model's predictions.
29 | - Renamed `experts.base_experts.HuggingFaceExpert` to `experts.base_experts.ProteinLMExpert` 30 | - Improved error message reporting for `get_expert` 31 | - Upgraded `transformers[torch]` to `4.38.0` 32 | 33 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to EvoProtGrad 2 | 3 | We are excited for community contributions and are actively reviewing and accepting pull requests! 4 | There are a variety of ways in which you can contribute beyond writing code. 5 | This document provides a high-level overview of how you can get involved. 6 | 7 | 8 | ## Asking Questions 9 | 10 | Have a question? Rather than opening an issue directly, please ask questions 11 | or post comments in [Q&A Discussions](https://github.com/NREL/EvoProtGrad/discussions/categories/q-a). 12 | The NREL team or other members of the community will assist. Your well-worded 13 | question will serve as a resource to others searching for help. 14 | 15 | 16 | ## Providing Feedback 17 | 18 | Your comments and feedback are very welcome. Please post to 19 | [General Discussions](https://github.com/NREL/EvoProtGrad/discussions/categories/general) 20 | with lots of information and detail. It is beneficial to consider 21 | how someone else will understand your comments in order to make 22 | them most effective. 23 | 24 | 25 | ## Reporting Issues 26 | 27 | Have you identified a reproducible problem in EvoProtGrad? 28 | Have a feature request? We want to hear about it! Here's how you can make 29 | reporting your issue as effective as possible. 30 | 31 | ### Look For an Existing Issue 32 | 33 | Before you create a new issue, please do a search to see if 34 | the issue or feature request has already been filed. 35 | 36 | If you find your issue already exists, make relevant comments and add your 37 | [reaction](https://github.com/blog/2119-add-reactions-to-pull-requests-issues-and-comments). 38 | Use a reaction in place of a "+1" comment: 39 | 40 | - 👍 - upvote 41 | - 👎 - downvote 42 | 43 | If you cannot find an existing issue that describes your bug or feature, 44 | create a new issue using the guidelines below. 45 | 46 | ### Writing Good Bug Reports and Feature Requests 47 | 48 | File a single issue per problem and feature request. Do not enumerate 49 | multiple bugs or feature requests in the same issue. 50 | 51 | Do not add your issue as a comment to an existing issue unless it's for the 52 | identical input. Many issues look similar, but have different causes. 53 | 54 | The more information you can provide, the more likely someone will 55 | be successful at reproducing the issue and finding a fix. 56 | 57 | Please follow the issue template guidelines to include relevant information 58 | that will help in diagnosing the problem. 59 | 60 | ### Final Checklist 61 | 62 | Please remember to do the following: 63 | 64 | - [ ] Search the issue repository to ensure your report is a new issue 65 | 66 | - [ ] Recreate the issue with a minimally descriptive example 67 | 68 | - [ ] Simplify your code around the issue to better isolate the problem 69 | 70 | 71 | ## Contributing Fixes 72 | 73 | If you are interested in writing code to fix an issue or 74 | submit a new feature, let us know in 75 | [Ideas Discussions](https://github.com/NREL/EvoProtGrad/discussions/categories/ideas)! 76 | 77 | Coming Soon - developer guidelines!
78 | 79 | This doc takes heavy inspiration from [floris](https://github.com/NREL/floris/blob/main/CONTRIBUTING.md) (thanks!). 80 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2023, Alliance for Sustainable Energy, LLC 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | 3. Neither the name of the copyright holder nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/__init__.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | from typing import Optional, Union 3 | import torch.nn as nn 4 | from transformers import PreTrainedTokenizerBase 5 | from evo_prot_grad.experts.base_experts import Expert 6 | from evo_prot_grad.common.tokenizers import ExpertTokenizer 7 | from evo_prot_grad.common.sampler import DirectedEvolution 8 | 9 | def get_expert(expert_name: str, 10 | scoring_strategy: str, 11 | temperature: float = 1.0, 12 | model: Optional[nn.Module] = None, 13 | tokenizer: Optional[Union[ExpertTokenizer, PreTrainedTokenizerBase]] = None, 14 | device: str = 'cpu') -> Expert: 15 | """ 16 | Currently supported expert types (to pass to argument `expert_name`): 17 | 18 | - `bert` 19 | - `causallm` 20 | - `esm` 21 | - `evcouplings` 22 | - `onehot_downstream_regression` 23 | 24 | Customize the expert by specifying the model and tokenizer.
25 | For example: 26 | 27 | ```python 28 | from evo_prot_grad import get_expert 29 | from transformers import AutoTokenizer, EsmForMaskedLM 30 | 31 | expert = get_expert( 32 | expert_name = 'esm', 33 | model = EsmForMaskedLM.from_pretrained("facebook/esm2_t36_3B_UR50D"), 34 | tokenizer = AutoTokenizer.from_pretrained("facebook/esm2_t36_3B_UR50D"), 35 | scoring_strategy = 'mutant_marginal', 36 | temperature = 1.0, 37 | device = 'cuda' 38 | ) 39 | ``` 40 | 41 | Args: 42 | expert_name (str): Name of the expert to be used. 43 | scoring_strategy (str): Approach for scoring variants that the expert will use. 44 | temperature (float, optional): Temperature for the expert. Defaults to 1.0. 45 | model (Optional[nn.Module], optional): Model to be used for the expert. Defaults to None. 46 | tokenizer (Optional[Union[ExpertTokenizer, PreTrainedTokenizerBase]], optional): Tokenizer to be used for the expert. Defaults to None. 47 | device (str, optional): Device to be used for the expert. Defaults to 'cpu'. 48 | 49 | Raises: 50 | ValueError: If the expert name is not found. 51 | 52 | Returns: 53 | expert (Expert): An instance of the expert. 54 | """ 55 | try: 56 | expert_mod = importlib.import_module(f"evo_prot_grad.experts.{expert_name}_expert") 57 | except ImportError: 58 | raise ValueError(f"Expert {expert_name} not found in evo_prot_grad.experts.") 59 | 60 | return expert_mod.build( 61 | temperature = temperature, 62 | scoring_strategy = scoring_strategy, 63 | model = model, 64 | tokenizer = tokenizer, 65 | device = device, 66 | ) 67 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/common/__init__.py -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/common/embeddings.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class IdentityEmbedding(nn.Module): 6 | """ 7 | A module that does nothing except store 8 | the most recent one_hots tensor. 9 | """ 10 | def __init__(self): 11 | super().__init__() 12 | self.one_hots = None 13 | 14 | def forward(self, one_hots: torch.Tensor) -> torch.Tensor: 15 | """ Cache the one_hots tensor and return it. 16 | 17 | Args: 18 | one_hots (torch.Tensor): A torch.FloatTensor of shape [batch_size, max_sequence_len, vocab_size]. 19 | 20 | Returns: 21 | one_hots (torch.Tensor): The same one_hots tensor that was passed in. 22 | """ 23 | self.one_hots = one_hots.requires_grad_() 24 | return self.one_hots 25 | 26 | 27 | class OneHotEmbedding(nn.Module): 28 | """Compute the embeddings for a sequence of amino acids. 29 | Converts a sequence of amino acids to a sequence of one-hot vectors first. 30 | Caches the one-hot tensors for computing gradients with respect to 31 | the one-hot tensors.
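The gradient-based sampler differentiates the expert score with respect to these cached one-hot tensors to decide which positions and residues to mutate.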
32 | """ 33 | def __init__( 34 | self, 35 | nn_embeddings: nn.Embedding 36 | ): 37 | super().__init__() 38 | self.weight = nn_embeddings.weight 39 | self.one_hots = None 40 | 41 | def forward(self, input_ids: torch.LongTensor) -> torch.Tensor: 42 | """ Compute the embeddings for a sequence of amino acids, 43 | caching the one-hot tensors for computing gradients with respect to 44 | the one-hot tensors. 45 | 46 | Args: 47 | input_ids (torch.LongTensor): Amino acid sequences of shape [batch_size, max_sequence_len]. 48 | Returns: 49 | embeddings (torch.FloatTensor): Amino acid embeddings of shape [batch_size, max_sequence_len, embedding_dim]. 50 | """ 51 | weights_dtype = self.weight.dtype # could be float16 if using mixed precision 52 | high_precision = torch.float32 # optionally float64 ?? 53 | # convert input_ids to one_hots 54 | # one_hots is a torch.FloatTensor of shape [batch_size, max_sequence_len, vocab_size] 55 | one_hots = torch.nn.functional.one_hot(input_ids, num_classes=self.weight.shape[0]) 56 | one_hots = one_hots.to(dtype=high_precision) # Ensure one_hots are in float32 for gradient computation 57 | # Cache the one_hots 58 | self.one_hots = one_hots.requires_grad_() 59 | # Compute the embeddings and convert back to low precision if necessary 60 | embeddings = self.one_hots @ self.weight.to(dtype=high_precision) 61 | embeddings = embeddings.to(dtype=weights_dtype) 62 | return embeddings -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/common/tokenizers.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import torch 3 | from typing import List, Dict 4 | 5 | 6 | class ExpertTokenizer(abc.ABC): 7 | """Base interface for custom Expert tokenizers. 8 | """ 9 | def __init__(self, alphabet: List[str]) -> None: 10 | """ 11 | Args: 12 | alphabet (List[str]): A list of amino acid characters. 13 | """ 14 | self.alphabet = alphabet 15 | self.vocab_size = len(alphabet) 16 | self.vocab = {aa: i for i, aa in enumerate(alphabet)} 17 | 18 | def get_vocab(self) -> Dict: 19 | """Return the vocab, a mapping of amino acid characters to integers.""" 20 | return self.vocab 21 | 22 | @abc.abstractmethod 23 | def __call__(self, seqs: List[str]) -> torch.FloatTensor: 24 | """Convert seqs to one hot tensors. 25 | 26 | Args: 27 | seqs (List[str]): A list of protein sequence strings of len [parallel_chains]. 28 | Returns: 29 | ohs (torch.FloatTensor): of shape [parallel_chains, seq_len, vocab_size] 30 | """ 31 | raise NotImplementedError() 32 | 33 | @abc.abstractmethod 34 | def decode(self, ohs: torch.Tensor) -> List[str]: 35 | """Convert one-hot tensors back to a list of string sequences. 36 | 37 | Args: 38 | ohs (torch.Tensor): shape [parallel_chains, seq_len, vocab_size] 39 | Returns: 40 | seqs (List[str]): A list of protein sequence strings of len [parallel_chains]. 41 | """ 42 | raise NotImplementedError() 43 | 44 | 45 | 46 | class OneHotTokenizer(ExpertTokenizer): 47 | """Converts a string of amino acids into one-hot tensors. 48 | """ 49 | def __init__(self, alphabet: List[str]): 50 | """ 51 | Args: 52 | alphabet (List[str]): A list of amino acid characters. 53 | """ 54 | super().__init__(alphabet) 55 | 56 | def __call__(self, seqs: List[str]) -> torch.FloatTensor: 57 | """Convert seqs to one hot tensors. 58 | Assumes each sequence is the same length. Handles sequences 59 | with spaces between amino acids. 
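For example, 'M K T' and 'MKT' produce identical one-hot encodings.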
60 | 61 | Args: 62 | seqs (List[str]): A list of protein sequence strings of len [parallel_chains]. 63 | Returns: 64 | ohs (torch.FloatTensor): of shape [parallel_chains, seq_len, vocab_size] 65 | """ 66 | # convert seqs to ints 67 | seqs_ = [[self.vocab[aa] for aa in seq.upper() if aa != ' '] for seq in seqs] 68 | # convert to tensor using torch.nn.functional.one_hot() 69 | ohs = torch.nn.functional.one_hot(torch.LongTensor(seqs_), num_classes=self.vocab_size) 70 | return ohs.float() 71 | 72 | 73 | def decode(self, ohs: torch.Tensor) -> List[str]: 74 | """Convert one-hot tensors back to a list of string sequences with 75 | a space between each amino acid. 76 | 77 | Args: 78 | ohs (torch.Tensor): shape [parallel_chains, seq_len, vocab_size] 79 | Returns: 80 | seqs (List[str]): A list of protein sequence strings of len [parallel_chains]. 81 | """ 82 | ohs = ohs.argmax(dim=-1) 83 | return [' '.join([self.alphabet[i] for i in oh]) for oh in ohs] 84 | 85 | 86 | #### Add new tokenizers here #### -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/experts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/experts/__init__.py -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/experts/amplify_expert.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from typing import Optional, List 4 | from transformers import AutoModel, AutoTokenizer, PreTrainedTokenizerBase 5 | from transformers.tokenization_utils_base import BatchEncoding 6 | from evo_prot_grad.experts.base_experts import ProteinLMExpert 7 | import evo_prot_grad.common.embeddings as embeddings 8 | 9 | 10 | class AmplifyExpert(ProteinLMExpert): 11 | """Expert base class for HuggingFace protein language models from the Amplify family. 12 | Implements abstract methods `_get_last_one_hots` and `tokenize`. 13 | Swaps out the `encoder` (Embedding) layer 14 | for a `evo_prot_grad.common.embeddings.OneHotEmbedding` layer. 15 | """ 16 | 17 | def __init__( 18 | self, 19 | temperature: float, 20 | scoring_strategy: str, 21 | model: Optional[nn.Module] = None, 22 | tokenizer: Optional[PreTrainedTokenizerBase] = None, 23 | device: str = "cuda", 24 | ): 25 | """ 26 | Args: 27 | 28 | temperature (float): Temperature for sampling from the expert. 29 | scoring_strategy (str): Approach for scoring variants that the expert will use. 30 | model (nn.Module): The model to use for the expert. Defaults to Amplify model from chandar-lab/AMPLIFY_350M. 31 | tokenizer (PreTrainedTokenizerBase): The tokenizer to use for the expert. Defaults to AutoTokenizer from chandar-lab/AMPLIFY_350M. 32 | device (str): The device to use for the expert. Defaults to 'cuda'. 33 | Raises: 34 | ValueError: If either `model` or `tokenizer` is not specified.
35 | """ 36 | if model is None and tokenizer is None: 37 | model = AutoModel.from_pretrained( 38 | "chandar-lab/AMPLIFY_350M", trust_remote_code=True 39 | ) 40 | tokenizer = AutoTokenizer.from_pretrained( 41 | "chandar-lab/AMPLIFY_350M", trust_remote_code=True 42 | ) 43 | elif model is None or tokenizer is None: 44 | raise ValueError( 45 | "AmplifyExpert requires both `model` and `tokenizer` to be specified." 46 | ) 47 | vocab = tokenizer.get_vocab() 48 | super().__init__(temperature, model, vocab, scoring_strategy, device) 49 | self.tokenizer = tokenizer 50 | self.model.encoder = embeddings.OneHotEmbedding(model.encoder) 51 | 52 | def _get_last_one_hots(self) -> torch.Tensor: 53 | """Returns the one-hot tensors *most recently passed* as input.""" 54 | return self.model.encoder.one_hots 55 | 56 | def tokenize(self, inputs: List[str]) -> BatchEncoding: 57 | """Convert inputs to a format suitable for the model. 58 | 59 | Args: 60 | inputs (List[str]): A list of protein sequence strings of len [parallel_chains]. 61 | Returns: 62 | batch_encoding (BatchEncoding): A BatchEncoding object. 63 | """ 64 | # Remove all spaces between amino acids 65 | inputs = [seq.replace(" ", "") for seq in inputs] 66 | return self.tokenizer( 67 | inputs, 68 | add_special_tokens=False, 69 | return_tensors="pt", 70 | return_attention_mask=False, 71 | ).to(self.device) 72 | 73 | 74 | def build(**kwargs): 75 | """Builds a AmplifyExpert.""" 76 | return AmplifyExpert(**kwargs) 77 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/experts/bert_expert.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | import re 3 | import torch 4 | import torch.nn as nn 5 | from transformers import PreTrainedTokenizerBase 6 | from transformers import BertForMaskedLM, BertTokenizer 7 | from transformers.tokenization_utils_base import BatchEncoding 8 | from evo_prot_grad.experts.base_experts import ProteinLMExpert 9 | import evo_prot_grad.common.embeddings as embeddings 10 | 11 | 12 | class BERTExpert(ProteinLMExpert): 13 | """Expert sub-class for BERT-style HuggingFace protein language models. 14 | Implements abstract methods `_get_last_one_hots` and `tokenize`. 15 | Swaps out the `BertForMaskedLM.bert.embeddings.word_embeddings` layer 16 | for a `evo_prot_grad.common.embeddings.OneHotEmbedding` layer. 17 | """ 18 | def __init__(self, 19 | temperature: float, 20 | scoring_strategy: str, 21 | model: Optional[nn.Module] = None, 22 | tokenizer: Optional[PreTrainedTokenizerBase] = None, 23 | device: str = 'cpu'): 24 | """ 25 | Args: 26 | temperature (float): Temperature for sampling from the expert. 27 | scoring_strategy (str): Approach for scoring variants that the expert will use. 28 | model (nn.Module): The model to use for the expert. 29 | tokenizer (PreTrainedTokenizerBase): The tokenizer to use for the expert. 30 | device (str): The device to use for the expert. 31 | Raises: 32 | ValueError: If either `model` or `tokenizer` is not specified. 
33 | """ 34 | if model is None and tokenizer is None: 35 | model = BertForMaskedLM.from_pretrained("Rostlab/prot_bert") 36 | tokenizer = BertTokenizer.from_pretrained("Rostlab/prot_bert", do_lower_case=False) 37 | elif model is None or tokenizer is None: 38 | raise ValueError("BERTExpert requires both `model` and `tokenizer` to be specified.") 39 | super().__init__( 40 | temperature, 41 | model, 42 | tokenizer.get_vocab(), 43 | scoring_strategy, 44 | device) 45 | self.tokenizer = tokenizer 46 | self.model.bert.embeddings.word_embeddings = embeddings.OneHotEmbedding(model.bert.embeddings.word_embeddings) 47 | 48 | 49 | def _get_last_one_hots(self) -> torch.Tensor: 50 | """ Returns the one-hot tensors *most recently passed* as input. 51 | 52 | Returns: 53 | one_hots (torch.Tensor): of shape [parallel_chains, seq_len, vocab_size] 54 | """ 55 | return self.model.bert.embeddings.word_embeddings.one_hots 56 | 57 | 58 | def tokenize(self, inputs) -> BatchEncoding: 59 | """Convert inputs to a format suitable for the model. 60 | 61 | Args: 62 | inputs (List[str]): A list of protein sequence strings of len [parallel_chains]. 63 | Returns: 64 | batch_encoding (BatchEncoding): A BatchEncoding object. 65 | """ 66 | inputs = [re.sub(r"[UZOB]", "X", inputs_) for inputs_ in inputs] 67 | return self.tokenizer(inputs, return_tensors='pt').to(self.device) 68 | 69 | 70 | def build(**kwargs): 71 | """Builds a BERTExpert.""" 72 | return BERTExpert(**kwargs) -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/experts/causallm_expert.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List 2 | import torch.nn as nn 3 | from transformers import PreTrainedTokenizerBase 4 | from transformers import AutoModelForCausalLM, AutoTokenizer 5 | from transformers.tokenization_utils_base import BatchEncoding 6 | from evo_prot_grad.experts.base_experts import ProteinLMExpert 7 | import evo_prot_grad.common.embeddings as embeddings 8 | 9 | 10 | class CausalLMExpert(ProteinLMExpert): 11 | """Expert sub-class for autoregressive (causal) HuggingFace protein language models. 12 | Implements abstract methods `_get_last_one_hots` and `tokenize`. 13 | Swaps out the `AutoModelForCausalLM.transformer.embedding` layer 14 | for a `evo_prot_grad.common.embeddings.OneHotEmbedding` layer. 15 | """ 16 | def __init__(self, 17 | temperature: float, 18 | scoring_strategy: str, 19 | model: Optional[nn.Module] = None, 20 | tokenizer: Optional[PreTrainedTokenizerBase] = None, 21 | device: str = 'cpu'): 22 | """ 23 | Args: 24 | temperature (float): Temperature for sampling from the expert. 25 | scoring_strategy (str): Approach for scoring variants that the expert will use. 26 | model (nn.Module): The model to use for the expert. Defaults to AutoModelForCausalLM from lightonai/RITA_s. 27 | tokenizer (PreTrainedTokenizerBase): The tokenizer to use for the expert. Defaults to AutoTokenizer from lightonai/RITA_s. 28 | device (str): The device to use for the expert. Defaults to 'cpu'. 29 | Raises: 30 | ValueError: If either `model` or `tokenizer` is not specified. 
31 | """ 32 | if model is None and tokenizer is None: 33 | model = AutoModelForCausalLM.from_pretrained("lightonai/RITA_s", trust_remote_code=True) 34 | tokenizer = AutoTokenizer.from_pretrained("lightonai/RITA_s", ) 35 | elif model is None or tokenizer is None: 36 | raise ValueError("CausalLMExpert requires both `model` and `tokenizer` to be specified.") 37 | vocab = tokenizer.get_vocab() 38 | if '' in vocab: 39 | vocab.pop('') 40 | super().__init__( 41 | temperature = temperature, 42 | model = model, 43 | vocab = vocab, 44 | scoring_strategy = scoring_strategy, 45 | device = device 46 | ) 47 | self.tokenizer = tokenizer 48 | self.model.transformer.embedding = embeddings.OneHotEmbedding(model.transformer.embedding) 49 | 50 | 51 | def _get_last_one_hots(self): 52 | """ Returns the one-hot tensors *most recently passed* as input. 53 | """ 54 | return self.model.transformer.embedding.one_hots 55 | 56 | 57 | def tokenize(self, inputs: List[str]) -> BatchEncoding: 58 | """Convert inputs to a format suitable for the model. 59 | 60 | Args: 61 | inputs (List[str]): A list of protein sequence strings of len [parallel_chains]. 62 | Returns: 63 | batch_encoding (BatchEncoding): A BatchEncoding object. 64 | """ 65 | # Remove all spaces between amino acids 66 | inputs = [seq.replace(' ', '') for seq in inputs] 67 | return self.tokenizer(inputs, add_special_tokens=False, return_tensors="pt").to(self.device) 68 | 69 | 70 | def build(**kwargs): 71 | """Builds a RitaExpert.""" 72 | return CausalLMExpert(**kwargs) -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/experts/esm_downstream_regression_expert.py: -------------------------------------------------------------------------------- 1 | from evo_prot_grad.experts.base_experts import AttributeExpert 2 | import evo_prot_grad.common.utils as utils 3 | import torch 4 | import torch.nn as nn 5 | 6 | from typing import Optional, List, Tuple 7 | from transformers import AutoTokenizer, PreTrainedTokenizerBase 8 | import evo_prot_grad.common.embeddings as embeddings 9 | from transformers.tokenization_utils_base import BatchEncoding 10 | from transformers import DataCollatorForLanguageModeling 11 | 12 | 13 | class EsmDownstreamRegressionExpert(AttributeExpert): 14 | """ESM2 regression expert.""" 15 | 16 | def __init__( 17 | self, 18 | temperature: float, 19 | scoring_strategy: str, 20 | model: nn.Module, 21 | tokenizer: PreTrainedTokenizerBase, 22 | device: str, 23 | ): 24 | """ 25 | Args: 26 | temperature (float): Temperature for sampling from the expert. 27 | scoring_strategy (str): Approach for scoring variants that the expert will use. 28 | model (Module): The model to use for the expert. 29 | tokenizer (PreTrainedTokenizerBase): The tokenizer to use for the expert. 30 | device (str): The device to use for the expert. 31 | """ 32 | if (model is None) or (tokenizer is None): 33 | raise ValueError( 34 | "ESM2 Regression Expert requires both `model` and `tokenizer` to be specified." 
35 | ) 36 | 37 | assert scoring_strategy == "attribute_value" 38 | super().__init__(temperature, model, scoring_strategy, device, tokenizer) 39 | self.tokenizer = tokenizer 40 | self.model.esm.embeddings.word_embeddings = embeddings.OneHotEmbedding( 41 | model.esm.embeddings.word_embeddings 42 | ) 43 | 44 | def _get_last_one_hots(self) -> torch.Tensor: 45 | """Returns the one-hot tensors *most recently passed* as input.""" 46 | return self.model.esm.embeddings.word_embeddings.one_hots 47 | 48 | def tokenize(self, inputs: List[str]) -> BatchEncoding: 49 | """Convert inputs to a format suitable for the model. 50 | 51 | Args: 52 | inputs (List[str]): A list of protein sequence strings of len [parallel_chains]. 53 | Returns: 54 | batch_encoding (BatchEncoding): A BatchEncoding object. 55 | """ 56 | return self.tokenizer(inputs, add_special_tokens=False, return_tensors="pt").to( 57 | self.device 58 | ) 59 | 60 | def get_model_output(self, inputs: List[str]) -> Tuple[torch.Tensor, torch.Tensor]: 61 | """Returns both the onehot-encoded inputs and model's predictions. 62 | 63 | Args: 64 | inputs (List[str]): A list of protein sequence strings of len [parallel_chains]. 65 | Returns: 66 | x_oh: (torch.Tensor) of shape [parallel_chains, seq_len, vocab_size] 67 | attribute_values: (torch.Tensor) of shape [parallel_chains] 68 | """ 69 | encoded_inputs = self.tokenize(inputs) 70 | attribute_values = self.model(**encoded_inputs).logits.squeeze() 71 | x_oh = self._get_last_one_hots() 72 | return x_oh, attribute_values 73 | 74 | 75 | def build(**kwargs): 76 | """Builds an EsmDownstreamRegressionExpert.""" 77 | return EsmDownstreamRegressionExpert(**kwargs) 78 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/experts/esm_expert.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from typing import Optional, List 4 | from transformers import AutoTokenizer, PreTrainedTokenizerBase 5 | from transformers import EsmForMaskedLM 6 | from transformers.tokenization_utils_base import BatchEncoding 7 | from evo_prot_grad.experts.base_experts import ProteinLMExpert 8 | import evo_prot_grad.common.embeddings as embeddings 9 | 10 | 11 | class EsmExpert(ProteinLMExpert): 12 | """Expert base class for HuggingFace protein language models from the ESM family. 13 | Implements abstract methods `_get_last_one_hots` and `tokenize`. 14 | Swaps out the `EsmForMaskedLM.esm.embeddings.word_embeddings` layer 15 | for a `evo_prot_grad.common.embeddings.OneHotEmbedding` layer. 16 | """ 17 | 18 | def __init__( 19 | self, 20 | temperature: float, 21 | scoring_strategy: str, 22 | model: Optional[nn.Module] = None, 23 | tokenizer: Optional[PreTrainedTokenizerBase] = None, 24 | device: str = "cpu", 25 | ): 26 | """ 27 | Args: 28 | temperature (float): Temperature for sampling from the expert. 29 | scoring_strategy (str): Approach for scoring variants that the expert will use. 30 | model (nn.Module): The model to use for the expert. Defaults to EsmForMaskedLM from facebook/esm2_t6_8M_UR50D. 31 | tokenizer (PreTrainedTokenizerBase): The tokenizer to use for the expert. Defaults to AutoTokenizer from facebook/esm2_t6_8M_UR50D. 32 | device (str): The device to use for the expert. Defaults to 'cpu'. 33 | Raises: 34 | ValueError: If either `model` or `tokenizer` is not specified.
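Example (a sketch mirroring the `get_expert` docstring; any ESM-2 checkpoint can be substituted for the default 8M one shown here):

```python
from transformers import AutoTokenizer, EsmForMaskedLM
from evo_prot_grad.experts.esm_expert import EsmExpert

expert = EsmExpert(
    temperature=1.0,
    scoring_strategy='mutant_marginal',
    model=EsmForMaskedLM.from_pretrained("facebook/esm2_t6_8M_UR50D"),
    tokenizer=AutoTokenizer.from_pretrained("facebook/esm2_t6_8M_UR50D"),
    device='cpu',
)
```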
35 | """ 36 | if model is None and tokenizer is None: 37 | model = EsmForMaskedLM.from_pretrained("facebook/esm2_t6_8M_UR50D") 38 | tokenizer = AutoTokenizer.from_pretrained("facebook/esm2_t6_8M_UR50D") 39 | elif model is None or tokenizer is None: 40 | raise ValueError( 41 | "EsmExpert requires both `model` and `tokenizer` to be specified." 42 | ) 43 | super().__init__( 44 | temperature, model, tokenizer.get_vocab(), scoring_strategy, device 45 | ) 46 | self.tokenizer = tokenizer 47 | self.model.esm.embeddings.word_embeddings = embeddings.OneHotEmbedding( 48 | model.esm.embeddings.word_embeddings 49 | ) 50 | 51 | def _get_last_one_hots(self) -> torch.Tensor: 52 | """Returns the one-hot tensors *most recently passed* as input.""" 53 | return self.model.esm.embeddings.word_embeddings.one_hots 54 | 55 | def tokenize(self, inputs: List[str]) -> BatchEncoding: 56 | """Convert inputs to a format suitable for the model. 57 | 58 | Args: 59 | inputs (List[str]): A list of protein sequence strings of len [parallel_chains]. 60 | Returns: 61 | batch_encoding (BatchEncoding): A BatchEncoding object. 62 | """ 63 | return self.tokenizer(inputs, add_special_tokens=False, return_tensors="pt").to( 64 | self.device 65 | ) 66 | 67 | 68 | def build(**kwargs): 69 | """Builds a Esm2Expert.""" 70 | return EsmExpert(**kwargs) 71 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/experts/evcouplings_expert.py: -------------------------------------------------------------------------------- 1 | from evo_prot_grad.experts.base_experts import Expert 2 | from evo_prot_grad.common.tokenizers import OneHotTokenizer 3 | import evo_prot_grad.common.utils as utils 4 | import evo_prot_grad.models.potts as potts 5 | from typing import List, Tuple, Optional 6 | import torch 7 | 8 | 9 | class EVCouplingsExpert(Expert): 10 | """Expert class for EVCouplings Potts models. 11 | EVCouplings lib uses the canonical alphabet by default. 12 | 13 | Implements abstract methods `_get_last_one_hots`, `tokenize`, `get_model_output`, `__call__`. 14 | """ 15 | def __init__(self, 16 | temperature: float, 17 | scoring_strategy: str, 18 | model: potts.EVCouplings, 19 | device: str, 20 | tokenizer: Optional[OneHotTokenizer] = None): 21 | """ 22 | Args: 23 | temperature (float): Temperature for sampling from the expert. 24 | scoring_strategy (str): Approach for scoring variants that the expert will use. 25 | model (potts.EVCouplings): The model to use for the expert. 26 | device (str): The device to use for the expert. 27 | tokenizer (Optional[OneHotTokenizer]): The tokenizer to use for the expert. If None, uses 28 | OneHotTokenizer(utils.CANONICAL_ALPHABET, device). 29 | """ 30 | assert model is not None, "EVCouplingsExpert requires a potts.EVCouplings model to be provided." 
31 | assert scoring_strategy == "attribute_value" 32 | if tokenizer is None: 33 | tokenizer = OneHotTokenizer(utils.CANONICAL_ALPHABET) 34 | super().__init__(temperature, 35 | model, 36 | tokenizer.get_vocab(), 37 | scoring_strategy, 38 | device=device) 39 | assert model.alphabet == self.alphabet, \ 40 | f"EVcouplings alphabet {model.alphabet} should match our canonical alphabet {self.alphabet}" 41 | self.tokenizer = tokenizer 42 | 43 | ####### "Abstract" methods ####### 44 | 45 | def _get_last_one_hots(self) -> torch.Tensor: 46 | return self.model.one_hot_embedding.one_hots 47 | 48 | 49 | def tokenize(self, inputs: List[str]) -> torch.FloatTensor: 50 | return self.tokenizer(inputs).to(self.device) 51 | 52 | 53 | def get_model_output(self, inputs: List[str]) -> Tuple[torch.Tensor, torch.Tensor]: 54 | encoded_inputs = self.tokenize(inputs) 55 | hamiltonian = self.model(encoded_inputs) 56 | oh = self._get_last_one_hots() 57 | return oh, hamiltonian 58 | 59 | 60 | def __call__(self, inputs: List[str]) -> Tuple[torch.Tensor, torch.Tensor]: 61 | """Compute the wildtype-normalized Hamiltonian expert score. 62 | Args: 63 | inputs (List[str]): A list of protein sequence strings of len [parallel_chains]. 64 | Returns: 65 | oh (torch.Tensor): of shape [parallel_chains, seq_len, vocab_size] 66 | expert_score (torch.Tensor): of shape [parallel_chains] 67 | """ 68 | oh, hamiltonian = self.get_model_output(inputs) 69 | score = self.variant_scoring(oh, hamiltonian, self._wt_oh) 70 | return oh, score 71 | 72 | 73 | def build(**kwargs): 74 | return EVCouplingsExpert(**kwargs) -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/experts/onehot_downstream_regression_expert.py: -------------------------------------------------------------------------------- 1 | from evo_prot_grad.experts.base_experts import AttributeExpert 2 | from evo_prot_grad.common.tokenizers import OneHotTokenizer 3 | import evo_prot_grad.common.utils as utils 4 | from torch.nn import Module 5 | from typing import Optional 6 | 7 | 8 | class OneHotDownstreamRegressionExpert(AttributeExpert): 9 | """ Basic one-hot regression expert.""" 10 | def __init__(self, 11 | temperature: float, 12 | scoring_strategy: str, 13 | model: Module, 14 | device: str, 15 | tokenizer: Optional[OneHotTokenizer] = None): 16 | """ 17 | Args: 18 | temperature (float): Temperature for sampling from the expert. 19 | scoring_strategy (str): Approach for scoring variants that the expert will use. 20 | model (Module): The model to use for the expert. 21 | device (str): The device to use for the expert. 22 | tokenizer (Optional[OneHotTokenizer], optional): The tokenizer to use for the expert. If None, 23 | a OneHotTokenizer will be constructed. Defaults to None. 
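A regression model that consumes one-hot sequences and returns a scalar per chain, such as the `OneHotCNN` in `evo_prot_grad/models/downstream_cnn.py`, is a natural fit.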
24 | """ 25 | if tokenizer is None: 26 | tokenizer = OneHotTokenizer(utils.CANONICAL_ALPHABET) 27 | assert scoring_strategy == "attribute_value" 28 | super().__init__(temperature, 29 | model, 30 | scoring_strategy, 31 | device, 32 | tokenizer) 33 | 34 | 35 | def build(**kwargs): 36 | """Builds a OneHotDownstreamExpert.""" 37 | return OneHotDownstreamRegressionExpert(**kwargs) -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/evo_prot_grad/models/downstream_cnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class OneHotCNN(nn.Module): 7 | """A CNN that takes one-hot encoded sequences as input. 8 | 9 | OneHotCNN uses 1D convolution over the one-hot encoding dimension 10 | to embed each amino acid into a vector of size matching the 11 | sequence length, and uses length max-pooling (1D max-pooling on 12 | the sequence length dimension) to reduce this dimension to 1. 13 | The output is then fed through a linear layer to produce a single scalar output. 14 | """ 15 | def __init__(self, vocab_size: int, kernel_size: int, 16 | input_size: int, dropout=0.0): 17 | """ 18 | Args: 19 | vocab_size (int): the size of the vocabulary (e.g., 20). 20 | kernel_size (int): the size of the convolutional kernel 21 | input_size (int): the size of the input embedding 22 | dropout (float): the dropout probability 23 | """ 24 | super().__init__() 25 | self.encoder = nn.Conv1d(vocab_size, input_size, 26 | kernel_size=kernel_size) 27 | self.embedding = nn.Sequential( 28 | nn.Linear(input_size, input_size*2), 29 | nn.ReLU(True) 30 | ) 31 | self.decoder = nn.Linear(input_size*2, 1) 32 | self.n_tokens = vocab_size 33 | self.dropout = nn.Dropout(dropout) 34 | self.input_size = input_size 35 | 36 | 37 | def forward(self, x: torch.Tensor) -> torch.Tensor: 38 | """ 39 | Args: 40 | x (torch.Tensor): one-hot tensor of shape [parallel_chains, seq_len, vocab_size] 41 | Returns: 42 | output (torch.Tensor): shape [parallel_chains] 43 | """ 44 | # encode 45 | x = F.relu(self.encoder(x.transpose(1,2)).transpose(1,2)) 46 | # embed 47 | x = self.embedding(x) 48 | # length-dim pool 49 | x = torch.max(x, dim=1)[0] 50 | x = self.dropout(x) 51 | # decoder 52 | output = self.decoder(x) 53 | return output.squeeze(1) # [parallel_chains] -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/requirements.txt: -------------------------------------------------------------------------------- 1 | transformers[torch]==4.38.0 2 | pandas -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/EvoProtGrad/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | with open('requirements.txt', 'r') as f: 4 | requirements = f.read().splitlines() 5 | 6 | with open('README.md', 'r') as f: 7 | readme = f.read() 8 | 9 | setup(name='evo_prot_grad', 10 | version='0.2.1', 11 | description='Directed evolution of proteins with fast gradient-based discrete MCMC.', 12 | author='Patrick Emami', 13 | author_email='Patrick.Emami@nrel.gov', 14 | url='https://github.nrel.gov/NREL/EvoProtGrad/', 15 | python_requires='>=3.8', 16 | install_requires=requirements, 17 | long_description=readme, 18 | long_description_content_type='text/markdown', 19 | 
packages=find_packages(include=['evo_prot_grad', 20 | 'evo_prot_grad.common', 21 | 'evo_prot_grad.experts', 22 | 'evo_prot_grad.models'], 23 | exclude=['test']), 24 | license='BSD 3-Clause', 25 | keywords=['protein engineering', 'directed evolution', 'huggingface', 'protein language models', 'mcmc'], 26 | classifiers=[ 27 | "Development Status :: 3 - Alpha", 28 | "Intended Audience :: Science/Research", 29 | "License :: OSI Approved :: BSD License", 30 | "Natural Language :: English", 31 | "Programming Language :: Python :: 3", 32 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 33 | ] 34 | ) 35 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/active_learning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Scalable_Drug_Discovery/img/active_learning.png -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/dmtl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Scalable_Drug_Discovery/img/dmtl.png -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/elements.txt: -------------------------------------------------------------------------------- 1 | 2 | 1A 2A 3A 4A 5A 6A 7A 8A 3 | ----- ----- 4 | 1 | H | |He | 5 | |---+---- --------------------+---| 6 | 2 |Li |Be | | B | C | N | O | F |Ne | 7 | |---+---| |---+---+---+---+---+---| 8 | 3 |Na |Mg |3B 4B 5B 6B 7B | 8B |1B 2B |Al |Si | P | S |Cl |Ar | 9 | |---+---+---------------------------------------+---+---+---+---+---+---| 10 | 4 | K |Ca |Sc |Ti | V |Cr |Mn |Fe |Co |Ni |Cu |Zn |Ga |Ge |As |Se |Br |Kr | 11 | |---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---| 12 | 5 |Rb |Sr | Y |Zr |Nb |Mo |Tc |Ru |Rh |Pd |Ag |Cd |In |Sn |Sb |Te | I |Xe | 13 | |---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---| 14 | 6 |Cs |Ba |LAN|Hf |Ta | W |Re |Os |Ir |Pt |Au |Hg |Tl |Pb |Bi |Po |At |Rn | 15 | |---+---+---+------------------------------------------------------------ 16 | 7 |Fr |Ra |ACT| 17 | ===--------------------------------------------------------------------=== 18 | Lanthanide |La |Ce |Pr |Nd |Pm |Sm |Eu |Gd |Tb |Dy |Ho |Er |Tm |Yb |Lu | 19 | |---+---+---+---+---+---+---+---+---+---+---+---+---+---+---| 20 | Actinide |Ac |Th |Pa | U |Np |Pu |Am |Cm |Bk |Cf |Es |Fm |Md |No |Lw | 21 | ------------------------------------------------------------- 22 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/evo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Scalable_Drug_Discovery/img/evo.png -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/flame.txt: -------------------------------------------------------------------------------- 1 | 2 | __ [] 3 | || [] 4 | || [] 5 | || [] 6 | __ || [] 7 | || || [] 8 | .-||-||-. 
[] /\ 9 | _\_______/_===========[]=(-o) 10 | )\_____/( [] \/ 11 | / || \ [] 12 | / || \ [] 13 | / || \ [] 14 | /~~~~~~~~~~~~~~~\ [] 15 | / :: \ [] 16 | ( :: ) [] 17 | `-----------------' [] 18 | ) [] 19 | ( ) [] 20 | )( . ( [] 21 | .) @@) ) [] 22 | ` ) @@(@@)@ [] 23 | (@@(@@)@ [] 24 | @(@.@)@@ [] 25 | ` (@{__}@)` [] 26 | :__; [] 27 | ___ {}+ [] 28 | ( = ) .---'`---. [] 29 | | |_ jgs / \ ________[]____ 30 | ____| |_|==========(____________)_/______________\ 31 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/flask.txt: -------------------------------------------------------------------------------- 1 | 2 | |-| * 3 | |-| _ * __ 4 | |-| | * |/' 5 | |-| |~*~~~o~| 6 | |-| | O o *| 7 | /___\ |o___O__| 8 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/ft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Scalable_Drug_Discovery/img/ft.png -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/gen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Scalable_Drug_Discovery/img/gen.png -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/helix1.txt: -------------------------------------------------------------------------------- 1 | 2 | 6098)o%:::%o(860 3 | 098)o%:::%o(8609 4 | 6o%:%o(86098) 5 | (86098)o 6 | 6098)o%::%o9 7 | 098)o%::::::%o9 8 | 6o%::::::%o(860 9 | 6o%::%o(8609 10 | o(86098) 11 | (86098)o%:%o9 12 | 6098)o%:::%o(860 13 | 098)o%:::%o(8609 14 | 6o%:%o(86098) 15 | (86098)o 16 | 6098)o%::%o9 17 | 098)o%::::::%o9 18 | 6o%::::::%o(860 19 | 6o%::%o(8609 20 | o(86098) 21 | (86098)o%:%o9 22 | 6098)o%:::%o(860 23 | 098)o%:::%o(8609 24 | 6o%:%o(86098) 25 | (86098)o 26 | 6098)o%::%o9 27 | 098)o%::::::%o9 28 | 6o%::::::%o(860 29 | 6o%::%o(8609 30 | o(86098) 31 | (86098)o%:%o9 32 | 6098)o%:::%o(860 33 | 098)o%:::%o(8609 34 | 6o%:%o(86098) 35 | (86098)o 36 | 6098)o%::%o9 37 | 098)o%::::::%o9 38 | 6o%::::::%o(860 39 | 6o%::%o(8609 40 | o(86098) 41 | (86098)o%:%o9 42 | 6098)o%:::%o(860 43 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/helix2.txt: -------------------------------------------------------------------------------- 1 | 2 | // \\ // \\ // \\ // \\ // \\ // \\ // \\ 3 | \\ \\ // | :,\\': | \\ // | :,\\': | \\ // | :,\\': | \\ 4 | \\ | |\\ // | | // \\ | |\\ // | |// \\ | \\ // | | // \\ | | 5 | \\ | :,\\': | // \\ | :,\\': | // \\ | :,\\': | // \\ | 6 | \\ // \\ // \\ // \\ // \\ // \\ // \\ 7 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/lab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Scalable_Drug_Discovery/img/lab.png -------------------------------------------------------------------------------- 
/workshops/Scalable_Drug_Discovery/img/nanobody.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Scalable_Drug_Discovery/img/nanobody.png -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/science.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | _____ __ __ __ __ _____ _ __ __ 4 | / ___// /_____ _____ ____/ / / /_ ____ ______/ /__ / _( )____ ___ ____ _____ (_)___ ____ _ / /_____ / /________ __ 5 | \__ \/ __/ __ `/ __ \/ __ / / __ \/ __ `/ ___/ //_/ / / |// __ `__ \ / __ `/ __ \/ / __ \/ __ `/ / __/ __ \ / __/ ___/ / / / 6 | ___/ / /_/ /_/ / / / / /_/ / / /_/ / /_/ / /__/ ,< _/ / / / / / / / / /_/ / /_/ / / / / / /_/ / / /_/ /_/ / / /_/ / / /_/ / 7 | /____/\__/\__,_/_/ /_/\__,_/ /_.___/\__,_/\___/_/|_| /___/ /_/ /_/ /_/ \__, /\____/_/_/ /_/\__, / \__/\____/ \__/_/ \__, / 8 | /____/ /____/ /____/ 9 | 888 10 | .d8888b .d8888b888 .d88b. 88888b. .d8888b .d88b. 888 11 | 88K d88P" 888d8P Y8b888 "88bd88P" d8P Y8b 888 12 | "Y8888b.888 88888888888888 888888 88888888 888 13 | X88Y88b. 888Y8b. 888 888Y88b. Y8b. 14 | 88888P' "Y8888P888 "Y8888 888 888 "Y8888P "Y8888 888 15 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/score.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Scalable_Drug_Discovery/img/score.png -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/select.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/Scalable_Drug_Discovery/img/select.png -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/img/sine.txt: -------------------------------------------------------------------------------- 1 | 2 | .-. .-. 3 | / \ .-. .-. / \ 4 | / \ / \ .-. _ .-. 
/ \ / \ 5 | -/-------\-------/-----\-----/---\---/-\---/---\-----/-----\-------/-------\-- 6 | \ / \ / `-' `-' \ / \ / 7 | \ / `-' `-' \ / 8 | `-' `-' 9 | -------------------------------------------------------------------------------- /workshops/Scalable_Drug_Discovery/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate==0.34.2 2 | biotite==1.3.0 3 | bitsandbytes==0.44.1 4 | datasets==3.0.1 5 | jsonlines==4.0.0 6 | matplotlib==3.9.2 7 | py3dmol==2.4.2 8 | pyfastx==2.1.0 9 | sentencepiece==0.2.0 10 | transformers==4.52.3 11 | xformers>=0.0.28 12 | protobuf==5.28.3 -------------------------------------------------------------------------------- /workshops/X_ray_Object_Detection_Ground_Truth/chest_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/X_ray_Object_Detection_Ground_Truth/chest_image.png -------------------------------------------------------------------------------- /workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/.gitignore: -------------------------------------------------------------------------------- 1 | data/* 2 | train.py 3 | -------------------------------------------------------------------------------- /workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/README: -------------------------------------------------------------------------------- 1 | # Analyze Tabular Data With A Custom Classifier 2 | 3 | NOTE: This workshop is currently deprecated and only included in this repository for reference. We do not recommend using it as part of AWS-hosted or self-managed events. 
-------------------------------------------------------------------------------- /workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_create_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_create_flow.png -------------------------------------------------------------------------------- /workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_export.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_export.png -------------------------------------------------------------------------------- /workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_export_start.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_export_start.png -------------------------------------------------------------------------------- /workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_import_s3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_import_s3.png -------------------------------------------------------------------------------- /workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_import_s3_start.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_import_s3_start.png -------------------------------------------------------------------------------- /workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_rename_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_rename_flow.png -------------------------------------------------------------------------------- /workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform.png -------------------------------------------------------------------------------- /workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_add.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_add.png
--------------------------------------------------------------------------------
/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_custom.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_custom.png
--------------------------------------------------------------------------------
/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_drop_column_diagnosisb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_drop_column_diagnosisb.png
--------------------------------------------------------------------------------
/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_drop_column_id.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_drop_column_id.png
--------------------------------------------------------------------------------
/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_encode_categorical.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_encode_categorical.png
--------------------------------------------------------------------------------
/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_pandas.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_pandas.png
--------------------------------------------------------------------------------
/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_rename.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws-samples/aws-healthcare-lifescience-ai-ml-sample-notebooks/9c7ad3eb898a4200273a59453819c8e362d96169/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/images/dw_transform_rename.png
--------------------------------------------------------------------------------
/workshops/archive/Bring_Your_Own_Sklearn_Classifier/archive/requirements.txt:
--------------------------------------------------------------------------------
1 | sagemaker-datawrangler==0.3.8
2 | boto3==1.26.30
3 | matplotlib==3.6.2
4 | pandas==1.5.2
5 | sagemaker==2.123.0
6 | scikit-learn==1.2.0
7 | s3fs==0.4.2
8 |
--------------------------------------------------------------------------------
/workshops/archive/Summarize_Scientific_Documents/README:
--------------------------------------------------------------------------------
1 | # Summarize Scientific Documents
2 |
3 | NOTE: This workshop is deprecated and is included in this repository for reference only. We do not recommend using it as part of AWS-hosted or self-managed events.
--------------------------------------------------------------------------------
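
The archived requirements.txt above pins exact versions of every dependency, so the workshop environment can be reproduced by installing the file with pip. A minimal sketch in Python, assuming the file has been copied into the current working directory (the local path and invocation are illustrative, not part of the workshop itself):

# Minimal sketch: install the pinned dependencies from the archived
# Bring_Your_Own_Sklearn_Classifier workshop. Assumes requirements.txt has been
# copied into the current working directory; the local path is an assumption.
import subprocess
import sys

# Invoke pip through the running interpreter so the pinned packages
# (e.g. sagemaker==2.123.0, scikit-learn==1.2.0) land in the active environment.
subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"])

Pinning through a requirements file rather than installing latest versions matters here because the workshop is archived: newer sagemaker or scikit-learn releases may have breaking API changes.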