├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── check-links-self.yaml │ └── notebook-lint-self.yaml ├── .gitignore ├── .markdown-link-check.json ├── README.md ├── docs ├── Cloud_Lab_Terms_And_Conditions.md ├── Configure_AWSParallelCluster.md ├── Genomics_Workflows.md ├── Install_AWSParallelCluster.md ├── Intramural_STAKs.md ├── Jupyter_notebook.md ├── agc.md ├── auto-shutdown-instance.md ├── billing_and_cost_management.md ├── cis_hardened_vm.md ├── connect_ec2.md ├── connect_to_EC2.md ├── create_athena_database.md ├── create_code_repo.md ├── create_conda_env.md ├── ecr.md ├── environment.yml ├── extramural_account_registration.md ├── images │ ├── 1.click_support_center.png │ ├── 1_EC2_homepage.png │ ├── 1_NIH_login.png │ ├── 1_click_roles.png │ ├── 1_clone_respository1.png │ ├── 1_clone_respository2.png │ ├── 1_cloud_access.png │ ├── 1_find_code_commit.png │ ├── 1_find_ecr.png │ ├── 1_find_sagemaker.png │ ├── 1_open_marketplace.png │ ├── 1_select_instance_ID.png │ ├── 2.click_Create_case.png │ ├── 2_click_connect.png │ ├── 2_click_stak.png │ ├── 2_create_registry.png │ ├── 2_create_repository.png │ ├── 2_create_respository.png │ ├── 2_input_university.png │ ├── 2_manage_subscriptions.png │ ├── 2_monitoring_tab.png │ ├── 2_new_notebook_instance.png │ ├── 2_sagemaker_role.png │ ├── 3.technical_support.png │ ├── 3_add_repo_info.png │ ├── 3_configure_sagemaker_instance.png │ ├── 3_general_settings.png │ ├── 3_grant_access.png │ ├── 3_inline_policy.png │ ├── 3_paste_creds.png │ ├── 3_session_manager.png │ ├── 3_view_metrics.png │ ├── 4.describe_issue.png │ ├── 4_add_inline_form.png │ ├── 4_connect_ssh.png │ ├── 4_final_formv2.png │ ├── 4_list_docker_images.png │ ├── 4_open_jupyter.png │ ├── 4_stop_instance.png │ ├── 5.enter_contact_info.png │ ├── 5_edit_instance.png │ ├── 5_name_and_create.png │ ├── 5_select_kernel.png │ ├── 5_terminal.png │ ├── 6_aws_example_notebooks.png │ ├── 6_change_instance_type.png │ ├── 6_confirm_policy.png │ ├── 6_signin_logingov.png │ ├── 7_clone_repo.png │ ├── 7_mfa.png │ ├── 8_open_notebook.png │ ├── 9_run_notebook.png │ ├── AMI.png │ ├── EC2_add_tags.png │ ├── EC2_edit_tags.png │ ├── Ec2_filter_service.png │ ├── IAM_SSM_role.png │ ├── ParallelclusterUI.PNG │ ├── PubMed_chatbot_results.png │ ├── Q-IAM-role.png │ ├── Q-R-script.png │ ├── Q-amazon-q-jup.png │ ├── Q-code-completion-1.png │ ├── Q-code-completion.png │ ├── Q-domain-name.png │ ├── Q-explain.png │ ├── Q-fix.png │ ├── Q-iam-policy-review.png │ ├── Q-jupy-lab.png │ ├── Q-optimize-script.png │ ├── Q-optimize.png │ ├── Q-parallel-processing.png │ ├── Q-role-policy.png │ ├── Q-send-cell-with-prompt.png │ ├── Q-snakemake-cloud.png │ ├── Q-snakemake-cluod.png │ ├── Q-snakemake-wf.png │ ├── SSH.png │ ├── SSM.png │ ├── add_alert_threshold.png │ ├── add_env_path_aws.jpeg │ ├── add_script.png │ ├── add_tags_bucket.png │ ├── add_tags_sagemaker.png │ ├── athena │ │ ├── 10_create_database.png │ │ ├── 11_run_crawler.png │ │ ├── 1_select_data_sources.png │ │ ├── 2_click_create_dataset.png │ │ ├── 3_select_glue.png │ │ ├── 4_glue_catalog.png │ │ ├── 5_name_crawler.png │ │ ├── 6_click_add_data_source.png │ │ ├── 7_add_data_source.png │ │ ├── 8_create_role.png │ │ ├── 9_output_scheduling.png │ │ ├── browse_s3.png │ │ ├── choose_s3_bucket.png │ │ ├── result_location.png │ │ └── run_query.png │ ├── aws_batch_1.png │ ├── aws_batch_2.png │ ├── aws_batch_3.png │ ├── aws_batch_4.png │ ├── aws_batch_5.png │ ├── aws_batch_6.png │ ├── aws_batch_7.png │ ├── 
aws_batch_8.png │ ├── aws_cost_management.png │ ├── bedrock_agents_1.png │ ├── bedrock_agents_2.png │ ├── bedrock_agents_3.png │ ├── bedrock_agents_4.png │ ├── bedrock_agents_5.png │ ├── bedrock_agents_6.png │ ├── bedrock_agents_7.png │ ├── bedrock_chat_playground_1.png │ ├── bedrock_chat_playground_2.png │ ├── bedrock_chat_playground_3.png │ ├── bedrock_chat_playground_4.png │ ├── bedrock_knowledgebase_1.png │ ├── bedrock_knowledgebase_10.png │ ├── bedrock_knowledgebase_2.png │ ├── bedrock_knowledgebase_3.png │ ├── bedrock_knowledgebase_4.png │ ├── bedrock_knowledgebase_5.png │ ├── bedrock_knowledgebase_6.png │ ├── bedrock_knowledgebase_7.png │ ├── bedrock_knowledgebase_8.png │ ├── bedrock_knowledgebase_9.png │ ├── bedrock_model_access.png │ ├── bedrock_page.png │ ├── blast_costs.png │ ├── bucket_properties.png │ ├── budget_alerts.png │ ├── budget_scope.png │ ├── budget_type.png │ ├── change_end_date.png │ ├── click_configuration.png │ ├── configure_budget_aws.png │ ├── connect_ec2.png │ ├── cost_explorer.png │ ├── create-cluster.png │ ├── create_budget.png │ ├── create_notebook_instance.jpeg │ ├── ec2-filtered.png │ ├── edit_instance_aws.png │ ├── edit_tags_bucket.png │ ├── filter_tag.png │ ├── instance_type.png │ ├── kernel_vcftools_aws.jpeg │ ├── launch_instance.png │ ├── launcher_terminal.jpeg │ ├── launcher_vcftools_aws.jpeg │ ├── mac.png │ ├── memverge_mmb1.png │ ├── memverge_mmb10.png │ ├── memverge_mmb11.png │ ├── memverge_mmb12.png │ ├── memverge_mmb2.png │ ├── memverge_mmb3.png │ ├── memverge_mmb4.png │ ├── memverge_mmb5.png │ ├── memverge_mmb6.png │ ├── memverge_mmb7.png │ ├── memverge_mmb8.png │ ├── memverge_mmb9.png │ ├── nav-url-from-output.png │ ├── nav_budget.png │ ├── new_key.png │ ├── pcui.png │ ├── sagemaker_add_tags.png │ ├── sagemaker_edit_tags.png │ ├── sagemaker_edit_tags2.png │ ├── search_billing.png │ ├── security_group.png │ ├── submit_budget.png │ ├── update_notebook_instance.png │ └── windows.jpg ├── parabricks.md ├── request_enterprise_support.md ├── right_sizing_vm.md ├── service_quotas.md └── update_sagemaker_role.md └── notebooks ├── AWSBatch ├── .gitkeep ├── Intro_AWS_Batch.ipynb └── MemVerge_MMBatch.ipynb ├── ElasticBLAST └── run_elastic_blast.ipynb ├── GWAS └── GWAS_coat_color.ipynb ├── GenAI ├── AWS_Amazon_Q_Developer.ipynb ├── AWS_Bedrock_Intro.ipynb ├── AWS_GenAI_Huggingface.ipynb ├── AWS_GenAI_Jumpstart.ipynb ├── Pubmed_RAG_chatbot.ipynb └── example_scripts │ ├── bioinformatics_testing.py │ ├── kendra_chat_llama_2.py │ ├── langchain_chat_llama_2_zeroshot.py │ └── quick-actions-testing.ipynb ├── SRADownload └── SRA-Download.ipynb ├── Snakemake ├── AWS-ParallelCluster.ipynb └── aws-parallel-cluster-files │ ├── bioinformatics-example │ ├── Snakefile │ ├── config.yml │ └── environment.yml │ ├── hello-world-snakemake │ └── Snakefile │ └── hello-world.slurm ├── SpleenLiverSegmentation ├── README.md ├── SpleenSeg_Pretrained-4_27.ipynb └── monai_data │ └── Spleen_best_metric_model_pretrained.pth ├── pangolin └── pangolin_pipeline.ipynb └── rnaseq-myco-tutorial-main ├── LICENSE ├── README.md ├── RNAseq_pipeline.ipynb └── images ├── count-workflow.png ├── rnaseq-workflow.png └── table-cushman.png /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | 2 | --- 3 | name: Bug report 4 | about: Create a report to help us improve 5 | title: '' 6 | labels: bug 7 | assignees: kyleoconnell-NIH, dchesterNIH 8 | 9 | --- 10 | 11 | **Describe the bug** 12 | A clear and concise description of what the bug 
is. Please include the name and url of the tutorial you have an issue with. 13 | 14 | **To Reproduce** 15 | Steps to reproduce the behavior: 16 | 1. Go to '...' 17 | 2. Click on '....' 18 | 3. Scroll down to '....' 19 | 4. See error 20 | 21 | **Expected behavior** 22 | A clear and concise description of what you expected to happen. 23 | 24 | **Screenshots** 25 | If applicable, add screenshots to help explain your problem. 26 | 27 | **Cloud Environment (please complete the following information):** 28 | - Cloud Provider: [e.g. Google Cloud, AWS, Azure] 29 | - Machine Type: [e.g. n2-standard-8] 30 | - Operating system: [e.g. Debian 10] 31 | 32 | **Additional context** 33 | Add any other context about the problem here. 34 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: kyleoconnell-NIH, dchesterNIH 7 | 8 | --- 9 | **Is your feature request related to a problem? Please describe.** 10 | A clear and concise description of what the problem is. Ex. I want a tutorial that deals with medical image analysis [...] 11 | 12 | **Describe the solution you'd like** 13 | A clear and concise description of what you want to happen. Should this be a new tutorial or addition to existing tutorial. 14 | 15 | **Describe alternatives you've considered** 16 | A clear and concise description of any alternative solutions or features you've considered. 17 | 18 | **Additional context** 19 | Add any other context or screenshots about the feature request here. 20 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Pull Request Template 2 | 3 | ### Description 4 | 5 | *Provide a description of your changes here. If this PR is related to an issue, list the issue number/name here* 6 | 7 | ### Assignee 8 | 9 | *Assignees: @kyleoconnell-NIH* 10 | 11 | ## PR checklist 12 | *Please ensure the following:* 13 | - [ ] This comment contains a description of changes (with reason). 14 | - [ ] All changes were tested. 15 | - [ ] If you've fixed a bug mention the issue number/name. 16 | - [ ] Apply approriate tags (e.g. 
documentation, bug) 17 | -------------------------------------------------------------------------------- /.github/workflows/check-links-self.yaml: -------------------------------------------------------------------------------- 1 | name: 'Check Links' 2 | on: 3 | workflow_dispatch: 4 | push: 5 | paths-ignore: 6 | - './docs/agc.md' 7 | - './docs/Genomics_Workflows.md' 8 | schedule: 9 | - cron: '0 0 1 */3 *' 10 | 11 | jobs: 12 | link_check: 13 | name: 'Link Check' 14 | uses: STRIDES/NIHCloudLab/.github/workflows/check-links.yaml@main 15 | with: 16 | repo_link_ignore_list: "" 17 | -------------------------------------------------------------------------------- /.github/workflows/notebook-lint-self.yaml: -------------------------------------------------------------------------------- 1 | name: 'Lint Notebook' 2 | on: 3 | push: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: '0 0 1 */3 *' 7 | permissions: 8 | contents: write 9 | id-token: write 10 | 11 | jobs: 12 | lint: 13 | name: 'Linting' 14 | uses: STRIDES/NIHCloudLab/.github/workflows/notebook-lint.yaml@main 15 | with: 16 | directory: . 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | https://github.com/STRIDES/NIHCloudLabAWS/blob/main/docs/Genomics_Workflows.md 2 | https://github.com/STRIDES/NIHCloudLabAWS/blob/main/docs/agc.md 3 | -------------------------------------------------------------------------------- /.markdown-link-check.json: -------------------------------------------------------------------------------- 1 | { 2 | "ignorePatterns": [ 3 | { 4 | "pattern": "^#" 5 | }, 6 | { 7 | "pattern": "^https://iam.nih.gov" 8 | } 9 | ], 10 | "replacementPatterns": [ 11 | { 12 | "pattern": "^/docs", 13 | "replacement": "https://github.com/STRIDES/NIHCloudLabAWS/tree/main/docs" 14 | }, 15 | { 16 | "pattern": "^/notebooks", 17 | "replacement": "https://github.com/STRIDES/NIHCloudLabAWS/tree/main/notebooks" 18 | } 19 | 20 | ], 21 | "timeout": "20s", 22 | "retryOn429": true, 23 | "retryCount": 5, 24 | "fallbackRetryDelay": "30s", 25 | "aliveStatusCodes": [200, 206] 26 | } -------------------------------------------------------------------------------- /docs/Cloud_Lab_Terms_And_Conditions.md: -------------------------------------------------------------------------------- 1 | 2 | ## NIH CLOUD LAB PROGRAM REQUIREMENTS 3 | 4 | Revised: February 2, 2022 5 | 6 | ### PROGRAM OVERVIEW 7 | 8 | Cloud Lab is a service maintained by the National Institutes of Health’s Center for Information Technology (CIT) providing NIH and NIH-supported researchers of various cloud skill levels an environment to evaluate cloud products, run proof of concepts (POCs) or “pilots,” gauge price and technical implications, and create a launchpad to accelerate research. Cloud Lab was created in partnership with major cloud providers and partners. 9 | 10 | In order to access Cloud Lab, you must adhere to the requirements, terms, and conditions set forth in this document. 11 | 12 | ### ELIGIBILITY REQUIREMENTS 13 | 14 | Eligible users for Cloud Lab include, but are not limited to, the following: 15 | 16 | Governmental User: Individuals employed by U.S. government agencies conducting biomedical research consistent with NIH’s mission, to seek fundamental knowledge about the nature and behavior of living systems and the application of that knowledge to enhance health, lengthen life, and reduce illness and disability. 
17 | 18 | Non-Governmental User: Individuals serving as a PI or co-PI on an active NIH funding award (e.g., grant, contract, cooperative agreement, OTA), or designated as such on an application for an NIH funding award with the knowledge and permission of the NIH funding recipient or applicant; or individuals serving as PI or Co-PI on an active or proposed project under an NIH CRADA, with the knowledge and permission of the collaborator institution 19 | 20 | Specific requirements for the Eligible User include the following: 21 | 22 | Eligible User will store data of no greater than “moderate” risk level, as defined by NIST Federal Information Processing Standards Publication 199 (“FIPS 199”). 23 | 24 | Eligible User will not store data that is subject to the Health Insurance Portability and Accountability Act (HIPAA), nor will they store personally identifiable information (PII) or Protected Health Information (PHI), except by explicit written consent of NIH. 25 | 26 | Eligible User shall not store data in data centers outside of the Continental United States except by explicit written consent of NIH. 27 | 28 | Eligible Users shall adhere to all U.S. federal, state, and local laws and applicable to his/her use of the Cloud Services, including, but not limited to, intellectual property and export control laws. 29 | 30 | ### PROGRAM PARTICIPATION REQUIREMENTS 31 | 32 | The following requirements apply to all Eligible Users (hereafter called “applicants”) applying for receipt of Cloud Lab Creditsi , whether or not they are awarded such Credits: 33 | 34 | Applicant acknowledges that Cloud Lab Credits are not a part of NIH’s grant-making or other formal award processes. Furthermore, receipt of Cloud Lab Credits to a Principal Investigator (PI) or Co-PI is not the award of or equivalent to the award of a grant, but results only in the recipient becoming a “Active Participant” in Cloud Lab. 35 | 36 | Applicant acknowledges that the processes for conferring Cloud lab Credits is separate and distinct from the NIH grant award process, and NIH reserves the right to modify or adopt this in the future. 37 | 38 | Applicant acknowledges that information about applicant’s utilization of Cloud Lab Credits will be available to NIH, and that this information will be aggregated with other data to assess the feasibility of the Cloud Lab approach. 39 | 40 | Applicant acknowledges that NIH makes the final decisions regarding recipients and amounts of Credits. Applicant accepts these decisions are not reviewable and cannot be challenged or appealed. 41 | 42 | Applicant hereby consents to public disclosure of the contents of any application for Cloud Lab Credits. Specific information, including Applicant’s name and overarching research objectives, may be made publicly available in interim or final project reports. 43 | 44 | Applicant acknowledges that the Cloud Lab is a Federal Data System maintained by the U.S. Government. Misuse of a Federal Data System may subject users to civil and criminal penalties. 45 | 46 | In addition to the conditions included above, the following additional conditions apply to applicants who have received Cloud Lab Credits (hereafter called “Active Participants”): 47 | 48 | Active Participant agrees that the conferred Credit amount constitutes the full amount of resources that can be provided in the pilot for any individual application submission. Additional Credits would require a separate application. 
49 | 50 | Active Participant agrees that the conferred Credit amount cannot be charged as a cost to an NIH funded research project and that any such charges are unallowable. . 51 | 52 | Active Participant agrees that all Cloud Lab Credits that are not expended within 6 months of the date of award will expire. Upon expiration of the credits or when credits are fully expended, whichever occurs first, the associated Cloud Lab account will be disabled and the credits will be returned to the NIH. 53 | 54 | Active Participant acknowledges that the Cloud Lab is a shared environment, which places considerable responsibility on the Active Participant for maintaining data and application security related to their own activities. 55 | 56 | Active Participant accepts responsibility for protecting intellectual property (IP) consistent with their home institution’s rules and regulations and with U.S. law. 57 | 58 | Active Participant acknowledges that the list of example responsibilities related to security provided here is not comprehensive, and that there may be civil and criminal penalties for data breaches and/or misuse of the NIH Cloud Lab For site security purposes and to ensure that this service remains available to all users, software programs are employed to monitor traffic to identify unauthorized attempts to upload or change information, or otherwise cause damage. 59 | 60 | Active Participants may be suspended from Cloud Lab, including loss of Credits balance, for violating these terms and conditions, for violating local, state, or federal law, or for any other reason. Written notice will be provided, and NIH may alert the Active Participant’s Organization of any violation of Cloud Lab terms of service. The occurrence of this will result in the suspension of all Credits to the Active Participant in question. This decision cannot be contested. 61 | 62 | Active Participants may opt-out of Cloud Lab for convenience at any time with 15 business days’ notice through submission of an intent letter to NIH CIT. Opting out consists of moving out any data and shutting down any cloud resources associated with their account, resulting in the account being terminated and credits returned to NIH. Similarly, NIH may terminate Active Participant’s access to Cloud Lab for convenience at any time with 15-business-day written notice. 63 | 64 | 65 | 66 | 67 | -------------------------------------------------------------------------------- /docs/Configure_AWSParallelCluster.md: -------------------------------------------------------------------------------- 1 | ## Configuring AWS Parallel Cluster 2 | Make sure that you have parallel cluster installed first [link](https://docs.aws.amazon.com/parallelcluster/latest/ug/install-v3-parallelcluster.html). 3 | 4 | ### 1. Setup your AWS credentials 5 | 6 | To connect to your AWS console paste in your Short Term Access Keys following [these instructions](/docs/Intramural_STAKs.md). These should last 12 hours, and although normally you could just enter the key and secret key using aws configure, with short term keys you also need the session token, so make sure you include that. 7 | 8 | Next, use aws configure to set your default region: 9 | 10 | `aws configure` 11 | 12 | AWS Access Key ID [None]: ABCDEFGHIJKLEXAMPLE 13 | AWS Secret Access Key [None]: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY 14 | Default AWS Region name [us-east-1]: us-east-1 15 | Default output format [None]: 16 | 17 | ### 2. 
Configure your Parallel Cluster 18 | 19 | Run the following command: 20 | `pcluster configure` 21 | 22 | Once command is run the following configuration options will display: 23 | 24 | Pick a region: 25 | 26 | Allowed values for the AWS Region ID: 27 | 1. af-south-1 28 | 2. ap-east-1 29 | 3. ap-northeast-1 30 | 4. ap-northeast-2 31 | 5. ap-south-1 32 | 6. ap-southeast-1 33 | 7. ap-southeast-2 34 | 8. ca-central-1 35 | 9. eu-central-1 36 | 10. eu-north-1 37 | 11. eu-south-1 38 | 12. eu-west-1 39 | 13. eu-west-2 40 | 14. eu-west-3 41 | 15. me-south-1 42 | 16. sa-east-1 43 | 17. us-east-1 44 | 18. us-east-2 45 | 19. us-west-1 46 | 20. us-west-2 47 | 48 | Choose the scheduler to use with your cluster: 49 | 50 | Allowed values for Scheduler: 51 | 1. slurm 52 | 2. awsbatch 53 | 54 | Choose an operating system: 55 | 56 | Allowed values for Operating System: 57 | 1. alinux2 58 | 2. centos7 59 | 3. ubuntu1804 60 | 4. ubuntu2004 61 | 62 | The minimum and maximum size of the cluster of compute nodes is entered. This is measured in number of instances: 63 | 64 | Minimum cluster size (instances) [0]: 65 | Maximum cluster size (instances) [10]: 66 | 67 | The head and compute nodes instance types are entered. For instance types, your account instance limits are large enough to meet your requirements, [instance types](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-on-demand-instances.html#ec2-on-demand-instances-limits). You can select something really small for the head node (Master) since it is just handling scheduling, and then compute is up to you based on requirements. 68 | 69 | Master instance type [instance type]: 70 | Compute instance type [instance type]: 71 | 72 | 73 | The key pair is selected from the key pairs registered with Amazon EC2 in the selected AWS Region. If you do not yet have a key pair, go to the EC2 console and [create one](/docs/connect_to_EC2.md): 74 | 75 | Allowed values for EC2 Key Pair Name: 76 | 1. example1-uswest1-key 77 | 2. example2-uswest1-key 78 | 79 | After the previous steps are completed, decide whether to use an existing VPC or let AWS ParallelCluster create a VPC for you. If you don't have a properly configured VPC, AWS ParallelCluster can create a new one. It either uses both the head and compute nodes in the same public subnet, or only the head node in a public subnet with all nodes in a private subnet. It's possible to reach your limit on number of VPCs in a AWS Region. The default limit is five VPCs for each AWS Region. 80 | 81 | **For Cloud Lab accounts we recommend you do not try to create a VPC, but rather use the existing VPCs associated with your account. When it asks for your VPC, just paste in one of the two options it gives you**. 82 | 83 | If you don't create a new VPC, you must select an existing VPC: 84 | 85 | Automate VPC creation? (y/n) [n]: n 86 | Allowed values for VPC ID: 87 | # id name number_of_subnets 88 | --- --------------------- --------------------------------- ------------------- 89 | 1 vpc-ID ParallelClusterVPC-NUMBER 2 90 | 2 vpc-ID ParallelClusterVPC-NUMBER 5 91 | 92 | 93 | After the VPC has been selected, you need to decide whether to use existing subnets or create new ones. Again for Cloud Lab accounts select **No**: 94 | 95 | Automate Subnet creation? 
(y/n) [n]: n 96 | 97 | Allowed values for head node Subnet ID: 98 | # id name size availability_zone 99 | --- ------------------------ ----------------------------------------------------------------------- ------ ------------------- 100 | 1 subnet-ID Subnet1_name 32 us-east-1b 101 | 2 subnet-ID Subnet2_name 32 us-east-1a 102 | 103 | **Paste in one of the subnet IDs** 104 | 105 | ### 3. Launch Cluster 106 | 107 | Before launching the default configuration allows public IP addresses, which typically is not allowed in the NIH environment , to fix this: 108 | 109 | Modify the pcluster config file (Will be in a hidden file in directory you installed and configured the Parrallel Cluster), the default name of the Pcluster configuration file is "config" 110 | 111 | $ vi ~/.parallelcluster/config 112 | 113 | add a line under the "[vpc_default]" block 114 | 115 | use_public_ips = false 116 | 117 | once line is added make sure to save the file: 118 | 119 | wq! 120 | 121 | 122 | When all settings contain valid values, you can launch the cluster by running the create command: 123 | 124 | $ pcluster create 125 | 126 | After the cluster reaches the "CREATE_COMPLETE" status, you can connect to it by using your normal SSH client settings. 127 | 128 | ## 4. Deactivate venv when finished working 129 | 130 | To deactivate a pip environment, just type `deactivate`. 131 | For conda environment type `conda deactivate`. 132 | 133 | -------------------------------------------------------------------------------- /docs/Genomics_Workflows.md: -------------------------------------------------------------------------------- 1 | ### Creating the Genomics Core Environment 2 | When running genomics workflows using AWS Batch you need three pieces. The first is a private virtual private cloud (VPC). This has been configured for you so you can skip the creation of this step. The second piece you need is the Genomics Workflow Core Environment and the third is a Workflow Orchestration environment which will be either Cromwell or Nextflow. 3 | 4 | Begin with the creation of the core environment using the Stack Formation template launched from [here](https://docs.opendata.aws/genomics-workflows/core-env/introduction.html). Click `Launch Stack` and then leave the first page as defaults and click `Next`. Name your stack name, and then name the bucket where you want genomic core files to go. In theory you can have Stack Formation create a bucket for you, but we recommend creating an empty S3 bucket ahead of time, listing that bucket in the cell that says `S3 Bucket Name`, and then for `Existing Bucket` say `Yes`. For `VPC ID`, click the only option, and for `VPC Subnet IDs` select both private subnets. `Number of Subnets` = 2. You can leave the rest of this page as default, although you may want to increase `Default Min vCPU` to have more CPUs always running in your Batch cluster. Click `Next`, click `Next` again, and then on the review page, review your selections and agree to the terms at the bottom. When you click `Create Stack` at the bottom, it is going to create core infrastructure, including an EC2 Auto Scaling Group that will handle job submissions from Cromwell or Nextflow. 5 | 6 | ### Cromwell on AWS Batch 7 | If you want to submit jobs to a Cromwell server on AWS, you can follow the instructions on the [Genomics Workflows Page](https://aws.amazon.com/blogs/architecture/automated-launch-of-genomics-workflows/), but with a few tweaks that we identified in our testing. 
On the Cromwell page, you can skip the VPC (virtual private cloud) creation because your account already has a default VPC. 8 | 9 | Click `Launch Stack` for `Cromwell Resource`, then leave the first page as defaults and click `Next`. On the next page, create a unique stack name, like cromwell-resources-DATE. `Namespace` can be the same as `Stack name` or else the project name. For `GWFCoreNamespace`, put in the name of your Genomics Workflow Core environment. For `VPC ID` you can select the only option, then for `Server Subnet ID` pick either of the two options. For `Database Subnet IDs` select the two subnets. The rest can be left as default, or modified as desired, then click `Next`. The following page can be left as default, then click `Next`. On the review page, review all your selections, then click `Launch Stack`. 10 | 11 | The stack will take about 10 minutes to finish, you can monitor progress under `CloudFormation > Stacks`. If you select the Stack name, then `Events`, you can monitor the stack being created. If you have an error, this is the best place to figure out where things went wrong. 12 | 13 | Once the stack is launched, you can submit jobs to the server a few different ways. The first is using the Swagger UI, but to access this you can't use a public DNS address like the documentation suggests because cloud lab does not allow external IP addresses. Thus, you need to use the internal IP, which you can find on the info page of the Cromwell Server EC2 instance. Find the Prive IP Address, and paste this into your Browser. This address may be blocked on Chrome, so if that is the case for you, try a different browser. 14 | 15 | You can also submit jobs from the command line from with the Cromwell EC2 instance. While viewing the instance in EC2, click `Connect`, then click `Session Manager` and then `Connect`. Now you can use something like the example command in the AWS docs. 16 | 17 | ### Nextflow on AWS Batch 18 | Before launching the stack, you need to make an edit to the Dockerfile for Nextflow from the Genomics Core S3 Bucket. There is some kind of version conflict that is being resolved, but for now, this hack will circumvent the issue. Go to your Genomics Core Bucket and drill down to the zipped file containers.zip. For example: `s3://AGC_Bucket/_gwfcore/GWF-STACK-NAME/artifacts/containers.zip`. 
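If you prefer the command line to the S3 console for this step, a minimal sketch of pulling the archive down (the bucket and stack names are placeholders for your own):

```
# Placeholders: substitute your Genomics Workflow Core bucket and stack name
aws s3 cp s3://YOUR-GWFCORE-BUCKET/_gwfcore/YOUR-STACK-NAME/artifacts/containers.zip .
```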
Unzip the file either locally or in cloud shell, then swap out the Dockerfile with the following: 19 | ``` 20 | ARG VERSION=latest 21 | FROM public.ecr.aws/seqera-labs/nextflow:${VERSION} AS build 22 | 23 | # The upstream nextflow containers are based on alpine 24 | # which are not compatible with the aws cli 25 | FROM public.ecr.aws/amazonlinux/amazonlinux:2 AS final 26 | COPY --from=build /usr/local/bin/nextflow /usr/bin/nextflow 27 | 28 | RUN yum update -y \ 29 | && yum install -y \ 30 | curl \ 31 | which \ 32 | hostname \ 33 | java \ 34 | unzip \ 35 | && yum clean -y all 36 | RUN rm -rf /var/cache/yum 37 | 38 | # install awscli v2 39 | RUN curl -s "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "/tmp/awscliv2.zip" \ 40 | && unzip -q /tmp/awscliv2.zip -d /tmp \ 41 | && /tmp/aws/install -b /usr/bin \ 42 | && rm -rf /tmp/aws* 43 | 44 | ENV JAVA_HOME /usr/lib/jvm/jre-openjdk/ 45 | 46 | # invoke nextflow once to download dependencies 47 | RUN nextflow -version 48 | 49 | # install a custom entrypoint script that handles being run within an AWS Batch Job 50 | COPY nextflow.aws.sh /opt/bin/nextflow.aws.sh 51 | RUN chmod +x /opt/bin/nextflow.aws.sh 52 | 53 | WORKDIR /opt/work 54 | ENTRYPOINT ["/opt/bin/nextflow.aws.sh"] 55 | ``` 56 | Then rezip the containers directory, either using the zip command or right clicking, but be sure if you are righ clicking that you select all the child directories, and not the original parent directory. For example, instead of clicking the `containers` folder and zipping it, select all the child folders like `_common` and `bcftools` etc. and zip those into a zip file called environments.zip. Upload this to the same place in S3, and now you are ready to launch the Nextflow stack. 57 | 58 | Navigate to [this page](https://docs.opendata.aws/genomics-workflows/orchestration/nextflow/nextflow-overview.html) and then scroll down to `Nextflow Resources` and click `Launch Stack`. Leave the first page as defaults and click `Next`. On the second page, create a unique stack name, like nextflow-resources-DATE. `Namespace` can be the same as `Stack name` or else the project name. For `GWFCoreNamespace`, put in the name of your Genomics Workflow Core environment. The rest can be left as default, or modified as desired, then click `Next`. The following page can be left as default, then click `Next`. On the review page, review all your selections, then click `Launch Stack`. The stack will take about 10 minutes to finish, you can monitor progress under `CloudFormation > Stacks`. If you select the Stack name, then `Events`, you can monitor the stack being created. If you have an error, this is the best place to figure out where things went wrong. 59 | 60 | -------------------------------------------------------------------------------- /docs/Install_AWSParallelCluster.md: -------------------------------------------------------------------------------- 1 | # Installing the AWS ParallelCluster User interface (UI) 2 | 3 | ## To install an instance of the AWS ParallelCluster UI choose an AWS Cloud Formation quick-create link for the AWS region you want the cluster in. 4 | 5 | The AWS ParallelCluster UI is a web-based user interface that mirrors the AWS ParallelCluster pcluster CLI, while providing a console-like experience. You install and access the AWS ParallelCluster UI in your AWS account. When you run it, the AWS ParallelCluster UI accesses an instance of the AWS ParallelCluster API hosted on Amazon API Gateway in your AWS account. 
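If you prefer to script the deployment rather than click through the quick-create link referenced below, the same template can also be launched with the AWS CLI. This is only a rough sketch, under the assumption that the admin email parameter is named `AdminUserEmail`; check the template's Parameters section before running it:

```
aws cloudformation create-stack \
  --stack-name parallelcluster-ui \
  --region us-east-1 \
  --template-url https://parallelcluster-ui-release-artifacts-us-east-1.s3.us-east-1.amazonaws.com/parallelcluster-ui.yaml \
  --parameters ParameterKey=AdminUserEmail,ParameterValue=you@example.com \
  --capabilities CAPABILITY_IAM CAPABILITY_NAMED_IAM CAPABILITY_AUTO_EXPAND
```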
6 | 7 | The AWS documentation for installing the PCUI can be found [here](https://docs.aws.amazon.com/parallelcluster/latest/ug/install-pcui-v3.html). 8 | 9 | To install an instance of the AWS ParallelCluster UI (PCUI), you choose an AWS CloudFormation quick-create link for the AWS Region that you create clusters in. The quick-create URL takes you to a Create Stack Wizard where you provide quick-create stack template inputs and deploy the stack. 10 | 11 | The quick link for creating this stack within the us-east-1 region can be found [here](https://us-east-1.console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/create/review?stackName=parallelcluster-ui&templateURL=https://parallelcluster-ui-release-artifacts-us-east-1.s3.us-east-1.amazonaws.com/parallelcluster-ui.yaml) 12 | 13 | **Use an AWS CloudFormation quick-create link to deploy an PCUI stack with nested Amazon Cognito, API Gateway, and Amazon EC2 Systems Manager stacks.** 14 | 15 | 1. Sign in to the AWS Management Console. 16 | 2. Deploy the PCUI by choosing an AWS Region quick-create link from the table at the start of this section. This takes you to the CloudFormation **Create Stack Wizard** in the console. 17 | 3. Enter a valid email address for **Admin's Email.** 18 | 4. Enter an **ImageBuilder Custom VPC**. This can be found in the VPC Dashboard on AWS Console. 19 | 5. After deployment completes successfully, the PCUI sends you a temporary password to this email address. You use the temporary password to access the PCUI. If you delete the email before you save or use the temporary password, you must delete the stack and reinstall the PCUI. 20 | 6. Keep the rest of the form blank or enter values for (optional) parameters to customize the PCUI build. 21 | 7. Note the stack name for use in later steps. 22 | 8. Navigate to **Capabilities.** Agree to the CloudFormation capabilities. 23 | 9. Choose **Create.** It takes about 15 minutes to complete the AWS ParallelCluster API and PCUI deployment. 24 | 10. View the stack details as the stack is created. 25 | 11. After the deployment completes, open the admin email that was sent to the address you entered. It contains a temporary password that you use to access the PCUI. If you permanently delete the email and you haven’t yet logged in to the PCUI, you must delete the PCUI stack you created and reinstall the PCUI. 26 | 12. In the AWS CloudFormation console list of stacks, choose the link to the stack name that you noted in a previous step. 27 | 13. In **Stack details**, choose **Outputs** and select the link for the key named **StacknameURL** to open the PCUI. Stackname is the name that you noted in a previous step. 28 | 14. Enter the temporary password. Follow the steps to create your own password and log in. 29 | 15. You are now on the home page of the PCUI in the AWS Region that you selected. 30 | 31 | ![parallel cluster UI image](images/ParallelclusterUI.PNG) 32 | 33 | 34 | # Installing the AWS ParallelCluster CLI: 35 | 36 | - Using a virtual environment (recommended) 37 | - Using a conda virtual environment 38 | 39 | We recommend conducting all steps within AWS Cloud Shell, but you could also use a small Ec2 instance or your local machine's terminal. 
40 | 41 | ## 1.0 Install within pip virtual environment 42 | 43 | ## 1.1 Install pip virtual environment 44 | 45 | If virtualenv is not installed, install virtualenv using pip3 46 | 47 | -Linux, macOS, or Unix 48 | 49 | $ python3 -m pip install --upgrade pip 50 | $ python3 -m pip install --user --upgrade virtualenv 51 | 52 | 53 | -Windows 54 | 55 | $ pip3 install --user --upgrade virtualenv 56 | 57 | 58 | ### 1.2 Create a pip virtual environment and name it 59 | 60 | -Linux, macOS, or Unix 61 | 62 | $ python3 -m virtualenv ~/name 63 | 64 | 65 | -Windows 66 | 67 | C:\>virtualenv %USERPROFILE%\name 68 | 69 | ### 1.3 Activate your new virtual environment 70 | 71 | -Linux, macOS, or Unix 72 | 73 | $ source ~/name/bin/activate 74 | 75 | -Windows 76 | 77 | C:\>%USERPROFILE%\name\Scripts\activate 78 | 79 | ### 1.4 Install AWS ParallelCluster into your virtual environment. 80 | 81 | -Linux, macOS, or Unix 82 | 83 | (name)~$ python3 -m pip install --upgrade "aws-parallelcluster<3.0" 84 | 85 | -Windows 86 | 87 | (apc-ve) C:\>pip3 install --upgrade "aws-parallelcluster<3.0" 88 | 89 | ## 2.0 Install within conda virtual environment 90 | 91 | ### 2.1 (Optional) Install mamba 92 | ``` 93 | curl -L -O https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh 94 | bash Mambaforge-$(uname)-$(uname -m).sh -b -p $HOME/mambaforge 95 | # Add to your PATH 96 | export PATH="$HOME/mambaforge/bin:$PATH" 97 | ``` 98 | ### 2.2 Create environment and install parallel cluster 99 | ``` 100 | mamba create -n pcluster -c conda-forge aws-parallelcluster -y 101 | ``` 102 | 103 | ### 2.3 Activate environment 104 | ``` 105 | source activate pcluster 106 | ``` 107 | Or, using mamba activate 108 | ``` 109 | mamba init 110 | mamba activate pcluster 111 | ``` 112 | 113 | ## 4. Verify that AWS ParallelCluster is installed correctly 114 | 115 | -Linux, macOS, or Unix 116 | 117 | $ pcluster version 118 | 119 | -Windows 120 | 121 | (apc-ve) C:\>pcluster version 122 | 123 | Output should display version number. 124 | 125 | ## 5. Move on to configuring and connecting to your cluster 126 | Follow the [next instructions](/docs/Configure_AWSParallelCluster.md) on how to configure your cluster via the command line. Otherwise, check out our tutorial for using Parallel Cluster via the console to run **Snakemake workflows** [here](https://github.com/STRIDES/NIHCloudLabAWS/blob/main/notebooks/Snakemake/AWS-ParallelCluster.ipynb). 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /docs/Intramural_STAKs.md: -------------------------------------------------------------------------------- 1 | ### Obtaining your Short Term Access Keys for Intramural Cloud Lab Users 2 | 3 | If you have an NIH identity, you can access your Short Term Access Keys (STAKs) at the same place you access the AWS console (https://iam.nih.gov, VPN or Campus only). 4 | 5 | 1. Click `Cloud Access`. 6 | 7 | 8 | 9 | 2. On the line with your account name, click **Short-term access keys**. 10 | 11 | 12 | 13 | 3. Copy the three lines from Option 1. The only method that will work for authentication in Cloud Lab is to paste all three of these into your terminal, because you will need the two keys and the session token. You will need to redo this step whenever your connection is lost or your keys expire. If you are using a Sagemaker notebook, you won't need to authenticate, but for EC2 or Cloud Shell you will need to authenticate. 
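The three lines are simply environment variable exports. For illustration only (the values below are fake; paste the real lines from the STAK page), the block you paste and a quick check that the credentials are active look roughly like this:

```
export AWS_ACCESS_KEY_ID="ASIAEXAMPLEKEYID"
export AWS_SECRET_ACCESS_KEY="wJalrXUtnFEMI/K7MDENG/EXAMPLESECRET"
export AWS_SESSION_TOKEN="IQoJb3JpZ2luX2VjEXAMPLETOKEN"

# Confirm the short-term credentials took effect
aws sts get-caller-identity
```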
14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /docs/Jupyter_notebook.md: -------------------------------------------------------------------------------- 1 | 2 | # Guide to spinning up a Sagemaker Notebook instance and importing an example notebook 3 | 4 | ## Spin up the instance 5 | 6 | 1. Go to `Services > (A) Machine Learning > (B) Amazon SageMaker`, or just search for `Sagemaker` in the top search bar. 7 | 8 | 9 | 10 | 2. On the left menu, click **Notebook > Notebook Instance**, then click **Create notebook instance**. 11 | 12 | 13 | 14 | 3. Fill out the required fields. If you need help selecting an instance go [here](https://aws.amazon.com/sagemaker/pricing/) to `On-Demand Pricing`. Click '**Additional configuration**, then increase your disk size to fit your needs. You can also create a Lifecycle configuration to enable auto-shutdown of idle VMs. See our [other guide](/docs/auto-shutdown-instance.md) on how to do this. If you want to benchmark the cost of an instance, add a tag such as key=`Project`, value=`cloudlab`, and then you can filter this later in your [billing dashboard](/docs/billing_and_cost_management.md). Finally, click **Create notebook instance** at the bottom. 15 | 16 | 17 | 18 | ### Import our training notebook 19 | 20 | 4. Your instance will say `Pending` for a few minutes. Once the instance says `InService` (i.e. running) we can click `Open JupyterLab` on the far right of the screen. 21 | 22 | 23 | 24 | 5. There are a lot of options here for opening different types of notebooks, as well as a terminal window. If you want to start a new notebook, you can select a kernel, if you aren't sure what to select, we recommend `conda_python3`. 25 | 26 | 27 | 28 | 6. Before importing this repository and opening the example notebook, begin by looking at the AWS example notebooks by clicking the bottom icon on the far left that looks like a brain. You will see that most of these are generic data science and ML topics, but there are a few biomedically-relevant examples, with several notebooks focused on cancer data. These notebooks are a great way to learn some basic functionality of AWS like ingesting data, training and running ML/AI models, and running R notebooks. You can also explore a variety of more advanced applications. Open a few notebooks and copy them to your workspace to see how that works. 29 | 30 | 31 | 32 | 7. Now you can copy in a custom notebook and some example data. From the base directory, (A) click the git icon on the middle left bar, it kind of looks like the letter 'T' with a tilt. (B) Click `Clone a Repository`, and then (C) paste the address to this repo (from the green box in the top right of the GitHub main directory) into the box. Or you can just open a terminal and type the following: 33 | 34 | ``` 35 | git clone https://github.com/STRIDES/NIHCloudLabAWS.git 36 | ``` 37 | 38 | 39 | 40 | 41 | 8. If you used the user interface you will have the NIHCloudLabAWS directory available in your file browser. If you used the terminal, it will clone into the $HOME directory, so you will need to copy the repo into the Sagemaker directory before it will show up in your file browser. Now (A) navigate to NIHCloudLabAWS > tutorials > notebooks > GWAS > GWAS_coat_color.ipynb. Explore this notebook and see how data moves in and out of the SageMaker environment. You can also manually add files, whether notebooks or data using the up arrow in the top left navigation menu, or by just dragging them in. 
(B) You can easily switch between different kernels in the top right, whether R or Python or Spark. 42 | 43 | 44 | 45 | 46 | 9. Here's a few tips if you are new to notebooks. The navigation menu in the top left controls the control panel that is the equivalent to your directory structure. The panel above the notebook itself controls the notebook options. Most of these are obvious, but a few you will use often are: 47 | + the plus sign to add a cell 48 | + the scissors to cut a cell 49 | + stop to stop a running process 50 | + run a cell with the play button or use shift + enter/return. You can also use CMD + Enter, but it will only run the current cell and not move to the next cell. 51 | 52 | 10. Above that menu you will see an option called (A) `Kernel` which is useful if you need to reset the kernel, you can click Kernel > Restart Kernel and Clear All Outputs. This will give you a clean restart. You can also use Kernel > Change Kernel if you need to switch between Kernel environments. Also worth noting that when you run a cell, sometimes it doesn't produce any output, but things are running in the background. (B) If the brackets next to a cell have an * then it is still running. (C) You can also look at the bottom where the kernel is listed (e.g. Python 3 | status) and it will show either Idle or Busy depending on if anything is running or not. 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /docs/agc.md: -------------------------------------------------------------------------------- 1 | ### [Deprecated] Using the Amazon Genomics CLI (agc) in Cloud Lab 2 | 3 | **IMPORTANT: Amazon Genomics CLI (agc) has been deprecated, please use Amazon [HealthOmics](https://docs.aws.amazon.com/omics/) instead** 4 | 5 | The following instructions follow the [AWS docs](https://aws.github.io/amazon-genomics-cli/docs/), but with some tips and tricks specific to Cloud Lab. Read more about the specifics of the agc tool in the [docs](https://aws.github.io/amazon-genomics-cli/docs/concepts/). 6 | 7 | You can do all of the following from your local computer using the AWS SDK, but to ensure things go smoothly in the Cloud Lab environment, we recommend you spin up a VM and follow along from within your EC2 or Sagemaker instance. If using Sagemaker, it is easier to use the terminal than a notebook, but both ways should work. Do not use Cloud Shell, because Cloud Shell is ephemeral and will not save your environment between sessions. 8 | 9 | Because agc uses AWS Batch for computation, you can get away with spinning up a small VM, like the t2 micro for this tutorial. Once you VM is ready, ssh into it. If you are using a notebook, open the Jupyter environment. 10 | 11 | Make sure you set up your environment following the [Prerequisites](https://aws.github.io/amazon-genomics-cli/docs/getting-started/prerequisites/) page. Install the AWS CLI if running locally, and if on EC2 or Sagemaker, you just need to install nodejs. Make sure you run `aws configure` and input your Short-term Access keys. Intramural users can access keys following [these instructions](/docs/Intramural_STAKs.md). 12 | 13 | Now run the [install instructions](https://aws.github.io/amazon-genomics-cli/docs/getting-started/installation/) and add agc to the path with `export PATH=$HOME/bin:$PATH`. If the install went well, go ahead and activate. You need to add a few flags to the base `activate` command. Create the bucket before running this command, and make sure that the bucket is empty. 
14 | 15 | You can find the VPC ID by navigating in the AWS console to the VPC page (via the console search bar) and copying the ID that shows up in blue link text. Or, with the AWS CLI and jq, `aws ec2 describe-vpcs | jq '.Vpcs[0].VpcId'`. 16 | 17 | You can find the subnet IDs on the VPC page as well; look in the left navigation menu for the "Subnets" section and copy the IDs that show up in blue link text. Or, with the AWS CLI and jq, `aws ec2 describe-subnets | jq '.Subnets[].SubnetId'`. 18 | 19 | You can create a new bucket by navigating to the S3 page and creating a new bucket. Or, with the AWS CLI `aws s3api create-bucket --bucket `. 20 | 21 | ``` 22 | agc account activate --vpc --subnets --subnets --bucket 23 | ``` 24 | 25 | If for some reason the activation fails, you need to go to s3 and delete the bucket agc created and then try again. If you can't get the account to activate after a few attempts, start from scratch with this command `agc account deactivate --force`. You may also need to go to Stack Formation and delete any of the stacks related to AGC. 26 | 27 | If everything works, then it will take about 4 minutes to finish bootstrapping the infrastructure. Now configure you email (`agc configure email you@youremail.com`) and you are all set up! 28 | 29 | Now try running some examples. You can start with the [Hello World example](https://aws.github.io/amazon-genomics-cli/docs/getting-started/helloworld/) or skip ahead to the other [tutorials](https://aws.github.io/amazon-genomics-cli/docs/tutorials/). If a context deployment fails, go to `Cloud Formation` in the AWS console and delete the responsible stack, then try again. 30 | 31 | Once in a project, you can list the available contexts with `agc context list`, and you can list the available workflows with `agc workflow list`. If you want to check on any of the agc commands, just type `agc` or agc + subcommand like `agc workflow` and you will get a help menu. 32 | 33 | **Please note that if you need to run snakemake as opposed to Nextflow etc, we will need to make some modification to your account's network. Please email CloudLab@nih.gov with the subject 'Snakemake Genomics CLI DNS modification Request'.** 34 | 35 | One other note...this workflow will create a bunch of Stacks in Cloud Formation. When you are done testing this functionality, please go in and manually delete the stacks. Our clean up scripts (for giving your account to someone else) are unable to wipe Cloud Formation. Thank you! 36 | 37 | -------------------------------------------------------------------------------- /docs/auto-shutdown-instance.md: -------------------------------------------------------------------------------- 1 | # Guide to implementing auto-shutdown features in virtual machines (EC2 or Sagemaker) 2 | 3 | ## Autoshutdown EC2 instance 4 | 5 | There already exists a [great guide](https://successengineer.medium.com/how-to-automatically-turn-off-your-ec2-instance-in-2021-b73374e51090) on how to configure autoshutdown on EC2. 6 | Just note that when you set the Alarm Thresholds, you may want to set the threshold percent to be higher than 2% to increase the sensitivity and make it easier for your machine to shut down. 7 | 8 | ## Autoshudown Sagemaker instance 9 | 10 | Configuring auto shutdown on Sagemaker instances is also relatively simple. 11 | 12 | ### Configuring a new instance 13 | 14 | 1. On the Sagemaker page, click **Create notebook instance**. 15 | 16 | 17 | 18 | 2. 
Under *Additional Configuration* click the box under *Lifecycle configuration - optional*, then select **Create a new lifecycle configuration**. 19 | 20 | 21 | 22 | 3. Name your configuration something like `idle-shutdown-sagemaker` and then paste in the following code under *Start notebook*. This code snippet will shutdown your VM after 3600 seconds (1 hr) of inactivity. If you want that time to be shorter, change it to something like 1800 (30 min). 23 | 24 | ``` 25 | #!/bin/bash 26 | 27 | set -e 28 | 29 | # PARAMETERS 30 | IDLE_TIME=3600 31 | 32 | echo "Fetching the autostop script" 33 | wget https://raw.githubusercontent.com/aws-samples/amazon-sagemaker-notebook-instance-lifecycle-config-samples/master/scripts/auto-stop-idle/autostop.py 34 | 35 | echo "Starting the SageMaker autostop script in cron" 36 | 37 | (crontab -l 2>/dev/null; echo "*/5 * * * * /usr/bin/python $PWD/autostop.py --time $IDLE_TIME --ignore-connections") | crontab - 38 | ``` 39 | 40 | 41 | 42 | 4. Click **Create configuration**, then click **Create notebook instance** 43 | 44 | ### Configuring an existing instance 45 | 46 | The instructions for adding auto-shutdown to an existing instance are almost identical. 47 | 48 | 1. Select the instance you want to modify, and click **Edit** in the top right. Your instance does need to be stopped. 49 | 50 | 51 | 52 | 2. Now under *Additional configuration* select *Lifecycle configuration - optional* and follow the instructions above for 2–4. 53 | 54 | 3. Restart your instance and confirm that it will shut down after the specified amount of time. 55 | 56 | -------------------------------------------------------------------------------- /docs/billing_and_cost_management.md: -------------------------------------------------------------------------------- 1 | # Guide to AWS Billing and Cost Management 2 | 3 | Understanding how to manager your costs can be difficult in the cloud. For one thing, you have to keep track of how much you have spent with the obvious services, like EC2, Sagemaker and S3. On the other hand, how can you figure out how much you are being charged for your network (VPC)? 4 | Further, some Cloud Lab users are interested in understanding how to forcast cloud costs for a larger project. For example, if you want to understand the cost of calling somatic variants on 100 samples, but in Cloud Lab you plan to benchmark using five samples. How would you go about doing that? 5 | This guide aims to answer these questions. 6 | 7 | ## 1. Resource Tagging 8 | 9 | One of the first steps to understanding costs is resource tagging. Billing reports will be aggregated across time and services, and it can be hard to figure out how much did that variant calling pipeline cost to run? 10 | Tagging allows you attach metadata to resources that you can later filter for in Billing reports. AWS has a comprehensive Tagging guide [here](https://docs.aws.amazon.com/general/latest/gr/aws_tagging.html) 11 | You can add a tag to pretty much any resource but let's look at a few examples. 12 | 13 | ### Add tags to a storage bucket 14 | 15 | 1. Select the bucket and then click **Properties**. 16 | 17 | 18 | 19 | 2. Scroll down to *Tags* and click **Edit**. 20 | 21 | 22 | 23 | 3. Add a few tags that help identify the filtering you want to do later, feel free to look at the [AWS guide](https://docs.aws.amazon.com/general/latest/gr/aws_tagging.html) for examples. 24 | 25 | 26 | 27 | ### Add tags to an EC2 instance 28 | 29 | This assumes your instance already exists. 
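If you would rather tag from the command line than use the console steps that follow, a minimal sketch (the instance ID and tag values are placeholders):

```
# Attach cost-tracking tags to an existing instance; replace the instance ID with your own
aws ec2 create-tags \
  --resources i-0123456789abcdef0 \
  --tags Key=Project,Value=cloudlab Key=Analysis,Value=blast
```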
You can also tag a new instance during creation using the same method. 30 | 31 | 1. Select the instance and click the **Tags** tab, then **Manage Tags**. 32 | 33 | 34 | 35 | 2. Add your tags, then click **Save**. 36 | 37 | 38 | 39 | ### Add tags to a Sagemaker instance 40 | 41 | 1. Select the Sagemaker instance and scroll down to *Tags*. 42 | 43 | 2. Click **Edit**, then add tags as described above. 44 | 45 | 46 | 47 | 48 | 49 | ## 2. Explore Billing Reports 50 | 51 | You can find a lot of billing tools by searching for billing in the bar at the top of your console. 52 | 53 | 54 | 55 | However, the best billing tool for Cloud Lab use is the cost explorer. 56 | 57 | 1. Go to the (A) *Console Home Page*, then to (B) *AWS Cost Management*. 58 | 59 | 60 | 61 | 2. Click on **Cost Explorer** on the left panel 62 | 63 | 64 | 65 | 3. Click on (A) the data range, then (B) change the end date to today's date. By default it will show you billing to the end of last month so you won't see your current month charges. 66 | 67 | 68 | 69 | 4. Filter for different parameters on the right. Here we can filter by *Service* to select only costs related to EC2. 70 | 71 | 72 | 73 | Now we see only costs related to EC2. 74 | 75 | 76 | 77 | 5. Filter for the tags we added in Part 1 to benchmark a specific analysis. In this case, we are going to select **BLAST**. 78 | 79 | 80 | 81 | Now we can see the costs related to the analyses with the BLAST tag. If you don't see the tags you added before, make sure you have waited ~12 hours. AWS aggregates costs about three times per day, so those costs may have just not shown up yet. 82 | 83 | 84 | 85 | 6. Explore the other options available. You can change the plot type, change the filtering, and use several other tools within *Cost Management*. 86 | 87 | ## 3. Create Budget Alerts 88 | 89 | One way to help stay on budget is to create budget alerts. You can do this using the *Budgets* tool within *Cost Management*. 90 | 91 | 92 | 93 | 1. Click **Create a budget**. 94 | 95 | 96 | 97 | 2. Select your budget type. We recommend *Cost budget*. Click `Next`. 98 | 99 | 100 | 101 | 3. On the next page, enter a Budget Name. Under budget amount, select **Annually** for *Period*. Under *Budget renewal type* select **Expiring budget**. For *Budgeting method* select **Fixed** and then type **500** for the Budget Amount. 102 | 103 | 104 | 105 | 4. You can leave the rest as default and then click **Next** 106 | 107 | 108 | 109 | 5. Click **Add an alert threshold** 110 | 111 | 112 | 113 | 6. Configure your budget alerts as desired. Here we set one alert for when the budget reaches 50%, but you could set several alerts to let you know when you have reached 25%, 50%, 75% and then 95% for example. 114 | 115 | 116 | 117 | 7. Click **Next**. On the following page, click **Create Budget** 118 | 119 | 120 | 121 | -------------------------------------------------------------------------------- /docs/cis_hardened_vm.md: -------------------------------------------------------------------------------- 1 | # How to spin up CIS hardened VMs 2 | 3 | 1. Search for *AWS Marketplace* in the search bar. 
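If you also want to look up CIS Hardened Image AMI IDs from the command line (for example, to reference one in a launch script), here is a rough sketch; the name filter is an assumption and may need adjusting to the exact Marketplace product you subscribe to:

```
# List Marketplace AMIs in the current region whose names start with "CIS"
aws ec2 describe-images \
  --owners aws-marketplace \
  --filters "Name=name,Values=CIS*" \
  --query 'Images[].[ImageId,Name]' \
  --output table
```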
4 | 5 | -------------------------------------------------------------------------------- /docs/connect_ec2.md: -------------------------------------------------------------------------------- 1 | # Connect to your EC2 instance in Cloud Lab 2 | Connecting to instances is a little different in Cloud Lab because external IP addresses are not allowed, such as your laptop's IP, and when on the VPN, 3 | your private IP is changed. 4 | 5 | There are two ways you can connect to an instance in Cloud Lab: 6 | 7 | + When launching an instance, we can set a broad IP range for the security group that will encompass the VPN's private IP, without allowing traffic from the whole internet. 8 | + We can grant an IAM role that allows SSM (Session Manager) access if that is preferred over SSH. 9 | 10 | ### Launch an instance 11 | 12 | 1. To launch an instance, click the orange `Launch Instances` button in the top right. 13 | 14 | ![launch_instance](/docs/images/launch_instance.png) 15 | 16 | 2. Name your instance under `Name and tags`. 17 | 18 | 3. Under `Application and OS Images (Amazon Machine Image` you can select your base machine image (AMI). Usually, the Amazon base image will work fine, but for some instances you may want to use a different base image. Also, some Marketplace solutions will use different base images. One reason this matters is that to use the session manager (SSM) to connect to your instance, you will need to select an Amazon AMI here. If you need a different AMI and also need to connect via SSM, please email CloudLab@nih.gov and we can help you write a startup script to launch SSM. 19 | 20 | ![AMI](/docs/images/AMI.png) 21 | 22 | 4. Next, under `Instance type`, select your instance type. You can click `Compare instance types` to see the full specs of all available instances. 23 | 24 | ![instance_type](/docs/images/instance_type.png) 25 | 26 | 5. Under `Key pair (login)` select your key pair. If you need to generate a new key pair, click `Create new key pair`, then give your key a name, select `RSA` and `.pem`, then `Create key pair`. Make sure you secure your key and never share with other users or make available on Github. 27 | 28 | ![new_key](/docs/images/new_key.png) 29 | 30 | 6. Now, under `Network settings` we wil set our security group that allows us to SSH into the instance. You can either create a security group and then select `Select existing security group`, or leave the default selected for `Create security group`. Leave the box checked that says `Allow SSH traffic from` and then in the dropdown select `Custom` and then in the search box type `10.0.0.0/8`. This provides a range of IP addresses that should encompass your VPN's private IP. This may not work, if your IP is not in that range, so scroll down to `Additional Troubleshooting`. 31 | 32 | ![security_group](/docs/images/security_group.png) 33 | 34 | 7. Go to `Configure storage` and set the GB that you will need. You can always resize later by editing the instance. 35 | 36 | 8. If you plan to only connect via SSH, and not SSM, go ahead and click `Launch Instance` on the bottom right part of the screen. However, if you would like to have SSM access available, and are using an Amazon Linux AMI, then click the `Advanced details` drop down arrow. Under `IAM instance profile`, select `SSM-Role-For-EC2`. You may need to search for it. If you are not using an Amazon Linux AMI then follow these [instructions](https://aws.amazon.com/premiumsupport/knowledge-center/install-ssm-agent-ec2-linux/). 
For more info on adding IAM roles, go [here](https://docs.aws.amazon.com/systems-manager/latest/userguide/setup-instance-profile.html). 37 | 38 | ![IAM](/docs/images/IAM_SSM_role.png) 39 | 40 | 9. Now click `Launch Instance`. 41 | 42 | ### Connect to your instance 43 | 44 | 1. Once your instance is running and *Status check* says *2/2 checks passed*, click on the instance ID to look at the details for that instance. 45 | 46 | 47 | 48 | 2. Click **Connect**. 49 | 50 | 51 | 52 | 3. SSH and Session Manager (SSM) should both be available under the `Connect` menu once your instance is running. If you cannot access Session Manager, go back up to Step #8 and add the IAM Role. When you connect via SSM, you will usually end up in `/usr/bin`. If this happens, type `cd` to get back to the ssm-user directory. If you also need to use the ec2-user, run `sudo su`, and then you can switch between the ec2-user and ssm-user profiles. 53 | 54 | 4. Now let's try connecting via SSH. Select *SSH client*, and then copy the example given at the bottom. Note that the example username is typically `ec2-user`, but if you are using Ubuntu as the operating system, the username is `ubuntu`, and you may need to modify the example EC2 gives you. 55 | 56 | 57 | 58 | 5. Open a terminal on Mac or Linux. On Windows, connect [using PuTTY](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/putty.html). Paste in the command from #4. If everything works, you will now be SSH'd into your EC2 machine. If you are unable to connect, or get a command timeout, there is likely an issue with the CIDR ranges in the security group not including your IP address. Your IP can differ depending on whether you are connected via WiFi or ethernet, and whether you are on the VPN. To help troubleshoot these issues, see the next section. If you are still stuck, submit a support ticket. 59 | 60 | 61 | 62 | ### Additional Troubleshooting 63 | If your assigned IP is outside the range of 10.0.0.0/8, then you will need to update your security group to include your assigned IP. Unfortunately, you will need to do this every time you sign in to the VPN. To keep things simple, you can go to Network and Security > Security Groups on the left of the EC2 menu, select the Security group ID associated with your instance, click Actions at the top, and click `Edit inbound rules`. Click Add rule, select SSH, Custom, and then paste in your IP followed by /32. This should allow you to SSH into the instance. 64 | 65 | If you are on campus, your IP may remain stable, but you will need to check. To find your current IP, run `ifconfig` (macOS/Linux) or `ipconfig` (Windows) and identify the assigned IP address. For Windows, look for the DNS suffix `nih.gov`, which will show up if you are connected to the VPN. 66 | 67 | ![windows](/docs/images/windows.jpg) 68 | 69 | For macOS: look for utun(number), where number is the highest number of the utun series, which should have an IP assigned. This could be utun1-3. You can also identify which interface is the wired connection by running `networksetup -listallhardwareports`, which tells you the port type along with the ID. For example, `en0` or `en1` might be the wired connection on your machine. Once you determine which ID this is, run `ifconfig` and find the corresponding ID. That will give you the private IP assigned to that port, which is what you would need to add to the security group to allow SSH access. A command line sketch of these steps follows below.
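If you would rather script these troubleshooting steps than click through the console, here is a minimal sketch using the AWS CLI. The interface name (`en0`) and the security group ID (`sg-0123456789abcdef0`) are placeholders and assumptions; substitute the interface you identified above and the security group attached to your instance.

```
# Look up your current private IP (macOS example; the interface name is an assumption)
MY_IP=$(ipconfig getifaddr en0)

# Allow SSH (port 22) from just that IP; replace the placeholder security group ID with your own
aws ec2 authorize-security-group-ingress \
    --group-id sg-0123456789abcdef0 \
    --protocol tcp \
    --port 22 \
    --cidr "${MY_IP}/32"
```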
70 | 71 | ![mac](/docs/images/mac.png) 72 | 73 | Once you have your IP, update the security group as described above and then try to SSH into the instance again. 74 | 75 | 76 | -------------------------------------------------------------------------------- /docs/connect_to_EC2.md: -------------------------------------------------------------------------------- 1 | ### How to connect to NIH Cloud Lab EC2 instances on AWS 2 | Connecting to instances is a little different in Cloud Lab because external IP addresses, such as your laptop's IP, are not allowed, and when you are on the VPN, 3 | your private IP is changed. 4 | 5 | There are two ways you can connect to an instance in Cloud Lab: 6 | + When launching an instance, we can set a broad IP range for the security group that will encompass the VPN's private IP, without allowing traffic from the whole internet. 7 | + We can grant an IAM role that allows SSM (Session Manager) access if that is preferred over SSH. 8 | 9 | 1. To launch an instance, click the orange `Launch Instances` button in the top right. 10 | 11 | ![launch_instance](/docs/images/launch_instance.png) 12 | 13 | 2. Name your instance under `Name and tags`. 14 | 15 | 3. Under `Application and OS Images (Amazon Machine Image)` you can select your base machine image (AMI). Usually, the Amazon base image will work fine, but for some instances you may want to use a different base image. Also, some Marketplace solutions will use different base images. One reason this matters is that to use the session manager (SSM) to connect to your instance, you will need to select an Amazon AMI here. If you need a different AMI and also need to connect via SSM, please email CloudLab@nih.gov and we can help you write a startup script to launch SSM. 16 | 17 | ![AMI](/docs/images/AMI.png) 18 | 19 | 4. Next, under `Instance type`, select your instance type. You can click `Compare instance types` to see the full specs of all available instances. 20 | 21 | ![instance_type](/docs/images/instance_type.png) 22 | 23 | 5. Under `Key pair (login)` select your key pair. If you need to generate a new key pair, click `Create new key pair`, then give your key a name, select `RSA` and `.pem`, then `Create key pair`. Make sure you secure your key and never share it with other users or make it available on GitHub. 24 | 25 | ![new_key](/docs/images/new_key.png) 26 | 27 | 6. Now, under `Network settings` we will set the security group that allows us to SSH into the instance. You can either create a security group and then select `Select existing security group`, or leave the default selected for `Create security group`. Leave the box checked that says `Allow SSH traffic from`, then in the dropdown select `Custom` and in the search box type `10.0.0.0/8`. This provides a range of IP addresses that should encompass your VPN's private IP. This may not work if your IP is not in that range; if so, scroll down to `Additional Troubleshooting`. 28 | 29 | ![security_group](/docs/images/security_group.png) 30 | 31 | 7. Go to `Configure storage` and set the amount of storage (in GB) that you will need. You can always resize later by editing the instance. 32 | 33 | 8. If you plan to connect only via SSH, and not SSM, go ahead and click `Launch Instance` on the bottom right part of the screen. However, if you would like to have SSM access available, and are using an Amazon AMI, then click the `Advanced details` drop down arrow. Under `IAM instance profile`, select `SSM-Role-For-EC2`. You may need to search for it.
34 | 35 | ![IAM](/docs/images/IAM_SSM_role.png) 36 | 37 | 9. Now you are ready to launch, and SSH and SSM should both be available under the `Connect` menu once your instance is running. 38 | 39 | ![connect](/docs/images/connect_ec2.png) 40 | 41 | Here is the view if you connect via SSM. It will open a terminal in your browser window. Once in, switch to the ec2-user by typing `sudo su ec2-user`. 42 | 43 | ![SSM](/docs/images/SSM.png) 44 | 45 | To connect via SSH, click the SSH option. 46 | 47 | ![SSH](/docs/images/SSH.png) 48 | 49 | Then copy the example text under number four. For security reasons we don't include a screenshot, but it will be something like: 50 | 51 | `ssh -i ".pem" ec2-user@`. Paste that into a terminal or Cloud Shell window. If not using an Amazon AMI, the username will be different; for example, an Ubuntu image will have the username `ubuntu`. 52 | 53 | ### Additional Troubleshooting 54 | If your assigned IP is outside the range of 10.0.0.0/8, then you will need to update your security group to include your assigned IP. Unfortunately, you will need to do this every time you sign in to the VPN. To keep things simple, you can go to Network and Security > Security Groups on the left of the EC2 menu, select the Security group ID associated with your instance, click Actions at the top, and click `Edit inbound rules`. Click Add rule, select SSH, Custom, and then paste in your IP followed by /32. This should allow you to SSH into the instance. 55 | 56 | If you are on campus, your IP may remain stable, but you will need to check. To find your current IP, run `ifconfig` (macOS/Linux) or `ipconfig` (Windows) and identify the assigned IP address. For Windows, look for the DNS suffix `nih.gov`, which will show up if you are connected to the VPN. 57 | 58 | ![windows](/docs/images/windows.jpg) 59 | 60 | For macOS: look for utun(number), where number is the highest number of the utun series, which should have an IP assigned. This could be utun1-3. 61 | 62 | ![mac](/docs/images/mac.png) 63 | 64 | Once you have your IP, update the security group as described above and then try to SSH into the instance again. 65 | 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /docs/create_athena_database.md: -------------------------------------------------------------------------------- 1 | # Creating and Searching a database using Amazon Athena 2 | 3 | 1) Navigate to the Amazon Athena homepage. Click **Data sources and catalogs**. 4 | 5 | 6 | 7 | 2) Click **Create data source**. Note that you probably won't yet have any data sources listed, as we do in the following screenshot. 8 | 9 | 10 | 11 | 3) Select *S3 - AWS Glue Data Catalog*. Scroll down and click **Next**. 12 | 13 | 14 | 15 | 4) Select *AWS Glue Catalog in this account* and *Create a crawler in AWS Glue*. Click **Create in AWS Glue**. 16 | 17 | 18 | 19 | 5) Name your crawler and then click **Next**. Make sure you do not include `-` or any special characters other than `_` in the name; otherwise you can have issues further down. 20 | 21 | 22 | 23 | 6) Click **Add a data source**. 24 | 25 | 26 | 27 | 7) Now we add the data source. 28 | 29 | 30 | 31 | 8) Select **Create an IAM role** and give your role a name like `sraCrawler`. Click `Update chosen IAM Role`. This will add a role and grant it permissions to access the public S3 bucket. Click **Next**. 32 | 33 | 34 | 35 | 9) For **Target database**, click **Add database**. 36 | 37 | 38 | 39 | 10) Name your database.
Click **Create database**. 40 | 41 | 42 | 43 | 11) Click **Run crawler**. 44 | 45 | 46 | 47 | ## Query the database via Athena user interface 48 | 49 | 1) Navigate to `Amazon Athena > Query editor`. Before you run a query, you need to set up a query result location in Amazon S3. Click `Edit setting`. 50 | 51 | 52 | 53 | 2) Click `Browse S3`. 54 | 55 | 56 | 57 | 3) Choose an S3 bucket. 58 | 59 | 60 | 61 | 4) After saving the setting, you can run your query. 62 | 63 | 64 | 65 | 66 | 67 | ## Query a database via Jupyter Notebook 68 | 69 | You can query a database via a Jupyter Notebook. We provide an example [here](https://github.com/STRIDES/NIHCloudLabAWS/blob/main/notebooks/SRADownload/SRA-Download.ipynb), as well as [these examples](https://github.com/ncbi/ASHG-Workshop-2021). -------------------------------------------------------------------------------- /docs/create_code_repo.md: -------------------------------------------------------------------------------- 1 | # Guide to using AWS CodeCommit 2 | 3 | **IMPORTANT: This AWS service is being transitioned out; only customers who were already using CodeCommit as of July 25, 2024 can continue to use this service** 4 | 5 | Most NIH Cloud Lab users are probably used to using GitHub to manage and access code. The cloud providers also offer their own managed git repositories, which allow you to keep all your code within AWS without any external integrations. 6 | Follow this guide to learn how to set up and use AWS CodeCommit. 7 | 8 | ### Create a CodeCommit Repository 9 | 10 | 1. Navigate to the CodeCommit page. 11 | 12 | 13 | 14 | 2. Click **Create repository**. 15 | 16 | 17 | 18 | 3. Enter the necessary information for your repository. Feel free to add tags to track costs. Click **Create**. 19 | 20 | 21 | 22 | ### Authenticate and Set Up Environment 23 | 24 | All the instructions for setting up your environment are outlined in this [AWS documentation](https://docs.aws.amazon.com/codecommit/latest/userguide/setting-up-https-unixes.html). Because we are using Short Term Access Keys, we need to use the AWS CLI credential helper. 25 | 26 | 1. Open a compute environment of choice, either [EC2](https://github.com/STRIDES/NIHCloudLabAWS/blob/main/docs/connect_ec2.md), a [Sagemaker Notebook](https://github.com/STRIDES/NIHCloudLabAWS/blob/main/docs/Jupyter_notebook.md), or [Cloud Shell](https://aws.amazon.com/cloudshell/). 27 | 28 | 2. Authenticate the AWS CLI using your [Short Term Access Keys](https://github.com/STRIDES/NIHCloudLabAWS/blob/main/docs/Intramural_STAKs.md). Make sure you redo this whenever you disconnect, as these keys reset periodically. 29 | 30 | 3. If using an EC2 instance, [install Git](https://git-scm.com/download/linux). AWS Linux machines will install with `sudo yum install git -y`. Cloud Shell and Sagemaker Instances already have Git installed. 31 | 32 | 4. Set up the credential helper. Run the following code to update the git config. 33 | 34 | ``` 35 | git config --global credential.helper '!aws codecommit credential-helper $@' 36 | git config --global credential.UseHttpPath true 37 | ``` 38 | 39 | ### Clone the Repository and Push Code 40 | 41 | 1. You should now be authenticated and ready to clone the repository locally. To copy the URL path, you can (A) click on the HTTPS link on the CodeCommit page. 42 | 43 | 44 | 45 | Or, (B) you can go into the details for your repository and click **Clone URL** in the top right. 46 | 47 | 48 | 49 | If you get an error, re-authenticate with your Short Term Access Keys. 50 | 51 | 2.
Now you can use regular git commands to add, commit, push, etc. If you have to reinitialize your keys, then you may need to run `git init` to reinitialize the repo. 52 | 53 | A simple workflow would look like this. 54 | 55 | ``` 56 | # Clone the repo 57 | git clone https://git-codecommit.us-east-1.amazonaws.com/v1/repos/cloud-lab-test-repo 58 | # CD into the repo 59 | cd cloud-lab-test-repo 60 | # Copy in the files you want to commit to the CodeCommit repository 61 | cp ../example_file.txt . 62 | # Create a new branch. CodeCommit does not create a default branch, so you need to create a branch to push to. 63 | git checkout -b cloud-lab-branch 64 | # Stage your files 65 | git add example_file.txt 66 | # Commit 67 | git commit -m 'cloud lab test commit' 68 | # Push to the CodeCommit repo 69 | git push origin cloud-lab-branch 70 | # Test pulling files back down 71 | git pull origin cloud-lab-branch 72 | ``` 73 | You should now see your file(s) in the CodeCommit Repository. 74 | 75 | 76 | 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /docs/create_conda_env.md: -------------------------------------------------------------------------------- 1 | # Creating a conda environment on a Virtual Machine (notebooks or otherwise) 2 | 3 | The instructions for creating conda environments on EC2 virtual machines are exactly the same as those for Sagemaker notebooks. This is because notebooks come with conda pre-installed and don't play well with changing the conda environment from within the notebook itself. Thus, if you are using a notebook, follow all steps from within a terminal, which you can access from the launcher. 4 | 5 | 6 | 7 | ## 1. Create a conda environment 8 | 9 | ### Install mamba 10 | ``` 11 | curl -L -O https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh 12 | bash Mambaforge-$(uname)-$(uname -m).sh -b -p $HOME/mambaforge 13 | ``` 14 | 15 | ### Export to Path 16 | `export PATH="$HOME/mambaforge/bin:$PATH"` 17 | 18 | From within the notebook you can add Mamba to your PATH with 19 | ``` 20 | import os 21 | os.environ["PATH"] += os.pathsep + os.environ["HOME"]+"/mambaforge/bin" 22 | ``` 23 | 24 | ### Create and activate the environment 25 | `mamba create -n vcftools -c bioconda vcftools ipykernel -y` 26 | 27 | You can also create the environment using a yaml file like this: `mamba env create -f environment.yml` 28 | 29 | `source activate vcftools` 30 | 31 | ## 2. Create a custom kernel for your notebook instance 32 | 33 | ### Create the kernel using ipykernel 34 | 35 | `python -m ipykernel install --user --name=vcftools` 36 | 37 | If you get a `no module named ipykernel` error, run `pip3 install ipykernel`. 38 | 39 | ### Open the kernel 40 | 41 | Now you can switch to the kernel either from the launcher 42 | 43 | 44 | 45 | Or, from the top right within the notebook. 46 | 47 | 48 | 49 | ### Add bin to path 50 | On AWS we need one extra step to add the environment's bin directory to our kernel environment. Although we have a separate kernel, the conda environment does not copy over correctly. 51 | 52 | From within the notebook, run 53 | 54 | `os.environ["PATH"] += os.pathsep + os.environ["HOME"]+"/mambaforge/envs/vcftools/bin/"` 55 | 56 | Notice in the screenshot that vcftools is not available until we add the environment bin to PATH. If you then switch back to a previous conda environment, like base (`source activate base`), vcftools won't be available anymore.
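As a quick sanity check after the steps above, you can confirm from a terminal that the environment and its tools resolve from the expected location. This is a minimal sketch that assumes you kept the environment name `vcftools` and the default `~/mambaforge` install path.

```
# Activate the environment and confirm the tools resolve from its bin directory
source activate vcftools
which vcftools        # expected: $HOME/mambaforge/envs/vcftools/bin/vcftools
vcftools --version
```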
57 | 58 | 59 | -------------------------------------------------------------------------------- /docs/ecr.md: -------------------------------------------------------------------------------- 1 | # How to build a Docker container and push to the Elastic Container Registry 2 | 3 | This doc outlines how to create a Docker container and how to push that container to the Amazon Elastic Container Registry. Find more info [here](https://docs.aws.amazon.com/AmazonECR/latest/userguide/docker-push-ecr-image.html). 4 | 5 | ## Create an Elastic Container Registry 6 | 7 | 1. Navigate to the Elastic Container Registry page. It's easiest to just search at the top. 8 | 9 | 10 | 11 | 2. Click **CREATE REPOSITORY**. 12 | 13 | 14 | 15 | 3. Fill in the details and click **Create repository**. For this example, name your repository `bamtools-example`. 16 | 17 | 18 | 19 | ## Build and Stage your Container 20 | 21 | We are going to use an example [Dockerfile](https://github.com/BioContainers/containers/blob/master/bamtools/2.4.0/Dockerfile) for BAMtools from [BioContainers](https://github.com/BioContainers/containers) to build a quick container just to practice pushing to the Elastic Container Registry. Copy that file locally and save it as `Dockerfile`. 22 | 23 | From within a compute environment (Sagemaker, EC2, or Cloud Shell), run the following: 24 | 25 | `docker build -t bamtools:latest . --no-cache` 26 | 27 | Test your container by running it and make sure BAMtools is available. 28 | 29 | `docker run bamtools:latest bamtools -h` 30 | 31 | Authenticate to the registry. 32 | 33 | `aws ecr get-login-password --region region | docker login --username AWS --password-stdin aws_account_id.dkr.ecr.region.amazonaws.com` 34 | 35 | List your local images, and find the Image ID. 36 | 37 | `docker images` 38 | 39 | 40 | 41 | Tag the container for the registry. 42 | 43 | `docker tag bamtools:latest aws_account_id.dkr.ecr.region.amazonaws.com/my-repository:tag` 44 | 45 | Now push to the Registry. 46 | 47 | `docker push aws_account_id.dkr.ecr.region.amazonaws.com/my-repository:tag` 48 | 49 | Pull the container and make sure it all works as expected. 50 | 51 | `docker pull aws_account_id.dkr.ecr.region.amazonaws.com/my-repository:tag` 52 | -------------------------------------------------------------------------------- /docs/environment.yml: -------------------------------------------------------------------------------- 1 | name: vcftools 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | - defaults 6 | dependencies: 7 | - sra-tools >2.9.1 8 | - pigz =2.6 9 | - pbzip2 =1.1 10 | - trimmomatic ==0.36 11 | - fastqc ==0.11.9 12 | - multiqc ==1.10.1 13 | - salmon ==1.5.1 14 | - vcftools ==0.1.16 15 | -------------------------------------------------------------------------------- /docs/extramural_account_registration.md: -------------------------------------------------------------------------------- 1 | # Registering for NIH Cloud Lab – Instructions for NIH-Affiliated Researchers 2 | 3 | 1. From an incognito or private browsing window, navigate to [this link](https://nih-cloudlab.firebaseapp.com) and wait for a login page to load. 4 | 5 | 2. Scroll down and click **Research Organization**. If you select another login option, your registration will not work. 6 | 7 | ![initial nih login page](/docs/images/1_NIH_login.png) 8 | 9 | 3. Search for and select your University. If your University is listed, continue to `Registering with your University Account`.
If your University is not listed, skip ahead to [Option 2 – Registering with Login.gov](#Option-2–Registering-with-Login.gov). 10 | 11 | # Option 1 – Registering with a University Account 12 | 13 | _If your university is listed in the dropdown, you can use your university username and password to complete the registration process. Follow these steps:_ 14 | 15 | 4. Select the name of your university. 16 | 17 | ![University Search](/docs/images/2_input_university.png) 18 | 19 | 5. Log in with your university credentials. 20 | 21 | 6. Grant access to NIH. 22 | 23 | ![Grant Access](/docs/images/3_grant_access.png) 24 | 25 | 7. Fill out the form. Enter your name, university email address, a description of how you plan to use NIH Cloud Lab, and your NIH program officer’s name. Click **Submit** and you are finished. You will receive an email from cloudlab@nih.gov when your credits are ready – typically within a few days. 26 | 27 | ![Final Form](/docs/images/4_final_formv2.png) 28 | 29 | # Option 2 – Registering with Login.gov 30 | A Login.gov account is a free account that members of the public can create to access a variety of government resources. If your university is not listed in the dropdown, you can use a Login.gov account to complete the registration process. Follow these steps: 31 | 32 | 1. From an incognito or private browsing window, navigate to [this link](https://nih-cloudlab.firebaseapp.com) and wait for a login page to load. 33 | 34 | 2. Scroll down and click **Research Organization**. If you select another login option, your registration will not work. 35 | 36 | ![initial nih login page](/docs/images/1_NIH_login.png) 37 | 38 | 3. Type in **Login.gov** or **National Science Foundation** (if you have an account with NSF) and follow the prompts to sign in to your account. The images shown here are only for *login.gov* authentication. If you do not have a Login.gov account, you can create one following [these instructions](https://login.gov/help/get-started/create-your-account/) or the instructions attached to the registration email. **Please register using your university email address.** 39 | 40 | ![Sign In Login.gov](/docs/images/6_signin_logingov.png) 41 | 42 | 4. Complete MFA, in this case a one-time code. 43 | 44 | ![MFA](/docs/images/7_mfa.png) 45 | 46 | 5. Grant access to NIH. 47 | 48 | ![Grant Access](/docs/images/3_grant_access.png) 49 | 50 | 6. Fill out the form. You won't be able to enter your name, and your university email will be pre-populated. Enter a description of how you plan to use NIH Cloud Lab and your NIH program officer’s name. Click **Submit** and you are finished. You will receive an email from cloudlab@nih.gov when your credits are ready – typically within a few days.
51 | 52 | ![Final Form](/docs/images/4_final_formv2.png) 53 | -------------------------------------------------------------------------------- /docs/images/1.click_support_center.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/1.click_support_center.png -------------------------------------------------------------------------------- /docs/images/1_EC2_homepage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/1_EC2_homepage.png -------------------------------------------------------------------------------- /docs/images/1_NIH_login.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/1_NIH_login.png -------------------------------------------------------------------------------- /docs/images/1_click_roles.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/1_click_roles.png -------------------------------------------------------------------------------- /docs/images/1_clone_respository1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/1_clone_respository1.png -------------------------------------------------------------------------------- /docs/images/1_clone_respository2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/1_clone_respository2.png -------------------------------------------------------------------------------- /docs/images/1_cloud_access.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/1_cloud_access.png -------------------------------------------------------------------------------- /docs/images/1_find_code_commit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/1_find_code_commit.png -------------------------------------------------------------------------------- /docs/images/1_find_ecr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/1_find_ecr.png -------------------------------------------------------------------------------- /docs/images/1_find_sagemaker.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/1_find_sagemaker.png -------------------------------------------------------------------------------- /docs/images/1_open_marketplace.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/1_open_marketplace.png -------------------------------------------------------------------------------- /docs/images/1_select_instance_ID.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/1_select_instance_ID.png -------------------------------------------------------------------------------- /docs/images/2.click_Create_case.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/2.click_Create_case.png -------------------------------------------------------------------------------- /docs/images/2_click_connect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/2_click_connect.png -------------------------------------------------------------------------------- /docs/images/2_click_stak.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/2_click_stak.png -------------------------------------------------------------------------------- /docs/images/2_create_registry.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/2_create_registry.png -------------------------------------------------------------------------------- /docs/images/2_create_repository.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/2_create_repository.png -------------------------------------------------------------------------------- /docs/images/2_create_respository.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/2_create_respository.png -------------------------------------------------------------------------------- /docs/images/2_input_university.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/2_input_university.png -------------------------------------------------------------------------------- /docs/images/2_manage_subscriptions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/2_manage_subscriptions.png -------------------------------------------------------------------------------- /docs/images/2_monitoring_tab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/2_monitoring_tab.png -------------------------------------------------------------------------------- 
/docs/images/2_new_notebook_instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/2_new_notebook_instance.png -------------------------------------------------------------------------------- /docs/images/2_sagemaker_role.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/2_sagemaker_role.png -------------------------------------------------------------------------------- /docs/images/3.technical_support.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/3.technical_support.png -------------------------------------------------------------------------------- /docs/images/3_add_repo_info.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/3_add_repo_info.png -------------------------------------------------------------------------------- /docs/images/3_configure_sagemaker_instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/3_configure_sagemaker_instance.png -------------------------------------------------------------------------------- /docs/images/3_general_settings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/3_general_settings.png -------------------------------------------------------------------------------- /docs/images/3_grant_access.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/3_grant_access.png -------------------------------------------------------------------------------- /docs/images/3_inline_policy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/3_inline_policy.png -------------------------------------------------------------------------------- /docs/images/3_paste_creds.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/3_paste_creds.png -------------------------------------------------------------------------------- /docs/images/3_session_manager.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/3_session_manager.png -------------------------------------------------------------------------------- /docs/images/3_view_metrics.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/3_view_metrics.png -------------------------------------------------------------------------------- /docs/images/4.describe_issue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/4.describe_issue.png -------------------------------------------------------------------------------- /docs/images/4_add_inline_form.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/4_add_inline_form.png -------------------------------------------------------------------------------- /docs/images/4_connect_ssh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/4_connect_ssh.png -------------------------------------------------------------------------------- /docs/images/4_final_formv2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/4_final_formv2.png -------------------------------------------------------------------------------- /docs/images/4_list_docker_images.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/4_list_docker_images.png -------------------------------------------------------------------------------- /docs/images/4_open_jupyter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/4_open_jupyter.png -------------------------------------------------------------------------------- /docs/images/4_stop_instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/4_stop_instance.png -------------------------------------------------------------------------------- /docs/images/5.enter_contact_info.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/5.enter_contact_info.png -------------------------------------------------------------------------------- /docs/images/5_edit_instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/5_edit_instance.png -------------------------------------------------------------------------------- /docs/images/5_name_and_create.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/5_name_and_create.png -------------------------------------------------------------------------------- /docs/images/5_select_kernel.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/5_select_kernel.png -------------------------------------------------------------------------------- /docs/images/5_terminal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/5_terminal.png -------------------------------------------------------------------------------- /docs/images/6_aws_example_notebooks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/6_aws_example_notebooks.png -------------------------------------------------------------------------------- /docs/images/6_change_instance_type.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/6_change_instance_type.png -------------------------------------------------------------------------------- /docs/images/6_confirm_policy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/6_confirm_policy.png -------------------------------------------------------------------------------- /docs/images/6_signin_logingov.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/6_signin_logingov.png -------------------------------------------------------------------------------- /docs/images/7_clone_repo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/7_clone_repo.png -------------------------------------------------------------------------------- /docs/images/7_mfa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/7_mfa.png -------------------------------------------------------------------------------- /docs/images/8_open_notebook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/8_open_notebook.png -------------------------------------------------------------------------------- /docs/images/9_run_notebook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/9_run_notebook.png -------------------------------------------------------------------------------- /docs/images/AMI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/AMI.png -------------------------------------------------------------------------------- 
/docs/images/EC2_add_tags.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/EC2_add_tags.png -------------------------------------------------------------------------------- /docs/images/EC2_edit_tags.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/EC2_edit_tags.png -------------------------------------------------------------------------------- /docs/images/Ec2_filter_service.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/Ec2_filter_service.png -------------------------------------------------------------------------------- /docs/images/IAM_SSM_role.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/IAM_SSM_role.png -------------------------------------------------------------------------------- /docs/images/ParallelclusterUI.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/ParallelclusterUI.PNG -------------------------------------------------------------------------------- /docs/images/PubMed_chatbot_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/PubMed_chatbot_results.png -------------------------------------------------------------------------------- /docs/images/Q-IAM-role.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/Q-IAM-role.png -------------------------------------------------------------------------------- /docs/images/Q-R-script.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/Q-R-script.png -------------------------------------------------------------------------------- /docs/images/Q-amazon-q-jup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/Q-amazon-q-jup.png -------------------------------------------------------------------------------- /docs/images/Q-code-completion-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/Q-code-completion-1.png -------------------------------------------------------------------------------- /docs/images/Q-code-completion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/Q-code-completion.png 
-------------------------------------------------------------------------------- /docs/images/Q-domain-name.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/Q-domain-name.png -------------------------------------------------------------------------------- /docs/images/Q-explain.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/Q-explain.png -------------------------------------------------------------------------------- /docs/images/Q-fix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/Q-fix.png -------------------------------------------------------------------------------- /docs/images/Q-iam-policy-review.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/Q-iam-policy-review.png -------------------------------------------------------------------------------- /docs/images/Q-jupy-lab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/Q-jupy-lab.png -------------------------------------------------------------------------------- /docs/images/Q-optimize-script.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/Q-optimize-script.png -------------------------------------------------------------------------------- /docs/images/Q-optimize.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/Q-optimize.png -------------------------------------------------------------------------------- /docs/images/Q-parallel-processing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/Q-parallel-processing.png -------------------------------------------------------------------------------- /docs/images/Q-role-policy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/Q-role-policy.png -------------------------------------------------------------------------------- /docs/images/Q-send-cell-with-prompt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/Q-send-cell-with-prompt.png -------------------------------------------------------------------------------- /docs/images/Q-snakemake-cloud.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/Q-snakemake-cloud.png -------------------------------------------------------------------------------- /docs/images/Q-snakemake-cluod.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/Q-snakemake-cluod.png -------------------------------------------------------------------------------- /docs/images/Q-snakemake-wf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/Q-snakemake-wf.png -------------------------------------------------------------------------------- /docs/images/SSH.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/SSH.png -------------------------------------------------------------------------------- /docs/images/SSM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/SSM.png -------------------------------------------------------------------------------- /docs/images/add_alert_threshold.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/add_alert_threshold.png -------------------------------------------------------------------------------- /docs/images/add_env_path_aws.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/add_env_path_aws.jpeg -------------------------------------------------------------------------------- /docs/images/add_script.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/add_script.png -------------------------------------------------------------------------------- /docs/images/add_tags_bucket.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/add_tags_bucket.png -------------------------------------------------------------------------------- /docs/images/add_tags_sagemaker.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/add_tags_sagemaker.png -------------------------------------------------------------------------------- /docs/images/athena/10_create_database.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/athena/10_create_database.png -------------------------------------------------------------------------------- /docs/images/athena/11_run_crawler.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/athena/11_run_crawler.png -------------------------------------------------------------------------------- /docs/images/athena/1_select_data_sources.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/athena/1_select_data_sources.png -------------------------------------------------------------------------------- /docs/images/athena/2_click_create_dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/athena/2_click_create_dataset.png -------------------------------------------------------------------------------- /docs/images/athena/3_select_glue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/athena/3_select_glue.png -------------------------------------------------------------------------------- /docs/images/athena/4_glue_catalog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/athena/4_glue_catalog.png -------------------------------------------------------------------------------- /docs/images/athena/5_name_crawler.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/athena/5_name_crawler.png -------------------------------------------------------------------------------- /docs/images/athena/6_click_add_data_source.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/athena/6_click_add_data_source.png -------------------------------------------------------------------------------- /docs/images/athena/7_add_data_source.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/athena/7_add_data_source.png -------------------------------------------------------------------------------- /docs/images/athena/8_create_role.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/athena/8_create_role.png -------------------------------------------------------------------------------- /docs/images/athena/9_output_scheduling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/athena/9_output_scheduling.png -------------------------------------------------------------------------------- /docs/images/athena/browse_s3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/athena/browse_s3.png -------------------------------------------------------------------------------- /docs/images/athena/choose_s3_bucket.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/athena/choose_s3_bucket.png -------------------------------------------------------------------------------- /docs/images/athena/result_location.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/athena/result_location.png -------------------------------------------------------------------------------- /docs/images/athena/run_query.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/athena/run_query.png -------------------------------------------------------------------------------- /docs/images/aws_batch_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/aws_batch_1.png -------------------------------------------------------------------------------- /docs/images/aws_batch_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/aws_batch_2.png -------------------------------------------------------------------------------- /docs/images/aws_batch_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/aws_batch_3.png -------------------------------------------------------------------------------- /docs/images/aws_batch_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/aws_batch_4.png -------------------------------------------------------------------------------- /docs/images/aws_batch_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/aws_batch_5.png -------------------------------------------------------------------------------- /docs/images/aws_batch_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/aws_batch_6.png -------------------------------------------------------------------------------- /docs/images/aws_batch_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/aws_batch_7.png -------------------------------------------------------------------------------- /docs/images/aws_batch_8.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/aws_batch_8.png -------------------------------------------------------------------------------- /docs/images/aws_cost_management.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/aws_cost_management.png -------------------------------------------------------------------------------- /docs/images/bedrock_agents_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/bedrock_agents_1.png -------------------------------------------------------------------------------- /docs/images/bedrock_agents_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/bedrock_agents_2.png -------------------------------------------------------------------------------- /docs/images/bedrock_agents_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/bedrock_agents_3.png -------------------------------------------------------------------------------- /docs/images/bedrock_agents_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/bedrock_agents_4.png -------------------------------------------------------------------------------- /docs/images/bedrock_agents_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/bedrock_agents_5.png -------------------------------------------------------------------------------- /docs/images/bedrock_agents_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/bedrock_agents_6.png -------------------------------------------------------------------------------- /docs/images/bedrock_agents_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/bedrock_agents_7.png -------------------------------------------------------------------------------- /docs/images/bedrock_chat_playground_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/bedrock_chat_playground_1.png -------------------------------------------------------------------------------- /docs/images/bedrock_chat_playground_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/bedrock_chat_playground_2.png 
-------------------------------------------------------------------------------- /docs/images/bedrock_chat_playground_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/bedrock_chat_playground_3.png -------------------------------------------------------------------------------- /docs/images/bedrock_chat_playground_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/bedrock_chat_playground_4.png -------------------------------------------------------------------------------- /docs/images/bedrock_knowledgebase_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/bedrock_knowledgebase_1.png -------------------------------------------------------------------------------- /docs/images/bedrock_knowledgebase_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/bedrock_knowledgebase_10.png -------------------------------------------------------------------------------- /docs/images/bedrock_knowledgebase_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/bedrock_knowledgebase_2.png -------------------------------------------------------------------------------- /docs/images/bedrock_knowledgebase_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/bedrock_knowledgebase_3.png -------------------------------------------------------------------------------- /docs/images/bedrock_knowledgebase_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/bedrock_knowledgebase_4.png -------------------------------------------------------------------------------- /docs/images/bedrock_knowledgebase_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/bedrock_knowledgebase_5.png -------------------------------------------------------------------------------- /docs/images/bedrock_knowledgebase_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/bedrock_knowledgebase_6.png -------------------------------------------------------------------------------- /docs/images/bedrock_knowledgebase_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/bedrock_knowledgebase_7.png -------------------------------------------------------------------------------- /docs/images/bedrock_knowledgebase_8.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/bedrock_knowledgebase_8.png -------------------------------------------------------------------------------- /docs/images/bedrock_knowledgebase_9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/bedrock_knowledgebase_9.png -------------------------------------------------------------------------------- /docs/images/bedrock_model_access.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/bedrock_model_access.png -------------------------------------------------------------------------------- /docs/images/bedrock_page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/bedrock_page.png -------------------------------------------------------------------------------- /docs/images/blast_costs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/blast_costs.png -------------------------------------------------------------------------------- /docs/images/bucket_properties.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/bucket_properties.png -------------------------------------------------------------------------------- /docs/images/budget_alerts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/budget_alerts.png -------------------------------------------------------------------------------- /docs/images/budget_scope.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/budget_scope.png -------------------------------------------------------------------------------- /docs/images/budget_type.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/budget_type.png -------------------------------------------------------------------------------- /docs/images/change_end_date.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/change_end_date.png -------------------------------------------------------------------------------- /docs/images/click_configuration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/click_configuration.png 
-------------------------------------------------------------------------------- /docs/images/configure_budget_aws.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/configure_budget_aws.png -------------------------------------------------------------------------------- /docs/images/connect_ec2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/connect_ec2.png -------------------------------------------------------------------------------- /docs/images/cost_explorer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/cost_explorer.png -------------------------------------------------------------------------------- /docs/images/create-cluster.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/create-cluster.png -------------------------------------------------------------------------------- /docs/images/create_budget.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/create_budget.png -------------------------------------------------------------------------------- /docs/images/create_notebook_instance.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/create_notebook_instance.jpeg -------------------------------------------------------------------------------- /docs/images/ec2-filtered.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/ec2-filtered.png -------------------------------------------------------------------------------- /docs/images/edit_instance_aws.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/edit_instance_aws.png -------------------------------------------------------------------------------- /docs/images/edit_tags_bucket.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/edit_tags_bucket.png -------------------------------------------------------------------------------- /docs/images/filter_tag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/filter_tag.png -------------------------------------------------------------------------------- /docs/images/instance_type.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/instance_type.png -------------------------------------------------------------------------------- /docs/images/kernel_vcftools_aws.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/kernel_vcftools_aws.jpeg -------------------------------------------------------------------------------- /docs/images/launch_instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/launch_instance.png -------------------------------------------------------------------------------- /docs/images/launcher_terminal.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/launcher_terminal.jpeg -------------------------------------------------------------------------------- /docs/images/launcher_vcftools_aws.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/launcher_vcftools_aws.jpeg -------------------------------------------------------------------------------- /docs/images/mac.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/mac.png -------------------------------------------------------------------------------- /docs/images/memverge_mmb1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/memverge_mmb1.png -------------------------------------------------------------------------------- /docs/images/memverge_mmb10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/memverge_mmb10.png -------------------------------------------------------------------------------- /docs/images/memverge_mmb11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/memverge_mmb11.png -------------------------------------------------------------------------------- /docs/images/memverge_mmb12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/memverge_mmb12.png -------------------------------------------------------------------------------- /docs/images/memverge_mmb2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/memverge_mmb2.png -------------------------------------------------------------------------------- /docs/images/memverge_mmb3.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/memverge_mmb3.png -------------------------------------------------------------------------------- /docs/images/memverge_mmb4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/memverge_mmb4.png -------------------------------------------------------------------------------- /docs/images/memverge_mmb5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/memverge_mmb5.png -------------------------------------------------------------------------------- /docs/images/memverge_mmb6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/memverge_mmb6.png -------------------------------------------------------------------------------- /docs/images/memverge_mmb7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/memverge_mmb7.png -------------------------------------------------------------------------------- /docs/images/memverge_mmb8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/memverge_mmb8.png -------------------------------------------------------------------------------- /docs/images/memverge_mmb9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/memverge_mmb9.png -------------------------------------------------------------------------------- /docs/images/nav-url-from-output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/nav-url-from-output.png -------------------------------------------------------------------------------- /docs/images/nav_budget.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/nav_budget.png -------------------------------------------------------------------------------- /docs/images/new_key.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/new_key.png -------------------------------------------------------------------------------- /docs/images/pcui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/pcui.png -------------------------------------------------------------------------------- /docs/images/sagemaker_add_tags.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/sagemaker_add_tags.png -------------------------------------------------------------------------------- /docs/images/sagemaker_edit_tags.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/sagemaker_edit_tags.png -------------------------------------------------------------------------------- /docs/images/sagemaker_edit_tags2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/sagemaker_edit_tags2.png -------------------------------------------------------------------------------- /docs/images/search_billing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/search_billing.png -------------------------------------------------------------------------------- /docs/images/security_group.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/security_group.png -------------------------------------------------------------------------------- /docs/images/submit_budget.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/submit_budget.png -------------------------------------------------------------------------------- /docs/images/update_notebook_instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/update_notebook_instance.png -------------------------------------------------------------------------------- /docs/images/windows.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/docs/images/windows.jpg -------------------------------------------------------------------------------- /docs/parabricks.md: -------------------------------------------------------------------------------- 1 | ### Using the Parabricks AMI in Cloud Lab 2 | 3 | To use Parabricks within AWS, search for AWS Marketplace in the top search box. Then go to Discover Products and search for Parabricks. Click NVIDIA Clara Parabricks Pipelines, then Continue to Subscribe, then Accept Terms. You will now pay $0.30 per hour to use the software, plus the cost of the VM while it is running. It will take a few minutes for the subscription to complete; then click Continue to Configuration. We recommend you choose the newest software, select your region, then click Continue to Launch, select Launch Through EC2 under Choose Action, and then select your machine type. The p3dn.24xlarge will be the fastest machine, but look at all the available options and consider the cost of each machine.
Expect this machine to run a 30x human genome through GATK HaplotypeCaller in about 40 minutes. Now you can launch the machine like any other EC2 instance and you will be ready to use pbrun, the Parabricks CLI. 4 | -------------------------------------------------------------------------------- /docs/request_enterprise_support.md: -------------------------------------------------------------------------------- 1 | # Request Enterprise Support in AWS Cloud Lab 2 | 3 | As a Cloud Lab user, you have access to AWS Enterprise support, which allows you to request help on technical subjects related to cloud infrastructure, such as issues with SSH, a missing VPC, or missing permissions for a certain tutorial. If you need help with a science-specific use case, like how to best call genomic variants in the cloud, then contact the Cloud Lab support team at **CloudLab@nih.gov**. 4 | 5 | To request AWS enterprise support: 6 | 7 | 1. Click the **?** icon in the top right of your AWS console, then click **Support Center** 8 | 9 | 10 | 11 | 2. Click **Create Case** 12 | 13 | 14 | 15 | 3. Select **Technical** and then select the *Service*, *Category*, and *Severity*. Here we are requesting help with an issue related to not being able to SSH into our EC2 instance. 16 | 17 | 18 | 19 | 4. Describe your issue. It can be helpful to attach error logs or screenshots. You should also put your account number in the box. 20 | 21 | 22 | 23 | 5. Finally, enter your contact info and click **Submit**. If you select the *Web* option, you will receive an email when your ticket has been modified. You will need to navigate back to *Support Center* and then view your case under *Open support cases*. 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /docs/right_sizing_vm.md: -------------------------------------------------------------------------------- 1 | # Guide to right-sizing your VM 2 | 3 | ## Monitor your CPU usage on EC2 4 | 5 | 1. Go to the EC2 console, and select the instance ID of the instance you want to monitor. 6 | 7 | 8 | 9 | 2. Scroll down and select the *Monitoring* tab. 10 | 11 | 12 | 13 | 3. If you would like more information for a given metric, click the three dots and select *View in metrics*. 14 | 15 | 16 | 17 | 4. Now you are on the CloudWatch page, and from here, you can go deep into a wide variety of metrics. Follow [this AWS guide](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/GettingStarted.html) for more information. 18 | 19 | 5. If you need to change the size of your VM based on over- or under-utilization of CPU or memory, first stop the instance. 20 | 21 | 22 | 23 | 6. Change the instance type. In the top right, click *Actions > Instance settings > Change instance type.* 24 | 25 | 26 | 27 | 7. In the dropdown, select a new instance type and click **Apply**. 28 | 29 | 30 | 31 | ## Monitor your CPU usage on a Sagemaker instance 32 | 33 | You can monitor some aspects of Sagemaker jobs in [CloudWatch](https://docs.aws.amazon.com/sagemaker/latest/dg/monitoring-cloudwatch.html#cloudwatch-metrics-jobs), but unfortunately, you cannot yet monitor CPU usage. If you want to monitor CPU or memory usage of a Sagemaker notebook, you will need to use Sagemaker Studio, but we do not yet have a guide on this product. You can also pull the same EC2 CloudWatch metrics from the command line, as shown in the example below.
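To sketch what that looks like, here is a minimal AWS CLI example for pulling CPU utilization for a single EC2 instance. The instance ID, region, and time window below are placeholders, so substitute your own values and adjust `--period` to the granularity you want:

```bash
# Average and peak CPU utilization for one EC2 instance, in 1-hour bins
aws cloudwatch get-metric-statistics \
  --namespace AWS/EC2 \
  --metric-name CPUUtilization \
  --dimensions Name=InstanceId,Value=i-0123456789abcdef0 \
  --start-time 2024-01-01T00:00:00Z \
  --end-time 2024-01-02T00:00:00Z \
  --period 3600 \
  --statistics Average Maximum \
  --region us-east-1
```

If the reported averages sit well below what the instance can deliver, that is usually a sign you can move to a smaller, cheaper instance type; sustained values near 100% suggest sizing up.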
34 | -------------------------------------------------------------------------------- /docs/service_quotas.md: -------------------------------------------------------------------------------- 1 | If you try to spin up a GPU instance, or sometimes a large CPU machine, you may encounter a quota limit. This is in place to prevent users from accidentally spinning up really expensive resources and burning through their whole budget in a few days. 2 | 3 | If you would like to use a machine with a quota limit, you can follow [these instructions](https://docs.aws.amazon.com/servicequotas/latest/userguide/request-quota-increase.html). 4 | 5 | Here are some Cloud Lab-specific instructions as well. 6 | + Go to `Services > Service Quotas` 7 | + Click on `Amazon Elastic Compute Cloud (EC2)` 8 | + Search `On-Demand`, or whatever instance type you are trying to modify (e.g., Spot) 9 | + Click `Running On-Demand P instances`. You need to request whatever instance family you are trying to run. You can figure this out by visiting the [EC2 instance type page](https://aws.amazon.com/ec2/instance-types/?trk=36c6da98-7b20-48fa-8225-4784bced9843&sc_channel=ps&sc_campaign=acquisition&sc_medium=ACQ-P|PS-GO|Brand|Desktop|SU|Compute|EC2|US|EN|Text&s_kwcid=AL!4422!3!536392622533!e!!g!!aws%20ec2%20instance%20types&ef_id=Cj0KCQjwgYSTBhDKARIsAB8Kuksn1rVhJBBjVbeIAs0DZx_ral7xl0eW-kL8KgMaMmNH8j7gJ0VHHMgaAnn5EALw_wcB:G:s&s_kwcid=AL!4422!3!536392622533!e!!g!!aws%20ec2%20instance%20types) and looking at the prefix of the instance type. For example, p4d.24xlarge is from the P family, so you need to request `Running On-Demand P instances`, and so on for the other machine types. 10 | + Click `Request quota increase` 11 | + The quota value is the number of vCPUs allowed for an instance family, so set it to the maximum number of vCPUs you expect to use 12 | + Click `Request` 13 | + In theory, you should only have to wait a few minutes for everything to be updated on the back end, and then you can try to launch the blocked instance type again. In practice, it may take up to 30 minutes, so be patient and keep checking back until the `Applied quota value` is updated 14 | + If you are still having trouble, please contact STRIDES support 15 | -------------------------------------------------------------------------------- /docs/update_sagemaker_role.md: -------------------------------------------------------------------------------- 1 | # Update the Sagemaker role to use Amazon Athena 2 | 3 | If, when using Athena from a notebook, you get the following permissions error: `An error occurred (AccessDeniedException) when calling the StartQueryExecution operation: User: arn:aws:sts::055102001469:assumed-role/sagemaker-notebook-instance-role/SageMaker is not authorized to perform: athena:StartQueryExecution on resource: arn:aws:athena:us-east-1:055102001469:workgroup/primary because no identity-based policy allows the athena:StartQueryExecution action`, then you need to update your IAM permissions. 4 | 5 | 1) Go to IAM, and click on **Roles**. 6 | 7 | 8 | 9 | 2) Search for the `sagemaker-notebook-instance-role` from the error message. Select the role in blue. 10 | 11 | 12 | 13 | 3) Click **Add permissions** then **Create inline policy**. 14 | 15 | 16 | 17 | 4) Fill out the form as in the screenshot below to attach the permission to the Sagemaker Role. Click **Review Policy**. 18 | 19 | 20 | 21 | 5) Name your policy and click **Create policy**. 22 | 23 | 24 | 25 | 6) Confirm that the new policy is listed for the Sagemaker Role.
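If you prefer the command line to the console, the same kind of inline policy can be attached with the AWS CLI. This is only a minimal sketch: the role name comes from the error message above, the policy name is arbitrary, and depending on your query you may also need Glue and S3 permissions, so adjust the actions and resources for your own account.

```bash
# Attach a minimal inline policy allowing the Athena calls from the error message
aws iam put-role-policy \
  --role-name sagemaker-notebook-instance-role \
  --policy-name athena-start-query \
  --policy-document '{
    "Version": "2012-10-17",
    "Statement": [
      {
        "Effect": "Allow",
        "Action": [
          "athena:StartQueryExecution",
          "athena:GetQueryExecution",
          "athena:GetQueryResults"
        ],
        "Resource": "*"
      }
    ]
  }'
```

After attaching the policy, rerun the failing query from your notebook to confirm the `AccessDeniedException` is gone.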
26 | 27 | 28 | -------------------------------------------------------------------------------- /notebooks/AWSBatch/.gitkeep: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /notebooks/ElasticBLAST/run_elastic_blast.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "8c3f3bb2", 6 | "metadata": {}, 7 | "source": [ 8 | "# Run ElasticBLAST using AWS Batch" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "af8ad165", 14 | "metadata": {}, 15 | "source": [ 16 | "## Overview\n", 17 | "This notebook helps you to run Blast in a scalable manner using AWS Batch. The script will spin up and later tear down your cluster to execute the Blast jobs. This notebook is based on the [this tutorial](https://blast.ncbi.nlm.nih.gov/doc/elastic-blast/quickstart-aws.html). Make sure you select a kernel with Python 3.7 for the Elastic BLAST install. One good option is `conda_python3`. " 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "id": "1855fc00", 23 | "metadata": {}, 24 | "source": [ 25 | "## Prerequisites\n", 26 | "You need to make sure you have permissions use to use Cloud Formation, Batch, and SageMaker" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "id": "13e66dc9", 32 | "metadata": {}, 33 | "source": [ 34 | "## Learning Objectives\n", 35 | "+ Learn to use Batch to scale compute jobs.\n", 36 | "+ Learn how to use BLAST in the cloud." 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "id": "a3c61097", 42 | "metadata": {}, 43 | "source": [ 44 | "## Get Started" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "id": "06e31b2a", 50 | "metadata": {}, 51 | "source": [ 52 | "### Install packages" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "id": "d96bb988", 59 | "metadata": { 60 | "scrolled": true 61 | }, 62 | "outputs": [], 63 | "source": [ 64 | "! pip3 install elastic-blast" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "id": "684e79f6", 70 | "metadata": {}, 71 | "source": [ 72 | "Test your install, it should print out a version and full help menu." 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "id": "2aa11ccc", 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "! elastic-blast --version\n", 83 | "! elastic-blast --help" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "id": "58b59cb0", 89 | "metadata": {}, 90 | "source": [ 91 | "### Create a bucket " 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "id": "79ebb807-7fd5-466b-b380-67f2a62a2caf", 97 | "metadata": {}, 98 | "source": [ 99 | "For this tutorial you will need to create a bucket if one does not yet exist, make sure to pick a unique name otherwise you will run into a error." 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "id": "319ff226", 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "BUCKET = 'elasticblast-test-zy'\n", 110 | "! aws s3 mb s3://$BUCKET" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "id": "449d7511", 116 | "metadata": {}, 117 | "source": [ 118 | "### Create a config file that defines the job parameters" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "id": "b578c1ea", 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "! 
touch BDQA.ini" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "id": "a1b0a866", 134 | "metadata": {}, 135 | "source": [ 136 | "Open the config file and add the following:\n", 137 | "```\n", 138 | "[cloud-provider]\n", 139 | "aws-region = us-east-1\n", 140 | "aws-vpc = vpc-0eaafe0236e351a36\n", 141 | "aws-subnet = subnet-043d7614ae5dc30c9\n", 142 | "aws-key-pair = cloud-lab-testing\n", 143 | "\n", 144 | "[cluster]\n", 145 | "num-nodes = 3\n", 146 | "labels = owner=ec2-user\n", 147 | "\n", 148 | "[blast]\n", 149 | "program = blastp\n", 150 | "db = refseq_protein\n", 151 | "queries = s3://elasticblast-test/queries/BDQA01.1.fsa_aa\n", 152 | "results = s3://elasticblast-sagemaker/results/BDQA\n", 153 | "options = -task blastp-fast -evalue 0.01 -outfmt \"7 std sskingdoms ssciname\"\n", 154 | "```\n", 155 | "\n", 156 | "You can add additional configuration values from [this guide](https://blast.ncbi.nlm.nih.gov/doc/elastic-blast/configuration.html). If you need to run this a few times, make sure you either rename the ouput folder, or delete the results folder from the S3 bucket. If you are using your own data, make sure to modify the database and the S3 queries path." 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "id": "9a9f8192", 162 | "metadata": {}, 163 | "source": [ 164 | "### Submit the job" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "id": "398253e8", 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "! elastic-blast submit --cfg BDQA.ini" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "id": "9a8e7716", 180 | "metadata": {}, 181 | "source": [ 182 | "### Check results and troubleshoot" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "id": "94a43c5e", 188 | "metadata": {}, 189 | "source": [ 190 | "+ You can monitor the job initially by going to `CloudFormation` and viewing the events tab of the elastic blast stack. If there is an error, you should be able to pinpoint it in these event logs.\n", 191 | "+ You can view the progress by going to `AWS Batch`, select the Job queue that begins with `elasticblast`, and then make sure jobs are moving from Runnable to Running to Succeeded. The number of jobs that run together will be the number of nodes you selected in the config file. To run more jobs at once, increase the `cluster` parameter `num-nodes`. \n", 192 | "+ Finally, to view your outputs, look at the files in your S3 output bucket, something like `aws s3 ls s3://elasticblast-sagemaker/results/BDQA/`." 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "id": "d27edaa5", 198 | "metadata": {}, 199 | "source": [ 200 | "## Conclusions\n", 201 | "Here we submited a parallel Blast job to an AWS Batch cluster using Cloud Formation to handle provisioning and tear down of resources. " 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "id": "292947f1-5247-4da5-81bd-7fc8fc420ca4", 207 | "metadata": {}, 208 | "source": [ 209 | "## Clean Up" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "id": "e677ba64-38e0-49d4-919b-4bb51de83cdd", 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "! 
elastic-blast delete --cfg BDQA.ini" 220 | ] 221 | } 222 | ], 223 | "metadata": { 224 | "kernelspec": { 225 | "display_name": "conda_python3", 226 | "language": "python", 227 | "name": "conda_python3" 228 | }, 229 | "language_info": { 230 | "codemirror_mode": { 231 | "name": "ipython", 232 | "version": 3 233 | }, 234 | "file_extension": ".py", 235 | "mimetype": "text/x-python", 236 | "name": "python", 237 | "nbconvert_exporter": "python", 238 | "pygments_lexer": "ipython3", 239 | "version": "3.10.17" 240 | } 241 | }, 242 | "nbformat": 4, 243 | "nbformat_minor": 5 244 | } 245 | -------------------------------------------------------------------------------- /notebooks/GWAS/GWAS_coat_color.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "7a244bb3", 6 | "metadata": {}, 7 | "source": [ 8 | "# GWAS in the cloud\n", 9 | "## Overview\n", 10 | "We adapted the NIH CFDE tutorial from [here](https://training.nih-cfde.org/en/latest/Bioinformatic-Analyses/GWAS-in-the-cloud/background/) and fit it to a notebook. We have greatly simplified the instructions, so if you need or want more details, look at the full tutorial to find out more.\n", 11 | "\n", 12 | "Most of this notebook is written in Bash, but expects that you are using a Python kernel, until step 3, plotting where you will need to switch your kernel to R." 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "id": "3edafe63", 18 | "metadata": {}, 19 | "source": [ 20 | "## Learning Objectives\n", 21 | "The goal is to learn how to execute a GWAS analysis in a cloud environment" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "id": "5d7ef396", 27 | "metadata": {}, 28 | "source": [ 29 | "## Prerequisites\n", 30 | "+ You only need access to a Sagemaker notebook environment to run this notebook" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "id": "39ee9668", 36 | "metadata": {}, 37 | "source": [ 38 | "## Get Started" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "id": "8fbf6304", 44 | "metadata": {}, 45 | "source": [ 46 | "### Install packages and set up environment\n", 47 | "\n", 48 | "#### Download the data\n", 49 | "use %%bash to denote a bash block. You can also use '!' to denote a single bash command within a Python notebook" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "id": "8ec900bd", 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "! 
mkdir GWAS" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "id": "3c20d2c1-1f0a-441d-9b5e-4ed018ec01ed", 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "%%bash\n", 70 | "curl -LO https://de.cyverse.org/dl/d/E0A502CC-F806-4857-9C3A-BAEAA0CCC694/pruned_coatColor_maf_geno.vcf.gz\n", 71 | "curl -LO https://de.cyverse.org/dl/d/3B5C1853-C092-488C-8C2F-CE6E8526E96B/coatColor.pheno" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "id": "4d43ae73", 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "%%bash\n", 82 | "mv *.gz GWAS\n", 83 | "mv *.pheno GWAS\n", 84 | "ls GWAS" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "id": "37567b92-71f1-4c7d-8cce-c08e82b25557", 90 | "metadata": {}, 91 | "source": [ 92 | "### Install dependencies" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "id": "8876e58e-1b5b-4ac7-a46d-a0227be29ec8", 98 | "metadata": {}, 99 | "source": [ 100 | "Ensure that you have miniforge or conda forge to install packages with mamba for faster installation." 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "id": "b219074a", 107 | "metadata": { 108 | "scrolled": true 109 | }, 110 | "outputs": [], 111 | "source": [ 112 | "# install everything else\n", 113 | "! mamba install -y -c bioconda plink vcftools" 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "id": "3de2fc4c", 119 | "metadata": {}, 120 | "source": [ 121 | "## Analyze" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "id": "08155a2f-f117-4e1f-9366-2405e790290a", 127 | "metadata": {}, 128 | "source": [ 129 | "For the remainder of this tutorial we will be working in the GWAS directory." 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "id": "f6450bd6-6ca4-41da-9a91-9e4c009fc339", 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "cd GWAS" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "id": "013d960d", 145 | "metadata": {}, 146 | "source": [ 147 | "### Make map and ped files from the vcf file to feed into plink" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "id": "6570875d", 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "!vcftools --gzvcf pruned_coatColor_maf_geno.vcf.gz --plink --out coatColor" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "id": "b9a38761", 163 | "metadata": {}, 164 | "source": [ 165 | "### Create a list of minor alleles\n", 166 | "For more info on these terms, look at step 2 at https://training.nih-cfde.org/en/latest/Bioinformatic-Analyses/GWAS-in-the-cloud/analyze/" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "id": "6c868a67", 173 | "metadata": {}, 174 | "outputs": [], 175 | "source": [ 176 | "#unzip vcf\n", 177 | "! vcftools --gzvcf pruned_coatColor_maf_geno.vcf.gz --recode --out pruned_coatColor_maf_geno" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "id": "8e11f991", 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [ 187 | "#create list of minor alleles\n", 188 | "! 
cat pruned_coatColor_maf_geno.recode.vcf | awk 'BEGIN{FS=\"\\t\";OFS=\"\\t\";}/#/{next;}{{if($3==\".\")$3=$1\":\"$2;}print $3,$5;}' > minor_alleles" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "id": "8cff47e3", 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [ 198 | "! head GWAS/minor_alleles" 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "id": "56d901c7", 204 | "metadata": {}, 205 | "source": [ 206 | "### Run quality controls" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "id": "dafa14a6", 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "#calculate missingness per locus\n", 217 | "! plink --file coatColor --make-pheno coatColor.pheno \"yellow\" --missing --out miss_stat --noweb --dog --reference-allele minor_alleles --allow-no-sex --adjust" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "id": "5cf5f51b", 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": [ 227 | "#take a look at lmiss, which is the per locus rates of missingness\n", 228 | "! head miss_stat.lmiss" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "id": "915bb263", 235 | "metadata": {}, 236 | "outputs": [], 237 | "source": [ 238 | "#peek at imiss which is the individual rates of missingness\n", 239 | "! head miss_stat.imiss" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "id": "4c11ca71", 245 | "metadata": {}, 246 | "source": [ 247 | "### Convert to plink binary format" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "id": "3b8f2d7f", 254 | "metadata": {}, 255 | "outputs": [], 256 | "source": [ 257 | "! plink --file coatColor --allow-no-sex --dog --make-bed --noweb --out coatColor.binary" 258 | ] 259 | }, 260 | { 261 | "cell_type": "markdown", 262 | "id": "e36f6cd7", 263 | "metadata": {}, 264 | "source": [ 265 | "### Run a simple association step (the GWAS part!)" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "id": "f926ef9b", 272 | "metadata": {}, 273 | "outputs": [], 274 | "source": [ 275 | "! plink --bfile coatColor.binary --make-pheno coatColor.pheno \"yellow\" --assoc --reference-allele minor_alleles --allow-no-sex --adjust --dog --noweb --out coatColor" 276 | ] 277 | }, 278 | { 279 | "cell_type": "markdown", 280 | "id": "b397d484", 281 | "metadata": {}, 282 | "source": [ 283 | "### Identify statistical cutoffs\n", 284 | "This code finds the equivalent of 0.05 and 0.01 p value in the negative-log-transformed p values file. We will use these cutoffs to draw horizontal lines in the Manhattan plot for visualization of haplotypes that cross the 0.05 and 0.01 statistical threshold (i.e. 
have a statistically significant association with yellow coat color)" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "id": "b94e1e2a", 291 | "metadata": {}, 292 | "outputs": [], 293 | "source": [ 294 | "%%bash\n", 295 | "unad_cutoff_sug=$(tail -n+2 coatColor.assoc.adjusted | awk '$10>=0.05' | head -n1 | awk '{print $3}')\n", 296 | "unad_cutoff_conf=$(tail -n+2 coatColor.assoc.adjusted | awk '$10>=0.01' | head -n1 | awk '{print $3}')" 297 | ] 298 | }, 299 | { 300 | "cell_type": "markdown", 301 | "id": "1f52e97c", 302 | "metadata": {}, 303 | "source": [ 304 | "## Plotting\n", 305 | "In this tutorial, plotting is done in R, so at this point you can change your kernel to R in the top right. Wait for it to say 'idle' in the bottom left, then continue. You could also plot using Python native packages and maintain the Python notebook kernel." 306 | ] 307 | }, 308 | { 309 | "cell_type": "markdown", 310 | "id": "effb5acd", 311 | "metadata": {}, 312 | "source": [ 313 | "### Install qqman" 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "id": "60feed89", 320 | "metadata": {}, 321 | "outputs": [], 322 | "source": [ 323 | "install.packages('qqman', contriburl=contrib.url('http://cran.r-project.org/'))" 324 | ] 325 | }, 326 | { 327 | "cell_type": "markdown", 328 | "id": "d3f1fcd2", 329 | "metadata": {}, 330 | "source": [ 331 | "### Run the plotting function" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": null, 337 | "id": "a7e8cd2b", 338 | "metadata": {}, 339 | "outputs": [], 340 | "source": [ 341 | "#make sure you are still CD in GWAS, when you change kernel it may reset to home\n", 342 | "setwd('GWAS')" 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": null, 348 | "id": "7946a3a7", 349 | "metadata": {}, 350 | "outputs": [], 351 | "source": [ 352 | "require(qqman)" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": null, 358 | "id": "0d28ef2c", 359 | "metadata": {}, 360 | "outputs": [], 361 | "source": [ 362 | "data=read.table(\"coatColor.assoc\", header=TRUE)" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": null, 368 | "id": "8e5207be", 369 | "metadata": {}, 370 | "outputs": [], 371 | "source": [ 372 | "data=data[!is.na(data$P),]" 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": null, 378 | "id": "6330b1e0", 379 | "metadata": {}, 380 | "outputs": [], 381 | "source": [ 382 | "manhattan(data, p = \"P\", col = c(\"blue4\", \"orange3\"),\n", 383 | " suggestiveline = 12,\n", 384 | " genomewideline = 15,\n", 385 | " chrlabs = c(1:38, \"X\"), annotateTop=TRUE, cex = 1.2)" 386 | ] 387 | }, 388 | { 389 | "cell_type": "markdown", 390 | "id": "26787d84", 391 | "metadata": {}, 392 | "source": [ 393 | "In our graph, haplotypes in four parts of the genome (chromosome 2, 5, 28 and X) are found to be associated with an increased occurrence of the yellow coat color phenotype.\n", 394 | "\n", 395 | "The top associated mutation is a nonsense SNP in the gene MC1R known to control pigment production. The MC1R allele encoding yellow coat color contains a single base change (from C to T) at the 916th nucleotide." 
396 | ] 397 | }, 398 | { 399 | "cell_type": "markdown", 400 | "id": "2f6e1ef6", 401 | "metadata": {}, 402 | "source": [ 403 | "### Conclusion\n", 404 | "Here we learned how to run a simple GWAS analysis in the cloud" 405 | ] 406 | }, 407 | { 408 | "cell_type": "markdown", 409 | "id": "044a04d8", 410 | "metadata": {}, 411 | "source": [ 412 | "## Clean up\n", 413 | "Make sure you stop this Jupyter Notebook Session, or delete it if you don't plan to use it further.\n", 414 | "\n", 415 | "If you used a bucket you can also [delete the buckets](https://docs.aws.amazon.com/AmazonS3/latest/userguide/delete-bucket.html) if you don't want to pay for the data: `aws s3 rb s3://bucket-name --force`." 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": null, 421 | "id": "81f7b485-dc2f-4d69-9c7b-7d40a063237c", 422 | "metadata": {}, 423 | "outputs": [], 424 | "source": [] 425 | } 426 | ], 427 | "metadata": { 428 | "kernelspec": { 429 | "display_name": "R", 430 | "language": "R", 431 | "name": "ir" 432 | }, 433 | "language_info": { 434 | "codemirror_mode": "r", 435 | "file_extension": ".r", 436 | "mimetype": "text/x-r-source", 437 | "name": "R", 438 | "pygments_lexer": "r", 439 | "version": "4.4.3" 440 | } 441 | }, 442 | "nbformat": 4, 443 | "nbformat_minor": 5 444 | } 445 | -------------------------------------------------------------------------------- /notebooks/GenAI/AWS_GenAI_Jumpstart.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "cf9eae37-eb34-4afa-89cf-a0b4371978e5", 6 | "metadata": {}, 7 | "source": [ 8 | "# Deploying a Model from Jumpstart to Sagemaker Notebook" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "b49b72e7", 14 | "metadata": {}, 15 | "source": [ 16 | "## Overview" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "id": "363b6429-2757-4021-b062-a9c2667c2162", 22 | "metadata": {}, 23 | "source": [ 24 | "Foundation models (FMs) are flexible, reusable models that can be applied to a wide range of use cases, and they are extensible, allowing the user to customize attributes of the model. Some examples of FMs are text generation (e.g., summarizing text), chatbots, and image generation. These models can be accessed through Jumpstart, and in this tutorial we will be using [Llama2](https://llama.meta.com/llama2/). To see what other FMs are available in AWS, you can go to `Amazon Sagemaker > Jumpstart > Foundational Models`."
25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "id": "3a2224bf", 30 | "metadata": {}, 31 | "source": [ 32 | "## Learning Objectives\n", 33 | "+ Learn about Foundation Models\n", 34 | "+ Learn how to deploy models to SageMaker via Jumpstart" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "id": "67a66b3c", 40 | "metadata": {}, 41 | "source": [ 42 | "## Prerequisites\n", 43 | "You just need access to SageMaker and Model Jumpstart" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "id": "be5dd789", 49 | "metadata": {}, 50 | "source": [ 51 | "## Get Started" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "id": "d50731d6-20d3-44d7-baf0-98daf19cf487", 57 | "metadata": {}, 58 | "source": [ 59 | "### Install packages and set up environment" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "id": "2ee5a5e2-6302-4ab2-bf4f-c253daa09730", 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "%pip install --upgrade --quiet sagemaker" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "id": "7780b813-5050-4acb-a3c0-0a961e21958a", 75 | "metadata": {}, 76 | "source": [ 77 | "Next we will specify the name and version of the model we want to deploy; here we will deploy the Llama2 chat model with 13 billion parameters." 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "id": "6cf1429a-314e-49b6-a4f7-16a3e52319af", 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "model_id, model_version = \"meta-textgeneration-llama-2-13b-f\", \"2.*\"" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "id": "e739c65b-9bd7-4b7f-9ab0-63268b3dda99", 93 | "metadata": {}, 94 | "source": [ 95 | "Now we will create our endpoint! An endpoint allows us to interact with our model. Sagemaker creates the endpoint and deploys our model behind it in one step. This will take 1 to 5 minutes." 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "id": "c29d29db-3f81-4774-b11f-31d2c2575e74", 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "from sagemaker.jumpstart.model import JumpStartModel\n", 106 | "\n", 107 | "model = JumpStartModel(model_id=model_id, model_version=model_version)\n", 108 | "predictor = model.deploy()\n" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "id": "d6285342-c9a7-41bb-aef5-5f7d35966afa", 114 | "metadata": {}, 115 | "source": [ 116 | "Next we import the JSONSerializer, which converts Python objects into their JSON equivalent so that Sagemaker can communicate instructions to our endpoint. These instructions are packaged into a payload, which we will define in the next step." 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "id": "ecb3c3c1-52da-4357-8665-a3e131c26821", 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "from sagemaker.serializers import JSONSerializer\n", 127 | "predictor.serializer = JSONSerializer()\n", 128 | "predictor.content_type = \"application/json\"" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "id": "ca5f0f70-5c77-4025-9fe0-cc91c69d535f", 134 | "metadata": {}, 135 | "source": [ 136 | "The following helper function prints the dialog so we can easily read our input messages and the model's response."
137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "id": "cf078472-3604-4f42-aa73-95e631dd6acc", 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "def print_dialog(payload, response):\n", 147 | " dialog = payload[\"inputs\"][0]\n", 148 | " for msg in dialog:\n", 149 | " print(f\"{msg['role'].capitalize()}: {msg['content']}\\n\")\n", 150 | " print(\n", 151 | " f\"> {response[0]['generation']['role'].capitalize()}: {response[0]['generation']['content']}\"\n", 152 | " )\n", 153 | " print(\"\\n==================================\\n\")" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "id": "b62d2e4f-66c5-4c1a-bf32-b3993ef10ff7", 159 | "metadata": {}, 160 | "source": [ 161 | "Now we can define our payload, which holds our input: a message with the role `user` whose content is our question, \"what is brain cancer?\". The parameters let us tune the model through the maximum number of new tokens, top p, and temperature:\n", 162 | "- **Max_New_Tokens:** The size of the output sequence, not including the tokens in the prompt.\n", 163 | "- **Top_p (nucleus):** The cumulative probability cutoff for token selection. Lower values mean sampling from a smaller, more top-weighted nucleus. Must be a number from 0 to 1.\n", 164 | "- **Temperature:** Controls randomness; higher values increase diversity and produce less predictable responses, while lower values make the output more focused and deterministic. Must be a number from 0 to 1." 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "id": "59d9b99b-3b74-4e31-b78a-742525330969", 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "payload = {\n", 175 | " \"inputs\": [\n", 176 | " [\n", 177 | " {\"role\": \"user\", \"content\": \"what is brain cancer?\"},\n", 178 | " ]\n", 179 | " ],\n", 180 | " \"parameters\": {\"max_new_tokens\": 512, \"top_p\": 0.9, \"temperature\": 0.6},\n", 181 | "}\n", 182 | "try:\n", 183 | " response = predictor.predict(payload, custom_attributes=\"accept_eula=true\")\n", 184 | " print_dialog(payload, response)\n", 185 | "except Exception as e:\n", 186 | " print(e)" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "id": "9a9d6604", 192 | "metadata": {}, 193 | "source": [ 194 | "## Conclusions\n", 195 | "Congrats! You deployed a model to an endpoint and queried it! " 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "id": "52572399", 201 | "metadata": {}, 202 | "source": [ 203 | "## Clean Up" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "id": "ec45e77c-113f-4e1c-8297-24e9ac613944", 209 | "metadata": {}, 210 | "source": [ 211 | "**Warning:** Once you are done, don't forget to delete your endpoint, model, and buckets, and shut down or delete your Sagemaker notebook to avoid additional charges!"
212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": null, 217 | "id": "b8ea9087-9d2d-48e1-a9ab-e2b515851930", 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [ 221 | "# Delete the SageMaker endpoint\n", 222 | "predictor.delete_model()\n", 223 | "predictor.delete_endpoint()" 224 | ] 225 | } 226 | ], 227 | "metadata": {}, 228 | "nbformat": 4, 229 | "nbformat_minor": 5 230 | } 231 | -------------------------------------------------------------------------------- /notebooks/GenAI/example_scripts/bioinformatics_testing.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import subprocess 3 | 4 | # Step 1: Read the sample sheet 5 | sample_sheet = pd.read_csv('samplesheet.csv') 6 | 7 | # Step 2: Run FastQC 8 | for index, row in sample_sheet.iterrows(): 9 | fastqc_command = f"fastqc {row['file_path']} -o ./fastqc_results/" 10 | subprocess.run(fastqc_command, shell=True) 11 | 12 | # Step 3: Run MultiQC 13 | multiqc_command = "multiqc ./fastqc_results/ -o ./multiqc_report/" 14 | subprocess.run(multiqc_command, shell=True) 15 | 16 | # Step 4: Run STAR aligner 17 | for index, row in sample_sheet.iterrows(): 18 | star_command = f"STAR --genomeDir /path/to/genome --readFilesIn {row['file_path']} --outFileNamePrefix ./star_results/{row['sample_id']}" 19 | subprocess.run(star_command, shell=True) 20 | 21 | # Step 5: Index BAM files with Samtools 22 | for index, row in sample_sheet.iterrows(): 23 | bam_file = f"./star_results/{row['sample_id']}.bam" 24 | samtools_command = f"samtools index {bam_file}" 25 | subprocess.run(samtools_command, shell=True) 26 | 27 | -------------------------------------------------------------------------------- /notebooks/GenAI/example_scripts/kendra_chat_llama_2.py: -------------------------------------------------------------------------------- 1 | from langchain.retrievers import AmazonKendraRetriever 2 | from langchain.chains import ConversationalRetrievalChain 3 | from langchain.prompts import PromptTemplate 4 | from langchain.llms import SagemakerEndpoint 5 | from langchain.llms.sagemaker_endpoint import LLMContentHandler 6 | import sys 7 | import json 8 | import os 9 | 10 | class bcolors: 11 | HEADER = '\033[95m' 12 | OKBLUE = '\033[94m' 13 | OKCYAN = '\033[96m' 14 | OKGREEN = '\033[92m' 15 | WARNING = '\033[93m' 16 | FAIL = '\033[91m' 17 | ENDC = '\033[0m' 18 | BOLD = '\033[1m' 19 | UNDERLINE = '\033[4m' 20 | 21 | MAX_HISTORY_LENGTH = 1 22 | 23 | def build_chain(): 24 | region = os.environ["AWS_REGION"] 25 | kendra_index_id = os.environ["KENDRA_INDEX_ID"] 26 | endpoint_name = os.environ["LLAMA_2_ENDPOINT"] 27 | 28 | class ContentHandler(LLMContentHandler): 29 | content_type = "application/json" 30 | accepts = "application/json" 31 | 32 | def transform_input(self, prompt: str, model_kwargs: dict) -> bytes: 33 | input_str = json.dumps({"inputs": 34 | [[ 35 | #{"role": "system", "content": ""}, 36 | {"role": "user", "content": prompt}, 37 | ]], 38 | **model_kwargs 39 | }) 40 | #print(input_str) 41 | 42 | return input_str.encode('utf-8') 43 | 44 | def transform_output(self, output: bytes) -> str: 45 | response_json = json.loads(output.read().decode("utf-8")) 46 | 47 | return response_json[0]['generation']['content'] 48 | 49 | content_handler = ContentHandler() 50 | 51 | llm=SagemakerEndpoint( 52 | endpoint_name=endpoint_name, 53 | region_name=region, 54 | model_kwargs={"parameters": {"max_new_tokens": 1000, "top_p": 0.9,"temperature":0.6}}, 55 | 
endpoint_kwargs={"CustomAttributes":"accept_eula=true"}, 56 | content_handler=content_handler, 57 | ) 58 | 59 | retriever = AmazonKendraRetriever(index_id=kendra_index_id,region_name=region) 60 | 61 | prompt_template = """ 62 | Ignore everything before. 63 | 64 | Instruction: 65 | I want you to act as a research paper summarizer. I will provide you with a research paper on a specific topic in English, and you will create a summary. The summary should be concise and should accurately and objectively communicate the takeaway of the paper. You should not include any personal opinions or interpretations in your summary, but rather focus on objectively presenting the information from the paper. Your summary should be written in your own words and ensure that your summary is clear, concise, and accurately reflects the content of the original paper. 66 | 67 | First, provide a concise summary. Then provides the sources. 68 | 69 | {question} Answer "don't know" if not present in the document. 70 | {context} 71 | Solution:""" 72 | PROMPT = PromptTemplate( 73 | template=prompt_template, input_variables=["context", "question"], 74 | ) 75 | 76 | condense_qa_template = """ 77 | Chat History: 78 | {chat_history} 79 | Here is a new question for you: {question} 80 | Standalone question:""" 81 | standalone_question_prompt = PromptTemplate.from_template(condense_qa_template) 82 | 83 | qa = ConversationalRetrievalChain.from_llm( 84 | llm=llm, 85 | retriever=retriever, 86 | condense_question_prompt=standalone_question_prompt, 87 | return_source_documents=True, 88 | combine_docs_chain_kwargs={"prompt":PROMPT}, 89 | ) 90 | return qa 91 | 92 | def run_chain(chain, prompt: str, history=[]): 93 | print(prompt) 94 | return chain({"question": prompt, "chat_history": history}) 95 | 96 | if __name__ == "__main__": 97 | chat_history = [] 98 | qa = build_chain() 99 | print(bcolors.OKBLUE + "Hello! How can I help you?" + bcolors.ENDC) 100 | print(bcolors.OKCYAN + "Ask a question, start a New search: or CTRL-D to exit." + bcolors.ENDC) 101 | print(">", end=" ", flush=True) 102 | for query in sys.stdin: 103 | if (query.strip().lower().startswith("new search:")): 104 | query = query.strip().lower().replace("new search:","") 105 | chat_history = [] 106 | elif (len(chat_history) == MAX_HISTORY_LENGTH): 107 | chat_history.pop(0) 108 | result = run_chain(qa, query, chat_history) 109 | chat_history.append((query, result["answer"])) 110 | print(bcolors.OKGREEN + result['answer'] + bcolors.ENDC) 111 | if 'source_documents' in result: 112 | print(bcolors.OKGREEN + 'Sources:') 113 | for d in result['source_documents']: 114 | print(d.metadata['source']) 115 | print(bcolors.ENDC) 116 | print(bcolors.OKCYAN + "Ask a question, start a New search: or CTRL-D to exit." 
+ bcolors.ENDC) 117 | print(">", end=" ", flush=True) 118 | print(bcolors.OKBLUE + "Bye" + bcolors.ENDC) 119 | 120 | 121 | 122 | -------------------------------------------------------------------------------- /notebooks/GenAI/example_scripts/langchain_chat_llama_2_zeroshot.py: -------------------------------------------------------------------------------- 1 | from langchain_community.retrievers import PubMedRetriever 2 | from langchain.chains import ConversationalRetrievalChain 3 | from langchain.prompts import PromptTemplate 4 | from langchain_community.llms import SagemakerEndpoint 5 | from langchain_community.llms.sagemaker_endpoint import LLMContentHandler 6 | import sys 7 | import json 8 | import os 9 | 10 | 11 | class bcolors: 12 | HEADER = '\033[95m' 13 | OKBLUE = '\033[94m' 14 | OKCYAN = '\033[96m' 15 | OKGREEN = '\033[92m' 16 | WARNING = '\033[93m' 17 | FAIL = '\033[91m' 18 | ENDC = '\033[0m' 19 | BOLD = '\033[1m' 20 | UNDERLINE = '\033[4m' 21 | 22 | MAX_HISTORY_LENGTH = 1 23 | 24 | def build_chain(): 25 | region = os.environ["AWS_REGION"] 26 | endpoint_name = os.environ["LLAMA_2_ENDPOINT"] 27 | 28 | class ContentHandler(LLMContentHandler): 29 | content_type = "application/json" 30 | accepts = "application/json" 31 | 32 | def transform_input(self, prompt: str, model_kwargs: dict) -> bytes: 33 | input_str = json.dumps({"inputs": 34 | [[ 35 | #{"role": "system", "content": ""}, 36 | {"role": "user", "content": prompt}, 37 | ]], 38 | **model_kwargs 39 | }) 40 | #print(input_str) 41 | 42 | return input_str.encode('utf-8') 43 | 44 | def transform_output(self, output: bytes) -> str: 45 | response_json = json.loads(output.read().decode("utf-8")) 46 | 47 | return response_json[0]['generation']['content'] 48 | 49 | content_handler = ContentHandler() 50 | 51 | llm=SagemakerEndpoint( 52 | endpoint_name=endpoint_name, 53 | region_name=region, 54 | model_kwargs={"parameters": {"max_new_tokens": 1000, "top_p": 0.9,"temperature":0.6}}, 55 | endpoint_kwargs={"CustomAttributes":"accept_eula=true"}, 56 | content_handler=content_handler, 57 | ) 58 | 59 | retriever= PubMedRetriever() 60 | 61 | prompt_template = """ 62 | Ignore everything before. 63 | 64 | Instruction: 65 | I want you to act as a research paper summarizer. I will provide you with a research paper on a specific topic in English, and you will create a summary. The summary should be concise and should accurately and objectively communicate the takeaway of the paper. You should not include any personal opinions or interpretations in your summary, but rather focus on objectively presenting the information from the paper. Your summary should be written in your own words and ensure that your summary is clear, concise, and accurately reflects the content of the original paper. 66 | 67 | First, provide a concise summary then provide the sources. 68 | 69 | {question} Answer "don't know" if not present in the document. 
70 | {context} 71 | Solution:""" 72 | PROMPT = PromptTemplate( 73 | template=prompt_template, input_variables=["context", "question"], 74 | ) 75 | 76 | condense_qa_template = """ 77 | Chat History: 78 | {chat_history} 79 | Here is a new question for you: {question} 80 | Standalone question:""" 81 | standalone_question_prompt = PromptTemplate.from_template(condense_qa_template) 82 | 83 | qa = ConversationalRetrievalChain.from_llm( 84 | llm=llm, 85 | retriever=retriever, 86 | condense_question_prompt=standalone_question_prompt, 87 | return_source_documents=True, 88 | combine_docs_chain_kwargs={"prompt":PROMPT}, 89 | ) 90 | return qa 91 | 92 | def run_chain(chain, prompt: str, history=[]): 93 | print(prompt) 94 | return chain({"question": prompt, "chat_history": history}) 95 | 96 | if __name__ == "__main__": 97 | chat_history = [] 98 | qa = build_chain() 99 | print(bcolors.OKBLUE + "Hello! How can I help you?" + bcolors.ENDC) 100 | print(bcolors.OKCYAN + "Ask a question, start a New search: or CTRL-D to exit." + bcolors.ENDC) 101 | print(">", end=" ", flush=True) 102 | for query in sys.stdin: 103 | if (query.strip().lower().startswith("new search:")): 104 | query = query.strip().lower().replace("new search:","") 105 | chat_history = [] 106 | elif (len(chat_history) == MAX_HISTORY_LENGTH): 107 | chat_history.pop(0) 108 | result = run_chain(qa, query, chat_history) 109 | chat_history.append((query, result["answer"])) 110 | print(bcolors.OKGREEN + result['answer'] + bcolors.ENDC) 111 | print(bcolors.ENDC) 112 | print(bcolors.OKCYAN + "Ask a question, start a New search: or CTRL-D to exit." + bcolors.ENDC) 113 | print(">", end=" ", flush=True) 114 | print(bcolors.OKBLUE + "Bye" + bcolors.ENDC) 115 | -------------------------------------------------------------------------------- /notebooks/GenAI/example_scripts/quick-actions-testing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "bed9c0c9-4756-4161-b4be-e32ce3a58bff", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "#Cell 1\n", 11 | "#import libraries\n", 12 | "\n", 13 | "import pandas as pd\n", 14 | "import numpy as np" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "id": "64f8d006-28f3-4d83-ae3a-9e23cccff5d7", 21 | "metadata": {}, 22 | "outputs": [ 23 | { 24 | "name": "stdout", 25 | "output_type": "stream", 26 | "text": [ 27 | " Gene Expression_Level Sample_ID Condition\n", 28 | "0 GeneA 12.5 S1 Control\n", 29 | "1 GeneB 8.3 S2 Treated\n", 30 | "2 GeneC 15.2 S3 Control\n", 31 | "3 GeneD 7.8 S4 Treated\n" 32 | ] 33 | } 34 | ], 35 | "source": [ 36 | "#Cell 2\n", 37 | "\n", 38 | "# Initialize data of lists\n", 39 | "data = {\n", 40 | " 'Gene': ['GeneA', 'GeneB', 'GeneC', 'GeneD'],\n", 41 | " 'Expression_Level': [12.5, 8.3, 15.2, 7.8],\n", 42 | " 'Sample_ID': ['S1', 'S2', 'S3', 'S4'],\n", 43 | " 'Condition': ['Control', 'Treated', 'Control', 'Treated']\n", 44 | "}\n", 45 | "\n", 46 | "# Create DataFrame\n", 47 | "df = pd.DataFrame(data)\n", 48 | "\n", 49 | "# Display the DataFrame\n", 50 | "print(df)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 5, 56 | "id": "8ca19300-1635-4a8a-9ef8-f9554bc1baac", 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "ename": "NameError", 61 | "evalue": "name 'describe' is not defined", 62 | "output_type": "error", 63 | "traceback": [ 64 | 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 65 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 66 | "Cell \u001b[0;32mIn[5], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# View summary statistics\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[43mdescribe\u001b[49m()\u001b[38;5;241m.\u001b[39mdf\n", 67 | "\u001b[0;31mNameError\u001b[0m: name 'describe' is not defined" 68 | ] 69 | } 70 | ], 71 | "source": [ 72 | "#Cell 3\n", 73 | "\n", 74 | "# Error debugging test /fix\n", 75 | "# View summary statistics\n", 76 | "describe().df" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "id": "550a402e-66bd-4890-a063-e3d82679c0a8", 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "#Cell 4\n", 87 | "\n", 88 | "# Optimize selection test /optimize\n", 89 | "# Add additional data to the dataframe\n", 90 | "df['Sample_Type'] = ['Tissue1', 'Tissue2', 'Tissue1', 'Tissue2']\n", 91 | "df['P_Value'] = [0.05, 0.01, 0.03, 0.07]" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "id": "0d45c1aa-2075-4c8a-9ecc-94fb03a71f78", 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "#Cell 5\n", 102 | "\n", 103 | "#Explain selection test /explain\n", 104 | "#Plot results\n", 105 | "import matplotlib.pyplot as plt\n", 106 | "\n", 107 | "df.plot(x='Sample_ID', y='Expression_Level', kind='line')\n", 108 | "plt.show()" 109 | ] 110 | } 111 | ], 112 | "metadata": { 113 | "kernelspec": { 114 | "display_name": "Python 3 (ipykernel)", 115 | "language": "python", 116 | "name": "python3" 117 | }, 118 | "language_info": { 119 | "codemirror_mode": { 120 | "name": "ipython", 121 | "version": 3 122 | }, 123 | "file_extension": ".py", 124 | "mimetype": "text/x-python", 125 | "name": "python", 126 | "nbconvert_exporter": "python", 127 | "pygments_lexer": "ipython3", 128 | "version": "3.11.10" 129 | } 130 | }, 131 | "nbformat": 4, 132 | "nbformat_minor": 5 133 | } 134 | -------------------------------------------------------------------------------- /notebooks/Snakemake/aws-parallel-cluster-files/bioinformatics-example/Snakefile: -------------------------------------------------------------------------------- 1 | #Snakefile 2 | configfile: "config.yaml" 3 | 4 | SAMPLES = ["A", "B"] 5 | 6 | rule all: 7 | input: 8 | expand("mapped_reads/{sample}.bam", sample=SAMPLES), 9 | expand("sorted_reads/{sample}.bam.bai", sample=SAMPLES) 10 | "calls/all.vcf" 11 | 12 | rule bwa_index: 13 | input: 14 | "data/genome.fa" 15 | output: 16 | "data/genome.fa.bwt" 17 | conda: 18 | config["conda_env"] 19 | shell: 20 | """ 21 | bwa index {input} 22 | """ 23 | 24 | rule bwa_map: 25 | input: 26 | genome="data/genome.fa", 27 | fastq="data/samples/{sample}.fastq", 28 | index="data/genome.fa.bwt" 29 | output: 30 | "mapped_reads/{sample}.bam" 31 | conda: 32 | config["conda_env"] 33 | shell: 34 | """ 35 | bwa mem {input.genome} {input.fastq} > mapped_reads/{wildcards.sample}.sam 36 | samtools view -Sb mapped_reads/{wildcards.sample}.sam > {output} 37 | rm mapped_reads/{wildcards.sample}.sam 38 | """ 39 | 40 | rule samtools_sort: 41 | input: 42 | "mapped_reads/{sample}.bam" 43 | output: 44 | "sorted_reads/{sample}.bam" 45 | conda: 46 | config["conda_env"] 47 | shell: 48 | "samtools sort -T sorted_reads/{wildcards.sample} -O bam {input} > {output}" 49 | 50 | rule samtools_index: 51 | input: 52 | "sorted_reads/{sample}.bam" 53 | output: 54 | "sorted_reads/{sample}.bam.bai" 55 | conda: 56 
| config["conda_env"] 57 | shell: 58 | "samtools index {input}" 59 | 60 | rule bcftools_call: 61 | input: 62 | fa="data/genome.fa", 63 | bam=expand("sorted_reads/{sample}.bam", sample=SAMPLES), 64 | bai=expand("sorted_reads/{sample}.bam.bai", sample=SAMPLES) 65 | output: 66 | "calls/all.vcf" 67 | conda: 68 | config["conda_env"] 69 | shell: 70 | "bcftools mpileup -f {input.fa} {input.bam} | bcftools call -mv - > {output}" 71 | -------------------------------------------------------------------------------- /notebooks/Snakemake/aws-parallel-cluster-files/bioinformatics-example/config.yml: -------------------------------------------------------------------------------- 1 | conda_env: "envs/environment.yml" 2 | -------------------------------------------------------------------------------- /notebooks/Snakemake/aws-parallel-cluster-files/bioinformatics-example/environment.yml: -------------------------------------------------------------------------------- 1 | name: bioinformatics-test 2 | channels: 3 | - bioconda 4 | - conda-forge 5 | - defaults 6 | dependencies: 7 | - bwa 8 | - samtools 9 | - bcftools 10 | - matplotlib 11 | - pandas 12 | - pysam 13 | -------------------------------------------------------------------------------- /notebooks/Snakemake/aws-parallel-cluster-files/hello-world-snakemake/Snakefile: -------------------------------------------------------------------------------- 1 | #Snakefile 2 | rule all: 3 | input: 4 | "output.txt" 5 | 6 | rule example_rule: 7 | output: 8 | "output.txt" 9 | shell: 10 | """ 11 | echo 'Hello, World!' > {output} 12 | """ 13 | -------------------------------------------------------------------------------- /notebooks/Snakemake/aws-parallel-cluster-files/hello-world.slurm: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=hello-world 3 | #SBATCH --output=hello-world.out 4 | #SBATCH --error=hello-world.err 5 | #SBATCH --ntasks=1 6 | #SBATCH --time=00:01:00 7 | 8 | echo "Hello, World!" > ~/workdir/hello-world.out 9 | echo "This job ran on node: $(hostname)" >> /home/workdir/scripts/hello-world.out 10 | -------------------------------------------------------------------------------- /notebooks/SpleenLiverSegmentation/README.md: -------------------------------------------------------------------------------- 1 | # Spleen Segmentation with Liver Example using NVIDIA Models and MONAI 2 | _We have put together a training example that segments the Spleen in 3D CT Images. At the end is an example of combining both the Spleen model and the Liver model._ 3 | 4 | *Nvidia has changed some of the models used in this tutorial and it may crash, if you have issues, try commenting out the liver model, we are working on a patch* 5 | 6 | ## Introduction 7 | Two pre-trained models from NVIDIA are used in this training, a Spleen model and Liver. 8 | The Spleen model is additionally retrained on the medical decathlon spleen dataset: [http://medicaldecathlon.com/](http://medicaldecathlon.com/) 9 | Data is not necessary to be downloaded to run the notebook. The notebook downloads the data during it's run. 10 | The notebook uses the Python package [MONAI](https://monai.io/), the Medical Open Network for Artificial Intelligence. 
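If you want to confirm which MONAI build and optional dependencies are present before running the notebook, MONAI provides a small helper for this; an optional check is sketched below.

```python
# Optional: print the installed MONAI version and the versions of its
# optional dependencies (PyTorch, Nibabel, scikit-image, etc.).
import monai
monai.config.print_config()
```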
11 | 12 | - Spleen Model - [clara_pt_spleen_ct_segmentation_V2](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/monaitoolkit/models/monai_spleen_ct_segmentation) 13 | - Liver Model - [clara_pt_liver_and_tumor_ct_segmentation_V1]() 14 | 15 | ## Outcomes 16 | After following along with this notebook the user will be familiar with: 17 | - Downloading public datasets using MONAI 18 | - Using MONAI transformations for training 19 | - Downloading a pretrained NVIDIA Clara model using MONAI 20 | - Retrain model using MONAI 21 | - Visualizing medical images in python/matplotlib 22 | 23 | ## Installing MONAI 24 | Please follow the [instructions](https://monai.io/started.html#installation) on MONAI's website for up to date install. 25 | Installing MONAI in a notebook environment can be completed with the commands: 26 | - !python -c "import monai" || pip install -q 'monai[all]' 27 | - !python -c "import matplotlib" || pip install -q matplotlib 28 | 29 | ## Dependencies 30 | _It is recommended to use an NVIDIA GPU for training. If the user does not have access to a NVIDIA GPU then it is recommended to skip the training cells._ 31 | 32 | The following packages and versions were installed during the testing of this notebook: 33 | - MONAI version: 0.8.1 34 | - Numpy version: 1.21.1 35 | - Pytorch version: 1.9.0 36 | - Pytorch Ignite version: 0.4.8 37 | - Nibabel version: 3.2.1 38 | - scikit-image version: 0.18.2 39 | - Pillow version: 8.3.1 40 | - Tensorboard version: 2.5.0 41 | - gdown version: 3.13.0 42 | - TorchVision version: 0.10.0+cu111 43 | - tqdm version: 4.61.2 44 | - lmdb version: 1.2.1 45 | - psutil version: 5.8.0 46 | - pandas version: 1.3.0 47 | - einops version: 0.3.0 48 | - transformers version: 4.18.0 49 | - mlflow version: 1.25.1 50 | -------------------------------------------------------------------------------- /notebooks/SpleenLiverSegmentation/monai_data/Spleen_best_metric_model_pretrained.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/notebooks/SpleenLiverSegmentation/monai_data/Spleen_best_metric_model_pretrained.pth -------------------------------------------------------------------------------- /notebooks/pangolin/pangolin_pipeline.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "31e8c3cd", 6 | "metadata": {}, 7 | "source": [ 8 | "# Pangolin SARS-CoV-2 Pipeline Notebook" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "22f95828", 14 | "metadata": {}, 15 | "source": [ 16 | "## Overview" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "id": "25e25f08", 22 | "metadata": {}, 23 | "source": [ 24 | "We are going to run a standard covid bioinformatics pipeline using the [Pangolin workflow](https://cov-lineages.org/resources/pangolin/usage.html). We will run the whole analysis within this notebook environment." 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "id": "0f67dfae", 30 | "metadata": {}, 31 | "source": [ 32 | "## Learning Objectives\n", 33 | "Learn how to run a simple bioinformatic workflow within a Jupyter notebook environment on AWS." 
34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "id": "e7a574ce", 39 | "metadata": {}, 40 | "source": [ 41 | "## Prerequisites\n", 42 | "+ You only need access to a Sagemaker environment to run this notebook" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "id": "2881a142", 48 | "metadata": {}, 49 | "source": [ 50 | "## Get Started" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "id": "03541941", 56 | "metadata": {}, 57 | "source": [ 58 | "### Install packages and set up environment" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "id": "f994b990", 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "import os\n", 69 | "\n", 70 | "CPU = os.cpu_count()\n", 71 | "print(f\"Number of threads available: {CPU}\")\n" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "id": "f421805e", 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "# install biopython to import packages below\n", 82 | "! pip install biopython" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "id": "b13fec78-c33e-498b-85bb-50319149b542", 88 | "metadata": {}, 89 | "source": [ 90 | "Ensure that you have condaforge or miniforge installed to use mamba for faster package installation." 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "id": "fd936fd6", 97 | "metadata": { 98 | "scrolled": true 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "# install everything else\n", 103 | "! mamba install -y -c conda-forge -c bioconda sra-tools pangolin iqtree" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "id": "5a99cf0d", 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "#import libraries\n", 114 | "import os\n", 115 | "from Bio import SeqIO\n", 116 | "from Bio import Entrez" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "id": "dc694629", 122 | "metadata": {}, 123 | "source": [ 124 | "### Set up your directory structure and remove files from previous runs if they exist" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "id": "8f831fca", 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "if not os.path.exists('pangolin_analysis'):\n", 135 | " os.mkdir('pangolin_analysis')\n", 136 | "\n", 137 | "os.chdir('pangolin_analysis')" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "id": "bcc61e6c-edf7-4d50-9027-bffd68b0ff15", 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "ls" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "id": "6423ca5d", 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "if os.path.exists('sarscov2_sequences.fasta'):\n", 158 | " os.remove('sarscov2_sequences.fasta')\n", 159 | "\n", 160 | "! rm sarscov2_*\n", 161 | "! 
rm lineage_report.csv" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "id": "9d7015e6", 167 | "metadata": {}, 168 | "source": [ 169 | "### Fetch viral sequences using a list of accession IDs" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "id": "16824bcf", 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "#give a list of accession number for sars sequences\n", 180 | "acc_nums=['NC_045512','LR757995','LR757996','OL698718','OL677199','OL672836','MZ914912','MZ916499','MZ908464','MW580573','MW580574','MW580576','MW991906','MW931310','MW932027','MW424864','MW453109','MW453110']\n", 181 | "\n", 182 | "print('the number of sequences we will analyze = ',len(acc_nums))" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "id": "9e382d33", 188 | "metadata": {}, 189 | "source": [ 190 | "Let this block run without going to the next until it finishes, otherwise you may get an error about too many requests. If that happens, reset your kernel and just rerun everything (except installing software)." 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "id": "a28a7122", 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [ 200 | "# use the bio.entrez toolkit within biopython to download the accession numbers\n", 201 | "# save those sequences to a single fasta file\n", 202 | "Entrez.email = \"email@example.com\" # tell NCBI who you are\n", 203 | "\n", 204 | "filename = \"sarscov2_seqs.fasta\"\n", 205 | "\n", 206 | "if not os.path.isfile(filename):\n", 207 | " # Downloading...\n", 208 | " for acc in acc_nums:\n", 209 | " net_handle = Entrez.efetch(\n", 210 | " db=\"nucleotide\", id=acc, rettype=\"fasta\", retmode=\"text\"\n", 211 | " )\n", 212 | " out_handle = open(filename, \"a\")\n", 213 | " out_handle.write(net_handle.read())\n", 214 | " out_handle.close()\n", 215 | " net_handle.close()\n", 216 | " print(\"Saved\",acc)" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "id": "56acb7cc", 223 | "metadata": {}, 224 | "outputs": [], 225 | "source": [ 226 | "# make sure our fasta file has the same number of seqs as the acc_nums list\n", 227 | "\n", 228 | "print('the number of seqs in our fasta file: ')\n", 229 | "\n", 230 | "! grep '>' sarscov2_seqs.fasta | wc -l" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "id": "8606c352", 237 | "metadata": {}, 238 | "outputs": [], 239 | "source": [ 240 | "# let's peek at our new fasta file\n", 241 | "! head sarscov2_seqs.fasta" 242 | ] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "id": "2db37b4e", 247 | "metadata": {}, 248 | "source": [ 249 | "### Run pangolin to identify lineages and output alignment\n", 250 | "Here we call pangolin, give it our input sequences and the number of threads. We also tell it to output the alignment. The full list of pangolin parameters can be found in the pangolin [docs](https://cov-lineages.org/resources/pangolin/usage.html)." 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "id": "f1a17a74", 257 | "metadata": { 258 | "scrolled": true 259 | }, 260 | "outputs": [], 261 | "source": [ 262 | "! 
pangolin sarscov2_seqs.fasta --threads $CPU" 263 | ] 264 | }, 265 | { 266 | "cell_type": "markdown", 267 | "id": "b0e56a4b", 268 | "metadata": {}, 269 | "source": [ 270 | "You can view the output file from pangolin called lineage_report.csv (within pangolin_analysis folder) by double clicking on the file, or by right clicking and downloading. What lineages are present in the dataset? Is Omicron in there?" 271 | ] 272 | }, 273 | { 274 | "cell_type": "markdown", 275 | "id": "37e6efbe", 276 | "metadata": {}, 277 | "source": [ 278 | "### Run iqtree to estimate maximum likelihood tree for our sequences\n", 279 | "iqtree can find the best nucleotide model for the data, but here we are going to assign a model to save time (HKY) and just estimate the phylogeny without any bootstrap support values. " 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | "id": "f2782855", 286 | "metadata": {}, 287 | "outputs": [], 288 | "source": [ 289 | "# run iqtree with threads = $CPU variable.\n", 290 | "# if you exclude the -m it will do a phylogenetic model search before tree search\n", 291 | "! iqtree -s sequences.aln.fasta -nt $CPU -m HKY --prefix sarscov2_tree --redo-tree" 292 | ] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "id": "c7197dd4", 297 | "metadata": {}, 298 | "source": [ 299 | "### Download the tree and view in tree viewer like [FigTree](http://tree.bio.ed.ac.uk/software/figtree/)! " 300 | ] 301 | }, 302 | { 303 | "cell_type": "markdown", 304 | "id": "7a5b8a1b", 305 | "metadata": {}, 306 | "source": [ 307 | "### Conclusions\n", 308 | "That's it! Now you know how to run a simple workflow using a Sagemaker notebook environment" 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "id": "88457512", 314 | "metadata": {}, 315 | "source": [ 316 | "## Clean up\n", 317 | "Make sure you shut down this VM, or delete it if you don't plan to use if further.\n", 318 | "\n", 319 | "You can also [delete the buckets](https://docs.aws.amazon.com/AmazonS3/latest/userguide/delete-bucket.html) if you don't want to pay for the data: `aws s3 rb s3://bucket-name --force`" 320 | ] 321 | } 322 | ], 323 | "metadata": { 324 | "kernelspec": { 325 | "display_name": "conda_python3", 326 | "language": "python", 327 | "name": "conda_python3" 328 | }, 329 | "language_info": { 330 | "codemirror_mode": { 331 | "name": "ipython", 332 | "version": 3 333 | }, 334 | "file_extension": ".py", 335 | "mimetype": "text/x-python", 336 | "name": "python", 337 | "nbconvert_exporter": "python", 338 | "pygments_lexer": "ipython3", 339 | "version": "3.10.17" 340 | } 341 | }, 342 | "nbformat": 4, 343 | "nbformat_minor": 5 344 | } 345 | -------------------------------------------------------------------------------- /notebooks/rnaseq-myco-tutorial-main/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 MaineINBRE 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /notebooks/rnaseq-myco-tutorial-main/README.md: -------------------------------------------------------------------------------- 1 | # rnaseq-myco-tutorial 2 | Tutorial on RNA-Seq data analysis from a study of gene expression in a prokaryote. Open the notebook in Sagemaker and try and run all the way through. Learn about downloading data, conda environments, and bash commands. 3 | -------------------------------------------------------------------------------- /notebooks/rnaseq-myco-tutorial-main/RNAseq_pipeline.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# RNA-Seq Analysis Training Demo" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Overview" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "This short tutorial demonstrates how to run an RNA-Seq workflow using a prokaryotic data set. Steps in the workflow include read trimming, read QC, read mapping, and counting mapped reads per gene to quantitate gene expression.\n", 22 | "\n", 23 | "This tutorials uses example sequence data procured from the Sally Molloy labratory at the University of Maine; which investigates the transcriptome changes in prophage infected, versus non-prophage infected M. chelonae bacteria. The respective article can be found [here](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8191103/).\n", 24 | "\n", 25 | "\n", 26 | "\n", 27 | "![RNA-Seq workflow](images/rnaseq-workflow.png)" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "## Learning Objectives\n", 35 | "Learn how to run a simple RNAseq analysis in a Sagemaker environment" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "## Prerequisites\n", 43 | "+ You only need access to a SageMaker environment to run this notebook" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "## Get Started" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "### Set up environment and install packages" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "Note that within Jupyter you can run a bash comman either by using the magic '!' in front of your command, or by adding %%bash to the top of your cell." 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "For example:\\\n", 72 | "%%bash\\\n", 73 | "example command\n", 74 | "\n", 75 | "or\\\n", 76 | "!example command" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "Now install the dependencies, we are using mamba to install packages!" 
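If the kernel you picked does not already ship mamba, you can check for it first and fall back to installing it from conda-forge; the cell below is an optional sketch and assumes conda is available on the notebook instance.

```python
# Optional: confirm mamba is available before the installs below; if it is
# missing, install it into the base environment from conda-forge.
! mamba --version || conda install -y -n base -c conda-forge mamba
```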
84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": { 90 | "scrolled": true 91 | }, 92 | "outputs": [], 93 | "source": [ 94 | "# install everything else\n", 95 | "! mamba install -c conda-forge -c bioconda -c defaults -y sra-tools pigz=2.6 pbzip2=1.1 trimmomatic=0.36 fastqc=0.11.9 multiqc=1.10.1 salmon=1.5.1 " 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "scrolled": true 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "# install everything else\n", 107 | "! mamba install -c conda-forge -c bioconda -c defaults -y sra-tools pigz pbzip2 trimmomatic fastqc multiqc salmon" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "Create a set of directories to store the reads, reference sequence files, and output files.\n" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "! mkdir -p data data/raw_fastq data/trimmed data/fastqc data/aligned data/reference" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "### Copy FASTQ Files\n", 131 | "So that this tutorial runs quickly, we will only analyze 50,000 reads from one sample from two treatment groups instead of analyzing all the reads from all six samples. These files are hosted in a public Google storage bucket." 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "!curl https://storage.googleapis.com/nigms-sandbox/me-inbre-rnaseq-pipelinev2/data/raw_fastqSub/SRR13349122_1.fastq --output data/raw_fastq/SRR13349122_1.fastq\n", 141 | "!curl https://storage.googleapis.com/nigms-sandbox/me-inbre-rnaseq-pipelinev2/data/raw_fastqSub/SRR13349122_2.fastq --output data/raw_fastq/SRR13349122_2.fastq\n", 142 | "!curl https://storage.googleapis.com/nigms-sandbox/me-inbre-rnaseq-pipelinev2/data/raw_fastqSub/SRR13349128_1.fastq --output data/raw_fastq/SRR13349128_1.fastq\n", 143 | "!curl https://storage.googleapis.com/nigms-sandbox/me-inbre-rnaseq-pipelinev2/data/raw_fastqSub/SRR13349128_2.fastq --output data/raw_fastq/SRR13349128_2.fastq" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "### Copy reference transcriptome files that will be used by Salmon\n", 151 | "Salmon is a tool that aligns RNA-Seq reads to a set of transcripts rather than the entire genome." 
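Once the download cell below completes, you can optionally confirm that the reference copied over intact, for example by counting the transcript records and peeking at the decoy names.

```python
# Optional check after the reference files are in place:
# count transcript records and preview the decoy sequence names.
! grep -c '>' data/reference/M_chelonae_transcripts.fasta
! head -3 data/reference/decoys.txt
```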
152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "!curl https://storage.googleapis.com/nigms-sandbox/me-inbre-rnaseq-pipelinev2/data/reference/M_chelonae_transcripts.fasta --output data/reference/M_chelonae_transcripts.fasta\n", 161 | "!curl https://storage.googleapis.com/nigms-sandbox/me-inbre-rnaseq-pipelinev2/data/reference/decoys.txt --output data/reference/decoys.txt" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "### Copy data file for Trimmomatic" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "!curl https://storage.googleapis.com/nigms-sandbox/me-inbre-rnaseq-pipelinev2/config/TruSeq3-PE.fa --output TruSeq3-PE.fa" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "### Run Trimmomatic\n", 185 | "Trimmomatic will trim off any adapter sequences or low quality sequence it detects in the FASTQ files." 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "%%bash\n", 195 | "trimmomatic PE -threads 2 data/raw_fastq/SRR13349122_1.fastq data/raw_fastq/SRR13349122_2.fastq data/trimmed/SRR13349122_1_trimmed.fastq data/trimmed/SRR13349122_2_trimmed.fastq data/trimmed/SRR13349122_1_trimmed_unpaired.fastq data/trimmed/SRR13349122_2_trimmed_unpaired.fastq ILLUMINACLIP:TruSeq3-PE.fa:2:30:10:2:keepBothReads LEADING:3 TRAILING:3 MINLEN:36\n", 196 | "trimmomatic PE -threads 2 data/raw_fastq/SRR13349128_1.fastq data/raw_fastq/SRR13349128_2.fastq data/trimmed/SRR13349128_1_trimmed.fastq data/trimmed/SRR13349128_2_trimmed.fastq data/trimmed/SRR13349128_1_trimmed_unpaired.fastq data/trimmed/SRR13349128_2_trimmed_unpaired.fastq ILLUMINACLIP:TruSeq3-PE.fa:2:30:10:2:keepBothReads LEADING:3 TRAILING:3 MINLEN:36" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "### Run FastQC\n", 204 | "FastQC is an invaluable tool that allows you to evaluate whether there are problems with a set of reads. For example, it will provide a report of whether there is any bias in the sequence composition of the reads." 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [ 211 | "Once FastQC is done running, look at the outputs in data/fastqc. What can you say about the quality of the two samples we are looking at here? " 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": null, 217 | "metadata": {}, 218 | "outputs": [], 219 | "source": [ 220 | "%%bash\n", 221 | "fastqc -o data/fastqc data/trimmed/SRR13349122_1_trimmed.fastq\n", 222 | "fastqc -o data/fastqc data/trimmed/SRR13349128_1_trimmed.fastq" 223 | ] 224 | }, 225 | { 226 | "cell_type": "markdown", 227 | "metadata": {}, 228 | "source": [ 229 | "### Run MultiQC\n", 230 | "MultiQC reads in the FastQC reports and generates a compiled report for all the analyzed FASTQ files." 
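MultiQC writes a standalone HTML report; after the cell below runs you can preview it inline in the notebook. This sketch assumes MultiQC's default report name (`multiqc_report.html`) in the working directory.

```python
# Optional: render the MultiQC HTML report inline (default output name assumed).
from IPython.display import IFrame
IFrame("multiqc_report.html", width="100%", height=600)
```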
231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "Just as with fastqc, we can look at the mulitqc results after it finishes at data/multiqc_data" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "%%bash\n", 247 | "multiqc -f data/fastqc -f\n", 248 | "mv multiqc_data/ data/" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "### Index the Transcriptome so that trimmed reads can be mapped using salmon" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": {}, 262 | "outputs": [], 263 | "source": [ 264 | "! salmon index -t data/reference/M_chelonae_transcripts.fasta -p 8 -i data/reference/transcriptome_index --decoys data/reference/decoys.txt -k 31 --keepDuplicates" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": {}, 270 | "source": [ 271 | "### Run salmon to map reads to transcripts and quantify expression levels\n", 272 | "Salmon aligns the trimmed reads to the reference transcriptome and generates the read counts per transcript. In this analysis, each gene has a single transcript." 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": null, 278 | "metadata": { 279 | "scrolled": true 280 | }, 281 | "outputs": [], 282 | "source": [ 283 | "%%bash\n", 284 | "salmon quant -i data/reference/transcriptome_index -l SR -r data/trimmed/SRR13349122_1_trimmed.fastq -p 8 --validateMappings -o data/quants/SRR13349122_quant\n", 285 | "salmon quant -i data/reference/transcriptome_index -l SR -r data/trimmed/SRR13349128_1_trimmed.fastq -p 8 --validateMappings -o data/quants/SRR13349128_quant" 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "metadata": {}, 291 | "source": [ 292 | "### Report the top 10 most highly expressed genes in the samples" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "Top 10 most highly expressed genes in the wild-type sample.\n" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": {}, 306 | "outputs": [], 307 | "source": [ 308 | "! sort -nrk 4,4 data/quants/SRR13349122_quant/quant.sf | head -10" 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "metadata": {}, 314 | "source": [ 315 | "Top 10 most highly expressed genes in the double lysogen sample.\n" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": null, 321 | "metadata": {}, 322 | "outputs": [], 323 | "source": [ 324 | "! sort -nrk 4,4 data/quants/SRR13349128_quant/quant.sf | head -10" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": {}, 330 | "source": [ 331 | "### Report the expression of a putative acyl-ACP desaturase (BB28_RS16545) that was downregulated in the double lysogen relative to wild-type\n", 332 | "A acyl-transferase was reported to be downregulated in the double lysogen as shown in the table of the top 20 upregulated and downregulated genes from the paper describing the study." 333 | ] 334 | }, 335 | { 336 | "cell_type": "markdown", 337 | "metadata": {}, 338 | "source": [ 339 | "Use `grep` to report the expression in the wild-type sample. The fields in the Salmon `quant.sf` file are as follows. 
The level of expression is reported in the Transcripts Per Million (`TPM`) and number of reads (`NumReads`) fields: \n", 340 | "`Name Length EffectiveLength TPM NumReads`" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "metadata": {}, 347 | "outputs": [], 348 | "source": [ 349 | "! grep 'BB28_RS16545' data/quants/SRR13349122_quant/quant.sf" 350 | ] 351 | }, 352 | { 353 | "cell_type": "markdown", 354 | "metadata": {}, 355 | "source": [ 356 | "Use `grep` to report the expression in the double lysogen sample. The fields in the Salmon `quant.sf` file are as follows. The level of expression is reported in the Transcripts Per Million (`TPM`) and number of reads (`NumReads`) fields: \n", 357 | "`Name Length EffectiveLength TPM NumReads`" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "metadata": {}, 364 | "outputs": [], 365 | "source": [ 366 | "! grep 'BB28_RS16545' data/quants/SRR13349128_quant/quant.sf" 367 | ] 368 | }, 369 | { 370 | "cell_type": "markdown", 371 | "metadata": {}, 372 | "source": [ 373 | "## Conclusions\n", 374 | "Here, you worked through a simple RNAseq analysis within a Sagemaker environment. For more RNAseq examples, check out the [NIGMS Sandbox RNAseq module](https://github.com/NIGMS/RNA-Seq-Differential-Expression-Analysis). " 375 | ] 376 | }, 377 | { 378 | "cell_type": "markdown", 379 | "metadata": {}, 380 | "source": [ 381 | "## Clean up\n", 382 | "Make sure you shut down this VM, or delete it if you don't plan to use it further.\n", 383 | "\n", 384 | "You can also [delete the buckets](https://docs.aws.amazon.com/AmazonS3/latest/userguide/delete-bucket.html) if you don't want to pay for the data: `aws s3 rb s3://bucket-name --force`" 385 | ] 386 | } 387 | ], 388 | "metadata": { 389 | "kernelspec": { 390 | "display_name": "conda_python3", 391 | "language": "python", 392 | "name": "conda_python3" 393 | }, 394 | "language_info": { 395 | "codemirror_mode": { 396 | "name": "ipython", 397 | "version": 3 398 | }, 399 | "file_extension": ".py", 400 | "mimetype": "text/x-python", 401 | "name": "python", 402 | "nbconvert_exporter": "python", 403 | "pygments_lexer": "ipython3", 404 | "version": "3.10.17" 405 | } 406 | }, 407 | "nbformat": 4, 408 | "nbformat_minor": 4 409 | } 410 | -------------------------------------------------------------------------------- /notebooks/rnaseq-myco-tutorial-main/images/count-workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/notebooks/rnaseq-myco-tutorial-main/images/count-workflow.png -------------------------------------------------------------------------------- /notebooks/rnaseq-myco-tutorial-main/images/rnaseq-workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/notebooks/rnaseq-myco-tutorial-main/images/rnaseq-workflow.png -------------------------------------------------------------------------------- /notebooks/rnaseq-myco-tutorial-main/images/table-cushman.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/STRIDES/NIHCloudLabAWS/7a845e3a88633cef058a2e863159557470e2bddc/notebooks/rnaseq-myco-tutorial-main/images/table-cushman.png --------------------------------------------------------------------------------