├── .DS_Store ├── .gitignore ├── DATA.md ├── Hu_metagenomes.matrix.png ├── INSTALL.md ├── Makefile ├── README.md ├── _static ├── Hu_metaG_comparison.png ├── Hu_metagenomes.matrix.png ├── SBT.png ├── SRR1976948_1.qc_fastqc.html ├── SRR1976948_1.qc_fastqc.zip ├── SRR1976948_1.qc_fastqc │ ├── Icons │ │ ├── error.png │ │ ├── fastqc_icon.png │ │ ├── tick.png │ │ └── warning.png │ ├── Images │ │ ├── duplication_levels.png │ │ ├── kmer_profiles.png │ │ ├── per_base_gc_content.png │ │ ├── per_base_n_content.png │ │ ├── per_base_quality.png │ │ ├── per_base_sequence_content.png │ │ ├── per_sequence_gc_content.png │ │ ├── per_sequence_quality.png │ │ └── sequence_length_distribution.png │ ├── fastqc_data.txt │ ├── fastqc_report.html │ └── summary.txt ├── SRR1976948_1_fastqc.html ├── SRR1976948_1_fastqc.zip ├── SRR1976948_1_fastqc │ ├── Icons │ │ ├── error.png │ │ ├── fastqc_icon.png │ │ ├── tick.png │ │ └── warning.png │ ├── Images │ │ ├── duplication_levels.png │ │ ├── kmer_profiles.png │ │ ├── per_base_gc_content.png │ │ ├── per_base_n_content.png │ │ ├── per_base_quality.png │ │ ├── per_base_sequence_content.png │ │ ├── per_sequence_gc_content.png │ │ ├── per_sequence_quality.png │ │ └── sequence_length_distribution.png │ ├── fastqc_data.txt │ ├── fastqc_report.html │ └── summary.txt ├── SRR1976948_2.qc_fastqc.html ├── SRR1976948_2.qc_fastqc.zip ├── SRR1976948_2.qc_fastqc │ ├── Icons │ │ ├── error.png │ │ ├── fastqc_icon.png │ │ ├── tick.png │ │ └── warning.png │ ├── Images │ │ ├── duplication_levels.png │ │ ├── kmer_profiles.png │ │ ├── per_base_gc_content.png │ │ ├── per_base_n_content.png │ │ ├── per_base_quality.png │ │ ├── per_base_sequence_content.png │ │ ├── per_sequence_gc_content.png │ │ ├── per_sequence_quality.png │ │ └── sequence_length_distribution.png │ ├── fastqc_data.txt │ ├── fastqc_report.html │ └── summary.txt ├── SRR1976948_2_fastqc.html ├── SRR1976948_2_fastqc.zip ├── SRR1976948_2_fastqc │ ├── Icons │ │ ├── error.png │ │ ├── fastqc_icon.png │ │ ├── 
tick.png │ │ └── warning.png │ ├── Images │ │ ├── duplication_levels.png │ │ ├── kmer_profiles.png │ │ ├── per_base_gc_content.png │ │ ├── per_base_n_content.png │ │ ├── per_base_quality.png │ │ ├── per_base_sequence_content.png │ │ ├── per_sequence_gc_content.png │ │ ├── per_sequence_quality.png │ │ └── sequence_length_distribution.png │ ├── fastqc_data.txt │ ├── fastqc_report.html │ └── summary.txt ├── Sourmash_flow_diagrams_QC.png ├── Sourmash_flow_diagrams_compare.png ├── Sourmash_flow_diagrams_compute.png ├── Sourmash_flow_diagrams_gather.png ├── Sourmash_flow_diagrams_search.png ├── ecoli_cmp.matrix.png ├── kmers-metapalette.png ├── labibi.css ├── labibi.js └── sourmash_quality_filtering_workflow.png ├── _templates └── page.html ├── anvio.md ├── assemble-metaspades.md ├── assemble.md ├── assembly-evaluation.md ├── aws-ssh ├── images │ ├── win-putty-1.png │ ├── win-putty-2.png │ ├── win-putty-3.png │ └── win-putty-4.png ├── index.md ├── log-in-with-ssh-mac.rst └── log-in-with-ssh-win.rst ├── aws ├── boot.rst ├── configure-firewall.rst ├── creating-ami.rst ├── images │ ├── .DS_Store │ ├── add-volume-1.png │ ├── add-volume-1b.png │ ├── add-volume-1c.png │ ├── add-volume-1d.png │ ├── add-volume-2.png │ ├── add-volume-3.png │ ├── add-volume-4.png │ ├── add-volume-5.png │ ├── add-volume-6.png │ ├── bak │ │ ├── 1.tiff │ │ ├── 10.tiff │ │ ├── 11.tiff │ │ ├── 2.tiff │ │ ├── 3.tiff │ │ ├── 4.tiff │ │ ├── 5.tiff │ │ ├── 6.tiff │ │ ├── 7.tiff │ │ ├── 8.tiff │ │ ├── 9.tiff │ │ ├── create-ami-1.tiff │ │ ├── create-ami-2.tiff │ │ ├── create-ami-3.tiff │ │ ├── network-1.tiff │ │ ├── network-2.tiff │ │ ├── network-3.tiff │ │ └── network-4.tiff │ ├── boot-0.png │ ├── boot-1.png │ ├── boot-10.png │ ├── boot-11.png │ ├── boot-2.png │ ├── boot-3.png │ ├── boot-4.png │ ├── boot-5.png │ ├── boot-6.png │ ├── boot-7.png │ ├── boot-8.png │ ├── boot-9.png │ ├── create-ami-1.png │ ├── create-ami-2.png │ ├── create-ami-3.png │ ├── create-snapshot-1.png │ ├── create-snapshot-2.png │ ├── 
create-snapshot-3.png │ ├── create-snapshot-4.png │ ├── network-0.png │ ├── network-1.png │ ├── network-2.png │ ├── network-3.png │ ├── network-4.png │ ├── orig │ │ ├── add-volume-1.png │ │ ├── add-volume-2.png │ │ ├── add-volume-3.png │ │ ├── add-volume-4.png │ │ ├── add-volume-5.png │ │ ├── add-volume-6.png │ │ ├── boot-1.png │ │ ├── boot-10.png │ │ ├── boot-11.png │ │ ├── boot-2.png │ │ ├── boot-3.png │ │ ├── boot-4.png │ │ ├── boot-5.png │ │ ├── boot-6.png │ │ ├── boot-7.png │ │ ├── boot-8.png │ │ ├── boot-9.png │ │ ├── create-ami-1.png │ │ ├── create-ami-2.png │ │ ├── create-ami-3.png │ │ ├── create-snapshot-1.png │ │ ├── create-snapshot-2.png │ │ ├── create-snapshot-3.png │ │ ├── create-snapshot-4.png │ │ ├── network-1.png │ │ ├── network-2.png │ │ ├── network-3.png │ │ ├── network-4.png │ │ ├── terminate-1.png │ │ ├── terminate-2.png │ │ └── terminate-3.png │ ├── terminate-1.png │ ├── terminate-2.png │ └── terminate-3.png ├── index.rst ├── install-rstudio-server.rst ├── login-shell-unix.rst ├── login-shell-win.rst ├── login-shell.rst ├── terminate-instance.rst ├── things-to-mention.rst └── volumes-and-snapshots.rst ├── binning.md ├── building-blast-database.rst ├── circos-build.tar.gz ├── circos_tutorial.rst ├── command-line.md ├── conf.py ├── day2-install.rst ├── files ├── 2014-zhang.png ├── OpenDesktop.png ├── Plotting-Salmon-Results.ipynb ├── VizBin-AddFiles.png ├── VizBin-LoadFile.png ├── VizBin-MoreOptions.png ├── VizBin-OpenDesktop.png ├── assembler-mapping.png ├── assembler-runtimes.png ├── assembly.png ├── calculate-contig-coverage.py ├── coverage.ipynb ├── evaluate_assembly_summary.pdf ├── extract-sequences.py ├── interacting-with-anvio.pdf ├── jupyter_working.png ├── kmer-trimming.graffle ├── kmer-trimming.png ├── plot-quant.ipynb ├── read-vs-contig-alignment.png ├── salmon-quantification-plots-cicese-inclass.ipynb ├── sourmash_tetramer-cluster-extract.ipynb └── sourmash_tetramer.ipynb ├── gather-counts.py ├── img ├── 007476.full.pdf ├── 
2014-5-metagenomics-workshop.pdf ├── DC1_logo_small.png ├── DataONE_LOGO.jpg ├── Slide1.jpg ├── bootcamps │ ├── 2012-11-scripps.png │ ├── 2012-12-uta.png │ ├── 2013-01-mcgill.png │ └── 2013-01-mckellar.png ├── creative-commons-attribution-license.png ├── csv-mistake.png ├── excel-to-csv.png ├── excel_tables_example_sk_e1_p1_wrl.png ├── excel_tables_example_sk_e2_p1_wrl.png ├── excel_tables_example_sk_e2_p2.png ├── gvng.jpg ├── ip-address.png ├── lessons │ └── swc-shell │ │ ├── absolute_path.png │ │ ├── absolute_relative_path.png │ │ ├── command_shell.svg │ │ ├── decwriter.jpg │ │ ├── direct_shell_usage.png │ │ ├── filedir_challenge.png │ │ ├── filesystem.png │ │ ├── find_file_tree.png │ │ ├── google_vs_grep.png │ │ ├── home_directories.png │ │ ├── nano.png │ │ ├── nano_quotation.png │ │ ├── permissions_table.png │ │ ├── process_stdin_stdout.png │ │ ├── public_private_keys.png │ │ ├── relative_path.png │ │ ├── remote_shell_usage.png │ │ ├── running_a_process.png │ │ ├── running_wc.png │ │ ├── running_wc_sort.png │ │ ├── running_wc_sort_head.png │ │ ├── shell_as_process.png │ │ ├── shell_on_shell.png │ │ ├── vlad_homedir.png │ │ └── x_for_directories.png ├── mozilla-science-lab.png ├── nano1.png ├── nano2.png ├── osi-approved-license.png ├── readme │ ├── step1.png │ ├── step2.png │ ├── step3.png │ └── steps-src.svg ├── rss-icon-blue.png ├── setup │ ├── cygwin-icon.jpg │ ├── cygwin-terminal-300x175.jpg │ ├── gnome-terminal-300x195.jpg │ ├── mac-terminal-300x257.jpg │ └── ubuntu-terminal-300x197.jpg ├── site │ └── main_shadow.png ├── slides │ ├── enrolment.png │ └── workshops.png └── software-carpentry-banner.png ├── index.md ├── jetstream ├── boot.md ├── images │ ├── delete-1.png │ ├── delete-2.png │ ├── delete-3.png │ ├── delete-4.png │ ├── ec2-moba-2.png │ ├── ec2-moba-3.png │ ├── ec2-moba-4.png │ ├── ip-address.png │ ├── jetstream_login.png │ ├── login-1.png │ ├── login-1.thumb.png │ ├── login-10.png │ ├── login-10.thumb.png │ ├── login-11.png │ ├── 
login-11.thumb.png │ ├── login-12.png │ ├── login-12.thumb.png │ ├── login-2.png │ ├── login-2.thumb.png │ ├── login-3.png │ ├── login-3.thumb.png │ ├── login-4.png │ ├── login-4.thumb.png │ ├── login-5.png │ ├── login-5.thumb.png │ ├── login-6.png │ ├── login-6.thumb.png │ ├── login-7.png │ ├── login-7.thumb.png │ ├── login-8.png │ ├── login-8.thumb.png │ ├── login-9.png │ ├── login-9.thumb.png │ ├── moba-1.png │ ├── password-change.png │ ├── possible_instance_problem.png │ ├── possible_instance_problem.thumb.png │ ├── resume-1.png │ ├── resume-2.png │ ├── resume-3.png │ ├── start-1.png │ ├── start-2.png │ ├── start-3.png │ ├── stop-1.png │ ├── stop-2.png │ ├── stop-3.png │ ├── stop-4.png │ ├── suspend-1.png │ └── suspend-2.png ├── login.md └── ssh_changepassword.md ├── kmer_trimming.rst ├── mapping.rst ├── prokka_tutorial.rst ├── quality.rst ├── requirements.txt ├── running-command-line-blast.md ├── salmon_tutorial.rst ├── slice.rst ├── sourmash.md ├── sourmash_compare.md ├── sourmash_gather.md ├── test.html ├── toc.rst ├── welcome.pptx ├── welcome.rst ├── whatnext.md └── workflow.md /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | html/* 2 | html 3 | doctrees 4 | *~ 5 | -------------------------------------------------------------------------------- /DATA.md: -------------------------------------------------------------------------------- 1 | # Data 2 | 3 | [SRR1976948_1.fastq.gz](https://s3-us-west-1.amazonaws.com/dib-training.ucdavis.edu/metagenomics-scripps-2016-10-12/SRR1976948_1.fastq.gz) - First 1m reads of the SRA record 4 | 5 | 
[SRR1976948_2.fastq.gz](https://s3-us-west-1.amazonaws.com/dib-training.ucdavis.edu/metagenomics-scripps-2016-10-12/SRR1976948_2.fastq.gz) - First 1m reads of the SRA record 6 | 7 | [SRR1977249_1.fastq.gz](https://s3-us-west-1.amazonaws.com/dib-training.ucdavis.edu/metagenomics-scripps-2016-10-12/SRR1977249_1.fastq.gz) - First 1m reads of the SRA record 8 | 9 | [SRR1977249_2.fastq.gz](https://s3-us-west-1.amazonaws.com/dib-training.ucdavis.edu/metagenomics-scripps-2016-10-12/SRR1977249_2.fastq.gz) - First 1m reads of the SRA record 10 | 11 | [SRR1977296_1.fastq.gz](https://s3-us-west-1.amazonaws.com/dib-training.ucdavis.edu/metagenomics-scripps-2016-10-12/SRR1977296_1.fastq.gz) - First 1m reads of the SRA record 12 | 13 | [SRR1977296_2.fastq.gz](https://s3-us-west-1.amazonaws.com/dib-training.ucdavis.edu/metagenomics-scripps-2016-10-12/SRR1977296_2.fastq.gz) - First 1m reads of the SRA record 14 | 15 | [SRR1976948.abundtrim.subset.pe.fq.gz](https://s3-us-west-1.amazonaws.com/dib-training.ucdavis.edu/metagenomics-scripps-2016-10-12/SRR1976948.abundtrim.subset.pe.fq.gz) - abundtrim/subset swept/PE 16 | 17 | [SRR1977249.abundtrim.subset.pe.fq.gz](https://s3-us-west-1.amazonaws.com/dib-training.ucdavis.edu/metagenomics-scripps-2016-10-12/SRR1977249.abundtrim.subset.pe.fq.gz) - abundtrim/subset swept/PE 18 | 19 | [all-genomes.fasta.gz](https://s3-us-west-1.amazonaws.com/dib-training.ucdavis.edu/metagenomics-scripps-2016-10-12/all-genomes.fasta.gz) - all of the genomes from the study 20 | 21 | [subset-genomes.fasta.gz](https://s3-us-west-1.amazonaws.com/dib-training.ucdavis.edu/metagenomics-scripps-2016-10-12/subset-genomes.fasta.gz) - ~1/8th of the genomes from the study 22 | 23 | [subset_assembly.fa.gz](https://s3-us-west-1.amazonaws.com/dib-training.ucdavis.edu/metagenomics-scripps-2016-10-12/subset_assembly.fa.gz) - assembly of the two abundtrim/subset data sets 24 | --------------------------------------------------------------------------------
/Hu_metagenomes.matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/Hu_metagenomes.matrix.png -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | # Building this site on your own 2 | 3 | 1. Make a virtualenv: 4 | 5 | python -m virtualenv ../build 6 | . ../build/bin/activate 7 | 8 | 2. Install the requirements: 9 | 10 | pip install -r requirements.txt 11 | 12 | 3. Run `make` 13 | 14 | 4. Look at the `html/` directory. 15 | 16 | 17 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = . 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | all: html 20 | 21 | help: 22 | @echo "Please use \`make <target>' where <target> is one of" 23 | @echo " html to make standalone HTML files" 24 | @echo " dirhtml to make HTML files named index.html in directories" 25 | @echo " singlehtml to make a single large HTML file" 26 | @echo " pickle to make pickle files" 27 | @echo " json to make JSON files" 28 | @echo " htmlhelp to make HTML files and a HTML help project" 29 | @echo " qthelp to make HTML files and a qthelp project" 30 | @echo " devhelp to make HTML files and a Devhelp project" 31 | @echo " epub to make an epub" 32 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 33 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 34 | @echo " text to make text files" 35 | @echo " man to make manual pages" 36 | @echo " texinfo to make Texinfo files" 37 | @echo " info to make Texinfo files and run them through makeinfo" 38 | @echo " gettext to make PO message catalogs" 39 | @echo " changes to make an overview of all changed/added/deprecated items" 40 | @echo " linkcheck to check all external links for integrity" 41 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 42 | 43 | clean: 44 | -rm -rf html 45 | 46 | html: 47 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 48 | @echo 49 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 50 | 51 | dirhtml: 52 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 53 | @echo 54 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 55 | 56 | singlehtml: 57 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 58 | @echo 59 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
60 | 61 | pickle: 62 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 63 | @echo 64 | @echo "Build finished; now you can process the pickle files." 65 | 66 | json: 67 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 68 | @echo 69 | @echo "Build finished; now you can process the JSON files." 70 | 71 | htmlhelp: 72 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 73 | @echo 74 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 75 | ".hhp project file in $(BUILDDIR)/htmlhelp." 76 | 77 | qthelp: 78 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 79 | @echo 80 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 81 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 82 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/labibi.qhcp" 83 | @echo "To view the help file:" 84 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/labibi.qhc" 85 | 86 | devhelp: 87 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 88 | @echo 89 | @echo "Build finished." 90 | @echo "To view the help file:" 91 | @echo "# mkdir -p $$HOME/.local/share/devhelp/labibi" 92 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/labibi" 93 | @echo "# devhelp" 94 | 95 | epub: 96 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 97 | @echo 98 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 99 | 100 | latex: 101 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 102 | @echo 103 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 104 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 105 | "(use \`make latexpdf' here to do that automatically)." 106 | 107 | latexpdf: 108 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 109 | @echo "Running LaTeX files through pdflatex..." 110 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 111 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 
112 | 113 | text: 114 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 115 | @echo 116 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 117 | 118 | man: 119 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 120 | @echo 121 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 122 | 123 | texinfo: 124 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 125 | @echo 126 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 127 | @echo "Run \`make' in that directory to run these through makeinfo" \ 128 | "(use \`make info' here to do that automatically)." 129 | 130 | info: 131 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 132 | @echo "Running Texinfo files through makeinfo..." 133 | make -C $(BUILDDIR)/texinfo info 134 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 135 | 136 | gettext: 137 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 138 | @echo 139 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 140 | 141 | changes: 142 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 143 | @echo 144 | @echo "The overview file is in $(BUILDDIR)/changes." 145 | 146 | linkcheck: 147 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 148 | @echo 149 | @echo "Link check complete; look for any errors in the above output " \ 150 | "or in $(BUILDDIR)/linkcheck/output.txt." 151 | 152 | doctest: 153 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 154 | @echo "Testing of doctests in the sources finished, look at the " \ 155 | "results in $(BUILDDIR)/doctest/output.txt." 
156 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2017 / September / Environmental Metagenomics 2 | ============================================= 3 | 4 | This workshop was given on September 26th - 29th, 2017 at the CICESE in Ensenada, Baja California. 5 | 6 | Instructors: C. Titus Brown, Phillip Brooks, Harriet Alexander 7 | 8 | See https://2017-cicese-metagenomics.readthedocs.io/en/latest/ for the rendered version of this site. 9 | -------------------------------------------------------------------------------- /_static/Hu_metaG_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/Hu_metaG_comparison.png -------------------------------------------------------------------------------- /_static/Hu_metagenomes.matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/Hu_metagenomes.matrix.png -------------------------------------------------------------------------------- /_static/SBT.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SBT.png -------------------------------------------------------------------------------- /_static/SRR1976948_1.qc_fastqc.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1.qc_fastqc.zip -------------------------------------------------------------------------------- 
/_static/SRR1976948_1.qc_fastqc/Icons/error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1.qc_fastqc/Icons/error.png -------------------------------------------------------------------------------- /_static/SRR1976948_1.qc_fastqc/Icons/fastqc_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1.qc_fastqc/Icons/fastqc_icon.png -------------------------------------------------------------------------------- /_static/SRR1976948_1.qc_fastqc/Icons/tick.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1.qc_fastqc/Icons/tick.png -------------------------------------------------------------------------------- /_static/SRR1976948_1.qc_fastqc/Icons/warning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1.qc_fastqc/Icons/warning.png -------------------------------------------------------------------------------- /_static/SRR1976948_1.qc_fastqc/Images/duplication_levels.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1.qc_fastqc/Images/duplication_levels.png -------------------------------------------------------------------------------- /_static/SRR1976948_1.qc_fastqc/Images/kmer_profiles.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1.qc_fastqc/Images/kmer_profiles.png -------------------------------------------------------------------------------- /_static/SRR1976948_1.qc_fastqc/Images/per_base_gc_content.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1.qc_fastqc/Images/per_base_gc_content.png -------------------------------------------------------------------------------- /_static/SRR1976948_1.qc_fastqc/Images/per_base_n_content.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1.qc_fastqc/Images/per_base_n_content.png -------------------------------------------------------------------------------- /_static/SRR1976948_1.qc_fastqc/Images/per_base_quality.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1.qc_fastqc/Images/per_base_quality.png -------------------------------------------------------------------------------- /_static/SRR1976948_1.qc_fastqc/Images/per_base_sequence_content.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1.qc_fastqc/Images/per_base_sequence_content.png -------------------------------------------------------------------------------- /_static/SRR1976948_1.qc_fastqc/Images/per_sequence_gc_content.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1.qc_fastqc/Images/per_sequence_gc_content.png -------------------------------------------------------------------------------- /_static/SRR1976948_1.qc_fastqc/Images/per_sequence_quality.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1.qc_fastqc/Images/per_sequence_quality.png -------------------------------------------------------------------------------- /_static/SRR1976948_1.qc_fastqc/Images/sequence_length_distribution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1.qc_fastqc/Images/sequence_length_distribution.png -------------------------------------------------------------------------------- /_static/SRR1976948_1.qc_fastqc/summary.txt: -------------------------------------------------------------------------------- 1 | PASS Basic Statistics SRR1976948_1.qc.fq.gz 2 | WARN Per base sequence quality SRR1976948_1.qc.fq.gz 3 | PASS Per sequence quality scores SRR1976948_1.qc.fq.gz 4 | WARN Per base sequence content SRR1976948_1.qc.fq.gz 5 | PASS Per base GC content SRR1976948_1.qc.fq.gz 6 | FAIL Per sequence GC content SRR1976948_1.qc.fq.gz 7 | PASS Per base N content SRR1976948_1.qc.fq.gz 8 | WARN Sequence Length Distribution SRR1976948_1.qc.fq.gz 9 | PASS Sequence Duplication Levels SRR1976948_1.qc.fq.gz 10 | PASS Overrepresented sequences SRR1976948_1.qc.fq.gz 11 | WARN Kmer Content SRR1976948_1.qc.fq.gz 12 | -------------------------------------------------------------------------------- /_static/SRR1976948_1_fastqc.html: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | SRR1976948_1.fastq.gz FastQC Report 4 | 5 | 194 | 195 | 196 | 197 |
198 |
FastQCFastQC Report
199 |
200 | Sat 22 Apr 2017
201 | SRR1976948_1.fastq.gz 202 |
203 |
204 | 220 |
221 |

[OK] Basic Statistics

222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 |
MeasureValue
FilenameSRR1976948_1.fastq.gz
File typeConventional base calls
EncodingSanger / Illumina 1.9
Total Sequences1000000
Filtered Sequences0
Sequence length251
%GC44
256 |
257 |

[FAIL] Per base sequence quality

258 |

Per base quality graph

259 |
260 |

[OK] Per sequence quality scores

261 |

Per Sequence quality graph

262 |
263 |

[WARN] Per base sequence content

264 |

Per base sequence content

265 |
266 |

[OK] Per base GC content

267 |

Per base GC content graph

268 |
269 |

[FAIL] Per sequence GC content

270 |

Per sequence GC content graph

271 |
272 |

[OK] Per base N content

273 |

N content graph

274 |
275 |

[OK] Sequence Length Distribution

276 |

Sequence length distribution

277 |
278 |

[OK] Sequence Duplication Levels

279 |

Duplication level graph

280 |
281 |

[OK] Overrepresented sequences

282 |

No overrepresented sequences

283 |
284 |

[WARN] Kmer Content

285 |

Kmer graph

286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 |
SequenceCountObs/Exp OverallObs/Exp MaxMax Obs/Exp Position
AAAAA15234003.65591556.1976423240-247
TTTTT12450103.17310124.1512938
309 |
310 |
311 | -------------------------------------------------------------------------------- /_static/SRR1976948_1_fastqc.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1_fastqc.zip -------------------------------------------------------------------------------- /_static/SRR1976948_1_fastqc/Icons/error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1_fastqc/Icons/error.png -------------------------------------------------------------------------------- /_static/SRR1976948_1_fastqc/Icons/fastqc_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1_fastqc/Icons/fastqc_icon.png -------------------------------------------------------------------------------- /_static/SRR1976948_1_fastqc/Icons/tick.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1_fastqc/Icons/tick.png -------------------------------------------------------------------------------- /_static/SRR1976948_1_fastqc/Icons/warning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1_fastqc/Icons/warning.png -------------------------------------------------------------------------------- /_static/SRR1976948_1_fastqc/Images/duplication_levels.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1_fastqc/Images/duplication_levels.png -------------------------------------------------------------------------------- /_static/SRR1976948_1_fastqc/Images/kmer_profiles.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1_fastqc/Images/kmer_profiles.png -------------------------------------------------------------------------------- /_static/SRR1976948_1_fastqc/Images/per_base_gc_content.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1_fastqc/Images/per_base_gc_content.png -------------------------------------------------------------------------------- /_static/SRR1976948_1_fastqc/Images/per_base_n_content.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1_fastqc/Images/per_base_n_content.png -------------------------------------------------------------------------------- /_static/SRR1976948_1_fastqc/Images/per_base_quality.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1_fastqc/Images/per_base_quality.png -------------------------------------------------------------------------------- /_static/SRR1976948_1_fastqc/Images/per_base_sequence_content.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1_fastqc/Images/per_base_sequence_content.png -------------------------------------------------------------------------------- /_static/SRR1976948_1_fastqc/Images/per_sequence_gc_content.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1_fastqc/Images/per_sequence_gc_content.png -------------------------------------------------------------------------------- /_static/SRR1976948_1_fastqc/Images/per_sequence_quality.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1_fastqc/Images/per_sequence_quality.png -------------------------------------------------------------------------------- /_static/SRR1976948_1_fastqc/Images/sequence_length_distribution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_1_fastqc/Images/sequence_length_distribution.png -------------------------------------------------------------------------------- /_static/SRR1976948_1_fastqc/summary.txt: -------------------------------------------------------------------------------- 1 | PASS Basic Statistics SRR1976948_1.fastq.gz 2 | FAIL Per base sequence quality SRR1976948_1.fastq.gz 3 | PASS Per sequence quality scores SRR1976948_1.fastq.gz 4 | WARN Per base sequence content SRR1976948_1.fastq.gz 5 | PASS Per base GC content SRR1976948_1.fastq.gz 6 | FAIL Per sequence GC content SRR1976948_1.fastq.gz 7 | PASS Per 
base N content SRR1976948_1.fastq.gz 8 | PASS Sequence Length Distribution SRR1976948_1.fastq.gz 9 | PASS Sequence Duplication Levels SRR1976948_1.fastq.gz 10 | PASS Overrepresented sequences SRR1976948_1.fastq.gz 11 | WARN Kmer Content SRR1976948_1.fastq.gz 12 | -------------------------------------------------------------------------------- /_static/SRR1976948_2.qc_fastqc.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2.qc_fastqc.zip -------------------------------------------------------------------------------- /_static/SRR1976948_2.qc_fastqc/Icons/error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2.qc_fastqc/Icons/error.png -------------------------------------------------------------------------------- /_static/SRR1976948_2.qc_fastqc/Icons/fastqc_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2.qc_fastqc/Icons/fastqc_icon.png -------------------------------------------------------------------------------- /_static/SRR1976948_2.qc_fastqc/Icons/tick.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2.qc_fastqc/Icons/tick.png -------------------------------------------------------------------------------- /_static/SRR1976948_2.qc_fastqc/Icons/warning.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2.qc_fastqc/Icons/warning.png -------------------------------------------------------------------------------- /_static/SRR1976948_2.qc_fastqc/Images/duplication_levels.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2.qc_fastqc/Images/duplication_levels.png -------------------------------------------------------------------------------- /_static/SRR1976948_2.qc_fastqc/Images/kmer_profiles.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2.qc_fastqc/Images/kmer_profiles.png -------------------------------------------------------------------------------- /_static/SRR1976948_2.qc_fastqc/Images/per_base_gc_content.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2.qc_fastqc/Images/per_base_gc_content.png -------------------------------------------------------------------------------- /_static/SRR1976948_2.qc_fastqc/Images/per_base_n_content.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2.qc_fastqc/Images/per_base_n_content.png -------------------------------------------------------------------------------- /_static/SRR1976948_2.qc_fastqc/Images/per_base_quality.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2.qc_fastqc/Images/per_base_quality.png -------------------------------------------------------------------------------- /_static/SRR1976948_2.qc_fastqc/Images/per_base_sequence_content.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2.qc_fastqc/Images/per_base_sequence_content.png -------------------------------------------------------------------------------- /_static/SRR1976948_2.qc_fastqc/Images/per_sequence_gc_content.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2.qc_fastqc/Images/per_sequence_gc_content.png -------------------------------------------------------------------------------- /_static/SRR1976948_2.qc_fastqc/Images/per_sequence_quality.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2.qc_fastqc/Images/per_sequence_quality.png -------------------------------------------------------------------------------- /_static/SRR1976948_2.qc_fastqc/Images/sequence_length_distribution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2.qc_fastqc/Images/sequence_length_distribution.png -------------------------------------------------------------------------------- /_static/SRR1976948_2.qc_fastqc/summary.txt: 
-------------------------------------------------------------------------------- 1 | PASS Basic Statistics SRR1976948_2.qc.fq.gz 2 | FAIL Per base sequence quality SRR1976948_2.qc.fq.gz 3 | PASS Per sequence quality scores SRR1976948_2.qc.fq.gz 4 | WARN Per base sequence content SRR1976948_2.qc.fq.gz 5 | WARN Per base GC content SRR1976948_2.qc.fq.gz 6 | FAIL Per sequence GC content SRR1976948_2.qc.fq.gz 7 | PASS Per base N content SRR1976948_2.qc.fq.gz 8 | WARN Sequence Length Distribution SRR1976948_2.qc.fq.gz 9 | PASS Sequence Duplication Levels SRR1976948_2.qc.fq.gz 10 | PASS Overrepresented sequences SRR1976948_2.qc.fq.gz 11 | FAIL Kmer Content SRR1976948_2.qc.fq.gz 12 | -------------------------------------------------------------------------------- /_static/SRR1976948_2_fastqc.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2_fastqc.zip -------------------------------------------------------------------------------- /_static/SRR1976948_2_fastqc/Icons/error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2_fastqc/Icons/error.png -------------------------------------------------------------------------------- /_static/SRR1976948_2_fastqc/Icons/fastqc_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2_fastqc/Icons/fastqc_icon.png -------------------------------------------------------------------------------- /_static/SRR1976948_2_fastqc/Icons/tick.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2_fastqc/Icons/tick.png -------------------------------------------------------------------------------- /_static/SRR1976948_2_fastqc/Icons/warning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2_fastqc/Icons/warning.png -------------------------------------------------------------------------------- /_static/SRR1976948_2_fastqc/Images/duplication_levels.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2_fastqc/Images/duplication_levels.png -------------------------------------------------------------------------------- /_static/SRR1976948_2_fastqc/Images/kmer_profiles.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2_fastqc/Images/kmer_profiles.png -------------------------------------------------------------------------------- /_static/SRR1976948_2_fastqc/Images/per_base_gc_content.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2_fastqc/Images/per_base_gc_content.png -------------------------------------------------------------------------------- /_static/SRR1976948_2_fastqc/Images/per_base_n_content.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2_fastqc/Images/per_base_n_content.png -------------------------------------------------------------------------------- /_static/SRR1976948_2_fastqc/Images/per_base_quality.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2_fastqc/Images/per_base_quality.png -------------------------------------------------------------------------------- /_static/SRR1976948_2_fastqc/Images/per_base_sequence_content.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2_fastqc/Images/per_base_sequence_content.png -------------------------------------------------------------------------------- /_static/SRR1976948_2_fastqc/Images/per_sequence_gc_content.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2_fastqc/Images/per_sequence_gc_content.png -------------------------------------------------------------------------------- /_static/SRR1976948_2_fastqc/Images/per_sequence_quality.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2_fastqc/Images/per_sequence_quality.png -------------------------------------------------------------------------------- /_static/SRR1976948_2_fastqc/Images/sequence_length_distribution.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/SRR1976948_2_fastqc/Images/sequence_length_distribution.png -------------------------------------------------------------------------------- /_static/SRR1976948_2_fastqc/summary.txt: -------------------------------------------------------------------------------- 1 | PASS Basic Statistics SRR1976948_2.fastq.gz 2 | FAIL Per base sequence quality SRR1976948_2.fastq.gz 3 | PASS Per sequence quality scores SRR1976948_2.fastq.gz 4 | WARN Per base sequence content SRR1976948_2.fastq.gz 5 | WARN Per base GC content SRR1976948_2.fastq.gz 6 | FAIL Per sequence GC content SRR1976948_2.fastq.gz 7 | PASS Per base N content SRR1976948_2.fastq.gz 8 | PASS Sequence Length Distribution SRR1976948_2.fastq.gz 9 | PASS Sequence Duplication Levels SRR1976948_2.fastq.gz 10 | PASS Overrepresented sequences SRR1976948_2.fastq.gz 11 | FAIL Kmer Content SRR1976948_2.fastq.gz 12 | -------------------------------------------------------------------------------- /_static/Sourmash_flow_diagrams_QC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/Sourmash_flow_diagrams_QC.png -------------------------------------------------------------------------------- /_static/Sourmash_flow_diagrams_compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/Sourmash_flow_diagrams_compare.png -------------------------------------------------------------------------------- /_static/Sourmash_flow_diagrams_compute.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/Sourmash_flow_diagrams_compute.png -------------------------------------------------------------------------------- /_static/Sourmash_flow_diagrams_gather.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/Sourmash_flow_diagrams_gather.png -------------------------------------------------------------------------------- /_static/Sourmash_flow_diagrams_search.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/Sourmash_flow_diagrams_search.png -------------------------------------------------------------------------------- /_static/ecoli_cmp.matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/ecoli_cmp.matrix.png -------------------------------------------------------------------------------- /_static/kmers-metapalette.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/kmers-metapalette.png -------------------------------------------------------------------------------- /_static/labibi.css: -------------------------------------------------------------------------------- 1 | @import url('default.css'); 2 | 3 | 4 | div.bodywrapper { 5 | margin: 0 0 0 0; 6 | } 7 | 8 | /* Styles for floating Edit on GitHub box */ 9 | #editor-trap { 10 | margin: 1em; 11 | padding: 1em; 12 | border: 1px solid black; 13 | box-shadow: 0 0 3px black; 14 | width: 200px; 15 | 16 
| background: #fefabc; 17 | position: fixed; 18 | bottom: 1em; 19 | left: 1em; 20 | font-size: 60%; 21 | text-align: left; 22 | z-index: 2; 23 | 24 | -moz-transform: rotate(-4deg); 25 | -webkit-transform: rotate(-4deg); 26 | -o-transform: rotate(-4deg); 27 | -ms-transform: rotate(-4deg); 28 | transform: rotate(-4deg); 29 | box-shadow: 0px 4px 6px #333; 30 | -moz-box-shadow: 0px 4px 6px #333; 31 | -webkit-box-shadow: 0px 4px 6px #333; 32 | 33 | 34 | cursor: pointer; 35 | } 36 | 37 | #editor-trap h3 { 38 | margin: 0 0 0.5em 0; 39 | padding: 0; 40 | background: transparent; 41 | } 42 | 43 | #editor-trap ol { 44 | margin: 0; 45 | padding: 0 0 0 2em; 46 | } 47 | 48 | /* Hide trick */ 49 | 50 | #editor-trap.toggled > * { 51 | display: none; 52 | } 53 | 54 | 55 | #editor-trap.toggled > h3 { 56 | display: block; 57 | } 58 | 59 | 60 | -------------------------------------------------------------------------------- /_static/labibi.js: -------------------------------------------------------------------------------- 1 | // Remember the collapsed/expanded state of the editor pop-up help box in localStorage 2 | // so it does not pop up again on every page load. 3 | // Does not even pretend to support IE gracefully. 4 | (function($) { 5 | 6 | $(document).ready(function() { 7 | var box = $("#editor-trap"); 8 | var klass = "toggled"; 9 | var storageKey = "toggled"; 10 | 11 | function toggle() { 12 | box.toggleClass(klass); 13 | // Persist the state as a string: "toggled" when collapsed, "not-toggled" otherwise 14 | window.localStorage.setItem(storageKey, box.hasClass(klass) ? "toggled" : "not-toggled"); 15 | } 16 | 17 | box.click(toggle); 18 | 19 | // Restore the persisted state; when nothing is stored yet (!v) the box starts collapsed 20 | // Note that localStorage does not necessarily support boolean values (ugh!) 
21 | // http://stackoverflow.com/questions/3263161/cannot-set-boolean-values-in-localstorage 22 | var v = window.localStorage.getItem(storageKey); 23 | if(v == "toggled" || !v) { 24 | box.addClass(klass); 25 | } 26 | 27 | }); 28 | 29 | })(jQuery); 30 | -------------------------------------------------------------------------------- /_static/sourmash_quality_filtering_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/_static/sourmash_quality_filtering_workflow.png -------------------------------------------------------------------------------- /_templates/page.html: -------------------------------------------------------------------------------- 1 | {% extends "!page.html" %} 2 | 3 | {# To customize for your project, search and replace 'labibi' stuff. 4 | You might also want to replace the UA- string for google analytics. 5 | 6 | Set what you need in conf.py 7 | #} 8 | 9 | {# set in conf.py: google_analytics_id = 'UA-36028965-1' #} 10 | {# set in conf.py: disqus_shortname = 'labibi' #} 11 | 12 | {# set these in conf.py: 13 | github_base_account = 'ctb' 14 | github_project = 'labibi' 15 | #} 16 | 17 | {%- set render_sidebar = 0 %} 18 | 19 | {##################################################} 20 | {# for plone-derived "edit me" & Google analytics #} 21 | {##################################################} 22 | 23 | {% block footer %} 24 | {{ super() }} 25 | 26 | {# Add plone-derived 'edit me' sticky note #} 27 | {% if github_base_account %} 28 |
29 |

Edit this document!

30 | 31 |

32 | This file can be edited directly through the Web. Anyone can 33 | update and fix errors in this document with a few clicks -- 34 | no downloads needed. 35 |

36 | 37 |

    38 | 39 |
  1. 40 | Go to 41 | 42 | {{ title }} 43 | on GitHub. 44 |
  2. 45 | 46 |
  3. 47 | Edit files using GitHub's text editor in your web browser (see the 'Edit' tab on the top right of the file) 48 |
  4. 49 | 50 |
  5. 51 | Fill in the Commit message text box at the bottom of the page describing why 52 | you made the changes. Press the Propose file change button next to it when done. 53 |
  6. 54 | 55 |
  7. 56 | Then click Send a pull request. 57 |
  8. 58 | 59 |
  9. 60 | Your changes are now queued for review under the project's Pull requests tab on GitHub! 61 |
  10. 62 |
63 | 64 |

65 | For an introduction to the documentation format please see the reST primer. 66 |

67 | 68 |
69 | 70 | {% endif %} 71 | 72 | 82 | 83 | {% endblock %} 84 | 85 | {#########################} 86 | {# for disqus commenting #} 87 | {#########################} 88 | 89 | {% macro comments() %} 90 | 91 |
92 | 103 | 104 | comments powered by Disqus 105 | 106 | {% endmacro %} 107 | 108 | {% block extrahead %} 109 | 110 | {% if github_base_account %} 111 | 112 | {% else %} 113 | 120 | {% endif %} 121 | 122 | {% endblock %} 123 | 124 | {%- block body %} 125 | {{ super() }} 126 | 127 |
128 | 129 | LICENSE: 130 | This documentation and all textual/graphic site content is released 131 | under 132 | Creative Commons - 0 133 | (CC0) -- fork @ 134 | github. 135 | 136 |
137 | 138 | {% if disqus_shortname %} 139 | {{ comments() }} 140 | {% else %} 141 | 142 | {% endif %} 143 | 144 | {%- endblock %} 145 | 146 | {####} 147 | 148 | {% block sidebar2 %} 149 | {% endblock %} 150 | 151 | {% block sidebar1 %} 152 | {% endblock %} 153 | 154 | -------------------------------------------------------------------------------- /assemble-metaspades.md: -------------------------------------------------------------------------------- 1 | # Assembly with metaSPAdes 2 | 3 | Here are the instructions for assembling the metagenomic data with a different assembler. Here we are showing [metaSPAdes](https://www.ncbi.nlm.nih.gov/pubmed/28298430), one of the assembler options highlighted in the [CAMI](https://www.biorxiv.org/content/early/2017/06/12/099127) paper. 4 | 5 | metaSPAdes takes longer than MEGAHIT to run the assembly, so we will not be running it during the course. Rather it is available for download [here](LINK). If you are interested in running the assembly yourself, however, please see below. 6 | 7 | Install metaSPAdes: 8 | 9 | ``` 10 | wget http://cab.spbu.ru/files/release3.10.1/SPAdes-3.10.1-Linux.tar.gz 11 | tar -xzf SPAdes-3.10.1-Linux.tar.gz 12 | export PATH=$PATH:~/SPAdes-3.10.1-Linux/bin/ 13 | ``` 14 | 15 | Concatenate the two sets of reads: 16 | 17 | ``` 18 | cd ~/data 19 | for x in *gz 20 | do 21 | gunzip $x 22 | done 23 | 24 | cat *fq > coassembly.fq 25 | 26 | cd ~ 27 | mkdir assembly-spades 28 | cd assembly-spades 29 | ``` 30 | 31 | And assemble with metaSPAdes: 32 | 33 | ``` 34 | metaspades.py --12 ~/data/coassembly.fq -o metaS-assembly 35 | ``` 36 | -------------------------------------------------------------------------------- /assemble.md: -------------------------------------------------------------------------------- 1 | Run the MEGAHIT assembler 2 | ========================= 3 | 4 | [MEGAHIT](https://github.com/voutcn/megahit) is a very fast, quite good assembler designed for metagenomes. 
5 | 6 | First, install it: 7 | 8 | ``` 9 | cd ~/ 10 | git clone https://github.com/voutcn/megahit.git 11 | cd megahit 12 | make 13 | ``` 14 | 15 | Now, download some data: 16 | 17 | ``` 18 | mkdir ~/data 19 | cd ~/data 20 | curl -O https://s3-us-west-1.amazonaws.com/dib-training.ucdavis.edu/metagenomics-scripps-2016-10-12/SRR1976948.abundtrim.subset.pe.fq.gz 21 | curl -O https://s3-us-west-1.amazonaws.com/dib-training.ucdavis.edu/metagenomics-scripps-2016-10-12/SRR1977249.abundtrim.subset.pe.fq.gz 22 | ``` 23 | 24 | These are data that have been run through k-mer abundance trimming and subsampled so that we can run an assembly in a fairly short time period. 25 | 26 | ---- 27 | 28 | Now, finally, run the assembler! 29 | 30 | ``` 31 | mkdir ~/assembly 32 | cd ~/assembly 33 | ln -fs ../data/*.subset.pe.fq.gz . 34 | 35 | ~/megahit/megahit --12 SRR1976948.abundtrim.subset.pe.fq.gz,SRR1977249.abundtrim.subset.pe.fq.gz \ 36 | -o combined 37 | ``` 38 | 39 | This will take about 15 minutes; at the end you should see output like 40 | this: 41 | 42 | ``` 43 | ... 7713 contigs, total 13168567 bp, min 200 bp, max 54372 bp, avg 1707 bp, N50 4305 bp 44 | ... ALL DONE. Time elapsed: 899.612093 seconds 45 | ``` 46 | 47 | The output assembly will be in `combined/final.contigs.fa`. 48 | 49 | ## While the assembly runs... 50 | 51 | * Discuss CAMI paper. 52 | * What does, and doesn't, assemble? 53 | * How good is assembly anyway? 54 | 55 | #### Questions for Discussion: 56 | 57 | * Why would we assemble, vs looking at raw reads? What are the advantages and disadvantages? 58 | * What are the technology tradeoffs between Illumina HiSeq, Illumina MiSeq, and PacBio? (Also see [this paper](http://ivory.idyll.org/blog/2015-sharon-paper.html).) 59 | * What kind of experimental design considerations should you have if you plan to assemble? 60 | 61 | Some figures for your consideration: 62 | The first two come from work by Dr. 
Sherine Awad on analyzing the data from Shakya et al (2014). The third comes from an analysis of read search vs contig search of a protein database. 63 | 64 | ![](files/assembler-runtimes.png) 65 | 66 | ![](files/assembler-mapping.png) 67 | 68 | ![](files/read-vs-contig-alignment.png) 69 | 70 | ## After the assembly is finished 71 | 72 | Let's first take a look at the assembly: 73 | 74 | ``` 75 | less combined/final.contigs.fa 76 | ``` 77 | At this point we can do a bunch of things: 78 | 79 | * annotate the assembly with [Prokka](prokka_tutorial.md); 80 | * evaluate the assembly's inclusion of k-mers and reads; 81 | * set up a BLAST database so that we can search it for genes of interest; 82 | * quantify the abundance of the contigs or genes in the assembly, using the original read data set, with [Salmon](salmon_tutorial.rst); 83 | * Identify contigs with similar tetra-mer abundance and coverage with [Binning](binning.md); 84 | -------------------------------------------------------------------------------- /assembly-evaluation.md: -------------------------------------------------------------------------------- 1 | # Evaluating Metagenomic Assemblies 2 | 3 | So, you have generated an assembly of your metagenome from short-read data. What now? How can you tell how *good* it is? 4 | 5 | Now, onto getting quantitative metrics: 6 | 7 | Now we can run a few stats on our assembly. To do this we will use [QUAST](http://quast.sourceforge.net/quast): 8 | 9 | ``` 10 | cd ~/ 11 | git clone https://github.com/ablab/quast.git -b release_4.5 12 | export PYTHONPATH=$(pwd)/quast/libs/ 13 | ``` 14 | 15 | Now, run QUAST on the assembly: 16 | 17 | ``` 18 | cd ~/assembly 19 | mkdir quast-evaluation 20 | cd quast-evaluation 21 | ln -fs ../combined/final.contigs.fa megahit.contigs.fa 22 | ~/quast/quast.py megahit.contigs.fa -o megahit-report 23 | cat megahit-report/report.txt 24 | ``` 25 | 26 | What does this say about our assembly? What do the stats *not* tell us? 
27 | 28 | For thought and discussion: a few [nice slides](files/evaluate_assembly_summary.pdf) from Jessica Blanton on assessing assembly quality. 29 | 30 | The stats that are reported by QUAST do not mean much on their own-- for them to be more meaningful it is helpful to have another assembly against which to compare them. For that reason we have assembled the same data using a different assembler-- metaSPAdes. To see how the assembly was generated and to try running the assembly on your own later you can follow the [metaSPAdes tutorial](assemble-metaspades.html). It took ~2 times as long as MEGAHIT on this dataset. 31 | 32 | So, for now, download the metaSPAdes assembly: 33 | 34 | ``` 35 | cd ~/assembly/quast-evaluation/ 36 | curl -LO https://osf.io/h29jk/download 37 | mv download metaspades.contigs.fa.gz 38 | gunzip metaspades.contigs.fa.gz 39 | ``` 40 | Now, adjust the scripts used previously to calculate the same metrics for the new assembly. How do the two compare? What metrics should you care about? 41 | 42 | Let's look at the two reports in parallel! 43 | 44 | ``` 45 | paste */report.txt | cut -f1-2,4 46 | ``` 47 | 48 | How do they compare? What does it mean? Does QUAST give us enough information to tell which assembly is better? 
49 | -------------------------------------------------------------------------------- /aws-ssh/images/win-putty-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws-ssh/images/win-putty-1.png -------------------------------------------------------------------------------- /aws-ssh/images/win-putty-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws-ssh/images/win-putty-2.png -------------------------------------------------------------------------------- /aws-ssh/images/win-putty-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws-ssh/images/win-putty-3.png -------------------------------------------------------------------------------- /aws-ssh/images/win-putty-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws-ssh/images/win-putty-4.png -------------------------------------------------------------------------------- /aws-ssh/index.md: -------------------------------------------------------------------------------- 1 | # Logging in to your remote machine with SSH 2 | 3 | [Log in with SSH on Mac and Linux](log-in-with-ssh-mac.html) 4 | 5 | [Log in with SSH on Windows](log-in-with-ssh-win.html) 6 | -------------------------------------------------------------------------------- /aws-ssh/log-in-with-ssh-mac.rst: -------------------------------------------------------------------------------- 1 | ======================================================= 2 | Logging into your instance 
"in the cloud" (Mac version) 3 | ======================================================= 4 | 5 | OK, so we've given you a running computer. How do you access it? 6 | 7 | The two thing you'll need are the network name of your computer, which 8 | you can get from the spreadsheet linked to in the hackmd; and the 9 | 'cicese.pem' file that we've given you on the hackmd. 10 | 11 | Copy the name, and connect to that computer with ssh under the username 12 | 'ubuntu', as follows: 13 | 14 | First, move your private key file from wherever you downloaded it onto 15 | your Desktop. 16 | 17 | Next, start Terminal (in Applications... Utilities...) and type:: 18 | 19 | chmod og-rwx ~/Desktop/cicese.pem 20 | 21 | to set the permissions on the private key file to "closed to others". 22 | 23 | Then type:: 24 | 25 | ssh -i ~/Desktop/cicese.pem ubuntu@ec2-???-???-???-???.compute-1.amazonaws.com 26 | 27 | (but you have to replace the stuff after the '@' sign with the name of the host). 28 | 29 | Here, you're logging in as user 'ubuntu' to the machine 30 | 'ec2-174-129-122-189.compute-1.amazonaws.com' using the authentication 31 | key located in 'cicese.pem' on your Desktop. 32 | 33 | You should now see a text line that starts with something like 34 | ``ubuntu@ip-10-235-34-223:~$``. You're in! Now type:: 35 | 36 | sudo bash 37 | cd /root 38 | 39 | to switch into superuser mode (see: http://xkcd.com/149/) and go to your 40 | home directory. 41 | 42 | This is where the rest of the tutorials will start! 43 | 44 | To log out, type:: 45 | 46 | exit 47 | logout 48 | 49 | or just close the window. 
50 | -------------------------------------------------------------------------------- /aws-ssh/log-in-with-ssh-win.rst: -------------------------------------------------------------------------------- 1 | =========================================================== 2 | Logging into your instance "in the cloud" (Windows version) 3 | =========================================================== 4 | 5 | You'll need the .ppk file from the hackmd, and the hostname from the 6 | spreadsheet, to log in. 7 | 8 | Logging into your EC2 instance with Putty 9 | ========================================= 10 | 11 | Open up putty, and enter your hostname into the Host Name box. 12 | 13 | .. image:: images/win-putty-1.png 14 | :width: 50% 15 | 16 | ---- 17 | 18 | Now, go find the 'SSH' section and enter your ppk file (the one you 19 | downloaded from the hackmd). Then select 'Open'. 20 | 21 | ---- 22 | 23 | .. image:: images/win-putty-2.png 24 | :width: 50% 25 | 26 | ---- 27 | 28 | Log in as "ubuntu". 29 | 30 | .. image:: images/win-putty-3.png 31 | :width: 50% 32 | 33 | ---- 34 | 35 | Declare victory! 36 | 37 | .. image:: images/win-putty-4.png 38 | :width: 50% 39 | 40 | ---- 41 | 42 | Here, you're logging in as user 'ubuntu' to the machine 43 | 'ec2-174-129-122-189.compute-1.amazonaws.com' using the authentication 44 | key located in 'cicese.ppk' on your Desktop. 45 | 46 | (Your key will be named 'cicese.ppk', and your hostname will be the 47 | one in the spreadsheet.) 48 | 49 | You should now see a text line that starts with something like 50 | ``ubuntu@ip-10-235-34-223:~$``. You're in! Now type:: 51 | 52 | sudo bash 53 | cd /root 54 | 55 | to switch into superuser mode (see: http://xkcd.com/149/) and go to your 56 | home directory. 57 | 58 | This is where the rest of the tutorials will start! 59 | 60 | To log out, type:: 61 | 62 | exit 63 | logout 64 | 65 | or just close the window. 
66 | -------------------------------------------------------------------------------- /aws/boot.rst: -------------------------------------------------------------------------------- 1 | ************************************** 2 | Start an Amazon Web Services computer: 3 | ************************************** 4 | 5 | This page shows you how to create a new "AWS instance", or a running 6 | computer. 7 | 8 | ---- 9 | 10 | Start at the Amazon Web Services console (http://aws.amazon.com/ and 11 | sign in to the console). 12 | 13 | 0. Select "EC2 - virtual servers in the cloud" 14 | ============================================== 15 | 16 | .. thumbnail:: images/boot-0.png 17 | :width: 20% 18 | 19 | 1. Switch to zone US West (N California) 20 | ======================================== 21 | 22 | .. thumbnail:: images/boot-1.png 23 | :width: 20% 24 | 25 | 2. Click on "Launch instance." 26 | ============================== 27 | 28 | 3. Select "Community AMIs." 29 | =========================== 30 | 31 | .. thumbnail:: images/boot-2.png 32 | :width: 20% 33 | 34 | 4. Search for ami-05384865 (ubuntu-wily-15.10-amd64-server) 35 | =========================================================== 36 | 37 | Use ami-05384865. 38 | 39 | .. thumbnail:: images/boot-3.png 40 | :width: 20% 41 | 42 | 5. Click on "Select." 43 | ===================== 44 | 45 | 6. Choose m4.xlarge. 46 | ==================== 47 | 48 | .. thumbnail:: images/boot-4.png 49 | :width: 20% 50 | 51 | 7. Click "Review and Launch." 52 | ============================= 53 | 54 | 8. Click "Launch." 55 | ================== 56 | 57 | .. thumbnail:: images/boot-5.png 58 | :width: 20% 59 | 60 | 9. Select "Create a new key pair." 61 | ================================== 62 | 63 | Note: you only need to do this the first time you create an instance. 64 | If you know where your amazon-key.pem file is, you can select 'Use an 65 | existing key pair' here. But you can always create a new key pair if 66 | you want, too. 
67 | 68 | If you have an existing key pair, go to step 12, "Launch instance." 69 | 70 | .. thumbnail:: images/boot-6.png 71 | :width: 20% 72 | 73 | 10. Enter name 'amazon-key'. 74 | ============================ 75 | 76 | 11. Click "Download key pair." 77 | ============================== 78 | 79 | 12. Click "Launch instance." 80 | ============================ 81 | 82 | 13. Select View instances (lower right) 83 | ======================================= 84 | 85 | .. thumbnail:: images/boot-8.png 86 | :width: 20% 87 | 88 | 14. Bask in the glory of your running instance 89 | ============================================== 90 | 91 | Note that for your instance name you can use either "Public IP" or 92 | "Public DNS". Here, the machine only has a public IP. 93 | 94 | .. thumbnail:: images/boot-9.png 95 | :width: 20% 96 | 97 | You can now :doc:`login-shell` or :doc:`configure-firewall`. 98 | -------------------------------------------------------------------------------- /aws/configure-firewall.rst: -------------------------------------------------------------------------------- 1 | ******************************** 2 | Configure your instance firewall 3 | ******************************** 4 | 5 | Normally, Amazon computers only allow shell logins via ssh (port 22 6 | access). If we want to run a Web service or something else, we need 7 | to give the outside world access to other network locations on the 8 | computer. 9 | 10 | Below, we will open ports 8000-9000, which will let us run things like 11 | RStudio Server. If you want to run other things, like a Web server, 12 | you'll need to find the port(s) associated with those services and 13 | open those instead of 8000-9000. (Tip: Web servers run on port 80.) 14 | 15 | 1. Select 'Security Groups' 16 | --------------------------- 17 | 18 | Find "Security Groups" in the lower pane of your instance's 19 | information page, and click on "launch-wizard-N". 20 | 21 | .. 
thumbnail:: images/network-0.png 22 | :width: 20% 23 | 24 | 2. Select 'Inbound' 25 | ------------------- 26 | 27 | .. thumbnail:: images/network-1.png 28 | :width: 20% 29 | 30 | 3. Select 'Edit' 31 | ---------------- 32 | 33 | .. thumbnail:: images/network-2.png 34 | :width: 20% 35 | 36 | 4. Select 'Add Rule' 37 | -------------------- 38 | 39 | .. thumbnail:: images/network-3.png 40 | :width: 20% 41 | 42 | 5. Enter rule information 43 | ------------------------- 44 | 45 | Add a new rule: Custom TCP, 8000-9000, Source Anywhere. 46 | 47 | 6. Select 'Save'. 48 | ----------------- 49 | 50 | 7. Return to the Instances page. 51 | -------------------------------- 52 | 53 | .. thumbnail:: images/network-4.png 54 | :width: 20% 55 | 56 | ---- 57 | 58 | You're done! 59 | 60 | Go back to the index: :doc:`index` 61 | -------------------------------------------------------------------------------- /aws/creating-ami.rst: -------------------------------------------------------------------------------- 1 | ************************************** 2 | Creating your own Amazon Machine Image 3 | ************************************** 4 | 5 | 1. Actions, Create image 6 | ------------------------ 7 | 8 | .. thumbnail:: images/create-ami-1.png 9 | :width: 20% 10 | 11 | 2. Fill out name and description 12 | -------------------------------- 13 | 14 | .. thumbnail:: images/create-ami-2.png 15 | :width: 20% 16 | 17 | 3. Wait for it to become available 18 | ---------------------------------- 19 | 20 | .. 
thumbnail:: images/create-ami-3.png 21 | :width: 20% 22 | 23 | Go back to the index: :doc:`index` 24 | -------------------------------------------------------------------------------- /aws/images/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/.DS_Store -------------------------------------------------------------------------------- /aws/images/add-volume-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/add-volume-1.png -------------------------------------------------------------------------------- /aws/images/add-volume-1b.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/add-volume-1b.png -------------------------------------------------------------------------------- /aws/images/add-volume-1c.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/add-volume-1c.png -------------------------------------------------------------------------------- /aws/images/add-volume-1d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/add-volume-1d.png -------------------------------------------------------------------------------- /aws/images/add-volume-2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/add-volume-2.png -------------------------------------------------------------------------------- /aws/images/add-volume-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/add-volume-3.png -------------------------------------------------------------------------------- /aws/images/add-volume-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/add-volume-4.png -------------------------------------------------------------------------------- /aws/images/add-volume-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/add-volume-5.png -------------------------------------------------------------------------------- /aws/images/add-volume-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/add-volume-6.png -------------------------------------------------------------------------------- /aws/images/bak/1.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/bak/1.tiff -------------------------------------------------------------------------------- /aws/images/bak/10.tiff: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/bak/10.tiff -------------------------------------------------------------------------------- /aws/images/bak/11.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/bak/11.tiff -------------------------------------------------------------------------------- /aws/images/bak/2.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/bak/2.tiff -------------------------------------------------------------------------------- /aws/images/bak/3.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/bak/3.tiff -------------------------------------------------------------------------------- /aws/images/bak/4.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/bak/4.tiff -------------------------------------------------------------------------------- /aws/images/bak/5.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/bak/5.tiff -------------------------------------------------------------------------------- /aws/images/bak/6.tiff: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/bak/6.tiff -------------------------------------------------------------------------------- /aws/images/bak/7.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/bak/7.tiff -------------------------------------------------------------------------------- /aws/images/bak/8.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/bak/8.tiff -------------------------------------------------------------------------------- /aws/images/bak/9.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/bak/9.tiff -------------------------------------------------------------------------------- /aws/images/bak/create-ami-1.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/bak/create-ami-1.tiff -------------------------------------------------------------------------------- /aws/images/bak/create-ami-2.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/bak/create-ami-2.tiff -------------------------------------------------------------------------------- /aws/images/bak/create-ami-3.tiff: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/bak/create-ami-3.tiff -------------------------------------------------------------------------------- /aws/images/bak/network-1.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/bak/network-1.tiff -------------------------------------------------------------------------------- /aws/images/bak/network-2.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/bak/network-2.tiff -------------------------------------------------------------------------------- /aws/images/bak/network-3.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/bak/network-3.tiff -------------------------------------------------------------------------------- /aws/images/bak/network-4.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/bak/network-4.tiff -------------------------------------------------------------------------------- /aws/images/boot-0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/boot-0.png -------------------------------------------------------------------------------- /aws/images/boot-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/boot-1.png -------------------------------------------------------------------------------- /aws/images/boot-10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/boot-10.png -------------------------------------------------------------------------------- /aws/images/boot-11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/boot-11.png -------------------------------------------------------------------------------- /aws/images/boot-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/boot-2.png -------------------------------------------------------------------------------- /aws/images/boot-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/boot-3.png -------------------------------------------------------------------------------- /aws/images/boot-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/boot-4.png -------------------------------------------------------------------------------- /aws/images/boot-5.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/boot-5.png -------------------------------------------------------------------------------- /aws/images/boot-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/boot-6.png -------------------------------------------------------------------------------- /aws/images/boot-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/boot-7.png -------------------------------------------------------------------------------- /aws/images/boot-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/boot-8.png -------------------------------------------------------------------------------- /aws/images/boot-9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/boot-9.png -------------------------------------------------------------------------------- /aws/images/create-ami-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/create-ami-1.png -------------------------------------------------------------------------------- /aws/images/create-ami-2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/create-ami-2.png -------------------------------------------------------------------------------- /aws/images/create-ami-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/create-ami-3.png -------------------------------------------------------------------------------- /aws/images/create-snapshot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/create-snapshot-1.png -------------------------------------------------------------------------------- /aws/images/create-snapshot-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/create-snapshot-2.png -------------------------------------------------------------------------------- /aws/images/create-snapshot-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/create-snapshot-3.png -------------------------------------------------------------------------------- /aws/images/create-snapshot-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/create-snapshot-4.png -------------------------------------------------------------------------------- /aws/images/network-0.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/network-0.png -------------------------------------------------------------------------------- /aws/images/network-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/network-1.png -------------------------------------------------------------------------------- /aws/images/network-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/network-2.png -------------------------------------------------------------------------------- /aws/images/network-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/network-3.png -------------------------------------------------------------------------------- /aws/images/network-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/network-4.png -------------------------------------------------------------------------------- /aws/images/orig/add-volume-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/add-volume-1.png -------------------------------------------------------------------------------- /aws/images/orig/add-volume-2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/add-volume-2.png -------------------------------------------------------------------------------- /aws/images/orig/add-volume-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/add-volume-3.png -------------------------------------------------------------------------------- /aws/images/orig/add-volume-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/add-volume-4.png -------------------------------------------------------------------------------- /aws/images/orig/add-volume-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/add-volume-5.png -------------------------------------------------------------------------------- /aws/images/orig/add-volume-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/add-volume-6.png -------------------------------------------------------------------------------- /aws/images/orig/boot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/boot-1.png -------------------------------------------------------------------------------- 
/aws/images/orig/boot-10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/boot-10.png -------------------------------------------------------------------------------- /aws/images/orig/boot-11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/boot-11.png -------------------------------------------------------------------------------- /aws/images/orig/boot-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/boot-2.png -------------------------------------------------------------------------------- /aws/images/orig/boot-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/boot-3.png -------------------------------------------------------------------------------- /aws/images/orig/boot-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/boot-4.png -------------------------------------------------------------------------------- /aws/images/orig/boot-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/boot-5.png -------------------------------------------------------------------------------- /aws/images/orig/boot-6.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/boot-6.png -------------------------------------------------------------------------------- /aws/images/orig/boot-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/boot-7.png -------------------------------------------------------------------------------- /aws/images/orig/boot-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/boot-8.png -------------------------------------------------------------------------------- /aws/images/orig/boot-9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/boot-9.png -------------------------------------------------------------------------------- /aws/images/orig/create-ami-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/create-ami-1.png -------------------------------------------------------------------------------- /aws/images/orig/create-ami-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/create-ami-2.png -------------------------------------------------------------------------------- /aws/images/orig/create-ami-3.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/create-ami-3.png -------------------------------------------------------------------------------- /aws/images/orig/create-snapshot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/create-snapshot-1.png -------------------------------------------------------------------------------- /aws/images/orig/create-snapshot-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/create-snapshot-2.png -------------------------------------------------------------------------------- /aws/images/orig/create-snapshot-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/create-snapshot-3.png -------------------------------------------------------------------------------- /aws/images/orig/create-snapshot-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/create-snapshot-4.png -------------------------------------------------------------------------------- /aws/images/orig/network-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/network-1.png 
-------------------------------------------------------------------------------- /aws/images/orig/network-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/network-2.png -------------------------------------------------------------------------------- /aws/images/orig/network-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/network-3.png -------------------------------------------------------------------------------- /aws/images/orig/network-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/network-4.png -------------------------------------------------------------------------------- /aws/images/orig/terminate-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/terminate-1.png -------------------------------------------------------------------------------- /aws/images/orig/terminate-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/terminate-2.png -------------------------------------------------------------------------------- /aws/images/orig/terminate-3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/orig/terminate-3.png -------------------------------------------------------------------------------- /aws/images/terminate-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/terminate-1.png -------------------------------------------------------------------------------- /aws/images/terminate-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/terminate-2.png -------------------------------------------------------------------------------- /aws/images/terminate-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/aws/images/terminate-3.png -------------------------------------------------------------------------------- /aws/index.rst: -------------------------------------------------------------------------------- 1 | .. labibi documentation master file, created by 2 | sphinx-quickstart on Sun Nov 4 10:10:29 2012. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Starting up an Amazon Web Services machine 7 | ========================================== 8 | 9 | Start here: :doc:`boot` 10 | ----------------------- 11 | 12 | Full table of contents: 13 | ----------------------- 14 | 15 | .. 
toctree:: 16 | :maxdepth: 2 17 | 18 | boot 19 | login-shell 20 | login-shell-unix 21 | login-shell-win 22 | configure-firewall 23 | creating-ami 24 | volumes-and-snapshots 25 | terminate-instance 26 | things-to-mention 27 | install-rstudio-server 28 | 29 | 30 | Indices and tables 31 | ================== 32 | 33 | * :ref:`genindex` 34 | * :ref:`modindex` 35 | * :ref:`search` 36 | 37 | 38 | .. @@ add AWS costs 39 | 40 | -------------------------------------------------------------------------------- /aws/install-rstudio-server.rst: -------------------------------------------------------------------------------- 1 | *********************************** 2 | Running RStudio Server in the cloud 3 | *********************************** 4 | 5 | In this section, we will run RStudio Server on a remote Amazon machine. 6 | This will require starting up an instance, configuring its network firewall, 7 | and installing and running some software. 8 | 9 | .. @@remember to terminate 10 | .. @@can we reboot and have it sart up again? 11 | .. @@diagram laying out zone etc. 12 | 13 | Reference documentation for running RStudio Server on Ubuntu: 14 | 15 | https://www.rstudio.com/products/rstudio/download-server/ 16 | 17 | ----- 18 | 19 | 1. Start up an Amazon instance 20 | ------------------------------ 21 | 22 | Start an ami-05384865 on an m4.xlarge machine, as per the instructions here: 23 | 24 | :doc:`boot`. 25 | 26 | 2. Configure your network firewall 27 | ---------------------------------- 28 | 29 | Normally, Amazon computers only allow shell logins via ssh. 30 | Since we want to run a Web service, we need to give the outside world 31 | access to other network locations on the computer. 32 | 33 | Follow these instructions: 34 | 35 | :doc:`configure-firewall` 36 | 37 | (You can do this while the computer is booting.) 38 | 39 | 3. Log in via the shell 40 | ----------------------- 41 | 42 | Follow these instructions to log in via the shell: 43 | 44 | :doc:`login-shell`. 45 | 46 | 4. 
Set a password for the 'ubuntu' account 47 | ------------------------------------------ 48 | 49 | Amazon Web Services computers normally require a key (the .pem file) 50 | instead of a login password, but RStudio Server will need us to log in 51 | with a password. So we need to configure a password for the account 52 | we're going to use (which is 'ubuntu') 53 | 54 | Create a password like so:: 55 | 56 | sudo passwd ubuntu 57 | 58 | and set it to something you'll remember. 59 | 60 | 5. Install R and the gdebi tool 61 | ------------------------------- 62 | 63 | .. @@ reference debian install instructions https://help.ubuntu.com/community/AptGet/Howto and https://www.debian.org/doc/manuals/debian-faq/ch-pkgtools.en.html 64 | 65 | Update the software catalog and install a few things:: 66 | 67 | sudo apt-get update && sudo apt-get -y install gdebi-core r-base 68 | 69 | This will take a few minutes. 70 | 71 | 6. Download & install RStudio Server 72 | ------------------------------------ 73 | 74 | :: 75 | 76 | wget https://download2.rstudio.org/rstudio-server-0.99.891-amd64.deb 77 | sudo gdebi -n rstudio-server-0.99.891-amd64.deb 78 | 79 | Upon success, you should see:: 80 | 81 | Mar 07 15:20:18 ip-172-31-6-68 systemd[1]: Starting RStudio Server... 82 | Mar 07 15:20:18 ip-172-31-6-68 systemd[1]: Started RStudio Server. 83 | 84 | 7. Open your RStudio Server instance 85 | ------------------------------------ 86 | 87 | Finally, go to 'http://' + your hostname + ':8787' in a browser, 88 | eg. :: 89 | 90 | http://ec2-XX-YY-33-165.us-west-1.compute.amazonaws.com:8787/ 91 | 92 | and log into RStudio with username 'ubuntu' and the password 93 | you set it to above. 94 | 95 | Voila! 96 | 97 | ---- 98 | 99 | You can now just go ahead and use this, or you can "stop" it, or you 100 | can freeze into an AMI for later use. 101 | 102 | Note that on reboot, RStudio Server will start up again and all your files 103 | will be there. 104 | 105 | Go back to the index: :doc:`index`. 
106 | 107 | .. @@CTB demonstrate graphing, etc. 108 | .. revisiting what we did... 109 | 110 | .. @@ rebooting; converting to AMI 111 | 112 | .. @@ meditations on file transfer 113 | -------------------------------------------------------------------------------- /aws/login-shell-unix.rst: -------------------------------------------------------------------------------- 1 | ************************************************** 2 | Log into your instance from a Mac or Linux machine 3 | ************************************************** 4 | 5 | You'll need to do two things: first, set the permissions on 6 | ``amazon-key.pem``:: 7 | 8 | chmod og-rwx ~/Downloads/amazon-key.pem 9 | 10 | Then, ssh into your new machine using your key:: 11 | 12 | ssh -i ~/Downloads/amazon-key.pem -l ubuntu MACHINE_NAE 13 | 14 | where you should replace MACHINE_NAME with the public IP or hostname 15 | of your EC2 instance, which is located at the top of the host 16 | information box (see screenshot below). It should be something like 17 | ``54.183.148.114`` or ``ec2-XXX-YYY.amazonaws.com``. 18 | 19 | Here are some screenshots! 20 | 21 | Change permissions and execute ssh 22 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 23 | 24 | .. thumbnail:: images/boot-10.png 25 | :width: 20% 26 | 27 | Successful login 28 | ~~~~~~~~~~~~~~~~ 29 | 30 | .. thumbnail:: images/boot-11.png 31 | :width: 20% 32 | 33 | Host information box - MACHINE_NAME location 34 | ============================================ 35 | 36 | .. thumbnail:: images/boot-9.png 37 | :width: 20% 38 | 39 | ---- 40 | 41 | Logging in is the starting point for most of the follow-on tutorials. 42 | For example, you can now install and run software on your EC2 instance. 
43 | 44 | Go back to the top page to continue: :doc:`index` 45 | 46 | -------------------------------------------------------------------------------- /aws/login-shell-win.rst: -------------------------------------------------------------------------------- 1 | ********************************************* 2 | Log into your instance from a Windows machine 3 | ********************************************* 4 | 5 | Go follow the instructions this URL: 6 | 7 | https://angus.readthedocs.org/en/2015/amazon/log-in-with-mobaxterm-win.html 8 | 9 | Logging in is the starting point for most of the follow-on tutorials. 10 | For example, you can now install and run software on your EC2 instance. 11 | 12 | Go back to the top page to continue: :doc:`index` 13 | -------------------------------------------------------------------------------- /aws/login-shell.rst: -------------------------------------------------------------------------------- 1 | ****************************************** 2 | Log into your instance with the UNIX shell 3 | ****************************************** 4 | 5 | You will need the ``amazon-key.pem`` file that was downloaded in 6 | step 11 of booting up your new instance (see :doc:`boot`). 7 | 8 | Then, you can either :doc:`login-shell-unix` or :doc:`login-shell-win`. 9 | 10 | Log into your instance via the UNIX shell (Mac/Linux) 11 | ===================================================== 12 | 13 | See: :doc:`login-shell-unix` 14 | 15 | Log into your instance via MobaXTerm (Windows) 16 | ============================================== 17 | 18 | See: :doc:`login-shell-win` 19 | 20 | ----- 21 | 22 | Logging in is the starting point for most of the follow-on tutorials. 23 | For example, you can now install and run software on your EC2 instance. 
24 | 25 | Go back to the top page to continue: :doc:`index` 26 | -------------------------------------------------------------------------------- /aws/terminate-instance.rst: -------------------------------------------------------------------------------- 1 | ************************* 2 | Terminating your instance 3 | ************************* 4 | 5 | Amazon will happily charge you for running instances and/or associated 6 | ephemeral storage until the cows come home - it's your responsibility 7 | to turn things off. The Right Way to do this for running instances 8 | is to terminate. 9 | 10 | The caveat here is that *everything ephemeral* will be deleted 11 | (excluding volumes that you created/attached). So you want to make sure 12 | you transfer off anything you care about. 13 | 14 | To terminate: 15 | 16 | 1. Select Actions, Instance State, Terminate 17 | -------------------------------------------- 18 | 19 | In the 'Instances' tab, select your instance and then go to the Actions menu. 20 | 21 | .. thumbnail:: images/terminate-1.png 22 | :width: 20% 23 | 24 | 2. Agree to terminate. 25 | ---------------------- 26 | 27 | .. thumbnail:: images/terminate-2.png 28 | :width: 20% 29 | 30 | 3. Verify status on your instance page. 31 | --------------------------------------- 32 | 33 | Instance state should be either "shutting down" or "terminated". 34 | 35 | .. thumbnail:: images/terminate-3.png 36 | :width: 20% 37 | 38 | ----- 39 | 40 | Return to index: :doc:`index` 41 | -------------------------------------------------------------------------------- /aws/things-to-mention.rst: -------------------------------------------------------------------------------- 1 | ***************************** 2 | Things to mention and discuss 3 | ***************************** 4 | 5 | When do disks go away? 
6 | ---------------------- 7 | 8 | * never on reboot; 9 | * ephemeral disks go away on stop; 10 | * AMI-attached volumes go away on terminate; 11 | * attached volumes never go away on terminate and have to be 12 | explicitly deleted; 13 | * snapshots only go away when you explicitly delete them. 14 | 15 | What are you charged for? 16 | ------------------------- 17 | 18 | * you are charged for a running instance at the @@instance price rates; 19 | * ephemeral storage/instance-specific storage is included within that. 20 | 21 | * when you stop an instance, you are charged at disk-space rates for 22 | the stopped disk; 23 | 24 | * when you create a volume, you are charged for that volume until you delete 25 | it; 26 | 27 | * when you create a snapshot, you are charged for that snapshot until you 28 | delete it. 29 | 30 | To make sure you're not getting charged, go to your Instance view and 31 | clear all search filters; anything that is "running" or "stopped" is 32 | costing you. Also check your volumes and your snapshots - they should be 33 | empty. 34 | 35 | .. @@ account details/running costs link? 36 | 37 | ---- 38 | 39 | Regions vs zones: 40 | ----------------- 41 | 42 | * AMIs and Snapshots (and keys and security groups) are per region; 43 | * Volumes and instances are per zone; 44 | 45 | .. @@ image 46 | -------------------------------------------------------------------------------- /aws/volumes-and-snapshots.rst: -------------------------------------------------------------------------------- 1 | ****************************************************** 2 | Working with persistent storage: volumes and snapshots 3 | ****************************************************** 4 | 5 | Volumes are basically UNIX disks ("block devices") that will persist 6 | after you terminate your instance. They are tied to a zone within a 7 | region and can only be mounted on instances within that zone. 
8 | 9 | Snapshots are an Amazon-specific thing that let you communicate data 10 | on volumes between accounts. They are "read-only" backups that are 11 | created from volumes; they can be used to create new volumes in turn, 12 | and can also be shared with specific people (or made public). 13 | Snapshots are tied to a region but not a zone. 14 | 15 | Creating persistent volumes to store data 16 | ========================================= 17 | 18 | 0. Locate your instance *zone* 19 | ------------------------------ 20 | 21 | .. thumbnail:: images/add-volume-1d.png 22 | :width: 20% 23 | 24 | 1. Click on the volumes tab 25 | --------------------------- 26 | 27 | .. thumbnail:: images/add-volume-1.png 28 | :width: 20% 29 | 30 | 2. 'Create Volume' 31 | ------------------ 32 | 33 | .. thumbnail:: images/add-volume-1b.png 34 | :width: 20% 35 | 36 | 3. Configure your volume to have the same zone as your instance 37 | --------------------------------------------------------------- 38 | 39 | .. thumbnail:: images/add-volume-1c.png 40 | :width: 20% 41 | 42 | 4. Wait for your volume to be available 43 | --------------------------------------- 44 | 45 | .. thumbnail:: images/add-volume-2.png 46 | :width: 20% 47 | 48 | 5. Select volume, Actions, Attach volume 49 | ---------------------------------------- 50 | 51 | .. thumbnail:: images/add-volume-3.png 52 | :width: 20% 53 | 54 | 6. Select instance, attachment point, and Attach 55 | ------------------------------------------------ 56 | 57 | Here, your attachment point will be '/dev/sdf' and your block device will 58 | be named '/dev/xvdf'. 59 | 60 | .. thumbnail:: images/add-volume-4.png 61 | :width: 20% 62 | 63 | 7. 
On your instance, list block devices 64 | --------------------------------------- 65 | 66 | Type:: 67 | 68 | lsblk 69 | 70 | You should see something like this:: 71 | 72 | NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT 73 | xvda 202:0 0 8G 0 disk 74 | `-xvda1 202:1 0 8G 0 part / 75 | xvdf 202:80 0 100G 0 disk 76 | 77 | Now format the disk (ONLY ON EMPTY DISKS - THIS WILL ERASE ANY DATA ON 78 | THE DISK):: 79 | 80 | sudo mkfs -t ext4 /dev/xvdf 81 | 82 | and mount the disk:: 83 | 84 | sudo mkdir /disk 85 | sudo mount /dev/xvdf /disk 86 | sudo chmod a+rwxt /disk 87 | 88 | and voila, anything you put on /disk will be on the volume that you allocated! 89 | 90 | The command 'df -h' will show you what disks are actually mounted & where. 91 | 92 | Detaching volumes 93 | ----------------- 94 | 95 | 1. Unmount it from the instance 96 | ------------------------------- 97 | 98 | Change out of the directory, stop any running programs using it, and then:: 99 | 100 | sudo umount /disk 101 | 102 | 2. Detach 103 | --------- 104 | 105 | On the 'volumes' tab in your EC2 console, go to Actions, Detach. 106 | 107 | .. thumbnail:: images/add-volume-5.png 108 | :width: 20% 109 | 110 | 3. Yes, detach. 111 | --------------- 112 | 113 | .. thumbnail:: images/add-volume-6.png 114 | :width: 20% 115 | 116 | Note, volumes remain attached when you reboot or stop an instance, but 117 | are (of course) detached when you terminate an instance. 118 | 119 | Creating snapshots of volumes 120 | ============================= 121 | 122 | 1. Actions, Create snapshot 123 | --------------------------- 124 | 125 | .. thumbnail:: images/create-snapshot-1.png 126 | :width: 20% 127 | 128 | 2. Fill out name and description 129 | -------------------------------- 130 | 131 | .. thumbnail:: images/create-snapshot-2.png 132 | :width: 20% 133 | 134 | 3. Click 'Close' & wait. 135 | ------------------------ 136 | 137 | .. 
thumbnail:: images/create-snapshot-3.png 138 | :width: 20% 139 | -------------------------------------------------------------------------------- /binning.md: -------------------------------------------------------------------------------- 1 | 2 | # Binning a Metagenomic Assembly 3 | 4 | A common approach following metagenome assembly is binning, a process by which assembled contigs are collected into groups or 'bins' that might then be assigned some taxonomic affiliation. There are many different tools that can be used for binning (see [CAMI review for more details](http://biorxiv.org/content/early/2017/01/09/099127)). Here, we will be using [MaxBin](https://microbiomejournal.biomedcentral.com/articles/10.1186/2049-2618-2-26) and [MetaBAT](https://bitbucket.org/berkeleylab/metabat), which are both user friendly and highly cited. To use these binners, we will first need to map our data against the assembled metagenome using bwa and then estimate relative abundances by contig. We will then inspect the bins generated by MaxBin and MetaBAT using VizBin. 5 | 6 | ### Installing binners 7 | MaxBin 8 | 9 | ``` 10 | cd 11 | curl https://downloads.jbei.org/data/microbial_communities/MaxBin/getfile.php?MaxBin-2.2.2.tar.gz > MaxBin-2.2.2.tar.gz 12 | tar xzvf MaxBin-2.2.2.tar.gz 13 | cd MaxBin-2.2.2/src 14 | make 15 | cd 16 | git clone https://github.com/COL-IU/FragGeneScan.git 17 | cd FragGeneScan 18 | make clean 19 | make fgs 20 | cd 21 | git clone https://github.com/loneknightpy/idba.git 22 | cd idba 23 | ./build.sh 24 | sudo apt-get install bowtie2 hmmer 25 | export PATH=$PATH:~/idba/bin 26 | export PATH=$PATH:~/FragGeneScan 27 | export PATH=$PATH:~/MaxBin-2.2.2 28 | cd 29 | ``` 30 | MetaBAT 31 | 32 | ``` 33 | cd 34 | curl -L https://bitbucket.org/berkeleylab/metabat/downloads/metabat-static-binary-linux-x64_v0.32.4.tar.gz > metabatv0.32.4.tar.gz 35 | tar xvf metabatv0.32.4.tar.gz 36 | ``` 37 | Time to finally run the Binners! 
38 | **Note**: MaxBin can take a lot of time to run and bin your metagenome. As this is a workshop, we are doing two things that sacrifice *quality* for *speed*. 39 | 40 | 1. We are only using 2 of the 6 datasets that were generated for the 41 | this project. Most binning software, rely upon 42 | many samples to accurately bin data. And, we have subsampled the 43 | data to make it faster to proess. 44 | 45 | 2. We are limiting the number of iterations that are performed through 46 | the MaxBin expectation-maximization algorithm (5 iterations instead of 47 | 50+). This will likely limit the quality of the bins we get 48 | out. So, users beware and read [the user's manual](https://downloads.jbei.org/data/microbial_communities/MaxBin/README.txt) 49 | before proceeding with your own data analysis. 50 | 51 | ### Binning 1 - MaxBin 52 | -- 53 | 54 | Maxbin uses **read coverage** & **tetranucleotide frequencies** for each contig, and **marker gene counts** for each bin 55 | 56 | First, we will get a list of the count files that we have to pass to MaxBin 57 | 58 | ``` 59 | mkdir ~/binning 60 | cd ~/binning 61 | mkdir maxbin 62 | cd maxbin 63 | ls ~/mapping/*coverage.tab > abundance.list 64 | ``` 65 | Now, on to the actual binning 66 | 67 | ``` 68 | run_MaxBin.pl -contig ~/mapping/subset_assembly.fa -abund_list abundance.list -max_iteration 5 -out mbin 69 | ``` 70 | 71 | This will generate a series of files. Take a look at the files generated. In particular you should see a series of \*.fasta files preceded by numbers. These are the different genome bins predicted by MaxBin. 72 | 73 | Take a look at the mbin.summary file. What is shown? 74 | 75 | Now, we are going to generate a concatenated file that contains all of our genome bins put together. We will change the fasta header name to include the bin number so that we can tell them apart later. 
76 | 77 | ``` 78 | for file in mbin.*.fasta 79 | do 80 | num=${file//[!0-9]/} 81 | sed -e "/^>/ s/$/ ${num}/" mbin.$num.fasta >> maxbin_binned.concat.fasta 82 | done 83 | ``` 84 | And finally make an annotation file for visualization 85 | 86 | ``` 87 | echo label > maxbin_annotation.list 88 | grep ">" maxbin_binned.concat.fasta |cut -f2 -d ' '>> maxbin_annotation.list 89 | ``` 90 | ### Binning 2 - MetaBAT 91 | -- 92 | 93 | MetaBAT uses **read coverage**, **coverage variance**, & **tetranucleotide frequencies** for each contig. This is done with a custom script 94 | 95 | ``` 96 | cd ~/binning 97 | mkdir metabat 98 | cd metabat 99 | ln -fs ~/mapping/*abundtrim*sorted.bam . 100 | ~/metabat/jgi_summarize_bam_contig_depths --outputDepth depth_var.txt *bam 101 | ``` 102 | Run MetaBAT script 103 | 104 | *Note that we are outputting info to a logfile* 105 | 106 | ``` 107 | ~/metabat/metabat -i ~/mapping/subset_assembly.fa -a depth_var.txt --verysensitive -o metabat -v > log.txt 108 | ``` 109 | Make the .fasta file of all binned sequences 110 | 111 | ``` 112 | for file in metabat.*.fa 113 | do 114 | num=${file//[!0-9]/} 115 | sed -e "/^>/ s/$/ ${num}/" metabat.$num.fa >> metabat_binned.concat.fasta 116 | done 117 | ``` 118 | Make an annotation file of the bin numbers for annotation in VizBin 119 | 120 | ``` 121 | echo label > metabat_annotation.list 122 | grep ">" metabat_binned.concat.fasta |cut -f2 -d ' '>> metabat_annotation.list 123 | ``` 124 | 125 | ## Visualizing the bins 126 | 127 | Now that we have our binned data from both MetaBAT and MaxBin there are several different things we can do. One thing we might want to do is check the quality of the binning-- a useful tool for this is [CheckM](http://ecogenomics.github.io/CheckM/). Today, for the sake of time, we will visualize the bins that we just generated using VizBin. 
128 | 129 | First, install VizBin:: 130 | 131 | ``` 132 | cd 133 | sudo apt-get install libatlas3-base libopenblas-base default-jre 134 | curl -L https://github.com/claczny/VizBin/blob/master/VizBin-dist.jar?raw=true > VizBin-dist.jar 135 | ``` 136 | 137 | VizBin can run in OSX, Linux, and Windows but is has specific Java requirements (> 7.0). 138 | 139 | ## Optional 140 | 141 | To simplify things we are going to run VizBin in the desktop emulator through JetStream (which is ... a bit clunky). So, go back to the Jetstream and open up the web desktop simulator. 142 | 143 | ![](./files/VizBin-OpenDesktop.png) 144 | 145 | Open the terminal through the desktop simulator and open VizBin: 146 | 147 | ``` 148 | java -jar VizBin-dist.jar 149 | ``` 150 | This should prompt VizBin to open in another window. First we will look at the output of the MaxBin assembly. Click the choose button to open file browser to navigate to the binning folder (`~/mapping/binning`). There you will find the concatenated binned fasta file (`maxbin_binned.concat.fasta`). Upload this file and hit run. 151 | 152 | ![](./files/VizBin-LoadFile.png) 153 | 154 | What do you see? Read up a bit on [VizBin](https://microbiomejournal.biomedcentral.com/articles/10.1186/s40168-014-0066-1) to see how the visualization is generated. 155 | 156 | Now, upload the maxbin_annotation.list file as an annotation file to VizBin. The annotation file contains the bin id for each of the contigs in the assembly that were binned. 157 | 158 | ![](./files/VizBin-AddFiles.png) 159 | 160 | Now, do the same for MetaBat! 161 | 162 | Compare the results of the two binning methods- 163 | - How many bins were found? 164 | - How distinct are the bins? 
165 | -------------------------------------------------------------------------------- /circos-build.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/circos-build.tar.gz -------------------------------------------------------------------------------- /circos_tutorial.rst: -------------------------------------------------------------------------------- 1 | ====================================== 2 | Using and Installing Circos 3 | ====================================== 4 | 5 | .. 6 | 7 | Circos is a powerful visualization tool that allows for the creation of circular graphics to display complex genomic data (e.g. genome comparisons). On top of the circular ideogram generated can be layered any number of graphical information (heatmaps, scatter plots, etc.). 8 | 9 | The goals of this tutorial are to: 10 | 11 | * Install circos on your Ubuntu system 12 | * Use Circos to visualize our metagenomic data 13 | 14 | Note: Beyond this brief crash course , circos is very well-documented and has a great series of `tutorials `__ and `course `__ materials that are useful. 15 | 16 | Installing Circos 17 | ================================================== 18 | 19 | You'll need to install one additional ubuntu package, libgd:: 20 | 21 | sudo apt-get -y install libgd-perl 22 | 23 | Make a directory called circos and navigate into it. There, we will download and extract the latest version of circos: 24 | :: 25 | cd 26 | mkdir circos 27 | cd circos 28 | curl -O http://dib-training.ucdavis.edu.s3.amazonaws.com/metagenomics-scripps-2016-10-12/circos-0.69-3.tar.gz 29 | tar -xvzf circos-0.69-3.tar.gz 30 | 31 | Circos runs within Perl and as such does not need to be compiled to run. So, we can just add the location of circos to our path variable. (Alternatively, you can append this statement to the end of your ``.bashrc`` file.) 
32 | :: 33 | export PATH=~/circos/circos-0.69-3/bin:$PATH 34 | 35 | Circos does, however, require quite a few additional perl modules to operate correctly. To see what modules are missing and need to be downloaded type the following: 36 | :: 37 | circos -modules > modules 38 | 39 | Now, to download all of these we will be using CPAN, a package manager for perl. We are going to pick out all the missing modules and then loop over those modules and download them using cpan. 40 | :: 41 | grep missing modules |cut -f13 -d " " > missing_modules 42 | for mod in $(cat missing_modules); 43 | do 44 | sudo cpan install $mod; 45 | done 46 | 47 | This will take a while to run. When it is done check that you now have all modules downloaded by typing: 48 | :: 49 | circos -modules 50 | 51 | If you got all 'ok' then you are good to go! 52 | 53 | And with that, circos should be up and ready to go. Run the example by navigating to the examples folder within the circos folder. 54 | :: 55 | cd ~/circos/circos-0.69-3/example 56 | bash run 57 | 58 | This will take a little bit to run but should generate a file called ``circos.png``. Open it and you can get an idea of the huge variety of things that are possible with circos and a lot of patience. We will not be attempting anything that complex today, however. 59 | 60 | Visualizing Gene Coverage and Orientation 61 | ========================================== 62 | First, let's make a directory where we will be doing all of our work for plotting: 63 | :: 64 | mkdir ~/circos/plotting 65 | cd ~/circos/plotting 66 | 67 | Now, link in the ``*gff`` file output from prokka (which we will use to define the location of genes in each of our genomes), the genome assembly file ``final.contigs.fa``, and the ``SRR*counts`` files that we generated with salmon: 68 | :: 69 | ln -fs ~/data/prokka_annotation/*gff . 70 | ln -fs ~/data/final.contigs.fa . 71 | ln -fs ~/quant/*counts . 
72 | 73 | We also need to grab a set of useful scripts and config files for this plotting exercise: 74 | :: 75 | curl -L -O https://github.com/ngs-docs/2016-metagenomics-sio/raw/master/circos-build.tar.gz 76 | tar -xvzf circos-build.tar.gz 77 | curl -L -O https://s3-us-west-1.amazonaws.com/dib-training.ucdavis.edu/metagenomics-scripps-2016-10-12/subset_assembly.fa.gz 78 | gunzip subset_assembly.fa.gz 79 | mv subset_assembly.fa final.contigs.fa 80 | 81 | We are going to limit the data we are trying to visualize and get longest contigs from our assembly. We can do this using a script from the khmer package: 82 | :: 83 | 84 | extract-long-sequences.py final.contigs.fa -l 24000 -o final.contigs.long.fa 85 | cp ~/data/quant/*counts . 86 | 87 | Next, we will run a script that processes the data from the the files that we just moved to create circos-acceptable files. This is really the crux of using circos: figuring out how to get your data into the correct format. 88 | :: 89 | 90 | python parse_data_for_circos.py 91 | 92 | If you are interested-- take a look at the script and the input files to see how these data were manipulated. 93 | 94 | Circos operates off of three main types of files: 1) a config files that dictate the style and inputs to your circos plot, 2) a karyotype file that defines the size and layout of your "chromosomes", and 3) any data files that you call in your config file that detail attributes you want to plot. 95 | 96 | The above script generated our karyotype file and four different data files. What are they? How are they oriented? 97 | 98 | Now, we all that is left is actually running circos. Navigate into the circos-build directory and type ``circos``: 99 | :: 100 | cd circos-build 101 | circos 102 | 103 | This command should generate an ``circos.svg`` and ``circos.png``. Check out the ``circos.png``! 104 | 105 | Now, let's take a look at the file that controls this crazy figure-- ``circos.config``. 
106 | 107 | Try changing a few parameters-- colors, radius, size, to see what you can do. Again, if you are into this type of visualization, do check out the extensive `tutorial `__. 108 | 109 | References 110 | =========== 111 | * http://genome.cshlp.org/content/early/2009/06/15/gr.092759.109.abstract 112 | * http://circos.ca/documentation/tutorials 113 | * http://circos.ca/documentation/course/ 114 | -------------------------------------------------------------------------------- /day2-install.rst: -------------------------------------------------------------------------------- 1 | ================================= 2 | Day 2 - installation instructions 3 | ================================= 4 | 5 | (Instructions mostly copied from :doc:`quality`!) 6 | 7 | Use image "Ubuntu 14.04.3" 8 | 9 | Run:: 10 | 11 | sudo apt-get -y update && \ 12 | sudo apt-get -y install trimmomatic fastqc python-pip \ 13 | samtools zlib1g-dev ncurses-dev python-dev 14 | 15 | Install anaconda:: 16 | 17 | curl -O https://repo.continuum.io/archive/Anaconda3-4.2.0-Linux-x86_64.sh 18 | bash Anaconda3-4.2.0-Linux-x86_64.sh 19 | 20 | Then update your environment and install `khmer `__ 21 | and `sourmash `__:: 22 | 23 | source ~/.bashrc 24 | 25 | conda install -n root pip -y 26 | pip install https://github.com/dib-lab/khmer/archive/master.zip 27 | pip install https://github.com/dib-lab/sourmash/archive/2017-ucsc-metagenome.zip 28 | 29 | (See `the sourmash docs for this workshop `__ for some 30 | details on the sourmash install.) 31 | 32 | Running Jupyter Notebook 33 | ------------------------ 34 | 35 | Let's also run a Jupyter Notebook in your home directory. Configure 36 | it a teensy bit more securely, and also have it run in the background. 37 | 38 | Generate a config:: 39 | 40 | jupyter notebook --generate-config 41 | 42 | Add a password, have it not run a browser, and put it on port 8000 43 | by default:: 44 | 45 | cat >> ~/.jupyter/jupyter_notebook_config.py <`__ . 
75 | -------------------------------------------------------------------------------- /files/2014-zhang.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/files/2014-zhang.png -------------------------------------------------------------------------------- /files/OpenDesktop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/files/OpenDesktop.png -------------------------------------------------------------------------------- /files/VizBin-AddFiles.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/files/VizBin-AddFiles.png -------------------------------------------------------------------------------- /files/VizBin-LoadFile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/files/VizBin-LoadFile.png -------------------------------------------------------------------------------- /files/VizBin-MoreOptions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/files/VizBin-MoreOptions.png -------------------------------------------------------------------------------- /files/VizBin-OpenDesktop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/files/VizBin-OpenDesktop.png 
def calc_cov(infile):
    """Compute a per-sequence coverage value from a depth-histogram table.

    ``infile`` is read as a headerless, tab-separated table. The first column
    is used as the row index (the sequence name) and the remaining four
    columns are labelled ``depth``, ``numbase``, ``lens`` and ``fraction``.
    NOTE(review): this layout looks like ``bedtools genomecov`` histogram
    output -- confirm against the command that produces the input.

    For every row the contribution ``depth * numbase / lens`` is computed,
    and the contributions are summed per sequence name. A summary row named
    ``genome`` is removed when present. The result is written, tab-separated,
    to ``infile + '.coverage.tab'``.

    Parameters
    ----------
    infile : str
        Path of the input table; also the prefix of the output file name.

    Returns
    -------
    None
    """
    df = pd.read_csv(infile, sep='\t', header=None, index_col=0)
    df.columns = ['depth', 'numbase', 'lens', 'fraction']
    df['coverage'] = (df.depth * df.numbase) / df.lens

    # Sum only the column we need instead of aggregating every column and
    # discarding all but one afterwards.
    out = df.groupby(level=0)['coverage'].sum()

    # The whole-genome summary row is not a contig. Tolerate inputs that do
    # not contain it (e.g. pre-filtered tables) instead of raising KeyError.
    out = out.drop('genome', errors='ignore')

    out.to_csv(infile + '.coverage.tab', sep='\t')
def main():
    """Copy the sequences whose names appear in a list file to a new FASTA file.

    Command-line arguments (positional):
      listfile  -- text file with one sequence name per line
      ffn_file  -- sequence file, opened with ``screed.open``
      outfile   -- path of the FASTA file to write

    A record is kept when the first whitespace-delimited token of its name is
    in the list. Kept records are written as ``>name`` followed by the
    sequence on a single line. Progress is printed every 1000 records as
    ``... <records seen> <records kept>``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('listfile')
    parser.add_argument('ffn_file')
    parser.add_argument('outfile')
    args = parser.parse_args()

    # Blank lines are skipped so that the reported count reflects real names.
    with open(args.listfile) as listfp:
        nameset = set(line.strip() for line in listfp if line.strip())
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('loaded %d names' % (len(nameset),))

    m = 0
    # The context manager guarantees the output is flushed and closed even if
    # reading the sequence file fails part-way through.
    with open(args.outfile, 'w') as outfp:
        for n, record in enumerate(screed.open(args.ffn_file)):
            if n % 1000 == 0:
                print('... %d %d' % (n, m))
            name = record.name.split()[0]
            if name in nameset:
                # keep the sequence
                m += 1
                outfp.write('>%s\n%s\n' % (record.name, record.sequence))
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/files/kmer-trimming.graffle -------------------------------------------------------------------------------- /files/kmer-trimming.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/files/kmer-trimming.png -------------------------------------------------------------------------------- /files/read-vs-contig-alignment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/files/read-vs-contig-alignment.png -------------------------------------------------------------------------------- /files/sourmash_tetramer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Use sourmash to do tetramer nt frequency analysis\n", 8 | "\n", 9 | "Below, we're going to use [sourmash](http://sourmash.readthedocs.io/en/latest/) to look at the tetramer content (k=4) of our assembled contigs.\n", 10 | "\n", 11 | "## First, construct some signatures by running 'sourmash' at the command line." 
12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 5, 17 | "metadata": { 18 | "collapsed": false 19 | }, 20 | "outputs": [ 21 | { 22 | "name": "stdout", 23 | "output_type": "stream", 24 | "text": [ 25 | "/home/tx160085\n" 26 | ] 27 | } 28 | ], 29 | "source": [ 30 | "cd" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 6, 36 | "metadata": { 37 | "collapsed": false 38 | }, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "mkdir: cannot create directory ‘sourmash’: File exists\r\n" 45 | ] 46 | } 47 | ], 48 | "source": [ 49 | "!mkdir sourmash" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 7, 55 | "metadata": { 56 | "collapsed": false 57 | }, 58 | "outputs": [ 59 | { 60 | "name": "stdout", 61 | "output_type": "stream", 62 | "text": [ 63 | "/home/tx160085/sourmash\n" 64 | ] 65 | } 66 | ], 67 | "source": [ 68 | "cd ~/sourmash" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 8, 74 | "metadata": { 75 | "collapsed": false 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "ls" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 9, 85 | "metadata": { 86 | "collapsed": false 87 | }, 88 | "outputs": [ 89 | { 90 | "name": "stdout", 91 | "output_type": "stream", 92 | "text": [ 93 | "\n", 94 | "|| This is the script extract-long-sequences.py in khmer.\n", 95 | "|| You are running khmer version 0+unknown\n", 96 | "|| You are also using screed version 1.0\n", 97 | "||\n", 98 | "|| If you use this script in a publication, please cite EACH of the following:\n", 99 | "||\n", 100 | "|| * MR Crusoe et al., 2015. 
http://dx.doi.org/10.12688/f1000research.6924.1\n", 101 | "||\n", 102 | "|| Please see http://khmer.readthedocs.io/en/latest/citations.html for details.\n", 103 | "\n", 104 | "wrote to: long-contigs.fa\n" 105 | ] 106 | } 107 | ], 108 | "source": [ 109 | "# extract 5kb or longer sequences using khmer's 'extract-long-sequences'\n", 110 | "!extract-long-sequences.py -l 5000 ../mapping/subset_assembly.fa -o long-contigs.fa" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 10, 116 | "metadata": { 117 | "collapsed": false 118 | }, 119 | "outputs": [ 120 | { 121 | "name": "stdout", 122 | "output_type": "stream", 123 | "text": [ 124 | "# running sourmash subcommand: compute\n", 125 | "setting num_hashes to 0 because --scaled is set\n", 126 | "computing signatures for files: long-contigs.fa\n", 127 | "Computing signature for ksizes: [4, 5]\n", 128 | "Computing only DNA (and not protein) signatures.\n", 129 | "Computing a total of 2 signatures.\n", 130 | "Tracking abundance of input k-mers.\n", 131 | "calculated 1014 signatures for 507 sequences in long-contigs.fa\n" 132 | ] 133 | } 134 | ], 135 | "source": [ 136 | "# compute tetramer and pentamer nucleotide frequences for each contig\n", 137 | "# --singleton says \"compute signature for each sequence\"\n", 138 | "# -k 4,5 is ksizes to compute signatures for\n", 139 | "# --track-abundance says \"calculate frequencies of k-mers\"\n", 140 | "# --scaled=1 says \"don't subsample at all, keep all tetramers\"\n", 141 | "\n", 142 | "!sourmash compute --singleton long-contigs.fa -o long-contigs.sig -k 4,5 --track-abundance --scaled=1" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": { 149 | "collapsed": false 150 | }, 151 | "outputs": [ 152 | { 153 | "name": "stdout", 154 | "output_type": "stream", 155 | "text": [ 156 | "# running sourmash subcommand: compare\n", 157 | "loading long-contigs.sig\n", 158 | "...sig loading 506\n" 159 | ] 160 | } 161 | ], 
162 | "source": [ 163 | "# compare all x all and save to a numpy matrix 'long-contigs.matrix'\n", 164 | "!sourmash compare long-contigs.sig -o long-contigs.matrix -k 4 > /dev/null" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "metadata": {}, 170 | "source": [ 171 | "## Now, plot the comparisons using sourmash's internal API\n", 172 | "\n", 173 | "(This could be done at the command line with `sourmash plot`, too.)" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": { 180 | "collapsed": false 181 | }, 182 | "outputs": [], 183 | "source": [ 184 | "%matplotlib inline\n", 185 | "from sourmash_lib import fig\n", 186 | "import pylab" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": { 193 | "collapsed": false 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "# note: you can adjust plots with 'vmin=0.3' or some such in the plot_composite_matrix function.\n", 198 | "matrix, labels = fig.load_matrix_and_labels('long-contigs.matrix')\n", 199 | "_ = fig.plot_composite_matrix(matrix, labels, show_labels=False)\n", 200 | "pylab.savefig('tetramer.png')" 201 | ] 202 | } 203 | ], 204 | "metadata": { 205 | "anaconda-cloud": {}, 206 | "kernelspec": { 207 | "display_name": "Python [conda root]", 208 | "language": "python", 209 | "name": "conda-root-py" 210 | }, 211 | "language_info": { 212 | "codemirror_mode": { 213 | "name": "ipython", 214 | "version": 3 215 | }, 216 | "file_extension": ".py", 217 | "mimetype": "text/x-python", 218 | "name": "python", 219 | "nbconvert_exporter": "python", 220 | "pygments_lexer": "ipython3", 221 | "version": "3.5.2" 222 | } 223 | }, 224 | "nbformat": 4, 225 | "nbformat_minor": 1 226 | } 227 | -------------------------------------------------------------------------------- /gather-counts.py: -------------------------------------------------------------------------------- 1 | #! 
def process_quant_file(root, filename, outname):
    """
    Convert an individual quant.sf file into a .counts file.

    The input ``os.path.join(root, filename)`` is read as tab-separated text
    with exactly five fields per line (name, length, effective length, TPM,
    count). The line starting with ``Name`` (the header) and blank lines are
    skipped. ``outname`` receives a ``transcript<TAB>count`` header followed
    by one ``name<TAB>value`` line per input record.

    NOTE(review): the value written is the TPM field, not the count field,
    even though the header and file extension say "count" -- confirm this is
    what the downstream analysis expects.
    """
    sys.stderr.write('Loading counts from: %s %s\n' % (root, filename))
    full_file = os.path.join(root, filename)

    # Output is opened first, as before, so it is created (or truncated) even
    # when the input turns out to be unreadable. Both handles are closed on
    # exit from the block.
    with open(outname, 'w') as outfp, open(full_file) as infp:
        outfp.write("transcript\tcount\n")
        for line in infp:
            if line.startswith('Name'):
                continue
            if not line.strip():
                # A stray blank line would otherwise break the unpack below.
                continue
            name, length, eff_length, tpm, count = line.strip().split('\t')
            outfp.write("%s\t%s\n" % (name, float(tpm)))


def main():
    """
    Find all the quant.sf files below the current directory and convert them
    into properly named .counts files.

    Here, "proper name" means "directory.counts", where "directory" is the
    base name of the directory that holds the quant.sf file. The .counts
    files are written to the current working directory. When finished, the
    sorted output names are printed to stdout, each in double quotes and
    separated by ",\\n".
    """
    quantlist = []

    start_dir = '.'
    sys.stderr.write('Starting in: %s\n' % (os.path.abspath(start_dir),))
    # Walk the same directory that was just reported above.
    for root, dirs, files in os.walk(start_dir):
        for filename in files:
            if filename.endswith('quant.sf'):
                dirname = os.path.basename(root)
                outname = dirname + '.counts'
                process_quant_file(root, filename, outname)
                quantlist.append(outname)

                # One quant file per directory: stop scanning this directory.
                # NOTE(review): indentation was lost in this copy; the break is
                # assumed to belong to the if-body -- confirm against upstream.
                break

    # write() instead of the print statement keeps the script runnable on
    # both Python 2 and Python 3 with identical output.
    sys.stdout.write(",\n".join(["\"%s\"" % i for i in sorted(quantlist)]) + "\n")
https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/Slide1.jpg -------------------------------------------------------------------------------- /img/bootcamps/2012-11-scripps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/bootcamps/2012-11-scripps.png -------------------------------------------------------------------------------- /img/bootcamps/2012-12-uta.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/bootcamps/2012-12-uta.png -------------------------------------------------------------------------------- /img/bootcamps/2013-01-mcgill.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/bootcamps/2013-01-mcgill.png -------------------------------------------------------------------------------- /img/bootcamps/2013-01-mckellar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/bootcamps/2013-01-mckellar.png -------------------------------------------------------------------------------- /img/creative-commons-attribution-license.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/creative-commons-attribution-license.png -------------------------------------------------------------------------------- /img/csv-mistake.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/csv-mistake.png -------------------------------------------------------------------------------- /img/excel-to-csv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/excel-to-csv.png -------------------------------------------------------------------------------- /img/excel_tables_example_sk_e1_p1_wrl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/excel_tables_example_sk_e1_p1_wrl.png -------------------------------------------------------------------------------- /img/excel_tables_example_sk_e2_p1_wrl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/excel_tables_example_sk_e2_p1_wrl.png -------------------------------------------------------------------------------- /img/excel_tables_example_sk_e2_p2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/excel_tables_example_sk_e2_p2.png -------------------------------------------------------------------------------- /img/gvng.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/gvng.jpg -------------------------------------------------------------------------------- /img/ip-address.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/ip-address.png -------------------------------------------------------------------------------- /img/lessons/swc-shell/absolute_path.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/lessons/swc-shell/absolute_path.png -------------------------------------------------------------------------------- /img/lessons/swc-shell/absolute_relative_path.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/lessons/swc-shell/absolute_relative_path.png -------------------------------------------------------------------------------- /img/lessons/swc-shell/decwriter.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/lessons/swc-shell/decwriter.jpg -------------------------------------------------------------------------------- /img/lessons/swc-shell/direct_shell_usage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/lessons/swc-shell/direct_shell_usage.png -------------------------------------------------------------------------------- /img/lessons/swc-shell/filedir_challenge.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/lessons/swc-shell/filedir_challenge.png -------------------------------------------------------------------------------- /img/lessons/swc-shell/filesystem.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/lessons/swc-shell/filesystem.png -------------------------------------------------------------------------------- /img/lessons/swc-shell/find_file_tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/lessons/swc-shell/find_file_tree.png -------------------------------------------------------------------------------- /img/lessons/swc-shell/google_vs_grep.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/lessons/swc-shell/google_vs_grep.png -------------------------------------------------------------------------------- /img/lessons/swc-shell/home_directories.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/lessons/swc-shell/home_directories.png -------------------------------------------------------------------------------- /img/lessons/swc-shell/nano.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/lessons/swc-shell/nano.png -------------------------------------------------------------------------------- 
/img/lessons/swc-shell/nano_quotation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/lessons/swc-shell/nano_quotation.png -------------------------------------------------------------------------------- /img/lessons/swc-shell/permissions_table.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/lessons/swc-shell/permissions_table.png -------------------------------------------------------------------------------- /img/lessons/swc-shell/process_stdin_stdout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/lessons/swc-shell/process_stdin_stdout.png -------------------------------------------------------------------------------- /img/lessons/swc-shell/public_private_keys.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/lessons/swc-shell/public_private_keys.png -------------------------------------------------------------------------------- /img/lessons/swc-shell/relative_path.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/lessons/swc-shell/relative_path.png -------------------------------------------------------------------------------- /img/lessons/swc-shell/remote_shell_usage.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/lessons/swc-shell/remote_shell_usage.png -------------------------------------------------------------------------------- /img/lessons/swc-shell/running_a_process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/lessons/swc-shell/running_a_process.png -------------------------------------------------------------------------------- /img/lessons/swc-shell/running_wc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/lessons/swc-shell/running_wc.png -------------------------------------------------------------------------------- /img/lessons/swc-shell/running_wc_sort.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/lessons/swc-shell/running_wc_sort.png -------------------------------------------------------------------------------- /img/lessons/swc-shell/running_wc_sort_head.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/lessons/swc-shell/running_wc_sort_head.png -------------------------------------------------------------------------------- /img/lessons/swc-shell/shell_as_process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/lessons/swc-shell/shell_as_process.png 
-------------------------------------------------------------------------------- /img/lessons/swc-shell/shell_on_shell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/lessons/swc-shell/shell_on_shell.png -------------------------------------------------------------------------------- /img/lessons/swc-shell/vlad_homedir.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/lessons/swc-shell/vlad_homedir.png -------------------------------------------------------------------------------- /img/lessons/swc-shell/x_for_directories.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/lessons/swc-shell/x_for_directories.png -------------------------------------------------------------------------------- /img/mozilla-science-lab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/mozilla-science-lab.png -------------------------------------------------------------------------------- /img/nano1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/nano1.png -------------------------------------------------------------------------------- /img/nano2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/nano2.png 
-------------------------------------------------------------------------------- /img/osi-approved-license.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/osi-approved-license.png -------------------------------------------------------------------------------- /img/readme/step1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/readme/step1.png -------------------------------------------------------------------------------- /img/readme/step2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/readme/step2.png -------------------------------------------------------------------------------- /img/readme/step3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/readme/step3.png -------------------------------------------------------------------------------- /img/rss-icon-blue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/rss-icon-blue.png -------------------------------------------------------------------------------- /img/setup/cygwin-icon.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/setup/cygwin-icon.jpg -------------------------------------------------------------------------------- 
/img/setup/cygwin-terminal-300x175.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/setup/cygwin-terminal-300x175.jpg -------------------------------------------------------------------------------- /img/setup/gnome-terminal-300x195.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/setup/gnome-terminal-300x195.jpg -------------------------------------------------------------------------------- /img/setup/mac-terminal-300x257.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/setup/mac-terminal-300x257.jpg -------------------------------------------------------------------------------- /img/setup/ubuntu-terminal-300x197.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/setup/ubuntu-terminal-300x197.jpg -------------------------------------------------------------------------------- /img/site/main_shadow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/site/main_shadow.png -------------------------------------------------------------------------------- /img/slides/enrolment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/slides/enrolment.png 
-------------------------------------------------------------------------------- /img/slides/workshops.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/slides/workshops.png -------------------------------------------------------------------------------- /img/software-carpentry-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/img/software-carpentry-banner.png -------------------------------------------------------------------------------- /index.md: -------------------------------------------------------------------------------- 1 | Environmental Metagenomics Workshop 2 | =================================== 3 | 4 | 26 - 29 September 2017 at CICESE 5 | -------------------------------- 6 | *Instructors: Harriet Alexander, Phil Brooks, and C. Titus Brown* 7 | 8 | These are the online materials for the environmental metagenomics workshop run at [CICESE](http://www.cicese.edu.mx/). 9 | 10 | We will be using HackMD to take collective notes throughout the course. If you wish to add to the notes or follow along, the [HackMD can be found here](https://hackmd.io/BwdgnArApgJgDANgLRQcAjEgLFARhJXGSJAJgGYYQFT0oAzYWoA=). 11 | 12 | ### Schedule 13 | #### Tuesday, Day 1: 14 | ##### Morning 15 | * [Welcome!](welcome.html) 16 | * [Logging onto an instance through Amazon Web Services (AWS)](aws-ssh/index.html) 17 | * [Working on the command line](command-line.html) 18 | 19 | ##### Lunch 20 | 21 | ##### Afternoon 22 | * [Command line blast](running-command-line-blast.html) 23 | 24 | ##### Homework 25 | * Read the [Critical Assessment of Metagenome Interpretation (CAMI) Paper](http://www.biorxiv.org/content/biorxiv/early/2017/01/09/099127.full.pdf); 26 | * Read [Hu et al.
2016](http://mbio.asm.org/content/7/1/e01669-15.abstract). This is the paper from which we pulled all of our sample data. 27 | 28 | #### Wednesday, Day 2: 29 | ##### Morning 30 | * Metagenomic workflows and considerations 31 | * [Evaluating short-read quality](quality.html) 32 | * [Assembling your short read data set with MEGAHIT](assemble.html) 33 | * [A brief discussion of workflows & repeatability](workflows.html) 34 | 35 | ##### Lunch 36 | 37 | ##### Afternoon 38 | * [Evaluating your assemblies](assembly-evaluation.html) 39 | 40 | 41 | #### Thursday, Day 3: 42 | ##### Morning 43 | * [Mapping short reads to the assembly](mapping.html) *time permitting* 44 | * [Sourmash compare](sourmash_compare.html) 45 | * Discussion of CAMI paper 46 | 47 | ##### Lunch 48 | 49 | ##### Afternoon 50 | * [Annotating your assembly with Prokka](prokka_tutorial.html) 51 | * [Quantifying abundance across samples with Salmon](salmon_tutorial.html) 52 | 53 | 54 | #### Friday, Day 4: 55 | ##### Morning 56 | * [Taxonomic classification with sourmash gather](sourmash_gather.html) 57 | * [Binning genomes out of your metagenome](binning.html) 58 | 59 | ##### Lunch 60 | 61 | ##### Afternoon 62 | * [Putting it all together with Anvi'o](anvio.html) 63 | 64 | 65 | 66 | Resources: 67 | 68 | [SEQ Answers](http://seqanswers.com/) 69 | 70 | [Biostars](https://www.biostars.org/) 71 | 72 | [Data Carpentry](http://www.datacarpentry.org/) 73 | 74 | [DIB Summer Institute](http://ivory.idyll.org/dibsi/) 75 | 76 | ### Undone 77 | 78 | See [the complete table of contents](toc.html) 79 | 80 | ### Technical information 81 | 82 | The GitHub repository for this workshop is public at 83 | https://github.com/ngs-docs/2017-cicese-metagenomics 84 | -------------------------------------------------------------------------------- /jetstream/boot.md: -------------------------------------------------------------------------------- 1 | # Booting a Jetstream Computer Instance for your use!
2 | 3 | What we're going to do here is walk through starting up a running 4 | computer (an "instance") on the Jetstream service. 5 | 6 | Below, we've provided screenshots of the whole process. You can click 7 | on them to zoom in a bit. The important areas to fill in are circled 8 | in red. 9 | 10 | Some of the details may vary -- for example, if you have your own XSEDE 11 | account, you may want to log in with that -- and the name of the operating 12 | system or "image" may also vary from "Ubuntu 16.04" depending on the 13 | workshop. 14 | 15 | ----- 16 | 17 | First, go to the Jetstream application at [https://use.jetstream-cloud.org/application](https://use.jetstream-cloud.org/application). 18 | 19 | Now: 20 | 21 | ## Request to log in to the Jetstream Portal 22 | 23 | Click the login link in the upper right. 24 | 25 | [![login](images/login-1.thumb.png)](../_images/login-1.png) 26 | 27 | ## Use "XSEDE" 28 | 29 | Choose "XSEDE" as your account provider (it should be the default) and click 30 | on "Continue". 31 | 32 | [![foo](images/login-2.thumb.png)](../_images/login-2.png) 33 | 34 | ## Fill in the username and password and click "Sign in" 35 | 36 | Fill in the username, which is 'tx160085' for the ANGUS workshop, 37 | and then enter the password (which we will tell you in class). 38 | 39 | [![foo](images/login-3.thumb.png)](../_images/login-3.png) 40 | 41 | ## Select Projects and "Create New Project" 42 | 43 | Now, this is something you only need to do once if you have your own 44 | account - but if you're using a shared account like tx160085, you will 45 | need a way to keep your computers separate from everyone else's. 46 | 47 | We'll do this with Projects, which give you a bit of a workspace in which 48 | to keep things that belong to "you". 49 | 50 | Click on "Projects" up along the top.
51 | 52 | [![foo](images/login-5.thumb.png)](../_images/login-5.png) 53 | 54 | ## Name the project for yourself, click "create" 55 | 56 | Enter your name into the Project Name, and something simple like "ANGUS" 57 | into the description. Then click 'create'. 58 | 59 | [![foo](images/login-6.thumb.png)](../_images/login-6.png) 60 | 61 | ## Select the newly created project 62 | 63 | Click on your new project! 64 | 65 | [![foo](images/login-7.thumb.png)](../_images/login-7.png) 66 | 67 | ## Within the project, select "new" 68 | 69 | Now, select 'New' and then "Instance" from the dropdown menu to start up a new machine. 70 | 71 | [![foo](images/login-8.thumb.png)](../_images/login-8.png) 72 | 73 | ## Find the "Ubuntu 16.04" image, click on it 74 | 75 | Enter "Ubuntu 16.04" into the search bar - make sure it's from 76 | June 21st, 2017. 77 | 78 | [![foo](images/login-9.thumb.png)](../_images/login-9.png) 79 | 80 | ## Name it something simple and select 'm1.medium' 81 | 82 | Change the name after what we're doing - "workshop tutorial", for example, 83 | but it doesn't matter -- and select 'm1.medium'. 84 | 85 | [![foo](images/login-10.thumb.png)](../_images/login-10.png) 86 | 87 | ## Wait for it to become active 88 | 89 | It will now be booting up! This will take 2-10 minutes, depending. 90 | Just wait! Don't reload or anything. 91 | 92 | [![foo](images/login-11.thumb.png)](../_images/login-11.png) 93 | 94 | ## Click on your new instance to get more information! 95 | 96 | Now, you can either click "Open Web Shell", *or*, if you know how to use secure shell (ssh), 97 | you can ssh in as user 'tx160085' on the IP address of the machine - see 98 | circled information below. Click [Here](https://angus.readthedocs.io/en/2017/jetstream/login.html#) to access the tutorial for setting up a ssh connection from your local computer to your Jetstream instance. 
99 | 100 | [![foo](images/login-12.thumb.png)](../_images/login-12.png) 101 | 102 | ## Miscellany 103 | 104 | There's a possibility that you'll be confronted with this when you log in to jetstream: 105 | 106 | [![foo](images/possible_instance_problem.thumb.png)](../_images/possible_instance_problem.png) 107 | 108 | A refresh of the page should get you past it. Please try not to actually move any instances to 109 | a new project; it's probably someone else's and it could confuse them :) 110 | 111 | ## Suspend your instance 112 | 113 | You can save your workspace so you can return to your instance at a later time without losing any of your files or information stored in memory, similar to putting your physical computer to sleep. At the Instance Details screen, select the "Suspend" button. 114 | 115 | [![foo](images/suspend-1.png)](../_images/suspend-1.png) 116 | 117 | This will open up a dialogue window. Select the "Yes, suspend this instance" button. 118 | 119 | [![foo](images/suspend-2.png)](../_images/suspend-2.png) 120 | 121 | It may take Jetstream a few minutes to process, so wait until the progress bar says "Suspended." 122 | 123 | ### Resuming your instance 124 | 125 | To *wake-up* your instance, select the "Resume" button. 126 | 127 | [![foo](images/resume-1.png)](../_images/resume-1.png) 128 | 129 | This will open up a dialogue window. Select the "Yes, resume this instance" button. 130 | 131 | [![foo](images/resume-2.png)](../_images/resume-2.png) 132 | 133 | It may take Jetstream a few minutes to process, so wait until the progress bar says "Active." 134 | 135 | [![foo](images/resume-3.png)](../_images/resume-3.png) 136 | 137 | ## Shutting down your instance 138 | 139 | You can shut down your workspace so you can return to your instance another day without losing any of your files, similar to shutting down your physical computer. You will retain your files, but you will lose any information stored in memory, such as your history on the command line.
At the Instance Details screen, select the "Stop" button. 140 | 141 | [![foo](images/stop-1.png)](../_images/stop-1.png) 142 | 143 | This will open up a dialogue window. Select the "Yes, stop this instance" button. 144 | 145 | [![foo](images/stop-2.png)](../_images/stop-2.png) 146 | 147 | It may take Jetstream a few minutes to process, so wait until the progress bar says "Shutoff." 148 | 149 | [![foo](images/stop-3.png)](../_images/stop-3.png) 150 | 151 | [![foo](images/stop-4.png)](../_images/stop-4.png) 152 | 153 | ### Restarting your instance 154 | 155 | To start your instance again, select the "Start" button. 156 | 157 | [![foo](images/start-1.png)](../_images/start-1.png) 158 | 159 | This will open up a dialogue window. Select the "Yes, start this instance" button. 160 | 161 | [![foo](images/start-2.png)](../_images/start-2.png) 162 | 163 | It may take Jetstream a few minutes to process, so wait until the progress bar says "Active." 164 | 165 | [![foo](images/start-3.png)](../_images/start-3.png) 166 | 167 | ## Deleting your instance 168 | 169 | To completely remove your instance, you can select the "delete" button from the instance details page. 170 | 171 | [![foo](images/delete-1.png)](../_images/delete-1.png) 172 | 173 | This will open up a dialogue window. Select the "Yes, delete this instance" button. 174 | 175 | [![foo](images/delete-2.png)](../_images/delete-2.png) 176 | 177 | It may take Jetstream a few minutes to process your request. The instance should disappear from the project when it has been successfully deleted.
178 | 179 | [![foo](images/delete-3.png)](../_images/delete-3.png) 180 | 181 | [![foo](images/delete-4.png)](../_images/delete-4.png) 182 | -------------------------------------------------------------------------------- /jetstream/images/delete-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/delete-1.png -------------------------------------------------------------------------------- /jetstream/images/delete-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/delete-2.png -------------------------------------------------------------------------------- /jetstream/images/delete-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/delete-3.png -------------------------------------------------------------------------------- /jetstream/images/delete-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/delete-4.png -------------------------------------------------------------------------------- /jetstream/images/ec2-moba-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/ec2-moba-2.png -------------------------------------------------------------------------------- /jetstream/images/ec2-moba-3.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/ec2-moba-3.png -------------------------------------------------------------------------------- /jetstream/images/ec2-moba-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/ec2-moba-4.png -------------------------------------------------------------------------------- /jetstream/images/ip-address.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/ip-address.png -------------------------------------------------------------------------------- /jetstream/images/jetstream_login.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/jetstream_login.png -------------------------------------------------------------------------------- /jetstream/images/login-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/login-1.png -------------------------------------------------------------------------------- /jetstream/images/login-1.thumb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/login-1.thumb.png -------------------------------------------------------------------------------- 
/jetstream/images/login-10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/login-10.png -------------------------------------------------------------------------------- /jetstream/images/login-10.thumb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/login-10.thumb.png -------------------------------------------------------------------------------- /jetstream/images/login-11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/login-11.png -------------------------------------------------------------------------------- /jetstream/images/login-11.thumb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/login-11.thumb.png -------------------------------------------------------------------------------- /jetstream/images/login-12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/login-12.png -------------------------------------------------------------------------------- /jetstream/images/login-12.thumb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/login-12.thumb.png 
-------------------------------------------------------------------------------- /jetstream/images/login-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/login-2.png -------------------------------------------------------------------------------- /jetstream/images/login-2.thumb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/login-2.thumb.png -------------------------------------------------------------------------------- /jetstream/images/login-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/login-3.png -------------------------------------------------------------------------------- /jetstream/images/login-3.thumb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/login-3.thumb.png -------------------------------------------------------------------------------- /jetstream/images/login-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/login-4.png -------------------------------------------------------------------------------- /jetstream/images/login-4.thumb.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/login-4.thumb.png -------------------------------------------------------------------------------- /jetstream/images/login-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/login-5.png -------------------------------------------------------------------------------- /jetstream/images/login-5.thumb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/login-5.thumb.png -------------------------------------------------------------------------------- /jetstream/images/login-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/login-6.png -------------------------------------------------------------------------------- /jetstream/images/login-6.thumb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/login-6.thumb.png -------------------------------------------------------------------------------- /jetstream/images/login-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/login-7.png -------------------------------------------------------------------------------- /jetstream/images/login-7.thumb.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/login-7.thumb.png -------------------------------------------------------------------------------- /jetstream/images/login-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/login-8.png -------------------------------------------------------------------------------- /jetstream/images/login-8.thumb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/login-8.thumb.png -------------------------------------------------------------------------------- /jetstream/images/login-9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/login-9.png -------------------------------------------------------------------------------- /jetstream/images/login-9.thumb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/login-9.thumb.png -------------------------------------------------------------------------------- /jetstream/images/moba-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/moba-1.png -------------------------------------------------------------------------------- 
/jetstream/images/password-change.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/password-change.png -------------------------------------------------------------------------------- /jetstream/images/possible_instance_problem.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/possible_instance_problem.png -------------------------------------------------------------------------------- /jetstream/images/possible_instance_problem.thumb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/possible_instance_problem.thumb.png -------------------------------------------------------------------------------- /jetstream/images/resume-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/resume-1.png -------------------------------------------------------------------------------- /jetstream/images/resume-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/resume-2.png -------------------------------------------------------------------------------- /jetstream/images/resume-3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/resume-3.png -------------------------------------------------------------------------------- /jetstream/images/start-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/start-1.png -------------------------------------------------------------------------------- /jetstream/images/start-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/start-2.png -------------------------------------------------------------------------------- /jetstream/images/start-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/start-3.png -------------------------------------------------------------------------------- /jetstream/images/stop-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/stop-1.png -------------------------------------------------------------------------------- /jetstream/images/stop-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/stop-2.png -------------------------------------------------------------------------------- /jetstream/images/stop-3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/stop-3.png -------------------------------------------------------------------------------- /jetstream/images/stop-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/stop-4.png -------------------------------------------------------------------------------- /jetstream/images/suspend-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/suspend-1.png -------------------------------------------------------------------------------- /jetstream/images/suspend-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/jetstream/images/suspend-2.png -------------------------------------------------------------------------------- /jetstream/login.md: -------------------------------------------------------------------------------- 1 | # Logging in to jetstream from your local terminal with a key file 2 | 3 | Some of us have had problems with the web shell and getting into the Jetstream 4 | portal. These materials will show you how to log in using an SSH key through your 5 | local terminal. 6 | 7 | ## Concerning Keys 8 | 9 | Cryptographic keys are a convenient and secure way to authenticate without having to use 10 | passwords. They consist of a pair of files called the public and private keys: the public part can 11 | be shared with whoever you'd like to authenticate with (in our case, Jetstream!), and the private 12 | part is kept "secret" on your machine. 
Things that are encrypted with the public key can be 13 | decrypted with the private key, but it is computationally intractable (i.e., it would take on the 14 | order of thousands of years) to determine a private key from a public key. You can read more about 15 | it [here](https://en.wikipedia.org/wiki/Public-key_cryptography). 16 | 17 | The good news is that there is already a registered public key for our Jetstream account. However, 18 | to make use of it, you'll need the private key. And so, we move on! 19 | 20 | ## Getting the Private Key 21 | 22 | The private key has been posted on slack in the `#general` channel. You can download it by 23 | visiting [here](https://dibsi.slack.com/files/camillescott/F60FMTCCA/angus_private_key), selecting 24 | **Actions**, and pressing download. 25 | 26 | ## Getting your instance IP address 27 | 28 | In order to connect to your instance, we need to know its IP address, its unique identifier on the 29 | internet. This is listed in your instance details, circled below: 30 | 31 | [![stuff](images/ip-address.png)](../_images/ip-address.png) 32 | 33 | Now, things diverge a little. 34 | 35 | ## On MacOS/Linux 36 | 37 | These systems have their own terminal by default. Find and open your terminal: on MacOS, you can 38 | search for Terminal in finder. 39 | 40 | We're going to assume that the key file ended up in your `Downloads` folder. In your terminal, 41 | run: 42 | 43 | ```bash 44 | cd && mv ~/Downloads/angus_private_key . 45 | ``` 46 | 47 | This puts the file in your home folder. Now, we need to set its permissions more strictly: 48 | 49 | ```bash 50 | chmod 600 angus_private_key 51 | ``` 52 | 53 | Finally, we can use the IP address from before, along with the common login name and the key, to log 54 | in: 55 | 56 | ```bash 57 | ssh -i angus_private_key tx160085@YOUR_IP_ADDRESS 58 | ``` 59 | 60 | You should now have access to atmosphere within your local terminal.
61 | 62 | ## On Windows 63 | 64 | For Windows, we first need to actually *install* a terminal. 65 | 66 | ### Install mobaxterm 67 | 68 | First, download [mobaxterm home edition (portable)](http://mobaxterm.mobatek.net/download-home-edition.html) 69 | and run it. 70 | 71 | ### Start a new session 72 | 73 | [![foo](images/moba-1.png)](../_images/moba-1.png) 74 | 75 | ### Fill in session settings 76 | 77 | Fill in your "remote host," which will be the IP address from earlier. Then select 78 | "specify username" and enter `tx160085`. 79 | 80 | [![foo](images/ec2-moba-2.png)](../_images/ec2-moba-2.png) 81 | 82 | ### Specify the session key 83 | 84 | Copy the downloaded private file onto your primary hard disk (generally 85 | C:) and then put in the full path to it. 86 | 87 | [![foo](images/ec2-moba-3.png)](../_images/ec2-moba-3.png) 88 | 89 | ### Click OK 90 | 91 | Victory! 92 | 93 | [![foo](images/ec2-moba-4.png)](../_images/ec2-moba-4.png) 94 | -------------------------------------------------------------------------------- /jetstream/ssh_changepassword.md: -------------------------------------------------------------------------------- 1 | # Adding password to a Jetstream instance 2 | 3 | To add a password to your Jetstream instance, 'Open the Web Shell' from the instance page. 4 | 5 | ``` 6 | sudo passwd tx160085 7 | ``` 8 | Enter a password when prompted. The letters will not display when you type, so do not be alarmed. 9 | 10 | ![](images/password-change.png) 11 | 12 | Exit out of the Web Shell. 13 | 14 | Open your terminal and login: 15 | 16 | ``` 17 | ssh tx160085@149.165.157.62 18 | ``` 19 | 20 | Type your password when prompted.
21 | 22 | ![](images/jetstream_login.png) 23 | -------------------------------------------------------------------------------- /kmer_trimming.rst: -------------------------------------------------------------------------------- 1 | ============================= 2 | K-mer Spectral Error Trimming 3 | ============================= 4 | 5 | (Optional) 6 | 7 | khmer documentation: http://khmer.readthedocs.io/en/latest 8 | 9 | If you plot a k-mer abundance histogram of the samples, you'll 10 | notice something: there's an awful lot of unique (abundance=1) k-mers. 11 | These are erroneous k-mers caused by sequencing errors. 12 | 13 | In a new Python3 Jupyter Notebook, run:: 14 | 15 | cd ~/work 16 | 17 | and then :: 18 | 19 | !abundance-dist-single.py -M 1e9 -k 21 SRR1976948_1.fastq.gz SRR1976948_1.fastq.gz.dist 20 | 21 | and in another cell:: 22 | 23 | %matplotlib inline 24 | import numpy 25 | from pylab import * 26 | dist1 = numpy.loadtxt('SRR1976948_1.fastq.gz.dist', skiprows=1, delimiter=',') 27 | plot(dist1[:,0], dist1[:,1]) 28 | axis(xmax=50) 29 | 30 | Many of these errors remain even after you do the Trimmomatic run; you can 31 | see this with:: 32 | 33 | !abundance-dist-single.py -M 1e9 -k 21 SRR1976948_1.qc.fq.gz SRR1976948_1.qc.fq.gz.dist 34 | 35 | and then plot:: 36 | 37 | dist2 = numpy.loadtxt('SRR1976948_1.qc.fq.gz.dist', skiprows=1, delimiter=',') 38 | plot(dist1[:,0], dist1[:,1], label='untrimmed') 39 | plot(dist2[:,0], dist2[:,1], label='trimmed') 40 | legend(loc='upper right') 41 | axis(xmax=50) 42 | 43 | This is for 44 | two reasons: 45 | 46 | First, Trimmomatic trims based solely on the quality score, which is 47 | a statistical statement about the correctness of a base - a Q score 48 | of 30 means that, of 1000 bases with that Q score, 1 of those 49 | bases will be wrong. So, a base can have a high Q score and still 50 | be wrong! 
(and **many** bases will have a low Q score and still be 51 | correct) 52 | 53 | Second, we trimmed **very** lightly - only bases that had a very low 54 | quality were removed. This was intentional because with assembly, 55 | you want to retain as much coverage as possible, and the assembler 56 | will generally figure out what the "correct" base is from the coverage. 57 | 58 | An alternative to trimming based on the quality scores is to trim based on 59 | k-mer abundance - this is known as k-mer spectral error trimming. K-mer 60 | spectral error trimming *always* beats quality score trimming in terms 61 | of eliminating errors; e.g. look at this table from `Zhang et al., 2014 `__: 62 | 63 | .. thumbnail:: files/2014-zhang.png 64 | :width: 40% 65 | 66 | The basic logic is this: if you see low abundance k-mers in a high 67 | coverage data set, those k-mers are almost certainly the result of 68 | errors. (Caveat: strain variation could also create them.) 69 | 70 | In metagenomic data sets we do have the problem that we may have very 71 | low and very high coverage data. So we don't necessarily want to get 72 | rid of all low-abundance k-mers, because they may represent truly low 73 | abundance (but useful) data. 74 | 75 | As part of the khmer project in my lab, we have developed an approach 76 | that sorts reads into high abundance and low abundance reads, and only 77 | error trims the high abundance reads. 78 | 79 | .. thumbnail:: files/kmer-trimming.png 80 | :width: 40% 81 | 82 | This does mean that many errors may get left in the data set, because we 83 | have no way of figuring out if they are errors or simply low coverage, 84 | but that's OK (and you can always trim them off if you really care). 85 | 86 | .. Error profile@@ 87 | 88 | To run such error trimming, use the command ``trim-low-abund.py`` 89 | (at the command line, or prefix with a '!' 
in the notebook):: 90 | 91 | interleave-reads.py SRR1976948_1.qc.fq.gz SRR1976948_2.qc.fq.gz | 92 | trim-low-abund.py -V -M 8e9 -C 3 -Z 10 - -o SRR1976948.trim.fq 93 | 94 | Why (or why not) do k-mer trimming? 95 | ----------------------------------- 96 | 97 | If you can assemble your data set without k-mer trimming, there's no 98 | reason to do it. The reason we're error trimming here is to speed up 99 | the assembler (by removing data) and to decrease the memory requirements 100 | of the assembler (by removing a number of k-mers). 101 | 102 | To see how many k-mers we removed, you can examine the distribution as above, 103 | or use the ``unique-kmers.py`` script:: 104 | 105 | unique-kmers.py SRR1976948_1.qc.fq.gz SRR1976948_2.qc.fq.gz 106 | unique-kmers.py SRR1976948.trim.fq 107 | 108 | 109 | ---- 110 | 111 | Next: :doc:`assemble` 112 | 113 | -------------------------------------------------------------------------------- /mapping.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Mapping 3 | ======= 4 | 5 | Download bwa:: 6 | 7 | cd 8 | sudo apt-get install bwa samtools 9 | 10 | Downloading data 11 | ----------------- 12 | 13 | Now, go to a new directory and grab the data:: 14 | 15 | mkdir ~/mapping 16 | cd ~/mapping 17 | 18 | curl -O https://s3-us-west-1.amazonaws.com/dib-training.ucdavis.edu/metagenomics-scripps-2016-10-12/SRR1976948.abundtrim.subset.pe.fq.gz 19 | curl -O https://s3-us-west-1.amazonaws.com/dib-training.ucdavis.edu/metagenomics-scripps-2016-10-12/SRR1977249.abundtrim.subset.pe.fq.gz 20 | 21 | And extract the files:: 22 | 23 | for file in *fq.gz 24 | do 25 | gunzip $file 26 | done 27 | 28 | We will also need the assembly; rather than rebuilding it, you can download 29 | a copy that we saved for you:: 30 | 31 | curl -O https://s3-us-west-1.amazonaws.com/dib-training.ucdavis.edu/metagenomics-scripps-2016-10-12/subset_assembly.fa.gz 32 | gunzip subset_assembly.fa 33 | 34 | Mapping the reads 35 | 
----------------- 36 | 37 | First, we will need to index the megahit assembly:: 38 | 39 | bwa index subset_assembly.fa 40 | 41 | The reads are in paired-end/interleaved format, so you'll need to add the -p flag to indicate to bwa that these are paired end data. 42 | 43 | Map the reads:: 44 | 45 | for i in *fq 46 | do 47 | bwa mem -p subset_assembly.fa $i > ${i}.aln.sam 48 | done 49 | 50 | Converting to BAM to visualize 51 | ------------------------------ 52 | 53 | First, index the assembly for samtools:: 54 | 55 | samtools faidx subset_assembly.fa 56 | 57 | Then, convert both SAM files to BAM files:: 58 | 59 | for i in *.sam 60 | do 61 | samtools import subset_assembly.fa $i $i.bam 62 | samtools sort $i.bam -o $i.bam.sorted.bam 63 | samtools index $i.bam.sorted.bam 64 | done 65 | 66 | Visualizing the read mapping 67 | ---------------------------- 68 | 69 | Find a contig name to visualize:: 70 | 71 | grep -v ^@ SRR1976948.abundtrim.subset.pe.fq.aln.sam | cut -f 3 | sort | uniq -c | sort -n | tail 72 | 73 | Pick one e.g. k99_13588. 74 | 75 | Now execute:: 76 | 77 | samtools tview SRR1976948.abundtrim.subset.pe.fq.aln.sam.bam.sorted.bam subset_assembly.fa -p k99_13588:400 78 | 79 | (use arrow keys to scroll, 'q' to quit; a key for what you are looking at: `pileup format`__.) 80 | 81 | Look at it in both mappings:: 82 | 83 | samtools tview SRR1977249.abundtrim.subset.pe.fq.aln.sam.bam.sorted.bam subset_assembly.fa -p k99_13588:400 84 | 85 | Why is the mapping so good?? 86 | 87 | ---- 88 | 89 | We can now use the mapped data to estimate mean coverage of contigs in our assembly. 90 | 91 | To do this we will be using `bedtools `__. to estimate coverage. 92 | 93 | First, install bedtools:: 94 | 95 | sudo apt-get install bedtools 96 | 97 | Now, use the genomeCoverageBed to quantify coverage from the bam files:: 98 | 99 | for i in *sorted.bam 100 | do 101 | genomeCoverageBed -ibam $i > ${i/.pe*/}.histogram.tab 102 | done 103 | 104 | Take a look at the output.
105 | 106 | 1. Contig name 107 | 2. Depth of coverage 108 | 3. Number of bases on contig with depth equal to column 2 109 | 4. Size of contig (or entire genome) in base pairs 110 | 5. Fraction of bases on contig (or entire genome) with depth equal to column 2 111 | 112 | To get an estimate of mean coverage for a contig we sum (Depth of coverage) * (Number of bases on contig at that depth) over all depths, and then divide by (Length of the contig). We have a quick script that will do this calculation. 113 | 114 | Download it:: 115 | 116 | 117 | wget https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/master/files/calculate-contig-coverage.py 118 | 119 | Install pandas:: 120 | 121 | sudo pip install pandas 122 | 123 | 124 | And then run it!:: 125 | 126 | 127 | for hist in *histogram.tab 128 | do 129 | python calculate-contig-coverage.py $hist 130 | done 131 | 132 | This will produce a new set of files that have the coverage information. 133 | 134 | --- 135 | 136 | Optional 137 | 138 | As a comparison, let's look at some untrimmed data.
139 | 140 | Grab untrimmed data:: 141 | 142 | curl -O https://s3-us-west-1.amazonaws.com/dib-training.ucdavis.edu/metagenomics-scripps-2016-10-12/SRR1976948_1.fastq.gz 143 | curl -O https://s3-us-west-1.amazonaws.com/dib-training.ucdavis.edu/metagenomics-scripps-2016-10-12/SRR1976948_2.fastq.gz 144 | 145 | Now align this untrimmed data:: 146 | 147 | gunzip -c SRR1976948_1.fastq.gz | head -800000 > SRR1976948.1 148 | gunzip -c SRR1976948_2.fastq.gz | head -800000 > SRR1976948.2 149 | 150 | bwa aln subset_assembly.fa SRR1976948.1 > SRR1976948_1.untrimmed.sai 151 | bwa aln subset_assembly.fa SRR1976948.2 > SRR1976948_2.untrimmed.sai 152 | 153 | bwa sampe subset_assembly.fa SRR1976948_1.untrimmed.sai SRR1976948_2.untrimmed.sai SRR1976948.1 SRR1976948.2 > SRR1976948.untrimmed.sam 154 | 155 | i=SRR1976948.untrimmed.sam 156 | samtools import subset_assembly.fa $i $i.bam 157 | samtools sort $i.bam -o $i.bam.sorted.bam 158 | samtools index $i.bam.sorted.bam 159 | 160 | And now look:: 161 | 162 | samtools tview SRR1976948.untrimmed.sam.bam.sorted.bam subset_assembly.fa -p k99_13588:500 163 | 164 | You can also use 'Tablet' to view the downloaded BAM file - see `the Tablet paper `__. 165 | 166 | How is this different from the trimmed data? Look at a few different contigs. 167 | -------------------------------------------------------------------------------- /prokka_tutorial.rst: -------------------------------------------------------------------------------- 1 | ====================== 2 | Annotation with Prokka 3 | ====================== 4 | 5 | Prokka is a tool that facilitates the fast annotation of prokaryotic genomes. 
6 | 7 | The goals of this tutorial are to: 8 | 9 | * Install Prokka 10 | * Use Prokka to annotate our genomes 11 | 12 | Installing Prokka 13 | ================= 14 | 15 | Download and extract the latest version of prokka: 16 | :: 17 | 18 | cd ~/ 19 | git clone https://github.com/tseemann/prokka.git 20 | 21 | 22 | We also will need some dependencies such as bioperl: 23 | :: 24 | 25 | sudo apt-get -y install bioperl libdatetime-perl libxml-simple-perl libdigest-md5-perl 26 | 27 | This may take a little while. 28 | 29 | and we need an XML package from perl 30 | :: 31 | 32 | sudo bash 33 | export PERL_MM_USE_DEFAULT=1 34 | export PERL_EXTUTILS_AUTOINSTALL="--defaultdeps" 35 | perl -MCPAN -e 'install "XML::Simple"' 36 | exit 37 | 38 | Now, you should be able to add Prokka to your ``$PATH`` and set up the index for the sequence database: 39 | :: 40 | 41 | export PATH=$PATH:$HOME/prokka/bin 42 | prokka --setupdb 43 | 44 | To make sure the database loaded directly:: 45 | 46 | prokka --listdb 47 | 48 | You should see something like:: 49 | 50 | tx160085@js-157-212:~$ prokka --listdb 51 | [17:04:15] Looking for databases in: /home/tx160085/prokka/bin/../db 52 | [17:04:15] * Kingdoms: Archaea Bacteria Mitochondria Viruses 53 | [17:04:15] * Genera: Enterococcus Escherichia Staphylococcus 54 | [17:04:15] * HMMs: HAMAP 55 | [17:04:15] * CMs: Bacteria Viruses 56 | 57 | Prokka uses a core set of the Uniprot-DB Kingdom sets against which it blasts your samples. It is possible to search in a more specific dataset, e.g. the genus Enterococcus, by adding a few flags to the command. 58 | 59 | --usegenus --genus Enterococcus 60 | 61 | Question: What do you think you would do for adding to the default databases? 62 | 63 | Prokka should be good to go now-- you can check to make sure that all is well by typing ``prokka``. This should print the help screen with all available options. You can find out more about Prokka databases `here `__. 
64 | 65 | Running Prokka 66 | ============== 67 | 68 | Make a new directory for the annotation: 69 | :: 70 | 71 | cd ~/ 72 | mkdir annotation 73 | cd annotation 74 | 75 | Link the metagenome assembly file into this directory: 76 | :: 77 | 78 | ln -fs ~/mapping/subset_assembly.fa . 79 | 80 | Now it is time to run Prokka! There are tons of different ways to specialize the running of Prokka. We are going to keep it simple for now, though. It will take a little bit to run. 81 | :: 82 | 83 | prokka subset_assembly.fa --outdir prokka_annotation --prefix metagG --metagenome --kingdom Bacteria 84 | 85 | Question: Look at the results of the prokka analysis as it prepares your output file. What types of categories are you seeing flash by on the screen? 86 | 87 | Don't worry, the program tends to pause here:: 88 | 89 | Running: cat prokka_annotation\/sprot\.faa | parallel --gnu --plain -j 6 --block 242000 90 | --recstart '>' --pipe blastp -query --db /home/tx160085/prokka/bin/../db/kingdom/Bacteria/sprot 91 | -evalue 1e-06 -num_threads 1 -num_descriptions 1 -num_alignments 1 -seg no > prokka_annotation\/sprot\.blast 2> /dev/null 92 | 93 | This will generate a new folder called ``prokka_annotation`` in which will be a series of files, which are detailed `here `__. 94 | 95 | In particular, we will be using the ``*.ffn`` file to assess the relative read coverage within our metagenomes across the predicted genomic regions. 96 | 97 | Question: Take a moment and look inside the output files.:: 98 | 99 | cd ~/annotation/prokka_annotation 100 | less -S *.fsa 101 | 102 | less reminders: 103 | 104 | * Press space_bar to page down 105 | * Press q to exit the less commands 106 | 107 | Questions? 108 | ========== 109 | 110 | * What can I annotate with prokka? 111 | * Alternatives? 112 | * How do I submit my annotated files to `Genbank? ENA? `__? 113 | * Why is it called Prokka?
114 | 115 | 116 | 117 | References 118 | =========== 119 | 120 | * http://www.vicbioinformatics.com/software.prokka.shtml 121 | * https://www.ncbi.nlm.nih.gov/pubmed/24642063 122 | * https://github.com/tseemann/prokka/blob/master/README.md 123 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | recommonmark 3 | sphinxcontrib-images 4 | sphinx_bootstrap_theme 5 | 6 | -------------------------------------------------------------------------------- /running-command-line-blast.md: -------------------------------------------------------------------------------- 1 | # Running command-line BLAST 2 | 3 | The goal of this tutorial is to run you through a demonstration of the 4 | command line, which you may not have seen or used much before. 5 | 6 | [Start up an m1.medium instance running Ubuntu 16.04 on Jetstream.](jetstream/boot.html) 7 | 8 | All of the commands below can be copy/pasted. 9 | 10 | ## Updating the software on the machine 11 | 12 | Copy and paste the following commands: 13 | 14 | ``` 15 | sudo apt-get update && sudo apt-get -y install python ncbi-blast+ 16 | ``` 17 | 18 | (make sure to hit enter after the paste -- sometimes the last line doesn't 19 | paste completely.) 20 | 21 | This updates the software list and installs the Python programming 22 | language and NCBI BLAST+. 23 | 24 | ## Running BLAST 25 | 26 | First! We need some data. Let's grab the mouse and zebrafish RefSeq 27 | protein data sets from NCBI, and put them in our home directory. If you've just logged 28 | in, you should be there already, but to be sure, we'll start by running `cd`.
29 | Now, we'll use `curl` to download the files: 30 | 31 | ``` 32 | cd 33 | curl -O ftp://ftp.ncbi.nih.gov/refseq/M_musculus/mRNA_Prot/mouse.1.protein.faa.gz 34 | curl -O ftp://ftp.ncbi.nih.gov/refseq/M_musculus/mRNA_Prot/mouse.2.protein.faa.gz 35 | curl -O ftp://ftp.ncbi.nih.gov/refseq/M_musculus/mRNA_Prot/mouse.3.protein.faa.gz 36 | 37 | curl -O ftp://ftp.ncbi.nih.gov/refseq/D_rerio/mRNA_Prot/zebrafish.1.protein.faa.gz 38 | ``` 39 | 40 | 41 | If you look at the files in the current directory, you should see four 42 | files, along with a directory called lost+found which is for system 43 | information: 44 | 45 | ``` 46 | ls -l 47 | ``` 48 | 49 | should show you: 50 | 51 | ``` 52 | total 21220 53 | -rw-rw-r-- 1 ubuntu ubuntu 1961399 Sep 25 13:24 mouse.1.protein.faa.gz 54 | -rw-rw-r-- 1 ubuntu ubuntu 2612053 Sep 25 13:24 mouse.2.protein.faa.gz 55 | -rw-rw-r-- 1 ubuntu ubuntu 3138707 Sep 25 13:24 mouse.3.protein.faa.gz 56 | -rw-rw-r-- 1 ubuntu ubuntu 14008886 Sep 25 13:24 zebrafish.1.protein.faa.gz 57 | ``` 58 | 59 | All four of the files are FASTA protein files (that's what the .faa 60 | suggests) that are compressed with `gzip` (that's what the .gz means). 61 | 62 | Uncompress them: 63 | 64 | ``` 65 | gunzip *.faa.gz 66 | ``` 67 | 68 | and let's look at the first few sequences in the file: 69 | 70 | ``` 71 | head mouse.1.protein.faa 72 | ``` 73 | 74 | These are protein sequences in FASTA format. FASTA format is something 75 | many of you have probably seen in one form or another -- it's pretty 76 | ubiquitous. It's a text file, containing records; each record 77 | starts with a line beginning with a '>', and then contains one or more 78 | lines of sequence text. 79 | 80 | Let's take those first two sequences and save them to a file. We'll 81 | do this using output redirection with '>', which says "take 82 | all the output and put it into this file here." 
83 | 84 | 85 | ``` 86 | head -11 mouse.1.protein.faa > mm-first.fa 87 | ``` 88 | 89 | So now, for example, you can do `cat mm-first.fa` to see the contents of 90 | that file (or `less mm-first.fa`). 91 | 92 | Now let's BLAST these two sequences against the entire zebrafish 93 | protein data set. First, we need to tell BLAST that the zebrafish 94 | sequences are (a) a database, and (b) a protein database. That's done 95 | by calling 'makeblastdb': 96 | 97 | ``` 98 | makeblastdb -in zebrafish.1.protein.faa -dbtype prot 99 | ``` 100 | 101 | Next, we call BLAST to do the search: 102 | 103 | ``` 104 | blastp -query mm-first.fa -db zebrafish.1.protein.faa 105 | ``` 106 | 107 | This should run pretty quickly, but you're going to get a lot of output!! 108 | To save it to a file instead of watching it go past on the screen, 109 | ask BLAST to save the output to a file that we'll name `mm-first.x.zebrafish.txt`: 110 | 111 | ``` 112 | blastp -query mm-first.fa -db zebrafish.1.protein.faa -out mm-first.x.zebrafish.txt 113 | ``` 114 | 115 | and then you can 'page' through this file at your leisure by typing: 116 | 117 | ``` 118 | less mm-first.x.zebrafish.txt 119 | ``` 120 | 121 | (Type spacebar to move down, and 'q' to get out of paging mode.) 122 | 123 | ----- 124 | 125 | Let's do some more sequences (this one will take a little longer to run): 126 | 127 | ``` 128 | head -500 mouse.1.protein.faa > mm-second.fa 129 | blastp -query mm-second.fa -db zebrafish.1.protein.faa -out mm-second.x.zebrafish.txt 130 | ``` 131 | 132 | will compare the first 83 sequences. You can look at the output file with: 133 | 134 | ``` 135 | less mm-second.x.zebrafish.txt 136 | ``` 137 | 138 | (and again, type 'q' to get out of paging mode.) 
139 | 140 | To get an output format that reads well into downstream applications, it is helpful to add the flag *-outfmt 6* 141 | 142 | ``` 143 | blastp -query mm-second.fa -db zebrafish.1.protein.faa -out mm-second.x.zebrafish.tbl.txt -outfmt 6 144 | ``` 145 | To see the results: 146 | 147 | ``` 148 | head mm-second.x.zebrafish.tbl.txt | less -S 149 | 150 | ``` 151 | less -S means it is scrollable in screen from left to right. 152 | type 'q' to exit less 153 | 154 | To find out how to customize blast outputs, it is helpful to look at the [BLAST® Command Line Applications User Manual.](https://www.ncbi.nlm.nih.gov/books/NBK279668/) 155 | 156 | 157 | 158 | 159 | 160 | 161 | Notes: 162 | 163 | * you can execute multiple commands at a time; 164 | 165 | * You might see a warning - 166 | 167 | `Selenocysteine (U) at position 310 replaced by X` 168 | 169 | what does this mean? 170 | 171 | * why did it take longer to BLAST ``mm-second.fa`` than ``mm-first.fa``? 172 | 173 | Things to mention and discuss: 174 | 175 | * `blastp` options and -help. 176 | * command line options, more generally - why so many? 177 | * automation rocks! 178 | 179 | Reminder: shut down your instance! 180 | 181 | Other topics to discuss: 182 | 183 | * when you shut down, you lose all your data 184 | * what computer(s) is this all happening on? 185 | 186 | ----- 187 | 188 | Next: [Comparing data sets with sourmash](sourmash_compare.html) 189 | 190 | -------------------------------------------------------------------------------- /slice.rst: -------------------------------------------------------------------------------- 1 | ============================== 2 | Slicing and dicing with k-mers 3 | ============================== 4 | 5 | (Note, this won't work with amplified data.) 
6 | 7 | Extra resources: 8 | 9 | * `plotting notebook `__ 10 | 11 | --- 12 | 13 | At the command line, create a new directory and extract some data:: 14 | 15 | cd /mnt 16 | mkdir slice 17 | cd slice 18 | 19 | We're going to work with half the read data set for speed reasons -- :: 20 | 21 | gunzip -c ../mapping/SRR1976948.abundtrim.subset.pe.fq.gz | \ 22 | head -6000000 > SRR1976948.half.fq 23 | 24 | 25 | In a Jupyter Notebook (go to 'http://' + machine name + ':8000'), password 26 | 'davis', create new Python notebook "conda root", run:: 27 | 28 | cd /mnt/slice 29 | 30 | and then in another cell:: 31 | 32 | !load-into-counting.py -M 4e9 -k 31 SRR1976948.kh SRR1976948.half.fq 33 | 34 | and in another cell:: 35 | 36 | !abundance-dist.py SRR1976948.kh SRR1976948.half.fq SRR1976948.dist 37 | 38 | and in yet another cell:: 39 | 40 | %matplotlib inline 41 | import numpy 42 | from pylab import * 43 | dist1 = numpy.loadtxt('SRR1976948.dist', skiprows=1, delimiter=',') 44 | plot(dist1[:,0], dist1[:,1]) 45 | axis(ymax=10000, xmax=1000) 46 | 47 | Then:: 48 | 49 | python2 ~/khmer/sandbox/calc-median-distribution.py SRR1976948.kh \ 50 | SRR1976948.half.fq SRR1976948.readdist 51 | 52 | And:: 53 | 54 | python2 ~/khmer/sandbox/slice-reads-by-coverage.py SRR1976948.kh SRR1976948.half.fq slice.fq -m 0 -M 60 55 | 56 | Assemble the slice 57 | ------------------ 58 | 59 | (Re)install megahit:: 60 | 61 | cd 62 | git clone https://github.com/voutcn/megahit.git 63 | cd megahit 64 | make 65 | 66 | Go back to the slice directory and extract paired ends:: 67 | 68 | cd /mnt/slice 69 | extract-paired-ends.py slice.fq 70 | 71 | Assemble! :: 72 | 73 | ~/megahit/megahit --12 slice.fq.pe -o slice 74 | 75 | The contigs will be in ``slice/final.contigs.fa``. 
76 | -------------------------------------------------------------------------------- /sourmash_gather.md: -------------------------------------------------------------------------------- 1 | Taxonomic classification with sourmash 2 | ====================================== 3 | 4 | A sourmash tutorial 5 | --- 6 | 7 | 8 | ## Objectives 9 | 10 | 1. Classify your reads into taxa 11 | 2. Compare taxonomic classification from multiple datasets 12 | 13 | ## At the beginning 14 | ``` 15 | mkdir sourmash_gather 16 | cd sourmash_gather 17 | curl -O https://s3-us-west-1.amazonaws.com/spacegraphcats.ucdavis.edu/microbe-genbank-sbt-k51-2017.05.09.tar.gz 18 | tar xzf microbe-genbank-sbt-k51-2017.05.09.tar.gz 19 | ``` 20 | ## Installing sourmash 21 | 22 | To install sourmash, run: 23 | 24 | ``` 25 | sudo apt-get -y update && \ 26 | sudo apt-get install -y python3.5-dev python3.5-venv make \ 27 | libc6-dev g++ zlib1g-dev 28 | ``` 29 | 30 | this installs Python 3.5. 31 | 32 | Now, create a local software install and populate it with Jupyter and 33 | other dependencies: 34 | 35 | ``` 36 | python3.5 -m venv ~/py3 37 | . 
~/py3/bin/activate 38 | pip install -U pip 39 | pip install -U Cython 40 | pip install -U jupyter jupyter_client ipython pandas matplotlib scipy scikit-learn khmer 41 | 42 | pip install -U https://github.com/dib-lab/sourmash/archive/master.zip 43 | ``` 44 | 45 | ## Generate a signature for Illumina reads 46 | 47 | ![](_static/Sourmash_flow_diagrams_QC.png) 48 | ![](_static/Sourmash_flow_diagrams_compute.png) 49 | 50 | ## Calculate signatures 51 | ``` 52 | pip install osfclient 53 | osf -p ay94c fetch osfstorage/reads/SRR1976948.abundtrim.subset.pe.fq.gz 54 | ``` 55 | ``` 56 | sourmash compute -k51 --scaled 10000 ../work/SRR1976948.abundtrim.subset.pe.fq.gz -o SRR1976948.scaled10k.k51.sig 57 | ``` 58 | ## Taxonomic classification 59 | ``` 60 | sourmash gather -k51 SRR1976948.scaled10k.k51.sig genbank-k51.sbt.json --csv SRR1976948.scaled10k.k51.csv 61 | ``` 62 | What does the database look like and how does gather work? 63 | 64 | ![](_static/SBT.png) 65 | 66 | You should see output like this 67 | ``` 68 | loaded query: ../work/SRR1976948.abundtrim.s... (k=51, DNA) 69 | loaded 0 signatures and 1 databases total. 70 | overlap p_query p_match 71 | --------- ------- -------- 72 | 1.9 Mbp 9.0% 99.0% LGGB01000030.1 Synergistales bacteriu... 73 | 1.5 Mbp 7.1% 100.0% LGGN01000313.1 Proteiniphilum acetati... 74 | 1.5 Mbp 7.0% 99.3% LGGR01000252.1 Petrotoga mobilis isol... 75 | 1.4 Mbp 6.4% 99.3% LGGV01000121.1 Synergistales bacteriu... 76 | 0.7 Mbp 3.1% 98.5% LGGS01000125.1 Pelotomaculum thermopr... 77 | 0.6 Mbp 2.9% 100.0% LGFW01000077.1 Parcubacteria bacteriu... 78 | 0.6 Mbp 2.7% 98.3% LGGG01000064.1 Parcubacteria bacteriu... 79 | 1.1 Mbp 5.0% 44.6% LGHH01000265.1 Proteiniphilum sp. 51_... 80 | 0.5 Mbp 2.5% 24.1% LGGZ01000271.1 Thermotogales bacteriu... 81 | 1.4 Mbp 6.8% 23.8% LGHG01000227.1 Synergistales bacteriu... 82 | 0.5 Mbp 2.4% 57.7% LGHC01000091.1 Parcubacteria bacteriu... 83 | 110.0 kbp 0.5% 7.4% LGFQ01000086.1 Synergistales bacteriu... 
84 | 230.0 kbp 1.1% 6.2% LGFC01000097.1 Petrotoga mobilis isol... 85 | 100.0 kbp 0.5% 8.9% LGFS01000020.1 Thermovirga lienii iso... 86 | 1.3 Mbp 5.9% 7.1% LGGE01000110.1 Synergistales bacteriu... 87 | 70.0 kbp 0.3% 87.5% LGHI01000016.1 Thermoplasmatales arch... 88 | found less than 10.0 kbp in common. => exiting 89 | 90 | found 16 matches total; 91 | the recovered matches hit 48.4% of the query 92 | ``` 93 | ## Download signatures for comparison 94 | ``` 95 | osf -p ay94c fetch osfstorage/gather_csvs/SRR1977249.reads.scaled10k.k51_gather_output.csv 96 | osf -p ay94c fetch osfstorage/gather_csvs/SRR1977296.reads.scaled10k.k51_gather_output.csv 97 | ``` 98 | 99 | ## Plot interactions with pyupset 100 | 101 | First, open jupyter notebook and then: 102 | ``` 103 | #Install pyupset to generate a visual representation of the "interactions" between datasets 104 | !pip install pyupset 105 | ``` 106 | ``` 107 | #Import pyupset and dependencies 108 | import pyupset as pyu 109 | import matplotlib as mpl 110 | import matplotlib.pyplot as plt 111 | from pickle import load 112 | import pandas as pd 113 | %matplotlib inline 114 | ``` 115 | ``` 116 | #Read in your data 117 | df1=pd.read_csv('SRR1976948.scaled10k.k51.csv') 118 | df2=pd.read_csv('SRR1977249.reads.scaled10k.k51_gather_output.csv') 119 | df3=pd.read_csv('SRR1977296.reads.scaled10k.k51_gather_output.csv') 120 | ``` 121 | ``` 122 | # Create a new dataframe with the column of interest and generate csv with the output 123 | 124 | df1[['name']] 125 | df2[['name']] 126 | df3[['name']] 127 | df1.to_csv('SRR1976948.scaled10k.k51.names.csv') 128 | df2.to_csv('SRR1977249.reads.scaled10k.k51_gather_output.names.csv') 129 | df3.to_csv('SRR1977296.reads.scaled10k.k51_gather_output.names.csv') 130 | ``` 131 | ``` 132 | # Import glob and create a dictionary of dataframes with name 'metaG*csv' with ',' delimiter. Split the file names by 133 | # '_' to generate unique file names for output. 
134 | import glob 135 | 136 | genus_dict={} 137 | for file in glob.glob('*names.csv'): 138 | df=pd.read_csv(file, delimiter = ",") 139 | x=file.split('.')[0] 140 | genus_dict[x]=df 141 | ``` 142 | ``` 143 | pplot=pyu.plot(genus_dict, unique_keys = ['name']) 144 | pplot['figure'].savefig('Hu_metaG_comparison.png') 145 | ``` 146 | ![](_static/Hu_metaG_comparison.png) 147 | -------------------------------------------------------------------------------- /test.html: -------------------------------------------------------------------------------- 1 | 2 | <> 3 | 4 | <> 5 | 6 | 7 | <> 8 | 9 | 10 | karyotype = /mnt/circos/metag.karyotype.txt 11 | 12 | 13 | 14 | 15 | type = line 16 | thickness = 2 17 | 18 | 19 | 20 | max_gap = 1u 21 | file = /mnt/circos/metag.karyotype.txt 22 | color = vdgrey 23 | min = 0 24 | max = 0.015 25 | r0 = 0.5r 26 | r1 = 0.8r 27 | 28 | fill_color = vdgrey_a3 29 | 30 | 31 | 32 | color = vvlgreen 33 | y0 = 0.006 34 | 35 | 36 | color = vvlred 37 | y1 = 0.002 38 | 39 | 40 | 41 | 42 | 43 | color = lgrey_a2 44 | thickness = 1 45 | spacing = 0.025r 46 | 47 | 48 | 49 | 50 | 51 | 52 | condition = var(value) > 0.006 53 | color = dgreen 54 | fill_color = dgreen_a1 55 | 56 | 57 | 58 | condition = var(value) < 0.002 59 | color = dred 60 | fill_color = dred_a1 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | <> 70 | -------------------------------------------------------------------------------- /toc.rst: -------------------------------------------------------------------------------- 1 | Table of Contents 2 | ================= 3 | 4 | Tutorials: 5 | 6 | .. 
toctree:: 7 | :maxdepth: 2 8 | 9 | index 10 | welcome 11 | aws/boot 12 | command-line 13 | running-command-line-blast 14 | quality 15 | assemble 16 | assembly-evaluation 17 | prokka_tutorial 18 | 19 | sourmash_compare 20 | 21 | binning 22 | salmon_tutorial 23 | mapping 24 | slice 25 | anvio 26 | circos_tutorial 27 | workflow 28 | whatsnext 29 | 30 | DATA 31 | INSTALL 32 | README 33 | 34 | day2-install 35 | kmer_trimming 36 | -------------------------------------------------------------------------------- /welcome.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngs-docs/2017-cicese-metagenomics/e16c6067ab9ab80f5768b59eecf2ddf0dbb1d537/welcome.pptx -------------------------------------------------------------------------------- /welcome.rst: -------------------------------------------------------------------------------- 1 | Welcome! 2 | ======== 3 | 4 | 1. Learning goals 5 | ----------------- 6 | 7 | For you: 8 | 9 | * get a first (or second) look at tools; 10 | * gain some experience in the basic command line; 11 | * get 80% of way to a complete analysis of some data; 12 | * introduction to philosophy and perspective of data analysis in science; 13 | 14 | 2. Safe space and code of conduct 15 | --------------------------------- 16 | 17 | This is intended to be a safe and friendly place for learning! 18 | 19 | Please see the Software Carpentry workshop Code of Conduct: http://software-carpentry.org/conduct.html 20 | 21 | In particular, please ask questions, because I guarantee you that your 22 | question will help others! 23 | 24 | 3. Instructor introductions 25 | --------------------------- 26 | 27 | Harriet Alexander - postdoc at UC Davis. 28 | 29 | Phil Brooks - postdoc at UC Davis. 30 | 31 | Titus Brown - prof at UC Davis in the School of Vet Med. 32 | 33 | 4. Amazon Web Service and cloud computing - why?! 
34 | ------------------------------------------------- 35 | 36 | * simplifies software installation; 37 | * can be used for bigger analyses quite easily; 38 | * good for "burst" capacity (just got a data set!); 39 | * accessible everywhere; 40 | 41 | 5. Sticky notes and how they work 42 | --------------------------------------------------- 43 | 44 | Basic rules: 45 | 46 | * no sticky note - "working on it" 47 | * green sticky note - "all is well" 48 | * red sticky note - "need help!" 49 | 50 | Place the sticky notes where we can see them from the back of the room -- 51 | e.g. on the back of your laptop. 52 | 53 | ---- 54 | 55 | Next: `Connecting to your instance `__ 56 | -------------------------------------------------------------------------------- /whatnext.md: -------------------------------------------------------------------------------- 1 | # What's next? 2 | 3 | * CAZy, KEGG 4 | 5 | * So you've got a lot of data... 6 | 7 | * How do I know what programs to use? 8 | -------------------------------------------------------------------------------- /workflow.md: -------------------------------------------------------------------------------- 1 | # Workflow and repeatability discussion 2 | 3 | See CTB's [Mar 2016 workshop on "repeatability"](https://2016-oslo-repeatability.readthedocs.io/en/latest/) - in sum, 4 | 5 | * write down and save the set of software loading or installation 6 | commands you use; 7 | 8 | * you should always use version control (e.g. git) to track any custom 9 | processing scripts or commands; 10 | 11 | * record any long-running scripts or commands that you use, and make it 12 | easy to (re)run them, using either shell scripts or 'make' or some other 13 | workflow system; 14 | 15 | * get as close to automating your data viz and figure generation as you can 16 | with (e.g.)
RMarkdown or Jupyter Notebook; 17 | 18 | Among other things, this provides a set of artifacts that can be sent 19 | to your advisor, provided to your collaborators and (ultimately) 20 | published with your publication. Plus you won't have to remember what 21 | you did - you'll have it written down! 22 | 23 | ---- 24 | 25 | [What's next?](whatsnext) 26 | --------------------------------------------------------------------------------