├── 01.raw_data ├── 40BHFBK1 │ ├── 40BHFBK1_R1.fastq.gz │ └── 40BHFBK1_R2.fastq.gz ├── 40BHFBK2 │ ├── 40BHFBK2_R1.fastq.gz │ └── 40BHFBK2_R2.fastq.gz ├── 40BHFBK3 │ ├── 40BHFBK3_R1.fastq.gz │ └── 40BHFBK3_R2.fastq.gz ├── 40BHFBK4 │ ├── 40BHFBK4_R1.fastq.gz │ └── 40BHFBK4_R2.fastq.gz ├── 40BHFBK1_PF │ ├── 40BHFBK1_PF_R1.fastq.gz │ └── 40BHFBK1_PF_R2.fastq.gz ├── 40BHFBK3_PF │ ├── 40BHFBK3_PF_R1.fastq.gz │ └── 40BHFBK3_PF_R2.fastq.gz └── MANIFEST ├── sequence_data └── metadata.yml ├── create_DB ├── .snakemake │ └── log │ │ ├── 2021-05-26T115231.960767.snakemake.log │ │ ├── 2021-05-26T155428.061801.snakemake.log │ │ ├── 2021-05-26T155241.369810.snakemake.log │ │ ├── 2021-05-26T155251.684002.snakemake.log │ │ ├── 2021-05-26T155501.617298.snakemake.log │ │ ├── 2021-05-26T155630.825466.snakemake.log │ │ └── 2021-05-26T114454.315556.snakemake.log ├── rulegraph.png ├── config │ ├── cluster.yaml │ ├── config.yaml │ └── sample.tsv ├── eukaryote-unite │ ├── config │ │ ├── cluster.yaml │ │ ├── config.yaml │ │ └── sample.tsv │ └── Snakefile └── qsub-submit.sh ├── 00.mapping ├── treatment-metadata.tsv ├── basins-treatment.tsv ├── first_analysis │ ├── indoors-cntVsB12-treatment.tsv │ ├── final_metadata.xlsx │ ├── mock-treatment.tsv │ ├── pe-dada2 │ │ ├── indoors-cntVsB12-treatment.tsv │ │ ├── mock-treatment.tsv │ │ ├── outdoors-treatment.tsv │ │ ├── indoors-minus-cntVsB12-treatment.tsv │ │ ├── indoors-treatment.tsv │ │ ├── mock.tsv │ │ ├── combined-treatment.tsv │ │ ├── indoors-cntVsB12.tsv │ │ ├── indoors-minus-cntVsB12.tsv │ │ ├── outdoors.tsv │ │ └── indoors.tsv │ ├── outdoors-treatment.tsv │ ├── indoors-minus-cntVsB12-treatment.tsv │ ├── indoors-treatment.tsv │ ├── mock.tsv │ ├── combined-treatment.tsv │ ├── indoors-cntVsB12.tsv │ ├── indoors-minus-cntVsB12.tsv │ ├── outdoors.tsv │ └── indoors.tsv ├── outdoors-treatment.tsv ├── metadata.tsv ├── indoors-treatment.tsv ├── basins-edited.tsv ├── basins.tsv ├── indoors-edited.tsv ├── indoors.tsv ├── outdoors-edited.tsv └── outdoors.tsv ├── images └── rulegraph.png ├── docker ├── download_seqs.sh ├── get_samples.sh ├── make_manifest.sh ├── rename_files.sh ├── Makefile └── config.yaml ├── scripts ├── join_reads.sh ├── find-probes.sh ├── functions.sh ├── vsearch-join-pairs.sh ├── blast-seqs.sh ├── 05.run_complete-submit-slurm.sh ├── 01.import-submit-slurm.sh ├── 04.filter_rare-submit-slurm.sh ├── 03.filter_taxa-submit-slurm.sh ├── 02.denoise-submit-slurm.sh ├── classify_ASVs.sh ├── tabulate-metadata.sh ├── phylogeny_tree.sh ├── deblur_denoize.sh ├── .bash_profile ├── generate-krona.py ├── default_variables.sh ├── dada2_denoize.sh ├── krona-arg.py ├── export_table.sh ├── new-dada2_denoize.sh ├── filter-samples.sh ├── .bashrc ├── picrust2_analysis.sh ├── filter_feature_table.sh ├── qiime2_api.ipynb ├── .ipynb_checkpoints │ └── qiime2_api-checkpoint.ipynb ├── ancom_differential_abundance.sh ├── run_pear.pl └── taxa-plots.sh ├── rules ├── count_sequences.smk ├── sequence_length.smk └── filter_samples.smk ├── config ├── cluster.yaml ├── sample.tsv └── config.yaml ├── qsub-submit.sh ├── slurm.mk ├── local.mk ├── examples ├── single-MANIFEST └── merge-MANIFEST └── README.md /01.raw_data/40BHFBK1/40BHFBK1_R1.fastq.gz: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /01.raw_data/40BHFBK1/40BHFBK1_R2.fastq.gz: -------------------------------------------------------------------------------- 1 | 
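The per-sample layout shown in the tree above (01.raw_data/<sample>/<sample>_R1.fastq.gz plus the matching _R2 file) is what the MANIFEST and the docker/make_manifest.sh helper further down assume. A small, hypothetical sanity check for that layout (the loop and its message are illustrative, not part of the repository):
#!/usr/bin/env bash
# Warn about any sample directory under 01.raw_data/ that is missing one of its paired read files
for dir in 01.raw_data/*/; do
    sample=$(basename "$dir")
    for read in R1 R2; do
        [ -f "${dir}${sample}_${read}.fastq.gz" ] || echo "missing ${sample}_${read}.fastq.gz in ${dir}"
    done
done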
-------------------------------------------------------------------------------- /01.raw_data/40BHFBK2/40BHFBK2_R1.fastq.gz: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /01.raw_data/40BHFBK2/40BHFBK2_R2.fastq.gz: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /01.raw_data/40BHFBK3/40BHFBK3_R1.fastq.gz: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /01.raw_data/40BHFBK3/40BHFBK3_R2.fastq.gz: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /01.raw_data/40BHFBK4/40BHFBK4_R1.fastq.gz: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /01.raw_data/40BHFBK4/40BHFBK4_R2.fastq.gz: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /01.raw_data/40BHFBK1_PF/40BHFBK1_PF_R1.fastq.gz: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /01.raw_data/40BHFBK1_PF/40BHFBK1_PF_R2.fastq.gz: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /01.raw_data/40BHFBK3_PF/40BHFBK3_PF_R1.fastq.gz: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /01.raw_data/40BHFBK3_PF/40BHFBK3_PF_R2.fastq.gz: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sequence_data/metadata.yml: -------------------------------------------------------------------------------- 1 | {'phred-offset': 33} 2 | -------------------------------------------------------------------------------- /create_DB/.snakemake/log/2021-05-26T115231.960767.snakemake.log: -------------------------------------------------------------------------------- 1 | Building DAG of jobs... 
2 | -------------------------------------------------------------------------------- /00.mapping/treatment-metadata.tsv: -------------------------------------------------------------------------------- 1 | sample-id Time Group 2 | 40BHFBK 40 HFBK 3 | 40BHFBK_PF 40 HFBK_PF 4 | -------------------------------------------------------------------------------- /images/rulegraph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olabiyi/snakemake-workflow-qiime2/HEAD/images/rulegraph.png -------------------------------------------------------------------------------- /00.mapping/basins-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | Control control 3 | B12_enriched indoor_waterWashed_B12 -------------------------------------------------------------------------------- /create_DB/rulegraph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olabiyi/snakemake-workflow-qiime2/HEAD/create_DB/rulegraph.png -------------------------------------------------------------------------------- /00.mapping/first_analysis/indoors-cntVsB12-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | Control control 3 | indoor+water_washed+B12 indoor_waterWashed_B12 4 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/final_metadata.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olabiyi/snakemake-workflow-qiime2/HEAD/00.mapping/first_analysis/final_metadata.xlsx -------------------------------------------------------------------------------- /00.mapping/first_analysis/mock-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | MOCK MOCK 3 | MOCK-DreamTaq MOCK_DreamTaq 4 | MOCK-NegativeControl MOCK_NegativeControl 5 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/pe-dada2/indoors-cntVsB12-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | control control 3 | indoor+water_washed+B12 indoor_waterWashed_B12 4 | -------------------------------------------------------------------------------- /00.mapping/outdoors-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | Medium Medium 3 | water_washed Washed_with_Sterile_DW 4 | water_washed_Surface_sterile Washed_with_7%_H2O2 -------------------------------------------------------------------------------- /create_DB/config/cluster.yaml: -------------------------------------------------------------------------------- 1 | __default__: 2 | queue: bioinfo.q 3 | shell: /bin/bash 4 | threads: 20 5 | time: 1:00:00 6 | node: 1 7 | 8 | 9 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/pe-dada2/mock-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | MOCK MOCK 3 | MOCK-DreamTaq MOCK_DreamTaq 4 | MOCK-NegativeControl MOCK_NegativeControl 5 | -------------------------------------------------------------------------------- /create_DB/eukaryote-unite/config/cluster.yaml: 
-------------------------------------------------------------------------------- 1 | __default__: 2 | queue: bioinfo.q 3 | shell: /bin/bash 4 | threads: 20 5 | time: 1:00:00 6 | node: 1 7 | 8 | 9 | -------------------------------------------------------------------------------- /create_DB/.snakemake/log/2021-05-26T155428.061801.snakemake.log: -------------------------------------------------------------------------------- 1 | SyntaxError in line 81 of /gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/create_uniteDB/Snakefile: 2 | EOF in multi-line string (Snakefile, line 81) 3 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/outdoors-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | Medium Medium 3 | outdoor+water_washed outdoor_waterWashed 4 | outdoor+water_washed+surface_sterile outdoor_waterwashed_surfaceSterile 5 | outdoor outdoor 6 | -------------------------------------------------------------------------------- /docker/download_seqs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | awk 'BEGIN{FS=","; OFS="\n"} NR>1{print $18,$19}' 00.mapping/Sample_Detail.csv > files2download.txt && \ 5 | parallel -j 20 aws s3 cp {} 01.raw_data/ :::: files2download.txt 6 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/pe-dada2/outdoors-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | Medium Medium 3 | outdoor+water_washed outdoor_waterWashed 4 | outdoor+water_washed+surface_sterile outdoor_waterwashed_surfaceSterile 5 | outdoor outdoor 6 | -------------------------------------------------------------------------------- /00.mapping/metadata.tsv: -------------------------------------------------------------------------------- 1 | sample-id Time Group Treatment 2 | 40BHFBK1 40 BHFBK 40BHFBK 3 | 40BHFBK2 40 BHFBK 40BHFBK 4 | 40BHFBK3 40 BHFBK 40BHFBK 5 | 40BHFBK4 40 BHFBK 40BHFBK 6 | 40BHFBK1_PF 40 BHFBK 40BHFBK_PF 7 | 40BHFBK3_PF 40 BHFBK 40BHFBK_PF 8 | -------------------------------------------------------------------------------- /00.mapping/indoors-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | water_washedSterile_antibiotic water_washed_sterile_plant_grown_with_antibiotic 3 | water_washed_sterile water_washed_sterile_plant 4 | water_washed water_washed_plant 5 | sterile_antibiotic sterile_plant_grown_with_antibiotic 6 | sterile sterile_plant -------------------------------------------------------------------------------- /create_DB/.snakemake/log/2021-05-26T155241.369810.snakemake.log: -------------------------------------------------------------------------------- 1 | KeyError in line 78 of /gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/create_uniteDB/Snakefile: 2 | 'SILVA_CLASSIFIER' 3 | File "/gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/create_uniteDB/Snakefile", line 78, in 4 | -------------------------------------------------------------------------------- /create_DB/.snakemake/log/2021-05-26T155251.684002.snakemake.log: -------------------------------------------------------------------------------- 1 | KeyError in line 78 of /gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/create_uniteDB/Snakefile: 2 | 'SILVA_CLASSIFIER' 3 | File 
"/gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/create_uniteDB/Snakefile", line 78, in 4 | -------------------------------------------------------------------------------- /create_DB/.snakemake/log/2021-05-26T155501.617298.snakemake.log: -------------------------------------------------------------------------------- 1 | KeyError in line 78 of /gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/create_uniteDB/Snakefile: 2 | 'SILVA_CLASSIFIER' 3 | File "/gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/create_uniteDB/Snakefile", line 78, in 4 | -------------------------------------------------------------------------------- /create_DB/.snakemake/log/2021-05-26T155630.825466.snakemake.log: -------------------------------------------------------------------------------- 1 | KeyError in line 104 of /gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/create_uniteDB/Snakefile: 2 | 'forward_primer' 3 | File "/gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/create_uniteDB/Snakefile", line 104, in 4 | -------------------------------------------------------------------------------- /docker/get_samples.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #mkdir 01.raw_data/ 00.mapping/ 4 | echo "Below are the sample names for config.yaml" 5 | SAMPLES=($(ls -1 01.raw_data/ | grep -Ev "MANIFEST|seq" - |sort -V)) && \ 6 | (echo -ne '[';echo ${SAMPLES[*]} | sed -E 's/ /, /g' | sed -E 's/([A-Za-z0-9_-]+)/"\1"/g'; echo -e ']') 7 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/pe-dada2/indoors-minus-cntVsB12-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | indoor+water_washed+sterile indoor_waterWashed_sterile 3 | indoor+water_washed indoor_waterWashed 4 | indoor+sterile indoor_sterile 5 | indoor+sterile+antibiotic indoor_sterile_antibiotic 6 | indoor+water_washed+sterile+antibiotic indoor_waterWashed_sterile_antibiotic 7 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/indoors-minus-cntVsB12-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | indoor+water_washed+sterile indoor_waterWashed_sterile 3 | indoor+water_washed indoor_waterWashed 4 | indoor+water_washed_sterile indoor_waterWashed_sterile 5 | indoor+sterile indoor_sterile 6 | indoor+sterile+antibiotic indoor_sterile_antibiotic 7 | indoor+water_washed+sterile+antibiotic indoor_waterWashed_sterile_antibiotic 8 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/pe-dada2/indoors-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | indoor+water_washed+sterile indoor_waterWashed_sterile 3 | control control 4 | indoor+water_washed+B12 indoor_waterWashed_B12 5 | indoor+water_washed indoor_waterWashed 6 | indoor+sterile indoor_sterile 7 | indoor+sterile+antibiotic indoor_sterile_antibiotic 8 | indoor+water_washed+sterile+antibiotic indoor_waterWashed_sterile_antibiotic 9 | -------------------------------------------------------------------------------- /scripts/join_reads.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N join_reads 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | 
#$ -notify 8 | #$ -pe shared 40 9 | 10 | set -e 11 | 12 | run_pear.pl -o stitched_reads/ sequence_data/*.fastq.gz 13 | 14 | # clean the folder containing the assembled reads 15 | rm -rf stitched_reads/*.unassembled* stitched_reads/*discarded* 16 | 17 | # gzip to save memory 18 | gzip stitched_reads/*.fastq 19 | -------------------------------------------------------------------------------- /rules/count_sequences.smk: -------------------------------------------------------------------------------- 1 | from os import path 2 | 3 | rule count_sequences: 4 | input: expand("01.raw_data/{sample}.fastq.gz", sample=config['samples']) 5 | output: "sequence_stats/reads_stats.tsv" 6 | log: "log/count_sequences/count_sequences.log" 7 | threads: 10 8 | params: 9 | in_dir=lambda w, input: path.dirname(input[0]) 10 | shell: 11 | "seqkit stats {params.in_dir}/*.fastq.gz > {output} " 12 | 13 | -------------------------------------------------------------------------------- /rules/sequence_length.smk: -------------------------------------------------------------------------------- 1 | from os import path 2 | 3 | rule Get_sequence_length: 4 | input: "01.raw_data/{sample}.fastq.gz" 5 | output: "02.Get_sequence_length/{sample}_sequence_length.tsv" 6 | log: "logs/Get_sequence_length/Get_sequence_length.log" 7 | threads: 10 8 | params: 9 | in_dir=lambda w, input: path.dirname(input[0]) 10 | shell: 11 | "bioawk -c fastx 'BEGIN{{OFS=\"\\t\"}} {{print $name,length($seq)}}' {input} > {output}" 12 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/indoors-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | indoor+water_washed+sterile indoor_waterWashed_sterile 3 | Control control 4 | indoor+water_washed+B12 indoor_waterWashed_B12 5 | indoor+water_washed indoor_waterWashed 6 | indoor+water_washed_sterile indoor_waterWashed_sterile 7 | indoor+sterile indoor_sterile 8 | indoor+sterile+antibiotic indoor_sterile_antibiotic 9 | indoor+water_washed+sterile+antibiotic indoor_waterWashed_sterile_antibiotic 10 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/mock.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number batch pcr_cycles medium_or_plant sterile_plant grown_with_antibiotics location water_washed surface_sterilization b12_enriched treatment description 2 | M-1 NA B NA NA NA NA NA NA NA NA MOCK Mock_community 3 | M-2 NA B NA NA NA NA NA NA NA NA MOCK Mock_community 4 | M-3 NA B NA NA NA NA NA NA NA NA MOCK-DreamTaq Mock_community(Dream_taq_ready_mix) 5 | M-NC NA B NA NA NA NA NA NA NA NA MOCK-NegativeControl Mock_community_negative_control 6 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/pe-dada2/mock.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number batch pcr_cycles medium_or_plant sterile_plant grown_with_antibiotics location water_washed surface_sterilization b12_enriched treatment description 2 | Osnat157-M-1 NA B NA NA NA NA NA NA NA NA MOCK Mock_community 3 | Osnat158-M-2 NA B NA NA NA NA NA NA NA NA MOCK Mock_community 4 | Osnat159-M-3 NA B NA NA NA NA NA NA NA NA MOCK-DreamTaq Mock_community(Dream_taq_ready_mix) 5 | Osnat160-M- NA B NA NA NA NA NA NA NA NA MOCK-NegativeControl Mock_community_negative_control 6 |
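For reference, the two Snakemake rules above (count_sequences and Get_sequence_length) each wrap a single shell command. A minimal sketch of the equivalent commands for one sample, assuming seqkit and bioawk are on the PATH, that the output directories already exist, and using the 40BHFBK1 files from 01.raw_data/ purely as an illustrative input:
# Read-count and length summary for every fastq.gz in one sample directory (what count_sequences produces)
seqkit stats 01.raw_data/40BHFBK1/*.fastq.gz > sequence_stats/reads_stats.tsv
# Per-read lengths as a two-column, tab-separated table (what Get_sequence_length produces per file)
bioawk -c fastx 'BEGIN{OFS="\t"} {print $name, length($seq)}' 01.raw_data/40BHFBK1/40BHFBK1_R1.fastq.gz > 02.Get_sequence_length/40BHFBK1_R1_sequence_length.tsv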
-------------------------------------------------------------------------------- /scripts/find-probes.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N Find_probes 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 40 9 | 10 | set -e 11 | 12 | #source activate qiime2-2020.6 13 | #export PERL5LIB='/gpfs0/bioinfo/users/obayomi/miniconda3/envs/qiime2-2020.6/lib/site_perl/5.26.2/x86_64-linux-thread-multi' 14 | PROBES=('ACTCCTACGGGAGGCAGC' 'GGTGACAGTGGGCAGCGA' 'AAACGATGTGGGAAGGC' 'AAACGAAGTGGGAAGGC') 15 | 16 | FILES=($(find "sequence_data/" -type f -name "*gz")) 17 | 18 | parallel --jobs 0 zgrep {} ${FILES[*]} '>' find-probe/{}.txt ::: ${PROBES[*]} 19 | -------------------------------------------------------------------------------- /config/cluster.yaml: -------------------------------------------------------------------------------- 1 | __default__: 2 | system: 'slurm' 3 | account: "132773335440" 4 | time: "12:00:00" # walltime, HH:MM:SS 5 | mem: "10g" # in GB 6 | threads: "10" 7 | queue: "xlong" # gpu 8 | nodes: "1" 9 | 10 | 11 | Trim_primers: 12 | mem: "60g" # in GB 13 | threads: "28" 14 | 15 | Denoise_reads: 16 | mem: "60g" # in GB 17 | threads: "28" 18 | 19 | Build_phylogenetic_tree: 20 | mem: "60g" # in GB 21 | threads: "28" 22 | 23 | Assign_taxonomy: 24 | mem: "210g" # in GB 25 | threads: "28" 26 | 27 | Function_annotation: 28 | mem: "60g" # in GB 29 | threads: "28" 30 | time: "17:00:00" 31 | -------------------------------------------------------------------------------- /scripts/functions.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Check fastq encoding 4 | function check_fastq_encoding(){ 5 | 6 | # USAGE: 7 | # check_fastq_encoding file.fastq 8 | local fastq_file=$1 9 | 10 | head -n 40 ${fastq_file} | \ 11 | awk '{if(NR%4==0) printf("%s",$0);}' | \ 12 | od -A n -t u1 | \ 13 | awk 'BEGIN{min=100;max=0;}{ 14 | for(i=1;i<=NF;i++) {if($i>max) max=$i; if($i<min) min=$i;}} \ 15 | END{ \ 16 | if(max<=74 && min<59) print "Phred+33"; \ 17 | else if(max>73 && min>=64) print "Phred+64"; \ 18 | else if(min>=59 && min<64 && max>73) print "Solexa+64"; \ 19 | else print "Unknown score encoding\!"; 20 | }' 21 | 22 | } 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/pe-dada2/combined-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | Medium Medium 3 | outdoor+water_washed outdoor_waterWashed 4 | indoor+water_washed+sterile indoor_waterWashed_sterile 5 | outdoor+water_washed+surface_sterile outdoor_waterwashed_surfaceSterile 6 | control control 7 | indoor+water_washed+B12 indoor_waterWashed_B12 8 | indoor+water_washed indoor_waterWashed 9 | outdoor outdoor 10 | indoor+sterile indoor_sterile 11 | indoor+sterile+antibiotic indoor_sterile_antibiotic 12 | indoor+water_washed+sterile+antibiotic indoor_waterWashed_sterile_antibiotic 13 | MOCK MOCK 14 | MOCK-DreamTaq MOCK_DreamTaq 15 | MOCK-NegativeControl MOCK_NegativeControl 16 | -------------------------------------------------------------------------------- /scripts/vsearch-join-pairs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N join_vsearch 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 40 9 | 10 | set -e 11 | 12 | source activate qiime2-2020.6 13 | 14 | # Stitch the forward and reverse reads together using
vsearch 15 | qiime vsearch join-pairs \ 16 | --i-demultiplexed-seqs 01.import/reads.qza \ 17 | --p-truncqual 20 \ 18 | --p-minlen 400 \ 19 | --p-maxns 20 \ 20 | --p-minmergelen 400 \ 21 | --p-maxmergelen 600 \ 22 | --o-joined-sequences 02.Join/vsearch-joined-reads.qza 23 | 24 | # view the joined reads 25 | qiime demux summarize \ 26 | --i-data 02.Join/vsearch-joined-reads.qza \ 27 | --o-visualization 02.QC/vsearch-joined-reads.qzv 28 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/combined-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | Medium Medium 3 | outdoor+water_washed outdoor_waterWashed 4 | indoor+water_washed+sterile indoor_waterWashed_sterile 5 | outdoor+water_washed+surface_sterile outdoor_waterwashed_surfaceSterile 6 | control control 7 | indoor+water_washed+B12 indoor_waterWashed_B12 8 | indoor+water_washed indoor_waterWashed 9 | outdoor outdoor 10 | indoor+water_washed_sterile indoor_waterWashed_sterile 11 | indoor+sterile indoor_sterile 12 | indoor+sterile+antibiotic indoor_sterile_antibiotic 13 | indoor+water_washed+sterile+antibiotic indoor_waterWashed_sterile_antibiotic 14 | MOCK MOCK 15 | MOCK-DreamTaq MOCK_DreamTaq 16 | MOCK-NegativeControl MOCK_NegativeControl 17 | -------------------------------------------------------------------------------- /docker/make_manifest.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | SAMPLES=($(ls -1 01.raw_data/ | grep -Ev "MANIFEST|seq" - |sort -V)) 4 | 5 | # Creating MANIFEST FILE 6 | (echo "sample-id,absolute-filepath,direction"; for SAMPLE in ${SAMPLES[*]}; \ 7 | do echo -ne "${SAMPLE},$PWD/01.raw_data/${SAMPLE}/${SAMPLE}_R1.fastq.gz,forward\n${SAMPLE},$PWD/01.raw_data/${SAMPLE}/${SAMPLE}_R2.fastq.gz,reverse\n";done) \ 8 | > 01.raw_data/MANIFEST 9 | 10 | # Creating the samples.tsv file" 11 | (echo -ne "SampleID\tType\tOld_name\tNew_name\n"; \ 12 | for SAMPLE in ${SAMPLES[*]}; \ 13 | do echo -ne \ 14 | "${SAMPLE}\tForward\t01.raw_data/${SAMPLE}/${SAMPLE}_R1.fastq.gz\t01.raw_data/${SAMPLE}/${SAMPLE}_R1.fastq.gz\n${SAMPLE}\tReverse\t01.raw_data/${SAMPLE}/${SAMPLE}_R2.fastq.gz\t01.raw_data/${SAMPLE}/${SAMPLE}_R2.fastq.gz\n";done) \ 15 | > sample.tsv 16 | -------------------------------------------------------------------------------- /docker/rename_files.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | awk 'BEGIN{FS=","; OFS="\t"} NR>1{ gsub("s3://biodsa-sequencing-data/SEQ44XXX/SEQ44733/Reads/", "", $18); \ 4 | gsub("s3://biodsa-sequencing-data/SEQ44XXX/SEQ44733/Reads/", "", $19); \ 5 | print $1,$18,$19}' 00.mapping/Sample_Detail.csv > 00.mapping/reads_mapping.txt 6 | 7 | SAMPLES=($(awk 'BEGIN{FS=OFS="\t"} {print $1}' 00.mapping/reads_mapping.txt)) 8 | FORWARD=($(awk 'BEGIN{FS=OFS="\t"} {print $2}' 00.mapping/reads_mapping.txt)) 9 | REVERSE=($(awk 'BEGIN{FS=OFS="\t"} {print $3}' 00.mapping/reads_mapping.txt)) 10 | 11 | 12 | parallel -j 10 --link \ 13 | "[ -d 01.raw_data/{3}/ ] || mkdir 01.raw_data/{3}/ && mv 01.raw_data/{1} 01.raw_data/{3}/{3}_R1.fastq.gz && mv 01.raw_data/{2} 01.raw_data/{3}/{3}_R2.fastq.gz" \ 14 | ::: ${FORWARD[*]} ::: ${REVERSE[*]} ::: ${SAMPLES[*]} 15 | -------------------------------------------------------------------------------- /scripts/blast-seqs.sh: -------------------------------------------------------------------------------- 1 | 
#!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N blast_seqs 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 72 9 | 10 | set -euo pipefail 11 | 12 | # database after retrieving the sequence by ASV id from the representative sequence file 13 | DATABASE="/gpfs0/bioinfo/users/obayomi/databases/non_redundant_NCBI_DB/non_redundant" 14 | QUERY="/gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/13.find_B12_bacteria/blast/potential_B12_bacteria_sequences.fasta" 15 | OUT="/gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/13.find_B12_bacteria/blast/potential_B12_bacteria_blast.tsv" 16 | 17 | cat ${QUERY} | \ 18 | parallel --jobs 0 --recstart '>' \ 19 | --pipe blastn -db ${DATABASE} -outfmt \"6 qseqid sseqid stitle pident length mismatch gapopen qstart qend sstart send evalue bitscore\" \ 20 | -max_target_seqs 5 -out ${OUT} -query - 21 | -------------------------------------------------------------------------------- /qsub-submit.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | #$ -S /bin/bash 3 | #$ -q bioinfo.q 4 | #$ -V 5 | #$ -cwd 6 | #$ -N submit-jobs 7 | #$ -pe shared 1 8 | 9 | set -e 10 | 11 | # Activate the main conda environment 12 | source activate qiime2-2020.6 13 | 14 | 15 | # Generate the rule graph on the commadline 16 | # Rule graph 17 | # snakemake -s Snakefile --rulegraph | dot -Tpng > rulegraph.png 18 | # Directed Acyclic Graph (DAG) 19 | # snakemake -s Snakefile --dag | dot -Tpng > dag.png 20 | 21 | # Run snmakemake on the cluster 22 | # --jobs 100 # submit a maximum 100 jobs 23 | # --latency-wait 60 # wait for 60 seconds before declaring that a job has failed 24 | snakemake \ 25 | --keep-going \ 26 | --restart-times 3 \ 27 | --rerun-incomplete \ 28 | --cluster-config config/config.yaml \ 29 | --cluster 'qsub -q bioinfo.q -S /bin/bash -cwd -V -N {rule}.{wildcards} -e logs/{rule}/{rule}.{wildcards}.e -o logs/{rule}/{rule}.{wildcards}.o -pe shared {threads}' \ 30 | --jobs 10 \ 31 | --latency-wait 60 32 | 33 | -------------------------------------------------------------------------------- /create_DB/qsub-submit.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | #$ -S /bin/bash 3 | #$ -q bioinfo.q 4 | #$ -V 5 | #$ -cwd 6 | #$ -N submit-jobs 7 | #$ -pe shared 1 8 | 9 | set -e 10 | 11 | # Activate the main conda environment 12 | source activate qiime2-2020.6 13 | 14 | 15 | # Generate the rule graph on the commadline 16 | # Rule graph 17 | # snakemake -s Snakefile --rulegraph | dot -Tpng > rulegraph.png 18 | # Directed Acyclic Graph (DAG) 19 | # snakemake -s Snakefile --dag | dot -Tpng > dag.png 20 | 21 | # Run snmakemake on the cluster 22 | # --jobs 100 # submit a maximum 100 jobs 23 | # --latency-wait 60 # wait for 60 seconds before declaring that a job has failed 24 | snakemake \ 25 | --keep-going \ 26 | --restart-times 3 \ 27 | --rerun-incomplete \ 28 | --cluster-config config/config.yaml \ 29 | --cluster 'qsub -q bioinfo.q -S /bin/bash -cwd -V -N {rule}.{wildcards} -e logs/{rule}/{rule}.{wildcards}.e -o logs/{rule}/{rule}.{wildcards}.o -pe shared {threads}' \ 30 | --jobs 10 \ 31 | --latency-wait 60 32 | 33 | -------------------------------------------------------------------------------- /create_DB/config/config.yaml: -------------------------------------------------------------------------------- 1 | 2 | QIIME2_ENV: "source activate /home/jeffbrady/miniconda3/envs/qiime2-2020.6" 3 | 4 | # Download pre-trained silva 
database for qiime2 5 | 6 | # Get the pre-trained full-length SILVA 99% classifier 7 | SILVA_CLASSIFIER: "https://data.qiime2.org/2020.6/common/silva-138-99-nb-classifier.qza" 8 | 9 | # Get the raw preformatted sequences 10 | SILVA_SEQUENCES: "https://data.qiime2.org/2020.6/common/silva-138-99-seqs.qza" 11 | 12 | # Get the preformatted taxonomy 13 | SILVA_TAXONOMY: "https://data.qiime2.org/2020.6/common/silva-138-99-tax.qza" 14 | 15 | 16 | # Unite fungi databse for qiime2 17 | UNITE_URL: "https://files.plutof.ut.ee/public/orig/98/AE/98AE96C6593FC9C52D1C46B96C2D9064291F4DBA625EF189FEC1CCAFCF4A1691.gz" 18 | 19 | # Set tool specific parameters 20 | # Sample primers for 341F and 806R 21 | parameters: 22 | extract_sequence: 23 | forward_primer: "GTGCCAGCMGCCGCGGTAA" 24 | reverse_primer: "GGACTACHVGGGTWTCTAAT" 25 | min_length: 100 26 | max_length: 800 27 | trunc_length: 585 28 | -------------------------------------------------------------------------------- /create_DB/eukaryote-unite/config/config.yaml: -------------------------------------------------------------------------------- 1 | 2 | QIIME2_ENV: "source activate /home/jeffbrady/miniconda3/envs/qiime2-2020.6" 3 | 4 | # Download pre-trained silva database for qiime2 5 | 6 | # Get the pre-trained full-length SILVA 99% classifier 7 | SILVA_CLASSIFIER: "https://data.qiime2.org/2020.6/common/silva-138-99-nb-classifier.qza" 8 | 9 | # Get the raw preformatted sequences 10 | SILVA_SEQUENCES: "https://data.qiime2.org/2020.6/common/silva-138-99-seqs.qza" 11 | 12 | # Get the preformatted taxonomy 13 | SILVA_TAXONOMY: "https://data.qiime2.org/2020.6/common/silva-138-99-tax.qza" 14 | 15 | 16 | # Unite fungi databse for qiime2 17 | UNITE_URL: "https://files.plutof.ut.ee/public/orig/1D/31/1D31FA3A308BDC2FB2750D62C0AA40C5058C15405A3CC5C626CC3A3F5E3903ED.tgz" 18 | 19 | # Set tool specific parameters 20 | # Sample primers for 341F and 806R 21 | parameters: 22 | extract_sequence: 23 | forward_primer: "GTGCCAGCMGCCGCGGTAA" 24 | reverse_primer: "GGACTACHVGGGTWTCTAAT" 25 | min_length: 100 26 | max_length: 800 27 | trunc_length: 585 28 | -------------------------------------------------------------------------------- /00.mapping/basins-edited.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number treatment description 2 | 38A 38 Control Control_indoor_plants_washed_with_water 3 | 39A 39 Control Control_indoor_plants_washed_with_water 4 | 40A 40 Control Control_indoor_plants_washed_with_water 5 | 41A 41 Control Control_indoor_plants_washed_with_water 6 | 42A 42 Control Control_indoor_plants_washed_with_water 7 | 43A 43 Control Control_indoor_plants_washed_with_water 8 | 44A 44 Control Control_indoor_plants_washed_with_water 9 | 45A 45 Control Control_indoor_plants_washed_with_water 10 | 46A 46 Control Control_indoor_plants_washed_with_water 11 | 47A 47 B12_enriched Modified_Hinoman_medium_with_B12_enriched 12 | 48A 48 B12_enriched Modified_Hinoman_medium_with_B12_enriched 13 | 49A 49 B12_enriched Modified_Hinoman_medium_with_B12_enriched 14 | 50A 50 B12_enriched Modified_Hinoman_medium_with_B12_enriched 15 | 51A 51 B12_enriched Modified_Hinoman_medium_with_B12_enriched 16 | 52A 52 B12_enriched Modified_Hinoman_medium_with_B12_enriched 17 | 53A 53 B12_enriched Modified_Hinoman_medium_with_B12_enriched 18 | 54A 54 B12_enriched Modified_Hinoman_medium_with_B12_enriched 19 | -------------------------------------------------------------------------------- /scripts/05.run_complete-submit-slurm.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=complete #Set the job name to "JobExample2" 3 | #SBATCH --time=23:00:00 #Set the wall clock limit to 6hr and 30min 4 | #SBATCH --nodes=1 #Request 1 node 5 | #SBATCH --ntasks=1 #Request 1 tasks/cores per node 6 | #SBATCH --mem=1G #Request 1GB per node 7 | #SBATCH --output=complete.o.%j #Send stdout/err to "Example2Out.[jobID]" 8 | #SBATCH --error=complete.e.%j #Send std err to "Example2error.[jobID]" 9 | 10 | module purge 11 | module load iccifort/2020.1.217 12 | module load impi/2019.7.217 13 | module load snakemake/5.26.1-Python-3.8.2 14 | 15 | # Get the rarefation depth for diversity analysis after viewing "08.Filter_feature_table/filtered_table.qzv" and run the complete pipeline 16 | snakemake \ 17 | --jobs 10 \ 18 | --keep-going \ 19 | --rerun-incomplete \ 20 | --cluster-config config/cluster.yaml \ 21 | --cluster "sbatch --partition {cluster.queue} --job-name={rule}.{wildcards} --mem={cluster.mem} --time={cluster.time} --ntasks={cluster.threads}" 22 | 23 | -------------------------------------------------------------------------------- /scripts/01.import-submit-slurm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=import-sequences #Set the job name to "JobExample2" 3 | #SBATCH --time=10:00:00 #Set the wall clock limit to 6hr and 30min 4 | #SBATCH --nodes=1 #Request 1 node 5 | #SBATCH --ntasks=1 #Request 1 tasks/cores per node 6 | #SBATCH --mem=1G #Request 1GB per node 7 | #SBATCH --output=import-seqs.o.%j #Send stdout/err to "Example2Out.[jobID]" 8 | #SBATCH --error=import-seqs.e.%j #Send std err to "Example2error.[jobID]" 9 | 10 | module purge 11 | module load iccifort/2020.1.217 12 | module load impi/2019.7.217 13 | module load snakemake/5.26.1-Python-3.8.2 14 | 15 | # import reads and check their quality to determine trunc lengths for dada2 16 | snakemake \ 17 | --jobs 10 \ 18 | --keep-going \ 19 | --rerun-incomplete \ 20 | --cluster-config config/cluster.yaml \ 21 | --cluster "sbatch --partition {cluster.queue} --mem={cluster.mem} --time={cluster.time} --ntasks={cluster.threads}" \ 22 | "04.QC/trimmed_reads_qual_viz.qzv" "04.QC/raw_reads_qual_viz.qzv" 23 | 24 | 25 | -------------------------------------------------------------------------------- /00.mapping/basins.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number treatment description 2 | 37A 37 Control Control_indoor_plants_washed_with_water 3 | 38A 38 Control Control_indoor_plants_washed_with_water 4 | 39A 39 Control Control_indoor_plants_washed_with_water 5 | 40A 40 Control Control_indoor_plants_washed_with_water 6 | 41A 41 Control Control_indoor_plants_washed_with_water 7 | 42A 42 Control Control_indoor_plants_washed_with_water 8 | 43A 43 Control Control_indoor_plants_washed_with_water 9 | 44A 44 Control Control_indoor_plants_washed_with_water 10 | 45A 45 Control Control_indoor_plants_washed_with_water 11 | 46A 46 B12_enriched Modified_Hinoman_medium_with_B12_enriched 12 | 47A 47 B12_enriched Modified_Hinoman_medium_with_B12_enriched 13 | 48A 48 B12_enriched Modified_Hinoman_medium_with_B12_enriched 14 | 49A 49 B12_enriched Modified_Hinoman_medium_with_B12_enriched 15 | 50A 50 B12_enriched Modified_Hinoman_medium_with_B12_enriched 16 | 51A 51 B12_enriched Modified_Hinoman_medium_with_B12_enriched 17 | 52A 52 B12_enriched Modified_Hinoman_medium_with_B12_enriched 18 | 53A 
53 B12_enriched Modified_Hinoman_medium_with_B12_enriched 19 | 54A 54 B12_enriched Modified_Hinoman_medium_with_B12_enriched 20 | -------------------------------------------------------------------------------- /scripts/04.filter_rare-submit-slurm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=Taxa-plots #Set the job name to "JobExample2" 3 | #SBATCH --time=23:00:00 #Set the wall clock limit to 6hr and 30min 4 | #SBATCH --nodes=1 #Request 1 node 5 | #SBATCH --ntasks=1 #Request 1 tasks/cores per node 6 | #SBATCH --mem=1G #Request 1GB per node 7 | #SBATCH --output=tax-plots.o.%j #Send stdout/err to "Example2Out.[jobID]" 8 | #SBATCH --error=tax-plots.e.%j #Send std err to "Example2error.[jobID]" 9 | 10 | 11 | module purge 12 | module load iccifort/2020.1.217 13 | module load impi/2019.7.217 14 | module load snakemake/5.26.1-Python-3.8.2 15 | 16 | 17 | # Filter rare taxa and make relative abundance bar plots 18 | snakemake \ 19 | --jobs 10 \ 20 | --keep-going \ 21 | --rerun-incomplete \ 22 | --cluster-config config/cluster.yaml \ 23 | --cluster "sbatch --partition {cluster.queue} --job-name={rule}.{wildcards} --mem={cluster.mem} --time={cluster.time} --ntasks={cluster.threads}" \ 24 | "08.Filter_feature_table/filtered_table.qzv" "09.Taxa_bar_plots/group-bar-plot.qzv" "09.Taxa_bar_plots/samples-bar-plots.qzv" 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /scripts/03.filter_taxa-submit-slurm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=Assign-taxonomy #Set the job name to "JobExample2" 3 | #SBATCH --time=23:00:00 #Set the wall clock limit to 6hr and 30min 4 | #SBATCH --nodes=1 #Request 1 node 5 | #SBATCH --ntasks=1 #Request 1 tasks/cores per node 6 | #SBATCH --mem=1G #Request 1GB per node 7 | #SBATCH --output=assign-tax.o.%j #Send stdout/err to "Example2Out.[jobID]" 8 | #SBATCH --error=assign-tax.e.%j #Send std err to "Example2error.[jobID]" 9 | 10 | 11 | module purge 12 | module load iccifort/2020.1.217 13 | module load impi/2019.7.217 14 | module load snakemake/5.26.1-Python-3.8.2 15 | 16 | 17 | # Filter taxa - Examine "08.Filter_feature_table/taxa_filtered_table.qzv" to determine the threshold for filtering out rare taxa 18 | snakemake -pr \ 19 | --jobs 10 \ 20 | --keep-going \ 21 | --rerun-incomplete \ 22 | --cluster-config config/cluster.yaml \ 23 | --cluster "sbatch --partition {cluster.queue} --mem={cluster.mem} --time={cluster.time} --ntasks={cluster.threads}" \ 24 | "06.Assign_taxonomy/taxonomy.qzv" "07.Build_phylogenetic_tree/rooted-tree.qza" "08.Filter_feature_table/taxa_filtered_table.qzv" 25 | 26 | -------------------------------------------------------------------------------- /01.raw_data/MANIFEST: -------------------------------------------------------------------------------- 1 | sample-id,absolute-filepath,direction 2 | 40BHFBK1,/home/jeffbrady/biyi/snakemake-workflow-qiime2/01.raw_data/40BHFBK1/40BHFBK1_R1.fastq.gz,forward 3 | 40BHFBK1,/home/jeffbrady/biyi/snakemake-workflow-qiime2/01.raw_data/40BHFBK1/40BHFBK1_R2.fastq.gz,reverse 4 | 40BHFBK1_PF,/home/jeffbrady/biyi/snakemake-workflow-qiime2/01.raw_data/40BHFBK1_PF/40BHFBK1_PF_R1.fastq.gz,forward 5 | 40BHFBK1_PF,/home/jeffbrady/biyi/snakemake-workflow-qiime2/01.raw_data/40BHFBK1_PF/40BHFBK1_PF_R2.fastq.gz,reverse 6 | 
40BHFBK2,/home/jeffbrady/biyi/snakemake-workflow-qiime2/01.raw_data/40BHFBK2/40BHFBK2_R1.fastq.gz,forward 7 | 40BHFBK2,/home/jeffbrady/biyi/snakemake-workflow-qiime2/01.raw_data/40BHFBK2/40BHFBK2_R2.fastq.gz,reverse 8 | 40BHFBK3,/home/jeffbrady/biyi/snakemake-workflow-qiime2/01.raw_data/40BHFBK3/40BHFBK3_R1.fastq.gz,forward 9 | 40BHFBK3,/home/jeffbrady/biyi/snakemake-workflow-qiime2/01.raw_data/40BHFBK3/40BHFBK3_R2.fastq.gz,reverse 10 | 40BHFBK3_PF,/home/jeffbrady/biyi/snakemake-workflow-qiime2/01.raw_data/40BHFBK3_PF/40BHFBK3_PF_R1.fastq.gz,forward 11 | 40BHFBK3_PF,/home/jeffbrady/biyi/snakemake-workflow-qiime2/01.raw_data/40BHFBK3_PF/40BHFBK3_PF_R2.fastq.gz,reverse 12 | 40BHFBK4,/home/jeffbrady/biyi/snakemake-workflow-qiime2/01.raw_data/40BHFBK4/40BHFBK4_R1.fastq.gz,forward 13 | 40BHFBK4,/home/jeffbrady/biyi/snakemake-workflow-qiime2/01.raw_data/40BHFBK4/40BHFBK4_R2.fastq.gz,reverse 14 | -------------------------------------------------------------------------------- /scripts/02.denoise-submit-slurm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=dada-denoise #Set the job name to "JobExample2" 3 | #SBATCH --time=23:00:00 #Set the wall clock limit to 6hr and 30min 4 | #SBATCH --nodes=1 #Request 1 node 5 | #SBATCH --ntasks=1 #Request 1 tasks/cores per node 6 | #SBATCH --mem=1G #Request 1GB per node 7 | #SBATCH --output=dada-denoise.o.%j #Send stdout/err to "Example2Out.[jobID]" 8 | #SBATCH --error=dada-denoise.e.%j #Send std err to "Example2error.[jobID]" 9 | 10 | 11 | module purge 12 | module load iccifort/2020.1.217 13 | module load impi/2019.7.217 14 | module load snakemake/5.26.1-Python-3.8.2 15 | 16 | 17 | # Denoise reads - chimera removal, reads merging, quality trimming and ASV feature table generation take a good look at 05.Denoise_reads/denoise_stats.qzv to see if you didn't lose too many reads and if the reads merged well. 
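# (Illustrative aside, not part of the original script: if you prefer the terminal to https://view.qiime2.org/, the denoising stats can also be unpacked with qiime tools export; the export directory below is an arbitrary name.)
# qiime tools export --input-path 05.Denoise_reads/denoise_stats.qzv --output-path 05.Denoise_reads/denoise_stats_export
# less 05.Denoise_reads/denoise_stats_export/metadata.tsv   # for tabulated stats the table usually lands in metadata.tsv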
If the denoising was not successful, adjust the parameters you set for dada2 and then re-run 18 | snakemake \ 19 | --jobs 10 \ 20 | --keep-going \ 21 | --rerun-incomplete \ 22 | --cluster-config config/cluster.yaml \ 23 | --cluster "sbatch --partition {cluster.queue} --mem={cluster.mem} --time={cluster.time} --ntasks={cluster.threads}" \ 24 | "05.Denoise_reads/denoise_stats.qzv" "05.Denoise_reads/table_summary.qzv" "05.Denoise_reads/representative_sequences.qzv" 25 | 26 | 27 | -------------------------------------------------------------------------------- /scripts/classify_ASVs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N ASV_classify 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 40 9 | 10 | set -e 11 | 12 | source activate qiime2-2020.6 13 | export PERL5LIB='/gpfs0/bioinfo/users/obayomi/miniconda3/envs/qiime2-2020.6/lib/site_perl/5.26.2/x86_64-linux-thread-multi' 14 | export TEMPDIR='/gpfs0/bioinfo/users/obayomi/hinuman_analysis/18S_illumina/tmp/' TMPDIR='/gpfs0/bioinfo/users/obayomi/hinuman_analysis/18S_illumina/tmp/' 15 | 16 | #IN_PREFIX=('03.dada_denoise/se' '03.dada_denoise/pear-joined' '03.deblur_denoise/se' '03.deblur_denoise/pear-joined') 17 | IN_PREFIX=('03.redo_dada_denoise/se' '03.redo_dada_denoise/pear-joined' '03.redo_dada_denoise/pe') 18 | 19 | #OUT_PREFIX=('04.assign_taxonomy/dada2/se' '04.assign_taxonomy/dada2/pear-joined' '04.assign_taxonomy/deblur/se' '04.assign_taxonomy/deblur/pear-joined') 20 | OUT_PREFIX=('04.redo_assign_taxonomy/dada2/se' '04.redo_assign_taxonomy/dada2/pear-joined' '04.redo_assign_taxonomy/dada2/pe' ) 21 | 22 | # Classify representative ASV sequences against a pre-trained SILVA database with Naive Bayes 23 | parallel --jobs 0 --link qiime feature-classifier classify-sklearn \ 24 | --i-classifier /gpfs0/bioinfo/users/obayomi/databases/q2_database/silva-138-99-nb-classifier.qza \ 25 | --i-reads {1}-representative_sequences.qza \ 26 | --o-classification {2}-taxonomy.qza ::: ${IN_PREFIX[*]} ::: ${OUT_PREFIX[*]} 27 | 28 | parallel --jobs 0 qiime metadata tabulate \ 29 | --m-input-file {}-taxonomy.qza \ 30 | --o-visualization {}-taxonomy.qzv ::: ${OUT_PREFIX[*]} 31 | -------------------------------------------------------------------------------- /slurm.mk: -------------------------------------------------------------------------------- 1 | .PHONY: import denoise assign_taxonomy plot complete clean 2 | 3 | complete: 4 | @echo "Running the complete pipeline. Quality reports, core diversity analysis, statistics and functional analysis" 5 | sbatch 05.run_complete-submit-slurm.sh 6 | 7 | import: 8 | @echo "Importing, trimming primers and adapters, and performing initial quality checks" 9 | @echo "Inspect the plots generated in 04.QC/trimmed_reads_qual_viz.qzv at https://view.qiime2.org/" 10 | sbatch 01.import-submit-slurm.sh 11 | 12 | denoise: 13 | @echo "Denoising your imported sequences" 14 | @echo "Inspect the table 05.Denoise_reads/denoise_stats.qzv at https://view.qiime2.org/" 15 | @echo "Edit the config/config.yaml file appropriately and re-run if too many reads were lost during denoising."
16 | sbatch 02.denoise-submit-slurm.sh 17 | 18 | assign_taxonomy: 19 | @echo "Assigning taxonomy and filtering out non-target taxa" 20 | @echo "After this run completes" 21 | @echo "Examine 08.Filter_feature_table/taxa_filtered_table.qzv" 22 | @echo "To figure out the total number of sequences ('Total frequency') to be used to determine the minimum frequency for filtering out rare taxa" 23 | @echo "Simply multiply the total number of sequences by your threshold, for example 0.00005 (0.005 percent)" 24 | @echo "python -c 'print(1298206 * 0.00005)' = 64.9103" 25 | @echo "Set the 'minimum_frequency' parameter in config/config.yaml with the result of this calculation rounded up like so:" 26 | @echo "minimum_frequency: 65" 27 | sbatch 03.filter_taxa-submit-slurm.sh 28 | 29 | plot: 30 | @echo "Filtering out rare ASVs and generating taxonomy plots" 31 | sbatch 04.filter_rare-submit-slurm.sh 32 | 33 | clean: 34 | rm slurm-* *.{e,o}.* 35 | 36 | -------------------------------------------------------------------------------- /scripts/tabulate-metadata.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | # associate the representative sequences with their taxonomic annotations 4 | qiime metadata tabulate \ 5 | --m-input-file rep-seqs.qza \ 6 | --m-input-file taxonomy.qza \ 7 | --o-visualization tabulated-feature-metadata.qzv 8 | 9 | 10 | # Metadata merging is supported anywhere that metadata is accepted in QIIME 2. For example, it might be interesting to color an Emperor plot based on the study metadata, or sample alpha diversity. This can be accomplished by providing both the sample metadata file and the SampleData[AlphaDiversity] artifact: 11 | qiime emperor plot \ 12 | --i-pcoa unweighted_unifrac_pcoa_results.qza \ 13 | --m-metadata-file sample-metadata.tsv \ 14 | --m-metadata-file faith_pd_vector.qza \ 15 | --o-visualization unweighted-unifrac-emperor-with-alpha.qzv 16 | 17 | 18 | 19 | # Merging metadata 20 | # Since metadata can come from many different sources, QIIME 2 supports metadata merging when running commands. Building upon the examples above, simply passing --m-input-file multiple times will combine the metadata columns in the specified files. 21 | qiime metadata tabulate \ 22 | --m-input-file sample-metadata.tsv \ 23 | --m-input-file faith_pd_vector.qza \ 24 | --o-visualization tabulated-combined-metadata.qzv 25 | 26 | # To view an artifact as metadata, simply pass it in to any method or visualizer that expects to see metadata (e.g.
metadata tabulate or emperor plot): 27 | qiime metadata tabulate \ 28 | --m-input-file faith_pd_vector.qza \ 29 | --o-visualization tabulated-faith-pd-metadata.qzv 30 | 31 | # Tabulate your mapping file with QIIME2 32 | qiime metadata tabulate \ 33 | --m-input-file sample-metadata.tsv \ 34 | --o-visualization tabulated-sample-metadata.qzv 35 | -------------------------------------------------------------------------------- /scripts/phylogeny_tree.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N make_tree 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 40 9 | 10 | set -e 11 | 12 | source activate qiime2-2020.6 13 | export PERL5LIB='/gpfs0/bioinfo/users/obayomi/miniconda3/envs/qiime2-2020.6/lib/site_perl/5.26.2/x86_64-linux-thread-multi' 14 | #IN_PREFIX=('03.dada_denoise/se' '03.dada_denoise/pear-joined' '03.deblur_denoise/se' '03.deblur_denoise/pear-joined') 15 | IN_PREFIX=('03.redo_dada_denoise/se' '03.redo_dada_denoise/pear-joined' '03.redo_dada_denoise/pe' ) 16 | 17 | #OUT_PREFIX=('06.make_tree/dada2/se' '06.make_tree/dada2/pear-joined' '06.make_tree/deblur/se' '06.make_tree/deblur/pear-joined') 18 | OUT_PREFIX=('06.redo_make_tree/dada2/se' '06.redo_make_tree/dada2/pear-joined' '06.redo_make_tree/dada2/pe') 19 | 20 | # Make phylogenetic tree pipeline - all the below in one command 21 | parallel --jobs 0 --link qiime phylogeny align-to-tree-mafft-fasttree \ 22 | --i-sequences {1}-representative_sequences.qza \ 23 | --o-alignment {2}-aligned_representative_sequences.qza \ 24 | --o-masked-alignment {2}-masked_aligned_representative_sequences.qza \ 25 | --o-tree {2}-unrooted-tree.qza \ 26 | --o-rooted-tree {2}-rooted-tree.qza ::: ${IN_PREFIX[*]} ::: ${OUT_PREFIX[*]} 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | #Steps for generating a phylogenetic tree 35 | #qiime alignment mafft \ 36 | # --i-sequences representative_sequences.qza \ 37 | # --o-alignment aligned_representative_sequences 38 | 39 | #qiime alignment mask \ 40 | # --i-alignment aligned_representative_sequences.qza \ 41 | # --o-masked-alignment masked_aligned_representative_sequences 42 | 43 | #qiime phylogeny fasttree \ 44 | # --i-alignment masked_aligned_representative_sequences.qza \ 45 | # --o-tree unrooted_tree 46 | 47 | #qiime phylogeny midpoint-root \ 48 | # --i-tree unrooted_tree.qza \ 49 | # --o-rooted-tree rooted_tree 50 | -------------------------------------------------------------------------------- /scripts/deblur_denoize.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N denoize_deblur 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 40 9 | 10 | set -e 11 | 12 | source activate qiime2-2020.6 13 | 14 | #PREFIX="se" 15 | #TRUNC_LENGTH=280 16 | PREFIX="pear-joined" 17 | TRUNC_LENGTH=400 #587 18 | 19 | # initial quality filtering process based on quality scores 20 | qiime quality-filter q-score \ 21 | --i-demux 01.import/${PREFIX}-reads.qza \ 22 | --o-filtered-sequences 03.deblur_denoise/${PREFIX}-reads-filtered.qza \ 23 | --o-filter-stats 03.deblur_denoise/${PREFIX}-reads-filter-stats.qza 24 | 25 | qiime metadata tabulate \ 26 | --m-input-file 03.deblur_denoise/${PREFIX}-reads-filter-stats.qza \ 27 | --o-visualization 03.deblur_denoise/${PREFIX}-reads-filter-stats.qzv 28 | 29 | # Next, the Deblur workflow is applied using the qiime deblur denoise-16S method. 
This method requires one parameter that is used in quality filtering, --p-trim-length n which truncates the sequences at position n. In general, the Deblur developers recommend setting this value to a length where the median quality score begins to drop too low 30 | qiime deblur denoise-16S \ 31 | --i-demultiplexed-seqs 03.deblur_denoise/${PREFIX}-reads-filtered.qza \ 32 | --p-trim-length ${TRUNC_LENGTH} \ 33 | --o-representative-sequences 03.deblur_denoise/${PREFIX}-representative_sequences.qza \ 34 | --o-table 03.deblur_denoise/${PREFIX}-table.qza \ 35 | --p-sample-stats \ 36 | --o-stats 03.deblur_denoise/${PREFIX}-denoise_stats.qza 37 | 38 | qiime deblur visualize-stats \ 39 | --i-deblur-stats 03.deblur_denoise/${PREFIX}-denoise_stats.qza \ 40 | --o-visualization 03.deblur_denoise/${PREFIX}-denoise_stats.qzv 41 | 42 | 43 | qiime feature-table summarize \ 44 | --i-table 03.deblur_denoise/${PREFIX}-table.qza \ 45 | --o-visualization 03.deblur_denoise/${PREFIX}-table_summary.qzv 46 | 47 | 48 | qiime feature-table tabulate-seqs \ 49 | --i-data 03.deblur_denoise/${PREFIX}-representative_sequences.qza \ 50 | --o-visualization 03.deblur_denoise/${PREFIX}-representative_sequences.qzv 51 | -------------------------------------------------------------------------------- /rules/filter_samples.smk: -------------------------------------------------------------------------------- 1 | 2 | # Filter samples based on a provide metadata file 3 | rule Filter_samples: 4 | input: 5 | table=rules.Exclude_non_target_taxa.output.table_raw, 6 | metadata=config['metadata'] 7 | output: 8 | table_raw="08.Filter_feature_table/samples_filtered_table.qza", 9 | table_viz="08.Filter_feature_table/samples_filtered_table.qzv" 10 | log: "logs/Filter_samples/Filter_samples.log" 11 | threads: 1 12 | params: 13 | conda_activate=config["QIIME2_ENV"], 14 | minumum_frequency=config['minimum_frequency'] 15 | shell: 16 | """ 17 | set +u 18 | {params.conda_activate} 19 | set -u 20 | 21 | # Filter samples 22 | qiime feature-table filter-samples \ 23 | --i-table {input.table} \ 24 | --m-metadata-file {input.metadata} \ 25 | --o-filtered-table {output.table_raw} 26 | 27 | qiime feature-table summarize \ 28 | --i-table {output.table_raw} \ 29 | --o-visualization {output.table_viz} 30 | """ 31 | 32 | 33 | 34 | # Removing rare taxa i.e. 
features with abundance less the 0.005% 35 | rule Exclude_rare_taxa: 36 | input: 37 | rules.Filter_samples.output.table_raw 38 | output: 39 | table_raw="08.Filter_feature_table/filtered_table.qza", 40 | table_viz="08.Filter_feature_table/filtered_table.qzv" 41 | log: "logs//Exclude_singletons.log" 42 | threads: 1 43 | params: 44 | conda_activate=config["QIIME2_ENV"], 45 | minumum_frequency=config['minimum_frequency'] 46 | shell: 47 | """ 48 | set +u 49 | {params.conda_activate} 50 | set -u 51 | 52 | # Removing rare otus / features with abundance less the 0.005% 53 | qiime feature-table filter-features \ 54 | --i-table {input} \ 55 | --p-min-frequency {params.minumum_frequency} \ 56 | --o-filtered-table {output.table_raw} 57 | 58 | qiime feature-table summarize \ 59 | --i-table {output.table_raw} \ 60 | --o-visualization {output.table_viz} 61 | """ 62 | 63 | -------------------------------------------------------------------------------- /scripts/.bash_profile: -------------------------------------------------------------------------------- 1 | source /storage/SGE6U8/default/common/settings.sh 2 | 3 | #export SOURCETRACKER_PATH=/gpfs0/biores/users/gilloro/Biyi/SourceTracking/sourcetracker-1.0.1 4 | #Chimera slayer 5 | export PATH=/fastspace/bioinfo_apps/microbiomeutil-r20110519/ChimeraSlayer/:$PATH 6 | #vsearch 7 | export PATH=/fastspace/bioinfo_apps/vsearch/vsearch_v2.3.4/bin/:$PATH 8 | #pathogen analysis scripts 9 | #export PATH=/gpfs0/biores/users/gilloro/Biyi/pathogen_analysis/:$PATH 10 | #qiime 11 | #export PATH=/fastspace/bioinfo_apps/qiime/usr/local/bin/:$PATH 12 | #NCBI blast 13 | export PATH=/gpfs0/bioinfo/users/obayomi/ncbi-blast-2.3.0+/bin/:$PATH 14 | #qsub 15 | export PATH=/storage/SGE6U8/bin/lx24-amd64/:$PATH 16 | #all executables 17 | export PATH=/gpfs0/bioinfo/users/obayomi/bin/:$PATH 18 | #sra tolkit 19 | export PATH=/gpfs0/bioinfo/users/obayomi/sratoolkit.2.9.6-1-ubuntu64/bin/:$PATH 20 | #Diamond 0.7.11 21 | #export PATH=/fastspace/bioinfo_apps/Diamond/v0.7.11/:$PATH 22 | #MEGAN 23 | export PATH=/gpfs0/bioinfo/users/obayomi/megan/:$PATH 24 | #MEGAN commandline tools 25 | export PATH=/gpfs0/bioinfo/users/obayomi/megan/tools:$PATH 26 | #minimap2 for aligning long reads like nanopore 27 | export PATH=/gpfs0/bioinfo/users/obayomi/minimap2:$PATH 28 | #fastx tool kit for processing fasta and fastq files 29 | #export PATH=/gpfs0/biores/users/gilloro/Biyi/fastx_toolkit/bin:$PATH 30 | 31 | #centrifuge for metagenomic reads classification 32 | export PATH=/gpfs0/bioinfo/users/obayomi/centrifuge/:$PATH 33 | #microbiome helper 34 | export PATH=/gpfs0/bioinfo/users/obayomi/microbiome_helper/:$PATH 35 | # LAST 36 | export PATH=/gpfs0/bioinfo/users/obayomi/last-1021/src/:$PATH 37 | export PATH=/gpfs0/bioinfo/users/obayomi/last-1021/scripts/:$PATH 38 | # Kraken 39 | export PATH=/fastspace/bioinfo_apps/kraken/:$PATH 40 | #metaphlan2 41 | #export PATH=/gpfs0/bioinfo/users/obayomi/biobakery-metaphlan2-5bd7cd0e4854/:$PATH 42 | #miniconda 43 | export PATH=/gpfs0/bioinfo/users/obayomi/miniconda3/envs/python2/bin/:$PATH 44 | #set SGE_ROOT variable 45 | export SGE_ROOT=/storage/SGE6U8 46 | #HMM 47 | export PATH=/gpfs0/bioinfo/apps/HMMER/HMMER_v3.1b1/bin/:$PATH 48 | #metaBAT 49 | export PATH=/gpfs0/bioinfo/users/obayomi/metabat/:$PATH 50 | alias ll='ls --color=auto -alh' -------------------------------------------------------------------------------- /00.mapping/first_analysis/indoors-cntVsB12.tsv: -------------------------------------------------------------------------------- 1 | sample-id 
sample_number batch pcr_cycles medium_or_plant sterile_plant grown_with_antibiotics location water_washed surface_sterilization b12_enriched treatment description 2 | 37A 37 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 3 | 38A 38 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 4 | 39A 39 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 5 | 40A 40 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 6 | 41A 41 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 7 | 42A 42 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 8 | 43A 43 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 9 | 44A 44 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 10 | 45A 45 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 11 | 46A 46 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 12 | 47A 47 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 13 | 48A 48 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 14 | 49A 49 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 15 | 50A 50 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 16 | 51A 51 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 17 | 52A 52 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 18 | 53A 53 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 19 | 54A 54 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 20 | -------------------------------------------------------------------------------- /00.mapping/indoors-edited.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number sterile_plant grown_with_antibiotics water_washed treatment description 2 | 9A-2 55 No No Yes water_washed water_washed_plant_with_pPNA_&_mPNA 3 | 10A-2 56 No No Yes water_washed water_washed_plant_with_pPNA_&_mPNA 4 | 11A-2 57 No No Yes water_washed water_washed_plant_with_pPNA_&_mPNA 5 | 12A-2 58 No No Yes water_washed water_washed_plant_with_pPNA_&_mPNA 6 | 23A-2 69 No No Yes water_washed water_washed_plant_with_pPNA_&_mPNA 7 | 25A-2 70 Yes No Yes water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 8 | 26A-2 71 Yes No Yes water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 9 | 27A-2 72 Yes No Yes water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 10 | 28A-2 73 Yes No No sterile sterile_plant_with_pPNA_&_mPNA 11 | 29A-2 74 Yes No No sterile sterile_plant_with_pPNA_&_mPNA 12 | 30A-2 75 Yes No Yes water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 13 | 31A-2 76 Yes No No sterile sterile_plant_with_pPNA_&_mPNA 14 | 32A-2 77 Yes No Yes water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 15 | 33A-2 78 Yes No No sterile sterile_plant_with_pPNA_&_mPNA 16 | 34A-2 79 Yes No Yes 
water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 17 | 35A-2 80 Yes No Yes water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 18 | 36A-2 81 Yes Yes Yes water_washedSterile_antibiotic water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 19 | 37A-2 82 Yes Yes No sterile_antibiotic sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 20 | 38A-2 83 Yes Yes Yes water_washedSterile_antibiotic water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 21 | 39A-2 84 Yes Yes No sterile_antibiotic sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 22 | 40A-2 85 Yes Yes Yes water_washedSterile_antibiotic water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 23 | 41A-2 86 Yes Yes No sterile_antibiotic sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 24 | 42A-2 87 Yes Yes Yes water_washedSterile_antibiotic water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 25 | 43A-2 88 Yes Yes No sterile_antibiotic sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 26 | 44A-2 89 Yes Yes No sterile_antibiotic sterile_plant_grown_with_antibiotic 27 | -------------------------------------------------------------------------------- /00.mapping/indoors.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number sterile_plant grown_with_antibiotics water_washed treatment description 2 | 9A-2 55 No No Yes water_washed water_washed_plant_with_pPNA_&_mPNA 3 | 10A-2 56 No No Yes water_washed water_washed_plant_with_pPNA_&_mPNA 4 | 11A-2 57 No No Yes water_washed water_washed_plant_with_pPNA_&_mPNA 5 | 12A-2 58 No No Yes water_washed water_washed_plant_with_pPNA_&_mPNA 6 | 23A-2 69 Yes No Yes water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 7 | 25A-2 70 Yes No Yes water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 8 | 26A-2 71 Yes No Yes water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 9 | 27A-2 72 Yes No Yes water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 10 | 28A-2 73 Yes No No sterile sterile_plant_with_pPNA_&_mPNA 11 | 29A-2 74 Yes No No sterile sterile_plant_with_pPNA_&_mPNA 12 | 30A-2 75 Yes No Yes water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 13 | 31A-2 76 Yes No No sterile sterile_plant_with_pPNA_&_mPNA 14 | 32A-2 77 Yes No Yes water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 15 | 33A-2 78 Yes No No sterile sterile_plant_with_pPNA_&_mPNA 16 | 34A-2 79 Yes No Yes water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 17 | 35A-2 80 Yes Yes No sterile_antibiotic sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 18 | 36A-2 81 Yes Yes Yes water_washedSterile_antibiotic water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 19 | 37A-2 82 Yes Yes No sterile_antibiotic sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 20 | 38A-2 83 Yes Yes Yes water_washedSterile_antibiotic water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 21 | 39A-2 84 Yes Yes No sterile_antibiotic sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 22 | 40A-2 85 Yes Yes Yes water_washedSterile_antibiotic water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 23 | 41A-2 86 Yes Yes No sterile_antibiotic sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 24 | 42A-2 87 Yes Yes Yes water_washedSterile_antibiotic water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 25 | 43A-2 88 Yes Yes No sterile_antibiotic sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 26 | 
44A-2 89 Yes Yes No sterile_antibiotic sterile_plant_grown_with_antibiotic -------------------------------------------------------------------------------- /create_DB/.snakemake/log/2021-05-26T114454.315556.snakemake.log: -------------------------------------------------------------------------------- 1 | Building DAG of jobs... 2 | Job counts: 3 | count jobs 4 | 1 Download_silva_database 5 | 1 Download_unite_database 6 | 1 Extract_primer_silva_reads 7 | 1 Import_unite_sequences 8 | 1 Import_unite_taxonomy 9 | 1 Train_silva_classifier 10 | 1 Train_unite_classifier 11 | 1 Unzip_unite_DB 12 | 1 all 13 | 1 make_logs_directories 14 | 10 15 | 16 | [Wed May 26 11:45:01 2021] 17 | rule make_logs_directories: 18 | output: logs/Download_silva_database, logs/Download_unite_database, logs/Train_unite_classifier, logs/Train_silva_classifier 19 | jobid: 1 20 | reason: Missing output files: logs/Train_silva_classifier, logs/Download_silva_database, logs/Download_unite_database, logs/Train_unite_classifier 21 | 22 | 23 | [ -d logs/ ] || mkdir -p logs/ 24 | cd logs/ 25 | for RULE in Download_silva_database Extract_primer_silva_reads Train_silva_classifier Download_unite_database Unzip_unite_DB Import_unite_sequences Import_unite_taxonomy Import_unite_taxonomy Train_unite_classifier; do 26 | [ -d ${RULE}/ ] || mkdir -p ${RULE}/ 27 | done 28 | 29 | 30 | [Wed May 26 11:45:01 2021] 31 | rule Download_unite_database: 32 | input: logs/Download_silva_database, logs/Download_unite_database, logs/Train_unite_classifier, logs/Train_silva_classifier 33 | output: 00.database/unite.gz 34 | jobid: 8 35 | reason: Missing output files: 00.database/unite.gz; Input files updated by another job: logs/Train_silva_classifier, logs/Download_silva_database, logs/Download_unite_database, logs/Train_unite_classifier 36 | 37 | wget -O 00.database/unite.gz https://files.plutof.ut.ee/public/orig/98/AE/98AE96C6593FC9C52D1C46B96C2D9064291F4DBA625EF189FEC1CCAFCF4A1691.gz 38 | 39 | [Wed May 26 11:45:01 2021] 40 | rule Download_silva_database: 41 | input: logs/Download_silva_database, logs/Download_unite_database, logs/Train_unite_classifier, logs/Train_silva_classifier 42 | output: 00.database/silva-138-99-nb-classifier.qza, 00.database/silva-138-99-seqs.qza, 00.database/silva-138-99-tax.qza 43 | jobid: 4 44 | reason: Missing output files: 00.database/silva-138-99-tax.qza, 00.database/silva-138-99-seqs.qza; Input files updated by another job: logs/Train_silva_classifier, logs/Download_silva_database, logs/Download_unite_database, logs/Train_unite_classifier 45 | 46 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/pe-dada2/indoors-cntVsB12.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number batch pcr_cycles medium_or_plant sterile_plant grown_with_antibiotics location water_washed surface_sterilization b12_enriched treatment description 2 | Osnat036-37-A 37 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 3 | Osnat037-38-A 38 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 4 | Osnat038-39-A 39 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 5 | Osnat039-40-A 40 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 6 | Osnat040-41-A 41 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 7 | Osnat041-42-A 42 A NA Plant No No indoors 
Yes No Control control Control_indoor_plants_washed_with_water 8 | Osnat042-43-A 43 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 9 | Osnat043-44-A 44 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 10 | Osnat044-45-A 45 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 11 | Osnat045-46-A 46 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 12 | Osnat046-47-A 47 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 13 | Osnat047-48-A 48 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 14 | Osnat048-49-A 49 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 15 | Osnat049-50-A 50 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 16 | Osnat050-51-A 51 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 17 | Osnat051-52-A 52 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 18 | Osnat052-53-A 53 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 19 | Osnat053-54-A 54 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 20 | -------------------------------------------------------------------------------- /scripts/generate-krona.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import zipfile 4 | 5 | #extract level-7.csv from the taxa bar plots 6 | def unzip(qzv_file): 7 | with zipfile.ZipFile(qzv_file) as zip: 8 | for zip_info in zip.infolist(): 9 | if "level-7.csv" in zip_info.filename: 10 | zip_info.filename=os.path.basename(zip_info.filename) 11 | zip.extract(zip_info) 12 | 13 | #create tsv files which Krona likes 14 | def make_tsv(name): 15 | tsv=open("krona-tsv/"+name+".tsv","w+") 16 | tsv.write(name+"\n") 17 | for i in range(0,len(new)): 18 | tsv.write(data_dict[name][i]+"\t"+new[i]+"\n") 19 | tsv.close() 20 | 21 | #this is my base output. you can change it to anything you wish. 22 | unzip("taxa-bar-plots.qzv") 23 | 24 | #this folder will be deleted at the end of the process 25 | if not os.path.exists("krona-tsv"): 26 | os.makedirs("krona-tsv") 27 | 28 | #this folder will hold the final output 29 | if not os.path.exists("Krona"): 30 | os.makedirs("Krona") 31 | 32 | file=open("level-7.csv","r") 33 | 34 | lines=file.readlines() 35 | 36 | file.close() 37 | 38 | #remove the file since we don't need it anymore 39 | os.system("rm "+"level-7.csv") 40 | 41 | taxa=[] 42 | new=[] 43 | sample_names=[] 44 | data_dict={} 45 | for line in lines: 46 | line=line.strip().split(",") 47 | if line[0]=="index": 48 | for i in line: 49 | if ";" in i: 50 | taxa.append(i) 51 | elif i.startswith("Unassigned"): 52 | taxa.append("Unassigned") 53 | 54 | else: 55 | data=[] 56 | sample_names.append(line[0]) 57 | for value in line: 58 | if any(i.isalpha() for i in value)==True: 59 | pass 60 | else: 61 | value=value.split(".") 62 | data.append(value[0]) 63 | data_dict[line[0]]=data 64 | 65 | #Regex for SILVA and greengenes. I don't like to see the prefix they add. 66 | #There is no harm with other databases.
If you want to leave them be, just remove the first two lines after the "for" loop. 67 | for x in taxa: 68 | x=re.sub("D_\d__","",x) 69 | x=re.sub("\w__","",x) 70 | new.append(x.replace(";","\t")) 71 | 72 | for sample in sample_names: 73 | make_tsv(sample) 74 | 75 | #This part runs Krona and removes tsv files we created. 76 | #You can change the output as you wish. 77 | os.system("ktImportText krona-tsv/* -o Krona/krona.html") 78 | os.system("rm -r krona-tsv") 79 | -------------------------------------------------------------------------------- /local.mk: -------------------------------------------------------------------------------- 1 | .PHONY: import denoise assign_taxonomy plot complete clean upload download 2 | 3 | complete: 4 | @echo "Running the complete pipeline. Quality reports, Core diversity analysis, statistics and functional analysis" 5 | snakemake -pr --cores 10 --keep-going --rerun-incomplete 6 | 7 | import: 8 | @echo "Importing, trimming primers and adapters, and performing initial quality checks" 9 | @echo "Inspect the plots generated in 04.QC/trimmed_reads_qual_viz.qzv at https://view.qiime2.org/" 10 | snakemake -pr --cores 10 --keep-going --rerun-incomplete "04.QC/trimmed_reads_qual_viz.qzv" "04.QC/raw_reads_qual_viz.qzv" 11 | 12 | denoise: 13 | @echo "Denoising your imported sequences" 14 | @echo "Inspect the table 05.Denoise_reads/denoise_stats.qzv at https://view.qiime2.org/" 15 | @echo "Edit the config/config.yaml file appropriately and re-run if many sequences were lost after denoising." 16 | snakemake -pr --cores 10 --keep-going --rerun-incomplete "05.Denoise_reads/denoise_stats.qzv" "05.Denoise_reads/table_summary.qzv" "05.Denoise_reads/representative_sequences.qzv" 17 | 18 | assign_taxonomy: 19 | @echo "Assigning taxonomy and filtering out non-target taxa" 20 | @echo "After this run completes" 21 | @echo "Examine 08.Filter_feature_table/taxa_filtered_table.qzv" 22 | @echo "To figure out the total number of sequences ('Total frequency') to be used to determine the minimum frequency for filtering out rare taxa" 23 | @echo "Simply multiply the total number of sequences by your threshold, for example 0.00005 (0.005 percent)" 24 | @echo "python -c print(1298206 * 0.00005) = 64.9103" 25 | @echo "Set the 'minimum_frequency' parameter in config/config.yaml with the result of this calculation rounded up like so:" 26 | @echo "minimum_frequency: 65" 27 | snakemake -pr --cores 10 --keep-going --rerun-incomplete "06.Assign_taxonomy/taxonomy.qzv" "07.Build_phylogenetic_tree/rooted-tree.qza" "08.Filter_feature_table/taxa_filtered_table.qzv" 28 | 29 | plot: 30 | @echo "Filtering out rare ASVs and generating taxonomy plots" 31 | snakemake -pr --cores 10 --keep-going --rerun-incomplete "08.Filter_feature_table/filtered_table.qzv" "09.Taxa_bar_plots/group-bar-plot.qzv" "09.Taxa_bar_plots/samples-bar-plots.qzv" 32 | 33 | upload: 34 | @echo "Copying the denoising folder to HPRC for taxonomy assignment" 35 | scp -r 05.Denoise_reads/ obayomi@grace.tamu.edu:/scratch/user/obayomi/projects/amplicon_sequencing/Guay 36 | 37 | download: 38 | @echo "Downloading the assign taxonomy folder from HPRC" 39 | scp -r obayomi@grace.tamu.edu:/scratch/user/obayomi/projects/amplicon_sequencing/Guay/06.Assign_taxonomy/ .
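# (Illustrative sketch, not part of the original Makefile:) the minimum_frequency cut-off described in the
# assign_taxonomy target above can be computed and rounded up in one step; 1298206 is simply the example
# 'Total frequency' quoted there and would be replaced by the value shown in taxa_filtered_table.qzv:
#   python -c "import math; print(math.ceil(1298206 * 0.00005))"   # prints 65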
40 | -------------------------------------------------------------------------------- /scripts/default_variables.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N diversity_analysis 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 40 9 | 10 | 11 | set -e 12 | 13 | source activate qiime2-2020.6 14 | export PERL5LIB='/gpfs0/bioinfo/users/obayomi/miniconda3/envs/qiime2-2020.6/lib/site_perl/5.26.2/x86_64-linux-thread-multi' 15 | 16 | TREE=('06.make_tree/dada2' '06.make_tree/dada2' '06.make_tree/deblur' '06.make_tree/deblur' '06.make_tree/dada2' '06.make_tree/dada2' '06.make_tree/deblur' '06.make_tree/deblur' '06.make_tree/dada2' '06.make_tree/dada2' '06.make_tree/deblur' '06.make_tree/deblur' '06.make_tree/dada2' '06.make_tree/dada2' '06.make_tree/deblur' '06.make_tree/deblur') 17 | 18 | DEPTH=(1201 1035 1003 501 1201 1276 617 480 3116 989 726 400 2140 2115 1484 1260) 19 | 20 | FEATURE_TABLE_DIR=('05.filter_table/dada2' '05.filter_table/dada2' '05.filter_table/deblur/' '05.filter_table/deblur/' '05.filter_table/dada2/indoors' '05.filter_table/dada2/indoors' '05.filter_table/deblur/indoors' '05.filter_table/deblur/indoors' '05.filter_table/dada2/outdoors' '05.filter_table/dada2/outdoors' '05.filter_table/deblur/outdoors' '05.filter_table/deblur/outdoors' '05.filter_table/dada2/mock' '05.filter_table/dada2/mock' '05.filter_table/deblur/mock' '05.filter_table/deblur/mock') 21 | 22 | PREFIX=('se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined') 23 | 24 | METADATA=('00.mapping/combined.tsv' '00.mapping/combined.tsv' '00.mapping/combined.tsv' '00.mapping/combined.tsv' '00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/mock.tsv' '00.mapping/mock.tsv' '00.mapping/mock.tsv' '00.mapping/mock.tsv') 25 | 26 | OUT_DIR=('08.core_diversity/dada2' '08.core_diversity/dada2' '08.core_diversity/deblur' '08.core_diversity/deblur' '08.core_diversity/dada2/indoors' '08.core_diversity/dada2/indoors' '08.core_diversity/deblur/indoors' '08.core_diversity/deblur/indoors' '08.core_diversity/dada2/outdoors' '08.core_diversity/dada2/outdoors' '08.core_diversity/deblur/outdoors' '08.core_diversity/deblur/outdoors' '08.core_diversity/dada2/mock' '08.core_diversity/dada2/mock' '08.core_diversity/deblur/mock' '08.core_diversity/deblur/mock') 27 | 28 | METADATA_COLUMN=('treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment') 29 | 30 | -------------------------------------------------------------------------------- /scripts/dada2_denoize.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N denoize_dada2 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 40 9 | 10 | set -e 11 | 12 | source activate qiime2-2020.6 13 | 14 | PAIRED='false' 15 | TRIM_LEFT=0 16 | TRUNC_LENGTH=400 17 | #TRUNC_LENGTH=280 18 | #PREFIX="se" 19 | #PREFIX="pe" 20 | PREFIX="pear-joined" 21 | 22 | if [ "${PAIRED}" != "true" ]; then 23 | 24 | # Denoise, truncate and assign ASVs 25 | qiime dada2 denoise-single \ 26 | --i-demultiplexed-seqs 
01.import/${PREFIX}-reads.qza \ 27 | --p-trim-left ${TRIM_LEFT} \ 28 | --p-trunc-len ${TRUNC_LENGTH} \ 29 | --o-representative-sequences 03.dada_denoise/${PREFIX}-representative_sequences.qza \ 30 | --o-table 03.dada_denoise/${PREFIX}-table.qza \ 31 | --o-denoising-stats 03.dada_denoise/${PREFIX}-denoise_stats.qza 32 | 33 | 34 | qiime feature-table summarize \ 35 | --i-table 03.dada_denoise/${PREFIX}-table.qza \ 36 | --o-visualization 03.dada_denoise/${PREFIX}-table_summary.qzv 37 | 38 | 39 | qiime feature-table tabulate-seqs \ 40 | --i-data 03.dada_denoise/${PREFIX}-representative_sequences.qza \ 41 | --o-visualization 03.dada_denoise/${PREFIX}-representative_sequences.qzv 42 | 43 | 44 | 45 | qiime metadata tabulate \ 46 | --m-input-file 03.dada_denoise/${PREFIX}-denoise_stats.qza \ 47 | --o-visualization 03.dada_denoise/${PREFIX}-denoise_stats.qzv 48 | 49 | else 50 | 51 | qiime dada2 denoise-paired \ 52 | --i-demultiplexed-seqs 01.import/reads.qza \ 53 | --o-table 03.dada_denoise/${PREFIX}-table.qza \ 54 | --o-representative-sequences 03.dada_denoise/${PREFIX}-representative_sequences.qza \ 55 | --o-denoising-stats 03.dada_denoise/${PREFIX}-denoise_stats.qza \ 56 | --p-trunc-len-f ${TRUNC_LENGTH} \ 57 | --p-trunc-len-r ${TRUNC_LENGTH} \ 58 | --p-trim-left-f ${TRIM_LEFT} \ 59 | --p-trim-left-r ${TRIM_LEFT} \ 60 | --p-n-threads 30 61 | 62 | 63 | # This visualization shows us the sequences per sample spread - to determine minimum number for rarefaction 64 | # and sequences per feature (OTU or ASV) 65 | qiime feature-table summarize \ 66 | --i-table 03.dada_denoise/${PREFIX}-table.qza \ 67 | --o-visualization 03.dada_denoise/${PREFIX}-table_summary.qzv 68 | 69 | 70 | qiime feature-table tabulate-seqs \ 71 | --i-data 03.dada_denoise/${PREFIX}-representative_sequences.qza \ 72 | --o-visualization 03.dada_denoise/${PREFIX}-representative_sequences.qzv 73 | 74 | 75 | qiime metadata tabulate \ 76 | --m-input-file 03.dada_denoise/${PREFIX}-denoise_stats.qza \ 77 | --o-visualization 03.dada_denoise/${PREFIX}-denoise_stats.qzv 78 | 79 | fi 80 | -------------------------------------------------------------------------------- /create_DB/config/sample.tsv: -------------------------------------------------------------------------------- 1 | SampleID Type Prefix Direction Old_name New_name 2 | A Forward A_DKDL210000007-1a_HFL3FCCX2_L6 _1 01.raw_data/A/A_DKDL210000007-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/A/A_1.fq.gz 3 | A Reverse A_DKDL210000007-1a_HFL3FCCX2_L6 _2 01.raw_data/A/A_DKDL210000007-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/A/A_2.fq.gz 4 | A1 Forward A1_DKDL210000015-1a_HFL3FCCX2_L6 _1 01.raw_data/A1/A1_DKDL210000015-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/A1/A1_1.fq.gz 5 | A1 Reverse A1_DKDL210000015-1a_HFL3FCCX2_L6 _2 01.raw_data/A1/A1_DKDL210000015-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/A1/A1_2.fq.gz 6 | A2 Forward A2_DKDL210000016-1a_HFL3FCCX2_L6 _1 01.raw_data/A2/A2_DKDL210000016-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/A2/A2_1.fq.gz 7 | A2 Reverse A2_DKDL210000016-1a_HFL3FCCX2_L6 _2 01.raw_data/A2/A2_DKDL210000016-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/A2/A2_2.fq.gz 8 | B Forward B_DKDL210000012-1a_HFL3FCCX2_L6 _1 01.raw_data/B/B_DKDL210000012-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/B/B_1.fq.gz 9 | B Reverse B_DKDL210000012-1a_HFL3FCCX2_L6 _2 01.raw_data/B/B_DKDL210000012-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/B/B_2.fq.gz 10 | L2 Forward L2_DKDL210000008-1a_HFL3FCCX2_L6 _1 01.raw_data/L2/L2_DKDL210000008-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/L2/L2_1.fq.gz 11 | L2 Reverse L2_DKDL210000008-1a_HFL3FCCX2_L6 _2 
01.raw_data/L2/L2_DKDL210000008-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/L2/L2_2.fq.gz 12 | L3 Forward L3_DKDL210000009-1a_HFL3FCCX2_L6 _1 01.raw_data/L3/L3_DKDL210000009-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/L3/L3_1.fq.gz 13 | L3 Reverse L3_DKDL210000009-1a_HFL3FCCX2_L6 _2 01.raw_data/L3/L3_DKDL210000009-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/L3/L3_2.fq.gz 14 | L4 Forward L4_DKDL210000010-1a_HFL3FCCX2_L6 _1 01.raw_data/L4/L4_DKDL210000010-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/L4/L4_1.fq.gz 15 | L4 Reverse L4_DKDL210000010-1a_HFL3FCCX2_L6 _2 01.raw_data/L4/L4_DKDL210000010-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/L4/L4_2.fq.gz 16 | L5 Forward L5_DKDL210000011-1a_HFL3FCCX2_L6 _1 01.raw_data/L5/L5_DKDL210000011-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/L5/L5_1.fq.gz 17 | L5 Reverse L5_DKDL210000011-1a_HFL3FCCX2_L6 _2 01.raw_data/L5/L5_DKDL210000011-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/L5/L5_2.fq.gz 18 | L7 Forward L7_DKDL210000013-1a_HFL3FCCX2_L6 _1 01.raw_data/L7/L7_DKDL210000013-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/L7/L7_1.fq.gz 19 | L7 Reverse L7_DKDL210000013-1a_HFL3FCCX2_L6 _2 01.raw_data/L7/L7_DKDL210000013-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/L7/L7_2.fq.gz 20 | L8 Forward L8_DKDL210000014-1a_HFL3FCCX2_L6 _1 01.raw_data/L8/L8_DKDL210000014-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/L8/L8_1.fq.gz 21 | L8 Reverse L8_DKDL210000014-1a_HFL3FCCX2_L6 _2 01.raw_data/L8/L8_DKDL210000014-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/L8/L8_2.fq.gz 22 | -------------------------------------------------------------------------------- /create_DB/eukaryote-unite/config/sample.tsv: -------------------------------------------------------------------------------- 1 | SampleID Type Prefix Direction Old_name New_name 2 | A Forward A_DKDL210000007-1a_HFL3FCCX2_L6 _1 01.raw_data/A/A_DKDL210000007-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/A/A_1.fq.gz 3 | A Reverse A_DKDL210000007-1a_HFL3FCCX2_L6 _2 01.raw_data/A/A_DKDL210000007-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/A/A_2.fq.gz 4 | A1 Forward A1_DKDL210000015-1a_HFL3FCCX2_L6 _1 01.raw_data/A1/A1_DKDL210000015-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/A1/A1_1.fq.gz 5 | A1 Reverse A1_DKDL210000015-1a_HFL3FCCX2_L6 _2 01.raw_data/A1/A1_DKDL210000015-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/A1/A1_2.fq.gz 6 | A2 Forward A2_DKDL210000016-1a_HFL3FCCX2_L6 _1 01.raw_data/A2/A2_DKDL210000016-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/A2/A2_1.fq.gz 7 | A2 Reverse A2_DKDL210000016-1a_HFL3FCCX2_L6 _2 01.raw_data/A2/A2_DKDL210000016-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/A2/A2_2.fq.gz 8 | B Forward B_DKDL210000012-1a_HFL3FCCX2_L6 _1 01.raw_data/B/B_DKDL210000012-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/B/B_1.fq.gz 9 | B Reverse B_DKDL210000012-1a_HFL3FCCX2_L6 _2 01.raw_data/B/B_DKDL210000012-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/B/B_2.fq.gz 10 | L2 Forward L2_DKDL210000008-1a_HFL3FCCX2_L6 _1 01.raw_data/L2/L2_DKDL210000008-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/L2/L2_1.fq.gz 11 | L2 Reverse L2_DKDL210000008-1a_HFL3FCCX2_L6 _2 01.raw_data/L2/L2_DKDL210000008-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/L2/L2_2.fq.gz 12 | L3 Forward L3_DKDL210000009-1a_HFL3FCCX2_L6 _1 01.raw_data/L3/L3_DKDL210000009-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/L3/L3_1.fq.gz 13 | L3 Reverse L3_DKDL210000009-1a_HFL3FCCX2_L6 _2 01.raw_data/L3/L3_DKDL210000009-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/L3/L3_2.fq.gz 14 | L4 Forward L4_DKDL210000010-1a_HFL3FCCX2_L6 _1 01.raw_data/L4/L4_DKDL210000010-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/L4/L4_1.fq.gz 15 | L4 Reverse L4_DKDL210000010-1a_HFL3FCCX2_L6 _2 01.raw_data/L4/L4_DKDL210000010-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/L4/L4_2.fq.gz 16 | L5 Forward 
L5_DKDL210000011-1a_HFL3FCCX2_L6 _1 01.raw_data/L5/L5_DKDL210000011-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/L5/L5_1.fq.gz 17 | L5 Reverse L5_DKDL210000011-1a_HFL3FCCX2_L6 _2 01.raw_data/L5/L5_DKDL210000011-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/L5/L5_2.fq.gz 18 | L7 Forward L7_DKDL210000013-1a_HFL3FCCX2_L6 _1 01.raw_data/L7/L7_DKDL210000013-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/L7/L7_1.fq.gz 19 | L7 Reverse L7_DKDL210000013-1a_HFL3FCCX2_L6 _2 01.raw_data/L7/L7_DKDL210000013-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/L7/L7_2.fq.gz 20 | L8 Forward L8_DKDL210000014-1a_HFL3FCCX2_L6 _1 01.raw_data/L8/L8_DKDL210000014-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/L8/L8_1.fq.gz 21 | L8 Reverse L8_DKDL210000014-1a_HFL3FCCX2_L6 _2 01.raw_data/L8/L8_DKDL210000014-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/L8/L8_2.fq.gz 22 | -------------------------------------------------------------------------------- /config/sample.tsv: -------------------------------------------------------------------------------- 1 | SampleID Type Old_name New_name 2 | A1 Forward 01.raw_data/A1_1ILC_S33_L005_R1_001.fastq.gz 01.raw_data/A1.fastq.gz 3 | A2 Forward 01.raw_data/A2_9IL7_S41_L006_R1_001.fastq.gz 01.raw_data/A2.fastq.gz 4 | A3 Forward 01.raw_data/A3_17IRC_S49_L007_R1_001.fastq.gz 01.raw_data/A3.fastq.gz 5 | A4 Forward 01.raw_data/A4_25IR7_S57_L008_R1_001.fastq.gz 01.raw_data/A4.fastq.gz 6 | B1 Forward 01.raw_data/B1_2DLC_S34_L005_R1_001.fastq.gz 01.raw_data/B1.fastq.gz 7 | B2 Forward 01.raw_data/B2_10DL7_S42_L006_R1_001.fastq.gz 01.raw_data/B2.fastq.gz 8 | B3 Forward 01.raw_data/B3_18DRC_S50_L007_R1_001.fastq.gz 01.raw_data/B3.fastq.gz 9 | B4 Forward 01.raw_data/B4_26DR7_S58_L008_R1_001.fastq.gz 01.raw_data/B4.fastq.gz 10 | C1 Forward 01.raw_data/C1_3ILC_S35_L005_R1_001.fastq.gz 01.raw_data/C1.fastq.gz 11 | C2 Forward 01.raw_data/C2_11IL7_S43_L006_R1_001.fastq.gz 01.raw_data/C2.fastq.gz 12 | C3 Forward 01.raw_data/C3_19IRC_S51_L007_R1_001.fastq.gz 01.raw_data/C3.fastq.gz 13 | C4 Forward 01.raw_data/C4_27IR7_S59_L008_R1_001.fastq.gz 01.raw_data/C4.fastq.gz 14 | D1 Forward 01.raw_data/D1_4DLC_S36_L005_R1_001.fastq.gz 01.raw_data/D1.fastq.gz 15 | D2 Forward 01.raw_data/D2_12DL7_S44_L006_R1_001.fastq.gz 01.raw_data/D2.fastq.gz 16 | D3 Forward 01.raw_data/D3_20DRC_S52_L007_R1_001.fastq.gz 01.raw_data/D3.fastq.gz 17 | D4 Forward 01.raw_data/D4_28DR7_S60_L008_R1_001.fastq.gz 01.raw_data/D4.fastq.gz 18 | E1 Forward 01.raw_data/E1_5ILC_S37_L005_R1_001.fastq.gz 01.raw_data/E1.fastq.gz 19 | E2 Forward 01.raw_data/E2_13IL7_S45_L006_R1_001.fastq.gz 01.raw_data/E2.fastq.gz 20 | E3 Forward 01.raw_data/E3_21IRC_S53_L007_R1_001.fastq.gz 01.raw_data/E3.fastq.gz 21 | E4 Forward 01.raw_data/E4_29IR7_S61_L008_R1_001.fastq.gz 01.raw_data/E4.fastq.gz 22 | F1 Forward 01.raw_data/F1_6DLC_S38_L005_R1_001.fastq.gz 01.raw_data/F1.fastq.gz 23 | F2 Forward 01.raw_data/F2_14DL7_S46_L006_R1_001.fastq.gz 01.raw_data/F2.fastq.gz 24 | F3 Forward 01.raw_data/F3_22DRC_S54_L007_R1_001.fastq.gz 01.raw_data/F3.fastq.gz 25 | F4 Forward 01.raw_data/F4_30DR7_S62_L008_R1_001.fastq.gz 01.raw_data/F4.fastq.gz 26 | G1 Forward 01.raw_data/G1_7ILC_S39_L005_R1_001.fastq.gz 01.raw_data/G1.fastq.gz 27 | G2 Forward 01.raw_data/G2_15IL7_S47_L006_R1_001.fastq.gz 01.raw_data/G2.fastq.gz 28 | G3 Forward 01.raw_data/G3_23IRC_S55_L007_R1_001.fastq.gz 01.raw_data/G3.fastq.gz 29 | G4 Forward 01.raw_data/G4_31IR7_S63_L008_R1_001.fastq.gz 01.raw_data/G4.fastq.gz 30 | H1 Forward 01.raw_data/H1_8DLC_S40_L005_R1_001.fastq.gz 01.raw_data/H1.fastq.gz 31 | H2 Forward 01.raw_data/H2_16DL7_S48_L006_R1_001.fastq.gz 
01.raw_data/H2.fastq.gz 32 | H3 Forward 01.raw_data/H3_24DRC_S56_L007_R1_001.fastq.gz 01.raw_data/H3.fastq.gz 33 | H4 Forward 01.raw_data/H4_32DR7_S64_L008_R1_001.fastq.gz 01.raw_data/H4.fastq.gz 34 | -------------------------------------------------------------------------------- /scripts/krona-arg.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import zipfile 4 | import argparse 5 | 6 | #argparse 7 | parser = argparse.ArgumentParser() 8 | 9 | parser.add_argument("--input","-i",help="visualized collapsed taxa (qzv)") 10 | parser.add_argument("--output","-o",help="name of the krona output (should end with .html)") 11 | parser.add_argument("--exclude","-e",help="exclude sample list (add samples separated by commas)") 12 | parser.add_argument("--regex","-r",help="apply the prefix-stripping regex or not (default: True)") 13 | 14 | args=parser.parse_args() 15 | 16 | if args.input: 17 | input=args.input 18 | 19 | if args.output: 20 | output=args.output 21 | 22 | # default to an empty exclude list when --exclude is not given 23 | excludelist=args.exclude.split(",") if args.exclude else [] 24 | 25 | if args.regex: 26 | regex=args.regex.lower()!="false" # apply the regex unless "False"/"false" is passed 27 | else: 28 | regex=True 29 | 30 | #extract metadata.tsv from collapsed taxa 31 | def unzip(qzv_file): 32 | with zipfile.ZipFile(qzv_file) as zip: 33 | for zip_info in zip.infolist(): 34 | if "data/metadata.tsv" in zip_info.filename: 35 | zip_info.filename=os.path.basename(zip_info.filename) 36 | zip.extract(zip_info) 37 | 38 | #create tsv files which Krona likes 39 | def make_tsv(name): 40 | tsv=open("krona-tsv/"+name+".tsv","w+") 41 | tsv.write(name) 42 | for i in range(0,len(new)): 43 | tsv.write("\n"+data_dict[name][i]+"\t"+new[i]) 44 | tsv.close() 45 | 46 | unzip(input) 47 | 48 | #this folder will be deleted at the end of the process 49 | if not os.path.exists("krona-tsv"): 50 | os.makedirs("krona-tsv") 51 | 52 | file=open("metadata.tsv","r") 53 | 54 | lines=file.readlines() 55 | 56 | file.close() 57 | 58 | #remove the file since we don't need it anymore 59 | os.system("rm "+"metadata.tsv") 60 | 61 | new=[] 62 | sample_names=[] 63 | data_dict={} 64 | 65 | taxa=lines[0].split("\t") 66 | taxa=taxa[1:] 67 | 68 | lines.pop(0) 69 | lines.pop(0) 70 | 71 | for line in lines: 72 | line=line.strip().split("\t") 73 | data=[] 74 | if line[0] in excludelist: 75 | continue 76 | else: 77 | sample_names.append(line[0]) 78 | for value in line: 79 | if any(i.isalpha() for i in value)==True: 80 | pass 81 | else: 82 | data.append(value) 83 | data_dict[line[0]]=data 84 | print(data_dict) 85 | 86 | #Regex for SILVA and greengenes. I don't like to see the prefix they add. 87 | for x in taxa: 88 | if regex==True: 89 | x=re.sub("D_\d__","",x) 90 | x=re.sub("\w__","",x) 91 | new.append(x.replace(";","\t")) 92 | 93 | for sample in sample_names: 94 | make_tsv(sample) 95 | 96 | #This part runs Krona and removes tsv files we created.
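# (Illustrative usage sketch, not part of the original script; the input file name is hypothetical and
#  ktImportText from KronaTools must be on the PATH:)
#   python krona-arg.py -i collapsed-taxa.qzv -o krona.html -e Mock1,Mock2
#   python krona-arg.py --input collapsed-taxa.qzv --output krona.html --regex False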
97 | os.system("ktImportText krona-tsv/* -o "+output) 98 | os.system("rm -r krona-tsv") 99 | -------------------------------------------------------------------------------- /scripts/export_table.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -eo pipefail 4 | 5 | source activate qiime2-2020.6 6 | export PERL5LIB='/gpfs0/bioinfo/users/obayomi/miniconda3/envs/qiime2-2020.6/lib/site_perl/5.26.2/x86_64-linux-thread-multi' 7 | 8 | 9 | # Dada2 Reanalysis after splitting indoor samples and dropping some outdoor samples 10 | TAXONOMY_DIR=(04.redo_assign_taxonomy/dada2{,,}) 11 | FEATURE_TABLE_DIR=(05.redo_filter_table/dada2/{indoors,outdoors,basins}/) 12 | PREFIX=($( for i in {1..3}; do echo 'se'; done)) 13 | OUT_DIR=(10.exports/dada2/{indoors,outdoors,basins}) 14 | 15 | 16 | ##### Export feature table with taxonomy assignment in biom format 17 | # https://forum.qiime2.org/t/exporting-and-modifying-biom-tables-e-g-adding-taxonomy-annotations/3630 18 | 19 | 20 | function export_feature_table(){ 21 | 22 | local PREFIX=$1 23 | local FEATURE_DIR=$2 24 | local OUT_DIR=$3 25 | local TAXONOMY_DIR=$4 26 | 27 | ##### Creating a BIOM table with taxonomy annotations 28 | qiime tools export --input-path ${FEATURE_DIR}/${PREFIX}-filtered_table.qza --output-path ${OUT_DIR}/ 29 | # Creating a TSV BIOM table 30 | biom convert -i ${OUT_DIR}/feature-table.biom -o ${OUT_DIR}/feature-table.tsv --to-tsv 31 | # Export taxonomy 32 | qiime tools export --input-path ${TAXONOMY_DIR}/${PREFIX}-taxonomy.qza --output-path ${OUT_DIR}/ 33 | 34 | #Next, we’ll need to modify the exported taxonomy file’s header before using it with BIOM software. 35 | 36 | # Before modifying that file, make a copy: 37 | cp ${OUT_DIR}/taxonomy.tsv ${OUT_DIR}/biom-taxonomy.tsv 38 | 39 | # Change the first line of biom-taxonomy.tsv (i.e. the header) to this: 40 | # Note that you’ll need to use tab characters in the header since this is a TSV file. 
41 | #OTUID taxonomy confidence 42 | 43 | # programmatically 44 | (echo "#OTUID taxonomy confidence"; sed -e '1d' ${OUT_DIR}/biom-taxonomy.tsv) \ 45 | > ${OUT_DIR}/tmp.tsv && rm -rf ${OUT_DIR}/biom-taxonomy.tsv && mv ${OUT_DIR}/tmp.tsv ${OUT_DIR}/biom-taxonomy.tsv 46 | 47 | # Finally, add the taxonomy data to your .biom file: 48 | biom add-metadata \ 49 | -i ${OUT_DIR}/feature-table.biom \ 50 | -o ${OUT_DIR}/table-with-taxonomy.biom \ 51 | --observation-metadata-fp ${OUT_DIR}/biom-taxonomy.tsv \ 52 | --sc-separated taxonomy 53 | 54 | # Creating a TSV BIOM table 55 | #biom convert -i ${OUT_DIR}/table-with-taxonomy.biom -o ${OUT_DIR}/table-with-taxonomy.biom.tsv --to-tsv 56 | 57 | 58 | } 59 | 60 | 61 | export -f export_feature_table 62 | 63 | # Export tables 64 | parallel --jobs 0 --link export_feature_table {1} {2} {3} {4} ::: ${PREFIX[*]} ::: ${FEATURE_TABLE_DIR[*]} ::: ${OUT_DIR[*]} ::: ${TAXONOMY_DIR[*]} 65 | 66 | #Test 67 | #export_feature_table ${PREFIX[0]} ${FEATURE_TABLE_DIR[0]} ${OUT_DIR[0]} ${TAXONOMY_DIR[0]} 68 | -------------------------------------------------------------------------------- /scripts/new-dada2_denoize.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N denoize_dada2 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 40 9 | 10 | set -e 11 | 12 | source activate qiime2-2020.6 13 | PAIRED="false" 14 | #PAIRED="true" 15 | TRIM_LEFT=0 16 | TRIM_RIGHT=0 17 | TRUNC_LENGTH=400 18 | #TRUNC_LENGTH=260 19 | TRUNC_LENGTH_LEFT=297 20 | TRUNC_LENGTH_RIGHT=290 21 | maxE_f=4 22 | maxE_r=7 23 | 24 | OUT_DIR="03.redo_dada_denoise" 25 | IMPORT_DIR="01.import" 26 | #PREFIX="se" 27 | #PREFIX="pe" 28 | PREFIX="pear-joined" 29 | 30 | if [ "${PAIRED}" != "true" ]; then 31 | echo "running dada single" 32 | # Denoise, truncate and assign ASVs 33 | qiime dada2 denoise-single \ 34 | --i-demultiplexed-seqs ${IMPORT_DIR}/${PREFIX}-reads.qza \ 35 | --p-trim-left ${TRIM_LEFT} \ 36 | --p-trunc-len ${TRUNC_LENGTH} \ 37 | --p-max-ee ${maxE_f} \ 38 | --o-representative-sequences ${OUT_DIR}/${PREFIX}-representative_sequences.qza \ 39 | --o-table ${OUT_DIR}/${PREFIX}-table.qza \ 40 | --o-denoising-stats ${OUT_DIR}/${PREFIX}-denoise_stats.qza 41 | 42 | 43 | qiime feature-table summarize \ 44 | --i-table ${OUT_DIR}/${PREFIX}-table.qza \ 45 | --o-visualization ${OUT_DIR}/${PREFIX}-table_summary.qzv 46 | 47 | 48 | qiime feature-table tabulate-seqs \ 49 | --i-data ${OUT_DIR}/${PREFIX}-representative_sequences.qza \ 50 | --o-visualization ${OUT_DIR}/${PREFIX}-representative_sequences.qzv 51 | 52 | 53 | 54 | qiime metadata tabulate \ 55 | --m-input-file ${OUT_DIR}/${PREFIX}-denoise_stats.qza \ 56 | --o-visualization ${OUT_DIR}/${PREFIX}-denoise_stats.qzv 57 | 58 | else 59 | echo "running dada paired" 60 | qiime dada2 denoise-paired \ 61 | --i-demultiplexed-seqs 01.import/${PREFIX}-reads.qza \ 62 | --o-table ${OUT_DIR}/${PREFIX}-table.qza \ 63 | --o-representative-sequences ${OUT_DIR}/${PREFIX}-representative_sequences.qza \ 64 | --o-denoising-stats ${OUT_DIR}/${PREFIX}-denoise_stats.qza \ 65 | --p-trunc-len-f ${TRUNC_LENGTH_LEFT} \ 66 | --p-trunc-len-r ${TRUNC_LENGTH_RIGHT} \ 67 | --p-trim-left-f ${TRIM_LEFT} \ 68 | --p-trim-left-r ${TRIM_RIGHT} \ 69 | --p-max-ee-f ${maxE_f} \ 70 | --p-max-ee-r ${maxE_r} \ 71 | --p-n-threads 30 72 | 73 | 74 | # This visualization shows us the sequences per sample spread - to determine minimum number for rarefaction 75 | # and sequences per feature (OTU or ASV) 76 | qiime
feature-table summarize \ 77 | --i-table ${OUT_DIR}/${PREFIX}-table.qza \ 78 | --o-visualization ${OUT_DIR}/${PREFIX}-table_summary.qzv 79 | 80 | 81 | qiime feature-table tabulate-seqs \ 82 | --i-data ${OUT_DIR}/${PREFIX}-representative_sequences.qza \ 83 | --o-visualization ${OUT_DIR}/${PREFIX}-representative_sequences.qzv 84 | 85 | 86 | qiime metadata tabulate \ 87 | --m-input-file ${OUT_DIR}/${PREFIX}-denoise_stats.qza \ 88 | --o-visualization ${OUT_DIR}/${PREFIX}-denoise_stats.qzv 89 | 90 | fi 91 | -------------------------------------------------------------------------------- /00.mapping/outdoors-edited.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number pcr_cycles medium_or_plant surface_sterilization treatment description 2 | 1A 1 22 Medium NONE Medium Filtered_by_Avital_with_pPNA_&_mPNA 3 | 2A 2 22 Medium NONE Medium Filtered_by_Avital_with_pPNA_&_mPNA 4 | 3A 3 22 Medium NONE Medium Filtered_by_Avital_with_pPNA_&_mPNA 5 | 4A 4 22 Medium NONE Medium Filtered_by_Avital_with_pPNA_&_mPNA 6 | 5A 5 22 Medium NONE Medium Filtered_by_Avital_with_pPNA_&_mPNA 7 | 6A 6 22 Medium NONE Medium Filtered_by_Avital_with_pPNA_&_mPNA 8 | 7A 7 22 Medium NONE Medium Filtered_by_Avital_with_pPNA_&_mPNA 9 | 8A 8 24 Medium NONE Medium Filtered_by_Avital_with_pPNA_&_mPNA 10 | 9A 9 24 Medium NONE Medium Filtered_by_Avital_with_pPNA_&_mPNA 11 | 10A 10 24 Medium NONE Medium Filtered_by_Avital_with_pPNA_&_mPNA 12 | 11A 11 24 Medium NONE Medium Filtered_by_Avital_with_pPNA_&_mPNA 13 | 12A 12 24 Medium NONE Medium Filtered_by_Avital_with_pPNA_&_mPNA 14 | 13A 13 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 15 | 14A 14 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 16 | 15A 15 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 17 | 16A 16 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 18 | 17A 17 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 19 | 18A 18 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 20 | 19A 19 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 21 | 20A 20 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 22 | 21A 21 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 23 | 22A 22 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 24 | 23A 23 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 25 | 24A 24 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 26 | 25A 25 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 27 | 26A 26 24 Plant Yes water_washed_Surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 28 | 27A 27 24 Plant Yes water_washed_Surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 29 | 28A 28 24 Plant Yes water_washed_Surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 30 | 29A 29 24 Plant Yes water_washed_Surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 31 | 30A 30 24 Plant Yes water_washed_Surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 32 | 31A 31 24 Plant Yes water_washed_Surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 33 | 32A 32 24 Plant Yes 
water_washed_Surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 34 | 33A 33 24 Plant Yes water_washed_Surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 35 | 34A 34 24 Plant Yes water_washed_Surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 36 | 35A 35 24 Plant Yes water_washed_Surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 37 | 36A 36 22 Plant Yes water_washed_Surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 38 | -------------------------------------------------------------------------------- /00.mapping/outdoors.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number pcr_cycles medium_or_plant surface_sterilization treatment description 2 | 1A 1 22 Medium NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 3 | 2A 2 22 Medium NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 4 | 3A 3 22 Medium NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 5 | 4A 4 22 Medium NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 6 | 5A 5 22 Medium NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 7 | 6A 6 22 Medium NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 8 | 7A 7 22 Medium NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 9 | 8A 8 24 Medium NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 10 | 9A 9 24 Medium NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 11 | 10A 10 24 Medium NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 12 | 11A 11 24 Medium NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 13 | 12A 12 24 Medium NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 14 | 13A 13 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 15 | 14A 14 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 16 | 15A 15 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 17 | 16A 16 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 18 | 17A 17 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 19 | 18A 18 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 20 | 19A 19 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 21 | 20A 20 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 22 | 21A 21 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 23 | 22A 22 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 24 | 23A 23 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 25 | 24A 24 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 26 | 25A 25 24 Plant Yes water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 27 | 26A 26 24 Plant Yes water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 28 | 27A 27 24 Plant Yes water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 29 | 28A 28 24 Plant Yes water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 30 | 29A 29 24 Plant Yes water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 31 | 30A 30 24 Plant Yes water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 32 | 31A 31 24 Plant Yes water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 33 | 32A 32 24 Plant Yes 
water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 34 | 33A 33 24 Plant Yes water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 35 | 34A 34 24 Plant Yes water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 36 | 35A 35 24 Plant Yes water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 37 | 36A 36 22 Plant Yes water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA -------------------------------------------------------------------------------- /scripts/filter-samples.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N Filter_samples 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 10 9 | 10 | set -e 11 | 12 | source activate qiime2-2020.6 13 | export PERL5LIB='/gpfs0/bioinfo/users/obayomi/miniconda3/envs/qiime2-2020.6/lib/site_perl/5.26.2/x86_64-linux-thread-multi' 14 | 15 | #OUT_PREFIX=('05.filter_table/dada2/indoors/se' '05.filter_table/dada2/indoors/pear-joined' '05.filter_table/deblur/indoors/se' '05.filter_table/deblur/indoors/pear-joined' '05.filter_table/dada2/outdoors/se' '05.filter_table/dada2/outdoors/pear-joined' '05.filter_table/deblur/outdoors/se' '05.filter_table/deblur/outdoors/pear-joined' '05.filter_table/dada2/mock/se' '05.filter_table/dada2/mock/pear-joined' '05.filter_table/deblur/mock/se' '05.filter_table/deblur/mock/pear-joined') 16 | 17 | #OUT_PREFIX=('05.redo_filter_table/dada2/indoors/se' '05.redo_filter_table/dada2/indoors/pear-joined' '05.redo_filter_table/dada2/indoors/pe' '05.redo_filter_table/dada2/outdoors/se' '05.redo_filter_table/dada2/outdoors/pear-joined' '05.redo_filter_table/dada2/outdoors/pe' '05.redo_filter_table/dada2/mock/se' '05.redo_filter_table/dada2/mock/pear-joined' '05.redo_filter_table/dada2/mock/pe') 18 | 19 | 20 | OUT_PREFIX=(05.{,redo_}filter_table/dada2/{indoors,outdoors,basins}/se) 21 | 22 | 23 | #METADATA=('00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/mock.tsv' '00.mapping/mock.tsv' '00.mapping/mock.tsv' '00.mapping/mock.tsv') 24 | 25 | #METADATA=('00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/pe-dada2/indoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/pe-dada2/outdoors.tsv' '00.mapping/mock.tsv' '00.mapping/mock.tsv' '00.mapping/pe-dada2/mock.tsv') 26 | 27 | METADATA=($(for i in {1..2}; do echo 00.mapping/{indoors,outdoors,basins}.tsv;done)) 28 | 29 | #COMBINED_TABLE=('05.filter_table/dada2/se' '05.filter_table/dada2/pear-joined' '05.filter_table/deblur/se' '05.filter_table/deblur/pear-joined' '05.filter_table/dada2/se' '05.filter_table/dada2/pear-joined' '05.filter_table/deblur/se' '05.filter_table/deblur/pear-joined' '05.filter_table/dada2/se' '05.filter_table/dada2/pear-joined' '05.filter_table/deblur/se' '05.filter_table/deblur/pear-joined') 30 | 31 | #COMBINED_TABLE=('05.redo_filter_table/dada2/se' '05.redo_filter_table/dada2/pear-joined' '05.redo_filter_table/dada2/pe' '05.redo_filter_table/dada2/se' '05.redo_filter_table/dada2/pear-joined' '05.redo_filter_table/dada2/pe' '05.redo_filter_table/dada2/se' '05.redo_filter_table/dada2/pear-joined' '05.redo_filter_table/dada2/pe') 32 | 33 | #{,,} means to repeat the preceding text 3 times 34 |
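# (Illustrative note, not part of the original script:) a quick brace-expansion demo for the line below:
#   echo 05.{,redo_}filter_table/dada2/se{,,}
# would print: 05.filter_table/dada2/se 05.filter_table/dada2/se 05.filter_table/dada2/se 05.redo_filter_table/dada2/se 05.redo_filter_table/dada2/se 05.redo_filter_table/dada2/se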
COMBINED_TABLE=(05.{,redo_}filter_table/dada2/se{,,}) 35 | 36 | 37 | 38 | parallel --jobs 0 --link qiime feature-table filter-samples \ 39 | --i-table {1}-taxa_filtered_table.qza \ 40 | --m-metadata-file {2} \ 41 | --o-filtered-table {3}-taxa_filtered_table.qza ::: ${COMBINED_TABLE[*]} ::: ${METADATA[*]} ::: ${OUT_PREFIX[*]} 42 | 43 | 44 | parallel --jobs 0 --link qiime feature-table summarize \ 45 | --i-table {}-taxa_filtered_table.qza \ 46 | --o-visualization {}-taxa_filtered_table.qzv ::: ${OUT_PREFIX[*]} 47 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/indoors-minus-cntVsB12.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number batch pcr_cycles medium_or_plant sterile_plant grown_with_antibiotics location water_washed surface_sterilization b12_enriched treatment description 2 | 9A-2 55 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 3 | 10A-2 56 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 4 | 11A-2 57 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 5 | 12A-2 58 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 6 | 23A-2 69 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 7 | 25A-2 70 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 8 | 26A-2 71 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 9 | 27A-2 72 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 10 | 28A-2 73 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 11 | 29A-2 74 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 12 | 30A-2 75 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 13 | 31A-2 76 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 14 | 32A-2 77 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 15 | 33A-2 78 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 16 | 34A-2 79 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 17 | 35A-2 80 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 18 | 36A-2 81 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 19 | 37A-2 82 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 20 | 38A-2 83 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 21 | 39A-2 84 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 22 | 40A-2 85 B 22 Plant Yes Yes indoors Yes No NA 
indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 23 | 41A-2 86 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 24 | 42A-2 87 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 25 | 43A-2 88 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 26 | 44A-2 89 B NA Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic 27 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/pe-dada2/indoors-minus-cntVsB12.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number batch pcr_cycles medium_or_plant sterile_plant grown_with_antibiotics location water_washed surface_sterilization b12_enriched treatment description 2 | Osnat054-9-A-2 55 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 3 | Osnat055-10-A-2 56 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 4 | Osnat056-11-A-2 57 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 5 | Osnat057-12-A-2 58 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 6 | Osnat068-23-A-2 69 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 7 | Osnat069-25-A-2 70 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 8 | Osnat070-26-A-2 71 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 9 | Osnat071-27-A-2 72 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 10 | Osnat072-28-A-2 73 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 11 | Osnat073-29-A-2 74 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 12 | Osnat074-30-A-2 75 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 13 | Osnat075-31-A-2 76 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 14 | Osnat076-32-A-2 77 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 15 | Osnat077-33-A-2 78 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 16 | Osnat078-34-A-2 79 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 17 | Osnat079-35-A-2 80 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 18 | Osnat080-36-A-2 81 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 19 | Osnat081-37-A-2 82 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 
20 | Osnat082-38-A-2 83 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 21 | Osnat083-39-A-2 84 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 22 | Osnat084-40-A-2 85 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 23 | Osnat085-41-A-2 86 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 24 | Osnat086-42-A-2 87 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 25 | Osnat087-43-A-2 88 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 26 | Osnat088-44-A-2 89 B NA Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic 27 | -------------------------------------------------------------------------------- /docker/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: install snakemake fastqc multiqc pear qiime run copy manifest samples rename silva complete import denoise plot assign_taxonomy 2 | 3 | help: 4 | @echo "A pipeline to perform 16S microbiome analysis on aws " 5 | 6 | install: update snakemake fastqc multiqc pear qiime silva 7 | 8 | update: 9 | @echo "updating the shell" 10 | #sudo apt update && apt upgrade 11 | 12 | fastqc: update 13 | @echo "Downloading fastqc" 14 | docker pull staphb/fastqc:0.12.1 15 | 16 | multiqc: update 17 | @echo "Downloading Multiqc" 18 | docker pull staphb/multiqc:1.8 19 | 20 | pear: update 21 | @echo "Downloading pear read merger" 22 | docker pull olabiyi/pear:0.92 23 | 24 | qiime: update 25 | @echo "Downloading qiime and picrust" 26 | # For functions analysis using picrust 27 | docker pull kubor/qiime2-picrust2:2019.10 28 | # Core qiime 29 | docker pull quay.io/qiime2/amplicon:2023.9 30 | 31 | snakemake: update 32 | @echo "Download snakemake" 33 | #docker pull snakemake/snakemake:stable 34 | conda install -c bioconda snakemake 35 | 36 | silva: update 37 | @echo "Dowloading Silva database" 38 | #Full 39 | wget https://data.qiime2.org/2023.9/common/silva-138-99-nb-classifier.qza 40 | # V4 specific 41 | wget https://data.qiime2.org/2023.9/common/silva-138-99-515-806-nb-classifier.qza 42 | 43 | run: 44 | #@echo "Running snakemake in a docker container" 45 | $(shell docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock -v /usr/bin/docker:/usr/bin/docker -v ${PWD}:${PWD} -w ${PWD} -u $(id -u):$(id -g) snakemake/snakemake:stable; snakemake -pr --cores 30 --keep-going --rerun-incomplete) 46 | 47 | 48 | copy: 49 | @echo "Copying files from S3" 50 | @echo "Configure aws first by running: sudo apt install awscli -y && aws configure" 51 | #aws s3 cp --recursive s3://biodsa-sequencing-data/SEQ44XXX/SEQ44733/Reads/ 01.raw_data/ 52 | bash download_seqs.sh 53 | 54 | rename: 55 | @echo "Renaming the files so that the filename replect the sample names in individual directory" 56 | bash rename_files.sh 57 | 58 | samples: 59 | @echo "Get sample names for config.yaml" 60 | bash get_samples.sh 61 | 62 | manifest: 63 | @echo "Creating a MANIFEST file" 64 | bash make_manifest.sh 65 | 66 | complete: 67 | @echo "Running the complete 
pipeline. Quality reports, Core diversity analysis, statistics and functional analysis" 68 | snakemake -pr --cores 50 --keep-going --rerun-incomplete 69 | 70 | import: 71 | @echo "Importing, trimming primers and adapters, and performing initial quality checks" 72 | @echo "Inspect the plots generated in 04.QC/trimmed_reads_qual_viz.qzv at https://view.qiime2.org/" 73 | snakemake -pr --cores 50 --keep-going --rerun-incomplete "04.QC/trimmed_reads_qual_viz.qzv" "04.QC/raw_reads_qual_viz.qzv" 74 | 75 | denoise: 76 | @echo "Denoising your imported sequences" 77 | @echo "Inspect the table 05.Denoise_reads/denoise_stats.qzv at https://view.qiime2.org/" 78 | @echo "Edit the config/config.yaml file appropriately and re-run if many reads were lost after denoising." 79 | snakemake -pr --cores 50 --keep-going --rerun-incomplete "05.Denoise_reads/denoise_stats.qzv" "05.Denoise_reads/table_summary.qzv" "05.Denoise_reads/representative_sequences.qzv" 80 | 81 | assign_taxonomy: 82 | @echo "Assigning taxonomy and filtering out non-target taxa" 83 | @echo "After this run completes" 84 | @echo "Examine 08.Filter_feature_table/taxa_filtered_table.qzv" 85 | @echo "To figure out the total number of sequences ('Total frequency') to be used to determine the minimum frequency for filtering out rare taxa" 86 | @echo "Simply multiply the total number of sequences by your threshold, for example 0.00005 (0.005 percent)" 87 | @echo "python -c 'print(1298206 * 0.00005)' = 64.9103" 88 | @echo "Set the 'minimum_frequency' parameter in config/config.yaml to the result of this calculation rounded up, like so:" 89 | @echo "minimum_frequency: 65" 90 | snakemake -pr --cores 50 --keep-going --rerun-incomplete "06.Assign_taxonomy/taxonomy.qzv" "07.Build_phylogenetic_tree/rooted-tree.qza" "08.Filter_feature_table/taxa_filtered_table.qzv" 91 | 92 | plot: 93 | @echo "Filtering out rare ASVs and generating taxonomy plots" 94 | snakemake -pr --cores 10 --keep-going --rerun-incomplete "08.Filter_feature_table/filtered_table.qzv" "09.Taxa_bar_plots/group-bar-plot.qzv" "09.Taxa_bar_plots/samples-bar-plots.qzv" 95 | -------------------------------------------------------------------------------- /scripts/.bashrc: -------------------------------------------------------------------------------- 1 | 2 | # >>> conda initialize >>> 3 | # !! Contents within this block are managed by 'conda init' !! 4 | # __conda_setup="$('/gpfs0/bioinfo/users/obayomi/miniconda3/bin/conda' 'shell.bash' 'hook' 2> /dev/null)" 5 | # if [ $? -eq 0 ]; then 6 | # eval "$__conda_setup" 7 | # else 8 | # if [ -f "/gpfs0/bioinfo/users/obayomi/miniconda3/etc/profile.d/conda.sh" ]; then 9 | # . 
"/gpfs0/bioinfo/users/obayomi/miniconda3/etc/profile.d/conda.sh" 10 | # else 11 | # export PATH="/gpfs0/bioinfo/users/obayomi/miniconda3/bin:$PATH" 12 | # fi 13 | # fi 14 | # unset __conda_setup 15 | # <<< conda initialize <<< 16 | 17 | source /storage/SGE6U8/default/common/settings.sh 18 | # FASTQC 19 | export PATH=/gpfs0/bioinfo/users/obayomi/FastQC/:$PATH 20 | #export SOURCETRACKER_PATH=/gpfs0/biores/users/gilloro/Biyi/SourceTracking/sourcetracker-1.0.1 21 | #Chimera slayer 22 | export PATH=/fastspace/bioinfo_apps/microbiomeutil-r20110519/ChimeraSlayer/:$PATH 23 | #vsearch 24 | export PATH=/fastspace/bioinfo_apps/vsearch/vsearch_v2.3.4/bin/:$PATH 25 | # perldl and pdl2 perl bin 26 | #export PATH=/gpfs0/bioinfo/users/obayomi/perl5/bin/:PATH 27 | # create alias for pdl2 because it has trouble finding perl 28 | alias pdl2="/bin/perl /gpfs0/bioinfo/users/obayomi/perl5/bin/pdl2" 29 | # rlwrap - needed for autocompletion when using perli 30 | export PATH=/gpfs0/bioinfo/users/obayomi/bin/bin/:$PATH 31 | #pathogen analysis scripts 32 | export PATH=/gpfs0/bioinfo/users/obayomi/hinuman_analysis/16s_pathogen_analysis/:$PATH 33 | #qiime 34 | export PATH=/fastspace/bioinfo_apps/qiime/usr/local/bin/:$PATH 35 | #NCBI blast 36 | export PATH=/gpfs0/bioinfo/users/obayomi/ncbi-blast-2.10.1+/bin/:$PATH 37 | #qsub 38 | export PATH=/storage/SGE6U8/bin/lx24-amd64/:$PATH 39 | #all executables 40 | export PATH=/gpfs0/bioinfo/users/obayomi/bin/:$PATH 41 | #sra tolkit 42 | export PATH=/gpfs0/bioinfo/users/obayomi/sratoolkit.2.9.6-1-ubuntu64/bin/:$PATH 43 | #Diamond 0.7.11 44 | #export PATH=/fastspace/bioinfo_apps/Diamond/v0.7.11/:$PATH 45 | #MEGAN 46 | export PATH=/gpfs0/bioinfo/users/obayomi/megan/:$PATH 47 | #MEGAN commandline tools 48 | export PATH=/gpfs0/bioinfo/users/obayomi/megan/tools:$PATH 49 | #minimap2 for aligning long reads like nanopore 50 | export PATH=/gpfs0/bioinfo/users/obayomi/minimap2:$PATH 51 | #fastx tool kit for processing fasta and fastq files 52 | #export PATH=/gpfs0/biores/users/gilloro/Biyi/fastx_toolkit/bin:$PATH 53 | 54 | #centrifuge for metagenomic reads classification 55 | export PATH=/gpfs0/bioinfo/users/obayomi/centrifuge/:$PATH 56 | # Kraken 57 | export PATH=/fastspace/bioinfo_apps/kraken/:$PATH 58 | #metaphlan2 59 | #export PATH=/gpfs0/bioinfo/users/obayomi/biobakery-metaphlan2-5bd7cd0e4854/:$PATH 60 | 61 | # bbmap 62 | export PATH=/gpfs0/bioinfo/users/obayomi/bbmap/:$PATH 63 | 64 | #microbiome helper 65 | export PATH=/gpfs0/bioinfo/users/obayomi/microbiome_helper/:$PATH 66 | # LAST 67 | export PATH=/gpfs0/bioinfo/users/obayomi/last-1021/src/:$PATH 68 | export PATH=/gpfs0/bioinfo/users/obayomi/last-1021/scripts/:$PATH 69 | 70 | #Trimmomatic 71 | export PATH=/fastspace/bioinfo_apps/Trimmomatic-0.32/:$PATH 72 | 73 | #set SGE_ROOT variable 74 | export SGE_ROOT=/storage/SGE6U8 75 | 76 | #miniconda 77 | #export PATH=/gpfs0/bioinfo/users/obayomi/miniconda3/envs/python2/bin/:$PATH 78 | #export PATH=/gpfs0/bioinfo/apps/Miniconda2/Miniconda_v4.3.21/bin/:$PATH 79 | export PATH=/gpfs0/bioinfo/users/obayomi/miniconda3/bin/:$PATH 80 | #export PATH=/gpfs0/bioinfo/apps/Miniconda2/Miniconda_v4.3.21/envs/Metagenomics/share/minced-0.3.2-0/:$PATH 81 | #HMM 82 | export PATH=/gpfs0/bioinfo/apps/HMMER/HMMER_v3.1b1/bin/:$PATH 83 | #metaBAT 84 | export PATH=/gpfs0/bioinfo/users/obayomi/metabat/:$PATH 85 | alias ll='ls --color=auto -alh' 86 | #Bowtie2 87 | export PATH=/gpfs0/bioinfo/apps/bowtie2/bowtie2-2.3.5-linux-x86_64:$PATH 88 | 89 | # source useful function for running Neatseq_Flow 90 | source 
/gpfs0/bioinfo/users/obayomi/non_model_RNA-Seq/functions.sh 91 | 92 | # mauve 93 | export PATH=$PATH:/gpfs0/bioinfo/users/obayomi/mauve_snapshot_2015-02-13/ 94 | 95 | # MinPath 96 | export PATH=$PATH:/gpfs0/bioinfo/users/obayomi/MinPath/ 97 | 98 | # Signalp 99 | export PATH=$PATH:/gpfs0/bioinfo/users/obayomi/signalp-5.0b/bin/ 100 | 101 | # tmHMM 102 | export PATH=$PATH:/gpfs0/bioinfo/users/obayomi/tmhmm-2.0c/bin/ 103 | 104 | # aragorn 105 | #export PATH=$PATH:/gpfs0/bioinfo/users/obayomi/aragorn1.2.36/ 106 | 107 | # metaErg - anotation of metagenomics and metaproteomics assembly 108 | export PATH=$PATH:/gpfs0/bioinfo/users/obayomi/metaerg/bin/ 109 | 110 | # Phyloflash home 111 | PHYLOFLASH_DBHOME=/gpfs0/bioinfo/users/obayomi/138.1 112 | 113 | # Motus 114 | export PATH=$PATH:/gpfs0/bioinfo/users/obayomi/mOTUs_v2/ 115 | -------------------------------------------------------------------------------- /docker/config.yaml: -------------------------------------------------------------------------------- 1 | sample_file: "config/sample.tsv" 2 | metadata: "00.mapping/metadata.tsv" 3 | mail: "obadbotanist@yahoo.com" # A mere label 4 | samples: ["S44733-0001", "S44733-0002", "S44733-0003", "S44733-0004", "S44733-0005"] 5 | 6 | # List your sample names here - see the README.md file for an easy way to create this list 7 | project_dir: "/mnt/efs/scratch/gokdx/Pivot/contamination_hunting/SEQ44733/16s_analysis" 8 | # what type of amplicon are we analyzing 9 | # options are 10 | # "16S", "18S" and "ITS" 11 | amplicon: "16S" # "ITS" 12 | # A coloumn in metada for grouping bar plot and for statistics 13 | category: "Strain" 14 | # Three possible mode 15 | # pair - paired-end reads without joining 16 | # single - single end reads, joining of unnecessary 17 | # merge - merge paired end reads 18 | # if you will select to join the reads 19 | # # make sure to modify the -m -t flags of pear in the run_pear.pl script 20 | # before running the workflow 21 | 22 | mode: "pair" # "pair", "single" or "merge" 23 | RENAME_FILES: false # should your input files be renamed if they don't follow the requirement of 01.raw_data/{SAMPLE}_R{1|2}.fastq.gz 24 | 25 | # What method should be used in merging reads 26 | # options are "pear" or "vsearch" 27 | # for merging with pear or vsearch, repectively 28 | merge_method: "pear" 29 | 30 | # ASV or zoTUs denoising and clustering method. Can be "dada2" or "deblur" 31 | denoise_method: "dada2" 32 | 33 | # path to your manifest file - see the example folder for examples 34 | MANIFEST: "01.raw_data/MANIFEST" 35 | project_name: "Contamination_hunt_round2" # This has no use in the pipeline just help to keep records 36 | 37 | # # Add this line to everdy script to avoid device out of space error 38 | TEMP_DIR: "export TEMPDIR=/mnt/efs/scratch/gokdx/Pivot/contamination_hunting/SEQ44733/16s_analysis/tmp/ TMPDIR=/mnt/efs/scratch/gokdx/Pivot/contamination_hunting/SEQ44733/16s_analysis/tmp/" 39 | 40 | # set the path to the appropriate classifier for assigning taxonomy 41 | # Here i chose the classifier for silva for bacteria (16S) and protist (18S) analysis 42 | # For Fungi set to the path of a pre trained unite database classifier 43 | classifier: "silva-138-99-nb-classifier.qza" 44 | 45 | # To figure out the total number of sequences ("Total freqency") 46 | # to be used to determine the minuminum frequency for filtering out 47 | # rare taxa, examine "08.Filter_feature_table/taxa_filtered_table.qzv". 
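# A quick way to do the threshold arithmetic described here (shown with the example
# total of 106,203 sequences quoted further down; substitute the "Total frequency"
# reported in your own taxa_filtered_table.qzv):
#   python -c "print(106203 * 0.00001)"   # prints 1.06203
# Round the result to a whole number before setting minimum_frequency below.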
48 | # To calculate, multiply the total number of sequences by 0.00001 (0.001%) 49 | # Assign the result of your calulation below as the minimum frequency 50 | # for filtering out rare taxa 51 | # 106,203 * 0.00001 = 1.06203 52 | minimum_frequency: 1 53 | 54 | # Change this and re-run core diversity step if needed. Determine this number by 55 | # examiming "08.Filter_feature_table/filtered_table.qzv". Either choose the 56 | # minimum sequence count or choose the minimum sequence count 57 | # that will be enough to capture the diversity of your samples and still 58 | # not lose a lot of samples 59 | rarefaction_depth: 3362 60 | 61 | # Set tool specific parameters 62 | parameters: 63 | vsearch: 64 | join_pairs: 65 | truncqual: 20 66 | minimum_length: 400 67 | maximum_Ns: 20 68 | minimum_merge_length: 400 69 | minimum_merge_length: 600 70 | dada2: 71 | mode: "single" # "single" or "paired" 72 | trunc_length_forward: 120 #260 # this will be determined after visulaizing the quality plot where quality score is >= 20 73 | trunc_length_reverse: 110 #180 74 | trim_length_forward: 0 75 | trim_length_reverse: 0 76 | maximum_forward_error: 4 77 | maximum_reverse_error: 4 78 | threads: 40 79 | 80 | # --p-trim-length n which truncates the sequences at position n 81 | # In general, the Deblur developers recommend setting this value 82 | # to a length where the median quality score begins to drop too low 83 | deblur: 84 | trunc_length: 40 85 | # Parameters to argument of qiime feature-table group 86 | # when grouping the feature table for making grouped taxa barplots 87 | group_taxa_plot: 88 | category: "Strain" # --m-metadata-column argument 89 | mode: "sum" # --p-mode argument 90 | metadata: "00.mapping/treatment-metadata.tsv" # a 2-column or more metadata for grouping bar plots ['sample-id', 'treatment'] 91 | beta_diversity_significance: 92 | categories: "Strain" 93 | # Adators and primer trimming using cutadapt 94 | cutadapt: 95 | forward_primer: "GTGCCAGCMGCCGCGGTAA" 96 | reverse_primer: "GGACTACHVGGGTWTCTAAT" 97 | cores: 5 98 | fastree: 99 | threads: 20 100 | assign_taxonomy: 101 | threads: 40 102 | picrust: 103 | threads: 20 -------------------------------------------------------------------------------- /scripts/picrust2_analysis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N function_analysis 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 40 9 | 10 | 11 | set -e 12 | 13 | # Edit the headers of rep_set.fna to contain only OTU names 14 | #sed -i -E 's/(>.+) .+$/\1/g' rep_set.fna 15 | 16 | # make annotation directory 17 | #mkdir /gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/12.function_annotation/ 18 | 19 | ##################### Export and rename feature tables and representative sequences from qiime2 artifact 20 | 21 | ###### copy and rename the artifacts to the function annotation directory 22 | # Feature Tables 23 | #cp /gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/05.filter_table/dada2/se-taxa_filtered_table.qza \ 24 | # 12.function_annotation/ 25 | #mv 12.function_annotation/se-taxa_filtered_table.qza 12.function_annotation/_se-taxa_filtered_table.qza 26 | 27 | #cp /gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/05.redo_filter_table/dada2/se-taxa_filtered_table.qza \ 28 | # 12.function_annotation/ 29 | #mv 12.function_annotation/se-taxa_filtered_table.qza 12.function_annotation/redo-se-taxa_filtered_table.qza 30 | 31 | # Representative sequences 32 
| #cp /gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/03.dada_denoise/se-representative_sequences.qza \ 33 | # 12.function_annotation/ 34 | #mv 12.function_annotation/se-representative_sequences.qza 12.function_annotation/_se-representative_sequences.qza 35 | 36 | #cp /gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/03.redo_dada_denoise/se-representative_sequences.qza \ 37 | # 12.function_annotation/ 38 | #mv 12.function_annotation/se-representative_sequences.qza 12.function_annotation/redo-se-representative_sequences.qza 39 | 40 | #cd /gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/12.function_annotation/ 41 | #source activate qiime2-2020.6 42 | #qiime tools export --input-path _se-taxa_filtered_table.qza --output-path ./ 43 | #mv feature-table.biom _se-feature-table.biom 44 | 45 | #qiime tools export --input-path redo-se-taxa_filtered_table.qza --output-path ./ 46 | #mv feature-table.biom redo-se-feature-table.biom 47 | 48 | #qiime tools export --input-path _se-representative_sequences.qza --output-path ./ 49 | #mv dna-sequences.fasta _se-rep_set.fna 50 | 51 | #qiime tools export --input-path redo-se-representative_sequences.qza --output-path ./ 52 | #mv dna-sequences.fasta redo-se-rep_set.fna 53 | 54 | 55 | source activate picrust2 56 | PREFIX=("_se" "redo-se") 57 | REP_SET=("rep_set.fna" "rep_set.fna") 58 | FEATURE_TABLE=("feature-table.biom" "feature-table.biom") 59 | 60 | 61 | function run_picrust(){ 62 | 63 | local PREFIX=$1 64 | local REP_SET=$2 65 | local FEATURE_TABLE=$3 66 | # Run PICRUST2 pipeline 67 | picrust2_pipeline.py \ 68 | -s ${PREFIX}-${REP_SET} \ 69 | -i ${PREFIX}-${FEATURE_TABLE} \ 70 | -o ${PREFIX}-picrust2_out_pipeline \ 71 | -p 40 72 | 73 | # Annotate you enzymes / pathways by adding a description column 74 | add_descriptions.py -i ${PREFIX}-picrust2_out_pipeline/EC_metagenome_out/pred_metagenome_unstrat.tsv.gz -m EC \ 75 | -o ${PREFIX}-picrust2_out_pipeline/EC_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz 76 | 77 | add_descriptions.py -i ${PREFIX}-picrust2_out_pipeline/pathways_out/path_abun_unstrat.tsv.gz -m METACYC \ 78 | -o ${PREFIX}-picrust2_out_pipeline/pathways_out/path_abun_unstrat_descrip.tsv.gz 79 | 80 | add_descriptions.py -i ${PREFIX}-picrust2_out_pipeline/KO_metagenome_out/pred_metagenome_unstrat.tsv.gz -m KO \ 81 | -o ${PREFIX}-picrust2_out_pipeline/KO_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz 82 | 83 | # Unzip the prediction files 84 | gunzip ${PREFIX}-picrust2_out_pipeline/EC_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz 85 | gunzip ${PREFIX}-picrust2_out_pipeline/KO_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz 86 | gunzip ${PREFIX}-picrust2_out_pipeline/pathways_out/path_abun_unstrat_descrip.tsv.gz 87 | 88 | gunzip ${PREFIX}-picrust2_out_pipeline/EC_metagenome_out/pred_metagenome_unstrat.tsv.gz 89 | gunzip ${PREFIX}-picrust2_out_pipeline/KO_metagenome_out/pred_metagenome_unstrat.tsv.gz 90 | gunzip ${PREFIX}-picrust2_out_pipeline/pathways_out/path_abun_unstrat.tsv.gz 91 | 92 | # Convert to biom 93 | biom convert \ 94 | -i ${PREFIX}-picrust2_out_pipeline/EC_metagenome_out/pred_metagenome_unstrat.tsv \ 95 | -o ${PREFIX}-picrust2_out_pipeline/EC_metagenome_out/pred_metagenome_unstrat.biom \ 96 | --table-type="OTU table" \ 97 | --to-hdf5 98 | 99 | biom convert \ 100 | -i ${PREFIX}-picrust2_out_pipeline/KO_metagenome_out/pred_metagenome_unstrat.tsv \ 101 | -o ${PREFIX}-picrust2_out_pipeline/KO_metagenome_out/pred_metagenome_unstrat.biom \ 102 | --table-type="OTU table" \ 103 | --to-hdf5 
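# (Optional check, not required by the pipeline: each HDF5 BIOM file written above
# can be summarised with the biom CLI before downstream use, e.g.
#   biom summarize-table \
#     -i ${PREFIX}-picrust2_out_pipeline/KO_metagenome_out/pred_metagenome_unstrat.biom \
#     -o ${PREFIX}-picrust2_out_pipeline/KO_metagenome_out/KO_biom_summary.txt
# where the output file name is only an example; the summary confirms the sample
# and predicted-feature counts carried over from the conversion.)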
104 | 105 | biom convert \ 106 | -i ${PREFIX}-picrust2_out_pipeline/pathways_out/path_abun_unstrat.tsv \ 107 | -o ${PREFIX}-picrust2_out_pipeline/pathways_out/path_abun_unstrat.biom \ 108 | --table-type="OTU table" \ 109 | --to-hdf5 110 | 111 | } 112 | 113 | 114 | export -f run_picrust 115 | 116 | parallel --jobs 0 --link run_picrust {1} {2} {3} ::: ${PREFIX[*]} ::: ${REP_SET[*]} ::: ${FEATURE_TABLE[*]} 117 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/outdoors.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number batch pcr_cycles medium_or_plant sterile_plant grown_with_antibiotics location water_washed surface_sterilization b12_enriched treatment description 2 | 1A 1 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 3 | 2A 2 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 4 | 3A 3 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 5 | 4A 4 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 6 | 5A 5 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 7 | 6A 6 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 8 | 7A 7 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 9 | 8A 8 A 24 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 10 | 9A 9 A 24 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 11 | 10A 10 A 24 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 12 | 11A 11 A 24 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 13 | 12A 12 A 24 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 14 | 13A 13 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 15 | 14A 14 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 16 | 15A 15 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 17 | 16A 16 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 18 | 17A 17 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 19 | 18A 18 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 20 | 19A 19 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 21 | 20A 20 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 22 | 22A 22 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 23 | 23A 23 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 24 | 24A 24 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 25 | 25A 25 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 26 | 26A 26 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 27 | 27A 27 A 24 
Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 28 | 28A 28 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 29 | 29A 29 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 30 | 30A 30 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 31 | 31A 31 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 32 | 32A 32 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 33 | 33A 33 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 34 | 34A 34 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 35 | 35A 35 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 36 | 36A 36 A 22 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 37 | 13A-2 59 B 22 Plant No No outdoors No No NA outdoor outdoor_plant_with_pPNA_&_mPNA 38 | 14A-2 60 B 22 Plant No No outdoors Yes No NA outdoor+water_washed outdoor_plus_water_washed_plant_with_pPNA_&_mPNA 39 | 15A-2 61 B 22 Plant No No outdoors No No NA outdoor outdoor_plant_with_pPNA_&_mPNA 40 | 16A-2 62 B 22 Plant No No outdoors Yes No NA outdoor+water_washed outdoor_plus_water_washed_plant_with_pPNA_&_mPNA 41 | 17A-2 63 B 22 Plant No No outdoors Yes No NA outdoor+water_washed outdoor_plus_water_washed_plant_with_pPNA_&_mPNA 42 | 18A-2 64 B 22 Plant No No outdoors No No NA outdoor outdoor_plant_with_pPNA_&_mPNA 43 | 19A-2 65 B 22 Plant No No outdoors Yes No NA outdoor+water_washed outdoor_plus_water_washed_plant_with_pPNA_&_mPNA 44 | 20A-2 66 B 22 Plant No No outdoors No No NA outdoor outdoor_plant_with_pPNA_&_mPNA 45 | 21A-2 67 B 22 Plant No No outdoors No No NA outdoor outdoor_plant_with_pPNA_&_mPNA 46 | 22A-2 68 B 22 Plant No No outdoors Yes No NA outdoor+water_washed outdoor_plus_water_washed_plant_with_pPNA_&_mPNA 47 | -------------------------------------------------------------------------------- /scripts/filter_feature_table.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N Filter_features 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 10 9 | 10 | set -e 11 | 12 | # STEPS 13 | #1. Filter-out singletons and non-target ASVs in the combined table by setting REMOVE_RARE_FEATURES="false" 14 | #2. Run filter-sample.sh to subset the filtered table by analysis type e.g. indoors, outdoors e.t.c. 15 | #3. View qsv summary files for each analysis to determine the "Total number of sequences" that will be used to estimate the rare ASVs and also rarefaction depth 16 | #3. 
Remove rare ASVs from the feature tables by setting REMOVE_RARE_FEATURES="true" 17 | 18 | source activate qiime2-2020.6 19 | export PERL5LIB='/gpfs0/bioinfo/users/obayomi/miniconda3/envs/qiime2-2020.6/lib/site_perl/5.26.2/x86_64-linux-thread-multi' 20 | #IN_PREFIX=('03.dada_denoise/se' '03.dada_denoise/pear-joined' '03.deblur_denoise/se' '03.deblur_denoise/pear-joined') 21 | IN_PREFIX=('03.redo_dada_denoise/se' '03.redo_dada_denoise/pear-joined' '03.redo_dada_denoise/pe') 22 | 23 | 24 | # For Combined table i.e the original table with indoors, outdoors and mock tables combined 25 | #OUT_PREFIX=('05.filter_table/dada2/se' '05.filter_table/dada2/pear-joined' '05.filter_table/deblur/se' '05.filter_table/deblur/pear-joined') 26 | #TAXONOMY_PREFIX=('04.assign_taxonomy/dada2/se' '04.assign_taxonomy/dada2/pear-joined' '04.assign_taxonomy/deblur/se' '04.assign_taxonomy/deblur/pear-joined') 27 | 28 | TAXONOMY_PREFIX=('04.redo_assign_taxonomy/dada2/se' '04.redo_assign_taxonomy/dada2/pear-joined' '04.redo_assign_taxonomy/dada2/pe' ) 29 | 30 | 31 | 32 | #TOTAL_SEQUENCES=(994346 415117 243487 58268) multiply each number by 0.00005 to get the minimum number for filtering rare otus below 33 | #MIN_FREQUENCY=(50 21 12 3) 34 | 35 | # For the tables that have been split by metadata 36 | #OUT_PREFIX=('05.filter_table/dada2/indoors/se' '05.filter_table/dada2/indoors/pear-joined' '05.filter_table/deblur/indoors/se' '05.filter_table/deblur/indoors/pear-joined' '05.filter_table/dada2/outdoors/se' '05.filter_table/dada2/outdoors/pear-joined' '05.filter_table/deblur/outdoors/se' '05.filter_table/deblur/outdoors/pear-joined' '05.filter_table/dada2/mock/se' '05.filter_table/dada2/mock/pear-joined' '05.filter_table/deblur/mock/se' '05.filter_table/deblur/mock/pear-joined') 37 | 38 | 39 | # All filtered tables 40 | #OUT_PREFIX=('05.redo_filter_table/dada2/indoors/se' '05.redo_filter_table/dada2/indoors/pear-joined' '05.redo_filter_table/dada2/indoors/pe' '05.redo_filter_table/dada2/outdoors/se' '05.redo_filter_table/dada2/outdoors/pear-joined' '05.redo_filter_table/dada2/outdoors/pe' '05.redo_filter_table/dada2/mock/se' '05.redo_filter_table/dada2/mock/pear-joined' '05.redo_filter_table/dada2/mock/pe' '05.redo_filter_table/dada2/se' '05.redo_filter_table/dada2/pear-joined' '05.redo_filter_table/dada2/pe') 41 | # combined table 42 | #OUT_PREFIX=('05.redo_filter_table/dada2/se' '05.redo_filter_table/dada2/pear-joined' '05.redo_filter_table/dada2/pe') 43 | OUT_PREFIX=(05.{,redo_}filter_table/dada2/{indoors,outdoors,basins}/se) 44 | 45 | #MIN_FREQUENCY=(18 7 5 1 29 10 7 2 3 4 1 1) 46 | #MIN_FREQUENCY=(26 14 9 41 22 8 4 4 1 71 40 18) 47 | 48 | MIN_FREQUENCY=(4 25 14 5 36 21) 49 | 50 | REMOVE_RARE_FEATURES="true" 51 | 52 | function filter_table(){ 53 | 54 | local in_prefix=$1 55 | local out_prefix=$2 56 | local taxonomy_prefix=$3 57 | 58 | # Remove singletons 59 | qiime feature-table filter-features \ 60 | --i-table ${in_prefix}-table.qza \ 61 | --p-min-frequency 2 \ 62 | --o-filtered-table ${out_prefix}-noSingleton_filtered_table.qza 63 | 64 | qiime feature-table summarize \ 65 | --i-table ${out_prefix}-noSingleton_filtered_table.qza \ 66 | --o-visualization ${out_prefix}-noSingleton_filtered_table.qzv 67 | 68 | 69 | # Remove unassigned, archaea, eukaryota, chloroplast and mitochondria taxa 70 | qiime taxa filter-table \ 71 | --i-table ${out_prefix}-noSingleton_filtered_table.qza \ 72 | --i-taxonomy ${taxonomy_prefix}-taxonomy.qza \ 73 | --p-exclude "Unassigned,Chloroplast,Mitochondria,Archaea,Eukaryota" \ 74 | 
--o-filtered-table ${out_prefix}-taxa_filtered_table.qza 75 | 76 | # To figure out the total number of sequences ("Total freqency") here equals ${TOTAL_SEQUENCES} e.g. 8,053,326 77 | qiime feature-table summarize \ 78 | --i-table ${out_prefix}-taxa_filtered_table.qza \ 79 | --o-visualization ${out_prefix}-taxa_filtered_table.qzv 80 | 81 | } 82 | 83 | if [ "${REMOVE_RARE_FEATURES}" == "false" ]; then 84 | # Filter-out singletons and non-target ASVs from the combined table 85 | export -f filter_table 86 | parallel --jobs 0 --link filter_table {1} {2} {3} ::: ${IN_PREFIX[*]} ::: ${OUT_PREFIX[*]} ::: ${TAXONOMY_PREFIX[*]} 87 | 88 | else 89 | ##### Removing rare otus / features with abundance less the 0.005% 90 | parallel --jobs 0 --link qiime feature-table filter-features \ 91 | --i-table {1}-taxa_filtered_table.qza \ 92 | --p-min-frequency {2} \ 93 | --o-filtered-table {1}-filtered_table.qza ::: ${OUT_PREFIX[*]} ::: ${MIN_FREQUENCY[*]} 94 | 95 | parallel --jobs 0 --link qiime feature-table summarize \ 96 | --i-table {}-filtered_table.qza \ 97 | --o-visualization {}-filtered_table.qzv ::: ${OUT_PREFIX[*]} 98 | 99 | fi 100 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/indoors.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number batch pcr_cycles medium_or_plant sterile_plant grown_with_antibiotics location water_washed surface_sterilization b12_enriched treatment description 2 | 37A 37 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 3 | 38A 38 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 4 | 39A 39 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 5 | 40A 40 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 6 | 41A 41 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 7 | 42A 42 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 8 | 43A 43 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 9 | 44A 44 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 10 | 45A 45 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 11 | 46A 46 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 12 | 47A 47 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 13 | 48A 48 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 14 | 49A 49 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 15 | 50A 50 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 16 | 51A 51 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 17 | 52A 52 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 18 | 53A 53 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 19 | 54A 54 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 20 | 9A-2 55 B 22 Plant No No indoors 
Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 21 | 10A-2 56 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 22 | 11A-2 57 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 23 | 12A-2 58 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 24 | 23A-2 69 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 25 | 25A-2 70 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 26 | 26A-2 71 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 27 | 27A-2 72 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 28 | 28A-2 73 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 29 | 29A-2 74 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 30 | 30A-2 75 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 31 | 31A-2 76 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 32 | 32A-2 77 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 33 | 33A-2 78 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 34 | 34A-2 79 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 35 | 35A-2 80 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 36 | 36A-2 81 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 37 | 37A-2 82 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 38 | 38A-2 83 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 39 | 39A-2 84 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 40 | 40A-2 85 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 41 | 41A-2 86 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 42 | 42A-2 87 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 43 | 43A-2 88 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 44 | 44A-2 89 B NA Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic 45 | -------------------------------------------------------------------------------- /create_DB/eukaryote-unite/Snakefile: -------------------------------------------------------------------------------- 1 | from os import path,getcwd 2 | 3 | # Run the 
pipeline like so on your local computer 4 | # snakemake -npr --cores 10 --keep-going --rerun-incomplete --restart-times 3 5 | 6 | configfile: "config/config.yaml" 7 | 8 | 9 | RULES=["Download_unite_database", "Unzip_unite_DB", "modify_and_rename_unite_files", "Import_unite_sequences", 10 | "Import_unite_taxonomy", "Train_unite_classifier"] 11 | 12 | 13 | 14 | rule all: 15 | input: 16 | "logs/Download_unite_database/", 17 | "logs/modify_and_rename_unite_files/", 18 | "logs/Train_unite_classifier/", 19 | "databases/unite-classifier.qza" 20 | 21 | 22 | # This rule will make rule-specific log directories 23 | # in order to easily store the standard output and standard error 24 | # generated when submitting jobs to the cluster 25 | rule make_logs_directories: 26 | output: 27 | directory("logs/Download_unite_database/"), 28 | directory("logs/modify_and_rename_unite_files/"), 29 | directory("logs/Train_unite_classifier/") 30 | threads: 1 31 | shell: 32 | """ 33 | [ -d logs/ ] || mkdir -p logs/ 34 | cd logs/ 35 | for RULE in {RULES}; do 36 | [ -d ${{RULE}}/ ] || mkdir -p ${{RULE}}/ 37 | done 38 | """ 39 | 40 | 41 | 42 | # --------------- Create Unite database for QIIME2 -----------------# 43 | 44 | rule Download_unite_database: 45 | input: 46 | log_dirs=rules.make_logs_directories.output 47 | output: 48 | temp("databases/unite.gz") 49 | threads: 1 50 | log: "logs/Download_unite_database/Download_unite_database.log" 51 | params: 52 | url=config["UNITE_URL"] 53 | shell: 54 | "wget -O {output} {params.url} > {log} 2>&1 " 55 | 56 | rule Unzip_unite_DB: 57 | input: rules.Download_unite_database.output 58 | output: 59 | sequences="databases/sh_qiime_release_s_all_10.05.2021/sh_refs_qiime_ver8_dynamic_s_all_10.05.2021.fasta", 60 | taxonomy="databases/sh_qiime_release_s_all_10.05.2021/sh_taxonomy_qiime_ver8_dynamic_s_all_10.05.2021.txt" 61 | threads: 1 62 | #log: "logs/Unzip_unite_DB/Unzip_unite_DB.log" 63 | params: 64 | out_dir=lambda w, input: path.dirname(input[0]), 65 | basename=lambda w, input: path.basename(input[0]) 66 | shell: 67 | """ 68 | cd {params.out_dir} 69 | #[ -f {log} ] || touch {log} 70 | tar -xvzf {params.basename} 71 | """ 72 | 73 | # modify the taxonomy file so that its first line is the header 74 | # "Feature ID\tTaxon" 75 | # and rename the sequences fasta file 76 | rule modify_and_rename_unite_files: 77 | input: 78 | sequences=rules.Unzip_unite_DB.output.sequences, 79 | taxonomy=rules.Unzip_unite_DB.output.taxonomy 80 | output: 81 | sequences="databases/unite-sequences.fasta", 82 | taxonomy="databases/unite-taxonomy.txt" 83 | threads: 2 84 | log: "logs/modify_and_rename_unite_files/modify_and_rename_unite_files.log" 85 | shell: 86 | """ 87 | # Modify and rename the unite taxonomy file 88 | (echo -e "Feature ID\tTaxon"; cat {input.taxonomy}) > {output.taxonomy} 2> {log} 89 | 90 | # Copy and rename the unite sequences file 91 | cat {input.sequences} > {output.sequences} 2> {log} 92 | """ 93 | 94 | 95 | # Setting up the already trimmed database 96 | rule Import_unite_sequences: 97 | input: rules.modify_and_rename_unite_files.output.sequences 98 | output: "databases/unite-sequences.qza" 99 | threads: 2 100 | log: "logs/Import_unite_sequences/Import_unite_sequences.log" 101 | params: 102 | conda_activate=config["QIIME2_ENV"] 103 | shell: 104 | """ 105 | set +u 106 | 107 | {params.conda_activate} 108 | 109 | set -u 110 | 111 | qiime tools import \ 112 | --type 'FeatureData[Sequence]' \ 113 | --input-path {input} \ 114 | --output-path {output} > {log} 2>&1 115 | """ 
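# (Optional sanity check, not required by the workflow: `qiime tools peek` reports
# an artifact's UUID, semantic type and format; the artifact imported above should
# have the type FeatureData[Sequence]. A minimal commented-out sketch of such a
# rule, reusing the activation pattern of the rules in this file; the rule name
# and output path are illustrative only:
#
# rule Peek_unite_sequences:
#     input: rules.Import_unite_sequences.output
#     output: "databases/unite-sequences.peek.txt"
#     threads: 1
#     params:
#         conda_activate=config["QIIME2_ENV"]
#     shell:
#         """
#         set +u
#         {params.conda_activate}
#         set -u
#         qiime tools peek {input} > {output}
#         """
# )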
116 | 117 | 118 | 119 | # Import Taxonomy 120 | rule Import_unite_taxonomy: 121 | input: rules.modify_and_rename_unite_files.output.taxonomy 122 | output: "databases/unite-taxonomy.qza" 123 | threads: 2 124 | log: "logs/Import_unite_taxonomy/Import_unite_taxonomy.log" 125 | params: 126 | conda_activate=config["QIIME2_ENV"] 127 | shell: 128 | """ 129 | set +u 130 | 131 | {params.conda_activate} 132 | 133 | set -u 134 | 135 | qiime tools import \ 136 | --type 'FeatureData[Taxonomy]' \ 137 | --input-path {input} \ 138 | --output-path {output} > {log} 2>&1 139 | """ 140 | 141 | 142 | # Train the classifier 143 | rule Train_unite_classifier: 144 | input: 145 | sequences=rules.Import_unite_sequences.output, 146 | taxonomy=rules.Import_unite_taxonomy.output 147 | output: "databases/unite-classifier.qza" 148 | threads: 10 149 | log: "logs/Train_unite_classifier/Train_unite_classifier.log" 150 | params: 151 | conda_activate=config["QIIME2_ENV"] 152 | shell: 153 | """ 154 | set +u 155 | 156 | {params.conda_activate} 157 | 158 | set -u 159 | 160 | qiime feature-classifier fit-classifier-naive-bayes \ 161 | --i-reference-reads {input.sequences} \ 162 | --i-reference-taxonomy {input.taxonomy} \ 163 | --o-classifier {output} > {log} 2>&1 164 | """ 165 | 166 | 167 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/pe-dada2/outdoors.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number batch pcr_cycles medium_or_plant sterile_plant grown_with_antibiotics location water_washed surface_sterilization b12_enriched treatment description 2 | Osnat001-1-A 1 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 3 | Osnat002-2-A 2 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 4 | Osnat003-3-A 3 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 5 | Osnat004-4-A 4 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 6 | Osnat005-5-A 5 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 7 | Osnat006-6-A 6 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 8 | Osnat007-7-A 7 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 9 | Osnat008-8-A 8 A 24 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 10 | Osnat009-9-A 9 A 24 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 11 | Osnat010-10-A 10 A 24 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 12 | Osnat011-11-A 11 A 24 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 13 | Osnat012-12-A 12 A 24 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 14 | Osnat013-13-A 13 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 15 | Osnat014-14-A 14 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 16 | Osnat015-15-A 15 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 17 | Osnat016-16-A 16 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 18 | Osnat017-17-A 17 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 19 | Osnat018-18-A 18 
A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 20 | Osnat019-19-A 19 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 21 | Osnat020-20-A 20 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 22 | Osnat021-22-A 22 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 23 | Osnat022-23-A 23 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 24 | Osnat023-24-A 24 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 25 | Osnat024-25-A 25 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 26 | Osnat025-26-A 26 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 27 | Osnat026-27-A 27 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 28 | Osnat027-28-A 28 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 29 | Osnat028-29-A 29 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 30 | Osnat029-30-A 30 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 31 | Osnat030-31-A 31 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 32 | Osnat031-32-A 32 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 33 | Osnat032-33-A 33 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 34 | Osnat033-34-A 34 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 35 | Osnat034-35-A 35 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 36 | Osnat035-36-A 36 A 22 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 37 | Osnat058-13-A-2 59 B 22 Plant No No outdoors No No NA outdoor outdoor_plant_with_pPNA_&_mPNA 38 | Osnat059-14-A-2 60 B 22 Plant No No outdoors Yes No NA outdoor+water_washed outdoor_plus_water_washed_plant_with_pPNA_&_mPNA 39 | Osnat060-15-A-2 61 B 22 Plant No No outdoors No No NA outdoor outdoor_plant_with_pPNA_&_mPNA 40 | Osnat061-16-A-2 62 B 22 Plant No No outdoors Yes No NA outdoor+water_washed outdoor_plus_water_washed_plant_with_pPNA_&_mPNA 41 | Osnat062-17-A-2 63 B 22 Plant No No outdoors Yes No NA outdoor+water_washed outdoor_plus_water_washed_plant_with_pPNA_&_mPNA 42 | Osnat063-18-A-2 64 B 22 Plant No No outdoors No No NA outdoor outdoor_plant_with_pPNA_&_mPNA 43 | Osnat064-19-A-2 65 B 22 Plant No No outdoors Yes No NA outdoor+water_washed outdoor_plus_water_washed_plant_with_pPNA_&_mPNA 44 | Osnat065-20-A-2 66 B 22 Plant No No outdoors No No NA outdoor outdoor_plant_with_pPNA_&_mPNA 45 | Osnat066-21-A-2 67 B 22 
Plant No No outdoors No No NA outdoor outdoor_plant_with_pPNA_&_mPNA 46 | Osnat067-22-A-2 68 B 22 Plant No No outdoors Yes No NA outdoor+water_washed outdoor_plus_water_washed_plant_with_pPNA_&_mPNA 47 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/pe-dada2/indoors.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number batch pcr_cycles medium_or_plant sterile_plant grown_with_antibiotics location water_washed surface_sterilization b12_enriched treatment description 2 | Osnat036-37-A 37 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 3 | Osnat037-38-A 38 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 4 | Osnat038-39-A 39 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 5 | Osnat039-40-A 40 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 6 | Osnat040-41-A 41 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 7 | Osnat041-42-A 42 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 8 | Osnat042-43-A 43 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 9 | Osnat043-44-A 44 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 10 | Osnat044-45-A 45 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 11 | Osnat045-46-A 46 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 12 | Osnat046-47-A 47 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 13 | Osnat047-48-A 48 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 14 | Osnat048-49-A 49 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 15 | Osnat049-50-A 50 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 16 | Osnat050-51-A 51 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 17 | Osnat051-52-A 52 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 18 | Osnat052-53-A 53 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 19 | Osnat053-54-A 54 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 20 | Osnat054-9-A-2 55 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 21 | Osnat055-10-A-2 56 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 22 | Osnat056-11-A-2 57 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 23 | Osnat057-12-A-2 58 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 24 | Osnat068-23-A-2 69 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 25 | Osnat069-25-A-2 70 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile 
indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 26 | Osnat070-26-A-2 71 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 27 | Osnat071-27-A-2 72 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 28 | Osnat072-28-A-2 73 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 29 | Osnat073-29-A-2 74 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 30 | Osnat074-30-A-2 75 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 31 | Osnat075-31-A-2 76 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 32 | Osnat076-32-A-2 77 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 33 | Osnat077-33-A-2 78 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 34 | Osnat078-34-A-2 79 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 35 | Osnat079-35-A-2 80 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 36 | Osnat080-36-A-2 81 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 37 | Osnat081-37-A-2 82 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 38 | Osnat082-38-A-2 83 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 39 | Osnat083-39-A-2 84 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 40 | Osnat084-40-A-2 85 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 41 | Osnat085-41-A-2 86 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 42 | Osnat086-42-A-2 87 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 43 | Osnat087-43-A-2 88 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 44 | Osnat088-44-A-2 89 B NA Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic 45 | -------------------------------------------------------------------------------- /config/config.yaml: -------------------------------------------------------------------------------- 1 | sample_file: "config/sample.tsv" 2 | metadata: "00.mapping/metadata.tsv" 3 | mail: "obadbotanist@yahoo.com" # A mere label 4 | samples: ["A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "A10", 5 | "B1", "B2", "B3", "B4", "B5", "B6", "B7", "B8", "B9_1", "B9_2", 6 | "B10_1", "B10_2", "C1", "C2", "C3", "C4", "C5", "C6", "C7", 7 | "C8", "D1", "D2", "D3", "D4", "D5", "D6", "D7", "D8", "D9", 8 | "D10", "E1", "E2", "E3", "E4", "E5", "E6", "E7", "E8", "E9", 9 | "E10", "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", 
"F9", 10 | "F10", "G1", "G2", "G3", "G4", "G5", "G6", "G7", "G8", "G9", "G10"] 11 | # List your sample names here - see the README.md file for an easy way to create this list 12 | project_dir: "/scratch/user/obayomi/projects/Zebrafish/16S_trimmed_unmerged" 13 | # what type of amplicon are we analyzing 14 | # options are 15 | # "16S", "18S" and "ITS" 16 | amplicon: "16S" # "ITS" 17 | # A coloumn in metada for grouping bar plot and for statistics 18 | category: "Treatment" 19 | # Three possible mode 20 | # pair - paired-end reads without joining 21 | # single - single end reads, joining of unnecessary 22 | # merge - merge paired end reads 23 | # if you will select to join the reads 24 | # # make sure to modify the -m -t flags of pear in the run_pear.pl script 25 | # before running the workflow 26 | 27 | mode: "pair" # "pair", "single" or "merge" 28 | RENAME_FILES: false # should your input files be renamed if they don't follow the requirement of 01.raw_data/{SAMPLE}_R{1|2}.fastq.gz 29 | 30 | # What method should be used in merging reads 31 | # options are "pear" or "vsearch" 32 | # for merging with pear or vsearch, repectively 33 | merge_method: "pear" 34 | 35 | # ASV or zoTUs denoising and clustering method. Can be "dada2" or "deblur" 36 | denoise_method: "dada2" 37 | 38 | # path to your manifest file - see the example folder for examples 39 | MANIFEST: "01.raw_data/MANIFEST" 40 | project_name: "Zebrafish" # This has no use in the pipeline just help to keep records 41 | 42 | # # Add this line to everdy script to avoid device out of space error 43 | TEMP_DIR: "export TEMPDIR=/scratch/user/obayomi/projects/Zebrafish/tmp/ TMPDIR=/scratch/user/obayomi/projects/Zebrafish/tmp/" 44 | 45 | # set the path to the appropriate classifier for assigning taxonomy 46 | # Here i chose the classifier for silva for bacteria (16S) and protist (18S) analysis 47 | # For Fungi set to the path of a pre trained unite database classifier 48 | classifier: "/scratch/user/obayomi/projects/qiime2/create_DB/databases/silva-138-99-nb-classifier.qza" 49 | 50 | # To figure out the total number of sequences ("Total freqency") 51 | # to be used to determine the minuminum frequency for filtering out 52 | # rare taxa, examine "08.Filter_feature_table/taxa_filtered_table.qzv". 53 | # To calculate, multiply the total number of sequences by 0.00005 (0.005%) 54 | # Assign the result of your calulation below as the minimum frequency 55 | # for filtering out rare taxa 56 | # 741,904 * 0.00005 = 37.0952 57 | minimum_frequency: 37 58 | 59 | # Change this and re-run core diversity step if needed. Determine this number by 60 | # examiming "08.Filter_feature_table/filtered_table.qzv". 
Either choose the 61 | # minimum sequence count, or choose a sequence count 62 | # that will be enough to capture the diversity of your samples and still 63 | # not lose a lot of samples 64 | rarefaction_depth: 123 65 | 66 | 67 | # Full paths to the specified programs 68 | programs_path: 69 | multiqc: "/scratch/user/obayomi/.conda/envs/bioinfo/bin/multiqc" 70 | fastqc: "/scratch/user/obayomi/.conda/envs/bioinfo/bin/fastqc" 71 | parallel: "/scratch/user/obayomi/.conda/envs/bioinfo/bin/parallel" 72 | run_pear: "pear" #"/scratch/user/obayomi/projects/qiime2/run_pear.pl" 73 | 74 | # Set tool specific parameters 75 | parameters: 76 | vsearch: 77 | join_pairs: 78 | truncqual: 20 79 | minimum_length: 400 80 | maximum_Ns: 20 81 | minimum_merge_length: 400 82 | maximum_merge_length: 600 83 | dada2: 84 | mode: "single" # "single" or "paired" 85 | trunc_length_forward: 200 # 280 #220 # this will be determined after visualizing the quality plot where the quality score is >= 20 86 | trunc_length_reverse: 140 #180 87 | trim_length_forward: 0 88 | trim_length_reverse: 0 89 | maximum_forward_error: 4 90 | maximum_reverse_error: 4 91 | threads: 28 92 | 93 | # --p-trim-length n which truncates the sequences at position n 94 | # In general, the Deblur developers recommend setting this value 95 | # to a length where the median quality score begins to drop too low 96 | deblur: 97 | trunc_length: 40 98 | # Parameters passed to qiime feature-table group 99 | # when grouping the feature table for making grouped taxa barplots 100 | group_taxa_plot: 101 | category: "Treatment" # --m-metadata-column argument 102 | mode: "sum" # --p-mode argument 103 | metadata: "00.mapping/treatment-metadata.tsv" # a metadata file with 2 or more columns for grouping bar plots ['sample-id', 'treatment'] 104 | beta_diversity_significance: 105 | categories: "Treatment" 106 | # Adapter and primer trimming using cutadapt 107 | cutadapt: 108 | forward_primer: "GTGYCAGCMGCCGCGGTAA" 109 | reverse_primer: "GGACTACNVGGGTWTCTAAT" 110 | cores: 10 111 | fastree: 112 | threads: 28 113 | assign_taxonomy: 114 | threads: 28 115 | picrust: 116 | threads: 28 117 | pear: 118 | min_assembly: 150 119 | max_assembly: 300 120 | min_trim: 150 121 | threads: 8 122 | 123 | conda: 124 | qiime2: 125 | env: "module purge; module load Anaconda3/2020.07; source activate /sw/hprc/sw/Anaconda3/2020.07/envs/qiime2-2021.2" 126 | perl5lib: "export PERL5LIB=/sw/hprc/sw/Anaconda3/2020.07/envs/qiime2-2021.2/lib/site_perl/5.26.2/x86_64-linux-thread-multi" 127 | picrust2: 128 | env: "module purge; module load Anaconda3/2020.07; source activate /scratch/user/obayomi/.conda/envs/picrust2" 129 | perl5lib: "export PERL5LIB=/scratch/user/obayomi/.conda/envs/picrust2/lib/site_perl/5.26.2/x86_64-linux-thread-multi" 130 | bioinfo: 131 | env: "module purge; module load Anaconda3/2020.07; source activate /scratch/user/obayomi/.conda/envs/bioinfo" 132 | perl5lib: "export PERL5LIB=/scratch/user/obayomi/.conda/envs/bioinfo/lib/5.26.2" 133 | pear: 134 | env: "module purge; module load GCCcore/9.3.0 PEAR/0.9.11" 135 | 136 | -------------------------------------------------------------------------------- /scripts/qiime2_api.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from qiime2.plugins import feature_table\n", 10 | "from qiime2 import Artifact\n", 11 | "import biom\n", 12 | "import pandas as pd\n", 13 | "from 
qiime2.plugins import diversity\n", 14 | "from qiime2 import Metadata" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 3, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "# Load an artifact. In this case a feature table i.e ASV or OTU table\n", 24 | "unrarefied_table = Artifact.load('../04.filter_table/noChlr_noMitoch_noSingleton_filtered_table.qza')" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 5, 30 | "metadata": {}, 31 | "outputs": [ 32 | { 33 | "name": "stdout", 34 | "output_type": "stream", 35 | "text": [ 36 | "# Constructed from biom file\n", 37 | "#OTU ID\tSRR3202913\tSRR3202914\tSRR3202915\tSRR3202916\tSRR3202917\n", 38 | "65fb08bed0eeb24cfff33eeedfad522f\t0.0\t0.0\t0.0\t0.0\t0.0\n", 39 | "1ca86d303424bc40036ec3cdac72d8ad\t0.0\t0.0\t0.0\t0.0\t0.0\n", 40 | "4a0f23475dad7251063a5a39cf12d27f\t0.0\t0.0\t0.0\t0.0\t0.0\n", 41 | "36bb069fa6345961fc82056566252ace\t0.0\t0.0\t0.0\t0.0\t0.0\n", 42 | "58a85ad58122a7097ce75583f34d8626\t0.0\t0.0\t0.0\t0.0\t0.0\n" 43 | ] 44 | } 45 | ], 46 | "source": [ 47 | "biom_table = unrarefied_table.view(biom.Table)\n", 48 | "print(biom_table.head())" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 6, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "data": { 58 | "text/plain": [ 59 | "['__class__',\n", 60 | " '__delattr__',\n", 61 | " '__dict__',\n", 62 | " '__dir__',\n", 63 | " '__doc__',\n", 64 | " '__eq__',\n", 65 | " '__format__',\n", 66 | " '__ge__',\n", 67 | " '__getattribute__',\n", 68 | " '__getitem__',\n", 69 | " '__gt__',\n", 70 | " '__hash__',\n", 71 | " '__init__',\n", 72 | " '__init_subclass__',\n", 73 | " '__iter__',\n", 74 | " '__le__',\n", 75 | " '__lt__',\n", 76 | " '__module__',\n", 77 | " '__ne__',\n", 78 | " '__new__',\n", 79 | " '__reduce__',\n", 80 | " '__reduce_ex__',\n", 81 | " '__repr__',\n", 82 | " '__setattr__',\n", 83 | " '__sizeof__',\n", 84 | " '__str__',\n", 85 | " '__subclasshook__',\n", 86 | " '__weakref__',\n", 87 | " '_axis_to_num',\n", 88 | " '_cast_metadata',\n", 89 | " '_conv_to_self_type',\n", 90 | " '_data',\n", 91 | " '_data_equality',\n", 92 | " '_extract_data_from_tsv',\n", 93 | " '_get_col',\n", 94 | " '_get_row',\n", 95 | " '_get_sparse_data',\n", 96 | " '_index',\n", 97 | " '_index_ids',\n", 98 | " '_intersect_id_order',\n", 99 | " '_invert_axis',\n", 100 | " '_iter_obs',\n", 101 | " '_iter_samp',\n", 102 | " '_obs_index',\n", 103 | " '_observation_group_metadata',\n", 104 | " '_observation_ids',\n", 105 | " '_observation_metadata',\n", 106 | " '_sample_group_metadata',\n", 107 | " '_sample_ids',\n", 108 | " '_sample_index',\n", 109 | " '_sample_metadata',\n", 110 | " '_to_dense',\n", 111 | " '_to_sparse',\n", 112 | " '_union_id_order',\n", 113 | " 'add_group_metadata',\n", 114 | " 'add_metadata',\n", 115 | " 'align_to',\n", 116 | " 'collapse',\n", 117 | " 'concat',\n", 118 | " 'copy',\n", 119 | " 'create_date',\n", 120 | " 'data',\n", 121 | " 'del_metadata',\n", 122 | " 'delimited_self',\n", 123 | " 'descriptive_equality',\n", 124 | " 'dtype',\n", 125 | " 'exists',\n", 126 | " 'filter',\n", 127 | " 'format_version',\n", 128 | " 'from_hdf5',\n", 129 | " 'from_json',\n", 130 | " 'from_tsv',\n", 131 | " 'generated_by',\n", 132 | " 'get_table_density',\n", 133 | " 'get_value_by_ids',\n", 134 | " 'group_metadata',\n", 135 | " 'head',\n", 136 | " 'ids',\n", 137 | " 'index',\n", 138 | " 'is_empty',\n", 139 | " 'iter',\n", 140 | " 'iter_data',\n", 141 | " 'iter_pairwise',\n", 142 | " 'length',\n", 143 | " 'matrix_data',\n", 144 
| " 'max',\n", 145 | " 'merge',\n", 146 | " 'metadata',\n", 147 | " 'metadata_to_dataframe',\n", 148 | " 'min',\n", 149 | " 'nnz',\n", 150 | " 'nonzero',\n", 151 | " 'nonzero_counts',\n", 152 | " 'norm',\n", 153 | " 'pa',\n", 154 | " 'partition',\n", 155 | " 'rankdata',\n", 156 | " 'reduce',\n", 157 | " 'remove_empty',\n", 158 | " 'shape',\n", 159 | " 'sort',\n", 160 | " 'sort_order',\n", 161 | " 'subsample',\n", 162 | " 'sum',\n", 163 | " 'table_id',\n", 164 | " 'to_dataframe',\n", 165 | " 'to_hdf5',\n", 166 | " 'to_json',\n", 167 | " 'to_tsv',\n", 168 | " 'transform',\n", 169 | " 'transpose',\n", 170 | " 'type',\n", 171 | " 'update_ids']" 172 | ] 173 | }, 174 | "execution_count": 6, 175 | "metadata": {}, 176 | "output_type": "execute_result" 177 | } 178 | ], 179 | "source": [ 180 | "dir(biom_table)" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 7, 186 | "metadata": {}, 187 | "outputs": [ 188 | { 189 | "data": { 190 | "text/plain": [ 191 | "Results (name = value)\n", 192 | "-----------------------------------------------------------------------------------------\n", 193 | "visualization = " 194 | ] 195 | }, 196 | "execution_count": 7, 197 | "metadata": {}, 198 | "output_type": "execute_result" 199 | } 200 | ], 201 | "source": [ 202 | "feature_table.actions.filter_samples()" 203 | ] 204 | } 205 | ], 206 | "metadata": { 207 | "kernelspec": { 208 | "display_name": "Python 3", 209 | "language": "python", 210 | "name": "python3" 211 | }, 212 | "language_info": { 213 | "codemirror_mode": { 214 | "name": "ipython", 215 | "version": 3 216 | }, 217 | "file_extension": ".py", 218 | "mimetype": "text/x-python", 219 | "name": "python", 220 | "nbconvert_exporter": "python", 221 | "pygments_lexer": "ipython3", 222 | "version": "3.6.7" 223 | } 224 | }, 225 | "nbformat": 4, 226 | "nbformat_minor": 2 227 | } 228 | -------------------------------------------------------------------------------- /scripts/.ipynb_checkpoints/qiime2_api-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from qiime2.plugins import feature_table\n", 10 | "from qiime2 import Artifact\n", 11 | "import biom\n", 12 | "import pandas as pd\n", 13 | "from qiime2.plugins import diversity\n", 14 | "from qiime2 import Metadata" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 3, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "# Load an artifact. 
In this case a feature table i.e ASV or OTU table\n", 24 | "unrarefied_table = Artifact.load('../04.filter_table/noChlr_noMitoch_noSingleton_filtered_table.qza')" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 5, 30 | "metadata": {}, 31 | "outputs": [ 32 | { 33 | "name": "stdout", 34 | "output_type": "stream", 35 | "text": [ 36 | "# Constructed from biom file\n", 37 | "#OTU ID\tSRR3202913\tSRR3202914\tSRR3202915\tSRR3202916\tSRR3202917\n", 38 | "65fb08bed0eeb24cfff33eeedfad522f\t0.0\t0.0\t0.0\t0.0\t0.0\n", 39 | "1ca86d303424bc40036ec3cdac72d8ad\t0.0\t0.0\t0.0\t0.0\t0.0\n", 40 | "4a0f23475dad7251063a5a39cf12d27f\t0.0\t0.0\t0.0\t0.0\t0.0\n", 41 | "36bb069fa6345961fc82056566252ace\t0.0\t0.0\t0.0\t0.0\t0.0\n", 42 | "58a85ad58122a7097ce75583f34d8626\t0.0\t0.0\t0.0\t0.0\t0.0\n" 43 | ] 44 | } 45 | ], 46 | "source": [ 47 | "biom_table = unrarefied_table.view(biom.Table)\n", 48 | "print(biom_table.head())" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 6, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "data": { 58 | "text/plain": [ 59 | "['__class__',\n", 60 | " '__delattr__',\n", 61 | " '__dict__',\n", 62 | " '__dir__',\n", 63 | " '__doc__',\n", 64 | " '__eq__',\n", 65 | " '__format__',\n", 66 | " '__ge__',\n", 67 | " '__getattribute__',\n", 68 | " '__getitem__',\n", 69 | " '__gt__',\n", 70 | " '__hash__',\n", 71 | " '__init__',\n", 72 | " '__init_subclass__',\n", 73 | " '__iter__',\n", 74 | " '__le__',\n", 75 | " '__lt__',\n", 76 | " '__module__',\n", 77 | " '__ne__',\n", 78 | " '__new__',\n", 79 | " '__reduce__',\n", 80 | " '__reduce_ex__',\n", 81 | " '__repr__',\n", 82 | " '__setattr__',\n", 83 | " '__sizeof__',\n", 84 | " '__str__',\n", 85 | " '__subclasshook__',\n", 86 | " '__weakref__',\n", 87 | " '_axis_to_num',\n", 88 | " '_cast_metadata',\n", 89 | " '_conv_to_self_type',\n", 90 | " '_data',\n", 91 | " '_data_equality',\n", 92 | " '_extract_data_from_tsv',\n", 93 | " '_get_col',\n", 94 | " '_get_row',\n", 95 | " '_get_sparse_data',\n", 96 | " '_index',\n", 97 | " '_index_ids',\n", 98 | " '_intersect_id_order',\n", 99 | " '_invert_axis',\n", 100 | " '_iter_obs',\n", 101 | " '_iter_samp',\n", 102 | " '_obs_index',\n", 103 | " '_observation_group_metadata',\n", 104 | " '_observation_ids',\n", 105 | " '_observation_metadata',\n", 106 | " '_sample_group_metadata',\n", 107 | " '_sample_ids',\n", 108 | " '_sample_index',\n", 109 | " '_sample_metadata',\n", 110 | " '_to_dense',\n", 111 | " '_to_sparse',\n", 112 | " '_union_id_order',\n", 113 | " 'add_group_metadata',\n", 114 | " 'add_metadata',\n", 115 | " 'align_to',\n", 116 | " 'collapse',\n", 117 | " 'concat',\n", 118 | " 'copy',\n", 119 | " 'create_date',\n", 120 | " 'data',\n", 121 | " 'del_metadata',\n", 122 | " 'delimited_self',\n", 123 | " 'descriptive_equality',\n", 124 | " 'dtype',\n", 125 | " 'exists',\n", 126 | " 'filter',\n", 127 | " 'format_version',\n", 128 | " 'from_hdf5',\n", 129 | " 'from_json',\n", 130 | " 'from_tsv',\n", 131 | " 'generated_by',\n", 132 | " 'get_table_density',\n", 133 | " 'get_value_by_ids',\n", 134 | " 'group_metadata',\n", 135 | " 'head',\n", 136 | " 'ids',\n", 137 | " 'index',\n", 138 | " 'is_empty',\n", 139 | " 'iter',\n", 140 | " 'iter_data',\n", 141 | " 'iter_pairwise',\n", 142 | " 'length',\n", 143 | " 'matrix_data',\n", 144 | " 'max',\n", 145 | " 'merge',\n", 146 | " 'metadata',\n", 147 | " 'metadata_to_dataframe',\n", 148 | " 'min',\n", 149 | " 'nnz',\n", 150 | " 'nonzero',\n", 151 | " 'nonzero_counts',\n", 152 | " 'norm',\n", 153 | " 'pa',\n", 154 
| " 'partition',\n", 155 | " 'rankdata',\n", 156 | " 'reduce',\n", 157 | " 'remove_empty',\n", 158 | " 'shape',\n", 159 | " 'sort',\n", 160 | " 'sort_order',\n", 161 | " 'subsample',\n", 162 | " 'sum',\n", 163 | " 'table_id',\n", 164 | " 'to_dataframe',\n", 165 | " 'to_hdf5',\n", 166 | " 'to_json',\n", 167 | " 'to_tsv',\n", 168 | " 'transform',\n", 169 | " 'transpose',\n", 170 | " 'type',\n", 171 | " 'update_ids']" 172 | ] 173 | }, 174 | "execution_count": 6, 175 | "metadata": {}, 176 | "output_type": "execute_result" 177 | } 178 | ], 179 | "source": [ 180 | "dir(biom_table)" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 7, 186 | "metadata": {}, 187 | "outputs": [ 188 | { 189 | "data": { 190 | "text/plain": [ 191 | "Results (name = value)\n", 192 | "-----------------------------------------------------------------------------------------\n", 193 | "visualization = " 194 | ] 195 | }, 196 | "execution_count": 7, 197 | "metadata": {}, 198 | "output_type": "execute_result" 199 | } 200 | ], 201 | "source": [ 202 | "feature_table.actions.filter_samples()" 203 | ] 204 | } 205 | ], 206 | "metadata": { 207 | "kernelspec": { 208 | "display_name": "Python 3", 209 | "language": "python", 210 | "name": "python3" 211 | }, 212 | "language_info": { 213 | "codemirror_mode": { 214 | "name": "ipython", 215 | "version": 3 216 | }, 217 | "file_extension": ".py", 218 | "mimetype": "text/x-python", 219 | "name": "python", 220 | "nbconvert_exporter": "python", 221 | "pygments_lexer": "ipython3", 222 | "version": "3.6.7" 223 | } 224 | }, 225 | "nbformat": 4, 226 | "nbformat_minor": 2 227 | } 228 | -------------------------------------------------------------------------------- /examples/single-MANIFEST: -------------------------------------------------------------------------------- 1 | sample-id,absolute-filepath,direction 2 | 1A,$PWD/sequence_data/Osnat001-1-A_S1_L001_R1_001.fastq.gz,forward 3 | 2A,$PWD/sequence_data/Osnat002-2-A_S2_L001_R1_001.fastq.gz,forward 4 | 3A,$PWD/sequence_data/Osnat003-3-A_S3_L001_R1_001.fastq.gz,forward 5 | 4A,$PWD/sequence_data/Osnat004-4-A_S4_L001_R1_001.fastq.gz,forward 6 | 5A,$PWD/sequence_data/Osnat005-5-A_S5_L001_R1_001.fastq.gz,forward 7 | 6A,$PWD/sequence_data/Osnat006-6-A_S6_L001_R1_001.fastq.gz,forward 8 | 7A,$PWD/sequence_data/Osnat007-7-A_S7_L001_R1_001.fastq.gz,forward 9 | 8A,$PWD/sequence_data/Osnat008-8-A_S8_L001_R1_001.fastq.gz,forward 10 | 9A,$PWD/sequence_data/Osnat009-9-A_S9_L001_R1_001.fastq.gz,forward 11 | 10A,$PWD/sequence_data/Osnat010-10-A_S10_L001_R1_001.fastq.gz,forward 12 | 11A,$PWD/sequence_data/Osnat011-11-A_S11_L001_R1_001.fastq.gz,forward 13 | 12A,$PWD/sequence_data/Osnat012-12-A_S12_L001_R1_001.fastq.gz,forward 14 | 13A,$PWD/sequence_data/Osnat013-13-A_S13_L001_R1_001.fastq.gz,forward 15 | 14A,$PWD/sequence_data/Osnat014-14-A_S14_L001_R1_001.fastq.gz,forward 16 | 15A,$PWD/sequence_data/Osnat015-15-A_S15_L001_R1_001.fastq.gz,forward 17 | 16A,$PWD/sequence_data/Osnat016-16-A_S16_L001_R1_001.fastq.gz,forward 18 | 17A,$PWD/sequence_data/Osnat017-17-A_S17_L001_R1_001.fastq.gz,forward 19 | 18A,$PWD/sequence_data/Osnat018-18-A_S18_L001_R1_001.fastq.gz,forward 20 | 19A,$PWD/sequence_data/Osnat019-19-A_S19_L001_R1_001.fastq.gz,forward 21 | 20A,$PWD/sequence_data/Osnat020-20-A_S20_L001_R1_001.fastq.gz,forward 22 | 22A,$PWD/sequence_data/Osnat021-22-A_S21_L001_R1_001.fastq.gz,forward 23 | 23A,$PWD/sequence_data/Osnat022-23-A_S22_L001_R1_001.fastq.gz,forward 24 | 24A,$PWD/sequence_data/Osnat023-24-A_S23_L001_R1_001.fastq.gz,forward 25 
| 25A,$PWD/sequence_data/Osnat024-25-A_S24_L001_R1_001.fastq.gz,forward 26 | 26A,$PWD/sequence_data/Osnat025-26-A_S25_L001_R1_001.fastq.gz,forward 27 | 27A,$PWD/sequence_data/Osnat026-27-A_S26_L001_R1_001.fastq.gz,forward 28 | 28A,$PWD/sequence_data/Osnat027-28-A_S27_L001_R1_001.fastq.gz,forward 29 | 29A,$PWD/sequence_data/Osnat028-29-A_S28_L001_R1_001.fastq.gz,forward 30 | 30A,$PWD/sequence_data/Osnat029-30-A_S29_L001_R1_001.fastq.gz,forward 31 | 31A,$PWD/sequence_data/Osnat030-31-A_S30_L001_R1_001.fastq.gz,forward 32 | 32A,$PWD/sequence_data/Osnat031-32-A_S31_L001_R1_001.fastq.gz,forward 33 | 33A,$PWD/sequence_data/Osnat032-33-A_S32_L001_R1_001.fastq.gz,forward 34 | 34A,$PWD/sequence_data/Osnat033-34-A_S33_L001_R1_001.fastq.gz,forward 35 | 35A,$PWD/sequence_data/Osnat034-35-A_S34_L001_R1_001.fastq.gz,forward 36 | 36A,$PWD/sequence_data/Osnat035-36-A_S35_L001_R1_001.fastq.gz,forward 37 | 37A,$PWD/sequence_data/Osnat036-37-A_S36_L001_R1_001.fastq.gz,forward 38 | 38A,$PWD/sequence_data/Osnat037-38-A_S37_L001_R1_001.fastq.gz,forward 39 | 39A,$PWD/sequence_data/Osnat038-39-A_S38_L001_R1_001.fastq.gz,forward 40 | 40A,$PWD/sequence_data/Osnat039-40-A_S39_L001_R1_001.fastq.gz,forward 41 | 41A,$PWD/sequence_data/Osnat040-41-A_S40_L001_R1_001.fastq.gz,forward 42 | 42A,$PWD/sequence_data/Osnat041-42-A_S41_L001_R1_001.fastq.gz,forward 43 | 43A,$PWD/sequence_data/Osnat042-43-A_S42_L001_R1_001.fastq.gz,forward 44 | 44A,$PWD/sequence_data/Osnat043-44-A_S43_L001_R1_001.fastq.gz,forward 45 | 45A,$PWD/sequence_data/Osnat044-45-A_S44_L001_R1_001.fastq.gz,forward 46 | 46A,$PWD/sequence_data/Osnat045-46-A_S45_L001_R1_001.fastq.gz,forward 47 | 47A,$PWD/sequence_data/Osnat046-47-A_S46_L001_R1_001.fastq.gz,forward 48 | 48A,$PWD/sequence_data/Osnat047-48-A_S47_L001_R1_001.fastq.gz,forward 49 | 49A,$PWD/sequence_data/Osnat048-49-A_S48_L001_R1_001.fastq.gz,forward 50 | 50A,$PWD/sequence_data/Osnat049-50-A_S49_L001_R1_001.fastq.gz,forward 51 | 51A,$PWD/sequence_data/Osnat050-51-A_S50_L001_R1_001.fastq.gz,forward 52 | 52A,$PWD/sequence_data/Osnat051-52-A_S51_L001_R1_001.fastq.gz,forward 53 | 53A,$PWD/sequence_data/Osnat052-53-A_S52_L001_R1_001.fastq.gz,forward 54 | 54A,$PWD/sequence_data/Osnat053-54-A_S53_L001_R1_001.fastq.gz,forward 55 | 9A-2,$PWD/sequence_data/Osnat054-9-A-2_S54_L001_R1_001.fastq.gz,forward 56 | 10A-2,$PWD/sequence_data/Osnat055-10-A-2_S55_L001_R1_001.fastq.gz,forward 57 | 11A-2,$PWD/sequence_data/Osnat056-11-A-2_S56_L001_R1_001.fastq.gz,forward 58 | 12A-2,$PWD/sequence_data/Osnat057-12-A-2_S57_L001_R1_001.fastq.gz,forward 59 | 13A-2,$PWD/sequence_data/Osnat058-13-A-2_S58_L001_R1_001.fastq.gz,forward 60 | 14A-2,$PWD/sequence_data/Osnat059-14-A-2_S59_L001_R1_001.fastq.gz,forward 61 | 15A-2,$PWD/sequence_data/Osnat060-15-A-2_S60_L001_R1_001.fastq.gz,forward 62 | 16A-2,$PWD/sequence_data/Osnat061-16-A-2_S61_L001_R1_001.fastq.gz,forward 63 | 17A-2,$PWD/sequence_data/Osnat062-17-A-2_S62_L001_R1_001.fastq.gz,forward 64 | 18A-2,$PWD/sequence_data/Osnat063-18-A-2_S63_L001_R1_001.fastq.gz,forward 65 | 19A-2,$PWD/sequence_data/Osnat064-19-A-2_S64_L001_R1_001.fastq.gz,forward 66 | 20A-2,$PWD/sequence_data/Osnat065-20-A-2_S65_L001_R1_001.fastq.gz,forward 67 | 21A-2,$PWD/sequence_data/Osnat066-21-A-2_S66_L001_R1_001.fastq.gz,forward 68 | 22A-2,$PWD/sequence_data/Osnat067-22-A-2_S67_L001_R1_001.fastq.gz,forward 69 | 23A-2,$PWD/sequence_data/Osnat068-23-A-2_S68_L001_R1_001.fastq.gz,forward 70 | 25A-2,$PWD/sequence_data/Osnat069-25-A-2_S69_L001_R1_001.fastq.gz,forward 71 | 
26A-2,$PWD/sequence_data/Osnat070-26-A-2_S70_L001_R1_001.fastq.gz,forward 72 | 27A-2,$PWD/sequence_data/Osnat071-27-A-2_S71_L001_R1_001.fastq.gz,forward 73 | 28A-2,$PWD/sequence_data/Osnat072-28-A-2_S72_L001_R1_001.fastq.gz,forward 74 | 29A-2,$PWD/sequence_data/Osnat073-29-A-2_S73_L001_R1_001.fastq.gz,forward 75 | 30A-2,$PWD/sequence_data/Osnat074-30-A-2_S74_L001_R1_001.fastq.gz,forward 76 | 31A-2,$PWD/sequence_data/Osnat075-31-A-2_S75_L001_R1_001.fastq.gz,forward 77 | 32A-2,$PWD/sequence_data/Osnat076-32-A-2_S76_L001_R1_001.fastq.gz,forward 78 | 33A-2,$PWD/sequence_data/Osnat077-33-A-2_S77_L001_R1_001.fastq.gz,forward 79 | 34A-2,$PWD/sequence_data/Osnat078-34-A-2_S78_L001_R1_001.fastq.gz,forward 80 | 35A-2,$PWD/sequence_data/Osnat079-35-A-2_S79_L001_R1_001.fastq.gz,forward 81 | 36A-2,$PWD/sequence_data/Osnat080-36-A-2_S80_L001_R1_001.fastq.gz,forward 82 | 37A-2,$PWD/sequence_data/Osnat081-37-A-2_S81_L001_R1_001.fastq.gz,forward 83 | 38A-2,$PWD/sequence_data/Osnat082-38-A-2_S82_L001_R1_001.fastq.gz,forward 84 | 39A-2,$PWD/sequence_data/Osnat083-39-A-2_S83_L001_R1_001.fastq.gz,forward 85 | 40A-2,$PWD/sequence_data/Osnat084-40-A-2_S84_L001_R1_001.fastq.gz,forward 86 | 41A-2,$PWD/sequence_data/Osnat085-41-A-2_S85_L001_R1_001.fastq.gz,forward 87 | 42A-2,$PWD/sequence_data/Osnat086-42-A-2_S86_L001_R1_001.fastq.gz,forward 88 | 43A-2,$PWD/sequence_data/Osnat087-43-A-2_S87_L001_R1_001.fastq.gz,forward 89 | 44A-2,$PWD/sequence_data/Osnat088-44-A-2_S88_L001_R1_001.fastq.gz,forward 90 | M-1,$PWD/sequence_data/Osnat157-M-1_S157_L001_R1_001.fastq.gz,forward 91 | M-2,$PWD/sequence_data/Osnat158-M-2_S158_L001_R1_001.fastq.gz,forward 92 | M-3,$PWD/sequence_data/Osnat159-M-3_S159_L001_R1_001.fastq.gz,forward 93 | M-NC,$PWD/sequence_data/Osnat160-M-_S160_L001_R1_001.fastq.gz,forward 94 | -------------------------------------------------------------------------------- /scripts/ancom_differential_abundance.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N diff_abundance 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 40 9 | 10 | 11 | set -e 12 | 13 | source activate qiime2-2020.6 14 | export PERL5LIB='/gpfs0/bioinfo/users/obayomi/miniconda3/envs/qiime2-2020.6/lib/site_perl/5.26.2/x86_64-linux-thread-multi' 15 | 16 | 17 | TAXON_LEVELS=(2 3 4 5 6) 18 | 19 | #FEATURE_TABLE_DIR=('05.filter_table/dada2' '05.filter_table/dada2' '05.filter_table/deblur/' '05.filter_table/deblur/' '05.filter_table/dada2/indoors' '05.filter_table/dada2/indoors' '05.filter_table/deblur/indoors' '05.filter_table/deblur/indoors' '05.filter_table/dada2/outdoors' '05.filter_table/dada2/outdoors' '05.filter_table/deblur/outdoors' '05.filter_table/deblur/outdoors') 20 | 21 | #PREFIX=('se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined') 22 | 23 | #METADATA=('00.mapping/combined.tsv' '00.mapping/combined.tsv' '00.mapping/combined.tsv' '00.mapping/combined.tsv' '00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv') 24 | 25 | #OUT_DIR=('09.differential_abundance/dada2' '09.differential_abundance/dada2' '09.differential_abundance/deblur' '09.differential_abundance/deblur' '09.differential_abundance/dada2/indoors' '09.differential_abundance/dada2/indoors' '09.differential_abundance/deblur/indoors' 
'09.differential_abundance/deblur/indoors' '09.differential_abundance/dada2/outdoors' '09.differential_abundance/dada2/outdoors' '09.differential_abundance/deblur/outdoors' '09.differential_abundance/deblur/outdoors') 26 | 27 | #METADATA_COLUMN=('treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment') 28 | 29 | #TAXONOMY_DIR=('04.assign_taxonomy/dada2' '04.assign_taxonomy/dada2' '04.assign_taxonomy/deblur' '04.assign_taxonomy/deblur' '04.assign_taxonomy/dada2' '04.assign_taxonomy/dada2' '04.assign_taxonomy/deblur' '04.assign_taxonomy/deblur' '04.assign_taxonomy/dada2' '04.assign_taxonomy/dada2' '04.assign_taxonomy/deblur' '04.assign_taxonomy/deblur') 30 | 31 | 32 | ##################################################################################################################################################### 33 | 34 | # Dada2 Reanalysis modified maxEE and read trunc length 35 | #TAXONOMY_DIR=('04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2') 36 | 37 | #FEATURE_TABLE_DIR=('05.redo_filter_table/dada2' '05.redo_filter_table/dada2' '05.redo_filter_table/dada2' '05.redo_filter_table/dada2/indoors' '05.redo_filter_table/dada2/indoors' '05.redo_filter_table/dada2/indoors' '05.redo_filter_table/dada2/outdoors' '05.redo_filter_table/dada2/outdoors' '05.redo_filter_table/dada2/outdoors' '05.redo_filter_table/dada2/mock' '05.redo_filter_table/dada2/mock' '05.redo_filter_table/dada2/mock') 38 | 39 | #PREFIX=('se' 'pear-joined' 'pe' 'se' 'pear-joined' 'pe' 'se' 'pear-joined' 'pe' 'se' 'pear-joined' 'pe') 40 | 41 | #METADATA=('00.mapping/combined.tsv' '00.mapping/combined.tsv' '00.mapping/pe-dada2/combined.tsv' '00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/pe-dada2/indoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/pe-dada2/outdoors.tsv' '00.mapping/mock.tsv' '00.mapping/mock.tsv' '00.mapping/pe-dada2/mock.tsv') 42 | 43 | #METADATA_COLUMN=('treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment') 44 | 45 | 46 | #OUT_DIR=('09.redo_differential_abundance/dada2' '09.redo_differential_abundance/dada2' '09.redo_differential_abundance/dada2' '09.redo_differential_abundance/dada2/indoors' '09.redo_differential_abundance/dada2/indoors' '09.redo_differential_abundance/dada2/indoors' '09.redo_differential_abundance/dada2/outdoors' '09.redo_differential_abundance/dada2/outdoors' '09.redo_differential_abundance/dada2/outdoors' '09.redo_differential_abundance/dada2/mock' '09.redo_differential_abundance/dada2/mock' '09.redo_differential_abundance/dada2/mock') 47 | 48 | 49 | ################################################################################################################################################## 50 | 51 | TAXONOMY_DIR=(04.{,redo_}assign_taxonomy/dada2{,,}) 52 | FEATURE_TABLE_DIR=(05.{,redo_}filter_table/dada2/{indoors,outdoors,basins}/) 53 | PREFIX=($( for i in {1..6}; do echo 'se'; done)) 54 | METADATA=($(for i in {1..2}; do echo 00.mapping/{indoors,outdoors,basins}-edited.tsv; done)) 55 | METADATA_COLUMN=($( for i in {1..6}; do echo 'treatment'; 
done)) 56 | OUT_DIR=(09.{,redo_}differential_abundance/dada2/{indoors,outdoors,basins}) 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | # Differential abundance testing using ANCOM 65 | # At the ASV level 66 | # Add pseudocount to ASV table because ANCOM can't deal with zero counts 67 | parallel --jobs 0 --link qiime composition add-pseudocount \ 68 | --i-table {1}/{2}-filtered_table.qza \ 69 | --o-composition-table {3}/{2}-composition-table.qza \ 70 | ::: ${FEATURE_TABLE_DIR[*]} ::: ${PREFIX[*]} ::: ${OUT_DIR[*]} 71 | 72 | # Apply ANCOM to identify ASVs/OTUs that differ in abundance 73 | parallel --jobs 0 --link qiime composition ancom \ 74 | --i-table {3}/{1}-composition-table.qza \ 75 | --m-metadata-file {2} \ 76 | --m-metadata-column {4} \ 77 | --o-visualization {3}/{1}-{4}-ancom.qzv \ 78 | ::: ${PREFIX[*]} ::: ${METADATA[*]} ::: ${OUT_DIR[*]} ::: ${METADATA_COLUMN[*]} 79 | 80 | 81 | 82 | for TAXON_LEVEL in ${TAXON_LEVELS[*]}; do 83 | 84 | # At a specific taxonomy level - here at the genus level, i.e. level 6 (L6) 85 | # 1. Collapse the feature table at the taxonomy level of interest 86 | parallel --jobs 0 --link qiime taxa collapse \ 87 | --i-table {1}/{2}-filtered_table.qza \ 88 | --i-taxonomy {4}/{2}-taxonomy.qza \ 89 | --p-level ${TAXON_LEVEL} \ 90 | --o-collapsed-table {3}/{2}-L${TAXON_LEVEL}-filtered_table.qza \ 91 | ::: ${FEATURE_TABLE_DIR[*]} ::: ${PREFIX[*]} ::: ${OUT_DIR[*]} ::: ${TAXONOMY_DIR[*]} 92 | 93 | 94 | # 2. Add pseudocount to ASV table because ANCOM can't deal with zero counts 95 | parallel --jobs 0 --link qiime composition add-pseudocount \ 96 | --i-table {2}/{1}-L${TAXON_LEVEL}-filtered_table.qza \ 97 | --o-composition-table {2}/{1}-L${TAXON_LEVEL}-composition-table.qza \ 98 | ::: ${PREFIX[*]} ::: ${OUT_DIR[*]} 99 | 100 | # 3. Apply ANCOM to identify ASVs/OTUs that differ in abundance 101 | parallel --jobs 0 --link qiime composition ancom \ 102 | --i-table {3}/{1}-L${TAXON_LEVEL}-composition-table.qza \ 103 | --m-metadata-file {2} \ 104 | --m-metadata-column {4} \ 105 | --o-visualization {3}/{1}-L${TAXON_LEVEL}-{4}-ancom.qzv \ 106 | ::: ${PREFIX[*]} ::: ${METADATA[*]} ::: ${OUT_DIR[*]} ::: ${METADATA_COLUMN[*]} 107 | 108 | done 109 | 110 | -------------------------------------------------------------------------------- /examples/merge-MANIFEST: -------------------------------------------------------------------------------- 1 | sample-id,absolute-filepath,direction 2 | 1A,$PWD/stitched_reads/Osnat001-1-A_S1_L001.assembled.fastq.gz,forward 3 | 2A,$PWD/stitched_reads/Osnat002-2-A_S2_L001.assembled.fastq.gz,forward 4 | 3A,$PWD/stitched_reads/Osnat003-3-A_S3_L001.assembled.fastq.gz,forward 5 | 4A,$PWD/stitched_reads/Osnat004-4-A_S4_L001.assembled.fastq.gz,forward 6 | 5A,$PWD/stitched_reads/Osnat005-5-A_S5_L001.assembled.fastq.gz,forward 7 | 6A,$PWD/stitched_reads/Osnat006-6-A_S6_L001.assembled.fastq.gz,forward 8 | 7A,$PWD/stitched_reads/Osnat007-7-A_S7_L001.assembled.fastq.gz,forward 9 | 8A,$PWD/stitched_reads/Osnat008-8-A_S8_L001.assembled.fastq.gz,forward 10 | 9A,$PWD/stitched_reads/Osnat009-9-A_S9_L001.assembled.fastq.gz,forward 11 | 10A,$PWD/stitched_reads/Osnat010-10-A_S10_L001.assembled.fastq.gz,forward 12 | 11A,$PWD/stitched_reads/Osnat011-11-A_S11_L001.assembled.fastq.gz,forward 13 | 12A,$PWD/stitched_reads/Osnat012-12-A_S12_L001.assembled.fastq.gz,forward 14 | 13A,$PWD/stitched_reads/Osnat013-13-A_S13_L001.assembled.fastq.gz,forward 15 | 14A,$PWD/stitched_reads/Osnat014-14-A_S14_L001.assembled.fastq.gz,forward 16 | 
15A,$PWD/stitched_reads/Osnat015-15-A_S15_L001.assembled.fastq.gz,forward 17 | 16A,$PWD/stitched_reads/Osnat016-16-A_S16_L001.assembled.fastq.gz,forward 18 | 17A,$PWD/stitched_reads/Osnat017-17-A_S17_L001.assembled.fastq.gz,forward 19 | 18A,$PWD/stitched_reads/Osnat018-18-A_S18_L001.assembled.fastq.gz,forward 20 | 19A,$PWD/stitched_reads/Osnat019-19-A_S19_L001.assembled.fastq.gz,forward 21 | 20A,$PWD/stitched_reads/Osnat020-20-A_S20_L001.assembled.fastq.gz,forward 22 | 22A,$PWD/stitched_reads/Osnat021-22-A_S21_L001.assembled.fastq.gz,forward 23 | 23A,$PWD/stitched_reads/Osnat022-23-A_S22_L001.assembled.fastq.gz,forward 24 | 24A,$PWD/stitched_reads/Osnat023-24-A_S23_L001.assembled.fastq.gz,forward 25 | 25A,$PWD/stitched_reads/Osnat024-25-A_S24_L001.assembled.fastq.gz,forward 26 | 26A,$PWD/stitched_reads/Osnat025-26-A_S25_L001.assembled.fastq.gz,forward 27 | 27A,$PWD/stitched_reads/Osnat026-27-A_S26_L001.assembled.fastq.gz,forward 28 | 28A,$PWD/stitched_reads/Osnat027-28-A_S27_L001.assembled.fastq.gz,forward 29 | 29A,$PWD/stitched_reads/Osnat028-29-A_S28_L001.assembled.fastq.gz,forward 30 | 30A,$PWD/stitched_reads/Osnat029-30-A_S29_L001.assembled.fastq.gz,forward 31 | 31A,$PWD/stitched_reads/Osnat030-31-A_S30_L001.assembled.fastq.gz,forward 32 | 32A,$PWD/stitched_reads/Osnat031-32-A_S31_L001.assembled.fastq.gz,forward 33 | 33A,$PWD/stitched_reads/Osnat032-33-A_S32_L001.assembled.fastq.gz,forward 34 | 34A,$PWD/stitched_reads/Osnat033-34-A_S33_L001.assembled.fastq.gz,forward 35 | 35A,$PWD/stitched_reads/Osnat034-35-A_S34_L001.assembled.fastq.gz,forward 36 | 36A,$PWD/stitched_reads/Osnat035-36-A_S35_L001.assembled.fastq.gz,forward 37 | 37A,$PWD/stitched_reads/Osnat036-37-A_S36_L001.assembled.fastq.gz,forward 38 | 38A,$PWD/stitched_reads/Osnat037-38-A_S37_L001.assembled.fastq.gz,forward 39 | 39A,$PWD/stitched_reads/Osnat038-39-A_S38_L001.assembled.fastq.gz,forward 40 | 40A,$PWD/stitched_reads/Osnat039-40-A_S39_L001.assembled.fastq.gz,forward 41 | 41A,$PWD/stitched_reads/Osnat040-41-A_S40_L001.assembled.fastq.gz,forward 42 | 42A,$PWD/stitched_reads/Osnat041-42-A_S41_L001.assembled.fastq.gz,forward 43 | 43A,$PWD/stitched_reads/Osnat042-43-A_S42_L001.assembled.fastq.gz,forward 44 | 44A,$PWD/stitched_reads/Osnat043-44-A_S43_L001.assembled.fastq.gz,forward 45 | 45A,$PWD/stitched_reads/Osnat044-45-A_S44_L001.assembled.fastq.gz,forward 46 | 46A,$PWD/stitched_reads/Osnat045-46-A_S45_L001.assembled.fastq.gz,forward 47 | 47A,$PWD/stitched_reads/Osnat046-47-A_S46_L001.assembled.fastq.gz,forward 48 | 48A,$PWD/stitched_reads/Osnat047-48-A_S47_L001.assembled.fastq.gz,forward 49 | 49A,$PWD/stitched_reads/Osnat048-49-A_S48_L001.assembled.fastq.gz,forward 50 | 50A,$PWD/stitched_reads/Osnat049-50-A_S49_L001.assembled.fastq.gz,forward 51 | 51A,$PWD/stitched_reads/Osnat050-51-A_S50_L001.assembled.fastq.gz,forward 52 | 52A,$PWD/stitched_reads/Osnat051-52-A_S51_L001.assembled.fastq.gz,forward 53 | 53A,$PWD/stitched_reads/Osnat052-53-A_S52_L001.assembled.fastq.gz,forward 54 | 54A,$PWD/stitched_reads/Osnat053-54-A_S53_L001.assembled.fastq.gz,forward 55 | 9A-2,$PWD/stitched_reads/Osnat054-9-A-2_S54_L001.assembled.fastq.gz,forward 56 | 10A-2,$PWD/stitched_reads/Osnat055-10-A-2_S55_L001.assembled.fastq.gz,forward 57 | 11A-2,$PWD/stitched_reads/Osnat056-11-A-2_S56_L001.assembled.fastq.gz,forward 58 | 12A-2,$PWD/stitched_reads/Osnat057-12-A-2_S57_L001.assembled.fastq.gz,forward 59 | 13A-2,$PWD/stitched_reads/Osnat058-13-A-2_S58_L001.assembled.fastq.gz,forward 60 | 
14A-2,$PWD/stitched_reads/Osnat059-14-A-2_S59_L001.assembled.fastq.gz,forward 61 | 15A-2,$PWD/stitched_reads/Osnat060-15-A-2_S60_L001.assembled.fastq.gz,forward 62 | 16A-2,$PWD/stitched_reads/Osnat061-16-A-2_S61_L001.assembled.fastq.gz,forward 63 | 17A-2,$PWD/stitched_reads/Osnat062-17-A-2_S62_L001.assembled.fastq.gz,forward 64 | 18A-2,$PWD/stitched_reads/Osnat063-18-A-2_S63_L001.assembled.fastq.gz,forward 65 | 19A-2,$PWD/stitched_reads/Osnat064-19-A-2_S64_L001.assembled.fastq.gz,forward 66 | 20A-2,$PWD/stitched_reads/Osnat065-20-A-2_S65_L001.assembled.fastq.gz,forward 67 | 21A-2,$PWD/stitched_reads/Osnat066-21-A-2_S66_L001.assembled.fastq.gz,forward 68 | 22A-2,$PWD/stitched_reads/Osnat067-22-A-2_S67_L001.assembled.fastq.gz,forward 69 | 23A-2,$PWD/stitched_reads/Osnat068-23-A-2_S68_L001.assembled.fastq.gz,forward 70 | 25A-2,$PWD/stitched_reads/Osnat069-25-A-2_S69_L001.assembled.fastq.gz,forward 71 | 26A-2,$PWD/stitched_reads/Osnat070-26-A-2_S70_L001.assembled.fastq.gz,forward 72 | 27A-2,$PWD/stitched_reads/Osnat071-27-A-2_S71_L001.assembled.fastq.gz,forward 73 | 28A-2,$PWD/stitched_reads/Osnat072-28-A-2_S72_L001.assembled.fastq.gz,forward 74 | 29A-2,$PWD/stitched_reads/Osnat073-29-A-2_S73_L001.assembled.fastq.gz,forward 75 | 30A-2,$PWD/stitched_reads/Osnat074-30-A-2_S74_L001.assembled.fastq.gz,forward 76 | 31A-2,$PWD/stitched_reads/Osnat075-31-A-2_S75_L001.assembled.fastq.gz,forward 77 | 32A-2,$PWD/stitched_reads/Osnat076-32-A-2_S76_L001.assembled.fastq.gz,forward 78 | 33A-2,$PWD/stitched_reads/Osnat077-33-A-2_S77_L001.assembled.fastq.gz,forward 79 | 34A-2,$PWD/stitched_reads/Osnat078-34-A-2_S78_L001.assembled.fastq.gz,forward 80 | 35A-2,$PWD/stitched_reads/Osnat079-35-A-2_S79_L001.assembled.fastq.gz,forward 81 | 36A-2,$PWD/stitched_reads/Osnat080-36-A-2_S80_L001.assembled.fastq.gz,forward 82 | 37A-2,$PWD/stitched_reads/Osnat081-37-A-2_S81_L001.assembled.fastq.gz,forward 83 | 38A-2,$PWD/stitched_reads/Osnat082-38-A-2_S82_L001.assembled.fastq.gz,forward 84 | 39A-2,$PWD/stitched_reads/Osnat083-39-A-2_S83_L001.assembled.fastq.gz,forward 85 | 40A-2,$PWD/stitched_reads/Osnat084-40-A-2_S84_L001.assembled.fastq.gz,forward 86 | 41A-2,$PWD/stitched_reads/Osnat085-41-A-2_S85_L001.assembled.fastq.gz,forward 87 | 42A-2,$PWD/stitched_reads/Osnat086-42-A-2_S86_L001.assembled.fastq.gz,forward 88 | 43A-2,$PWD/stitched_reads/Osnat087-43-A-2_S87_L001.assembled.fastq.gz,forward 89 | 44A-2,$PWD/stitched_reads/Osnat088-44-A-2_S88_L001.assembled.fastq.gz,forward 90 | M-1,$PWD/stitched_reads/Osnat157-M-1_S157_L001.assembled.fastq.gz,forward 91 | M-2,$PWD/stitched_reads/Osnat158-M-2_S158_L001.assembled.fastq.gz,forward 92 | M-3,$PWD/stitched_reads/Osnat159-M-3_S159_L001.assembled.fastq.gz,forward 93 | M-NC,$PWD/stitched_reads/Osnat160-M-_S160_L001.assembled.fastq.gz,forward 94 | -------------------------------------------------------------------------------- /scripts/run_pear.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use warnings; 4 | use strict; 5 | use File::Basename; 6 | use Getopt::Long; 7 | use Pod::Usage; 8 | use Parallel::ForkManager; 9 | use List::Util qw(min max sum); 10 | 11 | 12 | my ($parallel,$help); 13 | my $out_dir='./'; 14 | my $full_log='pear_full_log.txt'; 15 | my $summary_log='pear_summary_log.txt'; 16 | my $gzip_output; 17 | 18 | my $res = GetOptions("out_dir=s" => \$out_dir, 19 | "parallel:i"=>\$parallel, 20 | "full_log=s"=>\$full_log, 21 | "summary_log=s"=>\$summary_log, 22 | "gzip_output"=>\$gzip_output, 23 | 
"help"=>\$help, 24 | )or pod2usage(2); 25 | 26 | pod2usage(-verbose=>2) if $help; 27 | 28 | my @files=@ARGV; 29 | 30 | pod2usage($0.': You must provide a list of fastq files to be merged.') unless @files; 31 | 32 | #make output directory 33 | system("mkdir -p $out_dir"); 34 | 35 | my $cpu_count=1; 36 | #if the option is set 37 | if(defined($parallel)){ 38 | #option is set but with no value then use the max number of proccessors 39 | if($parallel ==0){ 40 | #load this module dynamically 41 | eval("use Sys::CPU;"); 42 | $cpu_count=Sys::CPU::cpu_count(); 43 | }else{ 44 | $cpu_count=$parallel; 45 | } 46 | } 47 | 48 | 49 | my %paired_files; 50 | foreach my $file (@files){ 51 | my ($file_name,$dir)=fileparse($file); 52 | if($file_name =~ /(.+)_R([1|2])[_|\.]/){ 53 | $paired_files{$1}[$2-1]=$file; 54 | #attempt different naming scheme 55 | }elsif($file_name =~ /(.+)_([1|2])/){ 56 | $paired_files{$1}[$2-1]=$file; 57 | }else{ 58 | warn "Input file \"$file\" does not contain '_R1_' or '_R2_' (or alternatively '_R1.' or '_R2.')."; 59 | } 60 | } 61 | 62 | #clear the output log (and make sure it is writable) 63 | #open(my $FULL_LOG,'>',$full_log) || die "Can't write to log file: $full_log"; 64 | #close($FULL_LOG); 65 | 66 | 67 | foreach my $name (sort keys %paired_files){ 68 | unless(defined($paired_files{$name}[0]) && defined($paired_files{$name}[1])){ 69 | warn "Couldn't find matching paired end files for file starting with: $name"; 70 | next; 71 | } 72 | my $out_file=$out_dir.'/'.$name; 73 | #check if this has already been done 74 | my $assembled_out_file=$out_file.'.assembled.fastq'; 75 | if (-e $assembled_out_file || -e $assembled_out_file.'.gz'){ 76 | print "Skipping this sample because output file already exists: $assembled_out_file\n"; 77 | next; 78 | } 79 | my $cmd="/gpfs0/bioinfo/users/obayomi/pear/pear-0.9.11-linux-x86_64/bin/pear -f $paired_files{$name}[0] -r $paired_files{$name}[1] -j $cpu_count -o $out_file -m 480 -n 400 -t 400 -q 20 >>$full_log"; 80 | #-m 600 -n 400 -t 400 -q 20 - used this for the 16S sequences 81 | print $cmd,"\n"; 82 | die if system($cmd); 83 | 84 | #compress output files (if the flag is set) 85 | if($gzip_output){ 86 | my $gzip_cmd="pigz -p $cpu_count -f $out_file".'*'; 87 | print $gzip_cmd,"\n"; 88 | die if system($gzip_cmd); 89 | } 90 | } 91 | 92 | print "Creating PEAR summary log at: $summary_log \n"; 93 | my $min_assembled=create_summary_log($full_log,$summary_log); 94 | 95 | if($min_assembled < 90){ 96 | print "Finished! Warning!! one or more samples were less than 90% assembled! You should manually inspect the log file: $summary_log \n"; 97 | }else{ 98 | print "Finished! All samples assembled at 90% or greater. 
For more details you can check manually inspect the log file: $summary_log \n"; 99 | } 100 | 101 | sub mean { 102 | return sum(@_)/@_; 103 | } 104 | 105 | sub create_summary_log{ 106 | 107 | my $full_log=shift; 108 | my $summary_log=shift; 109 | open(my $FULL_LOG,'<',$full_log) || die "Can't read log file: $full_log"; 110 | 111 | open(my $SUMMARY_LOG,'>',$summary_log) || die "Can't create summary log file for writing: $summary_log"; 112 | my @samples; 113 | 114 | while (<$FULL_LOG>) { 115 | chomp; 116 | if (/Assembled reads/) { 117 | my $assembled_string=$_; 118 | my $discarded_string=<$FULL_LOG>; 119 | my $unassembled_string=<$FULL_LOG>; 120 | my $assembled_file_string=<$FULL_LOG>; 121 | my ($assembled_percent) = $assembled_string =~ /(\d+\.\d+)\%/; 122 | my ($discarded_percent) = $discarded_string =~ /(\d+\.\d+)\%/; 123 | my ($unassembled_percent) = $unassembled_string =~ /(\d+\.\d+)\%/; 124 | my ($assembled_file) = $assembled_file_string =~ /([\w|\-|\.]+)\.assembled/; 125 | push (@samples, [$assembled_file,$assembled_percent,$discarded_percent,$unassembled_percent]); 126 | } 127 | } 128 | 129 | #Add min, mean, and max as first three lines of output 130 | unshift @samples,['Max',sprintf("%.3f",max(map{$_->[1]}@samples)),sprintf("%.3f",max(map{$_->[2]}@samples)),sprintf("%.3f",max(map{$_->[3]}@samples))]; 131 | unshift @samples,['Mean',sprintf("%.3f",mean(map{$_->[1]}@samples)),sprintf("%.3f",mean(map{$_->[2]}@samples)),sprintf("%.3f",mean(map{$_->[3]}@samples))]; 132 | unshift @samples,['Min',sprintf("%.3f",min(map{$_->[1]}@samples)),sprintf("%.3f",min(map{$_->[2]}@samples)),sprintf("%.3f",min(map{$_->[3]}@samples))]; 133 | 134 | #print header 135 | print $SUMMARY_LOG join("\t","ID","Assembled","Discarded","Unassembled"),"\n"; 136 | 137 | #print out all the data 138 | foreach my $sample (@samples) { 139 | print $SUMMARY_LOG join("\t",@$sample),"\n"; 140 | } 141 | return sprintf("%.3f",min(map{$_->[1]}@samples)) 142 | } 143 | 144 | 145 | __END__ 146 | 147 | =head1 Name 148 | 149 | run_pear.pl - A simple wrapper for PEAR to stich paired-end reads 150 | 151 | =head1 USAGE 152 | 153 | run_pear.pl [-p [<# proc>] -o -h] 154 | 155 | E.g. 156 | 157 | #Note: Files must have "_R1_" and "_R2_" (or "_R1." and "_R2.") within the file name (or secondarily "_1" and "_2") 158 | 159 | run_pear.pl sample1_R1_001.fastq sample1_R2_001.fastq sample2_R1_001.fastq sample2_R2_001.fastq 160 | 161 | #Shorter way to do the same thing 162 | 163 | run_pear.pl *.fastq 164 | 165 | #Specify alternate location for output files (instead of default current directory) 166 | 167 | run_pear.pl -o stitched_reads *.fastq 168 | 169 | #Run in parallel and use all CPUs 170 | 171 | run_pear.pl *.fastq -p 172 | 173 | #Run in parallel limit to only 2 CPUs 174 | 175 | run_pear.pl *.fastq -p 2 176 | 177 | #Turn off gzip compression of output files 178 | 179 | run_pear.pl -g *.fastq 180 | 181 | =head1 OPTIONS 182 | 183 | =over 4 184 | 185 | =item B<-o, --out_dir > 186 | 187 | The name of the output directory to place all PEAR output files. 188 | 189 | =item B<-p, --parallel [<# of proc>]> 190 | 191 | Using this option without a value will use all CPUs on machine, while giving it a value will limit to that many CPUs. Without option only one CPU is used. 192 | 193 | =item B<-g, --gzip_output> 194 | 195 | Gzip the PEAR output files. 196 | 197 | =item B<-f, --full_log > 198 | 199 | The location to write the PEAR full log file. 
Default is "pear_full_log.txt" 200 | 201 | =item B<-s, --summary_log > 202 | 203 | The location to write teh PEAR summary log file. Default is "pear_summary_log.txt" 204 | 205 | =item B<-h, --help> 206 | 207 | Displays the entire help documentation. 208 | 209 | =back 210 | 211 | =head1 DESCRIPTION 212 | 213 | B This script allows for more automated running of the PEAR program on multiple fastq files. PEAR is used to stitch (or assemble) paired end reads together. The assumption is made that the paired end files have the same name with the forward reads being indicated by "_R1_" and the reverse being "_R2_" (they can also be "_R1." and "_R2."). If file names are not found matching these then an simpler label is attempted ("_1" and "_2"). 214 | 215 | The script allows the use of multiple threads. 216 | 217 | This script also captures the output statistics from PEAR and outputs them to a single "pear_full_log.txt"(by default). It also parses this and simplifies the output into "pear_summary_log.txt" (by default). 218 | 219 | By default, output files from PEAR are gzipped to save on space. 220 | 221 | Before use make sure you have installed the "pear" program so it is accesible from your PATH. 222 | 223 | =head1 AUTHOR 224 | 225 | Morgan Langille, Emorgan.g.i.langille@gmail.comE 226 | 227 | =cut 228 | 229 | -------------------------------------------------------------------------------- /scripts/taxa-plots.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N make_bar_plots 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 40 9 | 10 | set -e 11 | 12 | source activate qiime2-2020.6 13 | export PERL5LIB='/gpfs0/bioinfo/users/obayomi/miniconda3/envs/qiime2-2020.6/lib/site_perl/5.26.2/x86_64-linux-thread-multi' 14 | export TEMPDIR='/gpfs0/bioinfo/users/obayomi/hinuman_analysis/18S_illumina/tmp/' TMPDIR='/gpfs0/bioinfo/users/obayomi/hinuman_analysis/18S_illumina/tmp/' 15 | 16 | 17 | 18 | #TAXONOMY_DIR=('04.assign_taxonomy/dada2' '04.assign_taxonomy/dada2' '04.assign_taxonomy/deblur' '04.assign_taxonomy/deblur' '04.assign_taxonomy/dada2' '04.assign_taxonomy/dada2' '04.assign_taxonomy/deblur' '04.assign_taxonomy/deblur' '04.assign_taxonomy/dada2' '04.assign_taxonomy/dada2' '04.assign_taxonomy/deblur' '04.assign_taxonomy/deblur' '04.assign_taxonomy/dada2' '04.assign_taxonomy/dada2' '04.assign_taxonomy/deblur' '04.assign_taxonomy/deblur') 19 | 20 | #FEATURE_TABLE_DIR=('05.filter_table/dada2' '05.filter_table/dada2' '05.filter_table/deblur/' '05.filter_table/deblur/' '05.filter_table/dada2/indoors' '05.filter_table/dada2/indoors' '05.filter_table/deblur/indoors' '05.filter_table/deblur/indoors' '05.filter_table/dada2/outdoors' '05.filter_table/dada2/outdoors' '05.filter_table/deblur/outdoors' '05.filter_table/deblur/outdoors' '05.filter_table/dada2/mock' '05.filter_table/dada2/mock' '05.filter_table/deblur/mock' '05.filter_table/deblur/mock') 21 | 22 | #PREFIX=('se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined') 23 | 24 | #METADATA=('00.mapping/combined.tsv' '00.mapping/combined.tsv' '00.mapping/combined.tsv' '00.mapping/combined.tsv' '00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/mock.tsv' '00.mapping/mock.tsv' '00.mapping/mock.tsv' 
'00.mapping/mock.tsv') 25 | 26 | #METADATA_COLUMN=('treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment') 27 | 28 | #MODE=('combined' 'combined' 'combined' 'combined' 'indoors' 'indoors' 'indoors' 'indoors' 'outdoors' 'outdoors' 'outdoors' 'outdoors' 'mock' 'mock' 'mock' 'mock') 29 | 30 | #GROUP_METADATA=('00.mapping/combined-treatment.tsv' '00.mapping/combined-treatment.tsv' '00.mapping/combined-treatment.tsv' '00.mapping/combined-treatment.tsv' '00.mapping/indoors-treatment.tsv' '00.mapping/indoors-treatment.tsv' '00.mapping/indoors-treatment.tsv' '00.mapping/indoors-treatment.tsv' '00.mapping/outdoors-treatment.tsv' '00.mapping/outdoors-treatment.tsv' '00.mapping/outdoors-treatment.tsv' '00.mapping/outdoors-treatment.tsv' '00.mapping/mock-treatment.tsv' '00.mapping/mock-treatment.tsv' '00.mapping/mock-treatment.tsv' '00.mapping/mock-treatment.tsv') 31 | 32 | #PLOT_DIR=('07.make_taxa_plots/dada2' '07.make_taxa_plots/dada2' '07.make_taxa_plots/deblur/' '07.make_taxa_plots/deblur/' '07.make_taxa_plots/dada2/indoors' '07.make_taxa_plots/dada2/indoors' '07.make_taxa_plots/deblur/indoors' '07.make_taxa_plots/deblur/indoors' '07.make_taxa_plots/dada2/outdoors' '07.make_taxa_plots/dada2/outdoors' '07.make_taxa_plots/deblur/outdoors' '07.make_taxa_plots/deblur/outdoors' '07.make_taxa_plots/dada2/mock' '07.make_taxa_plots/dada2/mock' '07.make_taxa_plots/deblur/mock' '07.make_taxa_plots/deblur/mock') 33 | 34 | 35 | ########################################################################################################################################################## 36 | 37 | 38 | 39 | # Dada2 Reanalysis modified maxEE and read trunc length 40 | #TAXONOMY_DIR=('04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2') 41 | 42 | #FEATURE_TABLE_DIR=('05.redo_filter_table/dada2' '05.redo_filter_table/dada2' '05.redo_filter_table/dada2' '05.redo_filter_table/dada2/indoors' '05.redo_filter_table/dada2/indoors' '05.redo_filter_table/dada2/indoors' '05.redo_filter_table/dada2/outdoors' '05.redo_filter_table/dada2/outdoors' '05.redo_filter_table/dada2/outdoors' '05.redo_filter_table/dada2/mock' '05.redo_filter_table/dada2/mock' '05.redo_filter_table/dada2/mock') 43 | 44 | #PREFIX=('se' 'pear-joined' 'pe' 'se' 'pear-joined' 'pe' 'se' 'pear-joined' 'pe' 'se' 'pear-joined' 'pe') 45 | 46 | #METADATA=('00.mapping/combined.tsv' '00.mapping/combined.tsv' '00.mapping/pe-dada2/combined.tsv' '00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/pe-dada2/indoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/pe-dada2/outdoors.tsv' '00.mapping/mock.tsv' '00.mapping/mock.tsv' '00.mapping/pe-dada2/mock.tsv') 47 | 48 | #METADATA_COLUMN=('treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment') 49 | 50 | #MODE=('combined' 'combined' 'combined' 'indoors' 'indoors' 'indoors' 'outdoors' 'outdoors' 'outdoors' 'mock' 'mock' 'mock') 51 | 52 | #GROUP_METADATA=('00.mapping/combined-treatment.tsv' '00.mapping/combined-treatment.tsv' 
'00.mapping/pe-dada2/combined-treatment.tsv' '00.mapping/indoors-treatment.tsv' '00.mapping/indoors-treatment.tsv' '00.mapping/pe-dada2/indoors-treatment.tsv' '00.mapping/outdoors-treatment.tsv' '00.mapping/outdoors-treatment.tsv' '00.mapping/pe-dada2/outdoors-treatment.tsv' '00.mapping/mock-treatment.tsv' '00.mapping/mock-treatment.tsv' '00.mapping/pe-dada2/mock-treatment.tsv') 53 | 54 | #PLOT_DIR=('07.redo_make_taxa_plots/dada2' '07.redo_make_taxa_plots/dada2' '07.redo_make_taxa_plots/dada2' '07.redo_make_taxa_plots/dada2/indoors' '07.redo_make_taxa_plots/dada2/indoors' '07.redo_make_taxa_plots/dada2/indoors' '07.redo_make_taxa_plots/dada2/outdoors' '07.redo_make_taxa_plots/dada2/outdoors' '07.redo_make_taxa_plots/dada2/outdoors' '07.redo_make_taxa_plots/dada2/mock' '07.redo_make_taxa_plots/dada2/mock' '07.redo_make_taxa_plots/dada2/mock') 55 | 56 | ###################################################################################################################################################### 57 | 58 | # Dada2 Reanalysis after splitting indoor samples and dropping some outdoor samples 59 | TAXONOMY_DIR=(04.{,redo_}assign_taxonomy/dada2{,,}) 60 | 61 | FEATURE_TABLE_DIR=(05.{,redo_}filter_table/dada2/{indoors,outdoors,basins}/) 62 | 63 | PREFIX=($( for i in {1..6}; do echo 'se'; done)) 64 | 65 | METADATA=($(for i in {1..2}; do echo 00.mapping/{indoors,outdoors,basins}-edited.tsv; done)) 66 | 67 | METADATA_COLUMN=($( for i in {1..6}; do echo 'treatment'; done)) 68 | 69 | MODE=($(for i in {1..2}; do echo {indoors,outdoors,basins}; done)) 70 | 71 | GROUP_METADATA=($(for i in {1..2}; do echo 00.mapping/{indoors-treatment,outdoors-treatment,basins-treatment}.tsv; done)) 72 | 73 | PLOT_DIR=(07.{,redo_}make_taxa_plots/dada2/{indoors,outdoors,basins}) 74 | 75 | 76 | 77 | 78 | # Sample bar plots 79 | parallel --jobs 0 --link qiime taxa barplot \ 80 | --i-table {1}/{2}-filtered_table.qza \ 81 | --i-taxonomy {6}/{2}-taxonomy.qza \ 82 | --m-metadata-file {3} \ 83 | --o-visualization {4}/{5}-samples-{2}-bar-plots.qzv \ 84 | ::: ${FEATURE_TABLE_DIR[*]} ::: ${PREFIX[*]} ::: ${METADATA[*]} ::: ${PLOT_DIR[*]} ::: ${MODE[*]} ::: ${TAXONOMY_DIR[*]} 85 | 86 | # Taxa bar plots of metadata group - here by treatment 87 | # group feature table (*-filtered_table.qza) by metadata column of interest 88 | parallel --jobs 0 --link qiime feature-table group \ 89 | --i-table {1}/{2}-filtered_table.qza \ 90 | --p-axis sample \ 91 | --m-metadata-file {3} \ 92 | --m-metadata-column {4} \ 93 | --p-mode sum \ 94 | --o-grouped-table {1}/{5}-{4}-{2}-filtered_table.qza \ 95 | ::: ${FEATURE_TABLE_DIR[*]} ::: ${PREFIX[*]} ::: ${METADATA[*]} ::: ${METADATA_COLUMN[*]} ::: ${MODE[*]} 96 | 97 | # Make bar plot of group table here by treatment 98 | # Make sure you create a new metadata with the group level names as sample-id 99 | parallel --jobs 0 --link qiime taxa barplot \ 100 | --i-table {1}/{5}-{4}-{2}-filtered_table.qza \ 101 | --i-taxonomy {6}/{2}-taxonomy.qza \ 102 | --m-metadata-file {3} \ 103 | --o-visualization {7}/{5}-{4}-{2}-bar-plots.qzv \ 104 | ::: ${FEATURE_TABLE_DIR[*]} ::: ${PREFIX[*]} ::: ${GROUP_METADATA[*]} ::: ${METADATA_COLUMN[*]} ::: ${MODE[*]} ::: ${TAXONOMY_DIR[*]} ::: ${PLOT_DIR[*]} 105 | 106 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Snakemake Workflow: Microbiome Amplicon (16S, 18S and ITS) sequence analysis using Qiime2 and PICRUSt2. 
2 | 3 | QIIME2-workflow 4 | 5 | This workflow performs microbiome analysis using QIIME2 and PICRUSt2 for functional annotation. Functional annotation is only performed for 16S amplicon sequences. 6 | 7 | Please note the following: 8 | 9 | 1. I analyze my data with qiime2 version 2020.6, so that is what I have tested this pipeline with. 10 | 2. I have not tested the pipeline using deblur or vsearch even though I have implemented them, so use these methods at your own risk. I have tested the dada2 pipeline and it works great. Hence, I advise you to run the dada2 pipeline. 11 | 3. I provide 3 Snakefiles: Snakefile (16S, 18S and ITS), Snakefile.16S (16S and 18S) and Snakefile.ITS (ITS alone). 12 | 4. I will be happy to fix any bug that you might find, so please feel free to reach out to me at obadbotanist@yahoo.com 13 | 14 | 15 | Please do not forget to cite the authors of the tools used. 16 | 17 | 18 | **The Pipeline does the following:** 19 | 20 | - It renames your input files (optional) so that they conform to the required input format, i.e. 01.raw_data/{SAMPLE}_R{1|2}.fastq.gz for paired-end or 01.raw_data/{SAMPLE}.fastq.gz for single-end reads 21 | - Quality checks and summarizes the input reads using FASTQC and MultiQC 22 | - Imports the reads into Qiime2 23 | - Quality checks the input artifact using Qiime2 24 | - Trims primers and adapters from the imported artifact using cutadapt as implemented in qiime2 25 | - Quality checks the trimmed input artifact using Qiime2 26 | - Denoises (filtering, chimera checking and ASV table generation) the reads using dada2 (default) 27 | - Assigns taxonomy to the representative sequences using scikit-learn and your provided database. See the create_DB folder for a pipeline that can be used to create the required databases 28 | - Excludes singletons and non-target taxa such as Mitochondria, Chloroplast, etc. The taxa to be filtered can be set from within the Snakefile by editing the "taxa2filter" variable. 29 | - Excludes rare ASVs, i.e. ASVs whose frequency is less than 0.005% of the total number of sequences (Navas-Molina et al. 2013) 30 | - Builds a phylogenetic tree 31 | - Generates sample and group taxa plots 32 | - Performs core diversity analysis, i.e. alpha and beta diversity analysis along with the related statistical tests 33 | - Performs differential abundance testing using ANCOM 34 | - Performs functional annotation using PICRUSt2 for 16S sequences. 35 | 36 | 37 | ## Authors 38 | 39 | * Olabiyi Obayomi (@olabiyi) 40 | 41 | 42 | Before you start, make sure you have miniconda, qiime2, picrust2 and snakemake installed. You can optionally install my bioinfo environment, which contains snakemake and many other useful bioinformatics tools. 43 | 44 | ### STEP 1: Install miniconda and qiime 2 (optional) 45 | 46 | See instructions on how to do so [here](https://docs.qiime2.org/2020.6/install/) 47 | 48 | ### STEP 2: Install picrust2 (optional) 49 | 50 | See instructions on how to do so [here](https://github.com/picrust/picrust2/blob/master/INSTALL.md) 51 | 52 | 53 | ### STEP 3: Install Snakemake in a separate conda environment or install my bioinfo environment which contains snakemake (optional) 54 | 55 | Install Snakemake using [conda](https://conda.io/projects/conda/en/latest/user-guide/install/index.html): 56 | 57 | conda create -c bioconda -c conda-forge -n snakemake snakemake 58 | 59 | For installation details, see the [instructions in the Snakemake documentation](https://snakemake.readthedocs.io/en/stable/getting_started/installation.html).
60 | 
61 | 
62 | ### Step 4: Obtain a copy of this workflow
63 | 
64 | git clone https://github.com/olabiyi/sankemake-workflow-qiime2.git
65 | 
66 | ### Step 5: Configure workflow
67 | 
68 | Configure the workflow according to your needs by editing the files in the `config/` folder. Adjust `config.yaml` to configure the workflow execution, and `sample.tsv` to specify your sample setup. Make sure your sample.tsv file does not contain any errors, as mistakes here could potentially lead to losing all of your data when the files are renamed.
69 | 
70 | ### Step 6: Install the bioinfo environment (optional)
71 | 
72 | If you would like to use my bioinfo environment:
73 | 
74 | conda env create -f envs/bioinfo.yaml
75 | 
76 | 
77 | ### Step 7: Running the pipeline
78 | 
79 | #### Activate the conda environment containing snakemake
80 | 
81 | source activate bioinfo
82 | 
83 | 
84 | #### Set up the mapping file and raw data directories
85 | 
86 | [ -d 00.mapping/ ] || mkdir 00.mapping/
87 | [ -d 01.raw_data/ ] || mkdir 01.raw_data/
88 | 
89 | #### Move your raw data to the 01.raw_data directory
90 | # Delete anything that may be present in the raw data directory
91 | rm -rf 01.raw_data/*
92 | # Move your read files to the raw data directory - every sample in its own directory - see the example in this repo
93 | mv location/rawData/16S/* 01.raw_data/
94 | 
95 | #### Create metadata files
96 | 
97 | You need two metadata files: a general metadata file called metadata.tsv and a treatment-metadata.tsv file.
98 | These files can be created and edited with Excel. Make sure to save them as *metadata.tsv* and *treatment-metadata.tsv*.
99 | The treatment-metadata.tsv file is used for making grouped bar plots, while metadata.tsv is used for core diversity analysis and general statistics.
100 | Please see the examples provided in this repository for the specific formats.
101 | 
102 | 
103 | #### Create the required MANIFEST file
104 | 
105 | # Get the sample names. This assumes that the folders in the 01.raw_data/ directory are named by sample.
106 | SAMPLES=($(ls -1 01.raw_data/ | grep -Ev "MANIFEST|seq" - | sort -V))
107 | 
108 | # Get the sample names for the "samples" field in the config file
109 | 
110 | (echo -ne '['; echo ${SAMPLES[*]} | sed -E 's/ /, /g' | sed -E 's/(\w+)/"\1"/g'; echo -e ']')
111 | 
112 | # Generate the MANIFEST file
113 | (echo "sample-id,absolute-filepath,direction"; \
114 | for SAMPLE in ${SAMPLES[*]}; do echo -ne "${SAMPLE},$PWD/01.raw_data/${SAMPLE}/${SAMPLE}_R1.fastq.gz,forward\n${SAMPLE},$PWD/01.raw_data/${SAMPLE}/${SAMPLE}_R2.fastq.gz,reverse\n"; done) \
115 | > 01.raw_data/MANIFEST
116 | 
117 | #### Create the config/sample.tsv file
118 | (echo -ne "SampleID\tType\tOld_name\tNew_name\n"; \
119 | for SAMPLE in ${SAMPLES[*]}; do echo -ne "${SAMPLE}\tForward\t01.raw_data/${SAMPLE}/${SAMPLE}_R1.fastq.gz\t01.raw_data/${SAMPLE}/${SAMPLE}_R1.fastq.gz\n${SAMPLE}\tReverse\t01.raw_data/${SAMPLE}/${SAMPLE}_R2.fastq.gz\t01.raw_data/${SAMPLE}/${SAMPLE}_R2.fastq.gz\n"; done) \
120 | > config/sample.tsv
121 | 
122 | 
123 | #### gzip fastq files if they are not already gzipped, as required by this pipeline. This also helps to save disk space.
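If you first want to see which files would be compressed (an optional check that is not part of the original instructions), you can run the same `find` expression without the gzip action:

    find 01.raw_data/ -type f -name '*.fastq'

The command below then compresses them in place: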
124 | 
125 | find 01.raw_data/ -type f -name '*.fastq' -exec gzip {} \;
126 | 
127 | 
128 | #### Executing the workflow
129 | 
130 | ##### Import the reads and check their quality to determine the truncation lengths for dada2
131 | 
132 | snakemake -pr --cores 10 --keep-going "04.QC/trimmed_reads_qual_viz.qzv" "04.QC/raw_reads_qual_viz.qzv"
133 | 
134 | 
135 | ##### Denoise the reads - chimera removal, read merging, quality trimming and ASV feature table generation. Take a good look at 05.Denoise_reads/denoise_stats.qzv to check that you did not lose too many reads and that the reads merged well. If the denoising was not successful, adjust the parameters you set for dada2 and then re-run
136 | 
137 | snakemake -pr --cores 15 --keep-going "05.Denoise_reads/denoise_stats.qzv" "05.Denoise_reads/table_summary.qzv" "05.Denoise_reads/representative_sequences.qzv"
138 | 
139 | ##### Filter taxa - examine "08.Filter_feature_table/taxa_filtered_table.qzv" to determine the threshold for filtering out rare taxa
140 | 
141 | snakemake -pr --cores 15 --keep-going "06.Assign_taxonomy/taxonomy.qzv" "07.Build_phylogenetic_tree/rooted-tree.qza" "08.Filter_feature_table/taxa_filtered_table.qzv"
142 | 
143 | ##### Filter rare taxa and make relative abundance bar plots
144 | 
145 | snakemake -pr --cores 15 --keep-going "08.Filter_feature_table/filtered_table.qzv" "09.Taxa_bar_plots/group-bar-plot.qzv" "09.Taxa_bar_plots/samples-bar-plots.qzv"
146 | 
147 | ##### Choose the rarefaction depth for diversity analysis after viewing "08.Filter_feature_table/filtered_table.qzv", then run the complete pipeline
148 | 
149 | snakemake -pr --cores 15 --keep-going
150 | 
151 | 
152 | #### Export the following files for downstream analysis with R scripts
153 | 
154 | 1. 05.Denoise_reads/denoise_stats.qza -> Denoising statistics
155 | 2. 06.Assign_taxonomy/taxonomy.qza -> Taxonomy assignments of the representative sequences
156 | 3. 07.Build_phylogenetic_tree/rooted-tree.qza -> Phylogenetic tree for phylogenetic alpha diversity measurements
157 | 4. 08.Filter_feature_table/filtered_table.qza -> ASV table
158 | 5. 10.Diversity_analysis_{RAREFACTION_DEPTH}/bray_curtis_pcoa_results.qza -> Bray-Curtis PCoA results
159 | 6. 10.Diversity_analysis_{RAREFACTION_DEPTH}/bray_curtis_distance_matrix.qza -> Bray-Curtis distance matrix
160 | 7. 15.Function_annotation/picrust2_out_pipeline/pathways_out -> PICRUSt2 pathway output
161 | 8. 15.Function_annotation/picrust2_out_pipeline/KO_metagenome_out -> PICRUSt2 KO / gene output
162 | 
--------------------------------------------------------------------------------
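A note that is not part of the original README: QIIME 2 `.qza` artifacts are zip archives, so if your downstream R scripts expect plain-text inputs you can unpack any of the files listed above with `qiime tools export`. For example (the output directory name here is only an illustration):

    qiime tools export \
      --input-path 08.Filter_feature_table/filtered_table.qza \
      --output-path 08.Filter_feature_table/exported_filtered_table

The exported feature table is written as a BIOM file, which can then be converted to TSV with the `biom convert` utility if needed.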