├── 01.raw_data ├── 40BHFBK1 │ ├── 40BHFBK1_R1.fastq.gz │ └── 40BHFBK1_R2.fastq.gz ├── 40BHFBK2 │ ├── 40BHFBK2_R1.fastq.gz │ └── 40BHFBK2_R2.fastq.gz ├── 40BHFBK3 │ ├── 40BHFBK3_R1.fastq.gz │ └── 40BHFBK3_R2.fastq.gz ├── 40BHFBK4 │ ├── 40BHFBK4_R1.fastq.gz │ └── 40BHFBK4_R2.fastq.gz ├── 40BHFBK1_PF │ ├── 40BHFBK1_PF_R1.fastq.gz │ └── 40BHFBK1_PF_R2.fastq.gz ├── 40BHFBK3_PF │ ├── 40BHFBK3_PF_R1.fastq.gz │ └── 40BHFBK3_PF_R2.fastq.gz └── MANIFEST ├── sequence_data └── metadata.yml ├── create_DB ├── .snakemake │ └── log │ │ ├── 2021-05-26T115231.960767.snakemake.log │ │ ├── 2021-05-26T155428.061801.snakemake.log │ │ ├── 2021-05-26T155241.369810.snakemake.log │ │ ├── 2021-05-26T155251.684002.snakemake.log │ │ ├── 2021-05-26T155501.617298.snakemake.log │ │ ├── 2021-05-26T155630.825466.snakemake.log │ │ └── 2021-05-26T114454.315556.snakemake.log ├── rulegraph.png ├── config │ ├── cluster.yaml │ ├── config.yaml │ └── sample.tsv ├── eukaryote-unite │ ├── config │ │ ├── cluster.yaml │ │ ├── config.yaml │ │ └── sample.tsv │ └── Snakefile └── qsub-submit.sh ├── 00.mapping ├── treatment-metadata.tsv ├── basins-treatment.tsv ├── first_analysis │ ├── indoors-cntVsB12-treatment.tsv │ ├── final_metadata.xlsx │ ├── mock-treatment.tsv │ ├── pe-dada2 │ │ ├── indoors-cntVsB12-treatment.tsv │ │ ├── mock-treatment.tsv │ │ ├── outdoors-treatment.tsv │ │ ├── indoors-minus-cntVsB12-treatment.tsv │ │ ├── indoors-treatment.tsv │ │ ├── mock.tsv │ │ ├── combined-treatment.tsv │ │ ├── indoors-cntVsB12.tsv │ │ ├── indoors-minus-cntVsB12.tsv │ │ ├── outdoors.tsv │ │ └── indoors.tsv │ ├── outdoors-treatment.tsv │ ├── indoors-minus-cntVsB12-treatment.tsv │ ├── indoors-treatment.tsv │ ├── mock.tsv │ ├── combined-treatment.tsv │ ├── indoors-cntVsB12.tsv │ ├── indoors-minus-cntVsB12.tsv │ ├── outdoors.tsv │ └── indoors.tsv ├── outdoors-treatment.tsv ├── metadata.tsv ├── indoors-treatment.tsv ├── basins-edited.tsv ├── basins.tsv ├── indoors-edited.tsv ├── indoors.tsv ├── outdoors-edited.tsv └── outdoors.tsv ├── images └── rulegraph.png ├── docker ├── download_seqs.sh ├── get_samples.sh ├── make_manifest.sh ├── rename_files.sh ├── Makefile └── config.yaml ├── scripts ├── join_reads.sh ├── find-probes.sh ├── functions.sh ├── vsearch-join-pairs.sh ├── blast-seqs.sh ├── 05.run_complete-submit-slurm.sh ├── 01.import-submit-slurm.sh ├── 04.filter_rare-submit-slurm.sh ├── 03.filter_taxa-submit-slurm.sh ├── 02.denoise-submit-slurm.sh ├── classify_ASVs.sh ├── tabulate-metadata.sh ├── phylogeny_tree.sh ├── deblur_denoize.sh ├── .bash_profile ├── generate-krona.py ├── default_variables.sh ├── dada2_denoize.sh ├── krona-arg.py ├── export_table.sh ├── new-dada2_denoize.sh ├── filter-samples.sh ├── .bashrc ├── picrust2_analysis.sh ├── filter_feature_table.sh ├── qiime2_api.ipynb ├── .ipynb_checkpoints │ └── qiime2_api-checkpoint.ipynb ├── ancom_differential_abundance.sh ├── run_pear.pl └── taxa-plots.sh ├── rules ├── count_sequences.smk ├── sequence_length.smk └── filter_samples.smk ├── config ├── cluster.yaml ├── sample.tsv └── config.yaml ├── qsub-submit.sh ├── slurm.mk ├── local.mk ├── examples ├── single-MANIFEST └── merge-MANIFEST └── README.md /01.raw_data/40BHFBK1/40BHFBK1_R1.fastq.gz: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /01.raw_data/40BHFBK1/40BHFBK1_R2.fastq.gz: -------------------------------------------------------------------------------- 1 | 
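The per-sample layout shown in the tree above (01.raw_data/<sample>/<sample>_R1.fastq.gz plus the matching _R2 file) is what the MANIFEST and the docker/make_manifest.sh helper further down assume. A small, hypothetical sanity check for that layout (the loop and its message are illustrative, not part of the repository):
#!/usr/bin/env bash
# Warn about any sample directory under 01.raw_data/ that is missing one of its paired read files
for dir in 01.raw_data/*/; do
    sample=$(basename "$dir")
    for read in R1 R2; do
        [ -f "${dir}${sample}_${read}.fastq.gz" ] || echo "missing ${sample}_${read}.fastq.gz in ${dir}"
    done
done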
-------------------------------------------------------------------------------- /01.raw_data/40BHFBK2/40BHFBK2_R1.fastq.gz: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /01.raw_data/40BHFBK2/40BHFBK2_R2.fastq.gz: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /01.raw_data/40BHFBK3/40BHFBK3_R1.fastq.gz: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /01.raw_data/40BHFBK3/40BHFBK3_R2.fastq.gz: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /01.raw_data/40BHFBK4/40BHFBK4_R1.fastq.gz: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /01.raw_data/40BHFBK4/40BHFBK4_R2.fastq.gz: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /01.raw_data/40BHFBK1_PF/40BHFBK1_PF_R1.fastq.gz: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /01.raw_data/40BHFBK1_PF/40BHFBK1_PF_R2.fastq.gz: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /01.raw_data/40BHFBK3_PF/40BHFBK3_PF_R1.fastq.gz: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /01.raw_data/40BHFBK3_PF/40BHFBK3_PF_R2.fastq.gz: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sequence_data/metadata.yml: -------------------------------------------------------------------------------- 1 | {'phred-offset': 33} 2 | -------------------------------------------------------------------------------- /create_DB/.snakemake/log/2021-05-26T115231.960767.snakemake.log: -------------------------------------------------------------------------------- 1 | Building DAG of jobs... 
2 | -------------------------------------------------------------------------------- /00.mapping/treatment-metadata.tsv: -------------------------------------------------------------------------------- 1 | sample-id Time Group 2 | 40BHFBK 40 HFBK 3 | 40BHFBK_PF 40 HFBK_PF 4 | -------------------------------------------------------------------------------- /images/rulegraph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olabiyi/snakemake-workflow-qiime2/HEAD/images/rulegraph.png -------------------------------------------------------------------------------- /00.mapping/basins-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | Control control 3 | B12_enriched indoor_waterWashed_B12 -------------------------------------------------------------------------------- /create_DB/rulegraph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olabiyi/snakemake-workflow-qiime2/HEAD/create_DB/rulegraph.png -------------------------------------------------------------------------------- /00.mapping/first_analysis/indoors-cntVsB12-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | Control control 3 | indoor+water_washed+B12 indoor_waterWashed_B12 4 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/final_metadata.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olabiyi/snakemake-workflow-qiime2/HEAD/00.mapping/first_analysis/final_metadata.xlsx -------------------------------------------------------------------------------- /00.mapping/first_analysis/mock-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | MOCK MOCK 3 | MOCK-DreamTaq MOCK_DreamTaq 4 | MOCK-NegativeControl MOCK_NegativeControl 5 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/pe-dada2/indoors-cntVsB12-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | control control 3 | indoor+water_washed+B12 indoor_waterWashed_B12 4 | -------------------------------------------------------------------------------- /00.mapping/outdoors-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | Medium Medium 3 | water_washed Washed_with_Sterile_DW 4 | water_washed_Surface_sterile Washed_with_7%_H2O2 -------------------------------------------------------------------------------- /create_DB/config/cluster.yaml: -------------------------------------------------------------------------------- 1 | __default__: 2 | queue: bioinfo.q 3 | shell: /bin/bash 4 | threads: 20 5 | time: 1:00:00 6 | node: 1 7 | 8 | 9 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/pe-dada2/mock-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | MOCK MOCK 3 | MOCK-DreamTaq MOCK_DreamTaq 4 | MOCK-NegativeControl MOCK_NegativeControl 5 | -------------------------------------------------------------------------------- /create_DB/eukaryote-unite/config/cluster.yaml: 
-------------------------------------------------------------------------------- 1 | __default__: 2 | queue: bioinfo.q 3 | shell: /bin/bash 4 | threads: 20 5 | time: 1:00:00 6 | node: 1 7 | 8 | 9 | -------------------------------------------------------------------------------- /create_DB/.snakemake/log/2021-05-26T155428.061801.snakemake.log: -------------------------------------------------------------------------------- 1 | SyntaxError in line 81 of /gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/create_uniteDB/Snakefile: 2 | EOF in multi-line string (Snakefile, line 81) 3 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/outdoors-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | Medium Medium 3 | outdoor+water_washed outdoor_waterWashed 4 | outdoor+water_washed+surface_sterile outdoor_waterwashed_surfaceSterile 5 | outdoor outdoor 6 | -------------------------------------------------------------------------------- /docker/download_seqs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | awk 'BEGIN{FS=","; OFS="\n"} NR>1{print $18,$19}' 00.mapping/Sample_Detail.csv > files2download.txt && \ 5 | parallel -j 20 aws s3 cp {} 01.raw_data/ :::: files2download.txt 6 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/pe-dada2/outdoors-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | Medium Medium 3 | outdoor+water_washed outdoor_waterWashed 4 | outdoor+water_washed+surface_sterile outdoor_waterwashed_surfaceSterile 5 | outdoor outdoor 6 | -------------------------------------------------------------------------------- /00.mapping/metadata.tsv: -------------------------------------------------------------------------------- 1 | sample-id Time Group Treatment 2 | 40BHFBK1 40 BHFBK 40BHFBK 3 | 40BHFBK2 40 BHFBK 40BHFBK 4 | 40BHFBK3 40 BHFBK 40BHFBK 5 | 40BHFBK4 40 BHFBK 40BHFBK 6 | 40BHFBK1_PF 40 BHFBK 40BHFBK_PF 7 | 40BHFBK3_PF 40 BHFBK 40BHFBK_PF 8 | -------------------------------------------------------------------------------- /00.mapping/indoors-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | water_washedSterile_antibiotic water_washed_sterile_plant_grown_with_antibiotic 3 | water_washed_sterile water_washed_sterile_plant 4 | water_washed water_washed_plant 5 | sterile_antibiotic sterile_plant_grown_with_antibiotic 6 | sterile sterile_plant -------------------------------------------------------------------------------- /create_DB/.snakemake/log/2021-05-26T155241.369810.snakemake.log: -------------------------------------------------------------------------------- 1 | KeyError in line 78 of /gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/create_uniteDB/Snakefile: 2 | 'SILVA_CLASSIFIER' 3 | File "/gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/create_uniteDB/Snakefile", line 78, in 4 | -------------------------------------------------------------------------------- /create_DB/.snakemake/log/2021-05-26T155251.684002.snakemake.log: -------------------------------------------------------------------------------- 1 | KeyError in line 78 of /gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/create_uniteDB/Snakefile: 2 | 'SILVA_CLASSIFIER' 3 | File 
"/gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/create_uniteDB/Snakefile", line 78, in 4 | -------------------------------------------------------------------------------- /create_DB/.snakemake/log/2021-05-26T155501.617298.snakemake.log: -------------------------------------------------------------------------------- 1 | KeyError in line 78 of /gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/create_uniteDB/Snakefile: 2 | 'SILVA_CLASSIFIER' 3 | File "/gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/create_uniteDB/Snakefile", line 78, in 4 | -------------------------------------------------------------------------------- /create_DB/.snakemake/log/2021-05-26T155630.825466.snakemake.log: -------------------------------------------------------------------------------- 1 | KeyError in line 104 of /gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/create_uniteDB/Snakefile: 2 | 'forward_primer' 3 | File "/gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/create_uniteDB/Snakefile", line 104, in 4 | -------------------------------------------------------------------------------- /docker/get_samples.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | #mkdir 01.raw_data/ 00.mapping/ 4 | echo "Below are the sample names for config.yaml" 5 | SAMPLES=($(ls -1 01.raw_data/ | grep -Ev "MANIFEST|seq" - |sort -V)) && \ 6 | (echo -ne '[';echo ${SAMPLES[*]} | sed -E 's/ /, /g' | sed -E 's/([A-Za-z0-9_-]+)/"\1"/g'; echo -e ']') 7 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/pe-dada2/indoors-minus-cntVsB12-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | indoor+water_washed+sterile indoor_waterWashed_sterile 3 | indoor+water_washed indoor_waterWashed 4 | indoor+sterile indoor_sterile 5 | indoor+sterile+antibiotic indoor_sterile_antibiotic 6 | indoor+water_washed+sterile+antibiotic indoor_waterWashed_sterile_antibiotic 7 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/indoors-minus-cntVsB12-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | indoor+water_washed+sterile indoor_waterWashed_sterile 3 | indoor+water_washed indoor_waterWashed 4 | indoor+water_washed_sterile indoor_waterWashed_sterile 5 | indoor+sterile indoor_sterile 6 | indoor+sterile+antibiotic indoor_sterile_antibiotic 7 | indoor+water_washed+sterile+antibiotic indoor_waterWashed_sterile_antibiotic 8 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/pe-dada2/indoors-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | indoor+water_washed+sterile indoor_waterWashed_sterile 3 | control control 4 | indoor+water_washed+B12 indoor_waterWashed_B12 5 | indoor+water_washed indoor_waterWashed 6 | indoor+sterile indoor_sterile 7 | indoor+sterile+antibiotic indoor_sterile_antibiotic 8 | indoor+water_washed+sterile+antibiotic indoor_waterWashed_sterile_antibiotic 9 | -------------------------------------------------------------------------------- /scripts/join_reads.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N join_reads 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | 
#$ -notify 8 | #$ -pe shared 40 9 | 10 | set -e 11 | 12 | run_pear.pl -o stitched_reads/ sequence_data/*.fastq.gz 13 | 14 | # clean the folder containing the assembled reads 15 | rm -rf stitched_reads/*.unassembled* stitched_reads/*discarded* 16 | 17 | # gzip to save memory 18 | gzip stitched_reads/*.fastq 19 | -------------------------------------------------------------------------------- /rules/count_sequences.smk: -------------------------------------------------------------------------------- 1 | from os import path 2 | 3 | rule count_sequences: 4 | input: expand("01.raw_data/{sample}.fastq.gz", sample=config['samples']) 5 | output: "sequence_stats/reads_stats.tsv" 6 | log: "log/count_sequences/count_sequences.log" 7 | threads: 10 8 | params: 9 | in_dir=lambda w, input: path.dirname(input[0]) 10 | shell: 11 | "seqkit stats {params.in_dir}/*.fastq.gz > {output} " 12 | 13 | -------------------------------------------------------------------------------- /rules/sequence_length.smk: -------------------------------------------------------------------------------- 1 | from os import path 2 | 3 | rule Get_sequence_length: 4 | input: "01.raw_data/{sample}.fastq.gz" 5 | output: "02.Get_sequence_length/{sample}_sequence_length.tsv" 6 | log: "logs/Get_sequence_length/Get_sequence_length.log" 7 | threads: 10 8 | params: 9 | in_dir=lambda w, input: path.dirname(input[0]) 10 | shell: 11 | "bioawk -c fastx 'BEGIN{{OFS=\"\\t\"}} {{print $name,length($seq)}}' {input} > {output}" 12 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/indoors-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | indoor+water_washed+sterile indoor_waterWashed_sterile 3 | Control control 4 | indoor+water_washed+B12 indoor_waterWashed_B12 5 | indoor+water_washed indoor_waterWashed 6 | indoor+water_washed_sterile indoor_waterWashed_sterile 7 | indoor+sterile indoor_sterile 8 | indoor+sterile+antibiotic indoor_sterile_antibiotic 9 | indoor+water_washed+sterile+antibiotic indoor_waterWashed_sterile_antibiotic 10 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/mock.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number batch pcr_cycles medium_or_plant sterile_plant grown_with_antibiotics location water_washed surface_sterilization b12_enriched treatment description 2 | M-1 NA B NA NA NA NA NA NA NA NA MOCK Mock_community 3 | M-2 NA B NA NA NA NA NA NA NA NA MOCK Mock_community 4 | M-3 NA B NA NA NA NA NA NA NA NA MOCK-DreamTaq Mock_community(Dream_taq_ready_mix) 5 | M-NC NA B NA NA NA NA NA NA NA NA MOCK-NegativeControl Mock_community_negative_control 6 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/pe-dada2/mock.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number batch pcr_cycles medium_or_plant sterile_plant grown_with_antibiotics location water_washed surface_sterilization b12_enriched treatment description 2 | Osnat157-M-1 NA B NA NA NA NA NA NA NA NA MOCK Mock_community 3 | Osnat158-M-2 NA B NA NA NA NA NA NA NA NA MOCK Mock_community 4 | Osnat159-M-3 NA B NA NA NA NA NA NA NA NA MOCK-DreamTaq Mock_community(Dream_taq_ready_mix) 5 | Osnat160-M- NA B NA NA NA NA NA NA NA NA MOCK-NegativeControl Mock_community_negative_control 6 |
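For reference, the two Snakemake rules above (count_sequences and Get_sequence_length) each wrap a single shell command. A minimal sketch of the equivalent commands for one sample, assuming seqkit and bioawk are on the PATH, that the output directories already exist, and using the 40BHFBK1 files from 01.raw_data/ purely as an illustrative input:
# Read-count and length summary for every fastq.gz in one sample directory (what count_sequences produces)
seqkit stats 01.raw_data/40BHFBK1/*.fastq.gz > sequence_stats/reads_stats.tsv
# Per-read lengths as a two-column, tab-separated table (what Get_sequence_length produces per file)
bioawk -c fastx 'BEGIN{OFS="\t"} {print $name, length($seq)}' 01.raw_data/40BHFBK1/40BHFBK1_R1.fastq.gz > 02.Get_sequence_length/40BHFBK1_R1_sequence_length.tsv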
-------------------------------------------------------------------------------- /scripts/find-probes.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N Find_probes 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 40 9 | 10 | set -e 11 | 12 | #source activate qiime2-2020.6 13 | #export PERL5LIB='/gpfs0/bioinfo/users/obayomi/miniconda3/envs/qiime2-2020.6/lib/site_perl/5.26.2/x86_64-linux-thread-multi' 14 | PROBES=('ACTCCTACGGGAGGCAGC' 'GGTGACAGTGGGCAGCGA' 'AAACGATGTGGGAAGGC' 'AAACGAAGTGGGAAGGC') 15 | 16 | FILES=($(find "sequence_data/" -type f -name "*gz")) 17 | 18 | parallel --jobs 0 zgrep {} ${FILES[*]} '>' find-probe/{}.txt ::: ${PROBES[*]} 19 | -------------------------------------------------------------------------------- /config/cluster.yaml: -------------------------------------------------------------------------------- 1 | __default__: 2 | system: 'slurm' 3 | account: "132773335440" 4 | time: "12:00:00" # walltime, HH:MM:SS 5 | mem: "10g" # in GB 6 | threads: "10" 7 | queue: "xlong" # gpu 8 | nodes: "1" 9 | 10 | 11 | Trim_primers: 12 | mem: "60g" # in GB 13 | threads: "28" 14 | 15 | Denoise_reads: 16 | mem: "60g" # in GB 17 | threads: "28" 18 | 19 | Build_phylogenetic_tree: 20 | mem: "60g" # in GB 21 | threads: "28" 22 | 23 | Assign_taxonomy: 24 | mem: "210g" # in GB 25 | threads: "28" 26 | 27 | Function_annotation: 28 | mem: "60g" # in GB 29 | threads: "28" 30 | time: "17:00:00" 31 | -------------------------------------------------------------------------------- /scripts/functions.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Check fastq encoding 4 | function check_fastq_encoding(){ 5 | 6 | # USAGE: 7 | # check_fastq_encoding file.fastq 8 | local fastq_file=$1 9 | 10 | head -n 40 ${fastq_file} | \ 11 | awk '{if(NR%4==0) printf("%s",$0);}' | \ 12 | od -A n -t u1 | \ 13 | awk 'BEGIN{min=100;max=0;}{ 14 | for(i=1;i<=NF;i++) {if($i>max) max=$i; if($i<min) min=$i;}} \ 15 | END{ \ 16 | if(max<=74 && min<59) print "Phred+33"; \ 17 | else if(max>73 && min>=64) print "Phred+64"; \ 18 | else if(min>=59 && min<64 && max>73) print "Solexa+64"; \ 19 | else print "Unknown score encoding\!"; 20 | }' 21 | 22 | } 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/pe-dada2/combined-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | Medium Medium 3 | outdoor+water_washed outdoor_waterWashed 4 | indoor+water_washed+sterile indoor_waterWashed_sterile 5 | outdoor+water_washed+surface_sterile outdoor_waterwashed_surfaceSterile 6 | control control 7 | indoor+water_washed+B12 indoor_waterWashed_B12 8 | indoor+water_washed indoor_waterWashed 9 | outdoor outdoor 10 | indoor+sterile indoor_sterile 11 | indoor+sterile+antibiotic indoor_sterile_antibiotic 12 | indoor+water_washed+sterile+antibiotic indoor_waterWashed_sterile_antibiotic 13 | MOCK MOCK 14 | MOCK-DreamTaq MOCK_DreamTaq 15 | MOCK-NegativeControl MOCK_NegativeControl 16 | -------------------------------------------------------------------------------- /scripts/vsearch-join-pairs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N join_vsearch 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 40 9 | 10 | set -e 11 | 12 | source activate qiime2-2020.6 13 | 14 | # Stitch the forward and reverse reads together using
vsearch 15 | qiime vsearch join-pairs \ 16 | --i-demultiplexed-seqs 01.import/reads.qza \ 17 | --p-truncqual 20 \ 18 | --p-minlen 400 \ 19 | --p-maxns 20 \ 20 | --p-minmergelen 400 \ 21 | --p-maxmergelen 600 \ 22 | --o-joined-sequences 02.Join/vsearch-joined-reads.qza 23 | 24 | # view the joined reads 25 | qiime demux summarize \ 26 | --i-data 02.Join/vsearch-joined-reads.qza \ 27 | --o-visualization 02.QC/vsearch-joined-reads.qzv 28 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/combined-treatment.tsv: -------------------------------------------------------------------------------- 1 | sample-id treatment 2 | Medium Medium 3 | outdoor+water_washed outdoor_waterWashed 4 | indoor+water_washed+sterile indoor_waterWashed_sterile 5 | outdoor+water_washed+surface_sterile outdoor_waterwashed_surfaceSterile 6 | control control 7 | indoor+water_washed+B12 indoor_waterWashed_B12 8 | indoor+water_washed indoor_waterWashed 9 | outdoor outdoor 10 | indoor+water_washed_sterile indoor_waterWashed_sterile 11 | indoor+sterile indoor_sterile 12 | indoor+sterile+antibiotic indoor_sterile_antibiotic 13 | indoor+water_washed+sterile+antibiotic indoor_waterWashed_sterile_antibiotic 14 | MOCK MOCK 15 | MOCK-DreamTaq MOCK_DreamTaq 16 | MOCK-NegativeControl MOCK_NegativeControl 17 | -------------------------------------------------------------------------------- /docker/make_manifest.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | SAMPLES=($(ls -1 01.raw_data/ | grep -Ev "MANIFEST|seq" - |sort -V)) 4 | 5 | # Creating MANIFEST FILE 6 | (echo "sample-id,absolute-filepath,direction"; for SAMPLE in ${SAMPLES[*]}; \ 7 | do echo -ne "${SAMPLE},$PWD/01.raw_data/${SAMPLE}/${SAMPLE}_R1.fastq.gz,forward\n${SAMPLE},$PWD/01.raw_data/${SAMPLE}/${SAMPLE}_R2.fastq.gz,reverse\n";done) \ 8 | > 01.raw_data/MANIFEST 9 | 10 | # Creating the samples.tsv file" 11 | (echo -ne "SampleID\tType\tOld_name\tNew_name\n"; \ 12 | for SAMPLE in ${SAMPLES[*]}; \ 13 | do echo -ne \ 14 | "${SAMPLE}\tForward\t01.raw_data/${SAMPLE}/${SAMPLE}_R1.fastq.gz\t01.raw_data/${SAMPLE}/${SAMPLE}_R1.fastq.gz\n${SAMPLE}\tReverse\t01.raw_data/${SAMPLE}/${SAMPLE}_R2.fastq.gz\t01.raw_data/${SAMPLE}/${SAMPLE}_R2.fastq.gz\n";done) \ 15 | > sample.tsv 16 | -------------------------------------------------------------------------------- /docker/rename_files.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | awk 'BEGIN{FS=","; OFS="\t"} NR>1{ gsub("s3://biodsa-sequencing-data/SEQ44XXX/SEQ44733/Reads/", "", $18); \ 4 | gsub("s3://biodsa-sequencing-data/SEQ44XXX/SEQ44733/Reads/", "", $19); \ 5 | print $1,$18,$19}' 00.mapping/Sample_Detail.csv > 00.mapping/reads_mapping.txt 6 | 7 | SAMPLES=($(awk 'BEGIN{FS=OFS="\t"} {print $1}' 00.mapping/reads_mapping.txt)) 8 | FORWARD=($(awk 'BEGIN{FS=OFS="\t"} {print $2}' 00.mapping/reads_mapping.txt)) 9 | REVERSE=($(awk 'BEGIN{FS=OFS="\t"} {print $3}' 00.mapping/reads_mapping.txt)) 10 | 11 | 12 | parallel -j 10 --link \ 13 | "[ -d 01.raw_data/{3}/ ] || mkdir 01.raw_data/{3}/ && mv 01.raw_data/{1} 01.raw_data/{3}/{3}_R1.fastq.gz && mv 01.raw_data/{2} 01.raw_data/{3}/{3}_R2.fastq.gz" \ 14 | ::: ${FORWARD[*]} ::: ${REVERSE[*]} ::: ${SAMPLES[*]} 15 | -------------------------------------------------------------------------------- /scripts/blast-seqs.sh: -------------------------------------------------------------------------------- 1 | 
#!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N blast_seqs 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 72 9 | 10 | set -euo pipefail 11 | 12 | # database after retrieving the sequence by ASV id from the representative sequence file 13 | DATABASE="/gpfs0/bioinfo/users/obayomi/databases/non_redundant_NCBI_DB/non_redundant" 14 | QUERY="/gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/13.find_B12_bacteria/blast/potential_B12_bacteria_sequences.fasta" 15 | OUT="/gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/13.find_B12_bacteria/blast/potential_B12_bacteria_blast.tsv" 16 | 17 | cat ${QUERY} | \ 18 | parallel --jobs 0 --recstart '>' \ 19 | --pipe blastn -db ${DATABASE} -outfmt \"6 qseqid sseqid stitle pident length mismatch gapopen qstart qend sstart send evalue bitscore\" \ 20 | -max_target_seqs 5 -out ${OUT} -query - 21 | -------------------------------------------------------------------------------- /qsub-submit.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | #$ -S /bin/bash 3 | #$ -q bioinfo.q 4 | #$ -V 5 | #$ -cwd 6 | #$ -N submit-jobs 7 | #$ -pe shared 1 8 | 9 | set -e 10 | 11 | # Activate the main conda environment 12 | source activate qiime2-2020.6 13 | 14 | 15 | # Generate the rule graph on the commadline 16 | # Rule graph 17 | # snakemake -s Snakefile --rulegraph | dot -Tpng > rulegraph.png 18 | # Directed Acyclic Graph (DAG) 19 | # snakemake -s Snakefile --dag | dot -Tpng > dag.png 20 | 21 | # Run snmakemake on the cluster 22 | # --jobs 100 # submit a maximum 100 jobs 23 | # --latency-wait 60 # wait for 60 seconds before declaring that a job has failed 24 | snakemake \ 25 | --keep-going \ 26 | --restart-times 3 \ 27 | --rerun-incomplete \ 28 | --cluster-config config/config.yaml \ 29 | --cluster 'qsub -q bioinfo.q -S /bin/bash -cwd -V -N {rule}.{wildcards} -e logs/{rule}/{rule}.{wildcards}.e -o logs/{rule}/{rule}.{wildcards}.o -pe shared {threads}' \ 30 | --jobs 10 \ 31 | --latency-wait 60 32 | 33 | -------------------------------------------------------------------------------- /create_DB/qsub-submit.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | #$ -S /bin/bash 3 | #$ -q bioinfo.q 4 | #$ -V 5 | #$ -cwd 6 | #$ -N submit-jobs 7 | #$ -pe shared 1 8 | 9 | set -e 10 | 11 | # Activate the main conda environment 12 | source activate qiime2-2020.6 13 | 14 | 15 | # Generate the rule graph on the commadline 16 | # Rule graph 17 | # snakemake -s Snakefile --rulegraph | dot -Tpng > rulegraph.png 18 | # Directed Acyclic Graph (DAG) 19 | # snakemake -s Snakefile --dag | dot -Tpng > dag.png 20 | 21 | # Run snmakemake on the cluster 22 | # --jobs 100 # submit a maximum 100 jobs 23 | # --latency-wait 60 # wait for 60 seconds before declaring that a job has failed 24 | snakemake \ 25 | --keep-going \ 26 | --restart-times 3 \ 27 | --rerun-incomplete \ 28 | --cluster-config config/config.yaml \ 29 | --cluster 'qsub -q bioinfo.q -S /bin/bash -cwd -V -N {rule}.{wildcards} -e logs/{rule}/{rule}.{wildcards}.e -o logs/{rule}/{rule}.{wildcards}.o -pe shared {threads}' \ 30 | --jobs 10 \ 31 | --latency-wait 60 32 | 33 | -------------------------------------------------------------------------------- /create_DB/config/config.yaml: -------------------------------------------------------------------------------- 1 | 2 | QIIME2_ENV: "source activate /home/jeffbrady/miniconda3/envs/qiime2-2020.6" 3 | 4 | # Download pre-trained silva 
database for qiime2 5 | 6 | # Get the pre-trained full-length SILVA 99% classifier 7 | SILVA_CLASSIFIER: "https://data.qiime2.org/2020.6/common/silva-138-99-nb-classifier.qza" 8 | 9 | # Get the raw preformatted sequences 10 | SILVA_SEQUENCES: "https://data.qiime2.org/2020.6/common/silva-138-99-seqs.qza" 11 | 12 | # Get the preformatted taxonomy 13 | SILVA_TAXONOMY: "https://data.qiime2.org/2020.6/common/silva-138-99-tax.qza" 14 | 15 | 16 | # Unite fungi databse for qiime2 17 | UNITE_URL: "https://files.plutof.ut.ee/public/orig/98/AE/98AE96C6593FC9C52D1C46B96C2D9064291F4DBA625EF189FEC1CCAFCF4A1691.gz" 18 | 19 | # Set tool specific parameters 20 | # Sample primers for 341F and 806R 21 | parameters: 22 | extract_sequence: 23 | forward_primer: "GTGCCAGCMGCCGCGGTAA" 24 | reverse_primer: "GGACTACHVGGGTWTCTAAT" 25 | min_length: 100 26 | max_length: 800 27 | trunc_length: 585 28 | -------------------------------------------------------------------------------- /create_DB/eukaryote-unite/config/config.yaml: -------------------------------------------------------------------------------- 1 | 2 | QIIME2_ENV: "source activate /home/jeffbrady/miniconda3/envs/qiime2-2020.6" 3 | 4 | # Download pre-trained silva database for qiime2 5 | 6 | # Get the pre-trained full-length SILVA 99% classifier 7 | SILVA_CLASSIFIER: "https://data.qiime2.org/2020.6/common/silva-138-99-nb-classifier.qza" 8 | 9 | # Get the raw preformatted sequences 10 | SILVA_SEQUENCES: "https://data.qiime2.org/2020.6/common/silva-138-99-seqs.qza" 11 | 12 | # Get the preformatted taxonomy 13 | SILVA_TAXONOMY: "https://data.qiime2.org/2020.6/common/silva-138-99-tax.qza" 14 | 15 | 16 | # Unite fungi databse for qiime2 17 | UNITE_URL: "https://files.plutof.ut.ee/public/orig/1D/31/1D31FA3A308BDC2FB2750D62C0AA40C5058C15405A3CC5C626CC3A3F5E3903ED.tgz" 18 | 19 | # Set tool specific parameters 20 | # Sample primers for 341F and 806R 21 | parameters: 22 | extract_sequence: 23 | forward_primer: "GTGCCAGCMGCCGCGGTAA" 24 | reverse_primer: "GGACTACHVGGGTWTCTAAT" 25 | min_length: 100 26 | max_length: 800 27 | trunc_length: 585 28 | -------------------------------------------------------------------------------- /00.mapping/basins-edited.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number treatment description 2 | 38A 38 Control Control_indoor_plants_washed_with_water 3 | 39A 39 Control Control_indoor_plants_washed_with_water 4 | 40A 40 Control Control_indoor_plants_washed_with_water 5 | 41A 41 Control Control_indoor_plants_washed_with_water 6 | 42A 42 Control Control_indoor_plants_washed_with_water 7 | 43A 43 Control Control_indoor_plants_washed_with_water 8 | 44A 44 Control Control_indoor_plants_washed_with_water 9 | 45A 45 Control Control_indoor_plants_washed_with_water 10 | 46A 46 Control Control_indoor_plants_washed_with_water 11 | 47A 47 B12_enriched Modified_Hinoman_medium_with_B12_enriched 12 | 48A 48 B12_enriched Modified_Hinoman_medium_with_B12_enriched 13 | 49A 49 B12_enriched Modified_Hinoman_medium_with_B12_enriched 14 | 50A 50 B12_enriched Modified_Hinoman_medium_with_B12_enriched 15 | 51A 51 B12_enriched Modified_Hinoman_medium_with_B12_enriched 16 | 52A 52 B12_enriched Modified_Hinoman_medium_with_B12_enriched 17 | 53A 53 B12_enriched Modified_Hinoman_medium_with_B12_enriched 18 | 54A 54 B12_enriched Modified_Hinoman_medium_with_B12_enriched 19 | -------------------------------------------------------------------------------- /scripts/05.run_complete-submit-slurm.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=complete #Set the job name to "JobExample2" 3 | #SBATCH --time=23:00:00 #Set the wall clock limit to 6hr and 30min 4 | #SBATCH --nodes=1 #Request 1 node 5 | #SBATCH --ntasks=1 #Request 1 tasks/cores per node 6 | #SBATCH --mem=1G #Request 1GB per node 7 | #SBATCH --output=complete.o.%j #Send stdout/err to "Example2Out.[jobID]" 8 | #SBATCH --error=complete.e.%j #Send std err to "Example2error.[jobID]" 9 | 10 | module purge 11 | module load iccifort/2020.1.217 12 | module load impi/2019.7.217 13 | module load snakemake/5.26.1-Python-3.8.2 14 | 15 | # Get the rarefation depth for diversity analysis after viewing "08.Filter_feature_table/filtered_table.qzv" and run the complete pipeline 16 | snakemake \ 17 | --jobs 10 \ 18 | --keep-going \ 19 | --rerun-incomplete \ 20 | --cluster-config config/cluster.yaml \ 21 | --cluster "sbatch --partition {cluster.queue} --job-name={rule}.{wildcards} --mem={cluster.mem} --time={cluster.time} --ntasks={cluster.threads}" 22 | 23 | -------------------------------------------------------------------------------- /scripts/01.import-submit-slurm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=import-sequences #Set the job name to "JobExample2" 3 | #SBATCH --time=10:00:00 #Set the wall clock limit to 6hr and 30min 4 | #SBATCH --nodes=1 #Request 1 node 5 | #SBATCH --ntasks=1 #Request 1 tasks/cores per node 6 | #SBATCH --mem=1G #Request 1GB per node 7 | #SBATCH --output=import-seqs.o.%j #Send stdout/err to "Example2Out.[jobID]" 8 | #SBATCH --error=import-seqs.e.%j #Send std err to "Example2error.[jobID]" 9 | 10 | module purge 11 | module load iccifort/2020.1.217 12 | module load impi/2019.7.217 13 | module load snakemake/5.26.1-Python-3.8.2 14 | 15 | # import reads and check their quality to determine trunc lengths for dada2 16 | snakemake \ 17 | --jobs 10 \ 18 | --keep-going \ 19 | --rerun-incomplete \ 20 | --cluster-config config/cluster.yaml \ 21 | --cluster "sbatch --partition {cluster.queue} --mem={cluster.mem} --time={cluster.time} --ntasks={cluster.threads}" \ 22 | "04.QC/trimmed_reads_qual_viz.qzv" "04.QC/raw_reads_qual_viz.qzv" 23 | 24 | 25 | -------------------------------------------------------------------------------- /00.mapping/basins.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number treatment description 2 | 37A 37 Control Control_indoor_plants_washed_with_water 3 | 38A 38 Control Control_indoor_plants_washed_with_water 4 | 39A 39 Control Control_indoor_plants_washed_with_water 5 | 40A 40 Control Control_indoor_plants_washed_with_water 6 | 41A 41 Control Control_indoor_plants_washed_with_water 7 | 42A 42 Control Control_indoor_plants_washed_with_water 8 | 43A 43 Control Control_indoor_plants_washed_with_water 9 | 44A 44 Control Control_indoor_plants_washed_with_water 10 | 45A 45 Control Control_indoor_plants_washed_with_water 11 | 46A 46 B12_enriched Modified_Hinoman_medium_with_B12_enriched 12 | 47A 47 B12_enriched Modified_Hinoman_medium_with_B12_enriched 13 | 48A 48 B12_enriched Modified_Hinoman_medium_with_B12_enriched 14 | 49A 49 B12_enriched Modified_Hinoman_medium_with_B12_enriched 15 | 50A 50 B12_enriched Modified_Hinoman_medium_with_B12_enriched 16 | 51A 51 B12_enriched Modified_Hinoman_medium_with_B12_enriched 17 | 52A 52 B12_enriched Modified_Hinoman_medium_with_B12_enriched 18 | 53A 
53 B12_enriched Modified_Hinoman_medium_with_B12_enriched 19 | 54A 54 B12_enriched Modified_Hinoman_medium_with_B12_enriched 20 | -------------------------------------------------------------------------------- /scripts/04.filter_rare-submit-slurm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=Taxa-plots #Set the job name to "JobExample2" 3 | #SBATCH --time=23:00:00 #Set the wall clock limit to 6hr and 30min 4 | #SBATCH --nodes=1 #Request 1 node 5 | #SBATCH --ntasks=1 #Request 1 tasks/cores per node 6 | #SBATCH --mem=1G #Request 1GB per node 7 | #SBATCH --output=tax-plots.o.%j #Send stdout/err to "Example2Out.[jobID]" 8 | #SBATCH --error=tax-plots.e.%j #Send std err to "Example2error.[jobID]" 9 | 10 | 11 | module purge 12 | module load iccifort/2020.1.217 13 | module load impi/2019.7.217 14 | module load snakemake/5.26.1-Python-3.8.2 15 | 16 | 17 | # Filter rare taxa and make relative abundance bar plots 18 | snakemake \ 19 | --jobs 10 \ 20 | --keep-going \ 21 | --rerun-incomplete \ 22 | --cluster-config config/cluster.yaml \ 23 | --cluster "sbatch --partition {cluster.queue} --job-name={rule}.{wildcards} --mem={cluster.mem} --time={cluster.time} --ntasks={cluster.threads}" \ 24 | "08.Filter_feature_table/filtered_table.qzv" "09.Taxa_bar_plots/group-bar-plot.qzv" "09.Taxa_bar_plots/samples-bar-plots.qzv" 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /scripts/03.filter_taxa-submit-slurm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=Assign-taxonomy #Set the job name to "JobExample2" 3 | #SBATCH --time=23:00:00 #Set the wall clock limit to 6hr and 30min 4 | #SBATCH --nodes=1 #Request 1 node 5 | #SBATCH --ntasks=1 #Request 1 tasks/cores per node 6 | #SBATCH --mem=1G #Request 1GB per node 7 | #SBATCH --output=assign-tax.o.%j #Send stdout/err to "Example2Out.[jobID]" 8 | #SBATCH --error=assign-tax.e.%j #Send std err to "Example2error.[jobID]" 9 | 10 | 11 | module purge 12 | module load iccifort/2020.1.217 13 | module load impi/2019.7.217 14 | module load snakemake/5.26.1-Python-3.8.2 15 | 16 | 17 | # Filter taxa - Examine "08.Filter_feature_table/taxa_filtered_table.qzv" to determine the threshold for filtering out rare taxa 18 | snakemake -pr \ 19 | --jobs 10 \ 20 | --keep-going \ 21 | --rerun-incomplete \ 22 | --cluster-config config/cluster.yaml \ 23 | --cluster "sbatch --partition {cluster.queue} --mem={cluster.mem} --time={cluster.time} --ntasks={cluster.threads}" \ 24 | "06.Assign_taxonomy/taxonomy.qzv" "07.Build_phylogenetic_tree/rooted-tree.qza" "08.Filter_feature_table/taxa_filtered_table.qzv" 25 | 26 | -------------------------------------------------------------------------------- /01.raw_data/MANIFEST: -------------------------------------------------------------------------------- 1 | sample-id,absolute-filepath,direction 2 | 40BHFBK1,/home/jeffbrady/biyi/snakemake-workflow-qiime2/01.raw_data/40BHFBK1/40BHFBK1_R1.fastq.gz,forward 3 | 40BHFBK1,/home/jeffbrady/biyi/snakemake-workflow-qiime2/01.raw_data/40BHFBK1/40BHFBK1_R2.fastq.gz,reverse 4 | 40BHFBK1_PF,/home/jeffbrady/biyi/snakemake-workflow-qiime2/01.raw_data/40BHFBK1_PF/40BHFBK1_PF_R1.fastq.gz,forward 5 | 40BHFBK1_PF,/home/jeffbrady/biyi/snakemake-workflow-qiime2/01.raw_data/40BHFBK1_PF/40BHFBK1_PF_R2.fastq.gz,reverse 6 | 
40BHFBK2,/home/jeffbrady/biyi/snakemake-workflow-qiime2/01.raw_data/40BHFBK2/40BHFBK2_R1.fastq.gz,forward 7 | 40BHFBK2,/home/jeffbrady/biyi/snakemake-workflow-qiime2/01.raw_data/40BHFBK2/40BHFBK2_R2.fastq.gz,reverse 8 | 40BHFBK3,/home/jeffbrady/biyi/snakemake-workflow-qiime2/01.raw_data/40BHFBK3/40BHFBK3_R1.fastq.gz,forward 9 | 40BHFBK3,/home/jeffbrady/biyi/snakemake-workflow-qiime2/01.raw_data/40BHFBK3/40BHFBK3_R2.fastq.gz,reverse 10 | 40BHFBK3_PF,/home/jeffbrady/biyi/snakemake-workflow-qiime2/01.raw_data/40BHFBK3_PF/40BHFBK3_PF_R1.fastq.gz,forward 11 | 40BHFBK3_PF,/home/jeffbrady/biyi/snakemake-workflow-qiime2/01.raw_data/40BHFBK3_PF/40BHFBK3_PF_R2.fastq.gz,reverse 12 | 40BHFBK4,/home/jeffbrady/biyi/snakemake-workflow-qiime2/01.raw_data/40BHFBK4/40BHFBK4_R1.fastq.gz,forward 13 | 40BHFBK4,/home/jeffbrady/biyi/snakemake-workflow-qiime2/01.raw_data/40BHFBK4/40BHFBK4_R2.fastq.gz,reverse 14 | -------------------------------------------------------------------------------- /scripts/02.denoise-submit-slurm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --job-name=dada-denoise #Set the job name to "JobExample2" 3 | #SBATCH --time=23:00:00 #Set the wall clock limit to 6hr and 30min 4 | #SBATCH --nodes=1 #Request 1 node 5 | #SBATCH --ntasks=1 #Request 1 tasks/cores per node 6 | #SBATCH --mem=1G #Request 1GB per node 7 | #SBATCH --output=dada-denoise.o.%j #Send stdout/err to "Example2Out.[jobID]" 8 | #SBATCH --error=dada-denoise.e.%j #Send std err to "Example2error.[jobID]" 9 | 10 | 11 | module purge 12 | module load iccifort/2020.1.217 13 | module load impi/2019.7.217 14 | module load snakemake/5.26.1-Python-3.8.2 15 | 16 | 17 | # Denoise reads - chimera removal, reads merging, quality trimming and ASV feature table generation take a good look at 05.Denoise_reads/denoise_stats.qzv to see if you didn't lose too many reads and if the reads merged well. 
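# (Illustrative aside, not part of the original script: if you prefer the terminal to https://view.qiime2.org/, the denoising stats can also be unpacked with qiime tools export; the export directory below is an arbitrary name.)
# qiime tools export --input-path 05.Denoise_reads/denoise_stats.qzv --output-path 05.Denoise_reads/denoise_stats_export
# less 05.Denoise_reads/denoise_stats_export/metadata.tsv   # for tabulated stats the table usually lands in metadata.tsv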
If the denoising was not successful, adjust the parameters you set for dada2 and then re-run 18 | snakemake \ 19 | --jobs 10 \ 20 | --keep-going \ 21 | --rerun-incomplete \ 22 | --cluster-config config/cluster.yaml \ 23 | --cluster "sbatch --partition {cluster.queue} --mem={cluster.mem} --time={cluster.time} --ntasks={cluster.threads}" \ 24 | "05.Denoise_reads/denoise_stats.qzv" "05.Denoise_reads/table_summary.qzv" "05.Denoise_reads/representative_sequences.qzv" 25 | 26 | 27 | -------------------------------------------------------------------------------- /scripts/classify_ASVs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N ASV_classify 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 40 9 | 10 | set -e 11 | 12 | source activate qiime2-2020.6 13 | export PERL5LIB='/gpfs0/bioinfo/users/obayomi/miniconda3/envs/qiime2-2020.6/lib/site_perl/5.26.2/x86_64-linux-thread-multi' 14 | export TEMPDIR='/gpfs0/bioinfo/users/obayomi/hinuman_analysis/18S_illumina/tmp/' TMPDIR='/gpfs0/bioinfo/users/obayomi/hinuman_analysis/18S_illumina/tmp/' 15 | 16 | #IN_PREFIX=('03.dada_denoise/se' '03.dada_denoise/pear-joined' '03.deblur_denoise/se' '03.deblur_denoise/pear-joined') 17 | IN_PREFIX=('03.redo_dada_denoise/se' '03.redo_dada_denoise/pear-joined' '03.redo_dada_denoise/pe') 18 | 19 | #OUT_PREFIX=('04.assign_taxonomy/dada2/se' '04.assign_taxonomy/dada2/pear-joined' '04.assign_taxonomy/deblur/se' '04.assign_taxonomy/deblur/pear-joined') 20 | OUT_PREFIX=('04.redo_assign_taxonomy/dada2/se' '04.redo_assign_taxonomy/dada2/pear-joined' '04.redo_assign_taxonomy/dada2/pe' ) 21 | 22 | # Classify representative ASV sequences against a pre-trained SILVA database with Naive Bayes 23 | parallel --jobs 0 --link qiime feature-classifier classify-sklearn \ 24 | --i-classifier /gpfs0/bioinfo/users/obayomi/databases/q2_database/silva-138-99-nb-classifier.qza \ 25 | --i-reads {1}-representative_sequences.qza \ 26 | --o-classification {2}-taxonomy.qza ::: ${IN_PREFIX[*]} ::: ${OUT_PREFIX[*]} 27 | 28 | parallel --jobs 0 qiime metadata tabulate \ 29 | --m-input-file {}-taxonomy.qza \ 30 | --o-visualization {}-taxonomy.qzv ::: ${OUT_PREFIX[*]} 31 | -------------------------------------------------------------------------------- /slurm.mk: -------------------------------------------------------------------------------- 1 | .PHONY: import denoise assign_taxonomy plot complete clean 2 | 3 | complete: 4 | @echo "Running the complete pipeline. Quality reports, core diversity analysis, statistics and functional analysis" 5 | sbatch 05.run_complete-submit-slurm.sh 6 | 7 | import: 8 | @echo "Importing, trimming primers and adapters, and performing initial quality checks" 9 | @echo "Inspect the plots generated in 04.QC/trimmed_reads_qual_viz.qzv at https://view.qiime2.org/" 10 | sbatch 01.import-submit-slurm.sh 11 | 12 | denoise: 13 | @echo "Denoising your imported sequences" 14 | @echo "Inspect the table 05.Denoise_reads/denoise_stats.qzv at https://view.qiime2.org/" 15 | @echo "Edit the config/config.yaml file appropriately and re-run if too many reads were lost during denoising."
16 | sbatch 02.denoise-submit-slurm.sh 17 | 18 | assign_taxonomy: 19 | @echo "Assigning taxonomy and filtering out non-target taxa" 20 | @echo "After this run completes" 21 | @echo "Examine 08.Filter_feature_table/taxa_filtered_table.qzv" 22 | @echo "To figure out the total number of sequences ('Total frequency') to be used to determine the minimum frequency for filtering out rare taxa" 23 | @echo "Simply multiply the total number of sequences by your threshold, for example 0.00005 (0.005 percent)" 24 | @echo "python -c 'print(1298206 * 0.00005)' = 64.9103" 25 | @echo "Set the 'minimum_frequency' parameter in config/config.yaml with the result of this calculation rounded up like so:" 26 | @echo "minimum_frequency: 65" 27 | sbatch 03.filter_taxa-submit-slurm.sh 28 | 29 | plot: 30 | @echo "Filtering out rare ASVs and generating taxonomy plots" 31 | sbatch 04.filter_rare-submit-slurm.sh 32 | 33 | clean: 34 | rm slurm-* *.{e,o}.* 35 | 36 | -------------------------------------------------------------------------------- /scripts/tabulate-metadata.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | # associate the representative sequences with their taxonomic annotations 4 | qiime metadata tabulate \ 5 | --m-input-file rep-seqs.qza \ 6 | --m-input-file taxonomy.qza \ 7 | --o-visualization tabulated-feature-metadata.qzv 8 | 9 | 10 | # Metadata merging is supported anywhere that metadata is accepted in QIIME 2. For example, it might be interesting to color an Emperor plot based on the study metadata, or sample alpha diversity. This can be accomplished by providing both the sample metadata file and the SampleData[AlphaDiversity] artifact: 11 | qiime emperor plot \ 12 | --i-pcoa unweighted_unifrac_pcoa_results.qza \ 13 | --m-metadata-file sample-metadata.tsv \ 14 | --m-metadata-file faith_pd_vector.qza \ 15 | --o-visualization unweighted-unifrac-emperor-with-alpha.qzv 16 | 17 | 18 | 19 | # Merging metadata 20 | # Since metadata can come from many different sources, QIIME 2 supports metadata merging when running commands. Building upon the examples above, simply passing --m-input-file multiple times will combine the metadata columns in the specified files. 21 | qiime metadata tabulate \ 22 | --m-input-file sample-metadata.tsv \ 23 | --m-input-file faith_pd_vector.qza \ 24 | --o-visualization tabulated-combined-metadata.qzv 25 | 26 | # To view an artifact as metadata, simply pass it in to any method or visualizer that expects to see metadata (e.g.
metadata tabulate or emperor plot): 27 | qiime metadata tabulate \ 28 | --m-input-file faith_pd_vector.qza \ 29 | --o-visualization tabulated-faith-pd-metadata.qzv 30 | 31 | # Tabulate your mapping file with QIIME2 32 | qiime metadata tabulate \ 33 | --m-input-file sample-metadata.tsv \ 34 | --o-visualization tabulated-sample-metadata.qzv 35 | -------------------------------------------------------------------------------- /scripts/phylogeny_tree.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N make_tree 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 40 9 | 10 | set -e 11 | 12 | source activate qiime2-2020.6 13 | export PERL5LIB='/gpfs0/bioinfo/users/obayomi/miniconda3/envs/qiime2-2020.6/lib/site_perl/5.26.2/x86_64-linux-thread-multi' 14 | #IN_PREFIX=('03.dada_denoise/se' '03.dada_denoise/pear-joined' '03.deblur_denoise/se' '03.deblur_denoise/pear-joined') 15 | IN_PREFIX=('03.redo_dada_denoise/se' '03.redo_dada_denoise/pear-joined' '03.redo_dada_denoise/pe' ) 16 | 17 | #OUT_PREFIX=('06.make_tree/dada2/se' '06.make_tree/dada2/pear-joined' '06.make_tree/deblur/se' '06.make_tree/deblur/pear-joined') 18 | OUT_PREFIX=('06.redo_make_tree/dada2/se' '06.redo_make_tree/dada2/pear-joined' '06.redo_make_tree/dada2/pe') 19 | 20 | # Make phylogenetic tree pipeline - all the below in one command 21 | parallel --jobs 0 --link qiime phylogeny align-to-tree-mafft-fasttree \ 22 | --i-sequences {1}-representative_sequences.qza \ 23 | --o-alignment {2}-aligned_representative_sequences.qza \ 24 | --o-masked-alignment {2}-masked_aligned_representative_sequences.qza \ 25 | --o-tree {2}-unrooted-tree.qza \ 26 | --o-rooted-tree {2}-rooted-tree.qza ::: ${IN_PREFIX[*]} ::: ${OUT_PREFIX[*]} 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | #Steps for generating a phylogenetic tree 35 | #qiime alignment mafft \ 36 | # --i-sequences representative_sequences.qza \ 37 | # --o-alignment aligned_representative_sequences 38 | 39 | #qiime alignment mask \ 40 | # --i-alignment aligned_representative_sequences.qza \ 41 | # --o-masked-alignment masked_aligned_representative_sequences 42 | 43 | #qiime phylogeny fasttree \ 44 | # --i-alignment masked_aligned_representative_sequences.qza \ 45 | # --o-tree unrooted_tree 46 | 47 | #qiime phylogeny midpoint-root \ 48 | # --i-tree unrooted_tree.qza \ 49 | # --o-rooted-tree rooted_tree 50 | -------------------------------------------------------------------------------- /scripts/deblur_denoize.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N denoize_deblur 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 40 9 | 10 | set -e 11 | 12 | source activate qiime2-2020.6 13 | 14 | #PREFIX="se" 15 | #TRUNC_LENGTH=280 16 | PREFIX="pear-joined" 17 | TRUNC_LENGTH=400 #587 18 | 19 | # initial quality filtering process based on quality scores 20 | qiime quality-filter q-score \ 21 | --i-demux 01.import/${PREFIX}-reads.qza \ 22 | --o-filtered-sequences 03.deblur_denoise/${PREFIX}-reads-filtered.qza \ 23 | --o-filter-stats 03.deblur_denoise/${PREFIX}-reads-filter-stats.qza 24 | 25 | qiime metadata tabulate \ 26 | --m-input-file 03.deblur_denoise/${PREFIX}-reads-filter-stats.qza \ 27 | --o-visualization 03.deblur_denoise/${PREFIX}-reads-filter-stats.qzv 28 | 29 | # Next, the Deblur workflow is applied using the qiime deblur denoise-16S method. 
This method requires one parameter that is used in quality filtering, --p-trim-length n which truncates the sequences at position n. In general, the Deblur developers recommend setting this value to a length where the median quality score begins to drop too low 30 | qiime deblur denoise-16S \ 31 | --i-demultiplexed-seqs 03.deblur_denoise/${PREFIX}-reads-filtered.qza \ 32 | --p-trim-length ${TRUNC_LENGTH} \ 33 | --o-representative-sequences 03.deblur_denoise/${PREFIX}-representative_sequences.qza \ 34 | --o-table 03.deblur_denoise/${PREFIX}-table.qza \ 35 | --p-sample-stats \ 36 | --o-stats 03.deblur_denoise/${PREFIX}-denoise_stats.qza 37 | 38 | qiime deblur visualize-stats \ 39 | --i-deblur-stats 03.deblur_denoise/${PREFIX}-denoise_stats.qza \ 40 | --o-visualization 03.deblur_denoise/${PREFIX}-denoise_stats.qzv 41 | 42 | 43 | qiime feature-table summarize \ 44 | --i-table 03.deblur_denoise/${PREFIX}-table.qza \ 45 | --o-visualization 03.deblur_denoise/${PREFIX}-table_summary.qzv 46 | 47 | 48 | qiime feature-table tabulate-seqs \ 49 | --i-data 03.deblur_denoise/${PREFIX}-representative_sequences.qza \ 50 | --o-visualization 03.deblur_denoise/${PREFIX}-representative_sequences.qzv 51 | -------------------------------------------------------------------------------- /rules/filter_samples.smk: -------------------------------------------------------------------------------- 1 | 2 | # Filter samples based on a provide metadata file 3 | rule Filter_samples: 4 | input: 5 | table=rules.Exclude_non_target_taxa.output.table_raw, 6 | metadata=config['metadata'] 7 | output: 8 | table_raw="08.Filter_feature_table/samples_filtered_table.qza", 9 | table_viz="08.Filter_feature_table/samples_filtered_table.qzv" 10 | log: "logs/Filter_samples/Filter_samples.log" 11 | threads: 1 12 | params: 13 | conda_activate=config["QIIME2_ENV"], 14 | minumum_frequency=config['minimum_frequency'] 15 | shell: 16 | """ 17 | set +u 18 | {params.conda_activate} 19 | set -u 20 | 21 | # Filter samples 22 | qiime feature-table filter-samples \ 23 | --i-table {input.table} \ 24 | --m-metadata-file {input.metadata} \ 25 | --o-filtered-table {output.table_raw} 26 | 27 | qiime feature-table summarize \ 28 | --i-table {output.table_raw} \ 29 | --o-visualization {output.table_viz} 30 | """ 31 | 32 | 33 | 34 | # Removing rare taxa i.e. 
features with abundance less the 0.005% 35 | rule Exclude_rare_taxa: 36 | input: 37 | rules.Filter_samples.output.table_raw 38 | output: 39 | table_raw="08.Filter_feature_table/filtered_table.qza", 40 | table_viz="08.Filter_feature_table/filtered_table.qzv" 41 | log: "logs//Exclude_singletons.log" 42 | threads: 1 43 | params: 44 | conda_activate=config["QIIME2_ENV"], 45 | minumum_frequency=config['minimum_frequency'] 46 | shell: 47 | """ 48 | set +u 49 | {params.conda_activate} 50 | set -u 51 | 52 | # Removing rare otus / features with abundance less the 0.005% 53 | qiime feature-table filter-features \ 54 | --i-table {input} \ 55 | --p-min-frequency {params.minumum_frequency} \ 56 | --o-filtered-table {output.table_raw} 57 | 58 | qiime feature-table summarize \ 59 | --i-table {output.table_raw} \ 60 | --o-visualization {output.table_viz} 61 | """ 62 | 63 | -------------------------------------------------------------------------------- /scripts/.bash_profile: -------------------------------------------------------------------------------- 1 | source /storage/SGE6U8/default/common/settings.sh 2 | 3 | #export SOURCETRACKER_PATH=/gpfs0/biores/users/gilloro/Biyi/SourceTracking/sourcetracker-1.0.1 4 | #Chimera slayer 5 | export PATH=/fastspace/bioinfo_apps/microbiomeutil-r20110519/ChimeraSlayer/:$PATH 6 | #vsearch 7 | export PATH=/fastspace/bioinfo_apps/vsearch/vsearch_v2.3.4/bin/:$PATH 8 | #pathogen analysis scripts 9 | #export PATH=/gpfs0/biores/users/gilloro/Biyi/pathogen_analysis/:$PATH 10 | #qiime 11 | #export PATH=/fastspace/bioinfo_apps/qiime/usr/local/bin/:$PATH 12 | #NCBI blast 13 | export PATH=/gpfs0/bioinfo/users/obayomi/ncbi-blast-2.3.0+/bin/:$PATH 14 | #qsub 15 | export PATH=/storage/SGE6U8/bin/lx24-amd64/:$PATH 16 | #all executables 17 | export PATH=/gpfs0/bioinfo/users/obayomi/bin/:$PATH 18 | #sra tolkit 19 | export PATH=/gpfs0/bioinfo/users/obayomi/sratoolkit.2.9.6-1-ubuntu64/bin/:$PATH 20 | #Diamond 0.7.11 21 | #export PATH=/fastspace/bioinfo_apps/Diamond/v0.7.11/:$PATH 22 | #MEGAN 23 | export PATH=/gpfs0/bioinfo/users/obayomi/megan/:$PATH 24 | #MEGAN commandline tools 25 | export PATH=/gpfs0/bioinfo/users/obayomi/megan/tools:$PATH 26 | #minimap2 for aligning long reads like nanopore 27 | export PATH=/gpfs0/bioinfo/users/obayomi/minimap2:$PATH 28 | #fastx tool kit for processing fasta and fastq files 29 | #export PATH=/gpfs0/biores/users/gilloro/Biyi/fastx_toolkit/bin:$PATH 30 | 31 | #centrifuge for metagenomic reads classification 32 | export PATH=/gpfs0/bioinfo/users/obayomi/centrifuge/:$PATH 33 | #microbiome helper 34 | export PATH=/gpfs0/bioinfo/users/obayomi/microbiome_helper/:$PATH 35 | # LAST 36 | export PATH=/gpfs0/bioinfo/users/obayomi/last-1021/src/:$PATH 37 | export PATH=/gpfs0/bioinfo/users/obayomi/last-1021/scripts/:$PATH 38 | # Kraken 39 | export PATH=/fastspace/bioinfo_apps/kraken/:$PATH 40 | #metaphlan2 41 | #export PATH=/gpfs0/bioinfo/users/obayomi/biobakery-metaphlan2-5bd7cd0e4854/:$PATH 42 | #miniconda 43 | export PATH=/gpfs0/bioinfo/users/obayomi/miniconda3/envs/python2/bin/:$PATH 44 | #set SGE_ROOT variable 45 | export SGE_ROOT=/storage/SGE6U8 46 | #HMM 47 | export PATH=/gpfs0/bioinfo/apps/HMMER/HMMER_v3.1b1/bin/:$PATH 48 | #metaBAT 49 | export PATH=/gpfs0/bioinfo/users/obayomi/metabat/:$PATH 50 | alias ll='ls --color=auto -alh' -------------------------------------------------------------------------------- /00.mapping/first_analysis/indoors-cntVsB12.tsv: -------------------------------------------------------------------------------- 1 | sample-id 
sample_number batch pcr_cycles medium_or_plant sterile_plant grown_with_antibiotics location water_washed surface_sterilization b12_enriched treatment description 2 | 37A 37 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 3 | 38A 38 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 4 | 39A 39 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 5 | 40A 40 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 6 | 41A 41 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 7 | 42A 42 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 8 | 43A 43 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 9 | 44A 44 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 10 | 45A 45 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 11 | 46A 46 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 12 | 47A 47 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 13 | 48A 48 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 14 | 49A 49 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 15 | 50A 50 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 16 | 51A 51 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 17 | 52A 52 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 18 | 53A 53 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 19 | 54A 54 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 20 | -------------------------------------------------------------------------------- /00.mapping/indoors-edited.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number sterile_plant grown_with_antibiotics water_washed treatment description 2 | 9A-2 55 No No Yes water_washed water_washed_plant_with_pPNA_&_mPNA 3 | 10A-2 56 No No Yes water_washed water_washed_plant_with_pPNA_&_mPNA 4 | 11A-2 57 No No Yes water_washed water_washed_plant_with_pPNA_&_mPNA 5 | 12A-2 58 No No Yes water_washed water_washed_plant_with_pPNA_&_mPNA 6 | 23A-2 69 No No Yes water_washed water_washed_plant_with_pPNA_&_mPNA 7 | 25A-2 70 Yes No Yes water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 8 | 26A-2 71 Yes No Yes water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 9 | 27A-2 72 Yes No Yes water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 10 | 28A-2 73 Yes No No sterile sterile_plant_with_pPNA_&_mPNA 11 | 29A-2 74 Yes No No sterile sterile_plant_with_pPNA_&_mPNA 12 | 30A-2 75 Yes No Yes water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 13 | 31A-2 76 Yes No No sterile sterile_plant_with_pPNA_&_mPNA 14 | 32A-2 77 Yes No Yes water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 15 | 33A-2 78 Yes No No sterile sterile_plant_with_pPNA_&_mPNA 16 | 34A-2 79 Yes No Yes 
water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 17 | 35A-2 80 Yes No Yes water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 18 | 36A-2 81 Yes Yes Yes water_washedSterile_antibiotic water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 19 | 37A-2 82 Yes Yes No sterile_antibiotic sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 20 | 38A-2 83 Yes Yes Yes water_washedSterile_antibiotic water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 21 | 39A-2 84 Yes Yes No sterile_antibiotic sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 22 | 40A-2 85 Yes Yes Yes water_washedSterile_antibiotic water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 23 | 41A-2 86 Yes Yes No sterile_antibiotic sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 24 | 42A-2 87 Yes Yes Yes water_washedSterile_antibiotic water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 25 | 43A-2 88 Yes Yes No sterile_antibiotic sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 26 | 44A-2 89 Yes Yes No sterile_antibiotic sterile_plant_grown_with_antibiotic 27 | -------------------------------------------------------------------------------- /00.mapping/indoors.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number sterile_plant grown_with_antibiotics water_washed treatment description 2 | 9A-2 55 No No Yes water_washed water_washed_plant_with_pPNA_&_mPNA 3 | 10A-2 56 No No Yes water_washed water_washed_plant_with_pPNA_&_mPNA 4 | 11A-2 57 No No Yes water_washed water_washed_plant_with_pPNA_&_mPNA 5 | 12A-2 58 No No Yes water_washed water_washed_plant_with_pPNA_&_mPNA 6 | 23A-2 69 Yes No Yes water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 7 | 25A-2 70 Yes No Yes water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 8 | 26A-2 71 Yes No Yes water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 9 | 27A-2 72 Yes No Yes water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 10 | 28A-2 73 Yes No No sterile sterile_plant_with_pPNA_&_mPNA 11 | 29A-2 74 Yes No No sterile sterile_plant_with_pPNA_&_mPNA 12 | 30A-2 75 Yes No Yes water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 13 | 31A-2 76 Yes No No sterile sterile_plant_with_pPNA_&_mPNA 14 | 32A-2 77 Yes No Yes water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 15 | 33A-2 78 Yes No No sterile sterile_plant_with_pPNA_&_mPNA 16 | 34A-2 79 Yes No Yes water_washed_sterile water_washed_sterile_plant_with_pPNA_&_mPNA 17 | 35A-2 80 Yes Yes No sterile_antibiotic sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 18 | 36A-2 81 Yes Yes Yes water_washedSterile_antibiotic water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 19 | 37A-2 82 Yes Yes No sterile_antibiotic sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 20 | 38A-2 83 Yes Yes Yes water_washedSterile_antibiotic water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 21 | 39A-2 84 Yes Yes No sterile_antibiotic sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 22 | 40A-2 85 Yes Yes Yes water_washedSterile_antibiotic water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 23 | 41A-2 86 Yes Yes No sterile_antibiotic sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 24 | 42A-2 87 Yes Yes Yes water_washedSterile_antibiotic water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 25 | 43A-2 88 Yes Yes No sterile_antibiotic sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 26 | 
44A-2 89 Yes Yes No sterile_antibiotic sterile_plant_grown_with_antibiotic -------------------------------------------------------------------------------- /create_DB/.snakemake/log/2021-05-26T114454.315556.snakemake.log: -------------------------------------------------------------------------------- 1 | Building DAG of jobs... 2 | Job counts: 3 | count jobs 4 | 1 Download_silva_database 5 | 1 Download_unite_database 6 | 1 Extract_primer_silva_reads 7 | 1 Import_unite_sequences 8 | 1 Import_unite_taxonomy 9 | 1 Train_silva_classifier 10 | 1 Train_unite_classifier 11 | 1 Unzip_unite_DB 12 | 1 all 13 | 1 make_logs_directories 14 | 10 15 | 16 | [Wed May 26 11:45:01 2021] 17 | rule make_logs_directories: 18 | output: logs/Download_silva_database, logs/Download_unite_database, logs/Train_unite_classifier, logs/Train_silva_classifier 19 | jobid: 1 20 | reason: Missing output files: logs/Train_silva_classifier, logs/Download_silva_database, logs/Download_unite_database, logs/Train_unite_classifier 21 | 22 | 23 | [ -d logs/ ] || mkdir -p logs/ 24 | cd logs/ 25 | for RULE in Download_silva_database Extract_primer_silva_reads Train_silva_classifier Download_unite_database Unzip_unite_DB Import_unite_sequences Import_unite_taxonomy Import_unite_taxonomy Train_unite_classifier; do 26 | [ -d ${RULE}/ ] || mkdir -p ${RULE}/ 27 | done 28 | 29 | 30 | [Wed May 26 11:45:01 2021] 31 | rule Download_unite_database: 32 | input: logs/Download_silva_database, logs/Download_unite_database, logs/Train_unite_classifier, logs/Train_silva_classifier 33 | output: 00.database/unite.gz 34 | jobid: 8 35 | reason: Missing output files: 00.database/unite.gz; Input files updated by another job: logs/Train_silva_classifier, logs/Download_silva_database, logs/Download_unite_database, logs/Train_unite_classifier 36 | 37 | wget -O 00.database/unite.gz https://files.plutof.ut.ee/public/orig/98/AE/98AE96C6593FC9C52D1C46B96C2D9064291F4DBA625EF189FEC1CCAFCF4A1691.gz 38 | 39 | [Wed May 26 11:45:01 2021] 40 | rule Download_silva_database: 41 | input: logs/Download_silva_database, logs/Download_unite_database, logs/Train_unite_classifier, logs/Train_silva_classifier 42 | output: 00.database/silva-138-99-nb-classifier.qza, 00.database/silva-138-99-seqs.qza, 00.database/silva-138-99-tax.qza 43 | jobid: 4 44 | reason: Missing output files: 00.database/silva-138-99-tax.qza, 00.database/silva-138-99-seqs.qza; Input files updated by another job: logs/Train_silva_classifier, logs/Download_silva_database, logs/Download_unite_database, logs/Train_unite_classifier 45 | 46 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/pe-dada2/indoors-cntVsB12.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number batch pcr_cycles medium_or_plant sterile_plant grown_with_antibiotics location water_washed surface_sterilization b12_enriched treatment description 2 | Osnat036-37-A 37 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 3 | Osnat037-38-A 38 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 4 | Osnat038-39-A 39 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 5 | Osnat039-40-A 40 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 6 | Osnat040-41-A 41 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 7 | Osnat041-42-A 42 A NA Plant No No indoors 
Yes No Control control Control_indoor_plants_washed_with_water 8 | Osnat042-43-A 43 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 9 | Osnat043-44-A 44 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 10 | Osnat044-45-A 45 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 11 | Osnat045-46-A 46 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 12 | Osnat046-47-A 47 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 13 | Osnat047-48-A 48 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 14 | Osnat048-49-A 49 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 15 | Osnat049-50-A 50 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 16 | Osnat050-51-A 51 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 17 | Osnat051-52-A 52 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 18 | Osnat052-53-A 53 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 19 | Osnat053-54-A 54 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 20 | -------------------------------------------------------------------------------- /scripts/generate-krona.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import zipfile 4 | 5 | #extract level-7.csv from the taxa bar plots 6 | def unzip(qzv_file): 7 | with zipfile.ZipFile(qzv_file) as zip: 8 | for zip_info in zip.infolist(): 9 | if "level-7.csv" in zip_info.filename: 10 | zip_info.filename=os.path.basename(zip_info.filename) 11 | zip.extract(zip_info) 12 | 13 | #create tsv files which Krona likes 14 | def make_tsv(name): 15 | tsv=open("krona-tsv/"+name+".tsv","w+") 16 | tsv.write(name+"\n") 17 | for i in range(0,len(new)): 18 | tsv.write(data_dict[name][i]+"\t"+new[i]+"\n") 19 | tsv.close() 20 | 21 | #this is my base output. you can change it to anything you wish. 22 | unzip("taxa-bar-plots.qzv") 23 | 24 | #this folder will be deleted at the end of the process 25 | if not os.path.exists("krona-tsv"): 26 | os.makedirs("krona-tsv") 27 | 28 | #this folder will hold the final output 29 | if not os.path.exists("Krona"): 30 | os.makedirs("Krona") 31 | 32 | file=open("level-7.csv","r") 33 | 34 | lines=file.readlines() 35 | 36 | file.close() 37 | 38 | #remove the file since we don't need it anymore 39 | os.system("rm "+"level-7.csv") 40 | 41 | taxa=[] 42 | new=[] 43 | sample_names=[] 44 | data_dict={} 45 | for line in lines: 46 | line=line.strip().split(",") 47 | if line[0]=="index": 48 | for i in line: 49 | if ";" in i: 50 | taxa.append(i) 51 | elif i.startswith("Unassigned"): 52 | taxa.append("Unassigned") 53 | 54 | else: 55 | data=[] 56 | sample_names.append(line[0]) 57 | for value in line: 58 | if any(i.isalpha() for i in value)==True: 59 | pass 60 | else: 61 | value=value.split(".") 62 | data.append(value[0]) 63 | data_dict[line[0]]=data 64 | 65 | #Regex for SILVA and greengenes. I don't like to see the prefix they add. 66 | #There is no harm with other databases.
If you want to leave them be, just remove the first two lines after the "for" loop. 67 | for x in taxa: 68 | x=re.sub("D_\d__","",x) 69 | x=re.sub("\w__","",x) 70 | new.append(x.replace(";","\t")) 71 | 72 | for sample in sample_names: 73 | make_tsv(sample) 74 | 75 | #This part runs Krona and removes tsv files we created. 76 | #You can change the output as you wish. 77 | os.system("ktImportText krona-tsv/* -o Krona/krona.html") 78 | os.system("rm -r krona-tsv") 79 | -------------------------------------------------------------------------------- /local.mk: -------------------------------------------------------------------------------- 1 | .PHONY: import denoise assign_taxonomy plot complete clean upload download 2 | 3 | complete: 4 | @echo "Running the complete pipeline. Quality reports, Core diversity analysis, statistics and functional analysis" 5 | snakemake -pr --cores 10 --keep-going --rerun-incomplete 6 | 7 | import: 8 | @echo "Importing, trimming primers and adapters, and performing initial quality checks" 9 | @echo "Inspect the plots generated in 04.QC/trimmed_reads_qual_viz.qzv at https://view.qiime2.org/" 10 | snakemake -pr --cores 10 --keep-going --rerun-incomplete "04.QC/trimmed_reads_qual_viz.qzv" "04.QC/raw_reads_qual_viz.qzv" 11 | 12 | denoise: 13 | @echo "Denoising your imported sequences" 14 | @echo "Inspect the table 05.Denoise_reads/denoise_stats.qzv at https://view.qiime2.org/" 15 | @echo "Edit the config/config.yaml file appropriately and re-run if many sequences were lost after denoising." 16 | snakemake -pr --cores 10 --keep-going --rerun-incomplete "05.Denoise_reads/denoise_stats.qzv" "05.Denoise_reads/table_summary.qzv" "05.Denoise_reads/representative_sequences.qzv" 17 | 18 | assign_taxonomy: 19 | @echo "Assigning taxonomy and filtering out non-target taxa" 20 | @echo "After this run completes" 21 | @echo "Examine 08.Filter_feature_table/taxa_filtered_table.qzv" 22 | @echo "To figure out the total number of sequences ('Total frequency') to be used to determine the minimum frequency for filtering out rare taxa" 23 | @echo "Simply multiply the total number of sequences by your threshold, for example 0.00005 (0.005 percent)" 24 | @echo "python -c print(1298206 * 0.00005) = 64.9103" 25 | @echo "Set the 'minimum_frequency' parameter in config/config.yaml with the result of this calculation rounded up like so:" 26 | @echo "minimum_frequency: 65" 27 | snakemake -pr --cores 10 --keep-going --rerun-incomplete "06.Assign_taxonomy/taxonomy.qzv" "07.Build_phylogenetic_tree/rooted-tree.qza" "08.Filter_feature_table/taxa_filtered_table.qzv" 28 | 29 | plot: 30 | @echo "Filtering out rare ASVs and generating taxonomy plots" 31 | snakemake -pr --cores 10 --keep-going --rerun-incomplete "08.Filter_feature_table/filtered_table.qzv" "09.Taxa_bar_plots/group-bar-plot.qzv" "09.Taxa_bar_plots/samples-bar-plots.qzv" 32 | 33 | upload: 34 | @echo "Copying the denoising folder to HPRC for taxonomy assignment" 35 | scp -r 05.Denoise_reads/ obayomi@grace.tamu.edu:/scratch/user/obayomi/projects/amplicon_sequencing/Guay 36 | 37 | download: 38 | @echo "Downloading the assign taxonomy folder from HPRC" 39 | scp -r obayomi@grace.tamu.edu:/scratch/user/obayomi/projects/amplicon_sequencing/Guay/06.Assign_taxonomy/ .
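# (Illustrative sketch, not part of the original Makefile:) the minimum_frequency cut-off described in the
# assign_taxonomy target above can be computed and rounded up in one step; 1298206 is simply the example
# 'Total frequency' quoted there and would be replaced by the value shown in taxa_filtered_table.qzv:
#   python -c "import math; print(math.ceil(1298206 * 0.00005))"   # prints 65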
40 | -------------------------------------------------------------------------------- /scripts/default_variables.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N diversity_analysis 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 40 9 | 10 | 11 | set -e 12 | 13 | source activate qiime2-2020.6 14 | export PERL5LIB='/gpfs0/bioinfo/users/obayomi/miniconda3/envs/qiime2-2020.6/lib/site_perl/5.26.2/x86_64-linux-thread-multi' 15 | 16 | TREE=('06.make_tree/dada2' '06.make_tree/dada2' '06.make_tree/deblur' '06.make_tree/deblur' '06.make_tree/dada2' '06.make_tree/dada2' '06.make_tree/deblur' '06.make_tree/deblur' '06.make_tree/dada2' '06.make_tree/dada2' '06.make_tree/deblur' '06.make_tree/deblur' '06.make_tree/dada2' '06.make_tree/dada2' '06.make_tree/deblur' '06.make_tree/deblur') 17 | 18 | DEPTH=(1201 1035 1003 501 1201 1276 617 480 3116 989 726 400 2140 2115 1484 1260) 19 | 20 | FEATURE_TABLE_DIR=('05.filter_table/dada2' '05.filter_table/dada2' '05.filter_table/deblur/' '05.filter_table/deblur/' '05.filter_table/dada2/indoors' '05.filter_table/dada2/indoors' '05.filter_table/deblur/indoors' '05.filter_table/deblur/indoors' '05.filter_table/dada2/outdoors' '05.filter_table/dada2/outdoors' '05.filter_table/deblur/outdoors' '05.filter_table/deblur/outdoors' '05.filter_table/dada2/mock' '05.filter_table/dada2/mock' '05.filter_table/deblur/mock' '05.filter_table/deblur/mock') 21 | 22 | PREFIX=('se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined') 23 | 24 | METADATA=('00.mapping/combined.tsv' '00.mapping/combined.tsv' '00.mapping/combined.tsv' '00.mapping/combined.tsv' '00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/mock.tsv' '00.mapping/mock.tsv' '00.mapping/mock.tsv' '00.mapping/mock.tsv') 25 | 26 | OUT_DIR=('08.core_diversity/dada2' '08.core_diversity/dada2' '08.core_diversity/deblur' '08.core_diversity/deblur' '08.core_diversity/dada2/indoors' '08.core_diversity/dada2/indoors' '08.core_diversity/deblur/indoors' '08.core_diversity/deblur/indoors' '08.core_diversity/dada2/outdoors' '08.core_diversity/dada2/outdoors' '08.core_diversity/deblur/outdoors' '08.core_diversity/deblur/outdoors' '08.core_diversity/dada2/mock' '08.core_diversity/dada2/mock' '08.core_diversity/deblur/mock' '08.core_diversity/deblur/mock') 27 | 28 | METADATA_COLUMN=('treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment') 29 | 30 | -------------------------------------------------------------------------------- /scripts/dada2_denoize.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N denoize_dada2 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 40 9 | 10 | set -e 11 | 12 | source activate qiime2-2020.6 13 | 14 | PAIRED='false' 15 | TRIM_LEFT=0 16 | TRUNC_LENGTH=400 17 | #TRUNC_LENGTH=280 18 | #PREFIX="se" 19 | #PREFIX="pe" 20 | PREFIX="pear-joined" 21 | 22 | if [ "${PAIRED}" != "true" ]; then 23 | 24 | # Denoise, truncate and assign ASVs 25 | qiime dada2 denoise-single \ 26 | --i-demultiplexed-seqs 
01.import/${PREFIX}-reads.qza \ 27 | --p-trim-left ${TRIM_LEFT} \ 28 | --p-trunc-len ${TRUNC_LENGTH} \ 29 | --o-representative-sequences 03.dada_denoise/${PREFIX}-representative_sequences.qza \ 30 | --o-table 03.dada_denoise/${PREFIX}-table.qza \ 31 | --o-denoising-stats 03.dada_denoise/${PREFIX}-denoise_stats.qza 32 | 33 | 34 | qiime feature-table summarize \ 35 | --i-table 03.dada_denoise/${PREFIX}-table.qza \ 36 | --o-visualization 03.dada_denoise/${PREFIX}-table_summary.qzv 37 | 38 | 39 | qiime feature-table tabulate-seqs \ 40 | --i-data 03.dada_denoise/${PREFIX}-representative_sequences.qza \ 41 | --o-visualization 03.dada_denoise/${PREFIX}-representative_sequences.qzv 42 | 43 | 44 | 45 | qiime metadata tabulate \ 46 | --m-input-file 03.dada_denoise/${PREFIX}-denoise_stats.qza \ 47 | --o-visualization 03.dada_denoise/${PREFIX}-denoise_stats.qzv 48 | 49 | else 50 | 51 | qiime dada2 denoise-paired \ 52 | --i-demultiplexed-seqs 01.import/reads.qza \ 53 | --o-table 03.dada_denoise/${PREFIX}-table.qza \ 54 | --o-representative-sequences 03.dada_denoise/${PREFIX}-representative_sequences.qza \ 55 | --o-denoising-stats 03.dada_denoise/${PREFIX}-denoise_stats.qza \ 56 | --p-trunc-len-f ${TRUNC_LENGTH} \ 57 | --p-trunc-len-r ${TRUNC_LENGTH} \ 58 | --p-trim-left-f ${TRIM_LEFT} \ 59 | --p-trim-left-r ${TRIM_LEFT} \ 60 | --p-n-threads 30 61 | 62 | 63 | # This visualization shows us the sequences per sample spread - to determine minimum number for rarefaction 64 | # and sequences per feature (OTU or ASV) 65 | qiime feature-table summarize \ 66 | --i-table 03.dada_denoise/${PREFIX}-table.qza \ 67 | --o-visualization 03.dada_denoise/${PREFIX}-table_summary.qzv 68 | 69 | 70 | qiime feature-table tabulate-seqs \ 71 | --i-data 03.dada_denoise/${PREFIX}-representative_sequences.qza \ 72 | --o-visualization 03.dada_denoise/${PREFIX}-representative_sequences.qzv 73 | 74 | 75 | qiime metadata tabulate \ 76 | --m-input-file 03.dada_denoise/${PREFIX}-denoise_stats.qza \ 77 | --o-visualization 03.dada_denoise/${PREFIX}-denoise_stats.qzv 78 | 79 | fi 80 | -------------------------------------------------------------------------------- /create_DB/config/sample.tsv: -------------------------------------------------------------------------------- 1 | SampleID Type Prefix Direction Old_name New_name 2 | A Forward A_DKDL210000007-1a_HFL3FCCX2_L6 _1 01.raw_data/A/A_DKDL210000007-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/A/A_1.fq.gz 3 | A Reverse A_DKDL210000007-1a_HFL3FCCX2_L6 _2 01.raw_data/A/A_DKDL210000007-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/A/A_2.fq.gz 4 | A1 Forward A1_DKDL210000015-1a_HFL3FCCX2_L6 _1 01.raw_data/A1/A1_DKDL210000015-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/A1/A1_1.fq.gz 5 | A1 Reverse A1_DKDL210000015-1a_HFL3FCCX2_L6 _2 01.raw_data/A1/A1_DKDL210000015-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/A1/A1_2.fq.gz 6 | A2 Forward A2_DKDL210000016-1a_HFL3FCCX2_L6 _1 01.raw_data/A2/A2_DKDL210000016-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/A2/A2_1.fq.gz 7 | A2 Reverse A2_DKDL210000016-1a_HFL3FCCX2_L6 _2 01.raw_data/A2/A2_DKDL210000016-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/A2/A2_2.fq.gz 8 | B Forward B_DKDL210000012-1a_HFL3FCCX2_L6 _1 01.raw_data/B/B_DKDL210000012-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/B/B_1.fq.gz 9 | B Reverse B_DKDL210000012-1a_HFL3FCCX2_L6 _2 01.raw_data/B/B_DKDL210000012-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/B/B_2.fq.gz 10 | L2 Forward L2_DKDL210000008-1a_HFL3FCCX2_L6 _1 01.raw_data/L2/L2_DKDL210000008-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/L2/L2_1.fq.gz 11 | L2 Reverse L2_DKDL210000008-1a_HFL3FCCX2_L6 _2 
01.raw_data/L2/L2_DKDL210000008-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/L2/L2_2.fq.gz 12 | L3 Forward L3_DKDL210000009-1a_HFL3FCCX2_L6 _1 01.raw_data/L3/L3_DKDL210000009-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/L3/L3_1.fq.gz 13 | L3 Reverse L3_DKDL210000009-1a_HFL3FCCX2_L6 _2 01.raw_data/L3/L3_DKDL210000009-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/L3/L3_2.fq.gz 14 | L4 Forward L4_DKDL210000010-1a_HFL3FCCX2_L6 _1 01.raw_data/L4/L4_DKDL210000010-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/L4/L4_1.fq.gz 15 | L4 Reverse L4_DKDL210000010-1a_HFL3FCCX2_L6 _2 01.raw_data/L4/L4_DKDL210000010-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/L4/L4_2.fq.gz 16 | L5 Forward L5_DKDL210000011-1a_HFL3FCCX2_L6 _1 01.raw_data/L5/L5_DKDL210000011-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/L5/L5_1.fq.gz 17 | L5 Reverse L5_DKDL210000011-1a_HFL3FCCX2_L6 _2 01.raw_data/L5/L5_DKDL210000011-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/L5/L5_2.fq.gz 18 | L7 Forward L7_DKDL210000013-1a_HFL3FCCX2_L6 _1 01.raw_data/L7/L7_DKDL210000013-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/L7/L7_1.fq.gz 19 | L7 Reverse L7_DKDL210000013-1a_HFL3FCCX2_L6 _2 01.raw_data/L7/L7_DKDL210000013-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/L7/L7_2.fq.gz 20 | L8 Forward L8_DKDL210000014-1a_HFL3FCCX2_L6 _1 01.raw_data/L8/L8_DKDL210000014-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/L8/L8_1.fq.gz 21 | L8 Reverse L8_DKDL210000014-1a_HFL3FCCX2_L6 _2 01.raw_data/L8/L8_DKDL210000014-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/L8/L8_2.fq.gz 22 | -------------------------------------------------------------------------------- /create_DB/eukaryote-unite/config/sample.tsv: -------------------------------------------------------------------------------- 1 | SampleID Type Prefix Direction Old_name New_name 2 | A Forward A_DKDL210000007-1a_HFL3FCCX2_L6 _1 01.raw_data/A/A_DKDL210000007-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/A/A_1.fq.gz 3 | A Reverse A_DKDL210000007-1a_HFL3FCCX2_L6 _2 01.raw_data/A/A_DKDL210000007-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/A/A_2.fq.gz 4 | A1 Forward A1_DKDL210000015-1a_HFL3FCCX2_L6 _1 01.raw_data/A1/A1_DKDL210000015-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/A1/A1_1.fq.gz 5 | A1 Reverse A1_DKDL210000015-1a_HFL3FCCX2_L6 _2 01.raw_data/A1/A1_DKDL210000015-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/A1/A1_2.fq.gz 6 | A2 Forward A2_DKDL210000016-1a_HFL3FCCX2_L6 _1 01.raw_data/A2/A2_DKDL210000016-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/A2/A2_1.fq.gz 7 | A2 Reverse A2_DKDL210000016-1a_HFL3FCCX2_L6 _2 01.raw_data/A2/A2_DKDL210000016-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/A2/A2_2.fq.gz 8 | B Forward B_DKDL210000012-1a_HFL3FCCX2_L6 _1 01.raw_data/B/B_DKDL210000012-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/B/B_1.fq.gz 9 | B Reverse B_DKDL210000012-1a_HFL3FCCX2_L6 _2 01.raw_data/B/B_DKDL210000012-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/B/B_2.fq.gz 10 | L2 Forward L2_DKDL210000008-1a_HFL3FCCX2_L6 _1 01.raw_data/L2/L2_DKDL210000008-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/L2/L2_1.fq.gz 11 | L2 Reverse L2_DKDL210000008-1a_HFL3FCCX2_L6 _2 01.raw_data/L2/L2_DKDL210000008-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/L2/L2_2.fq.gz 12 | L3 Forward L3_DKDL210000009-1a_HFL3FCCX2_L6 _1 01.raw_data/L3/L3_DKDL210000009-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/L3/L3_1.fq.gz 13 | L3 Reverse L3_DKDL210000009-1a_HFL3FCCX2_L6 _2 01.raw_data/L3/L3_DKDL210000009-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/L3/L3_2.fq.gz 14 | L4 Forward L4_DKDL210000010-1a_HFL3FCCX2_L6 _1 01.raw_data/L4/L4_DKDL210000010-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/L4/L4_1.fq.gz 15 | L4 Reverse L4_DKDL210000010-1a_HFL3FCCX2_L6 _2 01.raw_data/L4/L4_DKDL210000010-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/L4/L4_2.fq.gz 16 | L5 Forward 
L5_DKDL210000011-1a_HFL3FCCX2_L6 _1 01.raw_data/L5/L5_DKDL210000011-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/L5/L5_1.fq.gz 17 | L5 Reverse L5_DKDL210000011-1a_HFL3FCCX2_L6 _2 01.raw_data/L5/L5_DKDL210000011-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/L5/L5_2.fq.gz 18 | L7 Forward L7_DKDL210000013-1a_HFL3FCCX2_L6 _1 01.raw_data/L7/L7_DKDL210000013-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/L7/L7_1.fq.gz 19 | L7 Reverse L7_DKDL210000013-1a_HFL3FCCX2_L6 _2 01.raw_data/L7/L7_DKDL210000013-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/L7/L7_2.fq.gz 20 | L8 Forward L8_DKDL210000014-1a_HFL3FCCX2_L6 _1 01.raw_data/L8/L8_DKDL210000014-1a_HFL3FCCX2_L6_1.fq.gz 01.raw_data/L8/L8_1.fq.gz 21 | L8 Reverse L8_DKDL210000014-1a_HFL3FCCX2_L6 _2 01.raw_data/L8/L8_DKDL210000014-1a_HFL3FCCX2_L6_2.fq.gz 01.raw_data/L8/L8_2.fq.gz 22 | -------------------------------------------------------------------------------- /config/sample.tsv: -------------------------------------------------------------------------------- 1 | SampleID Type Old_name New_name 2 | A1 Forward 01.raw_data/A1_1ILC_S33_L005_R1_001.fastq.gz 01.raw_data/A1.fastq.gz 3 | A2 Forward 01.raw_data/A2_9IL7_S41_L006_R1_001.fastq.gz 01.raw_data/A2.fastq.gz 4 | A3 Forward 01.raw_data/A3_17IRC_S49_L007_R1_001.fastq.gz 01.raw_data/A3.fastq.gz 5 | A4 Forward 01.raw_data/A4_25IR7_S57_L008_R1_001.fastq.gz 01.raw_data/A4.fastq.gz 6 | B1 Forward 01.raw_data/B1_2DLC_S34_L005_R1_001.fastq.gz 01.raw_data/B1.fastq.gz 7 | B2 Forward 01.raw_data/B2_10DL7_S42_L006_R1_001.fastq.gz 01.raw_data/B2.fastq.gz 8 | B3 Forward 01.raw_data/B3_18DRC_S50_L007_R1_001.fastq.gz 01.raw_data/B3.fastq.gz 9 | B4 Forward 01.raw_data/B4_26DR7_S58_L008_R1_001.fastq.gz 01.raw_data/B4.fastq.gz 10 | C1 Forward 01.raw_data/C1_3ILC_S35_L005_R1_001.fastq.gz 01.raw_data/C1.fastq.gz 11 | C2 Forward 01.raw_data/C2_11IL7_S43_L006_R1_001.fastq.gz 01.raw_data/C2.fastq.gz 12 | C3 Forward 01.raw_data/C3_19IRC_S51_L007_R1_001.fastq.gz 01.raw_data/C3.fastq.gz 13 | C4 Forward 01.raw_data/C4_27IR7_S59_L008_R1_001.fastq.gz 01.raw_data/C4.fastq.gz 14 | D1 Forward 01.raw_data/D1_4DLC_S36_L005_R1_001.fastq.gz 01.raw_data/D1.fastq.gz 15 | D2 Forward 01.raw_data/D2_12DL7_S44_L006_R1_001.fastq.gz 01.raw_data/D2.fastq.gz 16 | D3 Forward 01.raw_data/D3_20DRC_S52_L007_R1_001.fastq.gz 01.raw_data/D3.fastq.gz 17 | D4 Forward 01.raw_data/D4_28DR7_S60_L008_R1_001.fastq.gz 01.raw_data/D4.fastq.gz 18 | E1 Forward 01.raw_data/E1_5ILC_S37_L005_R1_001.fastq.gz 01.raw_data/E1.fastq.gz 19 | E2 Forward 01.raw_data/E2_13IL7_S45_L006_R1_001.fastq.gz 01.raw_data/E2.fastq.gz 20 | E3 Forward 01.raw_data/E3_21IRC_S53_L007_R1_001.fastq.gz 01.raw_data/E3.fastq.gz 21 | E4 Forward 01.raw_data/E4_29IR7_S61_L008_R1_001.fastq.gz 01.raw_data/E4.fastq.gz 22 | F1 Forward 01.raw_data/F1_6DLC_S38_L005_R1_001.fastq.gz 01.raw_data/F1.fastq.gz 23 | F2 Forward 01.raw_data/F2_14DL7_S46_L006_R1_001.fastq.gz 01.raw_data/F2.fastq.gz 24 | F3 Forward 01.raw_data/F3_22DRC_S54_L007_R1_001.fastq.gz 01.raw_data/F3.fastq.gz 25 | F4 Forward 01.raw_data/F4_30DR7_S62_L008_R1_001.fastq.gz 01.raw_data/F4.fastq.gz 26 | G1 Forward 01.raw_data/G1_7ILC_S39_L005_R1_001.fastq.gz 01.raw_data/G1.fastq.gz 27 | G2 Forward 01.raw_data/G2_15IL7_S47_L006_R1_001.fastq.gz 01.raw_data/G2.fastq.gz 28 | G3 Forward 01.raw_data/G3_23IRC_S55_L007_R1_001.fastq.gz 01.raw_data/G3.fastq.gz 29 | G4 Forward 01.raw_data/G4_31IR7_S63_L008_R1_001.fastq.gz 01.raw_data/G4.fastq.gz 30 | H1 Forward 01.raw_data/H1_8DLC_S40_L005_R1_001.fastq.gz 01.raw_data/H1.fastq.gz 31 | H2 Forward 01.raw_data/H2_16DL7_S48_L006_R1_001.fastq.gz 
01.raw_data/H2.fastq.gz 32 | H3 Forward 01.raw_data/H3_24DRC_S56_L007_R1_001.fastq.gz 01.raw_data/H3.fastq.gz 33 | H4 Forward 01.raw_data/H4_32DR7_S64_L008_R1_001.fastq.gz 01.raw_data/H4.fastq.gz 34 | -------------------------------------------------------------------------------- /scripts/krona-arg.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import zipfile 4 | import argparse 5 | 6 | #argparse 7 | parser = argparse.ArgumentParser() 8 | 9 | parser.add_argument("--input","-i",help="visualized collapsed taxa (qzv)") 10 | parser.add_argument("--output","-o",help="name of the krona output (should end with .html)") 11 | parser.add_argument("--exclude","-e",help="exclude sample list (add samples separated by commas)") 12 | parser.add_argument("--regex","-r",help="apply the prefix-stripping regex or not (default: True)") 13 | 14 | args=parser.parse_args() 15 | 16 | if args.input: 17 | input=args.input 18 | 19 | if args.output: 20 | output=args.output 21 | 22 | # default to an empty exclude list when --exclude is not given 23 | excludelist=args.exclude.split(",") if args.exclude else [] 24 | 25 | if args.regex: 26 | regex=args.regex.lower()!="false" # apply the regex unless "False"/"false" is passed 27 | else: 28 | regex=True 29 | 30 | #extract metadata.tsv from collapsed taxa 31 | def unzip(qzv_file): 32 | with zipfile.ZipFile(qzv_file) as zip: 33 | for zip_info in zip.infolist(): 34 | if "data/metadata.tsv" in zip_info.filename: 35 | zip_info.filename=os.path.basename(zip_info.filename) 36 | zip.extract(zip_info) 37 | 38 | #create tsv files which Krona likes 39 | def make_tsv(name): 40 | tsv=open("krona-tsv/"+name+".tsv","w+") 41 | tsv.write(name) 42 | for i in range(0,len(new)): 43 | tsv.write("\n"+data_dict[name][i]+"\t"+new[i]) 44 | tsv.close() 45 | 46 | unzip(input) 47 | 48 | #this folder will be deleted at the end of the process 49 | if not os.path.exists("krona-tsv"): 50 | os.makedirs("krona-tsv") 51 | 52 | file=open("metadata.tsv","r") 53 | 54 | lines=file.readlines() 55 | 56 | file.close() 57 | 58 | #remove the file since we don't need it anymore 59 | os.system("rm "+"metadata.tsv") 60 | 61 | new=[] 62 | sample_names=[] 63 | data_dict={} 64 | 65 | taxa=lines[0].split("\t") 66 | taxa=taxa[1:] 67 | 68 | lines.pop(0) 69 | lines.pop(0) 70 | 71 | for line in lines: 72 | line=line.strip().split("\t") 73 | data=[] 74 | if line[0] in excludelist: 75 | continue 76 | else: 77 | sample_names.append(line[0]) 78 | for value in line: 79 | if any(i.isalpha() for i in value)==True: 80 | pass 81 | else: 82 | data.append(value) 83 | data_dict[line[0]]=data 84 | print(data_dict) 85 | 86 | #Regex for SILVA and greengenes. I don't like to see the prefix they add. 87 | for x in taxa: 88 | if regex==True: 89 | x=re.sub("D_\d__","",x) 90 | x=re.sub("\w__","",x) 91 | new.append(x.replace(";","\t")) 92 | 93 | for sample in sample_names: 94 | make_tsv(sample) 95 | 96 | #This part runs Krona and removes tsv files we created.
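# (Illustrative usage sketch, not part of the original script; the input file name is hypothetical and
#  ktImportText from KronaTools must be on the PATH:)
#   python krona-arg.py -i collapsed-taxa.qzv -o krona.html -e Mock1,Mock2
#   python krona-arg.py --input collapsed-taxa.qzv --output krona.html --regex False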
97 | os.system("ktImportText krona-tsv/* -o "+output) 98 | os.system("rm -r krona-tsv") 99 | -------------------------------------------------------------------------------- /scripts/export_table.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -eo pipefail 4 | 5 | source activate qiime2-2020.6 6 | export PERL5LIB='/gpfs0/bioinfo/users/obayomi/miniconda3/envs/qiime2-2020.6/lib/site_perl/5.26.2/x86_64-linux-thread-multi' 7 | 8 | 9 | # Dada2 Reanalysis after splitting indoor samples and dropping some outdoor samples 10 | TAXONOMY_DIR=(04.redo_assign_taxonomy/dada2{,,}) 11 | FEATURE_TABLE_DIR=(05.redo_filter_table/dada2/{indoors,outdoors,basins}/) 12 | PREFIX=($( for i in {1..3}; do echo 'se'; done)) 13 | OUT_DIR=(10.exports/dada2/{indoors,outdoors,basins}) 14 | 15 | 16 | ##### Export feature table with taxonomy assignment in biom format 17 | # https://forum.qiime2.org/t/exporting-and-modifying-biom-tables-e-g-adding-taxonomy-annotations/3630 18 | 19 | 20 | function export_feature_table(){ 21 | 22 | local PREFIX=$1 23 | local FEATURE_DIR=$2 24 | local OUT_DIR=$3 25 | local TAXONOMY_DIR=$4 26 | 27 | ##### Creating a BIOM table with taxonomy annotations 28 | qiime tools export --input-path ${FEATURE_DIR}/${PREFIX}-filtered_table.qza --output-path ${OUT_DIR}/ 29 | # Creating a TSV BIOM table 30 | biom convert -i ${OUT_DIR}/feature-table.biom -o ${OUT_DIR}/feature-table.tsv --to-tsv 31 | # Export taxonomy 32 | qiime tools export --input-path ${TAXONOMY_DIR}/${PREFIX}-taxonomy.qza --output-path ${OUT_DIR}/ 33 | 34 | #Next, we’ll need to modify the exported taxonomy file’s header before using it with BIOM software. 35 | 36 | # Before modifying that file, make a copy: 37 | cp ${OUT_DIR}/taxonomy.tsv ${OUT_DIR}/biom-taxonomy.tsv 38 | 39 | # Change the first line of biom-taxonomy.tsv (i.e. the header) to this: 40 | # Note that you’ll need to use tab characters in the header since this is a TSV file. 
41 | #OTUID taxonomy confidence 42 | 43 | # programmatically 44 | (echo "#OTUID taxonomy confidence"; sed -e '1d' ${OUT_DIR}/biom-taxonomy.tsv) \ 45 | > ${OUT_DIR}/tmp.tsv && rm -rf ${OUT_DIR}/biom-taxonomy.tsv && mv ${OUT_DIR}/tmp.tsv ${OUT_DIR}/biom-taxonomy.tsv 46 | 47 | # Finally, add the taxonomy data to your .biom file: 48 | biom add-metadata \ 49 | -i ${OUT_DIR}/feature-table.biom \ 50 | -o ${OUT_DIR}/table-with-taxonomy.biom \ 51 | --observation-metadata-fp ${OUT_DIR}/biom-taxonomy.tsv \ 52 | --sc-separated taxonomy 53 | 54 | # Creating a TSV BIOM table 55 | #biom convert -i ${OUT_DIR}/table-with-taxonomy.biom -o ${OUT_DIR}/table-with-taxonomy.biom.tsv --to-tsv 56 | 57 | 58 | } 59 | 60 | 61 | export -f export_feature_table 62 | 63 | # Export tables 64 | parallel --jobs 0 --link export_feature_table {1} {2} {3} {4} ::: ${PREFIX[*]} ::: ${FEATURE_TABLE_DIR[*]} ::: ${OUT_DIR[*]} ::: ${TAXONOMY_DIR[*]} 65 | 66 | #Test 67 | #export_feature_table ${PREFIX[0]} ${FEATURE_TABLE_DIR[0]} ${OUT_DIR[0]} ${TAXONOMY_DIR[0]} 68 | -------------------------------------------------------------------------------- /scripts/new-dada2_denoize.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N denoize_dada2 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 40 9 | 10 | set -e 11 | 12 | source activate qiime2-2020.6 13 | PAIRED="false" 14 | #PAIRED="true" 15 | TRIM_LEFT=0 16 | TRIM_RIGHT=0 17 | TRUNC_LENGTH=400 18 | #TRUNC_LENGTH=260 19 | TRUNC_LENGTH_LEFT=297 20 | TRUNC_LENGTH_RIGHT=290 21 | maxE_f=4 22 | maxE_r=7 23 | 24 | OUT_DIR="03.redo_dada_denoise" 25 | IMPORT_DIR="01.import" 26 | #PREFIX="se" 27 | #PREFIX="pe" 28 | PREFIX="pear-joined" 29 | 30 | if [ "${PAIRED}" != "true" ]; then 31 | echo "running dada single" 32 | # Denoise, truncate and assign ASVs 33 | qiime dada2 denoise-single \ 34 | --i-demultiplexed-seqs ${IMPORT_DIR}/${PREFIX}-reads.qza \ 35 | --p-trim-left ${TRIM_LEFT} \ 36 | --p-trunc-len ${TRUNC_LENGTH} \ 37 | --p-max-ee ${maxE_f} \ 38 | --o-representative-sequences ${OUT_DIR}/${PREFIX}-representative_sequences.qza \ 39 | --o-table ${OUT_DIR}/${PREFIX}-table.qza \ 40 | --o-denoising-stats ${OUT_DIR}/${PREFIX}-denoise_stats.qza 41 | 42 | 43 | qiime feature-table summarize \ 44 | --i-table ${OUT_DIR}/${PREFIX}-table.qza \ 45 | --o-visualization ${OUT_DIR}/${PREFIX}-table_summary.qzv 46 | 47 | 48 | qiime feature-table tabulate-seqs \ 49 | --i-data ${OUT_DIR}/${PREFIX}-representative_sequences.qza \ 50 | --o-visualization ${OUT_DIR}/${PREFIX}-representative_sequences.qzv 51 | 52 | 53 | 54 | qiime metadata tabulate \ 55 | --m-input-file ${OUT_DIR}/${PREFIX}-denoise_stats.qza \ 56 | --o-visualization ${OUT_DIR}/${PREFIX}-denoise_stats.qzv 57 | 58 | else 59 | echo "running dada paired" 60 | qiime dada2 denoise-paired \ 61 | --i-demultiplexed-seqs 01.import/${PREFIX}-reads.qza \ 62 | --o-table ${OUT_DIR}/${PREFIX}-table.qza \ 63 | --o-representative-sequences ${OUT_DIR}/${PREFIX}-representative_sequences.qza \ 64 | --o-denoising-stats ${OUT_DIR}/${PREFIX}-denoise_stats.qza \ 65 | --p-trunc-len-f ${TRUNC_LENGTH_LEFT} \ 66 | --p-trunc-len-r ${TRUNC_LENGTH_RIGHT} \ 67 | --p-trim-left-f ${TRIM_LEFT} \ 68 | --p-trim-left-r ${TRIM_RIGHT} \ 69 | --p-max-ee-f ${maxE_f} \ 70 | --p-max-ee-r ${maxE_r} \ 71 | --p-n-threads 30 72 | 73 | 74 | # This visualization shows us the sequences per sample spread - to determine minimum number for rarefaction 75 | # and sequences per feature (OTU or ASV) 76 | qiime
feature-table summarize \ 77 | --i-table ${OUT_DIR}/${PREFIX}-table.qza \ 78 | --o-visualization ${OUT_DIR}/${PREFIX}-table_summary.qzv 79 | 80 | 81 | qiime feature-table tabulate-seqs \ 82 | --i-data ${OUT_DIR}/${PREFIX}-representative_sequences.qza \ 83 | --o-visualization ${OUT_DIR}/${PREFIX}-representative_sequences.qzv 84 | 85 | 86 | qiime metadata tabulate \ 87 | --m-input-file ${OUT_DIR}/${PREFIX}-denoise_stats.qza \ 88 | --o-visualization ${OUT_DIR}/${PREFIX}-denoise_stats.qzv 89 | 90 | fi 91 | -------------------------------------------------------------------------------- /00.mapping/outdoors-edited.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number pcr_cycles medium_or_plant surface_sterilization treatment description 2 | 1A 1 22 Medium NONE Medium Filtered_by_Avital_with_pPNA_&_mPNA 3 | 2A 2 22 Medium NONE Medium Filtered_by_Avital_with_pPNA_&_mPNA 4 | 3A 3 22 Medium NONE Medium Filtered_by_Avital_with_pPNA_&_mPNA 5 | 4A 4 22 Medium NONE Medium Filtered_by_Avital_with_pPNA_&_mPNA 6 | 5A 5 22 Medium NONE Medium Filtered_by_Avital_with_pPNA_&_mPNA 7 | 6A 6 22 Medium NONE Medium Filtered_by_Avital_with_pPNA_&_mPNA 8 | 7A 7 22 Medium NONE Medium Filtered_by_Avital_with_pPNA_&_mPNA 9 | 8A 8 24 Medium NONE Medium Filtered_by_Avital_with_pPNA_&_mPNA 10 | 9A 9 24 Medium NONE Medium Filtered_by_Avital_with_pPNA_&_mPNA 11 | 10A 10 24 Medium NONE Medium Filtered_by_Avital_with_pPNA_&_mPNA 12 | 11A 11 24 Medium NONE Medium Filtered_by_Avital_with_pPNA_&_mPNA 13 | 12A 12 24 Medium NONE Medium Filtered_by_Avital_with_pPNA_&_mPNA 14 | 13A 13 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 15 | 14A 14 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 16 | 15A 15 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 17 | 16A 16 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 18 | 17A 17 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 19 | 18A 18 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 20 | 19A 19 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 21 | 20A 20 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 22 | 21A 21 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 23 | 22A 22 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 24 | 23A 23 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 25 | 24A 24 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 26 | 25A 25 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 27 | 26A 26 24 Plant Yes water_washed_Surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 28 | 27A 27 24 Plant Yes water_washed_Surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 29 | 28A 28 24 Plant Yes water_washed_Surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 30 | 29A 29 24 Plant Yes water_washed_Surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 31 | 30A 30 24 Plant Yes water_washed_Surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 32 | 31A 31 24 Plant Yes water_washed_Surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 33 | 32A 32 24 Plant Yes 
water_washed_Surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 34 | 33A 33 24 Plant Yes water_washed_Surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 35 | 34A 34 24 Plant Yes water_washed_Surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 36 | 35A 35 24 Plant Yes water_washed_Surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 37 | 36A 36 22 Plant Yes water_washed_Surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 38 | -------------------------------------------------------------------------------- /00.mapping/outdoors.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number pcr_cycles medium_or_plant surface_sterilization treatment description 2 | 1A 1 22 Medium NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 3 | 2A 2 22 Medium NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 4 | 3A 3 22 Medium NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 5 | 4A 4 22 Medium NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 6 | 5A 5 22 Medium NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 7 | 6A 6 22 Medium NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 8 | 7A 7 22 Medium NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 9 | 8A 8 24 Medium NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 10 | 9A 9 24 Medium NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 11 | 10A 10 24 Medium NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 12 | 11A 11 24 Medium NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 13 | 12A 12 24 Medium NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 14 | 13A 13 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 15 | 14A 14 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 16 | 15A 15 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 17 | 16A 16 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 18 | 17A 17 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 19 | 18A 18 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 20 | 19A 19 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 21 | 20A 20 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 22 | 21A 21 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 23 | 22A 22 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 24 | 23A 23 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 25 | 24A 24 24 Plant No water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 26 | 25A 25 24 Plant Yes water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 27 | 26A 26 24 Plant Yes water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 28 | 27A 27 24 Plant Yes water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 29 | 28A 28 24 Plant Yes water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 30 | 29A 29 24 Plant Yes water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 31 | 30A 30 24 Plant Yes water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 32 | 31A 31 24 Plant Yes water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 33 | 32A 32 24 Plant Yes 
water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 34 | 33A 33 24 Plant Yes water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 35 | 34A 34 24 Plant Yes water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 36 | 35A 35 24 Plant Yes water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 37 | 36A 36 22 Plant Yes water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA -------------------------------------------------------------------------------- /scripts/filter-samples.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N Filter_samples 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 10 9 | 10 | set -e 11 | 12 | source activate qiime2-2020.6 13 | export PERL5LIB='/gpfs0/bioinfo/users/obayomi/miniconda3/envs/qiime2-2020.6/lib/site_perl/5.26.2/x86_64-linux-thread-multi' 14 | 15 | #OUT_PREFIX=('05.filter_table/dada2/indoors/se' '05.filter_table/dada2/indoors/pear-joined' '05.filter_table/deblur/indoors/se' '05.filter_table/deblur/indoors/pear-joined' '05.filter_table/dada2/outdoors/se' '05.filter_table/dada2/outdoors/pear-joined' '05.filter_table/deblur/outdoors/se' '05.filter_table/deblur/outdoors/pear-joined' '05.filter_table/dada2/mock/se' '05.filter_table/dada2/mock/pear-joined' '05.filter_table/deblur/mock/se' '05.filter_table/deblur/mock/pear-joined') 16 | 17 | #OUT_PREFIX=('05.redo_filter_table/dada2/indoors/se' '05.redo_filter_table/dada2/indoors/pear-joined' '05.redo_filter_table/dada2/indoors/pe' '05.redo_filter_table/dada2/outdoors/se' '05.redo_filter_table/dada2/outdoors/pear-joined' '05.redo_filter_table/dada2/outdoors/pe' '05.redo_filter_table/dada2/mock/se' '05.redo_filter_table/dada2/mock/pear-joined' '05.redo_filter_table/dada2/mock/pe') 18 | 19 | 20 | OUT_PREFIX=(05.{,redo_}filter_table/dada2/{indoors,outdoors,basins}/se) 21 | 22 | 23 | #METADATA=('00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/mock.tsv' '00.mapping/mock.tsv' '00.mapping/mock.tsv' '00.mapping/mock.tsv') 24 | 25 | #METADATA=('00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/pe-dada2/indoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/pe-dada2/outdoors.tsv' '00.mapping/mock.tsv' '00.mapping/mock.tsv' '00.mapping/pe-dada2/mock.tsv') 26 | 27 | METADATA=($(for i in {1..2}; do echo 00.mapping/{indoors,outdoors,basins}.tsv;done)) 28 | 29 | #COMBINED_TABLE=('05.filter_table/dada2/se' '05.filter_table/dada2/pear-joined' '05.filter_table/deblur/se' '05.filter_table/deblur/pear-joined' '05.filter_table/dada2/se' '05.filter_table/dada2/pear-joined' '05.filter_table/deblur/se' '05.filter_table/deblur/pear-joined' '05.filter_table/dada2/se' '05.filter_table/dada2/pear-joined' '05.filter_table/deblur/se' '05.filter_table/deblur/pear-joined') 30 | 31 | #COMBINED_TABLE=('05.redo_filter_table/dada2/se' '05.redo_filter_table/dada2/pear-joined' '05.redo_filter_table/dada2/pe' '05.redo_filter_table/dada2/se' '05.redo_filter_table/dada2/pear-joined' '05.redo_filter_table/dada2/pe' '05.redo_filter_table/dada2/se' '05.redo_filter_table/dada2/pear-joined' '05.redo_filter_table/dada2/pe') 32 | 33 | #{,,} means to repeat the preceding text 3 times 34 |
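# (Illustrative note, not part of the original script:) a quick brace-expansion demo for the line below:
#   echo 05.{,redo_}filter_table/dada2/se{,,}
# would print: 05.filter_table/dada2/se 05.filter_table/dada2/se 05.filter_table/dada2/se 05.redo_filter_table/dada2/se 05.redo_filter_table/dada2/se 05.redo_filter_table/dada2/se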
COMBINED_TABLE=(05.{,redo_}filter_table/dada2/se{,,}) 35 | 36 | 37 | 38 | parallel --jobs 0 --link qiime feature-table filter-samples \ 39 | --i-table {1}-taxa_filtered_table.qza \ 40 | --m-metadata-file {2} \ 41 | --o-filtered-table {3}-taxa_filtered_table.qza ::: ${COMBINED_TABLE[*]} ::: ${METADATA[*]} ::: ${OUT_PREFIX[*]} 42 | 43 | 44 | parallel --jobs 0 --link qiime feature-table summarize \ 45 | --i-table {}-taxa_filtered_table.qza \ 46 | --o-visualization {}-taxa_filtered_table.qzv ::: ${OUT_PREFIX[*]} 47 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/indoors-minus-cntVsB12.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number batch pcr_cycles medium_or_plant sterile_plant grown_with_antibiotics location water_washed surface_sterilization b12_enriched treatment description 2 | 9A-2 55 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 3 | 10A-2 56 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 4 | 11A-2 57 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 5 | 12A-2 58 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 6 | 23A-2 69 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 7 | 25A-2 70 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 8 | 26A-2 71 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 9 | 27A-2 72 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 10 | 28A-2 73 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 11 | 29A-2 74 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 12 | 30A-2 75 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 13 | 31A-2 76 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 14 | 32A-2 77 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 15 | 33A-2 78 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 16 | 34A-2 79 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 17 | 35A-2 80 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 18 | 36A-2 81 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 19 | 37A-2 82 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 20 | 38A-2 83 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 21 | 39A-2 84 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 22 | 40A-2 85 B 22 Plant Yes Yes indoors Yes No NA 
indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 23 | 41A-2 86 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 24 | 42A-2 87 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 25 | 43A-2 88 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 26 | 44A-2 89 B NA Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic 27 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/pe-dada2/indoors-minus-cntVsB12.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number batch pcr_cycles medium_or_plant sterile_plant grown_with_antibiotics location water_washed surface_sterilization b12_enriched treatment description 2 | Osnat054-9-A-2 55 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 3 | Osnat055-10-A-2 56 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 4 | Osnat056-11-A-2 57 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 5 | Osnat057-12-A-2 58 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 6 | Osnat068-23-A-2 69 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 7 | Osnat069-25-A-2 70 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 8 | Osnat070-26-A-2 71 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 9 | Osnat071-27-A-2 72 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 10 | Osnat072-28-A-2 73 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 11 | Osnat073-29-A-2 74 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 12 | Osnat074-30-A-2 75 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 13 | Osnat075-31-A-2 76 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 14 | Osnat076-32-A-2 77 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 15 | Osnat077-33-A-2 78 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 16 | Osnat078-34-A-2 79 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 17 | Osnat079-35-A-2 80 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 18 | Osnat080-36-A-2 81 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 19 | Osnat081-37-A-2 82 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 
20 | Osnat082-38-A-2 83 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 21 | Osnat083-39-A-2 84 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 22 | Osnat084-40-A-2 85 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 23 | Osnat085-41-A-2 86 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 24 | Osnat086-42-A-2 87 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 25 | Osnat087-43-A-2 88 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 26 | Osnat088-44-A-2 89 B NA Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic 27 | -------------------------------------------------------------------------------- /docker/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: install snakemake fastqc multiqc pear qiime run copy manifest samples rename silva complete import denoise plot assign_taxonomy 2 | 3 | help: 4 | @echo "A pipeline to perform 16S microbiome analysis on aws " 5 | 6 | install: update snakemake fastqc multiqc pear qiime silva 7 | 8 | update: 9 | @echo "updating the shell" 10 | #sudo apt update && apt upgrade 11 | 12 | fastqc: update 13 | @echo "Downloading fastqc" 14 | docker pull staphb/fastqc:0.12.1 15 | 16 | multiqc: update 17 | @echo "Downloading Multiqc" 18 | docker pull staphb/multiqc:1.8 19 | 20 | pear: update 21 | @echo "Downloading pear read merger" 22 | docker pull olabiyi/pear:0.92 23 | 24 | qiime: update 25 | @echo "Downloading qiime and picrust" 26 | # For functions analysis using picrust 27 | docker pull kubor/qiime2-picrust2:2019.10 28 | # Core qiime 29 | docker pull quay.io/qiime2/amplicon:2023.9 30 | 31 | snakemake: update 32 | @echo "Download snakemake" 33 | #docker pull snakemake/snakemake:stable 34 | conda install -c bioconda snakemake 35 | 36 | silva: update 37 | @echo "Dowloading Silva database" 38 | #Full 39 | wget https://data.qiime2.org/2023.9/common/silva-138-99-nb-classifier.qza 40 | # V4 specific 41 | wget https://data.qiime2.org/2023.9/common/silva-138-99-515-806-nb-classifier.qza 42 | 43 | run: 44 | #@echo "Running snakemake in a docker container" 45 | $(shell docker run --rm -it -v /var/run/docker.sock:/var/run/docker.sock -v /usr/bin/docker:/usr/bin/docker -v ${PWD}:${PWD} -w ${PWD} -u $(id -u):$(id -g) snakemake/snakemake:stable; snakemake -pr --cores 30 --keep-going --rerun-incomplete) 46 | 47 | 48 | copy: 49 | @echo "Copying files from S3" 50 | @echo "Configure aws first by running: sudo apt install awscli -y && aws configure" 51 | #aws s3 cp --recursive s3://biodsa-sequencing-data/SEQ44XXX/SEQ44733/Reads/ 01.raw_data/ 52 | bash download_seqs.sh 53 | 54 | rename: 55 | @echo "Renaming the files so that the filename replect the sample names in individual directory" 56 | bash rename_files.sh 57 | 58 | samples: 59 | @echo "Get sample names for config.yaml" 60 | bash get_samples.sh 61 | 62 | manifest: 63 | @echo "Creating a MANIFEST file" 64 | bash make_manifest.sh 65 | 66 | complete: 67 | @echo "Running the complete 
pipeline. Quality reports, Core diversity analysis, statistics and functional analysis" 68 | snakemake -pr --cores 50 --keep-going --rerun-incomplete 69 | 70 | import: 71 | @echo "Importing, trimming primers and adapters, and performing initial quality checks" 72 | @echo "Inspect the plots generated in 04.QC/trimmed_reads_qual_viz.qzv at https://view.qiime2.org/" 73 | snakemake -pr --cores 50 --keep-going --rerun-incomplete "04.QC/trimmed_reads_qual_viz.qzv" "04.QC/raw_reads_qual_viz.qzv" 74 | 75 | denoise: 76 | @echo "Denoising your imported sequences" 77 | @echo "Inspect the table 05.Denoise_reads/denoise_stats.qzv at https://view.qiime2.org/" 78 | @echo "Edit the config/config.yaml file appropriately and re-run if many reads were lost after denoising." 79 | snakemake -pr --cores 50 --keep-going --rerun-incomplete "05.Denoise_reads/denoise_stats.qzv" "05.Denoise_reads/table_summary.qzv" "05.Denoise_reads/representative_sequences.qzv" 80 | 81 | assign_taxonomy: 82 | @echo "Assigning taxonomy and filtering out non-target taxa" 83 | @echo "After this run completes" 84 | @echo "Examine 08.Filter_feature_table/taxa_filtered_table.qzv" 85 | @echo "To figure out the total number of sequences ('Total frequency') to be used to determine the minimum frequency for filtering out rare taxa" 86 | @echo "Simply multiply the total number of sequences by your threshold, for example 0.00005 (0.005 percent)" 87 | @echo "python -c 'print(1298206 * 0.00005)' = 64.9103" 88 | @echo "Set the 'minimum_frequency' parameter in config/config.yaml to the result of this calculation rounded up, like so:" 89 | @echo "minimum_frequency: 65" 90 | snakemake -pr --cores 50 --keep-going --rerun-incomplete "06.Assign_taxonomy/taxonomy.qzv" "07.Build_phylogenetic_tree/rooted-tree.qza" "08.Filter_feature_table/taxa_filtered_table.qzv" 91 | 92 | plot: 93 | @echo "Filtering out rare ASVs and generating taxonomy plots" 94 | snakemake -pr --cores 10 --keep-going --rerun-incomplete "08.Filter_feature_table/filtered_table.qzv" "09.Taxa_bar_plots/group-bar-plot.qzv" "09.Taxa_bar_plots/samples-bar-plots.qzv" 95 | -------------------------------------------------------------------------------- /scripts/.bashrc: -------------------------------------------------------------------------------- 1 | 2 | # >>> conda initialize >>> 3 | # !! Contents within this block are managed by 'conda init' !! 4 | # __conda_setup="$('/gpfs0/bioinfo/users/obayomi/miniconda3/bin/conda' 'shell.bash' 'hook' 2> /dev/null)" 5 | # if [ $? -eq 0 ]; then 6 | # eval "$__conda_setup" 7 | # else 8 | # if [ -f "/gpfs0/bioinfo/users/obayomi/miniconda3/etc/profile.d/conda.sh" ]; then 9 | # . 
"/gpfs0/bioinfo/users/obayomi/miniconda3/etc/profile.d/conda.sh" 10 | # else 11 | # export PATH="/gpfs0/bioinfo/users/obayomi/miniconda3/bin:$PATH" 12 | # fi 13 | # fi 14 | # unset __conda_setup 15 | # <<< conda initialize <<< 16 | 17 | source /storage/SGE6U8/default/common/settings.sh 18 | # FASTQC 19 | export PATH=/gpfs0/bioinfo/users/obayomi/FastQC/:$PATH 20 | #export SOURCETRACKER_PATH=/gpfs0/biores/users/gilloro/Biyi/SourceTracking/sourcetracker-1.0.1 21 | #Chimera slayer 22 | export PATH=/fastspace/bioinfo_apps/microbiomeutil-r20110519/ChimeraSlayer/:$PATH 23 | #vsearch 24 | export PATH=/fastspace/bioinfo_apps/vsearch/vsearch_v2.3.4/bin/:$PATH 25 | # perldl and pdl2 perl bin 26 | #export PATH=/gpfs0/bioinfo/users/obayomi/perl5/bin/:PATH 27 | # create alias for pdl2 because it has trouble finding perl 28 | alias pdl2="/bin/perl /gpfs0/bioinfo/users/obayomi/perl5/bin/pdl2" 29 | # rlwrap - needed for autocompletion when using perli 30 | export PATH=/gpfs0/bioinfo/users/obayomi/bin/bin/:$PATH 31 | #pathogen analysis scripts 32 | export PATH=/gpfs0/bioinfo/users/obayomi/hinuman_analysis/16s_pathogen_analysis/:$PATH 33 | #qiime 34 | export PATH=/fastspace/bioinfo_apps/qiime/usr/local/bin/:$PATH 35 | #NCBI blast 36 | export PATH=/gpfs0/bioinfo/users/obayomi/ncbi-blast-2.10.1+/bin/:$PATH 37 | #qsub 38 | export PATH=/storage/SGE6U8/bin/lx24-amd64/:$PATH 39 | #all executables 40 | export PATH=/gpfs0/bioinfo/users/obayomi/bin/:$PATH 41 | #sra tolkit 42 | export PATH=/gpfs0/bioinfo/users/obayomi/sratoolkit.2.9.6-1-ubuntu64/bin/:$PATH 43 | #Diamond 0.7.11 44 | #export PATH=/fastspace/bioinfo_apps/Diamond/v0.7.11/:$PATH 45 | #MEGAN 46 | export PATH=/gpfs0/bioinfo/users/obayomi/megan/:$PATH 47 | #MEGAN commandline tools 48 | export PATH=/gpfs0/bioinfo/users/obayomi/megan/tools:$PATH 49 | #minimap2 for aligning long reads like nanopore 50 | export PATH=/gpfs0/bioinfo/users/obayomi/minimap2:$PATH 51 | #fastx tool kit for processing fasta and fastq files 52 | #export PATH=/gpfs0/biores/users/gilloro/Biyi/fastx_toolkit/bin:$PATH 53 | 54 | #centrifuge for metagenomic reads classification 55 | export PATH=/gpfs0/bioinfo/users/obayomi/centrifuge/:$PATH 56 | # Kraken 57 | export PATH=/fastspace/bioinfo_apps/kraken/:$PATH 58 | #metaphlan2 59 | #export PATH=/gpfs0/bioinfo/users/obayomi/biobakery-metaphlan2-5bd7cd0e4854/:$PATH 60 | 61 | # bbmap 62 | export PATH=/gpfs0/bioinfo/users/obayomi/bbmap/:$PATH 63 | 64 | #microbiome helper 65 | export PATH=/gpfs0/bioinfo/users/obayomi/microbiome_helper/:$PATH 66 | # LAST 67 | export PATH=/gpfs0/bioinfo/users/obayomi/last-1021/src/:$PATH 68 | export PATH=/gpfs0/bioinfo/users/obayomi/last-1021/scripts/:$PATH 69 | 70 | #Trimmomatic 71 | export PATH=/fastspace/bioinfo_apps/Trimmomatic-0.32/:$PATH 72 | 73 | #set SGE_ROOT variable 74 | export SGE_ROOT=/storage/SGE6U8 75 | 76 | #miniconda 77 | #export PATH=/gpfs0/bioinfo/users/obayomi/miniconda3/envs/python2/bin/:$PATH 78 | #export PATH=/gpfs0/bioinfo/apps/Miniconda2/Miniconda_v4.3.21/bin/:$PATH 79 | export PATH=/gpfs0/bioinfo/users/obayomi/miniconda3/bin/:$PATH 80 | #export PATH=/gpfs0/bioinfo/apps/Miniconda2/Miniconda_v4.3.21/envs/Metagenomics/share/minced-0.3.2-0/:$PATH 81 | #HMM 82 | export PATH=/gpfs0/bioinfo/apps/HMMER/HMMER_v3.1b1/bin/:$PATH 83 | #metaBAT 84 | export PATH=/gpfs0/bioinfo/users/obayomi/metabat/:$PATH 85 | alias ll='ls --color=auto -alh' 86 | #Bowtie2 87 | export PATH=/gpfs0/bioinfo/apps/bowtie2/bowtie2-2.3.5-linux-x86_64:$PATH 88 | 89 | # source useful function for running Neatseq_Flow 90 | source 
/gpfs0/bioinfo/users/obayomi/non_model_RNA-Seq/functions.sh 91 | 92 | # mauve 93 | export PATH=$PATH:/gpfs0/bioinfo/users/obayomi/mauve_snapshot_2015-02-13/ 94 | 95 | # MinPath 96 | export PATH=$PATH:/gpfs0/bioinfo/users/obayomi/MinPath/ 97 | 98 | # Signalp 99 | export PATH=$PATH:/gpfs0/bioinfo/users/obayomi/signalp-5.0b/bin/ 100 | 101 | # tmHMM 102 | export PATH=$PATH:/gpfs0/bioinfo/users/obayomi/tmhmm-2.0c/bin/ 103 | 104 | # aragorn 105 | #export PATH=$PATH:/gpfs0/bioinfo/users/obayomi/aragorn1.2.36/ 106 | 107 | # metaErg - anotation of metagenomics and metaproteomics assembly 108 | export PATH=$PATH:/gpfs0/bioinfo/users/obayomi/metaerg/bin/ 109 | 110 | # Phyloflash home 111 | PHYLOFLASH_DBHOME=/gpfs0/bioinfo/users/obayomi/138.1 112 | 113 | # Motus 114 | export PATH=$PATH:/gpfs0/bioinfo/users/obayomi/mOTUs_v2/ 115 | -------------------------------------------------------------------------------- /docker/config.yaml: -------------------------------------------------------------------------------- 1 | sample_file: "config/sample.tsv" 2 | metadata: "00.mapping/metadata.tsv" 3 | mail: "obadbotanist@yahoo.com" # A mere label 4 | samples: ["S44733-0001", "S44733-0002", "S44733-0003", "S44733-0004", "S44733-0005"] 5 | 6 | # List your sample names here - see the README.md file for an easy way to create this list 7 | project_dir: "/mnt/efs/scratch/gokdx/Pivot/contamination_hunting/SEQ44733/16s_analysis" 8 | # what type of amplicon are we analyzing 9 | # options are 10 | # "16S", "18S" and "ITS" 11 | amplicon: "16S" # "ITS" 12 | # A coloumn in metada for grouping bar plot and for statistics 13 | category: "Strain" 14 | # Three possible mode 15 | # pair - paired-end reads without joining 16 | # single - single end reads, joining of unnecessary 17 | # merge - merge paired end reads 18 | # if you will select to join the reads 19 | # # make sure to modify the -m -t flags of pear in the run_pear.pl script 20 | # before running the workflow 21 | 22 | mode: "pair" # "pair", "single" or "merge" 23 | RENAME_FILES: false # should your input files be renamed if they don't follow the requirement of 01.raw_data/{SAMPLE}_R{1|2}.fastq.gz 24 | 25 | # What method should be used in merging reads 26 | # options are "pear" or "vsearch" 27 | # for merging with pear or vsearch, repectively 28 | merge_method: "pear" 29 | 30 | # ASV or zoTUs denoising and clustering method. Can be "dada2" or "deblur" 31 | denoise_method: "dada2" 32 | 33 | # path to your manifest file - see the example folder for examples 34 | MANIFEST: "01.raw_data/MANIFEST" 35 | project_name: "Contamination_hunt_round2" # This has no use in the pipeline just help to keep records 36 | 37 | # # Add this line to everdy script to avoid device out of space error 38 | TEMP_DIR: "export TEMPDIR=/mnt/efs/scratch/gokdx/Pivot/contamination_hunting/SEQ44733/16s_analysis/tmp/ TMPDIR=/mnt/efs/scratch/gokdx/Pivot/contamination_hunting/SEQ44733/16s_analysis/tmp/" 39 | 40 | # set the path to the appropriate classifier for assigning taxonomy 41 | # Here i chose the classifier for silva for bacteria (16S) and protist (18S) analysis 42 | # For Fungi set to the path of a pre trained unite database classifier 43 | classifier: "silva-138-99-nb-classifier.qza" 44 | 45 | # To figure out the total number of sequences ("Total freqency") 46 | # to be used to determine the minuminum frequency for filtering out 47 | # rare taxa, examine "08.Filter_feature_table/taxa_filtered_table.qzv". 
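# A quick way to do the threshold arithmetic described here (shown with the example
# total of 106,203 sequences quoted further down; substitute the "Total frequency"
# reported in your own taxa_filtered_table.qzv):
#   python -c "print(106203 * 0.00001)"   # prints 1.06203
# Round the result to a whole number before setting minimum_frequency below.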
48 | # To calculate, multiply the total number of sequences by 0.00001 (0.001%) 49 | # Assign the result of your calulation below as the minimum frequency 50 | # for filtering out rare taxa 51 | # 106,203 * 0.00001 = 1.06203 52 | minimum_frequency: 1 53 | 54 | # Change this and re-run core diversity step if needed. Determine this number by 55 | # examiming "08.Filter_feature_table/filtered_table.qzv". Either choose the 56 | # minimum sequence count or choose the minimum sequence count 57 | # that will be enough to capture the diversity of your samples and still 58 | # not lose a lot of samples 59 | rarefaction_depth: 3362 60 | 61 | # Set tool specific parameters 62 | parameters: 63 | vsearch: 64 | join_pairs: 65 | truncqual: 20 66 | minimum_length: 400 67 | maximum_Ns: 20 68 | minimum_merge_length: 400 69 | minimum_merge_length: 600 70 | dada2: 71 | mode: "single" # "single" or "paired" 72 | trunc_length_forward: 120 #260 # this will be determined after visulaizing the quality plot where quality score is >= 20 73 | trunc_length_reverse: 110 #180 74 | trim_length_forward: 0 75 | trim_length_reverse: 0 76 | maximum_forward_error: 4 77 | maximum_reverse_error: 4 78 | threads: 40 79 | 80 | # --p-trim-length n which truncates the sequences at position n 81 | # In general, the Deblur developers recommend setting this value 82 | # to a length where the median quality score begins to drop too low 83 | deblur: 84 | trunc_length: 40 85 | # Parameters to argument of qiime feature-table group 86 | # when grouping the feature table for making grouped taxa barplots 87 | group_taxa_plot: 88 | category: "Strain" # --m-metadata-column argument 89 | mode: "sum" # --p-mode argument 90 | metadata: "00.mapping/treatment-metadata.tsv" # a 2-column or more metadata for grouping bar plots ['sample-id', 'treatment'] 91 | beta_diversity_significance: 92 | categories: "Strain" 93 | # Adators and primer trimming using cutadapt 94 | cutadapt: 95 | forward_primer: "GTGCCAGCMGCCGCGGTAA" 96 | reverse_primer: "GGACTACHVGGGTWTCTAAT" 97 | cores: 5 98 | fastree: 99 | threads: 20 100 | assign_taxonomy: 101 | threads: 40 102 | picrust: 103 | threads: 20 -------------------------------------------------------------------------------- /scripts/picrust2_analysis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N function_analysis 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 40 9 | 10 | 11 | set -e 12 | 13 | # Edit the headers of rep_set.fna to contain only OTU names 14 | #sed -i -E 's/(>.+) .+$/\1/g' rep_set.fna 15 | 16 | # make annotation directory 17 | #mkdir /gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/12.function_annotation/ 18 | 19 | ##################### Export and rename feature tables and representative sequences from qiime2 artifact 20 | 21 | ###### copy and rename the artifacts to the function annotation directory 22 | # Feature Tables 23 | #cp /gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/05.filter_table/dada2/se-taxa_filtered_table.qza \ 24 | # 12.function_annotation/ 25 | #mv 12.function_annotation/se-taxa_filtered_table.qza 12.function_annotation/_se-taxa_filtered_table.qza 26 | 27 | #cp /gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/05.redo_filter_table/dada2/se-taxa_filtered_table.qza \ 28 | # 12.function_annotation/ 29 | #mv 12.function_annotation/se-taxa_filtered_table.qza 12.function_annotation/redo-se-taxa_filtered_table.qza 30 | 31 | # Representative sequences 32 
| #cp /gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/03.dada_denoise/se-representative_sequences.qza \ 33 | # 12.function_annotation/ 34 | #mv 12.function_annotation/se-representative_sequences.qza 12.function_annotation/_se-representative_sequences.qza 35 | 36 | #cp /gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/03.redo_dada_denoise/se-representative_sequences.qza \ 37 | # 12.function_annotation/ 38 | #mv 12.function_annotation/se-representative_sequences.qza 12.function_annotation/redo-se-representative_sequences.qza 39 | 40 | #cd /gpfs0/bioinfo/users/obayomi/hinuman_analysis/16S_illumina/12.function_annotation/ 41 | #source activate qiime2-2020.6 42 | #qiime tools export --input-path _se-taxa_filtered_table.qza --output-path ./ 43 | #mv feature-table.biom _se-feature-table.biom 44 | 45 | #qiime tools export --input-path redo-se-taxa_filtered_table.qza --output-path ./ 46 | #mv feature-table.biom redo-se-feature-table.biom 47 | 48 | #qiime tools export --input-path _se-representative_sequences.qza --output-path ./ 49 | #mv dna-sequences.fasta _se-rep_set.fna 50 | 51 | #qiime tools export --input-path redo-se-representative_sequences.qza --output-path ./ 52 | #mv dna-sequences.fasta redo-se-rep_set.fna 53 | 54 | 55 | source activate picrust2 56 | PREFIX=("_se" "redo-se") 57 | REP_SET=("rep_set.fna" "rep_set.fna") 58 | FEATURE_TABLE=("feature-table.biom" "feature-table.biom") 59 | 60 | 61 | function run_picrust(){ 62 | 63 | local PREFIX=$1 64 | local REP_SET=$2 65 | local FEATURE_TABLE=$3 66 | # Run PICRUST2 pipeline 67 | picrust2_pipeline.py \ 68 | -s ${PREFIX}-${REP_SET} \ 69 | -i ${PREFIX}-${FEATURE_TABLE} \ 70 | -o ${PREFIX}-picrust2_out_pipeline \ 71 | -p 40 72 | 73 | # Annotate you enzymes / pathways by adding a description column 74 | add_descriptions.py -i ${PREFIX}-picrust2_out_pipeline/EC_metagenome_out/pred_metagenome_unstrat.tsv.gz -m EC \ 75 | -o ${PREFIX}-picrust2_out_pipeline/EC_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz 76 | 77 | add_descriptions.py -i ${PREFIX}-picrust2_out_pipeline/pathways_out/path_abun_unstrat.tsv.gz -m METACYC \ 78 | -o ${PREFIX}-picrust2_out_pipeline/pathways_out/path_abun_unstrat_descrip.tsv.gz 79 | 80 | add_descriptions.py -i ${PREFIX}-picrust2_out_pipeline/KO_metagenome_out/pred_metagenome_unstrat.tsv.gz -m KO \ 81 | -o ${PREFIX}-picrust2_out_pipeline/KO_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz 82 | 83 | # Unzip the prediction files 84 | gunzip ${PREFIX}-picrust2_out_pipeline/EC_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz 85 | gunzip ${PREFIX}-picrust2_out_pipeline/KO_metagenome_out/pred_metagenome_unstrat_descrip.tsv.gz 86 | gunzip ${PREFIX}-picrust2_out_pipeline/pathways_out/path_abun_unstrat_descrip.tsv.gz 87 | 88 | gunzip ${PREFIX}-picrust2_out_pipeline/EC_metagenome_out/pred_metagenome_unstrat.tsv.gz 89 | gunzip ${PREFIX}-picrust2_out_pipeline/KO_metagenome_out/pred_metagenome_unstrat.tsv.gz 90 | gunzip ${PREFIX}-picrust2_out_pipeline/pathways_out/path_abun_unstrat.tsv.gz 91 | 92 | # Convert to biom 93 | biom convert \ 94 | -i ${PREFIX}-picrust2_out_pipeline/EC_metagenome_out/pred_metagenome_unstrat.tsv \ 95 | -o ${PREFIX}-picrust2_out_pipeline/EC_metagenome_out/pred_metagenome_unstrat.biom \ 96 | --table-type="OTU table" \ 97 | --to-hdf5 98 | 99 | biom convert \ 100 | -i ${PREFIX}-picrust2_out_pipeline/KO_metagenome_out/pred_metagenome_unstrat.tsv \ 101 | -o ${PREFIX}-picrust2_out_pipeline/KO_metagenome_out/pred_metagenome_unstrat.biom \ 102 | --table-type="OTU table" \ 103 | --to-hdf5 
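# (Optional check, not required by the pipeline: each HDF5 BIOM file written above
# can be summarised with the biom CLI before downstream use, e.g.
#   biom summarize-table \
#     -i ${PREFIX}-picrust2_out_pipeline/KO_metagenome_out/pred_metagenome_unstrat.biom \
#     -o ${PREFIX}-picrust2_out_pipeline/KO_metagenome_out/KO_biom_summary.txt
# where the output file name is only an example; the summary confirms the sample
# and predicted-feature counts carried over from the conversion.)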
104 | 105 | biom convert \ 106 | -i ${PREFIX}-picrust2_out_pipeline/pathways_out/path_abun_unstrat.tsv \ 107 | -o ${PREFIX}-picrust2_out_pipeline/pathways_out/path_abun_unstrat.biom \ 108 | --table-type="OTU table" \ 109 | --to-hdf5 110 | 111 | } 112 | 113 | 114 | export -f run_picrust 115 | 116 | parallel --jobs 0 --link run_picrust {1} {2} {3} ::: ${PREFIX[*]} ::: ${REP_SET[*]} ::: ${FEATURE_TABLE[*]} 117 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/outdoors.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number batch pcr_cycles medium_or_plant sterile_plant grown_with_antibiotics location water_washed surface_sterilization b12_enriched treatment description 2 | 1A 1 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 3 | 2A 2 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 4 | 3A 3 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 5 | 4A 4 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 6 | 5A 5 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 7 | 6A 6 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 8 | 7A 7 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 9 | 8A 8 A 24 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 10 | 9A 9 A 24 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 11 | 10A 10 A 24 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 12 | 11A 11 A 24 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 13 | 12A 12 A 24 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 14 | 13A 13 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 15 | 14A 14 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 16 | 15A 15 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 17 | 16A 16 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 18 | 17A 17 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 19 | 18A 18 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 20 | 19A 19 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 21 | 20A 20 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 22 | 22A 22 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 23 | 23A 23 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 24 | 24A 24 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 25 | 25A 25 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 26 | 26A 26 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 27 | 27A 27 A 24 
Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 28 | 28A 28 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 29 | 29A 29 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 30 | 30A 30 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 31 | 31A 31 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 32 | 32A 32 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 33 | 33A 33 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 34 | 34A 34 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 35 | 35A 35 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 36 | 36A 36 A 22 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 37 | 13A-2 59 B 22 Plant No No outdoors No No NA outdoor outdoor_plant_with_pPNA_&_mPNA 38 | 14A-2 60 B 22 Plant No No outdoors Yes No NA outdoor+water_washed outdoor_plus_water_washed_plant_with_pPNA_&_mPNA 39 | 15A-2 61 B 22 Plant No No outdoors No No NA outdoor outdoor_plant_with_pPNA_&_mPNA 40 | 16A-2 62 B 22 Plant No No outdoors Yes No NA outdoor+water_washed outdoor_plus_water_washed_plant_with_pPNA_&_mPNA 41 | 17A-2 63 B 22 Plant No No outdoors Yes No NA outdoor+water_washed outdoor_plus_water_washed_plant_with_pPNA_&_mPNA 42 | 18A-2 64 B 22 Plant No No outdoors No No NA outdoor outdoor_plant_with_pPNA_&_mPNA 43 | 19A-2 65 B 22 Plant No No outdoors Yes No NA outdoor+water_washed outdoor_plus_water_washed_plant_with_pPNA_&_mPNA 44 | 20A-2 66 B 22 Plant No No outdoors No No NA outdoor outdoor_plant_with_pPNA_&_mPNA 45 | 21A-2 67 B 22 Plant No No outdoors No No NA outdoor outdoor_plant_with_pPNA_&_mPNA 46 | 22A-2 68 B 22 Plant No No outdoors Yes No NA outdoor+water_washed outdoor_plus_water_washed_plant_with_pPNA_&_mPNA 47 | -------------------------------------------------------------------------------- /scripts/filter_feature_table.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N Filter_features 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 10 9 | 10 | set -e 11 | 12 | # STEPS 13 | #1. Filter-out singletons and non-target ASVs in the combined table by setting REMOVE_RARE_FEATURES="false" 14 | #2. Run filter-sample.sh to subset the filtered table by analysis type e.g. indoors, outdoors e.t.c. 15 | #3. View qsv summary files for each analysis to determine the "Total number of sequences" that will be used to estimate the rare ASVs and also rarefaction depth 16 | #3. 
Remove rare ASVs from the feature tables by setting REMOVE_RARE_FEATURES="true" 17 | 18 | source activate qiime2-2020.6 19 | export PERL5LIB='/gpfs0/bioinfo/users/obayomi/miniconda3/envs/qiime2-2020.6/lib/site_perl/5.26.2/x86_64-linux-thread-multi' 20 | #IN_PREFIX=('03.dada_denoise/se' '03.dada_denoise/pear-joined' '03.deblur_denoise/se' '03.deblur_denoise/pear-joined') 21 | IN_PREFIX=('03.redo_dada_denoise/se' '03.redo_dada_denoise/pear-joined' '03.redo_dada_denoise/pe') 22 | 23 | 24 | # For Combined table i.e the original table with indoors, outdoors and mock tables combined 25 | #OUT_PREFIX=('05.filter_table/dada2/se' '05.filter_table/dada2/pear-joined' '05.filter_table/deblur/se' '05.filter_table/deblur/pear-joined') 26 | #TAXONOMY_PREFIX=('04.assign_taxonomy/dada2/se' '04.assign_taxonomy/dada2/pear-joined' '04.assign_taxonomy/deblur/se' '04.assign_taxonomy/deblur/pear-joined') 27 | 28 | TAXONOMY_PREFIX=('04.redo_assign_taxonomy/dada2/se' '04.redo_assign_taxonomy/dada2/pear-joined' '04.redo_assign_taxonomy/dada2/pe' ) 29 | 30 | 31 | 32 | #TOTAL_SEQUENCES=(994346 415117 243487 58268) multiply each number by 0.00005 to get the minimum number for filtering rare otus below 33 | #MIN_FREQUENCY=(50 21 12 3) 34 | 35 | # For the tables that have been split by metadata 36 | #OUT_PREFIX=('05.filter_table/dada2/indoors/se' '05.filter_table/dada2/indoors/pear-joined' '05.filter_table/deblur/indoors/se' '05.filter_table/deblur/indoors/pear-joined' '05.filter_table/dada2/outdoors/se' '05.filter_table/dada2/outdoors/pear-joined' '05.filter_table/deblur/outdoors/se' '05.filter_table/deblur/outdoors/pear-joined' '05.filter_table/dada2/mock/se' '05.filter_table/dada2/mock/pear-joined' '05.filter_table/deblur/mock/se' '05.filter_table/deblur/mock/pear-joined') 37 | 38 | 39 | # All filtered tables 40 | #OUT_PREFIX=('05.redo_filter_table/dada2/indoors/se' '05.redo_filter_table/dada2/indoors/pear-joined' '05.redo_filter_table/dada2/indoors/pe' '05.redo_filter_table/dada2/outdoors/se' '05.redo_filter_table/dada2/outdoors/pear-joined' '05.redo_filter_table/dada2/outdoors/pe' '05.redo_filter_table/dada2/mock/se' '05.redo_filter_table/dada2/mock/pear-joined' '05.redo_filter_table/dada2/mock/pe' '05.redo_filter_table/dada2/se' '05.redo_filter_table/dada2/pear-joined' '05.redo_filter_table/dada2/pe') 41 | # combined table 42 | #OUT_PREFIX=('05.redo_filter_table/dada2/se' '05.redo_filter_table/dada2/pear-joined' '05.redo_filter_table/dada2/pe') 43 | OUT_PREFIX=(05.{,redo_}filter_table/dada2/{indoors,outdoors,basins}/se) 44 | 45 | #MIN_FREQUENCY=(18 7 5 1 29 10 7 2 3 4 1 1) 46 | #MIN_FREQUENCY=(26 14 9 41 22 8 4 4 1 71 40 18) 47 | 48 | MIN_FREQUENCY=(4 25 14 5 36 21) 49 | 50 | REMOVE_RARE_FEATURES="true" 51 | 52 | function filter_table(){ 53 | 54 | local in_prefix=$1 55 | local out_prefix=$2 56 | local taxonomy_prefix=$3 57 | 58 | # Remove singletons 59 | qiime feature-table filter-features \ 60 | --i-table ${in_prefix}-table.qza \ 61 | --p-min-frequency 2 \ 62 | --o-filtered-table ${out_prefix}-noSingleton_filtered_table.qza 63 | 64 | qiime feature-table summarize \ 65 | --i-table ${out_prefix}-noSingleton_filtered_table.qza \ 66 | --o-visualization ${out_prefix}-noSingleton_filtered_table.qzv 67 | 68 | 69 | # Remove unassigned, archaea, eukaryota, chloroplast and mitochondria taxa 70 | qiime taxa filter-table \ 71 | --i-table ${out_prefix}-noSingleton_filtered_table.qza \ 72 | --i-taxonomy ${taxonomy_prefix}-taxonomy.qza \ 73 | --p-exclude "Unassigned,Chloroplast,Mitochondria,Archaea,Eukaryota" \ 74 | 
--o-filtered-table ${out_prefix}-taxa_filtered_table.qza 75 | 76 | # To figure out the total number of sequences ("Total freqency") here equals ${TOTAL_SEQUENCES} e.g. 8,053,326 77 | qiime feature-table summarize \ 78 | --i-table ${out_prefix}-taxa_filtered_table.qza \ 79 | --o-visualization ${out_prefix}-taxa_filtered_table.qzv 80 | 81 | } 82 | 83 | if [ "${REMOVE_RARE_FEATURES}" == "false" ]; then 84 | # Filter-out singletons and non-target ASVs from the combined table 85 | export -f filter_table 86 | parallel --jobs 0 --link filter_table {1} {2} {3} ::: ${IN_PREFIX[*]} ::: ${OUT_PREFIX[*]} ::: ${TAXONOMY_PREFIX[*]} 87 | 88 | else 89 | ##### Removing rare otus / features with abundance less the 0.005% 90 | parallel --jobs 0 --link qiime feature-table filter-features \ 91 | --i-table {1}-taxa_filtered_table.qza \ 92 | --p-min-frequency {2} \ 93 | --o-filtered-table {1}-filtered_table.qza ::: ${OUT_PREFIX[*]} ::: ${MIN_FREQUENCY[*]} 94 | 95 | parallel --jobs 0 --link qiime feature-table summarize \ 96 | --i-table {}-filtered_table.qza \ 97 | --o-visualization {}-filtered_table.qzv ::: ${OUT_PREFIX[*]} 98 | 99 | fi 100 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/indoors.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number batch pcr_cycles medium_or_plant sterile_plant grown_with_antibiotics location water_washed surface_sterilization b12_enriched treatment description 2 | 37A 37 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 3 | 38A 38 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 4 | 39A 39 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 5 | 40A 40 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 6 | 41A 41 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 7 | 42A 42 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 8 | 43A 43 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 9 | 44A 44 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 10 | 45A 45 A NA Plant No No indoors Yes No Control Control Control_indoor_plants_washed_with_water 11 | 46A 46 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 12 | 47A 47 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 13 | 48A 48 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 14 | 49A 49 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 15 | 50A 50 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 16 | 51A 51 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 17 | 52A 52 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 18 | 53A 53 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 19 | 54A 54 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 20 | 9A-2 55 B 22 Plant No No indoors 
Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 21 | 10A-2 56 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 22 | 11A-2 57 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 23 | 12A-2 58 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 24 | 23A-2 69 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 25 | 25A-2 70 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 26 | 26A-2 71 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 27 | 27A-2 72 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 28 | 28A-2 73 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 29 | 29A-2 74 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 30 | 30A-2 75 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 31 | 31A-2 76 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 32 | 32A-2 77 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 33 | 33A-2 78 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 34 | 34A-2 79 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 35 | 35A-2 80 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 36 | 36A-2 81 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 37 | 37A-2 82 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 38 | 38A-2 83 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 39 | 39A-2 84 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 40 | 40A-2 85 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 41 | 41A-2 86 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 42 | 42A-2 87 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 43 | 43A-2 88 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 44 | 44A-2 89 B NA Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic 45 | -------------------------------------------------------------------------------- /create_DB/eukaryote-unite/Snakefile: -------------------------------------------------------------------------------- 1 | from os import path,getcwd 2 | 3 | # Run the 
pipeline like so on your local computer 4 | # snakemake -npr --cores 10 --keep-going --rerun-incomplete --restart-times 3 5 | 6 | configfile: "config/config.yaml" 7 | 8 | 9 | RULES=["Download_unite_database", "Unzip_unite_DB", "modify_and_rename_unite_files", "Import_unite_sequences", 10 | "Import_unite_taxonomy", "Train_unite_classifier"] 11 | 12 | 13 | 14 | rule all: 15 | input: 16 | "logs/Download_unite_database/", 17 | "logs/modify_and_rename_unite_files/", 18 | "logs/Train_unite_classifier/", 19 | "databases/unite-classifier.qza" 20 | 21 | 22 | # This rule will make rule-specific log directories 23 | # in order to easily store the standard output and standard error 24 | # generated when submitting jobs to the cluster 25 | rule make_logs_directories: 26 | output: 27 | directory("logs/Download_unite_database/"), 28 | directory("logs/modify_and_rename_unite_files/"), 29 | directory("logs/Train_unite_classifier/") 30 | threads: 1 31 | shell: 32 | """ 33 | [ -d logs/ ] || mkdir -p logs/ 34 | cd logs/ 35 | for RULE in {RULES}; do 36 | [ -d ${{RULE}}/ ] || mkdir -p ${{RULE}}/ 37 | done 38 | """ 39 | 40 | 41 | 42 | # --------------- Create Unite database for QIIME2 -----------------# 43 | 44 | rule Download_unite_database: 45 | input: 46 | log_dirs=rules.make_logs_directories.output 47 | output: 48 | temp("databases/unite.gz") 49 | threads: 1 50 | log: "logs/Download_unite_database/Download_unite_database.log" 51 | params: 52 | url=config["UNITE_URL"] 53 | shell: 54 | "wget -O {output} {params.url} > {log} 2>&1 " 55 | 56 | rule Unzip_unite_DB: 57 | input: rules.Download_unite_database.output 58 | output: 59 | sequences="databases/sh_qiime_release_s_all_10.05.2021/sh_refs_qiime_ver8_dynamic_s_all_10.05.2021.fasta", 60 | taxonomy="databases/sh_qiime_release_s_all_10.05.2021/sh_taxonomy_qiime_ver8_dynamic_s_all_10.05.2021.txt" 61 | threads: 1 62 | #log: "logs/Unzip_unite_DB/Unzip_unite_DB.log" 63 | params: 64 | out_dir=lambda w, input: path.dirname(input[0]), 65 | basename=lambda w, input: path.basename(input[0]) 66 | shell: 67 | """ 68 | cd {params.out_dir} 69 | #[ -f {log} ] || touch {log} 70 | tar -xvzf {params.basename} 71 | """ 72 | 73 | # modify the taxonomy file so that its first line is the header 74 | # "Feature ID\tTaxon" 75 | # and rename the sequences fasta file 76 | rule modify_and_rename_unite_files: 77 | input: 78 | sequences=rules.Unzip_unite_DB.output.sequences, 79 | taxonomy=rules.Unzip_unite_DB.output.taxonomy 80 | output: 81 | sequences="databases/unite-sequences.fasta", 82 | taxonomy="databases/unite-taxonomy.txt" 83 | threads: 2 84 | log: "logs/modify_and_rename_unite_files/modify_and_rename_unite_files.log" 85 | shell: 86 | """ 87 | # Modify and rename the unite taxonomy file 88 | (echo -e "Feature ID\tTaxon"; cat {input.taxonomy}) > {output.taxonomy} 2> {log} 89 | 90 | # Copy and rename the unite sequences file 91 | cat {input.sequences} > {output.sequences} 2> {log} 92 | """ 93 | 94 | 95 | # Setting up the already trimmed database 96 | rule Import_unite_sequences: 97 | input: rules.modify_and_rename_unite_files.output.sequences 98 | output: "databases/unite-sequences.qza" 99 | threads: 2 100 | log: "logs/Import_unite_sequences/Import_unite_sequences.log" 101 | params: 102 | conda_activate=config["QIIME2_ENV"] 103 | shell: 104 | """ 105 | set +u 106 | 107 | {params.conda_activate} 108 | 109 | set -u 110 | 111 | qiime tools import \ 112 | --type 'FeatureData[Sequence]' \ 113 | --input-path {input} \ 114 | --output-path {output} > {log} 2>&1 115 | """ 
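# (Optional sanity check, not required by the workflow: `qiime tools peek` reports
# an artifact's UUID, semantic type and format; the artifact imported above should
# have the type FeatureData[Sequence]. A minimal commented-out sketch of such a
# rule, reusing the activation pattern of the rules in this file; the rule name
# and output path are illustrative only:
#
# rule Peek_unite_sequences:
#     input: rules.Import_unite_sequences.output
#     output: "databases/unite-sequences.peek.txt"
#     threads: 1
#     params:
#         conda_activate=config["QIIME2_ENV"]
#     shell:
#         """
#         set +u
#         {params.conda_activate}
#         set -u
#         qiime tools peek {input} > {output}
#         """
# )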
116 | 117 | 118 | 119 | # Import Taxonomy 120 | rule Import_unite_taxonomy: 121 | input: rules.modify_and_rename_unite_files.output.taxonomy 122 | output: "databases/unite-taxonomy.qza" 123 | threads: 2 124 | log: "logs/Import_unite_taxonomy/Import_unite_taxonomy.log" 125 | params: 126 | conda_activate=config["QIIME2_ENV"] 127 | shell: 128 | """ 129 | set +u 130 | 131 | {params.conda_activate} 132 | 133 | set -u 134 | 135 | qiime tools import \ 136 | --type 'FeatureData[Taxonomy]' \ 137 | --input-path {input} \ 138 | --output-path {output} > {log} 2>&1 139 | """ 140 | 141 | 142 | # Train the classifier 143 | rule Train_unite_classifier: 144 | input: 145 | sequences=rules.Import_unite_sequences.output, 146 | taxonomy=rules.Import_unite_taxonomy.output 147 | output: "databases/unite-classifier.qza" 148 | threads: 10 149 | log: "logs/Train_unite_classifier/Train_unite_classifier.log" 150 | params: 151 | conda_activate=config["QIIME2_ENV"] 152 | shell: 153 | """ 154 | set +u 155 | 156 | {params.conda_activate} 157 | 158 | set -u 159 | 160 | qiime feature-classifier fit-classifier-naive-bayes \ 161 | --i-reference-reads {input.sequences} \ 162 | --i-reference-taxonomy {input.taxonomy} \ 163 | --o-classifier {output} > {log} 2>&1 164 | """ 165 | 166 | 167 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/pe-dada2/outdoors.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number batch pcr_cycles medium_or_plant sterile_plant grown_with_antibiotics location water_washed surface_sterilization b12_enriched treatment description 2 | Osnat001-1-A 1 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 3 | Osnat002-2-A 2 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 4 | Osnat003-3-A 3 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 5 | Osnat004-4-A 4 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 6 | Osnat005-5-A 5 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 7 | Osnat006-6-A 6 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 8 | Osnat007-7-A 7 A 22 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 9 | Osnat008-8-A 8 A 24 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 10 | Osnat009-9-A 9 A 24 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 11 | Osnat010-10-A 10 A 24 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 12 | Osnat011-11-A 11 A 24 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 13 | Osnat012-12-A 12 A 24 Medium No No outdoors NA NA NA Medium Filtered_by_Avital_with_pPNA_&_mPNA 14 | Osnat013-13-A 13 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 15 | Osnat014-14-A 14 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 16 | Osnat015-15-A 15 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 17 | Osnat016-16-A 16 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 18 | Osnat017-17-A 17 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 19 | Osnat018-18-A 18 
A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 20 | Osnat019-19-A 19 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 21 | Osnat020-20-A 20 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 22 | Osnat021-22-A 22 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 23 | Osnat022-23-A 23 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 24 | Osnat023-24-A 24 A 24 Plant No No outdoors Yes No NA outdoor+water_washed Washed_with_Sterile_DW_for_20sec_by_Ilan_with_pPNA_&_mPNA 25 | Osnat024-25-A 25 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 26 | Osnat025-26-A 26 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 27 | Osnat026-27-A 27 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 28 | Osnat027-28-A 28 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 29 | Osnat028-29-A 29 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 30 | Osnat029-30-A 30 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 31 | Osnat030-31-A 31 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 32 | Osnat031-32-A 32 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 33 | Osnat032-33-A 33 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 34 | Osnat033-34-A 34 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 35 | Osnat034-35-A 35 A 24 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 36 | Osnat035-36-A 36 A 22 Plant No No outdoors Yes Yes NA outdoor+water_washed+surface_sterile washed_with_7%_H2O2_for_10min_by_Ilan_with_pPNA_&_mPNA 37 | Osnat058-13-A-2 59 B 22 Plant No No outdoors No No NA outdoor outdoor_plant_with_pPNA_&_mPNA 38 | Osnat059-14-A-2 60 B 22 Plant No No outdoors Yes No NA outdoor+water_washed outdoor_plus_water_washed_plant_with_pPNA_&_mPNA 39 | Osnat060-15-A-2 61 B 22 Plant No No outdoors No No NA outdoor outdoor_plant_with_pPNA_&_mPNA 40 | Osnat061-16-A-2 62 B 22 Plant No No outdoors Yes No NA outdoor+water_washed outdoor_plus_water_washed_plant_with_pPNA_&_mPNA 41 | Osnat062-17-A-2 63 B 22 Plant No No outdoors Yes No NA outdoor+water_washed outdoor_plus_water_washed_plant_with_pPNA_&_mPNA 42 | Osnat063-18-A-2 64 B 22 Plant No No outdoors No No NA outdoor outdoor_plant_with_pPNA_&_mPNA 43 | Osnat064-19-A-2 65 B 22 Plant No No outdoors Yes No NA outdoor+water_washed outdoor_plus_water_washed_plant_with_pPNA_&_mPNA 44 | Osnat065-20-A-2 66 B 22 Plant No No outdoors No No NA outdoor outdoor_plant_with_pPNA_&_mPNA 45 | Osnat066-21-A-2 67 B 22 
Plant No No outdoors No No NA outdoor outdoor_plant_with_pPNA_&_mPNA 46 | Osnat067-22-A-2 68 B 22 Plant No No outdoors Yes No NA outdoor+water_washed outdoor_plus_water_washed_plant_with_pPNA_&_mPNA 47 | -------------------------------------------------------------------------------- /00.mapping/first_analysis/pe-dada2/indoors.tsv: -------------------------------------------------------------------------------- 1 | sample-id sample_number batch pcr_cycles medium_or_plant sterile_plant grown_with_antibiotics location water_washed surface_sterilization b12_enriched treatment description 2 | Osnat036-37-A 37 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 3 | Osnat037-38-A 38 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 4 | Osnat038-39-A 39 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 5 | Osnat039-40-A 40 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 6 | Osnat040-41-A 41 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 7 | Osnat041-42-A 42 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 8 | Osnat042-43-A 43 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 9 | Osnat043-44-A 44 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 10 | Osnat044-45-A 45 A NA Plant No No indoors Yes No Control control Control_indoor_plants_washed_with_water 11 | Osnat045-46-A 46 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 12 | Osnat046-47-A 47 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 13 | Osnat047-48-A 48 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 14 | Osnat048-49-A 49 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 15 | Osnat049-50-A 50 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 16 | Osnat050-51-A 51 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 17 | Osnat051-52-A 52 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 18 | Osnat052-53-A 53 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 19 | Osnat053-54-A 54 A NA Plant No No indoors Yes No B12_Enriched indoor+water_washed+B12 Modified_Hinoman_medium_with_B12_enriched 20 | Osnat054-9-A-2 55 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 21 | Osnat055-10-A-2 56 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 22 | Osnat056-11-A-2 57 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 23 | Osnat057-12-A-2 58 B 22 Plant No No indoors Yes No NA indoor+water_washed indoor_plus_water_washed_plant_with_pPNA_&_mPNA 24 | Osnat068-23-A-2 69 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 25 | Osnat069-25-A-2 70 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile 
indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 26 | Osnat070-26-A-2 71 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 27 | Osnat071-27-A-2 72 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 28 | Osnat072-28-A-2 73 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 29 | Osnat073-29-A-2 74 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 30 | Osnat074-30-A-2 75 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 31 | Osnat075-31-A-2 76 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 32 | Osnat076-32-A-2 77 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 33 | Osnat077-33-A-2 78 B 22 Plant Yes No indoors No No NA indoor+sterile indoor_plus_sterile_plant_with_pPNA_&_mPNA 34 | Osnat078-34-A-2 79 B 22 Plant Yes No indoors Yes No NA indoor+water_washed+sterile indoor_plus_water_washed_sterile_plant_with_pPNA_&_mPNA 35 | Osnat079-35-A-2 80 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 36 | Osnat080-36-A-2 81 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 37 | Osnat081-37-A-2 82 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 38 | Osnat082-38-A-2 83 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 39 | Osnat083-39-A-2 84 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 40 | Osnat084-40-A-2 85 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 41 | Osnat085-41-A-2 86 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 42 | Osnat086-42-A-2 87 B 22 Plant Yes Yes indoors Yes No NA indoor+water_washed+sterile+antibiotic indoor_plus_water_washed_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 43 | Osnat087-43-A-2 88 B 22 Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic_with_pPNA_&_mPNA 44 | Osnat088-44-A-2 89 B NA Plant Yes Yes indoors No No NA indoor+sterile+antibiotic indoor_plus_sterile_plant_grown_with_antibiotic 45 | -------------------------------------------------------------------------------- /config/config.yaml: -------------------------------------------------------------------------------- 1 | sample_file: "config/sample.tsv" 2 | metadata: "00.mapping/metadata.tsv" 3 | mail: "obadbotanist@yahoo.com" # A mere label 4 | samples: ["A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "A10", 5 | "B1", "B2", "B3", "B4", "B5", "B6", "B7", "B8", "B9_1", "B9_2", 6 | "B10_1", "B10_2", "C1", "C2", "C3", "C4", "C5", "C6", "C7", 7 | "C8", "D1", "D2", "D3", "D4", "D5", "D6", "D7", "D8", "D9", 8 | "D10", "E1", "E2", "E3", "E4", "E5", "E6", "E7", "E8", "E9", 9 | "E10", "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", 
"F9", 10 | "F10", "G1", "G2", "G3", "G4", "G5", "G6", "G7", "G8", "G9", "G10"] 11 | # List your sample names here - see the README.md file for an easy way to create this list 12 | project_dir: "/scratch/user/obayomi/projects/Zebrafish/16S_trimmed_unmerged" 13 | # what type of amplicon are we analyzing 14 | # options are 15 | # "16S", "18S" and "ITS" 16 | amplicon: "16S" # "ITS" 17 | # A coloumn in metada for grouping bar plot and for statistics 18 | category: "Treatment" 19 | # Three possible mode 20 | # pair - paired-end reads without joining 21 | # single - single end reads, joining of unnecessary 22 | # merge - merge paired end reads 23 | # if you will select to join the reads 24 | # # make sure to modify the -m -t flags of pear in the run_pear.pl script 25 | # before running the workflow 26 | 27 | mode: "pair" # "pair", "single" or "merge" 28 | RENAME_FILES: false # should your input files be renamed if they don't follow the requirement of 01.raw_data/{SAMPLE}_R{1|2}.fastq.gz 29 | 30 | # What method should be used in merging reads 31 | # options are "pear" or "vsearch" 32 | # for merging with pear or vsearch, repectively 33 | merge_method: "pear" 34 | 35 | # ASV or zoTUs denoising and clustering method. Can be "dada2" or "deblur" 36 | denoise_method: "dada2" 37 | 38 | # path to your manifest file - see the example folder for examples 39 | MANIFEST: "01.raw_data/MANIFEST" 40 | project_name: "Zebrafish" # This has no use in the pipeline just help to keep records 41 | 42 | # # Add this line to everdy script to avoid device out of space error 43 | TEMP_DIR: "export TEMPDIR=/scratch/user/obayomi/projects/Zebrafish/tmp/ TMPDIR=/scratch/user/obayomi/projects/Zebrafish/tmp/" 44 | 45 | # set the path to the appropriate classifier for assigning taxonomy 46 | # Here i chose the classifier for silva for bacteria (16S) and protist (18S) analysis 47 | # For Fungi set to the path of a pre trained unite database classifier 48 | classifier: "/scratch/user/obayomi/projects/qiime2/create_DB/databases/silva-138-99-nb-classifier.qza" 49 | 50 | # To figure out the total number of sequences ("Total freqency") 51 | # to be used to determine the minuminum frequency for filtering out 52 | # rare taxa, examine "08.Filter_feature_table/taxa_filtered_table.qzv". 53 | # To calculate, multiply the total number of sequences by 0.00005 (0.005%) 54 | # Assign the result of your calulation below as the minimum frequency 55 | # for filtering out rare taxa 56 | # 741,904 * 0.00005 = 37.0952 57 | minimum_frequency: 37 58 | 59 | # Change this and re-run core diversity step if needed. Determine this number by 60 | # examiming "08.Filter_feature_table/filtered_table.qzv". 
Either choose the 61 | # minimum sequence count, or choose a sequence count 62 | # that will be enough to capture the diversity of your samples and still 63 | # not lose a lot of samples 64 | rarefaction_depth: 123 65 | 66 | 67 | # Full paths to the specified programs 68 | programs_path: 69 | multiqc: "/scratch/user/obayomi/.conda/envs/bioinfo/bin/multiqc" 70 | fastqc: "/scratch/user/obayomi/.conda/envs/bioinfo/bin/fastqc" 71 | parallel: "/scratch/user/obayomi/.conda/envs/bioinfo/bin/parallel" 72 | run_pear: "pear" #"/scratch/user/obayomi/projects/qiime2/run_pear.pl" 73 | 74 | # Set tool specific parameters 75 | parameters: 76 | vsearch: 77 | join_pairs: 78 | truncqual: 20 79 | minimum_length: 400 80 | maximum_Ns: 20 81 | minimum_merge_length: 400 82 | maximum_merge_length: 600 83 | dada2: 84 | mode: "single" # "single" or "paired" 85 | trunc_length_forward: 200 # 280 #220 # this will be determined after visualizing the quality plot where the quality score is >= 20 86 | trunc_length_reverse: 140 #180 87 | trim_length_forward: 0 88 | trim_length_reverse: 0 89 | maximum_forward_error: 4 90 | maximum_reverse_error: 4 91 | threads: 28 92 | 93 | # --p-trim-length n which truncates the sequences at position n 94 | # In general, the Deblur developers recommend setting this value 95 | # to a length where the median quality score begins to drop too low 96 | deblur: 97 | trunc_length: 40 98 | # Parameters passed to qiime feature-table group 99 | # when grouping the feature table for making grouped taxa barplots 100 | group_taxa_plot: 101 | category: "Treatment" # --m-metadata-column argument 102 | mode: "sum" # --p-mode argument 103 | metadata: "00.mapping/treatment-metadata.tsv" # a metadata file with 2 or more columns for grouping bar plots ['sample-id', 'treatment'] 104 | beta_diversity_significance: 105 | categories: "Treatment" 106 | # Adapter and primer trimming using cutadapt 107 | cutadapt: 108 | forward_primer: "GTGYCAGCMGCCGCGGTAA" 109 | reverse_primer: "GGACTACNVGGGTWTCTAAT" 110 | cores: 10 111 | fastree: 112 | threads: 28 113 | assign_taxonomy: 114 | threads: 28 115 | picrust: 116 | threads: 28 117 | pear: 118 | min_assembly: 150 119 | max_assembly: 300 120 | min_trim: 150 121 | threads: 8 122 | 123 | conda: 124 | qiime2: 125 | env: "module purge; module load Anaconda3/2020.07; source activate /sw/hprc/sw/Anaconda3/2020.07/envs/qiime2-2021.2" 126 | perl5lib: "export PERL5LIB=/sw/hprc/sw/Anaconda3/2020.07/envs/qiime2-2021.2/lib/site_perl/5.26.2/x86_64-linux-thread-multi" 127 | picrust2: 128 | env: "module purge; module load Anaconda3/2020.07; source activate /scratch/user/obayomi/.conda/envs/picrust2" 129 | perl5lib: "export PERL5LIB=/scratch/user/obayomi/.conda/envs/picrust2/lib/site_perl/5.26.2/x86_64-linux-thread-multi" 130 | bioinfo: 131 | env: "module purge; module load Anaconda3/2020.07; source activate /scratch/user/obayomi/.conda/envs/bioinfo" 132 | perl5lib: "export PERL5LIB=/scratch/user/obayomi/.conda/envs/bioinfo/lib/5.26.2" 133 | pear: 134 | env: "module purge; module load GCCcore/9.3.0 PEAR/0.9.11" 135 | 136 | -------------------------------------------------------------------------------- /scripts/qiime2_api.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from qiime2.plugins import feature_table\n", 10 | "from qiime2 import Artifact\n", 11 | "import biom\n", 12 | "import pandas as pd\n", 13 | "from 
qiime2.plugins import diversity\n", 14 | "from qiime2 import Metadata" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 3, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "# Load an artifact. In this case a feature table i.e ASV or OTU table\n", 24 | "unrarefied_table = Artifact.load('../04.filter_table/noChlr_noMitoch_noSingleton_filtered_table.qza')" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 5, 30 | "metadata": {}, 31 | "outputs": [ 32 | { 33 | "name": "stdout", 34 | "output_type": "stream", 35 | "text": [ 36 | "# Constructed from biom file\n", 37 | "#OTU ID\tSRR3202913\tSRR3202914\tSRR3202915\tSRR3202916\tSRR3202917\n", 38 | "65fb08bed0eeb24cfff33eeedfad522f\t0.0\t0.0\t0.0\t0.0\t0.0\n", 39 | "1ca86d303424bc40036ec3cdac72d8ad\t0.0\t0.0\t0.0\t0.0\t0.0\n", 40 | "4a0f23475dad7251063a5a39cf12d27f\t0.0\t0.0\t0.0\t0.0\t0.0\n", 41 | "36bb069fa6345961fc82056566252ace\t0.0\t0.0\t0.0\t0.0\t0.0\n", 42 | "58a85ad58122a7097ce75583f34d8626\t0.0\t0.0\t0.0\t0.0\t0.0\n" 43 | ] 44 | } 45 | ], 46 | "source": [ 47 | "biom_table = unrarefied_table.view(biom.Table)\n", 48 | "print(biom_table.head())" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 6, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "data": { 58 | "text/plain": [ 59 | "['__class__',\n", 60 | " '__delattr__',\n", 61 | " '__dict__',\n", 62 | " '__dir__',\n", 63 | " '__doc__',\n", 64 | " '__eq__',\n", 65 | " '__format__',\n", 66 | " '__ge__',\n", 67 | " '__getattribute__',\n", 68 | " '__getitem__',\n", 69 | " '__gt__',\n", 70 | " '__hash__',\n", 71 | " '__init__',\n", 72 | " '__init_subclass__',\n", 73 | " '__iter__',\n", 74 | " '__le__',\n", 75 | " '__lt__',\n", 76 | " '__module__',\n", 77 | " '__ne__',\n", 78 | " '__new__',\n", 79 | " '__reduce__',\n", 80 | " '__reduce_ex__',\n", 81 | " '__repr__',\n", 82 | " '__setattr__',\n", 83 | " '__sizeof__',\n", 84 | " '__str__',\n", 85 | " '__subclasshook__',\n", 86 | " '__weakref__',\n", 87 | " '_axis_to_num',\n", 88 | " '_cast_metadata',\n", 89 | " '_conv_to_self_type',\n", 90 | " '_data',\n", 91 | " '_data_equality',\n", 92 | " '_extract_data_from_tsv',\n", 93 | " '_get_col',\n", 94 | " '_get_row',\n", 95 | " '_get_sparse_data',\n", 96 | " '_index',\n", 97 | " '_index_ids',\n", 98 | " '_intersect_id_order',\n", 99 | " '_invert_axis',\n", 100 | " '_iter_obs',\n", 101 | " '_iter_samp',\n", 102 | " '_obs_index',\n", 103 | " '_observation_group_metadata',\n", 104 | " '_observation_ids',\n", 105 | " '_observation_metadata',\n", 106 | " '_sample_group_metadata',\n", 107 | " '_sample_ids',\n", 108 | " '_sample_index',\n", 109 | " '_sample_metadata',\n", 110 | " '_to_dense',\n", 111 | " '_to_sparse',\n", 112 | " '_union_id_order',\n", 113 | " 'add_group_metadata',\n", 114 | " 'add_metadata',\n", 115 | " 'align_to',\n", 116 | " 'collapse',\n", 117 | " 'concat',\n", 118 | " 'copy',\n", 119 | " 'create_date',\n", 120 | " 'data',\n", 121 | " 'del_metadata',\n", 122 | " 'delimited_self',\n", 123 | " 'descriptive_equality',\n", 124 | " 'dtype',\n", 125 | " 'exists',\n", 126 | " 'filter',\n", 127 | " 'format_version',\n", 128 | " 'from_hdf5',\n", 129 | " 'from_json',\n", 130 | " 'from_tsv',\n", 131 | " 'generated_by',\n", 132 | " 'get_table_density',\n", 133 | " 'get_value_by_ids',\n", 134 | " 'group_metadata',\n", 135 | " 'head',\n", 136 | " 'ids',\n", 137 | " 'index',\n", 138 | " 'is_empty',\n", 139 | " 'iter',\n", 140 | " 'iter_data',\n", 141 | " 'iter_pairwise',\n", 142 | " 'length',\n", 143 | " 'matrix_data',\n", 144 
| " 'max',\n", 145 | " 'merge',\n", 146 | " 'metadata',\n", 147 | " 'metadata_to_dataframe',\n", 148 | " 'min',\n", 149 | " 'nnz',\n", 150 | " 'nonzero',\n", 151 | " 'nonzero_counts',\n", 152 | " 'norm',\n", 153 | " 'pa',\n", 154 | " 'partition',\n", 155 | " 'rankdata',\n", 156 | " 'reduce',\n", 157 | " 'remove_empty',\n", 158 | " 'shape',\n", 159 | " 'sort',\n", 160 | " 'sort_order',\n", 161 | " 'subsample',\n", 162 | " 'sum',\n", 163 | " 'table_id',\n", 164 | " 'to_dataframe',\n", 165 | " 'to_hdf5',\n", 166 | " 'to_json',\n", 167 | " 'to_tsv',\n", 168 | " 'transform',\n", 169 | " 'transpose',\n", 170 | " 'type',\n", 171 | " 'update_ids']" 172 | ] 173 | }, 174 | "execution_count": 6, 175 | "metadata": {}, 176 | "output_type": "execute_result" 177 | } 178 | ], 179 | "source": [ 180 | "dir(biom_table)" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 7, 186 | "metadata": {}, 187 | "outputs": [ 188 | { 189 | "data": { 190 | "text/plain": [ 191 | "Results (name = value)\n", 192 | "-----------------------------------------------------------------------------------------\n", 193 | "visualization = " 194 | ] 195 | }, 196 | "execution_count": 7, 197 | "metadata": {}, 198 | "output_type": "execute_result" 199 | } 200 | ], 201 | "source": [ 202 | "feature_table.actions.filter_samples()" 203 | ] 204 | } 205 | ], 206 | "metadata": { 207 | "kernelspec": { 208 | "display_name": "Python 3", 209 | "language": "python", 210 | "name": "python3" 211 | }, 212 | "language_info": { 213 | "codemirror_mode": { 214 | "name": "ipython", 215 | "version": 3 216 | }, 217 | "file_extension": ".py", 218 | "mimetype": "text/x-python", 219 | "name": "python", 220 | "nbconvert_exporter": "python", 221 | "pygments_lexer": "ipython3", 222 | "version": "3.6.7" 223 | } 224 | }, 225 | "nbformat": 4, 226 | "nbformat_minor": 2 227 | } 228 | -------------------------------------------------------------------------------- /scripts/.ipynb_checkpoints/qiime2_api-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from qiime2.plugins import feature_table\n", 10 | "from qiime2 import Artifact\n", 11 | "import biom\n", 12 | "import pandas as pd\n", 13 | "from qiime2.plugins import diversity\n", 14 | "from qiime2 import Metadata" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 3, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "# Load an artifact. 
In this case a feature table i.e ASV or OTU table\n", 24 | "unrarefied_table = Artifact.load('../04.filter_table/noChlr_noMitoch_noSingleton_filtered_table.qza')" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 5, 30 | "metadata": {}, 31 | "outputs": [ 32 | { 33 | "name": "stdout", 34 | "output_type": "stream", 35 | "text": [ 36 | "# Constructed from biom file\n", 37 | "#OTU ID\tSRR3202913\tSRR3202914\tSRR3202915\tSRR3202916\tSRR3202917\n", 38 | "65fb08bed0eeb24cfff33eeedfad522f\t0.0\t0.0\t0.0\t0.0\t0.0\n", 39 | "1ca86d303424bc40036ec3cdac72d8ad\t0.0\t0.0\t0.0\t0.0\t0.0\n", 40 | "4a0f23475dad7251063a5a39cf12d27f\t0.0\t0.0\t0.0\t0.0\t0.0\n", 41 | "36bb069fa6345961fc82056566252ace\t0.0\t0.0\t0.0\t0.0\t0.0\n", 42 | "58a85ad58122a7097ce75583f34d8626\t0.0\t0.0\t0.0\t0.0\t0.0\n" 43 | ] 44 | } 45 | ], 46 | "source": [ 47 | "biom_table = unrarefied_table.view(biom.Table)\n", 48 | "print(biom_table.head())" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 6, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "data": { 58 | "text/plain": [ 59 | "['__class__',\n", 60 | " '__delattr__',\n", 61 | " '__dict__',\n", 62 | " '__dir__',\n", 63 | " '__doc__',\n", 64 | " '__eq__',\n", 65 | " '__format__',\n", 66 | " '__ge__',\n", 67 | " '__getattribute__',\n", 68 | " '__getitem__',\n", 69 | " '__gt__',\n", 70 | " '__hash__',\n", 71 | " '__init__',\n", 72 | " '__init_subclass__',\n", 73 | " '__iter__',\n", 74 | " '__le__',\n", 75 | " '__lt__',\n", 76 | " '__module__',\n", 77 | " '__ne__',\n", 78 | " '__new__',\n", 79 | " '__reduce__',\n", 80 | " '__reduce_ex__',\n", 81 | " '__repr__',\n", 82 | " '__setattr__',\n", 83 | " '__sizeof__',\n", 84 | " '__str__',\n", 85 | " '__subclasshook__',\n", 86 | " '__weakref__',\n", 87 | " '_axis_to_num',\n", 88 | " '_cast_metadata',\n", 89 | " '_conv_to_self_type',\n", 90 | " '_data',\n", 91 | " '_data_equality',\n", 92 | " '_extract_data_from_tsv',\n", 93 | " '_get_col',\n", 94 | " '_get_row',\n", 95 | " '_get_sparse_data',\n", 96 | " '_index',\n", 97 | " '_index_ids',\n", 98 | " '_intersect_id_order',\n", 99 | " '_invert_axis',\n", 100 | " '_iter_obs',\n", 101 | " '_iter_samp',\n", 102 | " '_obs_index',\n", 103 | " '_observation_group_metadata',\n", 104 | " '_observation_ids',\n", 105 | " '_observation_metadata',\n", 106 | " '_sample_group_metadata',\n", 107 | " '_sample_ids',\n", 108 | " '_sample_index',\n", 109 | " '_sample_metadata',\n", 110 | " '_to_dense',\n", 111 | " '_to_sparse',\n", 112 | " '_union_id_order',\n", 113 | " 'add_group_metadata',\n", 114 | " 'add_metadata',\n", 115 | " 'align_to',\n", 116 | " 'collapse',\n", 117 | " 'concat',\n", 118 | " 'copy',\n", 119 | " 'create_date',\n", 120 | " 'data',\n", 121 | " 'del_metadata',\n", 122 | " 'delimited_self',\n", 123 | " 'descriptive_equality',\n", 124 | " 'dtype',\n", 125 | " 'exists',\n", 126 | " 'filter',\n", 127 | " 'format_version',\n", 128 | " 'from_hdf5',\n", 129 | " 'from_json',\n", 130 | " 'from_tsv',\n", 131 | " 'generated_by',\n", 132 | " 'get_table_density',\n", 133 | " 'get_value_by_ids',\n", 134 | " 'group_metadata',\n", 135 | " 'head',\n", 136 | " 'ids',\n", 137 | " 'index',\n", 138 | " 'is_empty',\n", 139 | " 'iter',\n", 140 | " 'iter_data',\n", 141 | " 'iter_pairwise',\n", 142 | " 'length',\n", 143 | " 'matrix_data',\n", 144 | " 'max',\n", 145 | " 'merge',\n", 146 | " 'metadata',\n", 147 | " 'metadata_to_dataframe',\n", 148 | " 'min',\n", 149 | " 'nnz',\n", 150 | " 'nonzero',\n", 151 | " 'nonzero_counts',\n", 152 | " 'norm',\n", 153 | " 'pa',\n", 154 
| " 'partition',\n", 155 | " 'rankdata',\n", 156 | " 'reduce',\n", 157 | " 'remove_empty',\n", 158 | " 'shape',\n", 159 | " 'sort',\n", 160 | " 'sort_order',\n", 161 | " 'subsample',\n", 162 | " 'sum',\n", 163 | " 'table_id',\n", 164 | " 'to_dataframe',\n", 165 | " 'to_hdf5',\n", 166 | " 'to_json',\n", 167 | " 'to_tsv',\n", 168 | " 'transform',\n", 169 | " 'transpose',\n", 170 | " 'type',\n", 171 | " 'update_ids']" 172 | ] 173 | }, 174 | "execution_count": 6, 175 | "metadata": {}, 176 | "output_type": "execute_result" 177 | } 178 | ], 179 | "source": [ 180 | "dir(biom_table)" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 7, 186 | "metadata": {}, 187 | "outputs": [ 188 | { 189 | "data": { 190 | "text/plain": [ 191 | "Results (name = value)\n", 192 | "-----------------------------------------------------------------------------------------\n", 193 | "visualization = " 194 | ] 195 | }, 196 | "execution_count": 7, 197 | "metadata": {}, 198 | "output_type": "execute_result" 199 | } 200 | ], 201 | "source": [ 202 | "feature_table.actions.filter_samples()" 203 | ] 204 | } 205 | ], 206 | "metadata": { 207 | "kernelspec": { 208 | "display_name": "Python 3", 209 | "language": "python", 210 | "name": "python3" 211 | }, 212 | "language_info": { 213 | "codemirror_mode": { 214 | "name": "ipython", 215 | "version": 3 216 | }, 217 | "file_extension": ".py", 218 | "mimetype": "text/x-python", 219 | "name": "python", 220 | "nbconvert_exporter": "python", 221 | "pygments_lexer": "ipython3", 222 | "version": "3.6.7" 223 | } 224 | }, 225 | "nbformat": 4, 226 | "nbformat_minor": 2 227 | } 228 | -------------------------------------------------------------------------------- /examples/single-MANIFEST: -------------------------------------------------------------------------------- 1 | sample-id,absolute-filepath,direction 2 | 1A,$PWD/sequence_data/Osnat001-1-A_S1_L001_R1_001.fastq.gz,forward 3 | 2A,$PWD/sequence_data/Osnat002-2-A_S2_L001_R1_001.fastq.gz,forward 4 | 3A,$PWD/sequence_data/Osnat003-3-A_S3_L001_R1_001.fastq.gz,forward 5 | 4A,$PWD/sequence_data/Osnat004-4-A_S4_L001_R1_001.fastq.gz,forward 6 | 5A,$PWD/sequence_data/Osnat005-5-A_S5_L001_R1_001.fastq.gz,forward 7 | 6A,$PWD/sequence_data/Osnat006-6-A_S6_L001_R1_001.fastq.gz,forward 8 | 7A,$PWD/sequence_data/Osnat007-7-A_S7_L001_R1_001.fastq.gz,forward 9 | 8A,$PWD/sequence_data/Osnat008-8-A_S8_L001_R1_001.fastq.gz,forward 10 | 9A,$PWD/sequence_data/Osnat009-9-A_S9_L001_R1_001.fastq.gz,forward 11 | 10A,$PWD/sequence_data/Osnat010-10-A_S10_L001_R1_001.fastq.gz,forward 12 | 11A,$PWD/sequence_data/Osnat011-11-A_S11_L001_R1_001.fastq.gz,forward 13 | 12A,$PWD/sequence_data/Osnat012-12-A_S12_L001_R1_001.fastq.gz,forward 14 | 13A,$PWD/sequence_data/Osnat013-13-A_S13_L001_R1_001.fastq.gz,forward 15 | 14A,$PWD/sequence_data/Osnat014-14-A_S14_L001_R1_001.fastq.gz,forward 16 | 15A,$PWD/sequence_data/Osnat015-15-A_S15_L001_R1_001.fastq.gz,forward 17 | 16A,$PWD/sequence_data/Osnat016-16-A_S16_L001_R1_001.fastq.gz,forward 18 | 17A,$PWD/sequence_data/Osnat017-17-A_S17_L001_R1_001.fastq.gz,forward 19 | 18A,$PWD/sequence_data/Osnat018-18-A_S18_L001_R1_001.fastq.gz,forward 20 | 19A,$PWD/sequence_data/Osnat019-19-A_S19_L001_R1_001.fastq.gz,forward 21 | 20A,$PWD/sequence_data/Osnat020-20-A_S20_L001_R1_001.fastq.gz,forward 22 | 22A,$PWD/sequence_data/Osnat021-22-A_S21_L001_R1_001.fastq.gz,forward 23 | 23A,$PWD/sequence_data/Osnat022-23-A_S22_L001_R1_001.fastq.gz,forward 24 | 24A,$PWD/sequence_data/Osnat023-24-A_S23_L001_R1_001.fastq.gz,forward 25 
| 25A,$PWD/sequence_data/Osnat024-25-A_S24_L001_R1_001.fastq.gz,forward 26 | 26A,$PWD/sequence_data/Osnat025-26-A_S25_L001_R1_001.fastq.gz,forward 27 | 27A,$PWD/sequence_data/Osnat026-27-A_S26_L001_R1_001.fastq.gz,forward 28 | 28A,$PWD/sequence_data/Osnat027-28-A_S27_L001_R1_001.fastq.gz,forward 29 | 29A,$PWD/sequence_data/Osnat028-29-A_S28_L001_R1_001.fastq.gz,forward 30 | 30A,$PWD/sequence_data/Osnat029-30-A_S29_L001_R1_001.fastq.gz,forward 31 | 31A,$PWD/sequence_data/Osnat030-31-A_S30_L001_R1_001.fastq.gz,forward 32 | 32A,$PWD/sequence_data/Osnat031-32-A_S31_L001_R1_001.fastq.gz,forward 33 | 33A,$PWD/sequence_data/Osnat032-33-A_S32_L001_R1_001.fastq.gz,forward 34 | 34A,$PWD/sequence_data/Osnat033-34-A_S33_L001_R1_001.fastq.gz,forward 35 | 35A,$PWD/sequence_data/Osnat034-35-A_S34_L001_R1_001.fastq.gz,forward 36 | 36A,$PWD/sequence_data/Osnat035-36-A_S35_L001_R1_001.fastq.gz,forward 37 | 37A,$PWD/sequence_data/Osnat036-37-A_S36_L001_R1_001.fastq.gz,forward 38 | 38A,$PWD/sequence_data/Osnat037-38-A_S37_L001_R1_001.fastq.gz,forward 39 | 39A,$PWD/sequence_data/Osnat038-39-A_S38_L001_R1_001.fastq.gz,forward 40 | 40A,$PWD/sequence_data/Osnat039-40-A_S39_L001_R1_001.fastq.gz,forward 41 | 41A,$PWD/sequence_data/Osnat040-41-A_S40_L001_R1_001.fastq.gz,forward 42 | 42A,$PWD/sequence_data/Osnat041-42-A_S41_L001_R1_001.fastq.gz,forward 43 | 43A,$PWD/sequence_data/Osnat042-43-A_S42_L001_R1_001.fastq.gz,forward 44 | 44A,$PWD/sequence_data/Osnat043-44-A_S43_L001_R1_001.fastq.gz,forward 45 | 45A,$PWD/sequence_data/Osnat044-45-A_S44_L001_R1_001.fastq.gz,forward 46 | 46A,$PWD/sequence_data/Osnat045-46-A_S45_L001_R1_001.fastq.gz,forward 47 | 47A,$PWD/sequence_data/Osnat046-47-A_S46_L001_R1_001.fastq.gz,forward 48 | 48A,$PWD/sequence_data/Osnat047-48-A_S47_L001_R1_001.fastq.gz,forward 49 | 49A,$PWD/sequence_data/Osnat048-49-A_S48_L001_R1_001.fastq.gz,forward 50 | 50A,$PWD/sequence_data/Osnat049-50-A_S49_L001_R1_001.fastq.gz,forward 51 | 51A,$PWD/sequence_data/Osnat050-51-A_S50_L001_R1_001.fastq.gz,forward 52 | 52A,$PWD/sequence_data/Osnat051-52-A_S51_L001_R1_001.fastq.gz,forward 53 | 53A,$PWD/sequence_data/Osnat052-53-A_S52_L001_R1_001.fastq.gz,forward 54 | 54A,$PWD/sequence_data/Osnat053-54-A_S53_L001_R1_001.fastq.gz,forward 55 | 9A-2,$PWD/sequence_data/Osnat054-9-A-2_S54_L001_R1_001.fastq.gz,forward 56 | 10A-2,$PWD/sequence_data/Osnat055-10-A-2_S55_L001_R1_001.fastq.gz,forward 57 | 11A-2,$PWD/sequence_data/Osnat056-11-A-2_S56_L001_R1_001.fastq.gz,forward 58 | 12A-2,$PWD/sequence_data/Osnat057-12-A-2_S57_L001_R1_001.fastq.gz,forward 59 | 13A-2,$PWD/sequence_data/Osnat058-13-A-2_S58_L001_R1_001.fastq.gz,forward 60 | 14A-2,$PWD/sequence_data/Osnat059-14-A-2_S59_L001_R1_001.fastq.gz,forward 61 | 15A-2,$PWD/sequence_data/Osnat060-15-A-2_S60_L001_R1_001.fastq.gz,forward 62 | 16A-2,$PWD/sequence_data/Osnat061-16-A-2_S61_L001_R1_001.fastq.gz,forward 63 | 17A-2,$PWD/sequence_data/Osnat062-17-A-2_S62_L001_R1_001.fastq.gz,forward 64 | 18A-2,$PWD/sequence_data/Osnat063-18-A-2_S63_L001_R1_001.fastq.gz,forward 65 | 19A-2,$PWD/sequence_data/Osnat064-19-A-2_S64_L001_R1_001.fastq.gz,forward 66 | 20A-2,$PWD/sequence_data/Osnat065-20-A-2_S65_L001_R1_001.fastq.gz,forward 67 | 21A-2,$PWD/sequence_data/Osnat066-21-A-2_S66_L001_R1_001.fastq.gz,forward 68 | 22A-2,$PWD/sequence_data/Osnat067-22-A-2_S67_L001_R1_001.fastq.gz,forward 69 | 23A-2,$PWD/sequence_data/Osnat068-23-A-2_S68_L001_R1_001.fastq.gz,forward 70 | 25A-2,$PWD/sequence_data/Osnat069-25-A-2_S69_L001_R1_001.fastq.gz,forward 71 | 
26A-2,$PWD/sequence_data/Osnat070-26-A-2_S70_L001_R1_001.fastq.gz,forward 72 | 27A-2,$PWD/sequence_data/Osnat071-27-A-2_S71_L001_R1_001.fastq.gz,forward 73 | 28A-2,$PWD/sequence_data/Osnat072-28-A-2_S72_L001_R1_001.fastq.gz,forward 74 | 29A-2,$PWD/sequence_data/Osnat073-29-A-2_S73_L001_R1_001.fastq.gz,forward 75 | 30A-2,$PWD/sequence_data/Osnat074-30-A-2_S74_L001_R1_001.fastq.gz,forward 76 | 31A-2,$PWD/sequence_data/Osnat075-31-A-2_S75_L001_R1_001.fastq.gz,forward 77 | 32A-2,$PWD/sequence_data/Osnat076-32-A-2_S76_L001_R1_001.fastq.gz,forward 78 | 33A-2,$PWD/sequence_data/Osnat077-33-A-2_S77_L001_R1_001.fastq.gz,forward 79 | 34A-2,$PWD/sequence_data/Osnat078-34-A-2_S78_L001_R1_001.fastq.gz,forward 80 | 35A-2,$PWD/sequence_data/Osnat079-35-A-2_S79_L001_R1_001.fastq.gz,forward 81 | 36A-2,$PWD/sequence_data/Osnat080-36-A-2_S80_L001_R1_001.fastq.gz,forward 82 | 37A-2,$PWD/sequence_data/Osnat081-37-A-2_S81_L001_R1_001.fastq.gz,forward 83 | 38A-2,$PWD/sequence_data/Osnat082-38-A-2_S82_L001_R1_001.fastq.gz,forward 84 | 39A-2,$PWD/sequence_data/Osnat083-39-A-2_S83_L001_R1_001.fastq.gz,forward 85 | 40A-2,$PWD/sequence_data/Osnat084-40-A-2_S84_L001_R1_001.fastq.gz,forward 86 | 41A-2,$PWD/sequence_data/Osnat085-41-A-2_S85_L001_R1_001.fastq.gz,forward 87 | 42A-2,$PWD/sequence_data/Osnat086-42-A-2_S86_L001_R1_001.fastq.gz,forward 88 | 43A-2,$PWD/sequence_data/Osnat087-43-A-2_S87_L001_R1_001.fastq.gz,forward 89 | 44A-2,$PWD/sequence_data/Osnat088-44-A-2_S88_L001_R1_001.fastq.gz,forward 90 | M-1,$PWD/sequence_data/Osnat157-M-1_S157_L001_R1_001.fastq.gz,forward 91 | M-2,$PWD/sequence_data/Osnat158-M-2_S158_L001_R1_001.fastq.gz,forward 92 | M-3,$PWD/sequence_data/Osnat159-M-3_S159_L001_R1_001.fastq.gz,forward 93 | M-NC,$PWD/sequence_data/Osnat160-M-_S160_L001_R1_001.fastq.gz,forward 94 | -------------------------------------------------------------------------------- /scripts/ancom_differential_abundance.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N diff_abundance 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 40 9 | 10 | 11 | set -e 12 | 13 | source activate qiime2-2020.6 14 | export PERL5LIB='/gpfs0/bioinfo/users/obayomi/miniconda3/envs/qiime2-2020.6/lib/site_perl/5.26.2/x86_64-linux-thread-multi' 15 | 16 | 17 | TAXON_LEVELS=(2 3 4 5 6) 18 | 19 | #FEATURE_TABLE_DIR=('05.filter_table/dada2' '05.filter_table/dada2' '05.filter_table/deblur/' '05.filter_table/deblur/' '05.filter_table/dada2/indoors' '05.filter_table/dada2/indoors' '05.filter_table/deblur/indoors' '05.filter_table/deblur/indoors' '05.filter_table/dada2/outdoors' '05.filter_table/dada2/outdoors' '05.filter_table/deblur/outdoors' '05.filter_table/deblur/outdoors') 20 | 21 | #PREFIX=('se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined') 22 | 23 | #METADATA=('00.mapping/combined.tsv' '00.mapping/combined.tsv' '00.mapping/combined.tsv' '00.mapping/combined.tsv' '00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv') 24 | 25 | #OUT_DIR=('09.differential_abundance/dada2' '09.differential_abundance/dada2' '09.differential_abundance/deblur' '09.differential_abundance/deblur' '09.differential_abundance/dada2/indoors' '09.differential_abundance/dada2/indoors' '09.differential_abundance/deblur/indoors' 
'09.differential_abundance/deblur/indoors' '09.differential_abundance/dada2/outdoors' '09.differential_abundance/dada2/outdoors' '09.differential_abundance/deblur/outdoors' '09.differential_abundance/deblur/outdoors') 26 | 27 | #METADATA_COLUMN=('treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment') 28 | 29 | #TAXONOMY_DIR=('04.assign_taxonomy/dada2' '04.assign_taxonomy/dada2' '04.assign_taxonomy/deblur' '04.assign_taxonomy/deblur' '04.assign_taxonomy/dada2' '04.assign_taxonomy/dada2' '04.assign_taxonomy/deblur' '04.assign_taxonomy/deblur' '04.assign_taxonomy/dada2' '04.assign_taxonomy/dada2' '04.assign_taxonomy/deblur' '04.assign_taxonomy/deblur') 30 | 31 | 32 | ##################################################################################################################################################### 33 | 34 | # Dada2 Reanalysis modified maxEE and read trunc length 35 | #TAXONOMY_DIR=('04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2') 36 | 37 | #FEATURE_TABLE_DIR=('05.redo_filter_table/dada2' '05.redo_filter_table/dada2' '05.redo_filter_table/dada2' '05.redo_filter_table/dada2/indoors' '05.redo_filter_table/dada2/indoors' '05.redo_filter_table/dada2/indoors' '05.redo_filter_table/dada2/outdoors' '05.redo_filter_table/dada2/outdoors' '05.redo_filter_table/dada2/outdoors' '05.redo_filter_table/dada2/mock' '05.redo_filter_table/dada2/mock' '05.redo_filter_table/dada2/mock') 38 | 39 | #PREFIX=('se' 'pear-joined' 'pe' 'se' 'pear-joined' 'pe' 'se' 'pear-joined' 'pe' 'se' 'pear-joined' 'pe') 40 | 41 | #METADATA=('00.mapping/combined.tsv' '00.mapping/combined.tsv' '00.mapping/pe-dada2/combined.tsv' '00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/pe-dada2/indoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/pe-dada2/outdoors.tsv' '00.mapping/mock.tsv' '00.mapping/mock.tsv' '00.mapping/pe-dada2/mock.tsv') 42 | 43 | #METADATA_COLUMN=('treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment') 44 | 45 | 46 | #OUT_DIR=('09.redo_differential_abundance/dada2' '09.redo_differential_abundance/dada2' '09.redo_differential_abundance/dada2' '09.redo_differential_abundance/dada2/indoors' '09.redo_differential_abundance/dada2/indoors' '09.redo_differential_abundance/dada2/indoors' '09.redo_differential_abundance/dada2/outdoors' '09.redo_differential_abundance/dada2/outdoors' '09.redo_differential_abundance/dada2/outdoors' '09.redo_differential_abundance/dada2/mock' '09.redo_differential_abundance/dada2/mock' '09.redo_differential_abundance/dada2/mock') 47 | 48 | 49 | ################################################################################################################################################## 50 | 51 | TAXONOMY_DIR=(04.{,redo_}assign_taxonomy/dada2{,,}) 52 | FEATURE_TABLE_DIR=(05.{,redo_}filter_table/dada2/{indoors,outdoors,basins}/) 53 | PREFIX=($( for i in {1..6}; do echo 'se'; done)) 54 | METADATA=($(for i in {1..2}; do echo 00.mapping/{indoors,outdoors,basins}-edited.tsv; done)) 55 | METADATA_COLUMN=($( for i in {1..6}; do echo 'treatment'; 
done)) 56 | OUT_DIR=(09.{,redo_}differential_abundance/dada2/{indoors,outdoors,basins}) 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | # Differential abundance testing using ANCOM 65 | # At the ASV level 66 | # Add pseudocount to ASV table because ANCOM can't deal with zero counts 67 | parallel --jobs 0 --link qiime composition add-pseudocount \ 68 | --i-table {1}/{2}-filtered_table.qza \ 69 | --o-composition-table {3}/{2}-composition-table.qza \ 70 | ::: ${FEATURE_TABLE_DIR[*]} ::: ${PREFIX[*]} ::: ${OUT_DIR[*]} 71 | 72 | # Apply ANCOM to identify ASVs/OTUs that differ in abundance 73 | parallel --jobs 0 --link qiime composition ancom \ 74 | --i-table {3}/{1}-composition-table.qza \ 75 | --m-metadata-file {2} \ 76 | --m-metadata-column {4} \ 77 | --o-visualization {3}/{1}-{4}-ancom.qzv \ 78 | ::: ${PREFIX[*]} ::: ${METADATA[*]} ::: ${OUT_DIR[*]} ::: ${METADATA_COLUMN[*]} 79 | 80 | 81 | 82 | for TAXON_LEVEL in ${TAXON_LEVELS[*]}; do 83 | 84 | # At a specific taxonomy level - here at the genus level, i.e. level 6 (L6) 85 | # 1. Collapse the feature table at the taxonomy level of interest 86 | parallel --jobs 0 --link qiime taxa collapse \ 87 | --i-table {1}/{2}-filtered_table.qza \ 88 | --i-taxonomy {4}/{2}-taxonomy.qza \ 89 | --p-level ${TAXON_LEVEL} \ 90 | --o-collapsed-table {3}/{2}-L${TAXON_LEVEL}-filtered_table.qza \ 91 | ::: ${FEATURE_TABLE_DIR[*]} ::: ${PREFIX[*]} ::: ${OUT_DIR[*]} ::: ${TAXONOMY_DIR[*]} 92 | 93 | 94 | # 2. Add pseudocount to ASV table because ANCOM can't deal with zero counts 95 | parallel --jobs 0 --link qiime composition add-pseudocount \ 96 | --i-table {2}/{1}-L${TAXON_LEVEL}-filtered_table.qza \ 97 | --o-composition-table {2}/{1}-L${TAXON_LEVEL}-composition-table.qza \ 98 | ::: ${PREFIX[*]} ::: ${OUT_DIR[*]} 99 | 100 | # 3. Apply ANCOM to identify ASVs/OTUs that differ in abundance 101 | parallel --jobs 0 --link qiime composition ancom \ 102 | --i-table {3}/{1}-L${TAXON_LEVEL}-composition-table.qza \ 103 | --m-metadata-file {2} \ 104 | --m-metadata-column {4} \ 105 | --o-visualization {3}/{1}-L${TAXON_LEVEL}-{4}-ancom.qzv \ 106 | ::: ${PREFIX[*]} ::: ${METADATA[*]} ::: ${OUT_DIR[*]} ::: ${METADATA_COLUMN[*]} 107 | 108 | done 109 | 110 | -------------------------------------------------------------------------------- /examples/merge-MANIFEST: -------------------------------------------------------------------------------- 1 | sample-id,absolute-filepath,direction 2 | 1A,$PWD/stitched_reads/Osnat001-1-A_S1_L001.assembled.fastq.gz,forward 3 | 2A,$PWD/stitched_reads/Osnat002-2-A_S2_L001.assembled.fastq.gz,forward 4 | 3A,$PWD/stitched_reads/Osnat003-3-A_S3_L001.assembled.fastq.gz,forward 5 | 4A,$PWD/stitched_reads/Osnat004-4-A_S4_L001.assembled.fastq.gz,forward 6 | 5A,$PWD/stitched_reads/Osnat005-5-A_S5_L001.assembled.fastq.gz,forward 7 | 6A,$PWD/stitched_reads/Osnat006-6-A_S6_L001.assembled.fastq.gz,forward 8 | 7A,$PWD/stitched_reads/Osnat007-7-A_S7_L001.assembled.fastq.gz,forward 9 | 8A,$PWD/stitched_reads/Osnat008-8-A_S8_L001.assembled.fastq.gz,forward 10 | 9A,$PWD/stitched_reads/Osnat009-9-A_S9_L001.assembled.fastq.gz,forward 11 | 10A,$PWD/stitched_reads/Osnat010-10-A_S10_L001.assembled.fastq.gz,forward 12 | 11A,$PWD/stitched_reads/Osnat011-11-A_S11_L001.assembled.fastq.gz,forward 13 | 12A,$PWD/stitched_reads/Osnat012-12-A_S12_L001.assembled.fastq.gz,forward 14 | 13A,$PWD/stitched_reads/Osnat013-13-A_S13_L001.assembled.fastq.gz,forward 15 | 14A,$PWD/stitched_reads/Osnat014-14-A_S14_L001.assembled.fastq.gz,forward 16 | 
15A,$PWD/stitched_reads/Osnat015-15-A_S15_L001.assembled.fastq.gz,forward 17 | 16A,$PWD/stitched_reads/Osnat016-16-A_S16_L001.assembled.fastq.gz,forward 18 | 17A,$PWD/stitched_reads/Osnat017-17-A_S17_L001.assembled.fastq.gz,forward 19 | 18A,$PWD/stitched_reads/Osnat018-18-A_S18_L001.assembled.fastq.gz,forward 20 | 19A,$PWD/stitched_reads/Osnat019-19-A_S19_L001.assembled.fastq.gz,forward 21 | 20A,$PWD/stitched_reads/Osnat020-20-A_S20_L001.assembled.fastq.gz,forward 22 | 22A,$PWD/stitched_reads/Osnat021-22-A_S21_L001.assembled.fastq.gz,forward 23 | 23A,$PWD/stitched_reads/Osnat022-23-A_S22_L001.assembled.fastq.gz,forward 24 | 24A,$PWD/stitched_reads/Osnat023-24-A_S23_L001.assembled.fastq.gz,forward 25 | 25A,$PWD/stitched_reads/Osnat024-25-A_S24_L001.assembled.fastq.gz,forward 26 | 26A,$PWD/stitched_reads/Osnat025-26-A_S25_L001.assembled.fastq.gz,forward 27 | 27A,$PWD/stitched_reads/Osnat026-27-A_S26_L001.assembled.fastq.gz,forward 28 | 28A,$PWD/stitched_reads/Osnat027-28-A_S27_L001.assembled.fastq.gz,forward 29 | 29A,$PWD/stitched_reads/Osnat028-29-A_S28_L001.assembled.fastq.gz,forward 30 | 30A,$PWD/stitched_reads/Osnat029-30-A_S29_L001.assembled.fastq.gz,forward 31 | 31A,$PWD/stitched_reads/Osnat030-31-A_S30_L001.assembled.fastq.gz,forward 32 | 32A,$PWD/stitched_reads/Osnat031-32-A_S31_L001.assembled.fastq.gz,forward 33 | 33A,$PWD/stitched_reads/Osnat032-33-A_S32_L001.assembled.fastq.gz,forward 34 | 34A,$PWD/stitched_reads/Osnat033-34-A_S33_L001.assembled.fastq.gz,forward 35 | 35A,$PWD/stitched_reads/Osnat034-35-A_S34_L001.assembled.fastq.gz,forward 36 | 36A,$PWD/stitched_reads/Osnat035-36-A_S35_L001.assembled.fastq.gz,forward 37 | 37A,$PWD/stitched_reads/Osnat036-37-A_S36_L001.assembled.fastq.gz,forward 38 | 38A,$PWD/stitched_reads/Osnat037-38-A_S37_L001.assembled.fastq.gz,forward 39 | 39A,$PWD/stitched_reads/Osnat038-39-A_S38_L001.assembled.fastq.gz,forward 40 | 40A,$PWD/stitched_reads/Osnat039-40-A_S39_L001.assembled.fastq.gz,forward 41 | 41A,$PWD/stitched_reads/Osnat040-41-A_S40_L001.assembled.fastq.gz,forward 42 | 42A,$PWD/stitched_reads/Osnat041-42-A_S41_L001.assembled.fastq.gz,forward 43 | 43A,$PWD/stitched_reads/Osnat042-43-A_S42_L001.assembled.fastq.gz,forward 44 | 44A,$PWD/stitched_reads/Osnat043-44-A_S43_L001.assembled.fastq.gz,forward 45 | 45A,$PWD/stitched_reads/Osnat044-45-A_S44_L001.assembled.fastq.gz,forward 46 | 46A,$PWD/stitched_reads/Osnat045-46-A_S45_L001.assembled.fastq.gz,forward 47 | 47A,$PWD/stitched_reads/Osnat046-47-A_S46_L001.assembled.fastq.gz,forward 48 | 48A,$PWD/stitched_reads/Osnat047-48-A_S47_L001.assembled.fastq.gz,forward 49 | 49A,$PWD/stitched_reads/Osnat048-49-A_S48_L001.assembled.fastq.gz,forward 50 | 50A,$PWD/stitched_reads/Osnat049-50-A_S49_L001.assembled.fastq.gz,forward 51 | 51A,$PWD/stitched_reads/Osnat050-51-A_S50_L001.assembled.fastq.gz,forward 52 | 52A,$PWD/stitched_reads/Osnat051-52-A_S51_L001.assembled.fastq.gz,forward 53 | 53A,$PWD/stitched_reads/Osnat052-53-A_S52_L001.assembled.fastq.gz,forward 54 | 54A,$PWD/stitched_reads/Osnat053-54-A_S53_L001.assembled.fastq.gz,forward 55 | 9A-2,$PWD/stitched_reads/Osnat054-9-A-2_S54_L001.assembled.fastq.gz,forward 56 | 10A-2,$PWD/stitched_reads/Osnat055-10-A-2_S55_L001.assembled.fastq.gz,forward 57 | 11A-2,$PWD/stitched_reads/Osnat056-11-A-2_S56_L001.assembled.fastq.gz,forward 58 | 12A-2,$PWD/stitched_reads/Osnat057-12-A-2_S57_L001.assembled.fastq.gz,forward 59 | 13A-2,$PWD/stitched_reads/Osnat058-13-A-2_S58_L001.assembled.fastq.gz,forward 60 | 
14A-2,$PWD/stitched_reads/Osnat059-14-A-2_S59_L001.assembled.fastq.gz,forward 61 | 15A-2,$PWD/stitched_reads/Osnat060-15-A-2_S60_L001.assembled.fastq.gz,forward 62 | 16A-2,$PWD/stitched_reads/Osnat061-16-A-2_S61_L001.assembled.fastq.gz,forward 63 | 17A-2,$PWD/stitched_reads/Osnat062-17-A-2_S62_L001.assembled.fastq.gz,forward 64 | 18A-2,$PWD/stitched_reads/Osnat063-18-A-2_S63_L001.assembled.fastq.gz,forward 65 | 19A-2,$PWD/stitched_reads/Osnat064-19-A-2_S64_L001.assembled.fastq.gz,forward 66 | 20A-2,$PWD/stitched_reads/Osnat065-20-A-2_S65_L001.assembled.fastq.gz,forward 67 | 21A-2,$PWD/stitched_reads/Osnat066-21-A-2_S66_L001.assembled.fastq.gz,forward 68 | 22A-2,$PWD/stitched_reads/Osnat067-22-A-2_S67_L001.assembled.fastq.gz,forward 69 | 23A-2,$PWD/stitched_reads/Osnat068-23-A-2_S68_L001.assembled.fastq.gz,forward 70 | 25A-2,$PWD/stitched_reads/Osnat069-25-A-2_S69_L001.assembled.fastq.gz,forward 71 | 26A-2,$PWD/stitched_reads/Osnat070-26-A-2_S70_L001.assembled.fastq.gz,forward 72 | 27A-2,$PWD/stitched_reads/Osnat071-27-A-2_S71_L001.assembled.fastq.gz,forward 73 | 28A-2,$PWD/stitched_reads/Osnat072-28-A-2_S72_L001.assembled.fastq.gz,forward 74 | 29A-2,$PWD/stitched_reads/Osnat073-29-A-2_S73_L001.assembled.fastq.gz,forward 75 | 30A-2,$PWD/stitched_reads/Osnat074-30-A-2_S74_L001.assembled.fastq.gz,forward 76 | 31A-2,$PWD/stitched_reads/Osnat075-31-A-2_S75_L001.assembled.fastq.gz,forward 77 | 32A-2,$PWD/stitched_reads/Osnat076-32-A-2_S76_L001.assembled.fastq.gz,forward 78 | 33A-2,$PWD/stitched_reads/Osnat077-33-A-2_S77_L001.assembled.fastq.gz,forward 79 | 34A-2,$PWD/stitched_reads/Osnat078-34-A-2_S78_L001.assembled.fastq.gz,forward 80 | 35A-2,$PWD/stitched_reads/Osnat079-35-A-2_S79_L001.assembled.fastq.gz,forward 81 | 36A-2,$PWD/stitched_reads/Osnat080-36-A-2_S80_L001.assembled.fastq.gz,forward 82 | 37A-2,$PWD/stitched_reads/Osnat081-37-A-2_S81_L001.assembled.fastq.gz,forward 83 | 38A-2,$PWD/stitched_reads/Osnat082-38-A-2_S82_L001.assembled.fastq.gz,forward 84 | 39A-2,$PWD/stitched_reads/Osnat083-39-A-2_S83_L001.assembled.fastq.gz,forward 85 | 40A-2,$PWD/stitched_reads/Osnat084-40-A-2_S84_L001.assembled.fastq.gz,forward 86 | 41A-2,$PWD/stitched_reads/Osnat085-41-A-2_S85_L001.assembled.fastq.gz,forward 87 | 42A-2,$PWD/stitched_reads/Osnat086-42-A-2_S86_L001.assembled.fastq.gz,forward 88 | 43A-2,$PWD/stitched_reads/Osnat087-43-A-2_S87_L001.assembled.fastq.gz,forward 89 | 44A-2,$PWD/stitched_reads/Osnat088-44-A-2_S88_L001.assembled.fastq.gz,forward 90 | M-1,$PWD/stitched_reads/Osnat157-M-1_S157_L001.assembled.fastq.gz,forward 91 | M-2,$PWD/stitched_reads/Osnat158-M-2_S158_L001.assembled.fastq.gz,forward 92 | M-3,$PWD/stitched_reads/Osnat159-M-3_S159_L001.assembled.fastq.gz,forward 93 | M-NC,$PWD/stitched_reads/Osnat160-M-_S160_L001.assembled.fastq.gz,forward 94 | -------------------------------------------------------------------------------- /scripts/run_pear.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use warnings; 4 | use strict; 5 | use File::Basename; 6 | use Getopt::Long; 7 | use Pod::Usage; 8 | use Parallel::ForkManager; 9 | use List::Util qw(min max sum); 10 | 11 | 12 | my ($parallel,$help); 13 | my $out_dir='./'; 14 | my $full_log='pear_full_log.txt'; 15 | my $summary_log='pear_summary_log.txt'; 16 | my $gzip_output; 17 | 18 | my $res = GetOptions("out_dir=s" => \$out_dir, 19 | "parallel:i"=>\$parallel, 20 | "full_log=s"=>\$full_log, 21 | "summary_log=s"=>\$summary_log, 22 | "gzip_output"=>\$gzip_output, 23 | 
"help"=>\$help, 24 | )or pod2usage(2); 25 | 26 | pod2usage(-verbose=>2) if $help; 27 | 28 | my @files=@ARGV; 29 | 30 | pod2usage($0.': You must provide a list of fastq files to be merged.') unless @files; 31 | 32 | #make output directory 33 | system("mkdir -p $out_dir"); 34 | 35 | my $cpu_count=1; 36 | #if the option is set 37 | if(defined($parallel)){ 38 | #option is set but with no value then use the max number of proccessors 39 | if($parallel ==0){ 40 | #load this module dynamically 41 | eval("use Sys::CPU;"); 42 | $cpu_count=Sys::CPU::cpu_count(); 43 | }else{ 44 | $cpu_count=$parallel; 45 | } 46 | } 47 | 48 | 49 | my %paired_files; 50 | foreach my $file (@files){ 51 | my ($file_name,$dir)=fileparse($file); 52 | if($file_name =~ /(.+)_R([1|2])[_|\.]/){ 53 | $paired_files{$1}[$2-1]=$file; 54 | #attempt different naming scheme 55 | }elsif($file_name =~ /(.+)_([1|2])/){ 56 | $paired_files{$1}[$2-1]=$file; 57 | }else{ 58 | warn "Input file \"$file\" does not contain '_R1_' or '_R2_' (or alternatively '_R1.' or '_R2.')."; 59 | } 60 | } 61 | 62 | #clear the output log (and make sure it is writable) 63 | #open(my $FULL_LOG,'>',$full_log) || die "Can't write to log file: $full_log"; 64 | #close($FULL_LOG); 65 | 66 | 67 | foreach my $name (sort keys %paired_files){ 68 | unless(defined($paired_files{$name}[0]) && defined($paired_files{$name}[1])){ 69 | warn "Couldn't find matching paired end files for file starting with: $name"; 70 | next; 71 | } 72 | my $out_file=$out_dir.'/'.$name; 73 | #check if this has already been done 74 | my $assembled_out_file=$out_file.'.assembled.fastq'; 75 | if (-e $assembled_out_file || -e $assembled_out_file.'.gz'){ 76 | print "Skipping this sample because output file already exists: $assembled_out_file\n"; 77 | next; 78 | } 79 | my $cmd="/gpfs0/bioinfo/users/obayomi/pear/pear-0.9.11-linux-x86_64/bin/pear -f $paired_files{$name}[0] -r $paired_files{$name}[1] -j $cpu_count -o $out_file -m 480 -n 400 -t 400 -q 20 >>$full_log"; 80 | #-m 600 -n 400 -t 400 -q 20 - used this for the 16S sequences 81 | print $cmd,"\n"; 82 | die if system($cmd); 83 | 84 | #compress output files (if the flag is set) 85 | if($gzip_output){ 86 | my $gzip_cmd="pigz -p $cpu_count -f $out_file".'*'; 87 | print $gzip_cmd,"\n"; 88 | die if system($gzip_cmd); 89 | } 90 | } 91 | 92 | print "Creating PEAR summary log at: $summary_log \n"; 93 | my $min_assembled=create_summary_log($full_log,$summary_log); 94 | 95 | if($min_assembled < 90){ 96 | print "Finished! Warning!! one or more samples were less than 90% assembled! You should manually inspect the log file: $summary_log \n"; 97 | }else{ 98 | print "Finished! All samples assembled at 90% or greater. 
For more details you can check manually inspect the log file: $summary_log \n"; 99 | } 100 | 101 | sub mean { 102 | return sum(@_)/@_; 103 | } 104 | 105 | sub create_summary_log{ 106 | 107 | my $full_log=shift; 108 | my $summary_log=shift; 109 | open(my $FULL_LOG,'<',$full_log) || die "Can't read log file: $full_log"; 110 | 111 | open(my $SUMMARY_LOG,'>',$summary_log) || die "Can't create summary log file for writing: $summary_log"; 112 | my @samples; 113 | 114 | while (<$FULL_LOG>) { 115 | chomp; 116 | if (/Assembled reads/) { 117 | my $assembled_string=$_; 118 | my $discarded_string=<$FULL_LOG>; 119 | my $unassembled_string=<$FULL_LOG>; 120 | my $assembled_file_string=<$FULL_LOG>; 121 | my ($assembled_percent) = $assembled_string =~ /(\d+\.\d+)\%/; 122 | my ($discarded_percent) = $discarded_string =~ /(\d+\.\d+)\%/; 123 | my ($unassembled_percent) = $unassembled_string =~ /(\d+\.\d+)\%/; 124 | my ($assembled_file) = $assembled_file_string =~ /([\w|\-|\.]+)\.assembled/; 125 | push (@samples, [$assembled_file,$assembled_percent,$discarded_percent,$unassembled_percent]); 126 | } 127 | } 128 | 129 | #Add min, mean, and max as first three lines of output 130 | unshift @samples,['Max',sprintf("%.3f",max(map{$_->[1]}@samples)),sprintf("%.3f",max(map{$_->[2]}@samples)),sprintf("%.3f",max(map{$_->[3]}@samples))]; 131 | unshift @samples,['Mean',sprintf("%.3f",mean(map{$_->[1]}@samples)),sprintf("%.3f",mean(map{$_->[2]}@samples)),sprintf("%.3f",mean(map{$_->[3]}@samples))]; 132 | unshift @samples,['Min',sprintf("%.3f",min(map{$_->[1]}@samples)),sprintf("%.3f",min(map{$_->[2]}@samples)),sprintf("%.3f",min(map{$_->[3]}@samples))]; 133 | 134 | #print header 135 | print $SUMMARY_LOG join("\t","ID","Assembled","Discarded","Unassembled"),"\n"; 136 | 137 | #print out all the data 138 | foreach my $sample (@samples) { 139 | print $SUMMARY_LOG join("\t",@$sample),"\n"; 140 | } 141 | return sprintf("%.3f",min(map{$_->[1]}@samples)) 142 | } 143 | 144 | 145 | __END__ 146 | 147 | =head1 Name 148 | 149 | run_pear.pl - A simple wrapper for PEAR to stich paired-end reads 150 | 151 | =head1 USAGE 152 | 153 | run_pear.pl [-p [<# proc>] -o -h] 154 | 155 | E.g. 156 | 157 | #Note: Files must have "_R1_" and "_R2_" (or "_R1." and "_R2.") within the file name (or secondarily "_1" and "_2") 158 | 159 | run_pear.pl sample1_R1_001.fastq sample1_R2_001.fastq sample2_R1_001.fastq sample2_R2_001.fastq 160 | 161 | #Shorter way to do the same thing 162 | 163 | run_pear.pl *.fastq 164 | 165 | #Specify alternate location for output files (instead of default current directory) 166 | 167 | run_pear.pl -o stitched_reads *.fastq 168 | 169 | #Run in parallel and use all CPUs 170 | 171 | run_pear.pl *.fastq -p 172 | 173 | #Run in parallel limit to only 2 CPUs 174 | 175 | run_pear.pl *.fastq -p 2 176 | 177 | #Turn off gzip compression of output files 178 | 179 | run_pear.pl -g *.fastq 180 | 181 | =head1 OPTIONS 182 | 183 | =over 4 184 | 185 | =item B<-o, --out_dir > 186 | 187 | The name of the output directory to place all PEAR output files. 188 | 189 | =item B<-p, --parallel [<# of proc>]> 190 | 191 | Using this option without a value will use all CPUs on machine, while giving it a value will limit to that many CPUs. Without option only one CPU is used. 192 | 193 | =item B<-g, --gzip_output> 194 | 195 | Gzip the PEAR output files. 196 | 197 | =item B<-f, --full_log > 198 | 199 | The location to write the PEAR full log file. 
Default is "pear_full_log.txt" 200 | 201 | =item B<-s, --summary_log > 202 | 203 | The location to write teh PEAR summary log file. Default is "pear_summary_log.txt" 204 | 205 | =item B<-h, --help> 206 | 207 | Displays the entire help documentation. 208 | 209 | =back 210 | 211 | =head1 DESCRIPTION 212 | 213 | B This script allows for more automated running of the PEAR program on multiple fastq files. PEAR is used to stitch (or assemble) paired end reads together. The assumption is made that the paired end files have the same name with the forward reads being indicated by "_R1_" and the reverse being "_R2_" (they can also be "_R1." and "_R2."). If file names are not found matching these then an simpler label is attempted ("_1" and "_2"). 214 | 215 | The script allows the use of multiple threads. 216 | 217 | This script also captures the output statistics from PEAR and outputs them to a single "pear_full_log.txt"(by default). It also parses this and simplifies the output into "pear_summary_log.txt" (by default). 218 | 219 | By default, output files from PEAR are gzipped to save on space. 220 | 221 | Before use make sure you have installed the "pear" program so it is accesible from your PATH. 222 | 223 | =head1 AUTHOR 224 | 225 | Morgan Langille, Emorgan.g.i.langille@gmail.comE 226 | 227 | =cut 228 | 229 | -------------------------------------------------------------------------------- /scripts/taxa-plots.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #$ -S /bin/bash 3 | #$ -N make_bar_plots 4 | #$ -q bioinfo.q 5 | #$ -V 6 | #$ -cwd 7 | #$ -notify 8 | #$ -pe shared 40 9 | 10 | set -e 11 | 12 | source activate qiime2-2020.6 13 | export PERL5LIB='/gpfs0/bioinfo/users/obayomi/miniconda3/envs/qiime2-2020.6/lib/site_perl/5.26.2/x86_64-linux-thread-multi' 14 | export TEMPDIR='/gpfs0/bioinfo/users/obayomi/hinuman_analysis/18S_illumina/tmp/' TMPDIR='/gpfs0/bioinfo/users/obayomi/hinuman_analysis/18S_illumina/tmp/' 15 | 16 | 17 | 18 | #TAXONOMY_DIR=('04.assign_taxonomy/dada2' '04.assign_taxonomy/dada2' '04.assign_taxonomy/deblur' '04.assign_taxonomy/deblur' '04.assign_taxonomy/dada2' '04.assign_taxonomy/dada2' '04.assign_taxonomy/deblur' '04.assign_taxonomy/deblur' '04.assign_taxonomy/dada2' '04.assign_taxonomy/dada2' '04.assign_taxonomy/deblur' '04.assign_taxonomy/deblur' '04.assign_taxonomy/dada2' '04.assign_taxonomy/dada2' '04.assign_taxonomy/deblur' '04.assign_taxonomy/deblur') 19 | 20 | #FEATURE_TABLE_DIR=('05.filter_table/dada2' '05.filter_table/dada2' '05.filter_table/deblur/' '05.filter_table/deblur/' '05.filter_table/dada2/indoors' '05.filter_table/dada2/indoors' '05.filter_table/deblur/indoors' '05.filter_table/deblur/indoors' '05.filter_table/dada2/outdoors' '05.filter_table/dada2/outdoors' '05.filter_table/deblur/outdoors' '05.filter_table/deblur/outdoors' '05.filter_table/dada2/mock' '05.filter_table/dada2/mock' '05.filter_table/deblur/mock' '05.filter_table/deblur/mock') 21 | 22 | #PREFIX=('se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined' 'se' 'pear-joined') 23 | 24 | #METADATA=('00.mapping/combined.tsv' '00.mapping/combined.tsv' '00.mapping/combined.tsv' '00.mapping/combined.tsv' '00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/mock.tsv' '00.mapping/mock.tsv' '00.mapping/mock.tsv' 
'00.mapping/mock.tsv') 25 | 26 | #METADATA_COLUMN=('treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment') 27 | 28 | #MODE=('combined' 'combined' 'combined' 'combined' 'indoors' 'indoors' 'indoors' 'indoors' 'outdoors' 'outdoors' 'outdoors' 'outdoors' 'mock' 'mock' 'mock' 'mock') 29 | 30 | #GROUP_METADATA=('00.mapping/combined-treatment.tsv' '00.mapping/combined-treatment.tsv' '00.mapping/combined-treatment.tsv' '00.mapping/combined-treatment.tsv' '00.mapping/indoors-treatment.tsv' '00.mapping/indoors-treatment.tsv' '00.mapping/indoors-treatment.tsv' '00.mapping/indoors-treatment.tsv' '00.mapping/outdoors-treatment.tsv' '00.mapping/outdoors-treatment.tsv' '00.mapping/outdoors-treatment.tsv' '00.mapping/outdoors-treatment.tsv' '00.mapping/mock-treatment.tsv' '00.mapping/mock-treatment.tsv' '00.mapping/mock-treatment.tsv' '00.mapping/mock-treatment.tsv') 31 | 32 | #PLOT_DIR=('07.make_taxa_plots/dada2' '07.make_taxa_plots/dada2' '07.make_taxa_plots/deblur/' '07.make_taxa_plots/deblur/' '07.make_taxa_plots/dada2/indoors' '07.make_taxa_plots/dada2/indoors' '07.make_taxa_plots/deblur/indoors' '07.make_taxa_plots/deblur/indoors' '07.make_taxa_plots/dada2/outdoors' '07.make_taxa_plots/dada2/outdoors' '07.make_taxa_plots/deblur/outdoors' '07.make_taxa_plots/deblur/outdoors' '07.make_taxa_plots/dada2/mock' '07.make_taxa_plots/dada2/mock' '07.make_taxa_plots/deblur/mock' '07.make_taxa_plots/deblur/mock') 33 | 34 | 35 | ########################################################################################################################################################## 36 | 37 | 38 | 39 | # Dada2 Reanalysis modified maxEE and read trunc length 40 | #TAXONOMY_DIR=('04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2' '04.redo_assign_taxonomy/dada2') 41 | 42 | #FEATURE_TABLE_DIR=('05.redo_filter_table/dada2' '05.redo_filter_table/dada2' '05.redo_filter_table/dada2' '05.redo_filter_table/dada2/indoors' '05.redo_filter_table/dada2/indoors' '05.redo_filter_table/dada2/indoors' '05.redo_filter_table/dada2/outdoors' '05.redo_filter_table/dada2/outdoors' '05.redo_filter_table/dada2/outdoors' '05.redo_filter_table/dada2/mock' '05.redo_filter_table/dada2/mock' '05.redo_filter_table/dada2/mock') 43 | 44 | #PREFIX=('se' 'pear-joined' 'pe' 'se' 'pear-joined' 'pe' 'se' 'pear-joined' 'pe' 'se' 'pear-joined' 'pe') 45 | 46 | #METADATA=('00.mapping/combined.tsv' '00.mapping/combined.tsv' '00.mapping/pe-dada2/combined.tsv' '00.mapping/indoors.tsv' '00.mapping/indoors.tsv' '00.mapping/pe-dada2/indoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/outdoors.tsv' '00.mapping/pe-dada2/outdoors.tsv' '00.mapping/mock.tsv' '00.mapping/mock.tsv' '00.mapping/pe-dada2/mock.tsv') 47 | 48 | #METADATA_COLUMN=('treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment' 'treatment') 49 | 50 | #MODE=('combined' 'combined' 'combined' 'indoors' 'indoors' 'indoors' 'outdoors' 'outdoors' 'outdoors' 'mock' 'mock' 'mock') 51 | 52 | #GROUP_METADATA=('00.mapping/combined-treatment.tsv' '00.mapping/combined-treatment.tsv' 
'00.mapping/pe-dada2/combined-treatment.tsv' '00.mapping/indoors-treatment.tsv' '00.mapping/indoors-treatment.tsv' '00.mapping/pe-dada2/indoors-treatment.tsv' '00.mapping/outdoors-treatment.tsv' '00.mapping/outdoors-treatment.tsv' '00.mapping/pe-dada2/outdoors-treatment.tsv' '00.mapping/mock-treatment.tsv' '00.mapping/mock-treatment.tsv' '00.mapping/pe-dada2/mock-treatment.tsv') 53 | 54 | #PLOT_DIR=('07.redo_make_taxa_plots/dada2' '07.redo_make_taxa_plots/dada2' '07.redo_make_taxa_plots/dada2' '07.redo_make_taxa_plots/dada2/indoors' '07.redo_make_taxa_plots/dada2/indoors' '07.redo_make_taxa_plots/dada2/indoors' '07.redo_make_taxa_plots/dada2/outdoors' '07.redo_make_taxa_plots/dada2/outdoors' '07.redo_make_taxa_plots/dada2/outdoors' '07.redo_make_taxa_plots/dada2/mock' '07.redo_make_taxa_plots/dada2/mock' '07.redo_make_taxa_plots/dada2/mock') 55 | 56 | ###################################################################################################################################################### 57 | 58 | # Dada2 Reanalysis after splitting indoor samples and dropping some outdoor samples 59 | TAXONOMY_DIR=(04.{,redo_}assign_taxonomy/dada2{,,}) 60 | 61 | FEATURE_TABLE_DIR=(05.{,redo_}filter_table/dada2/{indoors,outdoors,basins}/) 62 | 63 | PREFIX=($( for i in {1..6}; do echo 'se'; done)) 64 | 65 | METADATA=($(for i in {1..2}; do echo 00.mapping/{indoors,outdoors,basins}-edited.tsv; done)) 66 | 67 | METADATA_COLUMN=($( for i in {1..6}; do echo 'treatment'; done)) 68 | 69 | MODE=($(for i in {1..2}; do echo {indoors,outdoors,basins}; done)) 70 | 71 | GROUP_METADATA=($(for i in {1..2}; do echo 00.mapping/{indoors-treatment,outdoors-treatment,basins-treatment}.tsv; done)) 72 | 73 | PLOT_DIR=(07.{,redo_}make_taxa_plots/dada2/{indoors,outdoors,basins}) 74 | 75 | 76 | 77 | 78 | # Sample bar plots 79 | parallel --jobs 0 --link qiime taxa barplot \ 80 | --i-table {1}/{2}-filtered_table.qza \ 81 | --i-taxonomy {6}/{2}-taxonomy.qza \ 82 | --m-metadata-file {3} \ 83 | --o-visualization {4}/{5}-samples-{2}-bar-plots.qzv \ 84 | ::: ${FEATURE_TABLE_DIR[*]} ::: ${PREFIX[*]} ::: ${METADATA[*]} ::: ${PLOT_DIR[*]} ::: ${MODE[*]} ::: ${TAXONOMY_DIR[*]} 85 | 86 | # Taxa bar plots of metadata group - here by treatment 87 | # group feature table (*-filtered_table.qza) by metadata column of interest 88 | parallel --jobs 0 --link qiime feature-table group \ 89 | --i-table {1}/{2}-filtered_table.qza \ 90 | --p-axis sample \ 91 | --m-metadata-file {3} \ 92 | --m-metadata-column {4} \ 93 | --p-mode sum \ 94 | --o-grouped-table {1}/{5}-{4}-{2}-filtered_table.qza \ 95 | ::: ${FEATURE_TABLE_DIR[*]} ::: ${PREFIX[*]} ::: ${METADATA[*]} ::: ${METADATA_COLUMN[*]} ::: ${MODE[*]} 96 | 97 | # Make bar plot of group table here by treatment 98 | # Make sure you create a new metadata with the group level names as sample-id 99 | parallel --jobs 0 --link qiime taxa barplot \ 100 | --i-table {1}/{5}-{4}-{2}-filtered_table.qza \ 101 | --i-taxonomy {6}/{2}-taxonomy.qza \ 102 | --m-metadata-file {3} \ 103 | --o-visualization {7}/{5}-{4}-{2}-bar-plots.qzv \ 104 | ::: ${FEATURE_TABLE_DIR[*]} ::: ${PREFIX[*]} ::: ${GROUP_METADATA[*]} ::: ${METADATA_COLUMN[*]} ::: ${MODE[*]} ::: ${TAXONOMY_DIR[*]} ::: ${PLOT_DIR[*]} 105 | 106 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Snakemake Workflow: Microbiome Amplicon (16S, 18S and ITS) sequence analysis using Qiime2 and PICRUSt2. 
2 | 3 | QIIME2-workflow 4 | 5 | This workflow performs microbiome analysis using QIIME2 and PICRUSt2 for functional annotation. Functional annotation is only performed for 16S amplicon sequences. 6 | 7 | Please note the following: 8 | 9 | 1. I analyze my data with qiime2 version 2020.6, so that is what I have tested this pipeline with. 10 | 2. I have not tested the pipeline using deblur or vsearch even though I have implemented them, so use these methods at your own risk. I have tested the dada2 pipeline and it works great. Hence, I advise you to run the dada2 pipeline. 11 | 3. I provide 3 Snakefiles: Snakefile (16S, 18S and ITS), Snakefile.16S (16S and 18S) and Snakefile.ITS (ITS alone). 12 | 4. I will be happy to fix any bug that you might find, so please feel free to reach out to me at obadbotanist@yahoo.com 13 | 14 | 15 | Please do not forget to cite the authors of the tools used. 16 | 17 | 18 | **The Pipeline does the following:** 19 | 20 | - It renames your input files (optional) so that they conform to the required input format, i.e. 01.raw_data/{SAMPLE}_R{1|2}.fastq.gz for paired-end or 01.raw_data/{SAMPLE}.fastq.gz for single-end reads 21 | - Quality checks and summarizes the input reads using FASTQC and MultiQC 22 | - Imports the reads into Qiime2 23 | - Quality checks the input artifact using Qiime2 24 | - Trims primers and adapters from the imported artifact using cutadapt as implemented in qiime2 25 | - Quality checks the trimmed input artifact using Qiime2 26 | - Denoises (filtering, chimera checking and ASV table generation) the reads using dada2 (default) 27 | - Assigns taxonomy to the representative sequences using scikit-learn and your provided database. See the create_DB folder for a pipeline that can be used to create the required databases 28 | - Excludes singletons and non-target taxa such as Mitochondria, Chloroplast, etc. The taxa to be filtered can be set from within the Snakefile by editing the "taxa2filter" variable. 29 | - Excludes rare ASVs, i.e. ASVs whose frequency is less than 0.005% of the total number of sequences (Navas-Molina et al. 2013) 30 | - Builds a phylogenetic tree 31 | - Generates sample and group taxa plots 32 | - Performs core diversity analysis, i.e. alpha and beta diversity analysis along with the related statistical tests 33 | - Performs differential abundance testing using ANCOM 34 | - Performs functional annotation using PICRUSt2 for 16S sequences. 35 | 36 | 37 | ## Authors 38 | 39 | * Olabiyi Obayomi (@olabiyi) 40 | 41 | 42 | Before you start, make sure you have miniconda, qiime2, picrust2 and snakemake installed. You can optionally install my bioinfo environment, which contains snakemake and many other useful bioinformatics tools. 43 | 44 | ### STEP 1: Install miniconda and qiime 2 (optional) 45 | 46 | See instructions on how to do so [here](https://docs.qiime2.org/2020.6/install/) 47 | 48 | ### STEP 2: Install picrust2 (optional) 49 | 50 | See instructions on how to do so [here](https://github.com/picrust/picrust2/blob/master/INSTALL.md) 51 | 52 | 53 | ### STEP 3: Install Snakemake in a separate conda environment or install my bioinfo environment which contains snakemake (optional) 54 | 55 | Install Snakemake using [conda](https://conda.io/projects/conda/en/latest/user-guide/install/index.html): 56 | 57 | conda create -c bioconda -c conda-forge -n snakemake snakemake 58 | 59 | For installation details, see the [instructions in the Snakemake documentation](https://snakemake.readthedocs.io/en/stable/getting_started/installation.html).
60 | 
61 | 
62 | ### Step 4: Obtain a copy of this workflow
63 | 
64 | git clone https://github.com/olabiyi/sankemake-workflow-qiime2.git
65 | 
66 | ### Step 5: Configure workflow
67 | 
68 | Configure the workflow according to your needs by editing the files in the `config/` folder. Adjust `config.yaml` to configure the workflow execution, and `sample.tsv` to specify your sample setup. Make sure your sample.tsv file does not contain any errors, as mistakes here could potentially lead to losing all of your data when the files are renamed.
69 | 
70 | ### Step 6: Install the bioinfo environment (optional)
71 | 
72 | If you would like to use my bioinfo environment:
73 | 
74 | conda env create -f envs/bioinfo.yaml
75 | 
76 | 
77 | ### Step 7: Running the pipeline
78 | 
79 | #### Activate the conda environment containing snakemake
80 | 
81 | source activate bioinfo
82 | 
83 | 
84 | #### Set up the mapping file and raw data directories
85 | 
86 | [ -d 00.mapping/ ] || mkdir 00.mapping/
87 | [ -d 01.raw_data/ ] || mkdir 01.raw_data/
88 | 
89 | #### Move your raw data to the 01.raw_data directory
90 | # Delete anything that may be present in the raw data directory
91 | rm -rf 01.raw_data/*
92 | # Move your read files to the raw data directory - every sample in its own directory - see the example in this repo
93 | mv location/rawData/16S/* 01.raw_data/
94 | 
95 | #### Create metadata files
96 | 
97 | You need two metadata files: a general metadata file called metadata.tsv and a treatment-metadata.tsv file.
98 | These files can be created and edited with Excel. Make sure to save them as *metadata.tsv* and *treatment-metadata.tsv*.
99 | The treatment-metadata.tsv file is used for making grouped bar plots, while metadata.tsv is used for core diversity analysis and general statistics.
100 | Please see the examples provided in this repository for the specific formats.
101 | 
102 | 
103 | #### Create the required MANIFEST file
104 | 
105 | # Get the sample names. This assumes that the folders in the 01.raw_data/ directory are named by sample.
106 | SAMPLES=($(ls -1 01.raw_data/ | grep -Ev "MANIFEST|seq" - | sort -V))
107 | 
108 | # Get the sample names for the "samples" field in the config file
109 | 
110 | (echo -ne '['; echo ${SAMPLES[*]} | sed -E 's/ /, /g' | sed -E 's/(\w+)/"\1"/g'; echo -e ']')
111 | 
112 | # Generate the MANIFEST file
113 | (echo "sample-id,absolute-filepath,direction"; \
114 | for SAMPLE in ${SAMPLES[*]}; do echo -ne "${SAMPLE},$PWD/01.raw_data/${SAMPLE}/${SAMPLE}_R1.fastq.gz,forward\n${SAMPLE},$PWD/01.raw_data/${SAMPLE}/${SAMPLE}_R2.fastq.gz,reverse\n"; done) \
115 | > 01.raw_data/MANIFEST
116 | 
117 | #### Create the config/sample.tsv file
118 | (echo -ne "SampleID\tType\tOld_name\tNew_name\n"; \
119 | for SAMPLE in ${SAMPLES[*]}; do echo -ne "${SAMPLE}\tForward\t01.raw_data/${SAMPLE}/${SAMPLE}_R1.fastq.gz\t01.raw_data/${SAMPLE}/${SAMPLE}_R1.fastq.gz\n${SAMPLE}\tReverse\t01.raw_data/${SAMPLE}/${SAMPLE}_R2.fastq.gz\t01.raw_data/${SAMPLE}/${SAMPLE}_R2.fastq.gz\n"; done) \
120 | > config/sample.tsv
121 | 
122 | 
123 | #### gzip fastq files if they are not already gzipped, as required by this pipeline. This also helps to save disk space.
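If you first want to see which files would be compressed (an optional check that is not part of the original instructions), you can run the same `find` expression without the gzip action:

    find 01.raw_data/ -type f -name '*.fastq'

The command below then compresses them in place: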
124 | 
125 | find 01.raw_data/ -type f -name '*.fastq' -exec gzip {} \;
126 | 
127 | 
128 | #### Executing the workflow
129 | 
130 | ##### Import the reads and check their quality to determine the truncation lengths for dada2
131 | 
132 | snakemake -pr --cores 10 --keep-going "04.QC/trimmed_reads_qual_viz.qzv" "04.QC/raw_reads_qual_viz.qzv"
133 | 
134 | 
135 | ##### Denoise the reads - chimera removal, read merging, quality trimming and ASV feature table generation. Take a good look at 05.Denoise_reads/denoise_stats.qzv to check that you did not lose too many reads and that the reads merged well. If the denoising was not successful, adjust the parameters you set for dada2 and then re-run
136 | 
137 | snakemake -pr --cores 15 --keep-going "05.Denoise_reads/denoise_stats.qzv" "05.Denoise_reads/table_summary.qzv" "05.Denoise_reads/representative_sequences.qzv"
138 | 
139 | ##### Filter taxa - examine "08.Filter_feature_table/taxa_filtered_table.qzv" to determine the threshold for filtering out rare taxa
140 | 
141 | snakemake -pr --cores 15 --keep-going "06.Assign_taxonomy/taxonomy.qzv" "07.Build_phylogenetic_tree/rooted-tree.qza" "08.Filter_feature_table/taxa_filtered_table.qzv"
142 | 
143 | ##### Filter rare taxa and make relative abundance bar plots
144 | 
145 | snakemake -pr --cores 15 --keep-going "08.Filter_feature_table/filtered_table.qzv" "09.Taxa_bar_plots/group-bar-plot.qzv" "09.Taxa_bar_plots/samples-bar-plots.qzv"
146 | 
147 | ##### Choose the rarefaction depth for diversity analysis after viewing "08.Filter_feature_table/filtered_table.qzv", then run the complete pipeline
148 | 
149 | snakemake -pr --cores 15 --keep-going
150 | 
151 | 
152 | #### Export the following files for downstream analysis with R scripts
153 | 
154 | 1. 05.Denoise_reads/denoise_stats.qza -> Denoising statistics
155 | 2. 06.Assign_taxonomy/taxonomy.qza -> Taxonomy assignments of the representative sequences
156 | 3. 07.Build_phylogenetic_tree/rooted-tree.qza -> Phylogenetic tree for phylogenetic alpha diversity measurements
157 | 4. 08.Filter_feature_table/filtered_table.qza -> ASV table
158 | 5. 10.Diversity_analysis_{RAREFACTION_DEPTH}/bray_curtis_pcoa_results.qza -> Bray-Curtis PCoA results
159 | 6. 10.Diversity_analysis_{RAREFACTION_DEPTH}/bray_curtis_distance_matrix.qza -> Bray-Curtis distance matrix
160 | 7. 15.Function_annotation/picrust2_out_pipeline/pathways_out -> PICRUSt2 pathway output
161 | 8. 15.Function_annotation/picrust2_out_pipeline/KO_metagenome_out -> PICRUSt2 KO / gene output
162 | 
--------------------------------------------------------------------------------
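A note that is not part of the original README: QIIME 2 `.qza` artifacts are zip archives, so if your downstream R scripts expect plain-text inputs you can unpack any of the files listed above with `qiime tools export`. For example (the output directory name here is only an illustration):

    qiime tools export \
      --input-path 08.Filter_feature_table/filtered_table.qza \
      --output-path 08.Filter_feature_table/exported_filtered_table

The exported feature table is written as a BIOM file, which can then be converted to TSV with the `biom convert` utility if needed.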