├── lib
├── omics
│ ├── db
│ │ ├── management
│ │ │ ├── __init__.py
│ │ │ └── commands
│ │ │ │ └── __init__.py
│ │ ├── models.py
│ │ ├── __init__.py
│ │ └── apps.py
│ ├── _version.py
│ ├── __main__.py
│ ├── checkm.py
│ ├── utils.py
│ └── fastq2fasta.py
└── Makefile
├── modulefiles
├── omics
│ ├── 1
│ └── .version
├── flux.omics
│ ├── 1
│ ├── 2
│ └── .version
└── install
├── .gitignore
├── docs
├── _static
│ └── css
│ │ └── custom.css
├── index.txt
├── init.txt
├── template.txt
├── bins2fasta.txt
├── qc-check.txt
├── container.txt
├── merge-coverage.txt
├── separate-interleaved.txt
├── prep.txt
├── chop-contigs.txt
├── unchop-contigs.txt
├── binning.txt
├── run.txt
├── mapping.txt
├── qc.txt
├── assemble.txt
└── qc-sample.txt
├── scripts
├── parseTinySeqXML.xslt
├── twitterscript.xml
├── omics
├── omics-run
├── omics-init
├── omics-prep
├── omics-qc
├── bins2fasta
├── omics-container
├── omics-qc-check
├── assemblyModules
├── setup_metapathways
├── remove_space_from_filenames
├── tinySeq2fasta.xslt
├── antiSmash_summary
├── COPYRIGHT.tetramer_freqs_esom
├── removeCommentLines
├── tinySeq2table.xslt
├── do2list
├── getGIAnnotation
├── firefox_already_running
├── extractEuks
├── do2folder
├── GI_info_XMLParser
├── ESOM_binning_results_parser
├── folderLevelSize
├── getMasterList
├── genomeCheck
├── patchBlastLineage
├── Metabat_to_anvio_parser
├── calcN50
├── VizBin_parser
├── getGISummary
├── removeBlastSubj
├── createFastq
├── tally-weave
├── fixpod6
├── getMyContigs
├── rgi-setup
├── fixpod2
├── U2T
├── fixpod5
├── fixpod4
├── oasesPaired_pipe
├── fixpod3
├── silva-db
├── curateDB
├── dada2shared
├── refseq-rna
├── derep_getReadAbundance
├── tally
├── sangerSeqParser
├── clusterDensity
├── mapper_getQueryList
├── createNodes
├── reverse_complement
├── asv-map-update
├── countInstances
├── map_project_names
├── gbk2fna
├── length+GC
├── parseBlastXML
├── getGFF
├── nameClassFiles
├── Ebot.Output.Extract.Gi.Title.Rev3
├── tallyWrap
├── shared-filter-abundance
├── matchQueryNames
├── match-dada2-mothur
├── kmerFreq
└── shared-set-accessions
├── localenv
├── TruSeq3-PE-2+omics.fa
├── phylosiftrc
├── README.md
├── bash-completion
└── omics
└── test
└── run
/lib/omics/db/management/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/lib/omics/db/management/commands/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/lib/omics/db/models.py:
--------------------------------------------------------------------------------
1 | from django.db import models
2 |
--------------------------------------------------------------------------------
/modulefiles/omics/.version:
--------------------------------------------------------------------------------
1 | #%Module1.0
2 | # Default version used when the omics module is requested without a version
3 | set ModulesVersion "1"
4 |
--------------------------------------------------------------------------------
/modulefiles/flux.omics/.version:
--------------------------------------------------------------------------------
1 | #%Module1.0
2 | # Default version of this module; NOTE(review): a modulefile "2" also exists in this directory -- confirm "1" should stay the default
3 | set ModulesVersion "1"
4 |
--------------------------------------------------------------------------------
/lib/omics/db/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | This is the omics database module
3 |
4 | This is a stand-alone Django ORM and database backend for geo-omics-scripts.
5 | """
6 | from .manage import main, setup
7 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | .*sw[op]
3 | *.pyc
4 |
5 | # make-generated files
6 | scripts/*.1
7 | geo-omics-scripts*.tar.gz
8 | geo-omics-scripts*/*
9 | docs/_build
10 |
11 | # contains symlinks to liba.sh etc. (created by localenv), to allow running scripts in dev environment
12 | share/
13 |
--------------------------------------------------------------------------------
/docs/_static/css/custom.css:
--------------------------------------------------------------------------------
1 | /* noredcode: hack to avoid red inline code or literals
2 | * Sphinx should allow one to amend themes somehow but who knows,
3 | * so append this file to theme.css of the rtd-theme
4 | */
5 | code.samp span.pre, code.literal span.pre {color: #444;}
6 |
--------------------------------------------------------------------------------
/modulefiles/install:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # --- CAUTION ---
4 | #
5 | # This script will install module files to production !
6 | #
7 | # --- CAUTION ---
8 |
9 | set -e  # abort as soon as a copy fails
10 |
11 | # this will fail, no root rights after all
12 | # contact Mike Messina
13 | #scp -p omics/1 cayman:/usr/share/Modules/modulefiles/omics/1
14 | #scp -p omics/1 vondamm:/usr/share/Modules/modulefiles/omics/1
15 | # NOTE(review): only flux.omics/1 is copied below; flux.omics/2 is not installed by this script -- confirm
16 | # should work as heinro user
17 | scp -p flux.omics/1 guaymas.earth.lsa.umich.edu:/gmb/data9/flux/modulefiles/geomicro/omics/1
18 |
--------------------------------------------------------------------------------
/modulefiles/flux.omics/2:
--------------------------------------------------------------------------------
1 | #%Module1.0
2 | #
3 | # to be installed as /dept/geology/geomicro/data9/flux/modulefiles/geomicro/omics/2
4 | #
5 | # this module's maintainer's email: heinro@umich.edu
6 | #
7 | proc ModulesHelp { } {
8 | puts stderr "Module to enable the 'comics' command to enter the 'omics container"
9 | }
10 | # One-line description of the module
11 | module-whatis "Allows access to the 'omics container"
12 | # Root directory whose bin/ and share/man/ are put on the search paths below
13 | set COMICS_ROOT /dept/geology/geomicro/data9/flux/apps/comics
14 | # prepend-path places these directories ahead of the existing PATH/MANPATH entries
15 | prepend-path PATH $COMICS_ROOT/bin
16 | prepend-path MANPATH $COMICS_ROOT/share/man
17 |
--------------------------------------------------------------------------------
/scripts/parseTinySeqXML.xslt:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/localenv:
--------------------------------------------------------------------------------
1 | # To use this repo as a local installation run
2 | #
3 | # $ source localenv
4 | #
5 | # from a bash prompt
6 |
7 | # Repository root, resolved from the location of this (sourced) file
8 | base=$(readlink -f "$(dirname "${BASH_SOURCE[0]}")")
9 | export PATH="$base/scripts:$PATH"
10 | # Append the previous PYTHONPATH only when it is non-empty: an unconditional
11 | # ":$PYTHONPATH" leaves a trailing ':' (an empty entry, i.e. the current
12 | # directory) on the module search path whenever PYTHONPATH was unset
13 | export PYTHONPATH="$base/lib${PYTHONPATH:+:$PYTHONPATH}"
14 | # Populate share/geo-omics-scripts with relative symlinks back into the repo
15 | mkdir -p -- "$base/share/geo-omics-scripts"
16 | ln -f -s -t "$base/share/geo-omics-scripts/" ../../lib/liba.sh
17 | ln -f -s -t "$base/share/geo-omics-scripts/" ../../TruSeq3-PE-2+omics.fa
18 | ln -f -s -t "$base/share/geo-omics-scripts/" ../../phylosiftrc
19 | # Enable tab completion for the omics command in the current shell
20 | source "$base/bash-completion/omics"
21 |
--------------------------------------------------------------------------------
/docs/index.txt:
--------------------------------------------------------------------------------
1 | .. geo-omics-scripts documentation master file, created by
2 | sphinx-quickstart on Thu Mar 16 15:39:32 2017.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to geo-omics-scripts's documentation!
7 | =============================================
8 |
9 | Contents:
10 |
11 | .. toctree::
12 | :maxdepth: 2
13 | :glob:
14 |
15 | *
16 |
17 | Indices and tables
18 | ==================
19 |
20 | * :ref:`genindex`
21 | * :ref:`modindex`
22 | * :ref:`search`
23 |
24 |
--------------------------------------------------------------------------------
/docs/init.txt:
--------------------------------------------------------------------------------
1 | .. program:: omics init
2 |
3 | =========================================
4 | init - initialize omics project directory
5 | =========================================
6 |
7 | .. argparse::
8 | :module: omics.init
9 | :func: get_argp
10 | :prog: omics init
11 | :nodefault:
12 | :manpage:
13 |
14 |
15 | Exit Status
16 | ===========
17 |
18 | Exits with a non-zero status upon encountering an error.
19 |
20 | .. only:: man
21 |
22 | See Also
23 | ========
24 |
25 | :manpage:`omics-prep(1)`, :manpage:`omics(7)`, :manpage:`illumina-reads-processing(7)`
26 |
--------------------------------------------------------------------------------
/docs/template.txt:
--------------------------------------------------------------------------------
1 | .. program:: $program
2 |
3 | $header_line
4 | $program - $short_description
5 | $header_line
6 |
7 | Synopsis
8 | ========
9 |
10 | :program:`$program` $usage_args
11 |
12 |
13 | Description
14 | ===========
15 |
16 | $long_description
17 |
18 |
19 | Options
20 | =======
21 |
22 | $positional_args
23 |
24 | $optional_args
25 |
26 |
27 | Exit Status
28 | ===========
29 |
30 | Exits with a non-zero status upon encountering an error.
31 |
32 | .. only:: man
33 |
34 | See Also
35 | ========
36 |
37 | :manpage:`omics(7)`,
38 | :manpage:`illumina-reads-processing(7)`
39 |
--------------------------------------------------------------------------------
/docs/bins2fasta.txt:
--------------------------------------------------------------------------------
1 | .. program:: bins2fasta
2 |
3 | ===========================================
4 | bins2fasta - generate fasta files from bins
5 | ===========================================
6 |
7 | .. argparse::
8 | :module: omics.bins2fasta
9 | :func: get_argp
10 | :prog: bins2fasta
11 | :nodefault:
12 | :manpage:
13 |
14 |
15 | Exit Status
16 | ===========
17 |
18 | Exits with a non-zero status upon encountering an error.
19 |
20 | .. only:: man
21 |
22 | See Also
23 | ========
24 |
25 | :manpage:`omics-binning(1)`, :manpage:`omics(7)`,
26 | :manpage:`illumina-reads-processing(7)`
27 |
28 |
--------------------------------------------------------------------------------
/docs/qc-check.txt:
--------------------------------------------------------------------------------
1 | .. program:: omics qc-check
2 |
3 | ===================================================
4 | qc-check - Quickly check results of quality control
5 | ===================================================
6 |
7 | .. argparse::
8 | :module: omics.qc_check
9 | :func: get_argp
10 | :prog: omics qc-check
11 | :nodefault:
12 | :manpage:
13 |
14 |
15 | Exit Status
16 | ===========
17 |
18 | Exits with a non-zero status upon encountering an error.
19 |
20 | .. only:: man
21 |
22 | See Also
23 | ========
24 |
25 | :manpage:`omics-qc(1)`, :manpage:`omics(7)`,
26 | :manpage:`illumina-reads-processing(7)`
27 |
28 |
--------------------------------------------------------------------------------
/scripts/twitterscript.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Tweets by @umich_geomicro
7 |
8 | ]]>
9 |
10 |
11 |
--------------------------------------------------------------------------------
/TruSeq3-PE-2+omics.fa:
--------------------------------------------------------------------------------
1 | >PrefixPE/1
2 | TACACTCTTTCCCTACACGACGCTCTTCCGATCT
3 | >PrefixPE/2
4 | GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT
5 | >PE1
6 | TACACTCTTTCCCTACACGACGCTCTTCCGATCT
7 | >PE1_rc
8 | AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA
9 | >PE2
10 | GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCT
11 | >PE2_rc
12 | AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC
13 | >TruSeq_Adapter_Index_end
14 | CGTATGCCGTCTTCTGCTTG
15 | >TruSeq_Adapter_Index_end_rc
16 | CAAGCAGAAGACGGCATACG
17 | >Illumina_Paired_End_Sequencing_Primer_2
18 | CGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT
19 | >Illumina_Paired_End_Sequencing_Primer_2_rc
20 | AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCG
21 | >Illumina_Paired_End_Adapter_2
22 | GATCGGAAGAGCGGTTCAGCAGGAATGCCGAG
23 | >Illumina_RNA_PCR_Primer_rc
24 | TCGGACTGTAGAACTCTGAACGTGTAGATCTCGGTGGTCGCCGTATCATT
25 |
--------------------------------------------------------------------------------
/scripts/omics:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 | from omics.__main__ import main
21 |
22 |
23 | main()
24 |
--------------------------------------------------------------------------------
/scripts/omics-run:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 | echo "omics-run is deprecated, use comics instead" >&2  # diagnostics belong on stderr
21 | exit 1  # non-zero status: the deprecated command did not do anything
22 |
--------------------------------------------------------------------------------
/scripts/omics-init:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 | from omics import init
21 |
22 |
23 | init.main()
24 |
--------------------------------------------------------------------------------
/scripts/omics-prep:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 | from omics import prep
21 |
22 |
23 | prep.main()
24 |
--------------------------------------------------------------------------------
/scripts/omics-qc:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright 2014, 2015, 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 | from omics import qc
21 |
22 |
23 | qc.main()
24 |
--------------------------------------------------------------------------------
/scripts/bins2fasta:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 | from omics import bins2fasta
21 |
22 |
23 | bins2fasta.main()
24 |
--------------------------------------------------------------------------------
/scripts/omics-container:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 | echo "omics-container is deprecated, use comics instead" >&2  # diagnostics belong on stderr
21 | exit 1  # non-zero status: the deprecated command did not do anything
22 |
--------------------------------------------------------------------------------
/scripts/omics-qc-check:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 | from omics import qc_check
21 |
22 |
23 | qc_check.main()
24 |
--------------------------------------------------------------------------------
/lib/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | # nothing to build here; the files are installed and distributed as they are
3 | # get all python files
4 | py_files = $(shell find -name "*.py")
5 | # shell library that "install" puts below $(datadir)/$(package_name)
6 | lib_files = liba.sh
7 | # copied into the dist directory in addition to the lib and python files
8 | EXTRA_DIST = Makefile
9 | # NOTE(review): DESTDIR, datadir, prefix, package_name, dist_dir and INSTALL_DATA are not set in this file -- presumably provided by the calling Makefile; confirm
10 | install: installdir = $(DESTDIR)$(datadir)/$(package_name)
11 | install: install-py
12 | $(info Installing lib files ...)
13 | mkdir -p -- "$(installdir)"
14 | $(INSTALL_DATA) -t $(installdir) $(lib_files)
15 | # NOTE(review): python3.5 is hard-coded here and in the PYTHONPATH of modulefiles/omics/1 -- keep both in step with the interpreter actually used
16 | install-py: installdir = $(DESTDIR)$(prefix)/lib/python3.5/site-packages
17 | install-py:
18 | $(info Installing python packages ...)
19 | for i in $(py_files); do \
20 | $(INSTALL_DATA) -D $$i $(installdir)/$$i; \
21 | done
22 | # copy the lib files and every python package directory into ../$(dist_dir)/lib
23 | distdir:
24 | $(info Copying lib files ...)
25 | mkdir -p -- "../$(dist_dir)/lib"
26 | cp -a $(lib_files) $(EXTRA_DIST) ../$(dist_dir)/lib/
27 | # copy each python package individually
28 | for i in $(shell find -name __init__.py -printf "%h "); do \
29 | mkdir -p ../$(dist_dir)/lib/$$i && \
30 | cp -p $$i/*.py ../$(dist_dir)/lib/$$i/ ; \
31 | done
32 |
--------------------------------------------------------------------------------
/scripts/assemblyModules:
--------------------------------------------------------------------------------
1 | #! /bin/sh
2 |
3 | # Copyright 2015, 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 |
21 | module load AMOS/3.1.0
22 | module load velvet/1.1.07-MAX99-OPENMP
23 | module load MetaVelvet/1.0.01
24 | # NOTE(review): module load changes the environment of the shell that runs it; presumably this file has to be sourced rather than executed for the loads to persist -- confirm
25 | #meta-velvetg
26 |
27 |
--------------------------------------------------------------------------------
/lib/omics/db/apps.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 Regents of The University of Michigan.
2 |
3 | # This file is part of geo-omics-scripts.
4 |
5 | # Geo-omics-scripts is free software: you can redistribute it and/or
6 | # modify it under the terms of the GNU General Public License as published
7 | # by the Free Software Foundation, either version 3 of the License, or (at
8 | # your option) any later version.
9 |
10 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
11 | # WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 | # General Public License for more details.
14 |
15 | # You should have received a copy of the GNU General Public License along
16 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
17 |
18 | from django.apps import AppConfig
19 |
20 | class OmicsDBConfig(AppConfig):  # Django application configuration for the omics.db package
21 | name = 'omics.db'  # dotted Python path of the application
22 | label = 'omics_db'  # short label for the application
23 | verbose_name = 'geo-omics-scripts data base'  # human-readable name
24 |
--------------------------------------------------------------------------------
/modulefiles/omics/1:
--------------------------------------------------------------------------------
1 | #%Module1.0
2 | #
3 | # to be installed as /usr/share/Modules/modulefiles/omics/1 on vondamm, cayman
4 | #
5 | # this module's maintainer's email: heinro@umich.edu
6 | #
7 |
8 | proc ModulesHelp { } {
9 | puts stderr "Load this module to use the (geo-)omics scripts."
10 | }
11 | # One-line description of the module
12 | module-whatis "All-in-one omics module"
13 | # Dependencies are pulled in only in load mode; NOTE(review): nothing here unloads them again when this module is unloaded -- confirm that is intended
14 | if { [module-info mode load] } {
15 | # Load standard software packages
16 | module load AnacondaPython3
17 | module load AnacondaPython
18 | module load Scythe
19 | module load blast
20 | module load PhyloSift
21 | module load idba
22 | module load QUAST
23 | module load bwa
24 | module load samtools
25 | module load bedtools
26 | module load megahit
27 | }
28 | # Root directory of the omics installation used for all paths below
29 | set OMICS_ROOT /geomicro/data9/flux/apps/omics_root
30 | # append-path adds these directories after the entries that are already present
31 | append-path PATH $OMICS_ROOT/bin
32 | append-path MANPATH $OMICS_ROOT/share/man
33 | append-path PYTHONPATH $OMICS_ROOT/lib/python3.5/site-packages
34 | # Python's user base directory is pointed at the same installation root
35 | setenv PYTHONUSERBASE $OMICS_ROOT
36 |
--------------------------------------------------------------------------------
/scripts/setup_metapathways:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | # Copyright 2015, 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 | ln -s /opt/packages/MetaPathways/1.0/blastDB  # no link name given: the link "blastDB" is created in the current directory
21 | ln -s /opt/packages/MetaPathways/1.0/executables  # likewise, creates "executables" in the current directory
22 | cp /geomicro/data1/COMMON/src/MetaPathways/setup/template_* .  # copy the setup templates next to the links
23 |
--------------------------------------------------------------------------------
/scripts/remove_space_from_filenames:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2015, 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 | # Replace every space in file and directory names below the current directory
21 | # with an underscore.
22 | #
23 | # -depth lists the contents of a directory before the directory itself, and
24 | # only the last path component is rewritten, so the path of an entry is still
25 | # valid at the time it gets renamed (renaming a parent first would invalidate
26 | # the already-listed paths of its children).
27 | # IFS= and -r keep leading/trailing blanks and backslashes in names intact.
28 | find . -depth -name '* *' | while IFS= read -r file;
29 | do
30 |     dir=$(dirname -- "$file");
31 |     base=$(basename -- "$file");
32 |     target="$dir/${base// /_}";
33 |     echo "Renaming '$file' to '$target'";
34 |     mv -- "$file" "$target";
35 | done;
36 |
--------------------------------------------------------------------------------
/docs/container.txt:
--------------------------------------------------------------------------------
1 | .. program:: omics container
2 |
3 | ================================================
4 | container -- start a singularity container
5 | ================================================
6 |
7 | Synopsis
8 | ========
9 |
10 | :program:`omics container` [OPTIONS]...
11 |
12 |
13 | Description
14 | ===========
15 |
16 | Start a shell in a singularity container environment (typically on Flux HPC) with all
17 | omics scripts and software dependencies available.
18 |
19 |
20 | Options
21 | =======
22 |
23 | .. option:: -i, --container-image PATH
24 |
25 | Path to singularity container image. A sensible default is chosen if this
26 | option is not provided.
27 |
28 | .. option:: -k, --keep-modules-loaded
29 |
30 | Do not purge environment modules, by default all modules get purged.
31 |
32 |
33 | Exit Status
34 | ===========
35 |
36 | Exits with non-zero upon encountering an error.
37 |
38 | .. only:: man
39 |
40 | See Also
41 | ========
42 |
43 | :manpage:`omics(7)`, :manpage:`illumina-reads-processing(7)`
44 |
--------------------------------------------------------------------------------
/scripts/tinySeq2fasta.xslt:
--------------------------------------------------------------------------------
1 |
2 |
3 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
--------------------------------------------------------------------------------
/scripts/antiSmash_summary:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2015, 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 |
21 | set -e
22 |
23 | # Number of lines not starting with '>' and number of lines starting with '>>'
24 | echo "GeneClusters: $(grep -v "^>" Overview.geneclusters.txt | wc -l)"
25 | echo "smcogs: $(grep -c "^>>" Overview.smcogs.txt)"
26 |
27 | # One entry per top-level directory that has something below structures/
28 | ls */structures/* | cut -f 1 -d "/" | sort -u > structures.list
29 | # Feed the list via stdin so that wc prints the bare count, without the file name
30 | echo "Structures: $(wc -l < structures.list)"
31 |
--------------------------------------------------------------------------------
/phylosiftrc:
--------------------------------------------------------------------------------
1 | # PhyloSift run control file
2 | #
3 | # see also:
4 | # https://phylosift.wordpress.com/tutorials/running-phylosift/phylosift-run-control-file/
5 | #
6 |
7 | use Env qw($OMICS_REFERENCE_DATA);
8 |
9 | # Candidate locations of the reference data, most preferred first
10 | my $common = "data9/flux/reference-data/phylosift";
11 | my @ref_alternatives = (
12 |     "/geomicro/$common",
13 |     "/gmb/$common",
14 |     "/dept/geology/geomicro/$common",
15 | );
16 | # A location configured via the environment goes to the front of the list;
17 | # it is left out when the variable is unset or empty so that "/phylosift"
18 | # is never probed by accident
19 | unshift @ref_alternatives, "$OMICS_REFERENCE_DATA/phylosift"
20 |     if defined $OMICS_REFERENCE_DATA and length $OMICS_REFERENCE_DATA;
21 |
22 | # The first existing candidate wins; without the "last" a later existing
23 | # default would override the location given by OMICS_REFERENCE_DATA
24 | foreach (@ref_alternatives) {
25 |     if (-d $_) {
26 |         $ref_data_path = $_;
27 |         last;
28 |     }
29 | }
30 | print "[phylosiftrc] using reference data from: $ref_data_path\n";
31 |
32 | $marker_path = "$ref_data_path";
33 | $ncbi_path = "$ref_data_path";
34 |
35 | # prevent this:
36 | # Error: requested HMM banded DP mx of 4749.29 Mb > 2500.00 Mb limit.
37 | # Increase limit with --mxsize or tau with --tau.
38 | $cm_align_long_mxsize = "10000";
39 | $cm_align_short_mxsize = "10000";
40 |
41 | # Use last-align bundled binaries if available
42 |
43 | $lastdb = "/usr/lib/phylosift/lastdb";
44 | $lastal = "/usr/lib/phylosift/lastal";
45 |
46 | # Fall back to an empty setting when a bundled binary is missing
47 | -e $lastdb or $lastdb="";
48 | -e $lastal or $lastal="";
49 |
--------------------------------------------------------------------------------
/scripts/COPYRIGHT.tetramer_freqs_esom:
--------------------------------------------------------------------------------
1 | For tetramer_freqs_esom:
2 |
3 | ###############################################################################
4 | Copyright (C) 2007 Anders Andersson (anders.andersson@scilifelab.se)
5 |
6 | This program is free software; you can redistribute it and/or
7 | modify it under the terms of the GNU General Public License
8 | as published by the Free Software Foundation; either version 2
9 | of the License, or (at your option) any later version.
10 |
11 | This program is distributed in the hope that it will be useful,
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | GNU General Public License for more details.
15 |
16 | You should have received a copy of the GNU General Public License
17 | along with this program. If not, see <https://www.gnu.org/licenses/>.
18 |
19 | Anders Andersson
20 | Assistant Professor
21 | SciLifeLab
22 | School of Biotechnology
23 | KTH Royal Institute of Technology
24 | Stockholm, Sweden
25 | Email: anders.andersson@scilifelab.se
26 | ###############################################################################
27 |
--------------------------------------------------------------------------------
/scripts/removeCommentLines:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 |
3 | # Copyright 2013, 2015, 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 |
21 | use strict;
22 |
23 | # Usage: removeCommentLines INFILE OUTFILE
24 | # Copies INFILE to OUTFILE, dropping every line that starts with '#' and
25 | # every line that is empty once its line ending has been removed.
26 | my $in=$ARGV[0];
27 | my $out=$ARGV[1];
28 |
29 | open(IN, $in)|| die $!."\n";
30 | # Fail loudly instead of silently discarding all output.
31 | open(OUT, ">".$out)|| die $!."\n";
32 |
33 | while(my $line=<IN>){
34 |     next if $line=~ /^#/;
35 |     chomp $line;
36 |     # Remove the carriage return left over from DOS line endings.
37 |     $line=~ s/\r//;
38 |     # Perl truthiness: this also skips a line consisting of just "0".
39 |     next unless $line;
40 |
41 |     print OUT $line."\n";
42 | }
43 | close IN;
44 | close OUT;
45 |
--------------------------------------------------------------------------------
/scripts/tinySeq2table.xslt:
--------------------------------------------------------------------------------
1 |
2 |
3 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
--------------------------------------------------------------------------------
/scripts/do2list:
--------------------------------------------------------------------------------
1 | #! /bin/sh
2 |
3 | # Copyright 2013, 2015, 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 | # Template: run a command on every file named in the list file given as $1.
20 | if [ -z "$1" ]; then echo "command failed: Give a file name with list"; exit; fi
21 | if [ ! -s "$1" ]; then echo "$1 does not exist"; exit; fi
22 | # The list is split on whitespace, so every word in it is treated as one file name.
23 | for i in $(grep "^" "$1"); do
24 |     if [ ! -s "$i" ]; then echo "$i is empty; skipping..."; continue; fi
25 |     OUT="$i.out"
26 |     # replace the following line with the desired command and $i as input and $OUT as output
27 |     echo "IN: $i, OUT:$OUT"
28 | done
28 |
--------------------------------------------------------------------------------
/scripts/getGIAnnotation:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2013, 2015, 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 |
21 | # usage: getGIAnnotation <blast_output> <database>   (argument names reconstructed from the example below)
22 | # example: getGIAnnotation test.blastn nucleotide
23 | if [ -z "$1" ]; then echo "command failed: Give a file name with list"; exit; fi
24 | if [ ! -s "$1" ]; then echo "$1 does not exist"; exit; fi
25 | cut -d '|' -f 2 "$1" | sort -u > gi.list
26 | # Look up the unique IDs collected in gi.list; "$2" is handed to getGiInfo's -d option.
27 | getGiInfo -d "$2" -o anno.xml -l gi.list
28 | # Convert the XML written above (anno.xml) into gi.desc.
29 | GI_info_XMLParser anno.xml gi.desc
30 |
--------------------------------------------------------------------------------
/scripts/firefox_already_running:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | # Copyright 2013, 2015, 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 |
21 | LOCK=$(find $HOME/.mozilla/firefox/ -name lock)
22 | PLOCK=$(find $HOME/.mozilla/firefox/ -name \.parentlock)
23 | # find may print several paths; $LOCK and $PLOCK are expanded unquoted below, so rm gets each whitespace-separated path as its own argument.
24 | echo "Deleting Lock file: $LOCK"
25 | rm -f $LOCK
26 | echo "Deleting Parent Lock file: $PLOCK"
27 | rm -f $PLOCK
28 |
29 | echo "To see why these files had to be deleted, see: http://www.mattcutts.com/blog/how-to-fix-firefox-is-already-running-error/"
30 | echo "Try running firefox again..."
31 |
--------------------------------------------------------------------------------
/scripts/extractEuks:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2015, 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 | # Collect the contigs that carry a locus classified as Eukaryota; the resulting list is handed to extractSeqs.
20 | legacy_consolidateJGIdata -DIR . -OUTDIR consolidated
21 | # Emit a literal tab between locus and contig (this used to print the unset awk variable t), so "cut -f 2" below can isolate the contig.
22 | awk < consolidated/Unclassified.tsv -F'\t' '{ print $6, "\t", $2 }' > locus_contig.list
23 | awk < *phylodist -F'\t' '{ print $1, "\t", $5 }' | cut -f 1 -d ";" | grep "Eukaryota" | cut -f 1 > eukaryota.list
24 | # sed strips the space that awk's output separator leaves in front of the contig name.
25 | fgrep -f eukaryota.list locus_contig.list | cut -f 2 | sort -u | sed "s# ##" > eukaryota_contigs.list
26 | extractSeqs -e -l eukaryota_contigs.list -f *.fna -o euksRemoved.fasta
25 |
--------------------------------------------------------------------------------
/scripts/do2folder:
--------------------------------------------------------------------------------
1 | #! /bin/sh
2 |
3 | # Copyright 2013, 2015, 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 | # Template: run a command on every non-empty entry of the directory given as $1.
20 | #function do2all() {
21 | if [ -z "$1" ]; then echo "command failed: Give a directory name with files"; exit; fi
22 | if [ ! -d "$1" ]; then echo "$1 does not exist"; exit; fi
23 | # Quoting keeps directory and file names that contain spaces intact.
24 | for i in "$1"/*; do
25 |     if [ ! -s "$i" ]; then echo "$i is empty; skipping..."; continue; fi
26 |     OUT="$i.out"
27 |     # replace the following line with the desired command and $i as input and $OUT as output
28 |     echo "IN: $i, OUT:$OUT"
29 | done
30 | #}
30 |
--------------------------------------------------------------------------------
/scripts/GI_info_XMLParser:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 |
3 | # Copyright 2013, 2015, 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 |
21 | use strict;
22 |
23 | # Usage: GI_info_XMLParser IN.xml OUT.txt
24 | # Scans IN.xml line by line: an ID line contributes "<digits>\t", an Item
25 | # line contributes its captured text, and </DocSum> ends the output row.
26 | my $in= $ARGV[0];
27 | my $out= $ARGV[1];
28 |
29 | open( SUMM, $in)|| die "$!\n";
30 | open (OUT, ">".$out)|| die "$!\n";
31 |
32 | while(my $line=<SUMM>){
33 |     # NOTE(review): both capture patterns below appear to have lost their
34 |     # opening XML tag in this copy of the script -- verify against upstream.
35 |     if($line=~ m/(\d*)<\/ID>/i){
36 |         print OUT $1."\t";
37 |     }
38 |     elsif($line=~ m/([\w\W]*)<\/Item>/i){
39 |         print OUT $1;
40 |     }
41 |     elsif($line=~ m/<\/DocSum>/i){
42 |         print OUT "\n";
43 |     }
44 |     else{
45 |         next;
46 |     }
47 | }
48 | close SUMM;
49 | close OUT;
50 |
--------------------------------------------------------------------------------
/modulefiles/flux.omics/1:
--------------------------------------------------------------------------------
1 | #%Module1.0
2 | #
3 | # to be installed as /dept/geology/geomicro/data9/flux/modulefiles/geomicro/omics/1
4 | #
5 | # this module's maintainer's email: heinro@umich.edu
6 | #
7 | proc ModulesHelp { } {
8 | puts stderr "Load this module to use the 'omics scripts."
9 | }
10 |
11 | module-whatis "All-in-one omics module"
12 | # Dependency modules are pulled in only while this module itself is being loaded.
13 | if { [module-info mode load] } {
14 | # prerequisites for quast module
15 | module load boost
16 | # gsl library needed by concoct
17 | module load gsl
18 | # prerequisites for quast, concoct module
19 | module load python-anaconda2/latest
20 | # prerequisite for bedtools2 module
21 | module load samtools
22 | # Load standard software packages
23 | # (required for geo-omics scripts)
24 | module load bedtools2
25 | module load bwa
26 | module load fastqc
27 | module load geomicro/idba
28 | module load geomicro/scythe
29 | module load ncbi-blast
30 | module load phylosift
31 | module load python-anaconda3
32 | module load quast
33 | module load sickle
34 | module load megahit
35 | }
36 | # Installation prefix of the omics scripts; every path below is derived from it.
37 | set OMICS_ROOT /dept/geology/geomicro/data9/flux/apps/omics_root
38 | # append-path adds these directories after any entries already present in each variable.
39 | append-path PATH $OMICS_ROOT/bin
40 | append-path MANPATH $OMICS_ROOT/share/man
41 | append-path PYTHONPATH $OMICS_ROOT/lib/python3.5/site-packages
42 |
43 | setenv PYTHONUSERBASE $OMICS_ROOT
44 |
--------------------------------------------------------------------------------
/scripts/ESOM_binning_results_parser:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2019 Derek Smith
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 | set -e
21 | # Build ESOM_binning_results.txt from all ./*.conf files; the three intermediate files are removed at the end.
22 | cat ./*.conf > esom_scaffolds2bin.tsv  # concatenate every .conf file
23 | sed '/^#/ d' esom_scaffolds2bin.tsv > esom_scaffolds2bin.cleaned.tsv  # drop lines starting with '#'
24 | awk 'BEGIN{OFS="\t"}{print $2,$1}' esom_scaffolds2bin.cleaned.tsv > esom_scaffolds2bin.tsv  # swap the first two columns (overwrites the concatenated file)
25 | awk 'BEGIN{OFS="\t"}{$2="Bin_"$2; print}' esom_scaffolds2bin.tsv > ESOM_binning_results.txt  # prefix column 2 with "Bin_"
26 | perl -pe 's/(?<=\d)_(?=\d)/./g' ESOM_binning_results.txt > ESOM_binning_results.txt.fixed  # underscore between two digits becomes a dot
27 | sed 's/k141\./k141_/g' ESOM_binning_results.txt.fixed > ESOM_binning_results.txt  # undo that replacement after "k141"
28 | rm esom_scaffolds2bin.cleaned.tsv
29 | rm esom_scaffolds2bin.tsv
30 | rm ESOM_binning_results.txt.fixed
31 |
--------------------------------------------------------------------------------
/docs/merge-coverage.txt:
--------------------------------------------------------------------------------
1 | .. program:: merge-coverage
2 |
3 | ====================================
4 | merge-coverage
5 | ====================================
6 |
7 | Synopsis
8 | ========
9 |
10 | :program:`merge-coverage` [OPTIONS]... <coverage-table>...
11 |
12 |
13 | Description
14 | ===========
15 |
16 | Calculates the per-sample per-contig mean coverage from the per-sample coverage
17 | tables made with :program:`omics mapping` and merges the results into one file
18 | suitable as input for :program:`concoct`.
19 |
20 |
21 | Options
22 | =======
23 | .. option:: -h, --help
24 |
25 | show help message and exit
26 |
27 | .. option:: -a, --assembly FILE
28 |
29 | The assembly file. This is to be compatible with the CONCOCT workflow. If
30 | used then contigs not covered by any sample will appear in the output with
31 | zeros (unlike the output of bedtools' :program:`genomeCoverageBed`.)
32 |
33 | .. option:: -o, --out FILE
34 |
35 | Output file. By default stdout is used.
36 |
37 | .. option:: --debug
38 |
39 | Print stack trace on errors.
40 |
41 | .. option:: --length
42 |
43 | Insert column with contig length. The default is not to insert lengths.
44 |
45 | .. option:: -v, --verbose
46 |
47 | Report progress to stderr.
48 |
49 |
50 | Exit Status
51 | ===========
52 |
53 | Exits with non-zero upon encountering an error.
54 |
55 | .. only:: man
56 |
57 | See Also
58 | ========
59 |
60 | :manpage:`omics-binning(1)`, :manpage:`omics-mapping(1)`,
61 | :manpage:`omics(7)`, :manpage:`illumina-reads-processing(7)`
62 |
63 |
64 |
--------------------------------------------------------------------------------
/scripts/folderLevelSize:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 |
3 | # Copyright 2013, 2015, 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 |
21 | # This script will only look at folders and sub-folders of the present working directory. You'll HAVE TO paste this script to the other folder if you want it's stats. Also make sure you have read permissions for the folders and sub folders before you run this script.
22 |
23 | use strict;
24 |
25 | #my $path= `pwd`;
26 |
27 | # Usage: folderLevelSize [LEVEL]
28 | # Prints the `du -h` size and path of every directory whose path has exactly
29 | # LEVEL components below the current directory (default: 1).
30 | my $level=$ARGV[0];
31 | my $tmp=$$.".tmp";
32 |
33 | `du -h > $tmp`;
34 |
35 | # Fall back to depth 1 when no level was given. (This used to test $ARGV[1],
36 | # which overwrote the requested level whenever it was the only argument.)
37 | if(! $ARGV[0]){ $level =1;}
38 | # du reports paths relative to ".", e.g. "./a/b", so the leading "." counts
39 | # as one extra path component.
40 | $level++;
41 |
42 | open (IN, $tmp)|| die "[error] $tmp: $!\n";
43 | while (my $line=<IN>){
44 |     next if $line=~ /^#/;
45 |     $line=~ s/\r//;
46 |     chomp $line;
47 |     next unless $line;
48 |
49 |     my($size, $path)=split(/\t/, $line);
50 |     my @levels=split(/\//, $path);
51 |
52 |     print $size."\t".$path."\n" if (scalar(@levels) == $level);
53 | }
54 | close IN;
55 | unlink $tmp;
56 |
--------------------------------------------------------------------------------
/scripts/getMasterList:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 |
3 | # Copyright 2013, 2014, 2015, 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 | use strict;
21 | use Getopt::Long;
22 |
23 | # Collect the distinct values of one column across all "*.<ext>" files in the
24 | # current directory and write them, in order of first appearance, to the
25 | # output file.
26 | my $ext="out";          # -e: extension of the input files
27 | my $out=$$.".list";     # -o: output file (default: <pid>.list)
28 | my $col=1;              # -c: 1-based column to collect
29 | my $bs=0;               # -s: accepted on the command line but not used below
30 | GetOptions(
31 |     'e:s'=>\$ext,
32 |     'o:s'=>\$out,
33 |     'c:i'=>\$col,
34 |     's:f'=>\$bs,
35 | );
36 |
37 | my @inputs=glob("*.".$ext);
38 | print scalar(@inputs)." Filenames provided\n";
39 |
40 | my $idx= $col-1;
41 | my %seen;
42 | open (OUT, ">".$out);
43 | for my $file (@inputs){
44 |     open(my $fh, $file) or die "[error] $file: $!\n";
45 |     while (defined(my $line=<$fh>)){
46 |         # Skip comment lines, then blank lines once line endings are gone.
47 |         next if $line=~ /^#/;
48 |         chomp $line;
49 |         $line=~ tr/\r//d;
50 |         next if !$line;
51 |
52 |         my $value=(split(/\t/, $line))[$idx];
53 |         # Post-increment is false only the first time a value is seen.
54 |         print OUT $value."\n" unless $seen{$value}++;
55 |     }
56 |     close $fh;
57 | }
58 | close OUT;
59 | exit;
60 |
--------------------------------------------------------------------------------
/docs/separate-interleaved.txt:
--------------------------------------------------------------------------------
1 | .. program:: separate-interleaved
2 |
3 | ====================
4 | separate-interleaved
5 | ====================
6 |
7 | Synopsis
8 | ========
9 |
10 | :program:`separate-interleaved` [:option:`-v`] [:option:`-f` FILE] [:option:`-r` FILE]
11 |
12 |
13 | Description
14 | ===========
15 |
16 | Separate interleaved reads fastq file into forwards and reverse files.
17 |
18 | Separate interleaved-reads fasta/q file into forwards and reverse files. Input
19 | file must be in FASTQ or FASTA format. Sequence and quality score must be on a
20 | single line each, separated by a '+'; read headers must start with '@' or '>'.
21 | The script will auto-detect the file format based on the first header. It is
22 | not checked if two reads are actually paired-end reads; however, an error will
23 | be raised if the input file contains an odd number of sequences.
24 |
25 |
26 |
27 | Options
28 | =======
29 | .. option:: -h, --help
30 |
31 | show this help message and exit
32 |
33 | .. option:: -f FILE, --fwd FILE, --forward-out FILE
34 |
35 | Name of forward output file. A value is derived from
36 | the input filename by default.
37 |
38 | .. option:: -r FILE, --rev FILE, --reverse-out FILE
39 |
40 | Name of reverse output file. A value is derived from
41 | the input filename by default.
42 |
43 | .. option:: -v, --verbose
44 |
45 | Print more informative output
46 |
47 |
48 | Exit Status
49 | ===========
50 |
51 | Exits with non-zero upon encountering an error.
52 |
53 | .. only:: man
54 |
55 | See Also
56 | ========
57 |
58 | :manpage:`omics(7)`,
59 | :manpage:`illumina-reads-processing(7)`
60 |
61 |
62 |
--------------------------------------------------------------------------------
/scripts/genomeCheck:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 |
3 | # Copyright 2013, 2015, 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 |
21 | use strict;
22 |
23 | # Report every record of the FASTA file $fName whose sequence is empty.
24 | # Prints one "F:<file>\tSN: <header>" line per such record and returns an
25 | # empty list. Dies when the file cannot be opened.
26 | sub checkForCompleteness{
27 |     my $fName=shift;
28 |     chomp($fName);
29 |     open (CONTIGS, $fName) || die "Couldn't open $fName\n";
30 |     # Read one FASTA record per iteration; "local" restores the caller's
31 |     # record separator on every exit path.
32 |     local $/= ">";
33 |     my %sequences;
34 |     while (my $record = <CONTIGS>) {
35 |         chomp $record;
36 |         next unless $record;
37 |         my ($name, @sequence) = split (/\n/, $record);
38 |         my $seq = join ("", @sequence);
39 |         $sequences{$name} = uc $seq;
40 |     }
41 |     close CONTIGS;
42 |
43 |     while (my($n, $s)=each(%sequences)){
44 |         chomp($s);
45 |         print "F:$fName\tSN: $n\n" unless (length($s)>0);
46 |     }
47 |     return ();
48 | }
49 |
50 | # Usage: genomeCheck LIST -- LIST names one FASTA file per line.
51 | my $listOfFiles= $ARGV[0];
52 | open (LOF, "$listOfFiles") || die "ERROR: $ARGV[0]\n $!\n";
53 | print "Summary for incomplete Genomes:\n";
54 | while (my $file=<LOF>){
55 |     checkForCompleteness($file);
56 | }
57 | close LOF;
58 | print "All Done!!\n";
59 |
--------------------------------------------------------------------------------
/scripts/patchBlastLineage:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 |
3 | # Copyright 2015, 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 |
21 | use strict;
22 |
23 | # $ARGV[0]; Blast output.
24 | # $ARGV[1]; name of hit (y/n).
25 | #
26 | # Reads the lookup table "l_<blast output>.txt" (tab-separated: gi, taxa,
27 | # rank) and writes "taxaBlast_<blast output>", in which column 2 of every hit
28 | # is replaced by "<taxa>|<id>" (plus "|<name>" when $ARGV[1] is 'y').
29 |
30 | my $fName= $ARGV[0];
31 | my $lFile= "l_".$fName.".txt";
32 |
33 | my %index;
34 | open (LF, $lFile) || die "[err] $lFile not found\n".$!."\n";
35 | while (my $desc=<LF>){
36 |     my($gi, $taxa, $rank)=split(/\t/, $desc);
37 |     chomp($gi);
38 |     chomp($taxa);
39 |     $index{$gi}=$taxa;
40 | }
41 | close LF;
42 |
43 | open (OUT, ">taxaBlast_".$ARGV[0]) || die "[err] cannot write taxaBlast_".$ARGV[0]."\n".$!."\n";
44 | open (BO, $fName) || die "[err] $fName not found\n".$!."\n";
45 | while(my $line=<BO>){
46 |     next if ($line=~ m/^\#/);
47 |     my @blast=split(/\t/, $line);
48 |     chomp(@blast);
49 |     # Column 2 is split on '|' into tag, gi, id and name.
50 |     my($giTag, $gi, $id, $name)=split(/\|/, $blast[1]);
51 |     chomp($gi);
52 |     $blast[1]=$index{$gi}."\|".$id;
53 |     $blast[1].="\|".$name if (lc($ARGV[1]) eq 'y');
54 |     my $bo=join("\t", @blast);
55 |     print OUT $bo."\n";
56 | }
57 | close BO;
58 | close OUT;
59 |
--------------------------------------------------------------------------------
/scripts/Metabat_to_anvio_parser:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2019 Derek Smith
4 | # Copyright 2019 Regents of The University of Michigan.
5 |
6 | # This file is part of geo-omics-scripts.
7 |
8 | # Geo-omics-scripts is free software: you can redistribute it and/or
9 | # modify it under the terms of the GNU General Public License as published
10 | # by the Free Software Foundation, either version 3 of the License, or (at
11 | # your option) any later version.
12 |
13 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
14 | # WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 | # General Public License for more details.
17 |
18 | # You should have received a copy of the GNU General Public License along
19 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
20 |
21 | #############################################################################################
22 | #
23 | # Parse metabat output into format that can be import as a collection into the anvio profile
24 | #
25 | ############################################################################################
26 |
27 | set -eu
28 |
29 | [[ "$#" -gt 0 ]] || { echo "Arguments required: Metabat output files"; exit 1; }
30 | # This command will add the Bin ID in a column after the split name,
31 | # replace the dots with underscores to make anvio happy...,
32 | # and concatenate all the files into one binning results file for anvio.
33 | # (Assumes that filenames (if coming from different directories) don't collide)
34 | for i in "$@"; do
35 |     binid=$(basename -s .fa "$i")
36 |     binid=${binid//./_}   # '//' replaces every dot; a single '/' stopped after the first one
37 |     sed "s/$/\t$binid/" "$i"
38 | done > Metabat_binning_results.txt
39 |
--------------------------------------------------------------------------------
/docs/prep.txt:
--------------------------------------------------------------------------------
1 | .. program:: omics prep
2 |
3 | ============================================================
4 | prep - prepare compressed fastq files for further processing
5 | ============================================================
6 |
7 | Synopsis
8 | ========
9 |
10 | :program:`omics prep` [OPTIONS]... [READS]...
11 |
12 |
13 | Description
14 | ===========
15 |
16 | Several tools of the Geomicro Illumina Reads Processing Pipeline assume
17 | that raw or intermediate data is available in files following certain naming
18 | conventions, formats and directory layout. To get started, the script
19 | :program:`omics prep` will help in following these conventions.
20 |
21 |
22 | Options
23 | =======
24 |
25 | .. option:: -f, --force
26 |
27 | Allow overwriting existing files.
28 |
29 | .. option:: --keep-lanes-separate
30 |
31 | Keep data from different lanes separate. The default is to collect reads
32 | originating from the same physical sample if sequencing was done using
33 | several lanes.
34 |
35 | .. option:: --suffix LIST
36 |
37 | Comma-separated list of valid file suffixes used for raw reads. This is
38 | used to find files when a directory is given as positional argument. By
39 | default .fastq and .fastq.gz files are considered.
40 |
41 | .. option:: -t N, --threads N, --cpus N
42 |
43 | Number of threads / CPUs to employ
44 |
45 | .. option:: -h, --help
46 |
47 | Print help.
48 |
49 | .. option:: -v, --verbose
50 |
51 | Use one or multiple ``-v`` to increase verbosity of output.
52 |
53 | Exit Status
54 | ===========
55 |
56 | Exits with non-zero upon encountering an error.
57 |
58 | .. only:: man
59 |
60 | See Also
61 | ========
62 |
63 | :manpage:`omics-qc(1)`, :manpage:`omics(7)`, :manpage:`illumina-reads-processing(7)`
64 |
--------------------------------------------------------------------------------
/docs/chop-contigs.txt:
--------------------------------------------------------------------------------
1 | .. program:: chop-contigs
2 |
3 | ====================================
4 | chop-contigs
5 | ====================================
6 |
7 | Synopsis
8 | ========
9 |
10 | :program:`chop-contigs` [OPTIONS]... [-i FILE]
11 |
12 |
13 | Description
14 | ===========
15 |
16 | Chop up an assembly's contigs into fixed-size chunks.
17 |
18 |
19 | Options
20 | =======
21 | .. option:: -h, --help
22 |
23 | show this help message and exit
24 |
25 | .. option:: --chunk-size SIZE
26 |
27 | Size of chunk into which contigs are divided. Default is 10000
28 |
29 | .. option:: -i, --input FILE
30 |
31 | input, fasta-formatted file with contigs, if not given stdin is used.
32 |
33 | .. option:: -o, --output FILE
34 |
35 | Output file
36 |
37 | .. option:: --wrap
38 |
39 | Wrap output sequences to line of length 60.
40 |
41 | .. option:: --no-dot-zero
42 |
43 | Do not add a .0 to a fasta header of a short sequence that did not need to
44 | be chopped up. This option makes the output compatible with CONCOCT's
45 | cut_up_fasta.py script.
46 |
47 | .. option:: --no-truncate-headers
48 |
49 | Do not further manipulate fasta headers beyond adding the chop numbers. By
50 | default, the header is truncated at the first whitespace character,
51 | assuming this still uniquely identifies the contig. This default behaviour
52 | is needed for MEGAHIT assemblies and does no harm to IDBA assemblies.
53 |
54 | .. option:: --debug
55 |
56 | Print stack trace on errors.
57 |
58 |
59 | Exit Status
60 | ===========
61 |
62 | Exits with non-zero upon encountering an error.
63 |
64 | .. only:: man
65 |
66 | See Also
67 | ========
68 |
69 | :manpage:`omics-binning(1)`,
70 | :manpage:`omics-mapping(1)`, :manpage:`omics(7)`,
71 | :manpage:`illumina-reads-processing(7)`
72 |
73 |
74 |
--------------------------------------------------------------------------------
/scripts/calcN50:
--------------------------------------------------------------------------------
1 | #! /usr/bin/perl
2 |
3 | # Copyright 2013, 2017, 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 |
21 | use strict;
22 |
23 | # Usage: calcN50 FASTA
24 | # Prints the number of contigs in FASTA followed by N50/L50 and N95/L95.
25 | my $fasta=$ARGV[0];
26 |
27 | ## Read Fasta File and compute N50, L50, N95 and L95 ##
28 | my $totalLength=0;
29 | my $totalContigs=0;
30 | my @allLen;
31 | open(FASTA, $fasta)|| die $!;
32 | # Read one FASTA record (header plus sequence lines) per iteration.
33 | $/=">";
34 | while(my $line=<FASTA>){
35 |     chomp $line;
36 |     next unless $line;
37 |
38 |     my ($header, @sequence)=split(/\n/, $line);
39 |     my $length=length(join("", @sequence));
40 |
41 |     push (@allLen, $length);
42 |     $totalLength += $length;
43 |     $totalContigs++;
44 | }
45 | $/="\n";
46 | close(FASTA);
47 |
48 | # Walk the contigs from longest to shortest, accumulating their lengths.
49 | my @sortedLen = sort {$b <=> $a} @allLen;
50 | my $cumLen=0;
51 | my $numContig=0;
52 | my ($n50, $l50, $n95, $l95);
53 | print "Total_Contigs:\t$totalContigs\n";
54 | foreach my $len(@sortedLen){
55 |     $cumLen+=$len;
56 |     $numContig++;
57 |     # Record the first contig at which half of the assembly is covered.
58 |     if(!defined($n50) && $cumLen >= $totalLength * 0.50){
59 |         ($n50, $l50)=($len, $numContig);
60 |     }
61 |     # The 95% mark is reached at or after the 50% mark, so stop only here.
62 |     # (Stopping at the 50% mark, as before, almost never printed N95/L95.)
63 |     if($cumLen >= $totalLength * 0.95){
64 |         ($n95, $l95)=($len, $numContig);
65 |         last;
66 |     }
67 | }
68 | # Nothing beyond the contig count is printed for an input without records.
69 | if(defined($n50)){
70 |     print "N50:\t$n50\n";
71 |     print "L50:\t$l50\n";
72 |     print "N95:\t$n95\n";
73 |     print "L95:\t$l95\n";
74 | }
75 |
--------------------------------------------------------------------------------
/scripts/VizBin_parser:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Copyright 2019 Derek Smith
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 | #################################################################################
21 | #
22 | # DJS 5 September 2018
23 | #
24 | # Run this shell script in a directory containing bin fasta files to get a
25 | # summary tab-delimited file to import the bins as a collection in ANVIO. It
26 | # was originally written for VizBin collections, but will work for any group of
27 | # Bin fastas generated from any binning program
28 | #
29 | #################################################################################
30 |
set -eu

# Writes VizBin_binning_results.txt with one line per contig found in the
# *.fa files of the current directory: the fasta header (without the leading
# ">"), a tab, and the name of the bin file without its .fa extension.
#
# Everything is done in a single pass so that no intermediate *.list files
# are created (or removed) and contig names are never touched by a
# substitution meant for the file name.

out=VizBin_binning_results.txt

# an unmatched glob must expand to nothing, not to the literal "*.fa"
shopt -s nullglob
bins=( *.fa )
shopt -u nullglob

if (( ${#bins[@]} == 0 )); then
    echo "error: no *.fa files found in $PWD" >&2
    exit 1
fi

for i in "${bins[@]}"; do
    # remove the file extension from the bin name (suffix only)
    bin=${i%.fa}
    # keep header lines only, drop the leading ">" and append the bin name
    BIN="$bin" awk '/^>/ { print substr($0, 2) "\t" ENVIRON["BIN"] }' "$i"
done > "$out"
49 |
--------------------------------------------------------------------------------
/scripts/getGISummary:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 |
3 | # Copyright 2015, 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 | use Bio::DB::EUtilities;
21 | use strict;
22 |
# Usage: getGISummary <id_list> <output_file>
#
# Reads one identifier per line from <id_list> (lines starting with "#" and
# empty lines are skipped), queries esummary on the protein database and
# writes one tab-separated line per document summary: the id followed by the
# name and content of every item that has content.
die "Usage: getGISummary <id_list> <output_file>\n" unless @ARGV == 2;

my @ids;
open(my $IN, '<', $ARGV[0]) || die "[error] $ARGV[0] : $!\n";
while (my $line = <$IN>) {
    next if $line =~ m/^#/;
    chomp $line;
    $line =~ s/\r//;
    next unless $line;

    push(@ids, $line);
}
close($IN);

die "[error] $ARGV[0] : no ids found\n" unless @ids;

my $factory = Bio::DB::EUtilities->new(-eutil => 'esummary',
                                       -email => 'sunitj@umich.edu',
                                       -db    => 'protein',
                                       -id    => \@ids);

open(my $OUT, '>', $ARGV[1]) || die "[error] $ARGV[1] : $!\n";
while (my $ds = $factory->next_DocSum) {
    my @fields = ($ds->get_id);
    # flattened mode
    while (my $item = $ds->next_Item('flattened')) {
        # not all Items have content, so need to check...
        if ($item->get_content) {
            push(@fields, $item->get_name, $item->get_content);
        }
    }
    # join with tabs so that the content of one item does not run into the
    # name of the next one
    print $OUT join("\t", @fields)."\n";
}
close($OUT) || die "[error] $ARGV[1] : $!\n";
54 |
55 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Michigan Geomicrobiology Lab
2 |
3 | Welcome to the GitHub Repo for some general purpose NGS, Data analysis and
4 | mining scripts used in the lab. Some scripts implement our short-read QC,
5 | assembly, binning, etc. pipeline and depend on the presence of a number of
6 | third-party software. The rest are Bash scripts or in core
7 | [Perl](http://www.perl.org/ "Perl Home") or [Python](https://www.python.org/
8 | "Python Home"). This means, if you have Perl or Python 3 installed, you won't
9 | need anything else to work with these scripts.
10 |
11 | Since these scripts are actively being used by the Lab, you can expect full
12 | support for any [issues](https://github.com/Geo-omics/scripts/issues "Report an
13 | issue"). Please do let us know if you find any bugs or easier/quicker/more
14 | elegant solutions.
15 |
16 |
17 | ## Language and OS Dependencies
18 |
19 | The scripts should work with various flavors of Linux and other unix-like
20 | environments. Here is a list of easy to install languages that you'll need:
21 |
22 | * Perl version 5.10 +
23 | * Python version 3.5 +
24 | * R version 3 +
25 |
26 | ## Contact
27 |
28 | Please send questions or comments to .
29 |
30 | ## Principal Investigator
31 |
32 | [Gregory J. Dick](https://sites.lsa.umich.edu/geomicro/ "Geomicrobiology Lab Homepage"), gdick [AT] umich [DOT] edu
33 |
34 |
35 | ## License
36 |
37 | Geo-omics-scripts is free software: you can redistribute it and/or modify it
38 | under the terms of the GNU General Public License as published by the Free
39 | Software Foundation, either version 3 of the License, or (at your option) any
40 | later version.
41 |
42 |
43 | ## Disclaimer
44 |
45 | **Geo-omics scripts are distributed in the hope that they will be useful, but
46 | WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
47 | FITNESS FOR A PARTICULAR PURPOSE.**
48 |
--------------------------------------------------------------------------------
/scripts/removeBlastSubj:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 |
3 | # Copyright 2013, 2015, 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 |
21 | use strict;
22 |
# Usage: removeBlastSubj <subject_list> <tabular_blast_output>
#
# Splits a tab-separated blast output by subject (second column): lines whose
# subject is in <subject_list> go to <pid>.QueriesFromList.out, all other
# lines go to <pid>.QueriesFromListRemoved.out.  Subjects are compared
# case-insensitively with spaces removed.
die "Usage: removeBlastSubj <subject_list> <tabular_blast_output>\n"
    unless @ARGV == 2;

my $in       = $ARGV[0];
my $blastOut = $ARGV[1];
my $out      = $$.".QueriesFromListRemoved.out";
my $list     = $$.".QueriesFromList.out";

my %exclude;
open(my $LIST, '<', $in) || die "[error] $in: $!\n";
while (my $line = <$LIST>) {
    next if ($line =~ m/^#/);
    chomp($line);
    next unless ($line);
    $line =~ s/ //g;
    $line =~ s/\r//g;
    # a line of only blanks must not become an empty key that would match
    # blast lines with an empty subject column
    next unless length($line);
    $line = lc($line);
    $exclude{$line}++;
}
close($LIST);

# number of distinct subjects to be removed
print scalar(keys %exclude)."\n";

open(my $BOUT, '<', $blastOut) || die "[error] $blastOut: $!\n";
open(my $OUT,  '>', $out)      || die "[error] $out: $!\n";
open(my $OUT2, '>', $list)     || die "[error] $list: $!\n";
my $count = 0;
while (my $line = <$BOUT>) {
    next if ($line =~ m/^#/);
    chomp($line);
    next unless ($line);

    my ($query, $subj, @etc) = split(/\t/, $line);
    $subj = '' unless defined $subj;    # line without a subject column
    $subj =~ s/ //g;

    $subj = lc($subj);
    if ($exclude{$subj}) {
        $count++;
        print $OUT2 $line."\n";
    }
    else {
        print $OUT $line."\n";
    }
}
print "Matches Found:".$count."\n";
close($BOUT);
close($OUT)  || die "[error] $out: $!\n";
close($OUT2) || die "[error] $list: $!\n";
68 |
--------------------------------------------------------------------------------
/scripts/createFastq:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 |
3 | # Copyright 2013, 2015, 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 |
21 | use warnings;
22 | use strict;
23 | use File::Basename;
24 |
# Usage: createFastq <reads.fasta|reads.fna>
#
# Combines a fasta file with the quality file of the same base name
# (<base>.qual, looked up in the current directory) into <base>.fastq.
# Quality values are written as characters with an offset of 33.
my $inFasta = $ARGV[0];
die "Usage: createFastq <reads.fasta|reads.fna>\n" unless defined $inFasta;

my $baseName = basename($inFasta, qw/.fasta .fna/);
my $inQual   = $baseName . ".qual";
my $outFastq = $baseName . ".fastq";

my %seqs;

# both input files are read one ">"-delimited record at a time
$/ = ">";

open(my $FASTA, '<', $inFasta) || die "[error] $inFasta: $!\n";
# the first "record" is whatever precedes the first ">", discard it
my $junk = <$FASTA>;

while (my $frecord = <$FASTA>) {
    chomp $frecord;
    my ($fdef, @seqLines) = split /\n/, $frecord;
    next unless defined $fdef;
    my $seq = join '', @seqLines;
    $seqs{$fdef} = $seq;
}

close($FASTA);

open(my $QUAL, '<', $inQual) || die "[error] $inQual: $!\n";
$junk = <$QUAL>;
open(my $FASTQ, '>', $outFastq) || die "[error] $outFastq: $!\n";

while (my $qrecord = <$QUAL>) {
    chomp $qrecord;
    my ($qdef, @qualLines) = split /\n/, $qrecord;
    next unless defined $qdef;

    # a quality record without sequence cannot become a valid fastq record
    unless (exists $seqs{$qdef}) {
        warn "[warning] no sequence found for '$qdef', skipping\n";
        next;
    }

    # split on any run of whitespace; splitting on a single blank would turn
    # double blanks into empty fields and thus into bogus quality characters
    my @quals = split ' ', join(' ', @qualLines);

    print $FASTQ "@", "$qdef\n";
    print $FASTQ "$seqs{$qdef}\n";
    print $FASTQ "+\n";
    print $FASTQ join('', map { chr($_ + 33) } @quals);
    print $FASTQ "\n";
}

close($QUAL);
close($FASTQ) || die "[error] $outFastq: $!\n";
66 |
--------------------------------------------------------------------------------
/scripts/tally-weave:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 |
3 | # Copyright 2013, 2014, 2015, 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 | use strict;
21 | use Getopt::Long;
22 | use File::Basename;
23 |
# Usage: tally-weave -o <output_file> [-e <extension>]
#
# Merges all *.<extension> files (default: tally) of the current directory
# into one table.  Each input file contributes one column named after the
# file (without extension); the first two tab-separated columns of every
# input line are taken as row name and value.  The last column counts in how
# many files a row has a non-zero value.
my $usage = "Usage: tally-weave -o <output_file> [-e <extension>]\n";
my $ext = "tally";
my $out;
GetOptions(
    'e:s' => \$ext,
    'o:s' => \$out,
) || die $usage;
die "[error] no output file given\n".$usage
    unless (defined $out && length $out);

my @DBs;
my @files = glob("*.".$ext);
open(my $OUT, '>', $out) || die "[error] $out: $!\n";
print $OUT "#Transcripts\t";
my %master;
print scalar(@files)." Files will be tallied...!\n";
foreach my $f (@files) {
    # the suffix is interpreted as a regular expression by basename, so the
    # dot and the extension have to be escaped
    my $dbName = basename($f, '\.'.quotemeta($ext));
    push(@DBs, $dbName);
    print $OUT $dbName."\t";
    open(my $fh, '<', $f) || die "[error] $f: $! \n";
    while (my $line = <$fh>) {
        next if ($line =~ m/^#/);
        chomp $line;
        $line =~ s/\r//g;
        next unless $line;

        my @cols = split(/\t/, $line);
        $master{$cols[0]}{$dbName} = $cols[1];
    }
    close $fh;
}
print $OUT "DB-presence\n";

# sorted, so that repeated runs produce identical files
foreach my $key (sort keys %master) {
    print $OUT $key."\t";
    my $total = 0;
    foreach my $db (@DBs) {
        my $v = $master{$key}{$db} ? $master{$key}{$db} : 0;
        print $OUT $v."\t";
        $total++ if ($v != 0);
    }
    print $OUT $total."\n";
}
close($OUT) || die "[error] $out: $!\n";
68 |
--------------------------------------------------------------------------------
/scripts/fixpod6:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 | """
21 | Improve SEE ALSO section to POD in perl scripts
22 |
23 | Note: Makes assumptions about input, little error checking
24 | """
25 | import argparse
26 | import re
27 | import sys
28 |
29 |
argp = argparse.ArgumentParser(description=__doc__)
argp.add_argument('inputfile', type=argparse.FileType())
argp.add_argument('-w', '--write-to-file', action='store_true')

args = argp.parse_args()

if args.write_to_file and args.inputfile is sys.stdin:
    # there is no file to write back to when reading from stdin
    argp.error('--write-to-file can not be used with input from stdin')

lines = []
in_see_also = False
ref_count = 1

# The whole input is consumed and closed before any output is written, so
# writing back to the same file is safe.
with args.inputfile as infile:
    for line in infile:
        # the SEE ALSO section runs from its =head1 line up to the next =cut
        if not in_see_also and line.startswith('=head1 SEE ALSO'):
            in_see_also = True
        if in_see_also and line.startswith('=cut'):
            in_see_also = False

        if in_see_also:
            line = re.sub(r'^=head3', '=head2', line)
            if line.startswith('=item'):
                # replace the item text by a running reference number
                line = '=item [{}]\n'.format(ref_count)
                ref_count += 1

        lines.append(line)

out = ''.join(lines)

# write output
if args.write_to_file:
    with open(args.inputfile.name, 'w') as outfile:
        outfile.write(out)
else:
    sys.stdout.write(out)
62 |
--------------------------------------------------------------------------------
/scripts/getMyContigs:
--------------------------------------------------------------------------------
1 | #! /usr/bin/perl
2 |
3 | # Copyright 2013, 2015, 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 |
21 | # USAGE: perl getMyContigs.pl <read_coverage_file> <contig_list> <output_file>
22 |
23 | use strict;
24 |
# Collects the names of all reads that are listed, in the read coverage
# file, for the contigs named in the contig list and writes them, one per
# line, to the output file.
#
# The contig list holds headers like NODE_14_length_2679_cov_8.406121; only
# the second "_"-separated field (14 in the example) is used and compared
# with the first column of the tab-separated read coverage file.  The third
# and following columns of that file are taken as read names.
my $usage = "USAGE: perl getMyContigs.pl <read_coverage_file> <contig_list> <output_file>";
die "Incorrect number of files input\n".$usage if (scalar(@ARGV) != 3);

my $readCov = $ARGV[0];
my $list    = $ARGV[1];
my $OUT     = $ARGV[2];

open(my $LIST_FH, '<', $list) || die "[error] $list: $!\n";
my %LIST;
while (my $line = <$LIST_FH>) {
    chomp $line;
    $line =~ s/\r$//;
    next unless $line;
    next if $line =~ /^#/;
    # NODE_14_length_2679_cov_8.406121
    my @headerParts = split(/\_/, $line);
    next unless defined $headerParts[1];
    $LIST{$headerParts[1]}++;
}
close($LIST_FH);

my %READS;
open(my $READ_FH, '<', $readCov) || die "[error] $readCov: $!\n";
while (my $line = <$READ_FH>) {
    next if $line =~ /^#/;
    chomp $line;
    next unless $line;

    my ($contigName, $size, @reads) = split(/\t/, $line);

    next unless $LIST{$contigName};
    foreach my $r (@reads) {
        $READS{$r}++;
    }
}
close($READ_FH);
undef %LIST;

print "Total # Reads Mapped to this bin:".scalar(keys %READS)."\n";
open(my $OUT_FH, '>', $OUT) || die "[error] $OUT: $!\n";
foreach my $r (keys %READS) {
    print $OUT_FH $r."\n";
}
close($OUT_FH) || die "[error] $OUT: $!\n";
66 |
--------------------------------------------------------------------------------
/bash-completion/omics:
--------------------------------------------------------------------------------
1 | # Copyright 2019 Regents of The University of Michigan.
2 |
3 | # This file is part of geo-omics-scripts.
4 |
5 | # Geo-omics-scripts is free software: you can redistribute it and/or
6 | # modify it under the terms of the GNU General Public License as published
7 | # by the Free Software Foundation, either version 3 of the License, or (at
8 | # your option) any later version.
9 |
10 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
11 | # WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 | # General Public License for more details.
14 |
15 | # You should have received a copy of the GNU General Public License along
16 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
17 |
# Completion function for the omics command.
#
# The command line typed so far is run with OMICS_AUTO_COMPLETE set to the
# index of the word being completed; the words it prints become the
# completion candidates.  File name completion is added when the command
# fails, prints nothing, or prints FILE_COMPLETION as last word.  Set
# OMICS_AUTO_COMPLETE_DEBUG to a non-empty value to see the stderr output of
# the command.
_omics_completion()
{
    # suppress stderr while running python unless we are debugging
    local hide_stderr=true
    # holds the descriptor number under which the real stderr is saved
    local stderr
    [[ -v OMICS_AUTO_COMPLETE_DEBUG ]] && [[ -n "${OMICS_AUTO_COMPLETE_DEBUG}" ]] && hide_stderr=false
    $hide_stderr && exec {stderr}>&2 2>/dev/null

    local -a reply files
    local do_file_completion
    if reply=(
        # call omics executable
        $(OMICS_AUTO_COMPLETE="$COMP_CWORD" "${COMP_WORDS[@]}")
    ); then
        # add file completion if requested, marker must be last
        if [[ ${#reply[@]} -eq 0 ]]; then
            do_file_completion=true
        elif [[ ${reply[-1]} == FILE_COMPLETION ]]; then
            # quoted, so the subscript is never subject to pathname expansion
            unset 'reply[-1]'
            do_file_completion=true
        else
            # normal, non-empty reply
            do_file_completion=false
        fi
    else
        # fall back to file completion
        do_file_completion=true
    fi

    # restore stderr as needed
    $hide_stderr && exec 2>&$stderr-

    if $do_file_completion; then
        # compgen prints one file name per line; reading line-wise keeps
        # names with blanks or glob characters in one piece
        mapfile -t files < <(compgen -f -- "${COMP_WORDS[COMP_CWORD]}")
        reply+=( "${files[@]}" )
    fi

    COMPREPLY=("${reply[@]}")
}
complete -F _omics_completion omics
54 |
--------------------------------------------------------------------------------
/docs/unchop-contigs.txt:
--------------------------------------------------------------------------------
1 | .. program:: unchop-contigs
2 |
3 | =================================================================
4 | unchop-contigs - Stitch together chopped up contigs after binning
5 | =================================================================
6 |
7 | Synopsis
8 | ========
9 |
10 | :program:`unchop-contigs` [-h] [-i [BACKUP_SUFFIX] | -o OUT_DIR] [-v] [input [input ...]]
11 |
12 |
13 | Description
14 | ===========
15 |
16 | The CONCOCT binner recommends to chop long contigs into even length chunks to
17 | reduce bias related to varying contigs sizes. This script glues them back
18 | together for downstream analysis of bins.
19 |
20 | It is assumed that there is one fasta file per bin and that the fasta headers
21 | consist of the original contig id followed by a dot and a decimal chunk number.
22 | For example if a bin has three contig chunks named::
23 |
24 | k141_531759.0
25 | k141_531759.1
26 | k141_531759.2
27 |
28 | they will be replaced by a single contig called::
29 |
30 | k141_531759.0-2
31 |
32 | Contigs that do not have chunk information will be left alone. However contigs
33 | will be sorted by contig id and numerical chunk number. A consequence is that
34 | applying unchop-contigs a second time may change the order of some contigs.
35 |
36 |
37 | Options
38 | =======
39 |
40 | .. option:: input
41 |
42 | List of directories or fasta files. The default is to take the
43 | current directory.
44 |
45 | .. option:: -h, --help
46 |
47 | show this help message and exit
48 |
49 | .. option:: -i [BACKUP_SUFFIX], --in-place [BACKUP_SUFFIX]
50 |
51 | Replace input file. If provided, backup of each file is made using
52 | the provided suffix.
53 |
54 | .. option:: -o OUT_DIR, --out-dir OUT_DIR
55 |
56 | Output directory. The default is the current directory.
57 |
58 | .. option:: -v, --verbose
59 |
60 | Print diagnostic output.
61 |
62 |
63 | Exit Status
64 | ===========
65 |
66 | Exits with non-zero upon encountering an error.
67 |
68 | .. only:: man
69 |
70 | See Also
71 | ========
72 |
73 | :manpage:`omics(7)`,
74 | :manpage:`illumina-reads-processing(7)`
75 |
76 |
--------------------------------------------------------------------------------
/lib/omics/_version.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 Regents of The University of Michigan.
2 |
3 | # This file is part of geo-omics-scripts.
4 |
5 | # Geo-omics-scripts is free software: you can redistribute it and/or
6 | # modify it under the terms of the GNU General Public License as published
7 | # by the Free Software Foundation, either version 3 of the License, or (at
8 | # your option) any later version.
9 |
10 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
11 | # WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 | # General Public License for more details.
14 |
15 | # You should have received a copy of the GNU General Public License along
16 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
17 |
18 | import os.path
19 | import subprocess
20 |
# Set to real version when distributed outside of git vcs
VERSION = None


def get_version(version=VERSION, raise_on_error=False):
    """
    Get the version string

    Get the hard-coded version if possible, then fall back to ask git.  If
    that fails raise an exception or return 'unknown' depending on the
    raise_on_error flag.

    :param version: Version to return as-is, git is only asked if this is
                    None.
    :param bool raise_on_error: If True, a failure to run git describe
                                raises an error, otherwise such a failure
                                results in 'unknown'.
    :return: The version string or 'unknown'.
    :raises RuntimeError: If raise_on_error is True and git could not be
                          started or exited with a non-zero status.
    """
    if version is not None:
        return version

    error_template = 'Failed to get version info from git: {}: {}\n{}{}'

    try:
        p = subprocess.run(
            ['git', 'describe'],
            # abspath: a relative __file__ would give an empty dirname,
            # which is not a valid working directory
            cwd=os.path.dirname(os.path.abspath(__file__)),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=raise_on_error,
        )
    except subprocess.CalledProcessError as e:
        # only raised with check=True, i.e. if raise_on_error is set
        out = (e.stdout or b'').decode(errors='replace')
        err = (e.stderr or b'').decode(errors='replace')
        raise RuntimeError(
            error_template.format(e.__class__.__name__, e, out, err)
        ) from e
    except OSError as e:
        # git is not installed or can not be executed; such exceptions carry
        # no captured output
        if raise_on_error:
            raise RuntimeError(
                error_template.format(e.__class__.__name__, e, '', '')
            ) from e
        return 'unknown'

    if p.returncode != 0:
        return 'unknown'

    version = p.stdout.decode().strip()
    # version should be like 1.0.134-42-gd3adb33f
    # make this a PEP440 local version like 1.0.134+42-gd3adb33f
    version = version.replace('-', '+', 1)
    return version or 'unknown'
58 |
--------------------------------------------------------------------------------
/scripts/rgi-setup:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright 2019 Regents of The University of Michigan.
3 |
4 | # This file is part of geo-omics-scripts.
5 |
6 | # Geo-omics-scripts is free software: you can redistribute it and/or
7 | # modify it under the terms of the GNU General Public License as published
8 | # by the Free Software Foundation, either version 3 of the License, or (at
9 | # your option) any later version.
10 |
11 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
12 | # WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 | # General Public License for more details.
15 |
16 | # You should have received a copy of the GNU General Public License along
17 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
18 |
###########################################
# This script loads the CARD reference data
# and prepares a directory from within which
# you can run `rgi main`
# The commands were adapted from the README
# of the RGI software.
###########################################
set -euo pipefail

trap 'echo "error at line $LINENO, exit status $?"' ERR
card=./card.json

# Load CARD reference data

if [[ ! -e $card ]]; then
    # explicit output name, so that the following tar and rm act on the file
    # that was just downloaded
    wget -O data https://card.mcmaster.ca/latest/data
    tar -xvf data "$card"
    rm data
fi

rgi load --card_json "$card" --local
rgi card_annotation -i "$card" > card_annotation.log 2>&1

# Parse the version from the name of the annotation fasta file.  The pattern
# is anchored and only accepts digits and dots, so additional files matching
# card_database_v*.fasta can not end up in the version string.
version=
for f in card_database_v*.fasta; do
    if [[ $f =~ ^card_database_v([0-9][0-9.]*)\.fasta$ ]]; then
        version=${BASH_REMATCH[1]}
        break
    fi
done
if [[ -z $version ]]; then
    echo "[ERROR] failed to parse version from card_database_v*.fasta" >&2
    exit 1
fi
echo "[INFO] version parsed: $version"
rgi load -i "$card" --card_annotation card_database_v"$version".fasta --local

wget -O wildcard_data.tar.bz2 https://card.mcmaster.ca/latest/variants
mkdir -p wildcard
tar -xvf wildcard_data.tar.bz2 -C wildcard
rm wildcard_data.tar.bz2
gunzip wildcard/*.gz

rgi wildcard_annotation -i wildcard --card_json "$card" -v "$version" > wildcard_annotation.log 2>&1
rgi load --wildcard_annotation wildcard_database_v"$version".fasta --wildcard_index wildcard/index-for-model-sequences.txt --card_annotation card_database_v"$version".fasta --local

# check db version
rgi database --version --local
58 |
--------------------------------------------------------------------------------
/scripts/fixpod2:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Copyright 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 | """
21 | Fix additional stuff in POD in perl scripts
22 |
23 | Note: Makes assumptions about input, little error checking
24 | """
25 | import argparse
26 | import re
27 | import sys
28 |
29 |
argp = argparse.ArgumentParser(description=__doc__)
argp.add_argument('inputfile', type=argparse.FileType())
argp.add_argument('-w', '--write-to-file', action='store_true')

args = argp.parse_args()

if args.write_to_file and args.inputfile is sys.stdin:
    # there is no file to write back to when reading from stdin
    argp.error('--write-to-file can not be used with input from stdin')

# Description lines of the form "or -x : text", i.e. an alternative option
# name followed by the actual description
or_option_pat = re.compile(
    r'^(?:or|OR)\s*-(?P<opt>\w+)\s*:?\s*(?P<descr>.*)$'
)

chunks = []

# The whole input is consumed and closed before any output is written, so
# writing back to the same file is safe.
with args.inputfile as infile:
    # a single iterator is used for both the loop and the look-ahead reads
    lines = iter(infile)
    for line in lines:
        if not line.startswith('=item '):
            chunks.append(line)
            continue

        item_line = line.strip()

        empty_line = next(lines, '')  # empty line following =item
        if empty_line.strip():
            raise RuntimeError('non-empty line following =item')

        descr = next(lines, '')
        m = or_option_pat.match(descr)
        if m is None:
            # remove any leading ':\t+' from description
            descr = re.sub(r'^:\s+', '', descr.strip())
        else:
            # Fix options with 'or' between long and short option name
            descr = m.group('descr')
            item_line = item_line + ', B<-{}>'.format(m.group('opt'))

        chunks.append(item_line + '\n\n' + descr + '\n')

out = ''.join(chunks)

# write output
if args.write_to_file:
    with open(args.inputfile.name, 'w') as outfile:
        outfile.write(out)
else:
    sys.stdout.write(out)
68 |
--------------------------------------------------------------------------------
/docs/binning.txt:
--------------------------------------------------------------------------------
1 | .. program:: omics binning
2 |
3 | ====================================
4 | binning - bin metagenomic assemblies
5 | ====================================
6 |
7 | Synopsis
8 | ========
9 |
10 | :program:`omics binning` [OPTIONS]...
11 |
12 |
13 | Description
14 | ===========
15 |
16 | The :program:`omics binning` script implements the binning step for the Geomicro
17 | Illumina Reads Pipeline.
18 |
19 | Options
20 | =======
21 |
22 | .. option:: -a, --assembly FILE
23 |
24 | Fasta-formatted file containing the assembled contigs, by default this is
25 | :file:`contigs.fa`
26 |
27 | .. option:: -c, --coverage-file FILE
28 |
29 | Merged/shared per-sample-contig mean coverage file. If this option is not
30 | present, then this file will be compiled from the files found with the
31 | --coverage-path argument.
32 |
33 | .. option:: --coverage-path PATH
34 |
35 | Path to coverage files relative to each sample directory; the default is
36 | :file:`MAPPING/assembly.chop.genomeCovBed.tsv`. These files are made by the
37 | mapping script and correspond to the :file:`asm_pair-smds.bam` files made by
38 | CONCOCT's :program:`map-bowtie2-markduplicates.sh`. This option is
39 | incompatible with the -c option.
40 |
41 | .. option:: --force
42 |
43 | Overwrite existing data
44 |
45 | .. option:: -o, --out-dir PATH
46 |
47 | Path to output directory, by default this is :file:`BINNING`
48 |
49 | .. option:: --working-dir=DIR
50 |
51 | Directory under which output is stored. By default this is the current
52 | directory.
53 |
54 | .. option:: -h, --help
55 |
56 | Print help.
57 |
58 | .. option:: --no-color
59 |
60 | Disable colorful output.
61 |
62 | .. option:: -v, --verbosity=N
63 |
64 | Use one or multiple ``-v`` to increase verbosity of output or set a
65 | level of verbosity with ``--verbosity=N``. By default the verbosity
66 | level is 1. Setting verbosity to 0 silences the program. A level of
67 | 3 prints debugging info.
68 |
69 |
70 | Exit Status
71 | ===========
72 |
73 | Exits with non-zero upon encountering an error.
74 |
75 | .. only:: man
76 |
77 | See Also
78 | ========
79 |
80 | :manpage:`omics-prep(1)`, :manpage:`omics-qc(1)`, :manpage:`omics-assemble(1)`,
81 | :manpage:`omics-mapping(1)`, :manpage:`omics(7)`,
82 | :manpage:`illumina-reads-processing(7)`
83 |
84 |
--------------------------------------------------------------------------------
/docs/run.txt:
--------------------------------------------------------------------------------
1 | .. program:: omics run
2 |
3 | ========================================
4 | run - run command inside omics container
5 | ========================================
6 |
7 |
8 | Synopsis
9 | ========
10 |
11 | :program:`omics run` [OPTIONS...] [``--``] COMMAND...
12 |
13 |
14 | Description
15 | ===========
16 |
17 | This is a wrapper around :command:`singularity run`. It runs the given command
18 | inside the omics container environment. If :program:`omics run` is called from
19 | a shell then command may need to be protected from the shell with single or
20 | double quotes as needed. The container will provide a clean environment, so if
21 | environment variables are needed inside, e.g. ``SOMEVAR=foobar``, then set instead
22 | ``SINGULARITYENV_SOMEVAR=foobar``, and the variable will be set with the
23 | ``SINGULARITYENV_`` prefix stripped.
24 |
25 | Options
26 | =======
27 |
28 | Any options given to this script must be separated from the COMMAND
29 | by a double dash; otherwise parts of the COMMAND may be interpreted as
30 | options, likely with unintended consequences.
31 |
32 | .. option:: -i PATH, --container-image PATH
33 |
34 | Full path to singularity container image. A default is provided if this
35 | option is missing.
36 |
37 | .. option:: -s option, --singularity option
38 |
39 | Options passed on to :command:`singularity`. For instance, to additionally mount a
40 | path ``/some/path``, add ``--singularity "-B /some/path"`` and the option
41 | ``-B /some/path`` will be appended to the call to :command:`singularity run`. The
42 | empty space between the option and its parameter must be protected by
43 | quotes to prevent premature word splitting by the shell. Run
44 | :command:`singularity run --help` to see what options are supported.
45 |
46 | .. option:: --working-dir PATH
47 |
48 | Set the working directory for the command.
49 |
50 | .. option:: -h, --help
51 |
52 | Print help.
53 |
54 | .. option:: --no-color
55 |
56 | Disable colorful terminal output.
57 |
58 | .. option:: -v, --verbosity N
59 |
60 | Set verbosity level or use one or multiple :option:`-v` to increase verbosity of output.
61 |
62 |
63 | Exit Status
64 | ===========
65 |
66 | Exits with a non-zero status upon encountering an error.
67 |
68 | .. only:: man
69 |
70 | See Also
71 | ========
72 |
73 | :manpage:`omics-prep(1)`, :manpage:`omics(7)`, :manpage:`illumina-reads-processing(7)`, :manpage:`singularity(1)`
74 |
--------------------------------------------------------------------------------
/scripts/U2T:
--------------------------------------------------------------------------------
1 | #! /usr/bin/perl
2 |
3 | # Copyright 2013, 2015, 2019 Regents of The University of Michigan.
4 |
5 | # This file is part of geo-omics-scripts.
6 |
7 | # Geo-omics-scripts is free software: you can redistribute it and/or
8 | # modify it under the terms of the GNU General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or (at
10 | # your option) any later version.
11 |
12 | # Geo-omics-scripts is distributed in the hope that it will be useful, but
13 | # WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # General Public License for more details.
16 |
17 | # You should have received a copy of the GNU General Public License along
18 | # with Geo-omics-scripts. If not, see <https://www.gnu.org/licenses/>.
19 |
20 |
21 | use strict;
22 | use Getopt::Long;
23 |
24 | =head1 NAME
25 |
26 | U2T - Converts U -> T and removes gaps
27 |
28 |
29 | =head1 SYNOPSIS
30 |
31 | B<U2T> B<-in> I<input> B<-out> I<output>