├── .gitattributes
├── .github
│   ├── CONTRIBUTING.md
│   ├── ISSUE_TEMPLATE
│   │   ├── bug_report.md
│   │   └── feature_request.md
│   └── PULL_REQUEST_TEMPLATE.md
├── .gitignore
├── .travis.yml
├── CHANGELOG.md
├── CODE_OF_CONDUCT.md
├── Dockerfile
├── LICENSE
├── README.md
├── Singularity
├── assets
│   ├── email_template.html
│   ├── email_template.txt
│   └── sendmail_template.txt
├── bin
│   ├── markdown_to_html.r
│   └── scrape_software_versions.py
├── conf
│   ├── awsbatch.config
│   ├── base.config
│   ├── binac.config
│   ├── genomes.config
│   ├── multiqc_config.yaml
│   └── test.config
├── docs
│   ├── README.md
│   ├── configuration
│   │   └── adding_your_own.md
│   ├── images
│   │   ├── deepvariant_logo.png
│   │   └── deepvariant_logo.svg
│   ├── installation.md
│   ├── output.md
│   ├── troubleshooting.md
│   └── usage.md
├── environment.yml
├── main.nf
├── nextflow.config
└── pics
    └── pic_workflow.jpg

/.gitattributes:
--------------------------------------------------------------------------------
1 | *.config linguist-language=nextflow
2 |
--------------------------------------------------------------------------------
/.github/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # nf-core/deepvariant: Contributing Guidelines
2 |
3 | Hi there! Many thanks for taking an interest in improving nf-core/deepvariant.
4 |
5 | We try to manage the required tasks for nf-core/deepvariant using GitHub issues; you probably came to this page when creating one. Please use the pre-filled template to save time.
6 |
7 | However, don't be put off by this template - other more general issues and suggestions are welcome! Contributions to the code are even more welcome ;)
8 |
9 | > If you need help using or modifying nf-core/deepvariant then the best place to go is the Gitter chatroom where you can ask us questions directly: https://gitter.im/nf-core/Lobby
10 |
11 | ## Contribution workflow
12 |
13 | If you'd like to write some code for nf-core/deepvariant, the standard workflow
14 | is as follows:
15 |
16 | 1. Check that there isn't already an issue about your idea in the
17 |    [nf-core/deepvariant issues](https://github.com/nf-core/deepvariant/issues) to avoid
18 |    duplicating work.
19 |    * If there isn't one already, please create one so that others know you're working on this
20 | 2. Fork the [nf-core/deepvariant repository](https://github.com/nf-core/deepvariant) to your GitHub account
21 | 3. Make the necessary changes / additions within your forked repository
22 | 4. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged.
23 |
24 | If you're not used to this workflow with git, you can start with some [basic docs from GitHub](https://help.github.com/articles/fork-a-repo/) or even their [excellent interactive tutorial](https://try.github.io/).
25 |
26 | ## Tests
27 |
28 | When you create a pull request with changes, [Travis CI](https://travis-ci.org/) will run automatic tests.
29 | Typically, pull requests are only fully reviewed when these tests are passing, though of course we can help out before then.
30 |
31 | There are typically two types of tests that run:
32 |
33 | ### Lint Tests
34 |
35 | nf-core has a [set of guidelines](http://nf-co.re/developer_docs) which all pipelines must adhere to.
36 | To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint <pipeline-directory>` command.
37 |
38 | If any failures or warnings are encountered, please follow the listed URL for more documentation.
39 |
40 | ### Pipeline Tests
41 |
42 | Each nf-core pipeline should be set up with a minimal set of test data.
43 | Travis CI then runs the pipeline on this data to ensure that it exits successfully.
44 | If there are any failures then the automated tests fail.
45 | These tests are run both with the latest available version of Nextflow and also the minimum required version that is stated in the pipeline code.
46 |
47 | ## Getting help
48 |
49 | For further information/help, please consult the [nf-core/deepvariant documentation](https://github.com/nf-core/deepvariant#documentation) and don't hesitate to get in touch on [Gitter](https://gitter.im/nf-core/Lobby).
50 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | Hi there!
2 |
3 | Thanks for telling us about a problem with the pipeline. Please delete this text and anything that's not relevant from the template below:
4 |
5 | #### Describe the bug
6 | A clear and concise description of what the bug is.
7 |
8 | #### Steps to reproduce
9 | Steps to reproduce the behaviour:
10 | 1. Command line: `nextflow run ...`
11 | 2. See error: _Please provide your error message_
12 |
13 | #### Expected behaviour
14 | A clear and concise description of what you expected to happen.
15 |
16 | #### System:
17 |  - Hardware: [e.g. HPC, Desktop, Cloud...]
18 |  - Executor: [e.g. slurm, local, awsbatch...]
19 |  - OS: [e.g. CentOS Linux, macOS, Linux Mint...]
20 |  - Version: [e.g. 7, 10.13.6, 18.3...]
21 |
22 | #### Nextflow Installation:
23 |  - Version: [e.g. 0.31.0]
24 |
25 | #### Container engine:
26 |  - Engine: [e.g. Conda, Docker or Singularity]
27 |  - Version: [e.g. 1.0.0]
28 |  - Image tag: [e.g. nfcore/deepvariant:1.0.0]
29 |
30 | #### Additional context
31 | Add any other context about the problem here.
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | Hi there!
2 |
3 | Thanks for suggesting a new feature for the pipeline! Please delete this text and anything that's not relevant from the template below:
4 |
5 | #### Is your feature request related to a problem? Please describe.
6 | A clear and concise description of what the problem is.
7 | Ex. I'm always frustrated when [...]
8 |
9 | #### Describe the solution you'd like
10 | A clear and concise description of what you want to happen.
11 |
12 | #### Describe alternatives you've considered
13 | A clear and concise description of any alternative solutions or features you've considered.
14 |
15 | #### Additional context
16 | Add any other context about the feature request here.
17 |
--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | Many thanks for contributing to nf-core/deepvariant!
2 |
3 | Please fill in the appropriate checklist below (delete whatever is not relevant). These are the most common things requested on pull requests (PRs).
4 |
5 | ## PR checklist
6 |  - [ ] This comment contains a description of changes (with reason)
7 |  - [ ] If you've fixed a bug or added code that should be tested, add tests!
8 |  - [ ] If necessary, also make a PR on the [nf-core/deepvariant branch on the nf-core/test-datasets repo](https://github.com/nf-core/test-datasets/pull/new/nf-core/deepvariant)
9 |  - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker`).
10 |  - [ ] Make sure your code lints (`nf-core lint .`).
11 |  - [ ] Documentation in `docs` is updated
12 |  - [ ] `CHANGELOG.md` is updated
13 |  - [ ] `README.md` is updated
14 |
15 | **Learn more about contributing:** https://github.com/nf-core/deepvariant/tree/master/.github/CONTRIBUTING.md
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .nextflow*
2 | work/
3 | data/
4 | results/
5 | .DS_Store
6 | tests/test_data
7 | sampleDerivatives/
8 | *.pyc
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | sudo: required
2 | language: python
3 | jdk: openjdk8
4 | services: docker
5 | python: "3.6"
6 | cache: pip
7 | matrix:
8 |   fast_finish: true
9 |
10 | before_install:
11 |   # PRs to master are only ok if coming from dev branch
12 |   - '[ $TRAVIS_PULL_REQUEST = "false" ] || [ $TRAVIS_BRANCH != "master" ] || ([ $TRAVIS_PULL_REQUEST_SLUG = $TRAVIS_REPO_SLUG ] && [ $TRAVIS_PULL_REQUEST_BRANCH = "dev" ])'
13 |   # Pull the docker image first so the test doesn't wait for this
14 |   - docker pull nfcore/deepvariant
15 |   # Fake the tag locally so that the pipeline runs properly
16 |   - docker tag nfcore/deepvariant nfcore/deepvariant:1.0
17 |
18 | install:
19 |   # Install Nextflow
20 |   - mkdir /tmp/nextflow && cd /tmp/nextflow
21 |   - wget -qO- get.nextflow.io | bash
22 |   - sudo ln -s /tmp/nextflow/nextflow /usr/local/bin/nextflow
23 |   # Install nf-core/tools
24 |   - pip install nf-core
25 |   # Reset
26 |   - mkdir ${TRAVIS_BUILD_DIR}/tests && cd ${TRAVIS_BUILD_DIR}/tests
27 |
28 | env:
29 |   - NXF_VER='18.10.1' # Specify a minimum NF version that should be tested and work
30 |   - NXF_VER='' # Plus: get the latest NF version and check that it works
31 |
32 | script:
33 |   # Lint the pipeline code
34 |   - nf-core lint ${TRAVIS_BUILD_DIR}
35 |   # Run the pipeline with the test profile
36 |   - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # nf-core/deepvariant: Changelog
2 |
3 | ## v1.0 - 2018-11-19
4 |
5 | This release marks the point where the pipeline was moved from lifebit-ai/DeepVariant over to the new nf-core community, at nf-core/DeepVariant. The [nf-core](http://nf-co.re/) template was used to help ensure that the pipeline meets the standards of nf-core.
6 | 7 | In summary, the main changes are: 8 | 9 | - Rebranding and renaming throughout the pipeline to nf-core 10 | - Updating many parts of the pipeline config and style to meet nf-core standards 11 | - Continuous integration tests with Travis CI 12 | - Dependencies installed via conda 13 | - Added support for BAM input as file, not just a folder 14 | - Added channels to process input files 15 | - Added separate processes for each of the steps in FASTA file preprocessing 16 | - Use of genomes config to specify relevant reference genome files similar to igenomes 17 | - Added BAM size dependent setting of memory 18 | - Slightly improved documentation 19 | 20 | ...and many more minor tweaks. 21 | 22 | Thanks to everyone who has worked on this release! 23 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 
34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on the [Gitter channel](https://gitter.im/nf-core/Lobby). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nfcore/base 2 | LABEL authors="phil@lifebit.ai" \ 3 | description="Docker image containing all requirements for nf-core/deepvariant pipeline" 4 | 5 | COPY environment.yml / 6 | RUN conda env create -f /environment.yml && conda clean -a 7 | ENV PATH /opt/conda/envs/nf-core-deepvariant-1.0/bin:$PATH 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 
20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![deepvariant](https://raw.githubusercontent.com/nf-core/deepvariant/master/docs/images/deepvariant_logo.png) 2 | 3 | # nf-core/deepvariant 4 | 5 | **Deep Variant as a Nextflow pipeline** 6 | 7 | [![Build Status](https://travis-ci.org/nf-core/deepvariant.svg?branch=master)](https://travis-ci.org/nf-core/deepvariant) 8 | [![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A518.10.1-brightgreen.svg)](https://www.nextflow.io/) 9 | [![Gitter](https://img.shields.io/badge/gitter-%20join%20chat%20%E2%86%92-4fb99a.svg)](https://gitter.im/nf-core/Lobby) 10 | 11 | [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io/) 12 | [![Docker](https://img.shields.io/docker/automated/nfcore/deepvariant.svg)](https://hub.docker.com/r/nfcore/deepvariant) 13 | ![Singularity Container available](https://img.shields.io/badge/singularity-available-7E4C74.svg) 14 | 15 | A Nextflow pipeline for running the [Google DeepVariant variant caller](https://github.com/google/deepvariant). 16 | 17 | ## What is DeepVariant and why in Nextflow? 18 | 19 | The Google Brain Team in December 2017 released a [Variant Caller](https://www.ebi.ac.uk/training/online/course/human-genetic-variation-i-introduction/variant-identification-and-analysis/what-variant) based on DeepLearning: DeepVariant. 20 | 21 | In practice, DeepVariant first builds images based on the BAM file, then it uses a DeepLearning image recognition approach to obtain the variants and eventually it converts the output of the prediction in the standard VCF format. 22 | 23 | DeepVariant as a Nextflow pipeline provides several advantages to the users. It handles automatically, through **preprocessing steps**, the creation of some extra needed indexed and compressed files which are a necessary input for DeepVariant, and which should normally manually be produced by the users. 24 | Variant Calling can be performed at the same time on **multiple BAM files** and thanks to the internal parallelization of Nextflow no resources are wasted. 25 | Nextflow's support of Docker allows to produce the results in a computational reproducible and clean way by running every step inside of a **Docker container**. 26 | 27 | For more detailed information about Google's DeepVariant please refer to [google/deepvariant](https://github.com/google/deepvariant) or this [blog post](https://research.googleblog.com/2017/12/deepvariant-highly-accurate-genomes.html).
28 | For more information about DeepVariant in Nextflow please refer to this [blog post](https://blog.lifebit.ai/post/deepvariant/?utm_campaign=documentation&utm_source=github&utm_medium=web).
29 |
30 | ## Quick Start
31 |
32 | **Warning: DeepVariant can be very computationally intensive to run.**
33 |
34 | To **test** the pipeline you can run:
35 |
36 | ```bash
37 | nextflow run nf-core/deepvariant -profile test,docker
38 | ```
39 |
40 | A typical run on **whole genome data** looks like this:
41 |
42 | ```bash
43 | nextflow run nf-core/deepvariant --genome hg19 --bam yourBamFile --bed yourBedFile -profile standard,docker
44 | ```
45 |
46 | In this case variants are called on the supplied BAM file, using the hg19 version of the reference genome.
47 | One VCF file is produced and can be found in the "results" folder.
48 |
49 | A typical run on **whole exome data** looks like this:
50 |
51 | ```bash
52 | nextflow run nf-core/deepvariant --exome --genome hg19 --bam_folder myBamFolder --bed myBedFile -profile standard,docker
53 | ```
54 |
55 | ## Documentation
56 |
57 | The nf-core/deepvariant documentation is split into the following files:
58 |
59 | 1. [Installation](docs/installation.md)
60 | 2. [Running the pipeline](docs/usage.md)
61 | 3. Pipeline configuration
62 |    - [Adding your own system](docs/configuration/adding_your_own.md)
63 |    - [Reference genomes](docs/configuration/reference_genomes.md)
64 | 4. [Output and how to interpret the results](docs/output.md)
65 | 5. [Troubleshooting](docs/troubleshooting.md)
66 | 6. [More about DeepVariant](docs/about.md)
67 |
68 | ## More about the pipeline
69 |
70 | As shown in the following picture, the workflow contains both **preprocessing steps** (light blue) and the proper **variant calling steps** (darker blue).
71 |
72 | Some input files are optional: if they are not given, they will be created automatically for the user during the preprocessing steps; if they are given, the preprocessing steps are skipped. For more information about preprocessing, please refer to the "INPUT PARAMETERS" section.
73 |
74 | The workflow **accepts one reference genome and multiple BAM files as input**. The variant calling for the several input BAM files is processed completely independently and produces independent VCF result files. The advantage of this approach is that the variant calling of the different BAM files can be parallelized internally by Nextflow, taking advantage of all the cores of the machine in order to get the results as quickly as possible.
75 |
76 | <p align="center">
77 |   <img src="pics/pic_workflow.jpg" alt="nf-core/deepvariant workflow">
78 | </p>
79 |
80 | ## Credits
81 |
82 | This pipeline was originally developed at [Lifebit](https://lifebit.ai/?utm_campaign=documentation&utm_source=github&utm_medium=web), by @luisas, to ease and reduce the cost of variant calling analyses.
83 |
84 | Many thanks to nf-core and those who have helped out along the way too, including (but not limited to): @ewels, @MaxUlysse, @apeltzer, @sven1103 & @pditommaso
85 |
--------------------------------------------------------------------------------
/Singularity:
--------------------------------------------------------------------------------
1 | From:nfcore/base
2 | Bootstrap:docker
3 |
4 | %labels
5 |     MAINTAINER Phil Palmer
6 |     DESCRIPTION Singularity image containing all requirements for the nf-core/deepvariant pipeline
7 |     VERSION 1.0
8 |
9 | %environment
10 |     PATH=/opt/conda/envs/nf-core-deepvariant-1.0/bin:$PATH
11 |     export PATH
12 |
13 | %files
14 |     environment.yml /
15 |
16 | %post
17 |     /opt/conda/bin/conda env create -f /environment.yml
18 |     /opt/conda/bin/conda clean -a
--------------------------------------------------------------------------------
/assets/email_template.html:
--------------------------------------------------------------------------------
1 | <html>
2 | <head>
3 |   <meta charset="utf-8">
4 |   <meta http-equiv="X-UA-Compatible" content="IE=edge">
5 |   <meta name="viewport" content="width=device-width, initial-scale=1">
6 |
7 |
8 |
9 |   <title>nf-core/deepvariant Pipeline Report</title>
10 | </head>
11 | <body>
12 | <div style="font-family: Helvetica, Arial, sans-serif; font-size: 14px; padding: 30px;">
13 | <h1>nf-core/deepvariant v${version}</h1>
14 | <h2>Run Name: $runName</h2>
15 |
16 |
17 | <% if (!success){
18 |     out << """
19 |     <div style="color: #a94442; background-color: #f2dede; border: 1px solid #ebccd1; padding: 15px; margin-bottom: 20px;">
20 |         <h4 style="margin-top: 0;">nf-core/deepvariant execution completed unsuccessfully!</h4>
21 |         <p>The exit status of the task that caused the workflow execution to fail was: <code>$exitStatus</code>.</p>
22 |         <p>The full error message was:</p>
23 |         <pre style="white-space: pre-wrap; overflow: visible;">${errorReport}</pre>
24 |     </div>
25 |     """
26 | } else {
27 |     out << """
28 |     <div style="color: #3c763d; background-color: #dff0d8; border: 1px solid #d6e9c6; padding: 15px;">
29 |         nf-core/deepvariant execution completed successfully!
30 |     </div>
31 |     """
32 | }
33 | %>
34 |
35 | <p>The workflow was completed at <strong>$dateComplete</strong> (duration: <strong>$duration</strong>)</p>
36 | <p>The command used to launch the workflow was as follows:</p>
37 | <pre style="white-space: pre-wrap; overflow: visible;">$commandLine</pre>
38 |
39 | <h3>Pipeline Configuration:</h3>
40 | <table style="width: 100%; border-collapse: collapse;">
41 |
42 |     <% out << summary.collect{ k,v -> "<tr><td style='text-align: left;'>$k</td><td style='text-align: left;'><pre>$v</pre></td></tr>" }.join("\n") %>
43 |
44 | </table>
45 |
46 | <p>nf-core/deepvariant</p>
47 | <p><a href="https://github.com/nf-core/deepvariant">https://github.com/nf-core/deepvariant</a></p>
48 |
49 |
50 | </div>
51 |
52 | </body>
53 | </html>
--------------------------------------------------------------------------------
/assets/email_template.txt:
--------------------------------------------------------------------------------
1 | ========================================
2 |  nf-core/deepvariant v${version}
3 | ========================================
4 | Run Name: $runName
5 |
6 | <% if (success){
7 |     out << "## nf-core/deepvariant execution completed successfully! ##"
8 | } else {
9 |     out << """####################################################
10 | ## nf-core/deepvariant execution completed unsuccessfully! ##
11 | ####################################################
12 | The exit status of the task that caused the workflow execution to fail was: $exitStatus.
13 | The full error message was:
14 |
15 | ${errorReport}
16 | """
17 | } %>
18 |
19 |
20 | The workflow was completed at $dateComplete (duration: $duration)
21 |
22 | The command used to launch the workflow was as follows:
23 |
24 |   $commandLine
25 |
26 |
27 |
28 | Pipeline Configuration:
29 | -----------------------
30 | <% out << summary.collect{ k,v -> " - $k: $v" }.join("\n") %>
31 |
32 | --
33 | nf-core/deepvariant
34 | https://github.com/nf-core/deepvariant
--------------------------------------------------------------------------------
/assets/sendmail_template.txt:
--------------------------------------------------------------------------------
1 | To: $email
2 | Subject: $subject
3 | Mime-Version: 1.0
4 | Content-Type: multipart/related;boundary="nfmimeboundary"
5 |
6 | --nfmimeboundary
7 | Content-Type: text/html; charset=utf-8
8 |
9 | $email_html
10 |
11 | --nfmimeboundary--
--------------------------------------------------------------------------------
/bin/markdown_to_html.r:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env Rscript
2 |
3 | # Command line argument processing
4 | args = commandArgs(trailingOnly=TRUE)
5 | if (length(args) < 2) {
6 |   stop("Usage: markdown_to_html.r <input.md> <output.html>", call.=FALSE)
7 | }
8 | markdown_fn <- args[1]
9 | output_fn <- args[2]
10 |
11 | # Load / install packages
12 | if (!require("markdown")) {
13 |   install.packages("markdown", dependencies=TRUE, repos='http://cloud.r-project.org/')
14 |   library("markdown")
15 | }
16 |
17 | base_css_fn <- getOption("markdown.HTML.stylesheet")
18 | base_css <- readChar(base_css_fn, file.info(base_css_fn)$size)
19 | custom_css <- paste(base_css, "
20 | body {
21 |   padding: 3em;
22 |   margin-right: 350px;
23 |   max-width: 100%;
24 | }
25 | #toc {
26 |   position: fixed;
27 |   right: 20px;
28 |   width: 300px;
29 |   padding-top: 20px;
30 |   overflow: scroll;
31 |   height: calc(100% - 3em - 20px);
32 | }
33 | #toc_header {
34 |   font-size: 1.8em;
35 |   font-weight: bold;
36 | }
37 | #toc > ul {
38 |   padding-left: 0;
39 |   list-style-type: none;
40 | }
41 | #toc > ul ul { padding-left: 20px; }
42 | #toc > ul > li > a { display: none; }
43 | img { max-width: 800px; }
44 | ")
45 |
46 | markdownToHTML(
47 |   file = markdown_fn,
48 |   output =
output_fn, 49 | stylesheet = custom_css, 50 | options = c('toc', 'base64_images', 'highlight_code') 51 | ) 52 | -------------------------------------------------------------------------------- /bin/scrape_software_versions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | from collections import OrderedDict 4 | import re 5 | 6 | regexes = { 7 | 'nf-core/deepvariant': ['v_nf_deepvariant.txt', r"(\S+)"], 8 | 'Nextflow': ['v_nextflow.txt', r"(\S+)"], 9 | 'DeepVariant': ['v_deepvariant.txt', r"deepvariant-(\S+)-"], 10 | 'Python': ['v_python.txt', r"Python (\S+)"], 11 | 'Pip': ['v_pip.txt', r"pip (\S+)"], 12 | 'Samtools': ['v_samtools.txt', r"samtools (\S+)"], 13 | 'Htslib': ['v_samtools.txt', r"Using htslib (\S+)"], 14 | 'Lbzip2': ['v_lbzip2.txt', r"lbzip2 version (\S+)"], 15 | 'Bzip2': ['v_bzip2.txt', r"bzip2, Version (\S+)"], 16 | } 17 | results = OrderedDict() 18 | results['nf-core/deepvariant'] = 'N/A' 19 | results['Nextflow'] = 'N/A' 20 | results['DeepVariant'] = 'N/A' 21 | results['Python'] = 'N/A' 22 | results['Pip'] = 'N/A' 23 | results['Samtools'] = 'N/A' 24 | results['Htslib'] = 'N/A' 25 | results['Lbzip2'] = 'N/A' 26 | results['Bzip2'] = 'N/A' 27 | 28 | # Search each file using its regex 29 | for k, v in regexes.items(): 30 | with open(v[0]) as x: 31 | versions = x.read() 32 | match = re.search(v[1], versions) 33 | if match: 34 | results[k] = "v{}".format(match.group(1)) 35 | 36 | # Dump to YAML 37 | print (''' 38 | id: 'nf-core/deepvariant-software-versions' 39 | section_name: 'nf-core/deepvariant Software Versions' 40 | section_href: 'https://github.com/nf-core/deepvariant' 41 | plot_type: 'html' 42 | description: 'are collected at run time from the software output.' 43 | data: | 44 |
    <dl class="dl-horizontal">
45 | ''')
46 | for k,v in results.items():
47 |     print("        <dt>{}</dt><dd><samp>{}</samp></dd>".format(k,v))
48 | print ("    </dl>")
") 49 | -------------------------------------------------------------------------------- /conf/awsbatch.config: -------------------------------------------------------------------------------- 1 | /* 2 | * ------------------------------------------------- 3 | * Nextflow config file for AWS Batch 4 | * ------------------------------------------------- 5 | * Imported under the 'awsbatch' Nextflow profile in nextflow.config 6 | * Uses docker for software depedencies automagically, so not specified here. 7 | */ 8 | 9 | aws.region = params.awsregion 10 | process.executor = 'awsbatch' 11 | process.queue = params.awsqueue 12 | executor.awscli = '/home/ec2-user/miniconda/bin/aws' 13 | params.tracedir = './' 14 | 15 | process { 16 | withName:makeExamples_with_bed { 17 | cpus = 4 18 | } 19 | withName:makeExamples { 20 | cpus = 4 21 | } 22 | withName:call_variants { 23 | cpus = 4 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /conf/base.config: -------------------------------------------------------------------------------- 1 | /* 2 | * ------------------------------------------------- 3 | * nf-core/deepvariant Nextflow base config file 4 | * ------------------------------------------------- 5 | * A 'blank slate' config file, appropriate for general 6 | * use on most high performace compute environments. 7 | * Assumes that all software is installed and available 8 | * on the PATH. Runs in `local` mode - all jobs will be 9 | * run on the logged in environment. 10 | */ 11 | 12 | process { 13 | 14 | container = params.container 15 | 16 | cpus = { check_max( 1 * task.attempt, 'cpus' ) } 17 | memory = { check_max( 8.GB * task.attempt, 'memory' ) } 18 | time = { check_max( 2.h * task.attempt, 'time' ) } 19 | 20 | errorStrategy = { task.exitStatus in [143,137] ? 'retry' : 'finish' } 21 | maxRetries = 1 22 | maxErrors = '-1' 23 | 24 | withName:make_examples { 25 | cpus = { check_max( 20, 'cpus' ) } 26 | memory = { bam.size() < 1000000000 ? 4.GB : check_max( (bam.size() >> 30) * 10.GB * task.attempt, 'memory')} 27 | time = { check_max( 10.h * task.attempt, 'time' ) } 28 | } 29 | withName:call_variants { 30 | cpus = { check_max( 20, 'cpus' ) } 31 | memory = { bam.size() < 1000000000 ? 4.GB : check_max( (bam.size() >> 30) * 10.GB * task.attempt, 'memory')} 32 | time = { check_max( 10.h * task.attempt, 'time' ) } 33 | } 34 | withName:postprocess_variants { 35 | cpus = { check_max( 20, 'cpus' ) } 36 | memory = { bam.size() < 1000000000 ? 4.GB : check_max( (bam.size() >> 30) * 10.GB * task.attempt, 'memory')} 37 | time = { check_max( 10.h * task.attempt, 'time' ) } 38 | } 39 | 40 | 41 | } 42 | 43 | params { 44 | // Defaults only, expecting to be overwritten 45 | max_memory = 128.GB 46 | max_cpus = 16 47 | max_time = 240.h 48 | } 49 | -------------------------------------------------------------------------------- /conf/binac.config: -------------------------------------------------------------------------------- 1 | /* 2 | * ---------------------------------------------------------------------------- 3 | * Nextflow config file for use with Singularity on BINAC cluster in Tuebingen 4 | * ---------------------------------------------------------------------------- 5 | * Defines basic usage limits and singularity image id. 
6 |  */
7 |
8 | singularity {
9 |   enabled = true
10 | }
11 |
12 | process {
13 |   beforeScript = 'module load devel/singularity/2.6.0'
14 |   executor = 'pbs'
15 |   queue = 'short'
16 | }
17 |
18 | params {
19 |   max_memory = 128.GB
20 |   max_cpus = 28
21 |   max_time = 48.h
22 | }
--------------------------------------------------------------------------------
/conf/genomes.config:
--------------------------------------------------------------------------------
1 | /*
2 |  * -------------------------------------------------
3 |  * Nextflow config file for Genomes paths
4 |  * -------------------------------------------------
5 |  * Defines reference genomes, using s3 paths
6 |  * Can be used by any config that customises the base
7 |  * path using $params.genomes_base / --genomes_base
8 |  */
9 |
10 | params {
11 |   // Genome reference file paths, resolved against params.genomes_base
12 |   genomes {
13 |     'h38' {
14 |       fasta="${params.genomes_base}/h38/GRCh38.p10.genome.fa"
15 |       fai="${params.genomes_base}/h38/GRCh38.p10.genome.fa.fai"
16 |       fastagz="${params.genomes_base}/h38/GRCh38.p10.genome.fa.gz"
17 |       gzfai="${params.genomes_base}/h38/GRCh38.p10.genome.fa.gz.fai"
18 |       gzi="${params.genomes_base}/h38/GRCh38.p10.genome.fa.gz.gzi"
19 |     }
20 |     'hs37d5' {
21 |       fasta="${params.genomes_base}/hs37d5/hs37d5.fa"
22 |       fai="${params.genomes_base}/hs37d5/hs37d5.fa.fai"
23 |       fastagz="${params.genomes_base}/hs37d5/hs37d5.fa.gz"
24 |       gzfai="${params.genomes_base}/hs37d5/hs37d5.fa.gz.fai"
25 |       gzi="${params.genomes_base}/hs37d5/hs37d5.fa.gz.gzi"
26 |     }
27 |     'grch37primary' {
28 |       fasta="${params.genomes_base}/GRCh37.dna.primary/Homo_sapiens.GRCh37.dna.primary_assembly.fa"
29 |       fai="${params.genomes_base}/GRCh37.dna.primary/Homo_sapiens.GRCh37.dna.primary_assembly.fa.fai"
30 |       fastagz="${params.genomes_base}/GRCh37.dna.primary/Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz"
31 |       gzfai="${params.genomes_base}/GRCh37.dna.primary/Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz.fai"
32 |       gzi="${params.genomes_base}/GRCh37.dna.primary/Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz.gzi"
33 |     }
34 |     'hg19chr20' {
35 |       fasta="${params.genomes_base}/hg19chr20/chr20.fa"
36 |       fai="${params.genomes_base}/hg19chr20/chr20.fa.fai"
37 |       fastagz="${params.genomes_base}/hg19chr20/chr20.fa.gz"
38 |       gzfai="${params.genomes_base}/hg19chr20/chr20.fa.gz.fai"
39 |       gzi="${params.genomes_base}/hg19chr20/chr20.fa.gz.gzi"
40 |     }
41 |     'hg19' {
42 |       fasta="${params.genomes_base}/hg19/hg19.fa"
43 |       fai="${params.genomes_base}/hg19/hg19.fa.fai"
44 |       fastagz="${params.genomes_base}/hg19/hg19.fa.gz"
45 |       gzfai="${params.genomes_base}/hg19/hg19.fa.gz.fai"
46 |       gzi="${params.genomes_base}/hg19/hg19.fa.gz.gzi"
47 |     }
48 |   }
49 | }
--------------------------------------------------------------------------------
/conf/multiqc_config.yaml:
--------------------------------------------------------------------------------
1 | report_comment: >
2 |   This report has been generated by the nf-core/deepvariant
3 |   analysis pipeline. For information about how to interpret these results, please see the
5 | report_section_order: 6 | nf-core/deepvariant-software-versions: 7 | order: -1000 8 | -------------------------------------------------------------------------------- /conf/test.config: -------------------------------------------------------------------------------- 1 | /* 2 | * ------------------------------------------------- 3 | * Nextflow config file for running tests 4 | * ------------------------------------------------- 5 | * Defines bundled input files and everything required 6 | * to run a fast and simple test. Use as follows: 7 | * nextflow run nf-core/deepvariant -profile test 8 | */ 9 | 10 | params { 11 | max_cpus = 2 12 | max_memory = 6.GB 13 | max_time = 48.h 14 | bam = 'https://github.com/nf-core/test-datasets/raw/deepvariant/testdata/NA12878_S1.chr20.10_10p1mb.bam' 15 | bed = 'https://github.com/nf-core/test-datasets/raw/deepvariant/testdata/test_nist.b37_chr20_100kbp_at_10mb.bed' 16 | genome = 'hg19chr20' 17 | } 18 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # nf-core/deepvariant: Documentation 2 | 3 | The nf-core/deepvariant documentation is split into the following files: 4 | 5 | 1. [Installation](installation.md) 6 | 2. [Running the pipeline](usage.md) 7 | 3. Pipeline configuration 8 | * [Adding your own system](configuration/adding_your_own.md) 9 | * [Reference genomes](configuration/reference_genomes.md) 10 | 4. [Output and how to interpret the results](output.md) 11 | 5. [Troubleshooting](troubleshooting.md) 12 | -------------------------------------------------------------------------------- /docs/configuration/adding_your_own.md: -------------------------------------------------------------------------------- 1 | # nf-core/deepvariant: Configuration for other clusters 2 | 3 | It is entirely possible to run this pipeline on other clusters, though you will need to set up your own config file so that the pipeline knows how to work with your cluster. 4 | 5 | > If you think that there are other people using the pipeline who would benefit from your configuration (eg. other common cluster setups), please let us know. We can add a new configuration and profile which can used by specifying `-profile ` when running the pipeline. 6 | 7 | If you are the only person to be running this pipeline, you can create your config file as `~/.nextflow/config` and it will be applied every time you run Nextflow. Alternatively, save the file anywhere and reference it when running the pipeline with `-c path/to/config` (see the [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more). 8 | 9 | A basic configuration comes with the pipeline, which runs by default (the `standard` config profile - see [`conf/base.config`](../conf/base.config)). This means that you only need to configure the specifics for your system and overwrite any defaults that you want to change. 10 | 11 | ## Cluster Environment 12 | By default, pipeline uses the `local` Nextflow executor - in other words, all jobs are run in the login session. If you're using a simple server, this may be fine. If you're using a compute cluster, this is bad as all jobs will run on the head node. 13 | 14 | To specify your cluster environment, add the following line to your config file: 15 | 16 | ```nextflow 17 | process.executor = 'YOUR_SYSTEM_TYPE' 18 | ``` 19 | 20 | Many different cluster types are supported by Nextflow. 
For more information, please see the [Nextflow documentation](https://www.nextflow.io/docs/latest/executor.html).
21 |
22 | Note that you may need to specify cluster options, such as a project or queue. To do so, use the `clusterOptions` config option:
23 |
24 | ```nextflow
25 | process {
26 |   executor = 'slurm'
27 |   clusterOptions = '-A myproject'
28 | }
29 | ```
30 |
31 |
32 | ## Software Requirements
33 | To run the pipeline, several software packages are required. How you satisfy these requirements is essentially up to you and depends on your system. If possible, we _highly_ recommend using either Docker or Singularity.
34 |
35 | Please see the [`installation documentation`](../installation.md) for how to run using the below as a one-off. These instructions are about configuring a config file for repeated use.
36 |
37 | ### Docker
38 | Docker is a great way to run nf-core/deepvariant, as it manages all software installations and allows the pipeline to be run in an identical software environment across a range of systems.
39 |
40 | Nextflow has [excellent integration](https://www.nextflow.io/docs/latest/docker.html) with Docker, and beyond installing the two tools, not much else is required - at run time Nextflow will automatically fetch the [nfcore/deepvariant](https://hub.docker.com/r/nfcore/deepvariant/) image that we have created and host on Docker Hub.
41 |
42 | To add docker support to your own config file, add the following:
43 |
44 | ```nextflow
45 | docker.enabled = true
46 | process.container = "nfcore/deepvariant"
47 | ```
48 |
49 | Note that the dockerhub organisation name annoyingly can't have a hyphen, so is `nfcore` and not `nf-core`.
50 |
51 |
52 | ### Singularity image
53 | Many HPC environments are not able to run Docker due to security issues.
54 | [Singularity](http://singularity.lbl.gov/) is a tool designed to run on such HPC systems which is very similar to Docker.
55 |
56 | To specify singularity usage in your pipeline config file, add the following:
57 |
58 | ```nextflow
59 | singularity.enabled = true
60 | process.container = "shub://nf-core/deepvariant"
61 | ```
62 |
63 | If you intend to run the pipeline offline, nextflow will not be able to automatically download the singularity image for you.
64 | Instead, you'll have to do this yourself manually first, transfer the image file and then point to that.
65 |
66 | First, pull the image file where you have an internet connection:
67 |
68 | ```bash
69 | singularity pull --name nf-core-deepvariant.simg shub://nf-core/deepvariant
70 | ```
71 |
72 | Then transfer this file and point the config file to the image:
73 |
74 | ```nextflow
75 | singularity.enabled = true
76 | process.container = "/path/to/nf-core-deepvariant.simg"
77 | ```
78 |
79 |
80 | ### Conda
81 | If you're not able to use Docker or Singularity, you can instead use conda to manage the software requirements.
82 | To use conda in your own config file, add the following:
83 |
84 | ```nextflow
85 | process.conda = "$baseDir/environment.yml"
86 | ```
--------------------------------------------------------------------------------
/docs/images/deepvariant_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nf-core/deepvariant/2b5486356c4dbd4dcb598b611281997119c2e350/docs/images/deepvariant_logo.png
--------------------------------------------------------------------------------
/docs/images/deepvariant_logo.svg:
--------------------------------------------------------------------------------
(SVG markup omitted: nf-core/deepvariant logo)
--------------------------------------------------------------------------------
/docs/installation.md:
--------------------------------------------------------------------------------
1 | # nf-core/deepvariant: Installation
2 |
3 | To start using the nf-core/deepvariant pipeline, follow the steps below:
4 |
5 | 1. [Install Nextflow](#1-install-nextflow)
6 | 2. [Install the pipeline](#2-install-the-pipeline)
7 |    * [Automatic](#21-automatic)
8 |    * [Offline](#22-offline)
9 |    * [Development](#23-development)
10 | 3. [Pipeline configuration](#3-pipeline-configuration)
11 |    * [Software deps: Docker](#31-software-deps-docker)
12 |    * [Software deps: Singularity](#32-software-deps-singularity)
13 |    * [Software deps: conda](#33-software-deps-conda)
14 | 4. [Reference genomes](#4-reference-genomes)
15 | 5. [Appendices](#appendices)
16 |    * [Running on UPPMAX](#running-on-uppmax)
17 |
18 | ## 1) Install Nextflow
19 | Nextflow runs on most POSIX systems (Linux, Mac OSX etc). It can be installed by running the following commands:
20 |
21 | ```bash
22 | # Make sure that Java v8+ is installed:
23 | java -version
24 |
25 | # Install Nextflow
26 | curl -fsSL get.nextflow.io | bash
27 |
28 | # Add Nextflow binary to your PATH:
29 | mv nextflow ~/bin/
30 | # OR system-wide installation:
31 | # sudo mv nextflow /usr/local/bin
32 | ```
33 |
34 | See [nextflow.io](https://www.nextflow.io/) for further instructions on how to install and configure Nextflow.
35 |
36 | ## 2) Install the pipeline
37 |
38 | #### 2.1) Automatic
39 | This pipeline itself needs no installation - Nextflow will automatically fetch it from GitHub if `nf-core/deepvariant` is specified as the pipeline name.
40 |
41 | #### 2.2) Offline
42 | The above method requires an internet connection so that Nextflow can download the pipeline files. If you're running on a system that has no internet connection, you'll need to download and transfer the pipeline files manually:
43 |
44 | ```bash
45 | wget https://github.com/nf-core/deepvariant/archive/master.zip
46 | mkdir -p ~/my-pipelines/nf-core/
47 | unzip master.zip -d ~/my-pipelines/nf-core/
48 | cd ~/my_data/
49 | nextflow run ~/my-pipelines/nf-core/deepvariant-master
50 | ```
51 |
52 | To stop nextflow from looking for updates online, you can tell it to run in offline mode by specifying the following environment variable in your ~/.bashrc file:
53 |
54 | ```bash
55 | export NXF_OFFLINE='TRUE'
56 | ```
57 |
58 | #### 2.3) Development
59 |
60 | If you would like to make changes to the pipeline, it's best to make a fork on GitHub and then clone the files. Once cloned you can run the pipeline directly as above.
61 |
62 |
63 | ## 3) Pipeline configuration
64 | By default, the pipeline runs with the `standard` configuration profile.
This uses a number of sensible defaults for process requirements and is suitable for running on a simple (if powerful!) basic server. You can see this configuration in [`conf/base.config`](../conf/base.config).
65 |
66 | Be warned of two important points about this default configuration:
67 |
68 | 1. The default profile uses the `local` executor
69 |    * All jobs are run in the login session. If you're using a simple server, this may be fine. If you're using a compute cluster, this is bad as all jobs will run on the head node.
70 |    * See the [nextflow docs](https://www.nextflow.io/docs/latest/executor.html) for information about running with other hardware backends. Most job scheduler systems are natively supported.
71 | 2. Nextflow will expect all software to be installed and available on the `PATH`
72 |
73 | #### 3.1) Software deps: Docker
74 | First, install docker on your system: [Docker Installation Instructions](https://docs.docker.com/engine/installation/)
75 |
76 | Then, running the pipeline with the option `-profile standard,docker` tells Nextflow to enable Docker for this run. An image containing all of the software requirements will be automatically fetched and used from dockerhub (https://hub.docker.com/r/nfcore/deepvariant).
77 |
78 | #### 3.2) Software deps: Singularity
79 | If you're not able to use Docker then [Singularity](http://singularity.lbl.gov/) is a great alternative.
80 | The process is very similar: running the pipeline with the option `-profile standard,singularity` tells Nextflow to enable singularity for this run. An image containing all of the software requirements will be automatically fetched and used from singularity hub.
81 |
82 | If running offline with Singularity, you'll need to download and transfer the Singularity image first:
83 |
84 | ```bash
85 | singularity pull --name nf-core-deepvariant.simg shub://nf-core/deepvariant
86 | ```
87 |
88 | Once transferred, use `-with-singularity` and specify the path to the image file:
89 |
90 | ```bash
91 | nextflow run /path/to/nf-core-deepvariant -with-singularity nf-core-deepvariant.simg
92 | ```
93 |
94 | Remember to pull updated versions of the singularity image if you update the pipeline.
95 |
96 |
97 | #### 3.3) Software deps: conda
98 | If you're not able to use Docker _or_ Singularity, you can instead use conda to manage the software requirements.
99 | This is slower and less reproducible than the above, but is still better than having to install all requirements yourself!
100 | The pipeline ships with a conda environment file and nextflow has built-in support for this.
101 | To use it first ensure that you have conda installed (we recommend [miniconda](https://conda.io/miniconda.html)), then follow the same pattern as above and use the flag `-profile standard,conda`
102 |
103 |
104 | ## Appendices
105 |
106 | #### Running on UPPMAX
107 | To run the pipeline on the [Swedish UPPMAX](https://www.uppmax.uu.se/) clusters (`rackham`, `irma`, `bianca` etc), use the command line flag `-profile uppmax`. This tells Nextflow to submit jobs using the SLURM job executor with Singularity for software dependencies.
108 |
109 | Note that you will need to specify your UPPMAX project ID when running a pipeline. To do this, use the command line flag `--project <project_ID>`. The pipeline will exit with an error message if you try to run the pipeline with the default UPPMAX config profile without a project.
110 |
111 | **Optional Extra:** To avoid having to specify your project every time you run Nextflow, you can add it to your personal Nextflow config file instead. Add this line to `~/.nextflow/config`:
112 |
113 | ```nextflow
114 | params.project = 'project_ID' // eg. b2017123
115 | ```
--------------------------------------------------------------------------------
/docs/output.md:
--------------------------------------------------------------------------------
1 | # nf-core/deepvariant: Output
2 |
3 | This document describes the processes and output produced by the pipeline.
4 |
5 | Main steps:
6 |
7 | - preprocessing of fasta/reference files (fai, fastagz, gzfai & gzi)
8 |   - These steps can be skipped if the `--genome` option is used or the fai, fastagz, gzfai & gzi files are supplied.
9 | - preprocessing of BAM files
10 |   - Can also be skipped if the BAM files contain the necessary read group line
11 | - make examples
12 |   - Takes the BAM files and converts them to images (called "examples")
13 | - call variants
14 |   - Does the variant calling based on the ML trained model.
15 | - post processing
16 |   - Transforms the variant calling output (tfrecord file) into a standard VCF file.
17 |
18 | For further reading and documentation about deepvariant see [google/deepvariant](https://github.com/google/deepvariant)
19 |
20 | ## VCF
21 |
22 | The output from DeepVariant is a variant call file or [vcf v4.2](https://samtools.github.io/hts-specs/VCFv4.2.pdf)
23 |
24 | **Output directory: `results`** (by default)
25 |
26 | - `pipeline_info`
27 |   - produced by nextflow
28 | - `{bamSampleName}.vcf`
29 |   - output vcf file produced by deepvariant
--------------------------------------------------------------------------------
/docs/troubleshooting.md:
--------------------------------------------------------------------------------
1 | # nf-core/deepvariant: Troubleshooting
2 |
3 | ## Input files not found
4 |
5 | If you are having trouble with the inputs for the tool it's recommended that you read [about preprocessing](usage.md#about-preprocessing) and [BAM folder input](usage.md#--bam_folder)
6 |
7 | ## Data organization
8 |
9 | The pipeline can't take a list of multiple input files - it takes a glob expression. If your input files are scattered in different paths then we recommend that you generate a directory with symlinked files (see the example at the end of this page). If running in paired end mode please make sure that your files are sensibly named so that they can be properly paired. See the previous point.
10 |
11 | ## Extra resources and getting help
12 |
13 | If you still have an issue with running the pipeline then feel free to contact us.
14 | Have a look at the [pipeline website](https://github.com/nf-core/deepvariant) to find out how.
15 |
16 | If you have problems that are related to Nextflow and not our pipeline then check out the [Nextflow gitter channel](https://gitter.im/nextflow-io/nextflow) or the [google group](https://groups.google.com/forum/#!forum/nextflow).
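17 |
18 | ## Example: gathering scattered BAM files
19 |
20 | As a sketch of the symlink approach mentioned under "Data organization" above (all paths here are hypothetical examples, not files shipped with the pipeline):
21 |
22 | ```bash
23 | # Collect BAM files from different locations into one folder,
24 | # so that --bam_folder can analyse them all in a single run.
25 | mkdir -p ~/deepvariant_input
26 | ln -s /data/run1/sampleA.bam ~/deepvariant_input/sampleA.bam
27 | ln -s /archive/run2/sampleB.bam ~/deepvariant_input/sampleB.bam
28 |
29 | # Then point the pipeline at that folder:
30 | nextflow run nf-core/deepvariant --genome hg19 --bam_folder ~/deepvariant_input --bed myBedFile -profile standard,docker
31 | ```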
32 |
--------------------------------------------------------------------------------
/docs/usage.md:
--------------------------------------------------------------------------------
1 | # nf-core/deepvariant: Usage
2 |
3 | ## Table of contents
4 |
5 | - [Introduction](#general-nextflow-info)
6 | - [Running the pipeline](#running-the-pipeline)
7 |   - [About preprocessing](#about-preprocessing)
8 |   - [Updating the pipeline](#updating-the-pipeline)
9 |   - [Reproducibility](#reproducibility)
10 | - [Main arguments](#main-arguments)
11 |   - [`-profile`](#-profile-single-dash)
12 |     - [`docker`](#docker)
13 |     - [`awsbatch`](#awsbatch)
14 |     - [`standard`](#standard)
15 |     - [`none`](#none)
16 |   - [`--bam`](#--bam)
17 |   - [`--bam_folder`](#--bam_folder)
18 |   - [`--bam_file_prefix`](#--bam_file_prefix)
19 |   - [`--bed`](#--bed)
20 |   - [Reference Genomes](#reference-genomes)
21 |     - [`--genome`](#--genome)
22 |       - [`hg19`](#hg19)
23 |       - [`hg19chr20`](#hg19chr20)
24 |       - [`h38`](#h38)
25 |       - [`grch37primary`](#grch37primary)
26 |       - [`hs37d5`](#hs37d5)
27 |     - [`--genomes_base`](#--genomes_base)
28 |     - [`--fasta`](#--fasta)
29 |     - [`--fai`](#--fai)
30 |     - [`--fastagz`](#--fastagz)
31 |     - [`--gzfai`](#--gzfai)
32 |     - [`--gzi`](#--gzi)
33 |   - [Exome Data](#exome-data)
34 |     - [`--exome`](#--exome)
35 | - [Job Resources](#job-resources)
36 |   - [Automatic resubmission](#automatic-resubmission)
37 |   - [Custom resource requests](#custom-resource-requests)
38 | - [AWS batch specific parameters](#aws-batch-specific-parameters)
39 |   - [`-awsbatch`](#-awsbatch)
40 |   - [`--awsqueue`](#--awsqueue)
41 |   - [`--awsregion`](#--awsregion)
42 | - [Other command line parameters](#other-command-line-parameters)
43 |   - [`--outdir`](#--outdir)
44 |   - [`--email`](#--email)
45 |   - [`-name`](#-name-single-dash)
46 |   - [`-resume`](#-resume-single-dash)
47 |   - [`-c`](#-c-single-dash)
48 |   - [`--max_memory`](#--max_memory)
49 |   - [`--max_time`](#--max_time)
50 |   - [`--max_cpus`](#--max_cpus)
51 |   - [`--plaintext_emails`](#--plaintext_emails)
52 |   - [`--sampleLevel`](#--sampleLevel)
53 |   - [`--multiqc_config`](#--multiqc_config)
54 | - [Memory](#memory)
55 |
56 | ## General Nextflow info
57 |
58 | Nextflow handles job submissions on SLURM or other environments, and supervises running the jobs. Thus the Nextflow process must run until the pipeline is finished. We recommend that you put the process running in the background through `screen` / `tmux` or a similar tool. Alternatively you can run Nextflow within a cluster job submitted to your job scheduler.
59 |
60 | It is recommended to limit the Nextflow Java virtual machine's memory. We recommend adding the following line to your environment (typically in `~/.bashrc` or `~/.bash_profile`):
61 |
62 | ```bash
63 | NXF_OPTS='-Xms1g -Xmx4g'
64 | ```
65 |
66 | ## Running the pipeline
67 |
68 | The typical command for running the pipeline is as follows:
69 |
70 | ```bash
71 | nextflow run nf-core/deepvariant --genome hg19 --bam testdata/test.bam --bed testdata/test.bed -profile standard,docker
72 | ```
73 |
74 | Note that the pipeline will create the following files in your working directory:
75 |
76 | ```bash
77 | work            # Directory containing the nextflow working files
78 | results         # Finished results (configurable, see below)
79 | .nextflow.log   # Log file from Nextflow
80 | # Other nextflow hidden files, eg. history of pipeline runs and old logs.
81 | ```
82 |
83 | ### About preprocessing
84 |
85 | DeepVariant, in order to run at its fastest, requires some indexed and compressed versions of both the reference genome and the BAM files. With DeepVariant in Nextflow you can, if you wish, give only the fasta and the BAM file as input and let the pipeline do the work for you in a clean and standardized way (standard tools like [samtools](http://samtools.sourceforge.net/) are used for indexing and every step is run inside of a Docker container).
86 |
87 | This is what the full list of needed input files looks like. If all of these are passed as input parameters, the preprocessing steps will be skipped.
88 |
89 | ```
90 | NA12878_S1.chr20.10_10p1mb.bam  test_nist.b37_chr20_100kbp_at_10mb.bed  NA12878_S1.chr20.10_10p1mb.bam.bai
91 | ucsc.hg19.chr20.unittest.fasta  ucsc.hg19.chr20.unittest.fasta.fai  ucsc.hg19.chr20.unittest.fasta.gz
92 | ucsc.hg19.chr20.unittest.fasta.gz.fai  ucsc.hg19.chr20.unittest.fasta.gz.gzi
93 | ```
94 |
95 | If you do not have all of them, these are the files you can give as input to the Nextflow pipeline, and the rest will be produced automatically for you.
96 |
97 | ```
98 | NA12878_S1.chr20.10_10p1mb.bam
99 | test_nist.b37_chr20_100kbp_at_10mb.bed
100 | ucsc.hg19.chr20.unittest.fasta
101 | ```
102 |
103 | ### Updating the pipeline
104 |
105 | When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:
106 |
107 | ```bash
108 | nextflow pull nf-core/deepvariant
109 | ```
110 |
111 | ### Reproducibility
112 |
113 | It's a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since.
114 |
115 | First, go to the [nf-core/deepvariant releases page](https://github.com/nf-core/deepvariant/releases) and find the latest version number - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`.
116 |
117 | This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future.
118 |
119 | ## Main Arguments
120 |
121 | ### `-profile`
122 |
123 | Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. Note that multiple profiles can be loaded, for example: `-profile standard,docker` - the order of arguments is important!
124 |
125 | - `standard`
126 |   - The default profile, used if `-profile` is not specified at all.
127 |   - Runs locally and expects all software to be installed and available on the `PATH`.
128 | - `docker`
129 |   - A generic configuration profile to be used with [Docker](http://docker.com/)
130 |   - Pulls software from dockerhub: [`nfcore/deepvariant`](http://hub.docker.com/r/nfcore/deepvariant/)
131 | - `singularity`
132 |   - A generic configuration profile to be used with [Singularity](http://singularity.lbl.gov/)
133 |   - Pulls software from singularity-hub
134 | - `conda`
135 |   - A generic configuration profile to be used with [conda](https://conda.io/docs/)
136 |   - Pulls most software from [Bioconda](https://bioconda.github.io/)
137 | - `awsbatch`
138 |   - A generic configuration profile to be used with AWS Batch.
139 | - `test`
140 |   - A profile with a complete configuration for automated testing
141 |   - Includes links to test data so needs no other parameters
142 | - `test_s3`
143 |   - A profile for testing the pipeline with files on an s3 bucket
144 |   - Other than the `docker` profile, no further inputs are required
145 | - `none`
146 |   - No configuration at all. Useful if you want to build your own config from scratch and want to avoid loading in the default `base` config profile (not recommended).
147 | 
148 | ### `--bam`
149 | 
150 | Use this to specify a single BAM file:
151 | 
152 | ```
153 | --bam "/path/to/bam/file"
154 | ```
155 | 
156 | OR
157 | 
158 | ### `--bam_folder`
159 | 
160 | Use this to specify a folder containing BAM files, which allows multiple BAM files to be analyzed at once. All BAM files in the folder will be analyzed unless `--bam_file_prefix` is used (see below). For example:
161 | 
162 | ```
163 | --bam_folder "/path/to/folder/where/bam/files/are"
164 | ```
165 | 
166 | **TIP:**
167 | All input files can also live in s3 buckets: an `s3://path/to/files/in/bucket` can be used instead of a local path.
168 | 
169 | ### `--bam_file_prefix`
170 | 
171 | - If only certain files inside the BAM folder should be used as input, an optional file prefix can be defined with:
172 | - `--bam_file_prefix`
173 | 
174 | ```
175 | --bam_file_prefix MYPREFIX
176 | ```
177 | 
178 | ### `--bed`
179 | 
180 | - Path to a BED file specifying the regions to be analysed; this must always be supplied
181 | 
182 | ### Reference Genomes
183 | 
184 | The pipeline can accept the reference genome that was used to create the BAM file(s) in one of two ways: either specify a prepared reference genome, eg `--genome hg19` (default), or supply the relevant fasta file (and optionally the indexes).
185 | 
186 | ### `--genome`
187 | 
188 | Standard versions of the genome are prepared, with all their compressed and indexed files, in a lifebit s3 bucket.
189 | They can be used with the following values for the `--genome` tag:
190 | 
191 | - `hg19`
192 |   - Use if reads were aligned against the hg19 reference genome to produce the input bam file(s)
193 | - `hg19chr20`
194 |   - For testing purposes: chromosome 20 of the hg19 reference genome
195 | - `h38`
196 |   - Use if reads were aligned against the GRCh38 reference genome to produce the input bam file(s)
197 | - `grch37primary`
198 |   - Use if reads were aligned against the GRCh37 primary reference genome to produce the input bam file(s)
199 | - `hs37d5`
200 |   - Use if reads were aligned against the hs37d5 reference genome to produce the input bam file(s)
201 | 
202 | ### `--genomes_base`
203 | 
204 | Base directory location of the genomes (default = "s3://deepvariant-data/genomes"), for use on computing clusters.
205 | 
206 | Alternatively, you can use your own reference genome version by supplying the following parameters:
207 | 
208 | The following parameters are optional:
209 | 
210 | ### `--fasta`
211 | 
212 | - Path to the fasta reference
213 | 
214 | ### `--fai`
215 | 
216 | - Path to the fasta index generated using `samtools faidx`
217 | 
218 | ### `--fastagz`
219 | 
220 | - Path to the gzipped fasta
221 | 
222 | ### `--gzfai`
223 | 
224 | - Path to the index of the gzipped fasta, generated using `samtools faidx`
225 | 
226 | ### `--gzi`
227 | 
228 | - Path to the bgzip index (.gzi)
229 | 
230 | If the `fai`, `fastagz`, `gzfai` and `gzi` parameters are not passed, they will automatically be produced for you, and you will be able to find them in the "sampleDerivatives" folder.
231 | 
232 | ### Exome Data
233 | 
234 | ### `--exome`
235 | 
236 | - For exome BAM files
237 | 
238 | If you are running on exome data you need to provide the `--exome` flag so that the right version of the model will be used. For example:
239 | 
240 | ```bash
241 | nextflow run nf-core/deepvariant --genome hg19 --bam_folder myBamFolder --bed myBedFile --exome
242 | ```
243 | 
244 | ## Job Resources
245 | 
246 | ### Automatic resubmission
247 | 
248 | Each step in the pipeline has a default set of requirements for the number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with an error code of `143` (exceeded requested resources) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline is stopped.
249 | 
250 | ### Custom resource requests
251 | 
252 | Wherever process-specific requirements are set in the pipeline, the default value can be changed by creating a custom config file. See the files in [`conf`](../conf) for examples.
253 | 
254 | ## AWS Batch specific parameters
255 | 
256 | Running the pipeline on AWS Batch requires a couple of specific parameters to be set according to your AWS Batch configuration. Please use the `awsbatch` profile (`-profile awsbatch`) and then specify all of the following parameters.
257 | 
258 | ### `--awsqueue`
259 | 
260 | The JobQueue that you intend to use on AWS Batch.
261 | 
262 | ### `--awsregion`
263 | 
264 | The AWS region to run your job in. The default is `eu-west-1` but this can be adjusted to your needs.
265 | 
266 | Please make sure to also set the `-w/--work-dir` and `--outdir` parameters to an S3 storage bucket of your choice - you'll get an error message notifying you if you didn't.
267 | 
268 | ## Other command line parameters
269 | 
270 | ### `--outdir`
271 | 
272 | The output directory where the results will be saved.
273 | 
274 | ### `--email`
275 | 
276 | Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits.
If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.
277 | 
278 | ### `-name`
279 | 
280 | Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic.
281 | 
282 | This is used in the MultiQC report (if not default) and in the summary HTML / e-mail (always).
283 | 
284 | **NB:** Single hyphen (core Nextflow option)
285 | 
286 | ### `-resume`
287 | 
288 | Specify this when restarting a pipeline. Nextflow will use cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously.
289 | 
290 | You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names.
291 | 
292 | **NB:** Single hyphen (core Nextflow option)
293 | 
294 | ### `-c`
295 | 
296 | Specify the path to a specific config file (this is a core Nextflow option).
297 | 
298 | **NB:** Single hyphen (core Nextflow option)
299 | 
300 | Note - you can use this to override defaults. For example, you can specify a config file using `-c` that contains the following:
301 | 
302 | ```nextflow
303 | process.$multiqc.module = []
304 | ```
305 | 
306 | ### `--max_memory`
307 | 
308 | Use to set a top-limit for the default memory requirement for each process.
309 | Should be a string in the format integer-unit, eg. `--max_memory '8.GB'`
310 | 
311 | ### `--max_time`
312 | Use to set a top-limit for the default time requirement for each process.
313 | Should be a string in the format integer-unit, eg. `--max_time '2.h'`
314 | 
315 | ### `--max_cpus`
316 | 
317 | Use to set a top-limit for the default CPU requirement for each process.
318 | Should be an integer, eg. `--max_cpus 1`
319 | 
320 | ### `--plaintext_email`
321 | 
322 | Set this to receive plain-text e-mails instead of HTML-formatted ones.
323 | 
324 | ### `--multiqc_config`
325 | Specify a path to a custom MultiQC configuration file.
326 | 
327 | ## Memory
328 | DeepVariant is quite memory-intensive; the most memory-intensive process is `make_examples`. The memory requirement is approximately 10-15x the size of your BAM file - for example, for a 5GB BAM file the memory should be set to 50GB. Fortunately, this is set automatically for you in `base.config` for all of the main DeepVariant processes, so you don't need to change anything and can run the pipeline as normal.
329 | --------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: nf-core-deepvariant-1.0
2 | channels:
3 |   - bioconda
4 |   - conda-forge
5 |   - defaults
6 | dependencies:
7 |   - python=2.7.15
8 |   - pip=10.0.1
9 |   - deepvariant=0.7.0
10 |   - picard=2.18.7
11 |   - samtools=1.9
12 |   - htslib=1.9
13 |   - lbzip2=2.5
14 |   - bzip2=1.0.6
15 | --------------------------------------------------------------------------------
/main.nf:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env nextflow
2 | /*
3 | ========================================================================================
4 |                          nf-core/deepvariant
5 | ========================================================================================
6 |  nf-core/deepvariant Analysis Pipeline.
7 |  #### Homepage / Documentation
8 |  https://github.com/nf-core/deepvariant
9 | ----------------------------------------------------------------------------------------
10 | */
11 | 
12 | 
13 | def helpMessage() {
14 |     log.info"""
15 |     =========================================
16 |      nf-core/deepvariant v${workflow.manifest.version}
17 |     =========================================
18 |     Usage:
19 | 
20 |     The typical command for running the pipeline is as follows:
21 | 
22 |     nextflow run nf-core/deepvariant --genome hg19 --bam_folder "s3://deepvariant-data/test-bam/" --bed testdata/test.bed -profile standard,docker
23 | 
24 |     Mandatory arguments:
25 |       --bam_folder                  Path to folder containing BAM files (reads must have been aligned to the specified reference file, see below)
26 |       OR
27 |       --bam                         Path to BAM file (reads must have been aligned to the specified reference file, see below)
28 |       --bed                         Path to bed file specifying the regions to be analyzed
29 | 
30 |     References:                     If you wish to overwrite the default reference of hg19.
31 |       --genome                      Reference genome: hg19 | hg19chr20 (for testing) | h38 | grch37primary | hs37d5
32 |       --genomes_base                Base directory location of genomes (default = "s3://deepvariant-data/genomes")
33 |       OR
34 |       --fasta                       Path to fasta reference
35 |       --fai                         Path to fasta index generated using `samtools faidx`
36 |       --fastagz                     Path to gzipped fasta
37 |       --gzfai                       Path to index of gzipped fasta
38 |       --gzi                         Path to bgzip index format (.gzi) produced by faidx
39 |       *Pass all five files above to skip the fasta preprocessing step
40 | 
41 |     Options:
42 |       -profile                      Configuration profile to use. Can use multiple (comma separated)
43 |                                     Available: standard, conda, docker, singularity, awsbatch, test
44 |       --exome                       For exome bam files
45 |       --rgid                        Bam file read group line id in case it's needed (default = 4)
46 |       --rglb                        Bam file read group line library in case it's needed (default = 'lib1')
47 |       --rgpl                        Bam file read group line platform in case it's needed (default = 'illumina')
48 |       --rgpu                        Bam file read group line platform unit in case it's needed (default = 'unit1')
49 |       --rgsm                        Bam file read group line sample in case it's needed (default = 20)
50 | 
51 |     Other options:
52 |       --outdir                      The output directory where the results will be saved (default = results)
53 |       --email                       Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits
54 |       -name                         Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic.
55 |       --help                        Bring up this help message
56 | 
57 |     AWSBatch options:
58 |       --awsqueue                    The AWSBatch JobQueue that needs to be set when running on AWSBatch
59 |       --awsregion                   The AWS Region for your AWS Batch job to run on
60 |     """.stripIndent()
61 | }
62 | 
63 | /*
64 |  * SET UP CONFIGURATION VARIABLES
65 |  */
66 | 
67 | // Show help message
68 | if (params.help){
69 |     helpMessage()
70 |     exit 0
71 | }
72 | 
73 | // Set the model for call_variants: either whole genome or exome
74 | model = params.exome ? 'wes' : 'wgs'
75 | 
76 | // Set the fasta files equal to the genome option if used
77 | params.fasta = params.genome ? params.genomes[ params.genome ].fasta : false
78 | params.fai = params.genome ? params.genomes[ params.genome ].fai : false
79 | params.fastagz = params.genome ? params.genomes[ params.genome ].fastagz : false
80 | params.gzfai = params.genome ? params.genomes[ params.genome ].gzfai : false
81 | params.gzi = params.genome ?
params.genomes[ params.genome ].gzi : false 82 | 83 | //setup fasta channels 84 | (fastaToIndexCh, fastaToGzCh, fastaToGzFaiCh, fastaToGziCh) = Channel.fromPath(params.fasta).into(4) 85 | 86 | bedToExamples = Channel 87 | .fromPath(params.bed) 88 | .ifEmpty { exit 1, "please specify --bed option (--bed bedfile)"} 89 | 90 | if(params.fai){ 91 | faiToExamples = Channel 92 | .fromPath(params.fai) 93 | .ifEmpty{exit 1, "Fai file not found: ${params.fai}"} 94 | } 95 | 96 | if(params.fastagz){ 97 | fastaGz = Channel 98 | .fromPath(params.fastagz) 99 | .ifEmpty{exit 1, "Fastagz file not found: ${params.fastagz}"} 100 | .into {fastaGzToExamples; fastaGzToVariants } 101 | } 102 | 103 | if(params.gzfai){ 104 | gzFai = Channel 105 | .fromPath(params.gzfai) 106 | .ifEmpty{exit 1, "gzfai file not found: ${params.gzfai}"} 107 | .into{gzFaiToExamples; gzFaiToVariants } 108 | } 109 | 110 | if(params.gzi){ 111 | gzi = Channel 112 | .fromPath(params.gzi) 113 | .ifEmpty{exit 1, "gzi file not found: ${params.gzi}"} 114 | .into {gziToExamples; gziToVariants} 115 | } 116 | /*-------------------------------------------------- 117 | Bam related input files 118 | ---------------------------------------------------*/ 119 | if(params.bam_folder) { 120 | Channel 121 | .fromPath("${params.bam_folder}/${params.bam_file_prefix}*.bam") 122 | .ifEmpty { exit 1, "${params.bam_folder}/${params.bam_file_prefix}*.bam not found"} 123 | .set{bamChannel} 124 | } else if(params.bam) { 125 | Channel 126 | .fromPath(params.bam) 127 | .ifEmpty { exit 1, "${params.bam} not found"} 128 | .set{bamChannel} 129 | } else { 130 | exit 1, "please specify --bam OR --bam_folder" 131 | } 132 | 133 | /*-------------------------------------------------- 134 | For workflow summary 135 | ---------------------------------------------------*/ 136 | // Has the run name been specified by the user? 137 | // this has the bonus effect of catching both -name and --name 138 | custom_runName = params.name 139 | if( !(workflow.runName ==~ /[a-z]+_[a-z]+/) ){ 140 | custom_runName = workflow.runName 141 | } 142 | 143 | // Check workDir/outdir paths to be S3 buckets if running on AWSBatch 144 | // related: https://github.com/nextflow-io/nextflow/issues/813 145 | if( workflow.profile == 'awsbatch') { 146 | if(!workflow.workDir.startsWith('s3:') || !params.outdir.startsWith('s3:')) exit 1, "Workdir or Outdir not on S3 - specify S3 Buckets for each to run on AWSBatch!" 147 | } 148 | 149 | 150 | // Header log info 151 | log.info """======================================================= 152 | ,--./,-. 
153 |           ___     __   __   __   ___     /,-._.--~\'
154 |     |\\ | |__  __ /  ` /  \\ |__) |__         }  {
155 |     | \\| |       \\__, \\__/ |  \\ |___     \\`-._,-`-,
156 |                                           `._,._,\'
157 | nf-core/deepvariant v${workflow.manifest.version}
158 | ======================================================="""
159 | def summary = [:]
160 | summary['Pipeline Name'] = 'nf-core/deepvariant'
161 | summary['Pipeline Version'] = workflow.manifest.version
162 | if(params.bam_folder) summary['Bam folder'] = params.bam_folder
163 | if(params.bam) summary['Bam file'] = params.bam
164 | summary['Bed file'] = params.bed
165 | if(params.genome) summary['Reference genome'] = params.genome
166 | if(params.fasta) summary['Fasta Ref'] = params.fasta
167 | if(params.fai) summary['Fasta Index'] = params.fai
168 | if(params.fastagz) summary['Fasta gzipped'] = params.fastagz
169 | if(params.gzfai) summary['Fasta gzipped Index'] = params.gzfai
170 | if(params.gzi) summary['Fasta bgzip Index'] = params.gzi
171 | if(params.rgid != 4) summary['BAM Read Group ID'] = params.rgid
172 | if(params.rglb != 'lib1') summary['BAM Read Group Library'] = params.rglb
173 | if(params.rgpl != 'illumina') summary['BAM Read Group Platform'] = params.rgpl
174 | if(params.rgpu != 'unit1') summary['BAM Read Group Platform Unit'] = params.rgpu
175 | if(params.rgsm != 20) summary['BAM Read Group Sample'] = params.rgsm
176 | summary['Max Memory'] = params.max_memory
177 | summary['Max CPUs'] = params.max_cpus
178 | summary['Max Time'] = params.max_time
179 | summary['Model'] = model
180 | summary['Output dir'] = params.outdir
181 | summary['Working dir'] = workflow.workDir
182 | summary['Container Engine'] = workflow.containerEngine
183 | if(workflow.containerEngine) summary['Container'] = workflow.container
184 | summary['Current home'] = "$HOME"
185 | summary['Current user'] = "$USER"
186 | summary['Current path'] = "$PWD"
187 | summary['Working dir'] = workflow.workDir
188 | summary['Output dir'] = params.outdir
189 | summary['Script dir'] = workflow.projectDir
190 | summary['Config Profile'] = workflow.profile
191 | if(workflow.profile == 'awsbatch'){
192 |    summary['AWS Region'] = params.awsregion
193 |    summary['AWS Queue'] = params.awsqueue
194 | }
195 | if(params.email) summary['E-mail Address'] = params.email
196 | log.info summary.collect { k,v -> "${k.padRight(15)}: $v" }.join("\n")
197 | log.info "========================================="
198 | 
199 | 
200 | def create_workflow_summary(summary) {
201 | 
202 |     def yaml_file = workDir.resolve('workflow_summary_mqc.yaml')
203 |     yaml_file.text  = """
204 |     id: 'nf-core-deepvariant-summary'
205 |     description: " - this information is collected when the pipeline is started."
206 |     section_name: 'nf-core/deepvariant Workflow Summary'
207 |     section_href: 'https://github.com/nf-core/deepvariant'
208 |     plot_type: 'html'
209 |     data: |
210 |         <dl class="dl-horizontal">
211 | ${summary.collect { k,v -> "            <dt>$k</dt><dd>${v ?: 'N/A'}</dd>" }.join("\n")}
212 |         </dl>
213 | """.stripIndent() 214 | 215 | return yaml_file 216 | } 217 | 218 | 219 | /******************************************************************** 220 | preprocess fasta files processes 221 | Collects all the files related to the reference genome, like 222 | .fai,.gz ... 223 | If the user gives them as an input, they are used 224 | If not they are produced in this process given only the fasta file. 225 | ********************************************************************/ 226 | 227 | if(!params.fai) { 228 | process preprocess_fai { 229 | tag "${fasta}.fai" 230 | publishDir "$baseDir/sampleDerivatives" 231 | 232 | input: 233 | file(fasta) from fastaToIndexCh 234 | 235 | output: 236 | file("${fasta}.fai") into faiToExamples 237 | 238 | script: 239 | """ 240 | samtools faidx $fasta 241 | """ 242 | } 243 | } 244 | 245 | if(!params.fastagz) { 246 | process preprocess_fastagz { 247 | tag "${fasta}.gz" 248 | publishDir "$baseDir/sampleDerivatives" 249 | 250 | input: 251 | file(fasta) from fastaToGzCh 252 | 253 | output: 254 | file("*.gz") into (tmpFastaGzCh, fastaGzToExamples, fastaGzToVariants) 255 | 256 | script: 257 | """ 258 | bgzip -c ${fasta} > ${fasta}.gz 259 | """ 260 | } 261 | } 262 | 263 | if(!params.gzfai) { 264 | process preprocess_gzfai { 265 | tag "${fasta}.gz.fai" 266 | publishDir "$baseDir/sampleDerivatives" 267 | 268 | input: 269 | file(fasta) from fastaToGzFaiCh 270 | file(fastagz) from tmpFastaGzCh 271 | 272 | output: 273 | file("*.gz.fai") into (gzFaiToExamples, gzFaiToVariants) 274 | 275 | script: 276 | """ 277 | samtools faidx $fastagz 278 | """ 279 | } 280 | } 281 | 282 | if(!params.gzi){ 283 | process preprocess_gzi { 284 | tag "${fasta}.gz.gzi" 285 | publishDir "$baseDir/sampleDerivatives" 286 | 287 | input: 288 | file(fasta) from fastaToGziCh 289 | 290 | output: 291 | file("*.gz.gzi") into (gziToExamples, gziToVariants) 292 | 293 | script: 294 | """ 295 | bgzip -c -i ${fasta} > ${fasta}.gz 296 | """ 297 | } 298 | } 299 | 300 | /******************************************************************** 301 | process preprocess_bam 302 | Takes care of the read group line. 
303 | ********************************************************************/
304 | 
305 | process preprocess_bam{
306 | 
307 |   tag "${bam}"
308 |   publishDir "$baseDir/sampleDerivatives"
309 | 
310 |   input:
311 |   file(bam) from bamChannel
312 | 
313 |   output:
314 |   set file("ready/${bam}"), file("ready/${bam}.bai") into completeChannel
315 | 
316 |   script:
317 |   """
318 |   mkdir ready
319 |   [[ `samtools view -H ${bam} | grep '@RG' | wc -l` -gt 0 ]] && { mv $bam ready;} || { picard AddOrReplaceReadGroups \
320 |     I=${bam} \
321 |     O=ready/${bam} \
322 |     RGID=${params.rgid} \
323 |     RGLB=${params.rglb} \
324 |     RGPL=${params.rgpl} \
325 |     RGPU=${params.rgpu} \
326 |     RGSM=${params.rgsm};}
327 |   cd ready; samtools index ${bam};
328 |   """
329 | }
330 | 
331 | /********************************************************************
332 |   process make_examples
333 |   Takes the BAM files and converts them to images (named "examples").
334 | ********************************************************************/
335 | 
336 | process make_examples{
337 | 
338 |   tag "${bam}"
339 |   publishDir "${params.outdir}/make_examples", mode: 'copy',
340 |     saveAs: {filename -> "logs/log"}
341 | 
342 |   input:
343 |   file fai from faiToExamples.collect()
344 |   file fastagz from fastaGzToExamples.collect()
345 |   file gzfai from gzFaiToExamples.collect()
346 |   file gzi from gziToExamples.collect()
347 |   file bed from bedToExamples.collect()
348 |   set file(bam), file(bai) from completeChannel
349 | 
350 |   output:
351 |   set file("${bam}"),file('*_shardedExamples') into examples
352 | 
353 |   script:
354 |   """
355 |   mkdir logs
356 |   mkdir ${bam.baseName}_shardedExamples
357 |   dv_make_examples.py \
358 |     --cores ${task.cpus} \
359 |     --sample ${bam} \
360 |     --ref ${fastagz} \
361 |     --reads ${bam} \
362 |     --regions ${bed} \
363 |     --logdir logs \
364 |     --examples ${bam.baseName}_shardedExamples
365 |   """
366 | }
367 | /********************************************************************
368 |   process call_variants
369 |   Performs the variant calling based on the trained ML model.
370 | ********************************************************************/
371 | 
372 | process call_variants{
373 | 
374 |   tag "${bam}"
375 | 
376 |   input:
377 |   set file(bam),file(shardedExamples) from examples
378 | 
379 |   output:
380 |   set file(bam),file('*_call_variants_output.tfrecord') into called_variants
381 | 
382 |   script:
383 |   """
384 |   dv_call_variants.py \
385 |     --cores ${task.cpus} \
386 |     --sample ${bam} \
387 |     --outfile ${bam.baseName}_call_variants_output.tfrecord \
388 |     --examples $shardedExamples \
389 |     --model ${model}
390 |   """
391 | }
392 | 
393 | 
394 | 
395 | /********************************************************************
396 |   process postprocess_variants
397 |   Transforms the variant calling output (tfrecord file) into a standard VCF file.
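  The staged inputs mirror make_examples: the same bgzipped reference and
  its .fai/.gzi indexes are collected again, because dv_postprocess_variants.py
  takes the reference via --ref. A stand-alone sketch of the call, assuming
  the test reference file names used earlier in this pipeline:

      dv_postprocess_variants.py \
          --ref ucsc.hg19.chr20.unittest.fasta.gz \
          --infile call_variants_output.tfrecord \
          --outfile NA12878_S1.chr20.10_10p1mb.bam.vcf

  The resulting "<bam>.vcf" is then copied to params.outdir by publishDir.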
398 | ********************************************************************/ 399 | 400 | process postprocess_variants{ 401 | 402 | tag "${bam}" 403 | 404 | publishDir params.outdir, mode: 'copy' 405 | 406 | input: 407 | file fastagz from fastaGzToVariants.collect() 408 | file gzfai from gzFaiToVariants.collect() 409 | file gzi from gziToVariants.collect() 410 | set file(bam),file('call_variants_output.tfrecord') from called_variants 411 | 412 | output: 413 | set val("${bam}"),file("${bam}.vcf") into postout 414 | 415 | script: 416 | """ 417 | dv_postprocess_variants.py \ 418 | --ref ${fastagz} \ 419 | --infile call_variants_output.tfrecord \ 420 | --outfile "${bam}.vcf" 421 | """ 422 | } 423 | 424 | /* 425 | * Parse software version numbers 426 | */ 427 | process get_software_versions { 428 | 429 | output: 430 | file 'software_versions_mqc.yaml' into software_versions_yaml 431 | 432 | script: 433 | """ 434 | echo $workflow.manifest.version &> v_nf_deepvariant.txt 435 | echo $workflow.nextflow.version &> v_nextflow.txt 436 | ls /opt/conda/pkgs/ &> v_deepvariant.txt 437 | python --version &> v_python.txt 438 | pip --version &> v_pip.txt 439 | samtools --version &> v_samtools.txt 440 | lbzip2 --version &> v_lbzip2.txt 441 | bzip2 --version &> v_bzip2.txt 442 | scrape_software_versions.py &> software_versions_mqc.yaml 443 | """ 444 | } 445 | 446 | workflow.onComplete { 447 | // Set up the e-mail variables 448 | def subject = "[nf-core/deepvariant] Successful: $workflow.runName" 449 | if(!workflow.success){ 450 | subject = "[nf-core/deepvariant] FAILED: $workflow.runName" 451 | } 452 | def email_fields = [:] 453 | email_fields['version'] = workflow.manifest.version 454 | email_fields['runName'] = custom_runName ?: workflow.runName 455 | email_fields['success'] = workflow.success 456 | email_fields['dateComplete'] = workflow.complete 457 | email_fields['duration'] = workflow.duration 458 | email_fields['exitStatus'] = workflow.exitStatus 459 | email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') 460 | email_fields['errorReport'] = (workflow.errorReport ?: 'None') 461 | email_fields['commandLine'] = workflow.commandLine 462 | email_fields['projectDir'] = workflow.projectDir 463 | email_fields['summary'] = summary 464 | email_fields['summary']['Date Started'] = workflow.start 465 | email_fields['summary']['Date Completed'] = workflow.complete 466 | email_fields['summary']['Pipeline script file path'] = workflow.scriptFile 467 | email_fields['summary']['Pipeline script hash ID'] = workflow.scriptId 468 | if(workflow.repository) email_fields['summary']['Pipeline repository Git URL'] = workflow.repository 469 | if(workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId 470 | if(workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision 471 | email_fields['summary']['Nextflow Version'] = workflow.nextflow.version 472 | email_fields['summary']['Nextflow Build'] = workflow.nextflow.build 473 | email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp 474 | 475 | // Render the TXT template 476 | def engine = new groovy.text.GStringTemplateEngine() 477 | def tf = new File("$baseDir/assets/email_template.txt") 478 | def txt_template = engine.createTemplate(tf).make(email_fields) 479 | def email_txt = txt_template.toString() 480 | 481 | // Render the HTML template 482 | def hf = new File("$baseDir/assets/email_template.html") 483 | def html_template = engine.createTemplate(hf).make(email_fields) 484 
| def email_html = html_template.toString()
485 | 
486 |     // Render the sendmail template
487 |     def smail_fields = [ email: params.email, subject: subject, email_txt: email_txt, email_html: email_html, baseDir: "$baseDir" ]
488 |     def sf = new File("$baseDir/assets/sendmail_template.txt")
489 |     def sendmail_template = engine.createTemplate(sf).make(smail_fields)
490 |     def sendmail_html = sendmail_template.toString()
491 | 
492 |     // Send the HTML e-mail
493 |     if (params.email) {
494 |         try {
495 |           if( params.plaintext_email ){ throw new GroovyException('Send plaintext e-mail, not HTML') }
496 |           // Try to send HTML e-mail using sendmail
497 |           [ 'sendmail', '-t' ].execute() << sendmail_html
498 |           log.info "[nf-core/deepvariant] Sent summary e-mail to $params.email (sendmail)"
499 |         } catch (all) {
500 |           // Catch failures and try with plaintext
501 |           [ 'mail', '-s', subject, params.email ].execute() << email_txt
502 |           log.info "[nf-core/deepvariant] Sent summary e-mail to $params.email (mail)"
503 |         }
504 |     }
505 | 
506 |     // Write summary e-mail HTML to a file
507 |     def output_d = new File( "${params.outdir}/Documentation/" )
508 |     if( !output_d.exists() ) {
509 |       output_d.mkdirs()
510 |     }
511 |     def output_hf = new File( output_d, "pipeline_report.html" )
512 |     output_hf.withWriter { w -> w << email_html }
513 |     def output_tf = new File( output_d, "pipeline_report.txt" )
514 |     output_tf.withWriter { w -> w << email_txt }
515 | 
516 |     log.info "[nf-core/deepvariant] Pipeline Complete! You can find your results in $baseDir/${params.outdir}"
517 | }
518 | --------------------------------------------------------------------------------
/nextflow.config:
--------------------------------------------------------------------------------
1 | /*
2 |  * -------------------------------------------------
3 |  *  nf-core/deepvariant Nextflow config file
4 |  * -------------------------------------------------
5 |  * Default config options for all environments.
6 |  * Cluster-specific config options should be saved
7 |  * in the conf folder and imported under a profile
8 |  * name here.
9 |  */
10 | 
11 | // Global default params, used in configs
12 | params {
13 | 
14 |   container = 'nfcore/deepvariant:1.0'
15 | 
16 |   help = false
17 |   outdir = 'results'
18 |   email = false
19 |   name = false
20 | 
21 |   // BAM files
22 |   bam=false
23 |   bam_folder=false
24 |   bam_file_prefix="*"
25 |   getBai=false
26 | 
27 |   // Reference genomes
28 |   genome = false
29 |   genomes_base = 's3://deepvariant-data/genomes'
30 |   testBaseFolder = 's3://deepvariant-test/input'
31 | 
32 |   // Exome data
33 |   exome=false
34 |   bed=false
35 | 
36 |   // Params for the read group line to be added just in case it's needed.
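  // These defaults are consulted only when a BAM has no @RG header line
  // (see preprocess_bam in main.nf); they can be overridden at run time,
  // e.g. with hypothetical values: --rgid 1 --rgsm NA12878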
37 | rgid=4 38 | rglb="lib1" 39 | rgpl="illumina" 40 | rgpu="unit1" 41 | rgsm=20 42 | 43 | tracedir = "${params.outdir}/pipeline_info" 44 | clusterOptions = false 45 | awsqueue = false 46 | awsregion = 'eu-west-1' 47 | manifest.version = '1.0' 48 | } 49 | 50 | profiles { 51 | 52 | standard { 53 | includeConfig 'conf/base.config' 54 | includeConfig 'conf/genomes.config' 55 | } 56 | conda { process.conda = "$baseDir/environment.yml" } 57 | docker { docker.enabled = true } 58 | singularity { 59 | singularity.enabled = true 60 | } 61 | 62 | binac { 63 | includeConfig 'conf/base.config' 64 | includeConfig 'conf/binac.config' 65 | includeConfig 'conf/genomes.config' 66 | } 67 | awsbatch { 68 | includeConfig 'conf/base.config' 69 | includeConfig 'conf/awsbatch.config' 70 | includeConfig 'conf/genomes.config' 71 | } 72 | test { 73 | includeConfig 'conf/base.config' 74 | includeConfig 'conf/test.config' 75 | includeConfig 'conf/genomes.config' 76 | } 77 | debug { process.beforeScript = 'echo $HOSTNAME' } 78 | none { 79 | // Don't load any config (for use with custom home configs) 80 | } 81 | 82 | // Profile for testing s3 environment 83 | test_s3{ 84 | includeConfig 'conf/base.config' 85 | params.fasta="${params.testBaseFolder}/ucsc.hg19.chr20.unittest.fasta" 86 | params.fai="${params.testBaseFolder}/ucsc.hg19.chr20.unittest.fasta.fai" 87 | params.fastagz="${params.testBaseFolder}/ucsc.hg19.chr20.unittest.fasta.gz" 88 | params.gzfai="${params.testBaseFolder}/ucsc.hg19.chr20.unittest.fasta.gz.fai" 89 | params.gzi="${params.testBaseFolder}/ucsc.hg19.chr20.unittest.fasta.gz.gzi" 90 | params.bam_folder="${params.testBaseFolder}" 91 | params.bed = 'https://github.com/nf-core/test-datasets/raw/deepvariant/testdata/test_nist.b37_chr20_100kbp_at_10mb.bed' 92 | } 93 | 94 | } 95 | 96 | // Capture exit codes from upstream processes when piping 97 | process.shell = ['/bin/bash', '-euo', 'pipefail'] 98 | 99 | timeline { 100 | enabled = true 101 | file = "${params.tracedir}/nf-core/deepvariant_timeline.html" 102 | } 103 | report { 104 | enabled = true 105 | file = "${params.tracedir}/nf-core/deepvariant_report.html" 106 | } 107 | trace { 108 | enabled = true 109 | file = "${params.tracedir}/nf-core/deepvariant_trace.txt" 110 | } 111 | dag { 112 | enabled = true 113 | file = "${params.tracedir}/nf-core/deepvariant_dag.svg" 114 | } 115 | 116 | manifest { 117 | name = 'nf-core/deepvariant' 118 | author = 'Phil Palmer' 119 | homePage = 'https://github.com/nf-core/deepvariant' 120 | description = 'Google DeepVariant variant caller as a Nextflow pipeline' 121 | mainScript = 'main.nf' 122 | nextflowVersion = '>=18.10.1' 123 | version = '1.0' 124 | } 125 | 126 | // Function to ensure that resource requirements don't go beyond 127 | // a maximum limit 128 | def check_max(obj, type) { 129 | if(type == 'memory'){ 130 | try { 131 | if(obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) 132 | return params.max_memory as nextflow.util.MemoryUnit 133 | else 134 | return obj 135 | } catch (all) { 136 | println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" 137 | return obj 138 | } 139 | } else if(type == 'time'){ 140 | try { 141 | if(obj.compareTo(params.max_time as nextflow.util.Duration) == 1) 142 | return params.max_time as nextflow.util.Duration 143 | else 144 | return obj 145 | } catch (all) { 146 | println " ### ERROR ### Max time '${params.max_time}' is not valid! 
Using default value: $obj" 147 | return obj 148 | } 149 | } else if(type == 'cpus'){ 150 | try { 151 | return Math.min( obj, params.max_cpus as int ) 152 | } catch (all) { 153 | println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" 154 | return obj 155 | } 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /pics/pic_workflow.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/deepvariant/2b5486356c4dbd4dcb598b611281997119c2e350/pics/pic_workflow.jpg --------------------------------------------------------------------------------