├── .gitattributes ├── docs ├── images │ ├── eager_logo.png │ ├── nf-core-eager_logo.png │ ├── usage │ │ ├── eager2_workflow.png │ │ ├── merging_files.png │ │ ├── eager2_metromap_complex.png │ │ └── nfcore-eager_tsv_template.tsv │ ├── nf-core_eager_logo_sticker.png │ ├── nf-core_eager_logo_flat_black.png │ ├── nf-core_eager_logo_flat_light.png │ ├── nf-core_eager_logo_outline_drop.png │ ├── output │ │ ├── kraken │ │ │ └── kraken_top_taxa.png │ │ ├── fastqc │ │ │ ├── fastqc_adapter_content.png │ │ │ ├── fastqc_sequence_counts.png │ │ │ ├── fastqc_per_base_n_content.png │ │ │ ├── fastqc_per_base_sequence_content.png │ │ │ ├── fastqc_per_sequence_GC_content.png │ │ │ ├── fastqc_per_sequence_quality_score.png │ │ │ ├── fastqc_sequence_duplication_level.png │ │ │ └── fastqc_sequence_quality_histogram.png │ │ ├── dedup │ │ │ └── dedup_deduplicated_reads.png │ │ ├── preseq │ │ │ └── preseq_complexity_curve.png │ │ ├── bowtie2 │ │ │ └── bowtie2_alignment_scores.png │ │ ├── malt │ │ │ ├── malt_metagenomic_mappability.png │ │ │ └── malt_taxonomic_assignment_success.png │ │ ├── picard │ │ │ └── picard_deduplication_stats.png │ │ ├── qualimap │ │ │ ├── qualimap_coverage_histogram.png │ │ │ ├── qualimap_gc_content_distribution.png │ │ │ └── qualimap_cumulative_genome_coverage.png │ │ ├── samtools_flagstat │ │ │ └── samtools_flagstat.png │ │ ├── sexdeterrmine │ │ │ ├── sexdeterrmine_read_counts.png │ │ │ └── sexdeterrmine_relative_coverage.png │ │ ├── adapter_removal │ │ │ ├── adapter_removal_discarded_reads.png │ │ │ └── adapter_removal_length_distribution.png │ │ ├── damageprofiler │ │ │ └── damageprofiler_deaminationpatterns.png │ │ └── multivcfanalyzer │ │ │ └── multivcfanalyzer_call_categories.png │ ├── tutorials │ │ └── profiles │ │ │ └── config_profile_inheritence.png │ ├── README.md │ └── eager_logo.svg └── README.md ├── assets ├── nf-core-eager_logo.png ├── angsd_resources │ ├── HapMapALL.gz │ ├── HapMapChrX.gz │ ├── chrX.unique.gz │ ├── map100.chrX.gz │ ├── hapMapCeuXlift.map.gz │ ├── getALL.txt │ └── README ├── nf-core_eager_dummy.txt ├── nf-core_eager_dummy2.txt ├── where_are_my_files.txt ├── email_template.txt ├── sendmail_template.txt ├── email_template.html └── multiqc_config.yaml ├── lib ├── nfcore_external_java_deps.jar ├── Headers.groovy ├── Checks.groovy └── Completion.groovy ├── .github ├── yamllint.yml ├── .dockstore.yml ├── markdownlint.yml ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── feature_request.md │ └── bug_report.md ├── workflows │ ├── linting_comment.yml │ ├── push_dockerhub_dev.yml │ ├── push_dockerhub_release.yml │ ├── awstest.yml │ ├── awsfulltest.yml │ ├── branch.yml │ ├── linting.yml │ └── ci.yml ├── PULL_REQUEST_TEMPLATE │ └── pull_request_template.md ├── PULL_REQUEST_TEMPLATE.md └── CONTRIBUTING.md ├── .gitignore ├── .nf-core-lint.yml ├── Dockerfile ├── .gitpod.yml ├── conf ├── test_tsv_bam.config ├── test.config ├── test_tsv_fna.config ├── test_tsv_pretrim.config ├── test_tsv_complex.config ├── test_direct.config ├── test_tsv_kraken.config ├── test_tsv_humanbam.config ├── test_resources.config ├── benchmarking_vikingfish.config ├── test_full.config ├── benchmarking_human.config ├── test_stresstest_human.config └── base.config ├── bin ├── parse_snp_cov.py ├── merge_kraken_res.py ├── filter_bam_fragment_length.py ├── markdown_to_html.py ├── kraken_parse.py ├── print_x_contamination.py ├── endorS.py ├── scrape_software_versions.py └── extract_map_reads.py ├── LICENSE ├── environment.yml ├── CODE_OF_CONDUCT.md ├── nextflow.config └── README.md /.gitattributes: 
-------------------------------------------------------------------------------- 1 | *.config linguist-language=nextflow 2 | -------------------------------------------------------------------------------- /docs/images/eager_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/eager_logo.png -------------------------------------------------------------------------------- /assets/nf-core-eager_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/assets/nf-core-eager_logo.png -------------------------------------------------------------------------------- /docs/images/nf-core-eager_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/nf-core-eager_logo.png -------------------------------------------------------------------------------- /lib/nfcore_external_java_deps.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/lib/nfcore_external_java_deps.jar -------------------------------------------------------------------------------- /assets/angsd_resources/HapMapALL.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/assets/angsd_resources/HapMapALL.gz -------------------------------------------------------------------------------- /assets/angsd_resources/HapMapChrX.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/assets/angsd_resources/HapMapChrX.gz -------------------------------------------------------------------------------- /assets/angsd_resources/chrX.unique.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/assets/angsd_resources/chrX.unique.gz -------------------------------------------------------------------------------- /assets/angsd_resources/map100.chrX.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/assets/angsd_resources/map100.chrX.gz -------------------------------------------------------------------------------- /docs/images/usage/eager2_workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/usage/eager2_workflow.png -------------------------------------------------------------------------------- /docs/images/usage/merging_files.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/usage/merging_files.png -------------------------------------------------------------------------------- /docs/images/nf-core_eager_logo_sticker.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/nf-core_eager_logo_sticker.png -------------------------------------------------------------------------------- /assets/angsd_resources/hapMapCeuXlift.map.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nf-core/eager/HEAD/assets/angsd_resources/hapMapCeuXlift.map.gz -------------------------------------------------------------------------------- /docs/images/nf-core_eager_logo_flat_black.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/nf-core_eager_logo_flat_black.png -------------------------------------------------------------------------------- /docs/images/nf-core_eager_logo_flat_light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/nf-core_eager_logo_flat_light.png -------------------------------------------------------------------------------- /docs/images/nf-core_eager_logo_outline_drop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/nf-core_eager_logo_outline_drop.png -------------------------------------------------------------------------------- /docs/images/output/kraken/kraken_top_taxa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/output/kraken/kraken_top_taxa.png -------------------------------------------------------------------------------- /docs/images/usage/eager2_metromap_complex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/usage/eager2_metromap_complex.png -------------------------------------------------------------------------------- /docs/images/output/fastqc/fastqc_adapter_content.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/output/fastqc/fastqc_adapter_content.png -------------------------------------------------------------------------------- /docs/images/output/fastqc/fastqc_sequence_counts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/output/fastqc/fastqc_sequence_counts.png -------------------------------------------------------------------------------- /docs/images/usage/nfcore-eager_tsv_template.tsv: -------------------------------------------------------------------------------- 1 | Sample_Name Library_ID Lane Colour_Chemistry SeqType Organism Strandedness UDG_Treatment R1 R2 BAM 2 | -------------------------------------------------------------------------------- /docs/images/output/dedup/dedup_deduplicated_reads.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/output/dedup/dedup_deduplicated_reads.png -------------------------------------------------------------------------------- /docs/images/output/preseq/preseq_complexity_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/output/preseq/preseq_complexity_curve.png -------------------------------------------------------------------------------- /docs/images/output/bowtie2/bowtie2_alignment_scores.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/output/bowtie2/bowtie2_alignment_scores.png -------------------------------------------------------------------------------- /docs/images/output/fastqc/fastqc_per_base_n_content.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/output/fastqc/fastqc_per_base_n_content.png -------------------------------------------------------------------------------- /docs/images/output/malt/malt_metagenomic_mappability.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/output/malt/malt_metagenomic_mappability.png -------------------------------------------------------------------------------- /docs/images/output/picard/picard_deduplication_stats.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/output/picard/picard_deduplication_stats.png -------------------------------------------------------------------------------- /.github/yamllint.yml: -------------------------------------------------------------------------------- 1 | rules: 2 | document-start: disable 3 | comments: disable 4 | truthy: disable 5 | line-length: disable 6 | empty-lines: disable 7 | 8 | -------------------------------------------------------------------------------- /assets/nf-core_eager_dummy.txt: -------------------------------------------------------------------------------- 1 | This is a dummy file for when we need a 'fake' file to satisfy all nextflow channel inputs being filled, even if we actually only use one. -------------------------------------------------------------------------------- /docs/images/output/qualimap/qualimap_coverage_histogram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/output/qualimap/qualimap_coverage_histogram.png -------------------------------------------------------------------------------- /docs/images/output/samtools_flagstat/samtools_flagstat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/output/samtools_flagstat/samtools_flagstat.png -------------------------------------------------------------------------------- /docs/images/output/fastqc/fastqc_per_base_sequence_content.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/output/fastqc/fastqc_per_base_sequence_content.png -------------------------------------------------------------------------------- /docs/images/output/fastqc/fastqc_per_sequence_GC_content.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/output/fastqc/fastqc_per_sequence_GC_content.png -------------------------------------------------------------------------------- /docs/images/output/malt/malt_taxonomic_assignment_success.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/output/malt/malt_taxonomic_assignment_success.png -------------------------------------------------------------------------------- 
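(A short illustration of the dummy-file pattern described in assets/nf-core_eager_dummy.txt above — a minimal Nextflow sketch in which the parameter and channel names are illustrative assumptions, not taken from the pipeline's main.nf:)
// Fall back to the bundled placeholder when an optional input is not supplied,
// so the corresponding process input channel is always filled.
ch_sexdeterrmine_bed = params.sexdeterrmine_bedfile
    ? Channel.fromPath(params.sexdeterrmine_bedfile, checkIfExists: true)
    : Channel.fromPath("$projectDir/assets/nf-core_eager_dummy.txt")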
/docs/images/output/sexdeterrmine/sexdeterrmine_read_counts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/output/sexdeterrmine/sexdeterrmine_read_counts.png -------------------------------------------------------------------------------- /docs/images/tutorials/profiles/config_profile_inheritence.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/tutorials/profiles/config_profile_inheritence.png -------------------------------------------------------------------------------- /assets/nf-core_eager_dummy2.txt: -------------------------------------------------------------------------------- 1 | This is a second dummy file for when we need a 'fake' file to satisfy all nextflow channel inputs being filled, even if we actually only use one. -------------------------------------------------------------------------------- /docs/images/output/fastqc/fastqc_per_sequence_quality_score.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/output/fastqc/fastqc_per_sequence_quality_score.png -------------------------------------------------------------------------------- /docs/images/output/fastqc/fastqc_sequence_duplication_level.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/output/fastqc/fastqc_sequence_duplication_level.png -------------------------------------------------------------------------------- /docs/images/output/fastqc/fastqc_sequence_quality_histogram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/output/fastqc/fastqc_sequence_quality_histogram.png -------------------------------------------------------------------------------- /docs/images/output/qualimap/qualimap_gc_content_distribution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/output/qualimap/qualimap_gc_content_distribution.png -------------------------------------------------------------------------------- /docs/images/output/qualimap/qualimap_cumulative_genome_coverage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/output/qualimap/qualimap_cumulative_genome_coverage.png -------------------------------------------------------------------------------- /docs/images/output/sexdeterrmine/sexdeterrmine_relative_coverage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/output/sexdeterrmine/sexdeterrmine_relative_coverage.png -------------------------------------------------------------------------------- /docs/images/output/adapter_removal/adapter_removal_discarded_reads.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/output/adapter_removal/adapter_removal_discarded_reads.png -------------------------------------------------------------------------------- /docs/images/output/damageprofiler/damageprofiler_deaminationpatterns.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/output/damageprofiler/damageprofiler_deaminationpatterns.png -------------------------------------------------------------------------------- /docs/images/output/multivcfanalyzer/multivcfanalyzer_call_categories.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/output/multivcfanalyzer/multivcfanalyzer_call_categories.png -------------------------------------------------------------------------------- /docs/images/output/adapter_removal/adapter_removal_length_distribution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/eager/HEAD/docs/images/output/adapter_removal/adapter_removal_length_distribution.png -------------------------------------------------------------------------------- /.github/.dockstore.yml: -------------------------------------------------------------------------------- 1 | # Dockstore config version, not pipeline version 2 | version: 1.2 3 | workflows: 4 | - subclass: nfl 5 | primaryDescriptorPath: /nextflow.config 6 | publish: True 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .nextflow* 2 | work/ 3 | data/ 4 | results/ 5 | .DS_Store 6 | tests/ 7 | testing/ 8 | testing* 9 | *.pyc 10 | main_playground.nf 11 | .vscode 12 | *.code-workspace 13 | nf-params.json -------------------------------------------------------------------------------- /.nf-core-lint.yml: -------------------------------------------------------------------------------- 1 | files_unchanged: 2 | - assets/multiqc_config.yaml 3 | - .github/CONTRIBUTING.md 4 | - .github/ISSUE_TEMPLATE/bug_report.md 5 | - docs/README.md 6 | - .github/workflows/linting.yml 7 | -------------------------------------------------------------------------------- /.github/markdownlint.yml: -------------------------------------------------------------------------------- 1 | # Markdownlint configuration file 2 | default: true 3 | line-length: false 4 | no-duplicate-header: 5 | siblings_only: true 6 | no-inline-html: 7 | allowed_elements: 8 | - img 9 | - p 10 | - kbd 11 | - details 12 | - summary 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Join nf-core 4 | url: https://nf-co.re/join 5 | about: Please join the nf-core community here 6 | - name: "Slack #eager channel" 7 | url: https://nfcore.slack.com/channels/eager 8 | about: Discussion about the nf-core/eager pipeline 9 | -------------------------------------------------------------------------------- /docs/images/README.md: -------------------------------------------------------------------------------- 1 | # Documentation Images Information 2 | 3 | The font used for all documentation images is Kalam by Indian Type Foundry and is released under the [Open Font License](https://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&id=OFL) 4 | 5 | Originally downloaded from [Google Fonts](https://fonts.google.com/specimen/Kalam?sidebar.open&selection.family=Kalam:wght@300;400;700) 6 | 
-------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nfcore/base:1.14 2 | LABEL authors="The nf-core/eager community" \ 3 | description="Docker image containing all software requirements for the nf-core/eager pipeline" 4 | 5 | # Install the conda environment 6 | COPY environment.yml / 7 | RUN conda env create --quiet -f /environment.yml && conda clean -a 8 | 9 | # Add conda installation dir to PATH (instead of doing 'conda activate') 10 | ENV PATH /opt/conda/envs/nf-core-eager-2.5.3/bin:$PATH 11 | 12 | # Dump the details of the installed packages to a file for posterity 13 | RUN conda env export --name nf-core-eager-2.5.3 > nf-core-eager-2.5.3.yml -------------------------------------------------------------------------------- /assets/angsd_resources/getALL.txt: -------------------------------------------------------------------------------- 1 | # Download the per-population HapMap chrX allele-frequency tables (hg18/b36) 2 | F="ASW CEU CHB CHD GIH JPT LWK MEX MKK TSI YRI" 3 | for f in $F 4 | do 5 | echo $f 6 | wget http://hapmap.ncbi.nlm.nih.gov/downloads/frequencies/2010-08_phaseII+III/allele_freqs_chrX_${f}_r28_nr.b36_fwd.txt.gz 7 | done 8 | 9 | # Combine all populations, then reformat into BED-like rows (the header is dropped by 'grep -v pos') 10 | cat allele*.gz >allele_freqs_chrX_ALL_r28_nr.b36_fwd.txt.gz 11 | 12 | gunzip -c allele_freqs_chrX_ALL_r28_nr.b36_fwd.txt.gz| awk '{print $2" "$3-1" "$3" "$11" "$12" "$4" "$14}'|grep -v pos >allele.txt 13 | 14 | 15 | # Lift positions from hg18 to hg19, then drop fixed/empty sites and compress 16 | /opt/liftover/liftOver allele.txt /opt/liftover/hg18ToHg19.over.chain.gz hit nohit 17 | cut -f1,3 --complement hit |grep -v -P "\t1.0"|grep -v -P "\t0\t"|gzip -c >HapMapALL.gz 18 | 19 | -------------------------------------------------------------------------------- /.github/workflows/linting_comment.yml: -------------------------------------------------------------------------------- 1 | 2 | name: nf-core linting comment 3 | # This workflow is triggered after the linting action is complete 4 | # It posts an automated comment to the PR, even if the PR is coming from a fork 5 | 6 | on: 7 | workflow_run: 8 | workflows: ["nf-core linting"] 9 | 10 | jobs: 11 | test: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Download lint results 15 | uses: dawidd6/action-download-artifact@v2 16 | with: 17 | workflow: linting.yml 18 | 19 | - name: Get PR number 20 | id: pr_number 21 | run: echo "::set-output name=pr_number::$(cat linting-logs/PR_number.txt)" 22 | 23 | - name: Post PR comment 24 | uses: marocchino/sticky-pull-request-comment@v2 25 | with: 26 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 27 | number: ${{ steps.pr_number.outputs.pr_number }} 28 | path: linting-logs/lint_results.md 29 | 30 | -------------------------------------------------------------------------------- /.gitpod.yml: -------------------------------------------------------------------------------- 1 | image: nfcore/gitpod:latest 2 | 3 | vscode: 4 | extensions: # based on nf-core.nf-core-extensionpack 5 | - codezombiech.gitignore # Language support for .gitignore files 6 | # - cssho.vscode-svgviewer # SVG viewer 7 | - esbenp.prettier-vscode # Prettier code formatter for Visual Studio Code 8 | - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed 9 | - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files 10 | - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar 11 | - mechatroner.rainbow-csv # Highlight columns in csv files in different colors 12 | # - nextflow.nextflow # 
Nextflow syntax highlighting 13 | - oderwat.indent-rainbow # Highlight indentation level 14 | - streetsidesoftware.code-spell-checker # Spelling checker for source code 15 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for the nf-core/eager pipeline 4 | labels: enhancement 5 | --- 6 | 7 | 15 | 16 | ## Is your feature request related to a problem? Please describe 17 | 18 | 19 | 20 | 21 | 22 | ## Describe the solution you'd like 23 | 24 | 25 | 26 | ## Describe alternatives you've considered 27 | 28 | 29 | 30 | ## Additional context 31 | 32 | 33 | -------------------------------------------------------------------------------- /conf/test_tsv_bam.config: -------------------------------------------------------------------------------- 1 | /* 2 | * ------------------------------------------------- 3 | * Nextflow config file for running tests 4 | * ------------------------------------------------- 5 | * Defines bundled input files and everything required 6 | * to run a fast and simple test. Use as follows: 7 | * nextflow run nf-core/eager -profile test,docker (or singularity, or conda) 8 | */ 9 | 10 | includeConfig 'test_resources.config' 11 | 12 | params { 13 | config_profile_name = 'Test profile' 14 | config_profile_description = 'Minimal test dataset to check pipeline function' 15 | // Limit resources so that this can run on GitHub Actions 16 | max_cpus = 2 17 | max_memory = 6.GB 18 | max_time = 48.h 19 | genome = false 20 | //Input data 21 | input = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/mammoth_design_bam.tsv' 22 | // Genome references 23 | fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Mammoth/Mammoth_MT_Krause.fasta' 24 | } -------------------------------------------------------------------------------- /bin/parse_snp_cov.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Written by Thiseas C. Lamnidis and released under the MIT license. 4 | # See git repository (https://github.com/nf-core/eager) for full license text. 5 | 6 | import sys, json 7 | from collections import OrderedDict 8 | 9 | jsonOut = OrderedDict() 10 | data = OrderedDict() 11 | 12 | 13 | input = open(sys.argv[1], 'r') 14 | for line in input: 15 | fields = line.strip().split() 16 | # Skip empty and comment/header lines before unpacking, so short rows cannot raise an IndexError 17 | if not fields or fields[0].startswith("#"): 18 | continue 19 | sample_id = fields[0] 20 | covered_snps = fields[1] 21 | total_snps = fields[2] 22 | 23 | data[sample_id] = {"Covered_Snps":covered_snps, "Total_Snps":total_snps} 24 | 25 | jsonOut = {"plot_type": "generalstats", "id": "snp_coverage", 26 | "pconfig": { 27 | "Covered_Snps" : {"title" : "#SNPs Covered"}, 28 | "Total_Snps" : {"title": "#SNPs Total"} 29 | }, 30 | "data" : data 31 | } 32 | 33 | # Use removesuffix(): rstrip('.txt') would strip any trailing '.', 't' or 'x' characters rather than the suffix 34 | with open(sys.argv[1].removesuffix('.txt')+'_mqc.json', 'w') as outfile: 35 | json.dump(jsonOut, outfile) 36 | -------------------------------------------------------------------------------- /conf/test.config: -------------------------------------------------------------------------------- 1 | /* 2 | * ------------------------------------------------- 3 | * Nextflow config file for running tests 4 | * ------------------------------------------------- 5 | * Defines bundled input files and everything required 6 | * to run a fast and simple test.
Use as follows: 7 | * nextflow run nf-core/eager -profile test,docker (or singularity, or conda) 8 | */ 9 | 10 | includeConfig 'test_resources.config' 11 | 12 | params { 13 | config_profile_name = 'Test profile' 14 | config_profile_description = 'Minimal test dataset to check pipeline function' 15 | // Limit resources so that this can run on GitHub Actions 16 | max_cpus = 2 17 | max_memory = 6.GB 18 | max_time = 48.h 19 | genome = false 20 | //Input data 21 | input = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/mammoth_design_fastq.tsv' 22 | // Genome references 23 | fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Mammoth/Mammoth_MT_Krause.fasta' 24 | } 25 | -------------------------------------------------------------------------------- /conf/test_tsv_fna.config: -------------------------------------------------------------------------------- 1 | /* 2 | * ------------------------------------------------- 3 | * Nextflow config file for running tests 4 | * ------------------------------------------------- 5 | * Defines bundled input files and everything required 6 | * to run a fast and simple test. Use as follows: 7 | * nextflow run nf-core/eager -profile test,docker (or singularity, or conda) 8 | */ 9 | 10 | includeConfig 'test_resources.config' 11 | 12 | params { 13 | config_profile_name = 'Test profile' 14 | config_profile_description = 'Minimal test dataset to check pipeline function' 15 | // Limit resources so that this can run on GitHub Actions 16 | max_cpus = 2 17 | max_memory = 6.GB 18 | max_time = 48.h 19 | genome = false 20 | //Input data 21 | input = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/mammoth_design_fastq.tsv' 22 | // Genome references 23 | fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Mammoth/Mammoth_MT_Krause.fna' 24 | } 25 | -------------------------------------------------------------------------------- /conf/test_tsv_pretrim.config: -------------------------------------------------------------------------------- 1 | /* 2 | * ------------------------------------------------- 3 | * Nextflow config file for running tests 4 | * ------------------------------------------------- 5 | * Defines bundled input files and everything required 6 | * to run a fast and simple test. Use as follows: 7 | * nextflow run nf-core/eager -profile test,docker (or singularity, or conda) 8 | */ 9 | 10 | includeConfig 'test_resources.config' 11 | 12 | params { 13 | config_profile_name = 'Test profile' 14 | config_profile_description = 'Minimal test dataset to check pipeline function' 15 | // Limit resources so that this can run on GitHub Actions 16 | max_cpus = 2 17 | max_memory = 6.GB 18 | max_time = 48.h 19 | genome = false 20 | //Input data 21 | input = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/mammoth_design_fastq_pretrim.tsv' 22 | // Genome references 23 | fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Mammoth/Mammoth_MT_Krause.fasta' 24 | } 25 | -------------------------------------------------------------------------------- /conf/test_tsv_complex.config: -------------------------------------------------------------------------------- 1 | /* 2 | * ------------------------------------------------- 3 | * Nextflow config file for running tests 4 | * ------------------------------------------------- 5 | * Defines bundled input files and everything required 6 | * to run a fast and simple test.
Use as follows: 7 | * nextflow run nf-core/eager -profile test,docker (or singularity, or conda) 8 | */ 9 | 10 | includeConfig 'test_resources.config' 11 | 12 | 13 | params { 14 | config_profile_name = 'Test profile' 15 | config_profile_description = 'Minimal test dataset to check pipeline function' 16 | // Limit resources so that this can run on GitHub Actions 17 | max_cpus = 2 18 | max_memory = 6.GB 19 | max_time = 48.h 20 | genome = false 21 | //Input data 22 | input = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/mammoth_design_fastq_multilane_multilib.tsv' 23 | // Genome references 24 | fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Mammoth/Mammoth_MT_Krause.fasta' 25 | } 26 | -------------------------------------------------------------------------------- /conf/test_direct.config: -------------------------------------------------------------------------------- 1 | /* 2 | * ------------------------------------------------- 3 | * Nextflow config file for running tests 4 | * ------------------------------------------------- 5 | * Defines bundled input files and everything required 6 | * to run a fast and simple test. Use as follows: 7 | * nextflow run nf-core/eager -profile test,docker (or singularity, or conda) 8 | */ 9 | 10 | includeConfig 'test_resources.config' 11 | 12 | 13 | params { 14 | config_profile_name = 'Test profile' 15 | config_profile_description = 'Minimal test dataset to check pipeline function' 16 | // Limit resources so that this can run on GitHub Actions 17 | max_cpus = 2 18 | max_memory = 6.GB 19 | max_time = 48.h 20 | genome = false 21 | //Input data 22 | single_end = false 23 | // Genome references 24 | fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Mammoth/Mammoth_MT_Krause.fasta' 25 | // Ignore `--input` as otherwise the parameter validation will throw an error 26 | schema_ignore_params = 'genomes,input_paths,input' 27 | } 28 | -------------------------------------------------------------------------------- /.github/workflows/push_dockerhub_dev.yml: -------------------------------------------------------------------------------- 1 | name: nf-core Docker push (dev) 2 | # This builds the docker image and pushes it to DockerHub 3 | # Runs on push events to the 'dev' branch (PR merges) 4 | on: 5 | push: 6 | branches: 7 | - dev 8 | 9 | jobs: 10 | push_dockerhub: 11 | name: Push new Docker image to Docker Hub (dev) 12 | runs-on: ubuntu-latest 13 | # Only run for the nf-core repo 14 | if: ${{ github.repository == 'nf-core/eager' }} 15 | env: 16 | DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} 17 | DOCKERHUB_PASS: ${{ secrets.DOCKERHUB_PASS }} 18 | steps: 19 | - name: Check out pipeline code 20 | uses: actions/checkout@v2 21 | 22 | - name: Build new docker image 23 | run: docker build --no-cache . -t nfcore/eager:dev 24 | 25 | - name: Push Docker image to DockerHub (dev) 26 | run: | 27 | echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin 28 | docker push nfcore/eager:dev 29 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # nf-core/eager: Documentation 2 | 3 | The nf-core/eager documentation is split into the following pages: 4 | 5 | * [Usage](usage.md) 6 | * An overview of how the pipeline works, how to run it and a description of all of the different command-line flags.
7 | * Also includes: FAQ, Troubleshooting and Tutorials 8 | * [Output](output.md) 9 | * An overview of the different results produced by the pipeline and how to interpret them. 10 | 11 | You can find a lot more documentation about installing, configuring and running nf-core pipelines on the website: [https://nf-co.re](https://nf-co.re). 12 | 13 | Additional pages are: 14 | 15 | * [Installation](https://nf-co.re/usage/installation) 16 | * Pipeline configuration 17 | * [Local installation](https://nf-co.re/usage/local_installation) 18 | * [Adding your own system config](https://nf-co.re/usage/adding_own_config) 19 | * [Reference genomes](https://nf-co.re/usage/reference_genomes) 20 | * [Contribution Guidelines](../.github/CONTRIBUTING.md) 21 | * Basic contribution & behaviour guidelines 22 | * Checklists and guidelines for people who would like to contribute code 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) The nf-core/eager community 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE/pull_request_template.md: -------------------------------------------------------------------------------- 1 | Many thanks for contributing to nf-core/eager! 2 | 3 | Please fill in the appropriate checklist below (delete whatever is not relevant). These are the most common things requested on pull requests (PRs). 4 | 5 | ## PR checklist 6 | 7 | - [ ] This comment contains a description of changes (with reason). 8 | - [ ] If you've fixed a bug or added code that should be tested, add tests! 9 | - [ ] If you've added a new tool - add to the software_versions process and a regex to `scrape_software_versions.py` 10 | - [ ] If necessary, also make a PR on the [nf-core/eager branch on the nf-core/test-datasets repo](https://github.com/nf-core/test-datasets/pull/new/nf-core/eager). 11 | - [ ] Make sure your code lints (`nf-core lint .`). 12 | - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker`). 13 | - [ ] Usage Documentation in `docs/usage.md` is updated. 14 | - [ ] Output Documentation in `docs/output.md` is updated. 15 | - [ ] `CHANGELOG.md` is updated. 16 | - [ ] `README.md` is updated (including new tool citations and authors/contributors).
17 | 18 | **Learn more about contributing:** https://github.com/nf-core/eager/tree/master/.github/CONTRIBUTING.md 19 | -------------------------------------------------------------------------------- /conf/test_tsv_kraken.config: -------------------------------------------------------------------------------- 1 | /* 2 | * ------------------------------------------------- 3 | * Nextflow config file for running tests 4 | * ------------------------------------------------- 5 | * Defines bundled input files and everything required 6 | * to run a fast and simple test. Use as follows: 7 | * nextflow run nf-core/eager -profile test,docker (or singularity, or conda) 8 | */ 9 | 10 | includeConfig 'test_resources.config' 11 | 12 | params { 13 | config_profile_name = 'Test profile kraken' 14 | config_profile_description = 'Minimal test dataset to check pipeline function with kraken metagenomic profiler' 15 | // Limit resources so that this can run on GitHub Actions 16 | max_cpus = 2 17 | max_memory = 6.GB 18 | max_time = 48.h 19 | genome = false 20 | //Input data 21 | metagenomic_tool = 'kraken' 22 | run_metagenomic_screening = true 23 | input = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/mammoth_design_fastq.tsv' 24 | // Genome references 25 | fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Mammoth/Mammoth_MT_Krause.fasta' 26 | database = 'https://github.com/nf-core/test-datasets/raw/eager/databases/kraken/eager_test.tar.gz' 27 | } 28 | -------------------------------------------------------------------------------- /.github/workflows/push_dockerhub_release.yml: -------------------------------------------------------------------------------- 1 | name: nf-core Docker push (release) 2 | # This builds the docker image and pushes it to DockerHub 3 | # Runs on nf-core repo releases 4 | on: 5 | release: 6 | types: [published] 7 | 8 | jobs: 9 | push_dockerhub: 10 | name: Push new Docker image to Docker Hub (release) 11 | runs-on: ubuntu-latest 12 | # Only run for the nf-core repo 13 | if: ${{ github.repository == 'nf-core/eager' }} 14 | env: 15 | DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} 16 | DOCKERHUB_PASS: ${{ secrets.DOCKERHUB_PASS }} 17 | steps: 18 | - name: Check out pipeline code 19 | uses: actions/checkout@v2 20 | 21 | - name: Build new docker image 22 | run: docker build --no-cache . -t nfcore/eager:latest 23 | 24 | - name: Push Docker image to DockerHub (release) 25 | run: | 26 | echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin 27 | docker push nfcore/eager:latest 28 | docker tag nfcore/eager:latest nfcore/eager:${{ github.event.release.tag_name }} 29 | docker push nfcore/eager:${{ github.event.release.tag_name }} 30 | -------------------------------------------------------------------------------- /assets/where_are_my_files.txt: -------------------------------------------------------------------------------- 1 | ===================== 2 | Where are my files? 3 | ===================== 4 | 5 | By default, the nf-core/eager pipeline does not save large intermediate files to the 6 | results directory. This is to conserve disk space. 7 | 8 | These files can be found in the pipeline `work` directory if needed.
9 | Alternatively, re-run the pipeline using `-resume` in addition to one of 10 | the below command-line options and they will be copied into the results directory: 11 | 12 | `--saveReference` 13 | Save any downloaded or generated reference genome files to your results folder. 14 | These can then be used for future pipeline runs, reducing processing times. 15 | 16 | ----------------------------------- 17 | Setting defaults in a config file 18 | ----------------------------------- 19 | If you would always like these files to be saved without having to specify this on 20 | the command line, you can save the following to your personal configuration file 21 | (eg. `~/.nextflow/config`): 22 | 23 | params.saveReference = true 24 | 25 | For more help, see the following documentation: 26 | 27 | https://github.com/nf-core/eager/blob/master/docs/usage.md 28 | https://www.nextflow.io/docs/latest/getstarted.html 29 | https://www.nextflow.io/docs/latest/config.html 30 | -------------------------------------------------------------------------------- /assets/email_template.txt: -------------------------------------------------------------------------------- 1 | ---------------------------------------------------- 2 | ,--./,-. 3 | ___ __ __ __ ___ /,-._.--~\\ 4 | |\\ | |__ __ / ` / \\ |__) |__ } { 5 | | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, 6 | `._,._,' 7 | nf-core/eager v${version} 8 | ---------------------------------------------------- 9 | 10 | Run Name: $runName 11 | 12 | <% if (success){ 13 | out << "## nf-core/eager execution completed successfully! ##" 14 | } else { 15 | out << """#################################################### 16 | ## nf-core/eager execution completed unsuccessfully! ## 17 | #################################################### 18 | The exit status of the task that caused the workflow execution to fail was: $exitStatus. 19 | The full error message was: 20 | 21 | ${errorReport} 22 | """ 23 | } %> 24 | 25 | 26 | The workflow was completed at $dateComplete (duration: $duration) 27 | 28 | The command used to launch the workflow was as follows: 29 | 30 | $commandLine 31 | 32 | 33 | 34 | Pipeline Configuration: 35 | ----------------------- 36 | <% out << summary.collect{ k,v -> " - $k: $v" }.join("\n") %> 37 | 38 | -- 39 | nf-core/eager 40 | https://github.com/nf-core/eager 41 | -------------------------------------------------------------------------------- /assets/sendmail_template.txt: -------------------------------------------------------------------------------- 1 | To: $email 2 | Subject: $subject 3 | Mime-Version: 1.0 4 | Content-Type: multipart/related;boundary="nfcoremimeboundary" 5 | 6 | --nfcoremimeboundary 7 | Content-Type: text/html; charset=utf-8 8 | 9 | $email_html 10 | 11 | --nfcoremimeboundary 12 | Content-Type: image/png;name="nf-core-eager_logo.png" 13 | Content-Transfer-Encoding: base64 14 | Content-ID: 15 | Content-Disposition: inline; filename="nf-core-eager_logo.png" 16 | 17 | <% out << new File("$projectDir/assets/nf-core-eager_logo.png"). 18 | bytes. 19 | encodeBase64(). 20 | toString(). 21 | tokenize( '\n' )*. 22 | toList()*. 23 | collate( 76 )*. 24 | collect { it.join() }. 25 | flatten(). 
26 | join( '\n' ) %> 27 | 28 | <% 29 | if (mqcFile){ 30 | def mqcFileObj = new File("$mqcFile") 31 | if (mqcFileObj.length() < mqcMaxSize){ 32 | out << """ 33 | --nfcoremimeboundary 34 | Content-Type: text/html; name=\"multiqc_report\" 35 | Content-Transfer-Encoding: base64 36 | Content-ID: 37 | Content-Disposition: attachment; filename=\"${mqcFileObj.getName()}\" 38 | 39 | ${mqcFileObj. 40 | bytes. 41 | encodeBase64(). 42 | toString(). 43 | tokenize( '\n' )*. 44 | toList()*. 45 | collate( 76 )*. 46 | collect { it.join() }. 47 | flatten(). 48 | join( '\n' )} 49 | """ 50 | }} 51 | %> 52 | 53 | --nfcoremimeboundary-- 54 | -------------------------------------------------------------------------------- /conf/test_tsv_humanbam.config: -------------------------------------------------------------------------------- 1 | /* 2 | * ------------------------------------------------- 3 | * Nextflow config file for running tests 4 | * ------------------------------------------------- 5 | * Defines bundled input files and everything required 6 | * to run a fast and simple test. Use as follows: 7 | * nextflow run nf-core/eager -profile test,docker (or singularity, or conda) 8 | */ 9 | 10 | includeConfig 'test_resources.config' 11 | 12 | params { 13 | config_profile_name = 'Test profile' 14 | config_profile_description = 'Minimal test dataset to check pipeline function' 15 | // Limit resources so that this can run on GitHub Actions 16 | max_cpus = 2 17 | max_memory = 6.GB 18 | max_time = 48.h 19 | genome = false 20 | //Input data 21 | input = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Human/human_design_bam.tsv' 22 | // Genome references 23 | fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Mammoth/Mammoth_MT_Krause.fasta' 24 | sexdeterrmine_bedfile = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz' 25 | // Genotyping 26 | pileupcaller_bedfile = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz' 27 | pileupcaller_snpfile = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Human/1240K_covered_in_JK2067_downsampled_s0.1.numeric_chromosomes.snp' 28 | } 29 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 13 | 14 | 15 | ## PR checklist 16 | 17 | - [ ] This comment contains a description of changes (with reason). 18 | - [ ] If you've fixed a bug or added code that should be tested, add tests! 19 | - [ ] If you've added a new tool - add to the software_versions process and a regex to `scrape_software_versions.py` 20 | - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/eager/tree/master/.github/CONTRIBUTING.md) 21 | - [ ] If necessary, also make a PR on the nf-core/eager _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. 22 | - [ ] Make sure your code lints (`nf-core lint .`). 23 | - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker`). 24 | - [ ] Usage Documentation in `docs/usage.md` is updated. 25 | - [ ] Output Documentation in `docs/output.md` is updated. 26 | - [ ] `CHANGELOG.md` is updated. 27 | - [ ] `README.md` is updated (including new tool citations and authors/contributors).
28 | -------------------------------------------------------------------------------- /.github/workflows/awstest.yml: -------------------------------------------------------------------------------- 1 | name: nf-core AWS test 2 | # This workflow is triggered manually with GitHub actions workflow dispatch 3 | # (it has no automatic trigger; the 'on:' block below only defines workflow_dispatch). 4 | # It runs the -profile 'test' on AWS batch. 5 | 6 | on: 7 | workflow_dispatch: 8 | 9 | 10 | env: 11 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 12 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 13 | TOWER_ACCESS_TOKEN: ${{ secrets.AWS_TOWER_TOKEN }} 14 | AWS_JOB_DEFINITION: ${{ secrets.AWS_JOB_DEFINITION }} 15 | AWS_JOB_QUEUE: ${{ secrets.AWS_JOB_QUEUE }} 16 | AWS_S3_BUCKET: ${{ secrets.AWS_S3_BUCKET }} 17 | 18 | 19 | jobs: 20 | run-awstest: 21 | name: Run AWS tests 22 | if: github.repository == 'nf-core/eager' 23 | runs-on: ubuntu-latest 24 | steps: 25 | - name: Setup Miniconda 26 | uses: conda-incubator/setup-miniconda@v2 27 | with: 28 | auto-update-conda: true 29 | python-version: 3.7 30 | - name: Install awscli 31 | run: conda install -c conda-forge awscli 32 | - name: Start AWS batch job 33 | # For example: adding multiple test runs with different parameters 34 | # Remember that you can parallelise this by using strategy.matrix 35 | run: | 36 | aws batch submit-job \ 37 | --region eu-west-1 \ 38 | --job-name nf-core-eager \ 39 | --job-queue $AWS_JOB_QUEUE \ 40 | --job-definition $AWS_JOB_DEFINITION \ 41 | --container-overrides '{"command": ["nf-core/eager", "-r '"${GITHUB_SHA}"' -profile test_tsv_complex --outdir s3://'"${AWS_S3_BUCKET}"'/eager/results-'"${GITHUB_SHA}"' -w s3://'"${AWS_S3_BUCKET}"'/eager/work-'"${GITHUB_SHA}"' -with-tower"], "environment": [{"name": "TOWER_ACCESS_TOKEN", "value": "'"$TOWER_ACCESS_TOKEN"'"}]}' 42 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | # You can use this file to create a conda environment for this pipeline: 2 | # conda env create -f environment.yml 3 | name: nf-core-eager-2.5.3 4 | channels: 5 | - conda-forge 6 | - bioconda 7 | - defaults 8 | dependencies: 9 | - conda-forge::python=3.9.4 10 | - conda-forge::markdown=3.3.4 11 | - conda-forge::pymdown-extensions=8.2 12 | - conda-forge::pygments=2.14.0 13 | - bioconda::rename=1.601 14 | - conda-forge::openjdk=8.0.144 # Don't upgrade - required for GATK 15 | - bioconda::fastqc=0.11.9 16 | - bioconda::adapterremoval=2.3.2 17 | - bioconda::adapterremovalfixprefix=0.0.5 18 | - bioconda::bwa=0.7.17 19 | - bioconda::picard=2.26.0 20 | - bioconda::samtools=1.12 21 | - bioconda::dedup=0.12.8 22 | - bioconda::angsd=0.935 23 | - bioconda::circularmapper=1.93.5 24 | - bioconda::gatk4=4.2.0.0 25 | - bioconda::gatk=3.5 ## Don't upgrade - required for MultiVCFAnalyzer 26 | - bioconda::qualimap=2.2.2d 27 | - bioconda::vcf2genome=0.91 28 | - bioconda::damageprofiler=0.4.9 # Don't upgrade - later versions don't allow java 8 29 | - bioconda::multiqc=1.16 30 | - bioconda::pmdtools=0.60 31 | - bioconda::bedtools=2.30.0 32 | - conda-forge::libiconv=1.16 33 | - conda-forge::pigz=2.6 34 | - bioconda::sequencetools=1.5.2 35 | - bioconda::preseq=3.1.2 36 | - bioconda::fastp=0.20.1 37 | - bioconda::bamutil=1.0.15 38 | - bioconda::mtnucratio=0.7 39 | - bioconda::pysam=0.16.0 40 | - bioconda::kraken2=2.1.2 41 | - conda-forge::pandas=1.2.4 42 | - bioconda::freebayes=1.3.5 43 | - 
bioconda::sexdeterrmine=1.1.2 44 | - bioconda::multivcfanalyzer=0.85.2 45 | - bioconda::hops=0.35 46 | - bioconda::malt=0.61 47 | - conda-forge::biopython=1.79 48 | - conda-forge::xopen=1.1.0 49 | - bioconda::bowtie2=2.4.4 50 | - bioconda::eigenstratdatabasetools=1.0.2 51 | - bioconda::mapdamage2=2.2.1 52 | - bioconda::bbmap=38.92 53 | - bioconda::bcftools=1.12 -------------------------------------------------------------------------------- /conf/test_resources.config: -------------------------------------------------------------------------------- 1 | /* 2 | * ------------------------------------------------- 3 | * Nextflow config file for running tests 4 | * ------------------------------------------------- 5 | * Defines the base computing resources used across all CI tests (primarily the 6 | * time limit) 7 | */ 8 | 9 | 10 | process { 11 | 12 | withLabel:'sc_tiny'{ 13 | cpus = { check_max( 1, 'cpus' ) } 14 | memory = { check_max( 1.GB * task.attempt, 'memory' ) } 15 | time = { check_max( 10.m * task.attempt, 'time' ) } 16 | } 17 | 18 | withLabel:'sc_small'{ 19 | cpus = { check_max( 1, 'cpus' ) } 20 | memory = { check_max( 4.GB * task.attempt, 'memory' ) } 21 | time = { check_max( 10.m * task.attempt, 'time' ) } 22 | } 23 | 24 | withLabel:'sc_medium'{ 25 | cpus = { check_max( 1, 'cpus' ) } 26 | memory = { check_max( 8.GB * task.attempt, 'memory' ) } 27 | time = { check_max( 10.m * task.attempt, 'time' ) } 28 | } 29 | 30 | withLabel:'mc_small'{ 31 | cpus = { check_max( 2 * task.attempt, 'cpus' ) } 32 | memory = { check_max( 4.GB * task.attempt, 'memory' ) } 33 | time = { check_max( 10.m * task.attempt, 'time' ) } 34 | } 35 | 36 | withLabel:'mc_medium' { 37 | cpus = { check_max( 4 * task.attempt, 'cpus' ) } 38 | memory = { check_max( 8.GB * task.attempt, 'memory' ) } 39 | time = { check_max( 10.m * task.attempt, 'time' ) } 40 | } 41 | 42 | withLabel:'mc_large'{ 43 | cpus = { check_max( 8 * task.attempt, 'cpus' ) } 44 | memory = { check_max( 16.GB * task.attempt, 'memory' ) } 45 | time = { check_max( 10.m * task.attempt, 'time' ) } 46 | } 47 | 48 | withLabel:'mc_huge'{ 49 | cpus = { check_max( 32 * task.attempt, 'cpus' ) } 50 | memory = { check_max( 256.GB * task.attempt, 'memory' ) } 51 | time = { check_max( 10.m * task.attempt, 'time' ) } 52 | } 53 | 54 | withName:'mapdamage_rescaling'{ 55 | time = { check_max( 20.m * task.attempt, 'time' ) } 56 | } 57 | 58 | } -------------------------------------------------------------------------------- /.github/workflows/awsfulltest.yml: -------------------------------------------------------------------------------- 1 | name: nf-core AWS full size tests 2 | # This workflow is triggered on published releases. 3 | # It can be additionally triggered manually with GitHub actions workflow dispatch. 
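4 | # (Editorial sketch, assuming the GitHub CLI is installed and authenticated: such a 5 | # manual dispatch could be started with `gh workflow run awsfulltest.yml`.)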
6 | # It runs the -profile 'test_full' on AWS batch. 7 | 8 | on: 9 | workflow_run: 10 | workflows: ["nf-core Docker push (release)"] 11 | types: [completed] 12 | workflow_dispatch: 13 | 14 | 15 | env: 16 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 17 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 18 | TOWER_ACCESS_TOKEN: ${{ secrets.AWS_TOWER_TOKEN }} 19 | AWS_JOB_DEFINITION: ${{ secrets.AWS_JOB_DEFINITION }} 20 | AWS_JOB_QUEUE: ${{ secrets.AWS_JOB_QUEUE }} 21 | AWS_S3_BUCKET: ${{ secrets.AWS_S3_BUCKET }} 22 | 23 | 24 | jobs: 25 | run-awstest: 26 | name: Run AWS full tests 27 | if: github.repository == 'nf-core/eager' 28 | runs-on: ubuntu-latest 29 | steps: 30 | - name: Setup Miniconda 31 | uses: conda-incubator/setup-miniconda@v2 32 | with: 33 | auto-update-conda: true 34 | python-version: 3.7 35 | - name: Install awscli 36 | run: conda install -c conda-forge awscli 37 | - name: Start AWS batch job 38 | # Uses full size test data (still relatively small datasets for a few samples); 39 | # the `test_full.config` profile runs with only one set of parameters. 40 | # Specify `-profile test_full` instead of `-profile test` in the AWS batch command. 41 | run: | 42 | aws batch submit-job \ 43 | --region eu-west-1 \ 44 | --job-name nf-core-eager \ 45 | --job-queue $AWS_JOB_QUEUE \ 46 | --job-definition $AWS_JOB_DEFINITION \ 47 | --container-overrides '{"command": ["nf-core/eager", "-r '"${GITHUB_SHA}"' -profile test_full --outdir s3://'"${AWS_S3_BUCKET}"'/eager/results-'"${GITHUB_SHA}"' -w s3://'"${AWS_S3_BUCKET}"'/eager/work-'"${GITHUB_SHA}"' -with-tower"], "environment": [{"name": "TOWER_ACCESS_TOKEN", "value": "'"$TOWER_ACCESS_TOKEN"'"}]}' 48 | -------------------------------------------------------------------------------- /conf/benchmarking_vikingfish.config: -------------------------------------------------------------------------------- 1 | /* 2 | * ------------------------------------------------- 3 | * Nextflow config file for benchmarking 4 | * ------------------------------------------------- 5 | * Defines bundled input files and everything required 6 | * to run a full-sized benchmarking test.
Use as follows: 7 | * nextflow run nf-core/eager -profile test, docker (or singularity, or conda) 8 | */ 9 | 10 | params { 11 | config_profile_name = 'nf-core/eager benchmarking - Viking Fish profile' 12 | config_profile_description = "A 'fullsized' benchmarking profile for deepish sequencing aDNA data" 13 | 14 | //Input data 15 | input = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Benchmarking/benchmarking_vikingfish.tsv' 16 | // Genome reference 17 | fasta = 's3://nf-core-awsmegatests/eager/ENA_Data_Fish/GCF_902167405.1_gadMor3.0_genomic.fna.gz' 18 | 19 | bwaalnn = 0.04 20 | bwaalnl = 1024 21 | 22 | run_bam_filtering = true 23 | bam_unmapped_type = 'discard' 24 | bam_mapping_quality_threshold = 25 25 | 26 | run_genotyping = true 27 | genotyping_tool = 'hc' 28 | genotyping_source = 'raw' 29 | gatk_ploidy = 2 30 | } 31 | 32 | process { 33 | withName:'adapter_removal'{ 34 | cpus = { check_max( 8, 'cpus' ) } 35 | memory = { check_max( 16.GB * task.attempt, 'memory' ) } 36 | time = { check_max( 2.h * task.attempt, 'time' ) } 37 | } 38 | withName:'bwa'{ 39 | cpus = { check_max( 8, 'cpus' ) } 40 | memory = { check_max( 16.GB * task.attempt, 'memory' ) } 41 | time = { check_max( 8.h * task.attempt, 'time' ) } 42 | } 43 | withName:'dedup'{ 44 | cpus = { check_max( 8, 'cpus' ) } 45 | memory = { check_max( 16.GB * task.attempt, 'memory' ) } 46 | time = { check_max( 4.h * task.attempt, 'time' ) } 47 | } 48 | withName:'genotyping_hc'{ 49 | cpus = { check_max( 8, 'cpus' ) } 50 | memory = { check_max( 16.GB * task.attempt, 'memory' ) } 51 | time = { check_max( 8.h * task.attempt, 'time' ) } 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Report something that is broken or incorrect 4 | labels: bug 5 | --- 6 | 7 | 15 | 16 | ## Check Documentation 17 | 18 | I have checked the following places for your error: 19 | 20 | - [ ] [nf-core website: troubleshooting](https://nf-co.re/usage/troubleshooting) 21 | - [ ] [nf-core/eager pipeline documentation](https://nf-co.re/nf-core/eager/usage) 22 | - nf-core/eager FAQ/troubleshooting can be found [here](https://nf-co.re/eager/usage#troubleshooting-and-faqs) 23 | 24 | ## Description of the bug 25 | 26 | 27 | 28 | ## Steps to reproduce 29 | 30 | Steps to reproduce the behaviour: 31 | 32 | 1. Command line: `nextflow run ...` 33 | 2. See error: _Please provide your error message_ 34 | 35 | ## Expected behaviour 36 | 37 | 38 | 39 | ## Log files 40 | 41 | Have you provided the following extra information/files: 42 | 43 | - [ ] The command used to run the pipeline 44 | - [ ] The `.nextflow.log` file 45 | - [ ] The exact error: 46 | 47 | ## System 48 | 49 | - Hardware: 50 | - Executor: 51 | - OS: 52 | - Version 53 | 54 | ## Nextflow Installation 55 | 56 | - Version: 57 | 58 | ## Container engine 59 | 60 | - Engine: 61 | - version: 62 | - Image tag: 63 | 64 | ## Additional context 65 | 66 | 67 | -------------------------------------------------------------------------------- /bin/merge_kraken_res.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Written by Maxime Borry and released under the MIT license. 4 | # See git repository (https://github.com/nf-core/eager) for full license text. 
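# Example invocation (a sketch; it assumes the working directory holds the
# per-sample *.read_kraken_parsed.csv and *.kmer_kraken_parsed.csv files
# written by kraken_parse.py, which is how get_csv() below finds its inputs):
#   python merge_kraken_res.py -or kraken_read_count_table.csv -ok kraken_kmer_unicity_table.csv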
5 | 6 | import argparse 7 | import os 8 | import pandas as pd 9 | import numpy as np 10 | 11 | def _get_args(): 12 | '''This function parses and return arguments passed in''' 13 | parser = argparse.ArgumentParser( 14 | prog='merge_kraken_res', 15 | formatter_class=argparse.RawDescriptionHelpFormatter, 16 | description='Merging csv count files in one table') 17 | parser.add_argument( 18 | '-or', 19 | dest="readout", 20 | default="kraken_read_count_table.csv", 21 | help="Read count output file. Default = kraken_read_count_table.csv") 22 | parser.add_argument( 23 | '-ok', 24 | dest="kmerout", 25 | default="kraken_kmer_unicity_table.csv", 26 | help="Kmer unicity output file. Default = kraken_kmer_unicity_table.csv") 27 | 28 | args = parser.parse_args() 29 | 30 | readout = args.readout 31 | kmerout = args.kmerout 32 | 33 | return(readout, kmerout) 34 | 35 | 36 | def get_csv(): 37 | tmp = [i for i in os.listdir() if ".csv" in i] 38 | kmer = [i for i in tmp if '.kmer_' in i] 39 | read = [i for i in tmp if '.read_' in i] 40 | return(read, kmer) 41 | 42 | 43 | def _get_basename(file_name): 44 | if ("/") in file_name: 45 | basename = file_name.split("/")[-1].split(".")[0] 46 | else: 47 | basename = file_name.split(".")[0] 48 | return(basename) 49 | 50 | 51 | def merge_csv(all_csv): 52 | df = pd.read_csv(all_csv[0], index_col=0) 53 | for i in range(1, len(all_csv)): 54 | df_tmp = pd.read_csv(all_csv[i], index_col=0) 55 | df = pd.merge(left=df, right=df_tmp, on='TAXID', how='outer') 56 | df.fillna(0, inplace=True) 57 | return(df) 58 | 59 | 60 | def write_csv(pd_dataframe, outfile): 61 | pd_dataframe.to_csv(outfile) 62 | 63 | 64 | if __name__ == "__main__": 65 | READOUT, KMEROUT = _get_args() 66 | reads, kmers = get_csv() 67 | read_df = merge_csv(reads) 68 | kmer_df = merge_csv(kmers) 69 | write_csv(read_df, READOUT) 70 | write_csv(kmer_df, KMEROUT) -------------------------------------------------------------------------------- /conf/test_full.config: -------------------------------------------------------------------------------- 1 | /* 2 | * ------------------------------------------------- 3 | * Nextflow config file for running full-size tests 4 | * ------------------------------------------------- 5 | * Defines bundled input files and everything required 6 | * to run a full size pipeline test. 
Use as follows: 7 | * nextflow run nf-core/eager -profile test_full, 8 | */ 9 | 10 | params { 11 | config_profile_name = 'Full test profile for nf-core/eager' 12 | config_profile_description = 'Full test dataset to check nf-core/eager function' 13 | 14 | // Input data for full size test 15 | input = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Benchmarking/benchmarking_vikingfish.tsv' 16 | 17 | // Genome reference 18 | fasta = 'https://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_other/Gadus_morhua/representative/GCF_902167405.1_gadMor3.0/GCF_902167405.1_gadMor3.0_genomic.fna.gz' 19 | 20 | bwaalnn = 0.04 21 | bwaalnl = 1024 22 | 23 | run_bam_filtering = true 24 | bam_unmapped_type = 'discard' 25 | bam_mapping_quality_threshold = 25 26 | 27 | run_genotyping = true 28 | genotyping_tool = 'hc' 29 | genotyping_source = 'raw' 30 | gatk_ploidy = 2 31 | } 32 | 33 | process { 34 | withName:'adapter_removal'{ 35 | cpus = { check_max( 8, 'cpus' ) } 36 | memory = { check_max( 16.GB * task.attempt, 'memory' ) } 37 | time = { check_max( 2.h * task.attempt, 'time' ) } 38 | } 39 | withName:'bwa'{ 40 | cpus = { check_max( 8, 'cpus' ) } 41 | memory = { check_max( 16.GB * task.attempt, 'memory' ) } 42 | time = { check_max( 8.h * task.attempt, 'time' ) } 43 | } 44 | withName:'dedup'{ 45 | cpus = { check_max( 8, 'cpus' ) } 46 | memory = { check_max( 16.GB * task.attempt, 'memory' ) } 47 | time = { check_max( 4.h * task.attempt, 'time' ) } 48 | } 49 | withName:'genotyping_hc'{ 50 | cpus = { check_max( 8, 'cpus' ) } 51 | memory = { check_max( 16.GB * task.attempt, 'memory' ) } 52 | time = { check_max( 8.h * task.attempt, 'time' ) } 53 | } 54 | 55 | // Ignore `--input` as otherwise the parameter validation will throw an error 56 | schema_ignore_params = 'genomes,input_paths,input' 57 | } 58 | -------------------------------------------------------------------------------- /lib/Headers.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * This file holds several functions used to render the nf-core ANSI header. 3 | */ 4 | 5 | class Headers { 6 | 7 | private static Map log_colours(Boolean monochrome_logs) { 8 | Map colorcodes = [:] 9 | colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" 10 | colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" 11 | colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" 12 | colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" 13 | colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" 14 | colorcodes['yellow_bold'] = monochrome_logs ? '' : "\033[1;93m" 15 | colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" 16 | colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" 17 | colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" 18 | colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" 19 | colorcodes['red'] = monochrome_logs ? 
'' : "\033[1;91m"
20 | return colorcodes
21 | }
22 | 
23 | static String dashed_line(monochrome_logs) {
24 | Map colors = log_colours(monochrome_logs)
25 | return "-${colors.dim}----------------------------------------------------${colors.reset}-"
26 | }
27 | 
28 | static String nf_core(workflow, monochrome_logs) {
29 | Map colors = log_colours(monochrome_logs)
30 | String.format(
31 | """\n
32 | ${dashed_line(monochrome_logs)}
33 | ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset}
34 | ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset}
35 | ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset}
36 | ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset}
37 | ${colors.green}`._,._,\'${colors.reset}
38 | ${colors.purple} ${workflow.manifest.name} v${workflow.manifest.version}${colors.reset}
39 | ${dashed_line(monochrome_logs)}
40 | """.stripIndent()
41 | )
42 | }
43 | }
44 | 
-------------------------------------------------------------------------------- /assets/angsd_resources/README: --------------------------------------------------------------------------------
1 | **These files are originally part of angsd (release 0.931). They have been added here for convenience.**
2 | 
3 | This file describes how the 'hapmap' and mappability files used by angsd are generated
4 | 
5 | ##download
6 | wget http://hapmap.ncbi.nlm.nih.gov/downloads/frequencies/2010-08_phaseII+III/allele_freqs_chrX_CEU_r28_nr.b36_fwd.txt.gz
7 | wget http://hapmap.ncbi.nlm.nih.gov/downloads/frequencies/2010-08_phaseII+III/allele_freqs_chr21_CEU_r28_nr.b36_fwd.txt.gz
8 | 
9 | #with the md5sum
10 | a105316eaa2ebbdb3f8d62a9cb10a2d5 allele_freqs_chr21_CEU_r28_nr.b36_fwd.txt.gz
11 | 5a0f920951ce2ded4afe2f10227110ac allele_freqs_chrX_CEU_r28_nr.b36_fwd.txt.gz
12 | 
13 | 
14 | ##create dummy bed file to use the liftover tools
15 | gunzip -c allele_freqs_chrX_CEU_r28_nr.b36_fwd.txt.gz| awk '{print $2" "$3-1" "$3" "$11" "$12" "$4" "$14}'|sed 1d >allele.txt
16 | 
17 | ##do the liftover
18 | liftOver allele.txt /opt/liftover/hg18ToHg19.over.chain.gz hit nohit
19 | 
20 | ##now remove invariable sites, and redundant columns
21 | cut -f1,3 --complement hit |grep -v -P "\t1.0"|grep -v -P "\t0\t"|gzip -c >HapMapchrX.gz
22 | 
23 | 
24 | ##create dummy bed file to use the liftover tools
25 | gunzip -c allele_freqs_chr21_CEU_r28_nr.b36_fwd.txt.gz| awk '{print $2" "$3-1" "$3" "$11" "$12" "$4" "$14}'|sed 1d >allele.txt
26 | 
27 | ##do the liftover
28 | liftOver allele.txt /opt/liftover/hg18ToHg19.over.chain.gz hit nohit
29 | 
30 | ##now remove invariable sites, and redundant columns
31 | cut -f1,3 --complement hit |grep -v -P "\t1.0"|grep -v -P "\t0\t"|gzip -c >HapMapchr21.gz
32 | 
33 | 
34 | #######
35 | ##download 100mer mappability
36 | wget http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeMapability/wgEncodeCrgMapabilityAlign100mer.bigWig
37 | 
38 | #md5sum
39 | a1b1a8c99431fedf6a3b4baef028cca4 wgEncodeCrgMapabilityAlign100mer.bigWig
40 | 
41 | ##download convert program
42 | wget http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/bigWigToBedGraph
43 | 
44 | ##convert
45 | ./bigWigToBedGraph wgEncodeCrgMapabilityAlign100mer.bigWig chrX -chrom=chrX
46 | ./bigWigToBedGraph wgEncodeCrgMapabilityAlign100mer.bigWig chr21 -chrom=chr21
47 | 
48 | ##only keep unique regions and discard the chr* column
49 | grep -P "\t1$" chr21 |cut -f2-3 |gzip -c >chr21.unique.gz
50 | grep -P "\t1$" chrX |cut -f2-3 |gzip -c >chrX.unique.gz
51 | 
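##optional sanity check (not part of the original angsd recipe): verify a
##download against the checksums listed above before processing, e.g.
echo "5a0f920951ce2ded4afe2f10227110ac  allele_freqs_chrX_CEU_r28_nr.b36_fwd.txt.gz" | md5sum -c -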
-------------------------------------------------------------------------------- /.github/workflows/branch.yml: --------------------------------------------------------------------------------
1 | name: nf-core branch protection
2 | # This workflow is triggered on PRs to the master branch of the repository
3 | # It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev`
4 | on:
5 | pull_request_target:
6 | branches: [master]
7 | 
8 | jobs:
9 | test:
10 | runs-on: ubuntu-latest
11 | steps:
12 | # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches
13 | - name: Check PRs
14 | if: github.repository == 'nf-core/eager'
15 | run: |
16 | { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/eager ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]]
17 | 
18 | 
19 | # If the above check failed, post a comment on the PR explaining the failure
20 | # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets
21 | - name: Post PR comment
22 | if: failure()
23 | uses: mshick/add-pr-comment@v1
24 | with:
25 | message: |
26 | ## This PR is against the `master` branch :x:
27 | 
28 | * Do not close this PR
29 | * Click _Edit_ and change the `base` to `dev`
30 | * This CI test will remain failed until you push a new commit
31 | 
32 | ---
33 | 
34 | Hi @${{ github.event.pull_request.user.login }},
35 | 
36 | It looks like this pull-request has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `master` branch.
37 | The `master` branch on nf-core repositories should always contain code from the latest release.
38 | Because of this, PRs to `master` are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch.
39 | 
40 | You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page.
41 | Note that even after this, the test will continue to show as failing until you push a new commit.
42 | 
43 | Thanks again for your contribution!
44 | repo-token: ${{ secrets.GITHUB_TOKEN }}
45 | allow-repeats: false
46 | 
47 | 
-------------------------------------------------------------------------------- /conf/benchmarking_human.config: --------------------------------------------------------------------------------
1 | /*
2 |  * -------------------------------------------------
3 |  * Nextflow config file for running tests
4 |  * -------------------------------------------------
5 |  * Defines bundled input files and everything required
6 |  * to run a fast and simple test.
Use as follows: 7 | * nextflow run nf-core/eager -profile test, docker (or singularity, or conda) 8 | */ 9 | 10 | params { 11 | config_profile_name = 'nf-core/eager benchmarking - human profile' 12 | config_profile_description = "A 'fullsized' benchmarking profile for deepish Human sequencing aDNA data" 13 | 14 | //Input data 15 | input = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Benchmarking/benchmarking_human.tsv' 16 | // Genome reference 17 | fasta = 'https://hgdownload.soe.ucsc.edu/goldenPath/hg19/bigZips/hg19.fa.gz' 18 | 19 | run_bam_filtering = true 20 | bam_unmapped_type = 'discard' 21 | bam_mapping_quality_threshold = 30 22 | 23 | dedupper = 'markduplicates' 24 | 25 | run_trim_bam = true 26 | bamutils_clip_double_stranded_none_udg_left = 1 27 | bamutils_clip_double_stranded_none_udg_right = 1 28 | 29 | // JAR will need to be downloaded first! 30 | run_genotyping = true 31 | genotyping_tool = 'ug' 32 | genotyping_source = 'trimmed' 33 | gatk_call_conf = 20 34 | 35 | run_sexdeterrmine = true 36 | sexdeterrmine_bedfile = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Human/1240K.pos.list_HG19.0based.bed.gz' 37 | 38 | run_nuclear_contamination = true 39 | contamination_chrom_name = 'chrX' 40 | 41 | run_mtnucratio = true 42 | } 43 | 44 | process { 45 | withName:'makeBWAIndex'{ 46 | time = { check_max( 4.h * task.attempt, 'time' ) } 47 | } 48 | withName:'adapter_removal'{ 49 | cpus = { check_max( 8, 'cpus' ) } 50 | memory = { check_max( 16.GB * task.attempt, 'memory' ) } 51 | time = { check_max( 2.h * task.attempt, 'time' ) } 52 | } 53 | withName:'bwa'{ 54 | cpus = { check_max( 8, 'cpus' ) } 55 | memory = { check_max( 16.GB * task.attempt, 'memory' ) } 56 | time = { check_max( 4.h * task.attempt, 'time' ) } 57 | } 58 | withName:'markDup'{ 59 | cpus = { check_max( 16, 'cpus' ) } 60 | memory = { check_max( 64.GB * task.attempt, 'memory' ) } 61 | time = { check_max( 4.h * task.attempt, 'time' ) } 62 | } 63 | withName:'damageprofiler'{ 64 | cpus = 1 65 | memory = { check_max( 8.GB * task.attempt, 'memory' ) } 66 | time = { check_max( 2.h * task.attempt, 'time' ) } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /conf/test_stresstest_human.config: -------------------------------------------------------------------------------- 1 | /* 2 | * ------------------------------------------------- 3 | * Nextflow config file for running tests 4 | * ------------------------------------------------- 5 | * Defines bundled input files and everything required 6 | * to run a fast and simple test. 
Use as follows:
7 |  * nextflow run nf-core/eager -profile test,docker (or singularity, or conda)
8 |  */
9 | 
10 | params {
11 | config_profile_name = 'nf-core/eager stresstest - human profile'
12 | config_profile_description = "A large-scale benchmarking profile for AWS stress-testing with a large number of samples"
13 | 
14 | //Input data
15 | input = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Benchmarking/human_stresstest.tsv'
16 | // Genome reference
17 | fasta = 'https://hgdownload.soe.ucsc.edu/goldenPath/hg19/bigZips/hg19.fa.gz'
18 | 
19 | save_reference = true
20 | 
21 | email = 'james@nf-co.re'
22 | 
23 | run_mtnucratio = true
24 | mtnucratio_header = 'ChrM'
25 | 
26 | run_bam_filtering = true
27 | bam_unmapped_type = 'discard'
28 | bam_mapping_quality_threshold = 30
29 | 
30 | dedupper = 'markduplicates'
31 | 
32 | run_sexdeterrmine = true
33 | sexdeterrmine_bedfile = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Human/1240K.pos.list_HG19.0based.bed.gz'
34 | 
35 | run_nuclear_contamination = true
36 | contamination_chrom_name = 'chrX'
37 | 
38 | 
39 | 
40 | 
41 | }
42 | 
43 | process {
44 | 
45 | errorStrategy = 'retry'
46 | 
47 | maxRetries = 5
48 | 
49 | withName:'makeBWAIndex'{
50 | time = { check_max( 48.h * task.attempt, 'time' ) }
51 | }
52 | withName:'adapter_removal'{
53 | cpus = { check_max( 8, 'cpus' ) }
54 | memory = { check_max( 16.GB * task.attempt, 'memory' ) }
55 | time = { check_max( 48.h * task.attempt, 'time' ) }
56 | }
57 | withName:'bwa'{
58 | cpus = { check_max( 8, 'cpus' ) }
59 | memory = { check_max( 16.GB * task.attempt, 'memory' ) }
60 | time = { check_max( 48.h * task.attempt, 'time' ) }
61 | }
62 | withName:'markduplicates'{
63 | errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' }
64 | cpus = { check_max( 16, 'cpus' ) }
65 | memory = { check_max( 16.GB * task.attempt, 'memory' ) }
66 | time = { check_max( 48.h * task.attempt, 'time' ) }
67 | }
68 | withName:'damageprofiler'{
69 | cpus = 1
70 | memory = { check_max( 8.GB * task.attempt, 'memory' ) }
71 | time = { check_max( 48.h * task.attempt, 'time' ) }
72 | }
73 | }
74 | 
-------------------------------------------------------------------------------- /bin/filter_bam_fragment_length.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | 
3 | # Written by Maxime Borry and released under the MIT license.
4 | # See git repository (https://github.com/nf-core/eager) for full license text.
5 | 
6 | import argparse
7 | import pysam
8 | 
9 | 
10 | def get_args():
11 | """This function parses and returns arguments passed in"""
12 | parser = argparse.ArgumentParser(
13 | prog="bam_filter", description="Filter bam on fragment length"
14 | )
15 | parser.add_argument("bam", help="Bam alignment file")
16 | parser.add_argument(
17 | "-l",
18 | dest="fraglen",
19 | default=35,
20 | type=int,
21 | help="Minimum fragment length. Default = 35",
22 | )
23 | parser.add_argument(
24 | "-a",
25 | dest="all",
26 | default=False,
27 | action="store_true",
28 | help="Include all reads, even unmapped",
29 | )
30 | parser.add_argument(
31 | "-o",
32 | dest="output",
33 | default=None,
34 | help="Output bam basename.
Default = {bam_basename}.filtered.bam",
35 | )
36 | 
37 | args = parser.parse_args()
38 | 
39 | bam = args.bam
40 | fraglen = args.fraglen
41 | allreads = args.all
42 | outfile = args.output
43 | 
44 | return (bam, fraglen, allreads, outfile)
45 | 
46 | 
47 | def getBasename(file_name):
48 | if ("/") in file_name:
49 | basename = file_name.split("/")[-1].split(".")[0]
50 | else:
51 | basename = file_name.split(".")[0]
52 | return basename
53 | 
54 | 
55 | def filter_bam(infile, outfile, fraglen, allreads):
56 | """Filter reads on fragment length and write them to a new bam
57 | 
58 | Args:
59 | infile (str): path to the input bam file
60 | outfile (str): Path to output bam
61 | fraglen(int): Minimum fragment length to keep
62 | allreads(bool): Apply on all reads, not only mapped
63 | """
64 | bamfile = pysam.AlignmentFile(infile, "rb")
65 | bamwrite = pysam.AlignmentFile(outfile + ".filtered.bam", "wb", template=bamfile)
66 | 
67 | for read in bamfile.fetch(until_eof=True):
68 | if allreads:
69 | if read.query_length >= fraglen:
70 | bamwrite.write(read)
71 | else:
72 | if not read.is_unmapped and read.query_length >= fraglen:
73 | bamwrite.write(read)
74 | 
75 | 
76 | if __name__ == "__main__":
77 | BAM, FRAGLEN, ALLREADS, OUTFILE = get_args()
78 | 
79 | BAMFILE = pysam.AlignmentFile(BAM, "rb")
80 | 
81 | if OUTFILE is None:
82 | OUTFILE = getBasename(BAM)
83 | 
84 | filter_bam(BAM, OUTFILE, FRAGLEN, ALLREADS)
85 | 
86 | 
-------------------------------------------------------------------------------- /assets/email_template.html: --------------------------------------------------------------------------------
1 | <html>
2 | <head>
3 | <meta charset="utf-8">
4 | <meta http-equiv="X-UA-Compatible" content="IE=edge">
5 | <meta name="viewport" content="width=device-width, initial-scale=1">
6 | 
7 | 
8 | <title>nf-core/eager Pipeline Report</title>
9 | </head>
10 | 
11 | <body>
12 | <div>
13 | 
14 | 
15 | <h1>nf-core/eager v${version}</h1>
16 | <h2>Run Name: $runName</h2>
17 | 
18 | <% if (!success){
19 | out << """
20 | <div>
21 | <h4>nf-core/eager execution completed unsuccessfully!</h4>
22 | <p>The exit status of the task that caused the workflow execution to fail was: $exitStatus.</p>
23 | <p>The full error message was:</p>
24 | <pre>${errorReport}</pre>
25 | </div>
26 | """
27 | } else {
28 | out << """
29 | <div>
30 | nf-core/eager execution completed successfully!
31 | </div>
32 | """
33 | }
34 | %>
35 | 
36 | <p>The workflow was completed at $dateComplete (duration: $duration)</p>
37 | <p>The command used to launch the workflow was as follows:</p>
38 | <pre>$commandLine</pre>
39 | 
40 | <h3>Pipeline Configuration:</h3>
41 | <table>
42 | <tbody>
43 | <% out << summary.collect{ k,v -> "<tr><th>$k</th><td><pre>$v</pre></td></tr>" }.join("\n") %>
44 | </tbody>
45 | </table>
46 | 
47 | <p>nf-core/eager</p>
48 | <p><a href="https://github.com/nf-core/eager">https://github.com/nf-core/eager</a></p>
49 | 
50 | </div>
51 | 
52 | </body>
53 | </html>
54 | 
-------------------------------------------------------------------------------- /bin/markdown_to_html.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import print_function
3 | import argparse
4 | import markdown
5 | import os
6 | import sys
7 | import io
8 | 
9 | 
10 | def convert_markdown(in_fn):
11 | input_md = io.open(in_fn, mode="r", encoding="utf-8").read()
12 | html = markdown.markdown(
13 | "[TOC]\n" + input_md,
14 | extensions=["pymdownx.extra", "pymdownx.b64", "pymdownx.highlight", "pymdownx.emoji", "pymdownx.tilde", "toc"],
15 | extension_configs={
16 | "pymdownx.b64": {"base_path": os.path.dirname(in_fn)},
17 | "pymdownx.highlight": {"noclasses": True},
18 | "toc": {"title": "Table of Contents"},
19 | },
20 | )
21 | return html
22 | 
23 | 
24 | def wrap_html(contents):
25 | header = """
26 | <html>
27 | <head>
28 | <style>
62 | </head>
63 | <body>
64 | <div class="container">
65 | """
66 | footer = """
67 | </div>
68 | </body>
69 | </html>
70 | """
71 | return header + contents + footer
72 | 
73 | 
74 | def parse_args(args=None):
75 | parser = argparse.ArgumentParser()
76 | parser.add_argument("mdfile", type=argparse.FileType("r"), nargs="?", help="File to convert. Defaults to stdin.")
77 | parser.add_argument(
78 | "-o", "--out", type=argparse.FileType("w"), default=sys.stdout, help="Output file name. Defaults to stdout."
79 | )
80 | return parser.parse_args(args)
81 | 
82 | 
83 | def main(args=None):
84 | args = parse_args(args)
85 | converted_md = convert_markdown(args.mdfile.name)
86 | html = wrap_html(converted_md)
87 | args.out.write(html)
88 | 
89 | 
90 | if __name__ == "__main__":
91 | sys.exit(main())
92 | 
-------------------------------------------------------------------------------- /bin/kraken_parse.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | # Written by Maxime Borry and released under the MIT license.
4 | # See git repository (https://github.com/nf-core/eager) for full license text.
5 | 
6 | import argparse
7 | import csv
8 | 
9 | def _get_args():
10 | '''This function parses and returns arguments passed in'''
11 | parser = argparse.ArgumentParser(
12 | prog='kraken_parse',
13 | formatter_class=argparse.RawDescriptionHelpFormatter,
14 | description='Parsing kraken')
15 | parser.add_argument('krakenReport', help="path to kraken report file")
16 | parser.add_argument(
17 | '-c',
18 | dest="count",
19 | default=50,
20 | help="Minimum number of hits on clade to report it. Default = 50")
21 | parser.add_argument(
22 | '-or',
23 | dest="readout",
24 | default=None,
25 | help="Read count output file. Default = .read_kraken_parsed.csv")
26 | parser.add_argument(
27 | '-ok',
28 | dest="kmerout",
29 | default=None,
30 | help="Kmer Output file.
Default = .kmer_kraken_parsed.csv") 31 | 32 | args = parser.parse_args() 33 | 34 | infile = args.krakenReport 35 | countlim = int(args.count) 36 | readout = args.readout 37 | kmerout = args.kmerout 38 | 39 | return(infile, countlim, readout, kmerout) 40 | 41 | 42 | def _get_basename(file_name): 43 | if ("/") in file_name: 44 | basename = file_name.split("/")[-1].split(".")[0] 45 | else: 46 | basename = file_name.split(".")[0] 47 | return(basename) 48 | 49 | 50 | def parse_kraken(infile, countlim): 51 | ''' 52 | INPUT: 53 | infile (str): path to kraken report file 54 | countlim (int): lowest count threshold to report hit 55 | OUTPUT: 56 | resdict (dict): key=taxid, value=readCount 57 | 58 | ''' 59 | with open(infile, 'r') as f: 60 | read_dict = {} 61 | kmer_dict = {} 62 | csvreader = csv.reader(f, delimiter='\t') 63 | for line in csvreader: 64 | reads = int(line[1]) 65 | if reads >= countlim: 66 | taxid = line[6] 67 | kmer = line[3] 68 | unique_kmer = line[4] 69 | try: 70 | kmer_duplicity = float(kmer)/float(unique_kmer) 71 | except ZeroDivisionError: 72 | kmer_duplicity = 0 73 | read_dict[taxid] = reads 74 | kmer_dict[taxid] = kmer_duplicity 75 | 76 | return(read_dict, kmer_dict) 77 | 78 | 79 | def write_output(resdict, infile, outfile): 80 | with open(outfile, 'w') as f: 81 | basename = _get_basename(infile) 82 | f.write(f"TAXID,{basename}\n") 83 | for akey in resdict.keys(): 84 | f.write(f"{akey},{resdict[akey]}\n") 85 | 86 | 87 | if __name__ == '__main__': 88 | INFILE, COUNTLIM, readout, kmerout = _get_args() 89 | 90 | if not readout: 91 | read_outfile = _get_basename(INFILE)+".read_kraken_parsed.csv" 92 | else: 93 | read_outfile = readout 94 | if not kmerout: 95 | kmer_outfile = _get_basename(INFILE)+".kmer_kraken_parsed.csv" 96 | else: 97 | kmer_outfile = kmerout 98 | 99 | read_dict, kmer_dict = parse_kraken(infile=INFILE, countlim=COUNTLIM) 100 | write_output(resdict=read_dict, infile=INFILE, outfile=read_outfile) 101 | write_output(resdict=kmer_dict, infile=INFILE, outfile=kmer_outfile) 102 | -------------------------------------------------------------------------------- /bin/print_x_contamination.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Written by Thiseas C. Lamnidis and released under the MIT license. 4 | # See git repository (https://github.com/nf-core/eager) for full license text. 5 | 6 | import sys, re, json 7 | from collections import OrderedDict 8 | 9 | jsonOut=OrderedDict() 10 | data=OrderedDict() 11 | 12 | ## Function to convert a set of elements into floating point numbers, when possible, else leave them be. 13 | def make_float(x): 14 | # print (x) 15 | output=[None for i in range(len(x))] 16 | ## If value for an estimate/error is -nan, replace with "NA". JSON does not accept NaN as a valid field. 17 | for i in range(len(x)): 18 | if x[i] == "-nan" or x[i] == "nan": 19 | output[i]="N/A" 20 | continue 21 | try: 22 | output[i]=float(x[i]) 23 | except: 24 | output[i]=x[i] 25 | 26 | return(tuple(output)) 27 | 28 | 29 | Input_files=sys.argv[1:] 30 | 31 | output = open("nuclear_contamination.txt", 'w') 32 | print ("Individual", "Num_SNPs", "Method1_MOM_estimate", "Method1_MOM_SE", "Method1_ML_estimate", "Method1_ML_SE", "Method2_MOM_estimate", "Method2_MOM_SE", "Method2_ML_estimate", "Method2_ML_SE", sep="\t", file=output) 33 | for fn in Input_files: 34 | ## For each file, reset the values to "N/A" so they don't carry over from last file. 
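## (The "N/A" strings, rather than NaN or None, keep the per-individual values
## JSON-serialisable: the JSON spec has no NaN literal, which is also why
## make_float() above maps nan/-nan to "N/A".)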
35 | mom1, err_mom1= "N/A","N/A" 36 | ml1, err_ml1="N/A","N/A" 37 | mom2, err_mom2= "N/A","N/A" 38 | ml2, err_ml2="N/A","N/A" 39 | nSNPs="0" 40 | with open(fn, 'r') as f: 41 | Estimates={} 42 | Ind=re.sub('\.X.contamination.out$', '', fn).split("/")[-1] 43 | for line in f: 44 | fields=line.strip().split() 45 | if line.strip()[0:19] == "We have nSNP sites:": 46 | nSNPs=fields[4].rstrip(",") 47 | elif line.strip()[0:7] == "Method1" and line.strip()[9:16] == 'new_llh': 48 | mom1=fields[3].split(":")[1] 49 | err_mom1=fields[4].split(":")[1] 50 | ml1=fields[5].split(":")[1] 51 | err_ml1=fields[6].split(":")[1] 52 | ## Sometimes angsd fails to run method 2, and the error is printed directly after the SE for ML. When that happens, exclude the first word in the error from the output. (Method 2 jsonOut will be shown as NA) 53 | if err_ml1.endswith("contamination"): 54 | err_ml1 = err_ml1[:-13] 55 | elif line.strip()[0:7] == "Method2" and line.strip()[9:16] == 'new_llh': 56 | mom2=fields[3].split(":")[1] 57 | err_mom2=fields[4].split(":")[1] 58 | ml2=fields[5].split(":")[1] 59 | err_ml2=fields[6].split(":")[1] 60 | ## Convert estimates and errors to floating point numbers 61 | (ml1, err_ml1, mom1, err_mom1, ml2, err_ml2, mom2, err_mom2) = make_float((ml1, err_ml1, mom1, err_mom1, ml2, err_ml2, mom2, err_mom2)) 62 | data[Ind]={ "Num_SNPs" : int(nSNPs), "Method1_MOM_estimate" : mom1, "Method1_MOM_SE" : err_mom1, "Method1_ML_estimate" : ml1, "Method1_ML_SE" : err_ml1, "Method2_MOM_estimate" : mom2, "Method2_MOM_SE" : err_mom2, "Method2_ML_estimate" : ml2, "Method2_ML_SE" : err_ml2 } 63 | print (Ind, nSNPs, mom1, err_mom1, ml1, err_ml1, mom2, err_mom2, ml2, err_ml2, sep="\t", file=output) 64 | 65 | 66 | jsonOut = {"plot_type": "generalstats", "id": "nuclear_contamination", 67 | "pconfig": { 68 | "Num_SNPs" : {"title" : "Number of SNPs"}, 69 | "Method1_MOM_estimate" : {"title": "Contamination Estimate (Method1_MOM)"}, 70 | "Method1_MOM_SE" : {"title": "Estimate Error (Method1_MOM)"}, 71 | "Method1_ML_estimate" : {"title": "Contamination Estimate (Method1_ML)"}, 72 | "Method1_ML_SE" : {"title": "Estimate Error (Method1_ML)"}, 73 | "Method2_MOM_estimate" : {"title": "Contamination Estimate (Method2_MOM)"}, 74 | "Method2_MOM_SE" : {"title": "Estimate Error (Method2_MOM)"}, 75 | "Method2_ML_estimate" : {"title": "Contamination Estimate (Method2_ML)"}, 76 | "Method2_ML_SE" : {"title": "Estimate Error (Method2_ML)"} 77 | }, 78 | "data" : data 79 | } 80 | with open('nuclear_contamination_mqc.json', 'w') as outfile: 81 | json.dump(jsonOut, outfile) 82 | -------------------------------------------------------------------------------- /lib/Checks.groovy: -------------------------------------------------------------------------------- 1 | import org.yaml.snakeyaml.Yaml 2 | 3 | /* 4 | * This file holds several functions used to perform standard checks for the nf-core pipeline template. 5 | */ 6 | 7 | class Checks { 8 | 9 | static void check_conda_channels(log) { 10 | Yaml parser = new Yaml() 11 | def channels = [] 12 | try { 13 | def config = parser.load("conda config --show channels".execute().text) 14 | channels = config.channels 15 | } catch(NullPointerException | IOException e) { 16 | log.warn "Could not verify conda channel configuration." 
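// (Conda may legitimately be absent, e.g. when the pipeline runs with Docker or
// Singularity, so an unreadable channel list is only warned about here rather
// than treated as a fatal error.)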
17 | return
18 | }
19 | 
20 | // Check that all channels are present
21 | def required_channels = ['conda-forge', 'bioconda', 'defaults']
22 | def conda_check_failed = !required_channels.every { ch -> ch in channels }
23 | 
24 | // Check that they are in the right order
25 | conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda'))
26 | conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults'))
27 | 
28 | if (conda_check_failed) {
29 | log.warn "=============================================================================\n" +
30 | "  There is a problem with your Conda configuration!\n\n" +
31 | "  You will need to set-up the conda-forge and bioconda channels correctly.\n" +
32 | "  Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" +
33 | "  NB: The order of the channels matters!\n" +
34 | "==================================================================================="
35 | }
36 | }
37 | 
38 | static void aws_batch(workflow, params) {
39 | if (workflow.profile.contains('awsbatch')) {
40 | assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!"
41 | // Check that outdir paths are S3 buckets if running on AWSBatch
42 | // related: https://github.com/nextflow-io/nextflow/issues/813
43 | assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!"
44 | // Prevent trace files from being stored on S3, since S3 does not support rolling files.
45 | assert !params.tracedir.startsWith('s3:') : "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles."
46 | }
47 | }
48 | 
49 | static void hostname(workflow, params, log) {
50 | Map colors = Headers.log_colours(params.monochrome_logs)
51 | if (params.hostnames) {
52 | def hostname = "hostname".execute().text.trim()
53 | params.hostnames.each { prof, hnames ->
54 | hnames.each { hname ->
55 | if (hostname.contains(hname) && !workflow.profile.contains(prof)) {
56 | log.info "=${colors.yellow}====================================================${colors.reset}=\n" +
57 | "${colors.yellow}WARN: You are running with `-profile $workflow.profile`\n" +
58 | "  but your machine hostname is ${colors.white}'$hostname'${colors.reset}.\n" +
59 | "  ${colors.yellow_bold}Please use `-profile $prof${colors.reset}`\n" +
60 | "=${colors.yellow}====================================================${colors.reset}="
61 | }
62 | }
63 | }
64 | }
65 | }
66 | 
67 | // Citation string
68 | private static String citation(workflow) {
69 | return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" +
70 | "* The pipeline\n" +
71 | "  https://doi.org/10.1101/2020.06.11.145615\n\n" +
72 | "* The nf-core framework\n" +
73 | "  https://dx.doi.org/10.1038/s41587-020-0439-x\n" +
74 | "  https://rdcu.be/b1GjZ\n\n" +
75 | "* Software dependencies\n" +
76 | "  https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md"
77 | }
78 | 
79 | 
80 | 
81 | 
82 | 
83 | 
84 | 
85 | }
86 | 
-------------------------------------------------------------------------------- /conf/base.config: --------------------------------------------------------------------------------
1 | /*
2 |  * -------------------------------------------------
3 |  * nf-core/eager Nextflow base config file
4 |  * -------------------------------------------------
5 |  * A 'blank slate' config file, appropriate for general
6 |  * use on most high performance compute environments.
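 * (Every cpus/memory/time request below is wrapped in the check_max() helper,
 * which caps it at params.max_memory / params.max_cpus / params.max_time, so
 * e.g. supplying `--max_memory '32.GB'` on the command line lowers all memory
 * requests to at most 32 GB. The defaults are set at the bottom of this file.)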
7 | * Assumes that all software is installed and available 8 | * on the PATH. Runs in `local` mode - all jobs will be 9 | * run on the logged in environment. 10 | */ 11 | 12 | process { 13 | cpus = { check_max( 1 * task.attempt, 'cpus' ) } 14 | memory = { check_max( 7.GB * task.attempt, 'memory' ) } 15 | time = { check_max( 24.h * task.attempt, 'time' ) } 16 | 17 | errorStrategy = { task.exitStatus in [143,137,104,134,139, 140] ? 'retry' : 'finish' } 18 | maxRetries = 3 19 | maxErrors = '-1' 20 | 21 | // Process-specific resource requirements 22 | // NOTE - Only one of the labels below are used in the fastqc process in the main script. 23 | // If possible, it would be nice to keep the same label naming convention when 24 | // adding in your processes. 25 | // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors 26 | 27 | // Generic resource requirements - s(ingle)c(ore)/m(ulti)c(ore) 28 | 29 | withLabel:'sc_tiny'{ 30 | cpus = { check_max( 1, 'cpus' ) } 31 | memory = { check_max( 1.GB * task.attempt, 'memory' ) } 32 | time = { check_max( 4.h * task.attempt, 'time' ) } 33 | } 34 | 35 | withLabel:'sc_small'{ 36 | cpus = { check_max( 1, 'cpus' ) } 37 | memory = { check_max( 4.GB * task.attempt, 'memory' ) } 38 | time = { check_max( 4.h * task.attempt, 'time' ) } 39 | } 40 | 41 | withLabel:'sc_medium'{ 42 | cpus = { check_max( 1, 'cpus' ) } 43 | memory = { check_max( 8.GB * task.attempt, 'memory' ) } 44 | time = { check_max( 4.h * task.attempt, 'time' ) } 45 | } 46 | 47 | withLabel:'mc_small'{ 48 | cpus = { check_max( 2 * task.attempt, 'cpus' ) } 49 | memory = { check_max( 4.GB * task.attempt, 'memory' ) } 50 | time = { check_max( 4.h * task.attempt, 'time' ) } 51 | } 52 | 53 | withLabel:'mc_medium' { 54 | cpus = { check_max( 4 * task.attempt, 'cpus' ) } 55 | memory = { check_max( 8.GB * task.attempt, 'memory' ) } 56 | time = { check_max( 4.h * task.attempt, 'time' ) } 57 | } 58 | 59 | withLabel:'mc_large'{ 60 | cpus = { check_max( 8 * task.attempt, 'cpus' ) } 61 | memory = { check_max( 16.GB * task.attempt, 'memory' ) } 62 | time = { check_max( 4.h * task.attempt, 'time' ) } 63 | } 64 | 65 | withLabel:'mc_huge'{ 66 | cpus = { check_max( 32 * task.attempt, 'cpus' ) } 67 | memory = { check_max( 256.GB * task.attempt, 'memory' ) } 68 | time = { check_max( 4.h * task.attempt, 'time' ) } 69 | } 70 | 71 | // Process-specific resource requirements (others leave at default, e.g. Fastqc) 72 | withName:get_software_versions { 73 | cache = false 74 | } 75 | 76 | withName:qualimap{ 77 | errorStrategy = { task.exitStatus in [1,143,137,104,134,139, 140] ? 'retry' : task.exitStatus in [255] ? 'ignore' : 'finish' } 78 | } 79 | 80 | withName:preseq { 81 | errorStrategy = 'ignore' 82 | } 83 | 84 | withName:damageprofiler { 85 | errorStrategy = { task.exitStatus in [1,143,137,104,134,139, 140] ? 'retry' : 'finish' } 86 | } 87 | 88 | // Add 1 retry for certain java tools as not enough heap space java errors gives exit code 1 89 | withName: dedup { 90 | errorStrategy = { task.exitStatus in [1,143,137,104,134,139, 140] ? 'retry' : 'finish' } 91 | } 92 | 93 | withName: markduplicates { 94 | errorStrategy = { task.exitStatus in [143,137, 140] ? 'retry' : 'finish' } 95 | } 96 | 97 | // Add 1 retry as not enough heapspace java error gives exit code 1 98 | withName: malt { 99 | errorStrategy = { task.exitStatus in [1,143,137,104,134,139, 140] ? 
'retry' : 'finish' } 100 | } 101 | 102 | // other process specific exit statuses 103 | withName: nuclear_contamination { 104 | errorStrategy = { task.exitStatus in [143,137,104,134,139, 140] ? 'ignore' : 'retry' } 105 | } 106 | 107 | } 108 | 109 | params { 110 | // Defaults only, expecting to be overwritten 111 | max_memory = 128.GB 112 | max_cpus = 16 113 | max_time = 240.h 114 | igenomes_base = 's3://ngi-igenomes/igenomes/' 115 | } 116 | -------------------------------------------------------------------------------- /.github/workflows/linting.yml: -------------------------------------------------------------------------------- 1 | name: nf-core linting 2 | # This workflow is triggered on pushes and PRs to the repository. 3 | # It runs the `nf-core lint` and markdown lint tests to ensure that the code meets the nf-core guidelines 4 | on: 5 | push: 6 | pull_request: 7 | release: 8 | types: [published] 9 | 10 | jobs: 11 | Markdown: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | - uses: actions/setup-node@v2 16 | 17 | - name: Install markdownlint 18 | run: npm install -g markdownlint-cli 19 | - name: Run Markdownlint 20 | run: markdownlint ${GITHUB_WORKSPACE} -c ${GITHUB_WORKSPACE}/.github/markdownlint.yml 21 | 22 | # If the above check failed, post a comment on the PR explaining the failure 23 | - name: Post PR comment 24 | if: failure() 25 | uses: mshick/add-pr-comment@v1 26 | with: 27 | message: | 28 | ## Markdown linting is failing 29 | 30 | To keep the code consistent with lots of contributors, we run automated code consistency checks. 31 | To fix this CI test, please run: 32 | 33 | * Install `markdownlint-cli` 34 | * On Mac: `brew install markdownlint-cli` 35 | * Everything else: [Install `npm`](https://www.npmjs.com/get-npm) then [install `markdownlint-cli`](https://www.npmjs.com/package/markdownlint-cli) (`npm install -g markdownlint-cli`) 36 | * Fix the markdown errors 37 | * Automatically: `markdownlint . --config .github/markdownlint.yml --fix` 38 | * Manually resolve anything left from `markdownlint . --config .github/markdownlint.yml` 39 | 40 | Once you push these changes the test should pass, and you can hide this comment :+1: 41 | 42 | We highly recommend setting up markdownlint in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! 43 | 44 | Thanks again for your contribution! 45 | repo-token: ${{ secrets.GITHUB_TOKEN }} 46 | allow-repeats: false 47 | 48 | YAML: 49 | runs-on: ubuntu-latest 50 | steps: 51 | - uses: actions/checkout@v1 52 | - uses: actions/setup-node@v2 53 | 54 | - name: Install yaml-lint 55 | run: npm install -g yaml-lint 56 | - name: Run yaml-lint 57 | run: yamllint $(find ${GITHUB_WORKSPACE} -type f -name "*.yml" -o -name "*.yaml") -c .github/yamllint.yml 58 | 59 | # If the above check failed, post a comment on the PR explaining the failure 60 | - name: Post PR comment 61 | if: failure() 62 | uses: mshick/add-pr-comment@v1 63 | with: 64 | message: | 65 | ## YAML linting is failing 66 | 67 | To keep the code consistent with lots of contributors, we run automated code consistency checks. 68 | To fix this CI test, please run: 69 | 70 | * Install `yaml-lint` 71 | * [Install `npm`](https://www.npmjs.com/get-npm) then [install `yaml-lint`](https://www.npmjs.com/package/yaml-lint) (`npm install -g yaml-lint`) 72 | * Fix the markdown errors 73 | * Run the test locally: `yamllint $(find . 
-type f -name "*.yml" -o -name "*.yaml")` 74 | * Fix any reported errors in your YAML files 75 | 76 | Once you push these changes the test should pass, and you can hide this comment :+1: 77 | 78 | We highly recommend setting up yaml-lint in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! 79 | 80 | Thanks again for your contribution! 81 | repo-token: ${{ secrets.GITHUB_TOKEN }} 82 | allow-repeats: false 83 | 84 | nf-core: 85 | runs-on: ubuntu-latest 86 | steps: 87 | - name: Check out pipeline code 88 | uses: actions/checkout@v2 89 | 90 | - name: Install Nextflow 91 | env: 92 | CAPSULE_LOG: none 93 | run: | 94 | wget -qO- get.nextflow.io | bash 95 | sudo mv nextflow /usr/local/bin/ 96 | 97 | - uses: actions/setup-python@v1 98 | with: 99 | python-version: "3.6" 100 | architecture: "x64" 101 | 102 | - name: Install dependencies 103 | run: | 104 | python -m pip install --upgrade pip 105 | pip install nf-core==1.14 106 | 107 | - name: Run nf-core lint 108 | env: 109 | GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} 110 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 111 | GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} 112 | run: nf-core -l lint_log.txt lint ${GITHUB_WORKSPACE} --markdown lint_results.md 113 | 114 | - name: Save PR number 115 | if: ${{ always() }} 116 | run: echo ${{ github.event.pull_request.number }} > PR_number.txt 117 | 118 | - name: Upload linting log file artifact 119 | if: ${{ always() }} 120 | uses: actions/upload-artifact@v2 121 | with: 122 | name: linting-logs 123 | path: | 124 | lint_log.txt 125 | lint_results.md 126 | PR_number.txt 127 | -------------------------------------------------------------------------------- /bin/endorS.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Written by Aida Andrades Valtueña and released under MIT license. 4 | # See git repository (https://github.com/aidaanva/endorS.py) for full license text. 5 | 6 | """Script to calculate the endogenous DNA in a sample from samtools flag stats. 7 | It can accept up to two files: pre-quality and post-quality filtering. We recommend 8 | to use both files but you can also use the pre-quality filtering. 9 | """ 10 | import re 11 | import sys 12 | import json 13 | import argparse 14 | import textwrap 15 | 16 | parser = argparse.ArgumentParser(prog='endorS.py', 17 | usage='python %(prog)s [-h] [--version] .stats [.stats]', 18 | formatter_class=argparse.RawDescriptionHelpFormatter, 19 | description=textwrap.dedent('''\ 20 | author: 21 | Aida Andrades Valtueña (aida.andrades[at]gmail.com) 22 | 23 | description: 24 | %(prog)s calculates endogenous DNA from samtools flagstat files and print to screen 25 | Use --output flag to write results to a file 26 | ''')) 27 | parser.add_argument('samtoolsfiles', metavar='.stats', type=str, nargs='+', 28 | help='output of samtools flagstat in a txt file (at least one required). If two files are supplied, the mapped reads of the second file is divided by the total reads in the first, since it assumes that the are related to the same sample. Useful after BAM filtering') 29 | parser.add_argument('-v','--version', action='version', version='%(prog)s 0.4') 30 | parser.add_argument('--output', '-o', nargs='?', help='specify a file format for an output file. Options: for a MultiQC json output. Default: none') 31 | parser.add_argument('--name', '-n', nargs='?', help='specify name for the output file. 
Default: extracted from the first samtools flagstat file provided') 32 | args = parser.parse_args() 33 | 34 | #Open the samtools flag stats pre-quality filtering: 35 | try: 36 | with open(args.samtoolsfiles[0], 'r') as pre: 37 | contentsPre = pre.read() 38 | #Extract number of total reads 39 | totalReads = float((re.findall(r'^([0-9]+) \+ [0-9]+ in total',contentsPre))[0]) 40 | #Extract number of mapped reads pre-quality filtering: 41 | mappedPre = float((re.findall(r'([0-9]+) \+ [0-9]+ mapped ',contentsPre))[0]) 42 | #Calculation of endogenous DNA pre-quality filtering: 43 | if totalReads == 0.0: 44 | endogenousPre = 0.000000 45 | print("WARNING: no reads in the fastq input, Endogenous DNA raw (%) set to 0.000000") 46 | elif mappedPre == 0.0: 47 | endogenousPre = 0.000000 48 | print("WARNING: no mapped reads, Endogenous DNA raw (%) set to 0.000000") 49 | else: 50 | endogenousPre = float("{0:.6f}".format(round((mappedPre / totalReads * 100), 6))) 51 | except: 52 | print("Incorrect input, please provide at least a samtools flag stats as input\nRun:\npython endorS.py --help \nfor more information on how to run this script") 53 | sys.exit() 54 | #Check if the samtools stats post-quality filtering have been provided: 55 | try: 56 | #Open the samtools flag stats post-quality filtering: 57 | with open(args.samtoolsfiles[1], 'r') as post: 58 | contentsPost = post.read() 59 | #Extract number of mapped reads post-quality filtering: 60 | mappedPost = float((re.findall(r'([0-9]+) \+ [0-9]+ mapped',contentsPost))[0]) 61 | #Calculation of endogenous DNA post-quality filtering: 62 | if totalReads == 0.0: 63 | endogenousPost = 0.000000 64 | print("WARNING: no reads in the fastq input, Endogenous DNA modified (%) set to 0.000000") 65 | elif mappedPost == 0.0: 66 | endogenousPost = 0.000000 67 | print("WARNING: no mapped reads, Endogenous DNA modified (%) set to 0.000000") 68 | else: 69 | endogenousPost = float("{0:.6f}".format(round((mappedPost / totalReads * 100),6))) 70 | except: 71 | print("Only one samtools flagstat file provided") 72 | #Set the number of reads post-quality filtering to 0 if samtools 73 | #samtools flag stats not provided: 74 | mappedPost = "NA" 75 | 76 | #Setting the name depending on the -name flag: 77 | if args.name is not None: 78 | name = args.name 79 | else: 80 | #Set up the name based on the first samtools flagstats: 81 | name= str(((args.samtoolsfiles[0].rsplit(".",1)[0]).rsplit("/"))[-1]) 82 | #print(name) 83 | 84 | 85 | if mappedPost == "NA": 86 | #Creating the json file 87 | jsonOutput={ 88 | "id": "endorSpy", 89 | "plot_type": "generalstats", 90 | "pconfig": { 91 | "endogenous_dna": { "max": 100, "min": 0, "title": "Endogenous DNA (%)", "format": '{:,.2f}'} 92 | }, 93 | "data": { 94 | name : { "endogenous_dna": endogenousPre} 95 | } 96 | } 97 | else: 98 | #Creating the json file 99 | jsonOutput={ 100 | "id": "endorSpy", 101 | "plot_type": "generalstats", 102 | "pconfig": { 103 | "endogenous_dna": { "max": 100, "min": 0, "title": "Endogenous DNA (%)", "format": '{:,.2f}'}, 104 | "endogenous_dna_post": { "max": 100, "min": 0, "title": "Endogenous DNA Post (%)", "format": '{:,.2f}'} 105 | }, 106 | "data": { 107 | name : { "endogenous_dna": endogenousPre, "endogenous_dna_post": endogenousPost} 108 | }, 109 | } 110 | #Checking for print to screen argument: 111 | if args.output is not None: 112 | #Creating file with the named after the name variable: 113 | #Writing the json output: 114 | fileName = name + "_endogenous_dna_mqc.json" 115 | #print(fileName) 116 | with 
open(fileName, "w+") as outfile: 117 | json.dump(jsonOutput, outfile) 118 | print(fileName,"has been generated") 119 | else: 120 | if mappedPost == "NA": 121 | print("Endogenous DNA (%):",endogenousPre) 122 | else: 123 | print("Endogenous DNA raw (%):",endogenousPre) 124 | print("Endogenous DNA modified (%):",endogenousPost) 125 | -------------------------------------------------------------------------------- /bin/scrape_software_versions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | from collections import OrderedDict 4 | import re 5 | 6 | regexes = { 7 | "nf-core/eager": ["v_pipeline.txt", r"(\S+)"], 8 | "Nextflow": ["v_nextflow.txt", r"(\S+)"], 9 | "FastQC": ["v_fastqc.txt", r"FastQC v(\S+)"], 10 | "MultiQC": ["v_multiqc.txt", r"multiqc, version (\S+)"], 11 | 'AdapterRemoval':['v_adapterremoval.txt', r"AdapterRemoval ver. (\S+)"], 12 | 'Picard MarkDuplicates': ['v_markduplicates.txt', r"Version:(\S+)"], 13 | 'Samtools': ['v_samtools.txt', r"samtools (\S+)"], 14 | 'Preseq': ['v_preseq.txt', r"Version: (\S+)"], 15 | 'BWA': ['v_bwa.txt', r"Version: (\S+)"], 16 | 'Bowtie2': ['v_bowtie2.txt', r"bowtie2-([0-9]+\.[0-9]+\.[0-9]+) -fdebug"], 17 | 'Qualimap': ['v_qualimap.txt', r"QualiMap v.(\S+)"], 18 | 'GATK HaplotypeCaller': ['v_gatk.txt', r"The Genome Analysis Toolkit \(GATK\) v(\S+)"], 19 | 'GATK UnifiedGenotyper': ['v_gatk3.txt', r"(\S+)"], 20 | 'bamUtil' : ['v_bamutil.txt', r"Version: (\S+);"], 21 | 'fastP': ['v_fastp.txt', r"([\d\.]+)"], 22 | 'DamageProfiler' : ['v_damageprofiler.txt', r"DamageProfiler v(\S+)"], 23 | 'angsd':['v_angsd.txt',r"version: (\S+)"], 24 | 'bedtools':['v_bedtools.txt',r"bedtools v(\S+)"], 25 | 'circulargenerator':['v_circulargenerator.txt',r"CircularGeneratorv(\S+)"], 26 | 'DeDup':['v_dedup.txt',r"DeDup v(\S+)"], 27 | 'freebayes':['v_freebayes.txt',r"v([0-9]\S+)"], 28 | 'sequenceTools':['v_sequencetools.txt',r"(\S+)"], 29 | 'maltextract':['v_maltextract.txt', r"version(\S+)"], 30 | 'malt':['v_malt.txt',r"version (\S+)"], 31 | 'multivcfanalyzer':['v_multivcfanalyzer.txt', r"MultiVCFAnalyzer - (\S+)"], 32 | 'pmdtools':['v_pmdtools.txt',r"pmdtools v(\S+)"], 33 | 'sexdeterrmine':['v_sexdeterrmine.txt',r"(\S+)"], 34 | 'MTNucRatioCalculator':['v_mtnucratiocalculator.txt',r"Version: (\S+)"], 35 | 'VCF2genome':['v_vcf2genome.txt', r"VCF2Genome \(v. 
([0-9].[0-9]+) "], 36 | 'endorS.py':['v_endorSpy.txt', r"endorS.py (\S+)"], 37 | 'kraken':['v_kraken.txt', r"Kraken version (\S+)"], 38 | 'eigenstrat_snp_coverage':['v_eigenstrat_snp_coverage.txt',r"(\S+)"], 39 | 'mapDamage2':['v_mapdamage.txt',r"(\S+)"], 40 | 'bbduk':['v_bbduk.txt',r"(.*)"], 41 | 'bcftools':['v_bcftools.txt',r"(\S+)"] 42 | } 43 | 44 | results = OrderedDict() 45 | results["nf-core/eager"] = 'N/A' 46 | results["Nextflow"] = 'N/A' 47 | results["FastQC"] = 'N/A' 48 | results["MultiQC"] = 'N/A' 49 | results['AdapterRemoval'] = 'N/A' 50 | results['fastP'] = 'N/A' 51 | results['BWA'] = 'N/A' 52 | results['Bowtie2'] = 'N/A' 53 | results['circulargenerator'] = 'N/A' 54 | results['Samtools'] = 'N/A' 55 | results['endorS.py'] = 'N/A' 56 | results['DeDup'] = 'N/A' 57 | results['Picard MarkDuplicates'] = 'N/A' 58 | results['Qualimap'] = 'N/A' 59 | results['Preseq'] = 'N/A' 60 | results['GATK HaplotypeCaller'] = 'N/A' 61 | results['GATK UnifiedGenotyper'] = 'N/A' 62 | results['freebayes'] = 'N/A' 63 | results['sequenceTools'] = 'N/A' 64 | results['VCF2genome'] = 'N/A' 65 | results['MTNucRatioCalculator'] = 'N/A' 66 | results['bedtools'] = 'N/A' 67 | results['DamageProfiler'] = 'N/A' 68 | results['bamUtil'] = 'N/A' 69 | results['pmdtools'] = 'N/A' 70 | results['angsd'] = 'N/A' 71 | results['sexdeterrmine'] = 'N/A' 72 | results['multivcfanalyzer'] = 'N/A' 73 | results['malt'] = 'N/A' 74 | results['kraken'] = 'N/A' 75 | results['maltextract'] = 'N/A' 76 | results['eigenstrat_snp_coverage'] = 'N/A' 77 | results['mapDamage2'] = 'N/A' 78 | results['bbduk'] = 'N/A' 79 | results['bcftools'] = 'N/A' 80 | 81 | # Search each file using its regex 82 | for k, v in regexes.items(): 83 | try: 84 | with open(v[0]) as x: 85 | versions = x.read() 86 | match = re.search(v[1], versions) 87 | if match: 88 | results[k] = "v{}".format(match.group(1)) 89 | except IOError: 90 | results[k] = False 91 | 92 | # Remove software set to false in results 93 | for k in list(results): 94 | if not results[k]: 95 | del results[k] 96 | 97 | # Dump to YAML 98 | print( 99 | """ 100 | id: 'software_versions' 101 | section_name: 'nf-core/eager Software Versions' 102 | section_href: 'https://github.com/nf-core/eager' 103 | plot_type: 'html' 104 | description: 'are collected at run time from the software output.' 105 | data: | 106 |
    <dl class="dl-horizontal">
107 | """
108 | )
109 | for k, v in results.items():
110 | print("        <dt>{}</dt><dd><samp>{}</samp></dd>".format(k, v))
111 | print("    </dl>")
112 | 
113 | # Write out regexes as csv file:
114 | with open("software_versions.csv", "w") as f:
115 | for k, v in results.items():
116 | f.write("{}\t{}\n".format(k, v))
117 | 
-------------------------------------------------------------------------------- /bin/extract_map_reads.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | 
3 | # Written by Maxime Borry and released under the MIT license.
4 | # See git repository (https://github.com/nf-core/eager) for full license text.
5 | 
6 | import argparse
7 | import pysam
8 | from xopen import xopen
9 | import logging
10 | import os
11 | from pathlib import Path
12 | 
13 | 
14 | def _get_args():
15 | """This function parses and returns arguments passed in"""
16 | parser = argparse.ArgumentParser(
17 | prog="extract_mapped_reads",
18 | formatter_class=argparse.RawDescriptionHelpFormatter,
19 | description="Remove reads mapped in the bam file from the fastq files",
20 | )
21 | parser.add_argument("bam_file", help="path to bam file")
22 | parser.add_argument("fwd", help="path to forward fastq file")
23 | parser.add_argument(
24 | "-merged",
25 | dest="merged",
26 | default=False,
27 | action="store_true",
28 | help="specify if bam file was created from merged fastq files",
29 | )
30 | parser.add_argument(
31 | "-rev", dest="rev", default=None, help="path to reverse fastq file"
32 | )
33 | parser.add_argument(
34 | "-of", dest="out_fwd", default=None, help="path to forward output fastq file"
35 | )
36 | parser.add_argument(
37 | "-or", dest="out_rev", default=None, help="path to reverse output fastq file"
38 | )
39 | parser.add_argument(
40 | "-m",
41 | dest="mode",
42 | default="remove",
43 | help="Read removal mode: remove reads (remove) or replace sequence by N (replace).
Default = remove", 44 | ) 45 | parser.add_argument( 46 | "-t", dest="threads", default=4, help="Number of parallel threads" 47 | ) 48 | 49 | args = parser.parse_args() 50 | 51 | bam = args.bam_file 52 | in_fwd = args.fwd 53 | merged = args.merged 54 | in_rev = args.rev 55 | out_fwd = args.out_fwd 56 | out_rev = args.out_rev 57 | mode = args.mode 58 | threads = int(args.threads) 59 | 60 | return (bam, in_fwd, merged, in_rev, out_fwd, out_rev, mode, threads) 61 | 62 | 63 | def extract_mapped(bamfile, merged): 64 | """Get the names of mapped reads from a BAM file 65 | Args: 66 | bamfile(str): path to BAM alignment file 67 | merged(bool): True if bam file was created from merged fastq files 68 | Returns: 69 | mapped_reads(set): set of mapped read names (str) 70 | """ 71 | 72 | if bamfile.endswith(".bam") or bamfile.endswith(".gz"): 73 | read_mode = "rb" 74 | else: 75 | read_mode = "r" 76 | mapped_reads = set() 77 | bamfile = pysam.AlignmentFile(bamfile, mode=read_mode) 78 | for read in bamfile.fetch(): 79 | if not read.is_unmapped:  # checking read.flag != 4 would miscount unmapped reads carrying extra flag bits 80 | if merged: 81 | if read.query_name.startswith("M_"): 82 | mapped_reads.add(read.query_name[2:]) 83 | elif read.query_name.startswith("MT_"): 84 | mapped_reads.add(read.query_name[3:]) 85 | else: 86 | mapped_reads.add(read.query_name) 87 | else: 88 | mapped_reads.add(read.query_name) 89 | return mapped_reads 90 | 91 | 92 | def read_write_fq(fq_in, fq_out, mapped_reads, mode, write_mode, proc): 93 | """ 94 | Read and write fastq file with mapped reads removed 95 | Args: 96 | fq_in(str): path to input fastq file 97 | fq_out(str): path to output fastq file 98 | mapped_reads(set): set of mapped read names (str) 99 | mode(str): read removal mode (remove or replace) 100 | write_mode(str): write mode (w or wb) 101 | proc(int): number of parallel threads for compressed output 102 | """ 103 | 104 | if write_mode == "w": 105 | cm = open(fq_out, write_mode) 106 | elif write_mode == "wb": 107 | cm = xopen(fq_out, mode=write_mode, threads=proc) 108 | with pysam.FastxFile(fq_in) as fh: 109 | with cm as fh_out: 110 | for read in fh: 111 | try: 112 | if read.name in mapped_reads: 113 | if mode == "replace": 114 | read.sequence = "N" * len(read.sequence) 115 | read = str(read) + "\n" 116 | if write_mode == "w": 117 | fh_out.write(read) 118 | elif write_mode == "wb": 119 | fh_out.write(read.encode()) 120 | else: 121 | read = str(read) + "\n" 122 | if write_mode == "w": 123 | fh_out.write(read) 124 | elif write_mode == "wb": 125 | fh_out.write(read.encode()) 126 | except Exception as e: 127 | logging.error(f"Problem with {str(read)}") 128 | logging.error(e) 129 | 130 | def check_remove_mode(mode): 131 | if mode.lower() not in ["replace", "remove"]: 132 | logging.error(f"Mode must be 'remove' or 'replace', got '{mode}'") 133 | return mode.lower() 134 | 135 | 136 | if __name__ == "__main__": 137 | BAM, IN_FWD, MERGED, IN_REV, OUT_FWD, OUT_REV, MODE, PROC = _get_args() 138 | 139 | logging.basicConfig(level=logging.INFO, format="%(message)s") 140 | 141 | if OUT_FWD is None: 142 | out_fwd = os.path.join(os.getcwd(), Path(IN_FWD).stem + ".r1.fq.gz") 143 | else: 144 | out_fwd = OUT_FWD 145 | 146 | if out_fwd.endswith(".gz"): 147 | write_mode = "wb" 148 | else: 149 | write_mode = "w" 150 | 151 | remove_mode = check_remove_mode(MODE) 152 | 153 | # FORWARD OR SE FILE 154 | logging.info(f"- Extracting mapped reads from {BAM}") 155 | mapped_reads = extract_mapped(BAM, merged=MERGED) 156 | logging.info(f"- Checking forward fq file {IN_FWD}") 157 | read_write_fq( 158 | fq_in=IN_FWD, 159 |
fq_out=out_fwd, 160 | mapped_reads=mapped_reads, 161 | mode=remove_mode, 162 | write_mode=write_mode, 163 | proc=PROC, 164 | ) 165 | logging.info(f"- Cleaned forward FastQ file written to {out_fwd}") 166 | 167 | # REVERSE FILE 168 | if IN_REV: 169 | if OUT_REV is None: 170 | out_rev = os.path.join(os.getcwd(), Path(IN_REV).stem + ".r2.fq.gz") 171 | else: 172 | out_rev = OUT_REV 173 | logging.info(f"- Checking reverse fq file {IN_REV}") 174 | read_write_fq( 175 | fq_in=IN_REV, 176 | fq_out=out_rev, 177 | mapped_reads=mapped_reads, 178 | mode=remove_mode, 179 | write_mode=write_mode, 180 | proc=PROC, 181 | ) 182 | logging.info(f"- Cleaned reverse FastQ file written to {out_rev}") 183 | -------------------------------------------------------------------------------- /lib/Completion.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * Functions to be run on completion of pipeline 3 | */ 4 | 5 | class Completion { 6 | static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { 7 | 8 | // Set up the e-mail variables 9 | def subject = "[$workflow.manifest.name] Successful: $workflow.runName" 10 | 11 | if (!workflow.success) { 12 | subject = "[$workflow.manifest.name] FAILED: $workflow.runName" 13 | } 14 | 15 | def summary = [:] 16 | for (group in summary_params.keySet()) { 17 | summary << summary_params[group] 18 | } 19 | 20 | def misc_fields = [:] 21 | misc_fields['Date Started'] = workflow.start 22 | misc_fields['Date Completed'] = workflow.complete 23 | misc_fields['Pipeline script file path'] = workflow.scriptFile 24 | misc_fields['Pipeline script hash ID'] = workflow.scriptId 25 | if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository 26 | if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId 27 | if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision 28 | misc_fields['Nextflow Version'] = workflow.nextflow.version 29 | misc_fields['Nextflow Build'] = workflow.nextflow.build 30 | misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp 31 | 32 | def email_fields = [:] 33 | email_fields['version'] = workflow.manifest.version 34 | email_fields['runName'] = workflow.runName 35 | email_fields['success'] = workflow.success 36 | email_fields['dateComplete'] = workflow.complete 37 | email_fields['duration'] = workflow.duration 38 | email_fields['exitStatus'] = workflow.exitStatus 39 | email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') 40 | email_fields['errorReport'] = (workflow.errorReport ?: 'None') 41 | email_fields['commandLine'] = workflow.commandLine 42 | email_fields['projectDir'] = workflow.projectDir 43 | email_fields['summary'] = summary << misc_fields 44 | 45 | // On success, try to attach the MultiQC report 46 | def mqc_report = null 47 | try { 48 | if (workflow.success) { 49 | mqc_report = multiqc_report.getVal() 50 | if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { 51 | if (mqc_report.size() > 1) { 52 | log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" 53 | } 54 | mqc_report = mqc_report[0] 55 | } 56 | } 57 | } catch (all) { 58 | log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" 59 | } 60 | 61 | // Check if we are only sending emails on failure 62 | def email_address = params.email 63 | if (!params.email && params.email_on_fail && !workflow.success) { 64 | email_address = 
params.email_on_fail 65 | } 66 | 67 | // Render the TXT template 68 | def engine = new groovy.text.GStringTemplateEngine() 69 | def tf = new File("$projectDir/assets/email_template.txt") 70 | def txt_template = engine.createTemplate(tf).make(email_fields) 71 | def email_txt = txt_template.toString() 72 | 73 | // Render the HTML template 74 | def hf = new File("$projectDir/assets/email_template.html") 75 | def html_template = engine.createTemplate(hf).make(email_fields) 76 | def email_html = html_template.toString() 77 | 78 | // Render the sendmail template 79 | def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit 80 | def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes()] 81 | def sf = new File("$projectDir/assets/sendmail_template.txt") 82 | def sendmail_template = engine.createTemplate(sf).make(smail_fields) 83 | def sendmail_html = sendmail_template.toString() 84 | 85 | // Send the HTML e-mail 86 | Map colors = Headers.log_colours(params.monochrome_logs) 87 | if (email_address) { 88 | try { 89 | if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } 90 | // Try to send HTML e-mail using sendmail 91 | [ 'sendmail', '-t' ].execute() << sendmail_html 92 | log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" 93 | } catch (all) { 94 | // Catch failures and try with plaintext 95 | def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] 96 | if ( mqc_report.size() <= max_multiqc_email_size.toBytes() ) { 97 | mail_cmd += [ '-A', mqc_report ] 98 | } 99 | mail_cmd.execute() << email_html 100 | log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" 101 | } 102 | } 103 | 104 | // Write summary e-mail HTML to a file 105 | def output_d = new File("${params.outdir}/pipeline_info/") 106 | if (!output_d.exists()) { 107 | output_d.mkdirs() 108 | } 109 | def output_hf = new File(output_d, "pipeline_report.html") 110 | output_hf.withWriter { w -> w << email_html } 111 | def output_tf = new File(output_d, "pipeline_report.txt") 112 | output_tf.withWriter { w -> w << email_txt } 113 | } 114 | 115 | static void summary(workflow, params, log, fail_percent_mapped=[:], pass_percent_mapped=[:]) { 116 | Map colors = Headers.log_colours(params.monochrome_logs) 117 | 118 | if (workflow.success) { 119 | if (workflow.stats.ignoredCount == 0) { 120 | log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" 121 | } else { 122 | log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" 123 | } 124 | } else { 125 | Checks.hostname(workflow, params, log) 126 | log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" 127 | } 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct at nf-core (v1.0) 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core, pledge to making participation in our 
projects and community a harassment-free experience for everyone, regardless of: 6 | 7 | - Age 8 | - Body size 9 | - Familial status 10 | - Gender identity and expression 11 | - Geographical location 12 | - Level of experience 13 | - Nationality and national origins 14 | - Native language 15 | - Physical and neurological ability 16 | - Race or ethnicity 17 | - Religion 18 | - Sexual identity and orientation 19 | - Socioeconomic status 20 | 21 | Please note that the list above is alphabetised and is therefore not ranked in any order of preference or importance. 22 | 23 | ## Preamble 24 | 25 | > Note: This Code of Conduct (CoC) has been drafted by the nf-core Safety Officer and has been edited after input from members of the nf-core team and others. "We", in this document, refers to the Safety Officer and members of the nf-core core team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date, and in case of any dispute, the most current version will apply. 26 | 27 | An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). Our current safety officer is Renuka Kudva. 28 | 29 | nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. 30 | 31 | We have therefore adopted this code of conduct (CoC), which we require all members of our community and attendees at nf-core events to adhere to in all our workspaces at all times. Workspaces include but are not limited to Slack, meetings on Zoom, Jitsi, YouTube live etc. 32 | 33 | Our CoC will be strictly enforced and the nf-core team reserves the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. 34 | 35 | We ask all members of our community to help maintain a supportive and productive workspace and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. 36 | 37 | Questions, concerns or ideas on what we can include? Contact safety [at] nf-co [dot] re 38 | 39 | ## Our Responsibilities 40 | 41 | The safety officer is responsible for clarifying the standards of acceptable behaviour and is expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. 42 | 43 | The safety officer, in consultation with the nf-core core team, has the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviours that they deem inappropriate, threatening, offensive, or harmful. 44 | 45 | Members of the core team or the safety officer who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. 46 | 47 | ## When and where does this Code of Conduct apply? 48 | 49 | Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events. 
This includes but is not limited to the following listed alphabetically and therefore in no order of preference: 50 | 51 | - Communicating with an official project email address. 52 | - Communicating with community members within the nf-core Slack channel. 53 | - Participating in hackathons organised by nf-core (both online and in-person events). 54 | - Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence. 55 | - Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, Jitsi, YouTube live etc. 56 | - Representing nf-core on social media. This includes both official and personal accounts. 57 | 58 | ## nf-core cares 😊 59 | 60 | nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include but are not limited to the following (listed in alphabetical order): 61 | 62 | - Ask for consent before sharing another community member’s personal information (including photographs) on social media. 63 | - Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. 64 | - Celebrate your accomplishments at events! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) 65 | - Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) 66 | - Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) 67 | - Focus on what is best for the team and the community. (When in doubt, ask) 68 | - Graciously accept constructive criticism, yet be unafraid to question, deliberate, and learn. 69 | - Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) 70 | - Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communications to be kind.**) 71 | - Take breaks when you feel like you need them. 72 | - Using welcoming and inclusive language. (Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack.) 73 | 74 | ## nf-core frowns on 😕 75 | 76 | The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this code of conduct. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces. 77 | 78 | - Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. 79 | - “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. 80 | - Spamming or trolling of individuals on social media. 
81 | - Use of sexual or discriminatory imagery, comments, or jokes and unwelcome sexual attention. 82 | - Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion or work experience. 83 | 84 | ### Online Trolling 85 | 86 | The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the added issue of online trolling. This is unacceptable; reports of such behaviour will be taken very seriously, and perpetrators will be excluded from activities immediately. 87 | 88 | All community members are required to ask members of the group they are working within for explicit consent prior to taking screenshots of individuals during video calls. 89 | 90 | ## Procedures for Reporting CoC violations 91 | 92 | If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. 93 | 94 | You can reach out to members of the [nf-core core team](https://nf-co.re/about) and they will forward your concerns to the safety officer(s). 95 | 96 | Issues directly concerning members of the core team will be dealt with by other members of the core team and the safety manager, and possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson, and details will be shared in due course. 97 | 98 | All reports will be handled with the utmost discretion and confidentiality. 99 | 100 | ## Attribution and Acknowledgements 101 | 102 | - The [Contributor Covenant, version 1.4](http://contributor-covenant.org/version/1/4) 103 | - The [OpenCon 2017 Code of Conduct](http://www.opencon2017.org/code_of_conduct) (CC BY 4.0 OpenCon organisers, SPARC and Right to Research Coalition) 104 | - The [eLife innovation sprint 2020 Code of Conduct](https://sprint.elifesciences.org/code-of-conduct/) 105 | - The [Mozilla Community Participation Guidelines v3.1](https://www.mozilla.org/en-US/about/governance/policies/participation/) (version 3.1, CC BY-SA 3.0 Mozilla) 106 | 107 | ## Changelog 108 | 109 | ### v1.0 - March 12th, 2021 110 | 111 | - Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. 112 | -------------------------------------------------------------------------------- /assets/multiqc_config.yaml: -------------------------------------------------------------------------------- 1 | custom_logo: "nf-core_eager_logo_outline_drop.png" 2 | custom_logo_url: https://github.com/nf-core/eager/ 3 | custom_logo_title: "nf-core/eager" 4 | 5 | report_comment: > 6 | This report has been generated by the nf-core/eager 7 | analysis pipeline. For information about how to interpret these results, please see the 8 | documentation. 
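A quick way to catch mistakes in this config before a release is to load it and check the two entry shapes it uses below: `run_modules` entries as plain strings, and `extra_fn_clean_exts` entries as either suffix strings or `{type: regex, pattern: ...}` mappings. A minimal sketch, assuming PyYAML is installed and the file is read from `assets/multiqc_config.yaml`; this script is not part of the pipeline itself:

```python
#!/usr/bin/env python3
# Minimal sanity-check sketch for assets/multiqc_config.yaml (not part of the
# pipeline). Assumes PyYAML is available in the environment.
import yaml

with open("assets/multiqc_config.yaml") as fh:
    cfg = yaml.safe_load(fh)

# Every entry under run_modules should be a plain module-name string.
assert all(isinstance(m, str) for m in cfg.get("run_modules", [])), "non-string module name"

# extra_fn_clean_exts mixes plain suffix strings with {type: regex, pattern: ...} mappings.
for ext in cfg.get("extra_fn_clean_exts", []):
    if isinstance(ext, dict):
        assert ext.get("type") == "regex" and "pattern" in ext, f"malformed regex entry: {ext}"
    else:
        assert isinstance(ext, str), f"unexpected entry type: {ext!r}"

print(f"OK: {len(cfg.get('run_modules', []))} MultiQC modules enabled")
```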
9 | run_modules: 10 | - adapterRemoval 11 | - bowtie2 12 | - custom_content 13 | - damageprofiler 14 | - dedup 15 | - fastp 16 | - fastqc 17 | - gatk 18 | - kraken 19 | - malt 20 | - mapdamage 21 | - mtnucratio 22 | - multivcfanalyzer 23 | - picard 24 | - preseq 25 | - qualimap 26 | - samtools 27 | - sexdeterrmine 28 | - hops 29 | - bcftools 30 | 31 | extra_fn_clean_exts: 32 | - "_fastp" 33 | - ".pe.settings" 34 | - ".se.settings" 35 | - ".settings" 36 | - ".pe.combined" 37 | - ".se.truncated" 38 | - ".mapped" 39 | - ".mapped_rmdup" 40 | - ".mapped_rmdup_stats" 41 | - "_libmerged_rg_rmdup" 42 | - "_libmerged_rg_rmdup_stats" 43 | - "_postfilterflagstat.stats" 44 | - "_flagstat.stat" 45 | - ".filtered" 46 | - ".filtered_rmdup" 47 | - ".filtered_rmdup_stats" 48 | - "_libmerged_rg_add" 49 | - "_libmerged_rg_add_stats" 50 | - "_rmdup" 51 | - ".unmapped" 52 | - ".fastq.gz" 53 | - ".fastq" 54 | - ".fq.gz" 55 | - ".fq" 56 | - ".bam" 57 | - ".kreport" 58 | - ".unifiedgenotyper" 59 | - ".trimmed_stats" 60 | - "_libmerged" 61 | - "_bt2" 62 | - type: "regex" 63 | pattern: "_udg(half|none|full)" 64 | 65 | top_modules: 66 | - "fastqc": 67 | name: "FastQC (pre-Trimming)" 68 | path_filters: 69 | - "*_raw_fastqc.zip" 70 | - "fastp" 71 | - "adapterRemoval" 72 | - "fastqc": 73 | name: "FastQC (post-Trimming)" 74 | path_filters: 75 | - "*.truncated_fastqc.zip" 76 | - "*.combined*_fastqc.zip" 77 | - "*_postartrimmed_fastqc.zip" 78 | - "bowtie2": 79 | path_filters: 80 | - "*_bt2.log" 81 | - "malt" 82 | - "hops" 83 | - "kraken" 84 | - "samtools": 85 | name: "Samtools Flagstat (pre-samtools filter)" 86 | path_filters: 87 | - "*_flagstat.stats" 88 | - "samtools": 89 | name: "Samtools Flagstat (post-samtools filter)" 90 | path_filters: 91 | - "*_postfilterflagstat.stats" 92 | - "dedup" 93 | - "picard" 94 | - "preseq": 95 | path_filters: 96 | - "*.preseq" 97 | - "damageprofiler" 98 | - "mapdamage" 99 | - "mtnucratio" 100 | - "qualimap" 101 | - "sexdeterrmine" 102 | - "bcftools" 103 | - "multivcfanalyzer": 104 | path_filters: 105 | - "*MultiVCFAnalyzer.json" 106 | qualimap_config: 107 | general_stats_coverage: 108 | - 1 109 | - 2 110 | - 3 111 | - 4 112 | - 5 113 | 114 | remove_sections: 115 | - sexdeterrmine-snps 116 | 117 | table_columns_visible: 118 | FastQC (pre-Trimming): 119 | percent_duplicates: False 120 | percent_gc: True 121 | avg_sequence_length: True 122 | fastp: 123 | pct_duplication: False 124 | after_filtering_gc_content: True 125 | pct_surviving: False 126 | Adapter Removal: 127 | aligned_total: False 128 | percent_aligned: True 129 | FastQC (post-Trimming): 130 | avg_sequence_length: True 131 | percent_duplicates: False 132 | total_sequences: True 133 | percent_gc: True 134 | bowtie2: 135 | overall_alignment_rate: True 136 | MALT: 137 | Taxonomic assignment success: False 138 | Assig. Taxonomy: False 139 | Mappability: True 140 | Total reads: False 141 | Num. 
of queries: False 142 | Kraken: 143 | "% Unclassified": True 144 | "% Top 5": False 145 | Samtools Flagstat (pre-samtools filter): 146 | flagstat_total: True 147 | mapped_passed: True 148 | Samtools Flagstat (post-samtools filter): 149 | mapped_passed: True 150 | DeDup: 151 | dup_rate: False 152 | clusterfactor: True 153 | mapped_after_dedup: True 154 | Picard: 155 | PERCENT_DUPLICATION: True 156 | DamageProfiler: 157 | 5 Prime1: True 158 | 5 Prime2: True 159 | 3 Prime1: False 160 | 3 Prime2: False 161 | mean_readlength: True 162 | median: True 163 | mapDamage: 164 | 5 Prime1: True 165 | 5 Prime2: True 166 | 3 Prime1: False 167 | 3 Prime2: False 168 | mtnucratio: 169 | mt_nuc_ratio: True 170 | QualiMap: 171 | mapped_reads: True 172 | mean_coverage: True 173 | 1_x_pc: True 174 | 5_x_pc: True 175 | percentage_aligned: False 176 | median_insert_size: False 177 | MultiVCFAnalyzer: 178 | Heterozygous SNP alleles (percent): True 179 | endorSpy: 180 | endogenous_dna: True 181 | endogenous_dna_post: True 182 | nuclear_contamination: 183 | Num_SNPs: True 184 | Method1_MOM_estimate: False 185 | Method1_MOM_SE: False 186 | Method1_ML_estimate: True 187 | Method1_ML_SE: True 188 | Method2_MOM_estimate: False 189 | Method2_MOM_SE: False 190 | Method2_ML_estimate: False 191 | Method2_ML_SE: False 192 | snp_coverage: 193 | Covered_Snps: True 194 | Total_Snps: False 195 | 196 | table_columns_placement: 197 | FastQC (pre-Trimming): 198 | total_sequences: 100 199 | avg_sequence_length: 110 200 | percent_gc: 120 201 | fastp: 202 | after_filtering_gc_content: 200 203 | Adapter Removal: 204 | percent_aligned: 300 205 | FastQC (post-Trimming): 206 | total_sequences: 400 207 | avg_sequence_length: 410 208 | percent_gc: 420 209 | Bowtie 2 / HiSAT2: 210 | overall_alignment_rate: 450 211 | MALT: 212 | Num. of queries: 430 213 | Total reads: 440 214 | Mappability: 450 215 | Assig. 
Taxonomy: 460 216 | Taxonomic assignment success: 470 217 | Kraken: 218 | "% Unclassified": 480 219 | Samtools Flagstat (pre-samtools filter): 220 | flagstat_total: 551 221 | mapped_passed: 552 222 | Samtools Flagstat (post-samtools filter): 223 | flagstat_total: 600 224 | mapped_passed: 620 225 | endorSpy: 226 | endogenous_dna: 610 227 | endogenous_dna_post: 640 228 | nuclear_contamination: 229 | Num_SNPs: 1100 230 | Method1_MOM_estimate: 1110 231 | Method1_MOM_SE: 1120 232 | Method1_ML_estimate: 1130 233 | Method1_ML_SE: 1140 234 | Method2_MOM_estimate: 1150 235 | Method2_MOM_SE: 1160 236 | Method2_ML_estimate: 1170 237 | Method2_ML_SE: 1180 238 | snp_coverage: 239 | Covered_Snps: 1050 240 | Total_Snps: 1060 241 | DeDup: 242 | mapped_after_dedup: 620 243 | clusterfactor: 630 244 | Picard: 245 | PERCENT_DUPLICATION: 650 246 | DamageProfiler: 247 | 5 Prime1: 700 248 | 5 Prime2: 710 249 | 3 Prime1: 720 250 | 3 Prime2: 730 251 | mean_readlength: 740 252 | median: 750 253 | mapDamage: 254 | 5 Prime1: 760 255 | 5 Prime2: 765 256 | 3 Prime1: 770 257 | 3 Prime2: 775 258 | mtnucratio: 259 | mtreads: 780 260 | mt_cov_avg: 785 261 | mt_nuc_ratio: 790 262 | QualiMap: 263 | mapped_reads: 800 264 | mean_coverage: 805 265 | median_coverage: 810 266 | 1_x_pc: 820 267 | 2_x_pc: 830 268 | 3_x_pc: 840 269 | 4_x_pc: 850 270 | 5_x_pc: 860 271 | avg_gc: 870 272 | sexdeterrmine: 273 | RateX: 1000 274 | RateY: 1010 275 | MultiVCFAnalyzer: 276 | Heterozygous SNP alleles (percent): 1200 277 | read_count_multiplier: 1 278 | read_count_prefix: "" 279 | read_count_desc: "" 280 | ancient_read_count_prefix: "" 281 | ancient_read_count_desc: "" 282 | ancient_read_count_multiplier: 1 283 | decimalPoint_format: "." 284 | thousandsSep_format: "," 285 | report_section_order: 286 | software_versions: 287 | order: -1000 288 | nf-core-eager-summary: 289 | order: -1001 290 | export_plots: true 291 | table_columns_name: 292 | FastQC (pre-Trimming): 293 | total_sequences: "Nr. Input Reads" 294 | avg_sequence_length: "Length Input Reads" 295 | percent_gc: "% GC Input Reads" 296 | percent_duplicates: "% Dups Input Reads" 297 | percent_fails: "% Failed Input Reads" 298 | FastQC (post-Trimming): 299 | total_sequences: "Nr. Processed Reads" 300 | avg_sequence_length: "Length Processed Reads" 301 | percent_gc: "% GC Processed Reads" 302 | percent_duplicates: "% Dups Processed Reads" 303 | percent_fails: "%Failed Processed Reads" 304 | Samtools Flagstat (pre-samtools filter): 305 | flagstat_total: "Nr. Reads Into Mapping" 306 | mapped_passed: "Nr. Mapped Reads" 307 | Samtools Flagstat (post-samtools filter): 308 | flagstat_total: "Nr. Mapped Reads Post-Filter" 309 | mapped_passed: "Nr. Mapped Reads Passed Post-Filter" 310 | Endogenous DNA Post (%): 311 | endogenous_dna_post (%): "Endogenous DNA Post-Filter (%)" 312 | Picard: 313 | PERCENT_DUPLICATION: "% Dup. Mapped Reads" 314 | DamageProfiler: 315 | mean_readlength: "Mean Length Mapped Reads" 316 | median_readlength: "Median Length Mapped Reads" 317 | QualiMap: 318 | mapped_reads: "Nr. Dedup. Mapped Reads" 319 | total_reads: "Nr. Dedup. Total Reads" 320 | avg_gc: "% GC Dedup. Mapped Reads" 321 | Bcftools Stats: 322 | number_of_records: "Nr. Overall Variants" 323 | number_of_SNPs: "Nr. SNPs" 324 | number_of_indels: "Nr. 
InDels" 325 | MALT: 326 | Mappability: "% Metagenomic Mappability" 327 | SexDetErrmine: 328 | RateErrX: "SexDet Err X Chr" 329 | RateErrY: "SexDet Err Y Chr" 330 | RateX: "SexDet Rate X Chr" 331 | RateY: "SexDet Rate Y Chr" 332 | custom_table_header_config: 333 | general_stats_table: 334 | median_coverage: 335 | format: "{:,.3f}" 336 | mean_coverage: 337 | format: "{:,.3f}" 338 | -------------------------------------------------------------------------------- /docs/images/eager_logo.svg: -------------------------------------------------------------------------------- 1 | [SVG vector source omitted: the nf-core/eager logo wordmark] -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # nf-core/eager: Contributing Guidelines 2 | 3 | Hi there! 4 | Many thanks for taking an interest in improving nf-core/eager. 5 | 6 | We try to manage the required tasks for nf-core/eager using GitHub issues; you probably came to this page when creating one. 7 | Please use the pre-filled template to save time. 8 | 9 | However, don't be put off by this template - other more general issues and suggestions are welcome! 10 | Contributions to the code are even more welcome ;) 11 | 12 | > If you need help using or modifying nf-core/eager then the best place to ask is on the nf-core Slack [#eager](https://nfcore.slack.com/channels/eager) channel ([join our Slack here](https://nf-co.re/join/slack)). 13 | 14 | ## Contribution workflow 15 | 16 | If you'd like to write some code for nf-core/eager, the standard workflow is as follows: 17 | 18 | 1. Check that there isn't already an issue about your idea in the [nf-core/eager issues](https://github.com/nf-core/eager/issues) to avoid duplicating work 19 | * If there isn't one already, please create one so that others know you're working on this 20 | 2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [nf-core/eager repository](https://github.com/nf-core/eager) to your GitHub account 21 | 3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions) 22 | 4. Use `nf-core schema build .` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10). 23 | 5. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged 24 | 25 | If you're not used to this workflow with git, you can start with some [docs from GitHub](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests) or even their [excellent `git` resources](https://try.github.io/). 26 | 27 | ## Tests 28 | 29 | When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. 30 | Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. 31 | 32 | There are typically two types of tests that run: 33 | 34 | ### Lint tests 35 | 36 | `nf-core` has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to. 37 | To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint <pipeline-directory>` command. 
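To get a feel for what such a lint check does, below is a minimal illustrative sketch. It is not the real nf-core linter (the actual checks live in the nf-core/tools repository); the `REQUIRED` file list is an assumption chosen purely for illustration:

```python
#!/usr/bin/env python3
# Illustrative sketch only: mimics the spirit of one simple lint-style check
# (required files present in the pipeline root). NOT the real nf-core linter.
from pathlib import Path

# Hypothetical minimal file list, chosen for illustration only.
REQUIRED = ["main.nf", "nextflow.config", "README.md", "LICENSE"]


def check_files_exist(pipeline_dir):
    """Return human-readable failures; an empty list means the check passed."""
    root = Path(pipeline_dir)
    return [f"File not found: {name}" for name in REQUIRED if not (root / name).exists()]


if __name__ == "__main__":
    failures = check_files_exist(".")
    for failure in failures:
        print(f"FAIL: {failure}")
    print("Check passed" if not failures else f"{len(failures)} failure(s)")
```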
38 | 39 | If any failures or warnings are encountered, please follow the listed URL for more documentation. 40 | 41 | ### Pipeline tests 42 | 43 | Each `nf-core` pipeline should be set up with a minimal set of test-data. 44 | `GitHub Actions` then runs the pipeline on this data to ensure that it exits successfully. 45 | If there are any failures then the automated tests fail. 46 | These tests are run both with the latest available version of `Nextflow` and also the minimum required version that is stated in the pipeline code. 47 | 48 | ## Patch 49 | 50 | :warning: Only in the unlikely and regretful event of a release happening with a bug. 51 | 52 | * On your own fork, make a new branch `patch` based on `upstream/master`. 53 | * Fix the bug, and bump version (X.Y.Z+1). 54 | * A PR should be made on `master` from `patch` to directly address this particular bug. 55 | 56 | ## Getting help 57 | 58 | For further information/help, please consult the [nf-core/eager documentation](https://nf-co.re/eager/usage) and don't hesitate to get in touch on the nf-core Slack [#eager](https://nfcore.slack.com/channels/eager) channel ([join our Slack here](https://nf-co.re/join/slack)). 59 | 60 | ## Pipeline contribution conventions 61 | 62 | To make the nf-core/eager code and processing logic more understandable for new contributors and to ensure quality, we semi-standardise the way the code and other contributions are written. 63 | 64 | ### Adding a new step 65 | 66 | If you wish to contribute a new step, please use the following coding standards: 67 | 68 | 1. Define the corresponding input channel into your new process from the expected previous process channel 69 | 2. Write the process block (see below). 70 | 3. Define the output channel if needed (see below). 71 | 4. Add any new flags/options to `nextflow.config` with a default (see below). 72 | 5. Add any new flags/options to `nextflow_schema.json` with help text (with `nf-core schema build .`). 73 | 6. Add sanity checks for all relevant parameters. 74 | 7. Add any new software to the `scrape_software_versions.py` script in `bin/` and the version command to the `scrape_software_versions` process in `main.nf`. 75 | 8. Do local tests that the new code works properly and as expected. 76 | 9. Add a new test command in `.github/workflows/ci.yml`. 77 | 10. If applicable add a [MultiQC](https://multiqc.info/) module. 78 | 11. Update MultiQC config `assets/multiqc_config.yaml` so relevant suffixes, name clean up, General Statistics Table column order, and module figures are in the right order. 79 | 12. Optional: Add any descriptions of MultiQC report sections and output files to `docs/output.md`. 80 | 81 | ### Default values 82 | 83 | Parameters should be initialised / defined with default values in `nextflow.config` under the `params` scope. 84 | 85 | Once there, use `nf-core schema build .` to add to `nextflow_schema.json`. 86 | 87 | ### Default processes resource requirements 88 | 89 | Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generically with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. 
An nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. 90 | 91 | :warning: Note that in nf-core/eager we currently have our own custom process labels, so please check `base.config`! 92 | 93 | The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. 94 | 95 | ### Naming schemes 96 | 97 | Please use the following naming schemes, to make it easy to understand what is going where. 98 | 99 | * initial process channel: `ch_output_from_<process>` 100 | * intermediate and terminal channels: `ch_<previousprocess>_for_<nextprocess>` 101 | * skipped process output: `ch_<previousprocess>_for_skip<process>` (this goes out of the bypass statement described in the Process Concept section below) 102 | 103 | ### Nextflow version bumping 104 | 105 | If you are using a new feature from core Nextflow, you may bump the minimum required version of nextflow in the pipeline with: `nf-core bump-version --nextflow . [min-nf-version]` 106 | 107 | ### Software version reporting 108 | 109 | If you add a new tool to the pipeline, please ensure you add the information of the tool to the `get_software_version` process. 110 | 111 | Add to the script block of the process, something like the following: 112 | 113 | ```bash 114 | <your_tool> --version &> v_<your_tool>.txt 2>&1 || true 115 | ``` 116 | 117 | or 118 | 119 | ```bash 120 | <your_tool> --help | head -n 1 &> v_<your_tool>.txt 2>&1 || true 121 | ``` 122 | 123 | You then need to edit the script `bin/scrape_software_versions.py` to: 124 | 125 | 1. Add a Python regex for your tool's `--version` output (as stored in the `v_<your_tool>.txt` file), to ensure the version is reported as a `v` plus the version number, e.g. `v2.1.1` 126 | 2. Add an HTML entry to the `OrderedDict` for formatting in MultiQC. 127 | 128 | ### Images and figures 129 | 130 | For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines). 131 | 132 | For all internal nf-core/eager documentation images we are using the 'Kalam' font by the Indian Type Foundry, licensed under the Open Font License. It can be found for download [here](https://fonts.google.com/specimen/Kalam). 133 | 134 | ## Process Concept 135 | 136 | We are providing a highly configurable pipeline, with many options to turn on and off different processes in different combinations. This can make a very complex graph structure that can cause a large amount of duplicated channels coming out of every process to account for each possible combination. 137 | 138 | The EAGER pipeline can currently be broken down into the following 'stages', where a stage is a collection of non-terminal, mutually exclusive processes, the output of which is used as the input for another downstream module (but not for reporting!). 139 | 140 | * Input 141 | * Convert BAM 142 | * PolyG Clipping 143 | * AdapterRemoval 144 | * Mapping (either `bwa`, `bwamem`, or `circularmapper`) 145 | * BAM Filtering 146 | * Deduplication (either `dedup` or `markduplicates`) 147 | * BAM Trimming 148 | * PMDtools 149 | * Genotyping 150 | 151 | Every step can potentially be skipped; therefore, the output of a previous stage must be able to be passed to the next stage if the given stage is not run. 
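Before the Nextflow schematic below, the same bypass idea can be modelled in plain Python terms. This is purely illustrative (the pipeline uses Nextflow channels, not Python lists, and the filenames here are hypothetical): mix the previous stage's pass-through output with the current stage's output, then keep the current stage's files only when the stage actually ran:

```python
# Purely illustrative model of the 'bypass' pattern described above;
# the real pipeline implements this with Nextflow channel .mix()/.filter().
def bypass(previous_output, current_output, stage_ran, stage_suffix):
    """Return a homogeneous file list to feed into the next stage."""
    mixed = previous_output + current_output
    if stage_ran:
        # Stage ran: keep only files with the suffix unique to this stage.
        return [f for f in mixed if f.endswith(stage_suffix)]
    # Stage skipped: current_output is empty, so everything passes through.
    return mixed


# Example: the fastp stage skipped vs. run (hypothetical filenames).
print(bypass(["sample.converted.fq"], [], False, ".pG.fq"))
print(bypass(["sample.converted.fq"], ["sample.pG.fq"], True, ".pG.fq"))
```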
152 | 153 | To somewhat simplify this logic, we have implemented the following structure. 154 | 155 | The concept is as follows: 156 | 157 | * Every 'stage' of the pipeline (i.e. collection of mutually exclusive processes) must always have an if else statement following it. 158 | * This if else 'bypass' statement collects and standardises all possible input files into single channel(s) for the next stage. 159 | * Importantly - within the bypass statement, a channel from the previous stage's bypass mixes into these output channels. This additional channel is named `ch_previousstage_for_skipcurrentstage`. This contains the output from the previous stage, i.e. not the modified version from the current stage. 160 | * The bypass statement works as follows: 161 | * If the current stage is turned on: will mix the previous stage and current stage output and filter for file suffixes unique to the current stage output 162 | * If the current stage is turned off or skipped: will mix the previous stage and current stage output. However, as there are no files in the output channel from the current stage, no filtering is required and the files in the 'ch_XXX_for_skipXXX' channel will be used. 163 | 164 | This ensures the channel inputs to the next stage are 'homogeneous' - i.e. all come from the same source (the bypass statement). 165 | 166 | An example schematic can be given as follows: 167 | 168 | ```nextflow 169 | // PREVIOUS STAGE OUTPUT 170 | if (params.run_convertinputbam) { 171 | ch_input_for_skipconvertbam.mix(ch_output_from_convertbam) 172 | .filter{ it =~/.*converted.fq/} 173 | .into { ch_convertbam_for_fastp; ch_convertbam_for_skipfastp } 174 | } else { 175 | ch_input_for_skipconvertbam 176 | .into { ch_convertbam_for_fastp; ch_convertbam_for_skipfastp } 177 | } 178 | 179 | // SKIPPABLE CURRENT STAGE PROCESS 180 | process fastp { 181 | publishDir "${params.outdir}/fastp", mode: 'copy' 182 | 183 | when: 184 | params.run_fastp 185 | 186 | input: 187 | file fq from ch_convertbam_for_fastp 188 | 189 | output: 190 | file "*pG.fq" into ch_output_from_fastp 191 | 192 | script: 193 | """ 194 | echo "I have been fastp'd" > ${fq} 195 | mv ${fq} ${fq}.pG.fq 196 | """ 197 | } 198 | 199 | // NEXT STAGE INPUT PREPARATION 200 | if (params.run_fastp) { 201 | ch_convertbam_for_skipfastp.mix(ch_output_from_fastp) 202 | .filter { it =~/.*pG.fq/ } 203 | .into { ch_fastp_for_adapterremoval; ch_fastp_for_skipadapterremoval } 204 | } else { 205 | ch_convertbam_for_skipfastp 206 | .into { ch_fastp_for_adapterremoval; ch_fastp_for_skipadapterremoval } 207 | } 208 | 209 | ``` 210 | -------------------------------------------------------------------------------- /nextflow.config: -------------------------------------------------------------------------------- 1 | /* 2 | * ------------------------------------------------- 3 | * nf-core/eager Nextflow config file 4 | * ------------------------------------------------- 5 | * Default config options for all environments. 
6 | */ 7 | // Global default params, used in configs 8 | params { 9 | 10 | // Workflow flags 11 | genome = false 12 | input = null 13 | input_paths = null 14 | single_end = false 15 | outdir = './results' 16 | publish_dir_mode = 'copy' 17 | config_profile_name = null 18 | 19 | // aws 20 | awsqueue = null 21 | awsregion = 'eu-west-1' 22 | awscli = null 23 | 24 | //Pipeline options 25 | enable_conda = false 26 | validate_params = true 27 | schema_ignore_params = 'genome' 28 | show_hidden_params = false 29 | 30 | //Input reads 31 | udg_type = 'none' 32 | single_stranded = false 33 | single_end = false 34 | colour_chemistry = 4 35 | bam = false 36 | 37 | // Optional input information 38 | snpcapture_bed = null 39 | run_convertinputbam = false 40 | 41 | //Input reference 42 | fasta = null 43 | bwa_index = null 44 | bt2_index = null 45 | fasta_index = null 46 | seq_dict = null 47 | large_ref = false 48 | save_reference = false 49 | 50 | // this is just to stop the iGenomes WARN as we set as FALSE by default. Otherwise should be overwritten by optional config load below. 51 | genomes = false 52 | 53 | 54 | //Skipping parts of the pipeline for impatient users 55 | skip_fastqc = false 56 | skip_adapterremoval = false 57 | skip_preseq = false 58 | skip_deduplication = false 59 | skip_damage_calculation = false 60 | skip_qualimap = false 61 | 62 | //More defaults 63 | complexity_filter_poly_g = false 64 | complexity_filter_poly_g_min = 10 65 | 66 | //Read clipping and merging parameters 67 | clip_forward_adaptor = 'AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC' 68 | clip_reverse_adaptor = 'AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTA' 69 | clip_adapters_list = null 70 | clip_readlength = 30 71 | clip_min_read_quality = 20 72 | min_adap_overlap = 1 73 | skip_collapse = false 74 | skip_trim = false 75 | preserve5p = false 76 | mergedonly = false 77 | qualitymax = 41 78 | run_post_ar_trimming = false 79 | post_ar_trim_front = 7 80 | post_ar_trim_tail = 7 81 | post_ar_trim_front2 = 7 82 | post_ar_trim_tail2 = 7 83 | 84 | //Mapping algorithm 85 | mapper = 'bwaaln' 86 | bwaalnn = 0.01 // From Oliva et al. 2021 (10.1093/bib/bbab076) 87 | bwaalnk = 2 88 | bwaalnl = 1024 // From Oliva et al. 2021 (10.1093/bib/bbab076) 89 | bwaalno = 2 // From Oliva et al. 
2021 (10.1093/bib/bbab076) 90 | circularextension = 500 91 | circulartarget = 'MT' 92 | circularfilter = false 93 | bt2_alignmode = 'local' // from Cahill 2018 (10.1093/molbev/msy018) and, Poullet and Orlando (10.3389/fevo.2020.00105) 94 | bt2_sensitivity = 'sensitive' // from Poullet and Orlando (10.3389/fevo.2020.00105) 95 | bt2n = 0 // Do not set Cahill 2018 recommendation of 1 here, so not to 'hide' overriding bowtie2 presets 96 | bt2l = 0 97 | bt2_trim5 = 0 98 | bt2_trim3 = 0 99 | bt2_maxins = 500 100 | 101 | //Mapped read removal from input FASTQ 102 | hostremoval_input_fastq = false 103 | hostremoval_mode = 'remove' 104 | 105 | //BAM Filtering steps (default = discard unmapped reads) 106 | run_bam_filtering = false 107 | bam_mapping_quality_threshold = 0 108 | bam_filter_minreadlength = 0 109 | bam_unmapped_type = 'discard' 110 | 111 | //DeDuplication settings 112 | dedupper = 'markduplicates' 113 | dedup_all_merged = false 114 | 115 | //Preseq settings 116 | preseq_step_size = 1000 117 | preseq_mode = 'c_curve' 118 | preseq_bootstrap = 100 119 | preseq_maxextrap = 10000000000 120 | preseq_cval = 0.95 121 | preseq_terms = 100 122 | 123 | //Damage estimation settings 124 | damage_calculation_tool = 'damageprofiler' 125 | damageprofiler_length = 100 126 | damageprofiler_threshold = 15 127 | damageprofiler_yaxis = 0.30 128 | mapdamage_downsample = 0 129 | mapdamage_yaxis = 0.30 130 | 131 | //PMDTools settings 132 | run_pmdtools = false 133 | pmdtools_range = 10 134 | pmdtools_threshold = 3 135 | pmdtools_reference_mask = null 136 | pmdtools_max_reads = 10000 137 | pmdtools_platypus = false 138 | 139 | // mapDamage 140 | run_mapdamage_rescaling = false 141 | rescale_length_5p = 0 142 | rescale_length_3p = 0 143 | rescale_seqlength = 12 144 | 145 | //Bedtools settings 146 | run_bedtools_coverage = false 147 | anno_file = null 148 | anno_file_is_unsorted = false 149 | 150 | //bamUtils trimbam settings 151 | run_trim_bam = false 152 | bamutils_clip_double_stranded_half_udg_left = 0 153 | bamutils_clip_double_stranded_half_udg_right = 0 154 | bamutils_clip_double_stranded_none_udg_left = 0 155 | bamutils_clip_double_stranded_none_udg_right = 0 156 | bamutils_clip_single_stranded_half_udg_left = 0 157 | bamutils_clip_single_stranded_half_udg_right = 0 158 | bamutils_clip_single_stranded_none_udg_left = 0 159 | bamutils_clip_single_stranded_none_udg_right = 0 160 | bamutils_softclip = false 161 | 162 | //Genotyping options 163 | run_genotyping = false 164 | genotyping_tool = null 165 | genotyping_source = 'raw' 166 | // gatk options 167 | gatk_call_conf = 30 168 | gatk_ploidy = 2 169 | gatk_downsample = 250 170 | gatk_dbsnp = null 171 | gatk_hc_out_mode = 'EMIT_VARIANTS_ONLY' 172 | gatk_hc_emitrefconf = 'GVCF' 173 | gatk_ug_genotype_model = 'SNP' 174 | gatk_ug_out_mode = 'EMIT_VARIANTS_ONLY' 175 | gatk_ug_keep_realign_bam = false 176 | gatk_ug_defaultbasequalities = null 177 | // freebayes options 178 | freebayes_C = 1 179 | freebayes_g = 0 180 | freebayes_p = 2 181 | // Sequencetools pileupCaller 182 | pileupcaller_snpfile = null 183 | pileupcaller_bedfile = null 184 | pileupcaller_method = 'randomHaploid' 185 | pileupcaller_transitions_mode = 'AllSites' 186 | pileupcaller_min_map_quality = 30 187 | pileupcaller_min_base_quality = 30 188 | // ANGSD Genotype Likelihoods 189 | angsd_glmodel = 'samtools' 190 | angsd_glformat = 'binary' 191 | angsd_createfasta = false 192 | angsd_fastamethod = 'random' 193 | run_bcftools_stats = true 194 | 195 | //Consensus sequence generation 196 | 
run_vcf2genome = false 197 | vcf2genome_outfile = '' 198 | vcf2genome_header = '' 199 | vcf2genome_minc = 5 200 | vcf2genome_minq = 30 201 | vcf2genome_minfreq = 0.8 202 | 203 | //MultiVCFAnalyzer Options 204 | run_multivcfanalyzer = false 205 | write_allele_frequencies = false 206 | min_genotype_quality = 30 207 | min_base_coverage = 5 208 | min_allele_freq_hom = 0.9 209 | min_allele_freq_het = 0.9 210 | additional_vcf_files = null 211 | reference_gff_annotations = 'NA' 212 | reference_gff_exclude = 'NA' 213 | snp_eff_results = 'NA' 214 | 215 | //mtnucratio 216 | run_mtnucratio = false 217 | mtnucratio_header = 'MT' 218 | 219 | //Sex.DetERRmine settings 220 | run_sexdeterrmine = false 221 | sexdeterrmine_bedfile = null 222 | 223 | //Nuclear contamination based on chromosome X heterozygosity. 224 | run_nuclear_contamination = false 225 | contamination_chrom_name = 'X' // Default to using hs37d5 name 226 | 227 | // taxonomic classifier 228 | run_metagenomic_screening = false 229 | 230 | metagenomic_complexity_filter = false 231 | metagenomic_complexity_entropy = 0.3 232 | 233 | metagenomic_tool = null 234 | database = null 235 | metagenomic_min_support_reads = 1 236 | percent_identity = 85 237 | malt_mode = 'BlastN' 238 | malt_alignment_mode = 'SemiGlobal' 239 | malt_top_percent = 1 240 | malt_min_support_mode = 'percent' 241 | malt_min_support_percent = 0.01 242 | malt_max_queries = 100 243 | malt_memory_mode = 'load' 244 | malt_sam_output = false 245 | 246 | // maltextract - only including number 247 | // parameters if default documented or duplicate of MALT 248 | run_maltextract = false 249 | maltextract_taxon_list = null 250 | maltextract_ncbifiles = null 251 | maltextract_filter = 'def_anc' 252 | maltextract_toppercent = 0.01 253 | maltextract_destackingoff = false 254 | maltextract_downsamplingoff = false 255 | maltextract_duplicateremovaloff = false 256 | maltextract_matches = false 257 | maltextract_megansummary = false 258 | maltextract_percentidentity = 85.0 259 | maltextract_topalignment = false 260 | 261 | // Boilerplate options 262 | multiqc_config = false 263 | email = false 264 | email_on_fail = false 265 | max_multiqc_email_size = 25.MB 266 | plaintext_email = false 267 | monochrome_logs = false 268 | help = false 269 | igenomes_base = 's3://ngi-igenomes/igenomes' 270 | tracedir = "${params.outdir}/pipeline_info" 271 | igenomes_ignore = true 272 | custom_config_version = 'master' 273 | custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" 274 | hostnames = false 275 | config_profile_name = null 276 | config_profile_description = false 277 | config_profile_contact = false 278 | config_profile_url = false 279 | validate_params = true 280 | show_hidden_params = false 281 | schema_ignore_params = 'genomes,input_paths' 282 | 283 | // Defaults only, expecting to be overwritten 284 | max_memory = 128.GB 285 | max_cpus = 16 286 | max_time = 240.h 287 | 288 | } 289 | 290 | // Container slug. Stable releases should specify release tag! 
291 | // Developmental code should specify :dev 292 | process.container = 'nfcore/eager:2.5.3' 293 | 294 | // Load base.config by default for all pipelines 295 | includeConfig 'conf/base.config' 296 | 297 | // Load nf-core custom profiles from different Institutions 298 | try { 299 | includeConfig "${params.custom_config_base}/nfcore_custom.config" 300 | } catch (Exception e) { 301 | System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") 302 | } 303 | 304 | // Load nf-core/eager custom profiles from different institutions 305 | try { 306 | includeConfig "${params.custom_config_base}/pipeline/eager.config" 307 | } catch (Exception e) { 308 | System.err.println("WARNING: Could not load nf-core/config/eager profiles: ${params.custom_config_base}/pipeline/eager.config") 309 | } 310 | 311 | profiles { 312 | conda { 313 | docker.enabled = false 314 | singularity.enabled = false 315 | podman.enabled = false 316 | shifter.enabled = false 317 | charliecloud.enabled = false 318 | process.conda = "$projectDir/environment.yml" 319 | } 320 | debug { process.beforeScript = 'echo $HOSTNAME' } 321 | docker { 322 | docker.enabled = true 323 | singularity.enabled = false 324 | podman.enabled = false 325 | shifter.enabled = false 326 | charliecloud.enabled = false 327 | // Avoid this error: 328 | // WARNING: Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap. 329 | // Testing this in nf-core after discussion here https://github.com/nf-core/tools/pull/351 330 | // once this is established and works well, nextflow might implement this behavior as new default. 331 | docker.runOptions = '-u \$(id -u):\$(id -g)' 332 | } 333 | singularity { 334 | docker.enabled = false 335 | singularity.enabled = true 336 | podman.enabled = false 337 | shifter.enabled = false 338 | charliecloud.enabled = false 339 | singularity.autoMounts = true 340 | } 341 | podman { 342 | singularity.enabled = false 343 | docker.enabled = false 344 | podman.enabled = true 345 | shifter.enabled = false 346 | charliecloud.enabled = false 347 | } 348 | shifter { 349 | singularity.enabled = false 350 | docker.enabled = false 351 | podman.enabled = false 352 | shifter.enabled = true 353 | charliecloud.enabled = false 354 | } 355 | charliecloud { 356 | singularity.enabled = false 357 | docker.enabled = false 358 | podman.enabled = false 359 | shifter.enabled = false 360 | charliecloud.enabled = true 361 | } 362 | test { includeConfig 'conf/test.config'} 363 | test_direct { includeConfig 'conf/test_direct.config' } 364 | test_full { includeConfig 'conf/test_full.config' } 365 | test_bam { includeConfig 'conf/test_bam.config'} 366 | test_fna { includeConfig 'conf/test_fna.config'} 367 | test_humanbam { includeConfig 'conf/test_humanbam.config' } 368 | test_pretrim { includeConfig 'conf/test_pretrim.config' } 369 | test_kraken { includeConfig 'conf/test_kraken.config' } 370 | test_tsv_bam { includeConfig 'conf/test_tsv_bam.config'} 371 | test_tsv_fna { includeConfig 'conf/test_tsv_fna.config'} 372 | test_tsv_humanbam { includeConfig 'conf/test_tsv_humanbam.config' } 373 | test_tsv_pretrim { includeConfig 'conf/test_tsv_pretrim.config' } 374 | test_tsv_kraken { includeConfig 'conf/test_tsv_kraken.config' } 375 | test_tsv_complex { includeConfig 'conf/test_tsv_complex.config' } 376 | test_stresstest_human { includeConfig 'conf/test_stresstest_human.config' } 377 | benchmarking_human { includeConfig 'conf/benchmarking_human.config' } 
378 | benchmarking_vikingfish { includeConfig 'conf/benchmarking_vikingfish.config' } 379 | } 380 | 381 | 382 | // Load igenomes.config if required 383 | if (!params.igenomes_ignore) { 384 | includeConfig 'conf/igenomes.config' 385 | } 386 | 387 | // Export these variables to prevent local Python/R libraries from conflicting with those in the container 388 | env { 389 | PYTHONNOUSERSITE = 1 390 | R_PROFILE_USER = "/.Rprofile" 391 | R_ENVIRON_USER = "/.Renviron" 392 | } 393 | 394 | // Capture exit codes from upstream processes when piping 395 | process.shell = ['/bin/bash', '-euo', 'pipefail'] 396 | 397 | def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') 398 | timeline { 399 | enabled = true 400 | file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" 401 | } 402 | report { 403 | enabled = true 404 | file = "${params.tracedir}/execution_report_${trace_timestamp}.html" 405 | } 406 | trace { 407 | enabled = true 408 | file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" 409 | } 410 | dag { 411 | enabled = true 412 | file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.svg" 413 | } 414 | 415 | manifest { 416 | name = 'nf-core/eager' 417 | author = 'The nf-core/eager community' 418 | homePage = 'https://github.com/nf-core/eager' 419 | description = 'A fully reproducible and state-of-the-art ancient DNA analysis pipeline' 420 | mainScript = 'main.nf' 421 | nextflowVersion = '>=20.07.1' 422 | version = '2.5.3' 423 | } 424 | 425 | // Function to ensure that resource requirements don't go beyond 426 | // a maximum limit 427 | def check_max(obj, type) { 428 | if (type == 'memory') { 429 | try { 430 | if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) 431 | return params.max_memory as nextflow.util.MemoryUnit 432 | else 433 | return obj 434 | } catch (all) { 435 | println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" 436 | return obj 437 | } 438 | } else if (type == 'time') { 439 | try { 440 | if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) 441 | return params.max_time as nextflow.util.Duration 442 | else 443 | return obj 444 | } catch (all) { 445 | println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" 446 | return obj 447 | } 448 | } else if (type == 'cpus') { 449 | try { 450 | return Math.min( obj, params.max_cpus as int ) 451 | } catch (all) { 452 | println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! 
Using default value: $obj" 453 | return obj 454 | } 455 | } 456 | } -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: nf-core CI 2 | # This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors 3 | on: 4 | push: 5 | branches: 6 | - dev 7 | pull_request: 8 | release: 9 | types: [published] 10 | 11 | # Uncomment if we need an edge release of Nextflow again 12 | # env: NXF_EDGE: 1 13 | 14 | jobs: 15 | test: 16 | name: Run workflow tests 17 | # Only run on push if this is the nf-core dev branch (merged PRs) 18 | if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/eager') }} 19 | runs-on: ubuntu-latest 20 | env: 21 | NXF_VER: ${{ matrix.nxf_ver }} 22 | NXF_ANSI_LOG: false 23 | strategy: 24 | matrix: 25 | # Nextflow versions: check pipeline minimum and current latest 26 | nxf_ver: ["20.07.1", "22.10.6"] 27 | steps: 28 | - name: Check out pipeline code 29 | uses: actions/checkout@v2 30 | - name: Install older Java 31 | uses: actions/setup-java@v4 32 | with: 33 | distribution: "temurin" # See 'Supported distributions' for available options 34 | java-version: "11" 35 | - name: Check if Dockerfile or Conda environment changed 36 | uses: technote-space/get-diff-action@v4 37 | with: 38 | FILES: | 39 | Dockerfile 40 | environment.yml 41 | 42 | - name: Build new docker image 43 | if: env.MATCHED_FILES 44 | run: docker build --no-cache . -t nfcore/eager:2.5.3 45 | 46 | - name: Pull docker image 47 | if: ${{ !env.MATCHED_FILES }} 48 | run: | 49 | docker pull nfcore/eager:dev 50 | docker tag nfcore/eager:dev nfcore/eager:2.5.3 51 | - name: Install Nextflow 52 | env: 53 | CAPSULE_LOG: none 54 | run: | 55 | wget -qO- https://github.com/nextflow-io/nextflow/releases/download/v22.10.6/nextflow | bash 56 | sudo mv nextflow /usr/local/bin/ 57 | - name: HELPTEXT Run with the help flag 58 | run: | 59 | nextflow run ${GITHUB_WORKSPACE} --help 60 | - name: Get test data for cases where we don't use TSV input 61 | run: | 62 | git clone --single-branch --branch eager https://github.com/nf-core/test-datasets.git data 63 | - name: DELAY to try address some odd behaviour with what appears to be a conflict between parallel htslib jobs leading to CI hangs 64 | run: | 65 | if [[ $NXF_VER = '' ]]; then sleep 1200; fi 66 | - name: BASIC Run the basic pipeline with directly supplied single-end FASTQ 67 | run: | 68 | nextflow run ${GITHUB_WORKSPACE} -profile test_direct,docker --input 'data/testdata/Mammoth/fastq/*_R1_*.fq.gz' --single_end 69 | - name: BASIC Run the basic pipeline with directly supplied paired-end FASTQ 70 | run: | 71 | nextflow run ${GITHUB_WORKSPACE} -profile test_direct,docker --input 'data/testdata/Mammoth/fastq/*_{R1,R2}_*tengrand.fq.gz' 72 | - name: BASIC Run the basic pipeline with supplied --input BAM 73 | run: | 74 | nextflow run ${GITHUB_WORKSPACE} -profile test_direct,docker --input 'data/testdata/Mammoth/bam/*_R1_*.bam' --bam --single_end 75 | - name: BASIC Run the basic pipeline with the test profile with, PE/SE, bwa aln 76 | run: | 77 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --save_reference 78 | - name: REFERENCE Basic workflow, with supplied indices 79 | run: | 80 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --bwa_index 'results/reference_genome/bwa_index/BWAIndex/' --fasta_index 
'https://github.com/nf-core/test-datasets/raw/eager/reference/Mammoth/Mammoth_MT_Krause.fasta.fai' 81 | - name: REFERENCE Run the basic pipeline with FastA reference with `fna` extension 82 | run: | 83 | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_fna,docker 84 | - name: REFERENCE Test with zipped reference input 85 | run: | 86 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --fasta 'https://github.com/nf-core/test-datasets/raw/eager/reference/Mammoth/Mammoth_MT_Krause.fasta.gz' 87 | - name: FASTP Test fastp complexity filtering 88 | run: | 89 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --complexity_filter_poly_g 90 | - name: ADAPTERREMOVAL Test skip paired end collapsing 91 | run: | 92 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --skip_collapse 93 | - name: ADAPTERREMOVAL Test paired end collapsing but no trimming 94 | run: | 95 | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_pretrim,docker --skip_trim 96 | - name: ADAPTERREMOVAL Run the basic pipeline with paired end data without adapterRemoval 97 | run: | 98 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --skip_adapterremoval 99 | - name: ADAPTERREMOVAL Run the basic pipeline with preserve5p end option 100 | run: | 101 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --preserve5p 102 | - name: ADAPTERREMOVAL Run the basic pipeline with merged only option 103 | run: | 104 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --mergedonly 105 | - name: ADAPTERREMOVAL Run the basic pipeline with preserve5p end and merged reads only options 106 | run: | 107 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --preserve5p --mergedonly 108 | - name: ADAPTER LIST Run the basic pipeline using an adapter list 109 | run: | 110 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --clip_adapters_list 'https://github.com/nf-core/test-datasets/raw/eager/databases/adapters/adapter-list.txt' 111 | - name: ADAPTER LIST Run the basic pipeline using an adapter list, skipping adapter removal 112 | run: | 113 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --clip_adapters_list 'https://github.com/nf-core/test-datasets/raw/eager/databases/adapters/adapter-list.txt' --skip_adapterremoval 114 | - name: POST_AR_FASTQ_TRIMMING Run the basic pipeline post-adapterremoval FASTQ trimming 115 | run: | 116 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_post_ar_trimming 117 | - name: POST_AR_FASTQ_TRIMMING Run the basic pipeline post-adapterremoval FASTQ trimming, but skip adapterremoval 118 | run: | 119 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_post_ar_trimming --skip_adapterremoval 120 | - name: MAPPER_CIRCULARMAPPER Test running with CircularMapper 121 | run: | 122 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --mapper 'circularmapper' --circulartarget 'NC_007596.2' 123 | - name: MAPPER_BWAMEM Test running with BWA Mem 124 | run: | 125 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --mapper 'bwamem' --skip_collapse 126 | - name: MAPPER_BT2 Test running with BowTie2 127 | run: | 128 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --mapper 'bowtie2' --bt2_alignmode 'local' --bt2_sensitivity 'sensitive' --bt2n 1 --bt2l 16 --bt2_trim5 1 --bt2_trim3 1 129 | - name: HOST_REMOVAL_FASTQ Run the basic pipeline with output unmapped reads as fastq 130 | run: | 131 | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_complex,docker --hostremoval_input_fastq 132 | - name: BAM_FILTERING Run basic mapping pipeline with mapping quality
filtering, and unmapped export 133 | run: | 134 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_bam_filtering --bam_mapping_quality_threshold 37 --bam_unmapped_type 'fastq' 135 | - name: BAM_FILTERING Run basic mapping pipeline with post-mapping length filtering 136 | run: | 137 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --clip_readlength 0 --run_bam_filtering --bam_filter_minreadlength 50 138 | - name: PRESEQ Run basic mapping pipeline with different preseq mode 139 | run: | 140 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --preseq_mode 'lc_extrap' --preseq_maxextrap 10000 --preseq_bootstrap 10 141 | - name: DEDUPLICATION Test with dedup 142 | run: | 143 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --dedupper 'dedup' --dedup_all_merged 144 | - name: BEDTOOLS Test bedtools feature annotation 145 | run: | 146 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_bedtools_coverage --anno_file 'https://github.com/nf-core/test-datasets/raw/eager/reference/Mammoth/Mammoth_MT_Krause.gff3' 147 | - name: MAPDAMAGE2 damage calculation 148 | run: | 149 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --damage_calculation_tool 'mapdamage' 150 | - name: GENOTYPING_HC Test running GATK HaplotypeCaller 151 | run: | 152 | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_fna,docker --run_genotyping --genotyping_tool 'hc' --gatk_hc_out_mode 'EMIT_ALL_ACTIVE_SITES' --gatk_hc_emitrefconf 'BP_RESOLUTION' 153 | - name: GENOTYPING_FB Test running FreeBayes 154 | run: | 155 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_genotyping --genotyping_tool 'freebayes' 156 | - name: GENOTYPING_PC Test running pileupCaller 157 | run: | 158 | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_humanbam,docker --run_genotyping --genotyping_tool 'pileupcaller' 159 | - name: GENOTYPING_ANGSD Test running ANGSD genotype likelihood calculation 160 | run: | 161 | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_humanbam,docker --run_genotyping --genotyping_tool 'angsd' 162 | - name: GENOTYPING_BCFTOOLS Test running FreeBayes with bcftools stats turned on 163 | run: | 164 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_genotyping --genotyping_tool 'freebayes' --run_bcftools_stats 165 | - name: SKIPPING Test checking all skip steps work i.e. 
input bam, skipping straight to genotyping 166 | run: | 167 | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_bam,docker --skip_fastqc --skip_adapterremoval --skip_deduplication --skip_qualimap --skip_preseq --skip_damage_calculation --run_genotyping --genotyping_tool 'freebayes' 168 | - name: TRIMBAM Test bamutils works alone 169 | run: | 170 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_trim_bam 171 | - name: PMDTOOLS Test PMDtools works alone 172 | run: | 173 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_pmdtools 174 | - name: GENOTYPING_UG AND MULTIVCFANALYZER Test running GATK UnifiedGenotyper and MultiVCFAnalyzer, additional VCFS 175 | run: | 176 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_genotyping --genotyping_tool 'ug' --gatk_ug_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer --additional_vcf_files 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/vcf/JK2772_CATCAGTGAGTAGA_L008_R1_001.fastq.gz.tengrand.fq.combined.fq.mapped_rmdup.bam.unifiedgenotyper.vcf.gz' --write_allele_frequencies 177 | - name: COMPLEX LANE/LIBRARY MERGING Test running lane and library merging prior to GATK UnifiedGenotyper and running MultiVCFAnalyzer 178 | run: | 179 | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_complex,docker --run_genotyping --genotyping_tool 'ug' --gatk_ug_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer 180 | - name: GENOTYPING_UG ON TRIMMED BAM Test 181 | run: | 182 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_genotyping --run_trim_bam --genotyping_source 'trimmed' --genotyping_tool 'ug' --gatk_ug_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' 183 | - name: BAM_INPUT Run the basic pipeline with the bam input profile, skip AdapterRemoval as no convertBam 184 | run: | 185 | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_bam,docker --skip_adapterremoval 186 | - name: BAM_INPUT Run the basic pipeline with the bam input profile, convert to FASTQ for adapterremoval test and downstream 187 | run: | 188 | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_bam,docker --run_convertinputbam 189 | - name: METAGENOMIC Download MALT database 190 | run: | 191 | mkdir -p databases/malt 192 | readlink -f databases/malt/ 193 | for i in index0.idx ref.db ref.idx ref.inf table0.db table0.idx taxonomy.idx taxonomy.map taxonomy.tre; do wget https://github.com/nf-core/test-datasets/raw/eager/databases/malt/"$i" -P databases/malt/; done 194 | - name: METAGENOMIC Run the basic pipeline but with unmapped reads going into MALT 195 | run: | 196 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_bam_filtering --bam_unmapped_type 'fastq' --run_metagenomic_screening --metagenomic_tool 'malt' --database "/home/runner/work/eager/eager/databases/malt/" --malt_sam_output 197 | - name: METAGENOMIC Run the basic pipeline but low-complexity filtered reads going into MALT 198 | run: | 199 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_bam_filtering --bam_unmapped_type 'fastq' --run_metagenomic_screening --metagenomic_tool 'malt' --database "/home/runner/work/eager/eager/databases/malt/" --metagenomic_complexity_filter 200 | - name: MALTEXTRACT Download resource files 201 | run: | 202 | mkdir -p databases/maltextract 203 | for i in ncbi.tre ncbi.map; do wget https://github.com/rhuebler/HOPS/raw/0.33/Resources/"$i" -P databases/maltextract/; done 204 | - name: MALTEXTRACT Basic with MALT plus MaltExtract 205 | run: | 
206 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_bam_filtering --bam_unmapped_type 'fastq' --run_metagenomic_screening --metagenomic_tool 'malt' --database "/home/runner/work/eager/eager/databases/malt" --run_maltextract --maltextract_ncbifiles "/home/runner/work/eager/eager/databases/maltextract/" --maltextract_taxon_list 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/maltextract/MaltExtract_list.txt' 207 | - name: METAGENOMIC Run the basic pipeline but with unmapped reads going into Kraken 208 | run: | 209 | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_kraken,docker --run_bam_filtering --bam_unmapped_type 'fastq' 210 | - name: SNPCAPTURE Run the basic pipeline with the bam input profile, generating statistics with a SNP capture bed 211 | run: | 212 | wget https://github.com/nf-core/test-datasets/raw/eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz && gunzip 1240K.pos.list_hs37d5.0based.bed.gz 213 | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_humanbam,docker --skip_fastqc --skip_adapterremoval --skip_deduplication --snpcapture_bed 1240K.pos.list_hs37d5.0based.bed 214 | - name: SEXDETERMINATION Run the basic pipeline with the bam input profile, but don't convert BAM, skip everything but sex determination 215 | run: | 216 | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_humanbam,docker --skip_fastqc --skip_adapterremoval --skip_deduplication --skip_qualimap --run_sexdeterrmine 217 | - name: NUCLEAR CONTAMINATION Run basic pipeline with bam input profile, but don't convert BAM, skip everything but nuclear contamination estimation 218 | run: | 219 | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_humanbam,docker --skip_fastqc --skip_adapterremoval --skip_deduplication --skip_qualimap --run_nuclear_contamination 220 | - name: MTNUCRATIO Run basic pipeline with bam input profile, but don't convert BAM, skip everything but mtnucratio 221 | run: | 222 | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_humanbam,docker --skip_fastqc --skip_adapterremoval --skip_deduplication --skip_qualimap --skip_preseq --skip_damage_calculation --run_mtnucratio 223 | - name: RESCALING Run basic pipeline with mapDamage rescaling of BAM files. Note this will be slow 224 | run: | 225 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --run_mapdamage_rescaling --run_genotyping --genotyping_tool hc --genotyping_source 'rescaled' 226 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ![nf-core/eager](docs/images/nf-core_eager_logo_outline_drop.png) 2 | 3 | **A fully reproducible and state-of-the-art ancient DNA analysis pipeline**.
4 | 5 | [![GitHub Actions CI Status](https://github.com/nf-core/eager/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/eager/actions) 6 | [![GitHub Actions Linting Status](https://github.com/nf-core/eager/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/eager/actions) 7 | [![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A520.07.1-brightgreen.svg)](https://www.nextflow.io/) 8 | [![nf-core](https://img.shields.io/badge/nf--core-pipeline-brightgreen.svg)](https://nf-co.re/) 9 | [![DOI](https://zenodo.org/badge/135918251.svg)](https://zenodo.org/badge/latestdoi/135918251) 10 | [![Published in PeerJ](https://img.shields.io/badge/peerj-published-%2300B2FF)](https://peerj.com/articles/10947/) 11 | 12 | [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](https://bioconda.github.io/) 13 | [![Docker](https://img.shields.io/docker/automated/nfcore/eager.svg)](https://hub.docker.com/r/nfcore/eager) 14 | ![Singularity Container available](https://img.shields.io/badge/singularity-available-7E4C74.svg) 15 | 16 | [![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23eager-4A154B?logo=slack)](https://nfcore.slack.com/channels/eager) 17 | 18 | > [!IMPORTANT] 19 | > nf-core/eager versions 2.* are only compatible with Nextflow versions up to 22.10.6! 20 | 21 | ## Introduction 22 | 23 | 24 | **nf-core/eager** is a scalable and reproducible bioinformatics best-practice processing pipeline for genomic NGS data, with a focus on ancient DNA (aDNA) data. It is ideal for the (palaeo)genomic analysis of humans, animals, plants, microbes and even microbiomes. 25 | 26 | The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. The pipeline pre-processes raw data from FASTQ inputs, or preprocessed BAM inputs. It can align reads and perform extensive general NGS and aDNA-specific quality control on the results. It comes with docker, singularity or conda containers making installation trivial and results highly reproducible. 27 | 28 |

29 | ![nf-core/eager schematic workflow](docs/images/usage/eager2_workflow.png) 31 | 32 | ## Quick Start 33 | 34 | 1. Install [`nextflow`](https://nf-co.re/usage/installation) (`>=20.07.1` && `<=22.10.6`) 35 | 36 | 2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(please only use [`Conda`](https://conda.io/miniconda.html) as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_ 37 | 38 | 3. Download the pipeline and test it on a minimal dataset with a single command: 39 | 40 | ```bash 41 | nextflow run nf-core/eager -profile test,<docker/singularity/podman/shifter/charliecloud/conda/institute> 42 | ``` 43 | 44 | > Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. 45 | 46 | 4. Start running your own analysis! 47 | 48 | ```bash 49 | nextflow run nf-core/eager -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> --input '*_R{1,2}.fastq.gz' --fasta '<your_reference>.fasta' 50 | ``` 51 | 52 | 5. Once your run has completed successfully, clean up the intermediate files. 53 | 54 | ```bash 55 | nextflow clean -f -k 56 | ``` 57 | 58 | See [usage docs](https://nf-co.re/eager/usage) for all of the available options when running the pipeline. 59 | 60 | **N.B.** You can see an overview of the run in the MultiQC report located at `./results/MultiQC/multiqc_report.html` 61 | 62 | Modifications to the default pipeline are easily made using various options as described in the documentation. 63 | 64 | ## Pipeline Summary 65 | 66 | ### Default Steps 67 | 68 | By default the pipeline currently performs the following: 69 | 70 | * Create reference genome indices for mapping (`bwa`, `samtools`, and `picard`) 71 | * Sequencing quality control (`FastQC`) 72 | * Sequencing adapter removal, paired-end data merging (`AdapterRemoval`) 73 | * Read mapping to reference (`bwa aln`, `bwa mem`, `CircularMapper`, or `bowtie2`) 74 | * Post-mapping processing, statistics and conversion to BAM (`samtools`) 75 | * Ancient DNA C-to-T damage pattern visualisation (`DamageProfiler` or `mapDamage`) 76 | * PCR duplicate removal (`DeDup` or `MarkDuplicates`) 77 | * Post-mapping statistics and BAM quality control (`Qualimap`) 78 | * Library Complexity Estimation (`preseq`) 79 | * Overall pipeline statistics summaries (`MultiQC`) 80 | 81 | ### Additional Steps 82 | 83 | Additional functionality provided by the pipeline currently includes: 84 | 85 | #### Input 86 | 87 | * Automatic merging of complex sequencing setups (e.g. multiple lanes, sequencing configurations, library types), as sketched in the example below
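In practice, such setups are described in a TSV file supplied to `--input`, with one row per lane/library of a sample; the pipeline then merges rows belonging to the same library and sample at the appropriate steps. The following is a minimal illustrative sketch (hypothetical file names; tab-separated columns following the v2 TSV input schema described in the usage docs, which remain the authoritative template):

```tsv
Sample_Name  Library_ID  Lane  Colour_Chemistry  SeqType  Organism  Strandedness  UDG_Treatment  R1              R2              BAM
MAMMOTH1     LIB_A       1     4                 PE       Mammoth   double        none           m1_L1_R1.fq.gz  m1_L1_R2.fq.gz  NA
MAMMOTH1     LIB_A       2     4                 PE       Mammoth   double        none           m1_L2_R1.fq.gz  m1_L2_R2.fq.gz  NA
```

Here the two lanes of `LIB_A` would be merged automatically, and the run is started with just `nextflow run nf-core/eager -profile <your_profile> --input 'samples.tsv' --fasta '<your_reference>.fasta'`.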
88 | 89 | #### Preprocessing 90 | 91 | * Illumina two-coloured sequencer poly-G tail removal (`fastp`) 92 | * Post-AdapterRemoval trimming of FASTQ files prior to mapping (`fastp`) 93 | * Automatic conversion of unmapped reads to FASTQ (`samtools`) 94 | * Host DNA (mapped reads) stripping from input FASTQ files (for sensitive samples) 95 | 96 | #### aDNA Damage manipulation 97 | 98 | * Damage removal/clipping for UDG+/UDG-half treatment protocols (`BamUtil`) 99 | * Damaged reads extraction and assessment (`PMDTools`) 100 | * Nuclear DNA contamination estimation of human samples (`angsd`) 101 | 102 | #### Genotyping 103 | 104 | * Creation of VCF genotyping files (`GATK UnifiedGenotyper`, `GATK HaplotypeCaller` and `FreeBayes`) 105 | * Creation of EIGENSTRAT genotyping files (`pileupCaller`) 106 | * Creation of Genotype Likelihood files (`angsd`) 107 | * Consensus sequence FASTA creation (`VCF2Genome`) 108 | * SNP Table generation (`MultiVCFAnalyzer`) 109 | 110 | #### Biological Information 111 | 112 | * Mitochondrial to Nuclear read ratio calculation (`MtNucRatioCalculator`) 113 | * Statistical sex determination of human individuals (`Sex.DetERRmine`) 114 | 115 | #### Metagenomic Screening 116 | 117 | * Low sequence complexity filtering (`BBduk`) 118 | * Taxonomic binner with alignment (`MALT`; an example command is sketched below) 119 | * Taxonomic binner without alignment (`Kraken2`) 120 | * aDNA characteristic screening of taxonomically binned data from MALT (`MaltExtract`) 121 |
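As an illustration of how these options combine (flag names as exercised in the CI workflow earlier in this repository; the input files and database path are hypothetical), a run that sends unmapped reads into MALT for screening might look like:

```bash
nextflow run nf-core/eager -profile docker \
  --input 'samples.tsv' --fasta 'reference.fasta' \
  --run_bam_filtering --bam_unmapped_type 'fastq' \
  --run_metagenomic_screening --metagenomic_tool 'malt' \
  --database '/path/to/malt/db/'
```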
122 | #### Functionality Overview 123 | 124 | A graphical overview of suggested routes through the pipeline depending on context can be seen below. 125 | 126 |
127 | ![nf-core/eager metro map](docs/images/usage/eager2_metromap_complex.png) 129 | 130 | ## Documentation 131 | 132 | The nf-core/eager pipeline comes with documentation about the pipeline: [usage](https://nf-co.re/eager/usage) and [output](https://nf-co.re/eager/output). 133 | 134 | 1. [Nextflow installation](https://nf-co.re/usage/installation) 135 | 2. Pipeline configuration 136 | * [Pipeline installation](https://nf-co.re/usage/local_installation) 137 | * [Adding your own system config](https://nf-co.re/usage/adding_own_config) 138 | * [Reference genomes](https://nf-co.re/usage/reference_genomes) 139 | 3. [Running the pipeline](https://nf-co.re/eager/usage) 140 | * This includes tutorials, FAQs, and troubleshooting instructions 141 | 4. [Output and how to interpret the results](https://nf-co.re/eager/output) 142 | 143 | ## Credits 144 | 145 | This pipeline was mostly written by Alexander Peltzer ([apeltzer](https://github.com/apeltzer)) and [James A. Fellows Yates](https://github.com/jfy133), with contributions from [Stephen Clayton](https://github.com/sc13-bioinf), [Thiseas C. Lamnidis](https://github.com/TCLamnidis), [Maxime Borry](https://github.com/maxibor), [Zandra Fagernäs](https://github.com/ZandraFagernas), [Aida Andrades Valtueña](https://github.com/aidaanva) and [Maxime Garcia](https://github.com/MaxUlysse), and the nf-core community. 146 | 147 | We thank the following people for their extensive assistance in the development 148 | of this pipeline: 149 | 150 | ## Authors (alphabetical) 151 | 152 | * [Aida Andrades Valtueña](https://github.com/aidaanva) 153 | * [Alexander Peltzer](https://github.com/apeltzer) 154 | * [James A. Fellows Yates](https://github.com/jfy133) 155 | * [Judith Neukamm](https://github.com/JudithNeukamm) 156 | * [Maxime Borry](https://github.com/maxibor) 157 | * [Maxime Garcia](https://github.com/MaxUlysse) 158 | * [Stephen Clayton](https://github.com/sc13-bioinf) 159 | * [Thiseas C. Lamnidis](https://github.com/TCLamnidis) 160 | * [Zandra Fagernäs](https://github.com/ZandraFagernas) 161 | 162 | ## Additional Contributors (alphabetical) 163 | 164 | Those who have provided conceptual guidance, suggestions, bug reports, etc.
165 | 166 | * [Alex Hübner](https://github.com/alexhbnr) 167 | * [Alexandre Gilardet](https://github.com/alexandregilardet) 168 | * Arielle Munters 169 | * [Åshild Vågene](https://github.com/ashildv) 170 | * [Asmaa Ali](https://github.com/asmaa-a-abdelwahab) 171 | * [Charles Plessy](https://github.com/charles-plessy) 172 | * [Elina Salmela](https://github.com/esalmela) 173 | * [Fabian Lehmann](https://github.com/Lehmann-Fabian) 174 | * [He Yu](https://github.com/paulayu) 175 | * [Hester van Schalkwyk](https://github.com/hesterjvs) 176 | * [Ido Bar](https://github.com/IdoBar) 177 | * [Irina Velsko](https://github.com/ivelsko) 178 | * [Işın Altınkaya](https://github.com/isinaltinkaya) 179 | * [Johan Nylander](https://github.com/nylander) 180 | * [Jonas Niemann](https://github.com/NiemannJ) 181 | * [Katerine Eaton](https://github.com/ktmeaton) 182 | * [Kathrin Nägele](https://github.com/KathrinNaegele) 183 | * [Kevin Lord](https://github.com/lordkev) 184 | * [Laura Lacher](https://github.com/neija2611) 185 | * [Luc Venturini](https://github.com/lucventurini) 186 | * [Mahesh Binzer-Panchal](https://github.com/mahesh-panchal) 187 | * [Marcel Keller](https://github.com/marcel-keller) 188 | * [Megan Michel](https://github.com/meganemichel) 189 | * [Pierre Lindenbaum](https://github.com/lindenb) 190 | * [Pontus Skoglund](https://github.com/pontussk) 191 | * [Raphael Eisenhofer](https://github.com/EisenRa) 192 | * [Roberta Davidson](https://github.com/roberta-davidson) 193 | * [Rodrigo Barquera](https://github.com/RodrigoBarquera) 194 | * [Selina Carlhoff](https://github.com/scarlhoff) 195 | * [Torsten Günther](https://bitbucket.org/tguenther) 196 | 197 | If you've contributed and you're missing in here, please let us know and we will of course add you! 198 | 199 | ## Contributions and Support 200 | 201 | If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md). 202 | 203 | For further information or help, don't hesitate to get in touch on the [Slack `#eager` channel](https://nfcore.slack.com/channels/eager) (you can join with [this invite](https://nf-co.re/join/slack)). 204 | 205 | ## Citations 206 | 207 | If you use `nf-core/eager` for your analysis, please cite the `eager` publication as follows: 208 | 209 | > Fellows Yates JA, Lamnidis TC, Borry M, Valtueña Andrades A, Fagernäs Z, Clayton S, Garcia MU, Neukamm J, Peltzer A. 2021. Reproducible, portable, and efficient ancient genome reconstruction with nf-core/eager. PeerJ 9:e10947. DOI: [10.7717/peerj.10947](https://doi.org/10.7717/peerj.10947). 210 | 211 | You can cite the eager zenodo record for a specific version using the following [doi: 10.5281/zenodo.3698082](https://zenodo.org/badge/latestdoi/135918251). 212 | 213 | You can cite the `nf-core` publication as follows: 214 | 215 | > **The nf-core framework for community-curated bioinformatics pipelines.** 216 | > 217 | > Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen. 218 | > 219 | > _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x). 220 | 221 | In addition, references of tools and data used in this pipeline are as follows: 222 | 223 | * **EAGER v1, CircularMapper, DeDup** Peltzer, A., Jäger, G., Herbig, A., Seitz, A., Kniep, C., Krause, J., & Nieselt, K. (2016). EAGER: efficient ancient genome reconstruction. Genome Biology, 17(1), 1–14.
[https://doi.org/10.1186/s13059-016-0918-z](https://doi.org/10.1186/s13059-016-0918-z). Download: [https://github.com/apeltzer/EAGER-GUI](https://github.com/apeltzer/EAGER-GUI) and [https://github.com/apeltzer/EAGER-CLI](https://github.com/apeltzer/EAGER-CLI) 224 | * **FastQC** Download: [https://www.bioinformatics.babraham.ac.uk/projects/fastqc/](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) 225 | * **AdapterRemoval v2** Schubert, M., Lindgreen, S., & Orlando, L. (2016). AdapterRemoval v2: rapid adapter trimming, identification, and read merging. BMC Research Notes, 9, 88. [https://doi.org/10.1186/s13104-016-1900-2](https://doi.org/10.1186/s13104-016-1900-2). Download: [https://github.com/MikkelSchubert/adapterremoval](https://github.com/MikkelSchubert/adapterremoval) 226 | * **bwa** Li, H., & Durbin, R. (2009). Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics , 25(14), 1754–1760. [https://doi.org/10.1093/bioinformatics/btp324](https://doi.org/10.1093/bioinformatics/btp324). Download: [http://bio-bwa.sourceforge.net/bwa.shtml](http://bio-bwa.sourceforge.net/bwa.shtml) 227 | * **SAMtools** Li, H., Handsaker, B., Wysoker, A., Fennell, T., Ruan, J., Homer, N., … 1000 Genome Project Data Processing Subgroup. (2009). The Sequence Alignment/Map format and SAMtools. Bioinformatics , 25(16), 2078–2079. [https://doi.org/10.1093/bioinformatics/btp352](https://doi.org/10.1093/bioinformatics/btp352). Download: [http://www.htslib.org/](http://www.htslib.org/) 228 | * **DamageProfiler** Neukamm, J., Peltzer, A., & Nieselt, K. (2020). DamageProfiler: Fast damage pattern calculation for ancient DNA. In Bioinformatics (btab190). [https://doi.org/10.1093/bioinformatics/btab190](https://doi.org/10.1093/bioinformatics/btab190). Download: [https://github.com/Integrative-Transcriptomics/DamageProfiler](https://github.com/Integrative-Transcriptomics/DamageProfiler) 229 | * **QualiMap** Okonechnikov, K., Conesa, A., & García-Alcalde, F. (2016). Qualimap 2: advanced multi-sample quality control for high-throughput sequencing data. Bioinformatics , 32(2), 292–294. [https://doi.org/10.1093/bioinformatics/btv566](https://doi.org/10.1093/bioinformatics/btv566). Download: [http://qualimap.bioinfo.cipf.es/](http://qualimap.bioinfo.cipf.es/) 230 | * **preseq** Daley, T., & Smith, A. D. (2013). Predicting the molecular complexity of sequencing libraries. Nature Methods, 10(4), 325–327. [https://doi.org/10.1038/nmeth.2375](https://doi.org/10.1038/nmeth.2375). Download: [http://smithlabresearch.org/software/preseq/](http://smithlabresearch.org/software/preseq/) 231 | * **PMDTools** Skoglund, P., Northoff, B. H., Shunkov, M. V., Derevianko, A. P., Pääbo, S., Krause, J., & Jakobsson, M. (2014). Separating endogenous ancient DNA from modern day contamination in a Siberian Neandertal. Proceedings of the National Academy of Sciences of the United States of America, 111(6), 2229–2234. [https://doi.org/10.1073/pnas.1318934111](https://doi.org/10.1073/pnas.1318934111). Download: [https://github.com/pontussk/PMDtools](https://github.com/pontussk/PMDtools) 232 | * **MultiQC** Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. [https://doi.org/10.1093/bioinformatics/btw354](https://doi.org/10.1093/bioinformatics/btw354). Download: [https://multiqc.info/](https://multiqc.info/) 233 | * **BamUtils** Jun, G., Wing, M. K., Abecasis, G. R., & Kang, H. M. 
(2015). An efficient and scalable analysis framework for variant extraction and refinement from population-scale DNA sequence data. Genome Research, 25(6), 918–925. [https://doi.org/10.1101/gr.176552.114](https://doi.org/10.1101/gr.176552.114). Download: [https://genome.sph.umich.edu/wiki/BamUtil](https://genome.sph.umich.edu/wiki/BamUtil) 234 | * **FastP** Chen, S., Zhou, Y., Chen, Y., & Gu, J. (2018). fastp: an ultra-fast all-in-one FASTQ preprocessor. Bioinformatics , 34(17), i884–i890. [https://doi.org/10.1093/bioinformatics/bty560](https://doi.org/10.1093/bioinformatics/bty560). Download: [https://github.com/OpenGene/fastp](https://github.com/OpenGene/fastp) 235 | * **GATK 3.5** DePristo, M. A., Banks, E., Poplin, R., Garimella, K. V., Maguire, J. R., Hartl, C., … Daly, M. J. (2011). A framework for variation discovery and genotyping using next-generation DNA sequencing data. Nature Genetics, 43(5), 491–498. [https://doi.org/10.1038/ng.806](https://doi.org/10.1038/ng.806). Download: [https://console.cloud.google.com/storage/browser/gatk](https://console.cloud.google.com/storage/browser/gatk) 236 | * **GATK 4.X** - no citation available yet. Download: [https://github.com/broadinstitute/gatk/releases](https://github.com/broadinstitute/gatk/releases) 237 | * **VCF2Genome** - Alexander Herbig and Alex Peltzer (unpublished). Download: [https://github.com/apeltzer/VCF2Genome](https://github.com/apeltzer/VCF2Genome) 238 | * **MultiVCFAnalyzer** Bos, K.I. et al., 2014. Pre-Columbian mycobacterial genomes reveal seals as a source of New World human tuberculosis. Nature, 514(7523), pp.494–497. Available at: [http://dx.doi.org/10.1038/nature13591](http://dx.doi.org/10.1038/nature13591). Download: [https://github.com/alexherbig/MultiVCFAnalyzer](https://github.com/alexherbig/MultiVCFAnalyzer) 239 | * **MTNucRatioCalculator** Alex Peltzer (Unpublished). Download: [https://github.com/apeltzer/MTNucRatioCalculator](https://github.com/apeltzer/MTNucRatioCalculator) 240 | * **Sex.DetERRmine.py** Lamnidis, T.C. et al., 2018. Ancient Fennoscandian genomes reveal origin and spread of Siberian ancestry in Europe. Nature communications, 9(1), p.5018. Available at: [http://dx.doi.org/10.1038/s41467-018-07483-5](http://dx.doi.org/10.1038/s41467-018-07483-5). Download: [https://github.com/TCLamnidis/Sex.DetERRmine.git](https://github.com/TCLamnidis/Sex.DetERRmine.git) 241 | * **ANGSD** Korneliussen, T.S., Albrechtsen, A. & Nielsen, R., 2014. ANGSD: Analysis of Next Generation Sequencing Data. BMC bioinformatics, 15, p.356. Available at: [http://dx.doi.org/10.1186/s12859-014-0356-4](http://dx.doi.org/10.1186/s12859-014-0356-4). Download: [https://github.com/ANGSD/angsd](https://github.com/ANGSD/angsd) 242 | * **bedtools** Quinlan, A.R. & Hall, I.M., 2010. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics , 26(6), pp.841–842. Available at: [http://dx.doi.org/10.1093/bioinformatics/btq033](http://dx.doi.org/10.1093/bioinformatics/btq033). Download: [https://github.com/arq5x/bedtools2/releases](https://github.com/arq5x/bedtools2/releases) 243 | * **MALT**. Download: [https://software-ab.informatik.uni-tuebingen.de/download/malt/welcome.html](https://software-ab.informatik.uni-tuebingen.de/download/malt/welcome.html) 244 | * Vågene, Å.J. et al., 2018. Salmonella enterica genomes from victims of a major sixteenth-century epidemic in Mexico. Nature ecology & evolution, 2(3), pp.520–528.
Available at: [http://dx.doi.org/10.1038/s41559-017-0446-6](http://dx.doi.org/10.1038/s41559-017-0446-6). 245 | * Herbig, A. et al., 2016. MALT: Fast alignment and analysis of metagenomic DNA sequence data applied to the Tyrolean Iceman. bioRxiv, p.050559. Available at: [http://biorxiv.org/content/early/2016/04/27/050559](http://biorxiv.org/content/early/2016/04/27/050559). 246 | * **MaltExtract** Huebler, R. et al., 2019. HOPS: Automated detection and authentication of pathogen DNA in archaeological remains. bioRxiv, p.534198. Available at: [https://www.biorxiv.org/content/10.1101/534198v1?rss=1](https://www.biorxiv.org/content/10.1101/534198v1?rss=1). Download: [https://github.com/rhuebler/MaltExtract](https://github.com/rhuebler/MaltExtract) 247 | * **Kraken2** Wood, D. et al., 2019. Improved metagenomic analysis with Kraken 2. Genome Biology volume 20, Article number: 257. Available at: [https://doi.org/10.1186/s13059-019-1891-0](https://doi.org/10.1186/s13059-019-1891-0). Download: [https://ccb.jhu.edu/software/kraken2/](https://ccb.jhu.edu/software/kraken2/) 248 | * **endorS.py** Aida Andrades Valtueña (Unpublished). Download: [https://github.com/aidaanva/endorS.py](https://github.com/aidaanva/endorS.py) 249 | * **Bowtie2** Langmead, B. and Salzberg, S. L. 2012. Fast gapped-read alignment with Bowtie 2. Nature methods, 9(4), p. 357–359. doi: [10.1038/nmeth.1923](https://dx.doi.org/10.1038/nmeth.1923). 250 | * **sequenceTools** Stephan Schiffels (Unpublished). Download: [https://github.com/stschiff/sequenceTools](https://github.com/stschiff/sequenceTools) 251 | * **EigenstratDatabaseTools** Thiseas C. Lamnidis (Unpublished). Download: [https://github.com/TCLamnidis/EigenStratDatabaseTools.git](https://github.com/TCLamnidis/EigenStratDatabaseTools.git) 252 | * **mapDamage** Jónsson, H., et al 2013. mapDamage2.0: fast approximate Bayesian estimates of ancient DNA damage parameters. Bioinformatics , 29(13), 1682–1684. [https://doi.org/10.1093/bioinformatics/btt193](https://doi.org/10.1093/bioinformatics/btt193) 253 | * **BBduk** Brian Bushnell (Unpublished). Download: [https://sourceforge.net/projects/bbmap/](https://sourceforge.net/projects/bbmap/) 254 | 255 | ## Data References 256 | 257 | This repository uses test data from the following studies: 258 | 259 | * Fellows Yates, J. A. et al. (2017) ‘Central European Woolly Mammoth Population Dynamics: Insights from Late Pleistocene Mitochondrial Genomes’, Scientific reports, 7(1), p. 17714. [doi: 10.1038/s41598-017-17723-1](https://doi.org/10.1038/s41598-017-17723-1). 260 | * Gamba, C. et al. (2014) ‘Genome flux and stasis in a five millennium transect of European prehistory’, Nature communications, 5, p. 5257. [doi: 10.1038/ncomms6257](https://doi.org/10.1038/ncomms6257). 261 | * Star, B. et al. (2017) ‘Ancient DNA reveals the Arctic origin of Viking Age cod from Haithabu, Germany’, Proceedings of the National Academy of Sciences of the United States of America, 114(34), pp. 9152–9157. [doi: 10.1073/pnas.1710186114](https://doi.org/10.1073/pnas.1710186114). 262 | * de Barros Damgaard, P. et al. (2018). '137 ancient human genomes from across the Eurasian steppes.', Nature, 557(7705), 369–374. [doi: 10.1038/s41586-018-0094-2](https://doi.org/10.1038/s41586-018-0094-2) 263 | --------------------------------------------------------------------------------