├── .readthedocs.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── CONTRIBUTORS.md ├── LICENSE.md ├── README.rst ├── VERSION ├── assets └── images │ ├── bbknn.svg │ ├── bbknn_scenic.svg │ ├── decontx.svg │ ├── harmony.svg │ ├── harmony_scenic.svg │ ├── mnncorrect.svg │ ├── scenic.svg │ ├── scenic_multiruns.svg │ ├── single_sample.svg │ ├── single_sample_decontx.svg │ ├── single_sample_decontx_scrublet.svg │ ├── single_sample_scenic.svg │ └── single_sample_scrublet.svg ├── conf ├── atac │ ├── preprocess.config │ ├── preprocess_rapid.config │ └── qc_filtering.config ├── compute_resources.config ├── compute_resources_with_retry.config ├── docker.config ├── generic.config ├── genomes │ ├── dm6.config │ ├── hg19.config │ ├── hg38.config │ └── mm10.config ├── global.config ├── logger.config ├── min.config ├── nemesh.config ├── singularity.config ├── test.config ├── test__bbknn.config ├── test__bbknn_scenic.config ├── test__cell_annotate_filter.config ├── test__compute_resources.config ├── test__decontx.config ├── test__harmony.config ├── test__harmony_scenic.config ├── test__mnncorrect.config ├── test__scenic.config ├── test__scenic_multiruns.config ├── test__single_sample.config ├── test__single_sample_decontx_correct.config ├── test__single_sample_decontx_correct_scrublet.config ├── test__single_sample_decontx_filter.config ├── test__single_sample_param_exploration.config ├── test__single_sample_scenic.config ├── test__single_sample_scenic_multiruns.config ├── test__single_sample_scrublet.config ├── test_disabled.config ├── vpcx.config └── vsc.config ├── data ├── 10x │ └── 1k_pbmc │ │ └── metadata.tsv ├── README.md └── sample_data_tiny │ └── sample_data_tiny_dummy_annotation.tsv.gz ├── docs ├── Makefile ├── attributions.rst ├── case-studies.rst ├── conf.py ├── development.rst ├── features.rst ├── getting-started.rst ├── index.rst ├── input_formats.rst ├── pipelines.rst ├── scatac-seq.rst └── scatac-seq_qc.rst ├── main.nf ├── main_atac.nf ├── nextflow.config ├── samtools_markdup.sh ├── src ├── barcard │ ├── barcard.config │ ├── bin │ │ ├── .ipynb_checkpoints │ │ │ ├── barcard_otsu_filtering-Copy1-checkpoint.ipynb │ │ │ └── barcard_otsu_filtering-checkpoint.ipynb │ │ ├── barcard_otsu_filtering-Copy1.ipynb │ │ ├── barcard_otsu_filtering.ipynb │ │ └── barcard_otsu_filtering_test.ipynb │ ├── conf │ │ └── barcard_barcode_multiplet.config │ ├── main.nf │ └── processes │ │ ├── create_fragments_from_bam.nf │ │ ├── detect_barcode_multiplets.nf │ │ ├── detect_barcode_multiplets.nf_new │ │ ├── detect_barcode_multiplets.nf_old │ │ ├── merge_barcode_multiplets.nf │ │ └── report.nf ├── bwamaptools │ ├── .gitattributes │ ├── .gitignore │ ├── Dockerfile │ ├── LICENSE │ ├── README.rst │ ├── bin │ │ ├── .gitkeep │ │ └── mapping_summary.sh │ ├── bwamaptools.config │ ├── conf │ │ ├── .gitkeep │ │ └── bwa_mapping.config │ ├── main.nf │ ├── processes │ │ ├── .gitkeep │ │ ├── index.nf │ │ ├── mapping.nf │ │ └── mapping_summary.nf │ └── workflows │ │ └── .gitkeep ├── channels │ ├── channels.nf │ ├── conf │ │ ├── bam.config │ │ ├── csv.config │ │ ├── fragments.config │ │ ├── h5ad.config │ │ ├── loom.config │ │ ├── seurat_rds.config │ │ ├── sra.config │ │ ├── tenx_arc_cellranger_mex.config │ │ ├── tenx_atac_cellranger_mex.config │ │ ├── tenx_cellranger_h5.config │ │ ├── tenx_cellranger_mex.config │ │ └── tsv.config │ ├── file.nf │ ├── singleend.nf │ ├── sra.nf │ └── tenx.nf ├── edirect │ ├── .gitignore │ ├── LICENSE │ ├── edirect.config │ ├── processes │ │ └── sra_metadata.nf │ └── workflows │ │ └── sra_fastq_urls.nf 
├── popscle │ ├── .gitattributes │ ├── .gitignore │ ├── Dockerfile │ ├── LICENSE │ ├── README.rst │ ├── conf │ │ └── .gitkeep │ ├── main.nf │ ├── popscle.config │ ├── processes │ │ ├── demuxlet.nf │ │ └── dsc_pileup.nf │ └── workflows │ │ ├── demuxlet.nf │ │ └── dsc_pileup.nf ├── pycistopic │ ├── .gitattributes │ ├── .gitignore │ ├── LICENSE │ ├── README.rst │ ├── bin │ │ ├── .gitkeep │ │ ├── barcode_level_statistics.py │ │ ├── biomart_annot.py │ │ ├── call_cells.py │ │ ├── compute_qc_stats.py │ │ ├── plot_qc_stats.py │ │ ├── pycisTopic_qc_report_template.ipynb │ │ └── pycisTopic_qc_report_template.ipynb2 │ ├── conf │ │ ├── .gitkeep │ │ ├── pycistopic_dmel.config │ │ ├── pycistopic_hg38.config │ │ └── pycistopic_mm10.config │ ├── processes │ │ ├── .gitkeep │ │ ├── barcode_level_statistics.nf │ │ ├── biomart_annot.nf │ │ ├── call_cells.nf │ │ ├── compute_qc_stats.nf │ │ ├── macs2_call_peaks.nf │ │ └── plot_qc_stats.nf │ ├── pycistopic.config │ └── workflows │ │ └── .gitkeep ├── samtools │ ├── Dockerfile │ ├── Dockerfile.samtools-base │ ├── README.rst │ ├── processes │ │ ├── merge_bam.nf │ │ └── sort_bam.nf │ └── samtools.config ├── singlecelltoolkit │ ├── .gitattributes │ ├── .gitignore │ ├── Dockerfile │ ├── LICENSE │ ├── README.rst │ ├── bin │ │ └── .gitkeep │ ├── conf │ │ ├── .gitkeep │ │ ├── sctk_mapping.config │ │ └── sctk_saturation.config │ ├── main.nf │ ├── processes │ │ ├── barcode_10x_scatac_fastqs.nf │ │ ├── barcode_correction.nf │ │ ├── extract_and_correct_biorad_barcode.nf │ │ ├── extract_hydrop_atac_barcode.nf │ │ ├── fix_and_compress_fastqs.nf │ │ └── saturation.nf │ ├── singlecelltoolkit.config │ └── workflows │ │ └── .gitkeep ├── trimgalore │ ├── .gitattributes │ ├── .gitignore │ ├── Dockerfile │ ├── LICENSE │ ├── README.rst │ ├── bin │ │ └── .gitkeep │ ├── conf │ │ └── .gitkeep │ ├── processes │ │ ├── .gitkeep │ │ └── trim.nf │ ├── trimgalore.config │ └── workflows │ │ └── .gitkeep └── utils │ ├── Dockerfile │ ├── README.md │ ├── bin │ ├── create_cistopic_object.R │ ├── h5ad_to_filtered_loom.py │ ├── h5ad_to_loom.py │ ├── reports │ │ └── workflow_configuration_template.ipynb │ ├── sc_file_concatenator.py │ ├── sc_file_converter.R │ ├── sc_file_converter.py │ ├── sc_h5ad_annotate_by_cell_metadata.py │ ├── sc_h5ad_annotate_by_sample_metadata.py │ ├── sc_h5ad_apply_obs_filter.py │ ├── sc_h5ad_extract_metadata.py │ ├── sc_h5ad_merge.py │ ├── sc_h5ad_prepare_obs_filter.py │ ├── sc_h5ad_update.py │ ├── sc_h5ad_update_metadata.py │ ├── sc_star_concatenator.py │ └── sra_to_metadata.py │ ├── conf │ ├── base.config │ ├── cell_annotate.config │ ├── cell_filter.config │ ├── h5ad_clean.config │ ├── h5ad_concatenate.config │ ├── h5ad_extract_metadata.config │ ├── h5ad_update_metadata.config │ ├── sample_annotate.config │ ├── sample_annotate_old_v1.config │ ├── scope.config │ ├── sra_metadata.config │ ├── sra_metadata.test.config │ ├── sra_normalize_fastqs.config │ ├── star_concatenate.config │ ├── test.config │ ├── update_feature_nomenclature.config │ └── workflow_report.config │ ├── main.test.nf │ ├── processes │ ├── .ipynb_checkpoints │ │ └── config-checkpoint.nf │ ├── config.nf │ ├── files.nf │ ├── gtf.nf │ ├── h5adAnnotate.nf │ ├── h5adExtractMetadata.nf │ ├── h5adMerge.nf │ ├── h5adSubset.nf │ ├── h5adToLoom.nf │ ├── h5adUpdate.nf │ ├── h5adUpdateMetadata.nf │ ├── reports.nf │ ├── sra.nf │ └── utils.nf │ ├── utils.config │ └── workflows │ ├── annotateByCellMetadata.nf │ ├── downloadFromSRA.nf │ ├── fileConverter.nf │ ├── filterAnnotateClean.nf │ ├── filterByCellMetadata.nf │ ├── 
finalize.nf │ ├── updateFeatureNomenclature.nf │ └── utils.nf └── workflows ├── atac ├── .ipynb_checkpoints │ └── preprocess_rapid-checkpoint.nf ├── preprocess.nf ├── preprocess_rapid.nf └── qc_filtering.nf ├── bbknn.nf ├── harmony.nf ├── mnncorrect.nf ├── multi_sample.nf ├── nemesh.nf ├── single_sample.nf ├── single_sample_star.nf └── star.nf /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sphinx: 4 | configuration: docs/conf.py 5 | 6 | formats: all 7 | 8 | submodules: 9 | exclude: all 10 | 11 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at kristofer.davie@kuleuven.vib.be. 
All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | When contributing to this repository, please first discuss the change you wish to make via an issue, 4 | [email](mailto:vib.singlecell.nf@gmail.com), or any other method (e.g. [Gitter](https://gitter.im/vib-singlecell-nf/community)) with the owners of this repository before making a change. 5 | 6 | Please note we have a code of conduct; please follow it in all your interactions with the project. 7 | 8 | ## Pull Request Process 9 | 10 | All in-development pull requests must be submitted to the `develop` branch. Only the `develop` 11 | branch can be merged into the `master` branch; this will be done when sufficient changes are in 12 | place to increase the version of the pipeline, and will be performed at the discretion of the lead 13 | developers. 14 | 15 | 1. Ensure any install or build dependencies are removed before the end of the layer when doing a 16 | build. 17 | 2. Update the README.md with details of changes to the interface; this includes new environment 18 | variables, exposed ports, useful file locations and container parameters. 19 | 3. Increase the version numbers in any example files and the README.md to the new version that 20 | this Pull Request would represent. The versioning scheme we use is [SemVer](http://semver.org/). 21 | 4. Ensure that all current and new tests pass successfully. 22 | 5. The Pull Request can be merged once you have the sign-off of two other developers, and must 23 | be merged by one of the lead developers.
24 | -------------------------------------------------------------------------------- /CONTRIBUTORS.md: -------------------------------------------------------------------------------- 1 | # Contributors 2 | 3 | `vib-singlecell-nf` is the result of a collaboration between the following groups: 4 | 5 | 1) Contributed to the development of the pipelines and modules 6 | - [Stein Aerts Lab (VIB-KULeuven)](https://www.aertslab.org/) 7 | - Gert Hulselmans - Lead Developer (scATAC pipeline) 8 | - Florian De Rop - Developer/Testing (scATAC pipeline) 9 | - Chris Flerin - Former Lead Developer (scATAC pipeline) 10 | 11 | 2) Provided input, expert advice, testing, benchmarking and fruitful discussions 12 | - [Single Cell Bioinformatics Expertise Unit (CBD VIB)](https://cbd.vib.be/research/expertise-units/bioinformatics/) 13 | - Kris Davie - Lead Developer 14 | 15 | For a full breakdown of code contributions for each repository, see GitHub. 16 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | PUMATAC 2 | ============== 3 | Pipeline for Universal Mapping of ATAC-seq 4 | 5 | |PUMATAC| |ReadTheDocs| |Zenodo| |Gitter| |Nextflow| 6 | 7 | A detailed, step-by-step tutorial with examples is available `here `_. 8 | 9 | If PUMATAC is useful for your research, consider citing: 10 | 11 | - PUMATAC All Versions (latest): `10.5281/zenodo.7764892 `_. 12 | - Our Nature Biotechnology article: `10.1038/s41587-023-01881-x `_. 13 | 14 | Currently, a preprocessing workflow is available, which takes FASTQ inputs, applies barcode correction, read trimming, and BWA mapping, and outputs BAM and fragments files for further downstream analysis. 15 | 16 | .. |VSN-Pipelines| image:: https://img.shields.io/github/v/release/vib-singlecell-nf/vsn-pipelines 17 | :target: https://github.com/vib-singlecell-nf/vsn-pipelines/releases 18 | :alt: GitHub release (latest by date) 19 | 20 | .. |PUMATAC| image:: https://img.shields.io/github/v/release/vib-singlecell-nf/vsn-pipelines 21 | :target: https://github.com/aertslab/ATACflow/releases 22 | :alt: GitHub release (latest by date) 23 | 24 | .. |ReadTheDocs| image:: https://readthedocs.org/projects/vsn-pipelines/badge/?version=latest 25 | :target: https://vsn-pipelines.readthedocs.io/en/latest/?badge=latest 26 | :alt: Documentation Status 27 | 28 | .. |Nextflow| image:: https://img.shields.io/badge/nextflow-21.04.3-brightgreen.svg 29 | :target: https://www.nextflow.io/ 30 | :alt: Nextflow 31 | 32 | .. |Gitter| image:: https://badges.gitter.im/vib-singlecell-nf/community.svg 33 | :target: https://gitter.im/vib-singlecell-nf/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge 34 | :alt: Gitter 35 | 36 | .. 
|Zenodo| image:: https://zenodo.org/badge/199477571.svg 37 | :target: https://doi.org/10.5281/zenodo.7764884 38 | :alt: Zenodo 39 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 0.27.0 -------------------------------------------------------------------------------- /conf/atac/preprocess.config: -------------------------------------------------------------------------------- 1 | params { 2 | atac_preprocess_tools { 3 | mark_duplicates_method = 'MarkDuplicates' 4 | adapter_trimming_method = 'Trim_Galore' 5 | } 6 | data { 7 | atac_preprocess { 8 | metadata = 'metadata.tsv' 9 | } 10 | } 11 | } 12 | 13 | includeConfig './../../src/singlecelltoolkit/singlecelltoolkit.config' 14 | includeConfig './../../src/singlecelltoolkit/conf/sctk_mapping.config' 15 | includeConfig './../../src/trimgalore/trimgalore.config' 16 | includeConfig './../../src/fastp/fastp.config' 17 | includeConfig './../../src/bwamaptools/bwamaptools.config' 18 | includeConfig './../../src/gatk/gatk.config' 19 | includeConfig './../../src/bwamaptools/conf/bwa_mapping.config' 20 | includeConfig './../../src/sinto/sinto.config' 21 | includeConfig './../../src/bap/bap.config' 22 | //includeConfig './../../src/bap/conf/bap_biorad_debarcode.config' 23 | 24 | -------------------------------------------------------------------------------- /conf/atac/preprocess_rapid.config: -------------------------------------------------------------------------------- 1 | params { 2 | atac_preprocess_tools { 3 | mark_duplicates_method = 'MarkDuplicates' 4 | adapter_trimming_method = 'Trim_Galore' 5 | } 6 | data { 7 | atac_preprocess { 8 | metadata = 'metadata.tsv' 9 | } 10 | } 11 | } 12 | 13 | includeConfig './../../src/singlecelltoolkit/singlecelltoolkit.config' 14 | includeConfig './../../src/singlecelltoolkit/conf/sctk_mapping.config' 15 | includeConfig './../../src/trimgalore/trimgalore.config' 16 | includeConfig './../../src/bwamaptools/bwamaptools.config' 17 | includeConfig './../../src/samtools/samtools.config' 18 | includeConfig './../../src/bwamaptools/conf/bwa_mapping.config' 19 | includeConfig './../../src/barcard/conf/barcard_barcode_multiplet.config' 20 | -------------------------------------------------------------------------------- /conf/atac/qc_filtering.config: -------------------------------------------------------------------------------- 1 | includeConfig './../../src/pycistopic/pycistopic.config' 2 | includeConfig './../../src/singlecelltoolkit/singlecelltoolkit.config' 3 | includeConfig './../../src/singlecelltoolkit/conf/sctk_saturation.config' 4 | 5 | -------------------------------------------------------------------------------- /conf/compute_resources.config: -------------------------------------------------------------------------------- 1 | 2 | // define computing resources via process labels 3 | process { 4 | 5 | // set global executor for all processes. 
Can be overridden by other tool-specific labels 6 | executor = 'local' 7 | 8 | // set default options that apply to all processes: 9 | cpus = 2 10 | memory = '60 GB' 11 | 12 | // additional cluster options (applies to grid based executors): 13 | clusterOptions = "-A cluster_account" 14 | 15 | // set a default compute profile 16 | withLabel: 'compute_resources__default' { 17 | time = '1h' 18 | } 19 | 20 | withLabel:compute_resources__sctk_barcode { 21 | cpus = 2 22 | memory = '20 GB' 23 | maxForks = 8 24 | } 25 | 26 | withLabel:compute_resources__barcode_10x_scatac_fastq_5cpus { 27 | cpus = 5 28 | memory = '40 GB' 29 | maxForks = 5 30 | } 31 | 32 | withLabel:compute_resources__trimgalore__trim_5cpus { 33 | cpus = 5 34 | memory = '20 GB' 35 | maxForks = 5 36 | } 37 | 38 | withLabel:compute_resources__picard__merge_sam_files_and_sort { 39 | cpus = 4 40 | memory = '100 GB' 41 | maxForks = 4 42 | } 43 | 44 | withLabel:compute_resources__picard__mark_duplicates_and_sort { 45 | cpus = 8 46 | memory = '100 GB' 47 | maxForks = 4 48 | } 49 | 50 | withLabel:compute_resources__sinto__fragments { 51 | cpus = 4 52 | memory = '40 GB' 53 | maxForks = 8 54 | } 55 | 56 | withLabel:compute_resources__sinto__sort_fragments { 57 | cpus = 1 58 | memory = '40 GB' 59 | maxForks = 8 60 | } 61 | 62 | withLabel:compute_resources__bap_barcode_multiplet_pipeline_8cpus { 63 | cpus = 8 64 | memory = '80 GB' 65 | maxForks = 3 66 | } 67 | 68 | withLabel: 'compute_resources__minimal' { 69 | cpus = 1 70 | memory = '1 GB' 71 | } 72 | 73 | withLabel: 'compute_resources__mem' { 74 | cpus = 4 75 | memory = '160 GB' 76 | } 77 | 78 | withLabel: 'compute_resources__cpu' { 79 | cpus = 20 80 | memory = '80 GB' 81 | } 82 | 83 | withLabel: 'compute_resources__report' { 84 | maxForks = 2 85 | cpus = 1 86 | memory = '160 GB' 87 | } 88 | 89 | // can be used in conjunction with any other label to extend the queue time 90 | withLabel: 'compute_resources__24hqueue' { 91 | time = '24h' 92 | } 93 | 94 | } 95 | 96 | -------------------------------------------------------------------------------- /conf/compute_resources_with_retry.config: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | This error retry strategy and check_max function was modified from nf-core: 4 | https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/%7B%7Bcookiecutter.name_noslash%7D%7D/conf/base.config 5 | */ 6 | 7 | params { 8 | // Defaults only, expecting to be overwritten based on available cluster resources 9 | max_memory = 170.GB 10 | max_cpus = 20 11 | max_time = 168.h 12 | } 13 | 14 | // Function to ensure that resource requirements don't go beyond 15 | // a maximum limit 16 | def check_max(obj, type) { 17 | if (type == 'memory') { 18 | try { 19 | if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) 20 | return params.max_memory as nextflow.util.MemoryUnit 21 | else 22 | return obj 23 | } catch (all) { 24 | println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" 25 | return obj 26 | } 27 | } else if (type == 'time') { 28 | try { 29 | if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) 30 | return params.max_time as nextflow.util.Duration 31 | else 32 | return obj 33 | } catch (all) { 34 | println " ### ERROR ### Max time '${params.max_time}' is not valid! 
Using default value: $obj" 35 | return obj 36 | } 37 | } else if (type == 'cpus') { 38 | try { 39 | return Math.min( obj, params.max_cpus as int ) 40 | } catch (all) { 41 | println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" 42 | return obj 43 | } 44 | } 45 | } 46 | 47 | 48 | // define computing resources via process labels 49 | process { 50 | 51 | // this executor applies to all processes, except when overridden in another label 52 | executor = 'local' 53 | 54 | // allow a process to be re-tried if the exit code falls in this range. Otherwise, set to 'finish' (wait for completion of existing jobs) 55 | errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } 56 | 57 | maxRetries = 2 58 | 59 | // set default options that apply to all processes: 60 | cpus = { check_max(2 * task.attempt, 'cpus') } 61 | memory = { check_max(30.GB * task.attempt, 'memory') } 62 | 63 | // additional cluster options (applies to grid based executors): 64 | clusterOptions = "-A cluster_account" 65 | 66 | // set a default compute profile 67 | withLabel: 'compute_resources__default' { 68 | time = { check_max(1.h * task.attempt, 'time') } 69 | } 70 | 71 | withLabel: 'compute_resources__minimal' { 72 | cpus = { check_max(1 * task.attempt, 'cpus') } 73 | memory = { check_max(1.GB * task.attempt, 'memory') } 74 | } 75 | 76 | withLabel: 'compute_resources__mem' { 77 | cpus = { check_max(4, 'cpus') } 78 | memory = { check_max(160.GB * task.attempt, 'memory') } 79 | } 80 | 81 | withLabel: 'compute_resources__cpu' { 82 | cpus = { check_max(20, 'cpus') } 83 | memory = { check_max(80.GB * task.attempt, 'memory') } 84 | } 85 | 86 | } 87 | 88 | -------------------------------------------------------------------------------- /conf/docker.config: -------------------------------------------------------------------------------- 1 | docker { 2 | enabled = true 3 | runOptions = "-i -v ${HOME}:${HOME}" 4 | } -------------------------------------------------------------------------------- /conf/generic.config: -------------------------------------------------------------------------------- 1 | import static groovy.json.JsonOutput.* 2 | 3 | params { 4 | breakPrettyPrintMap = { p -> 5 | throw new Exception(prettyPrint(toJson(p))) 6 | } 7 | // This closure facilitates the usage of sample specific parameters 8 | parseConfig = { sample, paramsGlobal, paramsLocal -> 9 | def lv = { a,b -> return org.codehaus.groovy.runtime.MethodRankHelper.delDistance(a, b) } 10 | def pL = paramsLocal.collectEntries { k,v -> 11 | if (v instanceof Map) { 12 | if (v.containsKey(sample)) 13 | return [k, v[sample]] 14 | if (v.containsKey('default')) 15 | return [k, v['default']] 16 | def closeMatches = v.collectEntries { vk, vv -> [lv(vk, sample), vk] }.keySet().findAll { it < 30} 17 | if(closeMatches.size() > 0) 18 | throw new Exception("The sample " + sample + " is not found in " + v +" ; Make sure your samples are correctly specified when using the multi-sample feature.") 19 | else 20 | return [k,v] 21 | } else { 22 | return [k,v] 23 | } 24 | } 25 | return [global: paramsGlobal, local: pL] 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /conf/genomes/dm6.config: -------------------------------------------------------------------------------- 1 | params { 2 | global { 3 | species = 'fly' 4 | genome { 5 | assembly = 'dm6' 6 | } 7 | } 8 | } 9 | -------------------------------------------------------------------------------- 
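A note on the `parseConfig` closure defined in `conf/generic.config` above: when a parameter's value is a map, the entry keyed by the current sample ID is selected, with a `'default'` entry as the fallback; when no `'default'` entry exists, a near-miss sample name raises an error rather than silently passing the whole map through. A minimal sketch of the convention (the `mytool` block, the `nCores` parameter, and the sample IDs are hypothetical):

params {
    tools {
        mytool {
            // Per-sample override map; the 'default' entry is the fallback value.
            nCores = [
                'sample_A': 4,
                'default' : 2
            ]
        }
    }
}

// Resolution inside a process or workflow would then look like:
// def resolved = params.parseConfig('sample_A', params.global, params.tools.mytool)
// assert resolved.local.nCores == 4   // any other sample ID falls back to 2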
/conf/genomes/hg19.config: -------------------------------------------------------------------------------- 1 | params { 2 | global { 3 | species = 'human' 4 | genome { 5 | assembly = 'hg19' 6 | } 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /conf/genomes/hg38.config: -------------------------------------------------------------------------------- 1 | params { 2 | global { 3 | species = 'human' 4 | genome { 5 | assembly = 'hg38' 6 | } 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /conf/genomes/mm10.config: -------------------------------------------------------------------------------- 1 | params { 2 | global { 3 | species = 'mouse' 4 | genome { 5 | assembly = 'mm10' 6 | } 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /conf/global.config: -------------------------------------------------------------------------------- 1 | params { 2 | global { 3 | project_name = '10x_PBMC' 4 | outdir = 'out' 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /conf/logger.config: -------------------------------------------------------------------------------- 1 | process { 2 | afterScript = { 3 | // Source: https://github.com/nextflow-io/nextflow/issues/1166#issuecomment-502467562 4 | logMainDir = params.logDir 5 | 6 | // Check whether log dir is located in S3 if using awsbatch and is a local directory otherwise 7 | if (workflow.profile == "aws") { 8 | if (!logMainDir.matches("^s3://.*")) logMainDir = "s3:/" + workflow.workDir.toString() + "/log" 9 | } else { 10 | logMainDir = workflow.launchDir.resolve(logMainDir).toString() 11 | if (!logMainDir.matches("^/.*")) logMainDir = workflow.launchDir.toString() + "/log" 12 | } 13 | 14 | // Build log directory path based on task name 15 | logSubDir = task.name.replace(" (null)", "").replace(" ", "/").replaceAll(" ", "_").replaceAll("[()]", "") 16 | logDir = logMainDir + "/" + logSubDir 17 | 18 | // Define command to copy log files 19 | cpLogCmd = workflow.profile == "aws" ? 
20 | "nxf_s3_upload '*.txt' ${logDir}; " : 21 | "mkdir -p ${logDir}; cp -a *.txt ${logDir}; " 22 | 23 | // Assemble final command 24 | cmd = "ls -alR --full-time > .command.ls; " 25 | cmd += "mkdir nxf_log; " 26 | cmd += "for file in .command.*; do cp -a \${file} nxf_log/\${file#.}.txt; done; " 27 | cmd += "cd nxf_log; " 28 | cmd += cpLogCmd 29 | cmd += "cd ..;" 30 | cmd 31 | } 32 | } -------------------------------------------------------------------------------- /conf/min.config: -------------------------------------------------------------------------------- 1 | min { 2 | enabled = true 3 | } -------------------------------------------------------------------------------- /conf/nemesh.config: -------------------------------------------------------------------------------- 1 | params { 2 | global { 3 | genome = '/ddn1/vol1/staging/leuven/res_00001/genomes/homo_sapiens/hg38_iGenomes/iGenomes_Raw/Sequence/WholeGenomeFasta/genome.fa' 4 | genome_annotation = '/ddn1/vol1/staging/leuven/res_00001/genomes/homo_sapiens/hg38_iGenomes/iGenomes_Raw/Annotation/Archives/archive-2015-08-14-08-18-15/Genes/genes.gtf' 5 | tmpDir = '/ddn1/vol1/staging/leuven/stg_00002/lcb/dwmax' 6 | threads= 1 7 | qsubaccount = '' 8 | } 9 | 10 | tools { 11 | nemesh { 12 | // User can extract custom cell barcodes by providing it with a TSV containing all the barcodes 13 | // custom_selected_barcodes = '' 14 | // custom_selected_barcodes_tag = '' 15 | } 16 | } 17 | } -------------------------------------------------------------------------------- /conf/singularity.config: -------------------------------------------------------------------------------- 1 | singularity { 2 | enabled = true 3 | autoMounts = true 4 | runOptions = '--cleanenv -H $PWD -B ${HOME}' 5 | } 6 | -------------------------------------------------------------------------------- /conf/test.config: -------------------------------------------------------------------------------- 1 | params { 2 | misc { 3 | test { 4 | enabled = true 5 | } 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /conf/test__bbknn.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'bbknn_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = "testdata/*/outs/" 9 | } 10 | } 11 | tools { 12 | scanpy { 13 | filter { 14 | cellFilterMinNGenes = 1 15 | } 16 | neighborhood_graph { 17 | nPcs = 2 18 | } 19 | dim_reduction { 20 | pca { 21 | method = 'pca' 22 | nComps = 2 23 | } 24 | } 25 | clustering { 26 | method = 'louvain' 27 | resolution = 1 28 | } 29 | } 30 | } 31 | } 32 | 33 | -------------------------------------------------------------------------------- /conf/test__bbknn_scenic.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'bbknn_scenic_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = "testdata/*/outs/" 9 | } 10 | } 11 | tools { 12 | scanpy { 13 | filter { 14 | cellFilterMinNGenes = 1 15 | } 16 | neighborhood_graph { 17 | nPcs = 2 18 | } 19 | dim_reduction { 20 | pca { 21 | method = 'pca' 22 | nComps = 2 23 | } 24 | } 25 | clustering { 26 | method = 'louvain' 27 | resolution = 1 28 | } 29 | } 30 | scenic { 31 | numWorkers = 2 32 | grn { 33 | tfs = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/test_TFs_small.txt' 34 | } 35 | cistarget { 36 | motifsDb = 
'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/genome-ranking.feather' 37 | motifsAnnotation = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/motifs.tbl' 38 | tracksDb = '' 39 | tracksAnnotation = '' 40 | } 41 | } 42 | } 43 | } 44 | 45 | -------------------------------------------------------------------------------- /conf/test__cell_annotate_filter.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'cell_annotate_filter_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = 'sample_data/outs' 9 | } 10 | } 11 | tools { 12 | scanpy { 13 | container = 'vibsinglecellnf/scanpy:1.8.1' 14 | } 15 | } 16 | utils { 17 | file_converter { 18 | off = 'h5ad' 19 | tagCellWithSampleId = false 20 | useFilteredMatrix = true 21 | makeVarIndexUnique = false 22 | } 23 | cell_annotate { 24 | off = 'h5ad' 25 | method = 'aio' 26 | indexColumnName = 'index' 27 | cellMetaDataFilePath = "sample_data_tiny_dummy_annotation.tsv.gz" 28 | annotationColumnNames = ['dummy_annotation'] 29 | } 30 | cell_filter { 31 | off = 'h5ad' 32 | method = 'internal' 33 | filters = [[ 34 | id : 'foobar', 35 | indexColumnName:'index', 36 | filterColumnName:'dummy_annotation', 37 | valuesToKeepFromFilterColumn: ['foo'] 38 | ]] 39 | } 40 | } 41 | } 42 | 43 | -------------------------------------------------------------------------------- /conf/test__compute_resources.config: -------------------------------------------------------------------------------- 1 | 2 | process { 3 | 4 | executor = 'local' 5 | 6 | /* 7 | This label is activated when using the profile "test__compute_resources", and overwrites all settings from other labels. 8 | Used primarily to keep requested resources within the allowed bounds of GitHub Actions tests. 
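In typical usage this profile is layered on top of a pipeline profile when the run configuration is generated, e.g. (hypothetical invocation; assumes the test__compute_resources profile is declared in nextflow.config): nextflow config . -profile single_sample,docker,test__compute_resources > single_sample.config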
9 | */ 10 | withLabel: 'compute_resources__.*' { 11 | cpus = 2 12 | memory = '4 GB' 13 | time = '1h' 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /conf/test__decontx.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'decontx_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = 'sample_data/outs' 9 | } 10 | } 11 | tools { 12 | celda { 13 | container = 'vibsinglecellnf/celda:1.4.5' 14 | decontx { 15 | strategy = 'correct' 16 | } 17 | } 18 | } 19 | } 20 | 21 | -------------------------------------------------------------------------------- /conf/test__harmony.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'harmony_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = "testdata/*/outs/" 9 | } 10 | } 11 | tools { 12 | scanpy { 13 | filter { 14 | cellFilterMinNGenes = 1 15 | } 16 | neighborhood_graph { 17 | nPcs = 2 18 | } 19 | dim_reduction { 20 | pca { 21 | method = 'pca' 22 | nComps = 2 23 | } 24 | } 25 | clustering { 26 | method = 'louvain' 27 | resolution = 1 28 | } 29 | } 30 | } 31 | } 32 | 33 | -------------------------------------------------------------------------------- /conf/test__harmony_scenic.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'harmony_scenic_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = "testdata/*/outs/" 9 | } 10 | } 11 | utils { 12 | file_annotator { 13 | metadataFilePath = '' 14 | } 15 | } 16 | tools { 17 | file_annotator { 18 | metadataFilePath = '' 19 | } 20 | scanpy { 21 | filter { 22 | cellFilterMinNGenes = 1 23 | } 24 | neighborhood_graph { 25 | nPcs = 2 26 | } 27 | dim_reduction { 28 | pca { 29 | method = 'pca' 30 | nComps = 2 31 | } 32 | } 33 | clustering { 34 | method = 'louvain' 35 | resolution = 1 36 | } 37 | } 38 | scenic { 39 | numWorkers = 2 40 | grn { 41 | tfs = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/test_TFs_small.txt' 42 | } 43 | cistarget { 44 | motifsDb = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/genome-ranking.feather' 45 | motifsAnnotation = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/motifs.tbl' 46 | tracksDb = '' 47 | tracksAnnotation = '' 48 | } 49 | } 50 | } 51 | } 52 | 53 | -------------------------------------------------------------------------------- /conf/test__mnncorrect.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'mnncorrect_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = "testdata/*/outs/" 9 | } 10 | } 11 | tools { 12 | scanpy { 13 | filter { 14 | cellFilterMinNGenes = 1 15 | } 16 | neighborhood_graph { 17 | nPcs = 2 18 | } 19 | dim_reduction { 20 | pca { 21 | method = 'pca' 22 | nComps = 2 23 | } 24 | } 25 | clustering { 26 | method = 'louvain' 27 | resolution = 1 28 | } 29 | } 30 | } 31 | } 32 | 33 | -------------------------------------------------------------------------------- /conf/test__scenic.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'scenic_CI' 5 | } 6 | tools { 7 | file_annotator { 8 | metadataFilePath = '' 9 | } 10 | scenic { 11 | filteredLoom = 
'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/expr_mat_tiny.loom' 12 | grn { 13 | tfs = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/test_TFs_tiny.txt' 14 | } 15 | cistarget { 16 | motifsDb = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/genome-ranking.feather' 17 | motifsAnnotation = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/motifs.tbl' 18 | tracksDb = '' 19 | tracksAnnotation = '' 20 | } 21 | } 22 | } 23 | } 24 | 25 | -------------------------------------------------------------------------------- /conf/test__scenic_multiruns.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'scenic_multiruns_CI' 5 | } 6 | tools { 7 | scenic { 8 | filteredLoom = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/expr_mat_small.loom' 9 | grn { 10 | tfs = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/test_TFs_small.txt' 11 | } 12 | cistarget { 13 | motifsDb = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/genome-ranking.feather' 14 | motifsAnnotation = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/motifs.tbl' 15 | tracksDb = '' 16 | tracksAnnotation = '' 17 | } 18 | aucell { 19 | min_genes_regulon = 0 20 | min_regulon_gene_occurrence = 0 21 | } 22 | } 23 | } 24 | } 25 | 26 | -------------------------------------------------------------------------------- /conf/test__single_sample.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'single_sample_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = 'sample_data/outs' 9 | } 10 | } 11 | tools { 12 | scanpy { 13 | filter { 14 | cellFilterMinNGenes = 1 15 | } 16 | neighborhood_graph { 17 | nPcs = 2 18 | } 19 | dim_reduction { 20 | pca { 21 | method = 'pca' 22 | nComps = 2 23 | } 24 | } 25 | clustering { 26 | method = 'louvain' 27 | resolution = 1 28 | } 29 | } 30 | } 31 | } 32 | 33 | -------------------------------------------------------------------------------- /conf/test__single_sample_decontx_correct.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'single_sample_decontx_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = 'sample_data/outs' 9 | } 10 | } 11 | tools { 12 | scanpy { 13 | filter { 14 | cellFilterMinNGenes = 1 15 | } 16 | neighborhood_graph { 17 | nPcs = 10 18 | } 19 | dim_reduction { 20 | pca { 21 | method = 'pca' 22 | nComps = 10 23 | } 24 | } 25 | clustering { 26 | method = 'louvain' 27 | resolution = 1 28 | } 29 | } 30 | celda { 31 | container = 'vibsinglecellnf/celda:1.4.5' 32 | decontx { 33 | strategy = 'correct' 34 | } 35 | } 36 | } 37 | } 38 | 39 | -------------------------------------------------------------------------------- /conf/test__single_sample_decontx_correct_scrublet.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'single_sample_decontx_scrublet_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = 'sample_data/outs' 9 | } 10 | } 11 | tools { 12 | scanpy { 13 | filter { 14 | cellFilterMinNGenes = 1 15 | } 16 | neighborhood_graph { 17 | nPcs = 10 18 | } 19 | dim_reduction { 20 | pca { 21 | method = 'pca' 22 | nComps = 10 23 | } 24 | } 25 | clustering { 26 | 
method = 'louvain' 27 | resolution = 1 28 | } 29 | } 30 | celda { 31 | container = 'vibsinglecellnf/celda:1.4.5' 32 | decontx { 33 | strategy = 'correct' 34 | } 35 | } 36 | scrublet { 37 | container = 'vibsinglecellnf/scrublet:0.2.3' 38 | labels { 39 | processExecutor = 'local' 40 | } 41 | doublet_detection { 42 | report_ipynb = '/src/scrublet/bin/reports/sc_doublet_detection_report.ipynb' 43 | useVariableFeatures = 'False' 44 | technology = '10x' 45 | off = 'h5ad' 46 | } 47 | cell_annotate { 48 | off = 'h5ad' 49 | method = 'obo' 50 | indexColumnName = 'index' 51 | } 52 | cell_filter { 53 | off = 'h5ad' 54 | method = 'internal' 55 | filters = [ 56 | [ 57 | id:'NO_DOUBLETS', 58 | sampleColumnName: 'sample_id', 59 | filterColumnName:'scrublet__predicted_doublets_based_on_10x_chromium_spec', 60 | valuesToKeepFromFilterColumn:['False'] 61 | ] 62 | ] 63 | } 64 | } 65 | } 66 | } 67 | 68 | -------------------------------------------------------------------------------- /conf/test__single_sample_decontx_filter.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'single_sample_decontx_filter_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = 'sample_data/outs' 9 | } 10 | } 11 | tools { 12 | scanpy { 13 | filter { 14 | cellFilterMinNGenes = 1 15 | } 16 | neighborhood_graph { 17 | nPcs = 10 18 | } 19 | dim_reduction { 20 | pca { 21 | method = 'pca' 22 | nComps = 10 23 | } 24 | } 25 | clustering { 26 | method = 'louvain' 27 | resolution = 1 28 | } 29 | } 30 | celda { 31 | container = 'vibsinglecellnf/celda:1.4.5' 32 | decontx { 33 | cell_filter { 34 | off = 'h5ad' 35 | method = 'internal' 36 | filters = [[id:'DECONTX_FILTERED', sampleColumnName:'sample_id', filterColumnName:'celda_decontx__doublemad_predicted_outliers', valuesToKeepFromFilterColumn:['False']]] 37 | } 38 | strategy = 'filter' 39 | cell_annotate { 40 | off = 'h5ad' 41 | method = 'obo' 42 | indexColumnName = 'index' 43 | } 44 | filters { 45 | numMadsThresholds = [3] 46 | contaminationScoreThresholds = [0.5] 47 | } 48 | } 49 | } 50 | } 51 | } 52 | 53 | -------------------------------------------------------------------------------- /conf/test__single_sample_param_exploration.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'single_sample_param_exploration_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = 'sample_data/outs' 9 | } 10 | } 11 | tools { 12 | scanpy { 13 | filter { 14 | cellFilterMinNGenes = 1 15 | } 16 | neighborhood_graph { 17 | nPcs = 2 18 | } 19 | dim_reduction { 20 | pca { 21 | method = 'pca' 22 | nComps = 2 23 | } 24 | } 25 | clustering { 26 | resolutions = [1.0,1.2] 27 | } 28 | } 29 | } 30 | } 31 | 32 | -------------------------------------------------------------------------------- /conf/test__single_sample_scenic.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'single_sample_scenic_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = 'sample_data/outs' 9 | } 10 | } 11 | tools { 12 | scanpy { 13 | filter { 14 | cellFilterMinNGenes = 1 15 | } 16 | neighborhood_graph { 17 | nPcs = 2 18 | } 19 | dim_reduction { 20 | pca { 21 | method = 'pca' 22 | nComps = 2 23 | } 24 | } 25 | clustering { 26 | method = 'louvain' 27 | resolution = 1 28 | } 29 | } 30 | scenic { 31 | grn { 32 | tfs = 
'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/test_TFs_small.txt' 33 | } 34 | cistarget { 35 | motifsDb = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/genome-ranking.feather' 36 | motifsAnnotation = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/motifs.tbl' 37 | tracksDb = '' 38 | tracksAnnotation = '' 39 | } 40 | } 41 | } 42 | } 43 | 44 | -------------------------------------------------------------------------------- /conf/test__single_sample_scenic_multiruns.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'single_sample_scenic_multiruns_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = 'sample_data/outs' 9 | } 10 | } 11 | tools { 12 | scanpy { 13 | filter { 14 | cellFilterMinNGenes = 1 15 | } 16 | neighborhood_graph { 17 | nPcs = 2 18 | } 19 | dim_reduction { 20 | pca { 21 | method = 'pca' 22 | nComps = 2 23 | } 24 | } 25 | clustering { 26 | method = 'louvain' 27 | resolution = 1 28 | } 29 | } 30 | scenic { 31 | //filteredLoom = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/expr_mat_small.loom' 32 | grn { 33 | tfs = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/test_TFs_small.txt' 34 | } 35 | cistarget { 36 | motifsDb = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/genome-ranking.feather' 37 | motifsAnnotation = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/motifs.tbl' 38 | tracksDb = '' 39 | tracksAnnotation = '' 40 | } 41 | aucell { 42 | min_genes_regulon = 0 43 | min_regulon_gene_occurrence = 0 44 | } 45 | } 46 | } 47 | } 48 | 49 | 50 | -------------------------------------------------------------------------------- /conf/test__single_sample_scrublet.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'single_sample_scrublet_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = 'sample_data/outs' 9 | } 10 | } 11 | tools { 12 | scanpy { 13 | filter { 14 | cellFilterMinNGenes = 1 15 | } 16 | neighborhood_graph { 17 | nPcs = 10 18 | } 19 | dim_reduction { 20 | pca { 21 | method = 'pca' 22 | nComps = 10 23 | } 24 | } 25 | clustering { 26 | method = 'louvain' 27 | resolution = 1 28 | } 29 | } 30 | scrublet { 31 | container = 'vibsinglecellnf/scrublet:0.2.3' 32 | labels { 33 | processExecutor = 'local' 34 | } 35 | doublet_detection { 36 | report_ipynb = '/src/scrublet/bin/reports/sc_doublet_detection_report.ipynb' 37 | useVariableFeatures = 'False' 38 | technology = '10x' 39 | off = 'h5ad' 40 | } 41 | cell_annotate { 42 | off = 'h5ad' 43 | method = 'obo' 44 | indexColumnName = 'index' 45 | } 46 | cell_filter { 47 | off = 'h5ad' 48 | method = 'internal' 49 | filters = [ 50 | [ 51 | id:'NO_DOUBLETS', 52 | sampleColumnName: 'sample_id', 53 | filterColumnName:'scrublet__predicted_doublets_based_on_10x_chromium_spec', 54 | valuesToKeepFromFilterColumn:['False'] 55 | ] 56 | ] 57 | } 58 | } 59 | } 60 | } 61 | 62 | -------------------------------------------------------------------------------- /conf/test_disabled.config: -------------------------------------------------------------------------------- 1 | params { 2 | misc { 3 | test { 4 | enabled = false 5 | } 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /conf/vpcx.config: 
-------------------------------------------------------------------------------- 1 | vpcx { 2 | docker.enabled = true 3 | docker.runOptions = "-i -v /app:/app -v /root/:/root" 4 | docker.registry = "itx-aiv.artifactrepo.jnj.com/" 5 | } -------------------------------------------------------------------------------- /conf/vsc.config: -------------------------------------------------------------------------------- 1 | singularity { 2 | enabled = true 3 | autoMounts = true 4 | runOptions = '--cleanenv -H $PWD -B /lustre1,/staging,/data,${VSC_SCRATCH},${VSC_SCRATCH}/tmp:/tmp,${HOME}/.nextflow/assets/' 5 | cacheDir = 'PUMATAC_dependencies/cache' 6 | } 7 | 8 | vsc { 9 | enabled = true 10 | } 11 | 12 | -------------------------------------------------------------------------------- /data/10x/1k_pbmc/metadata.tsv: -------------------------------------------------------------------------------- 1 | id chromium_chemistry 2 | 1k_pbmc_v2_chemistry v2 3 | 1k_pbmc_v3_chemistry v3 4 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | # Datasets 2 | 3 | Public datasets that can be used to test one of the pipelines. 4 | Start by creating a working directory to contain the data, intermediate Nextflow files, and final analysis outputs: 5 | ```bash 6 | mkdir single_sample_test && cd single_sample_test 7 | ``` 8 | 9 | # 10x Genomics 10 | 11 | Some 10x datasets that can be used to run the `single_sample` pipeline: 12 | - 1k PBMCs from a Healthy Donor (v2 chemistry) 13 | ``` 14 | wget http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_v2/pbmc_1k_v2_filtered_feature_bc_matrix.tar.gz 15 | mkdir -p data/10x/1k_pbmc/1k_pbmc_v2_chemistry/outs/ 16 | tar -xzvf pbmc_1k_v2_filtered_feature_bc_matrix.tar.gz -C data/10x/1k_pbmc/1k_pbmc_v2_chemistry/outs/ 17 | ``` 18 | - 1k PBMCs from a Healthy Donor (v3 chemistry) 19 | ``` 20 | wget http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_v3/pbmc_1k_v3_filtered_feature_bc_matrix.tar.gz 21 | mkdir -p data/10x/1k_pbmc/1k_pbmc_v3_chemistry/outs/ 22 | tar -xzvf pbmc_1k_v3_filtered_feature_bc_matrix.tar.gz -C data/10x/1k_pbmc/1k_pbmc_v3_chemistry/outs/ 23 | ``` 24 | 25 | Download the small metadata file to annotate the samples: 26 | ``` 27 | wget https://raw.githubusercontent.com/vib-singlecell-nf/vsn-pipelines/master/data/10x/1k_pbmc/metadata.tsv -O data/10x/1k_pbmc/metadata.tsv 28 | ``` 29 | 30 | If these links do not work, you can always download the datasets from https://support.10xgenomics.com/single-cell-gene-expression/datasets. 31 | 32 | -------------------------------------------------------------------------------- /data/sample_data_tiny/sample_data_tiny_dummy_annotation.tsv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/data/sample_data_tiny/sample_data_tiny_dummy_annotation.tsv.gz -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/attributions.rst: -------------------------------------------------------------------------------- 1 | Attributions 2 | ============ 3 | 4 | 5 | VSN-Pipelines is a collection of workflows targeted toward the analysis of single cell data. 6 | VSN depends on, and takes functions from, many tools developed both internally and externally; these are listed here. 7 | 8 | Tools 9 | ---------------------------------------------------- 10 | 11 | 12 | - `GreenleafLab/ArchR `_ 13 | - `caleblareau/bap `_ 14 | - `lh3/bwa `_ 15 | - `Samtools `_ 16 | - `campbio/celda `_ 17 | - Directs 18 | - `DropletUtils `_ 19 | - `Drop-seq Tools `_ 20 | - `EDirect `_ 21 | - `OpenGene/fastp `_ 22 | - `hangnoh/flybaseR `_ 23 | - `dweemx/flybaseR `_ 24 | - `immunogenomics/harmony `_ 25 | - pcacv 26 | - `Picard `_ 27 | - `statgen/popscle `_ 28 | - `aertslab/popscle_helper_tools `_ 29 | - `aertslab/cisTopic `_ 30 | - `theislab/scanpy `_ 31 | - `aertslab/pySCENIC `_ 32 | - `aertslab/SCENIC `_ 33 | - `swolock/scrublet `_ 34 | - `aertslab/single_cell_toolkit `_ 35 | - `timoast/sinto `_ 36 | - `constantAmateur/SoupX `_ 37 | - `ncbi/sra-tools `_ 38 | - `alexdobin/STAR `_ 39 | - `Trim Galore `_ 40 | 41 | -------------------------------------------------------------------------------- /docs/case-studies.rst: -------------------------------------------------------------------------------- 1 | Case Studies 2 | ============= 3 | 4 | See the full list of case studies and examples at `VSN-Pipelines-examples `_. 5 | 6 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'VSN-Pipelines' 21 | copyright = '2020, Kristofer Davie, Maxime De Waegeneer, Christopher Flerin' 22 | author = 'Kristofer Davie, Maxime De Waegeneer, Christopher Flerin' 23 | 24 | 25 | # -- General configuration --------------------------------------------------- 26 | 27 | # Add any Sphinx extension module names here, as strings. They can be 28 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 29 | # ones. 30 | extensions = [ 31 | ] 32 | 33 | # Add any paths that contain templates here, relative to this directory. 
34 | templates_path = ['_templates'] 35 | 36 | # List of patterns, relative to source directory, that match files and 37 | # directories to ignore when looking for source files. 38 | # This pattern also affects html_static_path and html_extra_path. 39 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 40 | 41 | # Resolve issue with RTD build 42 | master_doc = 'index' 43 | 44 | 45 | # -- Options for HTML output ------------------------------------------------- 46 | 47 | # The theme to use for HTML and HTML Help pages. See the documentation for 48 | # a list of builtin themes. 49 | # 50 | html_theme = 'sphinx_rtd_theme' 51 | 52 | # Add any paths that contain custom static files (such as style sheets) here, 53 | # relative to this directory. They are copied after the builtin static files, 54 | # so a file named "default.css" will overwrite the builtin "default.css". 55 | html_static_path = ['_static'] -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. VSN-Pipelines documentation master file, created by 2 | sphinx-quickstart on Tue Feb 11 13:06:44 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | :hidden: 9 | 10 | Home 11 | getting-started 12 | input_formats 13 | pipelines 14 | features 15 | case-studies 16 | 17 | .. toctree:: 18 | :maxdepth: 2 19 | :hidden: 20 | :caption: scATAC-seq 21 | 22 | scatac-seq 23 | scatac-seq_qc 24 | 25 | .. toctree:: 26 | :maxdepth: 2 27 | :hidden: 28 | :caption: Development 29 | 30 | development 31 | attributions 32 | 33 | 34 | .. include:: ../README.rst 35 | 36 | .. Indices and tables 37 | .. ================== 38 | 39 | .. * :ref:`genindex` 40 | .. * :ref:`modindex` 41 | .. 
* :ref:`search` 42 | -------------------------------------------------------------------------------- /main_atac.nf: -------------------------------------------------------------------------------- 1 | import static groovy.json.JsonOutput.* 2 | 3 | nextflow.enable.dsl=2 4 | 5 | include { 6 | INIT; 7 | } from './src/utils/workflows/utils' params(params) 8 | 9 | INIT(params) 10 | 11 | include { 12 | SC__FILE_CONVERTER; 13 | } from './src/utils/processes/utils' params(params) 14 | 15 | include { 16 | getDataChannel; 17 | } from './src/channels/channels' params(params) 18 | 19 | /* 20 | ATAC-seq pipelines 21 | */ 22 | 23 | 24 | // runs mkfastq, then cellranger-atac count: 25 | workflow cellranger_atac { 26 | 27 | include { 28 | CELLRANGER_ATAC 29 | } from './src/cellranger-atac/main.nf' params(params) 30 | 31 | CELLRANGER_ATAC( 32 | file(params.tools.cellranger_atac.mkfastq.csv), 33 | file(params.tools.cellranger_atac.mkfastq.runFolder), 34 | file(params.tools.cellranger_atac.count.reference) 35 | ) 36 | 37 | } 38 | 39 | 40 | workflow atac_preprocess { 41 | 42 | // generic ATAC-seq preprocessing pipeline: adapter trimming, mapping, fragments file generation 43 | include { 44 | ATAC_PREPROCESS; 45 | } from './workflows/atac/preprocess.nf' params(params) 46 | 47 | ATAC_PREPROCESS(file(params.data.atac_preprocess.metadata)) 48 | 49 | } 50 | 51 | 52 | workflow atac_preprocess_bap { 53 | 54 | include { 55 | ATAC_PREPROCESS; 56 | } from './workflows/atac/preprocess.nf' params(params) 57 | include { 58 | BAP__BARCODE_MULTIPLET_WF; 59 | } from './src/bap/main.nf' params(params) 60 | 61 | ATAC_PREPROCESS(file(params.data.atac_preprocess.metadata)) | 62 | get_bam | 63 | BAP__BARCODE_MULTIPLET_WF 64 | 65 | } 66 | 67 | workflow atac_preprocess_rapid { 68 | 69 | include { 70 | ATAC_PREPROCESS_RAPID; 71 | } from './workflows/atac/preprocess_rapid.nf' params(params) 72 | //include { 73 | // BARCARD__FRAGMENTS_POSTPROCESSING; 74 | //} from './src/barcard/main.nf' params(params) 75 | 76 | ATAC_PREPROCESS_RAPID(file(params.data.atac_preprocess.metadata)) 77 | //ATAC_PREPROCESS_RAPID(file(params.data.atac_preprocess.metadata)) | 78 | // get_bam | 79 | // BARCARD__FRAGMENTS_POSTPROCESSING 80 | 81 | } 82 | 83 | 84 | workflow bap { 85 | include { 86 | BAP__BARCODE_MULTIPLET_WF; 87 | } from './src/bap/main.nf' params(params) 88 | 89 | getDataChannel | BAP__BARCODE_MULTIPLET_WF 90 | 91 | } 92 | 93 | 94 | /* 95 | QC 96 | */ 97 | workflow atac_qc_filtering { 98 | 99 | include { 100 | ATAC_QC_PREFILTER; 101 | } from './workflows/atac/qc_filtering.nf' params(params) 102 | 103 | getDataChannel | ATAC_QC_PREFILTER 104 | 105 | } 106 | 107 | workflow atac_preprocess_with_qc { 108 | 109 | // generic ATAC-seq preprocessing pipeline: adapter trimming, mapping, fragments file generation 110 | include { 111 | ATAC_PREPROCESS; 112 | } from './workflows/atac/preprocess.nf' params(params) 113 | include { 114 | ATAC_QC_PREFILTER; 115 | } from './workflows/atac/qc_filtering.nf' params(params) 116 | 117 | pp = ATAC_PREPROCESS(file(params.data.atac_preprocess.metadata)) 118 | ATAC_QC_PREFILTER(pp.bam.mix(pp.fragments)) 119 | 120 | } 121 | 122 | workflow atac_preprocess_freemuxlet { 123 | 124 | // generic ATAC-seq preprocessing pipeline: adapter trimming, mapping, fragments file generation 125 | include { 126 | ATAC_PREPROCESS_WITH_METADATA; 127 | } from './workflows/atac/preprocess.nf' params(params) 128 | include { 129 | freemuxlet as FREEMUXLET; 130 | } from './workflows/popscle' params(params) 131 | 132 | 
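// Data-flow sketch (assumptions: ATAC_PREPROCESS_WITH_METADATA exposes a named
// `bam` emission, as referenced below, and the FREEMUXLET sub-workflow wraps
// popscle dsc-pileup followed by freemuxlet): each sample's preprocessed BAM
// is passed straight into genotype-free demultiplexing.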
ATAC_PREPROCESS_WITH_METADATA(file(params.tools.atac.preprocess.metadata)) 133 | FREEMUXLET(ATAC_PREPROCESS_WITH_METADATA.out.bam) 134 | } 135 | 136 | -------------------------------------------------------------------------------- /src/barcard/barcard.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | barcard { 4 | container = 'vibsinglecellnf/bap:2021-04-27-3b48f4b' 5 | } 6 | } 7 | } 8 | 9 | -------------------------------------------------------------------------------- /src/barcard/bin/barcard_otsu_filtering_test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "7be3b9b5-12dc-4ed5-a80c-bdb8a80facf6", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [] 10 | } 11 | ], 12 | "metadata": { 13 | "kernelspec": { 14 | "display_name": "20220609_pycistopic.sif", 15 | "language": "python", 16 | "name": "20220609_pycistopic" 17 | }, 18 | "language_info": { 19 | "codemirror_mode": { 20 | "name": "ipython", 21 | "version": 3 22 | }, 23 | "file_extension": ".py", 24 | "mimetype": "text/x-python", 25 | "name": "python", 26 | "nbconvert_exporter": "python", 27 | "pygments_lexer": "ipython3", 28 | "version": "3.8.13" 29 | } 30 | }, 31 | "nbformat": 4, 32 | "nbformat_minor": 5 33 | } 34 | -------------------------------------------------------------------------------- /src/barcard/conf/barcard_barcode_multiplet.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | barcard { 4 | barcode_multiplet { 5 | report_ipynb = '/src/barcard/bin/barcard_otsu_filtering.ipynb' 6 | } 7 | } 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/barcard/main.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | ////////////////////////////////////////////////////// 4 | // Import sub-workflows from the modules: 5 | 6 | include { 7 | CREATE_FRAGMENTS_FROM_BAM as BARCARD__CREATE_FRAGMENTS_FROM_BAM 8 | } from './processes/create_fragments_from_bam.nf' params(params) 9 | 10 | include { 11 | DETECT_BARCODE_MULTIPLETS as BARCARD__DETECT_BARCODE_MULTIPLETS; 12 | } from './processes/detect_barcode_multiplets.nf' params(params) 13 | 14 | include { 15 | MERGE_BARCODE_MULTIPLETS as BARCARD__MERGE_BARCODE_MULTIPLETS; 16 | } from './processes/merge_barcode_multiplets.nf' params(params) 17 | 18 | include { 19 | GENERATE_REPORT; 20 | REPORT_TO_HTML; 21 | } from './processes/report.nf' params(params) 22 | 23 | ////////////////////////////////////////////////////// 24 | // Import sub-workflows from the modules: 25 | 26 | include { 27 | BWAMAPTOOLS__INDEX_BED; 28 | } from './../../src/bwamaptools/processes/index.nf' params(params) 29 | include { 30 | PUBLISH as PUBLISH_FRAGMENTS; 31 | PUBLISH as PUBLISH_FRAGMENTS_INDEX; 32 | } from "../utils/workflows/utils.nf" params(params) 33 | 34 | 35 | ////////////////////////////////////////////////////// 36 | // Define the workflow 37 | 38 | workflow BAM_TO_FRAGMENTS { 39 | 40 | take: 41 | bam 42 | 43 | main: 44 | 45 | // sampleID, frag, frag idx 46 | fragments = BARCARD__CREATE_FRAGMENTS_FROM_BAM(bam) 47 | 48 | //fragments_sort = SINTO__SORT_FRAGMENTS(fragments) 49 | //index = BWAMAPTOOLS__INDEX_BED(fragments_sort) 50 | 51 | // join bed index into the fragments channel: 52 | //fragments_out = fragments_sort.join(index) 53 | 54 
| emit: 55 | fragments 56 | //fragments_out 57 | 58 | } 59 | 60 | 61 | workflow DETECT_BARCODE_MULTIPLETS { 62 | 63 | take: 64 | fragments 65 | 66 | main: 67 | 68 | // barcard_multiplets = BARCARD__DETECT_BARCODE_MULTIPLETS(fragments.map { it -> tuple(it[0], it[1][0], it[1][1]) }) 69 | barcard_multiplets = BARCARD__DETECT_BARCODE_MULTIPLETS(fragments.map { it -> tuple(it[0], it[1]) }) 70 | 71 | //GENERATE_REPORT( 72 | // file(workflow.projectDir + params.tools.barcard.barcode_multiplet.report_ipynb), 73 | // barcard_multiplets.map { it -> tuple(it[0], it[3]) }, 74 | // "BARCARD__multiplet_report" 75 | //) | 76 | //REPORT_TO_HTML 77 | 78 | GENERATE_REPORT( 79 | file(workflow.projectDir + params.tools.barcard.barcode_multiplet.report_ipynb), 80 | barcard_multiplets.map { it -> tuple(it[0], it[1]) } 81 | //"BARCARD__otsu_filtering_report" 82 | ) | 83 | REPORT_TO_HTML 84 | 85 | emit: 86 | barcard_multiplets 87 | 88 | } 89 | -------------------------------------------------------------------------------- /src/barcard/processes/create_fragments_from_bam.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | // binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" 4 | 5 | 6 | process CREATE_FRAGMENTS_FROM_BAM { 7 | //container params.tools.barcard.container 8 | container "vibsinglecellnf/singlecelltoolkit:2024-04-09-62429e9" 9 | label 'compute_resources__barcard__create_fragments_from_bam' 10 | publishDir "${params.global.outdir}/data/fragments", mode: 'copy' 11 | 12 | input: 13 | tuple val(sampleId), 14 | path(bam) //, 15 | // path(bai) 16 | 17 | output: 18 | tuple val(sampleId), 19 | path("${sampleId}.fragments.raw.tsv.gz"), 20 | path("${sampleId}.fragments.raw.tsv.gz.tbi") 21 | 22 | script: 23 | //def sampleParams = params.parseConfig(sampleId, params.global) 24 | //processParams = sampleParams.local 25 | """ 26 | set -euo pipefail 27 | 28 | create_fragments_file --bam "${bam}" --fragments "${sampleId}.fragments.raw.tsv.gz" 29 | 30 | tabix -p bed "${sampleId}.fragments.raw.tsv.gz" 31 | """ 32 | } 33 | -------------------------------------------------------------------------------- /src/barcard/processes/detect_barcode_multiplets.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | // binDir = !params.containsKey("test") ?
"${workflow.projectDir}/src/template/bin/" : "" 4 | 5 | 6 | process DETECT_BARCODE_MULTIPLETS { 7 | //container params.tools.barcard.container 8 | container "vibsinglecellnf/singlecelltoolkit:2022-07-07-0638c1d" 9 | label 'compute_resources__barcard__detect_barcode_multiplets' 10 | publishDir "${params.global.outdir}/data/reports/barcard/", mode: 'copy' 11 | 12 | input: 13 | tuple val(sampleId), 14 | path(fragments) 15 | 16 | output: 17 | tuple val(sampleId), 18 | //path(fragments), 19 | path("${sampleId}.barcard.overlap.tsv") 20 | 21 | script: 22 | //def sampleParams = params.parseConfig(sampleId, params.global) 23 | //processParams = sampleParams.local 24 | """ 25 | set -euo pipefail 26 | 27 | chromosome_regex='^(chr)?([0-9]+|[XY])\$' 28 | calculate_jaccard_index_cbs.py -i ${fragments} -o ${sampleId}.barcard.overlap.tsv -t 1000 -c \${chromosome_regex} 29 | """ 30 | } 31 | -------------------------------------------------------------------------------- /src/barcard/processes/detect_barcode_multiplets.nf_new: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | // binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" 4 | 5 | 6 | process DETECT_BARCODE_MULTIPLETS { 7 | //container params.tools.barcard.container 8 | container "vibsinglecellnf/singlecelltoolkit:2022-07-07-0638c1d" 9 | label 'compute_resources__barcard__detect_barcode_multiplets' 10 | publishDir "${params.global.outdir}/data/reports/barcard/", mode: 'copy' 11 | 12 | input: 13 | tuple val(sampleId), 14 | path(fragments) 15 | 16 | output: 17 | tuple val(sampleId), 18 | //path(fragments), 19 | path("${sampleId}.barcard.overlap.tsv") 20 | 21 | script: 22 | //def sampleParams = params.parseConfig(sampleId, params.global) 23 | //processParams = sampleParams.local 24 | """ 25 | set -euo pipefail 26 | 27 | chromosome_regex='^(chr)?([0-9]+|[XY])\$' 28 | calculate_jaccard_index_cbs.py -i ${fragments} -o ${sampleId}.barcard.overlap.tsv -t 1000 -c \${chromosome_regex} 29 | """ 30 | } 31 | -------------------------------------------------------------------------------- /src/barcard/processes/detect_barcode_multiplets.nf_old: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | // binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" 4 | 5 | 6 | process DETECT_BARCODE_MULTIPLETS { 7 | //container params.tools.barcard.container 8 | container "vibsinglecellnf/singlecelltoolkit:2022-07-07-0638c1d" 9 | label 'compute_resources__barcard__detect_barcode_multiplets' 10 | publishDir "${params.global.outdir}/data/reports/barcard/", mode: 'copy' 11 | 12 | input: 13 | tuple val(sampleId), 14 | path(fragments) 15 | 16 | output: 17 | tuple val(sampleId), 18 | //path(fragments), 19 | path("${sampleId}.barcard.overlap.tsv") 20 | 21 | script: 22 | //def sampleParams = params.parseConfig(sampleId, params.global) 23 | //processParams = sampleParams.local 24 | """ 25 | set -euo pipefail 26 | 27 | calculate_jaccard_index_cbs.py -i ${fragments} -o ${sampleId}.barcard.overlap.tsv -t 1000 28 | """ 29 | } 30 | -------------------------------------------------------------------------------- /src/barcard/processes/merge_barcode_multiplets.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | // binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/template/bin/" : "" 4 | 5 | 6 | process MERGE_BARCODE_MULTIPLETS { 7 | //container params.tools.barcard.container 8 | container "vibsinglecellnf/singlecelltoolkit:2022-07-07-0638c1d" 9 | label 'compute_resources__barcard__merge_barcode_multiplets' 10 | publishDir "${params.global.outdir}/data/fragments", mode: 'copy' 11 | 12 | input: 13 | tuple val(sampleId), 14 | path(bam) //, 15 | // path(bai) 16 | 17 | output: 18 | tuple val(sampleId), 19 | path("${sampleId}.fragments.raw.tsv.gz"), 20 | path("${sampleId}.fragments.raw.tsv.gz.tbi") 21 | 22 | script: 23 | //def sampleParams = params.parseConfig(sampleId, params.global) 24 | //processParams = sampleParams.local 25 | """ 26 | set -euo pipefail 27 | 28 | create_fragments_file \ 29 | "${bam}" \ 30 | _unused \ 31 | | coreutils sort --parallel=8 -S 16G -k 1,1V -k 2,2n -k 3,3n -k 4,4 \ 32 | | uniq -c \ 33 | | mawk -v 'OFS=\t' '{ print \$2, \$3, \$4, \$5, \$1 }' \ 34 | | bgzip -@ 4 -c /dev/stdin \ 35 | > ${sampleId}.fragments.raw.tsv.gz 36 | 37 | tabix -p bed ${sampleId}.fragments.raw.tsv.gz 38 | """ 39 | } 40 | 41 | 42 | 43 | nextflow.enable.dsl=2 44 | 45 | // binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" 46 | -------------------------------------------------------------------------------- /src/barcard/processes/report.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | import java.nio.file.Paths 4 | import static groovy.json.JsonOutput.* 5 | 6 | toolParams = params.tools.barcard 7 | 8 | process GENERATE_REPORT { 9 | container "vibsinglecellnf/bap:2021-04-27-3b48f4b" 10 | publishDir "${params.global.outdir}/notebooks/", mode: params.utils.publish.mode 11 | label 'compute_resources__report' 12 | 13 | input: 14 | path(ipynb) 15 | tuple val(sampleId), 16 | path("${sampleId}.barcard.overlap.tsv") 17 | //val(reportTitle) 18 | 19 | output: 20 | tuple val(sampleId), 21 | path("${sampleId}.barcard_otsu.ipynb"), 22 | path("${sampleId}.barcard_kneeplot.png"), 23 | path("${sampleId}.barcard.overlap.otsu_filtered.tsv") 24 | 25 | script: 26 | //def sampleParams = params.parseConfig(sampleId) 27 | //processParams = sampleParams 28 | //barcardParams = toJson(processParams) 29 | 30 | //def sampleParams = params.parseConfig(sampleId, params.global, toolParams.barcode_multiplet) 31 | //processParams = sampleParams.local 32 | //barcard_params = toJson(processParams) 33 | """ 34 | mkdir .cache/ 35 | mkdir .cache/black/ 36 | mkdir .cache/black/21.4b1/ 37 | 38 | papermill ${ipynb} \ 39 | ${sampleId}.barcard_otsu.ipynb \ 40 | --report-mode \ 41 | -p SAMPLE ${sampleId} \ 42 | -p BARCARD_OVERLAP_TSV '${sampleId}.barcard.overlap.tsv' 43 | """ 44 | } 45 | 46 | 47 | process REPORT_TO_HTML { 48 | container "vibsinglecellnf/bap:2021-04-27-3b48f4b" 49 | publishDir "${params.global.outdir}/notebooks/", mode: params.utils.publish.mode 50 | label 'compute_resources__report' 51 | 52 | input: 53 | tuple val(sampleId), 54 | path(ipynb) 55 | 56 | output: 57 | file("*.html") 58 | 59 | script: 60 | """ 61 | jupyter nbconvert ${ipynb} --to html 62 | """ 63 | } 64 | 65 | -------------------------------------------------------------------------------- /src/bwamaptools/.gitattributes: -------------------------------------------------------------------------------- 1 | notebooks/* linguist-vendored 2 | -------------------------------------------------------------------------------- /src/bwamaptools/.gitignore: 
-------------------------------------------------------------------------------- 1 | *checkpoint.ipynb 2 | *checkpoint* 3 | *checkpoint.py 4 | *.test.ipynb 5 | *.csv 6 | *.loom 7 | *.pickle 8 | *.pyc 9 | *.html 10 | *egg* 11 | .vscode 12 | .nextflow 13 | .nextflow* 14 | data 15 | refdata 16 | work 17 | out/notebooks 18 | src/scenic/out 19 | src/scenic/notebooks 20 | src/scenic/data 21 | refdata 22 | data/10x/tiny 23 | work/ 24 | out/ 25 | tests/ 26 | debug/ 27 | *.swp 28 | *.swo 29 | -------------------------------------------------------------------------------- /src/bwamaptools/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM vibsinglecellnf/samtools:0.3-1.15.1 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | 5 | # install bwa 6 | RUN git clone https://github.com/lh3/bwa.git && \ 7 | cd bwa && \ 8 | make && \ 9 | mv /bwa/bwa /usr/local/bin/ 10 | 11 | # install bwa-mem2 12 | ENV BWAMEM2_VER 2.2.1 13 | RUN cd /tmp && \ 14 | curl -L https://github.com/bwa-mem2/bwa-mem2/releases/download/v${BWAMEM2_VER}/bwa-mem2-${BWAMEM2_VER}_x64-linux.tar.bz2 \ 15 | | tar jxf - --no-same-owner && \ 16 | mv bwa-mem2-${BWAMEM2_VER}_x64-linux/bwa-mem2* /usr/local/bin 17 | 18 | RUN rm -rf /var/cache/apt/* && \ 19 | rm -rf /var/lib/apt/lists/* && \ 20 | ldconfig 21 | 22 | -------------------------------------------------------------------------------- /src/bwamaptools/README.rst: -------------------------------------------------------------------------------- 1 | 2 | BWA maptools module 3 | =================== 4 | 5 | This repository contains an implementation of BWA for VIB-SingleCell-NF (VSN) pipelines, along with several supporting tools (htslib, samtools). 6 | See `lh3/bwa <https://github.com/lh3/bwa>`_ for the original source. 7 | 8 | To build the Docker image 9 | ------------------------- 10 | 11 | Image tag format: ``-``. 12 | 13 | .. code:: bash 14 | 15 | docker build -t vibsinglecellnf/bwamaptools:bwa-mem2-2.2.1-zlibng-2.0.6 . 16 | podman build -t vibsinglecellnf/bwamaptools:bwa-mem2-2.2.1-zlibng-2.0.6 . 17 | 18 | This image uses the ``vibsinglecellnf/samtools`` image as a base. 19 | 20 | -------------------------------------------------------------------------------- /src/bwamaptools/bin/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/bwamaptools/bin/.gitkeep -------------------------------------------------------------------------------- /src/bwamaptools/bin/mapping_summary.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | sampleId="${1}"; 4 | bam="${2}"; 5 | 6 | if [ ${#@} -ne 2 ] ; then 7 | printf 'Usage: mapping_summary.sh sampleId bam_file\n' >&2; 8 | exit 1; 9 | fi 10 | 11 | 12 | # Get mapping statistics from BAM file: 13 | # - Read BAM file and write uncompressed BAM. 14 | # - Uncompressed BAM file is written to each samtools command with tee (writes to each specified file and stdout). 15 | # - samtools commands: 16 | # - Get samtools statistics with: 17 | # samtools stat "${bam}" > "${sampleId}.stat" 18 | # - Uniquely mapped reads (BWA): 19 | # samtools view -c -F 0x4 -F 0x100 -F 0x800 -e '! [XA] && ! [SA]' "${bam}" 20 | # - Fraction of total read pairs mapped confidently to genome (>30 mapq): 21 | # samtools view -c -F 0x4 -F 0x100 -F 0x800 -q 30 "${bam}" 22 | # - Only use threads for "samtools stat".
Using it with any of the other samtools commands 23 | # makes everything slower than not using any threads at all. 24 | samtools view -u "${bam}" \ 25 | | tee \ 26 | >(samtools view -c -F 0x4 -F 0x100 -F 0x800 -e '! [XA] && ! [SA]' - > "${sampleId}.uniquely_mapped_reads.txt") \ 27 | >(samtools view -c -F 0x4 -F 0x100 -F 0x800 -q 30 - > "${sampleId}.fraction_total_read_pairs.txt") \ 28 | | samtools stat -@ 2 - > "${sampleId}.stat" 29 | 30 | 31 | # Output file: 32 | printf "\t${sampleId}\n" > "${sampleId}.mapping_stats.tsv"; 33 | 34 | grep '^SN' "${sampleId}.stat" | cut -f 2,3 >> "${sampleId}.mapping_stats.tsv"; 35 | 36 | printf "Uniquely mapped reads:\t" >> "${sampleId}.mapping_stats.tsv"; 37 | cat "${sampleId}.uniquely_mapped_reads.txt" >> "${sampleId}.mapping_stats.tsv"; 38 | 39 | printf "Reads mapped with MAPQ>30:\t" >> "${sampleId}.mapping_stats.tsv"; 40 | cat "${sampleId}.fraction_total_read_pairs.txt" >> "${sampleId}.mapping_stats.tsv"; 41 | 42 | rm "${sampleId}.uniquely_mapped_reads.txt" "${sampleId}.fraction_total_read_pairs.txt"; 43 | -------------------------------------------------------------------------------- /src/bwamaptools/bwamaptools.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | bwamaptools { 4 | container = 'vibsinglecellnf/bwamaptools:bwa-mem2-2.2.1-zlibng-2.0.6' 5 | } 6 | } 7 | } 8 | 9 | -------------------------------------------------------------------------------- /src/bwamaptools/conf/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/bwamaptools/conf/.gitkeep -------------------------------------------------------------------------------- /src/bwamaptools/conf/bwa_mapping.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | bwamaptools { 4 | bwa_fasta = 'PUMATAC_dependencies/genomes/hg38_bwamem2/genome.fa' 5 | bwa_version = 'bwa-mem2' 6 | } 7 | } 8 | } 9 | 10 | // define computing resources via process labels 11 | process { 12 | withLabel: 'compute_resources__bwa_mem' { 13 | executor = 'local' 14 | cpus = 6 15 | memory = '60 GB' 16 | time = '24h' 17 | maxForks = 8 18 | } 19 | } 20 | 21 | -------------------------------------------------------------------------------- /src/bwamaptools/main.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | import java.nio.file.Paths 4 | 5 | ////////////////////////////////////////////////////// 6 | // Import sub-workflows from the modules: 7 | 8 | include { 9 | BWAMAPTOOLS__BWA_MEM_PE as BWA_MEM_PE; 10 | } from './processes/mapping.nf' params(params) 11 | include { 12 | BWAMAPTOOLS__MAPPING_SUMMARY as MAPPING_SUMMARY; 13 | } from './processes/mapping_summary.nf' params(params) 14 | include { 15 | SIMPLE_PUBLISH as PUBLISH_BAM; 16 | SIMPLE_PUBLISH as PUBLISH_BAM_INDEX; 17 | SIMPLE_PUBLISH as PUBLISH_MAPPING_SUMMARY; 18 | SIMPLE_PUBLISH as PUBLISH_MARKDUPS_METRICS; 19 | SIMPLE_PUBLISH as PUBLISH_LIBRARY_METRICS; 20 | } from "../utils/processes/utils.nf" params(params) 21 | 22 | ////////////////////////////////////////////////////// 23 | // Define the workflow 24 | 25 | workflow get_bwa_index { 26 | 27 | take: 28 | fasta_path 29 | 30 | main: 31 | 32 | bwa_fasta = Channel.fromPath(fasta_path) 33 | 34 | bwa_index_path = Paths.get( 35 | Paths.get(fasta_path).getParent().toString(), 36 | 
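// The glob on the next line matches both classic `bwa index` output
// (.amb, .ann, .bwt, .pac, .sa) and `bwa-mem2 index` output (.0123, .bwt.2bit.64),
// so either supported params.tools.bwamaptools.bwa_version can be used. The index
// files are expected to sit next to the genome FASTA, e.g. genome.fa.bwt.2bit.64
// for PUMATAC_dependencies/genomes/hg38_bwamem2/genome.fa.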
"*.{amb,ann,bwt,fai,flat,gdx,pac,sa,0123,bwt.2bit.64}" 37 | ) 38 | bwa_index = Channel.fromPath(bwa_index_path, 39 | glob: true, 40 | type: 'file', 41 | ) 42 | .ifEmpty { exit 1, "ERROR: Could not find bwa indices from: ${bwa_index_path}." } 43 | .collect() 44 | .toList() 45 | 46 | data_channel = bwa_fasta.combine(bwa_index) 47 | 48 | emit: 49 | data_channel 50 | 51 | } 52 | 53 | 54 | workflow BWA_MAPPING_PE { 55 | 56 | take: 57 | data // a channel of [val(unique_sampleId), val(sampleId), path(fastq_PE1), path(fastq_PE2)] 58 | // unique_sampleId is used to label the read group field "SM" and (part of) "LB", 59 | // while sampleId represents each split fastq file for a unique sample. 60 | 61 | main: 62 | /* 63 | 1) create a channel linking bwa index files from genome.fa in params, and 64 | 2) combine this channel with the items in the data channel 65 | */ 66 | bwa_inputs = get_bwa_index(params.tools.bwamaptools.bwa_fasta).combine(data) 67 | 68 | aligned_bam = BWA_MEM_PE(bwa_inputs) 69 | 70 | 71 | // publish output: 72 | 73 | MAPPING_SUMMARY(aligned_bam) 74 | PUBLISH_MAPPING_SUMMARY(MAPPING_SUMMARY.out, '.mapping_stats.tsv', 'reports/mapping_stats') 75 | 76 | emit: 77 | aligned_bam 78 | } 79 | -------------------------------------------------------------------------------- /src/bwamaptools/processes/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/bwamaptools/processes/.gitkeep -------------------------------------------------------------------------------- /src/bwamaptools/processes/index.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | // binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" 4 | 5 | toolParams = params.tools.bwamaptools 6 | 7 | process BWAMAPTOOLS__INDEX_BAM { 8 | 9 | container toolParams.container 10 | label 'compute_resources__default' 11 | 12 | input: 13 | tuple val(sampleId), 14 | path(bam) 15 | 16 | output: 17 | tuple val(sampleId), 18 | path(bam), 19 | path("*.bai") 20 | 21 | script: 22 | def sampleParams = params.parseConfig(sampleId, params.global, toolParams) 23 | processParams = sampleParams.local 24 | """ 25 | samtools index ${bam} 26 | """ 27 | } 28 | 29 | process BWAMAPTOOLS__INDEX_BED { 30 | 31 | container toolParams.container 32 | label 'compute_resources__default' 33 | 34 | input: 35 | tuple val(sampleId), 36 | path(bed) 37 | 38 | output: 39 | tuple val(sampleId), 40 | path("*.tbi") 41 | 42 | script: 43 | def sampleParams = params.parseConfig(sampleId, params.global, toolParams) 44 | processParams = sampleParams.local 45 | """ 46 | tabix -p bed ${bed} 47 | """ 48 | } 49 | 50 | -------------------------------------------------------------------------------- /src/bwamaptools/processes/mapping.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | // binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/template/bin/" : "" 4 | 5 | toolParams = params.tools.bwamaptools 6 | 7 | process BWAMAPTOOLS__BWA_MEM_PE { 8 | 9 | container toolParams.container 10 | label 'compute_resources__bwa_mem' 11 | 12 | input: 13 | tuple path(bwa_fasta), 14 | path(bwa_index), 15 | val(unique_sampleId), 16 | val(sampleId), 17 | path(fastq_PE1), 18 | path(fastq_PE2) 19 | 20 | output: 21 | tuple val(sampleId), 22 | path("${sampleId}.bwa.out.fixmate.possorted.bam"), 23 | path("${sampleId}.bwa.out.fixmate.possorted.bam.bai") 24 | 25 | script: 26 | def sampleParams = params.parseConfig(unique_sampleId, params.global, toolParams) 27 | processParams = sampleParams.local 28 | """ 29 | id=\$(zcat ${fastq_PE1} | head -n 1 | cut -f 1-4 -d':' | sed 's/@//') 30 | ${toolParams.bwa_version} mem \ 31 | -t ${task.cpus} \ 32 | -C \ 33 | -R "@RG\\tID:\${id}\\tSM:${unique_sampleId}\\tLB:\${id}"__"${unique_sampleId}\\tPL:ILLUMINA" \ 34 | ${bwa_fasta} \ 35 | ${fastq_PE1} \ 36 | ${fastq_PE2} \ 37 | | samtools fixmate -u -m -O bam - - \ 38 | | samtools sort -@ 2 -m 2G -O bam --write-index -T '${sampleId}.bwa.out.fixmate.possorted.TMP' -o '${sampleId}.bwa.out.fixmate.possorted.bam##idx##${sampleId}.bwa.out.fixmate.possorted.bam.bai' - 39 | """ 40 | } 41 | -------------------------------------------------------------------------------- /src/bwamaptools/processes/mapping_summary.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/bwamaptools/bin/" : "" 4 | 5 | toolParams = params.tools.bwamaptools 6 | 7 | process BWAMAPTOOLS__MAPPING_SUMMARY { 8 | 9 | container toolParams.container 10 | label 'compute_resources__default','compute_resources__24hqueue' 11 | 12 | input: 13 | tuple val(sampleId), 14 | path(bam), 15 | path(bai) 16 | 17 | output: 18 | tuple val(sampleId), 19 | path("${sampleId}.mapping_stats.tsv") 20 | 21 | script: 22 | def sampleParams = params.parseConfig(sampleId, params.global, toolParams) 23 | processParams = sampleParams.local 24 | """ 25 | ${binDir}mapping_summary.sh \ 26 | ${sampleId} \ 27 | ${bam} \ 28 | """ 29 | } 30 | 31 | -------------------------------------------------------------------------------- /src/bwamaptools/workflows/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/bwamaptools/workflows/.gitkeep -------------------------------------------------------------------------------- /src/channels/conf/bam.config: -------------------------------------------------------------------------------- 1 | params { 2 | data { 3 | bam { 4 | file_paths = '' 5 | suffix = '.bam' 6 | index_extension = '.bai' 7 | } 8 | } 9 | tools { 10 | file_converter { 11 | iff = 'bam' 12 | } 13 | } 14 | } 15 | 16 | -------------------------------------------------------------------------------- /src/channels/conf/csv.config: -------------------------------------------------------------------------------- 1 | params { 2 | data { 3 | csv { 4 | file_paths = '' 5 | suffix = '.csv' 6 | } 7 | } 8 | tools { 9 | file_converter { 10 | iff = 'csv' 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/channels/conf/fragments.config: -------------------------------------------------------------------------------- 1 | params { 2 | data { 3 | fragments { 4 | file_paths = '' 5 | suffix = '.tsv.gz' 6 | 
index_extension = '.tbi' 7 | } 8 | } 9 | tools { 10 | file_converter { 11 | iff = 'fragments' 12 | } 13 | } 14 | } 15 | 16 | -------------------------------------------------------------------------------- /src/channels/conf/h5ad.config: -------------------------------------------------------------------------------- 1 | params { 2 | data { 3 | h5ad { 4 | file_paths = '' 5 | suffix = '.h5ad' 6 | } 7 | } 8 | tools { 9 | file_converter { 10 | iff = 'h5ad' 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/channels/conf/loom.config: -------------------------------------------------------------------------------- 1 | params { 2 | data { 3 | loom { 4 | file_paths = '' 5 | suffix = '.loom' 6 | } 7 | } 8 | tools { 9 | file_converter { 10 | iff = 'loom' 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/channels/conf/seurat_rds.config: -------------------------------------------------------------------------------- 1 | params { 2 | data { 3 | seurat_rds { 4 | file_paths = '' 5 | suffix = '.Rds' 6 | } 7 | } 8 | tools { 9 | file_converter { 10 | iff = 'seurat_rds' 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/channels/conf/sra.config: -------------------------------------------------------------------------------- 1 | params { 2 | data { 3 | // Based on SRA Project Identifiers 4 | sra = [ 5 | [ 6 | id: '', 7 | samples: [""] // Use Unix globbing 8 | ] 9 | ] 10 | } 11 | } -------------------------------------------------------------------------------- /src/channels/conf/tenx_arc_cellranger_mex.config: -------------------------------------------------------------------------------- 1 | params { 2 | data { 3 | tenx_arc { 4 | cellranger_mex = 'data/10x/1k_pbmc/1k_pbmc_*/outs/' 5 | } 6 | } 7 | tools { 8 | file_converter { 9 | off = 'cistopic_rds' 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/channels/conf/tenx_atac_cellranger_mex.config: -------------------------------------------------------------------------------- 1 | params { 2 | data { 3 | tenx_atac { 4 | cellranger_mex = 'data/10x/1k_pbmc/1k_pbmc_*/outs/' 5 | } 6 | } 7 | 8 | tools { 9 | file_converter { 10 | off = 'cistopic_rds' 11 | } 12 | } 13 | } 14 | 15 | -------------------------------------------------------------------------------- /src/channels/conf/tenx_cellranger_h5.config: -------------------------------------------------------------------------------- 1 | params { 2 | data { 3 | tenx { 4 | cellranger_h5 = '' 5 | } 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /src/channels/conf/tenx_cellranger_mex.config: -------------------------------------------------------------------------------- 1 | params { 2 | data { 3 | tenx { 4 | cellranger_mex = 'data/10x/1k_pbmc/1k_pbmc_*/outs/' 5 | } 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /src/channels/conf/tsv.config: -------------------------------------------------------------------------------- 1 | params { 2 | data { 3 | tsv { 4 | file_paths = '' 5 | suffix = '.tsv' 6 | } 7 | } 8 | tools { 9 | file_converter { 10 | iff = 'tsv' 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/channels/file.nf: -------------------------------------------------------------------------------- 1 | 
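// Channel factories that turn a file glob (or comma-separated list of globs)
// into tuples keyed by a sample ID extracted from each path. A hypothetical
// call with illustrative arguments (the exact 'groups' value depends on
// extractSample in src/utils/processes/files.nf):
//
//   getChannelWithIndex('data/*.fragments.tsv.gz', '.fragments.tsv.gz', '.tbi', groups)
//
// which emits one [sampleId, [file, index], tag] tuple per matched file.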
nextflow.enable.dsl=2 2 | 3 | include { 4 | extractSample 5 | } from '../utils/processes/files.nf' 6 | 7 | workflow getChannel { 8 | 9 | take: 10 | glob 11 | sampleSuffixWithExtension // Suffix after the sample name in the file paths 12 | groups 13 | 14 | main: 15 | // Check whether multiple globs are provided 16 | if(glob.contains(',')) { 17 | glob = Arrays.asList(glob.split(',')); 18 | } 19 | data_channel = Channel 20 | .fromPath(glob, checkIfExists: true) 21 | .map { 22 | path -> tuple( 23 | *extractSample( 24 | "${path}", 25 | sampleSuffixWithExtension, 26 | groups 27 | ), 28 | file("${path}") 29 | ) 30 | }.map { 31 | // reorder: sample ID, file path, tag 32 | it -> tuple(it[0], it[2], it[1]) 33 | } 34 | 35 | emit: 36 | data_channel 37 | 38 | } 39 | 40 | workflow getChannelWithIndex { 41 | 42 | take: 43 | glob 44 | sampleSuffixWithExtension // Suffix after the sample name in the file paths 45 | indexFileExtension // file extension of the paired index file (e.g. '.bai', '.tbi') 46 | groups 47 | 48 | main: 49 | // Check whether multiple globs are provided 50 | if(glob.contains(',')) { 51 | glob = Arrays.asList(glob.split(',')); 52 | } 53 | data_channel = Channel 54 | .fromPath(glob, checkIfExists: true) 55 | .map { 56 | path -> tuple(*extractSample("${path}", sampleSuffixWithExtension, groups), file("${path}"), file("${path}${indexFileExtension}")) 57 | } 58 | .map { 59 | // reorder: sample ID, [file path, file index path], tag 60 | it -> tuple(it[0], [it[2],it[3]], it[1]) 61 | } 62 | 63 | emit: 64 | data_channel 65 | 66 | } 67 | 68 | workflow getChannelFromFilePath { 69 | 70 | take: 71 | filePath 72 | sampleSuffixWithExtension // Suffix after the sample name in the file paths 73 | groups 74 | 75 | main: 76 | data_channel = Channel.of( 77 | tuple(filePath) 78 | ) 79 | .map { 80 | it -> tuple(*extractSample("${it[0]}", sampleSuffixWithExtension, groups), file("${it[0]}")) 81 | } 82 | .map { 83 | // reorder: sample ID, file path, tag 84 | it -> tuple(it[0], it[2], it[1]) 85 | } 86 | 87 | emit: 88 | data_channel 89 | 90 | } 91 | 92 | -------------------------------------------------------------------------------- /src/channels/singleend.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | def extractSample(path) { 4 | pattern = /(.+)\/(.+)_R[1-2](.*)\.fastq(\.gz)?/ 5 | (full, parentDir, id, whateverSuffix, compressionExtension) = (path =~ pattern)[0] 6 | return id 7 | } 8 | 9 | workflow getChannel { 10 | 11 | take: 12 | glob 13 | 14 | main: 15 | // Check whether multiple globs are provided 16 | if(glob.contains(',')) { 17 | glob = Arrays.asList(glob.split(',')); 18 | } 19 | channel = Channel 20 | .fromPath(glob, checkIfExists: true) 21 | .map { 22 | path -> tuple(extractSample( "${path}" ), file("${path}")) 23 | } 24 | 25 | emit: 26 | channel 27 | 28 | } 29 | -------------------------------------------------------------------------------- /src/channels/sra.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | workflow getChannel { 4 | 5 | take: 6 | // Expects sra Map [[id: "id1", samples: ["glob1", ...]], ...] 
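// (the same shape as configured in src/channels/conf/sra.config), e.g. with a
// placeholder project ID:
//   sra = [[ id: 'SRPxxxxxx', samples: ['*'] ]]
// where each entry in samples may be a Unix glob over sample names.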
7 | sra 8 | 9 | main: 10 | data_channel = Channel.fromList( 11 | sra 12 | ).map { 13 | it -> tuple(it.id, it.samples) 14 | } 15 | 16 | emit: 17 | data_channel 18 | 19 | } -------------------------------------------------------------------------------- /src/channels/tenx.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | CELLRANGER_OUTS_REGEX = /(.+)\/(.+)\/outs/ 4 | CELLRANGER_H5_REGEX = /(.+)\/(.+)\/outs\/(.+)\.h5/ 5 | CELLRANGER_MEX_REGEX = /(.+)\/(.+)\/outs\/(.+)/ 6 | 7 | def extractSampleFromOuts(path) { 8 | // Allow to detect data generated by CellRanger prior and post to version 3. 9 | if (!(path ==~ CELLRANGER_OUTS_REGEX)) 10 | throw new Exception("Incorrect Cell Ranger MEX path. The parameter params.data.tenx.cellranger_outs in the config file should point to the outs folder.") 11 | (full, parentDir, id) = (path =~ CELLRANGER_OUTS_REGEX)[0] 12 | return id 13 | } 14 | 15 | workflow getOutsChannel { 16 | 17 | take: 18 | glob 19 | 20 | main: 21 | // Check whether multiple globs are provided 22 | if(glob.contains(',')) { 23 | glob = Arrays.asList(glob.split(',')); 24 | } 25 | data_channel = Channel 26 | .fromPath(glob, type: 'dir', checkIfExists: true) 27 | .map { 28 | filePath -> tuple(extractSampleFromOuts( "${filePath}" ), file("${filePath}")) 29 | } 30 | 31 | emit: 32 | data_channel 33 | 34 | } 35 | 36 | def extractSampleFromH5(path) { 37 | if (!(path ==~ CELLRANGER_H5_REGEX)) 38 | throw new Exception("Incorrect Cell Ranger .h5 path. The parameter params.data.tenx.cellranger_h5 in the config file should point to the .h5 file.") 39 | // Allow to detect data generated by CellRanger prior and post to version 3. 40 | (full, parentDir, id, filename) = (path =~ CELLRANGER_H5_REGEX)[0] 41 | return id 42 | } 43 | 44 | workflow getH5Channel { 45 | 46 | take: 47 | glob 48 | 49 | main: 50 | // Check whether multiple globs are provided 51 | if(glob.contains(',')) { 52 | glob = Arrays.asList(glob.split(',')); 53 | } 54 | data_channel = Channel 55 | .fromPath(glob, type: 'file', checkIfExists: true) 56 | .map { 57 | filePath -> tuple(extractSampleFromH5( "${filePath}" ), file("${filePath}")) 58 | } 59 | 60 | emit: 61 | data_channel 62 | 63 | } 64 | 65 | 66 | 67 | def extractSampleFromMEX(path) { 68 | // Allow to detect data generated by CellRanger prior and post to version 3. 69 | if (!(path ==~ CELLRANGER_MEX_REGEX)) 70 | throw new Exception("Incorrect Cell Ranger MEX path. 
The parameter params.data.tenx.cellranger_mex in the config file should point to a MEX folder.") 71 | (full, parentDir, id, filename, mexFolder) = (path =~ CELLRANGER_MEX_REGEX)[0] 72 | return id 73 | } 74 | 75 | workflow getMEXChannel { 76 | 77 | take: 78 | glob 79 | 80 | main: 81 | // Check whether multiple globs are provided 82 | if(glob.contains(',')) { 83 | glob = Arrays.asList(glob.split(',')); 84 | } 85 | data_channel = Channel 86 | .fromPath(glob, type: 'dir', checkIfExists: true) 87 | .map { 88 | filePath -> tuple(extractSampleFromMEX( "${filePath}" ), file("${filePath}")) 89 | } 90 | 91 | emit: 92 | data_channel 93 | 94 | } 95 | -------------------------------------------------------------------------------- /src/edirect/.gitignore: -------------------------------------------------------------------------------- 1 | *checkpoint.ipynb 2 | *checkpoint* 3 | *checkpoint.py 4 | *.test.ipynb 5 | *.csv 6 | *.loom 7 | *.pickle 8 | *.pyc 9 | *.html 10 | *egg* 11 | .vscode 12 | .nextflow 13 | .nextflow* 14 | data 15 | refdata 16 | work 17 | out/notebooks 18 | src/scenic/out 19 | src/scenic/notebooks 20 | src/scenic/data 21 | refdata 22 | data/10x/tiny 23 | work/ 24 | out/ 25 | tests/ 26 | debug/ 27 | *.swp 28 | *.swo 29 | -------------------------------------------------------------------------------- /src/edirect/edirect.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | edirect { 4 | container = 'ncbi/edirect:latest' 5 | } 6 | } 7 | } -------------------------------------------------------------------------------- /src/edirect/processes/sra_metadata.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | process EDIRECT__SRAID_TO_SAMPLENAME { 4 | 5 | container params.tools.edirect.container 6 | label 'compute_resources__default' 7 | maxForks 1 8 | 9 | input: 10 | val(sraId) 11 | output: 12 | tuple val(sraId), stdout 13 | shell: 14 | """ 15 | esearch -db sra -query ${sraId} \ 16 | | efetch --format native \ 17 | | sed -r 's/(.*)<TITLE>(.*)<\\/TITLE>(.*)/\\2/' \ 18 | | grep "^[^<;]" \ 19 | | tr -d '\\n' 20 | """ 21 | } 22 | -------------------------------------------------------------------------------- /src/edirect/workflows/sra_fastq_urls.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | include { 4 | EDIRECT__SRAID_TO_SAMPLENAME 5 | } from '../processes/sra_metadata.nf' 6 | 7 | workflow SRA_FASTQ_URLS { 8 | 9 | take: 10 | sraProjectId 11 | sampleNamesToRetrieve 12 | 13 | main: 14 | Channel 15 | .fromSRA(sraProjectId) 16 | .map { it[0] } 17 | .set { sraIDs } 18 | sraIDsToSample = EDIRECT__SRAID_TO_SAMPLENAME( sraIDs ) 19 | sraFastqUrls = sraIDsToSample 20 | .join(sraIDs) 21 | .map { it -> tuple(it[0],it[1],"ftp://ftp.sra.ebi.ac.uk/" + it[2])} 22 | 23 | if(!params.containsKey('quiet')) sraFastqUrls.view() 24 | 25 | emit: 26 | sraFastqUrls 27 | 28 | } 29 | -------------------------------------------------------------------------------- /src/popscle/.gitattributes: -------------------------------------------------------------------------------- 1 | notebooks/* linguist-vendored 2 | -------------------------------------------------------------------------------- /src/popscle/.gitignore: -------------------------------------------------------------------------------- 1 | *checkpoint.ipynb 2 | *checkpoint* 3 | *checkpoint.py 4 | *.test.ipynb 5 | *.csv 6 | *.loom 7 | *.pickle 8 | *.pyc 9 | *.html 10 | *egg* 11
| .vscode 12 | .nextflow 13 | .nextflow* 14 | data 15 | refdata 16 | work 17 | out/notebooks 18 | src/scenic/out 19 | src/scenic/notebooks 20 | src/scenic/data 21 | refdata 22 | data/10x/tiny 23 | work/ 24 | out/ 25 | tests/ 26 | debug/ 27 | *.swp 28 | *.swo 29 | -------------------------------------------------------------------------------- /src/popscle/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM vibsinglecellnf/samtools:0.2-1.12 2 | 3 | RUN echo 'deb http://deb.debian.org/debian testing main' > /etc/apt/sources.list.d/testing.list && \ 4 | apt-get update && \ 5 | apt-get install -y --no-install-recommends \ 6 | build-essential \ 7 | cmake 8 | 9 | # Install popscle 10 | RUN git clone --depth 1 https://github.com/statgen/popscle.git /tmp/popscle && \ 11 | mkdir -p /tmp/popscle/build && \ 12 | cd /tmp/popscle/build && \ 13 | cmake .. && \ 14 | make && \ 15 | cp /tmp/popscle/bin/popscle /usr/local/bin 16 | 17 | # install bedtools 18 | ENV BEDTOOLS_VERSION 2.30.0 19 | RUN curl -L -o /usr/local/bin/bedtools \ 20 | https://github.com/arq5x/bedtools2/releases/download/v${BEDTOOLS_VERSION}/bedtools.static.binary && \ 21 | chmod a+x /usr/local/bin/bedtools 22 | 23 | # install popscle_helper_tools into this image 24 | # (https://github.com/aertslab/popscle_helper_tools) 25 | RUN git clone --depth 1 https://github.com/aertslab/popscle_helper_tools.git /tmp/popscle_helper_tools && \ 26 | mv /tmp/popscle_helper_tools/*sh /usr/local/bin 27 | 28 | -------------------------------------------------------------------------------- /src/popscle/README.rst: -------------------------------------------------------------------------------- 1 | 2 | VSN-Pipelines popscle 3 | ====================== 4 | 5 | This is a repository for the popscle module of the VIB-SingleCell-NF (VSN) pipelines. 6 | 7 | Current Status 8 | --------------- 9 | 10 | This module currently has two workflows: ``freemuxlet`` and ``demuxlet``. 11 | Both of these workflows expect an input channel consisting of a tuple where 12 | element 1 is the sampleID and element 2 is the output folder of a 10X run. 13 | 14 | Currently the workflows are fixed to use the filtered matrices. 15 | 16 | To build the Docker image 17 | ------------------------- 18 | 19 | Image tag format: ``<date of latest git commit>-<short hash of latest git commit>``. 20 | 21 | .. code:: bash 22 | 23 | docker build -t vibsinglecellnf/popscle:2021-05-05-da70fc7 . 24 | 25 | This image uses the ``vibsinglecellnf/samtools`` image as a base. 26 | 27 | Acknowledgements 28 | ---------------- 29 | 30 | This module implements functionality developed by Gert Hulselmans designed to 31 | speed up the running time of dsc-pileup. The `filter_bam_file_for_popscle_dsc_pileup`_ 32 | script can lead to speedups of 5-10x depending on the input data. 33 | 34 | ..
_`filter_bam_file_for_popscle_dsc_pileup`: https://github.com/aertslab/popscle_helper_tools 35 | 36 | -------------------------------------------------------------------------------- /src/popscle/conf/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/popscle/conf/.gitkeep -------------------------------------------------------------------------------- /src/popscle/main.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | ////////////////////////////////////////////////////// 4 | // Import sub-workflows from the modules: 5 | 6 | include { 7 | SC__FILE_CONVERTER 8 | } from '../utils/processes/utils.nf' params(params) 9 | 10 | include { 11 | SC__POPSCLE__DSC_PILEUP 12 | } from './processes/dsc_pileup.nf' params(params) 13 | include { 14 | SC__POPSCLE__PREFILTER_DSC_PILEUP 15 | } from './processes/dsc_pileup.nf' params(params) 16 | 17 | 18 | ////////////////////////////////////////////////////// 19 | // Define the workflow 20 | 21 | workflow popscle { 22 | 23 | take: 24 | data 25 | 26 | main: 27 | data = SC__FILE_CONVERTER(data) 28 | SC__POPSCLE__PREFILTER_DSC_PILEUP(data) | 29 | SC__POPSCLE__DSC_PILEUP 30 | 31 | } 32 | 33 | -------------------------------------------------------------------------------- /src/popscle/popscle.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | popscle { 4 | container = 'vibsinglecellnf/popscle:2021-05-05-da70fc7' 5 | vcf = '/path/to/vcf_file' 6 | barcode_tag = 'CB' 7 | freemuxlet { 8 | nSamples = 2 9 | } 10 | demuxlet { 11 | field = 'GT' 12 | } 13 | } 14 | } 15 | } 16 | 17 | -------------------------------------------------------------------------------- /src/popscle/processes/demuxlet.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/popscle/bin/" : "" 4 | 5 | process SC__POPSCLE__DEMUXLET { 6 | 7 | container params.tools.popscle.container 8 | publishDir "${params.global.outdir}/data/demuxlet", mode: params.utils.publish.mode 9 | label 'compute_resources__cpu' 10 | 11 | input: 12 | tuple val(sampleId), path(f) 13 | file vcf 14 | 15 | output: 16 | tuple val(sampleId), path("${sampleId}_demuxlet*") 17 | 18 | script: 19 | def sampleParams = params.parseConfig(sampleId, params.global, params.tools.popscle.demuxlet) 20 | processParams = sampleParams.local 21 | 22 | """ 23 | popscle demuxlet \ 24 | --vcf ${vcf} \ 25 | ${(processParams.containsKey('field')) ? 
'--field ' + processParams.field : ''} \ 26 | --plp ${sampleId}_dsc-pileup \ 27 | --out ${sampleId}_demuxlet 28 | """ 29 | } 30 | 31 | process SC__POPSCLE__FREEMUXLET { 32 | 33 | container params.tools.popscle.container 34 | publishDir "${params.global.outdir}/data/freemuxlet", mode: params.utils.publish.mode 35 | label 'compute_resources__cpu' 36 | 37 | input: 38 | tuple val(sampleId), path(f) 39 | 40 | output: 41 | tuple val(sampleId), path("${sampleId}_freemuxlet*") 42 | 43 | script: 44 | def sampleParams = params.parseConfig(sampleId, params.global, params.tools.popscle.freemuxlet) 45 | processParams = sampleParams.local 46 | 47 | """ 48 | popscle freemuxlet \ 49 | --nsample ${processParams.nSamples} \ 50 | --plp ${sampleId}_dsc-pileup \ 51 | --out ${sampleId}_freemuxlet 52 | """ 53 | } 54 | -------------------------------------------------------------------------------- /src/popscle/processes/dsc_pileup.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/popscle/bin/" : "" 4 | 5 | toolParams = params.tools.popscle 6 | 7 | process SC__POPSCLE__DSC_PILEUP { 8 | 9 | container params.tools.popscle.container 10 | publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink' 11 | label 'compute_resources__cpu','compute_resources__24hqueue' 12 | 13 | input: 14 | tuple val(sampleId), path(f) 15 | file vcf 16 | 17 | output: 18 | tuple val(sampleId), path("${sampleId}_dsc-pileup*.gz") 19 | 20 | script: 21 | """ 22 | popscle dsc-pileup \ 23 | --sam ${f} \ 24 | ${toolParams?.barcode_tag ? '--tag-group ' + toolParams.barcode_tag : ''} \ 25 | --vcf ${vcf} \ 26 | --out ${sampleId}_dsc-pileup 27 | """ 28 | } 29 | 30 | process SC__POPSCLE__PREFILTER_DSC_PILEUP { 31 | 32 | container params.tools.popscle.container 33 | publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink' 34 | label 'compute_resources__cpu' 35 | 36 | input: 37 | tuple val(sampleId), 38 | path(bam), 39 | path(barcodes) 40 | file vcf 41 | 42 | output: 43 | tuple val(sampleId), path("${sampleId}_filtered_possorted_genome_bam.bam") 44 | 45 | script: 46 | """ 47 | filter_bam_file_for_popscle_dsc_pileup.sh \ 48 | ${bam} \ 49 | ${barcodes} \ 50 | ${vcf} \ 51 | ${sampleId}_filtered_possorted_genome_bam.bam \ 52 | ${toolParams?.barcode_tag ? 
toolParams.barcode_tag : ''} 53 | """ 54 | } 55 | 56 | -------------------------------------------------------------------------------- /src/popscle/workflows/dsc_pileup.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | ////////////////////////////////////////////////////// 4 | // Import sub-workflows from the modules: 5 | 6 | include { 7 | SC__FILE_CONVERTER; 8 | } from '../../utils/processes/utils.nf' params(params) 9 | 10 | include { 11 | SC__POPSCLE__DSC_PILEUP; 12 | SC__POPSCLE__PREFILTER_DSC_PILEUP; 13 | } from '../processes/dsc_pileup.nf' params(params) 14 | 15 | 16 | ////////////////////////////////////////////////////// 17 | // Define the workflow 18 | 19 | workflow DSC_PILEUP_FILTERED { 20 | 21 | take: 22 | data 23 | 24 | main: 25 | vcf = file(params.tools.popscle.vcf) 26 | SC__POPSCLE__PREFILTER_DSC_PILEUP(data, vcf) 27 | SC__POPSCLE__DSC_PILEUP(SC__POPSCLE__PREFILTER_DSC_PILEUP.out, vcf) 28 | 29 | emit: 30 | SC__POPSCLE__DSC_PILEUP.out 31 | } 32 | 33 | -------------------------------------------------------------------------------- /src/pycistopic/.gitattributes: -------------------------------------------------------------------------------- 1 | notebooks/* linguist-vendored 2 | -------------------------------------------------------------------------------- /src/pycistopic/.gitignore: -------------------------------------------------------------------------------- 1 | *checkpoint.ipynb 2 | *checkpoint* 3 | *checkpoint.py 4 | *.test.ipynb 5 | *.csv 6 | *.loom 7 | *.pickle 8 | *.pyc 9 | *.html 10 | *egg* 11 | .vscode 12 | .nextflow 13 | .nextflow* 14 | data 15 | refdata 16 | work 17 | out/notebooks 18 | src/scenic/out 19 | src/scenic/notebooks 20 | src/scenic/data 21 | refdata 22 | data/10x/tiny 23 | work/ 24 | out/ 25 | tests/ 26 | debug/ 27 | *.swp 28 | *.swo 29 | -------------------------------------------------------------------------------- /src/pycistopic/README.rst: -------------------------------------------------------------------------------- 1 | 2 | pycisTopic module 3 | ================= 4 | 5 | This repository contains an implementation of pycisTopic for VIB-SingleCell-NF (VSN) pipelines. 6 | 7 | -------------------------------------------------------------------------------- /src/pycistopic/bin/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/pycistopic/bin/.gitkeep -------------------------------------------------------------------------------- /src/pycistopic/bin/biomart_annot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import os 5 | import pickle 6 | import sys 7 | 8 | import pybiomart as pbm 9 | 10 | 11 | def main(): 12 | parser = argparse.ArgumentParser(description="Biomart gene annotation download.") 13 | 14 | parser.add_argument( 15 | "--biomart_dataset_name", 16 | type=str, 17 | required=True, 18 | help='Biomart dataset name, e.g. "hsapiens_gene_ensembl", ' 19 | '"mmusculus_gene_ensembl", "dmelanogaster_gene_ensembl", ... .', 20 | ) 21 | parser.add_argument( 22 | "--biomart_host", 23 | type=str, 24 | required=True, 25 | help='Biomart host address, e.g. "http://www.ensembl.org", ' 26 | '"http://nov2020.archive.ensembl.org/", ... .', 27 | ) 28 | 29 | args = parser.parse_args() 30 | 31 | # Skip retrieving annotation from biomart, if it was already done. 
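    # For reference, an illustrative invocation mirroring the values in
    # conf/pycistopic_hg38.config (assumed to run inside the pycistopic container):
    #   biomart_annot.py \
    #       --biomart_dataset_name hsapiens_gene_ensembl \
    #       --biomart_host http://www.ensembl.org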
32 | if os.path.exists("biomart_annot.pickle"): 33 | sys.exit(0) 34 | 35 | dataset = pbm.Dataset(name=args.biomart_dataset_name, host=args.biomart_host) 36 | annot = dataset.query( 37 | attributes=[ 38 | "chromosome_name", 39 | "transcription_start_site", 40 | "strand", 41 | "external_gene_name", 42 | "transcript_biotype", 43 | ] 44 | ) 45 | 46 | # Rename columns. 47 | annot.columns = ["Chromosome", "Start", "Strand", "Gene", "Transcript_type"] 48 | 49 | # Convert objects in chromosome column to strings. 50 | annot["Chromosome"] = annot["Chromosome"].astype(str) 51 | 52 | # Only keep protein coding genes. 53 | annot = annot[annot.Transcript_type == "protein_coding"] 54 | 55 | # Only keep genes on normal chromosomes: (1-99, X, Y, 2L, 2R, 3L, 3R). 56 | filter_chroms = annot["Chromosome"].str.contains("^[0-9]{1,2}$|^[XY]$|^[23][LR]$") 57 | annot = annot[(filter_chroms)] 58 | 59 | # Add "chr" to the beginning of the chromosome names to make them UCSC compatible. 60 | annot["Chromosome"] = annot["Chromosome"].str.replace(r"(\b\S)", r"chr\1") 61 | 62 | with open("biomart_annot.pickle", "wb") as fh: 63 | pickle.dump(annot, fh) 64 | 65 | 66 | if __name__ == "__main__": 67 | main() 68 | -------------------------------------------------------------------------------- /src/pycistopic/bin/plot_qc_stats.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import pickle 5 | 6 | from pycisTopic.qc import plot_sample_metrics 7 | 8 | ################################################################################ 9 | 10 | parser = argparse.ArgumentParser(description='Plot QC stats') 11 | 12 | parser.add_argument( 13 | "--sampleId", 14 | type=str, 15 | required=True, 16 | help='Sample ID.' 17 | ) 18 | parser.add_argument( 19 | "--profile_data_pkl", 20 | type=str, 21 | help='Profile data, pickle format.' 22 | ) 23 | parser.add_argument( 24 | "--output_pdf", 25 | type=str, 26 | help='Output plots, pdf format.' 27 | ) 28 | 29 | args = parser.parse_args() 30 | 31 | ################################################################################ 32 | 33 | # Load sample metrics 34 | infile = open(args.profile_data_pkl, 'rb') 35 | profile_data_dict = pickle.load(infile) 36 | infile.close() 37 | 38 | 39 | # plot: 40 | plot_sample_metrics(profile_data_dict, 41 | insert_size_distriubtion_xlim=[0,600], 42 | ncol=5, 43 | cmap='viridis', 44 | save=args.output_pdf 45 | ) 46 | 47 | -------------------------------------------------------------------------------- /src/pycistopic/conf/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/pycistopic/conf/.gitkeep -------------------------------------------------------------------------------- /src/pycistopic/conf/pycistopic_dmel.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | pycistopic { 4 | biomart_annot { 5 | biomart_dataset_name = 'dmelanogaster_gene_ensembl' 6 | biomart_host = 'http://www.ensembl.org' 7 | } 8 | macs2_call_peaks { 9 | gsize = 'dm' // hs, mm, ce, dm, or numeric effective genome size, e.g.
'2.7e9' 10 | } 11 | } 12 | } 13 | } 14 | 15 | -------------------------------------------------------------------------------- /src/pycistopic/conf/pycistopic_hg38.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | pycistopic { 4 | biomart_annot { 5 | biomart_dataset_name = 'hsapiens_gene_ensembl' 6 | biomart_host = 'http://www.ensembl.org' 7 | } 8 | macs2_call_peaks { 9 | gsize = 'hs' // hs, mm, ce, dm, or numeric effective genome size, e.g. '2.7e9' 10 | } 11 | } 12 | } 13 | } 14 | 15 | -------------------------------------------------------------------------------- /src/pycistopic/conf/pycistopic_mm10.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | pycistopic { 4 | biomart_annot { 5 | biomart_dataset_name = 'mmusculus_gene_ensembl' 6 | biomart_host = 'http://nov2020.archive.ensembl.org/' 7 | } 8 | macs2_call_peaks { 9 | gsize = 'mm' // hs, mm, ce, dm, or numeric effective genome size, e.g. '2.7e9' 10 | } 11 | } 12 | } 13 | } 14 | 15 | -------------------------------------------------------------------------------- /src/pycistopic/processes/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/pycistopic/processes/.gitkeep -------------------------------------------------------------------------------- /src/pycistopic/processes/barcode_level_statistics.nf: -------------------------------------------------------------------------------- 1 | nextflow.preview.dsl=2 2 | 3 | binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/pycistopic/bin/" : "" 4 | 5 | toolParams = params.tools.pycistopic 6 | //processParams = params.tools.pycistopic.barcode_level_statistics 7 | 8 | process PYCISTOPIC__BARCODE_LEVEL_STATISTICS { 9 | 10 | publishDir "${params.global.outdir}/intermediate/pycistopic/qc/", mode: 'symlink' 11 | container toolParams.container 12 | label 'compute_resources__default','compute_resources__24hqueue' 13 | 14 | input: 15 | tuple val(sampleId), 16 | path(metadata), 17 | path(metadata_pkl), 18 | path(profile_data_pkl) 19 | 20 | output: 21 | tuple val(sampleId), 22 | path(selected_barcodes), 23 | path(output_pdf_ff), 24 | path(output_pdf_tf), 25 | path(output_pdf_df) 26 | 27 | script: 28 | def sampleParams = params.parseConfig(sampleId, params.global, toolParams.barcode_level_statistics) 29 | processParams = sampleParams.local 30 | selected_barcodes = "${sampleId}__selected_barcodes.txt" 31 | output_pdf_ff = "${sampleId}__FRIP-vs-nFrag.pdf" 32 | output_pdf_tf = "${sampleId}__TSS-vs-nFrag.pdf" 33 | output_pdf_df = "${sampleId}__duprate-vs-nFrag.pdf" 34 | """ 35 | export NUMEXPR_MAX_THREADS=${task.cpus} 36 | ${binDir}barcode_level_statistics.py \ 37 | --sampleId ${sampleId} \ 38 | --metadata_pkl ${metadata_pkl} \ 39 | --selected_barcodes ${selected_barcodes} \ 40 | ${processParams?.filter_frags_lower ? '--filter_frags_lower ' + processParams?.filter_frags_lower : ''} \ 41 | ${processParams?.filter_frags_upper ? '--filter_frags_upper ' + processParams?.filter_frags_upper : ''} \ 42 | ${processParams?.filter_tss_lower ? '--filter_tss_lower ' + processParams?.filter_tss_lower : ''} \ 43 | ${processParams?.filter_tss_upper ? '--filter_tss_upper ' + processParams?.filter_tss_upper : ''} \ 44 | ${processParams?.filter_frip_lower ? 
'--filter_frip_lower ' + processParams?.filter_frip_lower : ''} \ 45 | ${processParams?.filter_frip_upper ? '--filter_frip_upper ' + processParams?.filter_frip_upper : ''} \ 46 | ${processParams?.filter_dup_rate_lower ? '--filter_dup_rate_lower ' + processParams?.filter_dup_rate_lower : ''} \ 47 | ${processParams?.filter_dup_rate_upper ? '--filter_dup_rate_upper ' + processParams?.filter_dup_rate_upper : ''} 48 | """ 49 | } 50 | 51 | -------------------------------------------------------------------------------- /src/pycistopic/processes/biomart_annot.nf: -------------------------------------------------------------------------------- 1 | nextflow.preview.dsl=2 2 | 3 | binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/pycistopic/bin/" : "" 4 | 5 | toolParams = params.tools.pycistopic 6 | processParams = params.tools.pycistopic.biomart_annot 7 | 8 | process PYCISTOPIC__BIOMART_ANNOT { 9 | 10 | publishDir "${params.global.outdir}/intermediate/pycistopic/biomart/", mode: 'symlink' 11 | container toolParams.container 12 | label 'compute_resources__default' 13 | 14 | output: 15 | path("biomart_annot.pickle") 16 | 17 | script: 18 | """ 19 | ${binDir}biomart_annot.py \ 20 | --biomart_dataset_name ${processParams.biomart_dataset_name} \ 21 | --biomart_host ${processParams.biomart_host} 22 | """ 23 | } 24 | 25 | -------------------------------------------------------------------------------- /src/pycistopic/processes/compute_qc_stats.nf: -------------------------------------------------------------------------------- 1 | nextflow.preview.dsl=2 2 | 3 | binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/pycistopic/bin/" : "" 4 | 5 | toolParams = params.tools.pycistopic 6 | processParams = params.tools.pycistopic.compute_qc_stats 7 | 8 | process rename_fragments { 9 | 10 | container toolParams.container 11 | label 'compute_resources__minimal' 12 | 13 | input: 14 | tuple val(sampleId), 15 | path(f) 16 | output: 17 | tuple val(sampleId), 18 | path("${sampleId}_*") // glob over the sample-prefixed symlinks created below 19 | 20 | script: 21 | """ 22 | ln -s ${f[0]} ${sampleId}_${f[0]} 23 | ln -s ${f[1]} ${sampleId}_${f[1]} 24 | """ 25 | 26 | } 27 | 28 | 29 | process PYCISTOPIC__COMPUTE_QC_STATS { 30 | 31 | publishDir "${params.global.outdir}/data/pycistopic/qc/", mode: params.utils.publish.mode 32 | container toolParams.container 33 | label 'compute_resources__pycisTopic' 34 | 35 | input: 36 | val(input) 37 | path(biomart_annot) 38 | path(fragments) 39 | path(peaks) 40 | 41 | output: 42 | tuple path('metadata/*.metadata.pkl'), 43 | path('profile_data/*.profile_data.pkl') 44 | 45 | script: 46 | """ 47 | export NUMEXPR_MAX_THREADS=1 48 | export OMP_NUM_THREADS=1 49 | ${binDir}compute_qc_stats.py \ 50 | ${"--input_files "+input.join(" --input_files ")} \ 51 | --n_frag ${processParams.n_frag} \ 52 | --tss_flank_window ${processParams.tss_flank_window} \ 53 | --tss_window ${processParams.tss_window} \ 54 | --tss_minimum_signal_window ${processParams.tss_minimum_signal_window} \ 55 | --tss_rolling_window ${processParams.tss_rolling_window} \ 56 | --min_norm ${processParams.min_norm} \ 57 | --threads ${task.cpus} \ 58 | --biomart_annot_pkl ${biomart_annot} \ 59 | --output_metadata_dir metadata \ 60 | --output_profile_data_dir profile_data 61 | """ 62 | } 63 | 64 | -------------------------------------------------------------------------------- /src/pycistopic/processes/macs2_call_peaks.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | // binDir = 
!params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" 4 | 5 | toolParams = params.tools.pycistopic 6 | processParams = params.tools.pycistopic.macs2_call_peaks 7 | 8 | process PYCISTOPIC__MACS2_CALL_PEAKS { 9 | 10 | container toolParams.container 11 | label 'compute_resources__default','compute_resources__24hqueue' 12 | 13 | input: 14 | tuple val(sampleId), 15 | path(bam), 16 | path(bam_index) 17 | 18 | output: 19 | tuple val(sampleId), 20 | path("${sampleId}_peaks.narrowPeak"), 21 | path("${sampleId}_summits.bed") 22 | 23 | script: 24 | //def sampleParams = params.parseConfig(sampleId, params.global, toolParams) 25 | """ 26 | macs2 callpeak \ 27 | --treatment ${bam} \ 28 | --name ${sampleId} \ 29 | --outdir . \ 30 | --format BAMPE \ 31 | --gsize ${processParams.gsize} \ 32 | --qvalue ${processParams.qvalue} \ 33 | --nomodel \ 34 | --shift ${processParams.shift} \ 35 | --extsize ${processParams.extsize} \ 36 | --keep-dup ${processParams.keepdup} \ 37 | --call-summits \ 38 | --nolambda 39 | """ 40 | } 41 | 42 | -------------------------------------------------------------------------------- /src/pycistopic/processes/plot_qc_stats.nf: -------------------------------------------------------------------------------- 1 | nextflow.preview.dsl=2 2 | 3 | binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/pycistopic/bin/" : "" 4 | 5 | toolParams = params.tools.pycistopic 6 | processParams = params.tools.pycistopic.compute_qc_stats 7 | 8 | process PYCISTOPIC__PLOT_QC_STATS { 9 | 10 | container toolParams.container 11 | label 'compute_resources__default' 12 | 13 | input: 14 | tuple val(sampleId), 15 | path(output_metadata), 16 | path(output_metadata_pkl), 17 | path(output_profile_data_pkl) 18 | 19 | output: 20 | tuple val(sampleId), 21 | path(output_pdf) 22 | 23 | script: 24 | def sampleParams = params.parseConfig(sampleId, params.global, toolParams) 25 | // Do not reassign the staged input paths here: that would make the script reference 26 | // file names that may not exist in the work directory. Only define the output name. 27 | output_pdf = "${sampleId}_qc_sample_metrics.pdf" 28 | """ 29 | ${binDir}plot_qc_stats.py \ 30 | --sampleId ${sampleId} \ 31 | --profile_data_pkl ${output_profile_data_pkl} \ 32 | --output_pdf ${output_pdf} 33 | """ 34 | } 35 | 36 | -------------------------------------------------------------------------------- /src/pycistopic/pycistopic.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | pycistopic { 4 | container = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/pycistopic.sif' 5 | biomart_annot { 6 | biomart_dataset_name = 'hsapiens_gene_ensembl' 7 | biomart_host = 'http://www.ensembl.org' 8 | } 9 | macs2_call_peaks { 10 | gsize = 'hs' // hs, mm, ce, dm, or numeric effective genome size, e.g. 
'2.7e9' 11 | qvalue = 0.01 12 | extsize = 146 13 | shift = 73 14 | keepdup = 'all' 15 | } 16 | compute_qc_stats { 17 | n_frag = 100 18 | tss_flank_window = 2000 19 | tss_window = 50 20 | tss_minimum_signal_window = 100 21 | tss_rolling_window = 10 22 | min_norm = 0.1 23 | } 24 | call_cells { 25 | report_ipynb = '/src/pycistopic/bin/pycisTopic_qc_report_template.ipynb' 26 | use_density_coloring_on_scatterplot = true 27 | use_detailed_title_on_scatterplot = true 28 | filter_frags_lower = '1000' 29 | filter_frags_upper = '' 30 | filter_tss_lower = '8' 31 | filter_tss_upper = '' 32 | filter_frip_lower = '' 33 | filter_frip_upper = '' 34 | filter_dup_rate_lower = '' 35 | filter_dup_rate_upper = '' 36 | } 37 | } 38 | } 39 | } 40 | 41 | // define computing resources via process labels 42 | process { 43 | withLabel: 'compute_resources__pycisTopic' { 44 | executor = 'local' // or 'pbs' 45 | cpus = 8 46 | memory = '120 GB' 47 | time = '24h' 48 | } 49 | } 50 | 51 | -------------------------------------------------------------------------------- /src/pycistopic/workflows/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/pycistopic/workflows/.gitkeep -------------------------------------------------------------------------------- /src/samtools/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM vibsinglecellnf/samtools:base-0.3 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | RUN BUILDPKGS="git \ 5 | autoconf \ 6 | automake \ 7 | perl \ 8 | libbz2-dev \ 9 | liblzma-dev \ 10 | libcurl4-openssl-dev \ 11 | libssl-dev \ 12 | bedtools \ 13 | libncurses5-dev" && \ 14 | apt-get update && \ 15 | apt-get upgrade -y --no-install-recommends && \ 16 | apt-get install -y --no-install-recommends $BUILDPKGS 17 | 18 | # install htslib 19 | ENV HTSLIB_VERSION 1.15.1 20 | RUN curl -L -o /tmp/htslib-${HTSLIB_VERSION}.tar.bz2 \ 21 | https://github.com/samtools/htslib/releases/download/${HTSLIB_VERSION}/htslib-${HTSLIB_VERSION}.tar.bz2 && \ 22 | mkdir -p /tmp/htslib-${HTSLIB_VERSION} && \ 23 | tar jxvf /tmp/htslib-${HTSLIB_VERSION}.tar.bz2 -C /tmp/htslib-${HTSLIB_VERSION} --strip-components 1 && \ 24 | cd /tmp/htslib-${HTSLIB_VERSION} && \ 25 | ./configure \ 26 | CFLAGS="-fPIC" && \ 27 | make && \ 28 | make install && \ 29 | cd .. && rm -rf htslib-${HTSLIB_VERSION}* 30 | 31 | # install samtools 32 | ENV SAMTOOLS_VERSION 1.15.1 33 | RUN curl -L -o /tmp/samtools-${SAMTOOLS_VERSION}.tar.bz2 \ 34 | https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VERSION}/samtools-${SAMTOOLS_VERSION}.tar.bz2 && \ 35 | mkdir -p /tmp/samtools-${SAMTOOLS_VERSION} && \ 36 | tar jxvf /tmp/samtools-${SAMTOOLS_VERSION}.tar.bz2 -C /tmp/samtools-${SAMTOOLS_VERSION} --strip-components 1 && \ 37 | cd /tmp/samtools-${SAMTOOLS_VERSION} && \ 38 | ./configure \ 39 | --with-htslib=system && \ 40 | make && \ 41 | make install && \ 42 | cd .. 
&& rm -rf samtools-${SAMTOOLS_VERSION}* 43 | 44 | RUN rm -rf /var/cache/apt/* && \ 45 | rm -rf /var/lib/apt/lists/* && \ 46 | ldconfig 47 | 48 | -------------------------------------------------------------------------------- /src/samtools/Dockerfile.samtools-base: -------------------------------------------------------------------------------- 1 | FROM debian:bullseye-slim 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | RUN apt-get update && \ 5 | apt-get upgrade -y --no-install-recommends && \ 6 | apt-get install -y --reinstall ca-certificates && \ 7 | apt-get install -y --no-install-recommends \ 8 | cmake \ 9 | make \ 10 | mawk \ 11 | gcc \ 12 | libbz2-dev \ 13 | liblzma-dev \ 14 | libdeflate-dev \ 15 | bzip2 \ 16 | pigz \ 17 | curl \ 18 | procps \ 19 | less 20 | 21 | # zlib-ng 22 | ENV ZLIBNG 2.0.6 23 | RUN curl -L -o /tmp/zlib-ng-${ZLIBNG}.tar.gz https://github.com/zlib-ng/zlib-ng/archive/refs/tags/${ZLIBNG}.tar.gz && \ 24 | cd /tmp && tar xvf zlib-ng-${ZLIBNG}.tar.gz && \ 25 | cd zlib-ng-${ZLIBNG} && \ 26 | cmake -DZLIB_COMPAT=ON -DINSTALL_UTILS=ON . && \ 27 | cmake --build . --config Release && \ 28 | ctest --verbose -C Release && \ 29 | cmake --build . --target install && \ 30 | cd .. && rm -r zlib-ng-${ZLIBNG}* 31 | 32 | RUN rm -rf /var/cache/apt/* && \ 33 | rm -rf /var/lib/apt/lists/* && \ 34 | ldconfig 35 | 36 | -------------------------------------------------------------------------------- /src/samtools/README.rst: -------------------------------------------------------------------------------- 1 | 2 | Samtools Docker images 3 | ====================== 4 | 5 | This directory contains Dockerfiles for base images used here and for other images in the VSN Pipelines repository. 6 | 7 | 8 | To build the Base image 9 | ----------------------- 10 | 11 | This base image is based on ``debian:bullseye-slim`` and has a compiled version of 12 | `zlib-ng <https://github.com/zlib-ng/zlib-ng>`_ for faster compression and decompression. 13 | 14 | Image tag format: simple version numbers (0.1, 0.2, ...). 15 | 16 | .. code:: bash 17 | 18 | docker build -t vibsinglecellnf/samtools:base-0.3 . -f Dockerfile.samtools-base 19 | podman build -t vibsinglecellnf/samtools:base-0.3 . -f Dockerfile.samtools-base 20 | 21 | This base image is used in several other images within VSN:: 22 | 23 | - samtools [this directory] 24 | 25 | 26 | To build the Samtools image 27 | --------------------------- 28 | 29 | This uses the base image above and adds Samtools and HTSlib. 30 | 31 | Image tag format: ``<base image version>-<samtools release version>``. 32 | 33 | .. code:: bash 34 | 35 | docker build -t vibsinglecellnf/samtools:0.3-1.15.1 . 36 | podman build -t vibsinglecellnf/samtools:0.3-1.15.1 . 37 | 38 | This samtools image is used in several other images within VSN:: 39 | 40 | - singlecelltoolkit 41 | - bwamaptools 42 | - popscle 43 | 44 | 45 | -------------------------------------------------------------------------------- /src/samtools/processes/merge_bam.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | // binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/template/bin/" : "" 4 | 5 | toolParams = params.tools.samtools 6 | 7 | process SAMTOOLS__MERGE_BAM { 8 | container toolParams.container 9 | label 'compute_resources__samtools__merge_bam' 10 | 11 | input: 12 | tuple val(sampleId), 13 | path(bams) 14 | 15 | output: 16 | tuple val(sampleId), 17 | path("${sampleId}.bwa.out.fixmate.possorted.merged.bam"), 18 | path("${sampleId}.bwa.out.fixmate.possorted.merged.bam.bai") 19 | 20 | script: 21 | //def sampleParams = params.parseConfig(sampleId, params.global) 22 | //processParams = sampleParams.local 23 | """ 24 | set -euo pipefail 25 | 26 | samtools merge \ 27 | -@ 4 \ 28 | -O bam \ 29 | --write-index \ 30 | -o '${sampleId}.bwa.out.fixmate.possorted.merged.bam##idx##${sampleId}.bwa.out.fixmate.possorted.merged.bam.bai' \ 31 | ${"'" + bams.join("' '") + "'"} 32 | """ 33 | 34 | } 35 | -------------------------------------------------------------------------------- /src/samtools/processes/sort_bam.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | // binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" 4 | 5 | 6 | process SAMTOOLS__SORT_BAM { 7 | label 'compute_resources__samtools__sort_bam' 8 | 9 | input: 10 | tuple val(sampleId), 11 | path(bam) 12 | 13 | output: 14 | tuple val(sampleId), 15 | path("${sampleId}.bwa.out.fixmate.possorted.bam"), 16 | path("${sampleId}.bwa.out.fixmate.possorted.bai") 17 | 18 | script: 19 | def sampleParams = params.parseConfig(sampleId, params.global) 20 | processParams = sampleParams.local 21 | """ 22 | set -euo pipefail 23 | samtools sort \ 24 | -o ${sampleID}.bwa.out.fixmate.possorted.bam 25 | -@ 4 \ 26 | ${bam} 27 | samtools index \ 28 | -@ 4 \ 29 | ${bam} 30 | """ 31 | } 32 | -------------------------------------------------------------------------------- /src/samtools/samtools.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | samtools { 4 | container = 'vibsinglecellnf/samtools:0.3-1.16.1' 5 | } 6 | } 7 | } 8 | 9 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/.gitattributes: -------------------------------------------------------------------------------- 1 | notebooks/* linguist-vendored 2 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/.gitignore: -------------------------------------------------------------------------------- 1 | *checkpoint.ipynb 2 | *checkpoint* 3 | *checkpoint.py 4 | *.test.ipynb 5 | *.csv 6 | *.loom 7 | *.pickle 8 | *.pyc 9 | *.html 10 | *egg* 11 | .vscode 12 | .nextflow 13 | .nextflow* 14 | data 15 | refdata 16 | work 17 | out/notebooks 18 | src/scenic/out 19 | src/scenic/notebooks 20 | src/scenic/data 21 | refdata 22 | data/10x/tiny 23 | work/ 24 | out/ 25 | tests/ 26 | debug/ 27 | *.swp 28 | *.swo 29 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM vibsinglecellnf/samtools:0.3-1.15.1 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | RUN apt-get update && \ 5 | apt-get install -y --no-install-recommends \ 6 | python3 \ 7 | python3-venv \ 8 | nasm \ 9 | libtool \ 10 | wget && \ 11 | update-alternatives --install /usr/bin/python python /usr/bin/python3.9 100 12 | 13 | # install igzip (https://github.com/intel/isa-l) 14 | RUN 
git clone --depth=1 https://github.com/intel/isa-l.git /tmp/isa-l && \ 15 | cd /tmp/isa-l && \ 16 | ./autogen.sh && \ 17 | ./configure && \ 18 | make && \ 19 | make install && \ 20 | cd .. && \ 21 | rm -r isa-l 22 | 23 | 24 | RUN python -m venv /opt/venv 25 | # Make sure we use the virtualenv: 26 | ENV PATH="/opt/venv/bin:$PATH" 27 | 28 | ENV POLARS_MAX_THREADS=8 29 | RUN pip install --no-cache-dir --upgrade pip wheel && \ 30 | pip install --no-cache-dir \ 31 | pandas \ 32 | scipy \ 33 | uncertainties \ 34 | typing \ 35 | pathlib \ 36 | matplotlib \ 37 | numpy && \ 38 | wget https://temp.aertslab.org/.barcard/polars-0.13.51-cp37-abi3-manylinux_2_27_x86_64.whl && \ 39 | pip install polars-0.13.51-cp37-abi3-manylinux_2_27_x86_64.whl && \ 40 | rm polars-0.13.51-cp37-abi3-manylinux_2_27_x86_64.whl 41 | 42 | # polars>=0.13.52 \ 43 | 44 | RUN cd /opt/venv/bin && \ 45 | wget https://temp.aertslab.org/.barcard/create_fragments_file && \ 46 | wget https://temp.aertslab.org/.barcard/coreutils && \ 47 | chmod a+x create_fragments_file coreutils 48 | 49 | # install seq (https://github.com/seq-lang/seq/): 50 | ENV SEQ_VERSION=0.11.0 51 | RUN mkdir -p /opt/seq && \ 52 | wget https://github.com/seq-lang/seq/releases/download/v${SEQ_VERSION}/seq-linux-x86_64.tar.gz && \ 53 | tar xzf seq-linux-x86_64.tar.gz --strip-components 1 -C /opt/seq && \ 54 | rm seq-linux-x86_64.tar.gz 55 | ENV PATH="/opt/seq/bin:${PATH}" 56 | ENV OMP_NUM_THREADS=4 57 | ENV SEQ_PYTHON=/usr/lib/x86_64-linux-gnu/libpython3.9.so.1 58 | 59 | # install single_cell_toolkit 60 | # https://github.com/aertslab/single_cell_toolkit 61 | RUN git clone --depth=1 https://github.com/aertslab/single_cell_toolkit.git /opt/single_cell_toolkit 62 | ENV seq_root_dir=/opt/seq 63 | ENV PATH="/opt/single_cell_toolkit:/opt/single_cell_toolkit/barcard:${PATH}" 64 | 65 | RUN rm -rf /var/cache/apt/* && \ 66 | rm -rf /var/lib/apt/lists/* && \ 67 | ldconfig 68 | 69 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/README.rst: -------------------------------------------------------------------------------- 1 | 2 | single_cell_toolkit template 3 | ============================ 4 | 5 | This repository contains an implementation of single_cell_toolkit for VIB-SingleCell-NF (VSN) pipelines. 6 | See `aertslab/single_cell_toolkit <https://github.com/aertslab/single_cell_toolkit>`_ for the original source. 7 | 8 | To build the Docker image 9 | ------------------------- 10 | 11 | Image tag format: ``<date of latest git commit>-<short hash of latest git commit>``. 12 | 13 | .. code:: bash 14 | 15 | docker build -t vibsinglecellnf/singlecelltoolkit:2022-07-07-0638c1d . 16 | podman build -t vibsinglecellnf/singlecelltoolkit:2022-07-07-0638c1d . 17 | 18 | This image uses the ``vibsinglecellnf/samtools`` image as a base. 
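For reference, one way to derive this tag locally from a checkout of the single_cell_toolkit repository (a suggested convention, not a script shipped in this repo): .. code:: bash # Prints '<date of latest git commit>-<short hash>', e.g. 2022-07-07-0638c1d. git log -1 --date=short --pretty=format:'%cd-%h'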
19 | 20 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/bin/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/singlecelltoolkit/bin/.gitkeep -------------------------------------------------------------------------------- /src/singlecelltoolkit/conf/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/singlecelltoolkit/conf/.gitkeep -------------------------------------------------------------------------------- /src/singlecelltoolkit/conf/sctk_mapping.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | singlecelltoolkit { 4 | barcode_correction { 5 | max_mismatches = 1 6 | min_frac_bcs_to_find = 0.5 7 | whitelist { 8 | atac = 'PUMATAC_dependencies/whitelists/737K-cratac-v1.txt.gz' 9 | atac_revcomp = 'PUMATAC_dependencies/whitelists/737K-cratac-v1.REVCOMP.txt.gz' 10 | multiome = 'PUMATAC_dependencies/whitelists/737K-arc-v1.txt.gz' 11 | multiome_revcomp = 'PUMATAC_dependencies/whitelists/737K-arc-v1.REVCOMP.txt.gz' 12 | hydrop_2x384 = 'PUMATAC_dependencies/whitelists/hydrop_384x384.REVCOMP.txt.gz' 13 | hydrop_3x96_short = 'PUMATAC_dependencies/whitelists/20230120_hydrop-atac_ligation_all_revcomp.txt.gz' 14 | s3_atac_1 = 'PUMATAC_dependencies/whitelists/s3_atac_1.txt.gz' 15 | } 16 | } 17 | barcode_10x_scatac_fastqs { 18 | uncorrected_bc_tag = 'CR' 19 | barcode_quality_tag = 'CY' 20 | } 21 | } 22 | } 23 | } 24 | 25 | 26 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/conf/sctk_saturation.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | singlecelltoolkit { 4 | saturation { 5 | percentages = '0.3,0.6,0.9' 6 | sampling_fractions = '0.0,0.1,0.2,0.3,0.4,0.5,0.55,0.6,0.65,0.7,0.75,0.8,0.85,0.9,0.92,0.94,0.96,0.98,1.0' 7 | min_frags_per_cb = 200 8 | skip = true 9 | } 10 | } 11 | } 12 | } 13 | 14 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/main.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | ////////////////////////////////////////////////////// 4 | // process imports: 5 | include { SCTK__BARCODE_CORRECTION; } from './processes/barcode_correction.nf' 6 | include { SCTK__BARCODE_10X_SCATAC_FASTQ; } from './processes/barcode_10x_scatac_fastqs.nf' 7 | include { SCTK__EXTRACT_AND_CORRECT_BIORAD_BARCODE; } from './processes/extract_and_correct_biorad_barcode.nf' 8 | 9 | include { 10 | SIMPLE_PUBLISH as PUBLISH_BC_STATS; 11 | SIMPLE_PUBLISH as PUBLISH_BR_BC_STATS; 12 | } from '../../src/utils/processes/utils.nf' 13 | 14 | ////////////////////////////////////////////////////// 15 | // Define the workflow 16 | 17 | 18 | /* Barcode correction */ 19 | workflow barcode_correction { 20 | take: 21 | data 22 | 23 | main: 24 | 25 | // gather barcode whitelists from params into a channel: 26 | wl = Channel.empty() 27 | wl_cnt = 0 28 | params.tools.singlecelltoolkit.barcode_correction.whitelist.each { k, v -> 29 | if(v != '') { 30 | wl = wl.mix( Channel.of(tuple(k, file(v)) )) 31 | wl_cnt = wl_cnt + 1 32 | } 33 | } 34 | 35 | /* TO DO: fix ability to skip barcode correction 
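(As currently written, correction is only skipped when every entry in params.tools.singlecelltoolkit.barcode_correction.whitelist is empty; as soon as one whitelist is configured, all standard-type samples are routed through SCTK__BARCODE_CORRECTION.)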
*/ 36 | if(wl_cnt == 0) { 37 | if(!params.containsKey('quiet')) { 38 | println("No whitelist files were found in 'params.tools.singlecelltoolkit.barcode_correction.whitelist'. Skipping barcode correction for standard-type samples.") 39 | } 40 | // run barcode demultiplexing on each read+barcode: 41 | fastq_dex = SCTK__BARCODE_10X_SCATAC_FASTQ(data) 42 | } else { 43 | // join wl to the data channel: 44 | data_wl = wl.cross( data.map { it -> tuple(it[1], it[0], it[2], it[3], it[4]) } ) // technology, sampleId, R1, R2, R3 45 | .map { it -> tuple(it[1][1], it[1][0], // sampleId, technology 46 | it[1][2], it[1][3], it[1][4], // R1, R2, R3 47 | it[0][1] // whitelist 48 | ) } 49 | 50 | // run barcode correction against a whitelist: 51 | fastq_bc_corrected = SCTK__BARCODE_CORRECTION(data_wl) 52 | PUBLISH_BC_STATS(fastq_bc_corrected.map { it -> tuple(it[0], it[5]) }, '.corrected.bc_stats.log', 'reports/barcode') 53 | 54 | // run barcode demultiplexing on each read+barcode: 55 | fastq_dex = SCTK__BARCODE_10X_SCATAC_FASTQ( 56 | fastq_bc_corrected.map { it -> tuple(*it[0..4]) } 57 | ) 58 | } 59 | 60 | emit: 61 | fastq_dex 62 | } 63 | 64 | 65 | workflow biorad_bc { 66 | 67 | take: 68 | data_biorad 69 | 70 | main: 71 | 72 | /* run BioRad barcode correction and debarcoding separately: */ 73 | // using singlecelltoolkit: 74 | fastq_dex_br = SCTK__EXTRACT_AND_CORRECT_BIORAD_BARCODE(data_biorad.map{ it -> tuple(it[0], it[1], it[2], it[4]) }) 75 | PUBLISH_BR_BC_STATS(fastq_dex_br.map { it -> tuple(it[0], it[3]) }, '.corrected.bc_stats.log', 'reports/barcode') 76 | 77 | emit: 78 | fastq_dex_br.map { it -> tuple(*it[0..2]) } 79 | 80 | } 81 | 82 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | //binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/singlecelltoolkit/bin/" : "" 4 | 5 | toolParams = params.tools.singlecelltoolkit 6 | 7 | process SCTK__BARCODE_10X_SCATAC_FASTQ { 8 | 9 | container toolParams.container 10 | label 'compute_resources__sctk__barcode_10x_scatac_fastq_5cpus' 11 | 12 | input: 13 | tuple val(sampleId), 14 | val(technology), 15 | path(fastq_PE1), 16 | path(fastq_bc), 17 | path(fastq_PE2) 18 | 19 | output: 20 | tuple val(sampleId), 21 | path("${sampleId}_dex_R1.fastq.gz"), 22 | path("${sampleId}_dex_R2.fastq.gz") 23 | 24 | script: 25 | def sampleParams = params.parseConfig(sampleId, params.global, toolParams.barcode_10x_scatac_fastqs) 26 | processParams = sampleParams.local 27 | def max_threads = (task.cpus > 5) ? 5 : task.cpus 28 | """ 29 | export compress_fastq_threads="${max_threads}" 30 | barcode_10x_scatac_fastqs.sh \ 31 | ${fastq_PE1} \ 32 | ${fastq_bc} \ 33 | ${fastq_PE2} \ 34 | ${sampleId}_dex \ 35 | false \ 36 | true \ 37 | ${processParams.uncorrected_bc_tag}_${processParams.barcode_quality_tag} 38 | """ 39 | } 40 | 41 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/processes/barcode_correction.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | //binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/singlecelltoolkit/bin/" : "" 4 | 5 | toolParams = params.tools.singlecelltoolkit 6 | 7 | process SCTK__BARCODE_CORRECTION { 8 | 9 | container toolParams.container 10 | label 'compute_resources__sctk_barcode' 11 | 12 | input: 13 | tuple val(sampleId), 14 | val(technology), 15 | path(fastq_PE1), 16 | path(fastq_bc), 17 | path(fastq_PE2), 18 | path(bc_whitelist) 19 | 20 | output: 21 | tuple val(sampleId), 22 | val(technology), 23 | path(fastq_PE1), 24 | path("${sampleId}_bc_corrected.fastq.gz"), 25 | path(fastq_PE2), 26 | path("${sampleId}_bc_corrected.fastq.gz.corrected.bc_stats.tsv") 27 | 28 | script: 29 | def sampleParams = params.parseConfig(sampleId, params.global, toolParams.barcode_correction) 30 | processParams = sampleParams.local 31 | """ 32 | correct_barcode_in_fastq.sh \ 33 | ${bc_whitelist} \ 34 | ${fastq_bc} \ 35 | ${sampleId}_bc_corrected.fastq.gz \ 36 | ${processParams.max_mismatches} \ 37 | ${processParams.min_frac_bcs_to_find} 38 | """ 39 | } 40 | 41 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/processes/extract_and_correct_biorad_barcode.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | //binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/singlecelltoolkit/bin/" : "" 4 | 5 | toolParams = params.tools.singlecelltoolkit 6 | 7 | process SCTK__EXTRACT_AND_CORRECT_BIORAD_BARCODE { 8 | 9 | container toolParams.container 10 | label 'compute_resources__sctk_barcode' 11 | 12 | input: 13 | tuple val(sampleId), 14 | val(technology), 15 | path(fastq_PE1), 16 | path(fastq_PE2) 17 | 18 | output: 19 | tuple val(sampleId), 20 | path("${sampleId}_dex_R1.fastq.gz"), 21 | path("${sampleId}_dex_R2.fastq.gz"), 22 | path("${sampleId}_dex.corrected_bc_stats.tsv") 23 | 24 | script: 25 | def sampleParams = params.parseConfig(sampleId, params.global, toolParams) 26 | //processParams = sampleParams.local 27 | """ 28 | extract_and_correct_biorad_barcode_in_fastq.sh \ 29 | ${fastq_PE1} \ 30 | ${fastq_PE2} \ 31 | ${sampleId}_dex 32 | """ 33 | } 34 | 35 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/processes/extract_hydrop_atac_barcode.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | //binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/singlecelltoolkit/bin/" : "" 4 | 5 | toolParams = params.tools.singlecelltoolkit 6 | 7 | process SCTK__EXTRACT_HYDROP_ATAC_BARCODE { 8 | 9 | container "vibsinglecellnf/singlecelltoolkit:2024-04-09-62429e9" 10 | label 'compute_resources__default' 11 | 12 | input: 13 | tuple val(sampleId), 14 | val(technology), 15 | path(fastq_PE1), 16 | path(fastq_bc), 17 | path(fastq_PE2) 18 | val(hydrop_atac_barcode_design) 19 | 20 | output: 21 | tuple val(sampleId), 22 | val(technology), 23 | path(fastq_PE1), 24 | path("${sampleId}_hydrop_barcode_R2.fastq.gz"), 25 | path(fastq_PE2) 26 | 27 | script: 28 | def sampleParams = params.parseConfig(sampleId, params.global, toolParams) 29 | //processParams = sampleParams.local 30 | """ 31 | extract_hydrop_atac_barcode_from_R2_fastq.sh \ 32 | ${fastq_bc} \ 33 | ${sampleId}_hydrop_barcode_R2.fastq.gz \ 34 | ${hydrop_atac_barcode_design} \ 35 | pigz 36 | """ 37 | } 38 | 39 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/processes/fix_and_compress_fastqs.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | if(!params.containsKey("test")) { 4 | binDir = "${workflow.projectDir}/src/sratoolkit/bin/" 5 | } else { 6 | binDir = "" 7 | } 8 | 9 | process FIX_AND_COMPRESS_SRA_FASTQS { 10 | 11 | container "vibsinglecellnf/singlecelltoolkit:2022-04-15-16314db" 12 | publishDir "${params.global.outdir}/data/raw/fastqs_fixed_and_compressed", mode: 'symlink', overwrite: true 13 | label 'compute_resources__cpu' 14 | 15 | input: 16 | tuple val(sraId), file("${sraId}_*.fastq") 17 | 18 | output: 19 | tuple val(sraId), file("${sraId}_*.fastq.gz") 20 | 21 | script: 22 | """ 23 | # Fixing the FASTQ files is required for future pre-processing (e.g.: scATAC-seq pipelines) because fasterq-dump does not have the -F option as fastq-dump do to keep original sequence names. 24 | # Fix the FASTQ files and compress them 25 | export compress_fastq_threads="${task.cpus}" 26 | NUM_FASTQ_FILES=\$(ls ./*.fastq | wc -l) 27 | echo "Fixing and compressing \${NUM_FASTQ_FILES} FASTQ files in parallel with \${compress_fastq_threads} compression threads for each task..." 28 | echo *.fastq | tr ' ' '\n' | xargs -P "\${NUM_FASTQ_FILES}" -n 1 -I {} fix_sra_fastq.sh "{}" "{}.gz" pigz 29 | echo "Removing all uncompressed FASTQ files" 30 | for FASTQ in *.fastq; do 31 | echo "Removing uncompressed FASTQ file \${FASTQ}..." 32 | rm "\$(readlink -f \${FASTQ})" 33 | done 34 | echo "Done." 35 | """ 36 | 37 | } 38 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/processes/saturation.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | //binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/singlecelltoolkit/bin/" : "" 4 | 5 | toolParams = params.tools.singlecelltoolkit 6 | 7 | process SCTK__SATURATION { 8 | 9 | container toolParams.container 10 | label 'compute_resources__default','compute_resources__24hqueue' 11 | 12 | input: 13 | tuple val(sampleId), 14 | path(fragments), 15 | path(fragments_index) 16 | file(bc_whitelists) 17 | val(optional) 18 | 19 | output: 20 | tuple val(sampleId), 21 | path("${sampleId}.sampling_stats.tsv"), 22 | path("${sampleId}.saturation.png") 23 | 24 | script: 25 | def sampleParams = params.parseConfig(sampleId, params.global, toolParams) 26 | processParams = sampleParams.local 27 | def bc_wl_param = optional == 'RUN' ? '-w selected_barcodes/' + sampleId + '.cell_barcodes.txt' : '' 28 | def polars_max_threads = (task.cpus > 6) ? 6 : task.cpus 29 | """ 30 | # Max threads polars is allowed to use (else will uses all cores). 31 | export POLARS_MAX_THREADS=${polars_max_threads}; 32 | # Max threads pyarrow is allowed to use (else will uses all cores) (used to read the fragments file in the beginning). 33 | export OMP_NUM_THREADS=${polars_max_threads}; 34 | calculate_saturation_from_fragments.py \ 35 | -i ${fragments} \ 36 | -o ${sampleId} \ 37 | -p ${toolParams.saturation.percentages} \ 38 | -m ${toolParams.saturation.min_frags_per_cb} \ 39 | -s ${toolParams.saturation.sampling_fractions} \ 40 | ${bc_wl_param} 41 | """ 42 | } 43 | 44 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/singlecelltoolkit.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | singlecelltoolkit { 4 | container = 'vibsinglecellnf/singlecelltoolkit:2022-04-15-16314db' 5 | } 6 | } 7 | } 8 | 9 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/workflows/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/singlecelltoolkit/workflows/.gitkeep -------------------------------------------------------------------------------- /src/trimgalore/.gitattributes: -------------------------------------------------------------------------------- 1 | notebooks/* linguist-vendored 2 | -------------------------------------------------------------------------------- /src/trimgalore/.gitignore: -------------------------------------------------------------------------------- 1 | *checkpoint.ipynb 2 | *checkpoint* 3 | *checkpoint.py 4 | *.test.ipynb 5 | *.csv 6 | *.loom 7 | *.pickle 8 | *.pyc 9 | *.html 10 | *egg* 11 | .vscode 12 | .nextflow 13 | .nextflow* 14 | data 15 | refdata 16 | work 17 | out/notebooks 18 | src/scenic/out 19 | src/scenic/notebooks 20 | src/scenic/data 21 | refdata 22 | data/10x/tiny 23 | work/ 24 | out/ 25 | tests/ 26 | debug/ 27 | *.swp 28 | *.swo 29 | -------------------------------------------------------------------------------- /src/trimgalore/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7-slim 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | RUN BUILDPKGS="build-essential zlib1g-dev git curl" && \ 5 | apt-get update && \ 6 | apt-get install -y --no-install-recommends apt-utils debconf locales && dpkg-reconfigure locales && \ 7 | apt-get install -y --no-install-recommends $BUILDPKGS 8 | 9 | RUN pip install -U pip 10 | 11 | ################################################## 12 | # 
cutadapt 13 | RUN pip install cutadapt 14 | 15 | ################################################## 16 | # fastQC 17 | # RUN wget https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.11.9.zip 18 | 19 | ################################################## 20 | # trim galore 21 | RUN curl -fsSL https://github.com/FelixKrueger/TrimGalore/archive/0.6.6.tar.gz -o trim_galore.tar.gz && \ 22 | tar xvzf trim_galore.tar.gz && \ 23 | mv TrimGalore-0.6.6/trim_galore /usr/bin/ && \ 24 | rm -r TrimGalore-0.6.6 25 | 26 | 27 | RUN apt-get -y update && \ 28 | apt-get -y --no-install-recommends install \ 29 | # Need to run ps 30 | procps \ 31 | pigz \ 32 | less && \ 33 | rm -rf /var/cache/apt/* && \ 34 | rm -rf /var/lib/apt/lists/* 35 | 36 | -------------------------------------------------------------------------------- /src/trimgalore/README.rst: -------------------------------------------------------------------------------- 1 | 2 | Trim Galore module 3 | ================== 4 | 5 | This repository contains an implementation of Trim Galore for VIB-SingleCell-NF (VSN) pipelines. 6 | See `here <https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/>`_ for the original source. 7 | 8 | -------------------------------------------------------------------------------- /src/trimgalore/bin/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/trimgalore/bin/.gitkeep -------------------------------------------------------------------------------- /src/trimgalore/conf/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/trimgalore/conf/.gitkeep -------------------------------------------------------------------------------- /src/trimgalore/processes/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/trimgalore/processes/.gitkeep -------------------------------------------------------------------------------- /src/trimgalore/processes/trim.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | // binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" 4 | 5 | toolParams = params.tools.trimgalore 6 | 7 | process TRIMGALORE__TRIM { 8 | 9 | container toolParams.container 10 | label 'compute_resources__trimgalore__trim' 11 | 12 | input: 13 | tuple val(sampleId), 14 | path(fastq_PE1), 15 | path(fastq_PE2) 16 | 17 | output: 18 | tuple val(sampleId), 19 | path("${sampleId}_dex_R1_val_1.fq.gz"), 20 | path("${sampleId}_dex_R2_val_2.fq.gz"), 21 | path("${sampleId}_dex_R1.fastq.gz_trimming_report.txt"), 22 | path("${sampleId}_dex_R2.fastq.gz_trimming_report.txt") 23 | 24 | script: 25 | def sampleParams = params.parseConfig(sampleId, params.global, toolParams.trim) 26 | processParams = sampleParams.local 27 | def max_threads = (task.cpus > 6) ? 6 : task.cpus 28 | """ 29 | trim_galore \ 30 | -j ${max_threads} \ 31 | -o . 
\ 32 | ${fastq_PE1} \ 33 | ${fastq_PE2} \ 34 | --paired \ 35 | --gzip 36 | """ 37 | } 38 | 39 | -------------------------------------------------------------------------------- /src/trimgalore/trimgalore.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | trimgalore { 4 | container = 'vibsinglecellnf/trimgalore:0.6.6' 5 | trim { 6 | paired = 'true' 7 | } 8 | } 9 | } 10 | } 11 | 12 | -------------------------------------------------------------------------------- /src/trimgalore/workflows/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/trimgalore/workflows/.gitkeep -------------------------------------------------------------------------------- /src/utils/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7.9-slim-stretch AS compile-image 2 | 3 | RUN apt-get update && \ 4 | apt-get install -y --no-install-recommends build-essential gcc apt-utils cmake openssh-client git && \ 5 | apt-get install -y python-dev libxml2-dev zlib1g-dev && \ 6 | rm -rf /var/cache/apt/* && \ 7 | rm -rf /var/lib/apt/lists/* 8 | 9 | RUN python -m venv /opt/venv 10 | # Make sure we use the virtualenv: 11 | ENV PATH="/opt/venv/bin:$PATH" 12 | 13 | RUN python3 -m pip install ipykernel && \ 14 | pip install --no-cache-dir papermill && \ 15 | pip install --no-cache-dir pysradb==1.0.0 && \ 16 | pip install --no-cache-dir nbconvert==5.6.0 && \ 17 | pip install --no-cache-dir nbmerge==0.0.4 && \ 18 | pip install --no-cache-dir nbformat==5.0.8 19 | 20 | FROM python:3.7.9-slim-stretch AS build-image 21 | RUN apt-get -y update && \ 22 | # Need to run ps 23 | apt-get -y install procps && \ 24 | apt-get -y install libxml2 && \ 25 | rm -rf /var/cache/apt/* && \ 26 | rm -rf /var/lib/apt/lists/* 27 | 28 | COPY --from=compile-image /opt/venv /opt/venv 29 | 30 | # Make sure we use the virtualenv: 31 | ENV PATH="/opt/venv/bin:$PATH" -------------------------------------------------------------------------------- /src/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utils module 2 | 3 | ## Cell-based metadata annotation 4 | 5 | The profile `utils_cell_annotate` should be added when generating the main config using `nextflow config`. This will add the following entry in the config: 6 | 7 | ``` 8 | params { 9 | tools { 10 | cell_annotate { 11 | iff = '10x_cellranger_mex' 12 | off = 'h5ad' 13 | cellMetaDataFilePath = '' 14 | indexColumnName = '' 15 | sampleColumnName = '' 16 | annotationColumnNames = [''] 17 | } 18 | } 19 | } 20 | ``` 21 | Then, the following parameters should be updated to use the module feature: 22 | 23 | - `cellMetaDataFilePath` is a .tsv file (with header) with at least 2 columns: a column containing all the cell IDs and an annotation column. 24 | - `indexColumnName` is the column name from `cellMetaDataFilePath` containing the cell IDs information. 25 | - `sampleColumnName` is the column name from `cellMetaDataFilePath` containing the sample ID/name information. 26 | - `annotationColumnNames` is an array of columns names from `cellMetaDataFilePath` containing different annotation metadata to add. 27 | 28 | ## Sample-based metadata annotation 29 | The profile `utils_sample_annotate` should be added when generating the main config using nextflow config. 
This will add the following entry in the config: 30 | 31 | ``` 32 | params { 33 | tools { 34 | sample_annotate { 35 | iff = '10x_cellranger_mex' 36 | off = 'h5ad' 37 | type = 'sample' 38 | metadataFilePath = 'data/10x/1k_pbmc/metadata.tsv' 39 | } 40 | } 41 | } 42 | ``` 43 | Then, the following parameters should be updated to use the module feature: 44 | 45 | - `metadataFilePath` is a .tsv file (with header) with at least 2 columns, where the first column needs to match the sample IDs. Any other column will be added as an annotation in the final loom, i.e. all cells belonging to a given sample will be annotated with that sample's values. 46 | 47 | | id | chemistry | ... | 48 | | ------------- | ------------- | ------------- | 49 | | 1k_pbmc_v2_chemistry | v2 | ... | 50 | | 1k_pbmc_v3_chemistry | v3 | ... | 51 | 52 | Annotating samples this way allows any user to query all the annotations in the SCope portal. This is especially relevant when samples need to be compared across specific annotations (see the compare tab in SCope). 53 | -------------------------------------------------------------------------------- /src/utils/bin/create_cistopic_object.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | print("##################################################") 4 | print("# cisTopic #") 5 | print("##################################################") 6 | 7 | # Load dependency scripts. 8 | 9 | library("optparse") 10 | parser <- OptionParser( 11 | prog = "create_cistopic_object.R", 12 | description = "Create cisTopic object from 10x Cell Ranger MEX output" 13 | ) 14 | parser <- add_option( 15 | parser, 16 | c("-i", "--tenx_path"), 17 | action = "store", 18 | default = NULL, 19 | help = "Path to Cell Ranger 10x output containing filtered_peak_bc_matrix/ directory" 20 | ) 21 | parser <- add_option( 22 | parser, 23 | c("-m", "--metrics_fname"), 24 | action = "store", 25 | default = "singlecell.csv", 26 | help = "Filename of Cell Ranger 10x output per barcode metrics" 27 | ) 28 | parser <- add_option( 29 | parser, 30 | c("-s", "--sampleId"), 31 | action = "store", 32 | default = "", 33 | help = "sample ID" 34 | ) 35 | parser <- add_option( 36 | parser, 37 | c("-o", "--output"), 38 | action = "store", 39 | default = NULL, 40 | help = "Output file, rds format" 41 | ) 42 | 43 | args <- parse_args(parser) 44 | 45 | cat("Parameters: \n") 46 | print(args) 47 | 48 | ################################################################################ 49 | 50 | suppressWarnings(library(cisTopic)) 51 | 52 | data_folder = file.path(args$tenx_path, 'filtered_peak_bc_matrix') 53 | metrics = file.path(args$tenx_path, args$metrics_fname) 54 | 55 | cisTopicObject <- createcisTopicObjectFrom10Xmatrix(data_folder, metrics, project.name='VSN-ATAC') 56 | 57 | saveRDS(cisTopicObject,file=args$output) 58 | 59 | -------------------------------------------------------------------------------- /src/utils/bin/h5ad_to_filtered_loom.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import loompy as lp 5 | import numpy as np 6 | import os 7 | import scanpy as sc 8 | 9 | parser = argparse.ArgumentParser(description='') 10 | 11 | parser.add_argument( 12 | "input", 13 | type=argparse.FileType('r'), 14 | help='Input h5ad file.' 15 | ) 16 | 17 | parser.add_argument( 18 | "output", 19 | type=argparse.FileType('w'), 20 | help='Output .loom file.' 
21 | ) 22 | 23 | args = parser.parse_args() 24 | 25 | # Define the arguments properly 26 | FILE_PATH_IN = args.input 27 | FILE_PATH_OUT_BASENAME = os.path.splitext(args.output.name)[0] 28 | 29 | try: 30 | adata = sc.read_h5ad(filename=FILE_PATH_IN.name) 31 | except IOError: 32 | raise Exception("VSN ERROR: Wrong input format. Expects .h5ad files, got '{}'".format(os.path.splitext(FILE_PATH_IN.name)[1])) 33 | 34 | row_attrs = { 35 | "Gene": np.array(adata.var.index), 36 | } 37 | col_attrs = { 38 | "CellID": np.array(adata.obs.index), 39 | "nGene": np.array(np.sum(adata.X.transpose() > 0, axis=0)).flatten(), 40 | "nUMI": np.array(np.sum(adata.X.transpose(), axis=0)).flatten(), 41 | } 42 | 43 | matrix = (adata.X).T 44 | 45 | lp.create( 46 | filename=f"{FILE_PATH_OUT_BASENAME}.loom", 47 | layers=matrix if type(matrix) == np.ndarray else matrix.toarray(), 48 | row_attrs=row_attrs, 49 | col_attrs=col_attrs, 50 | ) 51 | -------------------------------------------------------------------------------- /src/utils/bin/reports/workflow_configuration_template.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Workflow Configuration" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from IPython.display import JSON\n", 17 | "import json" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "## Manifest" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "JSON(json.loads(WORKFLOW_MANIFEST))" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## Parameters" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "JSON(json.loads(WORKFLOW_PARAMETERS))" 50 | ] 51 | } 52 | ], 53 | "metadata": { 54 | "kernelspec": { 55 | "display_name": "Python 3", 56 | "language": "python", 57 | "name": "python3" 58 | }, 59 | "language_info": { 60 | "codemirror_mode": { 61 | "name": "ipython", 62 | "version": 3 63 | }, 64 | "file_extension": ".py", 65 | "mimetype": "text/x-python", 66 | "name": "python", 67 | "nbconvert_exporter": "python", 68 | "pygments_lexer": "ipython3", 69 | "version": "3.6.8" 70 | } 71 | }, 72 | "nbformat": 4, 73 | "nbformat_minor": 4 74 | } 75 | -------------------------------------------------------------------------------- /src/utils/bin/sc_file_concatenator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import numpy as np 5 | import os 6 | import scanpy as sc 7 | 8 | parser = argparse.ArgumentParser(description='') 9 | 10 | parser.add_argument( 11 | "input", 12 | nargs='+', 13 | type=argparse.FileType('r'), 14 | help='Input h5ad files.' 15 | ) 16 | 17 | parser.add_argument( 18 | "-f", "--file-format", 19 | action="store", 20 | dest="format", 21 | default="h5ad", 22 | help="Input/output file format. Choose one of: h5ad" 23 | ) 24 | 25 | parser.add_argument( 26 | "-j", "--join", 27 | type=str, 28 | action="store", 29 | dest="join", 30 | default="inner", 31 | help="How to concatenate the multiple datasets. Choose one of: inner (intersect), outer (union)." 
32 | ) 33 | 34 | parser.add_argument( 35 | "-o", "--output", 36 | action="store", 37 | dest="output", 38 | default=None, 39 | help="Output file name." 40 | ) 41 | 42 | args = parser.parse_args() 43 | 44 | # Define the arguments properly 45 | FILE_PATH_OUT_BASENAME = os.path.splitext(args.output)[0] 46 | 47 | # I/O 48 | files = [] 49 | cell_ids = [] 50 | 51 | if args.format == 'h5ad': 52 | for FILE_PATH_IN in args.input: 53 | try: 54 | FILE_PATH_IN = FILE_PATH_IN.name 55 | adata = sc.read_h5ad(filename=FILE_PATH_IN) 56 | cell_ids.extend(adata.obs.index.values) 57 | files.append(adata) 58 | except IOError: 59 | raise Exception("VSN ERROR: Wrong input format. Expects .h5ad files, got '{}'".format(FILE_PATH_IN)) 60 | 61 | index_unique = None 62 | 63 | if len(cell_ids) != len(np.unique(cell_ids)): 64 | print("Non-unique cell index detected!") 65 | print("Make the index unique by joining the existing index names with the batch category, using index_unique='-'") 66 | index_unique = '-' 67 | # 68 | # Concatenate the data 69 | # 70 | 71 | if args.format == 'h5ad': 72 | # Concatenate multiple h5ad files 73 | # Source: https://anndata.readthedocs.io/en/latest/anndata.AnnData.concatenate.html#anndata.AnnData.concatenate 74 | adata = files[0].concatenate( 75 | files[1:], 76 | join=args.join, 77 | index_unique=index_unique 78 | ) 79 | # Not casting to 64-bit floats can lead to results that are not exactly reproducible. See: 80 | # - https://github.com/theislab/scanpy/issues/1612 81 | # - https://github.com/vib-singlecell-nf/vsn-pipelines/issues/295 82 | adata.X = adata.X.astype(np.float64) 83 | adata.var.index = adata.var.index.astype(str) 84 | adata = adata[:, np.sort(adata.var.index)] 85 | print(f"Total number of cells: {adata.obs.shape[0]}, genes: {adata.var.shape[0]}.") 86 | else: 87 | raise Exception("VSN ERROR: Concatenation of .{} files is not implemented.".format(args.format)) 88 | 89 | # I/O 90 | adata.write_h5ad("{}.h5ad".format(FILE_PATH_OUT_BASENAME)) 91 | -------------------------------------------------------------------------------- /src/utils/bin/sc_h5ad_apply_obs_filter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | import argparse 6 | import pandas as pd 7 | import numpy as np 8 | import scanpy as sc 9 | 10 | parser = argparse.ArgumentParser(description='') 11 | 12 | parser.add_argument( 13 | "input", 14 | type=argparse.FileType('r'), 15 | help='Input h5ad file.' 16 | ) 17 | 18 | parser.add_argument( 19 | "-o", "--output", 20 | type=argparse.FileType('w'), 21 | help='Output h5ad file.' 22 | ) 23 | 24 | parser.add_argument( 25 | '-f', '--filter-file-path', 26 | type=argparse.FileType('r'), 27 | action="append", 28 | dest="filter_file_paths", 29 | help="Path to a file with one cell ID to keep per line; can be given multiple times." 30 | ) 31 | 32 | args = parser.parse_args() 33 | 34 | FILE_PATH_IN = args.input.name 35 | FILE_PATH_OUT_BASENAME = os.path.splitext(args.output.name)[0] 36 | 37 | # I/O 38 | # Expects h5ad file 39 | try: 40 | adata = sc.read_h5ad(filename=FILE_PATH_IN) 41 | except IOError: 42 | raise Exception("VSN ERROR: Can only handle .h5ad files.") 43 | 44 | # 45 | # Subset the h5ad using the given cell IDs 46 | # 47 | 48 | obs_to_keep = [] 49 | 50 | for filter_file_path in args.filter_file_paths: 51 | obs_to_keep.extend( 52 | pd.read_csv(filepath_or_buffer=filter_file_path, header=None)[0].values 53 | ) 54 | 55 | if len(obs_to_keep) != len(np.unique(obs_to_keep)): 56 | raise Exception("VSN ERROR: This use case is currently not handled. 
This could happen if the given filter files keep overlapping sets of cell IDs.") 57 | 58 | print(f"Dimension of pre-filtered AnnData: {adata.shape}") 59 | adata_filtered = adata[obs_to_keep, :] 60 | print(f"Dimension of post-filtered AnnData: {adata_filtered.shape}") 61 | 62 | # I/O 63 | adata_filtered.write_h5ad("{}.h5ad".format(FILE_PATH_OUT_BASENAME)) 64 | -------------------------------------------------------------------------------- /src/utils/bin/sc_h5ad_extract_metadata.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | import argparse 6 | import pandas as pd 7 | import scanpy as sc 8 | import numpy as np 9 | 10 | parser = argparse.ArgumentParser(description='') 11 | 12 | parser.add_argument( 13 | "input", 14 | type=argparse.FileType('r'), 15 | help='The path to the input h5ad file.' 16 | ) 17 | 18 | parser.add_argument( 19 | "output", 20 | type=argparse.FileType('w'), 21 | help='The path to the output .tsv file containing the extracted metadata columns.' 22 | ) 23 | 24 | parser.add_argument( 25 | '-a', '--axis', 26 | type=str, 27 | dest="axis", 28 | help="The axis ('feature' or 'observation') defining the metadata from which the given column names will be extracted." 29 | ) 30 | 31 | parser.add_argument( 32 | '-c', '--column-name', 33 | type=str, 34 | action="append", 35 | dest="column_names", 36 | help="A column name to extract; can be given multiple times." 37 | ) 38 | 39 | args = parser.parse_args() 40 | 41 | FILE_PATH_IN = args.input.name 42 | 43 | # I/O 44 | # Expects h5ad file 45 | try: 46 | adata = sc.read_h5ad(filename=FILE_PATH_IN) 47 | except IOError: 48 | raise Exception("VSN ERROR: Can only handle .h5ad files.") 49 | 50 | # 51 | # Extract the given column_names from the feature/observation-based metadata. 52 | # 53 | 54 | if args.axis == 'feature': 55 | metadata = adata.var[args.column_names] 56 | elif args.axis == 'observation': 57 | raise Exception("VSN ERROR: Extracting the observation-based metadata is currently not implemented.") 58 | else: 59 | raise Exception(f"Cannot extract from the {args.axis}-based metadata.") 60 | 61 | # I/O 62 | metadata.to_csv( 63 | path_or_buf=args.output, 64 | sep='\t', 65 | header=True, 66 | columns=args.column_names, 67 | index=False 68 | ) 69 | -------------------------------------------------------------------------------- /src/utils/bin/sc_h5ad_update_metadata.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | import argparse 6 | import pandas as pd 7 | import scanpy as sc 8 | import numpy as np 9 | 10 | parser = argparse.ArgumentParser(description='') 11 | 12 | parser.add_argument( 13 | "input", 14 | type=argparse.FileType('r'), 15 | help='The path to the input h5ad file.' 16 | ) 17 | 18 | parser.add_argument( 19 | "output", 20 | type=argparse.FileType('w'), 21 | help='The path to the output h5ad file with the updated metadata.' 22 | ) 23 | 24 | parser.add_argument( 25 | '-m', "--additional-metadata", 26 | type=argparse.FileType('r'), 27 | dest="additional_metadata", 28 | required=True, 29 | help='The path to the additional metadata (.tsv) used to update the metadata of the given input h5ad.' 30 | ) 31 | 32 | parser.add_argument( 33 | '-a', '--axis', 34 | type=str, 35 | dest="axis", 36 | required=True, 37 | help='The axis (feature or observation) defining the metadata to update. 
' 38 | ) 39 | 40 | parser.add_argument( 41 | '-j', '--join-key', 42 | type=str, 43 | dest="join_key", 44 | required=True, 45 | help="The column name used to join the metadata with the given additional metadata." 46 | ) 47 | 48 | parser.add_argument( 49 | '-i', '--index-column-name', 50 | type=str, 51 | dest="index_column_name", 52 | help="The column name to use as index for the metadata." 53 | ) 54 | 55 | 56 | args = parser.parse_args() 57 | 58 | FILE_PATH_IN = args.input.name 59 | FILE_PATH_OUT_BASENAME = os.path.splitext(args.output.name)[0] 60 | 61 | # I/O 62 | # Expects h5ad file 63 | try: 64 | adata = sc.read_h5ad(filename=FILE_PATH_IN) 65 | except IOError: 66 | raise Exception("VSN ERROR: Can only handle .h5ad files.") 67 | 68 | # 69 | # Update the feature/observation-based metadata with all the columns present within the look-up table. 70 | # 71 | 72 | additional_metadata = pd.read_csv( 73 | filepath_or_buffer=args.additional_metadata, 74 | sep="\t", 75 | header=0 76 | ) 77 | 78 | if args.axis == 'feature': 79 | adata.var = pd.merge( 80 | adata.var, 81 | additional_metadata, 82 | on=args.join_key 83 | ) 84 | if args.index_column_name is not None: 85 | adata.var.set_index(args.index_column_name, inplace=True) 86 | adata.var.index.names = ['index'] 87 | 88 | elif args.axis == 'observation': 89 | raise Exception("VSN ERROR: Updating the observation-based metadata is currently not implemented.") 90 | 91 | else: 92 | raise Exception(f"Cannot update the {args.axis}-based metadata.") 93 | 94 | 95 | # I/O 96 | adata.write_h5ad("{}.h5ad".format(FILE_PATH_OUT_BASENAME)) 97 | -------------------------------------------------------------------------------- /src/utils/bin/sc_star_concatenator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import os 5 | import pandas as pd 6 | 7 | strand_options = { 8 | "no": 1, 9 | "forward": 2, 10 | "reverse": 3 11 | } 12 | 13 | parser = argparse.ArgumentParser(description='') 14 | 15 | parser.add_argument( 16 | "input", 17 | nargs='+', 18 | type=argparse.FileType('r'), 19 | help='Input STAR ReadsPerGene.out.tab files.' 20 | ) 21 | 22 | parser.add_argument( 23 | "-s", "--stranded", 24 | action="store", 25 | dest="stranded", 26 | default="no", 27 | help=f"Stranded nature of the library. Choose one of: {', '.join(strand_options.keys())}" 28 | ) 29 | 30 | parser.add_argument( 31 | "-o", "--output", 32 | action="store", 33 | dest="output", 34 | default=None, 35 | help="Output file name." 36 | ) 37 | 38 | args = parser.parse_args() 39 | 40 | # Define the arguments properly 41 | FILE_PATH_OUT_BASENAME = os.path.splitext(args.output)[0] 42 | 43 | # I/O 44 | files = [] 45 | 46 | for FILE_PATH_IN in args.input: 47 | FILE_PATH_IN = FILE_PATH_IN.name 48 | if not os.path.isfile(FILE_PATH_IN): 49 | raise Exception(f"Could not find file {FILE_PATH_IN}.") 50 | if not FILE_PATH_IN.endswith('ReadsPerGene.out.tab'): 51 | raise Exception(f"Expecting file ending with 'ReadsPerGene.out.tab', {os.path.basename(FILE_PATH_IN)} does not.") 52 | 53 | try: 54 | cell_name = os.path.basename(FILE_PATH_IN)[:-len("ReadsPerGene.out.tab")] 55 | counts = pd.read_csv(FILE_PATH_IN, sep='\t', index_col=0, skiprows=4, header=None) 56 | files.append((counts, cell_name)) 57 | except IOError: 58 | raise Exception("VSN ERROR: Wrong input format. 
Expects .tab files, got {}".format(FILE_PATH_IN)) 59 | 60 | # 61 | # Adjust the data 62 | # 63 | try: 64 | all_counts = pd.DataFrame() 65 | for counts, cell_name in files: 66 | all_counts.loc[:, cell_name] = counts[strand_options[args.stranded]].astype(int) 67 | except KeyError: 68 | raise Exception(f"VSN ERROR: Concatenation failed. Invalid --stranded value '{args.stranded}'.") 69 | 70 | all_counts.to_csv(f"{FILE_PATH_OUT_BASENAME}.tsv", header=True, index=True, sep='\t') 71 | -------------------------------------------------------------------------------- /src/utils/conf/base.config: -------------------------------------------------------------------------------- 1 | params { 2 | utils { 3 | container = 'vibsinglecellnf/utils:0.4.0' 4 | file_converter { 5 | off = 'h5ad' 6 | tagCellWithSampleId = true 7 | remove10xGEMWell = false 8 | useFilteredMatrix = true 9 | makeVarIndexUnique = false 10 | } 11 | publish { 12 | // pipelineOutputSuffix = '' 13 | compressionLevel = 6 14 | annotateWithBatchVariableName = false 15 | mode = 'copy' 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/utils/conf/cell_annotate.config: -------------------------------------------------------------------------------- 1 | params { 2 | utils { 3 | cell_annotate { 4 | off = 'h5ad' 5 | method = 'obo' // 'obo' = one-by-one, or 'aio' = all-in-one 6 | indexColumnName = '' 7 | // cellMetaDataFilePath = '' // Required in static mode and with 'aio' method 8 | // sampleSuffixWithExtension = '' // Required in static mode and with 'aio' method 9 | // sampleColumnName = '' // Required with 'aio' method 10 | // annotationColumnNames = [''] // Required with 'aio' method 11 | // publish = false 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/utils/conf/cell_filter.config: -------------------------------------------------------------------------------- 1 | params { 2 | utils { 3 | cell_filter { 4 | off = 'h5ad' 5 | method = 'internal' // or 'external' (requires the following additional params cellMetaDataFilePath, sampleColumnName, indexColumnName) 6 | filters = [ 7 | [ 8 | id: '', // Short identifier for the filter 9 | indexColumnName: '', 10 | filterColumnName: '', 11 | valuesToKeepFromFilterColumn: [''] 12 | // sampleColumnName: '', 13 | // cellMetaDataFilePath: '' 14 | ] 15 | ] 16 | // publish = false 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/utils/conf/h5ad_clean.config: -------------------------------------------------------------------------------- 1 | params { 2 | utils { 3 | file_cleaner { 4 | obsColumnMapper = [] 5 | obsColumnValueMapper = [] 6 | obsColumnsToRemove = [] 7 | } 8 | } 9 | } -------------------------------------------------------------------------------- /src/utils/conf/h5ad_concatenate.config: -------------------------------------------------------------------------------- 1 | params { 2 | utils { 3 | file_concatenator { 4 | join = 'outer' 5 | off = 'h5ad' 6 | } 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /src/utils/conf/h5ad_extract_metadata.config: -------------------------------------------------------------------------------- 1 | params { 2 | utils { 3 | extract_feature_metadata { 4 | columnNames = [''] 5 | } 6 | } 7 | } -------------------------------------------------------------------------------- /src/utils/conf/h5ad_update_metadata.config: -------------------------------------------------------------------------------- 1 | params { 
2 | utils { 3 | update_feature_metadata_index { 4 | indexColumnName = 'gene_symbol' 5 | joinKey = 'gene_ids' 6 | } 7 | } 8 | } -------------------------------------------------------------------------------- /src/utils/conf/sample_annotate.config: -------------------------------------------------------------------------------- 1 | params { 2 | utils { 3 | sample_annotate { 4 | off = 'h5ad' 5 | by { 6 | metadataFilePath = 'data/10x/1k_pbmc/metadata.tsv' 7 | method = 'sample' 8 | sampleColumnName = 'sample_id' 9 | compIndexColumnNames = [ 10 | "<adataIndexColumnName>" : "<metadataIndexColumnName>" 11 | ] 12 | annotationColumnNames = [] 13 | } 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /src/utils/conf/sample_annotate_old_v1.config: -------------------------------------------------------------------------------- 1 | params { 2 | utils { 3 | sample_annotate_v1 { 4 | iff = '10x_cellranger_mex' 5 | off = 'h5ad' 6 | type = 'sample' 7 | metadataFilePath = 'data/10x/1k_pbmc/metadata.tsv' 8 | } 9 | } 10 | } -------------------------------------------------------------------------------- /src/utils/conf/scope.config: -------------------------------------------------------------------------------- 1 | params { 2 | utils { 3 | scope { 4 | genome = '' 5 | tree { 6 | level_1 = '' 7 | level_2 = '' 8 | level_3 = '' 9 | } 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/utils/conf/sra_metadata.config: -------------------------------------------------------------------------------- 1 | params { 2 | global { 3 | outdir = 'out' 4 | } 5 | utils { 6 | container = 'vibsinglecellnf/utils:0.3.0' 7 | sra_metadata { 8 | mode = 'web' // or db 9 | // sraDb = '' 10 | // sraDbForceDownload = false 11 | // sraDbOutDir = '' 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/utils/conf/sra_metadata.test.config: -------------------------------------------------------------------------------- 1 | params { 2 | global { 3 | outdir = 'out' 4 | } 5 | data { 6 | sra = [ 7 | [ 8 | id:'SRP125768', 9 | samples: [ 10 | "DGRP-551_.*d_r[0-9]+(?! )", 11 | "w1118_.*d_r[0-9]+(?! )" 12 | ] 13 | ] 14 | ] 15 | } 16 | utils { 17 | container = 'vibsinglecellnf/utils:0.3.0' 18 | sra_metadata { 19 | mode = 'web' // or db 20 | // sraDb = '' 21 | // sraDbForceDownload = false 22 | // sraDbOutDir = '' 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/utils/conf/sra_normalize_fastqs.config: -------------------------------------------------------------------------------- 1 | params { 2 | utils { 3 | sra_normalize_fastqs { 4 | // Downloading FASTQ from SRA will give FASTQ in the following format SRRXXXXXX_[1-9].fastq. 
This index minus one will be used to retrieve the FASTQ read suffix from the array of suffixes defined hereunder 5 | fastq_read_suffixes = ["R1","R2"] // ["R1","R2","I1","I2"] would be used for SRR11442498 (this requires params.tools.sratoolkit.includeTechnicalReads = true) 6 | } 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /src/utils/conf/star_concatenate.config: -------------------------------------------------------------------------------- 1 | params { 2 | utils { 3 | star_concatenator { 4 | stranded = 'no' 5 | off = 'tsv' 6 | } 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /src/utils/conf/test.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | scanpy { 4 | container = 'vibsinglecellnf/scanpy:1.8.1' 5 | } 6 | } 7 | utils { 8 | file_converter { 9 | iff = '10x_cellranger_mex' 10 | off = 'h5ad' 11 | useFilteredMatrix = true 12 | } 13 | file_annotator { 14 | iff = '10x_cellranger_mex' 15 | off = 'h5ad' 16 | type = 'sample' 17 | metadataFilePath = '/ddn1/vol1/staging/leuven/stg_00002/lcb/lcb_projects/TWE/cellranger/metadata.tsv' 18 | } 19 | file_concatenator { 20 | join = 'outer' 21 | iff = '10x_cellranger_mex' 22 | off = 'h5ad' 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/utils/conf/update_feature_nomenclature.config: -------------------------------------------------------------------------------- 1 | includeConfig './h5adExtractMetadata.config' 2 | includeConfig './../../flybaser/flybaser.config' 3 | includeConfig './h5adUpdateMetadata.config' -------------------------------------------------------------------------------- /src/utils/conf/workflow_report.config: -------------------------------------------------------------------------------- 1 | params { 2 | utils { 3 | workflow_configuration { 4 | report_ipynb = "${params.misc.test.enabled ? '../../..' : ''}/src/utils/bin/reports/workflow_configuration_template.ipynb" 5 | } 6 | } 7 | } -------------------------------------------------------------------------------- /src/utils/processes/config.nf: -------------------------------------------------------------------------------- 1 | import java.nio.file.Paths 2 | import groovy.transform.Memoized 3 | import nextflow.script.ScriptBinding 4 | import nextflow.config.ConfigParser 5 | import static groovy.json.JsonOutput.* 6 | 7 | 8 | def updateParams(params, resolvedParams, setter) { 9 | resolvedParams.each { k, v -> 10 | if(setter == null) { 11 | if(v instanceof Map) { 12 | if(!params.containsKey(k)) 13 | params."${k}" = [:] 14 | updateParams(params, v, params."${k}") 15 | } else { 16 | params."${k}" = v 17 | } 18 | } else { 19 | if(!setter.containsKey(k)) 20 | setter."${k}" = [:] 21 | setter."${k}" = v instanceof Map ? updateParams(params, v, setter."${k}") : v 22 | } 23 | } 24 | } 25 | 26 | @Memoized 27 | def resolveParams(Map params, boolean verbose) { 28 | if(!params.containsKey("strategy")) 29 | return params 30 | if(params.strategy != "min") 31 | return params 32 | def isRootDir = workflow.projectDir.getParent().getName() == "vib-singlecell-nf" 33 | def config = new ConfigParser().setBinding([params: params]) 34 | def co = new ConfigObject() 35 | co.putAll(params) 36 | co.flatten().each { key, val -> 37 | if(key.endsWith("configVersion")) { 38 | // Extract the tool name based on the key 39 | def tool = key.split("\\.")[-2] 40 | // Build the path to the versioned config of the current tool 41 | def toolBaseDir = isRootDir ? 
Paths.get(workflow.projectDir.toRealPath(), "src", tool) : workflow.projectDir.toRealPath() 42 | config = config.parse(Paths.get(toolBaseDir.toString(), "conf/min/base/${val}.config")) 43 | } 44 | } 45 | // Update the strategy since params has been resolved 46 | config.params.strategy = "max" 47 | updateParams(params, config.params, null) 48 | if(verbose) 49 | println(prettyPrint(toJson(params))) 50 | return params 51 | } 52 | 53 | def includeConfig(Map params, String configRelativeFilePath) { 54 | def repoFilePath = workflow.scriptFile.getParent() 55 | def isMainRepo = repoFilePath.getName() == "PUMATAC" 56 | def config = new ConfigParser().setBinding([params: params]) 57 | def co = new ConfigObject() 58 | def toolBaseDir = isMainRepo ? repoFilePath.toRealPath().toString() : repoFilePath.getParent().getParent().toRealPath().toString() 59 | config = config.parse(Paths.get(toolBaseDir, configRelativeFilePath)) 60 | updateParams(params, config.params, null) 61 | return params 62 | } 63 | -------------------------------------------------------------------------------- /src/utils/processes/files.nf: -------------------------------------------------------------------------------- 1 | 2 | def getBaseName(file, suffix) { 3 | // Default value suffix = "SC" does not work! Weird... 4 | res = (file.getName() =~ /(.+)\.${suffix}(.+)\.(.+)/) 5 | if(res.size() == 0) { 6 | throw new Exception("VSN ERROR: Cannot get base name.") 7 | } 8 | (full, filename, process, ext) = res[0] 9 | return filename 10 | } 11 | 12 | def extractSample(path, suffix, groups) { 13 | // Extract the sample name based on the given path and on the given suffix 14 | def _suffix = suffix instanceof String ? [suffix] : suffix 15 | _suffix = _suffix.collect { it.replace(".","\\.") } 16 | for(int i = 0; i<_suffix.size(); i++) { 17 | def sufx = _suffix[i] 18 | 19 | def pattern = /(.+)\/(.+)${sufx}/ 20 | def res = (path =~ pattern) 21 | if(res.size() == 0) continue 22 | if(res.size() == 1) { 23 | def (full, parentDir, id) = res[0] 24 | if(groups != null) { 25 | return new Tuple(id, groups[i]) 26 | } else { 27 | return new Tuple(id, 'NULL') 28 | } 29 | } 30 | } 31 | throw new Exception("VSN ERROR: the suffix params couldn't match any of the file paths. 
Make sure the suffix param exists in the file paths.") 32 | } 33 | -------------------------------------------------------------------------------- /src/utils/processes/gtf.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | process FORMAT_GTF { 4 | 5 | publishDir "${params.global.outdir}/00.refdata", mode: 'symlink' 6 | label 'compute_resources__default' 7 | 8 | input: 9 | file(annotation) 10 | 11 | output: 12 | file "*.formatted.gtf" 13 | 14 | script: 15 | """ 16 | sed -r 's/(.*); transcript_id (.*); (.*); gene_name (.*); \$/\\1; transcript_id \\2; \\3; gene_name \\4; transcript_name \\2;/' \ 17 | ${annotation} \ 18 | > ${annotation.baseName}.formatted.gtf 19 | """ 20 | 21 | } 22 | 23 | process FORMAT_GTF_IGENOMES { 24 | 25 | publishDir "${params.global.outdir}/00.refdata", mode: 'symlink' 26 | label 'compute_resources__default' 27 | 28 | input: 29 | file(annotation) 30 | 31 | output: 32 | file "*.formatted.gtf" 33 | 34 | script: 35 | """ 36 | sed -r 's/(.*); gene_name (.*); transcript_id (.*); (.*);\$/\\1; gene_name \\2; transcript_id \\3; \\4; transcript_name \\3;/' \ 37 | ${annotation} \ 38 | > ${annotation.baseName}.formatted.gtf 39 | """ 40 | 41 | } 42 | -------------------------------------------------------------------------------- /src/utils/processes/h5adExtractMetadata.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | import java.nio.file.Paths 4 | 5 | binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/utils/bin" : Paths.get(workflow.scriptFile.getParent().getParent().toString(), "utils/bin") 6 | 7 | 8 | process SC__UTILS__EXTRACT_FEATURE_METADATA { 9 | 10 | container params.tools.scanpy.container 11 | publishDir "${params.global.outdir}/data/intermediate", mode: 'link', overwrite: true 12 | label 'compute_resources__default' 13 | 14 | input: 15 | tuple val(sampleId), path(f) 16 | 17 | output: 18 | tuple val(sampleId), path("${sampleId}.SC__UTILS__EXTRACT_FEATURE_METADATA.tsv") 19 | 20 | script: 21 | def sampleParams = params.parseConfig(sampleId, params.global, params.utils.extract_feature_metadata) 22 | processParams = sampleParams.local 23 | columnNamesAsArguments = processParams.columnNames.collect({ '--column-name' + ' ' + it }).join(' ') 24 | """ 25 | ${binDir}/sc_h5ad_extract_metadata.py \ 26 | --axis feature \ 27 | ${columnNamesAsArguments} \ 28 | $f \ 29 | "${sampleId}.SC__UTILS__EXTRACT_FEATURE_METADATA.tsv" 30 | """ 31 | 32 | } 33 | -------------------------------------------------------------------------------- /src/utils/processes/h5adMerge.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | import java.nio.file.Paths 4 | 5 | binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/utils/bin" : Paths.get(workflow.scriptFile.getParent().getParent().toString(), "utils/bin") 6 | 7 | 8 | process SC__H5AD_MERGE { 9 | 10 | container params.tools.scanpy.container 11 | publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true 12 | label 'compute_resources__mem' 13 | 14 | input: 15 | // Expects: 16 | // - data to be multiple h5ad files containing the final results to be merged 17 | tuple \ 18 | val(sampleId), \ 19 | path(data) 20 | 21 | output: 22 | tuple \ 23 | val(sampleId), \ 24 | path("${sampleId}.SC__H5AD_MERGE.h5ad") 25 | 26 | script: 27 | """ 28 | ${binDir}/sc_h5ad_merge.py \ 29 | * \ 30 | "${sampleId}.SC__H5AD_MERGE.h5ad" 31 | """ 32 | 33 | } 34 | -------------------------------------------------------------------------------- /src/utils/processes/h5adToLoom.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | import java.nio.file.Paths 4 | 5 | binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/utils/bin" : Paths.get(workflow.scriptFile.getParent().getParent().toString(), "utils/bin") 6 | 7 | 8 | process SC__H5AD_TO_LOOM { 9 | 10 | container params.tools.scanpy.container 11 | publishDir "${params.global.outdir}/loom", mode: 'link', overwrite: true, saveAs: { filename -> "${sampleId}.SCope_output.loom" } 12 | label 'compute_resources__mem' 13 | 14 | input: 15 | // Expects: 16 | // - rawFilteredData to be h5ad file containing the raw filtered (gene + cell filtered) data 17 | // - data to be one or more h5ad files containing the final results to be stored in the loom 18 | tuple \ 19 | val(sampleId), \ 20 | path(rawFilteredData), \ 21 | path(data) 22 | 23 | output: 24 | tuple \ 25 | val(sampleId), \ 26 | path("${sampleId}.SC__H5AD_TO_LOOM.loom") 27 | 28 | script: 29 | """ 30 | ${binDir}/h5ad_to_loom.py \ 31 | ${params.utils?.scope.genome.length() > 0 ? '--nomenclature "' + params.utils?.scope.genome + '"' : ''} \ 32 | ${params.utils?.scope.tree.level_1.length() > 0 ? '--scope-tree-level-1 "' + params.utils.scope.tree.level_1 + '"' : ''} \ 33 | ${params.utils?.scope.tree.level_2.length() > 0 ? '--scope-tree-level-2 "' + params.utils.scope.tree.level_2 + '"' : ''} \ 34 | ${params.utils?.scope.tree.level_3.length() > 0 ? '--scope-tree-level-3 "' + params.utils.scope.tree.level_3 + '"' : ''} \ 35 | ${params.utils?.scope?.markers?.log_fc_threshold ? '--markers-log-fc-threshold ' + params.utils.scope.markers.log_fc_threshold : ''} \ 36 | ${params.utils?.scope?.markers?.fdr_threshold ? 
'--markers-fdr-threshold ' + params.utils.scope.markers.fdr_threshold : ''} \ 37 | $data \ 38 | $rawFilteredData \ 39 | "${sampleId}.SC__H5AD_TO_LOOM.loom" 40 | """ 41 | 42 | } 43 | 44 | process SC__H5AD_TO_FILTERED_LOOM { 45 | 46 | container params.tools.scanpy.container 47 | publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true 48 | label 'compute_resources__mem' 49 | 50 | input: 51 | tuple val(sampleId), path(f) 52 | 53 | output: 54 | tuple val(sampleId), path("${sampleId}.filtered.loom") 55 | 56 | script: 57 | """ 58 | ${binDir}/h5ad_to_filtered_loom.py \ 59 | $f \ 60 | "${sampleId}.filtered.loom" 61 | """ 62 | 63 | } 64 | -------------------------------------------------------------------------------- /src/utils/processes/h5adUpdate.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | import java.nio.file.Paths 4 | import static groovy.json.JsonOutput.* 5 | 6 | binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/utils/bin" : Paths.get(workflow.scriptFile.getParent().getParent().toString(), "utils/bin") 7 | 8 | 9 | process SC__H5AD_UPDATE_X_PCA { 10 | 11 | container params.tools.scanpy.container 12 | label 'compute_resources__mem' 13 | 14 | input: 15 | tuple \ 16 | val(sampleId), \ 17 | path(data), \ 18 | path(xPca) 19 | 20 | output: 21 | tuple \ 22 | val(sampleId), \ 23 | path("${sampleId}.SC__H5AD_UPDATE_X_PCA.h5ad") 24 | 25 | script: 26 | """ 27 | ${binDir}/sc_h5ad_update.py \ 28 | --x-pca ${xPca} \ 29 | $data \ 30 | "${sampleId}.SC__H5AD_UPDATE_X_PCA.h5ad" 31 | """ 32 | 33 | } 34 | 35 | process SC__H5AD_CLEAN { 36 | 37 | container params.tools.scanpy.container 38 | label 'compute_resources__mem' 39 | 40 | input: 41 | tuple \ 42 | val(sampleId), \ 43 | path(data), \ 44 | val(stashedParams) 45 | 46 | output: 47 | tuple \ 48 | val(sampleId), \ 49 | path("${sampleId}.SC__H5AD_CLEAN.h5ad"), \ 50 | val(stashedParams) 51 | 52 | script: 53 | """ 54 | ${binDir}/sc_h5ad_update.py \ 55 | --empty-x \ 56 | $data \ 57 | "${sampleId}.SC__H5AD_CLEAN.h5ad" 58 | """ 59 | 60 | } 61 | 62 | process SC__H5AD_BEAUTIFY { 63 | 64 | container params.tools.scanpy.container 65 | publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true 66 | label 'compute_resources__mem' 67 | 68 | input: 69 | tuple \ 70 | val(sampleId), \ 71 | path(data), \ 72 | val(stashedParams) 73 | 74 | output: 75 | tuple \ 76 | val(sampleId), \ 77 | path("${sampleId}.SC__H5AD_BEAUTIFY.h5ad"), \ 78 | val(stashedParams) 79 | 80 | script: 81 | def sampleParams = params.parseConfig(sampleId, params.global, params.utils.file_cleaner) 82 | processParams = sampleParams.local 83 | 84 | obsColumnsToRemoveAsArgument = processParams.containsKey("obsColumnsToRemove") ? 85 | processParams.obsColumnsToRemove.collect({ '--obs-column-to-remove' + ' ' + it }).join(' ') : 86 | '' 87 | """ 88 | ${binDir}/sc_h5ad_update.py \ 89 | ${obsColumnsToRemoveAsArgument} \ 90 | ${processParams.containsKey("obsColumnMapper") ? "--obs-column-mapper '" + toJson(processParams.obsColumnMapper) + "'": ''} \ 91 | ${processParams.containsKey("obsColumnValueMapper") ? 
"--obs-column-value-mapper '" + toJson(processParams.obsColumnValueMapper) + "'": ''} \ 92 | $data \ 93 | "${sampleId}.SC__H5AD_BEAUTIFY.h5ad" 94 | """ 95 | 96 | } 97 | 98 | -------------------------------------------------------------------------------- /src/utils/processes/h5adUpdateMetadata.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | import java.nio.file.Paths 4 | 5 | binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/utils/bin" : Paths.get(workflow.scriptFile.getParent().getParent().toString(), "utils/bin") 6 | 7 | 8 | process SC__UTILS__UPDATE_FEATURE_METADATA_INDEX { 9 | 10 | container params.tools.scanpy.container 11 | publishDir "${params.global.outdir}/data/intermediate", mode: 'link', overwrite: true 12 | label 'compute_resources__default' 13 | 14 | input: 15 | tuple val(sampleId), path(f), path(additionalMetadata) 16 | 17 | output: 18 | tuple val(sampleId), path("${sampleId}.SC__UTILS__UPDATE_FEATURE_METADATA_INDEX.h5ad") 19 | 20 | script: 21 | def sampleParams = params.parseConfig(sampleId, params.global, params.utils.update_feature_metadata_index) 22 | processParams = sampleParams.local 23 | """ 24 | ${binDir}/sc_h5ad_update_metadata.py \ 25 | --additional-metadata ${additionalMetadata} \ 26 | --axis feature \ 27 | --index-column-name ${processParams.indexColumnName} \ 28 | --join-key ${processParams.joinKey} \ 29 | $f \ 30 | "${sampleId}.SC__UTILS__UPDATE_FEATURE_METADATA_INDEX.h5ad" 31 | """ 32 | 33 | } 34 | -------------------------------------------------------------------------------- /src/utils/processes/reports.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | import static groovy.json.JsonOutput.* 4 | 5 | process UTILS__GENERATE_WORKFLOW_CONFIG_REPORT { 6 | 7 | container params.utils.container 8 | publishDir "${params.global.outdir}/notebooks/intermediate", mode: 'link', overwrite: true 9 | label 'compute_resources__report' 10 | 11 | input: 12 | path(ipynb) 13 | 14 | output: 15 | path("workflow_configuration_report.ipynb") 16 | 17 | script: 18 | """ 19 | papermill ${ipynb} \ 20 | workflow_configuration_report.ipynb \ 21 | -p WORKFLOW_MANIFEST '${params.misc.manifestAsJSON}' \ 22 | -p WORKFLOW_PARAMETERS '${params.misc.paramsAsJSON}' 23 | """ 24 | 25 | } 26 | 27 | process UTILS__REPORT_TO_HTML { 28 | 29 | container params.utils.container 30 | publishDir "${params.global.outdir}/notebooks/intermediate", mode: 'link', overwrite: true 31 | // copy final "merged_report" to notbooks root: 32 | publishDir "${params.global.outdir}/notebooks", pattern: '*merged_report*', mode: 'link', overwrite: true 33 | label 'compute_resources__report' 34 | 35 | input: 36 | tuple \ 37 | val(sampleId), \ 38 | path(ipynb) 39 | 40 | output: 41 | file("*.html") 42 | 43 | script: 44 | """ 45 | jupyter nbconvert ${ipynb} --to html 46 | """ 47 | 48 | } 49 | -------------------------------------------------------------------------------- /src/utils/utils.config: -------------------------------------------------------------------------------- 1 | includeConfig 'conf/base.config' -------------------------------------------------------------------------------- /src/utils/workflows/annotateByCellMetadata.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | ////////////////////////////////////////////////////// 4 | // Process imports: 5 | include { 6 | isParamNull; 7 | 
getToolParams; 8 | } from './../processes/utils.nf' params(params) 9 | include { 10 | getChannel; 11 | } from './../../channels/file' params(params) 12 | include { 13 | SC__ANNOTATE_BY_CELL_METADATA; 14 | } from './../processes/h5adAnnotate.nf' params(params) 15 | 16 | ////////////////////////////////////////////////////// 17 | // Define the workflow 18 | 19 | workflow ANNOTATE_BY_CELL_METADATA { 20 | 21 | take: 22 | // Expects (sampleId, h5ad) : Channel 23 | data 24 | // Expects (sampleId, tsv) : (Channel || null) 25 | metadata 26 | // Describes: name of tool 27 | // Expects tool: (string || null) 28 | // Values 29 | // - tool != null: 30 | // - The given tool is performing itself a cell-based annotation 31 | // - params.tools[tool] should exist 32 | // - tool == null: 33 | // - params.utils.cell_annotate should exist 34 | tool 35 | 36 | main: 37 | def workflowParams = isParamNull(tool) ? 38 | params.utils.cell_annotate : 39 | getToolParams(params.tools, tool)["cell_annotate"] 40 | def method = workflowParams.method 41 | if(method == 'aio') { 42 | out = SC__ANNOTATE_BY_CELL_METADATA( 43 | data.map { 44 | it -> tuple(it[0], it[1], file(workflowParams.cellMetaDataFilePath)) 45 | }, 46 | isParamNull(tool) ? 'NULL' : tool 47 | ) 48 | } else if(method == 'obo') { 49 | if(metadata == null) { 50 | metadata = getChannel( 51 | workflowParams.cellMetaDataFilePath, 52 | workflowParams.sampleSuffixWithExtension, 53 | 'NULL' 54 | ) 55 | } 56 | out = SC__ANNOTATE_BY_CELL_METADATA( 57 | data.join(metadata), 58 | isParamNull(tool) ? 'NULL' : tool 59 | ) 60 | } else { 61 | throw new Exception("The given method '" + method + "' is not valid for cell_annotate.") 62 | } 63 | 64 | emit: 65 | out 66 | 67 | } 68 | 69 | workflow ANNOTATE_BY_CELL_METADATA_BY_PAIR { 70 | take: 71 | one 72 | two 73 | tool 74 | main: 75 | ANNOTATE_BY_CELL_METADATA( 76 | one.map { 77 | it -> tuple(it[0], it[1]) 78 | }, 79 | two.map { 80 | it -> tuple(it[0], it[1]) 81 | }, 82 | tool 83 | ) 84 | emit: 85 | ANNOTATE_BY_CELL_METADATA.out 86 | } 87 | 88 | workflow STATIC__ANNOTATE_BY_CELL_METADATA { 89 | 90 | take: 91 | // Expects (sampleId, h5ad) 92 | data 93 | // Expects name of tool ([string] || null) 94 | tool 95 | 96 | main: 97 | out = ANNOTATE_BY_CELL_METADATA( 98 | data, 99 | null, 100 | tool 101 | ) 102 | 103 | emit: 104 | out 105 | 106 | } 107 | 108 | -------------------------------------------------------------------------------- /src/utils/workflows/downloadFromSRA.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | import java.nio.file.Files 4 | import java.nio.file.Paths 5 | 6 | ////////////////////////////////////////////////////// 7 | // process imports: 8 | 9 | include { 10 | SRATOOLKIT__DOWNLOAD_FASTQS; 11 | } from './../../sratoolkit/workflows/downloadFastQ' params(params) 12 | include { 13 | GET_SRA_DB; 14 | } from './../processes/sra' params(params) 15 | include { 16 | SRA_TO_METADATA; 17 | } from './../processes/sra' params(params) 18 | include { 19 | NORMALIZE_SRA_FASTQS; 20 | } from './../processes/sra' params(params) 21 | 22 | ////////////////////////////////////////////////////// 23 | // Define the workflow 24 | 25 | // dataParams = params.data.sra 26 | utilsParams = params.utils 27 | 28 | if(!utilsParams.containsKey("sra_metadata")) 29 | throw new Exception("DOWNLOAD_FROM_SRA workflow requires sra_metadata.config") 30 | 31 | workflowParams = params.utils.sra_metadata 32 | 33 | workflow DOWNLOAD_FROM_SRA { 34 | 35 | take: 36 | // Expects 
(sraProjectId, sampleFilters) 37 | sra 38 | 39 | main: 40 | if(workflowParams.mode == 'db') { 41 | sraDbFile = workflowParams.sraDb != '' ? file(workflowParams.sraDb): file(workflowParams.sraDbOutDir + "/SRAmetadb.sqlite") 42 | if(sraDbFile.exists() 43 | && sraDbFile.canRead() 44 | && !workflowParams.sraDbForceDownload) { 45 | println("Local SRA database detected ${sraDbFile}!") 46 | db = sraDbFile 47 | } else { 48 | if(workflowParams.sraDbForceDownload 49 | || workflowParams.sraDb == '') { 50 | println("Downloading SRA database to ${sraDbFile}...") 51 | db = GET_SRA_DB() 52 | println("Done!") 53 | } 54 | } 55 | } else if(workflowParams.mode == 'web') { 56 | db = file('NO_FILE') 57 | } else { 58 | throw new Exception("The "+ workflowParams.mode +" mode does not exist. Choose one of: web, db.") 59 | } 60 | // Get metadata for the given SRA Project ID and keep only the samples that pass the given sampleFilters 61 | metadata = SRA_TO_METADATA( 62 | sra, 63 | db 64 | ).splitCsv( 65 | header:true, 66 | sep: '\t' 67 | ).map { 68 | // Strip a trailing ']' or ')' and replace the special characters '[', ']', '(', ')', ',', ' ', '/' and '.' with underscores 69 | row -> tuple( 70 | row.run_accession, \ 71 | row.sample_name.replaceAll("[\\])]\$","").replaceAll("[\\]\\[)(), /\\.]","_") 72 | ) 73 | } 74 | if(!params.containsKey('quiet')) metadata.view() 75 | // Download and compress all the SRA runs defined in the metadata 76 | data = SRATOOLKIT__DOWNLOAD_FASTQS( 77 | metadata 78 | ).join( 79 | metadata 80 | ).map { 81 | // Put sample as primary key 82 | run -> tuple(run[2], run[1]) 83 | } 84 | out = NORMALIZE_SRA_FASTQS( data ) 85 | 86 | emit: 87 | out 88 | 89 | } 90 | 91 | // workflow test { 92 | // Channel 93 | // .fromFilePairs('work/**/SRR*_{1,2}.fastq.gz') 94 | // } 95 | -------------------------------------------------------------------------------- /src/utils/workflows/fileConverter.nf: -------------------------------------------------------------------------------- 1 | import nextflow.util.ArrayBag 2 | 3 | nextflow.enable.dsl=2 4 | 5 | ////////////////////////////////////////////////////// 6 | // process imports: 7 | 8 | include { 9 | SC__H5AD_TO_LOOM; 10 | } from './../processes/h5adToLoom.nf' params(params) 11 | include { 12 | SC__H5AD_MERGE 13 | } from "./../processes/h5adMerge.nf" params(params) 14 | include { 15 | isParamNull; 16 | PUBLISH; 17 | } from "./utils.nf" params(params) 18 | 19 | ////////////////////////////////////////////////////// 20 | // Define the workflow 21 | 22 | inputFormatsAllowed = ['h5ad'] 23 | outputFormatsAllowed = ['loom', 'h5ad'] 24 | 25 | workflow FILE_CONVERTER { 26 | 27 | take: 28 | // Expects (sampleId, data[]) 29 | data 30 | // Expects outputSuffix: string 31 | outputSuffix 32 | // Expects outputFormat: string 33 | outputFormat 34 | // Expects (sampleId, rawFilteredData) 35 | rawFilteredData 36 | 37 | main: 38 | out = Channel.empty() 39 | 40 | if(outputFormat == "mergeToSCopeLoom") { 41 | if(isParamNull(rawFilteredData)) { 42 | throw new Exception("VSN ERROR: Expecting rawFilteredData not to be null when outputFormat is "+ outputFormat) 43 | } 44 | out = SC__H5AD_TO_LOOM( 45 | rawFilteredData.combine( 46 | data.map { 47 | it -> tuple(it[0], it[1]) 48 | }, 49 | by: 0 50 | ) 51 | ) 52 | } else if(outputFormat == "mergeToScanpyH5ad") { 53 | out = SC__H5AD_MERGE( 54 | data.map { 55 | it -> tuple(it[0], it[1]) 56 | } 57 | ) 58 | } else { 59 | throw new Exception("VSN ERROR: Output format "+ outputFormat +" not supported") 60 | } 61 | 62 | emit: 63 | out 64 | 65 | } 66 | 67 | 
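For orientation, a minimal usage sketch of the FILE_CONVERTER workflow above (the sample id, file paths and output suffix are hypothetical placeholders; the call shape mirrors the FINALIZE workflow further below, and the rawFilteredData channel is only consumed by the 'mergeToSCopeLoom' branch):

nextflow.enable.dsl=2

include {
    FILE_CONVERTER;
} from './src/utils/workflows/fileConverter.nf' params(params)

workflow {
    // (sampleId, h5ad) pairs as produced by upstream processes (hypothetical paths)
    data = Channel.of( tuple('sample1', file('out/data/sample1.final.h5ad')) )
    rawFilteredData = Channel.of( tuple('sample1', file('out/data/sample1.filtered.h5ad')) )
    // Group the per-sample results and merge them into a SCope-ready loom
    FILE_CONVERTER(
        data.groupTuple(),
        'FINAL',             // hypothetical output suffix
        'mergeToSCopeLoom',
        rawFilteredData
    )
}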
-------------------------------------------------------------------------------- /src/utils/workflows/filterAnnotateClean.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | ////////////////////////////////////////////////////// 4 | // Process imports: 5 | include { 6 | UPDATE_FEATURE_NOMENCLATURE 7 | } from './updateFeatureNomenclature.nf' params(params) 8 | include { 9 | FILTER_BY_CELL_METADATA 10 | } from './filterByCellMetadata.nf' params(params) 11 | include { 12 | STATIC__ANNOTATE_BY_CELL_METADATA 13 | } from './annotateByCellMetadata.nf' params(params) 14 | include { 15 | hasMetadataFilePath; 16 | SC__ANNOTATE_BY_SAMPLE_METADATA 17 | } from '../processes/h5adAnnotate.nf' params(params) 18 | include { 19 | SC__H5AD_BEAUTIFY; 20 | } from '../processes/h5adUpdate.nf' params(params) 21 | 22 | ////////////////////////////////////////////////////// 23 | // Define the workflow 24 | 25 | workflow FILTER_AND_ANNOTATE_AND_CLEAN { 26 | 27 | take: 28 | // Expects (sampleId, h5ad) : Channel 29 | data 30 | 31 | main: 32 | out = data 33 | if(params.utils?.update_feature_metadata_index) { 34 | out = UPDATE_FEATURE_NOMENCLATURE( data ) 35 | } 36 | // Filter cells based on an indexed cell-based metadata table 37 | if(params.utils?.cell_filter) { 38 | out = FILTER_BY_CELL_METADATA( out, 'NULL' ) 39 | } 40 | // Annotate cells based on an indexed cell-based metadata table 41 | if(params.utils?.cell_annotate) { 42 | out = STATIC__ANNOTATE_BY_CELL_METADATA( 43 | out, 44 | null 45 | ) 46 | } 47 | // Annotate cells based on an indexed sample-based metadata table 48 | if(params.utils?.sample_annotate) { 49 | if (!hasMetadataFilePath(params.utils.sample_annotate)) { 50 | throw new Exception("The metadataFilePath param is missing in sample_annotate.") 51 | } 52 | out = SC__ANNOTATE_BY_SAMPLE_METADATA( out ) 53 | } 54 | // Clean 55 | // e.g.: 56 | // - h5ad: rename adata.obs values, remove adata.obs columns 57 | if(params.utils?.file_cleaner) { 58 | out = SC__H5AD_BEAUTIFY( out ) 59 | } 60 | 61 | emit: 62 | out 63 | 64 | } 65 | -------------------------------------------------------------------------------- /src/utils/workflows/filterByCellMetadata.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | ////////////////////////////////////////////////////// 4 | // Process imports: 5 | include { 6 | isParamNull; 7 | getToolParams; 8 | } from './../processes/utils.nf' params(params) 9 | include { 10 | SC__PREPARE_OBS_FILTER; 11 | } from './../processes/h5adSubset' params(params) 12 | include { 13 | SC__APPLY_OBS_FILTER; 14 | } from './../processes/h5adSubset' params(params) 15 | 16 | ////////////////////////////////////////////////////// 17 | // Define the workflow 18 | 19 | workflow FILTER_BY_CELL_METADATA { 20 | 21 | take: 22 | // Expects (sampleId, h5ad) : Channel 23 | data 24 | // Describes: name of tool 25 | // Expects tool: (string || null) 26 | // Values 27 | // - tool != null: 28 | // - The given tool is performing itself a cell-based filtering 29 | // - params.tools[tool] should exist 30 | // - tool == null: 31 | // - params.utils.cell_filter should exist 32 | tool 33 | 34 | main: 35 | def workflowParams = isParamNull(tool) ? 36 | params.utils.cell_filter : 37 | getToolParams(params.tools, tool)["cell_filter"] 38 | Channel 39 | .from(workflowParams.filters) 40 | .set{ filters } 41 | SC__PREPARE_OBS_FILTER( 42 | data.combine(filters), 43 | isParamNull(tool) ? 
'NULL' : tool 44 | ) 45 | out = SC__APPLY_OBS_FILTER( 46 | SC__PREPARE_OBS_FILTER.out.groupTuple(), 47 | isParamNull(tool) ? 'NULL' : tool 48 | ) 49 | 50 | emit: 51 | out 52 | 53 | } 54 | -------------------------------------------------------------------------------- /src/utils/workflows/finalize.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | include { 4 | SC__H5AD_TO_FILTERED_LOOM 5 | } from './../processes/h5adToLoom.nf' params(params) 6 | include { 7 | FILE_CONVERTER as FILE_CONVERTER_TO_SCOPE; 8 | FILE_CONVERTER as FILE_CONVERTER_TO_SCANPY; 9 | } from "./fileConverter" 10 | 11 | // Convert to 12 | // - SCope-ready 13 | // - Scanpy-ready files 14 | workflow FINALIZE { 15 | 16 | take: 17 | rawFilteredData 18 | finalProcessedData 19 | fileOutputSuffix 20 | 21 | main: 22 | // Conversion 23 | // Convert h5ad to X (here we choose: loom format) 24 | filteredloom = SC__H5AD_TO_FILTERED_LOOM( rawFilteredData ) 25 | FILE_CONVERTER_TO_SCOPE( 26 | finalProcessedData.groupTuple(), 27 | fileOutputSuffix, 28 | 'mergeToSCopeLoom', 29 | rawFilteredData 30 | ) 31 | FILE_CONVERTER_TO_SCANPY( 32 | finalProcessedData.groupTuple(), 33 | fileOutputSuffix, 34 | 'mergeToScanpyH5ad', 35 | rawFilteredData 36 | ) 37 | 38 | emit: 39 | filteredloom 40 | scopeloom = FILE_CONVERTER_TO_SCOPE.out 41 | scanpyh5ad = FILE_CONVERTER_TO_SCANPY.out 42 | 43 | } -------------------------------------------------------------------------------- /src/utils/workflows/updateFeatureNomenclature.nf: -------------------------------------------------------------------------------- 1 | /* 2 | * Conversion workflow 3 | * Source: 4 | * 5 | */ 6 | 7 | nextflow.enable.dsl=2 8 | 9 | ////////////////////////////////////////////////////// 10 | // process imports: 11 | 12 | // Imports 13 | include { 14 | SC__UTILS__EXTRACT_FEATURE_METADATA; 15 | } from './../processes/h5adExtractMetadata' params(params) 16 | include { 17 | FLYBASER__CONVERT_FBGN_TO_GENE_SYMBOL; 18 | } from './../../flybaser/processes/convertNomenclature' params(params) 19 | include { 20 | SC__UTILS__UPDATE_FEATURE_METADATA_INDEX; 21 | } from './../processes/h5adUpdateMetadata' params(params) 22 | 23 | ////////////////////////////////////////////////////// 24 | // Define the workflow 25 | 26 | workflow UPDATE_FEATURE_NOMENCLATURE { 27 | 28 | take: 29 | // Expects (sampleId, data) 30 | data 31 | 32 | main: 33 | SC__UTILS__EXTRACT_FEATURE_METADATA( data ) 34 | FLYBASER__CONVERT_FBGN_TO_GENE_SYMBOL( SC__UTILS__EXTRACT_FEATURE_METADATA.out ) 35 | out = SC__UTILS__UPDATE_FEATURE_METADATA_INDEX( data.join(FLYBASER__CONVERT_FBGN_TO_GENE_SYMBOL.out) ) 36 | 37 | emit: 38 | out 39 | 40 | } 41 | -------------------------------------------------------------------------------- /workflows/single_sample.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | // Utils 4 | include { 5 | clean; 6 | SC__FILE_CONVERTER; 7 | } from '../src/utils/processes/utils.nf' params(params) 8 | 9 | // Pipeline 10 | include { 11 | SINGLE_SAMPLE as SCANPY__SINGLE_SAMPLE; 12 | } from '../src/scanpy/workflows/single_sample.nf' params(params) 13 | include { 14 | SC__SCANPY__CLUSTERING_PARAMS; 15 | } from '../src/scanpy/processes/cluster.nf' params(params) 16 | include { 17 | SC__DIRECTS__SELECT_DEFAULT_CLUSTERING 18 | } from '../src/directs/processes/selectDefaultClustering.nf' 19 | 20 | workflow single_sample { 21 | 22 | take: 23 | data 24 | 25 | main: 26 | 
/******************************************* 27 | * Run the pipeline 28 | */ 29 | SC__FILE_CONVERTER( data ) 30 | SCANPY__SINGLE_SAMPLE( SC__FILE_CONVERTER.out ) 31 | 32 | // Define the parameters for clustering 33 | def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.tools.scanpy.clustering) ) 34 | 35 | // Select a default clustering when in parameter exploration mode 36 | if(params.tools?.directs && clusteringParams.isParameterExplorationModeOn()) { 37 | scopeloom = SC__DIRECTS__SELECT_DEFAULT_CLUSTERING( 38 | SCANPY__SINGLE_SAMPLE.out.final_processed_scope_loom 39 | ) 40 | } else { 41 | scopeloom = SCANPY__SINGLE_SAMPLE.out.final_processed_scope_loom 42 | } 43 | 44 | emit: 45 | filteredloom = SCANPY__SINGLE_SAMPLE.out.filtered_loom 46 | scanpyh5ad = SCANPY__SINGLE_SAMPLE.out.final_processed_scanpy_h5ad 47 | scopeloom = scopeloom 48 | 49 | } 50 | -------------------------------------------------------------------------------- /workflows/star.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | ////////////////////////////////////////////////////// 4 | // Define the parameters for the current testing process 5 | 6 | include { 7 | SC__STAR__LOAD_GENOME; 8 | } from '../src/star/processes/load_genome' params(params) 9 | include { 10 | SC__STAR__MAP_COUNT; 11 | } from '../src/star/processes/map_count' params(params) 12 | include { 13 | SC__STAR__UNLOAD_GENOME; 14 | } from '../src/star/processes/unload_genome' params(params) 15 | include { 16 | SC__STAR_CONCATENATOR; 17 | } from '../src/utils/processes/utils.nf' params(params) 18 | 19 | include { 20 | getChannel as getSingleEndChannel; 21 | } from '../src/channels/singleend.nf' params(params) 22 | 23 | ////////////////////////////////////////////////////// 24 | // Define the workflow 25 | 26 | /* 27 | * Run the STAR workflow on the single-end FASTQ files specified. 28 | */ 29 | workflow star { 30 | 31 | main: 32 | SC__STAR__LOAD_GENOME( file(params.tools.star.map_count.index) ) 33 | SC__STAR__MAP_COUNT( 34 | file(params.tools.star.map_count.index), 35 | SC__STAR__LOAD_GENOME.out, 36 | getSingleEndChannel(params.tools.star.map_count.fastqs) 37 | ) 38 | SC__STAR__UNLOAD_GENOME( 39 | file(params.tools.star.map_count.index), 40 | SC__STAR__MAP_COUNT.out.isDone.collect() 41 | ) 42 | SC__STAR_CONCATENATOR( SC__STAR__MAP_COUNT.out.counts.map { it[1] }.collect() ) 43 | 44 | emit: 45 | SC__STAR_CONCATENATOR.out 46 | 47 | } 48 | --------------------------------------------------------------------------------
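To close, a minimal parameter sketch for the star entry point above, with hypothetical input paths; the tools.star.map_count keys are the ones the workflow reads, and the utils.star_concatenator block mirrors src/utils/conf/star_concatenate.config (the stranded value must be one of the strand_options defined in sc_star_concatenator.py):

params {
    global {
        outdir = 'out'
    }
    tools {
        star {
            map_count {
                index  = '/path/to/star/index'      // hypothetical STAR genome index
                fastqs = 'data/fastqs/*.fastq.gz'   // hypothetical single-end FASTQ glob
            }
        }
    }
    utils {
        star_concatenator {
            stranded = 'no'  // one of: no, forward, reverse
            off = 'tsv'
        }
    }
}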