├── .readthedocs.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── CONTRIBUTORS.md ├── LICENSE.md ├── README.rst ├── VERSION ├── assets └── images │ ├── bbknn.svg │ ├── bbknn_scenic.svg │ ├── decontx.svg │ ├── harmony.svg │ ├── harmony_scenic.svg │ ├── mnncorrect.svg │ ├── scenic.svg │ ├── scenic_multiruns.svg │ ├── single_sample.svg │ ├── single_sample_decontx.svg │ ├── single_sample_decontx_scrublet.svg │ ├── single_sample_scenic.svg │ └── single_sample_scrublet.svg ├── conf ├── atac │ ├── preprocess.config │ ├── preprocess_rapid.config │ └── qc_filtering.config ├── compute_resources.config ├── compute_resources_with_retry.config ├── docker.config ├── generic.config ├── genomes │ ├── dm6.config │ ├── hg19.config │ ├── hg38.config │ └── mm10.config ├── global.config ├── logger.config ├── min.config ├── nemesh.config ├── singularity.config ├── test.config ├── test__bbknn.config ├── test__bbknn_scenic.config ├── test__cell_annotate_filter.config ├── test__compute_resources.config ├── test__decontx.config ├── test__harmony.config ├── test__harmony_scenic.config ├── test__mnncorrect.config ├── test__scenic.config ├── test__scenic_multiruns.config ├── test__single_sample.config ├── test__single_sample_decontx_correct.config ├── test__single_sample_decontx_correct_scrublet.config ├── test__single_sample_decontx_filter.config ├── test__single_sample_param_exploration.config ├── test__single_sample_scenic.config ├── test__single_sample_scenic_multiruns.config ├── test__single_sample_scrublet.config ├── test_disabled.config ├── vpcx.config └── vsc.config ├── data ├── 10x │ └── 1k_pbmc │ │ └── metadata.tsv ├── README.md └── sample_data_tiny │ └── sample_data_tiny_dummy_annotation.tsv.gz ├── docs ├── Makefile ├── attributions.rst ├── case-studies.rst ├── conf.py ├── development.rst ├── features.rst ├── getting-started.rst ├── index.rst ├── input_formats.rst ├── pipelines.rst ├── scatac-seq.rst └── scatac-seq_qc.rst ├── main.nf ├── main_atac.nf ├── nextflow.config ├── samtools_markdup.sh ├── src ├── barcard │ ├── barcard.config │ ├── bin │ │ ├── .ipynb_checkpoints │ │ │ ├── barcard_otsu_filtering-Copy1-checkpoint.ipynb │ │ │ └── barcard_otsu_filtering-checkpoint.ipynb │ │ ├── barcard_otsu_filtering-Copy1.ipynb │ │ ├── barcard_otsu_filtering.ipynb │ │ └── barcard_otsu_filtering_test.ipynb │ ├── conf │ │ └── barcard_barcode_multiplet.config │ ├── main.nf │ └── processes │ │ ├── create_fragments_from_bam.nf │ │ ├── detect_barcode_multiplets.nf │ │ ├── detect_barcode_multiplets.nf_new │ │ ├── detect_barcode_multiplets.nf_old │ │ ├── merge_barcode_multiplets.nf │ │ └── report.nf ├── bwamaptools │ ├── .gitattributes │ ├── .gitignore │ ├── Dockerfile │ ├── LICENSE │ ├── README.rst │ ├── bin │ │ ├── .gitkeep │ │ └── mapping_summary.sh │ ├── bwamaptools.config │ ├── conf │ │ ├── .gitkeep │ │ └── bwa_mapping.config │ ├── main.nf │ ├── processes │ │ ├── .gitkeep │ │ ├── index.nf │ │ ├── mapping.nf │ │ └── mapping_summary.nf │ └── workflows │ │ └── .gitkeep ├── channels │ ├── channels.nf │ ├── conf │ │ ├── bam.config │ │ ├── csv.config │ │ ├── fragments.config │ │ ├── h5ad.config │ │ ├── loom.config │ │ ├── seurat_rds.config │ │ ├── sra.config │ │ ├── tenx_arc_cellranger_mex.config │ │ ├── tenx_atac_cellranger_mex.config │ │ ├── tenx_cellranger_h5.config │ │ ├── tenx_cellranger_mex.config │ │ └── tsv.config │ ├── file.nf │ ├── singleend.nf │ ├── sra.nf │ └── tenx.nf ├── edirect │ ├── .gitignore │ ├── LICENSE │ ├── edirect.config │ ├── processes │ │ └── sra_metadata.nf │ └── workflows │ │ └── sra_fastq_urls.nf 
├── popscle │ ├── .gitattributes │ ├── .gitignore │ ├── Dockerfile │ ├── LICENSE │ ├── README.rst │ ├── conf │ │ └── .gitkeep │ ├── main.nf │ ├── popscle.config │ ├── processes │ │ ├── demuxlet.nf │ │ └── dsc_pileup.nf │ └── workflows │ │ ├── demuxlet.nf │ │ └── dsc_pileup.nf ├── pycistopic │ ├── .gitattributes │ ├── .gitignore │ ├── LICENSE │ ├── README.rst │ ├── bin │ │ ├── .gitkeep │ │ ├── barcode_level_statistics.py │ │ ├── biomart_annot.py │ │ ├── call_cells.py │ │ ├── compute_qc_stats.py │ │ ├── plot_qc_stats.py │ │ ├── pycisTopic_qc_report_template.ipynb │ │ └── pycisTopic_qc_report_template.ipynb2 │ ├── conf │ │ ├── .gitkeep │ │ ├── pycistopic_dmel.config │ │ ├── pycistopic_hg38.config │ │ └── pycistopic_mm10.config │ ├── processes │ │ ├── .gitkeep │ │ ├── barcode_level_statistics.nf │ │ ├── biomart_annot.nf │ │ ├── call_cells.nf │ │ ├── compute_qc_stats.nf │ │ ├── macs2_call_peaks.nf │ │ └── plot_qc_stats.nf │ ├── pycistopic.config │ └── workflows │ │ └── .gitkeep ├── samtools │ ├── Dockerfile │ ├── Dockerfile.samtools-base │ ├── README.rst │ ├── processes │ │ ├── merge_bam.nf │ │ └── sort_bam.nf │ └── samtools.config ├── singlecelltoolkit │ ├── .gitattributes │ ├── .gitignore │ ├── Dockerfile │ ├── LICENSE │ ├── README.rst │ ├── bin │ │ └── .gitkeep │ ├── conf │ │ ├── .gitkeep │ │ ├── sctk_mapping.config │ │ └── sctk_saturation.config │ ├── main.nf │ ├── processes │ │ ├── barcode_10x_scatac_fastqs.nf │ │ ├── barcode_correction.nf │ │ ├── extract_and_correct_biorad_barcode.nf │ │ ├── extract_hydrop_atac_barcode.nf │ │ ├── fix_and_compress_fastqs.nf │ │ └── saturation.nf │ ├── singlecelltoolkit.config │ └── workflows │ │ └── .gitkeep ├── trimgalore │ ├── .gitattributes │ ├── .gitignore │ ├── Dockerfile │ ├── LICENSE │ ├── README.rst │ ├── bin │ │ └── .gitkeep │ ├── conf │ │ └── .gitkeep │ ├── processes │ │ ├── .gitkeep │ │ └── trim.nf │ ├── trimgalore.config │ └── workflows │ │ └── .gitkeep └── utils │ ├── Dockerfile │ ├── README.md │ ├── bin │ ├── create_cistopic_object.R │ ├── h5ad_to_filtered_loom.py │ ├── h5ad_to_loom.py │ ├── reports │ │ └── workflow_configuration_template.ipynb │ ├── sc_file_concatenator.py │ ├── sc_file_converter.R │ ├── sc_file_converter.py │ ├── sc_h5ad_annotate_by_cell_metadata.py │ ├── sc_h5ad_annotate_by_sample_metadata.py │ ├── sc_h5ad_apply_obs_filter.py │ ├── sc_h5ad_extract_metadata.py │ ├── sc_h5ad_merge.py │ ├── sc_h5ad_prepare_obs_filter.py │ ├── sc_h5ad_update.py │ ├── sc_h5ad_update_metadata.py │ ├── sc_star_concatenator.py │ └── sra_to_metadata.py │ ├── conf │ ├── base.config │ ├── cell_annotate.config │ ├── cell_filter.config │ ├── h5ad_clean.config │ ├── h5ad_concatenate.config │ ├── h5ad_extract_metadata.config │ ├── h5ad_update_metadata.config │ ├── sample_annotate.config │ ├── sample_annotate_old_v1.config │ ├── scope.config │ ├── sra_metadata.config │ ├── sra_metadata.test.config │ ├── sra_normalize_fastqs.config │ ├── star_concatenate.config │ ├── test.config │ ├── update_feature_nomenclature.config │ └── workflow_report.config │ ├── main.test.nf │ ├── processes │ ├── .ipynb_checkpoints │ │ └── config-checkpoint.nf │ ├── config.nf │ ├── files.nf │ ├── gtf.nf │ ├── h5adAnnotate.nf │ ├── h5adExtractMetadata.nf │ ├── h5adMerge.nf │ ├── h5adSubset.nf │ ├── h5adToLoom.nf │ ├── h5adUpdate.nf │ ├── h5adUpdateMetadata.nf │ ├── reports.nf │ ├── sra.nf │ └── utils.nf │ ├── utils.config │ └── workflows │ ├── annotateByCellMetadata.nf │ ├── downloadFromSRA.nf │ ├── fileConverter.nf │ ├── filterAnnotateClean.nf │ ├── filterByCellMetadata.nf │ ├── 
finalize.nf │ ├── updateFeatureNomenclature.nf │ └── utils.nf └── workflows ├── atac ├── .ipynb_checkpoints │ └── preprocess_rapid-checkpoint.nf ├── preprocess.nf ├── preprocess_rapid.nf └── qc_filtering.nf ├── bbknn.nf ├── harmony.nf ├── mnncorrect.nf ├── multi_sample.nf ├── nemesh.nf ├── single_sample.nf ├── single_sample_star.nf └── star.nf /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sphinx: 4 | configuration: docs/conf.py 5 | 6 | formats: all 7 | 8 | submodules: 9 | exclude: all 10 | 11 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at kristofer.davie@kuleuven.vib.be. 
All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | When contributing to this repository, please first discuss the change you wish to make via an issue, 4 | [email](mailto:vib.singlecell.nf@gmail.com), or any other method (e.g. [Gitter](https://gitter.im/vib-singlecell-nf/community)) with the owners of this repository before making a change. 5 | 6 | Please note we have a code of conduct; please follow it in all your interactions with the project. 7 | 8 | ## Pull Request Process 9 | 10 | All in-development pull requests must be submitted to the `develop` branch. Only the `develop` 11 | branch can be merged into the `master` branch; this will be done when sufficient changes are in 12 | place to increase the version of the pipeline, and will be performed at the discretion of the lead 13 | developers. 14 | 15 | 1. Ensure any install or build dependencies are removed before the end of the layer when doing a 16 | build. 17 | 2. Update the README.md with details of changes to the interface; this includes new environment 18 | variables, exposed ports, useful file locations and container parameters. 19 | 3. Increase the version numbers in any example files and the README.md to the new version that 20 | this Pull Request would represent. The versioning scheme we use is [SemVer](http://semver.org/). 21 | 4. Ensure that all current and new tests pass successfully. 22 | 5. The Pull Request can be merged once you have the sign-off of two other developers, and must 23 | be merged by one of the lead developers.
24 | -------------------------------------------------------------------------------- /CONTRIBUTORS.md: -------------------------------------------------------------------------------- 1 | # Contributors 2 | 3 | `vib-singlecell-nf` is the result of a collaboration between the following groups: 4 | 5 | 1) Contributed to the development of the pipelines and modules 6 | - [Stein Aerts Lab (VIB-KULeuven)](https://www.aertslab.org/) 7 | - Gert Hulselmans - Lead Developer (scATAC pipeline) 8 | - Florian De Rop - Developer/Testing (scATAC pipeline) 9 | - Chris Flerin - Former Lead Developer (scATAC pipeline) 10 | 11 | 2) Provided input, expert advice, testing, benchmarking and fruitful discussions 12 | - [Single Cell Bioinformatics Expertise Unit (CBD VIB)](https://cbd.vib.be/research/expertise-units/bioinformatics/) 13 | - Kris Davie - Lead Developer 14 | 15 | For a full breakdown of code contributions for each repository, see GitHub. 16 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | PUMATAC 2 | ============== 3 | Pipeline for Universal Mapping of ATAC-seq 4 | 5 | |PUMATAC| |ReadTheDocs| |Zenodo| |Gitter| |Nextflow| 6 | 7 | A detailed, step-by-step tutorial with examples is available `here `_. 8 | 9 | If PUMATAC is useful for your research, consider citing: 10 | 11 | - PUMATAC All Versions (latest): `10.5281/zenodo.7764892 `_. 12 | - Our Nature Biotechnology article: `10.1038/s41587-023-01881-x `_. 13 | 14 | Currently, a preprocessing workflow is available, which takes FASTQ inputs, applies barcode correction, read trimming, and BWA mapping, and outputs BAM and fragments files for further downstream analysis. 15 | 16 | .. |VSN-Pipelines| image:: https://img.shields.io/github/v/release/vib-singlecell-nf/vsn-pipelines 17 | :target: https://github.com/vib-singlecell-nf/vsn-pipelines/releases 18 | :alt: GitHub release (latest by date) 19 | 20 | .. |PUMATAC| image:: https://img.shields.io/github/v/release/vib-singlecell-nf/vsn-pipelines 21 | :target: https://github.com/aertslab/ATACflow/releases 22 | :alt: GitHub release (latest by date) 23 | 24 | .. |ReadTheDocs| image:: https://readthedocs.org/projects/vsn-pipelines/badge/?version=latest 25 | :target: https://vsn-pipelines.readthedocs.io/en/latest/?badge=latest 26 | :alt: Documentation Status 27 | 28 | .. |Nextflow| image:: https://img.shields.io/badge/nextflow-21.04.3-brightgreen.svg 29 | :target: https://www.nextflow.io/ 30 | :alt: Nextflow 31 | 32 | .. |Gitter| image:: https://badges.gitter.im/vib-singlecell-nf/community.svg 33 | :target: https://gitter.im/vib-singlecell-nf/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge 34 | :alt: Gitter 35 | 36 | .. 
|Zenodo| image:: https://zenodo.org/badge/199477571.svg 37 | :target: https://doi.org/10.5281/zenodo.7764884 38 | :alt: Zenodo 39 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 0.27.0 -------------------------------------------------------------------------------- /conf/atac/preprocess.config: -------------------------------------------------------------------------------- 1 | params { 2 | atac_preprocess_tools { 3 | mark_duplicates_method = 'MarkDuplicates' 4 | adapter_trimming_method = 'Trim_Galore' 5 | } 6 | data { 7 | atac_preprocess { 8 | metadata = 'metadata.tsv' 9 | } 10 | } 11 | } 12 | 13 | includeConfig './../../src/singlecelltoolkit/singlecelltoolkit.config' 14 | includeConfig './../../src/singlecelltoolkit/conf/sctk_mapping.config' 15 | includeConfig './../../src/trimgalore/trimgalore.config' 16 | includeConfig './../../src/fastp/fastp.config' 17 | includeConfig './../../src/bwamaptools/bwamaptools.config' 18 | includeConfig './../../src/gatk/gatk.config' 19 | includeConfig './../../src/bwamaptools/conf/bwa_mapping.config' 20 | includeConfig './../../src/sinto/sinto.config' 21 | includeConfig './../../src/bap/bap.config' 22 | //includeConfig './../../src/bap/conf/bap_biorad_debarcode.config' 23 | 24 | -------------------------------------------------------------------------------- /conf/atac/preprocess_rapid.config: -------------------------------------------------------------------------------- 1 | params { 2 | atac_preprocess_tools { 3 | mark_duplicates_method = 'MarkDuplicates' 4 | adapter_trimming_method = 'Trim_Galore' 5 | } 6 | data { 7 | atac_preprocess { 8 | metadata = 'metadata.tsv' 9 | } 10 | } 11 | } 12 | 13 | includeConfig './../../src/singlecelltoolkit/singlecelltoolkit.config' 14 | includeConfig './../../src/singlecelltoolkit/conf/sctk_mapping.config' 15 | includeConfig './../../src/trimgalore/trimgalore.config' 16 | includeConfig './../../src/bwamaptools/bwamaptools.config' 17 | includeConfig './../../src/samtools/samtools.config' 18 | includeConfig './../../src/bwamaptools/conf/bwa_mapping.config' 19 | includeConfig './../../src/barcard/conf/barcard_barcode_multiplet.config' 20 | -------------------------------------------------------------------------------- /conf/atac/qc_filtering.config: -------------------------------------------------------------------------------- 1 | includeConfig './../../src/pycistopic/pycistopic.config' 2 | includeConfig './../../src/singlecelltoolkit/singlecelltoolkit.config' 3 | includeConfig './../../src/singlecelltoolkit/conf/sctk_saturation.config' 4 | 5 | -------------------------------------------------------------------------------- /conf/compute_resources.config: -------------------------------------------------------------------------------- 1 | 2 | // define computing resources via process labels 3 | process { 4 | 5 | // set global executor for all processes. 
Can be overridden by other tool-specific labels 6 | executor = 'local' 7 | 8 | // set default options that apply to all processes: 9 | cpus = 2 10 | memory = '60 GB' 11 | 12 | // additional cluster options (applies to grid based executors): 13 | clusterOptions = "-A cluster_account" 14 | 15 | // set a default compute profile 16 | withLabel: 'compute_resources__default' { 17 | time = '1h' 18 | } 19 | 20 | withLabel:compute_resources__sctk_barcode { 21 | cpus = 2 22 | memory = '20 GB' 23 | maxForks = 8 24 | } 25 | 26 | withLabel:compute_resources__barcode_10x_scatac_fastq_5cpus { 27 | cpus = 5 28 | memory = '40 GB' 29 | maxForks = 5 30 | } 31 | 32 | withLabel:compute_resources__trimgalore__trim_5cpus { 33 | cpus = 5 34 | memory = '20 GB' 35 | maxForks = 5 36 | } 37 | 38 | withLabel:compute_resources__picard__merge_sam_files_and_sort { 39 | cpus = 4 40 | memory = '100 GB' 41 | maxForks = 4 42 | } 43 | 44 | withLabel:compute_resources__picard__mark_duplicates_and_sort { 45 | cpus = 8 46 | memory = '100 GB' 47 | maxForks = 4 48 | } 49 | 50 | withLabel:compute_resources__sinto__fragments { 51 | cpus = 4 52 | memory = '40 GB' 53 | maxForks = 8 54 | } 55 | 56 | withLabel:compute_resources__sinto__sort_fragments { 57 | cpus = 1 58 | memory = '40 GB' 59 | maxForks = 8 60 | } 61 | 62 | withLabel:compute_resources__bap_barcode_multiplet_pipeline_8cpus { 63 | cpus = 8 64 | memory = '80 GB' 65 | maxForks = 3 66 | } 67 | 68 | withLabel: 'compute_resources__minimal' { 69 | cpus = 1 70 | memory = '1 GB' 71 | } 72 | 73 | withLabel: 'compute_resources__mem' { 74 | cpus = 4 75 | memory = '160 GB' 76 | } 77 | 78 | withLabel: 'compute_resources__cpu' { 79 | cpus = 20 80 | memory = '80 GB' 81 | } 82 | 83 | withLabel: 'compute_resources__report' { 84 | maxForks = 2 85 | cpus = 1 86 | memory = '160 GB' 87 | } 88 | 89 | // can be used in conjunction with any other label to extend the queue time 90 | withLabel: 'compute_resources__24hqueue' { 91 | time = '24h' 92 | } 93 | 94 | } 95 | 96 | -------------------------------------------------------------------------------- /conf/compute_resources_with_retry.config: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | This error retry strategy and check_max function was modified from nf-core: 4 | https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/%7B%7Bcookiecutter.name_noslash%7D%7D/conf/base.config 5 | */ 6 | 7 | params { 8 | // Defaults only, expecting to be overwritten based on available cluster resources 9 | max_memory = 170.GB 10 | max_cpus = 20 11 | max_time = 168.h 12 | } 13 | 14 | // Function to ensure that resource requirements don't go beyond 15 | // a maximum limit 16 | def check_max(obj, type) { 17 | if (type == 'memory') { 18 | try { 19 | if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) 20 | return params.max_memory as nextflow.util.MemoryUnit 21 | else 22 | return obj 23 | } catch (all) { 24 | println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" 25 | return obj 26 | } 27 | } else if (type == 'time') { 28 | try { 29 | if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) 30 | return params.max_time as nextflow.util.Duration 31 | else 32 | return obj 33 | } catch (all) { 34 | println " ### ERROR ### Max time '${params.max_time}' is not valid! 
Using default value: $obj" 35 | return obj 36 | } 37 | } else if (type == 'cpus') { 38 | try { 39 | return Math.min( obj, params.max_cpus as int ) 40 | } catch (all) { 41 | println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" 42 | return obj 43 | } 44 | } 45 | } 46 | 47 | 48 | // define computing resources via process labels 49 | process { 50 | 51 | // this executor applies to all processes, except when overridden in another label 52 | executor = 'local' 53 | 54 | // allow a process to be re-tried if the exit code falls in this range. Otherwise, set to 'finish' (wait for completion of existing jobs) 55 | errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } 56 | 57 | maxRetries = 2 58 | 59 | // set default options that apply to all processes: 60 | cpus = { check_max(2 * task.attempt, 'cpus') } 61 | memory = { check_max(30.GB * task.attempt, 'memory') } 62 | 63 | // additional cluster options (applies to grid based executors): 64 | clusterOptions = "-A cluster_account" 65 | 66 | // set a default compute profile 67 | withLabel: 'compute_resources__default' { 68 | time = { check_max(1.h * task.attempt, 'time') } 69 | } 70 | 71 | withLabel: 'compute_resources__minimal' { 72 | cpus = { check_max(1 * task.attempt, 'cpus') } 73 | memory = { check_max(1.GB * task.attempt, 'memory') } 74 | } 75 | 76 | withLabel: 'compute_resources__mem' { 77 | cpus = { check_max(4, 'cpus') } 78 | memory = { check_max(160.GB * task.attempt, 'memory') } 79 | } 80 | 81 | withLabel: 'compute_resources__cpu' { 82 | cpus = { check_max(20, 'cpus') } 83 | memory = { check_max(80.GB * task.attempt, 'memory') } 84 | } 85 | 86 | } 87 | 88 | -------------------------------------------------------------------------------- /conf/docker.config: -------------------------------------------------------------------------------- 1 | docker { 2 | enabled = true 3 | runOptions = "-i -v ${HOME}:${HOME}" 4 | } -------------------------------------------------------------------------------- /conf/generic.config: -------------------------------------------------------------------------------- 1 | import static groovy.json.JsonOutput.* 2 | 3 | params { 4 | breakPrettyPrintMap = { p -> 5 | throw new Exception(prettyPrint(toJson(p))) 6 | } 7 | // This closure facilitates the usage of sample specific parameters 8 | parseConfig = { sample, paramsGlobal, paramsLocal -> 9 | def lv = { a,b -> return org.codehaus.groovy.runtime.MethodRankHelper.delDistance(a, b) } 10 | def pL = paramsLocal.collectEntries { k,v -> 11 | if (v instanceof Map) { 12 | if (v.containsKey(sample)) 13 | return [k, v[sample]] 14 | if (v.containsKey('default')) 15 | return [k, v['default']] 16 | def closeMatches = v.collectEntries { vk, vv -> [lv(vk, sample), vk] }.keySet().findAll { it < 30} 17 | if(closeMatches.size() > 0) 18 | throw new Exception("The sample " + sample + " is not found in " + v +" ; Make sure your samples are correctly specified when using the multi-sample feature.") 19 | else 20 | return [k,v] 21 | } else { 22 | return [k,v] 23 | } 24 | } 25 | return [global: paramsGlobal, local: pL] 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /conf/genomes/dm6.config: -------------------------------------------------------------------------------- 1 | params { 2 | global { 3 | species = 'fly' 4 | genome { 5 | assembly = 'dm6' 6 | } 7 | } 8 | } 9 | -------------------------------------------------------------------------------- 
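A note on the `parseConfig` closure defined in `conf/generic.config` above: when a parameter's value is a map, the entry keyed by the current sample ID is selected, with a `'default'` entry as the fallback; when no `'default'` entry exists, a near-miss sample name raises an error rather than silently passing the whole map through. A minimal sketch of the convention (the `mytool` block, the `nCores` parameter, and the sample IDs are hypothetical):

params {
    tools {
        mytool {
            // Per-sample override map; the 'default' entry is the fallback value.
            nCores = [
                'sample_A': 4,
                'default' : 2
            ]
        }
    }
}

// Resolution inside a process or workflow would then look like:
// def resolved = params.parseConfig('sample_A', params.global, params.tools.mytool)
// assert resolved.local.nCores == 4   // any other sample ID falls back to 2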
/conf/genomes/hg19.config: -------------------------------------------------------------------------------- 1 | params { 2 | global { 3 | species = 'human' 4 | genome { 5 | assembly = 'hg19' 6 | } 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /conf/genomes/hg38.config: -------------------------------------------------------------------------------- 1 | params { 2 | global { 3 | species = 'human' 4 | genome { 5 | assembly = 'hg38' 6 | } 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /conf/genomes/mm10.config: -------------------------------------------------------------------------------- 1 | params { 2 | global { 3 | species = 'mouse' 4 | genome { 5 | assembly = 'mm10' 6 | } 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /conf/global.config: -------------------------------------------------------------------------------- 1 | params { 2 | global { 3 | project_name = '10x_PBMC' 4 | outdir = 'out' 5 | } 6 | } 7 | -------------------------------------------------------------------------------- /conf/logger.config: -------------------------------------------------------------------------------- 1 | process { 2 | afterScript = { 3 | // Source: https://github.com/nextflow-io/nextflow/issues/1166#issuecomment-502467562 4 | logMainDir = params.logDir 5 | 6 | // Check whether log dir is located in S3 if using awsbatch and is a local directory otherwise 7 | if (workflow.profile == "aws") { 8 | if (!logMainDir.matches("^s3://.*")) logMainDir = "s3:/" + workflow.workDir.toString() + "/log" 9 | } else { 10 | logMainDir = workflow.launchDir.resolve(logMainDir).toString() 11 | if (!logMainDir.matches("^/.*")) logMainDir = workflow.launchDir.toString() + "/log" 12 | } 13 | 14 | // Build log directory path based on task name 15 | logSubDir = task.name.replace(" (null)", "").replace(" ", "/").replaceAll(" ", "_").replaceAll("[()]", "") 16 | logDir = logMainDir + "/" + logSubDir 17 | 18 | // Define command to copy log files 19 | cpLogCmd = workflow.profile == "aws" ? 
20 | "nxf_s3_upload '*.txt' ${logDir}; " : 21 | "mkdir -p ${logDir}; cp -a *.txt ${logDir}; " 22 | 23 | // Assemble final command 24 | cmd = "ls -alR --full-time > .command.ls; " 25 | cmd += "mkdir nxf_log; " 26 | cmd += "for file in .command.*; do cp -a \${file} nxf_log/\${file#.}.txt; done; " 27 | cmd += "cd nxf_log; " 28 | cmd += cpLogCmd 29 | cmd += "cd ..;" 30 | cmd 31 | } 32 | } -------------------------------------------------------------------------------- /conf/min.config: -------------------------------------------------------------------------------- 1 | min { 2 | enabled = true 3 | } -------------------------------------------------------------------------------- /conf/nemesh.config: -------------------------------------------------------------------------------- 1 | params { 2 | global { 3 | genome = '/ddn1/vol1/staging/leuven/res_00001/genomes/homo_sapiens/hg38_iGenomes/iGenomes_Raw/Sequence/WholeGenomeFasta/genome.fa' 4 | genome_annotation = '/ddn1/vol1/staging/leuven/res_00001/genomes/homo_sapiens/hg38_iGenomes/iGenomes_Raw/Annotation/Archives/archive-2015-08-14-08-18-15/Genes/genes.gtf' 5 | tmpDir = '/ddn1/vol1/staging/leuven/stg_00002/lcb/dwmax' 6 | threads= 1 7 | qsubaccount = '' 8 | } 9 | 10 | tools { 11 | nemesh { 12 | // User can extract custom cell barcodes by providing it with a TSV containing all the barcodes 13 | // custom_selected_barcodes = '' 14 | // custom_selected_barcodes_tag = '' 15 | } 16 | } 17 | } -------------------------------------------------------------------------------- /conf/singularity.config: -------------------------------------------------------------------------------- 1 | singularity { 2 | enabled = true 3 | autoMounts = true 4 | runOptions = '--cleanenv -H $PWD -B ${HOME}' 5 | } 6 | -------------------------------------------------------------------------------- /conf/test.config: -------------------------------------------------------------------------------- 1 | params { 2 | misc { 3 | test { 4 | enabled = true 5 | } 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /conf/test__bbknn.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'bbknn_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = "testdata/*/outs/" 9 | } 10 | } 11 | tools { 12 | scanpy { 13 | filter { 14 | cellFilterMinNGenes = 1 15 | } 16 | neighborhood_graph { 17 | nPcs = 2 18 | } 19 | dim_reduction { 20 | pca { 21 | method = 'pca' 22 | nComps = 2 23 | } 24 | } 25 | clustering { 26 | method = 'louvain' 27 | resolution = 1 28 | } 29 | } 30 | } 31 | } 32 | 33 | -------------------------------------------------------------------------------- /conf/test__bbknn_scenic.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'bbknn_scenic_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = "testdata/*/outs/" 9 | } 10 | } 11 | tools { 12 | scanpy { 13 | filter { 14 | cellFilterMinNGenes = 1 15 | } 16 | neighborhood_graph { 17 | nPcs = 2 18 | } 19 | dim_reduction { 20 | pca { 21 | method = 'pca' 22 | nComps = 2 23 | } 24 | } 25 | clustering { 26 | method = 'louvain' 27 | resolution = 1 28 | } 29 | } 30 | scenic { 31 | numWorkers = 2 32 | grn { 33 | tfs = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/test_TFs_small.txt' 34 | } 35 | cistarget { 36 | motifsDb = 
'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/genome-ranking.feather' 37 | motifsAnnotation = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/motifs.tbl' 38 | tracksDb = '' 39 | tracksAnnotation = '' 40 | } 41 | } 42 | } 43 | } 44 | 45 | -------------------------------------------------------------------------------- /conf/test__cell_annotate_filter.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'cell_annotate_filter_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = 'sample_data/outs' 9 | } 10 | } 11 | tools { 12 | scanpy { 13 | container = 'vibsinglecellnf/scanpy:1.8.1' 14 | } 15 | } 16 | utils { 17 | file_converter { 18 | off = 'h5ad' 19 | tagCellWithSampleId = false 20 | useFilteredMatrix = true 21 | makeVarIndexUnique = false 22 | } 23 | cell_annotate { 24 | off = 'h5ad' 25 | method = 'aio' 26 | indexColumnName = 'index' 27 | cellMetaDataFilePath = "sample_data_tiny_dummy_annotation.tsv.gz" 28 | annotationColumnNames = ['dummy_annotation'] 29 | } 30 | cell_filter { 31 | off = 'h5ad' 32 | method = 'internal' 33 | filters = [[ 34 | id : 'foobar', 35 | indexColumnName:'index', 36 | filterColumnName:'dummy_annotation', 37 | valuesToKeepFromFilterColumn: ['foo'] 38 | ]] 39 | } 40 | } 41 | } 42 | 43 | -------------------------------------------------------------------------------- /conf/test__compute_resources.config: -------------------------------------------------------------------------------- 1 | 2 | process { 3 | 4 | executor = 'local' 5 | 6 | /* 7 | This label is activated when using the profile "test__compute_resources", and overwrites all settings from other labels. 8 | Used primarily to keep requested resources within the allowed bounds of GitHub Actions tests. 
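In typical usage this profile is layered on top of a pipeline profile when the run configuration is generated, e.g. (hypothetical invocation; assumes the test__compute_resources profile is declared in nextflow.config): nextflow config . -profile single_sample,docker,test__compute_resources > single_sample.config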
9 | */ 10 | withLabel: 'compute_resources__.*' { 11 | cpus = 2 12 | memory = '4 GB' 13 | time = '1h' 14 | } 15 | 16 | } 17 | 18 | -------------------------------------------------------------------------------- /conf/test__decontx.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'decontx_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = 'sample_data/outs' 9 | } 10 | } 11 | tools { 12 | celda { 13 | container = 'vibsinglecellnf/celda:1.4.5' 14 | decontx { 15 | strategy = 'correct' 16 | } 17 | } 18 | } 19 | } 20 | 21 | -------------------------------------------------------------------------------- /conf/test__harmony.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'harmony_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = "testdata/*/outs/" 9 | } 10 | } 11 | tools { 12 | scanpy { 13 | filter { 14 | cellFilterMinNGenes = 1 15 | } 16 | neighborhood_graph { 17 | nPcs = 2 18 | } 19 | dim_reduction { 20 | pca { 21 | method = 'pca' 22 | nComps = 2 23 | } 24 | } 25 | clustering { 26 | method = 'louvain' 27 | resolution = 1 28 | } 29 | } 30 | } 31 | } 32 | 33 | -------------------------------------------------------------------------------- /conf/test__harmony_scenic.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'harmony_scenic_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = "testdata/*/outs/" 9 | } 10 | } 11 | utils { 12 | file_annotator { 13 | metadataFilePath = '' 14 | } 15 | } 16 | tools { 17 | file_annotator { 18 | metadataFilePath = '' 19 | } 20 | scanpy { 21 | filter { 22 | cellFilterMinNGenes = 1 23 | } 24 | neighborhood_graph { 25 | nPcs = 2 26 | } 27 | dim_reduction { 28 | pca { 29 | method = 'pca' 30 | nComps = 2 31 | } 32 | } 33 | clustering { 34 | method = 'louvain' 35 | resolution = 1 36 | } 37 | } 38 | scenic { 39 | numWorkers = 2 40 | grn { 41 | tfs = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/test_TFs_small.txt' 42 | } 43 | cistarget { 44 | motifsDb = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/genome-ranking.feather' 45 | motifsAnnotation = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/motifs.tbl' 46 | tracksDb = '' 47 | tracksAnnotation = '' 48 | } 49 | } 50 | } 51 | } 52 | 53 | -------------------------------------------------------------------------------- /conf/test__mnncorrect.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'mnncorrect_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = "testdata/*/outs/" 9 | } 10 | } 11 | tools { 12 | scanpy { 13 | filter { 14 | cellFilterMinNGenes = 1 15 | } 16 | neighborhood_graph { 17 | nPcs = 2 18 | } 19 | dim_reduction { 20 | pca { 21 | method = 'pca' 22 | nComps = 2 23 | } 24 | } 25 | clustering { 26 | method = 'louvain' 27 | resolution = 1 28 | } 29 | } 30 | } 31 | } 32 | 33 | -------------------------------------------------------------------------------- /conf/test__scenic.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'scenic_CI' 5 | } 6 | tools { 7 | file_annotator { 8 | metadataFilePath = '' 9 | } 10 | scenic { 11 | filteredLoom = 
'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/expr_mat_tiny.loom' 12 | grn { 13 | tfs = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/test_TFs_tiny.txt' 14 | } 15 | cistarget { 16 | motifsDb = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/genome-ranking.feather' 17 | motifsAnnotation = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/motifs.tbl' 18 | tracksDb = '' 19 | tracksAnnotation = '' 20 | } 21 | } 22 | } 23 | } 24 | 25 | -------------------------------------------------------------------------------- /conf/test__scenic_multiruns.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'scenic_multiruns_CI' 5 | } 6 | tools { 7 | scenic { 8 | filteredLoom = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/expr_mat_small.loom' 9 | grn { 10 | tfs = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/test_TFs_small.txt' 11 | } 12 | cistarget { 13 | motifsDb = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/genome-ranking.feather' 14 | motifsAnnotation = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/motifs.tbl' 15 | tracksDb = '' 16 | tracksAnnotation = '' 17 | } 18 | aucell { 19 | min_genes_regulon = 0 20 | min_regulon_gene_occurrence = 0 21 | } 22 | } 23 | } 24 | } 25 | 26 | -------------------------------------------------------------------------------- /conf/test__single_sample.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'single_sample_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = 'sample_data/outs' 9 | } 10 | } 11 | tools { 12 | scanpy { 13 | filter { 14 | cellFilterMinNGenes = 1 15 | } 16 | neighborhood_graph { 17 | nPcs = 2 18 | } 19 | dim_reduction { 20 | pca { 21 | method = 'pca' 22 | nComps = 2 23 | } 24 | } 25 | clustering { 26 | method = 'louvain' 27 | resolution = 1 28 | } 29 | } 30 | } 31 | } 32 | 33 | -------------------------------------------------------------------------------- /conf/test__single_sample_decontx_correct.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'single_sample_decontx_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = 'sample_data/outs' 9 | } 10 | } 11 | tools { 12 | scanpy { 13 | filter { 14 | cellFilterMinNGenes = 1 15 | } 16 | neighborhood_graph { 17 | nPcs = 10 18 | } 19 | dim_reduction { 20 | pca { 21 | method = 'pca' 22 | nComps = 10 23 | } 24 | } 25 | clustering { 26 | method = 'louvain' 27 | resolution = 1 28 | } 29 | } 30 | celda { 31 | container = 'vibsinglecellnf/celda:1.4.5' 32 | decontx { 33 | strategy = 'correct' 34 | } 35 | } 36 | } 37 | } 38 | 39 | -------------------------------------------------------------------------------- /conf/test__single_sample_decontx_correct_scrublet.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'single_sample_decontx_scrublet_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = 'sample_data/outs' 9 | } 10 | } 11 | tools { 12 | scanpy { 13 | filter { 14 | cellFilterMinNGenes = 1 15 | } 16 | neighborhood_graph { 17 | nPcs = 10 18 | } 19 | dim_reduction { 20 | pca { 21 | method = 'pca' 22 | nComps = 10 23 | } 24 | } 25 | clustering { 26 | 
method = 'louvain' 27 | resolution = 1 28 | } 29 | } 30 | celda { 31 | container = 'vibsinglecellnf/celda:1.4.5' 32 | decontx { 33 | strategy = 'correct' 34 | } 35 | } 36 | scrublet { 37 | container = 'vibsinglecellnf/scrublet:0.2.3' 38 | labels { 39 | processExecutor = 'local' 40 | } 41 | doublet_detection { 42 | report_ipynb = '/src/scrublet/bin/reports/sc_doublet_detection_report.ipynb' 43 | useVariableFeatures = 'False' 44 | technology = '10x' 45 | off = 'h5ad' 46 | } 47 | cell_annotate { 48 | off = 'h5ad' 49 | method = 'obo' 50 | indexColumnName = 'index' 51 | } 52 | cell_filter { 53 | off = 'h5ad' 54 | method = 'internal' 55 | filters = [ 56 | [ 57 | id:'NO_DOUBLETS', 58 | sampleColumnName: 'sample_id', 59 | filterColumnName:'scrublet__predicted_doublets_based_on_10x_chromium_spec', 60 | valuesToKeepFromFilterColumn:['False'] 61 | ] 62 | ] 63 | } 64 | } 65 | } 66 | } 67 | 68 | -------------------------------------------------------------------------------- /conf/test__single_sample_decontx_filter.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'single_sample_decontx_filter_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = 'sample_data/outs' 9 | } 10 | } 11 | tools { 12 | scanpy { 13 | filter { 14 | cellFilterMinNGenes = 1 15 | } 16 | neighborhood_graph { 17 | nPcs = 10 18 | } 19 | dim_reduction { 20 | pca { 21 | method = 'pca' 22 | nComps = 10 23 | } 24 | } 25 | clustering { 26 | method = 'louvain' 27 | resolution = 1 28 | } 29 | } 30 | celda { 31 | container = 'vibsinglecellnf/celda:1.4.5' 32 | decontx { 33 | cell_filter { 34 | off = 'h5ad' 35 | method = 'internal' 36 | filters = [[id:'DECONTX_FILTERED', sampleColumnName:'sample_id', filterColumnName:'celda_decontx__doublemad_predicted_outliers', valuesToKeepFromFilterColumn:['False']]] 37 | } 38 | strategy = 'filter' 39 | cell_annotate { 40 | off = 'h5ad' 41 | method = 'obo' 42 | indexColumnName = 'index' 43 | } 44 | filters { 45 | numMadsThresholds = [3] 46 | contaminationScoreThresholds = [0.5] 47 | } 48 | } 49 | } 50 | } 51 | } 52 | 53 | -------------------------------------------------------------------------------- /conf/test__single_sample_param_exploration.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'single_sample_param_exploration_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = 'sample_data/outs' 9 | } 10 | } 11 | tools { 12 | scanpy { 13 | filter { 14 | cellFilterMinNGenes = 1 15 | } 16 | neighborhood_graph { 17 | nPcs = 2 18 | } 19 | dim_reduction { 20 | pca { 21 | method = 'pca' 22 | nComps = 2 23 | } 24 | } 25 | clustering { 26 | resolutions = [1.0,1.2] 27 | } 28 | } 29 | } 30 | } 31 | 32 | -------------------------------------------------------------------------------- /conf/test__single_sample_scenic.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'single_sample_scenic_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = 'sample_data/outs' 9 | } 10 | } 11 | tools { 12 | scanpy { 13 | filter { 14 | cellFilterMinNGenes = 1 15 | } 16 | neighborhood_graph { 17 | nPcs = 2 18 | } 19 | dim_reduction { 20 | pca { 21 | method = 'pca' 22 | nComps = 2 23 | } 24 | } 25 | clustering { 26 | method = 'louvain' 27 | resolution = 1 28 | } 29 | } 30 | scenic { 31 | grn { 32 | tfs = 
'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/test_TFs_small.txt' 33 | } 34 | cistarget { 35 | motifsDb = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/genome-ranking.feather' 36 | motifsAnnotation = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/motifs.tbl' 37 | tracksDb = '' 38 | tracksAnnotation = '' 39 | } 40 | } 41 | } 42 | } 43 | 44 | -------------------------------------------------------------------------------- /conf/test__single_sample_scenic_multiruns.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'single_sample_scenic_multiruns_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = 'sample_data/outs' 9 | } 10 | } 11 | tools { 12 | scanpy { 13 | filter { 14 | cellFilterMinNGenes = 1 15 | } 16 | neighborhood_graph { 17 | nPcs = 2 18 | } 19 | dim_reduction { 20 | pca { 21 | method = 'pca' 22 | nComps = 2 23 | } 24 | } 25 | clustering { 26 | method = 'louvain' 27 | resolution = 1 28 | } 29 | } 30 | scenic { 31 | //filteredLoom = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/expr_mat_small.loom' 32 | grn { 33 | tfs = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/test_TFs_small.txt' 34 | } 35 | cistarget { 36 | motifsDb = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/genome-ranking.feather' 37 | motifsAnnotation = 'https://raw.githubusercontent.com/aertslab/SCENICprotocol/master/example/motifs.tbl' 38 | tracksDb = '' 39 | tracksAnnotation = '' 40 | } 41 | aucell { 42 | min_genes_regulon = 0 43 | min_regulon_gene_occurrence = 0 44 | } 45 | } 46 | } 47 | } 48 | 49 | 50 | -------------------------------------------------------------------------------- /conf/test__single_sample_scrublet.config: -------------------------------------------------------------------------------- 1 | 2 | params { 3 | global { 4 | project_name = 'single_sample_scrublet_CI' 5 | } 6 | data { 7 | tenx { 8 | cellranger_mex = 'sample_data/outs' 9 | } 10 | } 11 | tools { 12 | scanpy { 13 | filter { 14 | cellFilterMinNGenes = 1 15 | } 16 | neighborhood_graph { 17 | nPcs = 10 18 | } 19 | dim_reduction { 20 | pca { 21 | method = 'pca' 22 | nComps = 10 23 | } 24 | } 25 | clustering { 26 | method = 'louvain' 27 | resolution = 1 28 | } 29 | } 30 | scrublet { 31 | container = 'vibsinglecellnf/scrublet:0.2.3' 32 | labels { 33 | processExecutor = 'local' 34 | } 35 | doublet_detection { 36 | report_ipynb = '/src/scrublet/bin/reports/sc_doublet_detection_report.ipynb' 37 | useVariableFeatures = 'False' 38 | technology = '10x' 39 | off = 'h5ad' 40 | } 41 | cell_annotate { 42 | off = 'h5ad' 43 | method = 'obo' 44 | indexColumnName = 'index' 45 | } 46 | cell_filter { 47 | off = 'h5ad' 48 | method = 'internal' 49 | filters = [ 50 | [ 51 | id:'NO_DOUBLETS', 52 | sampleColumnName: 'sample_id', 53 | filterColumnName:'scrublet__predicted_doublets_based_on_10x_chromium_spec', 54 | valuesToKeepFromFilterColumn:['False'] 55 | ] 56 | ] 57 | } 58 | } 59 | } 60 | } 61 | 62 | -------------------------------------------------------------------------------- /conf/test_disabled.config: -------------------------------------------------------------------------------- 1 | params { 2 | misc { 3 | test { 4 | enabled = false 5 | } 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /conf/vpcx.config: 
-------------------------------------------------------------------------------- 1 | vpcx { 2 | docker.enabled = true 3 | docker.runOptions = "-i -v /app:/app -v /root/:/root" 4 | docker.registry = "itx-aiv.artifactrepo.jnj.com/" 5 | } -------------------------------------------------------------------------------- /conf/vsc.config: -------------------------------------------------------------------------------- 1 | singularity { 2 | enabled = true 3 | autoMounts = true 4 | runOptions = '--cleanenv -H $PWD -B /lustre1,/staging,/data,${VSC_SCRATCH},${VSC_SCRATCH}/tmp:/tmp,${HOME}/.nextflow/assets/' 5 | cacheDir = 'PUMATAC_dependencies/cache' 6 | } 7 | 8 | vsc { 9 | enabled = true 10 | } 11 | 12 | -------------------------------------------------------------------------------- /data/10x/1k_pbmc/metadata.tsv: -------------------------------------------------------------------------------- 1 | id chromium_chemistry 2 | 1k_pbmc_v2_chemistry v2 3 | 1k_pbmc_v3_chemistry v3 4 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | # Datasets 2 | 3 | Public datasets that can be used to test one of the pipelines. 4 | Start by creating a working directory to contain the data, intermediate Nextflow files, and final analysis outputs: 5 | ```bash 6 | mkdir single_sample_test && cd single_sample_test 7 | ``` 8 | 9 | # 10x Genomics 10 | 11 | Some 10x datasets that can be used to run the `single_sample` pipeline: 12 | - 1k PBMCs from a Healthy Donor (v2 chemistry) 13 | ``` 14 | wget http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_v2/pbmc_1k_v2_filtered_feature_bc_matrix.tar.gz 15 | mkdir -p data/10x/1k_pbmc/1k_pbmc_v2_chemistry/outs/ 16 | tar -xzvf pbmc_1k_v2_filtered_feature_bc_matrix.tar.gz -C data/10x/1k_pbmc/1k_pbmc_v2_chemistry/outs/ 17 | ``` 18 | - 1k PBMCs from a Healthy Donor (v3 chemistry) 19 | ``` 20 | wget http://cf.10xgenomics.com/samples/cell-exp/3.0.0/pbmc_1k_v3/pbmc_1k_v3_filtered_feature_bc_matrix.tar.gz 21 | mkdir -p data/10x/1k_pbmc/1k_pbmc_v3_chemistry/outs/ 22 | tar -xzvf pbmc_1k_v3_filtered_feature_bc_matrix.tar.gz -C data/10x/1k_pbmc/1k_pbmc_v3_chemistry/outs/ 23 | ``` 24 | 25 | Download the small metadata file to annotate the samples: 26 | ``` 27 | wget https://raw.githubusercontent.com/vib-singlecell-nf/vsn-pipelines/master/data/10x/1k_pbmc/metadata.tsv -O data/10x/1k_pbmc/metadata.tsv 28 | ``` 29 | 30 | If these links do not work, you can always download the datasets from https://support.10xgenomics.com/single-cell-gene-expression/datasets. 31 | 32 | -------------------------------------------------------------------------------- /data/sample_data_tiny/sample_data_tiny_dummy_annotation.tsv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/data/sample_data_tiny/sample_data_tiny_dummy_annotation.tsv.gz -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/attributions.rst: -------------------------------------------------------------------------------- 1 | Attributions 2 | ============ 3 | 4 | 5 | VSN-Pipelines is a collection of workflows targeted toward the analysis of single cell data. 6 | VSN depends on, and takes functions from, many tools developed both internally and externally; these are listed here. 7 | 8 | Tools 9 | ---------------------------------------------------- 10 | 11 | 12 | - `GreenleafLab/ArchR `_ 13 | - `caleblareau/bap `_ 14 | - `lh3/bwa `_ 15 | - `Samtools `_ 16 | - `campbio/celda `_ 17 | - Directs 18 | - `DropletUtils `_ 19 | - `Drop-seq Tools `_ 20 | - `EDirect `_ 21 | - `OpenGene/fastp `_ 22 | - `hangnoh/flybaseR `_ 23 | - `dweemx/flybaseR `_ 24 | - `immunogenomics/harmony `_ 25 | - pcacv 26 | - `Picard `_ 27 | - `statgen/popscle `_ 28 | - `aertslab/popscle_helper_tools `_ 29 | - `aertslab/cisTopic `_ 30 | - `theislab/scanpy `_ 31 | - `aertslab/pySCENIC `_ 32 | - `aertslab/SCENIC `_ 33 | - `swolock/scrublet `_ 34 | - `aertslab/single_cell_toolkit `_ 35 | - `timoast/sinto `_ 36 | - `constantAmateur/SoupX `_ 37 | - `ncbi/sra-tools `_ 38 | - `alexdobin/STAR `_ 39 | - `Trim Galore `_ 40 | 41 | -------------------------------------------------------------------------------- /docs/case-studies.rst: -------------------------------------------------------------------------------- 1 | Case Studies 2 | ============= 3 | 4 | See the full list of case studies and examples at `VSN-Pipelines-examples `_. 5 | 6 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'VSN-Pipelines' 21 | copyright = '2020, Kristofer Davie, Maxime De Waegeneer, Christopher Flerin' 22 | author = 'Kristofer Davie, Maxime De Waegeneer, Christopher Flerin' 23 | 24 | 25 | # -- General configuration --------------------------------------------------- 26 | 27 | # Add any Sphinx extension module names here, as strings. They can be 28 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 29 | # ones. 30 | extensions = [ 31 | ] 32 | 33 | # Add any paths that contain templates here, relative to this directory. 
34 | templates_path = ['_templates'] 35 | 36 | # List of patterns, relative to source directory, that match files and 37 | # directories to ignore when looking for source files. 38 | # This pattern also affects html_static_path and html_extra_path. 39 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 40 | 41 | # Resolve issue with RTD build 42 | master_doc = 'index' 43 | 44 | 45 | # -- Options for HTML output ------------------------------------------------- 46 | 47 | # The theme to use for HTML and HTML Help pages. See the documentation for 48 | # a list of builtin themes. 49 | # 50 | html_theme = 'sphinx_rtd_theme' 51 | 52 | # Add any paths that contain custom static files (such as style sheets) here, 53 | # relative to this directory. They are copied after the builtin static files, 54 | # so a file named "default.css" will overwrite the builtin "default.css". 55 | html_static_path = ['_static'] -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. VSN-Pipelines documentation master file, created by 2 | sphinx-quickstart on Tue Feb 11 13:06:44 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | :hidden: 9 | 10 | Home 11 | getting-started 12 | input_formats 13 | pipelines 14 | features 15 | case-studies 16 | 17 | .. toctree:: 18 | :maxdepth: 2 19 | :hidden: 20 | :caption: scATAC-seq 21 | 22 | scatac-seq 23 | scatac-seq_qc 24 | 25 | .. toctree:: 26 | :maxdepth: 2 27 | :hidden: 28 | :caption: Development 29 | 30 | development 31 | attributions 32 | 33 | 34 | .. include:: ../README.rst 35 | 36 | .. Indices and tables 37 | .. ================== 38 | 39 | .. * :ref:`genindex` 40 | .. * :ref:`modindex` 41 | .. 
* :ref:`search` 42 | -------------------------------------------------------------------------------- /main_atac.nf: -------------------------------------------------------------------------------- 1 | import static groovy.json.JsonOutput.* 2 | 3 | nextflow.enable.dsl=2 4 | 5 | include { 6 | INIT; 7 | } from './src/utils/workflows/utils' params(params) 8 | 9 | INIT(params) 10 | 11 | include { 12 | SC__FILE_CONVERTER; 13 | } from './src/utils/processes/utils' params(params) 14 | 15 | include { 16 | getDataChannel; 17 | } from './src/channels/channels' params(params) 18 | 19 | /* 20 | ATAC-seq pipelines 21 | */ 22 | 23 | 24 | // runs mkfastq, then cellranger-atac count: 25 | workflow cellranger_atac { 26 | 27 | include { 28 | CELLRANGER_ATAC 29 | } from './src/cellranger-atac/main.nf' params(params) 30 | 31 | CELLRANGER_ATAC( 32 | file(params.tools.cellranger_atac.mkfastq.csv), 33 | file(params.tools.cellranger_atac.mkfastq.runFolder), 34 | file(params.tools.cellranger_atac.count.reference) 35 | ) 36 | 37 | } 38 | 39 | 40 | workflow atac_preprocess { 41 | 42 | // generic ATAC-seq preprocessing pipeline: adapter trimming, mapping, fragments file generation 43 | include { 44 | ATAC_PREPROCESS; 45 | } from './workflows/atac/preprocess.nf' params(params) 46 | 47 | ATAC_PREPROCESS(file(params.data.atac_preprocess.metadata)) 48 | 49 | } 50 | 51 | 52 | workflow atac_preprocess_bap { 53 | 54 | include { 55 | ATAC_PREPROCESS; 56 | } from './workflows/atac/preprocess.nf' params(params) 57 | include { 58 | BAP__BARCODE_MULTIPLET_WF; 59 | } from './src/bap/main.nf' params(params) 60 | 61 | ATAC_PREPROCESS(file(params.data.atac_preprocess.metadata)) | 62 | get_bam | 63 | BAP__BARCODE_MULTIPLET_WF 64 | 65 | } 66 | 67 | workflow atac_preprocess_rapid { 68 | 69 | include { 70 | ATAC_PREPROCESS_RAPID; 71 | } from './workflows/atac/preprocess_rapid.nf' params(params) 72 | //include { 73 | // BARCARD__FRAGMENTS_POSTPROCESSING; 74 | //} from './src/barcard/main.nf' params(params) 75 | 76 | ATAC_PREPROCESS_RAPID(file(params.data.atac_preprocess.metadata)) 77 | //ATAC_PREPROCESS_RAPID(file(params.data.atac_preprocess.metadata)) | 78 | // get_bam | 79 | // BARCARD__FRAGMENTS_POSTPROCESSING 80 | 81 | } 82 | 83 | 84 | workflow bap { 85 | include { 86 | BAP__BARCODE_MULTIPLET_WF; 87 | } from './src/bap/main.nf' params(params) 88 | 89 | getDataChannel | BAP__BARCODE_MULTIPLET_WF 90 | 91 | } 92 | 93 | 94 | /* 95 | QC 96 | */ 97 | workflow atac_qc_filtering { 98 | 99 | include { 100 | ATAC_QC_PREFILTER; 101 | } from './workflows/atac/qc_filtering.nf' params(params) 102 | 103 | getDataChannel | ATAC_QC_PREFILTER 104 | 105 | } 106 | 107 | workflow atac_preprocess_with_qc { 108 | 109 | // generic ATAC-seq preprocessing pipeline: adapter trimming, mapping, fragments file generation 110 | include { 111 | ATAC_PREPROCESS; 112 | } from './workflows/atac/preprocess.nf' params(params) 113 | include { 114 | ATAC_QC_PREFILTER; 115 | } from './workflows/atac/qc_filtering.nf' params(params) 116 | 117 | pp = ATAC_PREPROCESS(file(params.data.atac_preprocess.metadata)) 118 | ATAC_QC_PREFILTER(pp.bam.mix(pp.fragments)) 119 | 120 | } 121 | 122 | workflow atac_preprocess_freemuxlet { 123 | 124 | // generic ATAC-seq preprocessing pipeline: adapter trimming, mapping, fragments file generation 125 | include { 126 | ATAC_PREPROCESS_WITH_METADATA; 127 | } from './workflows/atac/preprocess.nf' params(params) 128 | include { 129 | freemuxlet as FREEMUXLET; 130 | } from './workflows/popscle' params(params) 131 | 132 | 
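// Data-flow sketch (assumptions: ATAC_PREPROCESS_WITH_METADATA exposes a named
// `bam` emission, as referenced below, and the FREEMUXLET sub-workflow wraps
// popscle dsc-pileup followed by freemuxlet): each sample's preprocessed BAM
// is passed straight into genotype-free demultiplexing.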
ATAC_PREPROCESS_WITH_METADATA(file(params.tools.atac.preprocess.metadata)) 133 | FREEMUXLET(ATAC_PREPROCESS_WITH_METADATA.out.bam) 134 | } 135 | 136 | -------------------------------------------------------------------------------- /src/barcard/barcard.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | barcard { 4 | container = 'vibsinglecellnf/bap:2021-04-27-3b48f4b' 5 | } 6 | } 7 | } 8 | 9 | -------------------------------------------------------------------------------- /src/barcard/bin/barcard_otsu_filtering_test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "7be3b9b5-12dc-4ed5-a80c-bdb8a80facf6", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [] 10 | } 11 | ], 12 | "metadata": { 13 | "kernelspec": { 14 | "display_name": "20220609_pycistopic.sif", 15 | "language": "python", 16 | "name": "20220609_pycistopic" 17 | }, 18 | "language_info": { 19 | "codemirror_mode": { 20 | "name": "ipython", 21 | "version": 3 22 | }, 23 | "file_extension": ".py", 24 | "mimetype": "text/x-python", 25 | "name": "python", 26 | "nbconvert_exporter": "python", 27 | "pygments_lexer": "ipython3", 28 | "version": "3.8.13" 29 | } 30 | }, 31 | "nbformat": 4, 32 | "nbformat_minor": 5 33 | } 34 | -------------------------------------------------------------------------------- /src/barcard/conf/barcard_barcode_multiplet.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | barcard { 4 | barcode_multiplet { 5 | report_ipynb = '/src/barcard/bin/barcard_otsu_filtering.ipynb' 6 | } 7 | } 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/barcard/main.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | ////////////////////////////////////////////////////// 4 | // Import sub-workflows from the modules: 5 | 6 | include { 7 | CREATE_FRAGMENTS_FROM_BAM as BARCARD__CREATE_FRAGMENTS_FROM_BAM 8 | } from './processes/create_fragments_from_bam.nf' params(params) 9 | 10 | include { 11 | DETECT_BARCODE_MULTIPLETS as BARCARD__DETECT_BARCODE_MULTIPLETS; 12 | } from './processes/detect_barcode_multiplets.nf' params(params) 13 | 14 | include { 15 | MERGE_BARCODE_MULTIPLETS as BARCARD__MERGE_BARCODE_MULTIPLETS; 16 | } from './processes/merge_barcode_multiplets.nf' params(params) 17 | 18 | include { 19 | GENERATE_REPORT; 20 | REPORT_TO_HTML; 21 | } from './processes/report.nf' params(params) 22 | 23 | ////////////////////////////////////////////////////// 24 | // Import sub-workflows from the modules: 25 | 26 | include { 27 | BWAMAPTOOLS__INDEX_BED; 28 | } from './../../src/bwamaptools/processes/index.nf' params(params) 29 | include { 30 | PUBLISH as PUBLISH_FRAGMENTS; 31 | PUBLISH as PUBLISH_FRAGMENTS_INDEX; 32 | } from "../utils/workflows/utils.nf" params(params) 33 | 34 | 35 | ////////////////////////////////////////////////////// 36 | // Define the workflow 37 | 38 | workflow BAM_TO_FRAGMENTS { 39 | 40 | take: 41 | bam 42 | 43 | main: 44 | 45 | // sampleID, frag, frag idx 46 | fragments = BARCARD__CREATE_FRAGMENTS_FROM_BAM(bam) 47 | 48 | //fragments_sort = SINTO__SORT_FRAGMENTS(fragments) 49 | //index = BWAMAPTOOLS__INDEX_BED(fragments_sort) 50 | 51 | // join bed index into the fragments channel: 52 | //fragments_out = fragments_sort.join(index) 53 | 54 
| emit: 55 | fragments 56 | //fragments_out 57 | 58 | } 59 | 60 | 61 | workflow DETECT_BARCODE_MULTIPLETS { 62 | 63 | take: 64 | fragments 65 | 66 | main: 67 | 68 | // barcard_multiplets = BARCARD__DETECT_BARCODE_MULTIPLETS(fragments.map { it -> tuple(it[0], it[1][0], it[1][1]) }) 69 | barcard_multiplets = BARCARD__DETECT_BARCODE_MULTIPLETS(fragments.map { it -> tuple(it[0], it[1]) }) 70 | 71 | //GENERATE_REPORT( 72 | // file(workflow.projectDir + params.tools.barcard.barcode_multiplet.report_ipynb), 73 | // barcard_multiplets.map { it -> tuple(it[0], it[3]) }, 74 | // "BARCARD__multiplet_report" 75 | //) | 76 | //REPORT_TO_HTML 77 | 78 | GENERATE_REPORT( 79 | file(workflow.projectDir + params.tools.barcard.barcode_multiplet.report_ipynb), 80 | barcard_multiplets.map { it -> tuple(it[0], it[1]) } 81 | //"BARCARD__otsu_filtering_report" 82 | ) | 83 | REPORT_TO_HTML 84 | 85 | emit: 86 | barcard_multiplets 87 | 88 | } 89 | -------------------------------------------------------------------------------- /src/barcard/processes/create_fragments_from_bam.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | // binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" 4 | 5 | 6 | process CREATE_FRAGMENTS_FROM_BAM { 7 | //container params.tools.barcard.container 8 | container "vibsinglecellnf/singlecelltoolkit:2024-04-09-62429e9" 9 | label 'compute_resources__barcard__create_fragments_from_bam' 10 | publishDir "${params.global.outdir}/data/fragments", mode: 'copy' 11 | 12 | input: 13 | tuple val(sampleId), 14 | path(bam) //, 15 | // path(bai) 16 | 17 | output: 18 | tuple val(sampleId), 19 | path("${sampleId}.fragments.raw.tsv.gz"), 20 | path("${sampleId}.fragments.raw.tsv.gz.tbi") 21 | 22 | script: 23 | //def sampleParams = params.parseConfig(sampleId, params.global) 24 | //processParams = sampleParams.local 25 | """ 26 | set -euo pipefail 27 | 28 | create_fragments_file --bam "${bam}" --fragments "${sampleId}.fragments.raw.tsv.gz" 29 | 30 | tabix -p bed "${sampleId}.fragments.raw.tsv.gz" 31 | """ 32 | } 33 | -------------------------------------------------------------------------------- /src/barcard/processes/detect_barcode_multiplets.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | // binDir = !params.containsKey("test") ?
"${workflow.projectDir}/src/template/bin/" : "" 4 | 5 | 6 | process DETECT_BARCODE_MULTIPLETS { 7 | //container params.tools.barcard.container 8 | container "vibsinglecellnf/singlecelltoolkit:2022-07-07-0638c1d" 9 | label 'compute_resources__barcard__detect_barcode_multiplets' 10 | publishDir "${params.global.outdir}/data/reports/barcard/", mode: 'copy' 11 | 12 | input: 13 | tuple val(sampleId), 14 | path(fragments) 15 | 16 | output: 17 | tuple val(sampleId), 18 | //path(fragments), 19 | path("${sampleId}.barcard.overlap.tsv") 20 | 21 | script: 22 | //def sampleParams = params.parseConfig(sampleId, params.global) 23 | //processParams = sampleParams.local 24 | """ 25 | set -euo pipefail 26 | 27 | chromosome_regex='^(chr)?([0-9]+|[XY])\$' 28 | calculate_jaccard_index_cbs.py -i ${fragments} -o ${sampleId}.barcard.overlap.tsv -t 1000 -c \${chromosome_regex} 29 | """ 30 | } 31 | -------------------------------------------------------------------------------- /src/barcard/processes/detect_barcode_multiplets.nf_new: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | // binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" 4 | 5 | 6 | process DETECT_BARCODE_MULTIPLETS { 7 | //container params.tools.barcard.container 8 | container "vibsinglecellnf/singlecelltoolkit:2022-07-07-0638c1d" 9 | label 'compute_resources__barcard__detect_barcode_multiplets' 10 | publishDir "${params.global.outdir}/data/reports/barcard/", mode: 'copy' 11 | 12 | input: 13 | tuple val(sampleId), 14 | path(fragments) 15 | 16 | output: 17 | tuple val(sampleId), 18 | //path(fragments), 19 | path("${sampleId}.barcard.overlap.tsv") 20 | 21 | script: 22 | //def sampleParams = params.parseConfig(sampleId, params.global) 23 | //processParams = sampleParams.local 24 | """ 25 | set -euo pipefail 26 | 27 | chromosome_regex='^(chr)?([0-9]+|[XY])\$' 28 | calculate_jaccard_index_cbs.py -i ${fragments} -o ${sampleId}.barcard.overlap.tsv -t 1000 -c \${chromosome_regex} 29 | """ 30 | } 31 | -------------------------------------------------------------------------------- /src/barcard/processes/detect_barcode_multiplets.nf_old: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | // binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" 4 | 5 | 6 | process DETECT_BARCODE_MULTIPLETS { 7 | //container params.tools.barcard.container 8 | container "vibsinglecellnf/singlecelltoolkit:2022-07-07-0638c1d" 9 | label 'compute_resources__barcard__detect_barcode_multiplets' 10 | publishDir "${params.global.outdir}/data/reports/barcard/", mode: 'copy' 11 | 12 | input: 13 | tuple val(sampleId), 14 | path(fragments) 15 | 16 | output: 17 | tuple val(sampleId), 18 | //path(fragments), 19 | path("${sampleId}.barcard.overlap.tsv") 20 | 21 | script: 22 | //def sampleParams = params.parseConfig(sampleId, params.global) 23 | //processParams = sampleParams.local 24 | """ 25 | set -euo pipefail 26 | 27 | calculate_jaccard_index_cbs.py -i ${fragments} -o ${sampleId}.barcard.overlap.tsv -t 1000 28 | """ 29 | } 30 | -------------------------------------------------------------------------------- /src/barcard/processes/merge_barcode_multiplets.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | // binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/template/bin/" : "" 4 | 5 | 6 | process MERGE_BARCODE_MULTIPLETS { 7 | //container params.tools.barcard.container 8 | container "vibsinglecellnf/singlecelltoolkit:2022-07-07-0638c1d" 9 | label 'compute_resources__barcard__merge_barcode_multiplets' 10 | publishDir "${params.global.outdir}/data/fragments", mode: 'copy' 11 | 12 | input: 13 | tuple val(sampleId), 14 | path(bam) //, 15 | // path(bai) 16 | 17 | output: 18 | tuple val(sampleId), 19 | path("${sampleId}.fragments.raw.tsv.gz"), 20 | path("${sampleId}.fragments.raw.tsv.gz.tbi") 21 | 22 | script: 23 | //def sampleParams = params.parseConfig(sampleId, params.global) 24 | //processParams = sampleParams.local 25 | """ 26 | set -euo pipefail 27 | 28 | create_fragments_file \ 29 | "${bam}" \ 30 | _unused \ 31 | | coreutils sort --parallel=8 -S 16G -k 1,1V -k 2,2n -k 3,3n -k 4,4 \ 32 | | uniq -c \ 33 | | mawk -v 'OFS=\t' '{ print \$2, \$3, \$4, \$5, \$1 }' \ 34 | | bgzip -@ 4 -c /dev/stdin \ 35 | > ${sampleId}.fragments.raw.tsv.gz 36 | 37 | tabix -p bed ${sampleId}.fragments.raw.tsv.gz 38 | """ 39 | } 40 | 41 | 42 | 43 | nextflow.enable.dsl=2 44 | 45 | // binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" 46 | -------------------------------------------------------------------------------- /src/barcard/processes/report.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | import java.nio.file.Paths 4 | import static groovy.json.JsonOutput.* 5 | 6 | toolParams = params.tools.barcard 7 | 8 | process GENERATE_REPORT { 9 | container "vibsinglecellnf/bap:2021-04-27-3b48f4b" 10 | publishDir "${params.global.outdir}/notebooks/", mode: params.utils.publish.mode 11 | label 'compute_resources__report' 12 | 13 | input: 14 | path(ipynb) 15 | tuple val(sampleId), 16 | path("${sampleId}.barcard.overlap.tsv") 17 | //val(reportTitle) 18 | 19 | output: 20 | tuple val(sampleId), 21 | path("${sampleId}.barcard_otsu.ipynb"), 22 | path("${sampleId}.barcard_kneeplot.png"), 23 | path("${sampleId}.barcard.overlap.otsu_filtered.tsv") 24 | 25 | script: 26 | //def sampleParams = params.parseConfig(sampleId) 27 | //processParams = sampleParams 28 | //barcardParams = toJson(processParams) 29 | 30 | //def sampleParams = params.parseConfig(sampleId, params.global, toolParams.barcode_multiplet) 31 | //processParams = sampleParams.local 32 | //barcard_params = toJson(processParams) 33 | """ 34 | mkdir .cache/ 35 | mkdir .cache/black/ 36 | mkdir .cache/black/21.4b1/ 37 | 38 | papermill ${ipynb} \ 39 | ${sampleId}.barcard_otsu.ipynb \ 40 | --report-mode \ 41 | -p SAMPLE ${sampleId} \ 42 | -p BARCARD_OVERLAP_TSV '${sampleId}.barcard.overlap.tsv' 43 | """ 44 | } 45 | 46 | 47 | process REPORT_TO_HTML { 48 | container "vibsinglecellnf/bap:2021-04-27-3b48f4b" 49 | publishDir "${params.global.outdir}/notebooks/", mode: params.utils.publish.mode 50 | label 'compute_resources__report' 51 | 52 | input: 53 | tuple val(sampleId), 54 | path(ipynb) 55 | 56 | output: 57 | file("*.html") 58 | 59 | script: 60 | """ 61 | jupyter nbconvert ${ipynb} --to html 62 | """ 63 | } 64 | 65 | -------------------------------------------------------------------------------- /src/bwamaptools/.gitattributes: -------------------------------------------------------------------------------- 1 | notebooks/* linguist-vendored 2 | -------------------------------------------------------------------------------- /src/bwamaptools/.gitignore: 
-------------------------------------------------------------------------------- 1 | *checkpoint.ipynb 2 | *checkpoint* 3 | *checkpoint.py 4 | *.test.ipynb 5 | *.csv 6 | *.loom 7 | *.pickle 8 | *.pyc 9 | *.html 10 | *egg* 11 | .vscode 12 | .nextflow 13 | .nextflow* 14 | data 15 | refdata 16 | work 17 | out/notebooks 18 | src/scenic/out 19 | src/scenic/notebooks 20 | src/scenic/data 21 | refdata 22 | data/10x/tiny 23 | work/ 24 | out/ 25 | tests/ 26 | debug/ 27 | *.swp 28 | *.swo 29 | -------------------------------------------------------------------------------- /src/bwamaptools/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM vibsinglecellnf/samtools:0.3-1.15.1 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | 5 | # install bwa 6 | RUN git clone https://github.com/lh3/bwa.git && \ 7 | cd bwa && \ 8 | make && \ 9 | mv /bwa/bwa /usr/local/bin/ 10 | 11 | # install bwa-mem2 12 | ENV BWAMEM2_VER 2.2.1 13 | RUN cd /tmp && \ 14 | curl -L https://github.com/bwa-mem2/bwa-mem2/releases/download/v${BWAMEM2_VER}/bwa-mem2-${BWAMEM2_VER}_x64-linux.tar.bz2 \ 15 | | tar jxf - --no-same-owner && \ 16 | mv bwa-mem2-${BWAMEM2_VER}_x64-linux/bwa-mem2* /usr/local/bin 17 | 18 | RUN rm -rf /var/cache/apt/* && \ 19 | rm -rf /var/lib/apt/lists/* && \ 20 | ldconfig 21 | 22 | -------------------------------------------------------------------------------- /src/bwamaptools/README.rst: -------------------------------------------------------------------------------- 1 | 2 | BWA maptools module 3 | =================== 4 | 5 | This repository contains an implementation of BWA for VIB-SingleCell-NF (VSN) pipelines, along with several supporting tools (htslib, samtools). 6 | See `lh3/bwa <https://github.com/lh3/bwa>`_ for the original source. 7 | 8 | To build the Docker image 9 | ------------------------- 10 | 11 | Image tag format: ``-``. 12 | 13 | .. code:: bash 14 | 15 | docker build -t vibsinglecellnf/bwamaptools:bwa-mem2-2.2.1-zlibng-2.0.6 . 16 | podman build -t vibsinglecellnf/bwamaptools:bwa-mem2-2.2.1-zlibng-2.0.6 . 17 | 18 | This image uses the ``vibsinglecellnf/samtools`` image as a base. 19 | 20 | -------------------------------------------------------------------------------- /src/bwamaptools/bin/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/bwamaptools/bin/.gitkeep -------------------------------------------------------------------------------- /src/bwamaptools/bin/mapping_summary.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | sampleId="${1}"; 4 | bam="${2}"; 5 | 6 | if [ ${#@} -ne 2 ] ; then 7 | printf 'Usage: mapping_summary.sh sampleId bam_file\n' >&2; 8 | exit 1; 9 | fi 10 | 11 | 12 | # Get mapping statistics from BAM file: 13 | # - Read BAM file and write uncompressed BAM. 14 | # - Uncompressed BAM file is written to each samtools command with tee (writes to each specified file and stdout). 15 | # - samtools commands: 16 | # - Get samtools statistics with: 17 | # samtools stat "${bam}" > "${sampleId}.stat" 18 | # - Uniquely mapped reads (BWA): 19 | # samtools view -c -F 0x4 -F 0x100 -F 0x800 -e '! [XA] && ! [SA]' "${bam}" 20 | # - Fraction of total read pairs mapped confidently to genome (>30 mapq): 21 | # samtools view -c -F 0x4 -F 0x100 -F 0x800 -q 30 "${bam}" 22 | # - Only use threads for "samtools stat".
Using it with any of the other samtools commands 23 | # makes everything slower than not using any threads at all. 24 | samtools view -u "${bam}" \ 25 | | tee \ 26 | >(samtools view -c -F 0x4 -F 0x100 -F 0x800 -e '! [XA] && ! [SA]' - > "${sampleId}.uniquely_mapped_reads.txt") \ 27 | >(samtools view -c -F 0x4 -F 0x100 -F 0x800 -q 30 - > "${sampleId}.fraction_total_read_pairs.txt") \ 28 | | samtools stat -@ 2 - > "${sampleId}.stat" 29 | 30 | 31 | # Output file: 32 | printf "\t${sampleId}\n" > "${sampleId}.mapping_stats.tsv"; 33 | 34 | grep '^SN' "${sampleId}.stat" | cut -f 2,3 >> "${sampleId}.mapping_stats.tsv"; 35 | 36 | printf "Uniquely mapped reads:\t" >> "${sampleId}.mapping_stats.tsv"; 37 | cat "${sampleId}.uniquely_mapped_reads.txt" >> "${sampleId}.mapping_stats.tsv"; 38 | 39 | printf "Reads mapped with MAPQ>30:\t" >> "${sampleId}.mapping_stats.tsv"; 40 | cat "${sampleId}.fraction_total_read_pairs.txt" >> "${sampleId}.mapping_stats.tsv"; 41 | 42 | rm "${sampleId}.uniquely_mapped_reads.txt" "${sampleId}.fraction_total_read_pairs.txt"; 43 | -------------------------------------------------------------------------------- /src/bwamaptools/bwamaptools.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | bwamaptools { 4 | container = 'vibsinglecellnf/bwamaptools:bwa-mem2-2.2.1-zlibng-2.0.6' 5 | } 6 | } 7 | } 8 | 9 | -------------------------------------------------------------------------------- /src/bwamaptools/conf/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/bwamaptools/conf/.gitkeep -------------------------------------------------------------------------------- /src/bwamaptools/conf/bwa_mapping.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | bwamaptools { 4 | bwa_fasta = 'PUMATAC_dependencies/genomes/hg38_bwamem2/genome.fa' 5 | bwa_version = 'bwa-mem2' 6 | } 7 | } 8 | } 9 | 10 | // define computing resources via process labels 11 | process { 12 | withLabel: 'compute_resources__bwa_mem' { 13 | executor = 'local' 14 | cpus = 6 15 | memory = '60 GB' 16 | time = '24h' 17 | maxForks = 8 18 | } 19 | } 20 | 21 | -------------------------------------------------------------------------------- /src/bwamaptools/main.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | import java.nio.file.Paths 4 | 5 | ////////////////////////////////////////////////////// 6 | // Import sub-workflows from the modules: 7 | 8 | include { 9 | BWAMAPTOOLS__BWA_MEM_PE as BWA_MEM_PE; 10 | } from './processes/mapping.nf' params(params) 11 | include { 12 | BWAMAPTOOLS__MAPPING_SUMMARY as MAPPING_SUMMARY; 13 | } from './processes/mapping_summary.nf' params(params) 14 | include { 15 | SIMPLE_PUBLISH as PUBLISH_BAM; 16 | SIMPLE_PUBLISH as PUBLISH_BAM_INDEX; 17 | SIMPLE_PUBLISH as PUBLISH_MAPPING_SUMMARY; 18 | SIMPLE_PUBLISH as PUBLISH_MARKDUPS_METRICS; 19 | SIMPLE_PUBLISH as PUBLISH_LIBRARY_METRICS; 20 | } from "../utils/processes/utils.nf" params(params) 21 | 22 | ////////////////////////////////////////////////////// 23 | // Define the workflow 24 | 25 | workflow get_bwa_index { 26 | 27 | take: 28 | fasta_path 29 | 30 | main: 31 | 32 | bwa_fasta = Channel.fromPath(fasta_path) 33 | 34 | bwa_index_path = Paths.get( 35 | Paths.get(fasta_path).getParent().toString(), 36 | 
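// The glob on the next line matches both classic `bwa index` output
// (.amb, .ann, .bwt, .pac, .sa) and `bwa-mem2 index` output (.0123, .bwt.2bit.64),
// so either supported params.tools.bwamaptools.bwa_version can be used. The index
// files are expected to sit next to the genome FASTA, e.g. genome.fa.bwt.2bit.64
// for PUMATAC_dependencies/genomes/hg38_bwamem2/genome.fa.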
"*.{amb,ann,bwt,fai,flat,gdx,pac,sa,0123,bwt.2bit.64}" 37 | ) 38 | bwa_index = Channel.fromPath(bwa_index_path, 39 | glob: true, 40 | type: 'file', 41 | ) 42 | .ifEmpty { exit 1, "ERROR: Could not find bwa indices from: ${bwa_index_path}." } 43 | .collect() 44 | .toList() 45 | 46 | data_channel = bwa_fasta.combine(bwa_index) 47 | 48 | emit: 49 | data_channel 50 | 51 | } 52 | 53 | 54 | workflow BWA_MAPPING_PE { 55 | 56 | take: 57 | data // a channel of [val(unique_sampleId), val(sampleId), path(fastq_PE1), path(fastq_PE2)] 58 | // unique_sampleId is used to label the read group field "SM" and (part of) "LB", 59 | // while sampleId represents each split fastq file for a unique sample. 60 | 61 | main: 62 | /* 63 | 1) create a channel linking bwa index files from genome.fa in params, and 64 | 2) combine this channel with the items in the data channel 65 | */ 66 | bwa_inputs = get_bwa_index(params.tools.bwamaptools.bwa_fasta).combine(data) 67 | 68 | aligned_bam = BWA_MEM_PE(bwa_inputs) 69 | 70 | 71 | // publish output: 72 | 73 | MAPPING_SUMMARY(aligned_bam) 74 | PUBLISH_MAPPING_SUMMARY(MAPPING_SUMMARY.out, '.mapping_stats.tsv', 'reports/mapping_stats') 75 | 76 | emit: 77 | aligned_bam 78 | } 79 | -------------------------------------------------------------------------------- /src/bwamaptools/processes/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/bwamaptools/processes/.gitkeep -------------------------------------------------------------------------------- /src/bwamaptools/processes/index.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | // binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" 4 | 5 | toolParams = params.tools.bwamaptools 6 | 7 | process BWAMAPTOOLS__INDEX_BAM { 8 | 9 | container toolParams.container 10 | label 'compute_resources__default' 11 | 12 | input: 13 | tuple val(sampleId), 14 | path(bam) 15 | 16 | output: 17 | tuple val(sampleId), 18 | path(bam), 19 | path("*.bai") 20 | 21 | script: 22 | def sampleParams = params.parseConfig(sampleId, params.global, toolParams) 23 | processParams = sampleParams.local 24 | """ 25 | samtools index ${bam} 26 | """ 27 | } 28 | 29 | process BWAMAPTOOLS__INDEX_BED { 30 | 31 | container toolParams.container 32 | label 'compute_resources__default' 33 | 34 | input: 35 | tuple val(sampleId), 36 | path(bed) 37 | 38 | output: 39 | tuple val(sampleId), 40 | path("*.tbi") 41 | 42 | script: 43 | def sampleParams = params.parseConfig(sampleId, params.global, toolParams) 44 | processParams = sampleParams.local 45 | """ 46 | tabix -p bed ${bed} 47 | """ 48 | } 49 | 50 | -------------------------------------------------------------------------------- /src/bwamaptools/processes/mapping.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | // binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/template/bin/" : "" 4 | 5 | toolParams = params.tools.bwamaptools 6 | 7 | process BWAMAPTOOLS__BWA_MEM_PE { 8 | 9 | container toolParams.container 10 | label 'compute_resources__bwa_mem' 11 | 12 | input: 13 | tuple path(bwa_fasta), 14 | path(bwa_index), 15 | val(unique_sampleId), 16 | val(sampleId), 17 | path(fastq_PE1), 18 | path(fastq_PE2) 19 | 20 | output: 21 | tuple val(sampleId), 22 | path("${sampleId}.bwa.out.fixmate.possorted.bam"), 23 | path("${sampleId}.bwa.out.fixmate.possorted.bam.bai") 24 | 25 | script: 26 | def sampleParams = params.parseConfig(unique_sampleId, params.global, toolParams) 27 | processParams = sampleParams.local 28 | """ 29 | id=\$(zcat ${fastq_PE1} | head -n 1 | cut -f 1-4 -d':' | sed 's/@//') 30 | ${toolParams.bwa_version} mem \ 31 | -t ${task.cpus} \ 32 | -C \ 33 | -R "@RG\\tID:\${id}\\tSM:${unique_sampleId}\\tLB:\${id}"__"${unique_sampleId}\\tPL:ILLUMINA" \ 34 | ${bwa_fasta} \ 35 | ${fastq_PE1} \ 36 | ${fastq_PE2} \ 37 | | samtools fixmate -u -m -O bam - - \ 38 | | samtools sort -@ 2 -m 2G -O bam --write-index -T '${sampleId}.bwa.out.fixmate.possorted.TMP' -o '${sampleId}.bwa.out.fixmate.possorted.bam##idx##${sampleId}.bwa.out.fixmate.possorted.bam.bai' - 39 | """ 40 | } 41 | -------------------------------------------------------------------------------- /src/bwamaptools/processes/mapping_summary.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/bwamaptools/bin/" : "" 4 | 5 | toolParams = params.tools.bwamaptools 6 | 7 | process BWAMAPTOOLS__MAPPING_SUMMARY { 8 | 9 | container toolParams.container 10 | label 'compute_resources__default','compute_resources__24hqueue' 11 | 12 | input: 13 | tuple val(sampleId), 14 | path(bam), 15 | path(bai) 16 | 17 | output: 18 | tuple val(sampleId), 19 | path("${sampleId}.mapping_stats.tsv") 20 | 21 | script: 22 | def sampleParams = params.parseConfig(sampleId, params.global, toolParams) 23 | processParams = sampleParams.local 24 | """ 25 | ${binDir}mapping_summary.sh \ 26 | ${sampleId} \ 27 | ${bam} \ 28 | """ 29 | } 30 | 31 | -------------------------------------------------------------------------------- /src/bwamaptools/workflows/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/bwamaptools/workflows/.gitkeep -------------------------------------------------------------------------------- /src/channels/conf/bam.config: -------------------------------------------------------------------------------- 1 | params { 2 | data { 3 | bam { 4 | file_paths = '' 5 | suffix = '.bam' 6 | index_extension = '.bai' 7 | } 8 | } 9 | tools { 10 | file_converter { 11 | iff = 'bam' 12 | } 13 | } 14 | } 15 | 16 | -------------------------------------------------------------------------------- /src/channels/conf/csv.config: -------------------------------------------------------------------------------- 1 | params { 2 | data { 3 | csv { 4 | file_paths = '' 5 | suffix = '.csv' 6 | } 7 | } 8 | tools { 9 | file_converter { 10 | iff = 'csv' 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/channels/conf/fragments.config: -------------------------------------------------------------------------------- 1 | params { 2 | data { 3 | fragments { 4 | file_paths = '' 5 | suffix = '.tsv.gz' 6 | 
index_extension = '.tbi' 7 | } 8 | } 9 | tools { 10 | file_converter { 11 | iff = 'fragments' 12 | } 13 | } 14 | } 15 | 16 | -------------------------------------------------------------------------------- /src/channels/conf/h5ad.config: -------------------------------------------------------------------------------- 1 | params { 2 | data { 3 | h5ad { 4 | file_paths = '' 5 | suffix = '.h5ad' 6 | } 7 | } 8 | tools { 9 | file_converter { 10 | iff = 'h5ad' 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/channels/conf/loom.config: -------------------------------------------------------------------------------- 1 | params { 2 | data { 3 | loom { 4 | file_paths = '' 5 | suffix = '.loom' 6 | } 7 | } 8 | tools { 9 | file_converter { 10 | iff = 'loom' 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/channels/conf/seurat_rds.config: -------------------------------------------------------------------------------- 1 | params { 2 | data { 3 | seurat_rds { 4 | file_paths = '' 5 | suffix = '.Rds' 6 | } 7 | } 8 | tools { 9 | file_converter { 10 | iff = 'seurat_rds' 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/channels/conf/sra.config: -------------------------------------------------------------------------------- 1 | params { 2 | data { 3 | // Based on SRA Project Identifiers 4 | sra = [ 5 | [ 6 | id: '', 7 | samples: [""] // Use Unix globbing 8 | ] 9 | ] 10 | } 11 | } -------------------------------------------------------------------------------- /src/channels/conf/tenx_arc_cellranger_mex.config: -------------------------------------------------------------------------------- 1 | params { 2 | data { 3 | tenx_arc { 4 | cellranger_mex = 'data/10x/1k_pbmc/1k_pbmc_*/outs/' 5 | } 6 | } 7 | tools { 8 | file_converter { 9 | off = 'cistopic_rds' 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/channels/conf/tenx_atac_cellranger_mex.config: -------------------------------------------------------------------------------- 1 | params { 2 | data { 3 | tenx_atac { 4 | cellranger_mex = 'data/10x/1k_pbmc/1k_pbmc_*/outs/' 5 | } 6 | } 7 | 8 | tools { 9 | file_converter { 10 | off = 'cistopic_rds' 11 | } 12 | } 13 | } 14 | 15 | -------------------------------------------------------------------------------- /src/channels/conf/tenx_cellranger_h5.config: -------------------------------------------------------------------------------- 1 | params { 2 | data { 3 | tenx { 4 | cellranger_h5 = '' 5 | } 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /src/channels/conf/tenx_cellranger_mex.config: -------------------------------------------------------------------------------- 1 | params { 2 | data { 3 | tenx { 4 | cellranger_mex = 'data/10x/1k_pbmc/1k_pbmc_*/outs/' 5 | } 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /src/channels/conf/tsv.config: -------------------------------------------------------------------------------- 1 | params { 2 | data { 3 | tsv { 4 | file_paths = '' 5 | suffix = '.tsv' 6 | } 7 | } 8 | tools { 9 | file_converter { 10 | iff = 'tsv' 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/channels/file.nf: -------------------------------------------------------------------------------- 1 | 
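// Channel factories that turn a file glob (or comma-separated list of globs)
// into tuples keyed by a sample ID extracted from each path. A hypothetical
// call with illustrative arguments (the exact 'groups' value depends on
// extractSample in src/utils/processes/files.nf):
//
//   getChannelWithIndex('data/*.fragments.tsv.gz', '.fragments.tsv.gz', '.tbi', groups)
//
// which emits one [sampleId, [file, index], tag] tuple per matched file.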
nextflow.enable.dsl=2 2 | 3 | include { 4 | extractSample 5 | } from '../utils/processes/files.nf' 6 | 7 | workflow getChannel { 8 | 9 | take: 10 | glob 11 | sampleSuffixWithExtension // Suffix after the sample name in the file paths 12 | groups 13 | 14 | main: 15 | // Check whether multiple globs are provided 16 | if(glob.contains(',')) { 17 | glob = Arrays.asList(glob.split(',')); 18 | } 19 | data_channel = Channel 20 | .fromPath(glob, checkIfExists: true) 21 | .map { 22 | path -> tuple( 23 | *extractSample( 24 | "${path}", 25 | sampleSuffixWithExtension, 26 | groups 27 | ), 28 | file("${path}") 29 | ) 30 | }.map { 31 | // reorder: sample ID, file path, tag 32 | it -> tuple(it[0], it[2], it[1]) 33 | } 34 | 35 | emit: 36 | data_channel 37 | 38 | } 39 | 40 | workflow getChannelWithIndex { 41 | 42 | take: 43 | glob 44 | sampleSuffixWithExtension // Suffix after the sample name in the file paths 45 | indexFileExtension // file extension of the paired index file (e.g. '.bai', '.tbi') 46 | groups 47 | 48 | main: 49 | // Check whether multiple globs are provided 50 | if(glob.contains(',')) { 51 | glob = Arrays.asList(glob.split(',')); 52 | } 53 | data_channel = Channel 54 | .fromPath(glob, checkIfExists: true) 55 | .map { 56 | path -> tuple(*extractSample("${path}", sampleSuffixWithExtension, groups), file("${path}"), file("${path}${indexFileExtension}")) 57 | } 58 | .map { 59 | // reorder: sample ID, [file path, file index path], tag 60 | it -> tuple(it[0], [it[2],it[3]], it[1]) 61 | } 62 | 63 | emit: 64 | data_channel 65 | 66 | } 67 | 68 | workflow getChannelFromFilePath { 69 | 70 | take: 71 | filePath 72 | sampleSuffixWithExtension // Suffix after the sample name in the file paths 73 | groups 74 | 75 | main: 76 | data_channel = Channel.of( 77 | tuple(filePath) 78 | ) 79 | .map { 80 | it -> tuple(*extractSample("${it[0]}", sampleSuffixWithExtension, groups), file("${it[0]}")) 81 | } 82 | .map { 83 | // reorder: sample ID, file path, tag 84 | it -> tuple(it[0], it[2], it[1]) 85 | } 86 | 87 | emit: 88 | data_channel 89 | 90 | } 91 | 92 | -------------------------------------------------------------------------------- /src/channels/singleend.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | def extractSample(path) { 4 | pattern = /(.+)\/(.+)_R[1-2](.*)\.fastq(\.gz)?/ 5 | (full, parentDir, id, whateverSuffix, compressionExtension) = (path =~ pattern)[0] 6 | return id 7 | } 8 | 9 | workflow getChannel { 10 | 11 | take: 12 | glob 13 | 14 | main: 15 | // Check whether multiple globs are provided 16 | if(glob.contains(',')) { 17 | glob = Arrays.asList(glob.split(',')); 18 | } 19 | channel = Channel 20 | .fromPath(glob, checkIfExists: true) 21 | .map { 22 | path -> tuple(extractSample( "${path}" ), file("${path}")) 23 | } 24 | 25 | emit: 26 | channel 27 | 28 | } 29 | -------------------------------------------------------------------------------- /src/channels/sra.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | workflow getChannel { 4 | 5 | take: 6 | // Expects sra Map [[id: "id1", samples: ["glob1", ...]], ...] 
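// (the same shape as configured in src/channels/conf/sra.config), e.g. with a
// placeholder project ID:
//   sra = [[ id: 'SRPxxxxxx', samples: ['*'] ]]
// where each entry in samples may be a Unix glob over sample names.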
7 | sra 8 | 9 | main: 10 | data_channel = Channel.fromList( 11 | sra 12 | ).map { 13 | it -> tuple(it.id, it.samples) 14 | } 15 | 16 | emit: 17 | data_channel 18 | 19 | } -------------------------------------------------------------------------------- /src/channels/tenx.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | CELLRANGER_OUTS_REGEX = /(.+)\/(.+)\/outs/ 4 | CELLRANGER_H5_REGEX = /(.+)\/(.+)\/outs\/(.+)\.h5/ 5 | CELLRANGER_MEX_REGEX = /(.+)\/(.+)\/outs\/(.+)/ 6 | 7 | def extractSampleFromOuts(path) { 8 | // Allow to detect data generated by CellRanger prior and post to version 3. 9 | if (!(path ==~ CELLRANGER_OUTS_REGEX)) 10 | throw new Exception("Incorrect Cell Ranger MEX path. The parameter params.data.tenx.cellranger_outs in the config file should point to the outs folder.") 11 | (full, parentDir, id) = (path =~ CELLRANGER_OUTS_REGEX)[0] 12 | return id 13 | } 14 | 15 | workflow getOutsChannel { 16 | 17 | take: 18 | glob 19 | 20 | main: 21 | // Check whether multiple globs are provided 22 | if(glob.contains(',')) { 23 | glob = Arrays.asList(glob.split(',')); 24 | } 25 | data_channel = Channel 26 | .fromPath(glob, type: 'dir', checkIfExists: true) 27 | .map { 28 | filePath -> tuple(extractSampleFromOuts( "${filePath}" ), file("${filePath}")) 29 | } 30 | 31 | emit: 32 | data_channel 33 | 34 | } 35 | 36 | def extractSampleFromH5(path) { 37 | if (!(path ==~ CELLRANGER_H5_REGEX)) 38 | throw new Exception("Incorrect Cell Ranger .h5 path. The parameter params.data.tenx.cellranger_h5 in the config file should point to the .h5 file.") 39 | // Allow to detect data generated by CellRanger prior and post to version 3. 40 | (full, parentDir, id, filename) = (path =~ CELLRANGER_H5_REGEX)[0] 41 | return id 42 | } 43 | 44 | workflow getH5Channel { 45 | 46 | take: 47 | glob 48 | 49 | main: 50 | // Check whether multiple globs are provided 51 | if(glob.contains(',')) { 52 | glob = Arrays.asList(glob.split(',')); 53 | } 54 | data_channel = Channel 55 | .fromPath(glob, type: 'file', checkIfExists: true) 56 | .map { 57 | filePath -> tuple(extractSampleFromH5( "${filePath}" ), file("${filePath}")) 58 | } 59 | 60 | emit: 61 | data_channel 62 | 63 | } 64 | 65 | 66 | 67 | def extractSampleFromMEX(path) { 68 | // Allow to detect data generated by CellRanger prior and post to version 3. 69 | if (!(path ==~ CELLRANGER_MEX_REGEX)) 70 | throw new Exception("Incorrect Cell Ranger MEX path. 
The parameter params.data.tenx.cellranger_mex in the config file should point to a MEX folder.") 71 | (full, parentDir, id, filename, mexFolder) = (path =~ CELLRANGER_MEX_REGEX)[0] 72 | return id 73 | } 74 | 75 | workflow getMEXChannel { 76 | 77 | take: 78 | glob 79 | 80 | main: 81 | // Check whether multiple globs are provided 82 | if(glob.contains(',')) { 83 | glob = Arrays.asList(glob.split(',')); 84 | } 85 | data_channel = Channel 86 | .fromPath(glob, type: 'dir', checkIfExists: true) 87 | .map { 88 | filePath -> tuple(extractSampleFromMEX( "${filePath}" ), file("${filePath}")) 89 | } 90 | 91 | emit: 92 | data_channel 93 | 94 | } 95 | -------------------------------------------------------------------------------- /src/edirect/.gitignore: -------------------------------------------------------------------------------- 1 | *checkpoint.ipynb 2 | *checkpoint* 3 | *checkpoint.py 4 | *.test.ipynb 5 | *.csv 6 | *.loom 7 | *.pickle 8 | *.pyc 9 | *.html 10 | *egg* 11 | .vscode 12 | .nextflow 13 | .nextflow* 14 | data 15 | refdata 16 | work 17 | out/notebooks 18 | src/scenic/out 19 | src/scenic/notebooks 20 | src/scenic/data 21 | refdata 22 | data/10x/tiny 23 | work/ 24 | out/ 25 | tests/ 26 | debug/ 27 | *.swp 28 | *.swo 29 | -------------------------------------------------------------------------------- /src/edirect/edirect.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | edirect { 4 | container = 'ncbi/edirect:latest' 5 | } 6 | } 7 | } -------------------------------------------------------------------------------- /src/edirect/processes/sra_metadata.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | process EDIRECT__SRAID_TO_SAMPLENAME { 4 | 5 | container params.tools.edirect.container 6 | label 'compute_resources__default' 7 | maxForks 1 8 | 9 | input: 10 | val(sraId) 11 | output: 12 | tuple val(sraId), stdout 13 | shell: 14 | """ 15 | esearch -db sra -query ${sraId} \ 16 | | efetch --format native \ 17 | | sed -r 's/(.*)<TITLE>(.*)<\\/TITLE>(.*)/\\2/' \ 18 | | grep "^[^<;]" \ 19 | | tr -d '\\n' 20 | """ 21 | } 22 | -------------------------------------------------------------------------------- /src/edirect/workflows/sra_fastq_urls.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | include { 4 | EDIRECT__SRAID_TO_SAMPLENAME 5 | } from '../processes/sra_metadata.nf' 6 | 7 | workflow SRA_FASTQ_URLS { 8 | 9 | take: 10 | sraProjectId 11 | sampleNamesToRetrieve 12 | 13 | main: 14 | Channel 15 | .fromSRA(sraProjectId) 16 | .map { it[0] } 17 | .set { sraIDs } 18 | sraIDsToSample = EDIRECT__SRAID_TO_SAMPLENAME( sraIDs ) 19 | sraFastqUrls = sraIDsToSample 20 | .join(sraIDs) 21 | .map { it -> tuple(it[0],it[1],"ftp://ftp.sra.ebi.ac.uk/" + it[2])} 22 | 23 | if(!params.containsKey('quiet')) sraFastqUrls.view() 24 | 25 | emit: 26 | sraFastqUrls 27 | 28 | } 29 | -------------------------------------------------------------------------------- /src/popscle/.gitattributes: -------------------------------------------------------------------------------- 1 | notebooks/* linguist-vendored 2 | -------------------------------------------------------------------------------- /src/popscle/.gitignore: -------------------------------------------------------------------------------- 1 | *checkpoint.ipynb 2 | *checkpoint* 3 | *checkpoint.py 4 | *.test.ipynb 5 | *.csv 6 | *.loom 7 | *.pickle 8 | *.pyc 9 | *.html 10 | *egg* 11
| .vscode 12 | .nextflow 13 | .nextflow* 14 | data 15 | refdata 16 | work 17 | out/notebooks 18 | src/scenic/out 19 | src/scenic/notebooks 20 | src/scenic/data 21 | refdata 22 | data/10x/tiny 23 | work/ 24 | out/ 25 | tests/ 26 | debug/ 27 | *.swp 28 | *.swo 29 | -------------------------------------------------------------------------------- /src/popscle/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM vibsinglecellnf/samtools:0.2-1.12 2 | 3 | RUN echo 'deb http://deb.debian.org/debian testing main' > /etc/apt/sources.list.d/testing.list && \ 4 | apt-get update && \ 5 | apt-get install -y --no-install-recommends \ 6 | build-essential \ 7 | cmake 8 | 9 | # Install popscle 10 | RUN git clone --depth 1 https://github.com/statgen/popscle.git /tmp/popscle && \ 11 | mkdir -p /tmp/popscle/build && \ 12 | cd /tmp/popscle/build && \ 13 | cmake .. && \ 14 | make && \ 15 | cp /tmp/popscle/bin/popscle /usr/local/bin 16 | 17 | # install bedtools 18 | ENV BEDTOOLS_VERSION 2.30.0 19 | RUN curl -L -o /usr/local/bin/bedtools \ 20 | https://github.com/arq5x/bedtools2/releases/download/v${BEDTOOLS_VERSION}/bedtools.static.binary && \ 21 | chmod a+x /usr/local/bin/bedtools 22 | 23 | # install popscle_helper_tools into this image 24 | # (https://github.com/aertslab/popscle_helper_tools) 25 | RUN git clone --depth 1 https://github.com/aertslab/popscle_helper_tools.git /tmp/popscle_helper_tools && \ 26 | mv /tmp/popscle_helper_tools/*sh /usr/local/bin 27 | 28 | -------------------------------------------------------------------------------- /src/popscle/README.rst: -------------------------------------------------------------------------------- 1 | 2 | VSN-Pipelines popscle 3 | ====================== 4 | 5 | This is a repository for the popscle module of the VIB-SingleCell-NF (VSN) pipelines. 6 | 7 | Current Status 8 | --------------- 9 | 10 | This module currently has two workflows: ``freemuxlet`` and ``demuxlet``. 11 | Both of these workflows expect an input channel consisting of a tuple where 12 | element 1 is the sampleID and element 2 is the output folder of a 10X run. 13 | 14 | Currently the workflows are fixed to use the filtered matrices. 15 | 16 | To build the Docker image 17 | ------------------------- 18 | 19 | Image tag format: ``<date of latest git commit>-<short hash of latest git commit>``. 20 | 21 | .. code:: bash 22 | 23 | docker build -t vibsinglecellnf/popscle:2021-05-05-da70fc7 . 24 | 25 | This image uses the ``vibsinglecellnf/samtools`` image as a base. 26 | 27 | Acknowledgements 28 | ---------------- 29 | 30 | This module implements functionality developed by Gert Hulselmans designed to 31 | speed up the running time of dsc-pileup. The `filter_bam_file_for_popscle_dsc_pileup`_ 32 | script can lead to speedups of 5-10x depending on the input data. 33 | 34 | ..
_`filter_bam_file_for_popscle_dsc_pileup`: https://github.com/aertslab/popscle_helper_tools 35 | 36 | -------------------------------------------------------------------------------- /src/popscle/conf/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/popscle/conf/.gitkeep -------------------------------------------------------------------------------- /src/popscle/main.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | ////////////////////////////////////////////////////// 4 | // Import sub-workflows from the modules: 5 | 6 | include { 7 | SC__FILE_CONVERTER 8 | } from '../utils/processes/utils.nf' params(params) 9 | 10 | include { 11 | SC__POPSCLE__DSC_PILEUP 12 | } from './processes/dsc_pileup.nf' params(params) 13 | include { 14 | SC__POPSCLE__PREFILTER_DSC_PILEUP 15 | } from './processes/dsc_pileup.nf' params(params) 16 | 17 | 18 | ////////////////////////////////////////////////////// 19 | // Define the workflow 20 | 21 | workflow popscle { 22 | 23 | take: 24 | data 25 | 26 | main: 27 | data = SC__FILE_CONVERTER(data) 28 | SC__POPSCLE__PREFILTER_DSC_PILEUP(data) | 29 | SC__POPSCLE__DSC_PILEUP 30 | 31 | } 32 | 33 | -------------------------------------------------------------------------------- /src/popscle/popscle.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | popscle { 4 | container = 'vibsinglecellnf/popscle:2021-05-05-da70fc7' 5 | vcf = '/path/to/vcf_file' 6 | barcode_tag = 'CB' 7 | freemuxlet { 8 | nSamples = 2 9 | } 10 | demuxlet { 11 | field = 'GT' 12 | } 13 | } 14 | } 15 | } 16 | 17 | -------------------------------------------------------------------------------- /src/popscle/processes/demuxlet.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/popscle/bin/" : "" 4 | 5 | process SC__POPSCLE__DEMUXLET { 6 | 7 | container params.tools.popscle.container 8 | publishDir "${params.global.outdir}/data/demuxlet", mode: params.utils.publish.mode 9 | label 'compute_resources__cpu' 10 | 11 | input: 12 | tuple val(sampleId), path(f) 13 | file vcf 14 | 15 | output: 16 | tuple val(sampleId), path("${sampleId}_demuxlet*") 17 | 18 | script: 19 | def sampleParams = params.parseConfig(sampleId, params.global, params.tools.popscle.demuxlet) 20 | processParams = sampleParams.local 21 | 22 | """ 23 | popscle demuxlet \ 24 | --vcf ${vcf} \ 25 | ${(processParams.containsKey('field')) ? 
'--field ' + processParams.field : ''} \ 26 | --plp ${sampleId}_dsc-pileup \ 27 | --out ${sampleId}_demuxlet 28 | """ 29 | } 30 | 31 | process SC__POPSCLE__FREEMUXLET { 32 | 33 | container params.tools.popscle.container 34 | publishDir "${params.global.outdir}/data/freemuxlet", mode: params.utils.publish.mode 35 | label 'compute_resources__cpu' 36 | 37 | input: 38 | tuple val(sampleId), path(f) 39 | 40 | output: 41 | tuple val(sampleId), path("${sampleId}_freemuxlet*") 42 | 43 | script: 44 | def sampleParams = params.parseConfig(sampleId, params.global, params.tools.popscle.freemuxlet) 45 | processParams = sampleParams.local 46 | 47 | """ 48 | popscle freemuxlet \ 49 | --nsample ${processParams.nSamples} \ 50 | --plp ${sampleId}_dsc-pileup \ 51 | --out ${sampleId}_freemuxlet 52 | """ 53 | } 54 | -------------------------------------------------------------------------------- /src/popscle/processes/dsc_pileup.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/popscle/bin/" : "" 4 | 5 | toolParams = params.tools.popscle 6 | 7 | process SC__POPSCLE__DSC_PILEUP { 8 | 9 | container params.tools.popscle.container 10 | publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink' 11 | label 'compute_resources__cpu','compute_resources__24hqueue' 12 | 13 | input: 14 | tuple val(sampleId), path(f) 15 | file vcf 16 | 17 | output: 18 | tuple val(sampleId), path("${sampleId}_dsc-pileup*.gz") 19 | 20 | script: 21 | """ 22 | popscle dsc-pileup \ 23 | --sam ${f} \ 24 | ${toolParams?.barcode_tag ? '--tag-group ' + toolParams.barcode_tag : ''} \ 25 | --vcf ${vcf} \ 26 | --out ${sampleId}_dsc-pileup 27 | """ 28 | } 29 | 30 | process SC__POPSCLE__PREFILTER_DSC_PILEUP { 31 | 32 | container params.tools.popscle.container 33 | publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink' 34 | label 'compute_resources__cpu' 35 | 36 | input: 37 | tuple val(sampleId), 38 | path(bam), 39 | path(barcodes) 40 | file vcf 41 | 42 | output: 43 | tuple val(sampleId), path("${sampleId}_filtered_possorted_genome_bam.bam") 44 | 45 | script: 46 | """ 47 | filter_bam_file_for_popscle_dsc_pileup.sh \ 48 | ${bam} \ 49 | ${barcodes} \ 50 | ${vcf} \ 51 | ${sampleId}_filtered_possorted_genome_bam.bam \ 52 | ${toolParams?.barcode_tag ? 
toolParams.barcode_tag : ''} 53 | """ 54 | } 55 | 56 | -------------------------------------------------------------------------------- /src/popscle/workflows/dsc_pileup.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | ////////////////////////////////////////////////////// 4 | // Import sub-workflows from the modules: 5 | 6 | include { 7 | SC__FILE_CONVERTER; 8 | } from '../../utils/processes/utils.nf' params(params) 9 | 10 | include { 11 | SC__POPSCLE__DSC_PILEUP; 12 | SC__POPSCLE__PREFILTER_DSC_PILEUP; 13 | } from '../processes/dsc_pileup.nf' params(params) 14 | 15 | 16 | ////////////////////////////////////////////////////// 17 | // Define the workflow 18 | 19 | workflow DSC_PILEUP_FILTERED { 20 | 21 | take: 22 | data 23 | 24 | main: 25 | vcf = file(params.tools.popscle.vcf) 26 | SC__POPSCLE__PREFILTER_DSC_PILEUP(data, vcf) 27 | SC__POPSCLE__DSC_PILEUP(SC__POPSCLE__PREFILTER_DSC_PILEUP.out, vcf) 28 | 29 | emit: 30 | SC__POPSCLE__DSC_PILEUP.out 31 | } 32 | 33 | -------------------------------------------------------------------------------- /src/pycistopic/.gitattributes: -------------------------------------------------------------------------------- 1 | notebooks/* linguist-vendored 2 | -------------------------------------------------------------------------------- /src/pycistopic/.gitignore: -------------------------------------------------------------------------------- 1 | *checkpoint.ipynb 2 | *checkpoint* 3 | *checkpoint.py 4 | *.test.ipynb 5 | *.csv 6 | *.loom 7 | *.pickle 8 | *.pyc 9 | *.html 10 | *egg* 11 | .vscode 12 | .nextflow 13 | .nextflow* 14 | data 15 | refdata 16 | work 17 | out/notebooks 18 | src/scenic/out 19 | src/scenic/notebooks 20 | src/scenic/data 21 | refdata 22 | data/10x/tiny 23 | work/ 24 | out/ 25 | tests/ 26 | debug/ 27 | *.swp 28 | *.swo 29 | -------------------------------------------------------------------------------- /src/pycistopic/README.rst: -------------------------------------------------------------------------------- 1 | 2 | pycisTopic module 3 | ================= 4 | 5 | This repository contains an implementation of pycisTopic for VIB-SingleCell-NF (VSN) pipelines. 6 | 7 | -------------------------------------------------------------------------------- /src/pycistopic/bin/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/pycistopic/bin/.gitkeep -------------------------------------------------------------------------------- /src/pycistopic/bin/biomart_annot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import os 5 | import pickle 6 | import sys 7 | 8 | import pybiomart as pbm 9 | 10 | 11 | def main(): 12 | parser = argparse.ArgumentParser(description="Biomart gene annotation download.") 13 | 14 | parser.add_argument( 15 | "--biomart_dataset_name", 16 | type=str, 17 | required=True, 18 | help='Biomart dataset name, e.g. "hsapiens_gene_ensembl", ' 19 | '"mmusculus_gene_ensembl", "dmelanogaster_gene_ensembl", ... .', 20 | ) 21 | parser.add_argument( 22 | "--biomart_host", 23 | type=str, 24 | required=True, 25 | help='Biomart host address, e.g. "http://www.ensembl.org", ' 26 | '"http://nov2020.archive.ensembl.org/", ... .', 27 | ) 28 | 29 | args = parser.parse_args() 30 | 31 | # Skip retrieving annotation from biomart, if it was already done. 
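    # For reference, an illustrative invocation mirroring the values in
    # conf/pycistopic_hg38.config (assumed to run inside the pycistopic container):
    #   biomart_annot.py \
    #       --biomart_dataset_name hsapiens_gene_ensembl \
    #       --biomart_host http://www.ensembl.org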
32 | if os.path.exists("biomart_annot.pickle"): 33 | sys.exit(0) 34 | 35 | dataset = pbm.Dataset(name=args.biomart_dataset_name, host=args.biomart_host) 36 | annot = dataset.query( 37 | attributes=[ 38 | "chromosome_name", 39 | "transcription_start_site", 40 | "strand", 41 | "external_gene_name", 42 | "transcript_biotype", 43 | ] 44 | ) 45 | 46 | # Rename columns. 47 | annot.columns = ["Chromosome", "Start", "Strand", "Gene", "Transcript_type"] 48 | 49 | # Convert objects in chromosome column to strings. 50 | annot["Chromosome"] = annot["Chromosome"].astype(str) 51 | 52 | # Only keep protein coding genes. 53 | annot = annot[annot.Transcript_type == "protein_coding"] 54 | 55 | # Only keep genes on normal chromosomes: (1-99, X, Y, 2L, 2R, 3L, 3R). 56 | filter_chroms = annot["Chromosome"].str.contains("^[0-9]{1,2}$|^[XY]$|^[23][LR]$") 57 | annot = annot[(filter_chroms)] 58 | 59 | # Add "chr" to the beginning of the chromosome names to make them UCSC compatible. 60 | annot["Chromosome"] = annot["Chromosome"].str.replace(r"(\b\S)", r"chr\1") 61 | 62 | with open("biomart_annot.pickle", "wb") as fh: 63 | pickle.dump(annot, fh) 64 | 65 | 66 | if __name__ == "__main__": 67 | main() 68 | -------------------------------------------------------------------------------- /src/pycistopic/bin/plot_qc_stats.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import pickle 5 | 6 | from pycisTopic.qc import plot_sample_metrics 7 | 8 | ################################################################################ 9 | 10 | parser = argparse.ArgumentParser(description='Plot QC stats') 11 | 12 | parser.add_argument( 13 | "--sampleId", 14 | type=str, 15 | required=True, 16 | help='Sample ID.' 17 | ) 18 | parser.add_argument( 19 | "--profile_data_pkl", 20 | type=str, 21 | help='Profile data, pickle format.' 22 | ) 23 | parser.add_argument( 24 | "--output_pdf", 25 | type=str, 26 | help='Output plots, pdf format.' 27 | ) 28 | 29 | args = parser.parse_args() 30 | 31 | ################################################################################ 32 | 33 | # Load sample metrics 34 | infile = open(args.profile_data_pkl, 'rb') 35 | profile_data_dict = pickle.load(infile) 36 | infile.close() 37 | 38 | 39 | # plot: 40 | plot_sample_metrics(profile_data_dict, 41 | insert_size_distriubtion_xlim=[0,600], 42 | ncol=5, 43 | cmap='viridis', 44 | save=args.output_pdf 45 | ) 46 | 47 | -------------------------------------------------------------------------------- /src/pycistopic/conf/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/pycistopic/conf/.gitkeep -------------------------------------------------------------------------------- /src/pycistopic/conf/pycistopic_dmel.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | pycistopic { 4 | biomart_annot { 5 | biomart_dataset_name = 'dmelanogaster_gene_ensembl' 6 | biomart_host = 'http://www.ensembl.org' 7 | } 8 | macs2_call_peaks { 9 | gsize = 'dm' // hs, mm, ce, dm, or numeric effective genome size, e.g.
'2.7e9' 10 | } 11 | } 12 | } 13 | } 14 | 15 | -------------------------------------------------------------------------------- /src/pycistopic/conf/pycistopic_hg38.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | pycistopic { 4 | biomart_annot { 5 | biomart_dataset_name = 'hsapiens_gene_ensembl' 6 | biomart_host = 'http://www.ensembl.org' 7 | } 8 | macs2_call_peaks { 9 | gsize = 'hs' // hs, mm, ce, dm, or numeric effective genome size, e.g. '2.7e9' 10 | } 11 | } 12 | } 13 | } 14 | 15 | -------------------------------------------------------------------------------- /src/pycistopic/conf/pycistopic_mm10.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | pycistopic { 4 | biomart_annot { 5 | biomart_dataset_name = 'mmusculus_gene_ensembl' 6 | biomart_host = 'http://nov2020.archive.ensembl.org/' 7 | } 8 | macs2_call_peaks { 9 | gsize = 'mm' // hs, mm, ce, dm, or numeric effective genome size, e.g. '2.7e9' 10 | } 11 | } 12 | } 13 | } 14 | 15 | -------------------------------------------------------------------------------- /src/pycistopic/processes/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/pycistopic/processes/.gitkeep -------------------------------------------------------------------------------- /src/pycistopic/processes/barcode_level_statistics.nf: -------------------------------------------------------------------------------- 1 | nextflow.preview.dsl=2 2 | 3 | binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/pycistopic/bin/" : "" 4 | 5 | toolParams = params.tools.pycistopic 6 | //processParams = params.tools.pycistopic.barcode_level_statistics 7 | 8 | process PYCISTOPIC__BARCODE_LEVEL_STATISTICS { 9 | 10 | publishDir "${params.global.outdir}/intermediate/pycistopic/qc/", mode: 'symlink' 11 | container toolParams.container 12 | label 'compute_resources__default','compute_resources__24hqueue' 13 | 14 | input: 15 | tuple val(sampleId), 16 | path(metadata), 17 | path(metadata_pkl), 18 | path(profile_data_pkl) 19 | 20 | output: 21 | tuple val(sampleId), 22 | path(selected_barcodes), 23 | path(output_pdf_ff), 24 | path(output_pdf_tf), 25 | path(output_pdf_df) 26 | 27 | script: 28 | def sampleParams = params.parseConfig(sampleId, params.global, toolParams.barcode_level_statistics) 29 | processParams = sampleParams.local 30 | selected_barcodes = "${sampleId}__selected_barcodes.txt" 31 | output_pdf_ff = "${sampleId}__FRIP-vs-nFrag.pdf" 32 | output_pdf_tf = "${sampleId}__TSS-vs-nFrag.pdf" 33 | output_pdf_df = "${sampleId}__duprate-vs-nFrag.pdf" 34 | """ 35 | export NUMEXPR_MAX_THREADS=${task.cpus} 36 | ${binDir}barcode_level_statistics.py \ 37 | --sampleId ${sampleId} \ 38 | --metadata_pkl ${metadata_pkl} \ 39 | --selected_barcodes ${selected_barcodes} \ 40 | ${processParams?.filter_frags_lower ? '--filter_frags_lower ' + processParams?.filter_frags_lower : ''} \ 41 | ${processParams?.filter_frags_upper ? '--filter_frags_upper ' + processParams?.filter_frags_upper : ''} \ 42 | ${processParams?.filter_tss_lower ? '--filter_tss_lower ' + processParams?.filter_tss_lower : ''} \ 43 | ${processParams?.filter_tss_upper ? '--filter_tss_upper ' + processParams?.filter_tss_upper : ''} \ 44 | ${processParams?.filter_frip_lower ? 
'--filter_frip_lower ' + processParams?.filter_frip_lower : ''} \ 45 | ${processParams?.filter_frip_upper ? '--filter_frip_upper ' + processParams?.filter_frip_upper : ''} \ 46 | ${processParams?.filter_dup_rate_lower ? '--filter_dup_rate_lower ' + processParams?.filter_dup_rate_lower : ''} \ 47 | ${processParams?.filter_dup_rate_upper ? '--filter_dup_rate_upper ' + processParams?.filter_dup_rate_upper : ''} 48 | """ 49 | } 50 | 51 | -------------------------------------------------------------------------------- /src/pycistopic/processes/biomart_annot.nf: -------------------------------------------------------------------------------- 1 | nextflow.preview.dsl=2 2 | 3 | binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/pycistopic/bin/" : "" 4 | 5 | toolParams = params.tools.pycistopic 6 | processParams = params.tools.pycistopic.biomart_annot 7 | 8 | process PYCISTOPIC__BIOMART_ANNOT { 9 | 10 | publishDir "${params.global.outdir}/intermediate/pycistopic/biomart/", mode: 'symlink' 11 | container toolParams.container 12 | label 'compute_resources__default' 13 | 14 | output: 15 | path("biomart_annot.pickle") 16 | 17 | script: 18 | """ 19 | ${binDir}biomart_annot.py \ 20 | --biomart_dataset_name ${processParams.biomart_dataset_name} \ 21 | --biomart_host ${processParams.biomart_host} 22 | """ 23 | } 24 | 25 | -------------------------------------------------------------------------------- /src/pycistopic/processes/compute_qc_stats.nf: -------------------------------------------------------------------------------- 1 | nextflow.preview.dsl=2 2 | 3 | binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/pycistopic/bin/" : "" 4 | 5 | toolParams = params.tools.pycistopic 6 | processParams = params.tools.pycistopic.compute_qc_stats 7 | 8 | process rename_fragments { 9 | 10 | container toolParams.container 11 | label 'compute_resources__minimal' 12 | 13 | input: 14 | tuple val(sampleId), 15 | path(f) 16 | output: 17 | tuple val(sampleId), 18 | path("${sampleId}_*") // glob over the sample-prefixed symlinks created below 19 | 20 | script: 21 | """ 22 | ln -s ${f[0]} ${sampleId}_${f[0]} 23 | ln -s ${f[1]} ${sampleId}_${f[1]} 24 | """ 25 | 26 | } 27 | 28 | 29 | process PYCISTOPIC__COMPUTE_QC_STATS { 30 | 31 | publishDir "${params.global.outdir}/data/pycistopic/qc/", mode: params.utils.publish.mode 32 | container toolParams.container 33 | label 'compute_resources__pycisTopic' 34 | 35 | input: 36 | val(input) 37 | path(biomart_annot) 38 | path(fragments) 39 | path(peaks) 40 | 41 | output: 42 | tuple path('metadata/*.metadata.pkl'), 43 | path('profile_data/*.profile_data.pkl') 44 | 45 | script: 46 | """ 47 | export NUMEXPR_MAX_THREADS=1 48 | export OMP_NUM_THREADS=1 49 | ${binDir}compute_qc_stats.py \ 50 | ${"--input_files "+input.join(" --input_files ")} \ 51 | --n_frag ${processParams.n_frag} \ 52 | --tss_flank_window ${processParams.tss_flank_window} \ 53 | --tss_window ${processParams.tss_window} \ 54 | --tss_minimum_signal_window ${processParams.tss_minimum_signal_window} \ 55 | --tss_rolling_window ${processParams.tss_rolling_window} \ 56 | --min_norm ${processParams.min_norm} \ 57 | --threads ${task.cpus} \ 58 | --biomart_annot_pkl ${biomart_annot} \ 59 | --output_metadata_dir metadata \ 60 | --output_profile_data_dir profile_data 61 | """ 62 | } 63 | 64 | -------------------------------------------------------------------------------- /src/pycistopic/processes/macs2_call_peaks.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | // binDir = 
!params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" 4 | 5 | toolParams = params.tools.pycistopic 6 | processParams = params.tools.pycistopic.macs2_call_peaks 7 | 8 | process PYCISTOPIC__MACS2_CALL_PEAKS { 9 | 10 | container toolParams.container 11 | label 'compute_resources__default','compute_resources__24hqueue' 12 | 13 | input: 14 | tuple val(sampleId), 15 | path(bam), 16 | path(bam_index) 17 | 18 | output: 19 | tuple val(sampleId), 20 | path("${sampleId}_peaks.narrowPeak"), 21 | path("${sampleId}_summits.bed") 22 | 23 | script: 24 | //def sampleParams = params.parseConfig(sampleId, params.global, toolParams) 25 | """ 26 | macs2 callpeak \ 27 | --treatment ${bam} \ 28 | --name ${sampleId} \ 29 | --outdir . \ 30 | --format BAMPE \ 31 | --gsize ${processParams.gsize} \ 32 | --qvalue ${processParams.qvalue} \ 33 | --nomodel \ 34 | --shift ${processParams.shift} \ 35 | --extsize ${processParams.extsize} \ 36 | --keep-dup ${processParams.keepdup} \ 37 | --call-summits \ 38 | --nolambda 39 | """ 40 | } 41 | 42 | -------------------------------------------------------------------------------- /src/pycistopic/processes/plot_qc_stats.nf: -------------------------------------------------------------------------------- 1 | nextflow.preview.dsl=2 2 | 3 | binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/pycistopic/bin/" : "" 4 | 5 | toolParams = params.tools.pycistopic 6 | processParams = params.tools.pycistopic.compute_qc_stats 7 | 8 | process PYCISTOPIC__PLOT_QC_STATS { 9 | 10 | container toolParams.container 11 | label 'compute_resources__default' 12 | 13 | input: 14 | tuple val(sampleId), 15 | path(output_metadata), 16 | path(output_metadata_pkl), 17 | path(output_profile_data_pkl) 18 | 19 | output: 20 | tuple val(sampleId), 21 | path(output_pdf) 22 | 23 | script: 24 | def sampleParams = params.parseConfig(sampleId, params.global, toolParams) 25 | // Do not reassign the staged input paths here: that would make the script reference 26 | // file names that may not exist in the work directory. Only define the output name. 27 | output_pdf = "${sampleId}_qc_sample_metrics.pdf" 28 | """ 29 | ${binDir}plot_qc_stats.py \ 30 | --sampleId ${sampleId} \ 31 | --profile_data_pkl ${output_profile_data_pkl} \ 32 | --output_pdf ${output_pdf} 33 | """ 34 | } 35 | 36 | -------------------------------------------------------------------------------- /src/pycistopic/pycistopic.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | pycistopic { 4 | container = '/staging/leuven/stg_00002/lcb/cbravo/Multiomics_pipeline/pycistopic.sif' 5 | biomart_annot { 6 | biomart_dataset_name = 'hsapiens_gene_ensembl' 7 | biomart_host = 'http://www.ensembl.org' 8 | } 9 | macs2_call_peaks { 10 | gsize = 'hs' // hs, mm, ce, dm, or numeric effective genome size, e.g. 
'2.7e9' 11 | qvalue = 0.01 12 | extsize = 146 13 | shift = 73 14 | keepdup = 'all' 15 | } 16 | compute_qc_stats { 17 | n_frag = 100 18 | tss_flank_window = 2000 19 | tss_window = 50 20 | tss_minimum_signal_window = 100 21 | tss_rolling_window = 10 22 | min_norm = 0.1 23 | } 24 | call_cells { 25 | report_ipynb = '/src/pycistopic/bin/pycisTopic_qc_report_template.ipynb' 26 | use_density_coloring_on_scatterplot = true 27 | use_detailed_title_on_scatterplot = true 28 | filter_frags_lower = '1000' 29 | filter_frags_upper = '' 30 | filter_tss_lower = '8' 31 | filter_tss_upper = '' 32 | filter_frip_lower = '' 33 | filter_frip_upper = '' 34 | filter_dup_rate_lower = '' 35 | filter_dup_rate_upper = '' 36 | } 37 | } 38 | } 39 | } 40 | 41 | // define computing resources via process labels 42 | process { 43 | withLabel: 'compute_resources__pycisTopic' { 44 | executor = 'local' // or 'pbs' 45 | cpus = 8 46 | memory = '120 GB' 47 | time = '24h' 48 | } 49 | } 50 | 51 | -------------------------------------------------------------------------------- /src/pycistopic/workflows/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/pycistopic/workflows/.gitkeep -------------------------------------------------------------------------------- /src/samtools/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM vibsinglecellnf/samtools:base-0.3 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | RUN BUILDPKGS="git \ 5 | autoconf \ 6 | automake \ 7 | perl \ 8 | libbz2-dev \ 9 | liblzma-dev \ 10 | libcurl4-openssl-dev \ 11 | libssl-dev \ 12 | bedtools \ 13 | libncurses5-dev" && \ 14 | apt-get update && \ 15 | apt-get upgrade -y --no-install-recommends && \ 16 | apt-get install -y --no-install-recommends $BUILDPKGS 17 | 18 | # install htslib 19 | ENV HTSLIB_VERSION 1.15.1 20 | RUN curl -L -o /tmp/htslib-${HTSLIB_VERSION}.tar.bz2 \ 21 | https://github.com/samtools/htslib/releases/download/${HTSLIB_VERSION}/htslib-${HTSLIB_VERSION}.tar.bz2 && \ 22 | mkdir -p /tmp/htslib-${HTSLIB_VERSION} && \ 23 | tar jxvf /tmp/htslib-${HTSLIB_VERSION}.tar.bz2 -C /tmp/htslib-${HTSLIB_VERSION} --strip-components 1 && \ 24 | cd /tmp/htslib-${HTSLIB_VERSION} && \ 25 | ./configure \ 26 | CFLAGS="-fPIC" && \ 27 | make && \ 28 | make install && \ 29 | cd .. && rm -rf htslib-${HTSLIB_VERSION}* 30 | 31 | # install samtools 32 | ENV SAMTOOLS_VERSION 1.15.1 33 | RUN curl -L -o /tmp/samtools-${SAMTOOLS_VERSION}.tar.bz2 \ 34 | https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VERSION}/samtools-${SAMTOOLS_VERSION}.tar.bz2 && \ 35 | mkdir -p /tmp/samtools-${SAMTOOLS_VERSION} && \ 36 | tar jxvf /tmp/samtools-${SAMTOOLS_VERSION}.tar.bz2 -C /tmp/samtools-${SAMTOOLS_VERSION} --strip-components 1 && \ 37 | cd /tmp/samtools-${SAMTOOLS_VERSION} && \ 38 | ./configure \ 39 | --with-htslib=system && \ 40 | make && \ 41 | make install && \ 42 | cd .. 
&& rm -rf samtools-${SAMTOOLS_VERSION}* 43 | 44 | RUN rm -rf /var/cache/apt/* && \ 45 | rm -rf /var/lib/apt/lists/* && \ 46 | ldconfig 47 | 48 | -------------------------------------------------------------------------------- /src/samtools/Dockerfile.samtools-base: -------------------------------------------------------------------------------- 1 | FROM debian:bullseye-slim 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | RUN apt-get update && \ 5 | apt-get upgrade -y --no-install-recommends && \ 6 | apt-get install -y --reinstall ca-certificates && \ 7 | apt-get install -y --no-install-recommends \ 8 | cmake \ 9 | make \ 10 | mawk \ 11 | gcc \ 12 | libbz2-dev \ 13 | liblzma-dev \ 14 | libdeflate-dev \ 15 | bzip2 \ 16 | pigz \ 17 | curl \ 18 | procps \ 19 | less 20 | 21 | # zlib-ng 22 | ENV ZLIBNG 2.0.6 23 | RUN curl -L -o /tmp/zlib-ng-${ZLIBNG}.tar.gz https://github.com/zlib-ng/zlib-ng/archive/refs/tags/${ZLIBNG}.tar.gz && \ 24 | cd /tmp && tar xvf zlib-ng-${ZLIBNG}.tar.gz && \ 25 | cd zlib-ng-${ZLIBNG} && \ 26 | cmake -DZLIB_COMPAT=ON -DINSTALL_UTILS=ON . && \ 27 | cmake --build . --config Release && \ 28 | ctest --verbose -C Release && \ 29 | cmake --build . --target install && \ 30 | cd .. && rm -r zlib-ng-${ZLIBNG}* 31 | 32 | RUN rm -rf /var/cache/apt/* && \ 33 | rm -rf /var/lib/apt/lists/* && \ 34 | ldconfig 35 | 36 | -------------------------------------------------------------------------------- /src/samtools/README.rst: -------------------------------------------------------------------------------- 1 | 2 | Samtools Docker images 3 | ====================== 4 | 5 | This directory contains Dockerfiles for base images used here and for other images in the VSN Pipelines repository. 6 | 7 | 8 | To build the Base image 9 | ----------------------- 10 | 11 | This base image is based on ``debian:bullseye-slim`` and has a compiled version of 12 | `zlib-ng <https://github.com/zlib-ng/zlib-ng>`_ for faster compression and decompression. 13 | 14 | Image tag format: simple version numbers (0.1, 0.2, ...). 15 | 16 | .. code:: bash 17 | 18 | docker build -t vibsinglecellnf/samtools:base-0.3 . -f Dockerfile.samtools-base 19 | podman build -t vibsinglecellnf/samtools:base-0.3 . -f Dockerfile.samtools-base 20 | 21 | This base image is used in several other images within VSN:: 22 | 23 | - samtools [this directory] 24 | 25 | 26 | To build the Samtools image 27 | --------------------------- 28 | 29 | This uses the base image above and adds Samtools and HTSlib. 30 | 31 | Image tag format: ``<base image version>-<samtools release version>``. 32 | 33 | .. code:: bash 34 | 35 | docker build -t vibsinglecellnf/samtools:0.3-1.15.1 . 36 | podman build -t vibsinglecellnf/samtools:0.3-1.15.1 . 37 | 38 | This samtools image is used in several other images within VSN:: 39 | 40 | - singlecelltoolkit 41 | - bwamaptools 42 | - popscle 43 | 44 | 45 | -------------------------------------------------------------------------------- /src/samtools/processes/merge_bam.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | // binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/template/bin/" : "" 4 | 5 | toolParams = params.tools.samtools 6 | 7 | process SAMTOOLS__MERGE_BAM { 8 | container toolParams.container 9 | label 'compute_resources__samtools__merge_bam' 10 | 11 | input: 12 | tuple val(sampleId), 13 | path(bams) 14 | 15 | output: 16 | tuple val(sampleId), 17 | path("${sampleId}.bwa.out.fixmate.possorted.merged.bam"), 18 | path("${sampleId}.bwa.out.fixmate.possorted.merged.bam.bai") 19 | 20 | script: 21 | //def sampleParams = params.parseConfig(sampleId, params.global) 22 | //processParams = sampleParams.local 23 | """ 24 | set -euo pipefail 25 | 26 | samtools merge \ 27 | -@ 4 \ 28 | -O bam \ 29 | --write-index \ 30 | -o '${sampleId}.bwa.out.fixmate.possorted.merged.bam##idx##${sampleId}.bwa.out.fixmate.possorted.merged.bam.bai' \ 31 | ${"'" + bams.join("' '") + "'"} 32 | """ 33 | 34 | } 35 | -------------------------------------------------------------------------------- /src/samtools/processes/sort_bam.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | // binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" 4 | 5 | 6 | process SAMTOOLS__SORT_BAM { 7 | label 'compute_resources__samtools__sort_bam' 8 | 9 | input: 10 | tuple val(sampleId), 11 | path(bam) 12 | 13 | output: 14 | tuple val(sampleId), 15 | path("${sampleId}.bwa.out.fixmate.possorted.bam"), 16 | path("${sampleId}.bwa.out.fixmate.possorted.bai") 17 | 18 | script: 19 | def sampleParams = params.parseConfig(sampleId, params.global) 20 | processParams = sampleParams.local 21 | """ 22 | set -euo pipefail 23 | samtools sort \ 24 | -o ${sampleID}.bwa.out.fixmate.possorted.bam 25 | -@ 4 \ 26 | ${bam} 27 | samtools index \ 28 | -@ 4 \ 29 | ${bam} 30 | """ 31 | } 32 | -------------------------------------------------------------------------------- /src/samtools/samtools.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | samtools { 4 | container = 'vibsinglecellnf/samtools:0.3-1.16.1' 5 | } 6 | } 7 | } 8 | 9 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/.gitattributes: -------------------------------------------------------------------------------- 1 | notebooks/* linguist-vendored 2 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/.gitignore: -------------------------------------------------------------------------------- 1 | *checkpoint.ipynb 2 | *checkpoint* 3 | *checkpoint.py 4 | *.test.ipynb 5 | *.csv 6 | *.loom 7 | *.pickle 8 | *.pyc 9 | *.html 10 | *egg* 11 | .vscode 12 | .nextflow 13 | .nextflow* 14 | data 15 | refdata 16 | work 17 | out/notebooks 18 | src/scenic/out 19 | src/scenic/notebooks 20 | src/scenic/data 21 | refdata 22 | data/10x/tiny 23 | work/ 24 | out/ 25 | tests/ 26 | debug/ 27 | *.swp 28 | *.swo 29 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM vibsinglecellnf/samtools:0.3-1.15.1 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | RUN apt-get update && \ 5 | apt-get install -y --no-install-recommends \ 6 | python3 \ 7 | python3-venv \ 8 | nasm \ 9 | libtool \ 10 | wget && \ 11 | update-alternatives --install /usr/bin/python python /usr/bin/python3.9 100 12 | 13 | # install igzip (https://github.com/intel/isa-l) 14 | RUN 
git clone --depth=1 https://github.com/intel/isa-l.git /tmp/isa-l && \ 15 | cd /tmp/isa-l && \ 16 | ./autogen.sh && \ 17 | ./configure && \ 18 | make && \ 19 | make install && \ 20 | cd .. && \ 21 | rm -r isa-l 22 | 23 | 24 | RUN python -m venv /opt/venv 25 | # Make sure we use the virtualenv: 26 | ENV PATH="/opt/venv/bin:$PATH" 27 | 28 | ENV POLARS_MAX_THREADS=8 29 | RUN pip install --no-cache-dir --upgrade pip wheel && \ 30 | pip install --no-cache-dir \ 31 | pandas \ 32 | scipy \ 33 | uncertainties \ 34 | typing \ 35 | pathlib \ 36 | matplotlib \ 37 | numpy && \ 38 | wget https://temp.aertslab.org/.barcard/polars-0.13.51-cp37-abi3-manylinux_2_27_x86_64.whl && \ 39 | pip install polars-0.13.51-cp37-abi3-manylinux_2_27_x86_64.whl && \ 40 | rm polars-0.13.51-cp37-abi3-manylinux_2_27_x86_64.whl 41 | 42 | # polars>=0.13.52 \ 43 | 44 | RUN cd /opt/venv/bin && \ 45 | wget https://temp.aertslab.org/.barcard/create_fragments_file && \ 46 | wget https://temp.aertslab.org/.barcard/coreutils && \ 47 | chmod a+x create_fragments_file coreutils 48 | 49 | # install seq (https://github.com/seq-lang/seq/): 50 | ENV SEQ_VERSION=0.11.0 51 | RUN mkdir -p /opt/seq && \ 52 | wget https://github.com/seq-lang/seq/releases/download/v${SEQ_VERSION}/seq-linux-x86_64.tar.gz && \ 53 | tar xzf seq-linux-x86_64.tar.gz --strip-components 1 -C /opt/seq && \ 54 | rm seq-linux-x86_64.tar.gz 55 | ENV PATH="/opt/seq/bin:${PATH}" 56 | ENV OMP_NUM_THREADS=4 57 | ENV SEQ_PYTHON=/usr/lib/x86_64-linux-gnu/libpython3.9.so.1 58 | 59 | # install single_cell_toolkit 60 | # https://github.com/aertslab/single_cell_toolkit 61 | RUN git clone --depth=1 https://github.com/aertslab/single_cell_toolkit.git /opt/single_cell_toolkit 62 | ENV seq_root_dir=/opt/seq 63 | ENV PATH="/opt/single_cell_toolkit:/opt/single_cell_toolkit/barcard:${PATH}" 64 | 65 | RUN rm -rf /var/cache/apt/* && \ 66 | rm -rf /var/lib/apt/lists/* && \ 67 | ldconfig 68 | 69 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/README.rst: -------------------------------------------------------------------------------- 1 | 2 | single_cell_toolkit template 3 | ============================ 4 | 5 | This repository contains an implementation of single_cell_toolkit for VIB-SingleCell-NF (VSN) pipelines. 6 | See `aertslab/single_cell_toolkit <https://github.com/aertslab/single_cell_toolkit>`_ for the original source. 7 | 8 | To build the Docker image 9 | ------------------------- 10 | 11 | Image tag format: ``<date of latest git commit>-<short hash of latest git commit>``. 12 | 13 | .. code:: bash 14 | 15 | docker build -t vibsinglecellnf/singlecelltoolkit:2022-07-07-0638c1d . 16 | podman build -t vibsinglecellnf/singlecelltoolkit:2022-07-07-0638c1d . 17 | 18 | This image uses the ``vibsinglecellnf/samtools`` image as a base. 
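For reference, one way to derive this tag locally from a checkout of the single_cell_toolkit repository (a suggested convention, not a script shipped in this repo): .. code:: bash # Prints '<date of latest git commit>-<short hash>', e.g. 2022-07-07-0638c1d. git log -1 --date=short --pretty=format:'%cd-%h'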
19 | 20 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/bin/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/singlecelltoolkit/bin/.gitkeep -------------------------------------------------------------------------------- /src/singlecelltoolkit/conf/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/singlecelltoolkit/conf/.gitkeep -------------------------------------------------------------------------------- /src/singlecelltoolkit/conf/sctk_mapping.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | singlecelltoolkit { 4 | barcode_correction { 5 | max_mismatches = 1 6 | min_frac_bcs_to_find = 0.5 7 | whitelist { 8 | atac = 'PUMATAC_dependencies/whitelists/737K-cratac-v1.txt.gz' 9 | atac_revcomp = 'PUMATAC_dependencies/whitelists/737K-cratac-v1.REVCOMP.txt.gz' 10 | multiome = 'PUMATAC_dependencies/whitelists/737K-arc-v1.txt.gz' 11 | multiome_revcomp = 'PUMATAC_dependencies/whitelists/737K-arc-v1.REVCOMP.txt.gz' 12 | hydrop_2x384 = 'PUMATAC_dependencies/whitelists/hydrop_384x384.REVCOMP.txt.gz' 13 | hydrop_3x96_short = 'PUMATAC_dependencies/whitelists/20230120_hydrop-atac_ligation_all_revcomp.txt.gz' 14 | s3_atac_1 = 'PUMATAC_dependencies/whitelists/s3_atac_1.txt.gz' 15 | } 16 | } 17 | barcode_10x_scatac_fastqs { 18 | uncorrected_bc_tag = 'CR' 19 | barcode_quality_tag = 'CY' 20 | } 21 | } 22 | } 23 | } 24 | 25 | 26 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/conf/sctk_saturation.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | singlecelltoolkit { 4 | saturation { 5 | percentages = '0.3,0.6,0.9' 6 | sampling_fractions = '0.0,0.1,0.2,0.3,0.4,0.5,0.55,0.6,0.65,0.7,0.75,0.8,0.85,0.9,0.92,0.94,0.96,0.98,1.0' 7 | min_frags_per_cb = 200 8 | skip = true 9 | } 10 | } 11 | } 12 | } 13 | 14 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/main.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | ////////////////////////////////////////////////////// 4 | // process imports: 5 | include { SCTK__BARCODE_CORRECTION; } from './processes/barcode_correction.nf' 6 | include { SCTK__BARCODE_10X_SCATAC_FASTQ; } from './processes/barcode_10x_scatac_fastqs.nf' 7 | include { SCTK__EXTRACT_AND_CORRECT_BIORAD_BARCODE; } from './processes/extract_and_correct_biorad_barcode.nf' 8 | 9 | include { 10 | SIMPLE_PUBLISH as PUBLISH_BC_STATS; 11 | SIMPLE_PUBLISH as PUBLISH_BR_BC_STATS; 12 | } from '../../src/utils/processes/utils.nf' 13 | 14 | ////////////////////////////////////////////////////// 15 | // Define the workflow 16 | 17 | 18 | /* Barcode correction */ 19 | workflow barcode_correction { 20 | take: 21 | data 22 | 23 | main: 24 | 25 | // gather barcode whitelists from params into a channel: 26 | wl = Channel.empty() 27 | wl_cnt = 0 28 | params.tools.singlecelltoolkit.barcode_correction.whitelist.each { k, v -> 29 | if(v != '') { 30 | wl = wl.mix( Channel.of(tuple(k, file(v)) )) 31 | wl_cnt = wl_cnt + 1 32 | } 33 | } 34 | 35 | /* TO DO: fix ability to skip barcode correction 
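(As currently written, correction is only skipped when every entry in params.tools.singlecelltoolkit.barcode_correction.whitelist is empty; as soon as one whitelist is configured, all standard-type samples are routed through SCTK__BARCODE_CORRECTION.)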
*/ 36 | if(wl_cnt == 0) { 37 | if(!params.containsKey('quiet')) { 38 | println("No whitelist files were found in 'params.tools.singlecelltoolkit.barcode_correction.whitelist'. Skipping barcode correction for standard-type samples.") 39 | } 40 | // run barcode demultiplexing on each read+barcode: 41 | fastq_dex = SCTK__BARCODE_10X_SCATAC_FASTQ(data) 42 | } else { 43 | // join wl to the data channel: 44 | data_wl = wl.cross( data.map { it -> tuple(it[1], it[0], it[2], it[3], it[4]) } ) // technology, sampleId, R1, R2, R3 45 | .map { it -> tuple(it[1][1], it[1][0], // sampleId, technology 46 | it[1][2], it[1][3], it[1][4], // R1, R2, R3 47 | it[0][1] // whitelist 48 | ) } 49 | 50 | // run barcode correction against a whitelist: 51 | fastq_bc_corrected = SCTK__BARCODE_CORRECTION(data_wl) 52 | PUBLISH_BC_STATS(fastq_bc_corrected.map { it -> tuple(it[0], it[5]) }, '.corrected.bc_stats.log', 'reports/barcode') 53 | 54 | // run barcode demultiplexing on each read+barcode: 55 | fastq_dex = SCTK__BARCODE_10X_SCATAC_FASTQ( 56 | fastq_bc_corrected.map { it -> tuple(*it[0..4]) } 57 | ) 58 | } 59 | 60 | emit: 61 | fastq_dex 62 | } 63 | 64 | 65 | workflow biorad_bc { 66 | 67 | take: 68 | data_biorad 69 | 70 | main: 71 | 72 | /* run BioRad barcode correction and debarcoding separately: */ 73 | // using singlecelltoolkit: 74 | fastq_dex_br = SCTK__EXTRACT_AND_CORRECT_BIORAD_BARCODE(data_biorad.map{ it -> tuple(it[0], it[1], it[2], it[4]) }) 75 | PUBLISH_BR_BC_STATS(fastq_dex_br.map { it -> tuple(it[0], it[3]) }, '.corrected.bc_stats.log', 'reports/barcode') 76 | 77 | emit: 78 | fastq_dex_br.map { it -> tuple(*it[0..2]) } 79 | 80 | } 81 | 82 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/processes/barcode_10x_scatac_fastqs.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | //binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/singlecelltoolkit/bin/" : "" 4 | 5 | toolParams = params.tools.singlecelltoolkit 6 | 7 | process SCTK__BARCODE_10X_SCATAC_FASTQ { 8 | 9 | container toolParams.container 10 | label 'compute_resources__sctk__barcode_10x_scatac_fastq_5cpus' 11 | 12 | input: 13 | tuple val(sampleId), 14 | val(technology), 15 | path(fastq_PE1), 16 | path(fastq_bc), 17 | path(fastq_PE2) 18 | 19 | output: 20 | tuple val(sampleId), 21 | path("${sampleId}_dex_R1.fastq.gz"), 22 | path("${sampleId}_dex_R2.fastq.gz") 23 | 24 | script: 25 | def sampleParams = params.parseConfig(sampleId, params.global, toolParams.barcode_10x_scatac_fastqs) 26 | processParams = sampleParams.local 27 | def max_threads = (task.cpus > 5) ? 5 : task.cpus 28 | """ 29 | export compress_fastq_threads="${max_threads}" 30 | barcode_10x_scatac_fastqs.sh \ 31 | ${fastq_PE1} \ 32 | ${fastq_bc} \ 33 | ${fastq_PE2} \ 34 | ${sampleId}_dex \ 35 | false \ 36 | true \ 37 | ${processParams.uncorrected_bc_tag}_${processParams.barcode_quality_tag} 38 | """ 39 | } 40 | 41 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/processes/barcode_correction.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | //binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/singlecelltoolkit/bin/" : "" 4 | 5 | toolParams = params.tools.singlecelltoolkit 6 | 7 | process SCTK__BARCODE_CORRECTION { 8 | 9 | container toolParams.container 10 | label 'compute_resources__sctk_barcode' 11 | 12 | input: 13 | tuple val(sampleId), 14 | val(technology), 15 | path(fastq_PE1), 16 | path(fastq_bc), 17 | path(fastq_PE2), 18 | path(bc_whitelist) 19 | 20 | output: 21 | tuple val(sampleId), 22 | val(technology), 23 | path(fastq_PE1), 24 | path("${sampleId}_bc_corrected.fastq.gz"), 25 | path(fastq_PE2), 26 | path("${sampleId}_bc_corrected.fastq.gz.corrected.bc_stats.tsv") 27 | 28 | script: 29 | def sampleParams = params.parseConfig(sampleId, params.global, toolParams.barcode_correction) 30 | processParams = sampleParams.local 31 | """ 32 | correct_barcode_in_fastq.sh \ 33 | ${bc_whitelist} \ 34 | ${fastq_bc} \ 35 | ${sampleId}_bc_corrected.fastq.gz \ 36 | ${processParams.max_mismatches} \ 37 | ${processParams.min_frac_bcs_to_find} 38 | """ 39 | } 40 | 41 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/processes/extract_and_correct_biorad_barcode.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | //binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/singlecelltoolkit/bin/" : "" 4 | 5 | toolParams = params.tools.singlecelltoolkit 6 | 7 | process SCTK__EXTRACT_AND_CORRECT_BIORAD_BARCODE { 8 | 9 | container toolParams.container 10 | label 'compute_resources__sctk_barcode' 11 | 12 | input: 13 | tuple val(sampleId), 14 | val(technology), 15 | path(fastq_PE1), 16 | path(fastq_PE2) 17 | 18 | output: 19 | tuple val(sampleId), 20 | path("${sampleId}_dex_R1.fastq.gz"), 21 | path("${sampleId}_dex_R2.fastq.gz"), 22 | path("${sampleId}_dex.corrected_bc_stats.tsv") 23 | 24 | script: 25 | def sampleParams = params.parseConfig(sampleId, params.global, toolParams) 26 | //processParams = sampleParams.local 27 | """ 28 | extract_and_correct_biorad_barcode_in_fastq.sh \ 29 | ${fastq_PE1} \ 30 | ${fastq_PE2} \ 31 | ${sampleId}_dex 32 | """ 33 | } 34 | 35 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/processes/extract_hydrop_atac_barcode.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | //binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/singlecelltoolkit/bin/" : "" 4 | 5 | toolParams = params.tools.singlecelltoolkit 6 | 7 | process SCTK__EXTRACT_HYDROP_ATAC_BARCODE { 8 | 9 | container "vibsinglecellnf/singlecelltoolkit:2024-04-09-62429e9" 10 | label 'compute_resources__default' 11 | 12 | input: 13 | tuple val(sampleId), 14 | val(technology), 15 | path(fastq_PE1), 16 | path(fastq_bc), 17 | path(fastq_PE2) 18 | val(hydrop_atac_barcode_design) 19 | 20 | output: 21 | tuple val(sampleId), 22 | val(technology), 23 | path(fastq_PE1), 24 | path("${sampleId}_hydrop_barcode_R2.fastq.gz"), 25 | path(fastq_PE2) 26 | 27 | script: 28 | def sampleParams = params.parseConfig(sampleId, params.global, toolParams) 29 | //processParams = sampleParams.local 30 | """ 31 | extract_hydrop_atac_barcode_from_R2_fastq.sh \ 32 | ${fastq_bc} \ 33 | ${sampleId}_hydrop_barcode_R2.fastq.gz \ 34 | ${hydrop_atac_barcode_design} \ 35 | pigz 36 | """ 37 | } 38 | 39 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/processes/fix_and_compress_fastqs.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | if(!params.containsKey("test")) { 4 | binDir = "${workflow.projectDir}/src/sratoolkit/bin/" 5 | } else { 6 | binDir = "" 7 | } 8 | 9 | process FIX_AND_COMPRESS_SRA_FASTQS { 10 | 11 | container "vibsinglecellnf/singlecelltoolkit:2022-04-15-16314db" 12 | publishDir "${params.global.outdir}/data/raw/fastqs_fixed_and_compressed", mode: 'symlink', overwrite: true 13 | label 'compute_resources__cpu' 14 | 15 | input: 16 | tuple val(sraId), file("${sraId}_*.fastq") 17 | 18 | output: 19 | tuple val(sraId), file("${sraId}_*.fastq.gz") 20 | 21 | script: 22 | """ 23 | # Fixing the FASTQ files is required for future pre-processing (e.g.: scATAC-seq pipelines) because fasterq-dump does not have the -F option as fastq-dump do to keep original sequence names. 24 | # Fix the FASTQ files and compress them 25 | export compress_fastq_threads="${task.cpus}" 26 | NUM_FASTQ_FILES=\$(ls ./*.fastq | wc -l) 27 | echo "Fixing and compressing \${NUM_FASTQ_FILES} FASTQ files in parallel with \${compress_fastq_threads} compression threads for each task..." 28 | echo *.fastq | tr ' ' '\n' | xargs -P "\${NUM_FASTQ_FILES}" -n 1 -I {} fix_sra_fastq.sh "{}" "{}.gz" pigz 29 | echo "Removing all uncompressed FASTQ files" 30 | for FASTQ in *.fastq; do 31 | echo "Removing uncompressed FASTQ file \${FASTQ}..." 32 | rm "\$(readlink -f \${FASTQ})" 33 | done 34 | echo "Done." 35 | """ 36 | 37 | } 38 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/processes/saturation.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | //binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/singlecelltoolkit/bin/" : "" 4 | 5 | toolParams = params.tools.singlecelltoolkit 6 | 7 | process SCTK__SATURATION { 8 | 9 | container toolParams.container 10 | label 'compute_resources__default','compute_resources__24hqueue' 11 | 12 | input: 13 | tuple val(sampleId), 14 | path(fragments), 15 | path(fragments_index) 16 | file(bc_whitelists) 17 | val(optional) 18 | 19 | output: 20 | tuple val(sampleId), 21 | path("${sampleId}.sampling_stats.tsv"), 22 | path("${sampleId}.saturation.png") 23 | 24 | script: 25 | def sampleParams = params.parseConfig(sampleId, params.global, toolParams) 26 | processParams = sampleParams.local 27 | def bc_wl_param = optional == 'RUN' ? '-w selected_barcodes/' + sampleId + '.cell_barcodes.txt' : '' 28 | def polars_max_threads = (task.cpus > 6) ? 6 : task.cpus 29 | """ 30 | # Max threads polars is allowed to use (else will uses all cores). 31 | export POLARS_MAX_THREADS=${polars_max_threads}; 32 | # Max threads pyarrow is allowed to use (else will uses all cores) (used to read the fragments file in the beginning). 33 | export OMP_NUM_THREADS=${polars_max_threads}; 34 | calculate_saturation_from_fragments.py \ 35 | -i ${fragments} \ 36 | -o ${sampleId} \ 37 | -p ${toolParams.saturation.percentages} \ 38 | -m ${toolParams.saturation.min_frags_per_cb} \ 39 | -s ${toolParams.saturation.sampling_fractions} \ 40 | ${bc_wl_param} 41 | """ 42 | } 43 | 44 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/singlecelltoolkit.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | singlecelltoolkit { 4 | container = 'vibsinglecellnf/singlecelltoolkit:2022-04-15-16314db' 5 | } 6 | } 7 | } 8 | 9 | -------------------------------------------------------------------------------- /src/singlecelltoolkit/workflows/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/singlecelltoolkit/workflows/.gitkeep -------------------------------------------------------------------------------- /src/trimgalore/.gitattributes: -------------------------------------------------------------------------------- 1 | notebooks/* linguist-vendored 2 | -------------------------------------------------------------------------------- /src/trimgalore/.gitignore: -------------------------------------------------------------------------------- 1 | *checkpoint.ipynb 2 | *checkpoint* 3 | *checkpoint.py 4 | *.test.ipynb 5 | *.csv 6 | *.loom 7 | *.pickle 8 | *.pyc 9 | *.html 10 | *egg* 11 | .vscode 12 | .nextflow 13 | .nextflow* 14 | data 15 | refdata 16 | work 17 | out/notebooks 18 | src/scenic/out 19 | src/scenic/notebooks 20 | src/scenic/data 21 | refdata 22 | data/10x/tiny 23 | work/ 24 | out/ 25 | tests/ 26 | debug/ 27 | *.swp 28 | *.swo 29 | -------------------------------------------------------------------------------- /src/trimgalore/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7-slim 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | RUN BUILDPKGS="build-essential zlib1g-dev git curl" && \ 5 | apt-get update && \ 6 | apt-get install -y --no-install-recommends apt-utils debconf locales && dpkg-reconfigure locales && \ 7 | apt-get install -y --no-install-recommends $BUILDPKGS 8 | 9 | RUN pip install -U pip 10 | 11 | ################################################## 12 | # 
cutadapt 13 | RUN pip install cutadapt 14 | 15 | ################################################## 16 | # fastQC 17 | # RUN wget https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.11.9.zip 18 | 19 | ################################################## 20 | # trim galore 21 | RUN curl -fsSL https://github.com/FelixKrueger/TrimGalore/archive/0.6.6.tar.gz -o trim_galore.tar.gz && \ 22 | tar xvzf trim_galore.tar.gz && \ 23 | mv TrimGalore-0.6.6/trim_galore /usr/bin/ && \ 24 | rm -r TrimGalore-0.6.6 25 | 26 | 27 | RUN apt-get -y update && \ 28 | apt-get -y --no-install-recommends install \ 29 | # Need to run ps 30 | procps \ 31 | pigz \ 32 | less && \ 33 | rm -rf /var/cache/apt/* && \ 34 | rm -rf /var/lib/apt/lists/* 35 | 36 | -------------------------------------------------------------------------------- /src/trimgalore/README.rst: -------------------------------------------------------------------------------- 1 | 2 | Trim Galore module 3 | ================== 4 | 5 | This repository contains an implementation of Trim Galore for VIB-SingleCell-NF (VSN) pipelines. 6 | See `here <https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/>`_ for the original source. 7 | 8 | -------------------------------------------------------------------------------- /src/trimgalore/bin/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/trimgalore/bin/.gitkeep -------------------------------------------------------------------------------- /src/trimgalore/conf/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/trimgalore/conf/.gitkeep -------------------------------------------------------------------------------- /src/trimgalore/processes/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/trimgalore/processes/.gitkeep -------------------------------------------------------------------------------- /src/trimgalore/processes/trim.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | // binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/template/bin/" : "" 4 | 5 | toolParams = params.tools.trimgalore 6 | 7 | process TRIMGALORE__TRIM { 8 | 9 | container toolParams.container 10 | label 'compute_resources__trimgalore__trim' 11 | 12 | input: 13 | tuple val(sampleId), 14 | path(fastq_PE1), 15 | path(fastq_PE2) 16 | 17 | output: 18 | tuple val(sampleId), 19 | path("${sampleId}_dex_R1_val_1.fq.gz"), 20 | path("${sampleId}_dex_R2_val_2.fq.gz"), 21 | path("${sampleId}_dex_R1.fastq.gz_trimming_report.txt"), 22 | path("${sampleId}_dex_R2.fastq.gz_trimming_report.txt") 23 | 24 | script: 25 | def sampleParams = params.parseConfig(sampleId, params.global, toolParams.trim) 26 | processParams = sampleParams.local 27 | def max_threads = (task.cpus > 6) ? 6 : task.cpus 28 | """ 29 | trim_galore \ 30 | -j ${max_threads} \ 31 | -o . 
\ 32 | ${fastq_PE1} \ 33 | ${fastq_PE2} \ 34 | --paired \ 35 | --gzip 36 | """ 37 | } 38 | 39 | -------------------------------------------------------------------------------- /src/trimgalore/trimgalore.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | trimgalore { 4 | container = 'vibsinglecellnf/trimgalore:0.6.6' 5 | trim { 6 | paired = 'true' 7 | } 8 | } 9 | } 10 | } 11 | 12 | -------------------------------------------------------------------------------- /src/trimgalore/workflows/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aertslab/PUMATAC/5204bda8c37169754bb08c0fdac2c6c399ade7bc/src/trimgalore/workflows/.gitkeep -------------------------------------------------------------------------------- /src/utils/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7.9-slim-stretch AS compile-image 2 | 3 | RUN apt-get update && \ 4 | apt-get install -y --no-install-recommends build-essential gcc apt-utils cmake openssh-client git && \ 5 | apt-get install -y python-dev libxml2-dev zlib1g-dev && \ 6 | rm -rf /var/cache/apt/* && \ 7 | rm -rf /var/lib/apt/lists/* 8 | 9 | RUN python -m venv /opt/venv 10 | # Make sure we use the virtualenv: 11 | ENV PATH="/opt/venv/bin:$PATH" 12 | 13 | RUN python3 -m pip install ipykernel && \ 14 | pip install --no-cache-dir papermill && \ 15 | pip install --no-cache-dir pysradb==1.0.0 && \ 16 | pip install --no-cache-dir nbconvert==5.6.0 && \ 17 | pip install --no-cache-dir nbmerge==0.0.4 && \ 18 | pip install --no-cache-dir nbformat==5.0.8 19 | 20 | FROM python:3.7.9-slim-stretch AS build-image 21 | RUN apt-get -y update && \ 22 | # Need to run ps 23 | apt-get -y install procps && \ 24 | apt-get -y install libxml2 && \ 25 | rm -rf /var/cache/apt/* && \ 26 | rm -rf /var/lib/apt/lists/* 27 | 28 | COPY --from=compile-image /opt/venv /opt/venv 29 | 30 | # Make sure we use the virtualenv: 31 | ENV PATH="/opt/venv/bin:$PATH" -------------------------------------------------------------------------------- /src/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utils module 2 | 3 | ## Cell-based metadata annotation 4 | 5 | The profile `utils_cell_annotate` should be added when generating the main config using `nextflow config`. This will add the following entry in the config: 6 | 7 | ``` 8 | params { 9 | tools { 10 | cell_annotate { 11 | iff = '10x_cellranger_mex' 12 | off = 'h5ad' 13 | cellMetaDataFilePath = '' 14 | indexColumnName = '' 15 | sampleColumnName = '' 16 | annotationColumnNames = [''] 17 | } 18 | } 19 | } 20 | ``` 21 | Then, the following parameters should be updated to use the module feature: 22 | 23 | - `cellMetaDataFilePath` is a .tsv file (with header) with at least 2 columns: a column containing all the cell IDs and an annotation column. 24 | - `indexColumnName` is the column name from `cellMetaDataFilePath` containing the cell IDs information. 25 | - `sampleColumnName` is the column name from `cellMetaDataFilePath` containing the sample ID/name information. 26 | - `annotationColumnNames` is an array of columns names from `cellMetaDataFilePath` containing different annotation metadata to add. 27 | 28 | ## Sample-based metadata annotation 29 | The profile `utils_sample_annotate` should be added when generating the main config using nextflow config. 
This will add the following entry in the config: 30 | 31 | ``` 32 | params { 33 | tools { 34 | sample_annotate { 35 | iff = '10x_cellranger_mex' 36 | off = 'h5ad' 37 | type = 'sample' 38 | metadataFilePath = 'data/10x/1k_pbmc/metadata.tsv' 39 | } 40 | } 41 | } 42 | ``` 43 | Then, the following parameters should be updated to use the module feature: 44 | 45 | - `metadataFilePath` is a .tsv file (with header) with at least 2 columns, where the first column needs to match the sample IDs. Any other column will be added as an annotation in the final loom, i.e. all cells belonging to a given sample will be annotated with that sample's values. 46 | 47 | | id | chemistry | ... | 48 | | ------------- | ------------- | ------------- | 49 | | 1k_pbmc_v2_chemistry | v2 | ... | 50 | | 1k_pbmc_v3_chemistry | v3 | ... | 51 | 52 | Annotating samples this way allows any user to query all the annotations in the SCope portal. This is especially relevant when samples need to be compared across specific annotations (see the compare tab in SCope). 53 | -------------------------------------------------------------------------------- /src/utils/bin/create_cistopic_object.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | print("##################################################") 4 | print("# cisTopic #") 5 | print("##################################################") 6 | 7 | # Load dependency scripts. 8 | 9 | library("optparse") 10 | parser <- OptionParser( 11 | prog = "create_cistopic_object.R", 12 | description = "Create cisTopic object from 10x Cell Ranger MEX output" 13 | ) 14 | parser <- add_option( 15 | parser, 16 | c("-i", "--tenx_path"), 17 | action = "store", 18 | default = NULL, 19 | help = "Path to Cell Ranger 10x output containing filtered_peak_bc_matrix/ directory" 20 | ) 21 | parser <- add_option( 22 | parser, 23 | c("-m", "--metrics_fname"), 24 | action = "store", 25 | default = "singlecell.csv", 26 | help = "Filename of Cell Ranger 10x output per barcode metrics" 27 | ) 28 | parser <- add_option( 29 | parser, 30 | c("-s", "--sampleId"), 31 | action = "store", 32 | default = "", 33 | help = "sample ID" 34 | ) 35 | parser <- add_option( 36 | parser, 37 | c("-o", "--output"), 38 | action = "store", 39 | default = NULL, 40 | help = "Output file, rds format" 41 | ) 42 | 43 | args <- parse_args(parser) 44 | 45 | cat("Parameters: \n") 46 | print(args) 47 | 48 | ################################################################################ 49 | 50 | suppressWarnings(library(cisTopic)) 51 | 52 | data_folder = file.path(args$tenx_path, 'filtered_peak_bc_matrix') 53 | metrics = file.path(args$tenx_path, args$metrics_fname) 54 | 55 | cisTopicObject <- createcisTopicObjectFrom10Xmatrix(data_folder, metrics, project.name='VSN-ATAC') 56 | 57 | saveRDS(cisTopicObject,file=args$output) 58 | 59 | -------------------------------------------------------------------------------- /src/utils/bin/h5ad_to_filtered_loom.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import loompy as lp 5 | import numpy as np 6 | import os 7 | import scanpy as sc 8 | 9 | parser = argparse.ArgumentParser(description='') 10 | 11 | parser.add_argument( 12 | "input", 13 | type=argparse.FileType('r'), 14 | help='Input h5ad file.' 15 | ) 16 | 17 | parser.add_argument( 18 | "output", 19 | type=argparse.FileType('w'), 20 | help='Output .loom file.' 
21 | ) 22 | 23 | args = parser.parse_args() 24 | 25 | # Define the arguments properly 26 | FILE_PATH_IN = args.input 27 | FILE_PATH_OUT_BASENAME = os.path.splitext(args.output.name)[0] 28 | 29 | try: 30 | adata = sc.read_h5ad(filename=FILE_PATH_IN.name) 31 | except IOError: 32 | raise Exception("VSN ERROR: Wrong input format. Expects .h5ad files, got '{}'".format(os.path.splitext(FILE_PATH_IN.name)[1])) 33 | 34 | row_attrs = { 35 | "Gene": np.array(adata.var.index), 36 | } 37 | col_attrs = { 38 | "CellID": np.array(adata.obs.index), 39 | "nGene": np.array(np.sum(adata.X.transpose() > 0, axis=0)).flatten(), 40 | "nUMI": np.array(np.sum(adata.X.transpose(), axis=0)).flatten(), 41 | } 42 | 43 | matrix = (adata.X).T 44 | 45 | lp.create( 46 | filename=f"{FILE_PATH_OUT_BASENAME}.loom", 47 | layers=matrix if type(matrix) == np.ndarray else matrix.toarray(), 48 | row_attrs=row_attrs, 49 | col_attrs=col_attrs, 50 | ) 51 | -------------------------------------------------------------------------------- /src/utils/bin/reports/workflow_configuration_template.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Workflow Configuration" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from IPython.display import JSON\n", 17 | "import json" 18 | ] 19 | }, 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "## Manifest" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "JSON(json.loads(WORKFLOW_MANIFEST))" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## Parameters" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "JSON(json.loads(WORKFLOW_PARAMETERS))" 50 | ] 51 | } 52 | ], 53 | "metadata": { 54 | "kernelspec": { 55 | "display_name": "Python 3", 56 | "language": "python", 57 | "name": "python3" 58 | }, 59 | "language_info": { 60 | "codemirror_mode": { 61 | "name": "ipython", 62 | "version": 3 63 | }, 64 | "file_extension": ".py", 65 | "mimetype": "text/x-python", 66 | "name": "python", 67 | "nbconvert_exporter": "python", 68 | "pygments_lexer": "ipython3", 69 | "version": "3.6.8" 70 | } 71 | }, 72 | "nbformat": 4, 73 | "nbformat_minor": 4 74 | } 75 | -------------------------------------------------------------------------------- /src/utils/bin/sc_file_concatenator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import numpy as np 5 | import os 6 | import scanpy as sc 7 | 8 | parser = argparse.ArgumentParser(description='') 9 | 10 | parser.add_argument( 11 | "input", 12 | nargs='+', 13 | type=argparse.FileType('r'), 14 | help='Input h5ad files.' 15 | ) 16 | 17 | parser.add_argument( 18 | "-f", "--file-format", 19 | action="store", 20 | dest="format", 21 | default="h5ad", 22 | help="Input/output file format. Choose one of: h5ad" 23 | ) 24 | 25 | parser.add_argument( 26 | "-j", "--join", 27 | type=str, 28 | action="store", 29 | dest="join", 30 | default="inner", 31 | help="How to concatenate the multiple datasets. Choose one of: inner (intersect), outer (union)." 
32 | ) 33 | 34 | parser.add_argument( 35 | "-o", "--output", 36 | action="store", 37 | dest="output", 38 | default=None, 39 | help="Output file name." 40 | ) 41 | 42 | args = parser.parse_args() 43 | 44 | # Define the arguments properly 45 | FILE_PATH_OUT_BASENAME = os.path.splitext(args.output)[0] 46 | 47 | # I/O 48 | files = [] 49 | cell_ids = [] 50 | 51 | if args.format == 'h5ad': 52 | for FILE_PATH_IN in args.input: 53 | try: 54 | FILE_PATH_IN = FILE_PATH_IN.name 55 | adata = sc.read_h5ad(filename=FILE_PATH_IN) 56 | cell_ids.extend(adata.obs.index.values) 57 | files.append(adata) 58 | except IOError: 59 | raise Exception("VSN ERROR: Wrong input format. Expects .h5ad files, got '{}'".format(FILE_PATH_IN)) 60 | 61 | index_unique = None 62 | 63 | if len(cell_ids) != len(np.unique(cell_ids)): 64 | print("Non-unique cell index detected!") 65 | print("Make the index unique by joining the existing index names with the batch category, using index_unique='-'") 66 | index_unique = '-' 67 | # 68 | # Concatenate the data 69 | # 70 | 71 | if args.format == 'h5ad': 72 | # Concatenate multiple h5ad files 73 | # Source: https://anndata.readthedocs.io/en/latest/anndata.AnnData.concatenate.html#anndata.AnnData.concatenate 74 | adata = files[0].concatenate( 75 | files[1:], 76 | join=args.join, 77 | index_unique=index_unique 78 | ) 79 | # Not casting to 64-bit floats can lead to results that are not exactly reproducible. See: 80 | # - https://github.com/theislab/scanpy/issues/1612 81 | # - https://github.com/vib-singlecell-nf/vsn-pipelines/issues/295 82 | adata.X = adata.X.astype(np.float64) 83 | adata.var.index = adata.var.index.astype(str) 84 | adata = adata[:, np.sort(adata.var.index)] 85 | print(f"Total number of cells: {adata.obs.shape[0]}, genes: {adata.var.shape[0]}.") 86 | else: 87 | raise Exception("VSN ERROR: Concatenation of .{} files is not implemented.".format(args.format)) 88 | 89 | # I/O 90 | adata.write_h5ad("{}.h5ad".format(FILE_PATH_OUT_BASENAME)) 91 | -------------------------------------------------------------------------------- /src/utils/bin/sc_h5ad_apply_obs_filter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | import argparse 6 | import pandas as pd 7 | import numpy as np 8 | import scanpy as sc 9 | 10 | parser = argparse.ArgumentParser(description='') 11 | 12 | parser.add_argument( 13 | "input", 14 | type=argparse.FileType('r'), 15 | help='Input h5ad file.' 16 | ) 17 | 18 | parser.add_argument( 19 | "-o", "--output", 20 | type=argparse.FileType('w'), 21 | help='Output h5ad file.' 22 | ) 23 | 24 | parser.add_argument( 25 | '-f', '--filter-file-path', 26 | type=argparse.FileType('r'), 27 | action="append", 28 | dest="filter_file_paths", 29 | help="Path to a file with one cell ID to keep per line; can be given multiple times." 30 | ) 31 | 32 | args = parser.parse_args() 33 | 34 | FILE_PATH_IN = args.input.name 35 | FILE_PATH_OUT_BASENAME = os.path.splitext(args.output.name)[0] 36 | 37 | # I/O 38 | # Expects h5ad file 39 | try: 40 | adata = sc.read_h5ad(filename=FILE_PATH_IN) 41 | except IOError: 42 | raise Exception("VSN ERROR: Can only handle .h5ad files.") 43 | 44 | # 45 | # Subset the h5ad using the given cell IDs 46 | # 47 | 48 | obs_to_keep = [] 49 | 50 | for filter_file_path in args.filter_file_paths: 51 | obs_to_keep.extend( 52 | pd.read_csv(filepath_or_buffer=filter_file_path, header=None)[0].values 53 | ) 54 | 55 | if len(obs_to_keep) != len(np.unique(obs_to_keep)): 56 | raise Exception("VSN ERROR: This use case is currently not handled. 
This could happen if the given filter files keep overlapping sets of cell IDs.") 57 | 58 | print(f"Dimension of pre-filtered AnnData: {adata.shape}") 59 | adata_filtered = adata[obs_to_keep, :] 60 | print(f"Dimension of post-filtered AnnData: {adata_filtered.shape}") 61 | 62 | # I/O 63 | adata_filtered.write_h5ad("{}.h5ad".format(FILE_PATH_OUT_BASENAME)) 64 | -------------------------------------------------------------------------------- /src/utils/bin/sc_h5ad_extract_metadata.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | import argparse 6 | import pandas as pd 7 | import scanpy as sc 8 | import numpy as np 9 | 10 | parser = argparse.ArgumentParser(description='') 11 | 12 | parser.add_argument( 13 | "input", 14 | type=argparse.FileType('r'), 15 | help='The path to the input h5ad file.' 16 | ) 17 | 18 | parser.add_argument( 19 | "output", 20 | type=argparse.FileType('w'), 21 | help='The path to the output .tsv file containing the extracted metadata columns.' 22 | ) 23 | 24 | parser.add_argument( 25 | '-a', '--axis', 26 | type=str, 27 | dest="axis", 28 | help="The axis ('feature' or 'observation') defining the metadata from which the given column names will be extracted." 29 | ) 30 | 31 | parser.add_argument( 32 | '-c', '--column-name', 33 | type=str, 34 | action="append", 35 | dest="column_names", 36 | help="A column name to extract; can be given multiple times." 37 | ) 38 | 39 | args = parser.parse_args() 40 | 41 | FILE_PATH_IN = args.input.name 42 | 43 | # I/O 44 | # Expects h5ad file 45 | try: 46 | adata = sc.read_h5ad(filename=FILE_PATH_IN) 47 | except IOError: 48 | raise Exception("VSN ERROR: Can only handle .h5ad files.") 49 | 50 | # 51 | # Extract the given column_names from the feature/observation-based metadata. 52 | # 53 | 54 | if args.axis == 'feature': 55 | metadata = adata.var[args.column_names] 56 | elif args.axis == 'observation': 57 | raise Exception("VSN ERROR: Extracting the observation-based metadata is currently not implemented.") 58 | else: 59 | raise Exception(f"Cannot extract from the {args.axis}-based metadata.") 60 | 61 | # I/O 62 | metadata.to_csv( 63 | path_or_buf=args.output, 64 | sep='\t', 65 | header=True, 66 | columns=args.column_names, 67 | index=False 68 | ) 69 | -------------------------------------------------------------------------------- /src/utils/bin/sc_h5ad_update_metadata.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | import argparse 6 | import pandas as pd 7 | import scanpy as sc 8 | import numpy as np 9 | 10 | parser = argparse.ArgumentParser(description='') 11 | 12 | parser.add_argument( 13 | "input", 14 | type=argparse.FileType('r'), 15 | help='The path to the input h5ad file.' 16 | ) 17 | 18 | parser.add_argument( 19 | "output", 20 | type=argparse.FileType('w'), 21 | help='The path to the output h5ad file with the updated metadata.' 22 | ) 23 | 24 | parser.add_argument( 25 | '-m', "--additional-metadata", 26 | type=argparse.FileType('r'), 27 | dest="additional_metadata", 28 | required=True, 29 | help='The path to the additional metadata (.tsv) used to update the metadata of the given input h5ad.' 30 | ) 31 | 32 | parser.add_argument( 33 | '-a', '--axis', 34 | type=str, 35 | dest="axis", 36 | required=True, 37 | help='The axis (feature or observation) defining the metadata to update. 
' 38 | ) 39 | 40 | parser.add_argument( 41 | '-j', '--join-key', 42 | type=str, 43 | dest="join_key", 44 | required=True, 45 | help="The column name used to join the metadata with the given additional metadata." 46 | ) 47 | 48 | parser.add_argument( 49 | '-i', '--index-column-name', 50 | type=str, 51 | dest="index_column_name", 52 | help="The column name to use as index for the metadata." 53 | ) 54 | 55 | 56 | args = parser.parse_args() 57 | 58 | FILE_PATH_IN = args.input.name 59 | FILE_PATH_OUT_BASENAME = os.path.splitext(args.output.name)[0] 60 | 61 | # I/O 62 | # Expects h5ad file 63 | try: 64 | adata = sc.read_h5ad(filename=FILE_PATH_IN) 65 | except IOError: 66 | raise Exception("VSN ERROR: Can only handle .h5ad files.") 67 | 68 | # 69 | # Update the feature/observation-based metadata with all the columns present within the look-up table. 70 | # 71 | 72 | additional_metadata = pd.read_csv( 73 | filepath_or_buffer=args.additional_metadata, 74 | sep="\t", 75 | header=0 76 | ) 77 | 78 | if args.axis == 'feature': 79 | adata.var = pd.merge( 80 | adata.var, 81 | additional_metadata, 82 | on=args.join_key 83 | ) 84 | if args.index_column_name is not None: 85 | adata.var.set_index(args.index_column_name, inplace=True) 86 | adata.var.index.names = ['index'] 87 | 88 | elif args.axis == 'observation': 89 | raise Exception("VSN ERROR: Updating the observation-based metadata is currently not implemented.") 90 | 91 | else: 92 | raise Exception(f"Cannot update the {args.axis}-based metadata.") 93 | 94 | 95 | # I/O 96 | adata.write_h5ad("{}.h5ad".format(FILE_PATH_OUT_BASENAME)) 97 | -------------------------------------------------------------------------------- /src/utils/bin/sc_star_concatenator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import argparse 4 | import os 5 | import pandas as pd 6 | 7 | strand_options = { 8 | "no": 1, 9 | "forward": 2, 10 | "reverse": 3 11 | } 12 | 13 | parser = argparse.ArgumentParser(description='') 14 | 15 | parser.add_argument( 16 | "input", 17 | nargs='+', 18 | type=argparse.FileType('r'), 19 | help='Input STAR ReadsPerGene.out.tab files.' 20 | ) 21 | 22 | parser.add_argument( 23 | "-s", "--stranded", 24 | action="store", 25 | dest="stranded", 26 | default="no", 27 | help=f"Stranded nature of the library. Choose one of: {', '.join(strand_options.keys())}" 28 | ) 29 | 30 | parser.add_argument( 31 | "-o", "--output", 32 | action="store", 33 | dest="output", 34 | default=None, 35 | help="Output file name." 36 | ) 37 | 38 | args = parser.parse_args() 39 | 40 | # Define the arguments properly 41 | FILE_PATH_OUT_BASENAME = os.path.splitext(args.output)[0] 42 | 43 | # I/O 44 | files = [] 45 | 46 | for FILE_PATH_IN in args.input: 47 | FILE_PATH_IN = FILE_PATH_IN.name 48 | if not os.path.isfile(FILE_PATH_IN): 49 | raise Exception(f"Could not find file {FILE_PATH_IN}.") 50 | if not FILE_PATH_IN.endswith('ReadsPerGene.out.tab'): 51 | raise Exception(f"Expecting file ending with 'ReadsPerGene.out.tab', {os.path.basename(FILE_PATH_IN)} does not.") 52 | 53 | try: 54 | cell_name = os.path.basename(FILE_PATH_IN)[:-len("ReadsPerGene.out.tab")] 55 | counts = pd.read_csv(FILE_PATH_IN, sep='\t', index_col=0, skiprows=4, header=None) 56 | files.append((counts, cell_name)) 57 | except IOError: 58 | raise Exception("VSN ERROR: Wrong input format. 
Expects .tab files, got {}".format(FILE_PATH_IN)) 59 | 60 | # 61 | # Adjust the data 62 | # 63 | try: 64 | all_counts = pd.DataFrame() 65 | for counts, cell_name in files: 66 | all_counts.loc[:, cell_name] = counts[strand_options[args.stranded]].astype(int) 67 | except KeyError: 68 | raise Exception(f"VSN ERROR: Concatenation failed. Invalid --stranded value '{args.stranded}'.") 69 | 70 | all_counts.to_csv(f"{FILE_PATH_OUT_BASENAME}.tsv", header=True, index=True, sep='\t') 71 | -------------------------------------------------------------------------------- /src/utils/conf/base.config: -------------------------------------------------------------------------------- 1 | params { 2 | utils { 3 | container = 'vibsinglecellnf/utils:0.4.0' 4 | file_converter { 5 | off = 'h5ad' 6 | tagCellWithSampleId = true 7 | remove10xGEMWell = false 8 | useFilteredMatrix = true 9 | makeVarIndexUnique = false 10 | } 11 | publish { 12 | // pipelineOutputSuffix = '' 13 | compressionLevel = 6 14 | annotateWithBatchVariableName = false 15 | mode = 'copy' 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/utils/conf/cell_annotate.config: -------------------------------------------------------------------------------- 1 | params { 2 | utils { 3 | cell_annotate { 4 | off = 'h5ad' 5 | method = 'obo' // 'obo' = one-by-one, or 'aio' = all-in-one 6 | indexColumnName = '' 7 | // cellMetaDataFilePath = '' // Required in static mode and with 'aio' method 8 | // sampleSuffixWithExtension = '' // Required in static mode and with 'aio' method 9 | // sampleColumnName = '' // Required with 'aio' method 10 | // annotationColumnNames = [''] // Required with 'aio' method 11 | // publish = false 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/utils/conf/cell_filter.config: -------------------------------------------------------------------------------- 1 | params { 2 | utils { 3 | cell_filter { 4 | off = 'h5ad' 5 | method = 'internal' // or 'external' (requires the following additional params cellMetaDataFilePath, sampleColumnName, indexColumnName) 6 | filters = [ 7 | [ 8 | id: '', // Short identifier for the filter 9 | indexColumnName: '', 10 | filterColumnName: '', 11 | valuesToKeepFromFilterColumn: [''] 12 | // sampleColumnName: '', 13 | // cellMetaDataFilePath: '' 14 | ] 15 | ] 16 | // publish = false 17 | } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/utils/conf/h5ad_clean.config: -------------------------------------------------------------------------------- 1 | params { 2 | utils { 3 | file_cleaner { 4 | obsColumnMapper = [] 5 | obsColumnValueMapper = [] 6 | obsColumnsToRemove = [] 7 | } 8 | } 9 | } -------------------------------------------------------------------------------- /src/utils/conf/h5ad_concatenate.config: -------------------------------------------------------------------------------- 1 | params { 2 | utils { 3 | file_concatenator { 4 | join = 'outer' 5 | off = 'h5ad' 6 | } 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /src/utils/conf/h5ad_extract_metadata.config: -------------------------------------------------------------------------------- 1 | params { 2 | utils { 3 | extract_feature_metadata { 4 | columnNames = [''] 5 | } 6 | } 7 | } -------------------------------------------------------------------------------- /src/utils/conf/h5ad_update_metadata.config: -------------------------------------------------------------------------------- 1 | params { 
2 | utils { 3 | update_feature_metadata_index { 4 | indexColumnName = 'gene_symbol' 5 | joinKey = 'gene_ids' 6 | } 7 | } 8 | } -------------------------------------------------------------------------------- /src/utils/conf/sample_annotate.config: -------------------------------------------------------------------------------- 1 | params { 2 | utils { 3 | sample_annotate { 4 | off = 'h5ad' 5 | by { 6 | metadataFilePath = 'data/10x/1k_pbmc/metadata.tsv' 7 | method = 'sample' 8 | sampleColumnName = 'sample_id' 9 | compIndexColumnNames = [ 10 | "<adataIndexColumnName>" : "<metadataIndexColumnName>" 11 | ] 12 | annotationColumnNames = [] 13 | } 14 | } 15 | } 16 | } -------------------------------------------------------------------------------- /src/utils/conf/sample_annotate_old_v1.config: -------------------------------------------------------------------------------- 1 | params { 2 | utils { 3 | sample_annotate_v1 { 4 | iff = '10x_cellranger_mex' 5 | off = 'h5ad' 6 | type = 'sample' 7 | metadataFilePath = 'data/10x/1k_pbmc/metadata.tsv' 8 | } 9 | } 10 | } -------------------------------------------------------------------------------- /src/utils/conf/scope.config: -------------------------------------------------------------------------------- 1 | params { 2 | utils { 3 | scope { 4 | genome = '' 5 | tree { 6 | level_1 = '' 7 | level_2 = '' 8 | level_3 = '' 9 | } 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/utils/conf/sra_metadata.config: -------------------------------------------------------------------------------- 1 | params { 2 | global { 3 | outdir = 'out' 4 | } 5 | utils { 6 | container = 'vibsinglecellnf/utils:0.3.0' 7 | sra_metadata { 8 | mode = 'web' // or db 9 | // sraDb = '' 10 | // sraDbForceDownload = false 11 | // sraDbOutDir = '' 12 | } 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/utils/conf/sra_metadata.test.config: -------------------------------------------------------------------------------- 1 | params { 2 | global { 3 | outdir = 'out' 4 | } 5 | data { 6 | sra = [ 7 | [ 8 | id:'SRP125768', 9 | samples: [ 10 | "DGRP-551_.*d_r[0-9]+(?! )", 11 | "w1118_.*d_r[0-9]+(?! )" 12 | ] 13 | ] 14 | ] 15 | } 16 | utils { 17 | container = 'vibsinglecellnf/utils:0.3.0' 18 | sra_metadata { 19 | mode = 'web' // or db 20 | // sraDb = '' 21 | // sraDbForceDownload = false 22 | // sraDbOutDir = '' 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/utils/conf/sra_normalize_fastqs.config: -------------------------------------------------------------------------------- 1 | params { 2 | utils { 3 | sra_normalize_fastqs { 4 | // Downloading FASTQ from SRA will give FASTQ in the following format SRRXXXXXX_[1-9].fastq. 
This index minus one will be used to retrieve the FASTQ read suffix from the array of suffixes defined hereunder 5 | fastq_read_suffixes = ["R1","R2"] // ["R1","R2","I1","I2"] would be used for SRR11442498 (this requires params.tools.sratoolkit.includeTechnicalReads = true) 6 | } 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /src/utils/conf/star_concatenate.config: -------------------------------------------------------------------------------- 1 | params { 2 | utils { 3 | star_concatenator { 4 | stranded = 'no' 5 | off = 'tsv' 6 | } 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /src/utils/conf/test.config: -------------------------------------------------------------------------------- 1 | params { 2 | tools { 3 | scanpy { 4 | container = 'vibsinglecellnf/scanpy:1.8.1' 5 | } 6 | } 7 | utils { 8 | file_converter { 9 | iff = '10x_cellranger_mex' 10 | off = 'h5ad' 11 | useFilteredMatrix = true 12 | } 13 | file_annotator { 14 | iff = '10x_cellranger_mex' 15 | off = 'h5ad' 16 | type = 'sample' 17 | metadataFilePath = '/ddn1/vol1/staging/leuven/stg_00002/lcb/lcb_projects/TWE/cellranger/metadata.tsv' 18 | } 19 | file_concatenator { 20 | join = 'outer' 21 | iff = '10x_cellranger_mex' 22 | off = 'h5ad' 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/utils/conf/update_feature_nomenclature.config: -------------------------------------------------------------------------------- 1 | includeConfig './h5adExtractMetadata.config' 2 | includeConfig './../../flybaser/flybaser.config' 3 | includeConfig './h5adUpdateMetadata.config' -------------------------------------------------------------------------------- /src/utils/conf/workflow_report.config: -------------------------------------------------------------------------------- 1 | params { 2 | utils { 3 | workflow_configuration { 4 | report_ipynb = "${params.misc.test.enabled ? '../../..' : ''}/src/utils/bin/reports/workflow_configuration_template.ipynb" 5 | } 6 | } 7 | } -------------------------------------------------------------------------------- /src/utils/processes/config.nf: -------------------------------------------------------------------------------- 1 | import java.nio.file.Paths 2 | import groovy.transform.Memoized 3 | import nextflow.script.ScriptBinding 4 | import nextflow.config.ConfigParser 5 | import static groovy.json.JsonOutput.* 6 | 7 | 8 | def updateParams(params, resolvedParams, setter) { 9 | resolvedParams.each { k, v -> 10 | if(setter == null) { 11 | if(v instanceof Map) { 12 | if(!params.containsKey(k)) 13 | params."${k}" = [:] 14 | updateParams(params, v, params."${k}") 15 | } else { 16 | params."${k}" = v 17 | } 18 | } else { 19 | if(!setter.containsKey(k)) 20 | setter."${k}" = [:] 21 | setter."${k}" = v instanceof Map ? updateParams(params, v, setter."${k}") : v 22 | } 23 | } 24 | } 25 | 26 | @Memoized 27 | def resolveParams(Map params, boolean verbose) { 28 | if(!params.containsKey("strategy")) 29 | return params 30 | if(params.strategy != "min") 31 | return params 32 | def isRootDir = workflow.projectDir.getParent().getName() == "vib-singlecell-nf" 33 | def config = new ConfigParser().setBinding([params: params]) 34 | def co = new ConfigObject() 35 | co.putAll(params) 36 | co.flatten().each { key, val -> 37 | if(key.endsWith("configVersion")) { 38 | // Extract the tool name based on the key 39 | def tool = key.split("\\.")[-2] 40 | // Build the path to the versioned config of the current tool 41 | def toolBaseDir = isRootDir ? 
Paths.get(workflow.projectDir.toRealPath(), "src", tool) : workflow.projectDir.toRealPath() 42 | config = config.parse(Paths.get(toolBaseDir.toString(), "conf/min/base/${val}.config")) 43 | } 44 | } 45 | // Update the strategy since params has been resolved 46 | config.params.strategy = "max" 47 | updateParams(params, config.params, null) 48 | if(verbose) 49 | println(prettyPrint(toJson(params))) 50 | return params 51 | } 52 | 53 | def includeConfig(Map params, String configRelativeFilePath) { 54 | def repoFilePath = workflow.scriptFile.getParent() 55 | def isMainRepo = repoFilePath.getName() == "PUMATAC" 56 | def config = new ConfigParser().setBinding([params: params]) 57 | def co = new ConfigObject() 58 | def toolBaseDir = isMainRepo ? repoFilePath.toRealPath().toString() : repoFilePath.getParent().getParent().toRealPath().toString() 59 | config = config.parse(Paths.get(toolBaseDir, configRelativeFilePath)) 60 | updateParams(params, config.params, null) 61 | return params 62 | } 63 | -------------------------------------------------------------------------------- /src/utils/processes/files.nf: -------------------------------------------------------------------------------- 1 | 2 | def getBaseName(file, suffix) { 3 | // Default value suffix = "SC" does not work! Weird... 4 | res = (file.getName() =~ /(.+)\.${suffix}(.+)\.(.+)/) 5 | if(res.size() == 0) { 6 | throw new Exception("VSN ERROR: Cannot get base name.") 7 | } 8 | (full, filename, process, ext) = res[0] 9 | return filename 10 | } 11 | 12 | def extractSample(path, suffix, groups) { 13 | // Extract the sample name based on the given path and on the given suffix 14 | def _suffix = suffix instanceof String ? [suffix] : suffix 15 | _suffix = _suffix.collect { it.replace(".","\\.") } 16 | for(int i = 0; i<_suffix.size(); i++) { 17 | def sufx = _suffix[i] 18 | 19 | def pattern = /(.+)\/(.+)${sufx}/ 20 | def res = (path =~ pattern) 21 | if(res.size() == 0) continue 22 | if(res.size() == 1) { 23 | def (full, parentDir, id) = res[0] 24 | if(groups != null) { 25 | return new Tuple(id, groups[i]) 26 | } else { 27 | return new Tuple(id, 'NULL') 28 | } 29 | } 30 | } 31 | throw new Exception("VSN ERROR: the suffix params couldn't match any of the file paths. 
Make sure the suffix param exists in the file paths.") 32 | } 33 | -------------------------------------------------------------------------------- /src/utils/processes/gtf.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | process FORMAT_GTF { 4 | 5 | publishDir "${params.global.outdir}/00.refdata", mode: 'symlink' 6 | label 'compute_resources__default' 7 | 8 | input: 9 | file(annotation) 10 | 11 | output: 12 | file "*.formatted.gtf" 13 | 14 | script: 15 | """ 16 | sed -r 's/(.*); transcript_id (.*); (.*); gene_name (.*); \$/\\1; transcript_id \\2; \\3; gene_name \\4; transcript_name \\2;/' \ 17 | ${annotation} \ 18 | > ${annotation.baseName}.formatted.gtf 19 | """ 20 | 21 | } 22 | 23 | process FORMAT_GTF_IGENOMES { 24 | 25 | publishDir "${params.global.outdir}/00.refdata", mode: 'symlink' 26 | label 'compute_resources__default' 27 | 28 | input: 29 | file(annotation) 30 | 31 | output: 32 | file "*.formatted.gtf" 33 | 34 | script: 35 | """ 36 | sed -r 's/(.*); gene_name (.*); transcript_id (.*); (.*);\$/\\1; gene_name \\2; transcript_id \\3; \\4; transcript_name \\3;/' \ 37 | ${annotation} \ 38 | > ${annotation.baseName}.formatted.gtf 39 | """ 40 | 41 | } 42 | -------------------------------------------------------------------------------- /src/utils/processes/h5adExtractMetadata.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | import java.nio.file.Paths 4 | 5 | binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/utils/bin" : Paths.get(workflow.scriptFile.getParent().getParent().toString(), "utils/bin") 6 | 7 | 8 | process SC__UTILS__EXTRACT_FEATURE_METADATA { 9 | 10 | container params.tools.scanpy.container 11 | publishDir "${params.global.outdir}/data/intermediate", mode: 'link', overwrite: true 12 | label 'compute_resources__default' 13 | 14 | input: 15 | tuple val(sampleId), path(f) 16 | 17 | output: 18 | tuple val(sampleId), path("${sampleId}.SC__UTILS__EXTRACT_FEATURE_METADATA.tsv") 19 | 20 | script: 21 | def sampleParams = params.parseConfig(sampleId, params.global, params.utils.extract_feature_metadata) 22 | processParams = sampleParams.local 23 | columnNamesAsArguments = processParams.columnNames.collect({ '--column-name' + ' ' + it }).join(' ') 24 | """ 25 | ${binDir}/sc_h5ad_extract_metadata.py \ 26 | --axis feature \ 27 | ${columnNamesAsArguments} \ 28 | $f \ 29 | "${sampleId}.SC__UTILS__EXTRACT_FEATURE_METADATA.tsv" 30 | """ 31 | 32 | } 33 | -------------------------------------------------------------------------------- /src/utils/processes/h5adMerge.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | import java.nio.file.Paths 4 | 5 | binDir = !params.containsKey("test") ? 
"${workflow.projectDir}/src/utils/bin" : Paths.get(workflow.scriptFile.getParent().getParent().toString(), "utils/bin") 6 | 7 | 8 | process SC__H5AD_MERGE { 9 | 10 | container params.tools.scanpy.container 11 | publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true 12 | label 'compute_resources__mem' 13 | 14 | input: 15 | // Expects: 16 | // - data to be multiple h5ad files containing the final results to be merged 17 | tuple \ 18 | val(sampleId), \ 19 | path(data) 20 | 21 | output: 22 | tuple \ 23 | val(sampleId), \ 24 | path("${sampleId}.SC__H5AD_MERGE.h5ad") 25 | 26 | script: 27 | """ 28 | ${binDir}/sc_h5ad_merge.py \ 29 | * \ 30 | "${sampleId}.SC__H5AD_MERGE.h5ad" 31 | """ 32 | 33 | } 34 | -------------------------------------------------------------------------------- /src/utils/processes/h5adToLoom.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | import java.nio.file.Paths 4 | 5 | binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/utils/bin" : Paths.get(workflow.scriptFile.getParent().getParent().toString(), "utils/bin") 6 | 7 | 8 | process SC__H5AD_TO_LOOM { 9 | 10 | container params.tools.scanpy.container 11 | publishDir "${params.global.outdir}/loom", mode: 'link', overwrite: true, saveAs: { filename -> "${sampleId}.SCope_output.loom" } 12 | label 'compute_resources__mem' 13 | 14 | input: 15 | // Expects: 16 | // - rawFilteredData to be h5ad file containing the raw filtered (gene + cell filtered) data 17 | // - data to be one or more h5ad files containing the final results to be stored in the loom 18 | tuple \ 19 | val(sampleId), \ 20 | path(rawFilteredData), \ 21 | path(data) 22 | 23 | output: 24 | tuple \ 25 | val(sampleId), \ 26 | path("${sampleId}.SC__H5AD_TO_LOOM.loom") 27 | 28 | script: 29 | """ 30 | ${binDir}/h5ad_to_loom.py \ 31 | ${params.utils?.scope.genome.length() > 0 ? '--nomenclature "' + params.utils?.scope.genome + '"' : ''} \ 32 | ${params.utils?.scope.tree.level_1.length() > 0 ? '--scope-tree-level-1 "' + params.utils.scope.tree.level_1 + '"' : ''} \ 33 | ${params.utils?.scope.tree.level_2.length() > 0 ? '--scope-tree-level-2 "' + params.utils.scope.tree.level_2 + '"' : ''} \ 34 | ${params.utils?.scope.tree.level_3.length() > 0 ? '--scope-tree-level-3 "' + params.utils.scope.tree.level_3 + '"' : ''} \ 35 | ${params.utils?.scope?.markers?.log_fc_threshold ? '--markers-log-fc-threshold ' + params.utils.scope.markers.log_fc_threshold : ''} \ 36 | ${params.utils?.scope?.markers?.fdr_threshold ? 
'--markers-fdr-threshold ' + params.utils.scope.markers.fdr_threshold : ''} \ 37 | $data \ 38 | $rawFilteredData \ 39 | "${sampleId}.SC__H5AD_TO_LOOM.loom" 40 | """ 41 | 42 | } 43 | 44 | process SC__H5AD_TO_FILTERED_LOOM { 45 | 46 | container params.tools.scanpy.container 47 | publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true 48 | label 'compute_resources__mem' 49 | 50 | input: 51 | tuple val(sampleId), path(f) 52 | 53 | output: 54 | tuple val(sampleId), path("${sampleId}.filtered.loom") 55 | 56 | script: 57 | """ 58 | ${binDir}/h5ad_to_filtered_loom.py \ 59 | $f \ 60 | "${sampleId}.filtered.loom" 61 | """ 62 | 63 | } 64 | -------------------------------------------------------------------------------- /src/utils/processes/h5adUpdate.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | import java.nio.file.Paths 4 | import static groovy.json.JsonOutput.* 5 | 6 | binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/utils/bin" : Paths.get(workflow.scriptFile.getParent().getParent().toString(), "utils/bin") 7 | 8 | 9 | process SC__H5AD_UPDATE_X_PCA { 10 | 11 | container params.tools.scanpy.container 12 | label 'compute_resources__mem' 13 | 14 | input: 15 | tuple \ 16 | val(sampleId), \ 17 | path(data), \ 18 | path(xPca) 19 | 20 | output: 21 | tuple \ 22 | val(sampleId), \ 23 | path("${sampleId}.SC__H5AD_UPDATE_X_PCA.h5ad") 24 | 25 | script: 26 | """ 27 | ${binDir}/sc_h5ad_update.py \ 28 | --x-pca ${xPca} \ 29 | $data \ 30 | "${sampleId}.SC__H5AD_UPDATE_X_PCA.h5ad" 31 | """ 32 | 33 | } 34 | 35 | process SC__H5AD_CLEAN { 36 | 37 | container params.tools.scanpy.container 38 | label 'compute_resources__mem' 39 | 40 | input: 41 | tuple \ 42 | val(sampleId), \ 43 | path(data), \ 44 | val(stashedParams) 45 | 46 | output: 47 | tuple \ 48 | val(sampleId), \ 49 | path("${sampleId}.SC__H5AD_CLEAN.h5ad"), \ 50 | val(stashedParams) 51 | 52 | script: 53 | """ 54 | ${binDir}/sc_h5ad_update.py \ 55 | --empty-x \ 56 | $data \ 57 | "${sampleId}.SC__H5AD_CLEAN.h5ad" 58 | """ 59 | 60 | } 61 | 62 | process SC__H5AD_BEAUTIFY { 63 | 64 | container params.tools.scanpy.container 65 | publishDir "${params.global.outdir}/data/intermediate", mode: 'symlink', overwrite: true 66 | label 'compute_resources__mem' 67 | 68 | input: 69 | tuple \ 70 | val(sampleId), \ 71 | path(data), \ 72 | val(stashedParams) 73 | 74 | output: 75 | tuple \ 76 | val(sampleId), \ 77 | path("${sampleId}.SC__H5AD_BEAUTIFY.h5ad"), \ 78 | val(stashedParams) 79 | 80 | script: 81 | def sampleParams = params.parseConfig(sampleId, params.global, params.utils.file_cleaner) 82 | processParams = sampleParams.local 83 | 84 | obsColumnsToRemoveAsArgument = processParams.containsKey("obsColumnsToRemove") ? 85 | processParams.obsColumnsToRemove.collect({ '--obs-column-to-remove' + ' ' + it }).join(' ') : 86 | '' 87 | """ 88 | ${binDir}/sc_h5ad_update.py \ 89 | ${obsColumnsToRemoveAsArgument} \ 90 | ${processParams.containsKey("obsColumnMapper") ? "--obs-column-mapper '" + toJson(processParams.obsColumnMapper) + "'": ''} \ 91 | ${processParams.containsKey("obsColumnValueMapper") ? 
"--obs-column-value-mapper '" + toJson(processParams.obsColumnValueMapper) + "'": ''} \ 92 | $data \ 93 | "${sampleId}.SC__H5AD_BEAUTIFY.h5ad" 94 | """ 95 | 96 | } 97 | 98 | -------------------------------------------------------------------------------- /src/utils/processes/h5adUpdateMetadata.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | import java.nio.file.Paths 4 | 5 | binDir = !params.containsKey("test") ? "${workflow.projectDir}/src/utils/bin" : Paths.get(workflow.scriptFile.getParent().getParent().toString(), "utils/bin") 6 | 7 | 8 | process SC__UTILS__UPDATE_FEATURE_METADATA_INDEX { 9 | 10 | container params.tools.scanpy.container 11 | publishDir "${params.global.outdir}/data/intermediate", mode: 'link', overwrite: true 12 | label 'compute_resources__default' 13 | 14 | input: 15 | tuple val(sampleId), path(f), path(additionalMetadata) 16 | 17 | output: 18 | tuple val(sampleId), path("${sampleId}.SC__UTILS__UPDATE_FEATURE_METADATA_INDEX.h5ad") 19 | 20 | script: 21 | def sampleParams = params.parseConfig(sampleId, params.global, params.utils.update_feature_metadata_index) 22 | processParams = sampleParams.local 23 | """ 24 | ${binDir}/sc_h5ad_update_metadata.py \ 25 | --additional-metadata ${additionalMetadata} \ 26 | --axis feature \ 27 | --index-column-name ${processParams.indexColumnName} \ 28 | --join-key ${processParams.joinKey} \ 29 | $f \ 30 | "${sampleId}.SC__UTILS__UPDATE_FEATURE_METADATA_INDEX.h5ad" 31 | """ 32 | 33 | } 34 | -------------------------------------------------------------------------------- /src/utils/processes/reports.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | import static groovy.json.JsonOutput.* 4 | 5 | process UTILS__GENERATE_WORKFLOW_CONFIG_REPORT { 6 | 7 | container params.utils.container 8 | publishDir "${params.global.outdir}/notebooks/intermediate", mode: 'link', overwrite: true 9 | label 'compute_resources__report' 10 | 11 | input: 12 | path(ipynb) 13 | 14 | output: 15 | path("workflow_configuration_report.ipynb") 16 | 17 | script: 18 | """ 19 | papermill ${ipynb} \ 20 | workflow_configuration_report.ipynb \ 21 | -p WORKFLOW_MANIFEST '${params.misc.manifestAsJSON}' \ 22 | -p WORKFLOW_PARAMETERS '${params.misc.paramsAsJSON}' 23 | """ 24 | 25 | } 26 | 27 | process UTILS__REPORT_TO_HTML { 28 | 29 | container params.utils.container 30 | publishDir "${params.global.outdir}/notebooks/intermediate", mode: 'link', overwrite: true 31 | // copy final "merged_report" to notbooks root: 32 | publishDir "${params.global.outdir}/notebooks", pattern: '*merged_report*', mode: 'link', overwrite: true 33 | label 'compute_resources__report' 34 | 35 | input: 36 | tuple \ 37 | val(sampleId), \ 38 | path(ipynb) 39 | 40 | output: 41 | file("*.html") 42 | 43 | script: 44 | """ 45 | jupyter nbconvert ${ipynb} --to html 46 | """ 47 | 48 | } 49 | -------------------------------------------------------------------------------- /src/utils/utils.config: -------------------------------------------------------------------------------- 1 | includeConfig 'conf/base.config' -------------------------------------------------------------------------------- /src/utils/workflows/annotateByCellMetadata.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | ////////////////////////////////////////////////////// 4 | // Process imports: 5 | include { 6 | isParamNull; 7 | 
getToolParams; 8 | } from './../processes/utils.nf' params(params) 9 | include { 10 | getChannel; 11 | } from './../../channels/file' params(params) 12 | include { 13 | SC__ANNOTATE_BY_CELL_METADATA; 14 | } from './../processes/h5adAnnotate.nf' params(params) 15 | 16 | ////////////////////////////////////////////////////// 17 | // Define the workflow 18 | 19 | workflow ANNOTATE_BY_CELL_METADATA { 20 | 21 | take: 22 | // Expects (sampleId, h5ad) : Channel 23 | data 24 | // Expects (sampleId, tsv) : (Channel || null) 25 | metadata 26 | // Describes: name of tool 27 | // Expects tool: (string || null) 28 | // Values 29 | // - tool != null: 30 | // - The given tool is performing itself a cell-based annotation 31 | // - params.tools[tool] should exist 32 | // - tool == null: 33 | // - params.utils.cell_annotate should exist 34 | tool 35 | 36 | main: 37 | def workflowParams = isParamNull(tool) ? 38 | params.utils.cell_annotate : 39 | getToolParams(params.tools, tool)["cell_annotate"] 40 | def method = workflowParams.method 41 | if(method == 'aio') { 42 | out = SC__ANNOTATE_BY_CELL_METADATA( 43 | data.map { 44 | it -> tuple(it[0], it[1], file(workflowParams.cellMetaDataFilePath)) 45 | }, 46 | isParamNull(tool) ? 'NULL' : tool 47 | ) 48 | } else if(method == 'obo') { 49 | if(metadata == null) { 50 | metadata = getChannel( 51 | workflowParams.cellMetaDataFilePath, 52 | workflowParams.sampleSuffixWithExtension, 53 | 'NULL' 54 | ) 55 | } 56 | out = SC__ANNOTATE_BY_CELL_METADATA( 57 | data.join(metadata), 58 | isParamNull(tool) ? 'NULL' : tool 59 | ) 60 | } else { 61 | throw new Exception("The given method '" + method + "' is not valid for cell_annotate.") 62 | } 63 | 64 | emit: 65 | out 66 | 67 | } 68 | 69 | workflow ANNOTATE_BY_CELL_METADATA_BY_PAIR { 70 | take: 71 | one 72 | two 73 | tool 74 | main: 75 | ANNOTATE_BY_CELL_METADATA( 76 | one.map { 77 | it -> tuple(it[0], it[1]) 78 | }, 79 | two.map { 80 | it -> tuple(it[0], it[1]) 81 | }, 82 | tool 83 | ) 84 | emit: 85 | ANNOTATE_BY_CELL_METADATA.out 86 | } 87 | 88 | workflow STATIC__ANNOTATE_BY_CELL_METADATA { 89 | 90 | take: 91 | // Expects (sampleId, h5ad) 92 | data 93 | // Expects name of tool ([string] || null) 94 | tool 95 | 96 | main: 97 | out = ANNOTATE_BY_CELL_METADATA( 98 | data, 99 | null, 100 | tool 101 | ) 102 | 103 | emit: 104 | out 105 | 106 | } 107 | 108 | -------------------------------------------------------------------------------- /src/utils/workflows/downloadFromSRA.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | import java.nio.file.Files 4 | import java.nio.file.Paths 5 | 6 | ////////////////////////////////////////////////////// 7 | // process imports: 8 | 9 | include { 10 | SRATOOLKIT__DOWNLOAD_FASTQS; 11 | } from './../../sratoolkit/workflows/downloadFastQ' params(params) 12 | include { 13 | GET_SRA_DB; 14 | } from './../processes/sra' params(params) 15 | include { 16 | SRA_TO_METADATA; 17 | } from './../processes/sra' params(params) 18 | include { 19 | NORMALIZE_SRA_FASTQS; 20 | } from './../processes/sra' params(params) 21 | 22 | ////////////////////////////////////////////////////// 23 | // Define the workflow 24 | 25 | // dataParams = params.data.sra 26 | utilsParams = params.utils 27 | 28 | if(!utilsParams.containsKey("sra_metadata")) 29 | throw new Exception("DOWNLOAD_FROM_SRA workflow requires sra_metadata.config") 30 | 31 | workflowParams = params.utils.sra_metadata 32 | 33 | workflow DOWNLOAD_FROM_SRA { 34 | 35 | take: 36 | // Expects 
(sraProjectId, sampleFilters) 37 | sra 38 | 39 | main: 40 | if(workflowParams.mode == 'db') { 41 | sraDbFile = workflowParams.sraDb != '' ? file(workflowParams.sraDb): file(workflowParams.sraDbOutDir + "/SRAmetadb.sqlite") 42 | if(sraDbFile.exists() 43 | && sraDbFile.canRead() 44 | && !workflowParams.sraDbForceDownload) { 45 | println("Local SRA database detected ${sraDbFile}!") 46 | db = sraDbFile 47 | } else { 48 | if(workflowParams.sraDbForceDownload 49 | || workflowParams.sraDb == '') { 50 | println("Downloading SRA database to ${sraDbFile}...") 51 | db = GET_SRA_DB() 52 | println("Done!") 53 | } 54 | } 55 | } else if(workflowParams.mode == 'web') { 56 | db = file('NO_FILE') 57 | } else { 58 | throw new Exception("The "+ workflowParams.mode +" mode does not exist. Choose one of: web, db.") 59 | } 60 | // Get metadata for the given SRA Project ID and keep only the samples that pass the given sampleFilters 61 | metadata = SRA_TO_METADATA( 62 | sra, 63 | db 64 | ).splitCsv( 65 | header:true, 66 | sep: '\t' 67 | ).map { 68 | // Strip a trailing ']' or ')' and replace the special characters '[', ']', '(', ')', ',', ' ', '/' and '.' with underscores 69 | row -> tuple( 70 | row.run_accession, \ 71 | row.sample_name.replaceAll("[\\])]\$","").replaceAll("[\\]\\[)(), /\\.]","_") 72 | ) 73 | } 74 | if(!params.containsKey('quiet')) metadata.view() 75 | // Download and compress all the SRA runs defined in the metadata 76 | data = SRATOOLKIT__DOWNLOAD_FASTQS( 77 | metadata 78 | ).join( 79 | metadata 80 | ).map { 81 | // Put sample as primary key 82 | run -> tuple(run[2], run[1]) 83 | } 84 | out = NORMALIZE_SRA_FASTQS( data ) 85 | 86 | emit: 87 | out 88 | 89 | } 90 | 91 | // workflow test { 92 | // Channel 93 | // .fromFilePairs('work/**/SRR*_{1,2}.fastq.gz') 94 | // } 95 | -------------------------------------------------------------------------------- /src/utils/workflows/fileConverter.nf: -------------------------------------------------------------------------------- 1 | import nextflow.util.ArrayBag 2 | 3 | nextflow.enable.dsl=2 4 | 5 | ////////////////////////////////////////////////////// 6 | // process imports: 7 | 8 | include { 9 | SC__H5AD_TO_LOOM; 10 | } from './../processes/h5adToLoom.nf' params(params) 11 | include { 12 | SC__H5AD_MERGE 13 | } from "./../processes/h5adMerge.nf" params(params) 14 | include { 15 | isParamNull; 16 | PUBLISH; 17 | } from "./utils.nf" params(params) 18 | 19 | ////////////////////////////////////////////////////// 20 | // Define the workflow 21 | 22 | inputFormatsAllowed = ['h5ad'] 23 | outputFormatsAllowed = ['loom', 'h5ad'] 24 | 25 | workflow FILE_CONVERTER { 26 | 27 | take: 28 | // Expects (sampleId, data[]) 29 | data 30 | // Expects outputSuffix: string 31 | outputSuffix 32 | // Expects outputFormat: string 33 | outputFormat 34 | // Expects (sampleId, rawFilteredData) 35 | rawFilteredData 36 | 37 | main: 38 | out = Channel.empty() 39 | 40 | if(outputFormat == "mergeToSCopeLoom") { 41 | if(isParamNull(rawFilteredData)) { 42 | throw new Exception("VSN ERROR: Expecting rawFilteredData not to be null when outputFormat is "+ outputFormat) 43 | } 44 | out = SC__H5AD_TO_LOOM( 45 | rawFilteredData.combine( 46 | data.map { 47 | it -> tuple(it[0], it[1]) 48 | }, 49 | by: 0 50 | ) 51 | ) 52 | } else if(outputFormat == "mergeToScanpyH5ad") { 53 | out = SC__H5AD_MERGE( 54 | data.map { 55 | it -> tuple(it[0], it[1]) 56 | } 57 | ) 58 | } else { 59 | throw new Exception("VSN ERROR: Output format "+ outputFormat +" not supported") 60 | } 61 | 62 | emit: 63 | out 64 | 65 | } 66 | 67 | 
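For orientation, a minimal usage sketch of the FILE_CONVERTER workflow above (the sample id, file paths and output suffix are hypothetical placeholders; the call shape mirrors the FINALIZE workflow further below, and the rawFilteredData channel is only consumed by the 'mergeToSCopeLoom' branch):

nextflow.enable.dsl=2

include {
    FILE_CONVERTER;
} from './src/utils/workflows/fileConverter.nf' params(params)

workflow {
    // (sampleId, h5ad) pairs as produced by upstream processes (hypothetical paths)
    data = Channel.of( tuple('sample1', file('out/data/sample1.final.h5ad')) )
    rawFilteredData = Channel.of( tuple('sample1', file('out/data/sample1.filtered.h5ad')) )
    // Group the per-sample results and merge them into a SCope-ready loom
    FILE_CONVERTER(
        data.groupTuple(),
        'FINAL',             // hypothetical output suffix
        'mergeToSCopeLoom',
        rawFilteredData
    )
}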
-------------------------------------------------------------------------------- /src/utils/workflows/filterAnnotateClean.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | ////////////////////////////////////////////////////// 4 | // Process imports: 5 | include { 6 | UPDATE_FEATURE_NOMENCLATURE 7 | } from './updateFeatureNomenclature.nf' params(params) 8 | include { 9 | FILTER_BY_CELL_METADATA 10 | } from './filterByCellMetadata.nf' params(params) 11 | include { 12 | STATIC__ANNOTATE_BY_CELL_METADATA 13 | } from './annotateByCellMetadata.nf' params(params) 14 | include { 15 | hasMetadataFilePath; 16 | SC__ANNOTATE_BY_SAMPLE_METADATA 17 | } from '../processes/h5adAnnotate.nf' params(params) 18 | include { 19 | SC__H5AD_BEAUTIFY; 20 | } from '../processes/h5adUpdate.nf' params(params) 21 | 22 | ////////////////////////////////////////////////////// 23 | // Define the workflow 24 | 25 | workflow FILTER_AND_ANNOTATE_AND_CLEAN { 26 | 27 | take: 28 | // Expects (sampleId, h5ad) : Channel 29 | data 30 | 31 | main: 32 | out = data 33 | if(params.utils?.update_feature_metadata_index) { 34 | out = UPDATE_FEATURE_NOMENCLATURE( data ) 35 | } 36 | // Filter cells based on an indexed cell-based metadata table 37 | if(params.utils?.cell_filter) { 38 | out = FILTER_BY_CELL_METADATA( out, 'NULL' ) 39 | } 40 | // Annotate cells based on an indexed cell-based metadata table 41 | if(params.utils?.cell_annotate) { 42 | out = STATIC__ANNOTATE_BY_CELL_METADATA( 43 | out, 44 | null 45 | ) 46 | } 47 | // Annotate cells based on an indexed sample-based metadata table 48 | if(params.utils?.sample_annotate) { 49 | if (!hasMetadataFilePath(params.utils.sample_annotate)) { 50 | throw new Exception("The metadataFilePath param is missing in sample_annotate.") 51 | } 52 | out = SC__ANNOTATE_BY_SAMPLE_METADATA( out ) 53 | } 54 | // Clean 55 | // e.g.: 56 | // - h5ad: rename adata.obs values, remove adata.obs columns 57 | if(params.utils?.file_cleaner) { 58 | out = SC__H5AD_BEAUTIFY( out ) 59 | } 60 | 61 | emit: 62 | out 63 | 64 | } 65 | -------------------------------------------------------------------------------- /src/utils/workflows/filterByCellMetadata.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | ////////////////////////////////////////////////////// 4 | // Process imports: 5 | include { 6 | isParamNull; 7 | getToolParams; 8 | } from './../processes/utils.nf' params(params) 9 | include { 10 | SC__PREPARE_OBS_FILTER; 11 | } from './../processes/h5adSubset' params(params) 12 | include { 13 | SC__APPLY_OBS_FILTER; 14 | } from './../processes/h5adSubset' params(params) 15 | 16 | ////////////////////////////////////////////////////// 17 | // Define the workflow 18 | 19 | workflow FILTER_BY_CELL_METADATA { 20 | 21 | take: 22 | // Expects (sampleId, h5ad) : Channel 23 | data 24 | // Describes: name of tool 25 | // Expects tool: (string || null) 26 | // Values 27 | // - tool != null: 28 | // - The given tool is performing itself a cell-based filtering 29 | // - params.tools[tool] should exist 30 | // - tool == null: 31 | // - params.utils.cell_filter should exist 32 | tool 33 | 34 | main: 35 | def workflowParams = isParamNull(tool) ? 36 | params.utils.cell_filter : 37 | getToolParams(params.tools, tool)["cell_filter"] 38 | Channel 39 | .from(workflowParams.filters) 40 | .set{ filters } 41 | SC__PREPARE_OBS_FILTER( 42 | data.combine(filters), 43 | isParamNull(tool) ? 
'NULL' : tool 44 | ) 45 | out = SC__APPLY_OBS_FILTER( 46 | SC__PREPARE_OBS_FILTER.out.groupTuple(), 47 | isParamNull(tool) ? 'NULL' : tool 48 | ) 49 | 50 | emit: 51 | out 52 | 53 | } 54 | -------------------------------------------------------------------------------- /src/utils/workflows/finalize.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | include { 4 | SC__H5AD_TO_FILTERED_LOOM 5 | } from './../processes/h5adToLoom.nf' params(params) 6 | include { 7 | FILE_CONVERTER as FILE_CONVERTER_TO_SCOPE; 8 | FILE_CONVERTER as FILE_CONVERTER_TO_SCANPY; 9 | } from "./fileConverter" 10 | 11 | // Convert to 12 | // - SCope-ready 13 | // - Scanpy-ready files 14 | workflow FINALIZE { 15 | 16 | take: 17 | rawFilteredData 18 | finalProcessedData 19 | fileOutputSuffix 20 | 21 | main: 22 | // Conversion 23 | // Convert h5ad to X (here we choose: loom format) 24 | filteredloom = SC__H5AD_TO_FILTERED_LOOM( rawFilteredData ) 25 | FILE_CONVERTER_TO_SCOPE( 26 | finalProcessedData.groupTuple(), 27 | fileOutputSuffix, 28 | 'mergeToSCopeLoom', 29 | rawFilteredData 30 | ) 31 | FILE_CONVERTER_TO_SCANPY( 32 | finalProcessedData.groupTuple(), 33 | fileOutputSuffix, 34 | 'mergeToScanpyH5ad', 35 | rawFilteredData 36 | ) 37 | 38 | emit: 39 | filteredloom 40 | scopeloom = FILE_CONVERTER_TO_SCOPE.out 41 | scanpyh5ad = FILE_CONVERTER_TO_SCANPY.out 42 | 43 | } -------------------------------------------------------------------------------- /src/utils/workflows/updateFeatureNomenclature.nf: -------------------------------------------------------------------------------- 1 | /* 2 | * Conversion workflow 3 | * Source: 4 | * 5 | */ 6 | 7 | nextflow.enable.dsl=2 8 | 9 | ////////////////////////////////////////////////////// 10 | // process imports: 11 | 12 | // Imports 13 | include { 14 | SC__UTILS__EXTRACT_FEATURE_METADATA; 15 | } from './../processes/h5adExtractMetadata' params(params) 16 | include { 17 | FLYBASER__CONVERT_FBGN_TO_GENE_SYMBOL; 18 | } from './../../flybaser/processes/convertNomenclature' params(params) 19 | include { 20 | SC__UTILS__UPDATE_FEATURE_METADATA_INDEX; 21 | } from './../processes/h5adUpdateMetadata' params(params) 22 | 23 | ////////////////////////////////////////////////////// 24 | // Define the workflow 25 | 26 | workflow UPDATE_FEATURE_NOMENCLATURE { 27 | 28 | take: 29 | // Expects (sampleId, data) 30 | data 31 | 32 | main: 33 | SC__UTILS__EXTRACT_FEATURE_METADATA( data ) 34 | FLYBASER__CONVERT_FBGN_TO_GENE_SYMBOL( SC__UTILS__EXTRACT_FEATURE_METADATA.out ) 35 | out = SC__UTILS__UPDATE_FEATURE_METADATA_INDEX( data.join(FLYBASER__CONVERT_FBGN_TO_GENE_SYMBOL.out) ) 36 | 37 | emit: 38 | out 39 | 40 | } 41 | -------------------------------------------------------------------------------- /workflows/single_sample.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | // Utils 4 | include { 5 | clean; 6 | SC__FILE_CONVERTER; 7 | } from '../src/utils/processes/utils.nf' params(params) 8 | 9 | // Pipeline 10 | include { 11 | SINGLE_SAMPLE as SCANPY__SINGLE_SAMPLE; 12 | } from '../src/scanpy/workflows/single_sample.nf' params(params) 13 | include { 14 | SC__SCANPY__CLUSTERING_PARAMS; 15 | } from '../src/scanpy/processes/cluster.nf' params(params) 16 | include { 17 | SC__DIRECTS__SELECT_DEFAULT_CLUSTERING 18 | } from '../src/directs/processes/selectDefaultClustering.nf' 19 | 20 | workflow single_sample { 21 | 22 | take: 23 | data 24 | 25 | main: 26 | 
/******************************************* 27 | * Run the pipeline 28 | */ 29 | SC__FILE_CONVERTER( data ) 30 | SCANPY__SINGLE_SAMPLE( SC__FILE_CONVERTER.out ) 31 | 32 | // Define the parameters for clustering 33 | def clusteringParams = SC__SCANPY__CLUSTERING_PARAMS( clean(params.tools.scanpy.clustering) ) 34 | 35 | // Select a default clustering when in parameter exploration mode 36 | if(params.tools?.directs && clusteringParams.isParameterExplorationModeOn()) { 37 | scopeloom = SC__DIRECTS__SELECT_DEFAULT_CLUSTERING( 38 | SCANPY__SINGLE_SAMPLE.out.final_processed_scope_loom 39 | ) 40 | } else { 41 | scopeloom = SCANPY__SINGLE_SAMPLE.out.final_processed_scope_loom 42 | } 43 | 44 | emit: 45 | filteredloom = SCANPY__SINGLE_SAMPLE.out.filtered_loom 46 | scanpyh5ad = SCANPY__SINGLE_SAMPLE.out.final_processed_scanpy_h5ad 47 | scopeloom = scopeloom 48 | 49 | } 50 | -------------------------------------------------------------------------------- /workflows/star.nf: -------------------------------------------------------------------------------- 1 | nextflow.enable.dsl=2 2 | 3 | ////////////////////////////////////////////////////// 4 | // Define the parameters for the current testing process 5 | 6 | include { 7 | SC__STAR__LOAD_GENOME; 8 | } from '../src/star/processes/load_genome' params(params) 9 | include { 10 | SC__STAR__MAP_COUNT; 11 | } from '../src/star/processes/map_count' params(params) 12 | include { 13 | SC__STAR__UNLOAD_GENOME; 14 | } from '../src/star/processes/unload_genome' params(params) 15 | include { 16 | SC__STAR_CONCATENATOR; 17 | } from '../src/utils/processes/utils.nf' params(params) 18 | 19 | include { 20 | getChannel as getSingleEndChannel; 21 | } from '../src/channels/singleend.nf' params(params) 22 | 23 | ////////////////////////////////////////////////////// 24 | // Define the workflow 25 | 26 | /* 27 | * Run the STAR workflow on the single-end FASTQ files specified. 28 | */ 29 | workflow star { 30 | 31 | main: 32 | SC__STAR__LOAD_GENOME( file(params.tools.star.map_count.index) ) 33 | SC__STAR__MAP_COUNT( 34 | file(params.tools.star.map_count.index), 35 | SC__STAR__LOAD_GENOME.out, 36 | getSingleEndChannel(params.tools.star.map_count.fastqs) 37 | ) 38 | SC__STAR__UNLOAD_GENOME( 39 | file(params.tools.star.map_count.index), 40 | SC__STAR__MAP_COUNT.out.isDone.collect() 41 | ) 42 | SC__STAR_CONCATENATOR( SC__STAR__MAP_COUNT.out.counts.map { it[1] }.collect() ) 43 | 44 | emit: 45 | SC__STAR_CONCATENATOR.out 46 | 47 | } 48 | --------------------------------------------------------------------------------
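To close, a minimal parameter sketch for the star entry point above, with hypothetical input paths; the tools.star.map_count keys are the ones the workflow reads, and the utils.star_concatenator block mirrors src/utils/conf/star_concatenate.config (the stranded value must be one of the strand_options defined in sc_star_concatenator.py):

params {
    global {
        outdir = 'out'
    }
    tools {
        star {
            map_count {
                index  = '/path/to/star/index'      // hypothetical STAR genome index
                fastqs = 'data/fastqs/*.fastq.gz'   // hypothetical single-end FASTQ glob
            }
        }
    }
    utils {
        star_concatenator {
            stranded = 'no'  // one of: no, forward, reverse
            off = 'tsv'
        }
    }
}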