├── .devcontainer └── devcontainer.json ├── .editorconfig ├── .gitattributes ├── .github ├── .dockstore.yml ├── CONTRIBUTING.md ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ ├── config.yml │ └── feature_request.yml ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── awsfulltest.yml │ ├── awstest.yml │ ├── branch.yml │ ├── ci.yml │ ├── clean-up.yml │ ├── fix-linting.yml │ ├── linting.yml │ └── linting_comment.yml ├── .gitignore ├── .gitpod.yml ├── .nf-core.yml ├── .pre-commit-config.yaml ├── .prettierignore ├── .prettierrc.yml ├── CHANGELOG.md ├── CITATIONS.md ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── assets ├── adaptivecard.json ├── email_template.html ├── email_template.txt ├── methods_description_template.yml ├── multiqc_config.yml ├── nf-core-hic_logo_light.png ├── samplesheet.csv ├── schema_input.json ├── sendmail_template.txt └── slackreport.json ├── bin ├── build_matrix ├── check_samplesheet.py ├── cutsite_trimming ├── digest_genome.py ├── hicpro_merge_validpairs.sh ├── mapped_2hic_dnase.py ├── mapped_2hic_fragments.py ├── mergeSAM.py ├── merge_statfiles.py └── src │ ├── build_matrix.cpp │ └── cutsite_trimming.cpp ├── conf ├── base.config ├── igenomes.config ├── modules.config ├── public_aws_ecr.config ├── test.config └── test_full.config ├── docs ├── README.md ├── images │ ├── mqc_fastqc_adapter.png │ ├── mqc_fastqc_counts.png │ ├── mqc_fastqc_quality.png │ ├── nf-core-hic_logo_dark.png │ ├── nf-core-hic_logo_light.png │ ├── nfcore-hic_logo.png │ └── nfcore-hic_logo.svg ├── output.md └── usage.md ├── environment.yml ├── lib ├── NfcoreSchema.groovy ├── NfcoreTemplate.groovy ├── Utils.groovy ├── WorkflowHic.groovy ├── WorkflowMain.groovy └── nfcore_external_java_deps.jar ├── main.nf ├── modules.json ├── modules ├── local │ ├── cooltools │ │ ├── eigscis.nf │ │ └── insulation.nf │ ├── hicexplorer │ │ ├── hicFindTADs.nf │ │ └── hicPlotDistVsCounts.nf │ ├── hicpro │ │ ├── bowtie2_merge.nf │ │ ├── build_contact_maps.nf │ │ ├── combine_mates.nf │ │ ├── dnase_mapping_stats.nf │ │ ├── get_restriction_fragments.nf │ │ ├── get_valid_interaction.nf │ │ ├── get_valid_interaction_dnase.nf │ │ ├── hicpro2pairs.nf │ │ ├── merge_stats.nf │ │ ├── merge_valid_interaction.nf │ │ ├── run_ice.nf │ │ └── trim_reads.nf │ ├── multiqc.nf │ ├── samplesheet_check.nf │ └── split_cooler_dump.nf └── nf-core │ ├── bowtie2 │ ├── align │ │ ├── main.nf │ │ └── meta.yml │ └── build │ │ ├── main.nf │ │ └── meta.yml │ ├── cooler │ ├── balance │ │ ├── main.nf │ │ └── meta.yml │ ├── cload │ │ ├── main.nf │ │ └── meta.yml │ ├── dump │ │ ├── main.nf │ │ └── meta.yml │ ├── makebins │ │ ├── main.nf │ │ └── meta.yml │ └── zoomify │ │ ├── main.nf │ │ └── meta.yml │ ├── custom │ ├── dumpsoftwareversions │ │ ├── main.nf │ │ ├── meta.yml │ │ └── templates │ │ │ └── dumpsoftwareversions.py │ └── getchromsizes │ │ ├── main.nf │ │ └── meta.yml │ └── fastqc │ ├── main.nf │ └── meta.yml ├── nextflow.config ├── nextflow_schema.json ├── pyproject.toml ├── subworkflows └── local │ ├── compartments.nf │ ├── cooler.nf │ ├── hicpro.nf │ ├── hicpro_mapping.nf │ ├── input_check.nf │ ├── prepare_genome.nf │ └── tads.nf ├── tower.yml └── workflows └── hic.nf /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "nfcore", 3 | "image": "nfcore/gitpod:latest", 4 | "remoteUser": "gitpod", 5 | 6 | // Configure tool-specific properties. 7 | "customizations": { 8 | // Configure properties specific to VS Code. 
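// Note: the paths below assume the nfcore/gitpod image referenced above, which
// ships its Python tooling in a conda environment under /opt/conda.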
9 |         "vscode": {
10 |             // Set *default* container specific settings.json values on container create.
11 |             "settings": {
12 |                 "python.defaultInterpreterPath": "/opt/conda/bin/python",
13 |                 "python.linting.enabled": true,
14 |                 "python.linting.pylintEnabled": true,
15 |                 "python.formatting.autopep8Path": "/opt/conda/bin/autopep8",
16 |                 "python.formatting.yapfPath": "/opt/conda/bin/yapf",
17 |                 "python.linting.flake8Path": "/opt/conda/bin/flake8",
18 |                 "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle",
19 |                 "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle",
20 |                 "python.linting.pylintPath": "/opt/conda/bin/pylint"
21 |             },
22 |
23 |             // Add the IDs of extensions you want installed when the container is created.
24 |             "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"]
25 |         }
26 |     }
27 | }
28 |
--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 |
3 | [*]
4 | charset = utf-8
5 | end_of_line = lf
6 | insert_final_newline = true
7 | trim_trailing_whitespace = true
8 | indent_size = 4
9 | indent_style = space
10 |
11 | [*.{md,yml,yaml,html,css,scss,js}]
12 | indent_size = 2
13 |
14 | # These files are edited and tested upstream in nf-core/modules
15 | [/modules/nf-core/**]
16 | charset = unset
17 | end_of_line = unset
18 | insert_final_newline = unset
19 | trim_trailing_whitespace = unset
20 | indent_style = unset
21 | indent_size = unset
22 |
23 | [/assets/email*]
24 | indent_size = unset
25 |
26 | # C++ compiled code
27 | [/bin/cutsite_trimming]
28 | end_of_line = unset
29 | insert_final_newline = unset
30 | trim_trailing_whitespace = unset
31 | indent_style = unset
32 | indent_size = unset
33 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.config linguist-language=nextflow
2 | *.nf.test linguist-language=nextflow
3 | modules/nf-core/** linguist-generated
4 | subworkflows/nf-core/** linguist-generated
5 |
--------------------------------------------------------------------------------
/.github/.dockstore.yml:
--------------------------------------------------------------------------------
1 | # Dockstore config version, not pipeline version
2 | version: 1.2
3 | workflows:
4 |   - subclass: nfl
5 |     primaryDescriptorPath: /nextflow.config
6 |     publish: True
7 |
--------------------------------------------------------------------------------
/.github/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # nf-core/hic: Contributing Guidelines
2 |
3 | Hi there!
4 | Many thanks for taking an interest in improving nf-core/hic.
5 |
6 | We try to manage the required tasks for nf-core/hic using GitHub issues; you probably came to this page when creating one.
7 | Please use the pre-filled template to save time.
8 |
9 | However, don't be put off by this template - other more general issues and suggestions are welcome!
10 | Contributions to the code are even more welcome ;)
11 |
12 | > If you need help using or modifying nf-core/hic then the best place to ask is on the nf-core Slack [#hic](https://nfcore.slack.com/channels/hic) channel ([join our Slack here](https://nf-co.re/join/slack)).
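If you want to jump straight into the code, a typical first-time setup looks roughly like the sketch below (`<your-username>` and the branch name are placeholders — substitute your own; the full workflow is described in the next section):

```bash
# Fork nf-core/hic on GitHub first, then clone your fork
git clone https://github.com/<your-username>/hic.git
cd hic

# Create a feature branch from dev (pull requests are made against dev)
git checkout -b my-feature origin/dev

# Run the same lint checks as the CI before pushing
pip install nf-core
nf-core lint
```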
13 |
14 | ## Contribution workflow
15 |
16 | If you'd like to write some code for nf-core/hic, the standard workflow is as follows:
17 |
18 | 1. Check that there isn't already an issue about your idea in the [nf-core/hic issues](https://github.com/nf-core/hic/issues) to avoid duplicating work. If there isn't one already, please create one so that others know you're working on this
19 | 2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [nf-core/hic repository](https://github.com/nf-core/hic) to your GitHub account
20 | 3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions)
21 | 4. Use `nf-core schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10).
22 | 5. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged
23 |
24 | If you're not used to this workflow with git, you can start with some [docs from GitHub](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests) or even their [excellent `git` resources](https://try.github.io/).
25 |
26 | ## Tests
27 |
28 | When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests.
29 | Typically, pull requests are only fully reviewed when these tests are passing, though of course we can help out before then.
30 |
31 | There are typically two types of tests that run:
32 |
33 | ### Lint tests
34 |
35 | `nf-core` has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to.
36 | To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint` command.
37 |
38 | If any failures or warnings are encountered, please follow the listed URL for more documentation.
39 |
40 | ### Pipeline tests
41 |
42 | Each `nf-core` pipeline should be set up with a minimal set of test data.
43 | `GitHub Actions` then runs the pipeline on this data to ensure that it exits successfully.
44 | If there are any failures then the automated tests fail.
45 | These tests are run both with the latest available version of `Nextflow` and also the minimum required version that is stated in the pipeline code.
46 |
47 | ## Patch
48 |
49 | :warning: Only in the unlikely and regretful event of a release happening with a bug.
50 |
51 | - On your own fork, make a new branch `patch` based on `upstream/master`.
52 | - Fix the bug, and bump version (X.Y.Z+1).
53 | - A PR should be made on `master` from `patch` to directly fix this particular bug.
54 |
55 | ## Getting help
56 |
57 | For further information/help, please consult the [nf-core/hic documentation](https://nf-co.re/hic/usage) and don't hesitate to get in touch on the nf-core Slack [#hic](https://nfcore.slack.com/channels/hic) channel ([join our Slack here](https://nf-co.re/join/slack)).
58 |
59 | ## Pipeline contribution conventions
60 |
61 | To make the nf-core/hic code and processing logic more understandable for new contributors and to ensure quality, we semi-standardise the way the code and other contributions are written.
62 |
63 | ### Adding a new step
64 |
65 | If you wish to contribute a new step, please use the following coding standards:
66 |
67 | 1.
Define the corresponding input channel into your new process from the expected previous process channel
68 | 2. Write the process block (see below).
69 | 3. Define the output channel if needed (see below).
70 | 4. Add any new parameters to `nextflow.config` with a default (see below).
71 | 5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core schema build` tool).
72 | 6. Add sanity checks and validation for all relevant parameters.
73 | 7. Perform local tests to validate that the new code works as expected.
74 | 8. If applicable, add a new test command in `.github/workflow/ci.yml`.
75 | 9. Update MultiQC config `assets/multiqc_config.yml` so relevant suffixes, file name clean up and module plots are in the appropriate order. If applicable, add a [MultiQC](https://multiqc.info/) module.
76 | 10. Add a description of the output files and if relevant any appropriate images from the MultiQC report to `docs/output.md`.
77 |
78 | ### Default values
79 |
80 | Parameters should be initialised / defined with default values in `nextflow.config` under the `params` scope.
81 |
82 | Once there, use `nf-core schema build` to add to `nextflow_schema.json`.
83 |
84 | ### Default processes resource requirements
85 |
86 | Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generically with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. An nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single-core process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels.
87 |
88 | The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block (see the example sketch below).
89 |
90 | ### Naming schemes
91 |
92 | Please use the following naming schemes, to make it easy to understand what is going where.
93 |
94 | - initial process channel: `ch_output_from_<process>`
95 | - intermediate and terminal channels: `ch_<previousprocess>_for_<nextprocess>`
96 |
97 | ### Nextflow version bumping
98 |
99 | If you are using a new feature from core Nextflow, you may bump the minimum required version of Nextflow in the pipeline with: `nf-core bump-version --nextflow . [min-nf-version]`
100 |
101 | ### Images and figures
102 |
103 | For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines).
104 |
105 | ## GitHub Codespaces
106 |
107 | This repo includes a devcontainer configuration which will create a GitHub Codespace for Nextflow development! This is an online developer environment that runs in your browser, complete with VSCode and a terminal.
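As a brief aside before the Codespaces quick-start: below is the example process referenced in the conventions above. It is a minimal, hypothetical sketch — the process name, tool choice and output names are invented for illustration — showing a resource label, dynamic `${task.cpus}` usage and a version-reporting pattern:

```nextflow
// Hypothetical example process — names and tool are for illustration only
process MY_NEW_STEP {
    tag "$meta.id"
    label 'process_medium' // resources resolved via withLabel selectors in conf/base.config

    input:
    tuple val(meta), path(bam)

    output:
    tuple val(meta), path("*.stats.txt"), emit: stats
    path "versions.yml", emit: versions

    script:
    """
    samtools flagstat -@ ${task.cpus} $bam > ${meta.id}.stats.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(samtools --version | sed '1!d; s/samtools //')
    END_VERSIONS
    """
}
```

In the calling workflow, the output would then be wired into a channel named following the scheme above, e.g. `ch_stats_for_multiqc`.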
108 | 109 | To get started: 110 | 111 | - Open the repo in [Codespaces](https://github.com/nf-core/hic/codespaces) 112 | - Tools installed 113 | - nf-core 114 | - Nextflow 115 | 116 | Devcontainer specs: 117 | 118 | - [DevContainer config](.devcontainer/devcontainer.json) 119 | - [Dockerfile](.devcontainer/Dockerfile) 120 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Bug report 2 | description: Report something that is broken or incorrect 3 | labels: bug 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: | 8 | Before you post this issue, please check the documentation: 9 | 10 | - [nf-core website: troubleshooting](https://nf-co.re/usage/troubleshooting) 11 | - [nf-core/hic pipeline documentation](https://nf-co.re/hic/usage) 12 | 13 | - type: textarea 14 | id: description 15 | attributes: 16 | label: Description of the bug 17 | description: A clear and concise description of what the bug is. 18 | validations: 19 | required: true 20 | 21 | - type: textarea 22 | id: command_used 23 | attributes: 24 | label: Command used and terminal output 25 | description: Steps to reproduce the behaviour. Please paste the command you used to launch the pipeline and the output from your terminal. 26 | render: console 27 | placeholder: | 28 | $ nextflow run ... 29 | 30 | Some output where something broke 31 | 32 | - type: textarea 33 | id: files 34 | attributes: 35 | label: Relevant files 36 | description: | 37 | Please drag and drop the relevant files here. Create a `.zip` archive if the extension is not allowed. 38 | Your verbose log file `.nextflow.log` is often useful _(this is a hidden file in the directory where you launched the pipeline)_ as well as custom Nextflow configuration files. 39 | 40 | - type: textarea 41 | id: system 42 | attributes: 43 | label: System information 44 | description: | 45 | * Nextflow version _(eg. 22.10.1)_ 46 | * Hardware _(eg. HPC, Desktop, Cloud)_ 47 | * Executor _(eg. slurm, local, awsbatch)_ 48 | * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ 49 | * OS _(eg. CentOS Linux, macOS, Linux Mint)_ 50 | * Version of nf-core/hic _(eg. 1.1, 1.5, 1.8.2)_ 51 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | contact_links: 2 | - name: Join nf-core 3 | url: https://nf-co.re/join 4 | about: Please join the nf-core community here 5 | - name: "Slack #hic channel" 6 | url: https://nfcore.slack.com/channels/hic 7 | about: Discussion about the nf-core/hic pipeline 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: Feature request 2 | description: Suggest an idea for the nf-core/hic pipeline 3 | labels: enhancement 4 | body: 5 | - type: textarea 6 | id: description 7 | attributes: 8 | label: Description of feature 9 | description: Please describe your suggestion for a new feature. It might help to describe a problem or use case, plus any alternatives that you have considered. 
10 | validations: 11 | required: true 12 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 13 | 14 | ## PR checklist 15 | 16 | - [ ] This comment contains a description of changes (with reason). 17 | - [ ] If you've fixed a bug or added code that should be tested, add tests! 18 | - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/hic/tree/master/.github/CONTRIBUTING.md) 19 | - [ ] If necessary, also make a PR on the nf-core/hic _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. 20 | - [ ] Make sure your code lints (`nf-core lint`). 21 | - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). 22 | - [ ] Usage Documentation in `docs/usage.md` is updated. 23 | - [ ] Output Documentation in `docs/output.md` is updated. 24 | - [ ] `CHANGELOG.md` is updated. 25 | - [ ] `README.md` is updated (including new tool citations and authors/contributors). 26 | -------------------------------------------------------------------------------- /.github/workflows/awsfulltest.yml: -------------------------------------------------------------------------------- 1 | name: nf-core AWS full size tests 2 | # This workflow is triggered on published releases. 3 | # It can be additionally triggered manually with GitHub actions workflow dispatch button. 4 | # It runs the -profile 'test_full' on AWS batch 5 | 6 | on: 7 | release: 8 | types: [published] 9 | workflow_dispatch: 10 | jobs: 11 | run-tower: 12 | name: Run AWS full tests 13 | if: github.repository == 'nf-core/hic' 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Launch workflow via tower 17 | uses: nf-core/tower-action@v3 18 | with: 19 | workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} 20 | access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} 21 | compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} 22 | workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/hic/work-${{ github.sha }} 23 | parameters: | 24 | { 25 | "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/hic/results-${{ github.sha }}" 26 | } 27 | profiles: test_full,public_aws_ecr 28 | - uses: actions/upload-artifact@v3 29 | with: 30 | name: Tower debug log file 31 | path: tower_action_*.log 32 | -------------------------------------------------------------------------------- /.github/workflows/awstest.yml: -------------------------------------------------------------------------------- 1 | name: nf-core AWS test 2 | # This workflow can be triggered manually with the GitHub actions workflow dispatch button. 
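# (In the GitHub UI: Actions tab → "nf-core AWS test" → "Run workflow")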
3 | # It runs the -profile 'test' on AWS batch
4 |
5 | on:
6 |   workflow_dispatch:
7 | jobs:
8 |   run-tower:
9 |     name: Run AWS tests
10 |     if: github.repository == 'nf-core/hic'
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |       # Launch workflow using Tower CLI tool action
14 |       - name: Launch workflow via tower
15 |         uses: seqeralabs/action-tower-launch@v1
16 |         with:
17 |           workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
18 |           access_token: ${{ secrets.TOWER_ACCESS_TOKEN }}
19 |           compute_env: ${{ secrets.TOWER_COMPUTE_ENV }}
20 |           workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/hic/work-${{ github.sha }}
21 |           parameters: |
22 |             {
23 |               "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/hic/results-test-${{ github.sha }}"
24 |             }
25 |           profiles: test,public_aws_ecr
26 |       - uses: actions/upload-artifact@v3
27 |         with:
28 |           name: Tower debug log file
29 |           path: tower_action_*.log
30 |
--------------------------------------------------------------------------------
/.github/workflows/branch.yml:
--------------------------------------------------------------------------------
1 | name: nf-core branch protection
2 | # This workflow is triggered on PRs to master branch on the repository
3 | # It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev`
4 | on:
5 |   pull_request_target:
6 |     branches: [master]
7 |
8 | jobs:
9 |   test:
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |       # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches
13 |       - name: Check PRs
14 |         if: github.repository == 'nf-core/hic'
15 |         run: |
16 |           { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/hic ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]]
17 |
18 |       # If the above check failed, post a comment on the PR explaining the failure
19 |       # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets
20 |       - name: Post PR comment
21 |         if: failure()
22 |         uses: mshick/add-pr-comment@v1
23 |         with:
24 |           message: |
25 |             ## This PR is against the `master` branch :x:
26 |
27 |             * Do not close this PR
28 |             * Click _Edit_ and change the `base` to `dev`
29 |             * This CI test will remain failed until you push a new commit
30 |
31 |             ---
32 |
33 |             Hi @${{ github.event.pull_request.user.login }},
34 |
35 |             It looks like this pull-request has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `master` branch.
36 |             The `master` branch on nf-core repositories should always contain code from the latest release.
37 |             Because of this, PRs to `master` are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch.
38 |
39 |             You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page.
40 |             Note that even after this, the test will continue to show as failing until you push a new commit.
41 |
42 |             Thanks again for your contribution!
43 | repo-token: ${{ secrets.GITHUB_TOKEN }} 44 | allow-repeats: false 45 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: nf-core CI 2 | # This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors 3 | on: 4 | push: 5 | branches: 6 | - dev 7 | pull_request: 8 | release: 9 | types: [published] 10 | 11 | env: 12 | NXF_ANSI_LOG: false 13 | 14 | concurrency: 15 | group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | test: 20 | name: Run pipeline with test data 21 | # Only run on push if this is the nf-core dev branch (merged PRs) 22 | if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/hic') }}" 23 | runs-on: ubuntu-latest 24 | strategy: 25 | matrix: 26 | NXF_VER: 27 | - "22.10.1" 28 | - "latest-everything" 29 | steps: 30 | - name: Check out pipeline code 31 | uses: actions/checkout@v3 32 | 33 | - name: Install Nextflow 34 | uses: nf-core/setup-nextflow@v1.3.0 35 | with: 36 | version: "${{ matrix.NXF_VER }}" 37 | 38 | - name: Run pipeline with test data 39 | run: | 40 | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results 41 | -------------------------------------------------------------------------------- /.github/workflows/clean-up.yml: -------------------------------------------------------------------------------- 1 | name: "Close user-tagged issues and PRs" 2 | on: 3 | schedule: 4 | - cron: "0 0 * * 0" # Once a week 5 | 6 | jobs: 7 | clean-up: 8 | runs-on: ubuntu-latest 9 | permissions: 10 | issues: write 11 | pull-requests: write 12 | steps: 13 | - uses: actions/stale@v7 14 | with: 15 | stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." 16 | stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." 17 | close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity." 
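# Timing: items carrying the labels below go stale after `days-before-stale` days
# and close `days-before-close` days later; `days-before-pr-close: -1` means
# stale PRs are never closed automatically.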
18 | days-before-stale: 30 19 | days-before-close: 20 20 | days-before-pr-close: -1 21 | any-of-labels: "awaiting-changes,awaiting-feedback" 22 | exempt-issue-labels: "WIP" 23 | exempt-pr-labels: "WIP" 24 | repo-token: "${{ secrets.GITHUB_TOKEN }}" 25 | -------------------------------------------------------------------------------- /.github/workflows/fix-linting.yml: -------------------------------------------------------------------------------- 1 | name: Fix linting from a comment 2 | on: 3 | issue_comment: 4 | types: [created] 5 | 6 | jobs: 7 | deploy: 8 | # Only run if comment is on a PR with the main repo, and if it contains the magic keywords 9 | if: > 10 | contains(github.event.comment.html_url, '/pull/') && 11 | contains(github.event.comment.body, '@nf-core-bot fix linting') && 12 | github.repository == 'nf-core/hic' 13 | runs-on: ubuntu-latest 14 | steps: 15 | # Use the @nf-core-bot token to check out so we can push later 16 | - uses: actions/checkout@v3 17 | with: 18 | token: ${{ secrets.nf_core_bot_auth_token }} 19 | 20 | # Action runs on the issue comment, so we don't get the PR by default 21 | # Use the gh cli to check out the PR 22 | - name: Checkout Pull Request 23 | run: gh pr checkout ${{ github.event.issue.number }} 24 | env: 25 | GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} 26 | 27 | - uses: actions/setup-node@v3 28 | 29 | - name: Install Prettier 30 | run: npm install -g prettier @prettier/plugin-php 31 | 32 | # Check that we actually need to fix something 33 | - name: Run 'prettier --check' 34 | id: prettier_status 35 | run: | 36 | if prettier --check ${GITHUB_WORKSPACE}; then 37 | echo "result=pass" >> $GITHUB_OUTPUT 38 | else 39 | echo "result=fail" >> $GITHUB_OUTPUT 40 | fi 41 | 42 | - name: Run 'prettier --write' 43 | if: steps.prettier_status.outputs.result == 'fail' 44 | run: prettier --write ${GITHUB_WORKSPACE} 45 | 46 | - name: Commit & push changes 47 | if: steps.prettier_status.outputs.result == 'fail' 48 | run: | 49 | git config user.email "core@nf-co.re" 50 | git config user.name "nf-core-bot" 51 | git config push.default upstream 52 | git add . 53 | git status 54 | git commit -m "[automated] Fix linting with Prettier" 55 | git push 56 | -------------------------------------------------------------------------------- /.github/workflows/linting.yml: -------------------------------------------------------------------------------- 1 | name: nf-core linting 2 | # This workflow is triggered on pushes and PRs to the repository. 3 | # It runs the `nf-core lint` and markdown lint tests to ensure 4 | # that the code meets the nf-core guidelines. 
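# Tip: the same checks can be run locally with `nf-core lint` and
# `prettier --check .` before pushing, to catch failures early.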
5 | on: 6 | push: 7 | branches: 8 | - dev 9 | pull_request: 10 | release: 11 | types: [published] 12 | 13 | jobs: 14 | EditorConfig: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v3 18 | 19 | - uses: actions/setup-node@v3 20 | 21 | - name: Install editorconfig-checker 22 | run: npm install -g editorconfig-checker 23 | 24 | - name: Run ECLint check 25 | run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') 26 | 27 | Prettier: 28 | runs-on: ubuntu-latest 29 | steps: 30 | - uses: actions/checkout@v3 31 | 32 | - uses: actions/setup-node@v3 33 | 34 | - name: Install Prettier 35 | run: npm install -g prettier 36 | 37 | - name: Run Prettier --check 38 | run: prettier --check ${GITHUB_WORKSPACE} 39 | 40 | PythonBlack: 41 | runs-on: ubuntu-latest 42 | steps: 43 | - uses: actions/checkout@v3 44 | 45 | - name: Check code lints with Black 46 | uses: psf/black@stable 47 | 48 | # If the above check failed, post a comment on the PR explaining the failure 49 | - name: Post PR comment 50 | if: failure() 51 | uses: mshick/add-pr-comment@v1 52 | with: 53 | message: | 54 | ## Python linting (`black`) is failing 55 | 56 | To keep the code consistent with lots of contributors, we run automated code consistency checks. 57 | To fix this CI test, please run: 58 | 59 | * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` 60 | * Fix formatting errors in your pipeline: `black .` 61 | 62 | Once you push these changes the test should pass, and you can hide this comment :+1: 63 | 64 | We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! 65 | 66 | Thanks again for your contribution! 
67 | repo-token: ${{ secrets.GITHUB_TOKEN }} 68 | allow-repeats: false 69 | 70 | nf-core: 71 | runs-on: ubuntu-latest 72 | steps: 73 | - name: Check out pipeline code 74 | uses: actions/checkout@v3 75 | 76 | - name: Install Nextflow 77 | uses: nf-core/setup-nextflow@v1 78 | 79 | - uses: actions/setup-python@v4 80 | with: 81 | python-version: "3.8" 82 | architecture: "x64" 83 | 84 | - name: Install dependencies 85 | run: | 86 | python -m pip install --upgrade pip 87 | pip install nf-core 88 | 89 | - name: Run nf-core lint 90 | env: 91 | GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} 92 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 93 | GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} 94 | run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md 95 | 96 | - name: Save PR number 97 | if: ${{ always() }} 98 | run: echo ${{ github.event.pull_request.number }} > PR_number.txt 99 | 100 | - name: Upload linting log file artifact 101 | if: ${{ always() }} 102 | uses: actions/upload-artifact@v3 103 | with: 104 | name: linting-logs 105 | path: | 106 | lint_log.txt 107 | lint_results.md 108 | PR_number.txt 109 | -------------------------------------------------------------------------------- /.github/workflows/linting_comment.yml: -------------------------------------------------------------------------------- 1 | name: nf-core linting comment 2 | # This workflow is triggered after the linting action is complete 3 | # It posts an automated comment to the PR, even if the PR is coming from a fork 4 | 5 | on: 6 | workflow_run: 7 | workflows: ["nf-core linting"] 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Download lint results 14 | uses: dawidd6/action-download-artifact@v2 15 | with: 16 | workflow: linting.yml 17 | workflow_conclusion: completed 18 | 19 | - name: Get PR number 20 | id: pr_number 21 | run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT 22 | 23 | - name: Post PR comment 24 | uses: marocchino/sticky-pull-request-comment@v2 25 | with: 26 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 27 | number: ${{ steps.pr_number.outputs.pr_number }} 28 | path: linting-logs/lint_results.md 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .nextflow* 2 | work/ 3 | data/ 4 | results/ 5 | .DS_Store 6 | testing/ 7 | testing* 8 | *.pyc 9 | -------------------------------------------------------------------------------- /.gitpod.yml: -------------------------------------------------------------------------------- 1 | image: nfcore/gitpod:latest 2 | 3 | vscode: 4 | extensions: # based on nf-core.nf-core-extensionpack 5 | - codezombiech.gitignore # Language support for .gitignore files 6 | # - cssho.vscode-svgviewer # SVG viewer 7 | - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code 8 | - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed 9 | - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files 10 | - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar 11 | - mechatroner.rainbow-csv # Highlight columns in csv files in different colors 12 | # - nextflow.nextflow # Nextflow syntax highlighting 13 | - oderwat.indent-rainbow # Highlight indentation level 14 | - streetsidesoftware.code-spell-checker # Spelling 
checker for source code
15 |
--------------------------------------------------------------------------------
/.nf-core.yml:
--------------------------------------------------------------------------------
1 | repository_type: pipeline
2 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 |   - repo: https://github.com/pre-commit/mirrors-prettier
3 |     rev: "v2.7.1"
4 |     hooks:
5 |       - id: prettier
6 |
--------------------------------------------------------------------------------
/.prettierignore:
--------------------------------------------------------------------------------
1 | email_template.html
2 | adaptivecard.json
3 | slackreport.json
4 | .nextflow*
5 | work/
6 | data/
7 | results/
8 | .DS_Store
9 | testing/
10 | testing*
11 | *.pyc
12 | bin/
13 |
--------------------------------------------------------------------------------
/.prettierrc.yml:
--------------------------------------------------------------------------------
1 | printWidth: 120
2 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # nf-core/hic: Changelog
2 |
3 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
4 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
5 |
6 | ## v2.1.0 - 2023-06-01
7 |
8 | ### `Added`
9 |
10 | - Added public_aws_ecr profile for using containers stored on ECR.
11 |
12 | ### `Fixed`
13 |
14 | ## v2.0.0 - 2023-01-12
15 |
16 | ### `Added`
17 |
18 | - DSL2 version of nf-core-hic pipeline
19 | - Add full test dataset (#80)
20 | - Replace local modules with the cooler nf-core module
21 |
22 | ### `Fixed`
23 |
24 | - Fix error in the Arima preset (#127)
25 |
26 | ## v1.3.1 - 2021-09-25
27 |
28 | ### `Fixed`
29 |
30 | - Fix bug in conda environment for cooltools (#109)
31 |
32 | ## v1.3.0 - 2021-05-22
33 |
34 | - Change the `/tmp/` folder to `./tmp/` folder so that all tmp files are now in the work directory (#24)
35 | - Add `--hicpro_maps` option to generate the raw and normalized HiC-Pro maps. The default is now to use cooler
36 | - Add chromosome compartments calling with cooltools (#53)
37 | - Add HiCExplorer distance decay quality control (#54)
38 | - Add HiCExplorer TADs calling (#55)
39 | - Add insulation score TADs calling (#55)
40 | - Generate cooler/txt contact maps
41 | - Normalize Hi-C data with cooler instead of iced
42 | - New `--digestion` parameter to automatically set the restriction_site and ligation_site motifs
43 | - New `--keep_multi` and `--keep_dups` options.
Default: false
44 | - Template update for nf-core/tools
45 | - Minor fix to summary log messages in pipeline header
46 |
47 | ### `Fixed`
48 |
49 | - Fix bug in stats reports which were not all correctly exported in the results folder
50 | - Fix recurrent bug in input file extension (#86)
51 | - Fix bug in `--bin_size` parameter (#85)
52 | - `--min_mapq` is ignored if `--keep_multi` is used
53 |
54 | ### `Deprecated`
55 |
56 | - `--rm_dup` and `--rm_multi` are replaced by `--keep_dups` and `--keep_multi`
57 |
58 | ## v1.2.2 - 2020-09-02
59 |
60 | ### `Added`
61 |
62 | - Template update for nf-core/tools v1.10.2
63 | - Add the `--fastq_chunks_size` option to specify the number of reads per chunk if split_fastq is true
64 |
65 | ### `Fixed`
66 |
67 | - Bug in `--split_fastq` option not recognized
68 |
69 | ## v1.2.1 - 2020-07-06
70 |
71 | ### `Fixed`
72 |
73 | - Fix issue with `--fasta` option and `.fa` extension (#66)
74 |
75 | ## v1.2.0 - 2020-06-18
76 |
77 | ### `Added`
78 |
79 | - Bump v1.2.0
80 | - Merge template nf-core 1.9
81 | - Move some options to snake_case
82 | - Update python scripts for python3
83 | - Update conda environment file
84 |   - python base `2.7.15` > `3.7.6`
85 |   - pip `19.1` > `20.0.1`
86 |   - scipy `1.2.1` > `1.4.1`
87 |   - numpy `1.16.3` > `1.18.1`
88 |   - bx-python `0.8.2` > `0.8.8`
89 |   - pysam `0.15.2` > `0.15.4`
90 |   - cooler `0.8.5` > `0.8.6`
91 |   - multiqc `1.7` > `1.8`
92 |   - iced `0.5.1` > `0.5.6`
93 |   - _*New*_ pymdown-extensions `7.1`
94 |   - _*New*_ hicexplorer `3.4.3`
95 |   - _*New*_ bioconductor-hitc `1.32.0`
96 |   - _*New*_ r-optparse `1.6.6`
97 |   - _*New*_ ucsc-bedgraphtobigwig `377`
98 |   - _*New*_ cython `0.29.19`
99 |   - _*New*_ cooltools `0.3.2`
100 |   - _*New*_ fanc `0.8.30`
101 |   - _*Removed*_ r-markdown
102 |
103 | ### `Fixed`
104 |
105 | - Fix error in doc for Arima kit usage
106 | - Sort output of `get_valid_interaction` process as the input files of `remove_duplicates`
107 |   are expected to be sorted (sort -m)
108 |
109 | ### `Deprecated`
110 |
111 | - Command line options converted to `snake_case`:
112 |   - `--skipMaps` > `--skip_maps`
113 |   - `--skipIce` > `--skip_ice`
114 |   - `--skipCool` > `--skip_cool`
115 |   - `--skipMultiQC` > `--skip_multiqc`
116 |   - `--saveReference` > `--save_reference`
117 |   - `--saveAlignedIntermediates` > `--save_aligned_intermediates`
118 |   - `--saveInteractionBAM` > `--save_interaction_bam`
119 |
120 | ## v1.1.1 - 2020-04-02
121 |
122 | ### `Fixed`
123 |
124 | - Fix bug in tag. Remove '['
125 |
126 | ## v1.1.0 - 2019-10-15
127 |
128 | ### `Added`
129 |
130 | - Update hicpro2higlass with `-p` parameter
131 | - Support 'N' base motif in restriction/ligation sites
132 | - Support multiple restriction enzymes/ligation sites (comma separated) ([#31](https://github.com/nf-core/hic/issues/31))
133 | - Add --saveInteractionBAM option
134 | - Add DOI ([#29](https://github.com/nf-core/hic/issues/29))
135 | - Update manual ([#28](https://github.com/nf-core/hic/issues/28))
136 |
137 | ### `Fixed`
138 |
139 | - Fix bug for reads extension `_1`/`_2` ([#30](https://github.com/nf-core/hic/issues/30))
140 |
141 | ## v1.0 - [2019-05-06]
142 |
143 | Initial release of nf-core/hic, created with the [nf-core](http://nf-co.re/) template.
144 |
145 | ### `Added`
146 |
147 | First version of the nf-core Hi-C pipeline, which is a Nextflow implementation of
148 | the [HiC-Pro pipeline](https://github.com/nservant/HiC-Pro/).
149 | Note that not all HiC-Pro functionalities are implemented yet.
150 | The current version supports most protocols including Hi-C, in situ Hi-C,
151 | DNase Hi-C, Micro-C, Capture-C and HiChIP data.
152 |
153 | In summary, this version allows:
154 |
155 | - Automatic detection and generation of annotation files based on igenomes
156 |   if not provided.
157 | - Two-step alignment of raw sequencing reads
158 | - Reads filtering and detection of valid interaction products
159 | - Generation of raw contact matrices for a set of resolutions
160 | - Normalization of the contact maps using the ICE algorithm
161 | - Generation of cooler files for visualization on [higlass](https://higlass.io/)
162 | - Quality report based on HiC-Pro MultiQC module
163 |
--------------------------------------------------------------------------------
/CITATIONS.md:
--------------------------------------------------------------------------------
1 | # nf-core/hic: Citations
2 |
3 | ## [HiC-Pro](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-015-0831-x)
4 |
5 | > Servant N, Varoquaux N, Lajoie BR, Viara E, Chen C, Vert JP, Dekker J, Heard E, Barillot E. Genome Biology 2015, 16:259 doi: [10.1186/s13059-015-0831-x](https://dx.doi.org/10.1186/s13059-015-0831-x)
6 |
7 | ## [nf-core](https://pubmed.ncbi.nlm.nih.gov/32055031/)
8 |
9 | > Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031.
10 |
11 | ## [Nextflow](https://pubmed.ncbi.nlm.nih.gov/28398311/)
12 |
13 | > Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311.
14 |
15 | ## Pipeline tools
16 |
17 | - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
18 |
19 | - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
20 |   > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
21 |
22 | ## Software packaging/containerisation tools
23 |
24 | - [Anaconda](https://anaconda.com)
25 |
26 |   > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web.
27 |
28 | - [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/)
29 |
30 |   > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506.
31 |
32 | - [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/)
33 |
34 |   > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671.
35 |
36 | - [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241)
37 |
38 | - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/)
39 |   > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675.
40 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Code of Conduct at nf-core (v1.0)
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core, pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of:
6 |
7 | - Age
8 | - Body size
9 | - Familial status
10 | - Gender identity and expression
11 | - Geographical location
12 | - Level of experience
13 | - Nationality and national origins
14 | - Native language
15 | - Physical and neurological ability
16 | - Race or ethnicity
17 | - Religion
18 | - Sexual identity and orientation
19 | - Socioeconomic status
20 |
21 | Please note that the list above is alphabetised and is therefore not ranked in any order of preference or importance.
22 |
23 | ## Preamble
24 |
25 | > Note: This Code of Conduct (CoC) has been drafted by the nf-core Safety Officer and been edited after input from members of the nf-core team and others. "We", in this document, refers to the Safety Officer and members of the nf-core core team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date, and in case of any dispute, the most current version will apply.
26 |
27 | An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). Our current safety officer is Renuka Kudva.
28 |
29 | nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals.
30 |
31 | We have therefore adopted this code of conduct (CoC), which we require all members of our community and attendees in nf-core events to adhere to in all our workspaces at all times. Workspaces include but are not limited to Slack, meetings on Zoom, Jitsi, YouTube live etc.
32 |
33 | Our CoC will be strictly enforced and the nf-core team reserve the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities.
34 |
35 | We ask all members of our community to help maintain a supportive and productive workspace and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC.
36 |
37 | Questions, concerns or ideas on what we can include? Contact safety [at] nf-co [dot] re
38 |
39 | ## Our Responsibilities
40 |
41 | The safety officer is responsible for clarifying the standards of acceptable behaviour and is expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour.
42 |
43 | The safety officer, in consultation with the nf-core core team, has the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviours that they deem inappropriate, threatening, offensive, or harmful.
44 |
45 | Members of the core team or the safety officer who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and be subject to the same actions as others in violation of the CoC.
46 |
47 | ## When and where does this Code of Conduct apply?
48 |
49 | Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events. This includes but is not limited to the following, listed alphabetically and therefore in no order of preference:
50 |
51 | - Communicating with an official project email address.
52 | - Communicating with community members within the nf-core Slack channel.
53 | - Participating in hackathons organised by nf-core (both online and in-person events).
54 | - Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence.
55 | - Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, Jitsi, YouTube live etc.
56 | - Representing nf-core on social media. This includes both official and personal accounts.
57 |
58 | ## nf-core cares 😊
59 |
60 | nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include but are not limited to the following (listed in alphabetical order):
61 |
62 | - Ask for consent before sharing another community member’s personal information (including photographs) on social media.
63 | - Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity.
64 | - Celebrate your accomplishments at events! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !)
65 | - Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.)
66 | - Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can)
67 | - Focus on what is best for the team and the community. (When in doubt, ask)
68 | - Graciously accept constructive criticism, yet be unafraid to question, deliberate, and learn.
69 | - Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!)
70 | - Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communications to be kind.**)
71 | - Take breaks when you feel like you need them.
72 | - Use welcoming and inclusive language.
(Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack.)
73 |
74 | ## nf-core frowns on 😕
75 |
76 | The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this code of conduct. Engaging in or advocating for any of the following could result in expulsion from nf-core workspaces.
77 |
78 | - Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom.
79 | - “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online.
80 | - Spamming or trolling of individuals on social media.
81 | - Use of sexual or discriminatory imagery, comments, or jokes and unwelcome sexual attention.
82 | - Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion or work experience.
83 |
84 | ### Online Trolling
85 |
86 | The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the added issue of online trolling. This is unacceptable; reports of such behaviour will be taken very seriously, and perpetrators will be excluded from activities immediately.
87 |
88 | All community members are required to ask members of the group they are working within for explicit consent prior to taking screenshots of individuals during video calls.
89 |
90 | ## Procedures for Reporting CoC violations
91 |
92 | If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible.
93 |
94 | You can reach out to members of the [nf-core core team](https://nf-co.re/about) and they will forward your concerns to the safety officer(s).
95 |
96 | Issues directly concerning members of the core team will be dealt with by other members of the core team and the safety manager, and possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson, and details will be shared in due course.
97 |
98 | All reports will be handled with utmost discretion and confidentiality.
99 |
100 | ## Attribution and Acknowledgements
101 |
102 | - The [Contributor Covenant, version 1.4](http://contributor-covenant.org/version/1/4)
103 | - The [OpenCon 2017 Code of Conduct](http://www.opencon2017.org/code_of_conduct) (CC BY 4.0 OpenCon organisers, SPARC and Right to Research Coalition)
104 | - The [eLife innovation sprint 2020 Code of Conduct](https://sprint.elifesciences.org/code-of-conduct/)
105 | - The [Mozilla Community Participation Guidelines v3.1](https://www.mozilla.org/en-US/about/governance/policies/participation/) (version 3.1, CC BY-SA 3.0 Mozilla)
106 |
107 | ## Changelog
108 |
109 | ### v1.0 - March 12th, 2021
110 |
111 | - Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC.
112 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Nicolas Servant 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ![nf-core/hic](docs/images/nf-core-hic_logo_light.png#gh-light-mode-only) ![nf-core/hic](docs/images/nf-core-hic_logo_dark.png#gh-dark-mode-only) 2 | 3 | [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/hic/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.2669512-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.2669512) 4 | 5 | [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/) 6 | [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) 7 | [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) 8 | [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) 9 | [![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/hic) 10 | 11 | [![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23hic-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/hic)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) 12 | 13 | ## Introduction 14 | 15 | **nf-core/hic** is a bioinformatics best-practice analysis pipeline for Analysis of Chromosome Conformation Capture data (Hi-C). 
16 |
17 | The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers, making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process, which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!
18 |
19 | On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/hic/results).
20 |
21 | ## Pipeline summary
22 |
23 | 1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))
24 | 2. Hi-C data processing
25 |    1. [`HiC-Pro`](https://github.com/nservant/HiC-Pro)
26 |       1. Mapping using a two-step strategy to rescue reads spanning the ligation
27 |          sites ([`bowtie2`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml))
28 |       2. Detection of valid interaction products
29 |       3. Duplicates removal
30 |       4. Generate raw and normalized contact maps ([`iced`](https://github.com/hiclib/iced))
31 | 3. Create genome-wide contact maps at various resolutions ([`cooler`](https://github.com/open2c/cooler))
32 | 4. Contact maps normalization using balancing algorithm ([`cooler`](https://github.com/open2c/cooler))
33 | 5. Export to various contact maps formats ([`HiC-Pro`](https://github.com/nservant/HiC-Pro), [`cooler`](https://github.com/open2c/cooler))
34 | 6. Quality controls ([`HiC-Pro`](https://github.com/nservant/HiC-Pro), [`HiCExplorer`](https://github.com/deeptools/HiCExplorer))
35 | 7. Compartments calling ([`cooltools`](https://cooltools.readthedocs.io/en/latest/))
36 | 8. TADs calling ([`HiCExplorer`](https://github.com/deeptools/HiCExplorer), [`cooltools`](https://cooltools.readthedocs.io/en/latest/))
37 | 9. Quality control report ([`MultiQC`](https://multiqc.info/))
38 |
39 | ## Usage
40 |
41 | > **Note**
42 | > If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how
43 | > to set up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline)
44 | > with `-profile test` before running the workflow on actual data.
45 |
46 | First, prepare a samplesheet with your input data that looks as follows:
47 |
48 | `samplesheet.csv`:
49 |
50 | ```csv
51 | sample,fastq_1,fastq_2
52 | HIC_ES_4,SRR5339783_1.fastq.gz,SRR5339783_2.fastq.gz
53 | ```
54 |
55 | Each row represents a pair of fastq files (paired end).
56 | Now, you can run the pipeline using:
57 |
58 | ```bash
59 | nextflow run nf-core/hic \
60 |    -profile <docker/singularity/.../institute> \
61 |    --input samplesheet.csv \
62 |    --genome GRCh37 \
63 |    --outdir <OUTDIR>
64 | ```
65 |
66 | > **Warning:**
67 | > Please provide pipeline parameters via the CLI or Nextflow `-params-file` option.
Custom config files including those
68 | > provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;
69 | > see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).
70 |
71 | For more details, please refer to the [usage documentation](https://nf-co.re/hic/usage) and the [parameter documentation](https://nf-co.re/hic/parameters).
72 |
73 | ## Pipeline output
74 |
75 | To see the results of a test run with a full-size dataset, refer to the [results](https://nf-co.re/hic/results) tab on the nf-core website pipeline page.
76 | For more details about the output files and reports, please refer to the
77 | [output documentation](https://nf-co.re/hic/output).
78 |
79 | ## Credits
80 |
81 | nf-core/hic was originally written by Nicolas Servant.
82 |
83 | ## Contributions and Support
84 |
85 | If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).
86 |
87 | For further information or help, don't hesitate to get in touch on the [Slack `#hic` channel](https://nfcore.slack.com/channels/hic) (you can join with [this invite](https://nf-co.re/join/slack)).
88 |
89 | ## Citations
90 |
91 | If you use nf-core/hic for your analysis, please cite it using the following DOI: [10.5281/zenodo.2669512](https://doi.org/10.5281/zenodo.2669512)
92 |
93 | An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.
94 |
95 | You can cite the `nf-core` publication as follows:
96 |
97 | > **The nf-core framework for community-curated bioinformatics pipelines.**
98 | >
99 | > Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.
100 | >
101 | > _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).
102 |
-------------------------------------------------------------------------------- /assets/adaptivecard.json: --------------------------------------------------------------------------------
1 | {
2 |     "type": "message",
3 |     "attachments": [
4 |         {
5 |             "contentType": "application/vnd.microsoft.card.adaptive",
6 |             "contentUrl": null,
7 |             "content": {
8 |                 "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json",
9 |                 "msteams": {
10 |                     "width": "Full"
11 |                 },
12 |                 "type": "AdaptiveCard",
13 |                 "version": "1.2",
14 |                 "body": [
15 |                     {
16 |                         "type": "TextBlock",
17 |                         "size": "Large",
18 |                         "weight": "Bolder",
19 |                         "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>",
20 |                         "text": "nf-core/hic v${version} - ${runName}",
21 |                         "wrap": true
22 |                     },
23 |                     {
24 |                         "type": "TextBlock",
25 |                         "spacing": "None",
26 |                         "text": "Completed at ${dateComplete} (duration: ${duration})",
27 |                         "isSubtle": true,
28 |                         "wrap": true
29 |                     },
30 |                     {
31 |                         "type": "TextBlock",
32 |                         "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors.
The full error message was: ${errorReport}.<% } %>",
33 |                         "wrap": true
34 |                     },
35 |                     {
36 |                         "type": "TextBlock",
37 |                         "text": "The command used to launch the workflow was as follows:",
38 |                         "wrap": true
39 |                     },
40 |                     {
41 |                         "type": "TextBlock",
42 |                         "text": "${commandLine}",
43 |                         "isSubtle": true,
44 |                         "wrap": true
45 |                     }
46 |                 ],
47 |                 "actions": [
48 |                     {
49 |                         "type": "Action.ShowCard",
50 |                         "title": "Pipeline Configuration",
51 |                         "card": {
52 |                             "type": "AdaptiveCard",
53 |                             "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json",
54 |                             "body": [
55 |                                 {
56 |                                     "type": "FactSet",
57 |                                     "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %>
58 |                                     ]
59 |                                 }
60 |                             ]
61 |                         }
62 |                     }
63 |                 ]
64 |             }
65 |         }
66 |     ]
67 | }
-------------------------------------------------------------------------------- /assets/email_template.html: --------------------------------------------------------------------------------
1 | <html>
2 | <head>
3 |     <meta charset="utf-8">
4 |     <meta http-equiv="X-UA-Compatible" content="IE=edge">
5 |     <meta name="viewport" content="width=device-width, initial-scale=1">
6 |     <meta name="description" content="nf-core/hic pipeline report">
7 |
8 |     <title>nf-core/hic Pipeline Report</title>
9 | </head>
10 | <body>
11 | <div style="font-family: Helvetica, Arial, sans-serif; padding: 30px; max-width: 800px; margin: 0 auto;">
12 |
13 | <img src="cid:nfcorepipelinelogo">
14 |
15 | <h1>nf-core/hic v${version}</h1>
16 | <h2>Run Name: $runName</h2>
17 |
18 | <% if (!success){
19 |     out << """
20 |     <div style="color: #a94442; background-color: #f2dede; border-color: #ebccd1; padding: 15px; margin-bottom: 20px; border: 1px solid transparent; border-radius: 4px;">
21 |         <h4 style="margin-top:0; color: inherit;">nf-core/hic execution completed unsuccessfully!</h4>
22 |         <p>The exit status of the task that caused the workflow execution to fail was: <code>$exitStatus</code>.</p>
23 |         <p>The full error message was:</p>
24 |         <pre style="white-space: pre-wrap; overflow: visible; margin-bottom: 0;">${errorReport}</pre>
25 |     </div>
26 |     """
27 | } else {
28 |     out << """
29 |     <div style="color: #3c763d; background-color: #dff0d8; border-color: #d6e9c6; padding: 15px; margin-bottom: 20px; border: 1px solid transparent; border-radius: 4px;">
30 |         nf-core/hic execution completed successfully!
31 |     </div>
32 |     """
33 | }
34 | %>
35 |
36 | <p>The workflow was completed at <strong>$dateComplete</strong> (duration: <strong>$duration</strong>)</p>
37 | <p>The command used to launch the workflow was as follows:</p>
38 | <pre style="white-space: pre-wrap; overflow: visible; background-color: #ededed; padding: 15px; border-radius: 4px;">$commandLine</pre>
39 |
40 | <h3>Pipeline Configuration:</h3>
41 | <table style="width: 100%; max-width: 100%; border-spacing: 0; border-collapse: collapse; border: 0; margin-bottom: 30px;">
42 |     <tbody style="border-bottom: 1px solid #ddd;">
43 |     <% out << summary.collect{ k,v -> "<tr><th style='text-align: left; padding: 8px 0; border-top: 1px solid #ddd;'>$k</th><td style='text-align: left; padding: 8px; border-top: 1px solid #ddd;'><pre style='white-space: pre-wrap; overflow: visible;'>$v</pre></td></tr>" }.join("\n") %>
44 |     </tbody>
45 | </table>
46 |
47 | <p style="color: #999999; font-size: 11px;">nf-core/hic</p>
48 | <p style="color: #999999; font-size: 11px;"><a href="https://github.com/nf-core/hic" style="color: #999999;">https://github.com/nf-core/hic</a></p>
49 |
50 |
51 | </div>
52 |
53 | </body>
54 | </html>
-------------------------------------------------------------------------------- /assets/email_template.txt: --------------------------------------------------------------------------------
1 | ----------------------------------------------------
2 |                                         ,--./,-.
3 |         ___     __   __   __   ___     /,-._.--~\\
4 |   |\\ | |__  __ /  ` /  \\ |__) |__         }  {
5 |   | \\| |       \\__, \\__/ |  \\ |___     \\`-._,-`-,
6 |                                         `._,._,'
7 |   nf-core/hic v${version}
8 | ----------------------------------------------------
9 | Run Name: $runName
10 |
11 | <% if (success){
12 |     out << "## nf-core/hic execution completed successfully! ##"
13 | } else {
14 |     out << """####################################################
15 | ## nf-core/hic execution completed unsuccessfully! ##
16 | ####################################################
17 | The exit status of the task that caused the workflow execution to fail was: $exitStatus.
18 | The full error message was:
19 |
20 | ${errorReport}
21 | """
22 | } %>
23 |
24 |
25 | The workflow was completed at $dateComplete (duration: $duration)
26 |
27 | The command used to launch the workflow was as follows:
28 |
29 |   $commandLine
30 |
31 |
32 |
33 | Pipeline Configuration:
34 | -----------------------
35 | <% out << summary.collect{ k,v -> " - $k: $v" }.join("\n") %>
36 |
37 | --
38 | nf-core/hic
39 | https://github.com/nf-core/hic
-------------------------------------------------------------------------------- /assets/methods_description_template.yml: --------------------------------------------------------------------------------
1 | id: "nf-core-hic-methods-description"
2 | description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication."
3 | section_name: "nf-core/hic Methods Description"
4 | section_href: "https://github.com/nf-core/hic"
5 | plot_type: "html"
6 | ## nf-core: Update the HTML below to your preferred methods description, e.g. add a publication citation for this pipeline
7 | ## You can inject any metadata in the Nextflow '${workflow}' object
8 | data: |

9 |   <h4>Methods</h4>
10 |   <p>Data was processed using nf-core/hic v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020).</p>
11 |   <p>The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:</p>
12 |   <pre><code>${workflow.commandLine}</code></pre>
13 |   <h4>References</h4>
14 |   <ul>
15 |     <li>Servant, N., Ewels, P. A., Peltzer, A., Garcia, M. U. (2021) nf-core/hic. Zenodo. https://doi.org/10.5281/zenodo.2669512</li>
16 |     <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. https://doi.org/10.1038/nbt.3820</li>
17 |     <li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. https://doi.org/10.1038/s41587-020-0439-x</li>
18 |   </ul>
19 |   <div class="alert alert-info">
20 |     <h5>Notes:</h5>
21 |     <ul>
22 |       ${nodoi_text}
23 |       <li>The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!</li>
24 |       <li>You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.</li>
25 |     </ul>
26 |   </div>
27 |
-------------------------------------------------------------------------------- /assets/multiqc_config.yml: --------------------------------------------------------------------------------
1 | report_comment: >
2 |   This report has been generated by the nf-core/hic
3 |   analysis pipeline. For information about how to interpret these results, please see the
4 |   documentation.
5 | report_section_order:
6 |   "nf-core-hic-methods-description":
7 |     order: -1000
8 |   software_versions:
9 |     order: -1001
10 |   "nf-core-hic-summary":
11 |     order: -1002
12 |
13 | export_plots: true
-------------------------------------------------------------------------------- /assets/nf-core-hic_logo_light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/hic/fe4ac656317d24c37e81e7940a526ed9ea812f8e/assets/nf-core-hic_logo_light.png -------------------------------------------------------------------------------- /assets/samplesheet.csv: --------------------------------------------------------------------------------
1 | sample,fastq_1,fastq_2
2 | SRR4292758,https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R1.fastq.gz,https://github.com/nf-core/test-datasets/raw/hic/data/SRR4292758_00_R2.fastq.gz
-------------------------------------------------------------------------------- /assets/schema_input.json: --------------------------------------------------------------------------------
1 | {
2 |     "$schema": "http://json-schema.org/draft-07/schema",
3 |     "$id": "https://raw.githubusercontent.com/nf-core/hic/master/assets/schema_input.json",
4 |     "title": "nf-core/hic pipeline - params.input schema",
5 |     "description": "Schema for the file provided with params.input",
6 |     "type": "array",
7 |     "items": {
8 |         "type": "object",
9 |         "properties": {
10 |             "sample": {
11 |                 "type": "string",
12 |                 "pattern": "^\\S+$",
13 |                 "errorMessage": "Sample name must be provided and cannot contain spaces"
14 |             },
15 |             "fastq_1": {
16 |                 "type": "string",
17 |                 "pattern": "^\\S+\\.f(ast)?q\\.gz$",
18 |                 "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
19 |             },
20 |             "fastq_2": {
21 |                 "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'",
22 |                 "anyOf": [
23 |                     {
24 |                         "type": "string",
25 |                         "pattern": "^\\S+\\.f(ast)?q\\.gz$"
26 |                     },
27 |                     {
28 |                         "type": "string",
29 |                         "maxLength": 0
30 |                     }
31 |                 ]
32 |             }
33 |         },
34 |         "required": ["sample", "fastq_1"]
35 |     }
36 | }
-------------------------------------------------------------------------------- /assets/sendmail_template.txt: --------------------------------------------------------------------------------
1 | To: $email
2 | Subject: $subject
3 | Mime-Version: 1.0
4 | Content-Type: multipart/related;boundary="nfcoremimeboundary"
5 |
6 | --nfcoremimeboundary
7 | Content-Type: text/html; charset=utf-8
8 |
9 | $email_html
10 |
11 | --nfcoremimeboundary
12 | Content-Type: image/png;name="nf-core-hic_logo.png"
13 | Content-Transfer-Encoding: base64
14 | Content-ID: <nfcorepipelinelogo>
15 | Content-Disposition: inline; filename="nf-core-hic_logo_light.png"
16 |
17 | <% out << new File("$projectDir/assets/nf-core-hic_logo_light.png").
18 |     bytes.
19 |     encodeBase64().
20 |     toString().
21 |     tokenize( '\n' )*.
22 |     toList()*.
23 |     collate( 76 )*.
24 |     collect { it.join() }.
25 |     flatten().
26 |     join( '\n' ) %>
27 |
28 | <%
29 | if (mqcFile){
30 | def mqcFileObj = new File("$mqcFile")
31 | if (mqcFileObj.length() < mqcMaxSize){
32 | out << """
33 | --nfcoremimeboundary
34 | Content-Type: text/html; name=\"multiqc_report\"
35 | Content-Transfer-Encoding: base64
36 | Content-ID: <mqcreport>
37 | Content-Disposition: attachment; filename=\"${mqcFileObj.getName()}\"
38 |
39 | ${mqcFileObj.
40 |     bytes.
41 |     encodeBase64().
42 |     toString().
43 |     tokenize( '\n' )*.
44 |     toList()*.
45 |     collate( 76 )*.
46 |     collect { it.join() }.
47 |     flatten().
48 |     join( '\n' )}
49 | """
50 | }}
51 | %>
52 |
53 | --nfcoremimeboundary--
-------------------------------------------------------------------------------- /assets/slackreport.json: --------------------------------------------------------------------------------
1 | {
2 |     "attachments": [
3 |         {
4 |             "fallback": "Plain-text summary of the attachment.",
5 |             "color": "<% if (success) { %>good<% } else { %>danger<%} %>",
6 |             "author_name": "nf-core/hic v${version} - ${runName}",
7 |             "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico",
8 |             "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>",
9 |             "fields": [
10 |                 {
11 |                     "title": "Command used to launch the workflow",
12 |                     "value": "```${commandLine}```",
13 |                     "short": false
14 |                 }
15 |                 <%
16 |                 if (!success) { %>
17 |                 ,
18 |                 {
19 |                     "title": "Full error message",
20 |                     "value": "```${errorReport}```",
21 |                     "short": false
22 |                 },
23 |                 {
24 |                     "title": "Pipeline configuration",
25 |                     "value": "<% out << summary.collect{ k,v -> k == "hook_url" ? "_${k}_: (_hidden_)" : ( ( v.class.toString().contains('Path') || ( v.class.toString().contains('String') && v.contains('/') ) ) ? "_${k}_: `${v}`" : (v.class.toString().contains('DateTime') ? ("_${k}_: " + v.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM))) : "_${k}_: ${v}") ) }.join(",\n") %>",
26 |                     "short": false
27 |                 }
28 |                 <% }
29 |                 %>
30 |             ],
31 |             "footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})"
32 |         }
33 |     ]
34 | }
-------------------------------------------------------------------------------- /bin/build_matrix: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/hic/fe4ac656317d24c37e81e7940a526ed9ea812f8e/bin/build_matrix -------------------------------------------------------------------------------- /bin/check_samplesheet.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """Provide a command line tool to validate and transform tabular samplesheets."""
4 |
5 |
6 | import argparse
7 | import csv
8 | import logging
9 | import sys
10 | from collections import Counter
11 | from pathlib import Path
12 |
13 | logger = logging.getLogger()
14 |
15 |
16 | class RowChecker:
17 |     """
18 |     Define a service that can validate and transform each given row.
19 |
20 |     Attributes:
21 |         modified (list): A list of dicts, where each dict corresponds to a previously
22 |             validated and transformed row. The order of rows is maintained.
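
    Example:
        A minimal sketch of the intended use (file names here are hypothetical,
        not taken from this repository)::

            checker = RowChecker()
            checker.validate_and_transform(
                {"sample": "S1", "fastq_1": "s1_R1.fastq.gz", "fastq_2": "s1_R2.fastq.gz"}
            )
            checker.validate_unique_samples()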
23 | 24 | """ 25 | 26 | VALID_FORMATS = ( 27 | ".fq.gz", 28 | ".fastq.gz", 29 | ) 30 | 31 | def __init__( 32 | self, 33 | sample_col="sample", 34 | first_col="fastq_1", 35 | second_col="fastq_2", 36 | single_col="single_end", 37 | **kwargs, 38 | ): 39 | """ 40 | Initialize the row checker with the expected column names. 41 | 42 | Args: 43 | sample_col (str): The name of the column that contains the sample name 44 | (default "sample"). 45 | first_col (str): The name of the column that contains the first (or only) 46 | FASTQ file path (default "fastq_1"). 47 | second_col (str): The name of the column that contains the second (if any) 48 | FASTQ file path (default "fastq_2"). 49 | single_col (str): The name of the new column that will be inserted and 50 | records whether the sample contains single- or paired-end sequencing 51 | reads (default "single_end"). 52 | 53 | """ 54 | super().__init__(**kwargs) 55 | self._sample_col = sample_col 56 | self._first_col = first_col 57 | self._second_col = second_col 58 | self._single_col = single_col 59 | self._seen = set() 60 | self.modified = [] 61 | 62 | def validate_and_transform(self, row): 63 | """ 64 | Perform all validations on the given row and insert the read pairing status. 65 | 66 | Args: 67 | row (dict): A mapping from column headers (keys) to elements of that row 68 | (values). 69 | 70 | """ 71 | self._validate_sample(row) 72 | self._validate_first(row) 73 | self._validate_second(row) 74 | self._validate_pair(row) 75 | self._seen.add((row[self._sample_col], row[self._first_col])) 76 | self.modified.append(row) 77 | 78 | def _validate_sample(self, row): 79 | """Assert that the sample name exists and convert spaces to underscores.""" 80 | if len(row[self._sample_col]) <= 0: 81 | raise AssertionError("Sample input is required.") 82 | # Sanitize samples slightly. 83 | row[self._sample_col] = row[self._sample_col].replace(" ", "_") 84 | 85 | def _validate_first(self, row): 86 | """Assert that the first FASTQ entry is non-empty and has the right format.""" 87 | if len(row[self._first_col]) <= 0: 88 | raise AssertionError("At least the first FASTQ file is required.") 89 | self._validate_fastq_format(row[self._first_col]) 90 | 91 | def _validate_second(self, row): 92 | """Assert that the second FASTQ entry has the right format if it exists.""" 93 | if len(row[self._second_col]) > 0: 94 | self._validate_fastq_format(row[self._second_col]) 95 | 96 | def _validate_pair(self, row): 97 | """Assert that read pairs have the same file extension. Report pair status.""" 98 | if row[self._first_col] and row[self._second_col]: 99 | row[self._single_col] = False 100 | first_col_suffix = Path(row[self._first_col]).suffixes[-2:] 101 | second_col_suffix = Path(row[self._second_col]).suffixes[-2:] 102 | if first_col_suffix != second_col_suffix: 103 | raise AssertionError("FASTQ pairs must have the same file extensions.") 104 | else: 105 | row[self._single_col] = True 106 | 107 | def _validate_fastq_format(self, filename): 108 | """Assert that a given filename has one of the expected FASTQ extensions.""" 109 | if not any(filename.endswith(extension) for extension in self.VALID_FORMATS): 110 | raise AssertionError( 111 | f"The FASTQ file has an unrecognized extension: {filename}\n" 112 | f"It should be one of: {', '.join(self.VALID_FORMATS)}" 113 | ) 114 | 115 | def validate_unique_samples(self): 116 | """ 117 | Assert that the combination of sample name and FASTQ filename is unique. 
118 |
119 |         In addition to the validation, samples can be renamed with a suffix of _T{n}, where n is the
120 |         number of times the same sample exists with different FASTQ files, e.g., multiple runs per
121 |         experiment. This renaming is currently disabled (see the commented-out line below).
122 |         """
123 |         if len(self._seen) != len(self.modified):
124 |             raise AssertionError("The pair of sample name and FASTQ must be unique.")
125 |         seen = Counter()
126 |         for row in self.modified:
127 |             sample = row[self._sample_col]
128 |             seen[sample] += 1
129 |             ## row[self._sample_col] = f"{sample}_T{seen[sample]}"
130 |
131 |
132 | def read_head(handle, num_lines=10):
133 |     """Read the specified number of lines from the current position in the file."""
134 |     lines = []
135 |     for idx, line in enumerate(handle):
136 |         if idx == num_lines:
137 |             break
138 |         lines.append(line)
139 |     return "".join(lines)
140 |
141 |
142 | def sniff_format(handle):
143 |     """
144 |     Detect the tabular format.
145 |
146 |     Args:
147 |         handle (text file): A handle to a `text file`_ object. The read position is
148 |             expected to be at the beginning (index 0).
149 |
150 |     Returns:
151 |         csv.Dialect: The detected tabular format.
152 |
153 |     .. _text file:
154 |         https://docs.python.org/3/glossary.html#term-text-file
155 |
156 |     """
157 |     peek = read_head(handle)
158 |     handle.seek(0)
159 |     sniffer = csv.Sniffer()
160 |     dialect = sniffer.sniff(peek)
161 |     return dialect
162 |
163 |
164 | def check_samplesheet(file_in, file_out):
165 |     """
166 |     Check that the tabular samplesheet has the structure expected by nf-core pipelines.
167 |
168 |     Validate the general shape of the table, expected columns, and each row. Also add
169 |     an additional column which records whether one or two FASTQ reads were found.
170 |
171 |     Args:
172 |         file_in (pathlib.Path): The given tabular samplesheet. The format can be either
173 |             CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``.
174 |         file_out (pathlib.Path): Where the validated and transformed samplesheet should
175 |             be created; always in CSV format.
176 |
177 |     Example:
178 |         This function checks that the samplesheet follows the following structure,
179 |         see also the `viral recon samplesheet`_::
180 |
181 |             sample,fastq_1,fastq_2
182 |             SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz
183 |             SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz
184 |             SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz,
185 |
186 |     .. _viral recon samplesheet:
187 |         https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv
188 |
189 |     """
190 |     required_columns = {"sample", "fastq_1", "fastq_2"}
191 |     # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`.
192 |     with file_in.open(newline="") as in_handle:
193 |         reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle))
194 |         # Validate the existence of the expected header columns.
195 |         if not required_columns.issubset(reader.fieldnames):
196 |             req_cols = ", ".join(required_columns)
197 |             logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.")
198 |             sys.exit(1)
199 |         # Validate each row.
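        # Each row is validated (and lightly transformed) in place; the first failure
        # aborts with the offending samplesheet line. The reported number below is
        # `i + 2` because `i` is zero-based and line 1 of the file is the header.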
200 | checker = RowChecker() 201 | for i, row in enumerate(reader): 202 | try: 203 | checker.validate_and_transform(row) 204 | except AssertionError as error: 205 | logger.critical(f"{str(error)} On line {i + 2}.") 206 | sys.exit(1) 207 | checker.validate_unique_samples() 208 | header = list(reader.fieldnames) 209 | header.insert(1, "single_end") 210 | # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. 211 | with file_out.open(mode="w", newline="") as out_handle: 212 | writer = csv.DictWriter(out_handle, header, delimiter=",") 213 | writer.writeheader() 214 | for row in checker.modified: 215 | writer.writerow(row) 216 | 217 | 218 | def parse_args(argv=None): 219 | """Define and immediately parse command line arguments.""" 220 | parser = argparse.ArgumentParser( 221 | description="Validate and transform a tabular samplesheet.", 222 | epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", 223 | ) 224 | parser.add_argument( 225 | "file_in", 226 | metavar="FILE_IN", 227 | type=Path, 228 | help="Tabular input samplesheet in CSV or TSV format.", 229 | ) 230 | parser.add_argument( 231 | "file_out", 232 | metavar="FILE_OUT", 233 | type=Path, 234 | help="Transformed output samplesheet in CSV format.", 235 | ) 236 | parser.add_argument( 237 | "-l", 238 | "--log-level", 239 | help="The desired log level (default WARNING).", 240 | choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"), 241 | default="WARNING", 242 | ) 243 | return parser.parse_args(argv) 244 | 245 | 246 | def main(argv=None): 247 | """Coordinate argument parsing and program execution.""" 248 | args = parse_args(argv) 249 | logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") 250 | if not args.file_in.is_file(): 251 | logger.error(f"The given input file {args.file_in} was not found!") 252 | sys.exit(2) 253 | args.file_out.parent.mkdir(parents=True, exist_ok=True) 254 | check_samplesheet(args.file_in, args.file_out) 255 | 256 | 257 | if __name__ == "__main__": 258 | sys.exit(main()) 259 | -------------------------------------------------------------------------------- /bin/cutsite_trimming: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/hic/fe4ac656317d24c37e81e7940a526ed9ea812f8e/bin/cutsite_trimming -------------------------------------------------------------------------------- /bin/digest_genome.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # HiC-Pro 4 | # Copyleft 2015 Institut Curie 5 | # Author(s): Nelle Varoquaux, Nicolas Servant 6 | # Contact: nicolas.servant@curie.fr 7 | # This software is distributed without any guarantee under the terms of the 8 | # GNU General 9 | # Public License, either Version 2, June 1991 or Version 3, June 2007. 
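# A minimal sketch of a typical invocation (file and output names here are
# examples, not taken from this repository): scanning a genome for HindIII
# sites and writing the resulting fragments as BED intervals.
#
#   python digest_genome.py genome.fa -r A^AGCTT -o genome_hindiii_fragments.bed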
10 |
11 | """
12 | Script to extract restriction fragments from a FASTA file and output a BED file
13 | """
14 |
15 | import argparse
16 | import re
17 | import os
18 | import sys
19 | import numpy as np
20 |
21 | RE_cutsite = {"mboi": ["^GATC"], "dpnii": ["^GATC"], "bglii": ["A^GATCT"], "hindiii": ["A^AGCTT"]}
22 |
23 |
24 | def find_re_sites(filename, sequences, offset):
25 |     with open(filename, "r") as infile:
26 |         chr_id = None
27 |         big_str = ""
28 |         indices = []
29 |         all_indices = []
30 |         contig_names = []
31 |         c = 0
32 |         for line in infile:
33 |             c += 1
34 |             if line.startswith(">"):
35 |                 print("{}...".format(line.split()[0][1:]))
36 |                 # If this is not the first chromosome, find the indices and append
37 |                 # them to the list
38 |                 if chr_id is not None:
39 |                     for rs in range(len(sequences)):
40 |                         pattern = "(?={})".format(sequences[rs].lower())
41 |                         indices += [m.start() + offset[rs] for m in re.finditer(pattern, big_str)]
42 |                     indices.sort()
43 |                     all_indices.append(indices)
44 |                     indices = []
45 |
46 |                 # This is a new chromosome. Empty the sequence string, and add the
47 |                 # correct chrom id
48 |                 big_str = ""
49 |                 chr_id = line.split()[0][1:]
50 |                 if chr_id in contig_names:
51 |                     print("The fasta file contains several instances of {}. Exiting.".format(chr_id))
52 |                     sys.exit(-1)
53 |                 contig_names.append(chr_id)
54 |             else:
55 |                 # As long as we don't change chromosomes, continue reading the
56 |                 # file, and appending the sequences
57 |                 big_str += line.lower().strip()
58 |         # Add the indices for the last chromosome
59 |         for rs in range(len(sequences)):
60 |             pattern = "(?={})".format(sequences[rs].lower())
61 |             indices += [m.start() + offset[rs] for m in re.finditer(pattern, big_str)]
62 |         indices.sort()
63 |         all_indices.append(indices)
64 |
65 |     return contig_names, all_indices
66 |
67 |
68 | def find_chromosome_lengths(reference_filename):
69 |     chromosome_lengths = []
70 |     chromosome_names = []
71 |     length = None
72 |     with open(reference_filename, "r") as infile:
73 |         for line in infile:
74 |             if line.startswith(">"):
75 |                 chromosome_names.append(line[1:].strip())
76 |                 if length is not None:
77 |                     chromosome_lengths.append(length)
78 |                 length = 0
79 |             else:
80 |                 length += len(line.strip())
81 |     chromosome_lengths.append(length)
82 |     return chromosome_names, np.array(chromosome_lengths)
83 |
84 |
85 | def replaceN(cs):
86 |     npos = int(cs.find("N"))
87 |     cseql = []
88 |     if npos != -1:
89 |         for nuc in ["A", "C", "G", "T"]:
90 |             tmp = cs.replace("N", nuc, 1)
91 |             tmpl = replaceN(tmp)
92 |             if type(tmpl) == list:
93 |                 cseql = cseql + tmpl
94 |             else:
95 |                 cseql.append(tmpl)
96 |     else:
97 |         cseql.append(cs)
98 |     return cseql
99 |
100 |
101 | if __name__ == "__main__":
102 |     parser = argparse.ArgumentParser()
103 |     parser.add_argument("fastafile")
104 |     parser.add_argument(
105 |         "-r",
106 |         "--restriction_sites",
107 |         dest="res_sites",
108 |         nargs="+",
109 |         help=(
110 |             "The cutting position has to be specified using "
111 |             "'^'. For instance, -r A^AGCTT for HindIII "
112 |             "digestion. Several restriction enzymes can be "
113 |             "specified."
114 |         ),
115 |     )
116 |     parser.add_argument("-o", "--out", default=None)
117 |     args = parser.parse_args()
118 |
119 |     filename = args.fastafile
120 |     out = args.out
121 |
122 |     # Split restriction sites if comma-separated
123 |     cutsites = []
124 |     for s in args.res_sites:
125 |         for m in s.split(","):
126 |             cutsites.append(m)
127 |
128 |     # process args and get restriction enzyme sequences
129 |     sequences = []
130 |     offset = []
131 |     for cs in cutsites:
132 |         if cs.lower() in RE_cutsite:
133 |             cseq = "".join(RE_cutsite[cs.lower()])
134 |         else:
135 |             cseq = cs
136 |
137 |         offpos = int(cseq.find("^"))
138 |         if offpos == -1:
139 |             print(
140 |                 "Unable to detect offset for {}. Please, use '^' to specify the cutting position,\
141 |                 e.g. A^AGCTT for HindIII digestion.".format(
142 |                     cseq
143 |                 )
144 |             )
145 |             sys.exit(-1)
146 |
147 |         for nuc in list(set(cs)):
148 |             if nuc not in ["A", "T", "G", "C", "N", "^"]:
149 |                 print("Found unexpected character ['{}'] in restriction motif".format(nuc))
150 |                 print("Note that multiple motifs should be separated by a space (not a comma!)")
151 |
152 |                 sys.exit(-1)
153 |
154 |         offset.append(offpos)
155 |         sequences.append(re.sub(r"\^", "", cseq))
156 |
157 |     # replace all N in restriction motif
158 |     sequences_without_N = []
159 |     offset_without_N = []
160 |     for rs in range(len(sequences)):
161 |         nrs = replaceN(sequences[rs])
162 |         sequences_without_N = sequences_without_N + nrs
163 |         offset_without_N = offset_without_N + [offset[rs]] * len(nrs)
164 |
165 |     sequences = sequences_without_N
166 |     offset = offset_without_N
167 |
168 |     if out is None:
169 |         out = os.path.splitext(filename)[0] + "_fragments.bed"
170 |
171 |     print("Analyzing", filename)
172 |     print("Restriction site(s)", ",".join(sequences))
173 |     print("Offset(s)", ",".join(str(x) for x in offset))
174 |
175 |     # Read fasta file and look for rs per chromosome
176 |     contig_names, all_indices = find_re_sites(filename, sequences, offset=offset)
177 |     _, lengths = find_chromosome_lengths(filename)
178 |
179 |     valid_fragments = []
180 |     for i, indices in enumerate(all_indices):
181 |         valid_fragments_chr = np.concatenate(
182 |             [np.concatenate([[0], indices])[:, np.newaxis], np.concatenate([indices, [lengths[i]]])[:, np.newaxis]],
183 |             axis=1,
184 |         )
185 |         valid_fragments.append(valid_fragments_chr)
186 |
187 |     # Write results
188 |     print("Writing to {} ...".format(out))
189 |     with open(out, "w") as outfile:
190 |         for chrom_name, indices in zip(contig_names, valid_fragments):
191 |             frag_id = 0
192 |             for begin, end in indices:
193 |                 # This allows removing cases where the enzyme cuts at
194 |                 # the first position of the chromosome
195 |                 if end > begin:
196 |                     frag_id += 1
197 |                     frag_name = "HIC_{}_{}".format(str(chrom_name), int(frag_id))
198 |                     outfile.write(
199 |                         "{}\t{}\t{}\t{}\t0\t+\n".format(str(chrom_name), int(begin), int(end), str(frag_name))
200 |                     )
201 |
-------------------------------------------------------------------------------- /bin/hicpro_merge_validpairs.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 |
4 | ##
5 | ## HiC-Pro
6 | ## Internal function
7 | ## Merge valid interaction files and remove duplicates
8 | ##
9 |
10 | rmDup=0
11 | prefix=""
12 | while getopts ":dp:" opt; do
13 |     case "$opt" in
14 |         d) rmDup=1 ;;
15 |         p) prefix=$OPTARG ;;
16 |     esac
17 | done
18 | shift $(( OPTIND - 1 ))
19 |
20 | vpairs="$@"
21 | vpairs_sorted=$(echo $vpairs | sed -e 's/validPairs/sorted.validPairs/g')
22 |
23 | mkdir -p ./tmp/
24 |
25 | if [[
${rmDup} == 1 ]]; then 26 | ## Sort individual validPairs files 27 | fcounts=0 28 | for vfile in ${vpairs} 29 | do 30 | echo "Sorting ${vfile} ..." 31 | fcounts=$((fcounts+1)) 32 | ofile=$(echo ${vfile} | sed -e 's/validPairs/sorted.validPairs/') 33 | #sort -k2,2V -k3,3n -k5,5V -k6,6n -T ./tmp/ -o ${ofile} ${vfile} 34 | sort -k2,2 -k5,5 -k3,3n -k6,6n -T ./tmp/ -o ${ofile} ${vfile} 35 | done 36 | 37 | if [[ $fcounts -gt 1 ]] 38 | then 39 | echo "Merging and removing the duplicates ..." 40 | ## Sort valid pairs and remove read pairs with same starts (i.e duplicated read pairs) 41 | #sort -k2,2V -k3,3n -k5,5V -k6,6n -T ./tmp/ -m ${vpairs_sorted} | \ 42 | sort -k2,2 -k5,5 -k3,3n -k6,6n -T ./tmp/ -m ${vpairs_sorted} | \ 43 | awk -F"\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=$2 || c2!=$5 || s1!=$3 || s2!=$6){print;c1=$2;c2=$5;s1=$3;s2=$6}' > ${prefix}.allValidPairs 44 | else 45 | echo "Removing the duplicates ..." 46 | cat ${vpairs_sorted} | awk -F"\t" 'BEGIN{c1=0;c2=0;s1=0;s2=0}(c1!=$2 || c2!=$5 || s1!=$3 || s2!=$6){print;c1=$2;c2=$5;s1=$3;s2=$6}' > ${prefix}.allValidPairs 47 | fi 48 | 49 | ## clean 50 | /bin/rm -rf ${vpairs_sorted} 51 | else 52 | cat ${vpairs} > ${prefix}.allValidPairs 53 | fi 54 | 55 | echo -e -n "valid_interaction\t" > ${prefix}_allValidPairs.mergestat 56 | cat ${vpairs} | wc -l >> ${prefix}_allValidPairs.mergestat 57 | echo -e -n "valid_interaction_rmdup\t" >> ${prefix}_allValidPairs.mergestat 58 | cat ${prefix}.allValidPairs | wc -l >> ${prefix}_allValidPairs.mergestat 59 | 60 | ## Count short range (<20000) vs long range contacts 61 | awk 'BEGIN{cis=0;trans=0;sr=0;lr=0} $2 == $5{cis=cis+1; d=$6>$3?$6-$3:$3-$6; if (d<=20000){sr=sr+1}else{lr=lr+1}} $2!=$5{trans=trans+1}END{print "trans_interaction\t"trans"\ncis_interaction\t"cis"\ncis_shortRange\t"sr"\ncis_longRange\t"lr}' ${prefix}.allValidPairs >> ${prefix}_allValidPairs.mergestat 62 | 63 | ## clean 64 | /bin/rm -rf ./tmp/ 65 | -------------------------------------------------------------------------------- /bin/merge_statfiles.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ## nf-core-hic 4 | ## Copyright (c) 2020 Institut Curie 5 | ## Author(s): Nicolas Servant 6 | ## Contact: nicolas.servant@curie.fr 7 | ## This software is distributed without any guarantee under the terms of the BSD-3 licence. 
8 | ## See the LICENCE file for details
9 |
10 | """
11 | Script to merge any files with the same template
12 | """
13 |
14 | import argparse
15 | import sys
16 | import glob
17 | import os
18 | from collections import OrderedDict
19 |
20 |
21 | def num(s):
22 |     try:
23 |         return int(s)
24 |     except ValueError:
25 |         return float(s)
26 |
27 |
28 | if __name__ == "__main__":
29 |     ## Read command line arguments
30 |     parser = argparse.ArgumentParser()
31 |     parser.add_argument("-f", "--files", help="List of input file(s)", type=str, nargs="+")
32 |     parser.add_argument("-v", "--verbose", help="verbose mode", action="store_true")
33 |     args = parser.parse_args()
34 |
35 |     infiles = args.files
36 |     li = len(infiles)
37 |
38 |     if li > 0:
39 |         if args.verbose:
40 |             print("## merge_statfiles.py")
41 |             print("## Merging " + str(li) + " files")
42 |
43 |         ## Read the first file to get the template
44 |         template = OrderedDict()
45 |         if args.verbose:
46 |             print("## Use " + infiles[0] + " as template")
47 |         with open(infiles[0]) as f:
48 |             for line in f:
49 |                 if not line.startswith("#"):
50 |                     lsp = line.strip().split("\t")
51 |                     data = map(num, lsp[1 : len(lsp)])
52 |                     template[str(lsp[0])] = list(data)
53 |
54 |         if len(template) == 0:
55 |             print("Cannot find a template file!")
56 |             sys.exit(1)
57 |
58 |         ## Ints are counts / floats are percentages
59 |         for fidx in list(range(1, li)):
60 |             with open(infiles[fidx]) as f:
61 |                 for line in f:
62 |                     if not line.startswith("#"):
63 |                         lsp = line.strip().split("\t")
64 |                         if lsp[0] in template:
65 |                             for i in list(range(1, len(lsp))):
66 |                                 if isinstance(num(lsp[i]), int):
67 |                                     template[lsp[0]][i - 1] += num(lsp[i])
68 |                                 else:
69 |                                     template[lsp[0]][i - 1] = round((template[lsp[0]][i - 1] + num(lsp[i])) / 2, 3)
70 |                         else:
71 |                             sys.stderr.write(
72 |                                 "Warning : '" + lsp[0] + "' not found in template [" + infiles[fidx] + "]\n"
73 |                             )
74 |
75 |     ## Print the merged template
76 |         for x in template:
77 |             sys.stdout.write(x)
78 |             for y in template[x]:
79 |                 sys.stdout.write("\t" + str(y))
80 |             sys.stdout.write("\n")
81 |
82 |     else:
83 |         print("No files to merge - stop")
84 |         sys.exit(1)
-------------------------------------------------------------------------------- /bin/src/cutsite_trimming.cpp: --------------------------------------------------------------------------------
1 | // HiC-Pro
2 | // Copyright 2015 Institut Curie
3 | // Author(s): Nicolas Servant
4 | // Contact: nicolas.servant@curie.fr
5 | // This software is distributed without any guarantee under the terms of the BSD-3 licence
6 | // g++ -std=c++0x -o cutsite_trimming cutsite_trimming.cpp
7 | // ./cutsite_trimming --fastq input.fastq --cutsite AGCTT --out output.fastq
8 |
9 | #include <iostream>   // std::cout
10 | #include <string>
11 | #include <vector>
12 | #include <fstream>
13 | #include <cstring>
14 |
15 | static const char* prog;
16 |
17 | static int usage(int ret=1)
18 | {
19 |     std::cerr << "usage: " << prog << " --fastq FASTQFILE --cutsite CUTSITE --out OUTFILE [--rmuntrim] \n";
20 |     std::cerr << "usage: " << prog << " --help\n";
21 |     return ret;
22 | }
23 |
24 | static int get_options(int argc, char* argv[], std::string& fastqFile, std::vector<std::string>& cutSites, std::string& output, bool& rmuntrim)
25 | {
26 |     prog = argv[0];
27 |     if (argc == 1){
28 |         exit(usage());
29 |     }
30 |     for (int ac = 1; ac < argc; ++ac) {
31 |         const char* opt = argv[ac];
32 |         if (*opt == '-') {
33 |             if (!strcmp(opt, "--fastq")) {
34 |                 fastqFile = std::string(argv[++ac]);
35 |             }
36 |             else if (!strcmp(opt, "--cutsite")) {
37 |                 std::string cutSitesSequence;
38 |                 cutSitesSequence = std::string(argv[++ac]);
39 |                 size_t pos =
cutSitesSequence.find(",");
40 |                 size_t begin = 0;
41 |                 while(pos != std::string::npos){
42 |                     cutSites.push_back(cutSitesSequence.substr(begin, pos - begin));
43 |                     begin = pos + 1;
44 |                     pos = cutSitesSequence.find(",", begin + 1);
45 |                 }
46 |                 cutSites.push_back(cutSitesSequence.substr(begin, pos));
47 |             }
48 |             else if (!strcmp(opt, "--out")) {
49 |                 output = std::string(argv[++ac]);
50 |             }
51 |             else if (!strcmp(opt, "--rmuntrim")) {
52 |                 rmuntrim = true;
53 |             }
54 |         }else {
55 |             std::cerr << prog << ": unknown option " << opt << std::endl;
56 |             return usage();
57 |         }
58 |     }
59 |     return 0;
60 | }
61 |
62 | static int trim_fastq(std::string& fastqFile, std::vector<std::string>& cutSites, std::string& outFile, bool& rmuntrim){
63 |     int trim_count=0;
64 |     std::string ID;
65 |     std::ifstream ifs (fastqFile);
66 |     std::ofstream ofs (outFile);
67 |
68 |     if (ifs.is_open()){
69 |         while (getline(ifs, ID)) {
70 |             std::string seq;
71 |             std::string dummy;
72 |             std::string qual;
73 |
74 |             getline(ifs, seq);
75 |             getline(ifs, dummy);
76 |             getline(ifs, qual);
77 |
78 |             bool find_pos = false;
79 |             size_t pos = std::string::npos;
80 |             for (std::vector<std::string>::iterator it = cutSites.begin(); it != cutSites.end(); ++it){
81 |                 size_t tmp_pos = seq.find(*it);
82 |                 if (tmp_pos != std::string::npos) {
83 |                     // If find_pos is already true, there are two cut
84 |                     // sites in the same read; keep the leftmost one.
85 |                     if (find_pos == true){
86 |                         if(tmp_pos < pos) {
87 |                             pos = tmp_pos;
88 |                         }
89 |                     } else {
90 |                         find_pos = true;
91 |                         pos = tmp_pos;
92 |                     }
93 |                 }
94 |             }
95 |
96 |             if (pos != std::string::npos) {
97 |                 trim_count++;
98 |                 ofs << ID << '\n';
99 |                 ofs << seq.substr(0, pos) << '\n';
100 |                 ofs << "+\n";
101 |                 ofs << qual.substr(0, pos) << '\n';
102 |             } else {
103 |                 if (!rmuntrim){
104 |                     ofs << ID << '\n';
105 |                     ofs << seq << '\n';
106 |                     ofs << "+\n";
107 |                     ofs << qual << '\n';
108 |                 }
109 |             }
110 |             find_pos = false;
111 |         }
112 |     }else{
113 |         std::cerr << "Error : Cannot open file : " << fastqFile;
114 |     }
115 |     return trim_count;
116 | }
117 |
118 | int main(int argc, char* argv[])
119 | {
120 |     std::string fastqFile;
121 |     std::vector<std::string> cutSites;
122 |     std::string outFile;
123 |     bool rmuntrim = false;
124 |
125 |     int ret = get_options(argc, argv, fastqFile, cutSites, outFile, rmuntrim);
126 |     printf("##Fastq file: %s\n", fastqFile.c_str());
127 |     printf("##Restriction sites:\n");
128 |     for(std::vector<std::string>::iterator it = cutSites.begin(); it != cutSites.end(); ++it){
129 |         std::cout << *it << std::endl;
130 |     }
131 |     printf("##Output File: %s\n", outFile.c_str());
132 |
133 |     if (fastqFile.empty() || cutSites.size() == 0 || outFile.empty()){
134 |         usage();
135 |         exit(ret);
136 |     }
137 |
138 |     int trim_count=trim_fastq(fastqFile, cutSites, outFile, rmuntrim);
139 |     printf("\n##Trimmed reads: %d\n", trim_count);
140 |     return(0);
141 | }
142 |
143 |
144 |
-------------------------------------------------------------------------------- /conf/base.config: --------------------------------------------------------------------------------
1 | /*
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 |     nf-core/hic Nextflow base config file
4 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 |     A 'blank slate' config file, appropriate for general use on most high performance
6 |     compute environments. Assumes that all software is installed and available on
7 |     the PATH. Runs in `local` mode - all jobs will be run on the logged-in environment.
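    For reference, the label-based defaults below can be tuned from a custom config
    supplied with `-c` - e.g. a (hypothetical) site-specific override raising the
    resources of all `process_high` tasks:

        process {
            withLabel:process_high {
                cpus   = 24
                memory = 128.GB
            }
        }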
8 | ---------------------------------------------------------------------------------------- 9 | */ 10 | 11 | process { 12 | 13 | cpus = { check_max( 1 * task.attempt, 'cpus' ) } 14 | memory = { check_max( 8.GB * task.attempt, 'memory' ) } 15 | time = { check_max( 12.h * task.attempt, 'time' ) } 16 | 17 | errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } 18 | maxRetries = 1 19 | maxErrors = '-1' 20 | 21 | // Process-specific resource requirements 22 | // NOTE - Please try and re-use the labels below as much as possible. 23 | // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. 24 | // If possible, it would be nice to keep the same label naming convention when 25 | // adding in your local modules too. 26 | // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors 27 | withLabel:process_single { 28 | cpus = { check_max( 1 , 'cpus' ) } 29 | memory = { check_max( 6.GB * task.attempt, 'memory' ) } 30 | time = { check_max( 4.h * task.attempt, 'time' ) } 31 | } 32 | withLabel:process_low { 33 | cpus = { check_max( 2 * task.attempt, 'cpus' ) } 34 | memory = { check_max( 4.GB * task.attempt, 'memory' ) } 35 | time = { check_max( 4.h * task.attempt, 'time' ) } 36 | } 37 | withLabel:process_medium { 38 | cpus = { check_max( 6 * task.attempt, 'cpus' ) } 39 | memory = { check_max( 8.GB * task.attempt, 'memory' ) } 40 | time = { check_max( 8.h * task.attempt, 'time' ) } 41 | } 42 | withLabel:process_high { 43 | cpus = { check_max( 12 * task.attempt, 'cpus' ) } 44 | memory = { check_max( 64.GB * task.attempt, 'memory' ) } 45 | time = { check_max( 16.h * task.attempt, 'time' ) } 46 | } 47 | withLabel:process_long { 48 | time = { check_max( 20.h * task.attempt, 'time' ) } 49 | } 50 | withLabel:process_high_memory { 51 | memory = { check_max( 24.GB * task.attempt, 'memory' ) } 52 | } 53 | withLabel:error_ignore { 54 | errorStrategy = 'ignore' 55 | } 56 | withLabel:error_retry { 57 | errorStrategy = 'retry' 58 | maxRetries = 2 59 | } 60 | withName:CUSTOM_DUMPSOFTWAREVERSIONS { 61 | cache = false 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /conf/modules.config: -------------------------------------------------------------------------------- 1 | process { 2 | 3 | //Default 4 | publishDir = [ 5 | path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, 6 | mode: 'copy', 7 | saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } 8 | ] 9 | 10 | withName: 'CUSTOM_DUMPSOFTWAREVERSIONS' { 11 | publishDir = [ 12 | path: { "${params.outdir}/pipeline_info" }, 13 | mode: 'copy', 14 | pattern: '*_versions.yml' 15 | ] 16 | } 17 | 18 | //********************************************** 19 | // PREPARE_GENOME 20 | withName: 'BOWTIE2_BUILD' { 21 | publishDir = [ 22 | path: { "${params.outdir}/genome/bowtie2" }, 23 | mode: 'copy', 24 | enabled: params.save_reference 25 | ] 26 | } 27 | 28 | withName: 'CUSTOM_GETCHROMSIZES' { 29 | publishDir = [ 30 | path: { "${params.outdir}/genome" }, 31 | mode: 'copy', 32 | enabled: params.save_reference 33 | ] 34 | } 35 | 36 | withName: 'GET_RESTRICTION_FRAGMENTS' { 37 | publishDir = [ 38 | path: { "${params.outdir}/genome" }, 39 | mode: 'copy', 40 | enabled: params.save_reference 41 | ] 42 | } 43 | 44 | //******************************************* 45 | // HICPRO 46 | withName: 'BOWTIE2_ALIGN' { 47 | publishDir = [ 48 | path: { "${params.outdir}/hicpro/mapping" }, 49 | mode: 'copy', 50 | enabled: params.save_aligned_intermediates 51 | ] 52 | ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}" } 53 | ext.args = params.bwt2_opts_end2end ?: '' 54 | ext.args2 = !params.dnase ? "-F 4" :"" 55 | } 56 | 57 | withName: 'TRIM_READS' { 58 | publishDir = [ 59 | path: { "${params.outdir}/hicpro/mapping/" }, 60 | mode: 'copy', 61 | enabled: params.save_aligned_intermediates 62 | ] 63 | } 64 | 65 | withName: 'BOWTIE2_ALIGN_TRIMMED' { 66 | publishDir = [ 67 | path: { "${params.outdir}/hicpro/mapping" }, 68 | mode: 'copy', 69 | enabled: params.save_aligned_intermediates 70 | ] 71 | ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}_trimmed" } 72 | ext.args = params.bwt2_opts_trimmed ?: '' 73 | ext.args2 = "" 74 | } 75 | 76 | withName: 'MERGE_BOWTIE2' { 77 | publishDir = [ 78 | path: { "${params.outdir}/hicpro/mapping" }, 79 | mode: 'copy', 80 | enabled: params.save_aligned_intermediates 81 | ] 82 | ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}" } 83 | } 84 | 85 | withName: 'COMBINE_MATES' { 86 | publishDir = [ 87 | path: { "${params.outdir}/hicpro/mapping" }, 88 | mode: 'copy', 89 | pattern: '*.bam' 90 | ] 91 | ext.args = [ 92 | "-t", 93 | params.keep_multi ? "--multi" : "", 94 | params.min_mapq ? "-q ${params.min_mapq}" : "" 95 | ].join(' ').trim() 96 | ext.prefix = { "${meta.id}_${meta.chunk}" } 97 | } 98 | 99 | withName: 'GET_VALID_INTERACTION' { 100 | publishDir = [ 101 | path: { "${params.outdir}/hicpro/valid_pairs" }, 102 | saveAs: { filename -> filename.equals('versions.yml') ? null : filename}, 103 | mode: 'copy', 104 | enabled: params.save_pairs_intermediates 105 | ] 106 | ext.args = { [ 107 | params.min_cis_dist > 0 ? " -d ${params.min_cis_dist}" : '', 108 | params.min_insert_size > 0 ? " -s ${params.min_insert_size}" : '', 109 | params.max_insert_size > 0 ? " -l ${params.max_insert_size}" : '', 110 | params.min_restriction_fragment_size > 0 ? " -t ${params.min_restriction_fragment_size}" : '', 111 | params.max_restriction_fragment_size > 0 ? " -m ${params.max_restriction_fragment_size}" : '', 112 | params.save_interaction_bam ? " --sam" : '' 113 | ].join(' ').trim() } 114 | } 115 | 116 | withName: 'GET_VALID_INTERACTION_DNASE' { 117 | publishDir = [ 118 | path: { "${params.outdir}/hicpro/valid_pairs" }, 119 | saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, 120 | mode: 'copy', 121 | enabled: params.save_pairs_intermediates 122 | ] 123 | ext.args = { params.min_cis_dist > 0 ? 
" -d ${params.min_cis_dist}" : "" } 124 | } 125 | 126 | withName: 'MERGE_VALID_INTERACTION' { 127 | publishDir = [ 128 | [ 129 | path: { "${params.outdir}/hicpro/stats/${meta.id}" }, 130 | mode: 'copy', 131 | pattern: "*stat" 132 | ], 133 | [ 134 | path: { "${params.outdir}/hicpro/valid_pairs" }, 135 | mode: 'copy', 136 | saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, 137 | pattern: "*Pairs" 138 | ] 139 | ] 140 | ext.args = { params.keep_dups ? '' : '-d' } 141 | } 142 | 143 | withName: 'MERGE_STATS' { 144 | publishDir = [ 145 | path: { "${params.outdir}/hicpro/stats/${meta.id}" }, 146 | saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, 147 | mode: 'copy', 148 | pattern: "*stat" 149 | ] 150 | } 151 | 152 | withName: 'HICPRO2PAIRS' { 153 | publishDir = [ 154 | path: { "${params.outdir}/hicpro/valid_pairs/pairix/" }, 155 | saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, 156 | mode: 'copy' 157 | ] 158 | } 159 | 160 | withName: 'BUILD_CONTACT_MAPS' { 161 | publishDir = [ 162 | path: { "${params.outdir}/hicpro/matrix/raw" }, 163 | mode: 'copy', 164 | enabled: params.hicpro_maps 165 | ] 166 | ext.prefix = { "${meta.id}.${resolution}" } 167 | } 168 | 169 | withName: 'ICE_NORMALIZATION' { 170 | publishDir = [ 171 | path: { "${params.outdir}/hicpro/matrix/iced" }, 172 | mode: 'copy', 173 | enabled: params.hicpro_maps 174 | ] 175 | } 176 | 177 | //***************************************** 178 | // QUALITY METRICS 179 | 180 | withName: 'HIC_PLOT_DIST_VS_COUNTS'{ 181 | publishDir = [ 182 | path: { "${params.outdir}/distance_decay/" }, 183 | saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, 184 | mode: 'copy' 185 | ] 186 | } 187 | 188 | //***************************************** 189 | // COOLER 190 | 191 | withName: 'COOLER_MAKEBINS' { 192 | publishDir = [ 193 | path: { "${params.outdir}/contact_maps/bins/" }, 194 | saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, 195 | mode: 'copy' 196 | ] 197 | ext.prefix={ "cooler_bins_${cool_bin}" } 198 | } 199 | 200 | withName: 'COOLER_CLOAD' { 201 | publishDir = [ 202 | path: { "${params.outdir}/contact_maps/cool/" }, 203 | saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, 204 | mode: 'copy', 205 | enabled : params.save_raw_maps 206 | ] 207 | ext.prefix = { "${meta.id}.${cool_bin}" } 208 | ext.args = "pairs -c1 2 -p1 3 -c2 4 -p2 5" 209 | } 210 | 211 | withName: 'COOLER_BALANCE' { 212 | publishDir = [ 213 | path: { "${params.outdir}/contact_maps/cool/" }, 214 | saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, 215 | mode: 'copy' 216 | ] 217 | ext.args = '--force' 218 | ext.prefix = { "${cool.baseName}_balanced" } 219 | } 220 | 221 | withName: 'COOLER_DUMP' { 222 | publishDir = [ 223 | enabled: false 224 | ] 225 | ext.prefix = { "${cool.baseName}" } 226 | ext.args = "--one-based-ids --balanced --na-rep 0" 227 | } 228 | 229 | withName:'SPLIT_COOLER_DUMP' { 230 | publishDir = [ 231 | [ 232 | path: { "${params.outdir}/contact_maps/txt/" }, 233 | mode: 'copy', 234 | pattern: "*_raw.txt", 235 | enabled: params.save_raw_maps 236 | ], 237 | [ 238 | path: { "${params.outdir}/contact_maps/txt/" }, 239 | mode: 'copy', 240 | pattern: "*_balanced.txt" 241 | ] 242 | ] 243 | } 244 | 245 | withName: 'COOLER_ZOOMIFY' { 246 | publishDir = [ 247 | path: { "${params.outdir}/contact_maps/cool/" }, 248 | saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, 249 | mode: 'copy' 250 | ] 251 | ext.args = "--balance" 252 | } 253 | 254 | //******************************** 255 | // COMPARTMENTS 256 | 257 | withName: 'COOLTOOLS_EIGSCIS' { 258 | publishDir = [ 259 | path: { "${params.outdir}/compartments/" }, 260 | saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, 261 | mode: 'copy' 262 | ] 263 | ext.args = '--bigwig' 264 | ext.prefix = { "${meta.id}.${resolution}" } 265 | } 266 | 267 | //******************************** 268 | // TADS 269 | 270 | withName: 'COOLTOOLS_INSULATION' { 271 | publishDir = [ 272 | path: { "${params.outdir}/tads/insulation/" }, 273 | saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, 274 | mode: 'copy' 275 | ] 276 | ext.args = '15 25 50 --window-pixels' 277 | ext.prefix = { "${cool.baseName}" } 278 | } 279 | 280 | withName: 'HIC_FIND_TADS' { 281 | publishDir = [ 282 | path: { "${params.outdir}/tads/hicExplorer" }, 283 | saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, 284 | mode: 'copy' 285 | ] 286 | ext.args = '--correctForMultipleTesting fdr' 287 | ext.prefix = { "${cool.baseName}" } 288 | } 289 | } 290 | -------------------------------------------------------------------------------- /conf/public_aws_ecr.config: -------------------------------------------------------------------------------- 1 | /* 2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 | AWS ECR Config 4 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 5 | Config to set public AWS ECR images wherever possible 6 | This improves speed when running on AWS infrastructure. 7 | Use this as an example template when using your own private registry. 8 | ---------------------------------------------------------------------------------------- 9 | */ 10 | 11 | docker.registry = 'public.ecr.aws' 12 | podman.registry = 'public.ecr.aws' 13 | 14 | process { 15 | withName: 'BOWTIE2_ALIGN' { 16 | container = 'quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' 17 | } 18 | withName: 'BOWTIE2_ALIGN_TRIMMED' { 19 | container = 'quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' 20 | } 21 | withName: 'BUILD_CONTACT_MAPS' { 22 | container = 'quay.io/nf-core/ubuntu:20.04' 23 | } 24 | withName: 'COMBINE_MATES' { 25 | container = 'quay.io/biocontainers/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' 26 | } 27 | withName: 'COOLTOOLS_EIGSCIS' { 28 | container = 'quay.io/biocontainers/mulled-v2-c81d8d6b6acf4714ffaae1a274527a41958443f6:cc7ea58b8cefc76bed985dcfe261cb276ed9e0cf-0' 29 | } 30 | withName: 'GET_RESTRICTION_FRAGMENTS' { 31 | container = 'quay.io/biocontainers/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' 32 | } 33 | withName: 'GET_VALID_INTERACTION' { 34 | container = 'quay.io/biocontainers/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' 35 | } 36 | withName: 'GET_VALID_INTERACTION_DNASE' { 37 | container = 'quay.io/biocontainers/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' 38 | } 39 | withName: 'ICE_NORMALIZATION' { 40 | container = 'quay.io/biocontainers/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' 41 | } 42 | withName: 'MERGE_STATS' 
{
43 |         container = 'quay.io/biocontainers/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0'
44 |     }
45 |     withName: 'MERGE_VALID_INTERACTION' {
46 |         container = 'quay.io/nf-core/ubuntu:20.04'
47 |     }
48 |     withName: 'SAMPLESHEET_CHECK' {
49 |         container = 'quay.io/biocontainers/python:3.8.3'
50 |     }
51 |     withName: 'SPLIT_COOLER_DUMP' {
52 |         container = 'quay.io/nf-core/ubuntu:20.04'
53 |     }
54 |     withName: 'TRIM_READS' {
55 |         container = 'quay.io/nf-core/ubuntu:20.04'
56 |     }
57 | }
-------------------------------------------------------------------------------- /conf/test.config: --------------------------------------------------------------------------------
1 | /*
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 |     Nextflow config file for running minimal tests
4 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 |     Defines input files and everything required to run a fast and simple pipeline test.
6 |
7 |     Use as follows:
8 |         nextflow run nf-core/hic -profile test,<docker/singularity> --outdir <OUTDIR>
9 |
10 | ----------------------------------------------------------------------------------------
11 | */
12 |
13 | params {
14 |     config_profile_name        = 'Hi-C test data from Schalbetter et al. (2017)'
15 |     config_profile_description = 'Minimal test dataset to check pipeline function'
16 |
17 |     // Limit resources so that this can run on Travis
18 |     max_cpus   = 2
19 |     max_memory = 4.GB
20 |     max_time   = 1.h
21 |
22 |     // Input data
23 |     input = "${baseDir}/assets/samplesheet.csv"
24 |
25 |     // Annotations
26 |     fasta = 'https://github.com/nf-core/test-datasets/raw/hic/reference/W303_SGD_2015_JRIU00000000.fsa'
27 |     digestion = 'hindiii'
28 |     min_mapq = 10
29 |     min_restriction_fragment_size = 100
30 |     max_restriction_fragment_size = 100000
31 |     min_insert_size = 100
32 |     max_insert_size = 600
33 |
34 |     bin_size = '2000,1000'
35 |     res_dist_decay = '1000'
36 |     res_tads = '1000'
37 |     tads_caller = 'insulation,hicexplorer'
38 |     res_compartments = '2000'
39 |
40 |     // Ignore `--input` as otherwise the parameter validation will throw an error
41 |     schema_ignore_params = 'genomes,digest,input_paths,input'
42 | }
-------------------------------------------------------------------------------- /conf/test_full.config: --------------------------------------------------------------------------------
1 | /*
2 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3 |     Nextflow config file for running full-size tests
4 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5 |     Defines input files and everything required to run a full-size pipeline test.
6 |
7 |     Use as follows:
8 |         nextflow run nf-core/hic -profile test_full,<docker/singularity> --outdir <OUTDIR>
9 | ----------------------------------------------------------------------------------------
10 | */
11 |
12 | params {
13 |     config_profile_name        = 'Full test profile'
14 |     config_profile_description = 'Full test dataset to check pipeline function'
15 |
16 |     // Input data for full size test
17 |     input = 'https://raw.githubusercontent.com/nf-core/test-datasets/hic/samplesheet/samplesheet_HiC_mESC_full_test.csv'
18 |
19 |     // Genome references
20 |     genome = 'mm10'
21 |
22 |     // Other options
23 |     digestion = 'dpnii'
24 |     bin_size = '40000,250000,500000,1000000'
25 |     res_compartments = '500000,250000'
26 |     res_tads = '40000,20000'
27 | }
-------------------------------------------------------------------------------- /docs/README.md: --------------------------------------------------------------------------------
1 | # nf-core/hic: Documentation
2 |
3 | The nf-core/hic documentation is split into the following pages:
4 |
5 | - [Usage](usage.md)
6 |   - An overview of how the pipeline works, how to run it and a description of all of the different command-line flags.
7 | - [Output](output.md)
8 |   - An overview of the different results produced by the pipeline and how to interpret them.
9 |
10 | You can find a lot more documentation about installing, configuring and running nf-core pipelines on the website: [https://nf-co.re](https://nf-co.re)
-------------------------------------------------------------------------------- /docs/images/mqc_fastqc_adapter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/hic/fe4ac656317d24c37e81e7940a526ed9ea812f8e/docs/images/mqc_fastqc_adapter.png -------------------------------------------------------------------------------- /docs/images/mqc_fastqc_counts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/hic/fe4ac656317d24c37e81e7940a526ed9ea812f8e/docs/images/mqc_fastqc_counts.png -------------------------------------------------------------------------------- /docs/images/mqc_fastqc_quality.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/hic/fe4ac656317d24c37e81e7940a526ed9ea812f8e/docs/images/mqc_fastqc_quality.png -------------------------------------------------------------------------------- /docs/images/nf-core-hic_logo_dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/hic/fe4ac656317d24c37e81e7940a526ed9ea812f8e/docs/images/nf-core-hic_logo_dark.png -------------------------------------------------------------------------------- /docs/images/nf-core-hic_logo_light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/hic/fe4ac656317d24c37e81e7940a526ed9ea812f8e/docs/images/nf-core-hic_logo_light.png -------------------------------------------------------------------------------- /docs/images/nfcore-hic_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/hic/fe4ac656317d24c37e81e7940a526ed9ea812f8e/docs/images/nfcore-hic_logo.png -------------------------------------------------------------------------------- /environment.yml:
-------------------------------------------------------------------------------- 1 | # You can use this file to create a conda environment for this pipeline: 2 | # conda env create -f environment.yml 3 | name: nf-core-hic-2.0.0 4 | channels: 5 | - conda-forge 6 | - bioconda 7 | - defaults 8 | dependencies: 9 | - conda-forge::python=3.9.12=h9a8a25e_1_cpython 10 | - pip=22.0.4=pyhd8ed1ab_0 11 | - conda-forge::tbb=2020.2=hc9558a2_0 12 | - conda-forge::scipy=1.8.0=py39hee8e79c_1 13 | - conda-forge::numpy=1.22.3=py39hc58783e_2 14 | - bioconda::iced=0.5.10=py39h919a90d_1 15 | - bioconda::bx-python=0.8.13=py39h6471ffd_1 16 | - bioconda::pysam=0.19.0=py39h5030a8b_0 17 | - conda-forge::pymdown-extensions=7.1=pyh9f0ad1d_0 18 | - bioconda::cooler=0.8.11=pyh5e36f6f_1 19 | - bioconda::cooltools=0.5.1=py39h5371cbf_1 20 | - bioconda::bowtie2=2.4.5=py39hd2f7db1_2 21 | - bioconda::samtools=1.15.1=h1170115_0 22 | - bioconda::multiqc=1.12=pyhdfd78af_0 23 | - bioconda::fastqc=0.11.9=hdfd78af_1 24 | 25 | ## Dev tools 26 | - bioconda::hicexplorer=3.7.2=pyhdfd78af_1 27 | - bioconda::bioconductor-hitc=1.38.0=r41hdfd78af_0 28 | - conda-forge::r-optparse=1.7.1=r41hc72bb7e_0 29 | - bioconda::ucsc-bedgraphtobigwig=377=ha8a8165_3 30 | - conda-forge::cython=0.29.28=py39h5a03fae_2 31 | - pip: 32 | - fanc==0.9.23 33 | -------------------------------------------------------------------------------- /lib/Utils.groovy: -------------------------------------------------------------------------------- 1 | // 2 | // This file holds several Groovy functions that could be useful for any Nextflow pipeline 3 | // 4 | 5 | import org.yaml.snakeyaml.Yaml 6 | 7 | class Utils { 8 | 9 | // 10 | // When running with -profile conda, warn if channels have not been set-up appropriately 11 | // 12 | public static void checkCondaChannels(log) { 13 | Yaml parser = new Yaml() 14 | def channels = [] 15 | try { 16 | def config = parser.load("conda config --show channels".execute().text) 17 | channels = config.channels 18 | } catch(NullPointerException | IOException e) { 19 | log.warn "Could not verify conda channel configuration." 20 | return 21 | } 22 | 23 | // Check that all channels are present 24 | // This channel list is ordered by required channel priority. 
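// For example, a compliant set-up, as printed by `conda config --show channels`
// and parsed above, would look like:
//   channels:
//     - conda-forge
//     - bioconda
//     - defaults
// A missing channel, or conda-forge sorted after bioconda, triggers the warning below.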
25 | def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] 26 | def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean 27 | 28 | // Check that they are in the right order 29 | def channel_priority_violation = false 30 | def n = required_channels_in_order.size() 31 | for (int i = 0; i < n - 1; i++) { 32 | channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) 33 | } 34 | 35 | if (channels_missing | channel_priority_violation) { 36 | log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + 37 | " There is a problem with your Conda configuration!\n\n" + 38 | " You will need to set-up the conda-forge and bioconda channels correctly.\n" + 39 | " Please refer to https://bioconda.github.io/\n" + 40 | " The observed channel order is \n" + 41 | " ${channels}\n" + 42 | " but the following channel order is required:\n" + 43 | " ${required_channels_in_order}\n" + 44 | "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /lib/WorkflowHic.groovy: -------------------------------------------------------------------------------- 1 | // 2 | // This file holds several functions specific to the workflow/hic.nf in the nf-core/hic pipeline 3 | // 4 | 5 | import nextflow.Nextflow 6 | import groovy.text.SimpleTemplateEngine 7 | 8 | class WorkflowHic { 9 | 10 | // 11 | // Check and validate parameters 12 | // 13 | public static void initialise(params, log) { 14 | genomeExistsError(params, log) 15 | 16 | // digestion parameters 17 | if (params.digest && params.digestion && !params.digest.containsKey(params.digestion)) { 18 | Nextflow.error "Unknown digestion protocol. Currently, the available digestion options are ${params.digest.keySet().join(", ")}. Please set manually the '--restriction_site' and '--ligation_site' parameters." 19 | } 20 | 21 | // Check Digestion or DNase Hi-C mode 22 | //if (!params.dnase && !params.ligation_site) { 23 | // Nextflow.error "Ligation motif not found. Please either use the `--digestion` parameters or specify the `--restriction_site` and `--ligation_site`. For DNase Hi-C, please use '--dnase' option" 24 | //} 25 | 26 | } 27 | 28 | // 29 | // Get workflow summary for MultiQC 30 | // 31 | public static String paramsSummaryMultiqc(workflow, summary) { 32 | String summary_section = '' 33 | for (group in summary.keySet()) { 34 | def group_params = summary.get(group) // This gets the parameters of that particular group 35 | if (group_params) { 36 | summary_section += "

<p style=\"font-size:110%\"><b>$group</b></p>\n" 37 | summary_section += "    <dl class=\"dl-horizontal\">\n" 38 | for (param in group_params.keySet()) { 39 | summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</span>'}</samp></dd>\n" 40 | } 41 | summary_section += "    </dl>
\n" 42 | } 43 | } 44 | 45 | String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" 46 | yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" 47 | yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" 48 | yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" 49 | yaml_file_text += "plot_type: 'html'\n" 50 | yaml_file_text += "data: |\n" 51 | yaml_file_text += "${summary_section}" 52 | return yaml_file_text 53 | } 54 | 55 | public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { 56 | // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file 57 | def meta = [:] 58 | meta.workflow = run_workflow.toMap() 59 | meta["manifest_map"] = run_workflow.manifest.toMap() 60 | 61 | meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" 62 | meta["nodoi_text"] = meta.manifest_map.doi ? "": "
<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used. </li>" 63 | 64 | def methods_text = mqc_methods_yaml.text 65 | 66 | def engine = new SimpleTemplateEngine() 67 | def description_html = engine.createTemplate(methods_text).make(meta) 68 | 69 | return description_html 70 | } 71 | 72 | // 73 | // Exit pipeline if incorrect --genome key provided 74 | // 75 | private static void genomeExistsError(params, log) { 76 | if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { 77 | def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + 78 | " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + 79 | " Currently, the available genome keys are:\n" + 80 | " ${params.genomes.keySet().join(", ")}\n" + 81 | "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" 82 | Nextflow.error(error_string) 83 | } 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /lib/WorkflowMain.groovy: -------------------------------------------------------------------------------- 1 | // 2 | // This file holds several functions specific to the main.nf workflow in the nf-core/hic pipeline 3 | // 4 | 5 | import nextflow.Nextflow 6 | 7 | class WorkflowMain { 8 | 9 | // 10 | // Citation string for pipeline 11 | // 12 | public static String citation(workflow) { 13 | return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + 14 | "* The pipeline\n" + 15 | " https://doi.org/10.5281/zenodo.2669513\n\n" + 16 | "* The nf-core framework\n" + 17 | " https://doi.org/10.1038/s41587-020-0439-x\n\n" + 18 | "* Software dependencies\n" + 19 | " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" 20 | } 21 | 22 | // 23 | // Generate help string 24 | // 25 | public static String help(workflow, params) { 26 | def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" 27 | def help_string = '' 28 | help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) 29 | help_string += NfcoreSchema.paramsHelp(workflow, params, command) 30 | help_string += '\n' + citation(workflow) + '\n' 31 | help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) 32 | return help_string 33 | } 34 | 35 | // 36 | // Generate parameter summary log string 37 | // 38 | public static String paramsSummaryLog(workflow, params) { 39 | def summary_log = '' 40 | summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) 41 | summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) 42 | summary_log += '\n' + citation(workflow) + '\n' 43 | summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) 44 | return summary_log 45 | } 46 | 47 | // 48 | // Validate parameters and print summary to screen 49 | // 50 | public static void initialise(workflow, params, log) { 51 | // Print help to screen if required 52 | if (params.help) { 53 | log.info help(workflow, params) 54 | System.exit(0) 55 | } 56 | 57 | // Print workflow version and exit on --version 58 | if (params.version) { 59 | String workflow_version = NfcoreTemplate.version(workflow) 60 | log.info "${workflow.manifest.name} ${workflow_version}" 61 | System.exit(0) 62 | } 63 | 64 | // Print parameter summary log to screen 65 | log.info paramsSummaryLog(workflow, params) 66 | 67 | // Validate workflow parameters via the JSON schema 68 | if (params.validate_params) { 69 | NfcoreSchema.validateParameters(workflow, params, log) 70 | } 71 | 72 | // Check that a -profile or 
Nextflow config has been provided to run the pipeline 73 | NfcoreTemplate.checkConfigProvided(workflow, log) 74 | 75 | // Check that conda channels are set-up correctly 76 | if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { 77 | Utils.checkCondaChannels(log) 78 | } 79 | 80 | // Check AWS batch settings 81 | NfcoreTemplate.awsBatch(workflow, params) 82 | 83 | // Check input has been provided 84 | if (!params.input) { 85 | Nextflow.error("Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'") 86 | } 87 | } 88 | // 89 | // Get attribute from genome config file e.g. fasta 90 | // 91 | public static Object getGenomeAttribute(params, attribute) { 92 | if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { 93 | if (params.genomes[ params.genome ].containsKey(attribute)) { 94 | return params.genomes[ params.genome ][ attribute ] 95 | } 96 | } 97 | return null 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /lib/nfcore_external_java_deps.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nf-core/hic/fe4ac656317d24c37e81e7940a526ed9ea812f8e/lib/nfcore_external_java_deps.jar -------------------------------------------------------------------------------- /main.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | /* 3 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 4 | nf-core/hic 5 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 6 | Github : https://github.com/nf-core/hic 7 | Website: https://nf-co.re/hic 8 | Slack : https://nfcore.slack.com/channels/hic 9 | ---------------------------------------------------------------------------------------- 10 | */ 11 | 12 | nextflow.enable.dsl = 2 13 | 14 | /* 15 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 16 | GENOME PARAMETER VALUES 17 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 18 | */ 19 | 20 | params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') 21 | params.bwt2_index = WorkflowMain.getGenomeAttribute(params, 'bowtie2') 22 | 23 | /* 24 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 25 | VALIDATE & PRINT PARAMETER SUMMARY 26 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 27 | */ 28 | 29 | WorkflowMain.initialise(workflow, params, log) 30 | 31 | /* 32 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 33 | NAMED WORKFLOW FOR PIPELINE 34 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 35 | */ 36 | 37 | include { HIC } from './workflows/hic' 38 | 39 | // 40 | // WORKFLOW: Run main nf-core/hic analysis pipeline 41 | // 42 | workflow NFCORE_HIC { 43 | HIC () 44 | } 45 | 46 | /* 47 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 48 | RUN ALL WORKFLOWS 49 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 50 | */ 51 | 52 | // 53 | // WORKFLOW: Execute a single named workflow for the pipeline 54 | // See: https://github.com/nf-core/rnaseq/issues/619 55 | // 56 | workflow { 57 | NFCORE_HIC () 58 | } 59 | 60 | /* 61 | 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 62 | THE END 63 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 64 | */ 65 | -------------------------------------------------------------------------------- /modules.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "nf-core/hic", 3 | "homePage": "https://github.com/nf-core/hic", 4 | "repos": { 5 | "https://github.com/nf-core/modules.git": { 6 | "modules": { 7 | "nf-core": { 8 | "bowtie2/align": { 9 | "branch": "master", 10 | "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", 11 | "installed_by": ["modules"] 12 | }, 13 | "bowtie2/build": { 14 | "branch": "master", 15 | "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", 16 | "installed_by": ["modules"] 17 | }, 18 | "cooler/balance": { 19 | "branch": "master", 20 | "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", 21 | "installed_by": ["modules"] 22 | }, 23 | "cooler/cload": { 24 | "branch": "master", 25 | "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", 26 | "installed_by": ["modules"] 27 | }, 28 | "cooler/dump": { 29 | "branch": "master", 30 | "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", 31 | "installed_by": ["modules"] 32 | }, 33 | "cooler/makebins": { 34 | "branch": "master", 35 | "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", 36 | "installed_by": ["modules"] 37 | }, 38 | "cooler/zoomify": { 39 | "branch": "master", 40 | "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", 41 | "installed_by": ["modules"] 42 | }, 43 | "custom/dumpsoftwareversions": { 44 | "branch": "master", 45 | "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", 46 | "installed_by": ["modules"] 47 | }, 48 | "custom/getchromsizes": { 49 | "branch": "master", 50 | "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", 51 | "installed_by": ["modules"] 52 | }, 53 | "fastqc": { 54 | "branch": "master", 55 | "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", 56 | "installed_by": ["modules"] 57 | } 58 | } 59 | }, 60 | "subworkflows": { 61 | "nf-core": {} 62 | } 63 | } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /modules/local/cooltools/eigscis.nf: -------------------------------------------------------------------------------- 1 | /* 2 | * cooltools - call_compartments 3 | */ 4 | 5 | process COOLTOOLS_EIGSCIS { 6 | tag "${meta.id}" 7 | label 'process_medium' 8 | 9 | conda "bioconda::cooltools=0.5.1 bioconda::ucsc-bedgraphtobigwig=377" 10 | container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
11 | 'https://depot.galaxyproject.org/singularity/mulled-v2-c81d8d6b6acf4714ffaae1a274527a41958443f6:cc7ea58b8cefc76bed985dcfe261cb276ed9e0cf-0' : 12 | 'biocontainers/mulled-v2-c81d8d6b6acf4714ffaae1a274527a41958443f6:cc7ea58b8cefc76bed985dcfe261cb276ed9e0cf-0' }" 13 | 14 | input: 15 | tuple val(meta), path(cool), val(resolution) 16 | path(fasta) 17 | path(chrsize) 18 | 19 | output: 20 | path("*compartments*"), emit: results 21 | path("versions.yml"), emit: versions 22 | 23 | script: 24 | def args = task.ext.args ?: '' 25 | def prefix = task.ext.prefix ?: "${meta.id}" 26 | """ 27 | cooltools genome binnify --all-names ${chrsize} ${resolution} > genome_bins.txt 28 | cooltools genome gc genome_bins.txt ${fasta} > genome_gc.txt 29 | cooltools eigs-cis ${args} -o ${prefix}_compartments ${cool} 30 | 31 | cat <<-END_VERSIONS > versions.yml 32 | "${task.process}": 33 | cooltools: \$(cooltools --version 2>&1 | grep version | sed 's/cooltools, version //') 34 | END_VERSIONS 35 | """ 36 | } 37 | -------------------------------------------------------------------------------- /modules/local/cooltools/insulation.nf: -------------------------------------------------------------------------------- 1 | /* 2 | * Cooltools - diamond-insulation 3 | */ 4 | 5 | process COOLTOOLS_INSULATION { 6 | tag "${meta.id}" 7 | label 'process_medium' 8 | 9 | conda "bioconda::cooltools=0.5.1" 10 | container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 11 | 'https://depot.galaxyproject.org/singularity/cooltools:0.5.1--py37h37892f8_0' : 12 | 'biocontainers/cooltools:0.5.1--py37h37892f8_0' }" 13 | 14 | input: 15 | tuple val(meta), path(cool) 16 | 17 | output: 18 | path("*tsv"), emit:tsv 19 | path("versions.yml"), emit:versions 20 | 21 | script: 22 | def args = task.ext.args ?: '' 23 | def prefix = task.ext.prefix ?: "${meta.id}" 24 | """ 25 | cooltools insulation ${cool} ${args} > ${prefix}_insulation.tsv 26 | 27 | cat <<-END_VERSIONS > versions.yml 28 | "${task.process}": 29 | cooltools: \$(cooltools --version 2>&1 | sed 's/cooltools, version //') 30 | END_VERSIONS 31 | """ 32 | } 33 | -------------------------------------------------------------------------------- /modules/local/hicexplorer/hicFindTADs.nf: -------------------------------------------------------------------------------- 1 | /* 2 | * hicexplorer - hicFindTADs 3 | */ 4 | 5 | process HIC_FIND_TADS { 6 | label 'process_medium' 7 | 8 | conda "bioconda::hicexplorer=3.7.2" 9 | container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
10 | 'https://depot.galaxyproject.org/singularity/hicexplorer:3.7.2--pyhdfd78af_1' : 11 | 'biocontainers/hicexplorer:3.7.2--pyhdfd78af_1' }" 12 | 13 | input: 14 | tuple val(meta), path(cool) 15 | 16 | output: 17 | path("*hicfindtads*"), emit:results 18 | path("versions.yml"), emit:versions 19 | 20 | script: 21 | def args = task.ext.args ?: '' 22 | def prefix = task.ext.prefix ?: "${meta.id}" 23 | """ 24 | hicFindTADs --matrix ${cool} \ 25 | --outPrefix ${prefix}_hicfindtads \ 26 | ${args} \ 27 | --numberOfProcessors ${task.cpus} 28 | 29 | cat <<-END_VERSIONS > versions.yml 30 | "${task.process}": 31 | hicexplorer: \$(hicFindTADs --version 2>&1 | sed 's/hicFindTADs //') 32 | END_VERSIONS 33 | """ 34 | } 35 | -------------------------------------------------------------------------------- /modules/local/hicexplorer/hicPlotDistVsCounts.nf: -------------------------------------------------------------------------------- 1 | /* 2 | * hicexplorer - Genomic distance/counts plots 3 | */ 4 | 5 | process HIC_PLOT_DIST_VS_COUNTS { 6 | tag "${meta.id}" 7 | label 'process_medium' 8 | 9 | conda "bioconda::hicexplorer=3.7.2" 10 | container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 11 | 'https://depot.galaxyproject.org/singularity/hicexplorer:3.7.2--pyhdfd78af_1' : 12 | 'biocontainers/hicexplorer:3.7.2--pyhdfd78af_1' }" 13 | 14 | input: 15 | tuple val(meta), path(cool) 16 | 17 | output: 18 | path("*distcount*"), emit:results 19 | path("versions.yml"), emit:versions 20 | 21 | script: 22 | def args = task.ext.args ?: '' 23 | def prefix = task.ext.prefix ?: "${meta.id}" 24 | """ 25 | hicPlotDistVsCounts --matrices ${cool} \ 26 | --plotFile ${prefix}_distcount.png \ 27 | --outFileData ${prefix}_distcount.txt 28 | 29 | cat <<-END_VERSIONS > versions.yml 30 | "${task.process}": 31 | hicexplorer: \$(hicPlotDistVsCounts --version 2>&1 | sed 's/hicPlotDistVsCounts //') 32 | END_VERSIONS 33 | """ 34 | } 35 | -------------------------------------------------------------------------------- /modules/local/hicpro/bowtie2_merge.nf: -------------------------------------------------------------------------------- 1 | process MERGE_BOWTIE2{ 2 | tag "${meta.id}" 3 | label 'process_medium' 4 | 5 | conda "bioconda::samtools=1.15.1" 6 | container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
7 | 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : 8 | 'biocontainers/samtools:1.15.1--h1170115_0' }" 9 | 10 | input: 11 | tuple val(meta), path(bam1), path(bam2) 12 | 13 | output: 14 | tuple val(meta), path("${prefix}_bwt2merged.bam"), emit: bam 15 | tuple val(meta), path("${prefix}.mapstat"), emit: stats 16 | path("versions.yml"), emit: versions 17 | 18 | script: 19 | prefix = task.ext.prefix ?: "${meta.id}" 20 | tag = meta.mates 21 | """ 22 | samtools merge -@ ${task.cpus} \\ 23 | -f ${prefix}_bwt2merged.bam \\ 24 | ${bam1} ${bam2} 25 | 26 | samtools sort -@ ${task.cpus} -m 800M \\ 27 | -n \\ 28 | -o ${prefix}_bwt2merged.sorted.bam \\ 29 | ${prefix}_bwt2merged.bam 30 | 31 | mv ${prefix}_bwt2merged.sorted.bam ${prefix}_bwt2merged.bam 32 | 33 | echo "## ${prefix}" > ${prefix}.mapstat 34 | echo -n "total_${tag}\t" >> ${prefix}.mapstat 35 | samtools view -c ${prefix}_bwt2merged.bam >> ${prefix}.mapstat 36 | echo -n "mapped_${tag}\t" >> ${prefix}.mapstat 37 | samtools view -c -F 4 ${prefix}_bwt2merged.bam >> ${prefix}.mapstat 38 | echo -n "global_${tag}\t" >> ${prefix}.mapstat 39 | samtools view -c -F 4 ${bam1} >> ${prefix}.mapstat 40 | echo -n "local_${tag}\t" >> ${prefix}.mapstat 41 | samtools view -c -F 4 ${bam2} >> ${prefix}.mapstat 42 | 43 | cat <<-END_VERSIONS > versions.yml 44 | "${task.process}": 45 | samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') 46 | END_VERSIONS 47 | """ 48 | } 49 | -------------------------------------------------------------------------------- /modules/local/hicpro/build_contact_maps.nf: -------------------------------------------------------------------------------- 1 | process BUILD_CONTACT_MAPS{ 2 | tag "${meta.id}" 3 | label 'process_high_memory' 4 | 5 | conda "conda-forge::sed=4.7" 6 | container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 7 | 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : 8 | 'nf-core/ubuntu:20.04' }" 9 | 10 | input: 11 | tuple val(meta), path(vpairs), val(resolution) 12 | tuple val(meta2), path(chrsize) 13 | 14 | output: 15 | tuple val(meta), val(resolution), path("*.matrix"), path("*.bed"), emit: maps 16 | 17 | script: 18 | def prefix = task.ext.prefix ?: "${meta.id}" 19 | """ 20 | build_matrix \\ 21 | --matrix-format upper \\ 22 | --binsize ${resolution} \\ 23 | --chrsizes ${chrsize} \\ 24 | --ifile ${vpairs} \\ 25 | --oprefix ${prefix} 26 | """ 27 | } 28 | -------------------------------------------------------------------------------- /modules/local/hicpro/combine_mates.nf: -------------------------------------------------------------------------------- 1 | process COMBINE_MATES { 2 | tag "${meta.id}" 3 | label 'process_low' 4 | 5 | conda "conda-forge::python=3.9 bioconda::pysam=0.19.0" 6 | container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
7 | 'https://depot.galaxyproject.org/singularity/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' : 8 | 'biocontainers/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' }" 9 | 10 | input: 11 | tuple val(meta), path(bam) 12 | 13 | output: 14 | tuple val(meta), path("*bwt2pairs.bam"), emit:bam 15 | tuple val(meta), path("*.pairstat"), optional:true, emit:stats 16 | path("versions.yml"), emit: versions 17 | 18 | script: 19 | prefix = task.ext.prefix ?: "${meta.id}" 20 | def args = task.ext.args ?: '' 21 | """ 22 | mergeSAM.py -f ${bam[0]} -r ${bam[1]} -o ${prefix}_bwt2pairs.bam ${args} 23 | 24 | cat <<-END_VERSIONS > versions.yml 25 | "${task.process}": 26 | python: \$(echo \$(python --version 2>&1) | sed 's/Python //') 27 | END_VERSIONS 28 | """ 29 | } 30 | -------------------------------------------------------------------------------- /modules/local/hicpro/dnase_mapping_stats.nf: -------------------------------------------------------------------------------- 1 | process MAPPING_STATS_DNASE { 2 | tag "$meta.id" 3 | label 'process_medium' 4 | 5 | conda "bioconda::samtools=1.15.1" 6 | container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 7 | 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : 8 | 'biocontainers/samtools:1.15.1--h1170115_0' }" 9 | 10 | 11 | input: 12 | tuple val(meta), path(bam) 13 | 14 | output: 15 | tuple val(meta), path(bam), emit:bam 16 | tuple val(meta), path("${prefix}.mapstat"), emit:stats 17 | 18 | script: 19 | prefix = meta.id + "_" + meta.chunk + "_" + meta.mates 20 | tag = meta.mates 21 | """ 22 | echo "## ${prefix}" > ${prefix}.mapstat 23 | echo -n "total_${tag}\t" >> ${prefix}.mapstat 24 | samtools view -c ${bam} >> ${prefix}.mapstat 25 | echo -n "mapped_${tag}\t" >> ${prefix}.mapstat 26 | samtools view -c -F 4 ${bam} >> ${prefix}.mapstat 27 | echo -n "global_${tag}\t" >> ${prefix}.mapstat 28 | samtools view -c -F 4 ${bam} >> ${prefix}.mapstat 29 | echo -n "local_${tag}\t0" >> ${prefix}.mapstat 30 | """ 31 | } 32 | -------------------------------------------------------------------------------- /modules/local/hicpro/get_restriction_fragments.nf: -------------------------------------------------------------------------------- 1 | process GET_RESTRICTION_FRAGMENTS { 2 | tag "$res_site" 3 | label 'process_low' 4 | 5 | conda "conda-forge::python=3.9 conda-forge::numpy=1.22.3" 6 | container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
7 | 'https://depot.galaxyproject.org/singularity/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' : 8 | 'biocontainers/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' }" 9 | 10 | input: 11 | tuple val(meta), path(fasta) 12 | val(res_site) 13 | 14 | output: 15 | tuple val(meta), path("*.bed"), emit: results 16 | path("versions.yml"), emit: versions 17 | 18 | script: 19 | """ 20 | digest_genome.py -r ${res_site} -o restriction_fragments.bed ${fasta} 21 | 22 | cat <<-END_VERSIONS > versions.yml 23 | "${task.process}": 24 | python: \$(echo \$(python --version 2>&1) | sed 's/Python //') 25 | END_VERSIONS 26 | """ 27 | } 28 | -------------------------------------------------------------------------------- /modules/local/hicpro/get_valid_interaction.nf: -------------------------------------------------------------------------------- 1 | process GET_VALID_INTERACTION { 2 | tag "$meta.id" 3 | label 'process_low' 4 | 5 | conda "conda-forge::python=3.9 bioconda::pysam=0.19.0 bioconda::bx-python=0.8.13" 6 | container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 7 | 'https://depot.galaxyproject.org/singularity/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' : 8 | 'biocontainers/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' }" 9 | 10 | input: 11 | tuple val(meta), path(bam) 12 | tuple val(meta2), path(resfrag) 13 | 14 | output: 15 | tuple val(meta), path("*.validPairs"), emit:valid_pairs 16 | tuple val(meta), path("*.DEPairs"), optional:true, emit:de_pairs 17 | tuple val(meta), path("*.SCPairs"), optional: true, emit:sc_pairs 18 | tuple val(meta), path("*.REPairs"), optional: true, emit:re_pairs 19 | tuple val(meta), path("*.FiltPairs"), optional: true, emit:filt_pairs 20 | tuple val(meta), path("*RSstat"), optional: true, emit:stats 21 | path("versions.yml"), emit: versions 22 | 23 | script: 24 | def args = task.ext.args ?: '' 25 | """ 26 | mapped_2hic_fragments.py \\ 27 | -f ${resfrag} \\ 28 | -r ${bam} \\ 29 | --all \\ 30 | ${args} 31 | 32 | cat <<-END_VERSIONS > versions.yml 33 | "${task.process}": 34 | python: \$(echo \$(python --version 2>&1) | sed 's/Python //') 35 | END_VERSIONS 36 | """ 37 | } 38 | -------------------------------------------------------------------------------- /modules/local/hicpro/get_valid_interaction_dnase.nf: -------------------------------------------------------------------------------- 1 | process GET_VALID_INTERACTION_DNASE { 2 | tag "$meta.id" 3 | label 'process_low' 4 | 5 | conda "conda-forge::python=3.9 bioconda::pysam=0.19.0 bioconda::bx-python=0.8.13" 6 | container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
7 | 'https://depot.galaxyproject.org/singularity/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' : 8 | 'biocontainers/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' }" 9 | 10 | input: 11 | tuple val(meta), path(bam) 12 | 13 | output: 14 | tuple val(meta), path("*.validPairs"), emit:valid_pairs 15 | tuple val(meta), path("*RSstat"), optional: true, emit:stats 16 | path("versions.yml"), emit: versions 17 | 18 | script: 19 | def args = task.ext.args ?: '' 20 | """ 21 | mapped_2hic_dnase.py \\ 22 | -r ${bam} \\ 23 | ${args} 24 | 25 | cat <<-END_VERSIONS > versions.yml 26 | "${task.process}": 27 | python: \$(echo \$(python --version 2>&1) | sed 's/Python //') 28 | END_VERSIONS 29 | """ 30 | } 31 | -------------------------------------------------------------------------------- /modules/local/hicpro/hicpro2pairs.nf: -------------------------------------------------------------------------------- 1 | process HICPRO2PAIRS { 2 | tag "$meta.id" 3 | label 'process_medium' 4 | 5 | conda "bioconda::pairix=0.3.7" 6 | container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 7 | 'https://depot.galaxyproject.org/singularity/pairix:0.3.7--py36h30a8e3e_3' : 8 | 'biocontainers/pairix:0.3.7--py36h30a8e3e_3' }" 9 | 10 | input: 11 | tuple val(meta), path(vpairs) 12 | tuple val(meta2), path(chrsize) 13 | 14 | output: 15 | tuple val(meta), path("*.pairs.gz"), path("*.pairs.gz.px2"), emit: pairs 16 | path("versions.yml"), emit: versions 17 | 18 | script: 19 | prefix = "${meta.id}" 20 | """ 21 | ##columns: readID chr1 pos1 chr2 pos2 strand1 strand2 22 | awk '{OFS="\t";print \$1,\$2,\$3,\$5,\$6,\$4,\$7}' $vpairs | bgzip -c > ${prefix}_contacts.pairs.gz 23 | ##sort -k2,2 -k4,4 -k3,3n -k5,5n ${prefix}_contacts.pairs | bgzip -c > ${prefix}_contacts.pairs.gz 24 | pairix -f ${prefix}_contacts.pairs.gz 25 | 26 | cat <<-END_VERSIONS > versions.yml 27 | "${task.process}": 28 | pairix: \$(echo \$(pairix 2>&1 | grep Version | sed -e 's/Version: //')) 29 | END_VERSIONS 30 | """ 31 | } 32 | -------------------------------------------------------------------------------- /modules/local/hicpro/merge_stats.nf: -------------------------------------------------------------------------------- 1 | process MERGE_STATS { 2 | tag "${meta.id}" 3 | label 'process_low' 4 | 5 | conda "conda-forge::python=3.9" 6 | container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
7 | 'https://depot.galaxyproject.org/singularity/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' : 8 | 'biocontainers/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' }" 9 | 10 | input: 11 | tuple val(meta), path(fstat) 12 | 13 | output: 14 | path("${meta.id}/"), emit: mqc 15 | path("*.{mmapstat,mpairstat,mRSstat}"), emit: stats 16 | path("versions.yml"), emit:versions 17 | 18 | script: 19 | if ( (fstat =~ /.mapstat/) ){ ext = "${meta.mates}.mmapstat" } 20 | if ( (fstat =~ /.pairstat/) ){ ext = "mpairstat" } 21 | if ( (fstat =~ /.RSstat/) ){ ext = "mRSstat" } 22 | """ 23 | mkdir -p ${meta.id} 24 | merge_statfiles.py -f ${fstat} > ${meta.id}.${ext} 25 | cp *${ext} ${meta.id}/ 26 | 27 | cat <<-END_VERSIONS > versions.yml 28 | "${task.process}": 29 | python: \$(echo \$(python --version 2>&1) | sed 's/Python //') 30 | END_VERSIONS 31 | """ 32 | } 33 | -------------------------------------------------------------------------------- /modules/local/hicpro/merge_valid_interaction.nf: -------------------------------------------------------------------------------- 1 | process MERGE_VALID_INTERACTION { 2 | tag "$prefix" 3 | label 'process_high_memory' 4 | 5 | conda "conda-forge::gawk=5.1.0" 6 | container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 7 | 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : 8 | 'nf-core/ubuntu:20.04' }" 9 | 10 | input: 11 | tuple val(meta), path(vpairs) 12 | 13 | output: 14 | tuple val(meta), path("*.allValidPairs"), emit: valid_pairs 15 | path("${meta.id}/"), emit:mqc 16 | path("*mergestat"), emit:stats 17 | path("versions.yml"), emit: versions 18 | 19 | script: 20 | prefix = meta.id 21 | def args = task.ext.args ?: '' 22 | """ 23 | hicpro_merge_validpairs.sh ${args} -p ${prefix} ${vpairs} 24 | 25 | ## For MultiQC 26 | mkdir -p ${prefix} 27 | cp ${prefix}_allValidPairs.mergestat ${prefix}/ 28 | 29 | cat <<-END_VERSIONS > versions.yml 30 | "${task.process}": 31 | sort: \$(echo \$(sort --version 2>&1 | head -1 | awk '{print \$NF}' 2>&1)) 32 | END_VERSIONS 33 | """ 34 | } 35 | -------------------------------------------------------------------------------- /modules/local/hicpro/run_ice.nf: -------------------------------------------------------------------------------- 1 | process ICE_NORMALIZATION { 2 | tag "$meta.id" 3 | label 'process_high_memory' 4 | 5 | conda "conda-forge::python=3.9 bioconda::iced=0.5.10 conda-forge::numpy=1.22.3" 6 | container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
7 | 'https://depot.galaxyproject.org/singularity/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' : 8 | 'biocontainers/mulled-v2-c6ff206325681cbb9c9ef890bb8de554172c0483:713df51cd897ceb893b9a6e6420f527d83c2ed95-0' }" 9 | 10 | input: 11 | tuple val(meta), val(res), path(rmaps), path(bed) 12 | 13 | output: 14 | tuple val(meta), val(res), path("*iced.matrix"), path(bed), emit:maps 15 | path ("*.biases"), emit:bias 16 | path("versions.yml"), emit: versions 17 | 18 | script: 19 | prefix = rmaps.toString() - ~/(\.matrix)?$/ 20 | """ 21 | ice --filter_low_counts_perc ${params.ice_filter_low_count_perc} \ 22 | --results_filename ${prefix}_iced.matrix \ 23 | --filter_high_counts_perc ${params.ice_filter_high_count_perc} \ 24 | --max_iter ${params.ice_max_iter} --eps ${params.ice_eps} --remove-all-zeros-loci --output-bias 1 --verbose 1 ${rmaps} 25 | 26 | cat <<-END_VERSIONS > versions.yml 27 | "${task.process}": 28 | python: \$(echo \$(python --version 2>&1) | sed 's/Python //') 29 | iced: \$(python -c "import iced; print(iced.__version__)") 30 | END_VERSIONS 31 | """ 32 | } 33 | -------------------------------------------------------------------------------- /modules/local/hicpro/trim_reads.nf: -------------------------------------------------------------------------------- 1 | process TRIM_READS { 2 | tag "$meta.id" 3 | label 'process_low' 4 | 5 | conda "conda-forge::sed=4.7" 6 | container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 7 | 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : 8 | 'nf-core/ubuntu:20.04' }" 9 | 10 | input: 11 | tuple val(meta), path(reads) 12 | val(motif) 13 | 14 | output: 15 | tuple val(meta), path("*trimmed.fastq.gz"), emit: fastq 16 | path("versions.yml") , emit: versions 17 | 18 | script: 19 | """ 20 | zcat ${reads} > tmp.fastq 21 | cutsite_trimming --fastq tmp.fastq \\ 22 | --cutsite ${motif[0]} \\ 23 | --out ${reads.simpleName}_trimmed.fastq 24 | gzip ${reads.simpleName}_trimmed.fastq 25 | /bin/rm -f tmp.fastq 26 | 27 | cat <<-END_VERSIONS > versions.yml 28 | "${task.process}": 29 | gzip: \$(echo \$(gzip --version 2>&1) | head -1 | cut -d" " -f2) 30 | END_VERSIONS 31 | """ 32 | } 33 | -------------------------------------------------------------------------------- /modules/local/multiqc.nf: -------------------------------------------------------------------------------- 1 | process MULTIQC { 2 | label 'process_medium' 3 | 4 | conda "bioconda::multiqc=1.14" 5 | container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 6 | 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : 7 | 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" 8 | 9 | input: 10 | path multiqc_config 11 | path (mqc_custom_config) 12 | path workflow_summary 13 | path ('fastqc/*') 14 | path ('input_*/*') 15 | 16 | output: 17 | path "*multiqc_report.html", emit: report 18 | path "*_data" , emit: data 19 | path "*_plots" , optional:true, emit: plots 20 | path "versions.yml" , emit: versions 21 | 22 | when: 23 | task.ext.when == null || task.ext.when 24 | 25 | script: 26 | def args = task.ext.args ?: '' 27 | """ 28 | multiqc -f $args . 
29 | 30 | cat <<-END_VERSIONS > versions.yml 31 | "${task.process}": 32 | multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) 33 | END_VERSIONS 34 | """ 35 | } 36 | -------------------------------------------------------------------------------- /modules/local/samplesheet_check.nf: -------------------------------------------------------------------------------- 1 | process SAMPLESHEET_CHECK { 2 | tag "$samplesheet" 3 | label 'process_single' 4 | 5 | conda "conda-forge::python=3.8.3" 6 | container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 7 | 'https://depot.galaxyproject.org/singularity/python:3.8.3' : 8 | 'biocontainers/python:3.8.3' }" 9 | 10 | input: 11 | path samplesheet 12 | 13 | output: 14 | path '*.csv' , emit: csv 15 | path "versions.yml", emit: versions 16 | 17 | when: 18 | task.ext.when == null || task.ext.when 19 | 20 | script: // This script is bundled with the pipeline, in nf-core/hic/bin/ 21 | """ 22 | check_samplesheet.py \\ 23 | $samplesheet \\ 24 | samplesheet.valid.csv 25 | 26 | cat <<-END_VERSIONS > versions.yml 27 | "${task.process}": 28 | python: \$(python --version | sed 's/Python //g') 29 | END_VERSIONS 30 | """ 31 | } 32 | -------------------------------------------------------------------------------- /modules/local/split_cooler_dump.nf: -------------------------------------------------------------------------------- 1 | process SPLIT_COOLER_DUMP { 2 | tag "$meta.id" 3 | label 'process_low' 4 | 5 | conda "conda-forge::gawk=5.1.0" 6 | container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 7 | 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : 8 | 'nf-core/ubuntu:20.04' }" 9 | 10 | input: 11 | tuple val(meta), path(bedpe) 12 | 13 | output: 14 | tuple val(meta), path("*.txt"), emit: matrix 15 | path ("versions.yml"), emit: versions 16 | 17 | when: 18 | task.ext.when == null || task.ext.when 19 | 20 | script: 21 | def args = task.ext.args ?: '' 22 | prefix = bedpe.toString() - ~/(\_balanced)?.bedpe$/ 23 | """ 24 | cat ${bedpe} | awk '{OFS="\t"; print \$1,\$2,\$3}' > ${prefix}_raw.txt 25 | cat ${bedpe} | awk '{OFS="\t"; print \$1,\$2,\$4}' > ${prefix}_balanced.txt 26 | 27 | cat <<-END_VERSIONS > versions.yml 28 | "${task.process}": 29 | awk: \$(awk --version | head -1 | cut -f1 -d, | sed -e 's/GNU Awk //') 30 | END_VERSIONS 31 | """ 32 | } 33 | -------------------------------------------------------------------------------- /modules/nf-core/bowtie2/align/main.nf: -------------------------------------------------------------------------------- 1 | process BOWTIE2_ALIGN { 2 | tag "$meta.id" 3 | label "process_high" 4 | 5 | conda "bioconda::bowtie2=2.4.4 bioconda::samtools=1.16.1 conda-forge::pigz=2.6" 6 | container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
7 | 'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' : 8 | 'biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' }" 9 | 10 | input: 11 | tuple val(meta) , path(reads) 12 | tuple val(meta2), path(index) 13 | val save_unaligned 14 | val sort_bam 15 | 16 | output: 17 | tuple val(meta), path("*.bam") , emit: bam 18 | tuple val(meta), path("*.log") , emit: log 19 | tuple val(meta), path("*fastq.gz"), emit: fastq, optional:true 20 | path "versions.yml" , emit: versions 21 | 22 | when: 23 | task.ext.when == null || task.ext.when 24 | 25 | script: 26 | def args = task.ext.args ?: "" 27 | def args2 = task.ext.args2 ?: "" 28 | def prefix = task.ext.prefix ?: "${meta.id}" 29 | 30 | def unaligned = "" 31 | def reads_args = "" 32 | if (meta.single_end) { 33 | unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : "" 34 | reads_args = "-U ${reads}" 35 | } else { 36 | unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : "" 37 | reads_args = "-1 ${reads[0]} -2 ${reads[1]}" 38 | } 39 | 40 | def samtools_command = sort_bam ? 'sort' : 'view' 41 | 42 | """ 43 | INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"` 44 | [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/\\.rev.1.bt2l\$//"` 45 | [ -z "\$INDEX" ] && echo "Bowtie2 index files not found" 1>&2 && exit 1 46 | 47 | bowtie2 \\ 48 | -x \$INDEX \\ 49 | $reads_args \\ 50 | --threads $task.cpus \\ 51 | $unaligned \\ 52 | $args \\ 53 | 2> ${prefix}.bowtie2.log \\ 54 | | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam - 55 | 56 | if [ -f ${prefix}.unmapped.fastq.1.gz ]; then 57 | mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz 58 | fi 59 | 60 | if [ -f ${prefix}.unmapped.fastq.2.gz ]; then 61 | mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz 62 | fi 63 | 64 | cat <<-END_VERSIONS > versions.yml 65 | "${task.process}": 66 | bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') 67 | samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') 68 | pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) 69 | END_VERSIONS 70 | """ 71 | } 72 | -------------------------------------------------------------------------------- /modules/nf-core/bowtie2/align/meta.yml: -------------------------------------------------------------------------------- 1 | name: bowtie2_align 2 | description: Align reads to a reference genome using bowtie2 3 | keywords: 4 | - align 5 | - map 6 | - fasta 7 | - fastq 8 | - genome 9 | - reference 10 | tools: 11 | - bowtie2: 12 | description: | 13 | Bowtie 2 is an ultrafast and memory-efficient tool for aligning 14 | sequencing reads to long reference sequences. 15 | homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml 16 | documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml 17 | doi: 10.1038/nmeth.1923 18 | licence: ["GPL-3.0-or-later"] 19 | input: 20 | - meta: 21 | type: map 22 | description: | 23 | Groovy Map containing sample information 24 | e.g. [ id:'test', single_end:false ] 25 | - reads: 26 | type: file 27 | description: | 28 | List of input FastQ files of size 1 and 2 for single-end and paired-end data, 29 | respectively. 30 | - meta2: 31 | type: map 32 | description: | 33 | Groovy Map containing reference information 34 | e.g. 
[ id:'test', single_end:false ] 35 | - index: 36 | type: file 37 | description: Bowtie2 genome index files 38 | pattern: "*.bt2" 39 | - save_unaligned: 40 | type: boolean 41 | description: | 42 | Save reads that do not map to the reference (true) or discard them (false) 43 | (default: false) 44 | - sort_bam: 45 | type: boolean 46 | description: use samtools sort (true) or samtools view (false) 47 | pattern: "true or false" 48 | output: 49 | - bam: 50 | type: file 51 | description: Output BAM file containing read alignments 52 | pattern: "*.{bam}" 53 | - versions: 54 | type: file 55 | description: File containing software versions 56 | pattern: "versions.yml" 57 | - fastq: 58 | type: file 59 | description: Unaligned FastQ files 60 | pattern: "*.fastq.gz" 61 | - log: 62 | type: file 63 | description: Alignment log 64 | pattern: "*.log" 65 | authors: 66 | - "@joseespinosa" 67 | - "@drpatelh" 68 | -------------------------------------------------------------------------------- /modules/nf-core/bowtie2/build/main.nf: -------------------------------------------------------------------------------- 1 | process BOWTIE2_BUILD { 2 | tag "$fasta" 3 | label 'process_high' 4 | 5 | conda "bioconda::bowtie2=2.4.4" 6 | container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 7 | 'https://depot.galaxyproject.org/singularity/bowtie2:2.4.4--py39hbb4e92a_0' : 8 | 'biocontainers/bowtie2:2.4.4--py39hbb4e92a_0' }" 9 | 10 | input: 11 | tuple val(meta), path(fasta) 12 | 13 | output: 14 | tuple val(meta), path('bowtie2') , emit: index 15 | path "versions.yml" , emit: versions 16 | 17 | when: 18 | task.ext.when == null || task.ext.when 19 | 20 | script: 21 | def args = task.ext.args ?: '' 22 | """ 23 | mkdir bowtie2 24 | bowtie2-build $args --threads $task.cpus $fasta bowtie2/${fasta.baseName} 25 | cat <<-END_VERSIONS > versions.yml 26 | "${task.process}": 27 | bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') 28 | END_VERSIONS 29 | """ 30 | } 31 | -------------------------------------------------------------------------------- /modules/nf-core/bowtie2/build/meta.yml: -------------------------------------------------------------------------------- 1 | name: bowtie2_build 2 | description: Builds bowtie index for reference genome 3 | keywords: 4 | - build 5 | - index 6 | - fasta 7 | - genome 8 | - reference 9 | tools: 10 | - bowtie2: 11 | description: | 12 | Bowtie 2 is an ultrafast and memory-efficient tool for aligning 13 | sequencing reads to long reference sequences. 14 | homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml 15 | documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml 16 | doi: 10.1038/nmeth.1923 17 | licence: ["GPL-3.0-or-later"] 18 | input: 19 | - meta: 20 | type: map 21 | description: | 22 | Groovy Map containing reference information 23 | e.g. [ id:'test', single_end:false ] 24 | - fasta: 25 | type: file 26 | description: Input genome fasta file 27 | output: 28 | - meta: 29 | type: map 30 | description: | 31 | Groovy Map containing reference information 32 | e.g. 
[ id:'test', single_end:false ] 33 | - index: 34 | type: file 35 | description: Bowtie2 genome index files 36 | pattern: "*.bt2" 37 | - versions: 38 | type: file 39 | description: File containing software versions 40 | pattern: "versions.yml" 41 | authors: 42 | - "@joseespinosa" 43 | - "@drpatelh" 44 | -------------------------------------------------------------------------------- /modules/nf-core/cooler/balance/main.nf: -------------------------------------------------------------------------------- 1 | process COOLER_BALANCE { 2 | tag "$meta.id" 3 | label 'process_high' 4 | 5 | conda "bioconda::cooler=0.8.11" 6 | container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 7 | 'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0': 8 | 'biocontainers/cooler:0.8.11--pyh3252c3a_0' }" 9 | 10 | input: 11 | tuple val(meta), path(cool), val(resolution) 12 | 13 | output: 14 | tuple val(meta), path("${prefix}.${extension}"), emit: cool 15 | path "versions.yml" , emit: versions 16 | 17 | when: 18 | task.ext.when == null || task.ext.when 19 | 20 | script: 21 | def args = task.ext.args ?: '' 22 | prefix = task.ext.prefix ?: "${meta.id}" 23 | suffix = resolution ? "::/resolutions/$resolution" : "" 24 | extension = cool.getExtension() 25 | if ("$cool" == "${prefix}.${extension}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 26 | """ 27 | cp ${cool} ${prefix}.${extension} 28 | 29 | cooler balance \\ 30 | $args \\ 31 | -p ${task.cpus} \\ 32 | ${prefix}.${extension}${suffix} 33 | 34 | cat <<-END_VERSIONS > versions.yml 35 | "${task.process}": 36 | cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') 37 | END_VERSIONS 38 | """ 39 | } 40 | -------------------------------------------------------------------------------- /modules/nf-core/cooler/balance/meta.yml: -------------------------------------------------------------------------------- 1 | name: "cooler_balance" 2 | description: Run matrix balancing on a cool file 3 | keywords: 4 | - balance 5 | tools: 6 | - "cooler": 7 | description: Sparse binary format for genomic interaction matrices 8 | homepage: https://open2c.github.io/cooler/ 9 | documentation: https://cooler.readthedocs.io/en/latest/index.html 10 | tool_dev_url: https://github.com/open2c/cooler 11 | doi: "10.1093/bioinformatics/btz540" 12 | licence: ["BSD-3-Clause"] 13 | 14 | input: 15 | - meta: 16 | type: map 17 | description: | 18 | Groovy Map containing sample information 19 | e.g. [ id:'test', single_end:false ] 20 | - cool: 21 | type: file 22 | description: Path to COOL file 23 | pattern: "*.{cool,mcool}" 24 | - resolution: 25 | type: value 26 | description: Resolution 27 | 28 | output: 29 | - meta: 30 | type: map 31 | description: | 32 | Groovy Map containing sample information 33 | e.g. 
[ id:'test', single_end:false ] 34 | - versions: 35 | type: file 36 | description: File containing software versions 37 | pattern: "versions.yml" 38 | - cool: 39 | type: file 40 | description: Output COOL file with balancing weights 41 | pattern: "*.cool" 42 | 43 | authors: 44 | - "@nservant" 45 | - "@muffato" 46 | -------------------------------------------------------------------------------- /modules/nf-core/cooler/cload/main.nf: -------------------------------------------------------------------------------- 1 | process COOLER_CLOAD { 2 | tag "$meta.id" 3 | label 'process_high' 4 | 5 | conda "bioconda::cooler=0.8.11" 6 | container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 7 | 'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0' : 8 | 'biocontainers/cooler:0.8.11--pyh3252c3a_0' }" 9 | 10 | input: 11 | tuple val(meta), path(pairs), path(index), val(cool_bin) 12 | path chromsizes 13 | 14 | output: 15 | tuple val(meta), path("*.cool"), val(cool_bin), emit: cool 16 | path "versions.yml" , emit: versions 17 | 18 | when: 19 | task.ext.when == null || task.ext.when 20 | 21 | script: 22 | def args = task.ext.args ?: '' 23 | def prefix = task.ext.prefix ?: "${meta.id}" 24 | def nproc = args.contains('pairix') || args.contains('tabix')? "--nproc $task.cpus" : '' 25 | 26 | """ 27 | cooler cload \\ 28 | $args \\ 29 | $nproc \\ 30 | ${chromsizes}:${cool_bin} \\ 31 | $pairs \\ 32 | ${prefix}.cool 33 | 34 | cat <<-END_VERSIONS > versions.yml 35 | "${task.process}": 36 | cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') 37 | END_VERSIONS 38 | """ 39 | } 40 | -------------------------------------------------------------------------------- /modules/nf-core/cooler/cload/meta.yml: -------------------------------------------------------------------------------- 1 | name: cooler_cload 2 | description: Create a cooler from genomic pairs and bins 3 | keywords: 4 | - cool 5 | tools: 6 | - cooler: 7 | description: Sparse binary format for genomic interaction matrices 8 | homepage: https://open2c.github.io/cooler/ 9 | documentation: https://cooler.readthedocs.io/en/latest/index.html 10 | tool_dev_url: https://github.com/open2c/cooler 11 | doi: "10.1093/bioinformatics/btz540" 12 | licence: ["BSD-3-clause"] 13 | 14 | input: 15 | - meta: 16 | type: map 17 | description: | 18 | Groovy Map containing sample information 19 | e.g. [ id:'test', single_end:false ] 20 | - pairs: 21 | type: file 22 | description: Path to contacts (i.e. read pairs) file. 23 | - index: 24 | type: file 25 | description: Path to index file of the contacts. 26 | - cool_bin: 27 | type: value 28 | description: Bin size in bp 29 | - chromsizes: 30 | type: file 31 | description: Path to a chromsizes file. 32 | 33 | output: 34 | - meta: 35 | type: map 36 | description: | 37 | Groovy Map containing sample information 38 | e.g. 
[ id:'test', single_end:false ] 39 | - version: 40 | type: file 41 | description: File containing software version 42 | pattern: "versions.yml" 43 | - cool: 44 | type: file 45 | description: Output COOL file path 46 | pattern: "*.cool" 47 | - cool_bin: 48 | type: value 49 | description: Bin size in bp 50 | 51 | authors: 52 | - "@jianhong" 53 | - "@muffato" 54 | -------------------------------------------------------------------------------- /modules/nf-core/cooler/dump/main.nf: -------------------------------------------------------------------------------- 1 | process COOLER_DUMP { 2 | tag "$meta.id" 3 | label 'process_high' 4 | 5 | conda "bioconda::cooler=0.8.11" 6 | container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 7 | 'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0' : 8 | 'biocontainers/cooler:0.8.11--pyh3252c3a_0' }" 9 | 10 | input: 11 | tuple val(meta), path(cool), val(resolution) 12 | 13 | output: 14 | tuple val(meta), path("*.bedpe"), emit: bedpe 15 | path "versions.yml" , emit: versions 16 | 17 | when: 18 | task.ext.when == null || task.ext.when 19 | 20 | script: 21 | def args = task.ext.args ?: '' 22 | def prefix = task.ext.prefix ?: "${meta.id}" 23 | def suffix = resolution ? "::/resolutions/$resolution" : "" 24 | """ 25 | cooler dump \\ 26 | $args \\ 27 | -o ${prefix}.bedpe \\ 28 | $cool$suffix 29 | 30 | cat <<-END_VERSIONS > versions.yml 31 | "${task.process}": 32 | cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') 33 | END_VERSIONS 34 | """ 35 | } 36 | -------------------------------------------------------------------------------- /modules/nf-core/cooler/dump/meta.yml: -------------------------------------------------------------------------------- 1 | name: cooler_dump 2 | description: Dump a cooler’s data to a text stream. 3 | keywords: 4 | - dump 5 | tools: 6 | - cooler: 7 | description: Sparse binary format for genomic interaction matrices 8 | homepage: https://open2c.github.io/cooler/ 9 | documentation: https://cooler.readthedocs.io/en/latest/index.html 10 | tool_dev_url: https://github.com/open2c/cooler 11 | doi: "10.1093/bioinformatics/btz540" 12 | licence: ["BSD-3-Clause"] 13 | 14 | input: 15 | - meta: 16 | type: map 17 | description: | 18 | Groovy Map containing sample information 19 | e.g. [ id:'test', single_end:false ] 20 | - cool: 21 | type: file 22 | description: Path to COOL file 23 | pattern: "*.{cool,mcool}" 24 | - resolution: 25 | type: value 26 | description: Resolution 27 | 28 | output: 29 | - meta: 30 | type: map 31 | description: | 32 | Groovy Map containing sample information 33 | e.g. [ id:'test', single_end:false ] 34 | - versions: 35 | type: file 36 | description: File containing software versions 37 | pattern: "versions.yml" 38 | - bedpe: 39 | type: file 40 | description: Output text file 41 | pattern: "*.bedpe" 42 | 43 | authors: 44 | - "@jianhong" 45 | - "@muffato" 46 | -------------------------------------------------------------------------------- /modules/nf-core/cooler/makebins/main.nf: -------------------------------------------------------------------------------- 1 | process COOLER_MAKEBINS { 2 | tag "${meta.id}" 3 | label 'process_low' 4 | 5 | conda "bioconda::cooler=0.8.11" 6 | container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
7 | 'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0': 8 | 'biocontainers/cooler:0.8.11--pyh3252c3a_0' }" 9 | 10 | input: 11 | tuple val(meta), path(chromsizes), val(cool_bin) 12 | 13 | output: 14 | tuple val(meta), path("*.bed"), emit: bed 15 | path "versions.yml" , emit: versions 16 | 17 | when: 18 | task.ext.when == null || task.ext.when 19 | 20 | script: 21 | def args = task.ext.args ?: '' 22 | def prefix = task.ext.prefix ?: "${meta.id}" 23 | """ 24 | cooler makebins \\ 25 | $args \\ 26 | ${chromsizes} \\ 27 | ${cool_bin} > ${prefix}.bed 28 | 29 | cat <<-END_VERSIONS > versions.yml 30 | "${task.process}": 31 | cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //') 32 | END_VERSIONS 33 | """ 34 | } 35 | -------------------------------------------------------------------------------- /modules/nf-core/cooler/makebins/meta.yml: -------------------------------------------------------------------------------- 1 | name: "cooler_makebins" 2 | description: Generate fixed-width genomic bins 3 | keywords: 4 | - makebins 5 | tools: 6 | - "cooler": 7 | description: Sparse binary format for genomic interaction matrices 8 | homepage: https://open2c.github.io/cooler/ 9 | documentation: https://cooler.readthedocs.io/en/latest/index.html 10 | tool_dev_url: https://github.com/open2c/cooler 11 | doi: "10.1093/bioinformatics/btz540" 12 | licence: ["BSD-3-Clause"] 13 | 14 | input: 15 | - chromsize: 16 | type: file 17 | description: Path to chromosome size file 18 | - cool_bin: 19 | type: value 20 | description: Resolution (bin size) in base pairs 21 | 22 | output: 23 | - versions: 24 | type: file 25 | description: File containing software versions 26 | pattern: "versions.yml" 27 | - bed: 28 | type: file 29 | description: Genome segmentation at a fixed resolution as a BED file. 30 | pattern: "*.bed" 31 | 32 | authors: 33 | - "@nservant" 34 | - "@muffato" 35 | -------------------------------------------------------------------------------- /modules/nf-core/cooler/zoomify/main.nf: -------------------------------------------------------------------------------- 1 | process COOLER_ZOOMIFY { 2 | tag "$meta.id" 3 | label 'process_high' 4 | 5 | conda "bioconda::cooler=0.8.11" 6 | container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
/modules/nf-core/cooler/zoomify/main.nf:
--------------------------------------------------------------------------------
 1 | process COOLER_ZOOMIFY {
 2 |     tag "$meta.id"
 3 |     label 'process_high'
 4 | 
 5 |     conda "bioconda::cooler=0.8.11"
 6 |     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
 7 |         'https://depot.galaxyproject.org/singularity/cooler:0.8.11--pyh3252c3a_0' :
 8 |         'biocontainers/cooler:0.8.11--pyh3252c3a_0' }"
 9 | 
10 |     input:
11 |     tuple val(meta), path(cool)
12 | 
13 |     output:
14 |     tuple val(meta), path("*.mcool"), emit: mcool
15 |     path "versions.yml"             , emit: versions
16 | 
17 |     when:
18 |     task.ext.when == null || task.ext.when
19 | 
20 |     script:
21 |     def args = task.ext.args ?: ''
22 |     def prefix = task.ext.prefix ?: "${meta.id}"
23 |     """
24 |     cooler zoomify \\
25 |         $args \\
26 |         -n $task.cpus \\
27 |         -o ${prefix}.mcool \\
28 |         $cool
29 | 
30 |     cat <<-END_VERSIONS > versions.yml
31 |     "${task.process}":
32 |         cooler: \$(cooler --version 2>&1 | sed 's/cooler, version //')
33 |     END_VERSIONS
34 |     """
35 | }
36 | 
--------------------------------------------------------------------------------
/modules/nf-core/cooler/zoomify/meta.yml:
--------------------------------------------------------------------------------
 1 | name: cooler_zoomify
 2 | description: Generate a multi-resolution cooler file by coarsening
 3 | keywords:
 4 |   - mcool
 5 | tools:
 6 |   - cooler:
 7 |       description: Sparse binary format for genomic interaction matrices
 8 |       homepage: https://open2c.github.io/cooler/
 9 |       documentation: https://cooler.readthedocs.io/en/latest/index.html
10 |       tool_dev_url: https://github.com/open2c/cooler
11 |       doi: "10.1093/bioinformatics/btz540"
12 |       licence: ["BSD-3-Clause"]
13 | 
14 | input:
15 |   - meta:
16 |       type: map
17 |       description: |
18 |         Groovy Map containing sample information
19 |         e.g. [ id:'test', single_end:false ]
20 |   - cool:
21 |       type: file
22 |       description: Path to COOL file
23 |       pattern: "*.{cool,mcool}"
24 | 
25 | output:
26 |   - meta:
27 |       type: map
28 |       description: |
29 |         Groovy Map containing sample information
30 |         e.g. [ id:'test', single_end:false ]
31 |   - versions:
32 |       type: file
33 |       description: File containing software versions
34 |       pattern: "versions.yml"
35 |   - mcool:
36 |       type: file
37 |       description: Output mcool file
38 |       pattern: "*.mcool"
39 | 
40 | authors:
41 |   - "@jianhong"
42 | 
--------------------------------------------------------------------------------
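The $args / $prefix idiom used by these modules is resolved from the pipeline configuration rather than hard-coded: a withName block in conf/modules.config can set task.ext.args and task.ext.prefix per process. A hypothetical snippet (the option values are illustrative; --balance is a cooler zoomify flag):

    process {
        withName: 'COOLER_ZOOMIFY' {
            ext.args   = '--balance'
            ext.prefix = { "${meta.id}_zoomify" }
        }
    }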
/modules/nf-core/custom/dumpsoftwareversions/main.nf:
--------------------------------------------------------------------------------
 1 | process CUSTOM_DUMPSOFTWAREVERSIONS {
 2 |     label 'process_single'
 3 | 
 4 |     // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container
 5 |     conda "bioconda::multiqc=1.14"
 6 |     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
 7 |         'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' :
 8 |         'biocontainers/multiqc:1.14--pyhdfd78af_0' }"
 9 | 
10 |     input:
11 |     path versions
12 | 
13 |     output:
14 |     path "software_versions.yml"    , emit: yml
15 |     path "software_versions_mqc.yml", emit: mqc_yml
16 |     path "versions.yml"             , emit: versions
17 | 
18 |     when:
19 |     task.ext.when == null || task.ext.when
20 | 
21 |     script:
22 |     def args = task.ext.args ?: ''
23 |     template 'dumpsoftwareversions.py'
24 | }
25 | 
--------------------------------------------------------------------------------
/modules/nf-core/custom/dumpsoftwareversions/meta.yml:
--------------------------------------------------------------------------------
 1 | # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
 2 | name: custom_dumpsoftwareversions
 3 | description: Custom module used to dump software versions within the nf-core pipeline template
 4 | keywords:
 5 |   - custom
 6 |   - dump
 7 |   - version
 8 | tools:
 9 |   - custom:
10 |       description: Custom module used to dump software versions within the nf-core pipeline template
11 |       homepage: https://github.com/nf-core/tools
12 |       documentation: https://github.com/nf-core/tools
13 |       licence: ["MIT"]
14 | input:
15 |   - versions:
16 |       type: file
17 |       description: YML file containing software versions
18 |       pattern: "*.yml"
19 | 
20 | output:
21 |   - yml:
22 |       type: file
23 |       description: Standard YML file containing software versions
24 |       pattern: "software_versions.yml"
25 |   - mqc_yml:
26 |       type: file
27 |       description: MultiQC custom content YML file containing software versions
28 |       pattern: "software_versions_mqc.yml"
29 |   - versions:
30 |       type: file
31 |       description: File containing software versions
32 |       pattern: "versions.yml"
33 | 
34 | authors:
35 |   - "@drpatelh"
36 |   - "@grst"
37 | 
--------------------------------------------------------------------------------
/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | 
  4 | """Provide functions to merge multiple versions.yml files."""
  5 | 
  6 | 
  7 | import yaml
  8 | import platform
  9 | from textwrap import dedent
 10 | 
 11 | 
 12 | def _make_versions_html(versions):
 13 |     """Generate a tabular HTML output of all versions for MultiQC."""
 14 |     html = [
 15 |         dedent(
 16 |             """\\
 17 |             <style>
 18 |             #nf-core-versions tbody:nth-child(even) {
 19 |                 background-color: #f2f2f2;
 20 |             }
 21 |             </style>
 22 |             <table class="table" style="width:100%" id="nf-core-versions">
 23 |                 <thead>
 24 |                     <tr>
 25 |                         <th> Process Name </th>
 26 |                         <th> Software </th>
 27 |                         <th> Version  </th>
 28 |                     </tr>
 29 |                 </thead>
 30 |             """
 31 |         )
 32 |     ]
 33 |     for process, tmp_versions in sorted(versions.items()):
 34 |         html.append("<tbody>")
 35 |         for i, (tool, version) in enumerate(sorted(tmp_versions.items())):
 36 |             html.append(
 37 |                 dedent(
 38 |                     f"""\\
 39 |                     <tr>
 40 |                         <td><samp>{process if (i == 0) else ''}</samp></td>
 41 |                         <td><samp>{tool}</samp></td>
 42 |                         <td><samp>{version}</samp></td>
 43 |                     </tr>
 44 |                     """
 45 |                 )
 46 |             )
 47 |         html.append("</tbody>")
 48 |     html.append("</table>")
 49 |     return "\\n".join(html)
 50 | 
 51 | 
 52 | def main():
 53 |     """Load all version files and generate merged output."""
 54 |     versions_this_module = {}
 55 |     versions_this_module["${task.process}"] = {
 56 |         "python": platform.python_version(),
 57 |         "yaml": yaml.__version__,
 58 |     }
 59 | 
 60 |     with open("$versions") as f:
 61 |         versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module
 62 | 
 63 |     # aggregate versions by the module name (derived from fully-qualified process name)
 64 |     versions_by_module = {}
 65 |     for process, process_versions in versions_by_process.items():
 66 |         module = process.split(":")[-1]
 67 |         try:
 68 |             if versions_by_module[module] != process_versions:
 69 |                 raise AssertionError(
 70 |                     "We assume that software versions are the same between all modules. "
 71 |                     "If you see this error-message it means you discovered an edge-case "
 72 |                     "and should open an issue in nf-core/tools. "
 73 |                 )
 74 |         except KeyError:
 75 |             versions_by_module[module] = process_versions
 76 | 
 77 |     versions_by_module["Workflow"] = {
 78 |         "Nextflow": "$workflow.nextflow.version",
 79 |         "$workflow.manifest.name": "$workflow.manifest.version",
 80 |     }
 81 | 
 82 |     versions_mqc = {
 83 |         "id": "software_versions",
 84 |         "section_name": "${workflow.manifest.name} Software Versions",
 85 |         "section_href": "https://github.com/${workflow.manifest.name}",
 86 |         "plot_type": "html",
 87 |         "description": "are collected at run time from the software output.",
 88 |         "data": _make_versions_html(versions_by_module),
 89 |     }
 90 | 
 91 |     with open("software_versions.yml", "w") as f:
 92 |         yaml.dump(versions_by_module, f, default_flow_style=False)
 93 |     with open("software_versions_mqc.yml", "w") as f:
 94 |         yaml.dump(versions_mqc, f, default_flow_style=False)
 95 | 
 96 |     with open("versions.yml", "w") as f:
 97 |         yaml.dump(versions_this_module, f, default_flow_style=False)
 98 | 
 99 | 
100 | if __name__ == "__main__":
101 |     main()
102 | 
--------------------------------------------------------------------------------
/modules/nf-core/custom/getchromsizes/main.nf:
--------------------------------------------------------------------------------
 1 | process CUSTOM_GETCHROMSIZES {
 2 |     tag "$fasta"
 3 |     label 'process_single'
 4 | 
 5 |     conda "bioconda::samtools=1.16.1"
 6 |     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
 7 |         'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' :
 8 |         'biocontainers/samtools:1.16.1--h6899075_1' }"
 9 | 
10 |     input:
11 |     tuple val(meta), path(fasta)
12 | 
13 |     output:
14 |     tuple val(meta), path ("*.sizes"), emit: sizes
15 |     tuple val(meta), path ("*.fai")  , emit: fai
16 |     tuple val(meta), path ("*.gzi")  , emit: gzi, optional: true
17 |     path "versions.yml"              , emit: versions
18 | 
19 |     when:
20 |     task.ext.when == null || task.ext.when
21 | 
22 |     script:
23 |     def args = task.ext.args ?: ''
24 |     """
25 |     samtools faidx $fasta
26 |     cut -f 1,2 ${fasta}.fai > ${fasta}.sizes
27 | 
28 |     cat <<-END_VERSIONS > versions.yml
29 |     "${task.process}":
30 |         getchromsizes: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
31 |     END_VERSIONS
32 |     """
33 | 
34 |     stub:
35 |     """
36 |     touch ${fasta}.fai
37 |     touch ${fasta}.sizes
38 | 
39 |     cat <<-END_VERSIONS > versions.yml
40 |     "${task.process}":
41 |         getchromsizes: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
42 |     END_VERSIONS
43 |     """
44 | }
45 | 
--------------------------------------------------------------------------------
/modules/nf-core/custom/getchromsizes/meta.yml:
--------------------------------------------------------------------------------
 1 | name: custom_getchromsizes
 2 | description: Generates a chromosome sizes file and a FASTA index file
 3 | keywords:
 4 |   - fasta
 5 |   - chromosome
 6 |   - indexing
 7 | tools:
 8 |   - samtools:
 9 |       description: Tools for dealing with SAM, BAM and CRAM files
10 |       homepage: http://www.htslib.org/
11 |       documentation: http://www.htslib.org/doc/samtools.html
12 |       tool_dev_url: https://github.com/samtools/samtools
13 |       doi: 10.1093/bioinformatics/btp352
14 |       licence: ["MIT"]
15 | 
16 | input:
17 |   - meta:
18 |       type: map
19 |       description: |
20 |         Groovy Map containing sample information
21 |         e.g. [ id:'test', single_end:false ]
22 |   - fasta:
23 |       type: file
24 |       description: FASTA file
25 |       pattern: "*.{fa,fasta,fna,fas}"
26 | 
27 | output:
28 |   - meta:
29 |       type: map
30 |       description: |
31 |         Groovy Map containing sample information
32 |         e.g. [ id:'test', single_end:false ]
33 |   - sizes:
34 |       type: file
35 |       description: File containing chromosome lengths
36 |       pattern: "*.{sizes}"
37 |   - fai:
38 |       type: file
39 |       description: FASTA index file
40 |       pattern: "*.{fai}"
41 |   - gzi:
42 |       type: file
43 |       description: Optional gzip index file for compressed inputs
44 |       pattern: "*.gzi"
45 |   - versions:
46 |       type: file
47 |       description: File containing software versions
48 |       pattern: "versions.yml"
49 | 
50 | authors:
51 |   - "@tamara-hodgetts"
52 |   - "@chris-cheshire"
53 |   - "@muffato"
54 | 
--------------------------------------------------------------------------------
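A sketch of how the two custom modules above are wired at the pipeline level, following the conventional nf-core pattern (channel names are illustrative): CUSTOM_GETCHROMSIZES feeds genome preparation, while every module's versions.yml is funnelled into CUSTOM_DUMPSOFTWAREVERSIONS.

    CUSTOM_GETCHROMSIZES( ch_fasta )                 // ch_fasta: [meta, fasta]
    ch_chromsize = CUSTOM_GETCHROMSIZES.out.sizes    // [meta, *.sizes]

    CUSTOM_DUMPSOFTWAREVERSIONS(
        ch_versions.unique().collectFile(name: 'collated_versions.yml')
    )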
/modules/nf-core/fastqc/main.nf:
--------------------------------------------------------------------------------
 1 | process FASTQC {
 2 |     tag "$meta.id"
 3 |     label 'process_medium'
 4 | 
 5 |     conda "bioconda::fastqc=0.11.9"
 6 |     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
 7 |         'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' :
 8 |         'biocontainers/fastqc:0.11.9--0' }"
 9 | 
10 |     input:
11 |     tuple val(meta), path(reads)
12 | 
13 |     output:
14 |     tuple val(meta), path("*.html"), emit: html
15 |     tuple val(meta), path("*.zip") , emit: zip
16 |     path "versions.yml"            , emit: versions
17 | 
18 |     when:
19 |     task.ext.when == null || task.ext.when
20 | 
21 |     script:
22 |     def args = task.ext.args ?: ''
23 |     def prefix = task.ext.prefix ?: "${meta.id}"
24 |     // Make list of old name and new name pairs to use for renaming in the bash while loop
25 |     def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] }
26 |     def rename_to = old_new_pairs*.join(' ').join(' ')
27 |     def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ')
28 |     """
29 |     printf "%s %s\\n" $rename_to | while read old_name new_name; do
30 |         [ -f "\${new_name}" ] || ln -s \$old_name \$new_name
31 |     done
32 |     fastqc $args --threads $task.cpus $renamed_files
33 | 
34 |     cat <<-END_VERSIONS > versions.yml
35 |     "${task.process}":
36 |         fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" )
37 |     END_VERSIONS
38 |     """
39 | 
40 |     stub:
41 |     def prefix = task.ext.prefix ?: "${meta.id}"
42 |     """
43 |     touch ${prefix}.html
44 |     touch ${prefix}.zip
45 | 
46 |     cat <<-END_VERSIONS > versions.yml
47 |     "${task.process}":
48 |         fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" )
49 |     END_VERSIONS
50 |     """
51 | }
52 | 
--------------------------------------------------------------------------------
/modules/nf-core/fastqc/meta.yml:
--------------------------------------------------------------------------------
 1 | name: fastqc
 2 | description: Run FastQC on sequenced reads
 3 | keywords:
 4 |   - quality control
 5 |   - qc
 6 |   - adapters
 7 |   - fastq
 8 | tools:
 9 |   - fastqc:
10 |       description: |
11 |         FastQC gives general quality metrics about your reads.
12 |         It provides information about the quality score distribution
13 |         across your reads, the per base sequence content (%A/C/G/T).
14 |         You get information about adapter contamination and other
15 |         overrepresented sequences.
16 |       homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
17 |       documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/
18 |       licence: ["GPL-2.0-only"]
19 | input:
20 |   - meta:
21 |       type: map
22 |       description: |
23 |         Groovy Map containing sample information
24 |         e.g. [ id:'test', single_end:false ]
25 |   - reads:
26 |       type: file
27 |       description: |
28 |         List of input FastQ files of size 1 and 2 for single-end and paired-end data,
29 |         respectively.
30 | output:
31 |   - meta:
32 |       type: map
33 |       description: |
34 |         Groovy Map containing sample information
35 |         e.g. [ id:'test', single_end:false ]
36 |   - html:
37 |       type: file
38 |       description: FastQC report
39 |       pattern: "*_{fastqc.html}"
40 |   - zip:
41 |       type: file
42 |       description: FastQC report archive
43 |       pattern: "*_{fastqc.zip}"
44 |   - versions:
45 |       type: file
46 |       description: File containing software versions
47 |       pattern: "versions.yml"
48 | authors:
49 |   - "@drpatelh"
50 |   - "@grst"
51 |   - "@ewels"
52 |   - "@FelixKrueger"
53 | 
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | # Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black.
 2 | # Should be kept the same as nf-core/tools to avoid fighting with template synchronisation.
 3 | [tool.black]
 4 | line-length = 120
 5 | target_version = ["py37", "py38", "py39", "py310"]
 6 | 
 7 | [tool.isort]
 8 | profile = "black"
 9 | known_first_party = ["nf_core"]
10 | multi_line_output = 3
11 | 
--------------------------------------------------------------------------------
/subworkflows/local/compartments.nf:
--------------------------------------------------------------------------------
 1 | include { COOLTOOLS_EIGSCIS } from '../../modules/local/cooltools/eigscis'
 2 | 
 3 | workflow COMPARTMENTS {
 4 | 
 5 |     take:
 6 |     cool
 7 |     fasta
 8 |     chrsize
 9 | 
10 |     main:
11 |     ch_versions = Channel.empty()
12 | 
13 |     COOLTOOLS_EIGSCIS(
14 |         cool,
15 |         fasta.map{it -> it[1]}.collect(),
16 |         chrsize.map{it -> it[1]}.collect()
17 |     )
18 |     ch_versions = ch_versions.mix(COOLTOOLS_EIGSCIS.out.versions)
19 | 
20 |     emit:
21 |     versions = ch_versions
22 |     compartments = COOLTOOLS_EIGSCIS.out.results
23 | }
24 | 
--------------------------------------------------------------------------------
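A sketch of how the COMPARTMENTS subworkflow above can be invoked from the main workflow (workflows/hic.nf; channel names are illustrative). Note that fasta and chrsize arrive as [meta, file] tuples, which the subworkflow strips down to bare paths with .map{ it -> it[1] }:

    include { COMPARTMENTS } from './subworkflows/local/compartments'

    COMPARTMENTS(
        ch_comp,        // [meta, cool] at the compartment-calling resolution
        ch_fasta,       // [meta, fasta]
        ch_chromsize    // [meta, chrom.sizes]
    )
    ch_versions = ch_versions.mix(COMPARTMENTS.out.versions)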
/subworkflows/local/cooler.nf:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * COOLER MAIN WORKFLOW
  3 |  * INPUT : .pair text file with the list of valid interactions
  4 |  * OUTPUT : cooler files
  5 |  */
  6 | 
  7 | include { COOLER_ZOOMIFY } from '../../modules/nf-core/cooler/zoomify/main'
  8 | include { COOLER_DUMP } from '../../modules/nf-core/cooler/dump/main'
  9 | include { COOLER_CLOAD } from '../../modules/nf-core/cooler/cload/main'
 10 | include { COOLER_BALANCE } from '../../modules/nf-core/cooler/balance/main'
 11 | include { COOLER_MAKEBINS } from '../../modules/nf-core/cooler/makebins/main'
 12 | 
 13 | include { SPLIT_COOLER_DUMP } from '../../modules/local/split_cooler_dump'
 14 | 
 15 | // Add resolution in meta
 16 | def addResolution(row) {
 17 |     def meta = [:]
 18 |     meta.id = row[0].id
 19 |     meta.resolution = row[2]
 20 |     return [meta, row[1], row[2]]
 21 | }
 22 | 
 23 | workflow COOLER {
 24 | 
 25 |     take:
 26 |     pairs // [meta, pairs, index]
 27 |     chromsize // [meta, chromsize]
 28 |     cool_bins
 29 | 
 30 |     main:
 31 |     ch_versions = Channel.empty()
 32 | 
 33 |     //*****************************************
 34 |     // EXPORT BINS
 35 | 
 36 |     COOLER_MAKEBINS(
 37 |         chromsize.combine(cool_bins)
 38 |     )
 39 |     ch_versions = ch_versions.mix(COOLER_MAKEBINS.out.versions)
 40 | 
 41 |     //*****************************************
 42 |     // BUILD COOL FILE PER RESOLUTION
 43 |     // [meta, pairs, resolution]
 44 | 
 45 |     COOLER_CLOAD(
 46 |         pairs.combine(cool_bins),
 47 |         chromsize.map{it -> it[1]}.collect()
 48 |     )
 49 |     ch_versions = ch_versions.mix(COOLER_CLOAD.out.versions)
 50 | 
 51 |     // Add resolution in meta
 52 |     COOLER_CLOAD.out.cool
 53 |         .map{ it -> addResolution(it) }
 54 |         .set{ ch_cool }
 55 | 
 56 |     COOLER_BALANCE(
 57 |         ch_cool.map{[it[0], it[1], ""]}
 58 |     )
 59 |     ch_versions = ch_versions.mix(COOLER_BALANCE.out.versions)
 60 | 
 61 |     // Zoomify at minimum bin resolution
 62 |     if (!params.res_zoomify){
 63 |         ch_res_zoomify = cool_bins.min()
 64 |     }else{
 65 |         ch_res_zoomify = Channel.from(params.res_zoomify).splitCsv().flatten().unique().toInteger()
 66 |     }
 67 | 
 68 |     ch_cool
 69 |         .combine(ch_res_zoomify)
 70 |         .filter{ it[2] == it[3] }
 71 |         .map{ it->[it[0], it[1]] }
 72 |         .set{ ch_cool_zoomify }
 73 | 
 74 |     COOLER_ZOOMIFY(
 75 |         ch_cool_zoomify
 76 |     )
 77 |     ch_versions = ch_versions.mix(COOLER_ZOOMIFY.out.versions)
 78 | 
 79 |     //*****************************************
 80 |     // DUMP DATA
 81 |     // [meta, cool] / resolution
 82 | 
 83 |     COOLER_DUMP(
 84 |         COOLER_BALANCE.out.cool.map{[it[0], it[1], ""]}
 85 |     )
 86 |     ch_versions = ch_versions.mix(COOLER_DUMP.out.versions)
 87 | 
 88 |     SPLIT_COOLER_DUMP(
 89 |         COOLER_DUMP.out.bedpe
 90 |     )
 91 |     ch_versions = ch_versions.mix(SPLIT_COOLER_DUMP.out.versions)
 92 | 
 93 |     emit:
 94 |     versions = ch_versions
 95 |     cool = COOLER_BALANCE.out.cool
 96 |     mcool = COOLER_ZOOMIFY.out.mcool
 97 | }
 98 | 
--------------------------------------------------------------------------------
/subworkflows/local/hicpro.nf:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * HICPRO
  3 |  * MAIN WORKFLOW
  4 |  * From the raw sequencing reads to the list of valid interactions
  5 |  */
  6 | 
  7 | include { HICPRO_MAPPING } from './hicpro_mapping'
  8 | include { GET_VALID_INTERACTION } from '../../modules/local/hicpro/get_valid_interaction'
  9 | include { GET_VALID_INTERACTION_DNASE } from '../../modules/local/hicpro/get_valid_interaction_dnase'
 10 | include { MERGE_VALID_INTERACTION } from '../../modules/local/hicpro/merge_valid_interaction'
 11 | include { MERGE_STATS } from '../../modules/local/hicpro/merge_stats'
 12 | include { HICPRO2PAIRS } from '../../modules/local/hicpro/hicpro2pairs'
 13 | include { BUILD_CONTACT_MAPS } from '../../modules/local/hicpro/build_contact_maps'
 14 | include { ICE_NORMALIZATION } from '../../modules/local/hicpro/run_ice'
 15 | 
 16 | // Remove meta.chunk
 17 | def removeChunks(row){
 18 |     def meta = row[0].clone()
 19 |     meta.remove('chunk')
 20 |     return [meta, row[1]]
 21 | }
 22 | 
 23 | workflow HICPRO {
 24 | 
 25 |     take:
 26 |     reads // [meta, read1, read2]
 27 |     index // path
 28 |     fragments // path
 29 |     chrsize // path
 30 |     ligation_site // value
 31 |     map_res // values
 32 | 
 33 |     main:
 34 |     ch_versions = Channel.empty()
 35 | 
 36 |     // Fastq to paired-end bam
 37 |     HICPRO_MAPPING(
 38 |         reads,
 39 |         index,
 40 |         ligation_site
 41 |     )
 42 |     ch_versions = ch_versions.mix(HICPRO_MAPPING.out.versions)
 43 | 
 44 |     //***************************************
 45 |     // DIGESTION PROTOCOLS
 46 | 
 47 |     if (!params.dnase){
 48 |         GET_VALID_INTERACTION (
 49 |             HICPRO_MAPPING.out.bam,
 50 |             fragments.collect()
 51 |         )
 52 |         ch_versions = ch_versions.mix(GET_VALID_INTERACTION.out.versions)
 53 |         ch_valid_pairs = GET_VALID_INTERACTION.out.valid_pairs
 54 |         ch_valid_stats = GET_VALID_INTERACTION.out.stats
 55 | 
 56 |     }else{
 57 | 
 58 |         //****************************************
 59 |         // DNASE-LIKE PROTOCOLS
 60 | 
 61 |         GET_VALID_INTERACTION_DNASE (
 62 |             HICPRO_MAPPING.out.bam
 63 |         )
 64 |         ch_versions = ch_versions.mix(GET_VALID_INTERACTION_DNASE.out.versions)
 65 |         ch_valid_pairs = GET_VALID_INTERACTION_DNASE.out.valid_pairs
 66 |         ch_valid_stats = GET_VALID_INTERACTION_DNASE.out.stats
 67 |     }
 68 | 
 69 | 
 70 |     //**************************************
 71 |     // MERGE AND REMOVE DUPLICATES
 72 | 
 73 |     //if (params.split_fastq){
 74 |     ch_valid_pairs = ch_valid_pairs.map{ it -> removeChunks(it)}.groupTuple()
 75 |     ch_hicpro_stats = HICPRO_MAPPING.out.mapstats.map{it->removeChunks(it)}.groupTuple()
 76 |         .concat(HICPRO_MAPPING.out.pairstats.map{it->removeChunks(it)}.groupTuple(),
 77 |                 ch_valid_stats.map{it->removeChunks(it)}.groupTuple())
 78 |     //}else{
 79 |     //    ch_hicpro_stats = HICPRO_MAPPING.out.mapstats.groupTuple()
 80 |     //        .concat(HICPRO_MAPPING.out.pairstats.groupTuple(),
 81 |     //                ch_valid_stats.groupTuple())
 82 |     //}
 83 | 
 84 |     MERGE_VALID_INTERACTION (
 85 |         ch_valid_pairs
 86 |     )
 87 |     ch_versions = ch_versions.mix(MERGE_VALID_INTERACTION.out.versions)
 88 | 
 89 |     MERGE_STATS(
 90 |         ch_hicpro_stats
 91 |     )
 92 |     ch_versions = ch_versions.mix(MERGE_STATS.out.versions)
 93 | 
 94 |     //***************************************
 95 |     // CONVERT TO PAIRS
 96 |     HICPRO2PAIRS (
 97 |         MERGE_VALID_INTERACTION.out.valid_pairs,
 98 |         chrsize.collect()
 99 |     )
100 |     ch_versions = ch_versions.mix(HICPRO2PAIRS.out.versions)
101 | 
102 |     //***************************************
103 |     // CONTACT MAPS
104 | 
105 |     if (params.hicpro_maps){
106 | 
107 |         //build_contact_maps
108 |         BUILD_CONTACT_MAPS(
109 |             MERGE_VALID_INTERACTION.out.valid_pairs.combine(map_res),
110 |             chrsize.collect()
111 |         )
112 |         ch_hicpro_raw_maps = BUILD_CONTACT_MAPS.out.maps
113 | 
114 |         // run_ice
115 |         ICE_NORMALIZATION(
116 |             BUILD_CONTACT_MAPS.out.maps
117 |         )
118 |         ch_hicpro_iced_maps = ICE_NORMALIZATION.out.maps
119 |         ch_versions = ch_versions.mix(ICE_NORMALIZATION.out.versions)
120 | 
121 |     }else{
122 |         ch_hicpro_raw_maps = Channel.empty()
123 |         ch_hicpro_iced_maps = Channel.empty()
124 |     }
125 | 
126 |     emit:
127 |     versions = ch_versions
128 |     pairs = HICPRO2PAIRS.out.pairs
129 |     mqc = MERGE_VALID_INTERACTION.out.mqc.concat(MERGE_STATS.out.mqc)
130 |     raw_maps = ch_hicpro_raw_maps
131 |     iced_maps = ch_hicpro_iced_maps
132 | }
133 | 
--------------------------------------------------------------------------------
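The removeChunks/groupTuple step in the HICPRO subworkflow above merges per-chunk results back to one entry per sample. An illustrative data flow, assuming sample 'a' was split into two chunks:

    // [ [id:'a', chunk:0], pairs_0 ]  and  [ [id:'a', chunk:1], pairs_1 ]
    //   --> removeChunks : [ [id:'a'], pairs_0 ]  and  [ [id:'a'], pairs_1 ]
    //   --> groupTuple   : [ [id:'a'], [pairs_0, pairs_1] ]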
/subworkflows/local/hicpro_mapping.nf:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * HiC-Pro mapping
  3 |  * From the raw sequencing reads to a paired-end bam file
  4 |  */
  5 | 
  6 | include { BOWTIE2_ALIGN } from '../../modules/nf-core/bowtie2/align/main'
  7 | include { TRIM_READS } from '../../modules/local/hicpro/trim_reads'
  8 | include { BOWTIE2_ALIGN as BOWTIE2_ALIGN_TRIMMED } from '../../modules/nf-core/bowtie2/align/main'
  9 | include { MERGE_BOWTIE2 } from '../../modules/local/hicpro/bowtie2_merge'
 10 | include { COMBINE_MATES } from '../../modules/local/hicpro/combine_mates'
 11 | include { MAPPING_STATS_DNASE } from '../../modules/local/hicpro/dnase_mapping_stats'
 12 | 
 13 | // Paired-end to Single-end
 14 | def pairToSingle(row, mates) {
 15 |     def meta = row[0].clone()
 16 |     meta.single_end = true
 17 |     meta.mates = mates
 18 |     if (mates == "R1") {
 19 |         return [meta, [ row[1][0]] ]
 20 |     }else if (mates == "R2"){
 21 |         return [meta, [ row[1][1]] ]
 22 |     }
 23 | }
 24 | 
 25 | // Single-end to Paired-end
 26 | def singleToPair(row){
 27 |     def meta = row[0].clone()
 28 |     meta.remove('mates')
 29 |     meta.single_end = false
 30 |     return [ meta, row[1] ]
 31 | }
 32 | 
 33 | 
 34 | workflow HICPRO_MAPPING {
 35 | 
 36 |     take:
 37 |     reads // [meta, read1, read2]
 38 |     index // [meta, path]
 39 |     ligation_site // value
 40 | 
 41 |     main:
 42 |     ch_versions = Channel.empty()
 43 | 
 44 |     // Align each mate separately and add mate information in [meta]
 45 |     ch_reads_r1 = reads.map{ it -> pairToSingle(it,"R1") }
 46 |     ch_reads_r2 = reads.map{ it -> pairToSingle(it,"R2") }
 47 |     ch_reads = ch_reads_r1.concat(ch_reads_r2)
 48 | 
 49 |     // bowtie2 - save_unaligned=true - sort_bam=false
 50 |     BOWTIE2_ALIGN(
 51 |         ch_reads,
 52 |         index.collect(),
 53 |         true,
 54 |         false
 55 |     )
 56 |     ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions)
 57 | 
 58 |     if (!params.dnase){
 59 |         // trim reads
 60 |         TRIM_READS(
 61 |             BOWTIE2_ALIGN.out.fastq,
 62 |             ligation_site.collect()
 63 |         )
 64 |         ch_versions = ch_versions.mix(TRIM_READS.out.versions)
 65 | 
 66 |         // bowtie2 on trimmed reads - save_unaligned=false - sort_bam=false
 67 |         BOWTIE2_ALIGN_TRIMMED(
 68 |             TRIM_READS.out.fastq,
 69 |             index.collect(),
 70 |             false,
 71 |             false
 72 |         )
 73 |         ch_versions = ch_versions.mix(BOWTIE2_ALIGN_TRIMMED.out.versions)
 74 | 
 75 |         // Merge the two mapping steps
 76 |         BOWTIE2_ALIGN.out.bam
 77 |             .combine(BOWTIE2_ALIGN_TRIMMED.out.bam, by:[0])
 78 |             .set { ch_bowtie2_align }
 79 | 
 80 |         MERGE_BOWTIE2(
 81 |             ch_bowtie2_align
 82 |         )
 83 |         ch_versions = ch_versions.mix(MERGE_BOWTIE2.out.versions)
 84 |         ch_mapping_stats = MERGE_BOWTIE2.out.stats
 85 | 
 86 |         // Combine mates
 87 |         MERGE_BOWTIE2.out.bam
 88 |             .map { singleToPair(it) }
 89 |             .groupTuple()
 90 |             .set { ch_bams }
 91 | 
 92 |     }else{
 93 | 
 94 |         MAPPING_STATS_DNASE(
 95 |             BOWTIE2_ALIGN.out.bam
 96 |         )
 97 |         ch_mapping_stats = MAPPING_STATS_DNASE.out.stats
 98 | 
 99 |         BOWTIE2_ALIGN.out.bam
100 |             .map { singleToPair(it) }
101 |             .groupTuple()
102 |             .set { ch_bams }
103 |     }
104 | 
105 |     COMBINE_MATES (
106 |         ch_bams
107 |     )
108 |     ch_versions = ch_versions.mix(COMBINE_MATES.out.versions)
109 | 
110 |     emit:
111 |     versions = ch_versions
112 |     bam = COMBINE_MATES.out.bam
113 |     mapstats = ch_mapping_stats
114 |     pairstats = COMBINE_MATES.out.stats
115 | }
116 | 
--------------------------------------------------------------------------------
/subworkflows/local/input_check.nf:
--------------------------------------------------------------------------------
 1 | //
 2 | // Check input samplesheet and get read channels
 3 | //
 4 | 
 5 | include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check'
 6 | 
 7 | workflow INPUT_CHECK {
 8 |     take:
 9 |     samplesheet // file: /path/to/samplesheet.csv
10 | 
11 |     main:
12 |     if (params.split_fastq){
13 | 
14 |         SAMPLESHEET_CHECK ( samplesheet )
15 |             .csv
16 |             .splitCsv ( header:true, sep:',' )
17 |             .map { create_fastq_channels(it) }
18 |             .splitFastq( by: params.fastq_chunks_size, pe:true, file: true, compress:true)
19 |             .map { it -> [it[0], [it[1], it[2]]]}
20 |             .groupTuple(by: [0])
21 |             .flatMap { it -> setMetaChunk(it) }
22 |             .collate(2)
23 |             //.map { it ->
24 |             //    def meta = it[0].clone()
25 |             //    meta.chunk = it[1].baseName - ~/.fastq(.gz)?/
26 |             //    return [meta, [it[1], it[2]]]
27 |             //}
28 |             .set { reads }
29 | 
30 |     }else{
31 |         SAMPLESHEET_CHECK ( samplesheet )
32 |             .csv
33 |             .splitCsv ( header:true, sep:',' )
34 |             .map { create_fastq_channels(it) }
35 |             .map { it -> [it[0], [it[1], it[2]]]}
36 |             .groupTuple(by: [0])
37 |             .flatMap { it -> setMetaChunk(it) }
38 |             .collate(2)
39 |             .set { reads }
40 |     }
41 | 
42 |     emit:
43 |     reads // channel: [ val(meta), [ reads ] ]
44 | }
45 | 
46 | // Function to get list of [ meta, [ fastq_1, fastq_2 ] ]
47 | def create_fastq_channels(LinkedHashMap row) {
48 |     def meta = [:]
49 |     meta.id = row.sample
50 |     meta.single_end = false
51 | 
52 |     def array = []
53 |     if (!file(row.fastq_1).exists()) {
54 |         exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}"
55 |     }
56 |     if (!file(row.fastq_2).exists()) {
57 |         exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}"
58 |     }
59 |     array = [ meta, file(row.fastq_1), file(row.fastq_2) ]
60 |     return array
61 | }
62 | 
63 | // Set the meta.chunk value in case of technical replicates
64 | def setMetaChunk(row){
65 |     def map = []
66 |     row[1].eachWithIndex { file, i ->
67 |         def meta = row[0].clone()
68 |         meta.chunk = i
69 |         map += [meta, file]
70 |     }
71 |     return map
72 | }
73 | 
--------------------------------------------------------------------------------
/subworkflows/local/prepare_genome.nf:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Prepare Annotation Genome for Hi-C data analysis
 3 |  */
 4 | 
 5 | include { BOWTIE2_BUILD } from '../../modules/nf-core/bowtie2/build/main'
 6 | include { CUSTOM_GETCHROMSIZES } from '../../modules/nf-core/custom/getchromsizes/main'
 7 | include { GET_RESTRICTION_FRAGMENTS } from '../../modules/local/hicpro/get_restriction_fragments'
 8 | 
 9 | workflow PREPARE_GENOME {
10 | 
11 |     take:
12 |     fasta
13 |     restriction_site
14 | 
15 |     main:
16 |     ch_versions = Channel.empty()
17 | 
18 |     //***************************************
19 |     // Bowtie2 index
20 |     if(!params.bwt2_index){
21 |         BOWTIE2_BUILD (
22 |             fasta
23 |         )
24 |         ch_index = BOWTIE2_BUILD.out.index
25 |         ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions)
26 |     }else{
27 |         Channel.fromPath( params.bwt2_index , checkIfExists: true)
28 |             .map { it -> [[:], it]}
29 |             .ifEmpty { exit 1, "Genome index: Provided index not found: ${params.bwt2_index}" }
30 |             .set { ch_index }
31 |     }
32 | 
33 |     //***************************************
34 |     // Chromosome size
35 |     if(!params.chromosome_size){
36 |         CUSTOM_GETCHROMSIZES(
37 |             fasta
38 |         )
39 |         ch_chromsize = CUSTOM_GETCHROMSIZES.out.sizes
40 |         ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions)
41 |     }else{
42 |         Channel.fromPath( params.chromosome_size , checkIfExists: true)
43 |             .map { it -> [[:], it]}
44 |             .set { ch_chromsize }
45 |     }
46 | 
47 |     //***************************************
48 |     // Restriction fragments
49 |     if(!params.restriction_fragments && !params.dnase){
50 |         GET_RESTRICTION_FRAGMENTS(
51 |             fasta,
52 |             restriction_site
53 |         )
54 |         ch_resfrag = GET_RESTRICTION_FRAGMENTS.out.results
55 |         ch_versions = ch_versions.mix(GET_RESTRICTION_FRAGMENTS.out.versions)
56 |     }else if (!params.dnase){
57 |         Channel.fromPath( params.restriction_fragments, checkIfExists: true )
58 |             .map{ it -> [[:], it] }
59 |             .set { ch_resfrag }
60 |     }else{
61 |         ch_resfrag = Channel.empty()
62 |     }
63 | 
64 |     emit:
65 |     index = ch_index
66 |     chromosome_size = ch_chromsize
67 |     res_frag = ch_resfrag
68 |     versions = ch_versions
69 | }
70 | 
--------------------------------------------------------------------------------
/subworkflows/local/tads.nf:
--------------------------------------------------------------------------------
 1 | include { COOLTOOLS_INSULATION } from '../../modules/local/cooltools/insulation'
 2 | include { HIC_FIND_TADS } from '../../modules/local/hicexplorer/hicFindTADs'
 3 | 
 4 | workflow TADS {
 5 | 
 6 |     take:
 7 |     cool
 8 | 
 9 |     main:
10 |     ch_versions = Channel.empty()
11 |     ch_tads = Channel.empty()
12 | 
13 |     if (params.tads_caller =~ 'insulation'){
14 |         COOLTOOLS_INSULATION(cool)
15 |         ch_versions = ch_versions.mix(COOLTOOLS_INSULATION.out.versions)
16 |         ch_tads = ch_tads.mix(COOLTOOLS_INSULATION.out.tsv)
17 |     }
18 | 
19 |     if (params.tads_caller =~ 'hicexplorer'){
20 |         HIC_FIND_TADS(cool)
21 |         ch_versions = ch_versions.mix(HIC_FIND_TADS.out.versions)
22 |         ch_tads = ch_tads.mix(HIC_FIND_TADS.out.results)
23 |     }
24 | 
25 |     emit:
26 |     tads = ch_tads
27 |     versions = ch_versions
28 | }
29 | 
--------------------------------------------------------------------------------
/tower.yml:
--------------------------------------------------------------------------------
 1 | reports:
 2 |   multiqc_report.html:
 3 |     display: "MultiQC HTML report"
 4 |   samplesheet.csv:
 5 |     display: "Auto-created samplesheet with collated metadata and FASTQ paths"
 6 | 
--------------------------------------------------------------------------------