├── .github └── workflows │ ├── deploy.yml │ └── test.yml ├── .gitignore ├── .gitpod.yml ├── LICENSE.txt ├── README.md ├── channel-duplication.nf ├── collect-into-file.nf ├── conditional-process.nf ├── conditional-process2.nf ├── conditional-process3.nf ├── conditional-resources.nf ├── create-key-to-combine-channels.nf ├── data ├── alignment │ ├── kidney.bai │ ├── kidney.bam │ ├── lung.bai │ └── lung.bam ├── hello.txt ├── index.csv ├── poem.txt ├── prots │ ├── prot_1.fa │ ├── prot_2.fa │ ├── prot_3.fa │ └── sample.fa └── reads │ ├── 110101_I315_FC816RLABXX_L1_HUMrutRGXDIAAPE_1.fq.gz │ ├── 110101_I315_FC816RLABXX_L1_HUMrutRGXDIAAPE_2.fq.gz │ ├── 110105_I186_FC812MWABXX_L8_HUMrutRGVDIABPE_1.fq.gz │ ├── 110105_I186_FC812MWABXX_L8_HUMrutRGVDIABPE_2.fq.gz │ ├── 110121_I288_FC81DE8ABXX_L3_HUMrutRGXDIAAPE_1.fq.gz │ ├── 110121_I288_FC81DE8ABXX_L3_HUMrutRGXDIAAPE_2.fq.gz │ ├── 110122_I329_FC81DB5ABXX_L6_HUMrutRGVDIAAPE_1.fq.gz │ ├── 110122_I329_FC81DB5ABXX_L6_HUMrutRGVDIAAPE_2.fq.gz │ ├── 110128_I481_FC819P0ABXX_L5_HUMrutRGWDIAAPE_1.fq.gz │ ├── 110128_I481_FC819P0ABXX_L5_HUMrutRGWDIAAPE_2.fq.gz │ └── sample.fq.gz ├── docs ├── channel-duplication.md ├── collect-into-file.md ├── conditional-process-dynamic.md ├── conditional-process.md ├── conditional-resources.md ├── create-key-to-combine-channels.md ├── feedback-loop.md ├── ignore-failing-process.md ├── index.md ├── optional-input.md ├── optional-output.md ├── process-collect.md ├── process-get-workdir.md ├── process-into-groups.md ├── process-per-csv-record.md ├── process-per-file-chunk.md ├── process-per-file-output.md ├── process-per-file-pairs.md ├── process-per-file-path.md ├── process-per-file-range.md ├── process-when-empty.md ├── publish-matching-glob.md ├── publish-process-outputs.md ├── publish-rename-outputs.md ├── skip-process-execution.md ├── sort-filepairs-by-samplename.md ├── state-dependency.md ├── task-batching.md └── workflow-grouping.md ├── feedback-loop-process.nf ├── feedback-loop-workflow.nf ├── 
ignore-failing-process.nf ├── mkdocs.yml ├── nextflow.config ├── optional-input.nf ├── optional-output.nf ├── process-collect.nf ├── process-get-workdir.nf ├── process-into-groups.nf ├── process-per-csv-record.nf ├── process-per-file-chunk.nf ├── process-per-file-output.nf ├── process-per-file-pairs-custom.nf ├── process-per-file-pairs.nf ├── process-per-file-path.nf ├── process-per-file-range.nf ├── process-when-empty.nf ├── publish-matching-glob.nf ├── publish-process-outputs.nf ├── publish-rename-outputs-subdirs.nf ├── publish-rename-outputs.nf ├── scripts ├── cleanup.sh └── test.sh ├── skip-process-execution.nf ├── sort-filepairs-by-samplename.nf ├── state-dependency.nf ├── task-batching.nf └── workflow-grouping.nf /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: deploy 2 | on: 3 | push: 4 | branches: 5 | - master 6 | jobs: 7 | deploy: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v4 # v2 targets a retired Node runtime 11 | - uses: actions/setup-python@v5 # v2 targets a retired Node runtime 12 | with: 13 | python-version: 3.x 14 | - run: pip install mkdocs-material 15 | - run: mkdocs gh-deploy --force 16 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | on: push 3 | jobs: 4 | test: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - uses: actions/checkout@v4 # kept in step with deploy.yml 8 | - uses: actions/setup-java@v4 9 | with: 10 | distribution: 'temurin' 11 | java-version: '17' 12 | - uses: nf-core/setup-nextflow@v1 13 | - run: bash scripts/test.sh 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .git 2 | .idea* 3 | .nextflow* 4 | my-results 5 | public 6 | results 7 | work -------------------------------------------------------------------------------- /.gitpod.yml: 
-------------------------------------------------------------------------------- 1 | image: nfcore/gitpod:latest 2 | 3 | vscode: 4 | extensions: # based on nf-core.nf-core-extensionpack 5 | - codezombiech.gitignore # Language support for .gitignore files 6 | # - cssho.vscode-svgviewer # SVG viewer 7 | - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code 8 | - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed 9 | - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files 10 | - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar 11 | - mechatroner.rainbow-csv # Highlight columns in csv files in different colors 12 | # - nextflow.nextflow # Nextflow syntax highlighting 13 | - oderwat.indent-rainbow # Highlight indentation level 14 | - streetsidesoftware.code-spell-checker # Spelling checker for source code 15 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018, Centre for Genomic Regulation (CRG). 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 
12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Nextflow Patterns 2 | 3 | A curated collection of Nextflow implementation patterns 4 | 5 | ![Build Status](https://github.com/nextflow-io/patterns/actions/workflows/test.yml/badge.svg) 6 | 7 | ## Basic patterns 8 | 9 | * [Channel duplication](docs/channel-duplication.md) 10 | * [Sort FilePairs by sample name](docs/sort-filepairs-by-samplename.md) 11 | * [Create key to combine channels](docs/create-key-to-combine-channels.md) 12 | 13 | ## Scatter executions 14 | 15 | * [Process per file path](docs/process-per-file-path.md) 16 | * [Process per file chunk](docs/process-per-file-chunk.md) 17 | * [Process per file pairs](docs/process-per-file-pairs.md) 18 | * [Process per file range](docs/process-per-file-range.md) 19 | * [Process per CSV record](docs/process-per-csv-record.md) 20 | * [Process per file output](docs/process-per-file-output.md) 21 | 22 | ## Gather results 23 | 24 | * [Process all outputs altogether](docs/process-collect.md) 25 | * [Process outputs into groups](docs/process-into-groups.md) 26 | * [Collect outputs into a file](docs/collect-into-file.md) 27 | 28 | ## Organize outputs 29 | 30 | * [Store process outputs](docs/publish-process-outputs.md) 31 | * [Store outputs matching a glob pattern](docs/publish-matching-glob.md) 32 | * [Store outputs renaming 
files](docs/publish-rename-outputs.md) 33 | 34 | ## Other 35 | 36 | * [Get process work directory](docs/process-get-workdir.md) 37 | * [Ignore failing process](docs/ignore-failing-process.md) 38 | * [State dependency](docs/state-dependency.md) 39 | 40 | ## Advanced patterns 41 | 42 | * [Conditional process resources](docs/conditional-resources.md) 43 | * [Conditional process executions](docs/conditional-process.md) 44 | * [Skip process execution](docs/skip-process-execution.md) 45 | * [Feedback loop](docs/feedback-loop.md) 46 | * [Optional input](docs/optional-input.md) 47 | * [Optional output](docs/optional-output.md) 48 | * [Process when empty](docs/process-when-empty.md) 49 | * [Task batching](docs/task-batching.md) 50 | -------------------------------------------------------------------------------- /channel-duplication.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2018, Centre for Genomic Regulation (CRG). 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | /* 26 | * author Paolo Di Tommaso 27 | */ 28 | 29 | // Prints a placeholder command line for each input file it receives. 30 | process foo { 31 | input: 32 | path x 33 | 34 | script: 35 | """ 36 | echo your_command --input $x 37 | """ 38 | } 39 | 40 | // Second consumer of the same files; prints the same placeholder command. 41 | process bar { 42 | input: 43 | path x 44 | 45 | script: 46 | """ 47 | echo your_command --input $x 48 | """ 49 | } 50 | 51 | workflow { 52 | // projectDir replaces the deprecated baseDir alias 53 | input_ch = Channel.fromPath("$projectDir/data/prots/*_?.fa") 54 | 55 | // the same channel is passed to both processes 56 | foo(input_ch) 57 | bar(input_ch) 58 | } 59 | -------------------------------------------------------------------------------- /collect-into-file.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2018, Centre for Genomic Regulation (CRG). 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | /* 26 | * author Paolo Di Tommaso 27 | */ 28 | 29 | // Decompresses one gzipped input file into file.fq. 30 | process foo { 31 | input: 32 | path x 33 | 34 | output: 35 | path 'file.fq' 36 | 37 | script: 38 | """ 39 | < $x zcat > file.fq 40 | """ 41 | } 42 | 43 | workflow { 44 | // projectDir replaces the deprecated baseDir alias 45 | Channel.fromPath("$projectDir/data/reads/*_1.fq.gz", checkIfExists: true) \ 46 | | foo \ 47 | | collectFile \ 48 | | view 49 | } 50 | -------------------------------------------------------------------------------- /conditional-process.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2018, Centre for Genomic Regulation (CRG). 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | /* 26 | * author Paolo Di Tommaso 27 | */ 28 | 29 | // Selects the upstream process: bar when true, foo otherwise. 30 | params.flag = false 31 | 32 | // Writes the word "foo" to x.txt. 33 | process foo { 34 | output: 35 | path 'x.txt' 36 | 37 | script: 38 | ''' 39 | echo foo > x.txt 40 | ''' 41 | } 42 | 43 | // Writes the word "bar" to x.txt. 44 | process bar { 45 | output: 46 | path 'x.txt' 47 | 48 | script: 49 | ''' 50 | echo bar > x.txt 51 | ''' 52 | } 53 | 54 | // Prints the content of the file it receives. 55 | process omega { 56 | debug true 57 | 58 | input: 59 | path x 60 | 61 | script: 62 | """ 63 | cat $x 64 | """ 65 | } 66 | 67 | workflow { 68 | // invoke exactly one producer, then hand its output to omega 69 | produced = params.flag ? bar() : foo() 70 | omega(produced) 71 | } 72 | -------------------------------------------------------------------------------- /conditional-process2.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2018, Centre for Genomic Regulation (CRG). 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | /* 26 | * author Paolo Di Tommaso 27 | * author Mike Smoot 28 | */ 29 | 30 | // When true the values 1,2,3 are routed to bar; otherwise 4,5,6 are routed to foo. 31 | params.flag = false 32 | 33 | // Writes the received value to x.txt. 34 | process foo { 35 | input: 36 | val x 37 | 38 | output: 39 | path 'x.txt' 40 | 41 | script: 42 | """ 43 | echo $x > x.txt 44 | """ 45 | } 46 | 47 | // Writes the received value to x.txt. 48 | process bar { 49 | input: 50 | val b 51 | 52 | output: 53 | path 'x.txt' 54 | 55 | script: 56 | """ 57 | echo $b > x.txt 58 | """ 59 | } 60 | 61 | // Prints the content of each file it receives. 62 | process omega { 63 | debug true 64 | 65 | input: 66 | path x 67 | 68 | script: 69 | """ 70 | cat $x 71 | """ 72 | } 73 | 74 | workflow { 75 | // Channel.of replaces the deprecated Channel.from factory; 76 | // one of the two processes always receives an empty channel 77 | (foo_ch, bar_ch) = params.flag 78 | ? [ Channel.empty(), Channel.of(1,2,3) ] 79 | : [ Channel.of(4,5,6), Channel.empty() ] 80 | 81 | foo(foo_ch) 82 | bar(bar_ch) 83 | 84 | // merge both outputs so omega receives the files of whichever process ran 85 | foo.out | mix(bar.out) | omega 86 | } 87 | -------------------------------------------------------------------------------- /conditional-process3.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2023, Seqera Labs. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 
15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | /* 26 | * author Ben Sherman 27 | */ 28 | 29 | // Writes "foo" to x.txt for each signal value received. 30 | process foo { 31 | input: 32 | val signal 33 | 34 | output: 35 | path 'x.txt' 36 | 37 | script: 38 | ''' 39 | echo foo > x.txt 40 | ''' 41 | } 42 | 43 | // Writes "bar" to x.txt for each signal value received. 44 | process bar { 45 | input: 46 | val signal 47 | 48 | output: 49 | path 'x.txt' 50 | 51 | script: 52 | ''' 53 | echo bar > x.txt 54 | ''' 55 | } 56 | 57 | workflow { 58 | // sample one number from 1..100 and route it by magnitude 59 | ch_if = Channel 60 | .of( 1..100 ) 61 | .randomSample(1) 62 | .branch { n -> 63 | high: n > 50 64 | low: n <= 50 65 | } 66 | 67 | // foo consumes the high branch, bar the low branch 68 | foo(ch_if.high) 69 | bar(ch_if.low) 70 | } 71 | -------------------------------------------------------------------------------- /conditional-resources.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2018, Centre for Genomic Regulation (CRG). 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 
15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | /* 26 | * author Paolo Di Tommaso 27 | */ 28 | 29 | 30 | // Requests memory according to the size of the input file. 31 | process foo { 32 | // dynamic directive: 1 GB when the input is smaller than 70 KB, 5 GB otherwise 33 | memory { reads.size() < 70.KB ? 1.GB : 5.GB } 34 | 35 | input: 36 | path reads 37 | 38 | // explicit section label, as in the other scripts of this collection 39 | script: 40 | """ 41 | echo your_command_here --in ${reads} --mem=${task.memory.giga} 42 | """ 43 | } 44 | 45 | workflow { 46 | // projectDir replaces the deprecated baseDir alias 47 | Channel.fromPath("$projectDir/data/reads/*_1.fq.gz", checkIfExists:true) \ 48 | | foo 49 | } 50 | -------------------------------------------------------------------------------- /create-key-to-combine-channels.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2022, Seqera Labs. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 
15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | /* 26 | * author Marcel Ribeiro-Dantas 27 | */ 28 | 29 | // Matching key: the text between "--" and "_3p" in a file name (e.g. bc1001) 30 | def barcodeOf = { name -> name.toString().split("--")[1].split("_3p")[0] } 31 | 32 | // SAM names, listed in a different barcode order than the FASTA names 33 | ch_alignment = Channel.of( 34 | 'demux.Clontech_5p--bc1003_3p.flnc_clustered.sorted.sam', 35 | 'demux.Clontech_5p--bc1001_3p.flnc_clustered.sorted.sam', 36 | 'demux.Clontech_5p--bc1002_3p.flnc_clustered.sorted.sam' 37 | ) 38 | 39 | ch_clustered = Channel.of( 40 | 'demux.Clontech_5p--bc1001_3p.flnc_clustered.fasta', 41 | 'demux.Clontech_5p--bc1002_3p.flnc_clustered.fasta', 42 | 'demux.Clontech_5p--bc1003_3p.flnc_clustered.fasta' 43 | ) 44 | 45 | // turn every name into a [key, name] pair 46 | ch_alignment = ch_alignment.map { sam -> [ barcodeOf(sam), sam ] } 47 | ch_clustered = ch_clustered.map { fasta -> [ barcodeOf(fasta), fasta ] } 48 | 
49 | // pair the entries that share a key, then drop the key 50 | ch_alignment 51 | .combine(ch_clustered, by: 0) 52 | .map { id, sam, fasta -> [sam, fasta] } 53 | .view() -------------------------------------------------------------------------------- /data/alignment/kidney.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nextflow-io/patterns/8341e94a61feaa2263fdd83ddf6287b54aebde20/data/alignment/kidney.bai -------------------------------------------------------------------------------- /data/alignment/kidney.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nextflow-io/patterns/8341e94a61feaa2263fdd83ddf6287b54aebde20/data/alignment/kidney.bam -------------------------------------------------------------------------------- /data/alignment/lung.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nextflow-io/patterns/8341e94a61feaa2263fdd83ddf6287b54aebde20/data/alignment/lung.bai -------------------------------------------------------------------------------- /data/alignment/lung.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nextflow-io/patterns/8341e94a61feaa2263fdd83ddf6287b54aebde20/data/alignment/lung.bam -------------------------------------------------------------------------------- /data/hello.txt: -------------------------------------------------------------------------------- 1 | Hello 2 | -------------------------------------------------------------------------------- /data/index.csv: -------------------------------------------------------------------------------- 1 | sampleId,read1,read2 2 | FC816RLABXX,reads/110101_I315_FC816RLABXX_L1_HUMrutRGXDIAAPE_1.fq.gz,reads/110101_I315_FC816RLABXX_L1_HUMrutRGXDIAAPE_2.fq.gz 3 | 
FC812MWABXX,reads/110105_I186_FC812MWABXX_L8_HUMrutRGVDIABPE_1.fq.gz,reads/110105_I186_FC812MWABXX_L8_HUMrutRGVDIABPE_2.fq.gz 4 | FC81DE8ABXX,reads/110121_I288_FC81DE8ABXX_L3_HUMrutRGXDIAAPE_1.fq.gz,reads/110121_I288_FC81DE8ABXX_L3_HUMrutRGXDIAAPE_2.fq.gz 5 | FC81DB5ABXX,reads/110122_I329_FC81DB5ABXX_L6_HUMrutRGVDIAAPE_1.fq.gz,reads/110122_I329_FC81DB5ABXX_L6_HUMrutRGVDIAAPE_2.fq.gz 6 | FC819P0ABXX,reads/110128_I481_FC819P0ABXX_L5_HUMrutRGWDIAAPE_1.fq.gz,reads/110128_I481_FC819P0ABXX_L5_HUMrutRGWDIAAPE_2.fq.gz -------------------------------------------------------------------------------- /data/poem.txt: -------------------------------------------------------------------------------- 1 | Who will believe my verse in time to come 2 | If it were filled with your most high deserts? 3 | Though yet heaven knows it is but as a tomb 4 | Which hides your life, and shows not half your parts: 5 | If I could write the beauty of your eyes, 6 | And in fresh numbers number all your graces, 7 | The age to come would say this poet lies, 8 | Such heavenly touches ne'er touched earthly faces. 9 | So should my papers (yellowed with their age) 10 | Be scorned, like old men of less truth than tongue, 11 | And your true rights be termed a poet's rage, 12 | And stretched metre of an antique song. 13 | But were some child of yours alive that time, 14 | You should live twice in it, and in my rhyme. 
-------------------------------------------------------------------------------- /data/prots/prot_1.fa: -------------------------------------------------------------------------------- 1 | >ENSP00000354687 pep:known chromosome:GRCh37:MT:3307:4262:1 gene:ENSG00000198888 transcript:ENST00000361390 gene_biotype:protein_coding transcript_biotype:protein_coding 2 | MPMANLLLLIVPILIAMAFLMLTERKILGYMQLRKGPNVVGPYGLLQPFADAMKLFTKEP 3 | LKPATSTITLYITAPTLALTIALLLWTPLPMPNPLVNLNLGLLFILATSSLAVYSILWSG 4 | WASNSNYALIGALRAVAQTISYEVTLAIILLSTLLMSGSFNLSTLITTQEHLWLLLPSWP 5 | LAMMWFISTLAETNRTPFDLAEGESELVSGFNIEYAAGPFALFFMAEYTNIIMMNTLTTT 6 | IFLGTTYDALSPELYTTYFVTKTLLLTSLFLWIRTAYPRFRYDQLMHLLWKNFLPLTLAL 7 | LMWYVSMPITISSIPPQT 8 | >ENSP00000355046 pep:known chromosome:GRCh37:MT:4470:5511:1 gene:ENSG00000198763 transcript:ENST00000361453 gene_biotype:protein_coding transcript_biotype:protein_coding 9 | MNPLAQPVIYSTIFAGTLITALSSHWFFTWVGLEMNMLAFIPVLTKKMNPRSTEAAIKYF 10 | LTQATASMILLMAILFNNMLSGQWTMTNTTNQYSSLMIMMAMAMKLGMAPFHFWVPEVTQ 11 | GTPLTSGLLLLTWQKLAPISIMYQISPSLNVSLLLTLSILSIMAGSWGGLNQTQLRKILA 12 | YSSITHMGWMMAVLPYNPNMTILNLTIYIILTTTAFLLLNLNSSTTTLLLSRTWNKLTWL 13 | TPLIPSTLLSLGGLPPLTGFLPKWAIIEEFTKNNSLIIPTIMATITLLNLYFYLRLIYST 14 | SITLLPMSNNVKMKWQFEHTKPTPFLPTLIALTTLLLPISPFMLMIL 15 | >ENSP00000354499 pep:known chromosome:GRCh37:MT:5904:7445:1 gene:ENSG00000198804 transcript:ENST00000361624 gene_biotype:protein_coding transcript_biotype:protein_coding 16 | MFADRWLFSTNHKDIGTLYLLFGAWAGVLGTALSLLIRAELGQPGNLLGNDHIYNVIVTA 17 | HAFVMIFFMVMPIMIGGFGNWLVPLMIGAPDMAFPRMNNMSFWLLPPSLLLLLASAMVEA 18 | GAGTGWTVYPPLAGNYSHPGASVDLTIFSLHLAGVSSILGAINFITTIINMKPPAMTQYQ 19 | TPLFVWSVLITAVLLLLSLPVLAAGITMLLTDRNLNTTFFDPAGGGDPILYQHLFWFFGH 20 | PEVYILILPGFGMISHIVTYYSGKKEPFGYMGMVWAMMSIGFLGFIVWAHHMFTVGMDVD 21 | TRAYFTSATMIIAIPTGVKVFSWLATLHGSNMKWSAAVLWALGFIFLFTVGGLTGIVLAN 22 | SSLDIVLHDTYYVVAHFHYVLSMGAVFAIMGGFIHWFPLFSGYTLDQTYAKIHFTIMFIG 23 | VNLTFFPQHFLGLSGMPRRYSDYPDAYTTWNILSSVGSFISLTAVMLMIFMIWEAFASKR 24 | 
KVLMVEEPSMNLEWLYGCPPPYHTFEEPVYMKS 25 | >ENSP00000354876 pep:known chromosome:GRCh37:MT:7586:8269:1 gene:ENSG00000198712 transcript:ENST00000361739 gene_biotype:protein_coding transcript_biotype:protein_coding 26 | MAHAAQVGLQDATSPIMEELITFHDHALMIIFLICFLVLYALFLTLTTKLTNTNISDAQE 27 | METVWTILPAIILVLIALPSLRILYMTDEVNDPSLTIKSIGHQWYWTYEYTDYGGLIFNS 28 | YMLPPLFLEPGDLRLLDVDNRVVLPIEAPIRMMITSQDVLHSWAVPTLGLKTDAIPGRLN 29 | QTTFTATRPGVYYGQCSEICGANHSFMPIVLELIPLKIFEMGPVFTL 30 | >ENSP00000355265 pep:known chromosome:GRCh37:MT:8366:8572:1 gene:ENSG00000228253 transcript:ENST00000361851 gene_biotype:protein_coding transcript_biotype:protein_coding 31 | MPQLNTTVWPTMITPMLLTLFLITQLKMLNTNYHLPPSPKPMKMKNYNKPWEPKWTKICS 32 | LHSLPPQS 33 | -------------------------------------------------------------------------------- /data/prots/prot_2.fa: -------------------------------------------------------------------------------- 1 | >ENSP00000354813 pep:known chromosome:GRCh37:MT:12337:14148:1 gene:ENSG00000198786 transcript:ENST00000361567 gene_biotype:protein_coding transcript_biotype:protein_coding 2 | MTMHTTMTTLTLTSLIPPILTTLVNPNKKNSYPHYVKSIVASTFIISLFPTTMFMCLDQE 3 | VIISNWHWATTQTTQLSLSFKLDYFSMMFIPVALFVTWSIMEFSLWYMNSDPNINQFFKY 4 | LLIFLITMLILVTANNLFQLFIGWEGVGIMSFLLISWWYARADANTAAIQAILYNRIGDI 5 | GFILALAWFILHSNSWDPQQMALLNANPSLTPLLGLLLAAAGKSAQLGLHPWLPSAMEGP 6 | TPVSALLHSSTMVVAGIFLLIRFHPLAENSPLIQTLTLCLGAITTLFAAVCALTQNDIKK 7 | IVAFSTSSQLGLMMVTIGINQPHLAFLHICTHAFFKAMLFMCSGSIIHNLNNEQDIRKMG 8 | GLLKTMPLTSTSLTIGSLALAGMPFLTGFYSKDHIIETANMSYTNAWALSITLIATSLTS 9 | AYSTRMILLTLTGQPRFPTLTNINENNPTLLNPIKRLAAGSLFAGFLITNNISPASPFQT 10 | TIPLYLKLTALAVTFLGLLTALDLNYLTNKLKMKSPLCTFYFSNMLGFYPSITHRTIPYL 11 | GLLTSQNLPLLLLDLTWLEKLLPKTISQHQISTSIITSTQKGMIKLYFLSFFFPLILTLL 12 | LIT 13 | >ENSP00000354665 pep:known chromosome:GRCh37:MT:14149:14673:-1 gene:ENSG00000198695 transcript:ENST00000361681 gene_biotype:protein_coding transcript_biotype:protein_coding 14 | MMYALFLLSVGLVMGFVGFSSKPSPIYGGLVLIVSGVVGCVIILNFGGGYMGLMVFLIYL 15 | 
GGMMVVFGYTTAMAIEEYPEAWGSGVEVLVSVLVGLAMEVGLVLWVKEYDGVVVVVNFNS 16 | VGSWMIYEGEGSGLIREDPIGAGALYDYGRWLVVVTGWTLFVGVYIVIEIARGN 17 | >ENSP00000354554 pep:known chromosome:GRCh37:MT:14747:15887:1 gene:ENSG00000198727 transcript:ENST00000361789 gene_biotype:protein_coding transcript_biotype:protein_coding 18 | MTPMRKTNPLMKLINHSFIDLPTPSNISAWWNFGSLLGACLILQITTGLFLAMHYSPDAS 19 | TAFSSIAHITRDVNYGWIIRYLHANGASMFFICLFLHIGRGLYYGSFLYSETWNIGIILL 20 | LATMATAFMGYVLPWGQMSFWGATVITNLLSAIPYIGTDLVQWIWGGYSVDSPTLTRFFT 21 | FHFILPFIIAALATLHLLFLHETGSNNPLGITSHSDKITFHPYYTIKDALGLLLFLLSLM 22 | TLTLFSPDLLGDPDNYTLANPLNTPPHIKPEWYFLFAYTILRSVPNKLGGVLALLLSILI 23 | LAMIPILHMSKQQSMMFRPLSQSLYWLLAADLLILTWIGGQPVSYPFTIIGQVASVLYFT 24 | TILILMPTISLIENKMLKWA 25 | >ENSP00000442112 pep:known chromosome:GRCh37:15:63889592:63893885:1 gene:ENSG00000259662 transcript:ENST00000539570 gene_biotype:protein_coding transcript_biotype:protein_coding 26 | MWPLLTMHITQLNRECLLHLFSFLDKDSRKSLARTCSQLHDVFEDPALWSLLHFRSLTEL 27 | QKDNFLLGPALRSLSICWHSSRVQVCSIEDWLKSAFQRSICSRHESLVNDFLLRVCDRLS 28 | AVRSPRRREAPAPSSGTPIAVGPKSPRWGGPDHSEFADLRSGVTGARAAARRGLGSLRAE 29 | RPSETPPAPGVSWGPPPPGAPVVISVKQEEGKQGRTGRRSHRAAPPCGFARTRVCPPTFP 30 | GADAFPQ 31 | >ENSP00000427336 pep:known chromosome:GRCh37:14:102027834:102028748:1 gene:ENSG00000258865 transcript:ENST00000510508 gene_biotype:protein_coding transcript_biotype:protein_coding 32 | MPRQATSRLVVGEGEGSQGASGPAATMLRSLLLHSLRLCAQTASCLVLFPRFLGTAFMLW 33 | LLDFLCIRKHFLGRRRRGQPEPEVELNSEGEEVPPDDPPICVSDDNRLCTLASLKAVWHG 34 | QKLDFFKQAHEGGPAPNSEVVLPDGFQSQHILDYAQGNRPLVLNFGSCTUPPFMARMSAF 35 | QRLVTKYQRDVDFLIIYIEEAHPSDGWVTTDSPYIIPQHRSLEDRVSAARVLQQGAPGCA 36 | LVLDTMANSSSSAYGAYFERLYVIQSGTIMYQGGRGPDGYQVSELRTWLERYDEQLHGAR 37 | PRRV 38 | 39 | -------------------------------------------------------------------------------- /data/prots/prot_3.fa: -------------------------------------------------------------------------------- 1 | >ENSP00000452959 pep:known chromosome:GRCh37:10:225953:295049:1 gene:ENSG00000259741 
transcript:ENST00000558098 gene_biotype:protein_coding transcript_biotype:protein_coding 2 | MARLTKRRQADTKAIQHLWAAIEIIRNQKQIANIDRITKYMSRVHGMHPKETTRQLSLAV 3 | KDGLIVETLTVGCKGSKAGIEQEGYWLPGDEIDWETENHDWYCFECHLPGEVLICDLCFR 4 | VYHSKCLSDEFRLRDSSSPWQCPVCRSIKKKNTNKQEMGTYLRFIVSRMKERAIDLNKKG 5 | KDNKHPMYRRLVHSAVDVPTIQEKVNEGKYRSYEEFKADAQLLLHNTVIFYGADSEQADI 6 | ARMLYKDTCHELDELQLCKNCFYLSNARPDNWFCYPCIPNHELVWAKMKGFGFWPAKVMQ 7 | KEDNQVDVRFFGHHHQRAWIPSENIQDITVNIHRLHVKRSMGWKKACDELELHQRFLREG 8 | RFWKSKNEDRGEEEAESSISSTSNEQLKVTQEPRAKKGRRNQSVEPKKEEPEPETEAVSS 9 | SQEIPTMPQPIEKVSVSTQTKKLSASSPRMLHRSTQTTNDGVCQSMCHDKYTKIFNDFKD 10 | RMKSDHKRETERVVREALEKLRSEMEEEKRQAVNKAVANMQGEMDRKCKQVKEKCKEEFV 11 | EEIKKLATQHKQLISQTKKKQWVNTSLF 12 | >ENSP00000292095 pep:known chromosome:GRCh37:11:117160282:117166263:-1 gene:ENSG00000265969 transcript:ENST00000292095 gene_biotype:protein_coding transcript_biotype:protein_coding 13 | MVPFIYLQAHFTLCSGWSSTYRDLRKGVYVPYTQGKWEGELGTDLVSIPHGPNVTVRANI 14 | AAITESDKFFINGSNWEGILGLAYAEIARPDDSLEPFFDSLVKQTHVPNLFSLQLCGAGF 15 | PLNQSEVLASVGGSMIIGGIDHSLYTGSLWYTPIRREWYYEVIIVRVEINGQDLKMDCKE 16 | YNYDKSIVDSGTTNLRLPKKVFEAAVKSIKAASSTEKFPDGFWLGEQLVCWQAGTTPWNI 17 | FPVISLYLMGEVTNQSFRITILPQQYLRPVEDVATSQDDCYKFAISQSSTGTVMGAVIME 18 | GFYVVFDRARKRIGFAVSACHVHDEFRTAAVEGPFVTLDMEDCGYNIPQTDESTLMTIAY 19 | VMAAICALFMLPLCLMVCQWCCLRCLRQQHDDFADDISLLK 20 | >ENSP00000306381 pep:known chromosome:GRCh37:14:55034638:55255662:1 gene:ENSG00000262355 transcript:ENST00000305831 gene_biotype:protein_coding transcript_biotype:protein_coding 21 | MFRDQVGVLAGWFKGWNECEQTVALLSLLKRVSQTQARFLQLCLEHSLADCAELHVLERE 22 | ANSPGIINQWQQESKDKVISLLLTHLPLLKPGNLDAKVEYMKLLPKILAHSIEHNQHIEE 23 | SRQLLSYALIHPATSLEDRSALAMWLNHLEDRTSTSFGGQNRGRSDSVDYGQTHYYHQRQ 24 | NSDDKLNGWQNSRDSGICINASNWQDKSMGCENGHVPLYSSSSVPTTINTIGTSTSTNVP 25 | AWLKSLRLHKYAALFSQMTYEEMMALTECQLEAQNVTKGARHKIVISIQKLKERQNLLKS 26 | LERDIIEGGSLRIPLQELHQMILTPIKAYSSPSTTPEARRREPQAPRQPSLMGPESQSPD 27 | CKDGAAATGATATPSAGASGGLQPHQLSSCDGELAVAPLPEGDLPGQFTRVMGKVCTQLL 28 | 
VSRPDEENISSYLQLIDKCLIHEAFTETQKKRLLSWKQQVQKLFRSFPRKTLLDISGYRQ 29 | QRNRGFGQSNSLPTAGSVGGGMGRRNPRQYQIPSRNVPSARLGLLGTSGFVSSNQRNTTA 30 | TPTIMKQGRQNLWFANPGGSNSMPSRTHSSVQRTRSLPVHTSPQNMLMFQQPEFQLPVTE 31 | PDINNRLESLCLSMTEHALGDGVDRTSTI 32 | >ENSP00000471397 pep:known chromosome:GRCh37:4:22694639:22820546:1 gene:ENSG00000269541 transcript:ENST00000594964 gene_biotype:protein_coding transcript_biotype:protein_coding 33 | MAFPAGFGWAAATAAYQVEGGWDADGKGPCVWDTFTHQGGERVFKNQTGDVACGSYTLWE 34 | EDLKCIKQLGLTHYRFSLSWSRLLPDGTTGFINQKGIDYYNKIIDDLLKNGVTPIVTLYH 35 | FDLPQTLEDQGGWLSEAIIESFDKYAQFCFSTFGDRVKQWITINEANVLSVMSYDLGMFP 36 | PGIPHFGTGGYQAAHNLIKAHARSWHSYDSLFRKKQKGMVSLSLFAVWLEPADPNSVSDQ 37 | EAAKRAITFHLDLFAKPIFIDGDYPEVVKSQIASMSQKQGYPSSRLPEFTEEEKKMIKGT 38 | ADFFAVQYYTTRLIKYQENKKGELGILQDAEIEFFPDPSWKNVDWIYVVPWGVCKLLKYI 39 | KDTYNNPVIYITENGFPQSDPAPLDDTQRWEYFRQTFQELFKAIQLDKVNLQVYCAWSLL 40 | DNFEWNQGYSSRFGLFHVDFEDPARPRVPYTSAKEYAKIIRNNGLEAHL 41 | >ENSP00000471024 pep:known chromosome:GRCh37:4:22694639:22820546:1 gene:ENSG00000269222 transcript:ENST00000599224 gene_biotype:protein_coding transcript_biotype:protein_coding 42 | MAFPAGFGWAAATAAYQVEGGWDADGKGPCVWDTFTHQGGERVFKNQTGDVACGSYTLWE 43 | EDLKCIKQLGLTHYRFSLSWSRLLPDGTTGFINQKAIQLDKVNLQVYCAWSLLDNFEWNQ 44 | GYSSRFGLFHVDFEDPARPRVPYTSAKEYAKIIRNNGLEAHL 45 | >ENSP00000470473 pep:known chromosome:GRCh37:8:142444058:142517249:-1 gene:ENSG00000269739 transcript:ENST00000599122 gene_biotype:protein_coding transcript_biotype:protein_coding 46 | MDRQCSERPYSCTPTGRVSSAVSQNSRISPPVSTSMKDSSCMKVHQDSARRDRWSHPTTI 47 | LLHKSQSSQATLMLQEHRMFMGEAYSAATGFKMLQDMNSADPFHLKYIIKKIKNMAHGSP 48 | KLVMETIHDYFIDNPEISSRHKFRLFQTLEMVIGASDVLEETWEKTFTRLALENMTKATE 49 | LEDIYQDAASNMLVAICRHSWRVVAQHLETELLTGVFPHRSLLYVMGVLSSSEELFSQED 50 | KACWEEQLIQMAIKSVPFLSTDVWSKELLWTLTTPSWTQQEQSPEKAFLFTYYGLILQAE 51 | KNGATVRRHLQALLETSHQWPKQREGMALTLGLAATRHLDDVWAVLDQFGRSRPIRWSLP 52 | SSSPKNSEDLRWKWASSTILLAYGQVAAKARAHILPWVDNIVSRMVFYFHYSSWDETLKQ 53 | 
SFLTATLMLMGAVSRSEGAHSYEFFQTSELLQCLMVLMEKEPQDTLCTRSRQQAMHIASS 54 | LCKLRPPIDLERKSQLLSTCFRSVFALPLLDALEKHTCLFLEPPNIQLWPVARERAGWTH 55 | QGWGPRAVLHCSEHLQSLYSRTMEALDFMLQSLIMQNPTADELHFLLSHLYIWLASEKAH 56 | ERQRAVHSCMILLKFLNHNGYLDPKEDFKRIGQLVGILGMLCQDPDRATQRCSLEGASHL 57 | YQLLMCHKTGEALQAESQAPKELSQAHSDGAPLWNSRDQKATPLGPQEMAKNHIFQLCSF 58 | QVIKDIMQQLTLAELSDLIWTAIDGLGSTSPFRVQAASEMLLTAVQEHGAKLEIVSSMAQ 59 | AIRLRLCSVHIPQAKEKTLHAITLLARSHTCELVATFLNISIPLDSHTFQLWRALGAGQP 60 | TSHLVLTTLLACLQERPLPTGASDSSPCPKEKTYLRLLAAMNMLHELQFAREFKQAVQEG 61 | YPKLFLALLTQMHYVLELNLPSEPQPKQQAQEAAVPSPQSCSTSLEALKSLLSTTGHWHD 62 | FAHLELQGSWELFTTIHTYPKGVGLLARAMVQNHCRQIPAVLRQLLPSLQSPQERERKVA 63 | ILILTKFLYSPVLLEVLPKQAALTVLAQGLHDPSPEVRVLSLQGLSNILFHPDKGSLLQG 64 | QLRPLLDGFFQSSDQVIVCIMGTVSDTLHRLGAQGTGSQSLGVAISTRSFFNDERDGIRA 65 | AAMALFGDLVAAMADRELSGLRTQVHQSMVPLLLHLKDQCPAVATQAKFTFYRCAVLLRW 66 | RLLHTLFCTLAWERGLSARHFLWTCLMTRSQEEFSIHLSQALSYLHSHSCHIKTWVTLFI 67 | GHTICYHPQAVFQMLNAVDTNLLFRTFEHLRSDPEPSIREFATSQLSFLQKVSARPKQ -------------------------------------------------------------------------------- /data/prots/sample.fa: -------------------------------------------------------------------------------- 1 | >1aboA 2 | NLFVALYDFVASGDNTLSITKGEKLRVLGYNHNGEWCEAQTKNGQGWVPS 3 | NYITPVN 4 | >1ycsB 5 | KGVIYALWDYEPQNDDELPMKEGDCMTIIHREDEDEIEWWWARLNDKEGY 6 | VPRNLLGLYP 7 | >1pht 8 | GYQYRALYDYKKEREEDIDLHLGDILTVNKGSLVALGFSDGQEARPEEIG 9 | WLNGYNETTGERGDFPGTYVEYIGRKKISP 10 | >1vie 11 | DRVRKKSGAAWQGQIVGWYCTNLTPEGYAVESEAHPGSVQIYPVAALERI 12 | N 13 | >1ihvA 14 | NFRVYYRDSRDPVWKGPAKLLWKGEGAVVIQDNSDIKVVPRRKAKIIRD 15 | >1csn_ 16 | NVVGVHYKVGRRIGEGSFGVIFEGTNLLNNQQVAIKFEPRRSDAPQLRDE 17 | YMHYARNLAFDATPDYDYLQGLFSKVLERLNTTEDENFDWNLL 18 | >1ad5_A 19 | EDIIVVALYDYEAIHHEDLSFQKGDQMVVLEESGEWWKARSLATRKEGYI 20 | DVWSFGILLMEIVTYGRIPYPGMSNPEVIRALERGYRMPRPENCPEELYN 21 | IMMRCWKNRPEERPTFEYIQSVLDDFYTATESQXQQQP 22 | >1a9u_ 23 | ERPTFYRQELNKTIWEVPERYQNLSPVGSGAYGSVCAAFDTKTGLRVAVK 24 | SARNYIQSLTQMPKMNFANVFIGANPLAVDLLEKMLVLDSDKRITAAQAL 25 | 
AHAYFAQYHDPDDEPVADPYDQSFESRDLLIDEWKSLTYDEVISFVPPPL 26 | D 27 | >1mq4_A 28 | RQWALEDFEIGRPLGKGKFGNVYLAREKQSKFILALKVLFKAQLEKAGVE 29 | PFEANTYQETYKRISRVEFTFPDFVTEGARDLISRLLKHNPSQRPMLREV 30 | LEHPWITANSS 31 | >1how_A 32 | FHPAFKGEPYKDARYILVRKLGWGHFSTVWLAKDMVNNTHVAMKIVRGDK 33 | RNGKYTRTFFSKLKFWPLEDVLTEKYKFSKDEAKEISDFLSPMLQLDPRK 34 | RADAGGLVNHPWLKDTLGMEEIRVPDRELYGSGSDIPGWFEEVR 35 | >1omw_A 36 | SKKILLPEPSIRSVMQKYLEDRGEVTFEKIFSQKLGYLLFRDFCLKHLEE 37 | LDSDQELYRNFPLTISERWQQEVAETVFDTINAETDRLEARKKTKNKQLG 38 | HEEDYALGKDCIMHGYMSKMGWQRRYFYLFPNRLEWRGEGEAPQSLLTME 39 | EIQSVEETQIKERKCLLLKIRGGKQFVLQCDSDPELVQWKKELRDAYREA 40 | QQLVQRVPKMKNKP 41 | >1f3m_C 42 | SDEEILEKLRSIVSVGDPKKKYTRFEKIGQGASGTVYTAMDVATGQEVAI 43 | VEKRGSAKELLQHQFLKIAKPLSSLTPLIAAAKEATK 44 | >1b6c_B 45 | TTLKDLIYDMTTSGSGSGLPLLVQRTIARTIVLQESIGKGRFGEVWRGKW 46 | QLPYYDLVPSDPSVEEMRKVVCEQKLRPNIPNRWQSCEALRVMAKIMREC 47 | WYANGAARLTALRIKKTLSQLSQQEG 48 | >1fcd_A 49 | AGRKVVVVGGGTGGATAAKYIKLADPSIEVTLIEPNTDYYTCYLSNEVIG 50 | AIYRPNADGSAIESVPDSGGVTPVDAPDWVLEREVQYAYSWYNNIVHDTF 51 | G 52 | >2tmd_A 53 | ARDPKHDILFEPIQIGPKTLRNRFYQVPHCIGAGSDKPGFQSAHRSVKAE 54 | WNELKARESEWAENDIKGIYLIGDAEAPRLIADATFTGHRVAREIEEANP 55 | QIAIPYKRETIAWGTPHMPGGNFKIEYKV 56 | -------------------------------------------------------------------------------- /data/reads/110101_I315_FC816RLABXX_L1_HUMrutRGXDIAAPE_1.fq.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nextflow-io/patterns/8341e94a61feaa2263fdd83ddf6287b54aebde20/data/reads/110101_I315_FC816RLABXX_L1_HUMrutRGXDIAAPE_1.fq.gz -------------------------------------------------------------------------------- /data/reads/110101_I315_FC816RLABXX_L1_HUMrutRGXDIAAPE_2.fq.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nextflow-io/patterns/8341e94a61feaa2263fdd83ddf6287b54aebde20/data/reads/110101_I315_FC816RLABXX_L1_HUMrutRGXDIAAPE_2.fq.gz 
-------------------------------------------------------------------------------- /data/reads/110105_I186_FC812MWABXX_L8_HUMrutRGVDIABPE_1.fq.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nextflow-io/patterns/8341e94a61feaa2263fdd83ddf6287b54aebde20/data/reads/110105_I186_FC812MWABXX_L8_HUMrutRGVDIABPE_1.fq.gz -------------------------------------------------------------------------------- /data/reads/110105_I186_FC812MWABXX_L8_HUMrutRGVDIABPE_2.fq.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nextflow-io/patterns/8341e94a61feaa2263fdd83ddf6287b54aebde20/data/reads/110105_I186_FC812MWABXX_L8_HUMrutRGVDIABPE_2.fq.gz -------------------------------------------------------------------------------- /data/reads/110121_I288_FC81DE8ABXX_L3_HUMrutRGXDIAAPE_1.fq.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nextflow-io/patterns/8341e94a61feaa2263fdd83ddf6287b54aebde20/data/reads/110121_I288_FC81DE8ABXX_L3_HUMrutRGXDIAAPE_1.fq.gz -------------------------------------------------------------------------------- /data/reads/110121_I288_FC81DE8ABXX_L3_HUMrutRGXDIAAPE_2.fq.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nextflow-io/patterns/8341e94a61feaa2263fdd83ddf6287b54aebde20/data/reads/110121_I288_FC81DE8ABXX_L3_HUMrutRGXDIAAPE_2.fq.gz -------------------------------------------------------------------------------- /data/reads/110122_I329_FC81DB5ABXX_L6_HUMrutRGVDIAAPE_1.fq.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nextflow-io/patterns/8341e94a61feaa2263fdd83ddf6287b54aebde20/data/reads/110122_I329_FC81DB5ABXX_L6_HUMrutRGVDIAAPE_1.fq.gz 
-------------------------------------------------------------------------------- /data/reads/110122_I329_FC81DB5ABXX_L6_HUMrutRGVDIAAPE_2.fq.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nextflow-io/patterns/8341e94a61feaa2263fdd83ddf6287b54aebde20/data/reads/110122_I329_FC81DB5ABXX_L6_HUMrutRGVDIAAPE_2.fq.gz -------------------------------------------------------------------------------- /data/reads/110128_I481_FC819P0ABXX_L5_HUMrutRGWDIAAPE_1.fq.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nextflow-io/patterns/8341e94a61feaa2263fdd83ddf6287b54aebde20/data/reads/110128_I481_FC819P0ABXX_L5_HUMrutRGWDIAAPE_1.fq.gz -------------------------------------------------------------------------------- /data/reads/110128_I481_FC819P0ABXX_L5_HUMrutRGWDIAAPE_2.fq.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nextflow-io/patterns/8341e94a61feaa2263fdd83ddf6287b54aebde20/data/reads/110128_I481_FC819P0ABXX_L5_HUMrutRGWDIAAPE_2.fq.gz -------------------------------------------------------------------------------- /data/reads/sample.fq.gz: -------------------------------------------------------------------------------- 1 | 110101_I315_FC816RLABXX_L1_HUMrutRGXDIAAPE_1.fq.gz -------------------------------------------------------------------------------- /docs/channel-duplication.md: -------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | You need to use the same channel as input in two or more processes. 4 | 5 | ## Solution 6 | 7 | In DSL2, you can just do it! The [into](https://www.nextflow.io/docs/latest/operator.html#into) operator is no longer needed.
8 | 9 | ## Code 10 | 11 | ```groovy 12 | process foo { 13 | input: path x 14 | script: 15 | """ 16 | echo your_command --input $x 17 | """ 18 | } 19 | 20 | process bar { 21 | input: path x 22 | script: 23 | """ 24 | echo your_command --input $x 25 | """ 26 | } 27 | 28 | workflow { 29 | input_ch = Channel.fromPath("$baseDir/data/prots/*_?.fa") 30 | 31 | foo(input_ch) 32 | bar(input_ch) 33 | } 34 | ``` 35 | 36 | ## Run it 37 | 38 | Use the following command to execute the example: 39 | 40 | ```bash 41 | nextflow run nextflow-io/patterns/channel-duplication.nf 42 | ``` 43 | -------------------------------------------------------------------------------- /docs/collect-into-file.md: -------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | You need to concatenate into a single file all output files produced by an upstream process. 4 | 5 | ## Solution 6 | 7 | Use the [collectFile](https://www.nextflow.io/docs/latest/operator.html#collectfile) operator to merge all 8 | the output files into a single file. 9 | 10 | ## Code 11 | 12 | ```groovy 13 | process foo { 14 | input: 15 | path x 16 | output: 17 | path 'file.fq' 18 | script: 19 | """ 20 | < $x zcat > file.fq 21 | """ 22 | } 23 | 24 | workflow { 25 | Channel.fromPath("$baseDir/data/reads/*_1.fq.gz", checkIfExists: true) \ 26 | | foo \ 27 | | collectFile \ 28 | | view 29 | } 30 | ``` 31 | 32 | ## Run it 33 | 34 | Use the following command to execute the example: 35 | 36 | ```bash 37 | nextflow run nextflow-io/patterns/collect-into-file.nf 38 | ``` 39 | -------------------------------------------------------------------------------- /docs/conditional-process-dynamic.md: -------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | One of two processes should be executed depending on the result of an upstream channel. 4 | 5 | ## Solution 6 | 7 | Because the condition is a channel result, `if/else` cannot be used.
Instead, use the `branch` operator to create a "true" channel and a "false" channel. The channel whose condition is true will receive a value, which will trigger its respective process. 8 | 9 | ## Code 10 | 11 | ```groovy 12 | process foo { 13 | input: 14 | val signal 15 | output: 16 | path 'x.txt' 17 | 18 | script: 19 | ''' 20 | echo foo > x.txt 21 | ''' 22 | } 23 | 24 | process bar { 25 | input: 26 | val signal 27 | output: 28 | path 'x.txt' 29 | 30 | script: 31 | ''' 32 | echo bar > x.txt 33 | ''' 34 | } 35 | 36 | workflow { 37 | ch_if = Channel.of( 1..100 ) 38 | | randomSample(1) 39 | | branch { n -> 40 | TRUE: n > 50 41 | FALSE: n <= 50 42 | } 43 | 44 | ch_if.TRUE | foo 45 | ch_if.FALSE | bar 46 | } 47 | ``` 48 | 49 | ## Run it 50 | 51 | Use the following command to execute the example: 52 | 53 | ```bash 54 | nextflow run nextflow-io/patterns/conditional-process3.nf 55 | ``` 56 | 57 | The workflow will execute `foo` or `bar` based on a random number. Execute it multiple times to observe the random behavior. 58 | -------------------------------------------------------------------------------- /docs/conditional-process.md: -------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | One of two different tasks should be executed based on some condition, 4 | and a third task should process the results of the selected task. 5 | 6 | ## Solution 7 | 8 | Simply execute either process using `if/else` statements on the condition. 9 | Define a channel, e.g. `omega_ch`, which emits the output of the selected process 10 | in each case. Then, execute the third process with this output channel. 11 | 12 | Or, use a ternary expression and a pipe to keep things short and sweet.
13 | 14 | ## Code 15 | 16 | ```groovy 17 | params.flag = false 18 | 19 | process foo { 20 | output: 21 | path 'x.txt' 22 | 23 | script: 24 | ''' 25 | echo foo > x.txt 26 | ''' 27 | } 28 | 29 | process bar { 30 | output: 31 | path 'x.txt' 32 | 33 | script: 34 | ''' 35 | echo bar > x.txt 36 | ''' 37 | } 38 | 39 | process omega { 40 | debug true 41 | input: 42 | path x 43 | 44 | script: 45 | """ 46 | cat $x 47 | """ 48 | } 49 | 50 | workflow { 51 | // the long way 52 | if ( params.flag ) { 53 | bar() 54 | omega_ch = bar.out 55 | } 56 | else { 57 | foo() 58 | omega_ch = foo.out 59 | } 60 | 61 | omega(omega_ch) 62 | 63 | // the short way 64 | (params.flag ? bar : foo) | omega 65 | } 66 | ``` 67 | 68 | ## Run it 69 | 70 | Use the following command to execute the example: 71 | 72 | ```bash 73 | nextflow run nextflow-io/patterns/conditional-process.nf 74 | ``` 75 | 76 | The processes `foo` and `omega` are executed. Run the same command 77 | with the `--flag` command line option. 78 | 79 | ```bash 80 | nextflow run nextflow-io/patterns/conditional-process.nf --flag 81 | ``` 82 | 83 | This time the processes `bar` and `omega` are executed. 84 | 85 | ## Alternative solution 86 | 87 | Create an input channel for each process that is either populated with data or an 88 | [empty](https://www.nextflow.io/docs/latest/channel.html#empty) channel. 89 | Each process will execute only if its input channel has data. 90 | 91 | Then use the [mix](https://www.nextflow.io/docs/latest/operator.html#mix) operator to create 92 | a new channel that emits the outputs produced by the two processes, and use it as the input 93 | for the third process.
94 | 95 | ## Code 96 | 97 | ```groovy 98 | params.flag = false 99 | 100 | process foo { 101 | input: 102 | val x 103 | 104 | output: 105 | path 'x.txt' 106 | 107 | script: 108 | """ 109 | echo $x > x.txt 110 | """ 111 | } 112 | 113 | process bar { 114 | input: 115 | val(b) 116 | 117 | output: 118 | path 'x.txt' 119 | 120 | script: 121 | """ 122 | echo $b > x.txt 123 | """ 124 | } 125 | 126 | process omega { 127 | debug true 128 | input: 129 | path x 130 | 131 | script: 132 | """ 133 | cat $x 134 | """ 135 | } 136 | 137 | workflow { 138 | (foo_ch, bar_ch) = params.flag 139 | ? [ Channel.empty(), Channel.from(1,2,3) ] 140 | : [ Channel.from(4,5,6), Channel.empty() ] 141 | 142 | foo(foo_ch) 143 | bar(bar_ch) 144 | 145 | foo.out | mix(bar.out) | omega 146 | } 147 | ``` 148 | 149 | ## Run it 150 | 151 | ```bash 152 | nextflow run nextflow-io/patterns/conditional-process2.nf 153 | ``` 154 | -------------------------------------------------------------------------------- /docs/conditional-resources.md: -------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | A task in your workflow needs to use some amount of computing 4 | resources (e.g. memory) that depends on the size or the name of one 5 | or more input files. 6 | 7 | ## Solution 8 | 9 | Declare the resource requirements (`memory`, `cpus`, etc.) 10 | in a dynamic manner using a closure. 11 | 12 | The closure computes the required amount of resources using the file 13 | attributes (e.g. `size`) of the inputs declared in the process 14 | definition. 15 | 16 | ## Code 17 | 18 | ```groovy 19 | process foo { 20 | memory { reads.size() < 70.KB ? 
1.GB : 5.GB } 21 | 22 | input: 23 | path reads 24 | 25 | """ 26 | echo your_command_here --in ${reads} --mem=${task.memory.giga} 27 | """ 28 | } 29 | 30 | workflow { 31 | Channel.fromPath("$baseDir/data/reads/*_1.fq.gz", checkIfExists:true) \ 32 | | foo 33 | } 34 | ``` 35 | 36 | ## Run it 37 | 38 | ```bash 39 | nextflow run nextflow-io/patterns/conditional-resources.nf 40 | ``` 41 | -------------------------------------------------------------------------------- /docs/create-key-to-combine-channels.md: -------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | You have channels you want to combine, but the elements in these channels are related and you want this new combined channel to take this into consideration. Some Nextflow channel operators can combine elements in channels according to a matching key but, unfortunately, in your case you have no such key. 4 | 5 | ## Solution 6 | 7 | Use the [map](https://www.nextflow.io/docs/latest/operator.html#map) operator and the `toString` and `split` native Groovy functions to extract a matching key from every element of your channel. Then, use the [combine](https://www.nextflow.io/docs/latest/operator.html#combine) operator to combine the channels according to the created matching key. 8 | 9 | The code below will create sample channels to test the solution. 
10 | 11 | ```groovy 12 | Channel 13 | .of('demux.Clontech_5p--bc1003_3p.flnc_clustered.sorted.sam', 14 | 'demux.Clontech_5p--bc1001_3p.flnc_clustered.sorted.sam', 15 | 'demux.Clontech_5p--bc1002_3p.flnc_clustered.sorted.sam') 16 | .set { ch_alignment } 17 | 18 | Channel 19 | .of('demux.Clontech_5p--bc1001_3p.flnc_clustered.fasta', 20 | 'demux.Clontech_5p--bc1002_3p.flnc_clustered.fasta', 21 | 'demux.Clontech_5p--bc1003_3p.flnc_clustered.fasta') 22 | .set { ch_clustered } 23 | ``` 24 | ## Code 25 | 26 | ```groovy 27 | ch_alignment 28 | // For every element of this channel, convert it to a string, split in pieces separated by --, get the second part, then split by _3p and get the first part. Return a list with this as the first value, and then the original element as the second value. This part has to be customized depending on what part of the String you want to get as matching key 29 | .map { [it.toString().split("--")[1].split("_3p")[0], 30 | it] }. 31 | set { ch_alignment } 32 | ch_clustered 33 | .map { [it.toString().split("--")[1].split("_3p")[0], 34 | it] }. 35 | set { ch_clustered } 36 | 37 | ch_alignment 38 | // Combine according to a key that is the first value of every first element, which is a list according to what we did above 39 | .combine(ch_clustered, by: 0) 40 | // For every element of this channel, which consists of three values now, the matching key (id), the first element of the first channel, and the second, keep only the second and the third. 
41 | .map { id, sam, fasta -> [sam, fasta] } 42 | // View the content of the channel, which consists of the last two values 43 | .view() 44 | ``` 45 | 46 | ## Run it 47 | 48 | Run the example using this command: 49 | 50 | ```bash 51 | nextflow run nextflow-io/patterns/create-key-to-combine-channels.nf 52 | ``` 53 | -------------------------------------------------------------------------------- /docs/feedback-loop.md: -------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | You need to repeat a process or workflow multiple times, using the output 4 | from the previous iteration as the input to the next iteration. 5 | 6 | ## Solution 7 | 8 | !!! warning 9 | This feature is experimental and may change in the future. 10 | 11 | Use the `recurse` method on a process or workflow to execute it iteratively. 12 | In order to use this feature, the process or workflow must have identical input 13 | and output definitions, and any initial values must be Groovy values or 14 | value channels -- queue channels are not supported (yet). 15 | 16 | You can use the `times` operator to perform a fixed number of iterations, or the 17 | [until](https://www.nextflow.io/docs/latest/operator.html#until) operator to iterate until 18 | some condition is satisfied. 
19 | 20 | ## Code 21 | 22 | For an iterative process: 23 | 24 | ```groovy 25 | nextflow.preview.recursion=true 26 | 27 | params.data = "$baseDir/data/hello.txt" 28 | 29 | process foo { 30 | input: 31 | path 'input.txt' 32 | output: 33 | path 'result.txt' 34 | script: 35 | """ 36 | cat input.txt > result.txt 37 | echo "Task ${task.index} was here" >> result.txt 38 | """ 39 | } 40 | 41 | workflow { 42 | // perform a fixed number of iterations 43 | foo 44 | .recurse(file(params.data)) 45 | .times(10) 46 | 47 | // iterate until some condition is satisfied 48 | foo 49 | .recurse(file(params.data)) 50 | .until { it -> it.size() > 100 } 51 | 52 | foo 53 | .out 54 | .view(it -> it.text) 55 | } 56 | ``` 57 | 58 | For an iterative workflow: 59 | 60 | ```groovy 61 | nextflow.preview.recursion=true 62 | 63 | params.input = "$baseDir/data/hello.txt" 64 | 65 | process tick { 66 | input: 67 | path 'input.txt' 68 | output: 69 | path 'result.txt' 70 | script: 71 | """ 72 | cat input.txt > result.txt 73 | echo "Task ${task.index} : tick" >> result.txt 74 | """ 75 | } 76 | 77 | process tock { 78 | input: 79 | path 'input.txt' 80 | output: 81 | path 'result.txt' 82 | script: 83 | """ 84 | cat input.txt > result.txt 85 | echo "Task ${task.index} : tock" >> result.txt 86 | """ 87 | } 88 | 89 | workflow clock { 90 | take: infile 91 | main: 92 | infile | tick | tock 93 | emit: 94 | tock.out 95 | } 96 | 97 | workflow { 98 | clock 99 | .recurse(file(params.input)) 100 | .until { it -> it.size() > 100 } 101 | 102 | clock 103 | .out 104 | .view(it -> it.text) 105 | } 106 | ``` 107 | 108 | ## Run it 109 | 110 | Use the following commands to execute the examples: 111 | 112 | ``` 113 | # iterative process 114 | nextflow run nextflow-io/patterns/feedback-loop-process.nf 115 | 116 | # iterative workflow 117 | nextflow run nextflow-io/patterns/feedback-loop-workflow.nf 118 | ``` 119 | -------------------------------------------------------------------------------- /docs/ignore-failing-process.md:
-------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | A task is expected to fail in some cases. You want to ignore the failure and continue the execution of the remaining tasks in the workflow. 4 | 5 | ## Solution 6 | 7 | Use the process [directive](https://www.nextflow.io/docs/latest/process.html#errorstrategy) `errorStrategy 'ignore'` to ignore the error condition. 8 | 9 | ## Code 10 | 11 | ```groovy 12 | process foo { 13 | errorStrategy 'ignore' 14 | script: 15 | ''' 16 | echo This is going to fail! 17 | exit 1 18 | ''' 19 | } 20 | 21 | process bar { 22 | script: 23 | ''' 24 | echo OK 25 | ''' 26 | } 27 | 28 | workflow { 29 | foo() 30 | bar() 31 | } 32 | ``` 33 | 34 | ## Run it 35 | 36 | Run the script with the following command: 37 | 38 | ```bash 39 | nextflow run nextflow-io/patterns/ignore-failing-process.nf 40 | ``` 41 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Nextflow Patterns 2 | 3 | This page collects some recurring implementation patterns used in Nextflow applications. Feel free to contribute by opening a pull request in the [GitHub repository](https://github.com/nextflow-io/patterns). 4 | -------------------------------------------------------------------------------- /docs/optional-input.md: -------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | One or more processes have an optional input file. 4 | 5 | ## Solution 6 | 7 | Use a special file name to mark the absence of the file parameter. 
8 | 9 | Create an empty file in `assets`: 10 | ``` 11 | touch assets/NO_FILE 12 | ``` 13 | 14 | ## Code 15 | 16 | ```groovy 17 | params.inputs = "$projectDir/data/prots/*{1,2,3}.fa" 18 | params.filter = "$projectDir/assets/NO_FILE" 19 | 20 | process foo { 21 | debug true 22 | input: 23 | path seq 24 | path opt 25 | 26 | script: 27 | def filter = opt.name != 'NO_FILE' ? "--filter $opt" : '' 28 | """ 29 | echo your_command --input $seq $filter 30 | """ 31 | } 32 | 33 | workflow { 34 | prots_ch = Channel.fromPath(params.inputs, checkIfExists:true) 35 | opt_file = file(params.filter, checkIfExists:true) 36 | 37 | foo(prots_ch, opt_file) 38 | } 39 | ``` 40 | 41 | ## Run it 42 | 43 | Run the script with the following command: 44 | 45 | ```bash 46 | nextflow run nextflow-io/patterns/optional-input.nf 47 | ``` 48 | 49 | Run the same script providing an optional file input: 50 | 51 | ```bash 52 | nextflow run nextflow-io/patterns/optional-input.nf --filter foo.txt 53 | ``` 54 | -------------------------------------------------------------------------------- /docs/optional-output.md: -------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | A task in your workflow is expected to not create an output file in some circumstances. 4 | 5 | ## Solution 6 | 7 | Declare such output as an `optional` file. 
8 | 9 | ## Code 10 | 11 | ```groovy 12 | process foo { 13 | output: 14 | path 'foo.txt', optional: true 15 | 16 | script: 17 | ''' 18 | touch foo.txt 19 | ''' 20 | } 21 | 22 | workflow { 23 | foo() 24 | } 25 | ``` 26 | 27 | ## Run it 28 | 29 | Use the following command to execute the example: 30 | 31 | ```bash 32 | nextflow run nextflow-io/patterns/optional-output.nf 33 | ``` 34 | -------------------------------------------------------------------------------- /docs/process-collect.md: -------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | You need to process all the outputs of an upstream task all together. 4 | 5 | ## Solution 6 | 7 | Use the [collect](https://www.nextflow.io/docs/latest/operator.html#collect) operator to gather 8 | all the outputs produced by the upstream task and emit them as a single output. 9 | Then use the resulting channel as input for the downstream task. 10 | 11 | ## Code 12 | 13 | ```groovy 14 | process foo { 15 | input: 16 | path x 17 | output: 18 | path 'file.fq' 19 | script: 20 | """ 21 | < $x zcat > file.fq 22 | """ 23 | } 24 | 25 | process bar { 26 | debug true 27 | input: 28 | path '*.fq' 29 | script: 30 | """ 31 | cat *.fq | head -n 50 32 | """ 33 | } 34 | 35 | workflow { 36 | Channel.fromPath("$baseDir/data/reads/*_1.fq.gz", checkIfExists: true) \ 37 | | foo \ 38 | | collect \ 39 | | bar 40 | } 41 | ``` 42 | 43 | ## Run it 44 | 45 | Use the following command to execute the example: 46 | 47 | ```bash 48 | nextflow run nextflow-io/patterns/process-collect.nf 49 | ``` 50 | -------------------------------------------------------------------------------- /docs/process-get-workdir.md: -------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | A tool needs the explicit path of the current task work directory. 4 | 5 | ## Solution 6 | 7 | Use the `$PWD` Bash variable or the `pwd` command to retrieve the task working directory path.
8 | 9 | !!! note 10 | Make sure to escape the `$` variable placeholder when the command script is enclosed in double quote characters. 11 | 12 | ## Example 13 | 14 | ```groovy 15 | process foo { 16 | debug true 17 | script: 18 | """ 19 | echo foo task path: \$PWD 20 | """ 21 | } 22 | 23 | process bar { 24 | debug true 25 | script: 26 | ''' 27 | echo bar task path: $PWD 28 | ''' 29 | } 30 | 31 | workflow { 32 | foo() 33 | bar() 34 | } 35 | ``` 36 | 37 | ## Run it 38 | 39 | The command run the script with an empty channel: 40 | 41 | ```bash 42 | nextflow run nextflow-io/patterns/process-get-workdir.nf 43 | ``` 44 | 45 | Use the following command to provide the same script 46 | some input files, that prevents the process from being executed: 47 | 48 | ```bash 49 | nextflow run nextflow-io/patterns/process-get-workdir.nf --inputs ../data/prots/\* 50 | ``` 51 | -------------------------------------------------------------------------------- /docs/process-into-groups.md: -------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | You need to process in the same batch all files that have a matching key in the file name. 4 | 5 | ## Solution 6 | 7 | Use the [map](https://www.nextflow.io/docs/latest/operator.html#map) operator to associate each file with a key extracted from the file name. Then chain the resulting channel with the [groupTuple](https://www.nextflow.io/docs/latest/operator.html#grouptuple) operator to group together all files that have a matching key. Finally, use the resulting channel as input for the process. 
8 | 9 | ## Code 10 | 11 | ```groovy 12 | params.reads = "$baseDir/data/reads/*" 13 | 14 | process foo { 15 | debug true 16 | input: 17 | tuple val(key), file(samples) 18 | 19 | script: 20 | """ 21 | echo your_command --batch $key --input $samples 22 | """ 23 | } 24 | 25 | workflow { 26 | Channel.fromPath(params.reads, checkIfExists:true) \ 27 | | map { file -> 28 | def key = file.name.toString().tokenize('_').get(0) 29 | return tuple(key, file) 30 | } \ 31 | | groupTuple() \ 32 | | foo 33 | } 34 | ``` 35 | 36 | ## Run it 37 | 38 | ```bash 39 | nextflow run nextflow-io/patterns/process-into-groups.nf 40 | ``` 41 | -------------------------------------------------------------------------------- /docs/process-per-csv-record.md: -------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | You need to execute a task for each record in one or more CSV files. 4 | 5 | ## Solution 6 | 7 | Read the CSV file line-by-line using the [splitCsv](https://www.nextflow.io/docs/latest/operator.html#splitcsv) operator, then use the [map](https://www.nextflow.io/docs/latest/operator.html#map) operator to return a tuple with the required field for each line and convert any string path to a file path object using the `file` function. Finally, use the resulting channel as input for the process. 
8 | 9 | ## Code 10 | 11 | Given the file `index.csv` with the following content: 12 | 13 | | sampleId | read1 | read2 14 | | -------- | ----- | ----- | 15 | | FC816RLABXX | reads/110101_I315_FC816RLABXX_L1_HUMrutRGXDIAAPE_1.fq.gz | reads/110101_I315_FC816RLABXX_L1_HUMrutRGXDIAAPE_2.fq.gz | 16 | | FC812MWABXX | reads/110105_I186_FC812MWABXX_L8_HUMrutRGVDIABPE_1.fq.gz | reads/110105_I186_FC812MWABXX_L8_HUMrutRGVDIABPE_2.fq.gz | 17 | | FC81DE8ABXX | reads/110121_I288_FC81DE8ABXX_L3_HUMrutRGXDIAAPE_1.fq.gz | reads/110121_I288_FC81DE8ABXX_L3_HUMrutRGXDIAAPE_2.fq.gz | 18 | | FC81DB5ABXX | reads/110122_I329_FC81DB5ABXX_L6_HUMrutRGVDIAAPE_1.fq.gz | reads/110122_I329_FC81DB5ABXX_L6_HUMrutRGVDIAAPE_2.fq.gz | 19 | | FC819P0ABXX | reads/110128_I481_FC819P0ABXX_L5_HUMrutRGWDIAAPE_1.fq.gz | reads/110128_I481_FC819P0ABXX_L5_HUMrutRGWDIAAPE_2.fq.gz | 20 | 21 | This workflow parses the file and executes a process for each line: 22 | 23 | ```groovy 24 | params.index = "$baseDir/data/index.csv" 25 | 26 | process foo { 27 | debug true 28 | input: 29 | tuple val(sampleId), file(read1), file(read2) 30 | 31 | script: 32 | """ 33 | echo your_command --sample $sampleId --reads $read1 $read2 34 | """ 35 | } 36 | 37 | workflow { 38 | Channel.fromPath(params.index) \ 39 | | splitCsv(header:true) \ 40 | | map { row-> tuple(row.sampleId, file(row.read1), file(row.read2)) } \ 41 | | foo 42 | } 43 | ``` 44 | 45 | !!! note 46 | Relative paths are resolved by the `file` function against the execution directory. In practice, it is preferable to use absolute file paths. 
47 | 48 | ## Run it 49 | 50 | Use the following command to execute the example: 51 | 52 | ```bash 53 | nextflow run nextflow-io/patterns/process-per-csv-record.nf 54 | ``` 55 | -------------------------------------------------------------------------------- /docs/process-per-file-chunk.md: -------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | You need to split one or more input files into chunks and execute a task for each of them. 4 | 5 | ## Solution 6 | 7 | Use the [splitText](https://www.nextflow.io/docs/latest/operator.html#splittext) operator to split a file into chunks of a given size. Then use the resulting channel as input for the process implementing your task. 8 | 9 | !!! warning 10 | Chunks are kept in memory by default. When splitting big files, specify the parameter `file: true` to save the chunks into files. See the [documentation](https://www.nextflow.io/docs/latest/operator.html#splittext) for details. 11 | 12 | Splitters for specific file formats are available, e.g. [splitFasta](https://www.nextflow.io/docs/latest/operator.html#splitfasta) and [splitFastq](https://www.nextflow.io/docs/latest/operator.html#splitfastq). 
13 | 14 | ## Code 15 | 16 | ```groovy 17 | params.infile = "$baseDir/data/poem.txt" 18 | params.size = 5 19 | 20 | process foo { 21 | debug true 22 | input: 23 | file x 24 | 25 | script: 26 | """ 27 | rev $x | rev 28 | """ 29 | } 30 | 31 | workflow { 32 | Channel.fromPath(params.infile) \ 33 | | splitText(by: params.size) \ 34 | | foo 35 | } 36 | ``` 37 | 38 | ## Run it 39 | 40 | Use the following command to execute the example: 41 | 42 | ```bash 43 | nextflow run nextflow-io/patterns/process-per-file-chunk.nf 44 | ``` 45 | -------------------------------------------------------------------------------- /docs/process-per-file-output.md: -------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | A task in your workflow produces two or more files at a time. A downstream task needs to process each 4 | of these files independently. 5 | 6 | ## Solution 7 | 8 | Use the [flatten](https://www.nextflow.io/docs/latest/operator.html#flatten) operator to 9 | transform the outputs of the upstream process to a channel that emits each file separately. 10 | Then use this channel as input for the downstream process. 11 | 12 | ## Code 13 | 14 | ```groovy 15 | process foo { 16 | output: 17 | path '*.txt' 18 | 19 | script: 20 | ''' 21 | echo Hello there! > file1.txt 22 | echo What a beautiful day > file2.txt 23 | echo I hope you are having fun! 
> file3.txt 24 | ''' 25 | } 26 | 27 | process bar { 28 | debug true 29 | input: 30 | path x 31 | 32 | script: 33 | """ 34 | cat $x 35 | """ 36 | } 37 | 38 | workflow { 39 | foo | flatten | bar 40 | } 41 | ``` 42 | 43 | ## Run it 44 | 45 | Use the following command to execute the example: 46 | 47 | ```bash 48 | nextflow run nextflow-io/patterns/process-per-file-output.nf 49 | ``` 50 | -------------------------------------------------------------------------------- /docs/process-per-file-pairs.md: -------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | You need to process the files in a directory, grouping them by pairs. 4 | 5 | ## Solution 6 | 7 | Use the [Channel.fromFilePairs](https://www.nextflow.io/docs/latest/channel.html#fromfilepairs) method to create a channel that emits file pairs matching a glob pattern. The pattern must match a common prefix in the paired file names. 8 | 9 | The matching files are emitted as tuples in which the first element is the grouping key of the matching files and the second element is the file pair itself. 10 | 11 | ## Code 12 | 13 | ```groovy 14 | process foo { 15 | debug true 16 | 17 | input: 18 | tuple val(sampleId), file(reads) 19 | 20 | script: 21 | """ 22 | echo your_command --sample $sampleId --reads $reads 23 | """ 24 | } 25 | 26 | workflow { 27 | Channel.fromFilePairs("$baseDir/data/reads/*_{1,2}.fq.gz", checkIfExists:true) \ 28 | | foo 29 | } 30 | ``` 31 | 32 | ## Run it 33 | 34 | ```bash 35 | nextflow run nextflow-io/patterns/process-per-file-pairs.nf 36 | ``` 37 | 38 | ## Custom grouping strategy 39 | 40 | When necessary, it is possible to define a custom grouping strategy. A common use case is for alignment BAM files (`sample1.bam`) that come along with their index file. The difficulty is that the index is sometimes called `sample1.bai` and sometimes `sample1.bam.bai` depending on the software used. The following example can accommodate both cases. 
41 | 42 | ```groovy 43 | process foo { 44 | debug true 45 | tag "$sampleId" 46 | 47 | input: 48 | tuple val(sampleId), file(bam) 49 | 50 | script: 51 | """ 52 | echo your_command --sample ${sampleId} --bam ${sampleId}.bam 53 | """ 54 | } 55 | 56 | workflow { 57 | Channel.fromFilePairs("$baseDir/data/alignment/*.{bam,bai}", checkIfExists:true) { file -> file.name.replaceAll(/\.bam|\.bai$/,'') } \ 58 | | foo 59 | } 60 | ``` 61 | 62 | ## Run it 63 | 64 | ```bash 65 | nextflow run nextflow-io/patterns/process-per-file-pairs-custom.nf 66 | ``` 67 | -------------------------------------------------------------------------------- /docs/process-per-file-path.md: -------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | You need to execute a task for each file that matches a glob pattern. 4 | 5 | ## Solution 6 | 7 | Use the [Channel.fromPath](https://www.nextflow.io/docs/latest/channel.html#frompath) method to create a channel emitting all files matching the glob pattern. Then, use the channel as input of the process implementing your task. 8 | 9 | ## Code 10 | 11 | ```groovy 12 | process foo { 13 | debug true 14 | input: 15 | path x 16 | 17 | script: 18 | """ 19 | echo your_command --input $x 20 | """ 21 | } 22 | 23 | workflow { 24 | Channel.fromPath("$baseDir/data/reads/*_1.fq.gz", checkIfExists:true) | foo 25 | } 26 | ``` 27 | 28 | ## Run it 29 | 30 | Use the following command to execute the example: 31 | 32 | ```bash 33 | nextflow run nextflow-io/patterns/process-per-file-path.nf 34 | ``` 35 | -------------------------------------------------------------------------------- /docs/process-per-file-range.md: -------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | You need to execute a task over two or more series of files having a common index range. 
4 | 5 | ## Solution 6 | 7 | Use the [from](https://www.nextflow.io/docs/latest/channel.html#from) method to define the range over which to repeat the task execution, then chain it with the [map](https://www.nextflow.io/docs/latest/operator.html#map) operator to associate each index with the corresponding input files. Finally, use the resulting channel as input for the process. 8 | 9 | ## Code 10 | 11 | ```groovy 12 | process foo { 13 | debug true 14 | tag "$sampleId" 15 | 16 | input: 17 | tuple val(sampleId), file(indels), file(snps) 18 | 19 | """ 20 | echo foo_command --this $indels --that $snps 21 | """ 22 | } 23 | 24 | workflow { 25 | Channel.from(1..23) \ 26 | | map { chr -> ["sample${chr}", file("/some/path/foo.${chr}.indels.vcf"), file("/other/path/foo.snvs.${chr}.vcf")] } \ 27 | | foo 28 | } 29 | ``` 30 | 31 | ## Run it 32 | 33 | ```bash 34 | nextflow run nextflow-io/patterns/process-per-file-range.nf 35 | ``` 36 | -------------------------------------------------------------------------------- /docs/process-when-empty.md: -------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | You need to execute a process if a channel is empty. 4 | 5 | ## Solution 6 | 7 | Use the [ifEmpty](https://www.nextflow.io/docs/latest/operator.html#ifempty) operator to emit a _marker_ value to trigger the execution of the process. 8 | 9 | ## Example 10 | 11 | ```groovy 12 | params.inputs = '' 13 | 14 | process foo { 15 | debug true 16 | input: 17 | val x 18 | when: 19 | x == 'EMPTY' 20 | 21 | script: 22 | ''' 23 | echo hello 24 | ''' 25 | } 26 | 27 | workflow { 28 | reads_ch = params.inputs 29 | ? 
Channel.fromPath(params.inputs, checkIfExists:true) 30 | : Channel.empty() 31 | 32 | reads_ch \ 33 | | ifEmpty { 'EMPTY' } \ 34 | | foo 35 | } 36 | ``` 37 | 38 | ## Run it 39 | 40 | Use the following command to run the script with an empty channel: 41 | 42 | ```bash 43 | nextflow run nextflow-io/patterns/process-when-empty.nf 44 | ``` 45 | 46 | Use the following command to provide the same script some input files, which prevents the process from being executed: 47 | 48 | ```bash 49 | nextflow run nextflow-io/patterns/process-when-empty.nf --inputs ../data/prots/\* 50 | ``` 51 | -------------------------------------------------------------------------------- /docs/publish-matching-glob.md: -------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | A task in your workflow creates many output files that are required by a downstream task. You want to store some of those files into separate directories depending on the file name. 4 | 5 | ## Solution 6 | 7 | Use two or more [publishDir](https://www.nextflow.io/docs/latest/process.html#publishdir) directives to publish the output files into separate paths. For each directive specify a different glob pattern using the `pattern` option to store into each directory only the files that match the provided pattern. 
8 | 9 | ## Code 10 | 11 | ```groovy 12 | params.reads = "$baseDir/data/reads/*_{1,2}.fq.gz" 13 | params.outdir = 'my-results' 14 | 15 | process foo { 16 | publishDir "$params.outdir/$sampleId/counts", pattern: "*_counts.txt" 17 | publishDir "$params.outdir/$sampleId/outlooks", pattern: '*_outlook.txt' 18 | publishDir "$params.outdir/$sampleId/", pattern: '*.fq' 19 | 20 | input: 21 | tuple val(sampleId), file('sample1.fq.gz'), file('sample2.fq.gz') 22 | output: 23 | path "*" 24 | script: 25 | """ 26 | < sample1.fq.gz zcat > sample1.fq 27 | < sample2.fq.gz zcat > sample2.fq 28 | 29 | awk '{s++}END{print s/4}' sample1.fq > sample1_counts.txt 30 | awk '{s++}END{print s/4}' sample2.fq > sample2_counts.txt 31 | 32 | head -n 50 sample1.fq > sample1_outlook.txt 33 | head -n 50 sample2.fq > sample2_outlook.txt 34 | """ 35 | } 36 | 37 | workflow { 38 | Channel.fromFilePairs(params.reads, checkIfExists: true, flat: true) \ 39 | | foo 40 | } 41 | ``` 42 | 43 | ## Run it 44 | 45 | Run the script with the following command: 46 | 47 | ```bash 48 | nextflow run nextflow-io/patterns/publish-matching-glob.nf 49 | ``` 50 | -------------------------------------------------------------------------------- /docs/publish-process-outputs.md: -------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | You need to store the outputs of one or more processes into a directory structure of your choice. 4 | 5 | ## Solution 6 | 7 | Use the [publishDir](https://www.nextflow.io/docs/latest/process.html#publishdir) directive to define a custom directory where the process outputs should be saved. 
8 | 9 | ## Code 10 | 11 | ```groovy 12 | params.reads = "$baseDir/data/reads/*{1,2}.fq.gz" 13 | params.outdir = 'my-results' 14 | 15 | process foo { 16 | publishDir "$params.outdir/$sampleId" 17 | input: 18 | tuple val(sampleId), file(samples) 19 | output: 20 | path '*.fq' 21 | 22 | script: 23 | """ 24 | < ${samples[0]} zcat > sample_1.fq 25 | < ${samples[1]} zcat > sample_2.fq 26 | """ 27 | } 28 | 29 | workflow { 30 | Channel.fromFilePairs(params.reads, checkIfExists: true) \ 31 | | foo 32 | } 33 | ``` 34 | 35 | ## Run it 36 | 37 | Run the script with the following command: 38 | 39 | ```bash 40 | nextflow run nextflow-io/patterns/publish-process-outputs.nf 41 | ``` 42 | -------------------------------------------------------------------------------- /docs/publish-rename-outputs.md: -------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | You need to save the outputs of a process to a directory, giving each file a name of your choice. 4 | 5 | ## Solution 6 | 7 | The [publishDir](https://www.nextflow.io/docs/latest/process.html#publishdir) allows you to save the process outputs in a directory of your choice. 8 | 9 | Use the `saveAs` option to give each file a name of your choice, providing a custom rule as a [closure](https://www.nextflow.io/docs/latest/script.html#closures). 10 | 11 | ## Code 12 | 13 | ```groovy 14 | process foo { 15 | publishDir 'results', saveAs: { filename -> "foo_$filename" } 16 | 17 | output: 18 | path '*.txt' 19 | 20 | ''' 21 | touch this.txt 22 | touch that.txt 23 | ''' 24 | } 25 | 26 | workflow { 27 | foo() 28 | } 29 | ``` 30 | 31 | ## Run it 32 | 33 | ```bash 34 | nextflow run nextflow-io/patterns/publish-rename-outputs.nf 35 | ``` 36 | 37 | ## Save outputs in a sub-directory 38 | 39 | The same pattern can be used to store specific files in separate directories depending on the actual name. 
40 | 41 | ```groovy 42 | process foo { 43 | publishDir 'results', saveAs: { filename -> filename.endsWith(".zip") ? "zips/$filename" : filename } 44 | 45 | output: 46 | path '*' 47 | 48 | ''' 49 | touch this.txt 50 | touch that.zip 51 | ''' 52 | } 53 | 54 | workflow { 55 | foo() 56 | } 57 | ``` 58 | 59 | !!! tip 60 | Relative paths are resolved against the `publishDir` store path. Use an absolute path to store files in a directory outside the `publishDir` store path. 61 | 62 | ## Run it 63 | 64 | ```bash 65 | nextflow run nextflow-io/patterns/publish-rename-outputs-subdirs.nf 66 | ``` 67 | -------------------------------------------------------------------------------- /docs/skip-process-execution.md: -------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | You have two sequential tasks in your workflow. When an optional flag is specified, the first task should be skipped and its input(s) should be processed by the second task. 4 | 5 | ## Solution 6 | 7 | Use an empty channel, created in a conditional expression, to skip the first process execution when an optional parameter is specified. Then, define the second process input as a [mix](https://www.nextflow.io/docs/latest/operator.html#mix) of the first process output (when executed) and the input channel. 8 | 9 | ## Code 10 | 11 | ```groovy 12 | params.skip = false 13 | params.input = "$baseDir/data/reads/sample.fq.gz" 14 | 15 | process foo { 16 | input: 17 | path x 18 | 19 | output: 20 | file('*.fastq') 21 | 22 | script: 23 | """ 24 | < $x zcat > ${x.simpleName}.fastq 25 | """ 26 | } 27 | 28 | process bar { 29 | debug true 30 | 31 | input: 32 | path x 33 | 34 | script: 35 | """ 36 | echo your_command --input $x 37 | """ 38 | } 39 | 40 | workflow { 41 | input_ch = Channel.fromPath(params.input) 42 | 43 | (foo_ch, bar_ch) = params.skip 44 | ? 
[Channel.empty(), input_ch] 45 | : [input_ch, Channel.empty()] 46 | 47 | foo_ch | foo | mix(bar_ch) | bar 48 | } 49 | ``` 50 | 51 | ## Run it 52 | 53 | Use the following command to execute the example: 54 | 55 | ```bash 56 | nextflow run nextflow-io/patterns/skip-process-execution.nf 57 | ``` 58 | 59 | The processes `foo` and `bar` are executed. Run the same command with the `--skip` command line option: 60 | 61 | ```bash 62 | nextflow run nextflow-io/patterns/skip-process-execution.nf --skip 63 | ``` 64 | 65 | This time only the `bar` process is executed. 66 | -------------------------------------------------------------------------------- /docs/sort-filepairs-by-samplename.md: -------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | You have many tuples with a sample name that works as an identifier and you want to sort the channel by the sample name. 4 | 5 | ## Solution 6 | 7 | Use the [toSortedList](https://www.nextflow.io/docs/latest/operator.html#tosortedlist) and the [flatMap](https://www.nextflow.io/docs/latest/operator.html#flatmap) operators to convert the channel to a sorted list, and then convert back to the original structure you get from the [fromFilePairs](https://www.nextflow.io/docs/latest/channel.html?highlight=fromfilepairs#fromfilepairs) channel factory. 8 | 9 | It's worth mentioning that the [toSortedList](https://www.nextflow.io/docs/latest/operator.html#tosortedlist) operator is not scalable as it introduces a blocking point in the pipeline execution since to sort the elements of a channel, it needs to collect all of them first. 
10 | 11 | The [fromFilePairs](https://www.nextflow.io/docs/latest/channel.html#fromfilepairs) channel factory in the code below will create a channel with tuple elements in the following format: 12 | 13 | ```groovy 14 | [ 15 | [samplec, [/path/to/my/files/samplec_1.fastq, /path/to/my/files/samplec_2.fastq]] 16 | [sampleb, [/path/to/my/files/sampleb_1.fastq, /path/to/my/files/sampleb_2.fastq]] 17 | [samplea, [/path/to/my/files/samplea_1.fastq, /path/to/my/files/samplea_2.fastq]] 18 | [sampled, [/path/to/my/files/sampled_1.fastq, /path/to/my/files/sampled_2.fastq]] 19 | [samplee, [/path/to/my/files/samplee_1.fastq, /path/to/my/files/samplee_2.fastq]] 20 | ] 21 | ``` 22 | ## Code 23 | 24 | ```groovy 25 | Channel 26 | .fromFilePairs('/path/to/my/files/*_{1,2}.fastq') 27 | // Sort the channel elements based on the first object of each tuple, 28 | // that is, the sample name, and convert to a channel with a single 29 | // element which is a list of tuples 30 | .toSortedList( { a, b -> a[0] <=> b[0] } ) // <=> is an operator for comparison 31 | // flatten the single-element channel to a channel with as many elements 32 | // as there are samples, which is the original structure provided by 33 | // fromFilePairs 34 | .flatMap() 35 | // View the channel elements by printing it to the screen 36 | .view() 37 | ``` 38 | 39 | ## Run it 40 | 41 | Run the example using this command: 42 | 43 | ```bash 44 | nextflow run nextflow-io/patterns/sort-filepairs-by-samplename.nf 45 | ``` 46 | -------------------------------------------------------------------------------- /docs/state-dependency.md: -------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | You need to synchronize the execution of two processes for which there isn't a data dependency, so that process `bar` is executed after the completion of process `foo`. 4 | 5 | ## Solution 6 | 7 | Add an output channel to process `foo` that produces a _ready_ signal. 
Then pass this channel as input to process `bar` in order to trigger its execution when `foo` completes. 8 | 9 | ## Code 10 | 11 | ```groovy 12 | process foo { 13 | output: 14 | val true 15 | script: 16 | """ 17 | echo your_command_here 18 | """ 19 | } 20 | 21 | process bar { 22 | input: 23 | val ready 24 | path fq 25 | script: 26 | """ 27 | echo other_command_here --reads $fq 28 | """ 29 | } 30 | 31 | workflow { 32 | reads_ch = Channel.fromPath("$baseDir/data/reads/11010*.fq.gz", checkIfExists:true) 33 | 34 | foo() 35 | bar(foo.out, reads_ch) 36 | } 37 | ``` 38 | 39 | ## Run it 40 | 41 | Run the example using this command: 42 | 43 | ```bash 44 | nextflow run nextflow-io/patterns/state-dependency.nf 45 | ``` 46 | -------------------------------------------------------------------------------- /docs/task-batching.md: -------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | You have many small tasks that you would like to process in batches to reduce job submission overhead. 4 | 5 | ## Solution 6 | 7 | Use the [buffer](https://www.nextflow.io/docs/latest/operator.html#buffer) operator to collect your input channel into batches, then refactor the process to accept a list of inputs instead of one input. One job will be created for each batch instead of each task. 8 | 9 | ## Code 10 | 11 | ```groovy 12 | process foo { 13 | input: 14 | val indices 15 | 16 | script: 17 | """ 18 | for INDEX in ${indices.join(' ')}; do 19 | echo "Hello from task \${INDEX}!" 
20 | done 21 | """ 22 | } 23 | 24 | workflow { 25 | Channel.of(1..1000) 26 | | buffer(size: 10, remainder: true) 27 | | foo 28 | } 29 | ``` 30 | 31 | ## Run it 32 | 33 | Run the example using this command: 34 | 35 | ```bash 36 | nextflow run nextflow-io/patterns/task-batching.nf 37 | ``` 38 | -------------------------------------------------------------------------------- /docs/workflow-grouping.md: -------------------------------------------------------------------------------- 1 | ## Problem 2 | 3 | You have a subworkflow, and you would like to limit the number of parallel subworkflow executions. 4 | 5 | ## Solution 6 | 7 | For a single process, you could use the [maxForks](https://nextflow.io/docs/latest/process.html#maxforks) directive to limit the number of parallel process executions. For a subworkflow, you can achieve the same effect by merging the subworkflow's processes into a single process, and then using `maxForks` or the `executor.queueSize` config option. 8 | 9 | The following example is based on a "diamond-shaped" subworkflow, in order to show how to implement parallel steps in a Bash script. View the [complete example](https://github.com/nextflow-io/patterns/blob/master/workflow-grouping.nf) to see the original subworkflow. 
10 | 11 | ## Code 12 | 13 | ```groovy 14 | params.n_groups = 10 15 | params.queue_size = 2 16 | 17 | process diamond_merged { 18 | maxForks params.queue_size 19 | 20 | input: 21 | val(index) 22 | 23 | output: 24 | tuple val(index), path('d.txt') 25 | 26 | script: 27 | """ 28 | sleep 1 29 | 30 | # process A 31 | echo "subworkflow ${index}, process A was here" >> a.txt 32 | 33 | # process B 34 | process_b() { 35 | cat a.txt >> b.txt 36 | echo "subworkflow ${index}, process B was here" >> b.txt 37 | } 38 | process_b & 39 | 40 | # process C 41 | process_c() { 42 | cat a.txt >> c.txt 43 | echo "subworkflow ${index}, process C was here" >> c.txt 44 | } 45 | process_c & 46 | 47 | wait 48 | 49 | # process D 50 | cat b.txt >> d.txt 51 | cat c.txt >> d.txt 52 | echo "subworkflow ${index}, process D was here" >> d.txt 53 | """ 54 | } 55 | 56 | workflow { 57 | Channel.of(1..params.n_groups) 58 | | diamond_merged 59 | } 60 | ``` 61 | 62 | ## Run it 63 | 64 | Run the example using this command: 65 | 66 | ```bash 67 | nextflow run nextflow-io/patterns/workflow-grouping.nf 68 | ``` 69 | -------------------------------------------------------------------------------- /feedback-loop-process.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2018, Centre for Genomic Regulation (CRG). 
5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | /* 26 | * author Paolo Di Tommaso 27 | * author Ben Sherman 28 | */ 29 | 30 | nextflow.preview.recursion=true 31 | 32 | params.data = "$baseDir/data/hello.txt" 33 | 34 | process foo { 35 | input: 36 | path 'input.txt' 37 | output: 38 | path 'result.txt' 39 | script: 40 | """ 41 | cat input.txt > result.txt 42 | echo "Task ${task.index} was here" >> result.txt 43 | """ 44 | } 45 | 46 | workflow { 47 | foo 48 | .recurse(file(params.data)) 49 | .until { it -> it.size() > 100 } 50 | 51 | foo 52 | .out 53 | .view(it -> it.text) 54 | } 55 | -------------------------------------------------------------------------------- /feedback-loop-workflow.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2018, Centre for Genomic Regulation (CRG). 
5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | /* 26 | * author Ben Sherman 27 | */ 28 | 29 | nextflow.preview.recursion=true 30 | 31 | params.input = "$baseDir/data/hello.txt" 32 | 33 | process tick { 34 | input: 35 | path 'input.txt' 36 | output: 37 | path 'result.txt' 38 | script: 39 | """ 40 | cat input.txt > result.txt 41 | echo "Task ${task.index} : tick" >> result.txt 42 | """ 43 | } 44 | 45 | process tock { 46 | input: 47 | path 'input.txt' 48 | output: 49 | path 'result.txt' 50 | script: 51 | """ 52 | cat input.txt > result.txt 53 | echo "Task ${task.index} : tock" >> result.txt 54 | """ 55 | } 56 | 57 | workflow clock { 58 | take: infile 59 | main: 60 | infile | tick | tock 61 | emit: 62 | tock.out 63 | } 64 | 65 | workflow { 66 | clock 67 | .recurse(file(params.input)) 68 | .until { it -> it.size() > 100 } 69 | 70 | clock 71 | .out 72 | .view(it -> it.text) 73 | } 74 | -------------------------------------------------------------------------------- /ignore-failing-process.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2018, Centre for Genomic Regulation (CRG). 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 
15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | /* 26 | * author Paolo Di Tommaso 27 | */ 28 | 29 | process foo { 30 | errorStrategy 'ignore' 31 | script: 32 | ''' 33 | echo This is going to fail! 34 | exit 1 35 | ''' 36 | } 37 | 38 | process bar { 39 | script: 40 | ''' 41 | echo OK 42 | ''' 43 | } 44 | 45 | workflow { 46 | foo() 47 | bar() 48 | } 49 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Nextflow Patterns 2 | repo_name: nextflow-io/patterns 3 | repo_url: https://github.com/nextflow-io/patterns 4 | 5 | theme: 6 | name: material 7 | favicon: https://www.nextflow.io/img/favicon.png 8 | features: 9 | - navigation.sections 10 | logo: https://www.nextflow.io/img/favicon.png 11 | palette: 12 | scheme: default 13 | primary: black 14 | 15 | markdown_extensions: 16 | - admonition 17 | - pymdownx.details 18 | - pymdownx.superfences 19 | 20 | nav: 21 | - 'Home': 'index.md' 22 | - 'Basic patterns': 23 | - 'Channel duplication': 'channel-duplication.md' 24 | - 'Sort FilePairs by sample name': 'sort-filepairs-by-samplename.md' 25 | - 'Create key to combine channels': 'create-key-to-combine-channels.md' 26 | - 'Scatter executions': 27 | - 'Process per file path': 'process-per-file-path.md' 28 | - 'Process per file chunk': 'process-per-file-chunk.md' 29 | - 'Process per file pairs': 'process-per-file-pairs.md' 30 | - 'Process per file range': 
'process-per-file-range.md' 31 | - 'Process per CSV record': 'process-per-csv-record.md' 32 | - 'Process per file output': 'process-per-file-output.md' 33 | - 'Gather results': 34 | - 'Process all outputs altogether': 'process-collect.md' 35 | - 'Process outputs into groups': 'process-into-groups.md' 36 | - 'Collect outputs into a file': 'collect-into-file.md' 37 | - 'Organize outputs': 38 | - 'Store process outputs': 'publish-process-outputs.md' 39 | - 'Store outputs matching a glob pattern': 'publish-matching-glob.md' 40 | - 'Store outputs renaming files': 'publish-rename-outputs.md' 41 | - 'Other': 42 | - 'Get process work directory': 'process-get-workdir.md' 43 | - 'Ignore failing process': 'ignore-failing-process.md' 44 | - 'State dependency': 'state-dependency.md' 45 | - 'Advanced patterns': 46 | - 'Conditional process resources': 'conditional-resources.md' 47 | - 'Conditional process execution (static)': 'conditional-process.md' 48 | - 'Conditional process execution (dynamic)': 'conditional-process-dynamic.md' 49 | - 'Skip process execution': 'skip-process-execution.md' 50 | - 'Feedback loop': 'feedback-loop.md' 51 | - 'Optional input': 'optional-input.md' 52 | - 'Optional output': 'optional-output.md' 53 | - 'Process when empty': 'process-when-empty.md' 54 | - 'Task batching': 'task-batching.md' 55 | - 'Workflow grouping': 'workflow-grouping.md' 56 | 57 | extra: 58 | analytics: 59 | provider: google 60 | property: G-244N3GEN75 61 | -------------------------------------------------------------------------------- /nextflow.config: -------------------------------------------------------------------------------- 1 | manifest { 2 | description = 'A curated collection of Nextflow implementation patterns' 3 | } 4 | -------------------------------------------------------------------------------- /optional-input.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2018, Centre 
for Genomic Regulation (CRG). 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | /* 26 | * author Paolo Di Tommaso 27 | */ 28 | 29 | params.inputs = "$projectDir/data/prots/*{1,2,3}.fa" 30 | params.filter = "$projectDir/assets/NO_FILE" 31 | 32 | process foo { 33 | debug true 34 | input: 35 | path seq 36 | path opt 37 | 38 | script: 39 | def filter = opt.name != 'NO_FILE' ? 
"--filter $opt" : '' 40 | """ 41 | echo your_command --input $seq $filter 42 | """ 43 | } 44 | 45 | workflow { 46 | prots_ch = Channel.fromPath(params.inputs, checkIfExists:true) 47 | opt_file = file(params.filter) 48 | 49 | foo(prots_ch, opt_file) 50 | } 51 | -------------------------------------------------------------------------------- /optional-output.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2018, Centre for Genomic Regulation (CRG). 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | /* 26 | * author Paolo Di Tommaso 27 | */ 28 | 29 | process foo { 30 | output: 31 | path 'foo.txt', optional: true 32 | 33 | script: 34 | ''' 35 | if [[ $(( ( RANDOM % 2 ) )) == 0 ]]; then 36 | echo Hello world > foo.txt 37 | fi 38 | ''' 39 | } 40 | 41 | process bar { 42 | input: 43 | path '*' 44 | script: 45 | ''' 46 | cat foo.txt 47 | ''' 48 | } 49 | 50 | workflow { 51 | foo | bar 52 | } 53 | -------------------------------------------------------------------------------- /process-collect.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2018, Centre for Genomic Regulation (CRG). 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | /* 26 | * author Paolo Di Tommaso 27 | */ 28 | 29 | process foo { 30 | input: 31 | path x 32 | output: 33 | path 'file.fq' 34 | script: 35 | """ 36 | < $x zcat > file.fq 37 | """ 38 | } 39 | 40 | process bar { 41 | debug true 42 | input: 43 | path '*.fq' 44 | script: 45 | """ 46 | cat *.fq | head -n 50 47 | """ 48 | } 49 | 50 | workflow { 51 | Channel.fromPath("$baseDir/data/reads/*_1.fq.gz", checkIfExists: true) \ 52 | | foo \ 53 | | collect \ 54 | | bar 55 | } 56 | -------------------------------------------------------------------------------- /process-get-workdir.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2018, Centre for Genomic Regulation (CRG). 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | /* 26 | * author Paolo Di Tommaso 27 | */ 28 | 29 | process foo { 30 | debug true 31 | script: 32 | """ 33 | echo foo task path: \$PWD 34 | """ 35 | } 36 | 37 | process bar { 38 | debug true 39 | script: 40 | ''' 41 | echo bar task path: $PWD 42 | ''' 43 | } 44 | 45 | workflow { 46 | foo() 47 | bar() 48 | } 49 | -------------------------------------------------------------------------------- /process-into-groups.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2018, Centre for Genomic Regulation (CRG). 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | /* 26 | * author Paolo Di Tommaso 27 | */ 28 | 29 | params.reads = "$baseDir/data/reads/*" 30 | 31 | process foo { 32 | debug true 33 | input: 34 | tuple val(key), path(samples) 35 | 36 | script: 37 | """ 38 | echo your_command --batch $key --input $samples 39 | """ 40 | } 41 | 42 | workflow { 43 | Channel.fromPath(params.reads, checkIfExists:true) \ 44 | | map { file -> 45 | def key = file.name.toString().tokenize('_').get(0) 46 | return tuple(key, file) 47 | } \ 48 | | groupTuple() \ 49 | | foo 50 | } 51 | -------------------------------------------------------------------------------- /process-per-csv-record.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2018, Centre for Genomic Regulation (CRG). 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | /* 26 | * author Paolo Di Tommaso 27 | */ 28 | 29 | params.index = "$baseDir/data/index.csv" 30 | 31 | process foo { 32 | debug true 33 | input: 34 | tuple val(sampleId), path(read1), path(read2) 35 | 36 | script: 37 | """ 38 | echo your_command --sample $sampleId --reads $read1 $read2 39 | """ 40 | } 41 | 42 | workflow { 43 | Channel.fromPath(params.index) \ 44 | | splitCsv(header:true) \ 45 | | map { row-> tuple(row.sampleId, file(row.read1), file(row.read2)) } \ 46 | | foo 47 | } 48 | -------------------------------------------------------------------------------- /process-per-file-chunk.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2018, Centre for Genomic Regulation (CRG). 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | /* 26 | * author Paolo Di Tommaso 27 | */ 28 | 29 | params.infile = "$baseDir/data/poem.txt" 30 | params.size = 5 31 | 32 | process foo { 33 | debug true 34 | input: 35 | file x 36 | 37 | script: 38 | """ 39 | rev $x | rev 40 | """ 41 | } 42 | 43 | workflow { 44 | Channel.fromPath(params.infile) \ 45 | | splitText(by: params.size) \ 46 | | foo 47 | } 48 | -------------------------------------------------------------------------------- /process-per-file-output.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2018, Centre for Genomic Regulation (CRG). 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | /* 26 | * author Paolo Di Tommaso 27 | */ 28 | 29 | process foo { 30 | output: 31 | path '*.txt' 32 | 33 | script: 34 | ''' 35 | echo Hello there! 
> file1.txt 36 | echo What a beautiful day > file2.txt 37 | echo I hope you are having fun! > file3.txt 38 | ''' 39 | } 40 | 41 | process bar { 42 | debug true 43 | input: 44 | path x 45 | 46 | script: 47 | """ 48 | cat $x 49 | """ 50 | } 51 | 52 | workflow { 53 | foo | flatten | bar 54 | } 55 | -------------------------------------------------------------------------------- /process-per-file-pairs-custom.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2018, Centre for Genomic Regulation (CRG). 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | /* 26 | * author Matthieu Foll 27 | */ 28 | 29 | process foo { 30 | debug true 31 | tag "$sampleId" 32 | 33 | input: 34 | tuple val(sampleId), path(bam) // bam receives every file grouped under sampleId by the workflow below 35 | 36 | script: 37 | """ 38 | echo your_command --sample ${sampleId} --bam ${sampleId}.bam 39 | """ 40 | } 41 | 42 | workflow { // group alignment files by name minus a trailing .bam/.bai; the regex is anchored and dot-escaped so only the real extension is stripped 43 | Channel.fromFilePairs("$baseDir/data/alignment/*.{bam,bai}", checkIfExists:true) { file -> file.name.replaceAll(/\.(bam|bai)$/,'') } \ 44 | | foo 45 | } 46 | -------------------------------------------------------------------------------- /process-per-file-pairs.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2018, Centre for Genomic Regulation (CRG). 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | /* 26 | * author Paolo Di Tommaso 27 | */ 28 | 29 | process foo { 30 | debug true 31 | 32 | input: 33 | tuple val(sampleId), path(reads) 34 | 35 | script: 36 | """ 37 | echo your_command --sample $sampleId --reads $reads 38 | """ 39 | } 40 | 41 | workflow { 42 | Channel.fromFilePairs("$baseDir/data/reads/*_{1,2}.fq.gz", checkIfExists:true) \ 43 | | foo 44 | } 45 | -------------------------------------------------------------------------------- /process-per-file-path.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2018, Centre for Genomic Regulation (CRG). 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | /* 26 | * author Paolo Di Tommaso 27 | */ 28 | 29 | params.inputs = "$baseDir/data/reads/*_1.fq.gz" 30 | 31 | process foo { 32 | debug true 33 | input: 34 | path x 35 | 36 | script: 37 | """ 38 | echo your_command --input $x 39 | """ 40 | } 41 | 42 | workflow { 43 | Channel.fromPath(params.inputs, checkIfExists: true) \ 44 | | foo 45 | } 46 | -------------------------------------------------------------------------------- /process-per-file-range.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2018, Centre for Genomic Regulation (CRG). 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | /* 26 | * author Paolo Di Tommaso 27 | */ 28 | 29 | process foo { 30 | debug true 31 | tag "$sampleId" 32 | 33 | input: 34 | tuple val(sampleId), path(indels), path(snps) 35 | script: 36 | """ 37 | echo foo_command --this $indels --that $snps 38 | """ 39 | } 40 | 41 | workflow { // emit one [sampleId, indels, snps] tuple per value of chr in 1..23 42 | Channel.of(1..23) \ 43 | | map { chr -> ["sample${chr}", file("/some/path/foo.${chr}.indels.vcf"), file("/other/path/foo.snvs.${chr}.vcf")] } \ 44 | | foo 45 | } 46 | -------------------------------------------------------------------------------- /process-when-empty.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2018, Centre for Genomic Regulation (CRG). 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | /* 26 | * author Paolo Di Tommaso 27 | */ 28 | 29 | params.inputs = '' 30 | 31 | process foo { 32 | debug true 33 | input: 34 | val x 35 | when: 36 | x == 'EMPTY' 37 | 38 | script: 39 | ''' 40 | echo hello 41 | ''' 42 | } 43 | 44 | workflow { 45 | reads_ch = params.inputs 46 | ? Channel.fromPath(params.inputs, checkIfExists:true) 47 | : Channel.empty() 48 | 49 | reads_ch \ 50 | | ifEmpty { 'EMPTY' } \ 51 | | foo 52 | } 53 | -------------------------------------------------------------------------------- /publish-matching-glob.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2018, Centre for Genomic Regulation (CRG). 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | /* 26 | * author Kevin Sayers 27 | */ 28 | 29 | params.reads = "$baseDir/data/reads/*_{1,2}.fq.gz" 30 | params.outdir = 'my-results' 31 | 32 | process foo { 33 | publishDir "$params.outdir/$sampleId/counts", pattern: "*_counts.txt" 34 | publishDir "$params.outdir/$sampleId/outlooks", pattern: '*_outlook.txt' 35 | publishDir "$params.outdir/$sampleId/", pattern: '*.fq' 36 | 37 | input: 38 | tuple val(sampleId), path('sample1.fq.gz'), path('sample2.fq.gz') 39 | output: 40 | path "*" 41 | script: 42 | """ 43 | < sample1.fq.gz zcat > sample1.fq 44 | < sample2.fq.gz zcat > sample2.fq 45 | 46 | awk '{s++}END{print s/4}' sample1.fq > sample1_counts.txt 47 | awk '{s++}END{print s/4}' sample2.fq > sample2_counts.txt 48 | 49 | head -n 50 sample1.fq > sample1_outlook.txt 50 | head -n 50 sample2.fq > sample2_outlook.txt 51 | """ 52 | } 53 | 54 | workflow { 55 | Channel.fromFilePairs(params.reads, checkIfExists: true, flat: true) \ 56 | | foo 57 | } 58 | -------------------------------------------------------------------------------- /publish-process-outputs.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2018, Centre for Genomic Regulation (CRG). 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 
15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | /* 26 | * author Paolo Di Tommaso 27 | */ 28 | 29 | params.reads = "$baseDir/data/reads/*{1,2}.fq.gz" 30 | params.outdir = 'my-results' 31 | 32 | process foo { 33 | publishDir "$params.outdir/$sampleId" 34 | input: 35 | tuple val(sampleId), path(samples) 36 | output: 37 | path '*.fq' 38 | 39 | script: 40 | """ 41 | < ${samples[0]} zcat > sample_1.fq 42 | < ${samples[1]} zcat > sample_2.fq 43 | """ 44 | } 45 | 46 | workflow { 47 | Channel.fromFilePairs(params.reads, checkIfExists: true) \ 48 | | foo 49 | } 50 | -------------------------------------------------------------------------------- /publish-rename-outputs-subdirs.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2018, Centre for Genomic Regulation (CRG). 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 
15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | /* 26 | * author Paolo Di Tommaso 27 | */ 28 | 29 | process foo { 30 | publishDir 'results', saveAs: { filename -> filename.endsWith(".zip") ? "zips/$filename" : filename } 31 | 32 | output: 33 | path '*' 34 | 35 | ''' 36 | touch this.txt 37 | touch that.zip 38 | ''' 39 | } 40 | 41 | workflow { 42 | foo() 43 | } 44 | -------------------------------------------------------------------------------- /publish-rename-outputs.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2018, Centre for Genomic Regulation (CRG). 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to deal 8 | * in the Software without restriction, including without limitation the rights 9 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | * copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 
15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | /* 26 | * author Paolo Di Tommaso 27 | */ 28 | 29 | process foo { 30 | publishDir 'results', saveAs: { filename -> "foo_$filename" } // each published file name gets a foo_ prefix 31 | 32 | output: 33 | path '*.txt' 34 | script: 35 | ''' 36 | touch this.txt 37 | touch that.txt 38 | ''' 39 | } 40 | 41 | workflow { 42 | foo() 43 | } 44 | -------------------------------------------------------------------------------- /scripts/cleanup.sh: -------------------------------------------------------------------------------- 1 | find . -name .report -exec rm {} + # -exec ... {} + runs nothing when there is no match and is safe for paths containing whitespace 2 | find . -name stdout -exec rm {} + 3 | find . -name .stdout -exec rm {} + 4 | find . -name checks.out -exec rm {} + 5 | find . -name .cache -prune -exec rm -rf {} + # -prune: do not descend into a directory that is about to be removed 6 | find . -name '.nextflow*' -prune -exec rm -rf {} + 7 | find . -name '.node-nextflow*' -prune -exec rm -rf {} + 8 | find . -name 'work' -prune -exec rm -rf {} + 9 | find . -name 'my-results' -prune -exec rm -rf {} + -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | for x in *.nf; do 5 | ( 6 | printf "\n\n== Testing > %s ==\n\n" "$(basename "$x")" # file name passed as an argument, never as part of the format string 7 | nextflow run "$x" 8 | ) 9 | done -------------------------------------------------------------------------------- /skip-process-execution.nf: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env nextflow 2 | 3 | /* 4 | * Copyright (c) 2018, Centre for Genomic Regulation (CRG). 
5 |  *
6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
7 |  * of this software and associated documentation files (the "Software"), to deal
8 |  * in the Software without restriction, including without limitation the rights
9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  */
24 |
25 | /*
26 |  * author Paolo Di Tommaso
27 |  */
28 |
29 | params.skip = false
30 | params.input = "$baseDir/data/reads/sample.fq.gz"
31 |
32 | // Decompresses the gzipped input into '<simpleName>.fastq'.
33 | process foo {
34 |     input:
35 |     path archive
36 |
37 |     output:
38 |     path '*.fastq'
39 |
40 |     script:
41 |     """
42 |     < $archive zcat > ${archive.simpleName}.fastq
43 |     """
44 | }
45 |
46 | // Echoes a placeholder command line for the file it receives.
47 | process bar {
48 |     debug true
49 |
50 |     input:
51 |     path reads
52 |
53 |     script:
54 |     """
55 |     echo your_command --input $reads
56 |     """
57 | }
58 |
59 | workflow {
60 |     source_ch = Channel.fromPath(params.input)
61 |
62 |     // With params.skip set, foo receives an empty channel and bar takes the
63 |     // raw input; otherwise the input goes through foo and bar only sees
64 |     // what foo produced.
65 |     to_foo = params.skip ? Channel.empty() : source_ch
66 |     to_bar = params.skip ? source_ch : Channel.empty()
67 |
68 |     foo(to_foo)
69 |     bar(foo.out.mix(to_bar))
70 | }
71 |
--------------------------------------------------------------------------------
/sort-filepairs-by-samplename.nf:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env nextflow
2 |
3 | /*
4 |  * Copyright (c) 2022, Seqera Labs.
5 |  *
6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
7 |  * of this software and associated documentation files (the "Software"), to deal
8 |  * in the Software without restriction, including without limitation the rights
9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  */
24 |
25 | /*
26 |  * author Marcel Ribeiro-Dantas
27 |  */
28 |
29 | /*
30 |  * Glob selecting the paired files. The default is the path that used to be
31 |  * hard-coded in the pipeline; override it with `--reads '<glob>'`.
32 |  */
33 | params.reads = '/my/path/data/*_{1,2}.fastq'
34 |
35 | /*
36 |  * Collects all file pairs into a single list ordered by the first element
37 |  * of each pair tuple, then emits the pairs one by one in that order and
38 |  * prints them.
39 |  */
40 | workflow {
41 |     Channel
42 |         .fromFilePairs(params.reads)
43 |         .toSortedList( { a, b -> a[0] <=> b[0] } )
44 |         .flatMap()
45 |         .view()
46 | }
47 |
--------------------------------------------------------------------------------
/state-dependency.nf:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env nextflow
2 |
3 | /*
4 |  * Copyright (c) 2018, Centre for Genomic Regulation (CRG).
5 |  *
6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
7 |  * of this software and associated documentation files (the "Software"), to deal
8 |  * in the Software without restriction, including without limitation the rights
9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  */
24 |
25 | /*
26 |  * author Paolo Di Tommaso
27 |  */
28 |
29 | // Runs a placeholder command and emits the value `true` as its output.
30 | process foo {
31 |     output:
32 |     val true
33 |
34 |     script:
35 |     """
36 |     echo your_command_here
37 |     """
38 | }
39 |
40 | // Takes foo's output value next to each reads file. `ready` is not used in
41 | // the script; it is declared only as an input.
42 | process bar {
43 |     input:
44 |     val ready
45 |     path reads
46 |
47 |     script:
48 |     """
49 |     echo other_command_here --reads $reads
50 |     """
51 | }
52 |
53 | workflow {
54 |     foo()
55 |
56 |     Channel
57 |         .fromPath("$baseDir/data/reads/11010*.fq.gz", checkIfExists: true)
58 |         .set { fastq_ch }
59 |
60 |     bar(foo.out, fastq_ch)
61 | }
62 |
--------------------------------------------------------------------------------
/task-batching.nf:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env nextflow
2 |
3 | /*
4 |  * Copyright (c) 2022, Seqera Labs.
5 |  *
6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
7 |  * of this software and associated documentation files (the "Software"), to deal
8 |  * in the Software without restriction, including without limitation the rights
9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  */
24 |
25 | /*
26 |  * author Ben Sherman
27 |  */
28 |
29 | params.n_tasks = 1000
30 | params.batch_size = 10
31 |
32 | // Handles one batch per task: loops in the shell over every index of the
33 | // batch and prints a greeting for each of them.
34 | process foo {
35 |     input:
36 |     val batch
37 |
38 |     script:
39 |     """
40 |     for INDEX in ${batch.join(' ')}; do
41 |         echo "Hello from task \${INDEX}!"
42 |     done
43 |     """
44 | }
45 |
46 | workflow {
47 |     // Group the indices 1..n_tasks into lists of batch_size elements;
48 |     // `remainder: true` also emits the last, possibly shorter, list.
49 |     batches_ch = Channel
50 |         .of(1..params.n_tasks)
51 |         .buffer(size: params.batch_size, remainder: true)
52 |
53 |     foo(batches_ch)
54 | }
55 |
--------------------------------------------------------------------------------
/workflow-grouping.nf:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env nextflow
2 |
3 | /*
4 |  * Copyright (c) 2023, Seqera Labs.
5 |  *
6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
7 |  * of this software and associated documentation files (the "Software"), to deal
8 |  * in the Software without restriction, including without limitation the rights
9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 |  * copies of the Software, and to permit persons to whom the Software is
11 |  * furnished to do so, subject to the following conditions:
12 |  *
13 |  * The above copyright notice and this permission notice shall be included in all
14 |  * copies or substantial portions of the Software.
15 |  *
16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 |  * SOFTWARE.
23 |  */
24 |
25 | /*
26 |  * author Ben Sherman
27 |  */
28 |
29 | params.n_groups = 10
30 | params.queue_size = 2
31 |
32 | // Step A: appends its marker line to output.txt for the given index.
33 | process A {
34 |     input:
35 |     val index
36 |
37 |     output:
38 |     tuple val(index), path('output.txt')
39 |
40 |     script:
41 |     """
42 |     sleep 1
43 |
44 |     echo "subworkflow ${index}, process A was here" >> output.txt
45 |     """
46 | }
47 |
48 | // Step B: copies its input file into output.txt and appends its own marker.
49 | process B {
50 |     input:
51 |     tuple val(index), path('input.txt')
52 |
53 |     output:
54 |     tuple val(index), path('output.txt')
55 |
56 |     script:
57 |     """
58 |     sleep 1
59 |
60 |     cat input.txt >> output.txt
61 |     echo "subworkflow ${index}, process B was here" >> output.txt
62 |     """
63 | }
64 |
65 | // Step C: same as B, with its own marker line.
66 | process C {
67 |     input:
68 |     tuple val(index), path('input.txt')
69 |
70 |     output:
71 |     tuple val(index), path('output.txt')
72 |
73 |     script:
74 |     """
75 |     sleep 1
76 |
77 |     cat input.txt >> output.txt
78 |     echo "subworkflow ${index}, process C was here" >> output.txt
79 |     """
80 | }
81 |
82 | // Step D: concatenates its two input files and appends its own marker.
83 | process D {
84 |     input:
85 |     tuple val(index), path('input_b.txt'), path('input_c.txt')
86 |
87 |     output:
88 |     tuple val(index), path('output.txt')
89 |
90 |     script:
91 |     """
92 |     sleep 1
93 |
94 |     cat input_b.txt >> output.txt
95 |     cat input_c.txt >> output.txt
96 |     echo "subworkflow ${index}, process D was here" >> output.txt
97 |     """
98 | }
99 |
100 | // Diamond-shaped flow: A feeds both B and C, whose outputs are joined on the
101 | // index and handed to D.
102 | workflow diamond {
103 |     A(Channel.of(1..params.n_groups))
104 |     B(A.out)
105 |     C(A.out)
106 |     D(B.out.join(C.out))
107 | }
108 |
109 | // The same four steps performed inside one task. Steps B and C run as
110 | // background shell functions and `wait` blocks until both have finished.
111 | // At most params.queue_size of these tasks run at the same time.
112 | process diamond_merged {
113 |     maxForks params.queue_size
114 |
115 |     input:
116 |     val index
117 |
118 |     output:
119 |     tuple val(index), path('d.txt')
120 |
121 |     script:
122 |     """
123 |     sleep 1
124 |
125 |     # process A
126 |     echo "subworkflow ${index}, process A was here" >> a.txt
127 |
128 |     # process B
129 |     process_b() {
130 |         cat a.txt >> b.txt
131 |         echo "subworkflow ${index}, process B was here" >> b.txt
132 |     }
133 |     process_b &
134 |
135 |     # process C
136 |     process_c() {
137 |         cat a.txt >> c.txt
138 |         echo "subworkflow ${index}, process C was here" >> c.txt
139 |     }
140 |     process_c &
141 |
142 |     wait
143 |
144 |     # process D
145 |     cat b.txt >> d.txt
146 |     cat c.txt >> d.txt
147 |     echo "subworkflow ${index}, process D was here" >> d.txt
148 |     """
149 | }
150 |
151 | workflow {
152 |     diamond_merged(Channel.of(1..params.n_groups))
153 | }
154 |
--------------------------------------------------------------------------------