├── .dockerignore
├── .gitattributes
├── .github
└── ISSUE_TEMPLATE
│ ├── bug_report.yml
│ ├── config.yml
│ ├── feature_request.yml
│ └── question.yml
├── .gitignore
├── .gitlab-ci.yml
├── .pre-commit-config.yaml
├── CHANGELOG.md
├── LICENSE
├── README.md
├── base.config
├── bin
├── deconcatenate.py
├── trim.py
├── workflow-glue
└── workflow_glue
│ ├── __init__.py
│ ├── bokeh_plot.py
│ ├── find_inserts.py
│ ├── models
│ ├── __init__.py
│ └── common.py
│ ├── report.py
│ ├── report_utils
│ └── report_utils.py
│ ├── run_plannotate.py
│ ├── tests
│ ├── __init__.py
│ ├── conftest.py
│ ├── test_deconcatenate.py
│ ├── test_find_inserts.py
│ ├── test_report.py
│ └── test_run_plannotate.py
│ ├── trim.py
│ ├── util.py
│ └── wfg_helpers
│ ├── __init__.py
│ ├── check_bam_headers_in_dir.py
│ ├── check_sample_sheet.py
│ ├── check_xam_index.py
│ ├── configure_igv.py
│ ├── get_max_depth_locus.py
│ └── reheader_samstream.py
├── data
├── .gitkeep
├── OPTIONAL_FILE
└── primers.tsv
├── docs
├── 01_brief_description.md
├── 02_introduction.md
├── 03_compute_requirements.md
├── 04_install_and_run.md
├── 05_related_protocols.md
├── 06_input_example.md
├── 06_input_parameters.md
├── 07_outputs.md
├── 08_pipeline_overview.md
├── 09_troubleshooting.md
├── 10_FAQ.md
└── 11_other.md
├── lib
├── ArgumentParser.groovy
├── CWUtil.groovy
├── NfcoreSchema.groovy
├── NfcoreTemplate.groovy
├── Pinguscript.groovy
├── WorkflowMain.groovy
├── common.nf
├── ingress.nf
└── nfcore_external_java_deps.jar
├── main.nf
├── modules
└── local
│ ├── canu_assembly.nf
│ └── flye_assembly.nf
├── nextflow.config
├── nextflow_schema.json
├── output_definition.json
└── test_data
├── .gitkeep
├── client_fields.json
├── cutsite_test.fasta
├── fastq-no-basecall-model
└── barcode01
│ └── test1.fastq
├── insert_reference.fasta
├── other_reference.fasta
├── plasmid.bam
├── sample_sheet.txt
├── sample_sheet_cutsite.csv
├── sample_sheet_hosts.csv
├── sample_sheet_number.csv
├── test
├── barcode01
│ └── test1.fastq
├── barcode02
│ └── test2.fastq
├── barcode03
│ └── test3.fastq
└── barcode04
│ └── .gitkeep
├── test_forward_full_ref.fasta
├── test_medaka_model.tar.gz
├── test_reverse_full_ref.fasta
└── workflow_glue
├── deconcatenate
├── barcode01.fasta
├── barcode01_expected.fasta
├── barcode02.fasta
├── barcode02_expected.fasta
├── barcode03.fasta
├── barcode03_expected.fasta
├── barcode04.fasta
├── barcode04_expected.fasta
├── barcode05.fasta
└── barcode05_expected.fasta
├── find_inserts
├── assemblies
│ ├── barcode01.final.fasta
│ ├── barcode02.final.fasta
│ ├── barcode03.final.fasta
│ └── barcode04.final.fasta
├── expected_df.csv
├── expected_insert
│ ├── barcode01.insert.fasta
│ ├── barcode01.insert.fasta.fai
│ ├── barcode02.insert.fasta
│ ├── barcode02.insert.fasta.fai
│ ├── barcode03.insert.fasta
│ ├── barcode03.insert.fasta.fai
│ ├── barcode04.insert.fasta
│ └── barcode04.insert.fasta.fai
├── insert_beds
│ ├── internal_forward.bed
│ ├── internal_reverse.bed
│ ├── split_forward.bed
│ └── split_reverse.bed
└── primers
│ ├── internal_forward.tsv
│ ├── internal_reverse.tsv
│ ├── split_forward.tsv
│ └── split_reverse.tsv
├── report
└── cut_sites.csv
└── run_plannotate
├── barcode01.annotations.gbk
└── barcode01.fasta
/.dockerignore:
--------------------------------------------------------------------------------
1 | .git
2 | LICENSE
3 | main.nf
4 | data
5 | test_data
6 | nextflow.config
7 | output
8 | README.md
9 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epi2me-labs/wf-clone-validation/e0e56b944dcf3efd54cde2bdc8dced104fc6c0bc/.gitattributes
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.yml:
--------------------------------------------------------------------------------
1 | name: Bug Report
2 | description: File a bug report
3 | labels: ["triage"]
4 | body:
5 | - type: markdown
6 | attributes:
7 | value: |
8 | Thanks for taking the time to fill out this bug report!
9 |
10 |
11 | - type: markdown
12 | attributes:
13 | value: |
14 | # Background
15 | - type: dropdown
16 | id: os
17 | attributes:
18 | label: Operating System
19 | description: What operating system are you running?
20 | options:
21 | - Windows 10
22 | - Windows 11
23 | - macOS
24 | - Ubuntu 22.04
25 | - CentOS 7
26 | - Other Linux (please specify below)
27 | validations:
28 | required: true
29 | - type: input
30 | id: other-os
31 | attributes:
32 | label: Other Linux
33 | placeholder: e.g. Fedora 38
34 | - type: input
35 | id: version
36 | attributes:
37 | label: Workflow Version
38 | description: This is most easily found in the workflow output log
39 | placeholder: v1.2.3
40 | validations:
41 | required: true
42 | - type: dropdown
43 | id: execution
44 | attributes:
45 | label: Workflow Execution
46 | description: Where are you running the workflow?
47 | options:
48 | - EPI2ME Desktop (Local)
49 | - EPI2ME Desktop (Cloud)
50 | - Command line (Local)
51 | - Command line (Cluster)
52 | - Other (please describe)
53 | validations:
54 | required: true
55 | - type: input
56 | id: other-workflow-execution
57 | attributes:
58 | label: Other workflow execution
59 | description: If "Other", please describe
60 | placeholder: Tell us where / how you are running the workflow.
61 |
62 | - type: markdown
63 | attributes:
64 | value: |
65 | # EPI2ME Desktop Application
66 | If you are using the application please provide the following.
67 | - type: input
68 | id: labs-version
69 | attributes:
70 | label: EPI2ME Version
71 | description: Available from the application settings page.
72 | placeholder: v5.1.1
73 | validations:
74 | required: false
75 |
76 |
77 | - type: markdown
78 | attributes:
79 | value: |
80 | # Command-line execution
81 | If you are using nextflow on a command-line, please provide the following.
82 | - type: textarea
83 | id: cli-command
84 | attributes:
85 | label: CLI command run
86 | description: Please tell us the command you are running
87 | placeholder: e.g. nextflow run epi2me-labs/wf-human-variations -profile standard --fastq my-reads/fastq
88 | validations:
89 | required: false
90 | - type: dropdown
91 | id: profile
92 | attributes:
93 | label: Workflow Execution - CLI Execution Profile
94 | description: Which execution profile are you using? If you are using a custom profile or nextflow configuration, please give details below.
95 | options:
96 | - standard (default)
97 | - singularity
98 | - custom
99 | validations:
100 | required: false
101 |
102 |
103 | - type: markdown
104 | attributes:
105 | value: |
106 | # Report details
107 | - type: textarea
108 | id: what-happened
109 | attributes:
110 | label: What happened?
111 | description: Also tell us, what did you expect to happen?
112 | placeholder: Tell us what you see!
113 | validations:
114 | required: true
115 | - type: textarea
116 | id: logs
117 | attributes:
118 | label: Relevant log output
119 | description: For CLI execution please include the full output from running nextflow. For execution from the EPI2ME application please copy the contents of the "Workflow logs" panel from the "Logs" tab corresponding to your workflow instance. (This will be automatically formatted into code, so no need for backticks).
120 | render: shell
121 | validations:
122 | required: true
123 | - type: textarea
124 | id: activity-log
125 | attributes:
126 | label: Application activity log entry
127 | description: For use with the EPI2ME application please see the Settings > View Activity Log page, and copy the contents of any items listed in red using the Copy to clipboard button.
128 | render: shell
129 | validations:
130 | required: false
131 | - type: dropdown
132 | id: run-demo
133 | attributes:
134 | label: Were you able to successfully run the latest version of the workflow with the demo data?
135 | description: For CLI execution, were you able to successfully run the workflow using the demo data available in the [Install and run](./README.md#install-and-run) section of the `README.md`? For execution in the EPI2ME application, were you able to successfully run the workflow via the "Use demo data" button?
136 | options:
137 | - 'yes'
138 | - 'no'
139 | - other (please describe below)
140 | validations:
141 | required: true
142 | - type: textarea
143 | id: demo-other
144 | attributes:
145 | label: Other demo data information
146 | render: shell
147 | validations:
148 | required: false
149 |
150 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: false
2 | contact_links:
3 | - name: Nanopore customer support
4 | url: https://nanoporetech.com/contact
5 | about: For general support, including bioinformatics questions.
6 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.yml:
--------------------------------------------------------------------------------
1 | name: Feature request
2 | description: Suggest an idea for this project
3 | labels: ["feature request"]
4 | body:
5 |
6 | - type: textarea
7 | id: question1
8 | attributes:
9 | label: Is your feature related to a problem?
10 | placeholder: A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
11 | validations:
12 | required: true
13 | - type: textarea
14 | id: question2
15 | attributes:
16 | label: Describe the solution you'd like
17 | placeholder: A clear and concise description of what you want to happen.
18 | validations:
19 | required: true
20 | - type: textarea
21 | id: question3
22 | attributes:
23 | label: Describe alternatives you've considered
24 | placeholder: A clear and concise description of any alternative solutions or features you've considered.
25 | validations:
26 | required: true
27 | - type: textarea
28 | id: question4
29 | attributes:
30 | label: Additional context
31 | placeholder: Add any other context about the feature request here.
32 | validations:
33 | required: false
34 |
35 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/question.yml:
--------------------------------------------------------------------------------
1 | name: Question
2 | description: Ask a generic question about this project unrelated to features or bugs.
3 | labels: ["question"]
4 | body:
5 | - type: markdown
6 | attributes:
7 | value: |
8 | Please reserve this form for issues not related to bugs or feature requests. If our developers deem your questions to be related to bugs or features you will be asked to fill in the appropriate form.
9 | - type: textarea
10 | id: question1
11 | attributes:
12 | label: Ask away!
13 | placeholder: |
14 | Bad question: How do I use this workflow in my HPC cluster?
15 | Good question: My HPC cluster uses a GridEngine scheduler. Can you point me to documentation for how to use your workflows to efficiently submit jobs to my cluster?
16 | validations:
17 | required: true
18 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | nextflow
2 | .nextflow*
3 | template-workflow
4 | .*.swp
5 | .*.swo
6 | *.pyc
7 | *.pyo
8 | .DS_store
9 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: local
3 | hooks:
4 | - id: docs_readme
5 | name: docs_readme
6 | entry: parse_docs -p docs -e .md -s 01_brief_description 02_introduction 03_compute_requirements 04_install_and_run 05_related_protocols 06_input_example 06_input_parameters 07_outputs 08_pipeline_overview 09_troubleshooting 10_FAQ 11_other -ot README.md -od output_definition.json -ns nextflow_schema.json
7 | language: python
8 | always_run: true
9 | pass_filenames: false
10 | additional_dependencies:
11 | - epi2melabs==0.0.57
12 | - repo: https://github.com/pycqa/flake8
13 | rev: 5.0.4
14 | hooks:
15 | - id: flake8
16 | pass_filenames: false
17 | additional_dependencies:
18 | - flake8-rst-docstrings
19 | - flake8-docstrings
20 | - flake8-import-order
21 | - flake8-forbid-visual-indent
22 | - pep8-naming
23 | - flake8-no-types
24 | - flake8-builtins
25 | - flake8-absolute-import
26 | - flake8-print
27 | args: [
28 | "bin",
29 | "--import-order-style=google",
30 | "--statistics",
31 | "--max-line-length=88",
32 | "--per-file-ignores=bin/workflow_glue/models/*:NT001",
33 | ]
34 |
--------------------------------------------------------------------------------
/base.config:
--------------------------------------------------------------------------------
1 | params {
2 | out_dir = "output"
3 | wf {
4 | container_sha_cloneval = "sha0ebc91d22c0ea5183272af8bf2b96ca51e88ad5d"
5 | container_sha_medaka = "sha447c70a639b8bcf17dc49b51e74dfcde6474837b"
6 | common_sha = "sha9ef2f4e4585c4ce6a604616e77185077551abf50"
7 | container_sha_canu = "shabbdea3813f6fb436ea0cbaa19958ad772db9154c"
8 | plannotate_sha = "shaf7f37f751dd0bc529121b765fb63322502288a03"
9 | }
10 | }
11 |
12 | // used by default for "standard" (docker) and singularity profiles,
13 | // other profiles may override.
14 | process {
15 | withLabel:wfplasmid {
16 | container = "ontresearch/wf-clone-validation:${params.wf.container_sha_cloneval}"
17 | }
18 | withLabel:canu {
19 | container = "ontresearch/canu:${params.wf.container_sha_canu}"
20 | }
21 | withLabel:medaka {
22 | container = "ontresearch/medaka:${params.wf.container_sha_medaka}"
23 | }
24 | withLabel:wf_common {
25 | container = "ontresearch/wf-common:${params.wf.common_sha}"
26 | }
27 | withLabel:plannotate {
28 | container = "ontresearch/plannotate:${params.wf.plannotate_sha}"
29 | }
30 | shell = ['/bin/bash', '-euo', 'pipefail']
31 | }
32 |
33 | profiles {
34 | // the "standard" profile is used implicitly by nextflow
35 | // if no other profile is given on the CLI
36 | standard {
37 | docker {
38 | enabled = true
39 | // this ensures container is run as host user and group, but
40 | // also adds host user to the within-container group
41 | runOptions = "--user \$(id -u):\$(id -g) --group-add 100"
42 | }
43 | }
44 |
45 | // using singularity instead of docker
46 | singularity {
47 | singularity {
48 | enabled = true
49 | autoMounts = true
50 | }
51 | }
52 |
53 | conda {
54 | conda.enabled = true
55 | }
56 |
57 | // Using AWS batch.
58 | // May need to set aws.region and aws.batch.cliPath
59 | awsbatch {
60 | process {
61 | executor = 'awsbatch'
62 | queue = "${params.aws_queue}"
63 | memory = '8G'
64 | withLabel:wfplasmid {
65 | container = "${params.aws_image_prefix}-wf-clone-validation:${params.wf.container_sha_cloneval}"
66 | }
67 | withLabel:medaka {
68 | container = "${params.aws_image_prefix}-medaka:${params.wf.container_sha_medaka}"
69 | }
70 | withLabel:canu {
71 | container = "${params.aws_image_prefix}-canu:${params.wf.container_sha_canu}"
72 | }
73 | withLabel:wf_common {
74 | container = "${params.aws_image_prefix}-wf-common:${params.wf.common_sha}"
75 | }
76 | withLabel:plannotate {
77 | container = "${params.aws_image_prefix}-plannotate:${params.wf.plannotate_sha}"
78 | }
79 | shell = ['/bin/bash', '-euo', 'pipefail']
80 | }
81 | }
82 | aws.region = 'eu-west-1'
83 | aws.batch.cliPath = '/home/ec2-user/miniconda/bin/aws'
84 |
85 | // local profile for simplified development testing
86 | local {
87 | process.executor = 'local'
88 | }
89 | }
90 |
// Execution reports written alongside the workflow outputs.
// `overwrite` is set as a plain key: inside a `timeline { ... }` block the
// spelling `timeline.overwrite` would create the nested key
// `timeline.timeline.overwrite`, which Nextflow does not read, and a re-run
// into the same out_dir would then fail because the file already exists.
timeline {
  enabled = true
  file = "${params.out_dir}/execution/timeline.html"
  overwrite = true
}
report {
  enabled = true
  file = "${params.out_dir}/execution/report.html"
  overwrite = true
}
trace {
  enabled = true
  file = "${params.out_dir}/execution/trace.txt"
  overwrite = true
}
106 | env {
107 | PYTHONNOUSERSITE = 1
108 | JAVA_TOOL_OPTIONS = "-Xlog:disable -Xlog:all=warning:stderr"
109 | }
--------------------------------------------------------------------------------
/bin/deconcatenate.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """Go deconcatenate your sequences."""
3 | import argparse
4 | import sys
5 |
6 | import mappy as mp
7 |
8 |
def get_output_handler(path):
    """Return a writable handle: stdout for '-', else the file at `path`."""
    return sys.stdout if path == '-' else open(path, 'w')
16 |
17 |
def get_aligner(reference):
    """Build a mappy aligner (preset 'asm5') indexed on `reference`.

    Raises an Exception when the resulting aligner object is falsy.
    """
    index = mp.Aligner(seq=reference, preset='asm5')
    if index:
        return index
    raise Exception("ERROR: failed to load/build index")
24 |
25 |
def align_self(seq):
    """Map the back half of `seq` onto its front half.

    Returns a tuple of (list of alignment hits, front half, back half); the
    front half is the indexed reference and the back half is the query.
    """
    midpoint = len(seq) // 2
    front = seq[:midpoint]
    back = seq[midpoint:]

    alignments = list(get_aligner(front).map(back))

    return alignments, front, back
35 |
36 |
def deconcatenate(seq, approx_size):
    """Self-align to remove duplicate regions.

    The sequence is repeatedly split in half and the second half aligned to
    the first (see `align_self`); the overlap found is collapsed and the
    process repeats until the length is within 20% of `approx_size` or no
    usable self-alignment remains. Progress messages are written to stdout.

    :param seq: sequence to deconcatenate.
    :param approx_size: expected size; anything accepted by `int()`.
    :returns: the (possibly shortened) sequence.
    """
    # the limits do not change between rounds, compute them once
    approx_size = int(approx_size)
    upper_limit = approx_size * 1.2
    lower_limit = approx_size * 0.8

    iteration = 0
    trimmed_assm = seq
    while True:
        iteration += 1
        sys.stdout.write(f"Trimming sequence... Round {iteration}\n")
        # check the size first: when it is already acceptable there is no
        # need to build an index and self-align
        if lower_limit < len(trimmed_assm) < upper_limit:
            sys.stdout.write(
                "Approx size is as expected, stopping here.\n")
            break

        hits, first, second = align_self(trimmed_assm)
        if not hits:
            sys.stdout.write("> No self-alignments, stopping here.\n")
            break
        if len(hits) == 1:
            sys.stdout.write("> Single self-alignment detected.\n")
        else:
            sys.stdout.write("> Multiple self-alignments detected.\n")
            # keep only hits starting within the first 5 bases of the second
            # half; other variations were tested, this one works
            hits = [hit for hit in hits if hit.q_st < 5]
            if not hits:
                sys.stdout.write(
                    "> No self-alignments match criteria, stopping here.\n")
                break

        # r_* coordinates are on the first half (the indexed reference),
        # q_* coordinates are on the second half (the query)
        hit = hits[0]
        if hit.r_st < hit.q_st:
            trimmed_assm = second[:hit.q_en] + first[hit.r_en:]
        else:
            trimmed_assm = first[:hit.r_en] + second[hit.q_en:]

    return trimmed_assm
77 |
78 |
def main(args):
    """Deconcatenate every input record and write the results as FASTA.

    Nothing is opened or written when the input yields no records.
    """
    source_path = args.sequence
    destination = args.output
    records = [
        [name, deconcatenate(seq, args.approx_size)]
        for name, seq, _ in mp.fastx_read(source_path)]

    if len(records) == 0:
        return

    out = get_output_handler(destination)
    for name, fixed in records:
        out.write(f">{name}\n{fixed}\n")
    out.close()
94 |
95 |
def argparser():
    """Build the command-line parser for the deconcatenate entrypoint."""
    cli_parser = argparse.ArgumentParser(
        prog="deconcatenate",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        add_help=False)
    # positional input file
    cli_parser.add_argument(
        "sequence",
        help="File in .FASTA format containing a single sequence/contig.")
    cli_parser.add_argument(
        "--approx_size", dest="approx_size", help="Approx plasmid size.")
    # '-' (the default) means write to stdout
    cli_parser.add_argument(
        "-o", "--output", dest="output", default="-", required=False,
        help="Path at which to write the fixedsequence/contig.")
    return cli_parser
120 |
121 |
122 | if __name__ == '__main__':
123 | args = argparser().parse_args()
124 | main(args)
125 |
--------------------------------------------------------------------------------
/bin/trim.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """Trim Canu assembly sequences."""
3 |
4 | import argparse
5 | import sys
6 |
7 | import pysam
8 |
9 |
def get_output_handler(path):
    """Give back stdout when `path` is '-', otherwise open `path` for writing."""
    if path == '-':
        return sys.stdout
    return open(path, 'w')
17 |
18 |
def trim(entry):
    """Trim following fastq comment suggestion.

    The record comment is read as whitespace-separated `key=value` fields and
    the sequence is sliced to the `start-end` range given by the `trim` field.
    Fields without an `=` (including the empty fields produced by repeated
    spaces) are ignored rather than causing an IndexError.

    :param entry: record exposing `comment` and `sequence` attributes.
    :returns: the trimmed sequence.
    :raises KeyError: if the comment has no `trim` field.
    """
    # example Canu output fastq header >tig00000001 len=3207 reads=43 class=contig
    # suggestRepeat=no suggestBubble=no suggestCircular=no trim=0-3207
    fields = {}
    for token in entry.comment.split():
        key, sep, value = token.partition('=')
        if sep:
            fields[key] = value

    start, end = (int(i) for i in fields['trim'].split('-'))[:2] if False else [
        int(i) for i in fields['trim'].split('-')][:2]
    return entry.sequence[start:end]
32 |
33 |
def main(args):
    """For each sequence, trim and write to output.

    Records whose comment carries no `trim=` field are skipped. The output
    handle is closed even if reading or trimming fails part-way through.

    :param args: parsed arguments providing `sequence` and `output`.
    """
    handler = get_output_handler(args.output)
    try:
        for entry in pysam.FastxFile(args.sequence):
            # a record without any comment text may expose `comment` as None;
            # treat that the same as "no trim suggestion" instead of failing
            if 'trim=' not in (entry.comment or ''):
                continue
            handler.write(f">{entry.name}\n{trim(entry)}\n")
    finally:
        handler.close()
42 |
43 |
def argparser():
    """Build the command-line parser for the trim entrypoint."""
    cli_parser = argparse.ArgumentParser(
        prog="trim",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        add_help=False)
    # positional input file
    cli_parser.add_argument(
        "sequence",
        help="File in .FASTA format containing a single sequence/contig.")
    # '-' (the default) means write to stdout
    cli_parser.add_argument(
        "-o", "--output", dest="output", default="-", required=False,
        help="Path at which to write the fixedsequence/contig.")
    return cli_parser
63 |
64 |
65 | if __name__ == "__main__":
66 | args = argparser().parse_args()
67 | main(args)
68 |
--------------------------------------------------------------------------------
/bin/workflow-glue:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """Entrypoint of pseudo-package for all the code used in the workflow."""
3 |
4 | from workflow_glue import cli
5 |
6 | if __name__ == "__main__":
7 | cli()
8 |
--------------------------------------------------------------------------------
/bin/workflow_glue/__init__.py:
--------------------------------------------------------------------------------
1 | """Workflow Python code."""
2 | import argparse
3 | import glob
4 | import importlib
5 | import itertools
6 | import os
7 | import sys
8 |
9 | from .util import _log_level, get_main_logger # noqa: ABS101
10 |
11 |
12 | __version__ = "0.0.1"
13 | _package_name = "workflow_glue"
14 |
15 | HELPERS = "wfg_helpers"
16 |
17 |
def get_components(allowed_components=None):
    """Find a list of workflow command scripts.

    Every `*.py` file next to this module and in the helpers sub-package is
    imported (except `__init__` and `util`); a module counts as a component
    when it exposes callable `main` and `argparser` attributes.

    :param allowed_components: optional collection of module names; when given
        only modules with these names are considered.
    :returns: dict mapping module name to the imported module.
    """
    logger = get_main_logger(_package_name)

    # gather all python files in the current directory and the wfg_helpers
    home_path = os.path.dirname(os.path.abspath(__file__))
    standard_lib = os.path.join(home_path, HELPERS)
    globs = itertools.chain.from_iterable(
        glob.glob(os.path.join(path, "*.py"))
        for path in (home_path, standard_lib))

    components = dict()
    for fname in globs:
        name = os.path.splitext(os.path.basename(fname))[0]
        if name in ("__init__", "util"):
            continue
        if allowed_components is not None and name not in allowed_components:
            continue

        # decide by the containing directory, not by a substring match on the
        # whole path (which would misfire if a parent directory name happened
        # to contain the helpers package name)
        if os.path.dirname(fname) == standard_lib:
            mod_path = f"{_package_name}.{HELPERS}.{name}"
        else:
            mod_path = f"{_package_name}.{name}"

        # leniently attempt to import module
        try:
            mod = importlib.import_module(mod_path)
        except ModuleNotFoundError as e:
            # if imports cannot be satisfied, refuse to add the component
            # rather than exploding
            logger.warning(
                f"Could not load {name} due to missing module {e.name}")
            continue

        # if there's a main() and an argparser() that's good enough for us.
        if all(callable(getattr(mod, x, None)) for x in ("main", "argparser")):
            components[name] = mod
    return components
57 |
58 |
def cli():
    """Run workflow entry points.

    Builds the top-level `wf-glue` parser, registers one sub-command per
    discovered component (see `get_components`), parses `sys.argv` and calls
    the selected component's `main` with the parsed arguments.
    """
    logger = get_main_logger(_package_name)
    logger.info("Bootstrapping CLI.")
    parser = argparse.ArgumentParser(
        'wf-glue',
        parents=[_log_level()],
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument(
        '-v', '--version', action='version',
        version='%(prog)s {}'.format(__version__))

    subparsers = parser.add_subparsers(
        title='subcommands', description='valid commands',
        help='additional help', dest='command')
    subparsers.required = True

    # importing everything can take time, try to shortcut by importing only
    # the module named by the first CLI argument; fall back to importing
    # everything when that argument is not a component (e.g. an option)
    if len(sys.argv) > 1:
        components = get_components(allowed_components=[sys.argv[1]])
        if sys.argv[1] not in components:
            logger.warning("Importing all modules, this may take some time.")
            components = get_components()
    else:
        components = get_components()

    # add all module parsers to main CLI
    for name, module in components.items():
        p = subparsers.add_parser(
            name.split(".")[-1], parents=[module.argparser()])
        p.set_defaults(func=module.main)

    args = parser.parse_args()

    logger.info("Starting entrypoint.")
    args.func(args)
96 |
--------------------------------------------------------------------------------
/bin/workflow_glue/bokeh_plot.py:
--------------------------------------------------------------------------------
1 | """Script from plannotate edited to work with bokeh 3."""
2 | from math import pi
3 |
4 | from bokeh.models import ColumnDataSource, HoverTool, Range1d, WheelZoomTool
5 | from bokeh.models.annotations import Label
6 | from bokeh.plotting import figure
7 | import numpy as np
8 | import pandas as pd
9 | from plannotate.bokeh_plot import calc_glyphs, calc_level, calc_num_markers
10 | import plannotate.resources as rsc
11 |
12 |
def get_bokeh(df, linear=False):
    """Get bokeh from plannotate updated to use Bokeh v3 to match ezcharts.

    Draws the annotated features of `df` around a circular backbone, with
    feature labels, base-pair tick marks, a hover tooltip and a legend.

    :param df: annotation table; the columns read directly here are
        `pi_permatch`, `db`, `qstart`, `qend`, `qlen`, `Type` and `fragment`
        (`Feature` and `Description` are referenced by the glyphs/tooltip).
    :param linear: when true, draw a short radial line across the top of the
        backbone circle.
    :returns: the populated bokeh figure.
    """
    # df = df.fillna("")
    x = 0
    y = 0
    baseradius = .18
    tooltips = """
@Feature — @Type @pi_permatch_int
@Description
"""  # noqa: E501
    hover = HoverTool(tooltips=tooltips)
    plotsize = .35
    plotdimen = 800
    x_range = Range1d(-plotsize, plotsize, bounds=(-.5, .5), min_interval=.1)
    y_range = Range1d(-plotsize, plotsize, bounds=(-.5, .5), min_interval=.1)
    toolbar = "right"
    p = figure(
        height=plotdimen, width=plotdimen, title="",
        toolbar_location=toolbar, toolbar_sticky=False,
        match_aspect=True,
        sizing_mode='scale_width', tools=['save', 'pan'],
        x_range=x_range, y_range=y_range)

    # x_range=(-plotsize, plotsize), y_range=(-plotsize, plotsize))
    p.toolbar.logo = None
    p.add_tools(WheelZoomTool(zoom_on_axis=False))
    p.toolbar.active_scroll = p.select_one(WheelZoomTool)

    # backbone line
    p.circle(
        x=x, y=y, radius=baseradius, line_color="#000000",
        fill_color=None, line_width=2.5)

    df = calc_level(df)

    if linear:
        line_length = baseradius / 5
        p.line(
            [0, 0], [baseradius - line_length, baseradius + line_length],
            line_width=4, level="overlay", line_color="black")

    df['pi_permatch_int'] = df['pi_permatch'].astype('int')

    df['pi_permatch_int'] = df['pi_permatch_int'].astype(str) + "%"
    # removes percent from infernal hits
    df.loc[df['db'] == "Rfam", 'pi_permatch_int'] = ""

    # convert start/end positions to angles (radians) around the circle,
    # wrapping negative angles and features that span the origin
    df['rstart'] = ((df["qstart"]/df["qlen"])*2*pi)
    df['rend'] = ((df["qend"]/df["qlen"])*2*pi)
    df['rstart'] = np.where(
        df['rstart'] < 0, df['rstart'] + (2*pi), df['rstart'])
    df['rend'] = np.where(df['rend'] < 0, df['rend'] + (2*pi), df['rend'])
    df['rend'] = np.where(
        df['rend'] < df['rstart'], df['rend'] + (2*pi), df['rend'])

    df['Type'] = df['Type'].str.replace('rep_origin', 'origin of replication')

    # DDE0BD
    # C97064
    # C9E4CA
    fullcolordf = pd.read_csv(
        rsc.get_resource("data", "colors.csv"), index_col=0)
    # fragments use the full-feature fill colour as outline on a white fill
    fragcolordf = fullcolordf.copy()
    fragcolordf[['fill_color', 'line_color']] = fragcolordf[
        ['line_color', 'fill_color']]
    fragcolordf["fill_color"] = "#ffffff"

    full = df[df["fragment"] == False]  # noqa

    full = full.merge(
        fullcolordf,
        how="left", on=["Type"])
    full['legend'] = full['Type']
    full = full.fillna(
        {"color": "grey",
         "fill_color": "#808080",
         "line_color": "#000000"})
    frag = df[df["fragment"]]
    frag = frag.merge(fragcolordf, how="left", on=["Type"])
    frag = frag.fillna(
        {"color": "grey",
         "fill_color": "#ffffff",
         "line_color": "#808080"})

    # DataFrame.append was removed in pandas 2.0; concat is the equivalent
    # and is available in all pandas versions
    df = pd.concat([full, frag]).reset_index(drop=True)

    # add orientation column
    orient = pd.read_csv(
        rsc.get_resource("data", "feature_orientation.csv"),
        header=None, names=["Type", "has_orientation"])
    orient['has_orientation'] = orient['has_orientation'].map({"T": True})
    df = df.merge(orient, on="Type", how="left")
    df['Type'] = df['Type'].str.replace("_", " ")
    df['has_orientation'] = df['has_orientation'].fillna(value=False)
    df[[
        'x', 'y', "Lx1", "Ly1",
        "annoLineColor", "lineX",
        "lineY", "theta", "text_align"]] = df.apply(calc_glyphs, axis=1)
    df['legend'] = df['Type']
    # allowedtypes = ['CDS',"promoter","origin of replication","swissprot"]
    allowedtypes = fullcolordf['Type']
    mask = ~df['legend'].isin(allowedtypes)
    df.loc[mask, 'legend'] = 'misc feature'

    # plot annotations
    source = ColumnDataSource(df)
    hover_labels = p.patches(
        'x', 'y', fill_color='fill_color', line_color='line_color',
        name="features", line_width=2.5,
        source=source, legend_group="legend")
    p.multi_line(
        xs="lineX", ys="lineY", line_color="annoLineColor",
        line_width=3, level="overlay",
        line_cap='round', alpha=.5, source=source)

    # `text_align` cannot read from `source` -- have to do this workaround
    right = ColumnDataSource(df[df['text_align'] == 'right'])
    left = ColumnDataSource(df[df['text_align'] == 'left'])
    bcenter = ColumnDataSource(df[df['text_align'] == 'b_center'])
    tcenter = ColumnDataSource(df[df['text_align'] == 't_center'])

    text_level = 'overlay'
    p.text(
        x="Lx1", y="Ly1", name="2", x_offset=3, y_offset=8,
        text_align="left",
        text='Feature', level=text_level, source=right)
    p.text(
        x="Lx1", y="Ly1", name="2", x_offset=-5, y_offset=8,
        text_align="right", text='Feature', level=text_level, source=left)
    p.text(
        x="Lx1", y="Ly1", name="2", x_offset=0, y_offset=15,
        text_align="center", text='Feature',
        level=text_level, source=bcenter)
    p.text(
        x="Lx1", y="Ly1", name="2", x_offset=0, y_offset=0,
        text_align="center",
        text='Feature', level=text_level, source=tcenter)

    # calculate chunk size(s) for drawing lines
    plaslen = df.iloc[0]['qlen']
    ticks = calc_num_markers(plaslen)
    ticks_cds = ColumnDataSource(ticks)
    p.multi_line(
        xs="lineX", ys="lineY", line_color="black", line_width=2,
        level="underlay", line_cap='round',
        alpha=.5, source=ticks_cds)

    # same per-alignment workaround as above, now for the tick labels
    right = ColumnDataSource(ticks[ticks['text_align'] == 'right'])
    left = ColumnDataSource(ticks[ticks['text_align'] == 'left'])
    bcenter = ColumnDataSource(ticks[ticks['text_align'] == 'b_center'])
    tcenter = ColumnDataSource(ticks[ticks['text_align'] == 't_center'])
    p.text(
        x="Lx1", y="Ly1", name="2", x_offset=3, y_offset=6,
        text_align="left", text='bp', alpha=.5,
        text_font_size='size', level=text_level, source=right)
    p.text(
        x="Lx1", y="Ly1", name="2", x_offset=-5,
        y_offset=6, text_align="right",
        text='bp', alpha=.5, text_font_size='size',
        level=text_level, source=left)
    p.text(
        x="Lx1", y="Ly1", name="2", x_offset=0, y_offset=15,
        text_align="center", text='bp', alpha=.5,
        text_font_size='size', level=text_level, source=bcenter)
    p.text(
        x="Lx1", y="Ly1", name="2", x_offset=0, y_offset=-3,
        text_align="center", text='bp', alpha=.5,
        text_font_size='size', level=text_level, source=tcenter)
    p.add_tools(hover)
    # total length label in the centre of the plot
    p.add_layout(Label(
        x=0, y=0, name="2", x_offset=0, y_offset=-8,
        text_align="center",
        text=f"{plaslen} bp", text_color="#7b7b7b",
        text_font_size='16px', level=text_level))
    # only the feature patches should trigger the tooltip
    p.hover.renderers = [hover_labels]
    p.axis.axis_label = None
    p.axis.visible = False
    p.grid.grid_line_color = "#EFEFEF"
    p.outline_line_color = "#DDDDDD"
    p.legend.location = 'bottom_left'
    p.legend.border_line_color = "#EFEFEF"
    p.legend.visible = True
    return p
197 |
--------------------------------------------------------------------------------
/bin/workflow_glue/find_inserts.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """Finding inserts."""
4 |
5 | import argparse
6 | import json
7 | import os
8 |
9 | import pandas as pd
10 | from pysam import FastaFile
11 | from spoa import poa
12 | from .util import wf_parser # noqa: ABS101
13 |
14 |
def reverse_complement(seq):
    """Return the reverse complement of a nucleotide sequence.

    Only the upper-case symbols A, C, G and T are swapped; N and X map to
    themselves and any other character is carried over unchanged.

    :param seq: sequence string.
    :returns: the complemented sequence, reversed.
    """
    bases = "ATCGXN"
    partners = "TAGCXN"
    table = str.maketrans(bases, partners)
    complemented = seq.translate(table)
    return complemented[::-1]
21 |
22 |
def _read_first_fasta_sequence(fasta_path):
    """Return the sequence of the first record in a FASTA file as one string."""
    chunks = []
    seen_header = False
    with open(fasta_path, "r") as fp:
        for line in fp:
            line = line.strip()
            if line.startswith(">"):
                if seen_header:
                    # start of a second record; only the first one is wanted
                    break
                seen_header = True
            elif line:
                chunks.append(line)
    return "".join(chunks)


def read_seqkit(bed_files, assemblies_dir):
    """Read beds and return single data frame and dict of the insert seqs.

    Only the first row of each bed file is used to determine the insert. When
    that row has no sequence, the insert is rebuilt from
    `<assemblies_dir>/<Sample>.final.fasta` as `assembly[start:]` followed by
    `assembly[:end]` (reverse complemented for the '-' strand).

    :param bed_files: paths of tab-separated bed files with the columns
        Sample, start, end, primer, score, strand, sequence (no header).
    :param assemblies_dir: directory holding the `<Sample>.final.fasta` files.
    :returns: tuple of (concatenated bed data frame without the `score` and
        `sequence` columns, dict mapping sample name to insert sequence).
    """
    columns = [
        'Sample', 'start', 'end', 'primer', 'score', 'strand', 'sequence']
    dfs = []
    bed_dic = {}
    for fname in sorted(bed_files):
        df = pd.read_csv(
            fname, sep='\t', header=None, names=columns,
            dtype={'Sample': str})
        first = df.iloc[0]
        sample = first['Sample']
        seq = first['sequence']
        # need to find the sequence if seq spans origin
        if pd.isna(seq):
            file_name = os.path.join(assemblies_dir, sample + '.final.fasta')
            # Join every sequence line: the assembly may be line-wrapped and
            # may lack a trailing newline.
            whole_seq = _read_first_fasta_sequence(file_name)
            seq = whole_seq[int(first['start']):] + whole_seq[:int(first['end'])]
            if first['strand'] == '-':
                seq = reverse_complement(seq)
        # The `sequence` column is dropped below, so the recovered insert is
        # only stored in the dictionary rather than written back to the frame.
        bed_dic[sample] = str(seq)
        dfs.append(df)
    bed_df = pd.concat(dfs).drop(['score', 'sequence'], axis=1)
    bed_df.reset_index(drop=True, inplace=True)
    return bed_df, bed_dic
54 |
55 |
def make_msa(inserts_dic, reference=None):
    """Make multiple sequence alignment.

    :param inserts_dic: dict mapping sample name to insert sequence.
    :param reference: optional path to a FASTA file; when given, its first
        record is aligned first under the name 'Reference'.
    :returns: list of strings, one per aligned sequence, each made of the
        name padded to a common width, a space, and the aligned sequence.
    """
    names = []
    allseq = []
    # Align with reference if included
    if reference:
        # close the FASTA handle once the record has been fetched
        with FastaFile(reference) as refseq:
            allseq.append(refseq.fetch(refseq.references[0]))
        names.append('Reference')
    names.extend(inserts_dic.keys())
    allseq.extend(inserts_dic.values())
    # second element of the value returned by `poa` holds the aligned rows
    msa = poa(allseq)[1]
    # make sure names are all same length for MSA; width computed once
    width = max(map(len, names), default=0)
    return [
        name.ljust(width) + ' ' + aligned
        for name, aligned in zip(names, msa)]
79 |
80 |
def main(args):
    """Entry point to create a wf-clone-validation report.

    Writes a JSON document to `args.output` with the bed table, the insert
    sequences, the reference name and (unless `args.large_construct` is set)
    a multiple sequence alignment. Each insert is also written to
    `<args.insert_dir>/<sample>.insert.fasta`. When no primer beds are given
    the output file is created but left empty.

    :param args: parsed arguments from `argparser()`.
    :raises ValueError: if the insert reference does not contain exactly one
        record.
    """
    with open(args.output, 'w') as f:
        ref = "No reference"
        if args.reference:
            with FastaFile(args.reference) as ref_fasta:
                insert_ref = ref_fasta.references
            if len(insert_ref) != 1:
                raise ValueError(
                    "Insert reference can only contain one fasta record; "
                    f"{len(insert_ref)} found.")
            ref = insert_ref[0]
        if not args.primer_beds:
            return
        # only create inserts directory if there are primer_beds
        # as optional output in nextflow; create the directory that is
        # actually written to, which may differ from ./inserts
        os.makedirs(args.insert_dir, exist_ok=True)
        sk_df, sk_dict = read_seqkit(args.primer_beds, args.assemblies)
        for k, v in sk_dict.items():
            insert_fn = os.path.join(args.insert_dir, f'{str(k)}.insert.fasta')
            with open(insert_fn, "a") as fp:
                fp.write('>' + str(k) + '\n' + str(v) + '\n')
        inserts_json = {
            'bed_df': sk_df.to_json(),
            'bed_dic': sk_dict}
        # If assembly will be large, skip MSA creation
        if not args.large_construct:
            # If reference is available, it will be included to perform the
            # Multiple sequence alignment.
            # Otherwise MSA will be done using the inserts available
            inserts_json['msa'] = make_msa(sk_dict, args.reference)
        inserts_json['reference'] = ref
        json.dump(inserts_json, f)
124 |
125 |
def argparser():
    """Build the command line parser for the `find_inserts` entry point.

    :returns: parser with the options --output (required), --primer_beds,
        --large_construct, --reference, --insert_dir and --assemblies.
    """
    cli = wf_parser("find_inserts")
    cli.add_argument(
        "--output", required=True,
        help="output json name")
    cli.add_argument(
        "--primer_beds", required=False, nargs='+',
        help="bed files of extracted sequences")
    cli.add_argument(
        "--large_construct", required=False,
        action="store_true", default=False,
        help="large construct mode skip msa")
    cli.add_argument(
        "--reference", required=False,
        help="reference")
    cli.add_argument(
        "--insert_dir", default="inserts",
        help="output directory for insert fastas")
    cli.add_argument(
        "--assemblies", default="assemblies",
        help="Full assemblies directory for finding insert")
    return cli
154 |
155 |
if __name__ == "__main__":
    # Use this module's `argparser()`; `argparse` is the stdlib module and
    # calling it raises "TypeError: 'module' object is not callable".
    args = argparser().parse_args()
    main(args)
159 |
--------------------------------------------------------------------------------
/bin/workflow_glue/models/__init__.py:
--------------------------------------------------------------------------------
1 | """A collection of scripts for results models."""
2 |
--------------------------------------------------------------------------------
/bin/workflow_glue/models/common.py:
--------------------------------------------------------------------------------
1 | """Common model classes used across all workflows."""
2 | from dataclasses import asdict, dataclass, field
3 | from decimal import Decimal
4 | from enum import Enum
5 | import json
6 | from typing import Any, Dict, List
7 |
8 |
class SampleType(str, Enum):
    """Kinds of sample a sample sheet entry can describe."""

    no_template_control = "no_template_control"
    positive_control = "positive_control"
    negative_control = "negative_control"
    test_sample = "test_sample"

    def friendly_name(self):
        """Return the member name as a capitalised phrase with spaces."""
        words = self.name.split("_")
        return " ".join(words).capitalize()
20 |
21 |
@dataclass
class SampleIdentifier:
    """Additional identifiers for a sample.

    Each identifier is a free-form name/value pair. The field metadata uses
    the lower-case `description` key, in line with the other models in this
    module.
    """

    name: str = field(
        metadata={
            "title": "Identifier name",
            "description": "The name of the sample identifier"})
    value: str = field(
        metadata={
            "title": "Identifier value",
            "description": "The value of the sample identifier"})
34 |
35 |
36 | @dataclass
37 | class CheckResult:
38 | """
39 | A result of some check the workflow has performed.
40 |
41 | This can be at sample or workflow level.
42 | """
43 |
44 | check_category: str = field(
45 | metadata={
46 | "title": "Check category",
47 | "description": "The category of the check"})
48 | check_name: str = field(
49 | metadata={
50 | "title": "Check name",
51 | "description": "The name of the check"})
52 | check_pass: bool = field(
53 | metadata={
54 | "title": "Check pass",
55 | "description": "If true the check has passed"})
56 | check_threshold: str | None = field(
57 | default=None, metadata={
58 | "title": "Check threshold",
59 | "description": "The threshold for the check, useful for reporting later"})
60 |
61 | categories = {}
62 |
63 | def friendly_check_category(self):
64 | """Convert category to string."""
65 | if self.check_category not in self.categories:
66 | raise ValueError(f"{self.check_category} has no friendly name")
67 | return self.categories[self.check_category]
68 |
69 | def friendly_check_name(self):
70 | """Convert check name to string."""
71 | return self.check_name.replace("_", " ").capitalize()
72 |
73 |
@dataclass
class ResultsContents:
    """Field-less placeholder dataclass for results contents."""
79 |
80 |
@dataclass
class Sample:
    """A sample sheet entry and its corresponding checks and related results.

    `sample_pass` is recomputed in `__post_init__` from `sample_checks`, so
    the value passed to the constructor is overwritten.
    """

    alias: str = field(
        metadata={
            "title": "Sample alias",
            "description": "The alias for the sample given by the user"})
    barcode: str = field(
        metadata={
            "title": "Sample barcode",
            "description": "The physical barcode assigned to the sample"})
    sample_type: SampleType = field(
        metadata={
            "title": "Sample type",
            "description": "The type of the sample"})
    sample_pass: bool = field(
        metadata={
            "title": "Sample pass",
            "description": "If true the sample has passed workflow checks"})
    additional_identifiers: List[SampleIdentifier] = field(
        default_factory=list, metadata={
            "title": "Additional sample identifiers",
            "description": "Additional identifiers for the sample"})
    sample_checks: list[CheckResult] = field(
        default_factory=list, metadata={
            "title": "Sample checks",
            "description": "An array of checks performed on the sample"})
    results: ResultsContents | None = field(
        default=None, metadata={
            "title": "Sample results",
            "description": "Further specific workflow results for this sample"})
    # The description is built from adjacent literals so that it contains
    # neither a line break nor source indentation.
    config: Dict[str, Any] | None = field(
        default=None, metadata={
            "title": "Sample configuration",
            "description": (
                "Sample specific config parameters "
                "used for running analysis")})

    def __post_init__(self):
        """Determine overall status for a sample given the individual check results."""
        # `all()` of an empty list is True: a sample with no checks passes.
        self.sample_pass = all(
            check.check_pass for check in self.sample_checks)

    def get_sample_identifier(self, sample_identifier):
        """Get a sample identifier given the identifier name.

        :param sample_identifier: name of the identifier to look up.
        :returns: value of the first identifier with a matching name.
        :raises KeyError: if no identifier has that name.
        """
        for identifier in self.additional_identifiers:
            if identifier.name == sample_identifier:
                return identifier.value
        raise KeyError("Sample identifier not found")

    def set_sample_identifier(self, name, value):
        """Append a new identifier and return the full identifier list.

        An existing identifier with the same name is not replaced.
        """
        sample_identifier = SampleIdentifier(
            name=name,
            value=value)
        self.additional_identifiers.append(sample_identifier)
        return self.additional_identifiers

    def to_json(self, filename):
        """Save class as JSON.

        :param filename: path of the file to write.
        """
        with open(filename, 'w') as f:
            json.dump(asdict(self), f, default=str, indent=2, cls=DecimalEncoder)
143 |
144 |
145 | @dataclass
146 | class RunStats:
147 | """Basic run statistics for the entire run."""
148 |
149 | total_reads: int | None = field(
150 | default=None, metadata={
151 | "title": "Total reads",
152 | "description": "Total number of reads on run"})
153 | total_ambiguous_reads: int | None = field(
154 | default=None, metadata={
155 | "title": "Total ambiguous reads",
156 | "description": "Number of reads of unknown provenance"})
157 | total_unaligned_reads: int | None = field(
158 | default=None, metadata={
159 | "title": "Total unaligned reads",
160 | "description": "Number of unaligned reads"})
161 |
162 |
@dataclass
class WorkflowResult:
    """
    Definition for results that will be returned by this workflow.

    This structure will be passed through by Gizmo speaking clients
    as WorkflowInstance.results.
    """

    samples: list[Sample] = field(
        metadata={
            "title": "Samples",
            "description": "Samples in this workflow instance"})
    workflow_pass: bool | None = field(
        default=None, metadata={
            "title": "Workflow pass",
            "description": "True if this workflow instance passes all checks"})
    workflow_checks: list[CheckResult] = field(
        default_factory=list, metadata={
            "title": "Workflow checks",
            "description": "An array of checks performed on the workflow instance"})
    # Title matches the field; it previously repeated "Samples".
    run_stats: RunStats | None = field(
        default=None, metadata={
            "title": "Run stats",
            "description": "Basic run statistics"})
    client_fields: dict[str, Any] | None = field(
        default_factory=dict, metadata={
            "title": "Client fields",
            "description": "Arbitrary key-value pairs provided by the client"})

    def to_json(self, filename):
        """Save class as JSON.

        :param filename: path of the file to write.
        """
        with open(filename, 'w') as f:
            json.dump(asdict(self), f, default=str, indent=2, cls=DecimalEncoder)
197 |
198 |
class DecimalEncoder(json.JSONEncoder):
    """JSON encoder that serialises `Decimal` values as floats.

    `json.JSONEncoder.__init__` stores a `default=` argument as an instance
    attribute, which shadows a `default()` method defined on a subclass.
    Callers in this module pass both `default=str` and `cls=DecimalEncoder`,
    so the supplied callable is kept aside here and only used as a fallback
    for objects that are not `Decimal`.
    """

    def __init__(self, *args, default=None, **kwargs):
        """Create the encoder, keeping `default` as a fallback converter."""
        super().__init__(*args, **kwargs)
        self._fallback_default = default

    def default(self, obj):
        """Convert objects the base encoder cannot serialise.

        :param obj: object that is not natively JSON serialisable.
        :returns: a float for `Decimal`, else the result of the fallback
            callable given at construction.
        :raises TypeError: if `obj` is not a `Decimal` and no fallback
            callable was given.
        """
        if isinstance(obj, Decimal):
            return float(obj)
        if self._fallback_default is not None:
            return self._fallback_default(obj)
        return super().default(obj)
207 |
--------------------------------------------------------------------------------
/bin/workflow_glue/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """__init__.py for the tests."""
2 |
--------------------------------------------------------------------------------
/bin/workflow_glue/tests/conftest.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """Pytests argument definitions."""
3 |
4 |
def pytest_addoption(parser):
    """Register the `--test_data` option on the given parser.

    The option stores its value and defaults to `/host/test_data`.
    """
    option_settings = {
        "action": "store",
        "default": "/host/test_data",
    }
    parser.addoption("--test_data", **option_settings)
12 |
--------------------------------------------------------------------------------
/bin/workflow_glue/tests/test_deconcatenate.py:
--------------------------------------------------------------------------------
1 | """Test deconcatenate.py."""
2 |
3 | import deconcatenate
4 | import mappy as mp
5 | import pytest
6 |
7 |
@pytest.mark.parametrize("test_data, approx_size, expected", [
    # is approx size so don't remove repeat
    ("test_data/workflow_glue/deconcatenate/barcode01.fasta",
        4690, "test_data/workflow_glue/deconcatenate/barcode01_expected.fasta"),
    # single repeat
    ("test_data/workflow_glue/deconcatenate/barcode02.fasta",
        2345, "test_data/workflow_glue/deconcatenate/barcode02_expected.fasta"),
    ("test_data/workflow_glue/deconcatenate/barcode03.fasta",
        2345, "test_data/workflow_glue/deconcatenate/barcode03_expected.fasta"),
    # multiple repeats, remove one
    ("test_data/workflow_glue/deconcatenate/barcode04.fasta",
        4690, "test_data/workflow_glue/deconcatenate/barcode04_expected.fasta"),
    # no self alignment
    ("test_data/workflow_glue/deconcatenate/barcode05.fasta",
        1000, "test_data/workflow_glue/deconcatenate/barcode05_expected.fasta")
])
def test_deconcatenate(test_data, approx_size, expected):
    """Test deconcatenate.

    Each record of the input FASTA is deconcatenated and compared with the
    record at the same position in the expected FASTA.
    """
    # Materialise both record streams. `zip` stops at the shorter input, so
    # an empty or truncated file would otherwise let the test pass without
    # comparing anything.
    # NOTE(review): mappy is understood to yield no records for an
    # unreadable path rather than raising -- confirm.
    test_seq_list = list(mp.fastx_read(test_data))
    expected_seq_list = list(mp.fastx_read(expected))
    assert test_seq_list, f"no sequences read from {test_data}"
    assert len(test_seq_list) == len(expected_seq_list)
    for test_seq, exp_seq in zip(test_seq_list, expected_seq_list):
        corrected = deconcatenate.deconcatenate(test_seq[1], approx_size)
        assert corrected == exp_seq[1]
31 |
--------------------------------------------------------------------------------
/bin/workflow_glue/tests/test_find_inserts.py:
--------------------------------------------------------------------------------
1 | """Test find_inserts.py."""
2 | import os
3 |
4 | import pandas as pd
5 | import pysam
6 | import pytest
7 | import workflow_glue.find_inserts as find_inserts
8 |
9 | # Use FASTA files as expected inserts.
10 | # The same sequences used for gitlab CI test.
11 | EXPECTED_INSERTS = [
12 | "barcode01.insert.fasta",
13 | "barcode02.insert.fasta",
14 | "barcode03.insert.fasta",
15 | "barcode04.insert.fasta"]
16 |
17 |
def get_fasta_seq_dic(file_path, file_list):
    """Map the first reference name of each FASTA file to its sequence.

    :param file_path: directory containing the FASTA files.
    :param file_list: file names inside `file_path`.
    :returns: dict of reference name to sequence, one entry per file.
    """
    seqs_by_name = {}
    for fasta_name in file_list:
        with pysam.FastaFile(f"{file_path}/{fasta_name}") as fasta:
            first_ref = fasta.references[0]
            seqs_by_name[first_ref] = fasta.fetch(first_ref)
    return seqs_by_name
28 |
29 |
@pytest.fixture
def test_data(request):
    """Return the find_inserts test data directory under `--test_data`."""
    data_root = request.config.getoption("--test_data")
    return os.path.join(data_root, "workflow_glue", "find_inserts")
37 |
38 |
@pytest.mark.parametrize(
    # Adjacent literals instead of a backslash continuation inside the
    # string, which would embed source indentation in the argument names.
    "test_data_fixture, bed_files, assemblies_dir, "
    "expected_inserts, expected_df_file",
    [(
        "test_data", "insert_beds",
        "assemblies", EXPECTED_INSERTS, "expected_df.csv")]
)
def test_read_seqkit(
        test_data_fixture, bed_files, assemblies_dir,
        expected_inserts, expected_df_file, request):
    """Test insert sequence generation from seqkit amplicon output.

    Compares the data frame returned by `read_seqkit` with the expected CSV
    and checks that every expected insert is present with the same sequence.
    """
    # Prepare expected insert dictionary and data frame
    test_data_dir = request.getfixturevalue(test_data_fixture)
    bed_files = f"{test_data_dir}/{bed_files}"
    expected_fp = f"{test_data_dir}/expected_insert"
    expected_seqs_dic = get_fasta_seq_dic(expected_fp, expected_inserts)
    exp_df_file = f"{test_data_dir}/{expected_df_file}"
    expected_df = pd.read_csv(exp_df_file).sort_values('Sample')

    # Get actual insert dictionary and data frame
    assemblies_dir = f"{test_data_dir}/{assemblies_dir}"
    sk_amplicon_beds = [
        os.path.join(bed_files, file) for file in os.listdir(bed_files)]
    actual_df, actual_seqs_dic = find_inserts.read_seqkit(
        sk_amplicon_beds, assemblies_dir=assemblies_dir)

    # Compare
    pd.testing.assert_frame_equal(
        expected_df,
        actual_df.sort_values('Sample').reset_index(drop=True),
        check_index_type=False)
    for k, v in expected_seqs_dic.items():
        assert actual_seqs_dic[k] == v
72 |
73 |
@pytest.mark.parametrize(
    "seq, expected",
    [
        ("GGGATATAGCCCCGCATAT", "ATATGCGGGGCTATATCCC"),
        ("TATCCCGCCCCCXCAGCTTGCCAGNTCTTT",
            "AAAGANCTGGCAAGCTGXGGGGGCGGGATA")
    ]
)
def test_reverse_complement(seq, expected):
    """Check `reverse_complement` output against known sequences."""
    assert find_inserts.reverse_complement(seq) == expected
86 |
--------------------------------------------------------------------------------
/bin/workflow_glue/tests/test_report.py:
--------------------------------------------------------------------------------
1 | """report tests."""
2 | import os
3 |
4 | import pandas as pd
5 | import pytest
6 | from workflow_glue.report_utils.report_utils import get_cutsite_table
7 |
8 |
@pytest.fixture
def test_data(request):
    """Return the report test data directory under `--test_data`."""
    data_root = request.config.getoption("--test_data")
    return os.path.join(data_root, "workflow_glue", "report")
16 |
17 |
@pytest.mark.parametrize(
    "cutsite_csv,samples,expected_values",
    [
        ("cut_sites.csv",
            ["sample01", "sample02", "sample03", "sample04"],
            [13.61, 28.86, 75.0, "N/A"])
    ]
)
def test_get_cutsite_table(test_data, cutsite_csv, samples, expected_values):
    """Check the table built by `get_cutsite_table` for the given samples."""
    wanted = pd.DataFrame({
        "Sample": samples,
        "Linearisation efficiency (%)": expected_values,
    })
    observed = get_cutsite_table(f"{test_data}/{cutsite_csv}", samples)
    # compare on a fresh 0..n-1 index so only the contents matter
    pd.testing.assert_frame_equal(
        observed.reset_index(drop=True),
        wanted.reset_index(drop=True))
35 |
--------------------------------------------------------------------------------
/bin/workflow_glue/tests/test_run_plannotate.py:
--------------------------------------------------------------------------------
1 | """Test run_plannotate.py."""
2 | from datetime import date, datetime
3 | import filecmp
4 | import os
5 |
6 | import pytest
7 | import workflow_glue.run_plannotate as run_plannotate
8 |
9 |
10 | EXPECTED = [
11 | ("barcode01.fasta", "barcode01.annotations.gbk")
12 | ]
13 |
14 |
@pytest.fixture
def test_data(request):
    """Return the run_plannotate test data directory under `--test_data`."""
    data_root = request.config.getoption("--test_data")
    return os.path.join(data_root, "workflow_glue", "run_plannotate")
22 |
23 |
@pytest.mark.parametrize("assembly_file,gbk_file", EXPECTED)
def test_per_assembly(test_data, assembly_file, gbk_file, tmpdir):
    """Test per assembly function in run_plannotate outputs correct gbk.

    The expected GenBank file is a template with a `{date}` placeholder that
    is filled with today's date before it is compared with the output.
    """
    retval = os.getcwd()
    os.chdir(tmpdir)
    try:
        # add date to template
        today = date.today()
        x = datetime.strptime(str(today), '%Y-%m-%d')
        fmt_date = x.strftime('%d-%b-%Y').upper()
        input_gbk = f"{test_data}/{gbk_file}"
        with open(input_gbk, "r") as inputfile:
            expected_gbk = inputfile.read().format(date=fmt_date)
        with open("gbk_with_date.gbk", "w") as outfile:
            outfile.write(expected_gbk)

        # run plannotate
        run_plannotate.make_yaml('Default')
        assembly = f"{test_data}/{assembly_file}"
        run_plannotate.per_assembly(assembly, "barcode01")
        gbk = "barcode01.annotations.gbk"
        # shallow=False compares file contents rather than trusting
        # matching os.stat() signatures
        gbk_bool = filecmp.cmp(gbk, "gbk_with_date.gbk", shallow=False)
    finally:
        # always restore the working directory so that a failure here does
        # not change the cwd for the tests that follow
        os.chdir(retval)
    assert gbk_bool is True
48 |
--------------------------------------------------------------------------------
/bin/workflow_glue/trim.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """Trim sequences to the range given by `trim=` in their header comment."""
3 |
4 | import argparse
5 | import sys
6 |
7 | import pysam
8 | from .util import wf_parser # noqa: ABS101
9 |
10 |
def get_output_handler(path):
    """Return a writable handle: `sys.stdout` for '-', else the opened file.

    :param path: output path, or '-' for standard output.
    :returns: `sys.stdout` or a file object opened in text write mode.
    """
    if path == '-':
        return sys.stdout
    return open(path, 'w')
18 |
19 |
def trim(entry):
    """Trim following fastq comment suggestion.

    The comment is read as whitespace-separated `key=value` tokens; tokens
    without `=` are ignored. The `trim` value is either `start-end`, giving
    `sequence[start:end]`, or a single integer.

    :param entry: record with `comment` and `sequence` string attributes.
    :returns: the trimmed sequence.
    :raises KeyError: if the comment has no `trim` key.
    :raises ValueError: if the `trim` value is not made of integers.
    """
    fields = {}
    for token in entry.comment.split():
        key, sep, value = token.partition('=')
        if sep:
            fields[key] = value

    bounds = [int(i) for i in fields['trim'].split('-')]

    if len(bounds) > 1:
        return entry.sequence[bounds[0]:bounds[1]]
    # NOTE(review): a single value selects one character rather than a
    # slice; kept as in the original -- confirm this is intended.
    return entry.sequence[bounds[0]]
35 |
36 |
def main(args):
    """For each sequence, trim and write to output.

    Records whose comment does not contain `trim=` are skipped. Nothing is
    written, and no output file is created, when no record is trimmed.

    :param args: parsed arguments with `sequence` (input path) and `output`
        (output path, or '-' for standard output).
    """
    trimmed = []

    with pysam.FastxFile(args.sequence) as fastx:
        for entry in fastx:
            # NOTE(review): pysam is understood to report a missing comment
            # as None; guard so such records are skipped -- confirm.
            if not entry.comment or 'trim=' not in entry.comment:
                continue
            trimmed.append([entry.name, trim(entry)])

    if not trimmed:
        return

    handler = get_output_handler(args.output)
    try:
        for name, seq in trimmed:
            handler.write(f">{name}\n{seq}\n")
    finally:
        if handler is sys.stdout:
            # leave the shared stdout stream open for the rest of the process
            handler.flush()
        else:
            handler.close()
56 |
57 |
def argparser():
    """Argument parser for entrypoint.

    :returns: parser with a positional `sequence` argument and an optional
        `-o/--output` argument that defaults to '-'.
    """
    parser = wf_parser("trim")
    parser.add_argument(
        dest="sequence",
        help="File in .FASTA format containing a single sequence/contig."
    )

    parser.add_argument(
        "-o",
        "--output",
        dest="output",
        default="-",
        help="Path at which to write the fixed sequence/contig.",
        required=False
    )
    return parser
75 |
76 |
if __name__ == "__main__":
    # Use this module's `argparser()`; `argparse` is the stdlib module and
    # calling it raises "TypeError: 'module' object is not callable".
    args = argparser().parse_args()
    main(args)
80 |
--------------------------------------------------------------------------------
/bin/workflow_glue/util.py:
--------------------------------------------------------------------------------
1 | """The odd helper function.
2 |
3 | Be careful what you place in here. This file is imported into all glue.
4 | """
5 | import argparse
6 | import logging
7 |
8 |
9 | _log_name = None
10 |
11 |
def get_main_logger(name):
    """Configure logging and return the top-level logger.

    Stores `name` in the module-level `_log_name` and calls
    `logging.basicConfig` with INFO level and a timestamped format.

    :param name: name of the top-level logger.
    """
    global _log_name
    _log_name = name
    log_settings = {
        "format": '[%(asctime)s - %(name)s] %(message)s',
        "datefmt": '%H:%M:%S',
        "level": logging.INFO,
    }
    logging.basicConfig(**log_settings)
    return logging.getLogger(name)
20 |
21 |
def get_named_logger(name):
    """Return a child logger of the main logger.

    The child part of the name is padded or cut to exactly ten characters so
    that log lines stay aligned.

    :param name: name of logger.
    """
    fixed_width = f"{name:<10.10}"
    return logging.getLogger(f"{_log_name}.{fixed_width}")
30 |
31 |
def wf_parser(name):
    """Create the argument parser for a workflow command.

    The parser shows argument defaults in its help text and has no built-in
    `-h/--help` option.

    :param name: program name of the parser.
    """
    parser = argparse.ArgumentParser(
        prog=name,
        add_help=False,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    return parser
38 |
39 |
40 | def _log_level():
41 | """Parser to set logging level and acquire software version/commit."""
42 | parser = argparse.ArgumentParser(
43 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, add_help=False)
44 |
45 | modify_log_level = parser.add_mutually_exclusive_group()
46 | modify_log_level.add_argument(
47 | '--debug', action='store_const',
48 | dest='log_level', const=logging.DEBUG, default=logging.INFO,
49 | help='Verbose logging of debug information.')
50 | modify_log_level.add_argument(
51 | '--quiet', action='store_const',
52 | dest='log_level', const=logging.WARNING, default=logging.INFO,
53 | help='Minimal logging; warnings only.')
54 |
55 | return parser
56 |
--------------------------------------------------------------------------------
/bin/workflow_glue/wfg_helpers/__init__.py:
--------------------------------------------------------------------------------
1 | """A collection of helper scripts common to workflows."""
2 |
--------------------------------------------------------------------------------
/bin/workflow_glue/wfg_helpers/check_bam_headers_in_dir.py:
--------------------------------------------------------------------------------
1 | """Check (u)BAM files for `@SQ` lines whether they are the same in all headers."""
2 |
3 | from pathlib import Path
4 | import sys
5 |
6 | import pysam
7 |
8 | from ..util import get_named_logger, wf_parser # noqa: ABS101
9 |
10 |
def main(args):
    """Inspect the headers of all files in a directory and report on stdout.

    Writes `IS_UNALIGNED=<0|1>;MIXED_HEADERS=<0|1>;IS_SORTED=<0|1>` to
    standard output so that the values can be set as environment variables.

    :param args: parsed arguments; `input_path` is the directory to scan.
    :raises ValueError: if `input_path` is not a directory or is empty.
    """
    logger = get_named_logger("checkBamHdr")

    if not args.input_path.is_dir():
        raise ValueError(f"Input path '{args.input_path}' must be a directory.")

    xam_paths = list(args.input_path.glob("*"))
    if len(xam_paths) == 0:
        raise ValueError(f"No files found in input directory '{args.input_path}'.")

    # `reference_sq` holds the reduced `@SQ` lines of the first file. Any
    # later file whose `@SQ` lines differ (including present vs. absent)
    # marks the set as having mixed headers. `sorted_xam` records whether a
    # header declaring coordinate sort order was seen.
    reference_sq = None
    mixed_headers = False
    sorted_xam = False
    for xam_path in xam_paths:
        with pysam.AlignmentFile(xam_path, check_sq=False) as xam:
            # compare only the SN/LN/M5 elements of SQ to avoid labelling XAM
            # with same reference but different SQ.UR as mixed_header
            # (see CW-4842)
            sq_lines = []
            for sq in xam.header.get("SQ", []):
                sq_lines.append(
                    {"SN": sq["SN"], "LN": sq["LN"], "M5": sq.get("M5")})
            hd_lines = xam.header.get("HD")
            # When there is more than one BAM, merging/sorting
            # will happen regardless of this flag.
            if hd_lines is not None and hd_lines.get('SO') == 'coordinate':
                sorted_xam = True
            if reference_sq is None:
                # first file: remember its `@SQ` lines for later comparisons
                reference_sq = sq_lines
            elif sq_lines != reference_sq:
                mixed_headers = True
                break

    # without mixed headers every file matched the first one, so the last
    # file having no `@SQ` lines means that none of them had any
    is_unaligned = not mixed_headers and not sq_lines
    report = (
        f"IS_UNALIGNED={int(is_unaligned)};" +
        f"MIXED_HEADERS={int(mixed_headers)};" +
        f"IS_SORTED={int(sorted_xam)}"
    )
    sys.stdout.write(report)
    logger.info(f"Checked (u)BAM headers in '{args.input_path}'.")
65 |
66 |
def argparser():
    """Build the parser for the `check_bam_headers_in_dir` entry point.

    :returns: parser with a single positional `input_path` argument that is
        converted to a `pathlib.Path`.
    """
    cli = wf_parser("check_bam_headers_in_dir")
    cli.add_argument(
        "input_path",
        type=Path,
        help="Path to target directory",
    )
    return cli
72 |
--------------------------------------------------------------------------------
/bin/workflow_glue/wfg_helpers/check_sample_sheet.py:
--------------------------------------------------------------------------------
1 | """Check if a sample sheet is valid."""
2 | import codecs
3 | import csv
4 | import os
5 | import re
6 | import sys
7 |
8 | from ..util import get_named_logger, wf_parser # noqa: ABS101
9 |
10 |
11 | # Some Excel users save their CSV as UTF-8 (and occasionally for a reason beyond my
12 | # comprehension, UTF-16); Excel then adds a byte order mark (unnecessarily for UTF-8
13 | # I should add). If we do not handle this with the correct encoding, the mark will
14 | # appear in the parsed data, causing the header to be malformed.
15 | # See CW-2310
def determine_codec(f):
    """Peek at a file and return an appropriate reading codec.

    :param f: path of the file to inspect.
    :returns: 'utf-8-sig', 'utf-16' or 'utf-32' when the file starts with the
        matching byte order mark, else None so that the file is opened with
        the default encoding.
    """
    with open(f, 'rb') as f_bytes:
        # Could use chardet here if we need to expand codec support
        initial_bytes = f_bytes.read(8)

    # The UTF-32 marks must be tested before the UTF-16 ones: the UTF-16-LE
    # mark (FF FE) is a prefix of the UTF-32-LE mark (FF FE 00 00), so
    # testing UTF-16 first would report UTF-32-LE files as utf-16.
    for bom, encoding_name in (
        (codecs.BOM_UTF8, "utf-8-sig"),  # use the -sig codec to drop the mark
        (codecs.BOM_UTF32_BE, "utf-32"),  # don't specify LE or BE to drop mark
        (codecs.BOM_UTF32_LE, "utf-32"),
        (codecs.BOM_UTF16_BE, "utf-16"),  # again skip LE or BE to drop mark
        (codecs.BOM_UTF16_LE, "utf-16"),
    ):
        if initial_bytes.startswith(bom):
            return encoding_name
    return None  # will cause file to be opened with default encoding
32 |
33 |
def main(args):
    """Run the entry point.

    Validate the sample sheet at `args.sample_sheet`. Every problem found is
    reported by writing a message to STDOUT followed by `sys.exit()` (i.e. with
    the default exit status) instead of raising an exception.

    The checks performed are, in order: the file exists and parses as CSV, all
    rows have the same number of cells, the `barcode` and `alias` columns are
    present, there is at least one sample, barcodes match `barcode` followed by
    at least two digits and share the same length, aliases do not start with
    `barcode`, barcodes and aliases are unique, the optional `type` column only
    holds allowed values (and all of `args.required_sample_types`), and the
    optional `analysis_group` column has a value in every row.

    :param args: parsed arguments with the attributes `sample_sheet` (path of
        the sample sheet) and `required_sample_types` (sample types that must
        each appear at least once; may be empty or `None`).
    """
    logger = get_named_logger("checkSheet")

    barcodes = []
    aliases = []
    sample_types = []
    analysis_groups = []
    allowed_sample_types = [
        "test_sample", "positive_control", "negative_control", "no_template_control"
    ]

    # `isfile` is also `False` for paths that do not exist
    if not os.path.isfile(args.sample_sheet):
        sys.stdout.write("Could not open sample sheet file.")
        sys.exit()

    try:
        encoding = determine_codec(args.sample_sheet)
        with open(args.sample_sheet, "r", encoding=encoding) as f:
            try:
                # Excel files don't throw any error until here
                csv.Sniffer().sniff(f.readline())
                f.seek(0)  # return to initial position again
            except Exception as e:
                # Excel fails with UniCode error
                sys.stdout.write(
                    "The sample sheet doesn't seem to be a CSV file.\n"
                    "The sample sheet has to be a CSV file.\n"
                    "Please verify that the sample sheet is a CSV file.\n"
                    f"Parsing error: {e}"
                )

                sys.exit()

            csv_reader = csv.DictReader(f)
            n_cols = None
            for n_row, row in enumerate(csv_reader, start=1):
                if n_cols is None:
                    # the first row defines the expected number of fields
                    n_cols = len(row)
                # check we got the same number of fields; `DictReader` pads rows
                # that are shorter than the header with `None` values, so these
                # rows have the expected length and are detected via the padding
                if len(row) != n_cols or None in row.values():
                    sys.stdout.write(
                        f"Unexpected number of cells in row number {n_row}"
                    )
                    sys.exit()
                # required columns
                for column, values in (("barcode", barcodes), ("alias", aliases)):
                    try:
                        values.append(row[column])
                    except KeyError:
                        sys.stdout.write(f"'{column}' column missing")
                        sys.exit()
                # optional columns
                for column, values in (
                    ("type", sample_types), ("analysis_group", analysis_groups)
                ):
                    if column in row:
                        values.append(row[column])
    except Exception as e:
        sys.stdout.write(f"Parsing error: {e}")
        sys.exit()

    # a sheet without any rows cannot be checked further (and would otherwise
    # fail with an `IndexError` in the barcode length check below)
    if not barcodes:
        sys.stdout.write("Sample sheet does not contain any samples")
        sys.exit()

    # check barcodes are correct format
    for barcode in barcodes:
        if not re.match(r'^barcode\d\d+$', barcode):
            sys.stdout.write("values in 'barcode' column are incorrect format")
            sys.exit()

    # check aliases are correct format
    # for now we have decided they may not start with "barcode"
    for alias in aliases:
        if alias.startswith("barcode"):
            sys.stdout.write("values in 'alias' column must not begin with 'barcode'")
            sys.exit()

    # check barcodes are all the same length
    if len({len(barcode) for barcode in barcodes}) > 1:
        sys.stdout.write("values in 'barcode' column are different lengths")
        sys.exit()

    # check barcode and alias values are unique
    if len(barcodes) > len(set(barcodes)):
        sys.stdout.write("values in 'barcode' column not unique")
        sys.exit()
    if len(aliases) > len(set(aliases)):
        sys.stdout.write("values in 'alias' column not unique")
        sys.exit()

    if sample_types:
        # check if "type" column has unexpected values
        unexp_type_vals = set(sample_types) - set(allowed_sample_types)

        if unexp_type_vals:
            sys.stdout.write(
                f"found unexpected values in 'type' column: {unexp_type_vals}. "
                f"Allowed values are: {allowed_sample_types}"
            )
            sys.exit()

        if args.required_sample_types:
            for required_type in args.required_sample_types:
                if required_type not in allowed_sample_types:
                    sys.stdout.write(f"Not an allowed sample type: {required_type}")
                    sys.exit()
                if sample_types.count(required_type) < 1:
                    sys.stdout.write(
                        f"Sample sheet requires at least 1 of {required_type}")
                    sys.exit()
    if analysis_groups:
        # if there was a "analysis_group" column, make sure it had values for all
        # samples
        if not all(analysis_groups):
            sys.stdout.write(
                "if an 'analysis_group' column exists, it needs values in each row"
            )
            sys.exit()

    logger.info(f"Checked sample sheet {args.sample_sheet}.")
161 |
162 |
def argparser():
    """Argument parser for entrypoint.

    Registers the positional `sample_sheet` argument and the optional
    `--required_sample_types` list on the parser obtained from `wf_parser`.
    """
    required_types_help = (
        "List of required sample types. Each sample type provided must "
        "appear at least once in the sample sheet"
    )
    sheet_parser = wf_parser("check_sample_sheet")
    sheet_parser.add_argument("sample_sheet", help="Sample sheet to check")
    sheet_parser.add_argument(
        "--required_sample_types", nargs="*", help=required_types_help
    )
    return sheet_parser
174 |
--------------------------------------------------------------------------------
/bin/workflow_glue/wfg_helpers/check_xam_index.py:
--------------------------------------------------------------------------------
1 | """Validate a single (u)BAM file index."""
2 |
3 | from pathlib import Path
4 | import sys
5 |
6 | import pysam
7 |
8 | from ..util import get_named_logger, wf_parser # noqa: ABS101
9 |
10 |
def validate_xam_index(xam_file):
    """Check whether a XAM file has a usable index by calling `fetch` on it.

    `fetch` fails on files without a valid index with
        ValueError: fetch called on bamfile without index
    which is translated into the return value here.

    :param xam_file: path of the XAM file to check.
    :returns: `True` if `fetch` succeeded, `False` if it raised `ValueError`.
    """
    with pysam.AlignmentFile(xam_file, check_sq=False) as xam:
        try:
            xam.fetch()
        except ValueError:
            return False
        return True
24 |
25 |
def main(args):
    """Run the entry point.

    Write `HAS_VALID_INDEX=1` to STDOUT if the index of `args.input_xam` was
    found to be valid and `HAS_VALID_INDEX=0` otherwise.
    """
    logger = get_named_logger("checkBamIdx")

    # the result is written as `KEY=VALUE` so that it can be set as env.
    index_flag = int(validate_xam_index(args.input_xam))
    sys.stdout.write(f"HAS_VALID_INDEX={index_flag}")

    logger.info(f"Checked (u)BAM index for: '{args.input_xam}'.")
37 |
38 |
def argparser():
    """Argument parser for entrypoint.

    Registers the single positional `input_xam` argument (converted to a
    `Path`) on the parser obtained from `wf_parser`.
    """
    xam_parser = wf_parser("check_xam_index")
    xam_parser.add_argument("input_xam", help="Path to target XAM", type=Path)
    return xam_parser
44 |
--------------------------------------------------------------------------------
/bin/workflow_glue/wfg_helpers/get_max_depth_locus.py:
--------------------------------------------------------------------------------
1 | """Find max depth window in a `mosdepth` regions BED file and write as locus string."""
2 |
3 | from pathlib import Path
4 | import sys
5 |
6 | import pandas as pd
7 |
8 | from ..util import get_named_logger, wf_parser # noqa: ABS101
9 |
10 |
def main(args):
    """Run the entry point.

    Read the regions BED file at `args.depths_bed` (columns: ref, start, end,
    depth), find the window with the largest depth and write the depth and a
    locus string (`<ref>:<start>-<end>`), separated by a tab, to STDOUT. The
    locus covers the whole reference if that is shorter than
    `args.locus_size`; otherwise it is a region of about `args.locus_size`
    bases centred on the window and shifted to stay within the reference.
    """
    logger = get_named_logger("getMaxDepth")

    # read the regions BED file
    df = pd.read_csv(
        args.depths_bed, sep="\t", header=None, names=["ref", "start", "end", "depth"]
    )

    # get the window with the largest depth
    ref, start, end, depth = df.loc[df["depth"].idxmax()]

    # get the length of the reference of that window (the largest window end on
    # that reference; using `max` avoids relying on the order of the rows)
    ref_length = df.loc[df["ref"] == ref, "end"].max()

    # show the whole reference in case it's shorter than the desired locus size
    if ref_length < args.locus_size:
        start = 1
        end = ref_length
    else:
        # otherwise, show a region of the desired size around the window
        half_size = args.locus_size // 2
        mid = (start + end) // 2
        start = mid - half_size
        end = mid + half_size
        # check if the region starts below `1` or ends beyond the end of the reference
        if start < 1:
            start = 1
            end = args.locus_size
        if end > ref_length:
            # `+ 1` keeps the locus `locus_size` bases wide (like the clamp at the
            # start of the reference above) and prevents a `start` of `0` when the
            # reference is exactly `locus_size` long
            start = ref_length - args.locus_size + 1
            end = ref_length

    # write depth and locus string
    sys.stdout.write(f"{depth}\t{ref}:{start}-{end}")

    logger.info("Wrote locus with maximum depth to STDOUT.")
48 |
49 |
def argparser():
    """Argument parser for entrypoint.

    Registers the positional arguments `depths_bed` (converted to a `Path`)
    and `locus_size` (converted to `int`) on the parser obtained from
    `wf_parser`.
    """
    locus_parser = wf_parser("get_max_depth_locus")
    # insertion order of the dict is the order of the positional arguments
    positionals = {
        "depths_bed": dict(
            type=Path,
            help="path to mosdepth regions depth file (can be compressed)",
        ),
        "locus_size": dict(
            type=int, help="size of the locus in basepairs (e.g. '2000')"
        ),
    }
    for arg_name, options in positionals.items():
        locus_parser.add_argument(arg_name, **options)
    return locus_parser
62 |
--------------------------------------------------------------------------------
/data/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epi2me-labs/wf-clone-validation/e0e56b944dcf3efd54cde2bdc8dced104fc6c0bc/data/.gitkeep
--------------------------------------------------------------------------------
/data/OPTIONAL_FILE:
--------------------------------------------------------------------------------
1 | # Nothing to see here. A sentinel file to replace real data.
2 | # e.g.:
3 | #
4 | # input:
5 | # file some_data
6 | # file extra_data
7 | # script:
8 | # def extra = extra_data.name != 'OPTIONAL_FILE' ? "--extra-data $extra_data" : ''
9 | # """
10 | # command ${some_data} ${extra}
11 | # """
12 |
--------------------------------------------------------------------------------
/data/primers.tsv:
--------------------------------------------------------------------------------
1 | pRham GACCACAACGGTTTCCCTCTAG TGGGTAACTTTGTATGTGTCCGCAGC
2 | T7 TAATACGACTCACTATAGGG GCTAGTTATTGCTCAGCGG
3 |
--------------------------------------------------------------------------------
/docs/01_brief_description.md:
--------------------------------------------------------------------------------
1 | De-novo reconstruction of plasmid sequences.
--------------------------------------------------------------------------------
/docs/02_introduction.md:
--------------------------------------------------------------------------------
1 | Among other uses this workflow could determine the success of a molecular cloning experiment and determine whether one DNA sequence has been correctly inserted into another as an experimentalist was expecting.
2 |
3 | In brief, this workflow will perform the following:
4 |
5 | + *De novo* assembly of plasmids.
6 | + Annotation of the full assembly.
7 | + Provide a per base quality score of the plasmid assembly.
8 | + Locate an insert sequence in a plasmid using provided primers.
9 | + Multiple sequence alignment between insert sequences from different samples.
10 | + Create an assembly dot plot showing repetitive regions in the created assemblies.
11 | + Comparison between an insert reference and the assembled insert.
--------------------------------------------------------------------------------
/docs/03_compute_requirements.md:
--------------------------------------------------------------------------------
1 | Recommended requirements:
2 |
3 | + CPUs = 4
4 | + Memory = 8GB
5 |
6 | Minimum requirements:
7 |
8 | + CPUs = 4
9 | + Memory = 8GB
10 |
11 | Approximate run time: 6 minutes per sample for 10,000 reads
12 |
13 | ARM processor support: True
14 |
--------------------------------------------------------------------------------
/docs/04_install_and_run.md:
--------------------------------------------------------------------------------
1 |
2 | These are instructions to install and run the workflow on command line.
3 | You can also access the workflow via the
4 | [EPI2ME Desktop application](https://labs.epi2me.io/downloads/).
5 |
6 | The workflow uses [Nextflow](https://www.nextflow.io/) to manage
7 | compute and software resources,
8 | therefore Nextflow will need to be
9 | installed before attempting to run the workflow.
10 |
11 | The workflow can currently be run using either
12 | [Docker](https://www.docker.com/products/docker-desktop)
13 | or [Singularity](https://docs.sylabs.io/guides/3.0/user-guide/index.html)
14 | to provide isolation of the required software.
15 | Both methods are automated out-of-the-box provided
16 | either Docker or Singularity is installed.
17 | This is controlled by the
18 | [`-profile`](https://www.nextflow.io/docs/latest/config.html#config-profiles)
19 | parameter as exemplified below.
20 |
21 | It is not required to clone or download the git repository
22 | in order to run the workflow.
23 | More information on running EPI2ME workflows can
24 | be found on our [website](https://labs.epi2me.io/wfindex).
25 |
26 | The following command can be used to obtain the workflow.
27 | This will pull the repository in to the assets folder of
28 | Nextflow and provide a list of all parameters
29 | available for the workflow as well as an example command:
30 |
31 | ```
32 | nextflow run epi2me-labs/wf-clone-validation --help
33 | ```
34 | To update a workflow to the latest version on the command line use
35 | the following command:
36 | ```
37 | nextflow pull epi2me-labs/wf-clone-validation
38 | ```
39 |
40 | A demo dataset is provided for testing of the workflow.
41 | It can be downloaded and unpacked using the following commands:
42 | ```
43 | wget https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-clone-validation/wf-clone-validation-demo.tar.gz
44 | tar -xzvf wf-clone-validation-demo.tar.gz
45 | ```
46 | The workflow can then be run with the downloaded demo data using:
47 | ```
48 | nextflow run epi2me-labs/wf-clone-validation \
49 | --fastq 'wf-clone-validation-demo/fastq' \
50 | --primers 'wf-clone-validation-demo/primers.tsv' \
51 | --sample_sheet 'wf-clone-validation-demo/sample_sheet.csv' \
52 | -profile standard
53 | ```
54 |
55 | For further information about running a workflow on
56 | the command line see https://labs.epi2me.io/wfquickstart/
57 |
--------------------------------------------------------------------------------
/docs/05_related_protocols.md:
--------------------------------------------------------------------------------
1 | This workflow is designed to take input sequences that have been produced from [Oxford Nanopore Technologies](https://nanoporetech.com/) devices using this protocol:
2 |
3 | https://community.nanoporetech.com/docs/prepare/library_prep_protocols/plasmid-sequencing-using-sqk-rbk004/
--------------------------------------------------------------------------------
/docs/06_input_example.md:
--------------------------------------------------------------------------------
1 |
2 | This workflow accepts either FASTQ or BAM files as input.
3 |
4 | The FASTQ or BAM input parameters for this workflow accept one of three cases: (i) the path to a single FASTQ or BAM file; (ii) the path to a top-level directory containing FASTQ or BAM files; (iii) the path to a directory containing one level of sub-directories which in turn contain FASTQ or BAM files. In the first and second cases (i and ii), a sample name can be supplied with `--sample`. In the last case (iii), the data is assumed to be multiplexed with the names of the sub-directories as barcodes. In this case, a sample sheet can be provided with `--sample_sheet`.
5 |
6 | ```
7 | (i) (ii) (iii)
8 | input_reads.fastq ─── input_directory ─── input_directory
9 | ├── reads0.fastq ├── barcode01
10 | └── reads1.fastq │ ├── reads0.fastq
11 | │ └── reads1.fastq
12 | ├── barcode02
13 | │ ├── reads0.fastq
14 | │ ├── reads1.fastq
15 | │ └── reads2.fastq
16 | └── barcode03
17 | └── reads0.fastq
18 | ```
19 |
20 | When using a sample sheet the workflow allows the use of additional columns `approx_size`, `full_reference`, `insert_reference`, `host_reference` and `regions_bedfile` which replace parameters `--approx_size`, `--full_reference`, `--insert_reference`, `--host_reference` and `--regions_bedfile` respectively. This allows per-sample variables to be applied rather than global settings. Users should provide the full path to these files; Windows users need to add the prefix `/mnt/c` to all paths. An example sample sheet is shown below.
21 |
22 | ```
23 | alias,barcode,type,approx_size,full_reference,insert_reference,host_reference,regions_bedfile
24 | sample1,barcode01,test_sample,4000,/path/to/full_reference.fasta,/path/to/insert_reference.fasta,/path/to/host_reference.fasta,/path/to/regions_bedfile.bed
25 | sample2,barcode02,test_sample,4000,/path/to/full_reference.fasta,/path/to/insert_reference.fasta,/path/to/host_reference.fasta,/path/to/regions_bedfile.bed
26 | sample3,barcode03,test_sample,7000,/path/to/full_reference_alt.fasta,/path/to/insert_reference_alt.fasta,/path/to/host_reference_alt.fasta,/path/to/regions_bedfile_alt.bed
27 | ```
--------------------------------------------------------------------------------
/docs/07_outputs.md:
--------------------------------------------------------------------------------
1 | Output files may be aggregated including information for all samples or provided per sample. Per-sample files will be prefixed with respective aliases and represented below as {{ alias }}.
2 |
3 | | Title | File path | Description | Per sample or aggregated |
4 | |-------|-----------|-------------|--------------------------|
5 | | workflow report | wf-clone-validation-report.html | A report bringing together the main results of the workflow, across samples. | aggregated |
6 | | sample status | sample_status.txt | A CSV file with per-sample assembly success or failure reasons | aggregated |
7 | | plasmid annotations | plannotate.json | Plasmid annotations in a JSON structure. | aggregated |
8 | | annotations bed | {{ alias }}.annotations.bed | Plasmid annotations in a BED file format for onward use | per-sample |
9 | | annotations gbk | {{ alias }}.annotations.gbk | Plasmid annotations in a GBK file format for onward use | per-sample |
10 | | Assembly FASTQ | {{ alias }}.final.fastq | Sequence and quality score of the final assembly. | per-sample |
11 | | Assembly statistics | {{ alias }}.assembly_stats.tsv | Assembly statistics from fastcat. | per-sample |
12 | | Insert FASTA | {{ alias }}.insert.fasta | Insert sequence found in the final assembly, only relevant if the primers parameter was used. | per-sample |
13 | | Variant stats report | {{ alias }}.full_construct.stats | A BCF stats report with any variants found, only relevant if a full reference was provided. | per-sample |
14 | | Variants BCF file | {{ alias }}.full_construct.calls.bcf | A BCF file with any variants found per sample, only relevant if a full reference was provided. | per-sample |
15 | | Reference alignment | {{ alias }}.bam | Reference aligned with the assembly in BAM format, only relevant if a full reference was provided. | per-sample |
16 | | Reference alignment index | {{ alias }}.bam.bai | The index for the reference aligned with the assembly, only relevant if a full reference was provided. | per-sample |
17 | | Host reference alignment | {{ alias }}.host.bam | Host reference aligned with sample in BAM format, only relevant if a host reference was provided. | per-sample |
18 | | Host reference alignment index | {{ alias }}.host.bam.bai | The index for the host reference aligned with sample, only relevant if a host reference was provided. | per-sample |
19 | | BAM Stats | {{ alias }}.bam.stats | Stats report for the reference aligned with the assembly, only relevant if a full reference was provided. | per-sample |
20 |
--------------------------------------------------------------------------------
/docs/08_pipeline_overview.md:
--------------------------------------------------------------------------------
1 | ### 1. Concatenate input files and generate per read stats.
2 |
3 | The [fastcat](https://github.com/epi2me-labs/fastcat) tool is used to concatenate multifile samples to be processed by the workflow. It will also output per read stats including average read lengths and qualities. Reads with lengths less than 0.5 X and more than 1.5 X the approximate size are filtered out unless the `--large_construct` parameter is provided which indicates the assembly is expected to be larger (50,000-300,000 bps).
4 |
5 | ### 2. Filter out host reference reads
6 |
7 | If a host_reference fasta file is provided, [Minimap2](https://github.com/lh3/minimap2) is used to align all reads to the host_reference, and any aligned reads are filtered out.
8 |
9 | ### 3. Trim reads
10 |
11 | If a trim length is provided, the reads are then trimmed at the ends using [SeqKit](https://bioinf.shenwei.me/seqkit/). Use the default value of 0 if no trimming is desired, such as for non-linearized plasmid sequences or linearized plasmid sequences that have already been trimmed.
12 | At this stage SeqKit is also used to filter out reads that are longer than 1.2 x the approximate size or shorter than 100bp, and reads that don't meet the minimum quality score set by the `min_quality` parameter.
13 |
14 | ### 4. Subsample reads
15 |
16 | The sequences are then subsampled using [Rasusa](https://github.com/mbhall88/rasusa). The subsampling will take the expected coverage parameter in to account; as we will be repeating the assembly 3 times, we subsample to a target of approximately 3x the required coverage. However, this is just a target and if there is not enough data, Rasusa is still able to create the 3 subsamples. The approximate size parameter is also used by Rasusa to work out the target number of bases and therefore number of reads required for each of the subsamples.
17 |
18 | ### 5. Create 3 subsamples
19 |
20 | [Trycycler](https://github.com/rrwick/Trycycler) is used to create 3 subsamples as we will be creating three assemblies and finding the consensus between all three. This consensus generation will be handled by Trycycler.
21 |
22 | ### 6. Assembly
23 |
24 | We perform the assembly for each of the 3 subsamples separately. The assembly is done using either [Flye](https://github.com/fenderglass/Flye) or [Canu](https://github.com/marbl/canu) depending on what is set as the `assembly_tool` parameter. Both Flye and Canu are popular assemblers that usually produce reliable assemblies. Flye is our default assembler as it usually provides reliable assemblies in less time than Canu, and supports ARM processors. If Flye fails to assemble you may wish to try Canu.
25 |
26 | ### 7. De-concatenate
27 |
28 | If there are concatemers in the assembly, these are found using minimap2 and de-concatenated using a custom Python script. If the assembly is already roughly the expected approximate size, this de-concatenate step will be skipped.
29 |
30 | ### 8. Reconcile and polish
31 |
32 | Trycycler is used to reconcile the subsampled assemblies into one final assembly. This is then polished with [Medaka](https://github.com/nanoporetech/medaka). A per-base quality score for the assembly is output by Medaka in a FASTQ file. This is used for creating the mean assembly quality you will find in the report.
33 |
34 | ### 9. Insert location and QC
35 |
36 | SeqKit is used to locate inserts using the primers supplied to the primers parameter.
37 |
38 | A multiple sequence alignment (MSA) will be done using [Pyspoa](https://github.com/nanoporetech/pyspoa). This will be presented in the report to help users compare inserts across samples in a multi-sample run. If an insert reference FASTA file is provided, this will also be included in the MSA.
39 |
40 | If a reference insert FASTA sequence is provided, [BCFtools](https://samtools.github.io/bcftools/bcftools.html) is used to find variants between it and the final insert assembly, which are reported in a BCF file per sample.
41 |
42 | ### 10. Full assembly comparison with a reference
43 |
44 | If a full reference FASTA sequence is provided, Minimap2 is used to align the final assembly with the reference. [BCFtools](https://samtools.github.io/bcftools/bcftools.html) is used to report variants between the reference and the final assembly, which are reported in a BCF stats file per sample.
45 |
46 | ### 11. Annotate
47 |
48 | The assembly is annotated by [pLannotate](https://github.com/barricklab/pLannotate) to show any features that are present. The default database is used, which contains entries from [FPbase](https://www.fpbase.org/), [Swiss-Prot](https://www.expasy.org/resources/uniprotkb-swiss-prot), [Rfam](https://rfam.org/) and [SnapGene](https://www.snapgene.com/). Descriptions, percentage match and length of the match are also provided.
49 |
50 | ### 12. Self alignment
51 |
52 | For each sample a self alignment will be done using [Last](https://gitlab.com/mcfrith/last) and the output will be presented as a dotplot. This can help identify any repetitive regions in your final assembly.
53 |
54 | ### 13. Linearisation efficiency
55 |
56 | If a user provides a `cut_site` column in the sample sheet (per sample short sequences) these will be used to predict linearisation efficiency by calculating how many reads don't span the cut site vs total reads and provided as a percentage.
57 |
--------------------------------------------------------------------------------
/docs/09_troubleshooting.md:
--------------------------------------------------------------------------------
1 | + If there are no assemblies output by the workflow, open the wf-clone-validation-report.html to look at failure reasons. Check the read summary section for quality and quantity
2 | of reads before and after downsampling to ensure there is enough data for the assembly. If there is not sufficient data, you may need to adjust the approx size and coverage options.
3 | + If the workflow fails please run it with the demo data set to ensure the workflow itself is working. This will help us determine if the issue is related to the environment, input parameters or a bug.
4 | + See how to interpret some common nextflow exit codes [here](https://labs.epi2me.io/trouble-shooting/).
--------------------------------------------------------------------------------
/docs/10_FAQ.md:
--------------------------------------------------------------------------------
1 | *I don't know the approximate size of my plasmid?* - On most occasions you can use the mode of the data as an approximate guide. To find the mode, you can run the workflow with the default settings, and from the raw data read length plot find the highest peak. This value should approximate the plasmid size because for most plasmids only one cut is made to the circular plasmid prior to sequencing, meaning each read is of the full plasmid. Furthermore, it is better to overestimate the approximate size than underestimate.
2 |
3 | *Does the workflow report contaminants?* - The workflow has no way of reporting contaminants. However, if contaminants are present, the workflow may struggle to create consistent assemblies and the output assemblies are likely to show low quality. If you have a reference for an expected contaminant, you could use this as the host reference to filter out any reads that align with that.
4 |
5 | *Can I use my own annotation database?* – Currently using your own annotation database is not supported, but we may add it in future.
6 |
7 | *Does this workflow support reference based assembly?* - It does not have a reference based assembly mode.
8 |
9 | *Does this workflow have support for bacterial artificial chromosomes (BACs)?* - This workflow does not yet have BAC support and has not been tested for assembly of genomes larger than 50,000bps.
10 |
11 | If your question is not answered here, please report any issues or suggestions on the [github issues](https://github.com/epi2me-labs/wf-template/issues) page or start a discussion on the [community](https://community.nanoporetech.com/).
--------------------------------------------------------------------------------
/docs/11_other.md:
--------------------------------------------------------------------------------
1 | + [Assembly tools and Flye](https://labs.epi2me.io/assembly-flye/)
2 |
3 | See the [EPI2ME website](https://labs.epi2me.io/) for lots of other resources and blog posts.
--------------------------------------------------------------------------------
/lib/ArgumentParser.groovy:
--------------------------------------------------------------------------------
1 | /* Check arguments of a Nextflow function
2 | *
3 | * Nextflow script does not support the Groovy idiom:
4 | *
5 | * def function(Map args[:], arg1, arg2, ...)
6 | *
7 | * to support unordered kwargs. The methods here are designed
8 | * to reduce boileplate while allowing Nextflow script to implement
9 | *
10 | * def function(Map args[:])
11 | *
12 | * with required and default values. This is similar to some Python
13 | * libraries' (notably matplotlib) extensive use of things like:
14 | *
15 | * def function(*args, **kwargs)
16 | *
17 | * to implement generic APIs. Why do we want to do all this? Because
18 | * we want to write library code with a clean set of required parameters
19 | * but also extensible with non-required parameters with default values.
20 | * This allows us to later add parameters without breaking existing code,
21 | * and is very common practice elsewhere.
22 | */
23 |
24 | import java.util.Set
25 |
class ArgumentParser {
    // names of the arguments that always have to be provided
    Set args
    // optional arguments mapped to the values used when they are not provided
    Map kwargs
    // name of the function the arguments belong to (used in error messages)
    String name

    /* Parse arguments, raising an error on unknown keys
     *
     * Returns `kwargs` overlaid with `given_args`. Throws an Exception if a
     * key of `args` is missing from `given_args` or if `given_args` has a key
     * that is neither in `args` nor in `kwargs`.
     */
    public Map parse_args(LinkedHashMap given_args) {
        Set opt_keys = kwargs.keySet()
        Set given_keys = given_args.keySet()
        check_required(given_keys)
        check_unknown(given_keys, opt_keys)
        return kwargs + given_args
    }

    /* Parse arguments, without raising an error for extra keys
     *
     * Returns `kwargs` overlaid with `given_args` (extra keys are kept).
     * Throws an Exception if a key of `args` is missing from `given_args`.
     */
    public Map parse_known_args(LinkedHashMap given_args) {
        check_required(given_args.keySet())
        return kwargs + given_args
    }

    /* Throw if any of the required `args` is absent from `given` */
    private void check_required(Set given) {
        Set missing_keys = args - given
        if (!missing_keys.isEmpty()) {
            throw new Exception("Missing arguments for function ${name}: ${missing_keys}")
        }
    }

    /* Throw if `given` has keys that are neither required nor optional */
    private void check_unknown(Set given, Set kwargs_keys) {
        Set extra_keys = given - (args + kwargs_keys)
        if (!extra_keys.isEmpty()) {
            throw new Exception("Unknown arguments provided to function ${name}: ${extra_keys}.")
        }
    }
}
62 |
--------------------------------------------------------------------------------
/lib/CWUtil.groovy:
--------------------------------------------------------------------------------
1 | /* Miscellaneous utilities for workflows from the ONT Customer Workflows Group.
2 | */
class CWUtil {

    /* Overwrite a value in the global Nextflow params map
     *
     * Sometimes the value of a user-provided parameter needs to be changed.
     * Keeping both `params.my_param` and `params._my_param` around for this
     * is confusing, so the value is replaced in the Nextflow params ScriptMap
     * itself instead:
     *
     *     CWUtil.mutateParam(params, k, v)
     *
     * Groovy is lenient about "private", which allows calling the private
     * `allowNames` method of the ScriptMap to lift the read-only status of a
     * set of keys. The subsequent call to the private `put0` stores the new
     * value and marks the key as read-only again.
     */
    public static void mutateParam(nf_params, key, value) {
        // `allowNames` has to be called with a set
        Set unlocked_keys = [key]
        nf_params.allowNames(unlocked_keys)
        nf_params.put0(key, value)
    }
}
27 |
--------------------------------------------------------------------------------
/lib/Pinguscript.groovy:
--------------------------------------------------------------------------------
1 | import static groovy.json.JsonOutput.toJson
2 | import groovy.json.JsonBuilder
3 | import groovy.json.JsonSlurper
4 |
5 |
6 | class Pinguscript {
7 |
8 | // Send a ping for the start of a workflow
9 | public static void ping_start(nextflow, workflow, params) {
10 | wf_ping(nextflow, workflow, "start", null, params)
11 | }
12 | // Send a ping for a completed workflow (successful or otherwise)
13 | public static void ping_complete(nextflow, workflow, params) {
14 | wf_ping(nextflow, workflow, "end", null, params)
15 | }
16 | // Send a ping for a workflow error
17 | public static void ping_error(nextflow, workflow, params) {
18 | def error_message = workflow.errorMessage
19 | wf_ping(nextflow, workflow, "error", error_message, params)
20 | }
21 | // Shared handler to construct a ping JSON and send it
22 | private static String wf_ping(nextflow, workflow, event, error_message, params) {
23 | if (params.disable_ping) {
24 | return "{}"
25 | }
26 | def body_json = make_wf_ping(nextflow, workflow, event, error_message, params)
27 | send_ping_post("epilaby", body_json)
28 | }
29 |
30 | // Helper to removing keys from a map
31 | private static clean_meta(meta, keys_to_remove) {
32 | for (key in keys_to_remove) {
33 | if (meta.containsKey(key)) {
34 | meta.remove(key)
35 | }
36 | }
37 | }
38 |
39 | // Helper for fetching a key from the params map
40 | // seems pointless but you just know someone is going to end up writing meta.this ? meta.that
41 | private static get_meta(meta, key) {
42 | (meta.containsKey(key) && meta[key]) ? meta[key].toString() : null
43 | }
44 |
// Construct workflow ping JSON
//
// Arguments:
//   nextflow      - object whose `version` is stringified and matched against
//                   workflow.manifest.nextflowVersion
//   workflow      - object providing manifest, runName, sessionId, profile and
//                   resume; success and exitStatus are read for non-start events
//   event         - event name copied into the payload; the value "start"
//                   leaves the outcome fields and "data" as null
//   error_message - copied as-is into workflow.error (null if no error)
//   params        - workflow parameters; a deep copy is pruned and sent, the
//                   original is only read
// Returns the payload. The method is declared to return String while the value
// returned is the JsonBuilder, so the caller receives its string form.
private static String make_wf_ping(nextflow, workflow, event, error_message, params) {
    // cheeky deepcopy using json: serialise params and parse it back so that
    // the key removals below cannot touch the caller's params object
    String paramsJSON = new JsonBuilder(params).toPrettyString()
    def params_data = new JsonSlurper().parseText(paramsJSON)

    // hostname (best effort: stays null if the lookup throws)
    def host = null
    try {
        host = InetAddress.getLocalHost().getHostName()
    }
    catch(Exception e) {}

    // OS
    // TODO check version on WSL
    def opsys = System.properties['os.name'].toLowerCase()
    def opver = System.properties['os.version']
    // report "wsl" as the OS name whenever the version string mentions it
    if (opver.toLowerCase().contains("wsl")){
        opsys = "wsl"
    }

    // placeholder for any future okta business
    // for now we'll use the guest_ sent to wf.epi2me_user
    def user = get_meta(params.wf, "epi2me_user")

    // drop cruft to save some precious bytes
    // affects the deep copy rather than original params
    clean_meta(params_data, [
        "schema_ignore_params",
    ])
    def ingress_ids = []
    if (params_data.containsKey("wf")) {
        // read the run IDs before the key is removed from the copy just below
        ingress_ids = params_data.wf["ingress.run_ids"] ?: []
        clean_meta(params_data.wf, [
            "agent", // we send this later
            "epi2me_instance", // we send this later
            "epi2me_user", // we send this later
            "example_cmd",
            "ingress.run_ids", // we will send this elsewhere
        ])
    }

    // try and get runtime information (best effort: stays null on failure)
    def cpus = null
    try {
        cpus = Runtime.getRuntime().availableProcessors()
    }
    catch(Exception e) {}

    // outcome fields are only read from the workflow for non-start events
    def workflow_success = null
    def workflow_exitcode = null
    if (event != "start") {
        workflow_success = workflow.success
        workflow_exitcode = workflow.exitStatus
    }

    // build message: the builder is called with the map of top-level fields
    def body_json = new JsonBuilder()
    body_json \
        "tracking_id": [
            "msg_id": UUID.randomUUID().toString(),
            "version": "3.0.0"
        ],
        "source": "workflow",
        "event": event,
        "params": params_data,
        // data will be null on start events, as ingress has not run
        "data": event != "start" ? [run_ids: ingress_ids] : null,
        "workflow": [
            "name": workflow.manifest.name,
            "version": workflow.manifest.version, // could use NfcoreTemplate.version(workflow)
            "run_name": workflow.runName, // required to disambiguate sessions
            "session": workflow.sessionId,
            "profile": workflow.profile,
            "resume": workflow.resume,
            "error": error_message, // null if no error
            "success": workflow_success,
            "exitcode": workflow_exitcode,
        ],
        "env": [
            "user": user, // placeholder for any future okta
            "hostname": host,
            "os": [
                "name": opsys,
                "version": opver
            ],
            "resource": [
                "cpus": cpus,
                "memory": null, // placeholder, no point asking via Runtime as it will just give us the Xmx size
            ],
            "agent": get_meta(params.wf, "agent"), // access via original params
            "epi2me": [
                "instance": get_meta(params.wf, "epi2me_instance"),
                "user": user,
            ],
            "nextflow": [
                "version": nextflow.version.toString(),
                "version_compat": nextflow.version.matches(workflow.manifest.nextflowVersion)
            ]
        ]
    return body_json
}
147 |
// Send a JSON payload to a given endpoint
//
// Arguments:
//   endpoint  - path appended to https://ping.oxfordnanoportal.com/
//   body_json - payload; its toString() form is written as the request body
// Returns the response body text, or null when the request failed and the
// failure was absorbed.
// Throws only when the server answers with HTTP >= 400 AND the payload's
// env.agent is "cw-ci"; every other failure is swallowed so that a ping can
// never break a workflow run.
private static String send_ping_post(endpoint, body_json) {
    // Attempt to send payload and absorb any possible Exception gracefully
    String postResult
    boolean raise_exception = false
    String payload = body_json.toString()

    // The agent is not an argument of this method. Referencing an undefined
    // `agent` inside the closure below raised MissingPropertyException, which
    // the catch block swallowed, so the cw-ci rethrow could never trigger.
    // Recover it from the payload (make_wf_ping stores it at env.agent).
    def agent = null
    try {
        def parsed = new groovy.json.JsonSlurper().parseText(payload)
        if (parsed instanceof Map && parsed.env instanceof Map) {
            agent = parsed.env.agent
        }
    }
    catch(Exception e) {}

    try {
        ((HttpURLConnection)new URL("https://ping.oxfordnanoportal.com/${endpoint}").openConnection()).with({
            requestMethod = 'POST'
            doOutput = true
            setConnectTimeout(5000)
            setReadTimeout(10000)
            setRequestProperty('Content-Type', 'application/json')
            setRequestProperty('accept', 'application/json')
            outputStream.withPrintWriter({printWriter ->
                printWriter.write(payload)
            })

            // Rethrow exceptions that imply we're not using this endpoint properly
            if(responseCode >= 400 && agent.toString() == "cw-ci") {
                raise_exception = true
            }
            // Accessing inputStream.text will raise an Exception for failed requests
            postResult = inputStream.text
        })
    }
    catch(Exception e) {
        if(raise_exception) { throw e }
    }
    return (postResult)
}
178 | }
179 |
--------------------------------------------------------------------------------
/lib/WorkflowMain.groovy:
--------------------------------------------------------------------------------
1 | // This file is based on the nf-core/tools pipeline-template.
2 | // Changes to this file must be propagated via wf-template.
3 |
class WorkflowMain {

    // Citation notice appended to both the help text and the parameter summary
    public static String citation(workflow) {
        String header = "If you use ${workflow.manifest.name} for your analysis please cite:\n\n"
        String framework = "* The nf-core framework\n" +
            " https://doi.org/10.1038/s41587-020-0439-x\n\n"
        return header + framework
    }

    // Assemble the --help text: logo, schema-driven parameter help for an
    // example command line, then the citation notice.
    // The `log` argument is accepted but not used here.
    public static String help(workflow, params, log) {
        final String continuation = ' \\ \n\t'
        String example_args = params.wf.example_cmd.join(continuation)
        String command = 'nextflow run ' + workflow.manifest.name + continuation + example_args
        return '' +
            NfcoreTemplate.logo(workflow, params.monochrome_logs) +
            NfcoreSchema.paramsHelp(workflow, params, command) +
            '\n' + citation(workflow) + '\n'
    }

    // Assemble the start-up banner: logo, parameter summary, citation and a
    // version line framed by dashed lines.
    // The `log` argument is accepted but not used here.
    public static String paramsSummaryLog(workflow, params, log) {
        String workflow_version = NfcoreTemplate.version(workflow)
        return '' +
            NfcoreTemplate.logo(workflow, params.monochrome_logs) +
            NfcoreSchema.paramsSummaryLog(workflow, params) +
            '\n' + citation(workflow) + '\n' +
            NfcoreTemplate.dashedLine(params.monochrome_logs) +
            "\nThis is ${workflow.manifest.name} ${workflow_version}.\n" +
            NfcoreTemplate.dashedLine(params.monochrome_logs)
    }

    // Handle --help / --version, refuse Conda, optionally validate parameters
    // against the JSON schema, then log the parameter summary.
    public static void initialise(workflow, params, log) {
        // --help: print the help text and stop
        if (params.help) {
            log.info help(workflow, params, log)
            System.exit(0)
        }

        // --version: print "<name> <version>" and stop
        if (params.version) {
            log.info "${workflow.manifest.name} ${NfcoreTemplate.version(workflow)}"
            System.exit(0)
        }

        // Conda is not supported. Reading conda.enabled is wrapped in a
        // catch-all so that a config without that entry is simply ignored.
        try {
            def conda_enabled = workflow.session.config.conda.enabled
            if (conda_enabled) {
                log.error "Sorry, this workflow is not compatible with Conda, please use -profile standard (Docker) or -profile singularity."
                System.exit(1)
            }
        } catch(Exception e) {}

        // Schema validation can be switched off with validate_params = false
        if (params.validate_params) {
            NfcoreSchema.validateParameters(workflow, params, log)
        }

        // Finally show the parameter summary
        log.info paramsSummaryLog(workflow, params, log)
    }
}
72 |
--------------------------------------------------------------------------------
/lib/common.nf:
--------------------------------------------------------------------------------
1 | import groovy.json.JsonBuilder
2 |
// Dump the Nextflow params object as pretty-printed JSON to params.json,
// which is published (copied) into params.out_dir. Caching is disabled.
process getParams {
    label "wf_common"
    publishDir "${params.out_dir}", mode: 'copy', pattern: "params.json"
    cache false
    cpus 1
    memory "2 GB"
    output:
        path "params.json"
    script:
        String pretty_json = new JsonBuilder(params).toPrettyString()
        // the JSON is embedded in a single-quoted shell string below, so every
        // single quote in it is rewritten as '\''
        String shell_safe_json = pretty_json.replaceAll("'", "'\\\\''")
    """
    # Output nextflow params object to JSON
    echo '$shell_safe_json' > params.json
    """
}
18 |
// Build igv.json from a file-of-file-names via `workflow-glue configure_igv`.
//
// Inputs:
//   file-names.txt - list of files; the python script decides what to do with
//                    each one based on its extension
//   locus_str      - optional locus passed as --locus (empty/false: omitted)
//   aln_extra_opts - optional extra options for alignment tracks, written to
//                    extra-aln-opts.json and passed via --extra-alignment-opts
//   var_extra_opts - optional extra options for variant tracks, written to
//                    extra-var-opts.json and passed via --extra-vcf-opts
// Output: igv.json, published only when params.igv exists and is truthy.
process configure_igv {
    publishDir "${params.out_dir}/", mode: 'copy', pattern: 'igv.json', enabled: params.containsKey("igv") && params.igv
    label "wf_common"
    cpus 1
    memory "2 GB"
    input:
        // the python script will work out what to do with all the files based on their
        // extensions
        path "file-names.txt"
        val locus_str
        val aln_extra_opts
        val var_extra_opts
    output: path "igv.json"
    script:
    // The JSON strings are embedded in single-quoted shell strings below, so any
    // single quote inside them must be rewritten as '\'' (same escaping as
    // getParams); without it a quote in an option value breaks the script.
    def shell_quote = { String text -> text.replaceAll("'", "'\\\\''") }
    // the locus argument just makes sure that the initial view in IGV shows something
    // interesting
    String locus_arg = locus_str ? "--locus $locus_str" : ""
    // extra options for alignment tracks
    def aln_opts_json_str = \
        aln_extra_opts ? shell_quote(new JsonBuilder(aln_extra_opts).toPrettyString()) : ""
    String aln_extra_opts_arg = \
        aln_extra_opts ? "--extra-alignment-opts extra-aln-opts.json" : ""
    // extra options for variant tracks
    def var_opts_json_str = \
        var_extra_opts ? shell_quote(new JsonBuilder(var_extra_opts).toPrettyString()) : ""
    String var_extra_opts_arg = \
        var_extra_opts ? "--extra-vcf-opts extra-var-opts.json" : ""
    """
    # write out JSON files with extra options for the alignment and variant tracks
    echo '$aln_opts_json_str' > extra-aln-opts.json
    echo '$var_opts_json_str' > extra-var-opts.json

    workflow-glue configure_igv \
        --fofn file-names.txt \
        $locus_arg \
        $aln_extra_opts_arg \
        $var_extra_opts_arg \
    > igv.json
    """
}
59 |
60 |
--------------------------------------------------------------------------------
/lib/nfcore_external_java_deps.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epi2me-labs/wf-clone-validation/e0e56b944dcf3efd54cde2bdc8dced104fc6c0bc/lib/nfcore_external_java_deps.jar
--------------------------------------------------------------------------------
/modules/local/canu_assembly.nf:
--------------------------------------------------------------------------------
1 | import groovy.json.JsonBuilder
2 |
3 | // processes required for assembly using canu
4 |
5 |
// Assemble one sample with Canu and reconcile the result with Trycycler.
//
// Input:  tuple (meta, fastq); meta.alias names the outputs and
//         meta.approx_size is used as the genome size.
// Output: assembly    - <alias>.reconciled.fasta (optional)
//         downsampled - <alias>.downsampled.fastq (optional)
//         status      - (alias, STATUS) where STATUS is the shell variable set
//                       by the script to the last stage reached / final result
//
// Stages are chained with `&&`; STATUS is set before each stage so that, when a
// stage fails, it names the stage that failed. While task.attempt <= 4 a failed
// assembly exits 1 so the task is retried; after that the failure is ignored.
process assembleCore_canu {
    // directive form (no `=`): the closure is evaluated per task attempt
    errorStrategy {task.attempt <= 4 ? 'retry' : 'ignore'}
    maxRetries 4
    label "canu"
    cpus params.threads
    memory "7GB"
    input:
        tuple val(meta), path(fastq)
    output:
        tuple val(meta), path("${meta.alias}.reconciled.fasta"), optional: true, emit: assembly
        tuple val(meta), path("${meta.alias}.downsampled.fastq"), optional: true, emit: downsampled
        tuple val(meta.alias), env(STATUS), emit: status
    script:
        String cluster_dir = "trycycler/cluster_001"
        // reads are downsampled to 3x the requested coverage because they are
        // then split into 3 subsets
        int coverage_target = params.assm_coverage * 3
        int min_dep = (params.assm_coverage / 3) * 2
        int min_len = 100
        // reads longer than 1.2x the approximate size are truncated
        int max_len = (meta.approx_size as Integer) * 1.2
        // 1: exit with an error so the task is retried; 0: final attempt
        int exit_number = task.attempt <= 4 ? 1 : 0
        def fast = params.canu_fast == true ? '-fast' : ''
        // WSL does not support named pipes used by Canu, setting these parameters avoids their use
        def windows_params = System.properties['os.version'].toLowerCase().contains("wsl") ? """\
        -mhapPipe=false \
        -purgeOverlaps=false \
        -saveOverlaps=true """ : ""
        def seqkit_threads = params.threads >= 6 ? 2 : 1

    """
    ############################################################
    # Trimming
    ############################################################
    STATUS="Failed to trim reads"
    (
        if [[ $params.trim_length -gt 0 ]]; then
            seqkit subseq -j $seqkit_threads -r $params.trim_length:-$params.trim_length $fastq
        else
            cat $fastq
        fi \
        | seqkit subseq -j $seqkit_threads -r 1:$max_len \
        | seqkit seq -j $seqkit_threads -m $min_len -Q $params.min_quality -g > "${meta.alias}.trimmed.fastq"
    ) &&

    ############################################################
    # Downsampling
    ############################################################

    STATUS="Failed to downsample reads" &&
    (rasusa \
        --coverage $coverage_target \
        --genome-size ${meta.approx_size} \
        --input "${meta.alias}.trimmed.fastq" > "${meta.alias}.downsampled.fastq") &&


    ############################################################
    # Subsetting
    ############################################################
    STATUS="Failed to Subset reads" &&
    (trycycler subsample \
        --count 3 \
        --min_read_depth $min_dep \
        --reads "${meta.alias}.downsampled.fastq" \
        --out_dir sets \
        --genome_size ${meta.approx_size}) &&

    ############################################################
    # Assembly
    ############################################################
    STATUS="Failed to assemble using Canu" &&
    (for SUBSET in \$(ls sets/sample_*.fastq)
    do
        SUBSET_NAME=\$(basename -s .fastq \$SUBSET)
        canu \
            -p \$SUBSET_NAME \
            -d assm_\${SUBSET_NAME} \
            -maxThreads=$task.cpus \
            genomeSize=${meta.approx_size} \
            $fast \
            -nanopore \$SUBSET \
            $windows_params
    done) &&

    ############################################################
    # Trim assemblies
    ############################################################
    STATUS="Failed to trim Assembly" &&
    (for assembly in \$(ls assm_sample_0*/*.contigs.fasta)
    do
        echo \$assembly
        assembly_name=\$(basename -s .fasta \$assembly)
        trim.py \
            \$assembly \
            -o \${assembly_name}.trimmed.fasta
        deconcatenate.py \
            \${assembly_name}.trimmed.fasta \
            -o \${assembly_name}.deconcat.fasta \
            --approx_size ${meta.approx_size}
    done
    ls *.deconcat.fasta > /dev/null 2>&1) &&

    ############################################################
    # Reconciliation
    ############################################################
    STATUS="Failed to reconcile assemblies" &&
    (trycycler cluster \
        --assemblies *.deconcat.fasta \
        --reads "${meta.alias}.downsampled.fastq" \
        --out_dir trycycler) &&
    (trycycler reconcile \
        --reads "${meta.alias}.downsampled.fastq" \
        --cluster_dir $cluster_dir \
        --max_trim_seq_percent 20 \
        --max_add_seq_percent 10) &&
    (trycycler msa --cluster_dir $cluster_dir) &&
    (trycycler partition --reads "${meta.alias}.downsampled.fastq" --cluster_dirs $cluster_dir) &&
    (trycycler consensus --cluster_dir $cluster_dir)

    ############################################################
    # Exit handling
    ############################################################

    if [ ! -f "${cluster_dir}/7_final_consensus.fasta" ]; then
        if ls ${cluster_dir}/1_contigs/*.fasta 1> /dev/null 2>&1; then
            STATUS="Completed but failed to reconcile"
            (seqkit sort ${cluster_dir}/1_contigs/*.fasta --by-length \
                | seqkit head -n 1 > "${meta.alias}.reconciled.fasta") \
                && echo "Trycycler failed, outputting un-reconciled assembly"
        elif [ "$exit_number" == "1" ]; then
            echo \$STATUS
            echo "Assembly failed, retrying process"
            exit 1
        elif [ "$exit_number" == "0" ]; then
            echo \$STATUS
            echo "Failed final attempt"
        fi
    else
        mv "${cluster_dir}/7_final_consensus.fasta" "${meta.alias}.reconciled.fasta"
        STATUS="Completed successfully"
    fi
    """
}
--------------------------------------------------------------------------------
/modules/local/flye_assembly.nf:
--------------------------------------------------------------------------------
1 | import groovy.json.JsonBuilder
2 |
3 | // processes required for assembly using flye
4 |
5 |
// Assemble one sample with Flye and reconcile the result with Trycycler.
//
// Input:  tuple (meta, fastq); meta.alias names the outputs and
//         meta.approx_size is used as the genome size.
// Output: assembly    - <alias>.reconciled.fasta (optional)
//         downsampled - <alias>.downsampled.fastq (optional)
//         status      - (alias, STATUS) where STATUS is the shell variable set
//                       by the script to the last stage reached / final result
//
// Stages are chained with `&&`; STATUS is set before each stage so that, when a
// stage fails, it names the stage that failed. While task.attempt <= 4 a failed
// assembly exits 1 so the task is retried; after that the failure is ignored.
process assembleCore_flye {
    // directive form (no `=`): the closure is evaluated per task attempt
    errorStrategy {task.attempt <= 4 ? 'retry' : 'ignore'}
    maxRetries 4
    label "wfplasmid"
    cpus params.threads
    memory "4GB"
    input:
        tuple val(meta), path(fastq)
    output:
        tuple val(meta), path("${meta.alias}.reconciled.fasta"), optional: true, emit: assembly
        tuple val(meta), path("${meta.alias}.downsampled.fastq"), optional: true, emit: downsampled
        tuple val(meta.alias), env(STATUS), emit: status
    script:
        // declared locally: an undeclared variable here is a script-global that
        // is shared between concurrently running tasks
        String cluster_dir = "trycycler/cluster_001"
        // reads are downsampled to 3x the requested coverage because they are
        // then split into 3 subsets
        int coverage_target = params.assm_coverage * 3
        int min_dep = (params.assm_coverage / 3) * 2
        int min_len = 100
        // reads longer than 1.2x the approximate size are truncated
        int max_len = (meta.approx_size as Integer) * 1.2
        // 1: exit with an error so the task is retried; 0: final attempt
        int exit_number = task.attempt <= 4 ? 1 : 0
        // min_overlap normally auto calculated but with a lower limit of 3000
        // assembly with same size as overlap will likely fail
        def min_overlap = meta.approx_size.toInteger() <= 3000 ? '--min-overlap 1000' : ''
        def meta_cov = params.non_uniform_coverage ? '--meta' : ''
        def seqkit_threads = params.threads >= 6 ? 2 : 1
    """
    # STATUS is changed after a command succeeds and is related to the following section
    # This is because we can't put comments in a multiline cmd
    ############################################################
    # Trimming
    ############################################################
    STATUS="Failed to trim reads"
    (
        if [[ $params.trim_length -gt 0 ]]; then
            seqkit subseq -j $seqkit_threads -r $params.trim_length:-$params.trim_length $fastq
        else
            cat $fastq
        fi \
        | seqkit subseq -j $seqkit_threads -r 1:$max_len \
        | seqkit seq -j $seqkit_threads -m $min_len -Q $params.min_quality -g > "${meta.alias}.trimmed.fastq"
    ) &&


    ############################################################
    # Downsampling
    ############################################################

    STATUS="Failed to downsample reads" &&
    (rasusa \
        --coverage $coverage_target \
        --genome-size "${meta.approx_size}" \
        --input "${meta.alias}.trimmed.fastq" > "${meta.alias}.downsampled.fastq") &&

    ############################################################
    # Subsetting
    ############################################################
    STATUS="Failed to Subset reads" &&
    (trycycler subsample \
        --count 3 \
        --min_read_depth $min_dep \
        --reads "${meta.alias}.downsampled.fastq" \
        --out_dir sets \
        --genome_size ${meta.approx_size}) &&

    ############################################################
    # Assembly
    ############################################################
    STATUS="Failed to assemble using Flye" &&
    (for SUBSET in \$(ls sets/sample_*.fastq)
    do
        SUBSET_NAME=\$(basename -s .fastq \$SUBSET)
        flye \
            --${params.flye_quality} \${SUBSET} \
            --deterministic \
            --threads $task.cpus \
            --genome-size ${meta.approx_size} \
            --out-dir "assm_\${SUBSET_NAME}" \
            ${meta_cov} \
            $min_overlap

        mv "assm_\${SUBSET_NAME}/assembly.fasta" "assm_\${SUBSET_NAME}/\${SUBSET_NAME}_assembly.fasta"
    done) &&

    ############################################################
    # Trim assemblies
    ############################################################
    STATUS="Failed to trim Assembly" &&
    (for assembly in \$(ls assm_sample_0*/*assembly.fasta)
    do
        echo \$assembly
        assembly_name=\$(basename -s .fasta \$assembly)
        deconcatenate.py \
            \$assembly \
            -o \${assembly_name}.deconcat.fasta \
            --approx_size ${meta.approx_size}
    done
    ls *.deconcat.fasta > /dev/null 2>&1) &&


    ############################################################
    # Reconciliation
    ############################################################
    STATUS="Failed to reconcile assemblies" &&
    (trycycler cluster \
        --assemblies *.deconcat.fasta \
        --reads "${meta.alias}.downsampled.fastq" \
        --out_dir trycycler) &&
    (trycycler reconcile \
        --reads "${meta.alias}.downsampled.fastq" \
        --cluster_dir $cluster_dir \
        --max_trim_seq_percent 20 \
        --max_add_seq_percent 10) &&
    (trycycler msa --cluster_dir $cluster_dir) &&
    (trycycler partition --reads "${meta.alias}.downsampled.fastq" --cluster_dirs $cluster_dir) &&
    (trycycler consensus --cluster_dir $cluster_dir)

    ############################################################
    # Exit handling
    ############################################################

    if [ ! -f "${cluster_dir}/7_final_consensus.fasta" ]; then
        if ls ${cluster_dir}/1_contigs/*.fasta 1> /dev/null 2>&1; then
            STATUS="Completed but failed to reconcile"
            (seqkit sort ${cluster_dir}/1_contigs/*.fasta --by-length \
                | seqkit head -n 1 > "${meta.alias}.reconciled.fasta") \
                && echo "Trycycler failed, outputting un-reconciled assembly"
        elif [ "$exit_number" == "1" ]; then
            echo \$STATUS
            echo "Assembly failed, retrying process"
            exit 1
        elif [ "$exit_number" == "0" ]; then
            echo \$STATUS
            echo "Failed final attempt"
        fi
    else
        mv "${cluster_dir}/7_final_consensus.fasta" "${meta.alias}.reconciled.fasta"
        STATUS="Completed successfully"
    fi
    """
}
--------------------------------------------------------------------------------
/nextflow.config:
--------------------------------------------------------------------------------
1 | // import profiles and workflow SHA from core
2 | includeConfig "base.config"
3 |
// define workflow params
// Comments below describe only the uses that are visible in lib/ and
// modules/local/; parameters without a comment are consumed elsewhere.
params {
    help = false                        // true: print help text and exit
    version = false                     // true: print "<name> <version>" and exit
    fastq = null
    bam = null
    db_directory = null
    threads = 4                         // cpus for the assembly processes; seqkit uses 2 threads when >= 6, else 1
    host_reference = null
    regions_bedfile = null
    approx_size = 7000                  // presumably the default for meta.approx_size - confirm in main.nf
    assm_coverage = 60                  // rasusa target is 3x this; trycycler min read depth is (this / 3) * 2
    trim_length = 0                     // when > 0, this many bases are cut from both ends of every read
    min_quality = 9                     // passed to `seqkit seq -Q`
    prefix = null
    primers = null
    insert_reference = null
    sample = null
    sample_sheet = null
    disable_ping = false                // true: no ping is built or sent
    analyse_unclassified = false
    override_basecaller_cfg = null
    medaka_model_path = null
    flye_quality = "nano-hq"            // passed to flye as the flag `--<value>`
    non_uniform_coverage = false        // true: adds `--meta` to the flye command
    large_construct = false
    full_reference = null
    cutsite_mismatch = 1
    primer_mismatch = 2
    expected_coverage = 95
    expected_identity = 99

    assembly_tool = "flye"              // presumably selects the flye or canu module - confirm in main.nf
    canu_fast = false                   // true: adds `-fast` to the canu command
    client_fields = null

    store_dir = null

    monochrome_logs = false             // forwarded to the logo and dashed-line helpers
    validate_params = true              // true: validate params against the JSON schema at start-up
    show_hidden_params = false
    // this key is stripped from the copy of params sent in the ping payload
    schema_ignore_params = 'show_hidden_params,validate_params,monochrome_logs,aws_queue,aws_image_prefix,wf'

    wf {
        // joined with line continuations to form the example command in --help
        example_cmd = [
            "--fastq 'wf-clone-validation-demo/fastq'",
            "--primers 'wf-clone-validation-demo/primers.tsv'",
            "--sample_sheet 'wf-clone-validation-demo/sample_sheet.csv'"
        ]
        agent = null                    // reported in the ping payload as env.agent
    }
}
56 |
// Workflow metadata. name and version are reported in the ping payload and
// name appears in the help, version and citation text; nextflowVersion is the
// constraint the running Nextflow version is matched against for the ping's
// version_compat field.
manifest {
    name = 'epi2me-labs/wf-clone-validation'
    author = 'Oxford Nanopore Technologies'
    homePage = 'https://github.com/epi2me-labs/wf-clone-validation'
    description = 'De-novo reconstruction of synthetic plasmid sequences.'
    mainScript = 'main.nf'
    nextflowVersion = '>=23.04.2'
    version = 'v1.8.0'
}

// Comma-separated tags for this workflow (consumer not visible in this file)
epi2melabs {
    tags = "wf-clone-validation,plasmid,denovo,annotation,assembly"
}
70 |
--------------------------------------------------------------------------------
/output_definition.json:
--------------------------------------------------------------------------------
1 | {
2 | "files": {
3 | "workflow-report": {
4 | "filepath": "wf-clone-validation-report.html",
5 | "title": "workflow report",
6 | "description": "A report bringing together the main results of the workflow, across samples.",
7 | "mime-type": "text/html",
8 | "optional": false,
9 | "type": "aggregated"
10 | },
11 | "sample-status-csv-summary": {
12 | "filepath": "sample_status.txt",
13 | "title": "sample status",
14 | "description": "A CSV file with per-sample assembly success or failure reasons",
15 | "mime-type": "text/csv",
16 | "optional": false,
17 | "type": "aggregated"
18 | },
19 | "plannotate-annotations": {
20 | "filepath": "plannotate.json",
21 | "title": "plasmid annotations",
22 | "description": "Plasmid annotations in a JSON structure.",
23 | "mime-type": "text/json",
24 | "optional": false,
25 | "type": "aggregated"
26 | },
27 | "annotations_bed" : {
28 | "filepath": "{{ alias }}.annotations.bed",
29 | "title": "annotations bed",
30 | "description": "Plasmid annotations in a BED file format for onward use",
31 | "mime-type": "text/tab-separated-values",
32 | "optional": false,
33 | "type": "per-sample"
34 | },
35 | "annotations_gbk" : {
36 | "filepath": "{{ alias }}.annotations.gbk",
37 | "title": "annotations gbk",
38 | "description": "Plasmid annotations in a GBK file format for onward use",
39 | "mime-type": "application/octet-stream",
40 | "optional": false,
41 | "type": "per-sample"
42 | },
43 | "assembly_fastq" : {
44 | "filepath": "{{ alias }}.final.fastq",
45 | "title": "Assembly FASTQ",
46 | "description": "Sequence and quality score of the final assembly.",
47 | "mime-type": "text/plain",
48 | "optional": false,
49 | "type": "per-sample"
50 | },
51 | "assembly_stats": {
52 | "filepath": "{{ alias }}.assembly_stats.tsv",
53 | "title": "Assembly statistics",
54 | "description": "Assembly statistics from fastcat.",
55 | "mime-type": "text/tab-separated-values",
56 | "optional": true,
57 | "type": "per-sample"
58 | },
59 | "insert_fasta" : {
60 | "filepath": "{{ alias }}.insert.fasta",
61 | "title": "Insert FASTA",
62 | "description": "Insert sequence found in the final assembly, only relevant if the primers parameter was used.",
63 | "mime-type": "text/plain",
64 | "optional": true,
65 | "type": "per-sample"
66 | },
67 | "variant_stats" : {
68 | "filepath": "{{ alias }}.full_construct.stats",
69 | "title": "Variant stats report",
70 | "description": "A BCF stats report with any variants found, only relevant if a full reference was provided.",
71 | "mime-type": "text/plain",
72 | "optional": true,
73 | "type": "per-sample"
74 | },
75 | "bcf" : {
76 | "filepath": "{{ alias }}.full_construct.calls.bcf",
77 | "title": "Variants BCF file",
78 | "description": "A BCF file with any variants found per sample, only relevant if a full reference was provided.",
79 | "mime-type": "application/gzip",
80 | "optional": true,
81 | "type": "per-sample"
82 | },
83 | "bam" : {
84 | "filepath": "{{ alias }}.bam",
85 | "title": "Reference alignment",
86 | "description": "Reference aligned with the assembly in BAM format, only relevant if a full reference was provided.",
87 | "mime-type": "application/gzip",
88 | "optional": true,
89 | "type": "per-sample"
90 | },
91 | "bai" : {
92 | "filepath": "{{ alias }}.bam.bai",
93 | "title": "Reference alignment index",
94 | "description": "The index for the reference aligned with the assembly, only relevant if a full reference was provided.",
95 | "mime-type": "application/octet-stream",
96 | "optional": true,
97 | "type": "per-sample"
98 | },
99 | "host_bam" : {
100 | "filepath": "{{ alias }}.host.bam",
101 | "title": "Host reference alignment",
102 | "description": "Host reference aligned with sample in BAM format, only relevant if a host reference was provided.",
103 | "mime-type": "application/gzip",
104 | "optional": true,
105 | "type": "per-sample"
106 | },
107 | "host_bai" : {
108 | "filepath": "{{ alias }}.host.bam.bai",
109 | "title": "Host reference alignment index",
110 | "description": "The index for the host reference aligned with sample, only relevant if a host reference was provided.",
111 | "mime-type": "application/octet-stream",
112 | "optional": true,
113 | "type": "per-sample"
114 | },
115 | "bam_stats" : {
116 | "filepath": "{{ alias }}.bam.stats",
117 | "title": "BAM Stats",
118 | "description": "Stats report for the reference aligned with the assembly, only relevant if a full reference was provided.",
119 | "mime-type": "text/tab-separated-values",
120 | "optional": true,
121 | "type": "per-sample"
122 | }
123 | }
124 | }
--------------------------------------------------------------------------------
/test_data/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epi2me-labs/wf-clone-validation/e0e56b944dcf3efd54cde2bdc8dced104fc6c0bc/test_data/.gitkeep
--------------------------------------------------------------------------------
/test_data/client_fields.json:
--------------------------------------------------------------------------------
1 |
2 | {
3 | "operator": "Dwight Schrute",
4 | "requester": "Michael Scott",
5 | "organisation": "Dunder Mifflin",
6 | "sequencer": "GridION",
7 | "location": "Scranton"
8 | }
--------------------------------------------------------------------------------
/test_data/cutsite_test.fasta:
--------------------------------------------------------------------------------
1 | >reference
2 | TGACGTAGCGATAATCAGCTTGCATACTAAAGGTGGTGATATGACTGAATACTTTACAGTCTATAGGTACTAAGAAGGTCAGAAGCTACGCGCGTCCTCTGGGTGGCCAGATGCAGTTGACATCGAGGCGTAAGCCAACGAATCGGGGTAGGAATGGACCGATAGGGCCATTGTACGGGCAATTCAATTAGTATTAGTCCCATCCCCCTTAGAGAGAAGCTAAGAGGATGGATTCATTAAACAGCTCAACCATTGGACCAGGGAGGTGCAATTCGTACCTTGCGTTGAAGCACTACCTAATATCGTGAGTCATCTCCTCCTGCTTTGGCGGCACTGTGGCCTGATCTCAGACACCAAAGGTGGAAAGTTGGGCACCAAAGGAGCCCCCATTCACGGGTAAGGATGTCTCATATCAGTCGGTATACGTCTTAAGTTCAAAACATGGGCTACATTTTCTTAGAAATGGAGGTGCCAGGCGACTTGACAAGGATGTCGATTTTTCGTAATCCTCAAAGGGCCTTATTCAATCCGCGCGGTCCATCACCCTTGGATGATGTGAATGTGTATTACAGCCGTGTGAGGCGTTAAGACCCGTAGCAGACTATAACTCGAGCCAGGTTGCGGTGATGTACGGGGAGAATAACACACCGCTCCCCGTGTCGCCAGACCGCTTCCGTGTGGCTTCGTGAGTATTGAATATGCGATGCGCGGACCTGAATTCCCCCACGCATTGATTACTCACTATAGTCAGAAGAAGCTTGAGCAGCTCCCCTGGTCTCGCTCTGACATTCCCATACGCGACGCACCTGTACACTAGATTCTCAGATTACAACCTGAATCTCGTTATCCGTAAACGAAACGGCGCTACCCCCACTAAGCTAGGATGTCCCCACTTCTGATACGTACATAATCCGATTCAGAGCCGCAGGGGACTTTTACCGTTCTCATCCGTGACCGGCAGCCAACCTGGTGATGTCCAGGCTTCCTACGCGGCAAATTGGTGCCGCATTAAATTTAGAGGCGCCGTTCAACCACTAAAGGTAAGAAATCATGAGTCAAGGTACTACGTTAAATCCGTACGTTTCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTTCGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCTTGCGTTTATGGTACGCTGGACTTTGTAGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCGTCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTACGGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTACGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAGTGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACTAAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGCCCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCATCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGACTCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTACTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAAGGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTTGGC
ACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACAACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGCTCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTTTCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGCATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCATGATGTTATTTCTTCATTTGGAGGTAAAACCTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTTGATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGCCGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGACTAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTGTATGACCACAACGGTTTCCCTCTAGGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGTTTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGAAGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGATTATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTTATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAACGCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGCTTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGTTCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTATATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTGTCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGCCTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTGGATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGCCGGGCAATAATGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGTTTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTGCTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAAAGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCTGGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGTCCCTAGTTTTGTTTCTGGTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGATAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTATCTTGCTGCTGCATTTCCTGAG
CTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGGTTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGAGATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGACCAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTATGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCAAACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGACTTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTTCTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGATACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCGTCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTTCTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTATTGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGCATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATGTTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGAATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGGGACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCCCTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATTGCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTGCTATTCAGCGTTTGATGAATGCAATGCGACAGGCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTTATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCGCAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTGCTGCGGACACATACAAAGTTACCCATGTGTGCCTGAGTATGGTACAGCTAATGGCCGTCTTCATTTCCATGCGGTGCACTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTCGTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCATCGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAGCCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATATGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACTTCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTGTCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGCAGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGA
AAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACCTGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCAGAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTTGATAAAGCAGGAATTACTACTGCTTGTTTACGATATGCTGTTATCTCACACTACGATTATTTCCCACTTGCGGCCACATATACGGGTAACCCCTCATAAACAGCACGGCCAACTTATCCTCTCATTAGCTAAGATCGTCATGGTTCCTTAGATTATCCAAGAACCCTTTGGAAATCAG
3 |
--------------------------------------------------------------------------------
/test_data/insert_reference.fasta:
--------------------------------------------------------------------------------
1 | >reference
2 | GACCACAACGGTTTCCCTCTAGGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGTTTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGAAGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGATTATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTTATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAACGCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGCTTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGTTCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTATATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTGTCAACGCCGCTAATCACGTTGTTTCTGTTGGTGCTGATATTGCTTTTGAGGCCGACCTTAAATTTTTTGCCTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTGGATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGCCGGGCAATAATGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGTTTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTGCTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAAAGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCTGGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGTCCCTAGTTTTGTTTCTGGTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGATAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTATCTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGGTTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGAGATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGACCAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTATGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCAAACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGACTTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTTCTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGATACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCGTCAGGATTGACACCCTC
CCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTTCTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTATTGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGCATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATGTTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGAATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGGGACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCCCTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATTGCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTGCTATTCAGCGTTTGATGAATGCAATGCGACAGGCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTTATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCGCAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTGCTGCGGACACATACAAAGTTACCCA
--------------------------------------------------------------------------------
/test_data/other_reference.fasta:
--------------------------------------------------------------------------------
1 | >reference
2 | CTGAGAACCGGGAGGCGGGAATCCGTCACGTATGAGAAGGTATTTGCCCGATAATCAATACCCCAGGCTTCTAACTTTTTCCACTCGCTTGAGCCGGCTAGGCCTTTCTGCCCGAAGTTTCGATGGACTGGTGCCAACGCGCAGGCATAGTTTTAGGAGAATTATTCGGGGGCAGTGACAACCAACATCTCGGGTCCTGCCCAACCGGTCTACACGCTAATATAGCGAATCACCGAGAACCCGGCGCCACGCAATGGAACGTCCTTAACTCCGGCAGGCAATTAAAGGGAACGTATGTATAACGCAAAAAAACAGAAAAATAGGCGAATGAATCTTTTCTCTGTGTATCGAAGAATGGCCTCGCGGAGGCATGCGTCATGCTAGCGTGCGGGGTACTCTTGCTATCCATATGGTCCACAGGACACTCGTTGTTTTCGGATTTACCCTTTATGCGCCGGTTTTCAGCCACGCTTATGCCCAGCATCGTTACAACCAGACCGATACTAGATGTATAAAGTCCGCCATGCAGACGAGACCAGTCGGAGATTACCGAGCATTCTATCAGGTCGGCGACCACTAGTGAGCTACTGGAGCCGAGGGGTAACCACGATGCCGCTAAGAACCTCTCGGTCGACGCAAGCGATTACACTCCTGTCACATCATAATCGTTTGCTATTCAGGGGTTGACCAACACCGGAAAACTTTTCACTTGAAGTATTGTATACGACAGGGTGCGTGTACCTACCAAACCTGTTTAAACTAAGTTCAGACTAGTTGGAAGTGTGTCTAGATCTTAGTTTTCGTCACTAGAGGGCCCACGCTTTATTTTTATGATCCATTGATCTCCCAGACGCTGCAAGATTTGCAACCAGGCAGACTTGGCGGTAGGTCCTAGTGCAGCGGGACTTTTTTTCTATAGTCCTTGAGAGGAGGAGTCGTCAGTCCAGATACCTTTGATGTCCTGATTGGAAGGACCGTTGGCCCCCCACCCTTAGGCAGTGTACTCAGTTCCATAAACGAGCTATTAGATATGAGGTCCGTAGATTGAAAAGGGTGACGGAATTCGCCCGAACGGGAAAGACGGACAACTAGGTATCCTGAGCACGGTTGCGCGTCCGTATCAAGCTCCTCTTTATAGGCCCCGGTTTCTGTTGGTCGTAGAGCGCAGAACGGGTTGGGGGGATGTACGACAATATCGCTTAGTCACCTTTGGGCCACGGTCCGCTACCTTACAGGAATTGAGACCGTCCTTTAATTTCCCTTGCATATATGTTGCGTTTCTTCGACCTTTTAACCGCTCCCTTAGGAGAAAGACAGATAGCTTCTTACCCGTACTCCACCGTTGGCAGCACGATCGCATGTCCCACGTGAACCATTGGTAAACCCTGTGGCCTGTGAGCGACAAAAGCTTTAATGGGAAATTCGCGCCCATAACTTGGTCCGAATACGGGTCCTAGCAACGTTCGTCTGAGTTTGATCTATATAATACGGGCGGTATGTCTGCTTTGATCAACCTCCAATAGCTCGTATGATAGTGCACCCGCTGGTGATCACTCAATGATCTGGGCTCCCCGTTGCAACTACGGGGATTTTTCGAGACCGACCTGCGTTCGGCATTGTGGGCACAGTGAAGTATTAGCAAACGTTAAGTCCCGAACTAGATGTGACCTAACGGTAAGAGAATTTCATAATACGTCCTGCCGCACGCGCAAGGTACATTTGGACAGTATTGAATGGACTCTGATCAACCTTCACACCGATCTAGAATCGAATGCGTAGATCAGCCAGGTGCAAACCAAAAATTCTAGGTTACTAGAAGTTTTGCGACGTTCTAAGTGTTGGACGAAATGATTCGCGACCCAGGATGAGGTCGCCCTAAAAAATAGATTTCTGCAACTCTCCTCGTGAGCAGTCTGGTGTATCGAAAGTACAGGACTAGCCTTCCTAGCAACCGCGGGCTGGGAGTCTGAGACATCACTCAAGATATATGCTCGGT
AACGTATGCTCTAGCCATCTAACTATTCCCTATGTCTTATAGGGGCCTACGTTATCTGCCTGTCGAACCATAGGATTCGCGTCAGCGCGCAGGCTTGGATCGAGATGAAATCTCCGGAGCCTAAGACCACGAGCGTCTGGCGTCTTGGCTAATCCCCCTACATGTTGTTATAAACAATCAGTGGAAACTCAGTGCTAGAGGGTGGAGTGACCTTAAATCAAGGACGATATTAATCGGAAGGAGTATTCAACGCAATGAAGTCGCAGGGTTGACGTGGGAATGGTGCTTCTGTCCAAACAGGTAAGGGTATGAGGCCGCAACCGTCCCCCAAGCGTACAGGGTGCACTTTGCAACGATTTCGGAGTCCAAAGACTCGCTGTTTTCGAAATTTGCGCTCAAGGGCGAGTATTGAACCAGGCTTACGCCCAAGAACGTAGCAAGGTGACTCAAACAAAGTACATCTTGCCCGCGTTTCATATGAATCAAGTTAGAAGTTATGGAGCATAATAACATGTGGATGGCCAGTGGTCGGTTGCTACACCCCTGCCGCAACGTTGAAGGTCCCGGATTAGACTGGCTGGATCTATGCCGTGACACCCGTTATACTCCATTACCGTCTGTGGGTCACAGCTTGTTGTGGACTGGATTGCCATTCTCTCAGTGTATTACGCAGGCCGGCGCACGGGTCCCATATAAACCTGTCATAGCTTACCTGACTCTACTTGGAAATGTGGCTAGGCCTTTGCCCACGCACCTGATCGGTCCTCGTTTGCTTTTTAGGACCGGATGAACTACAGAGCATTGCAAGAATCTCTACCTG
--------------------------------------------------------------------------------
/test_data/plasmid.bam:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epi2me-labs/wf-clone-validation/e0e56b944dcf3efd54cde2bdc8dced104fc6c0bc/test_data/plasmid.bam
--------------------------------------------------------------------------------
/test_data/sample_sheet.txt:
--------------------------------------------------------------------------------
1 | barcode,alias
2 | barcode01,sample01
3 | barcode02,sample02
4 | barcode04,sample04
5 | barcode06,06
6 |
--------------------------------------------------------------------------------
/test_data/sample_sheet_cutsite.csv:
--------------------------------------------------------------------------------
1 | barcode,alias,cut_site
2 | barcode01,sample01,CAGCAGGTTTCCGAGATTATGCGCC
3 | barcode02,sample02,CAGCAGGTTTCCGAGATTATGCGCC
4 | barcode04,sample04,AGGTTTCCGAGATTATGCG
5 | barcode06,06,CAGCAGGTTTCCGAGATTATGCGCC
6 |
--------------------------------------------------------------------------------
/test_data/sample_sheet_hosts.csv:
--------------------------------------------------------------------------------
1 | barcode,alias,approx_size,insert_reference,full_reference,host_reference,regions_bedfile
2 | barcode01,sample01,3000,wf-clone-validation/data/clone_val_test/references/ref2.insert.fasta,wf-clone-validation/data/clone_val_test/references/ref2.full.fasta,wf-clone-validation/data/clone_val_test/host_reference.fa.gz,wf-clone-validation/data/clone_val_test/reference.bed
3 | barcode02,sample02,3000,wf-clone-validation/data/clone_val_test/references/ref2.insert.fasta,wf-clone-validation/data/clone_val_test/references/ref2.full.fasta,wf-clone-validation/data/clone_val_test/host_reference.fa.gz,wf-clone-validation/data/clone_val_test/reference.bed
4 | barcode03,sample03,3000,wf-clone-validation/data/clone_val_test/references/ref1.insert.fasta,wf-clone-validation/data/clone_val_test/references/ref1.full.fasta,wf-clone-validation/data/clone_val_test/host_reference.fa.gz,wf-clone-validation/data/clone_val_test/reference.bed
5 | barcode04,sample04,3000,wf-clone-validation/data/clone_val_test/references/ref1.insert.fasta,wf-clone-validation/data/clone_val_test/references/ref1.full.fasta,wf-clone-validation/data/clone_val_test/host_reference.fa.gz,wf-clone-validation/data/clone_val_test/reference.bed
6 | barcode05,sample05,3000,wf-clone-validation/data/clone_val_test/references/ref2.insert.fasta,wf-clone-validation/data/clone_val_test/references/ref2.full.fasta
7 | barcode06,sample06,3000,wf-clone-validation/data/clone_val_test/references/ref2.insert.fasta,wf-clone-validation/data/clone_val_test/references/ref2.full.fasta
8 | barcode07,sample07,3000,wf-clone-validation/data/clone_val_test/references/ref1.insert.fasta,wf-clone-validation/data/clone_val_test/references/ref1.full.fasta,wf-clone-validation/data/clone_val_test/host_reference.fa.gz,wf-clone-validation/data/clone_val_test/reference.bed
9 | barcode08,sample08,3000,wf-clone-validation/data/clone_val_test/references/ref1.insert.fasta,wf-clone-validation/data/clone_val_test/references/ref1.full.fasta,wf-clone-validation/data/clone_val_test/host_reference.fa.gz,wf-clone-validation/data/clone_val_test/reference.bed
10 | barcode09,sample09,3000,wf-clone-validation/data/clone_val_test/references/ref2.insert.fasta,wf-clone-validation/data/clone_val_test/references/ref2.full.fasta,wf-clone-validation/data/clone_val_test/host_reference.fa.gz,wf-clone-validation/data/clone_val_test/reference.bed
11 | barcode10,sample10,3000,wf-clone-validation/data/clone_val_test/references/ref2.insert.fasta,wf-clone-validation/data/clone_val_test/references/ref2.full.fasta,wf-clone-validation/data/clone_val_test/host_reference.fa.gz,wf-clone-validation/data/clone_val_test/reference.bed
12 | barcode11,sample11,3000,wf-clone-validation/data/clone_val_test/references/ref1.insert.fasta,wf-clone-validation/data/clone_val_test/references/ref1.full.fasta,wf-clone-validation/data/clone_val_test/host_reference.fa.gz,wf-clone-validation/data/clone_val_test/reference.bed
13 | barcode12,sample12,3000,wf-clone-validation/data/clone_val_test/references/ref1.insert.fasta,wf-clone-validation/data/clone_val_test/references/ref1.full.fasta,wf-clone-validation/data/clone_val_test/host_reference.fa.gz,wf-clone-validation/data/clone_val_test/reference.bed
14 |
--------------------------------------------------------------------------------
/test_data/sample_sheet_number.csv:
--------------------------------------------------------------------------------
1 | barcode,alias
2 | barcode01,01
3 | barcode02,02
4 |
--------------------------------------------------------------------------------
/test_data/test/barcode04/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epi2me-labs/wf-clone-validation/e0e56b944dcf3efd54cde2bdc8dced104fc6c0bc/test_data/test/barcode04/.gitkeep
--------------------------------------------------------------------------------
/test_data/test_forward_full_ref.fasta:
--------------------------------------------------------------------------------
1 | >forward_full_ref
2 | AGTTTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGAAGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGATTATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTTATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAACGCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGCTTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGTTCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTATATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTGTCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGCCTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTGGATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGCCGGGCAATAATGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGTTTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTGCTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAAAGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCTGGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGTCCCTAGTTTTGTTTCTGGTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGATAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTATCTTGCTGCTGCATTTCCTGAGCTTAATGC
3 |
--------------------------------------------------------------------------------
/test_data/test_medaka_model.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/epi2me-labs/wf-clone-validation/e0e56b944dcf3efd54cde2bdc8dced104fc6c0bc/test_data/test_medaka_model.tar.gz
--------------------------------------------------------------------------------
/test_data/test_reverse_full_ref.fasta:
--------------------------------------------------------------------------------
1 | >reverse_full_ref_with_errors
2 | TTCTGCGTCATGGAAGCGATAAAACTCTGCAGTGGTTGGATACGCCAATCATTTTTATCGAAGCGCGCATAAATTTGAGCATTTGTCGTCACAGGTTGCGCCGCCAAACGTCGGCTACAGTAACTTTTTCCCAGCCTCAATCTGATCTCTCTTTTTGCGTTCTGCTTCAATATCTGGTTGAATGCGTCGCGTCGTAACCCAGCTTGGTAAGTTGGATTAAGCACTCCGTGGACAGATTTCCATTGTGAGCATTTTCATCCCGAAGTTGCGGCTGATCTGATTCTGAACAGCTTCTTGGGAAGTAGCGACAGCTTGGTTTTTAGTGAGTTGTTCCATTTCTTAGCTCCTAGACCTTTAGCAGCAAGGTCCATATCTGACTTTTTGTTAACGTATTTAGCCACATAGAAACCAACAGCCATATAACTGGTAGCTTTAAGCGGCTCACCTTTAGCATCAACAGGCCACAACCAACCAGAACGTGAAAAAGCGTCCTGCGTGTAGCGAACTGCGATGGGCATACTGTAACCATAAGGCCACGTATTTTGCAAGCTATTTAACCTGGCGGCGATTGCGTACCCGACGACCAAAATTAGGGTCAACGCTACCTGTAGGAAGTGTCCGCATAAAGTGCACCGCATGGAAATGAAGACGGCCATTAGCTGTACCATACTCAGGCACACAAAAATACTGATAGCAGTCGGCGTGTGAATCATTAGCCTTGCGACCCTCGGCAGCAAGAACCATACGACCAATATCACGAAAATAGTCACGCAAAGCATTGGGATTATCATAAAACGCCTCTAATCGGTCGTCAGCCAACGTGAGAGTGTCAAAAACGATAAACCAACCATCAGCATGAGCCTGTCGCATTGCATTCATCAAAACGCTGAATAGCAAAGCCTCTACGCGATTTCATAGTGGAGGCCTCCAGCAATCTTGAACACTCATCCTTAATACCTTTCTTTTTGGGGTAATTATACTCATCGCGAATATCCTT
3 |
4 |
--------------------------------------------------------------------------------
/test_data/workflow_glue/deconcatenate/barcode01.fasta:
--------------------------------------------------------------------------------
1 | >barcode01
2 | CGGGGGAAAGCCACGTTGTGTCTCAAAATCTCTGATGTTACATTGCACAAGATAAAAATATATCATCATGAACAATAAAACTGTCTGCTTACATAAACAGTAATACAAGGGGTGTTATGAGCCATATTCAACGGGAAACGTCTTGCTCGAGGCCGCGATTAAATTCCAACATGGATGCTGATTTATATGGGTATAAATGGGCTCGCGATAATGTCGGGCAATCAGGTGCGACAATCTATCGATTGTATGGGAAGCCCGATGCGCCAGAGTTGTTTCTGAAACATGGCAAAGGTAGCGTTGCCAATGATGTTACAGATGAGATGGTCAGACTAAACTGGCTGACGGAATTTATGCCTCTTCCGACCATCAAGCATTTTATCCGTACTCCTGATGATGCATGGTTACTCACCACTGCGATCCCCGGGAAAACAGCATTCCAGGTATTAGAAGAATATCCTGATTCAGGTGAAAATATTGTTGATGCGCTGGCAGTGTTCCTGCGCCGGTTGCATTCGATTCCTGTTTGTAATTGTCCTTTTAACAGCGATCGCGTATTTCGTCTCGCTCAGGCGCAATCACGAATGAATAACGGTTTGGTTGATGCGAGTGATTTTGATGACGAGCGTAATGGCTGGCCTGTTGAACAAGTCTGGAAAGAAATGCATAAGCTTTTGCCATTCTCACCGGATTCAGTCGTCACTCATGGTGATTTCTCACTTGATAACCTTATTTTTGACGAGGGGAAATTAATAGGTTGTATTGATGTTGGACGAGTCGGAATCGCAGACCGATACCAGGATCTTGCCATCCTATGGAACTGCCTCGGTGAGTTTTCTCCTTCATTACAGAAACGGCTTTTTCAAAAATATGGTATTGATAATCCTGATATGAATAAATTGCAGTTTCATTTGATGCTCGATGAGTTTTTCTAATCAGAATTTGTTAATTGGTTGTAACACTGGCAGAGCGGACGATTTGAAGCCCCCTACCCATTTACGCTGACTTGACGGGACGGCGGCTTTGTTGAATAGAGCGTCAGACCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGTTCTTTCCAGCGACATATGCCATACGCCGAACTGAACTCCACCTAATGTCTTGGAGATTGTCGGATAGCGCGGAATAGGATTGTGCCCTCTGAGCGGATAACAATTTCACACAGGAAACAGCTATGACCATGACACATGCATCCACCATCGCAGACTTATCATCACTGAGCCTCCACCTAGCCTCAAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAAATTGATAAGCAATGCTTTCTTATAATGCCAACTTTGTACAAGAAAGCT
GGGTGTTGGCATTATAAAAAAGCATTGCTCATCAATCTGTTGCAACGAACAGGTCACTATCAGTCAAAATAAAATCATTATTTGAGGTCAGGCGGAATGGCACTTCCTTATTCCAGATGCGTGCGGATTATGCATGACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGCTTGTGGTGGATCACAGTTATTGGAGACTTCCTTGGCCGTACGATGGGTTTTGTTTCAGGTATCCCAACCTGGACGTCTCGTATAATGCGACAAACAACAGATAAAACGAAAGGCCCAGTCTTTCGACTGAGCCTTTCGTTTTATTTGGATCCACTAGTTCTAGAGCGGCGGGGGAAAGCCACGTTGTGTCTCAAAATCTCTGATGTTACATTGCACAAGATAAAAATATATCATCATGAACAATAAAACTGTCTGCTTACATAAACAGTAATACAAGGGGTGTTATGAGCCATATTCAACGGGAAACGTCTTGCTCGAGGCCGCGATTAAATTCCAACATGGATGCTGATTTATATGGGTATAAATGGGCTCGCGATAATGTCGGGCAATCAGGTGCGACAATCTATCGATTGTATGGGAAGCCCGATGCGCCAGAGTTGTTTCTGAAACATGGCAAAGGTAGCGTTGCCAATGATGTTACAGATGAGATGGTCAGACTAAACTGGCTGACGGAATTTATGCCTCTTCCGACCATCAAGCATTTTATCCGTACTCCTGATGATGCATGGTTACTCACCACTGCGATCCCCGGGAAAACAGCATTCCAGGTATTAGAAGAATATCCTGATTCAGGTGAAAATATTGTTGATGCGCTGGCAGTGTTCCTGCGCCGGTTGCATTCGATTCCTGTTTGTAATTGTCCTTTTAACAGCGATCGCGTATTTCGTCTCGCTCAGGCGCAATCACGAATGAATAACGGTTTGGTTGATGCGAGTGATTTTGATGACGAGCGTAATGGCTGGCCTGTTGAACAAGTCTGGAAAGAAATGCATAAGCTTTTGCCATTCTCACCGGATTCAGTCGTCACTCATGGTGATTTCTCACTTGATAACCTTATTTTTGACGAGGGGAAATTAATAGGTTGTATTGATGTTGGACGAGTCGGAATCGCAGACCGATACCAGGATCTTGCCATCCTATGGAACTGCCTCGGTGAGTTTTCTCCTTCATTACAGAAACGGCTTTTTCAAAAATATGGTATTGATAATCCTGATATGAATAAATTGCAGTTTCATTTGATGCTCGATGAGTTTTTCTAATCAGAATTTGTTAATTGGTTGTAACACTGGCAGAGCGGACGATTTGAAGCCCCCTACCCATTTACGCTGACTTGACGGGACGGCGGCTTTGTTGAATAGAGCGTCAGACCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTAT
GGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGTTCTTTCCAGCGACATATGCCATACGCCGAACTGAACTCCACCTAATGTCTTGGAGATTGTCGGATAGCGCGGAATAGGATTGTGCCCTCTGAGCGGATAACAATTTCACACAGGAAACAGCTATGACCATGACACATGCATCCACCATCGCAGACTTATCATCACTGAGCCTCCACCTAGCCTCAAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAAATTGATAAGCAATGCTTTCTTATAATGCCAACAGCCTGCTTTTTTGTACAAAGTTGGCATTATAAAAAAGCATTGCTCATCAATCTGTTGCAACGAACAGGTCACTATCAGTCAAAATAAAATCATTATTTGAGGTCAGGCGGAATGGCACTTCCTTATTCCAGATGCGTGCGGATTATGCATGACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGCTTGTGGTGGATCACAGTTATTGGAGACTTCCTTGGCCGTACGATGGGTTTTGTTTCAGGTATCCCAACCTGGACGTCTCGTATAATGCGACAAACAACAGATAAAACGAAAGGCCCAGTCTTTCGACTGAGCCTTTCGTTTTATTTGGATCCACTAGTTCTAGAGCGG
--------------------------------------------------------------------------------
/test_data/workflow_glue/deconcatenate/barcode01_expected.fasta:
--------------------------------------------------------------------------------
1 | >barcode01
2 | CGGGGGAAAGCCACGTTGTGTCTCAAAATCTCTGATGTTACATTGCACAAGATAAAAATATATCATCATGAACAATAAAACTGTCTGCTTACATAAACAGTAATACAAGGGGTGTTATGAGCCATATTCAACGGGAAACGTCTTGCTCGAGGCCGCGATTAAATTCCAACATGGATGCTGATTTATATGGGTATAAATGGGCTCGCGATAATGTCGGGCAATCAGGTGCGACAATCTATCGATTGTATGGGAAGCCCGATGCGCCAGAGTTGTTTCTGAAACATGGCAAAGGTAGCGTTGCCAATGATGTTACAGATGAGATGGTCAGACTAAACTGGCTGACGGAATTTATGCCTCTTCCGACCATCAAGCATTTTATCCGTACTCCTGATGATGCATGGTTACTCACCACTGCGATCCCCGGGAAAACAGCATTCCAGGTATTAGAAGAATATCCTGATTCAGGTGAAAATATTGTTGATGCGCTGGCAGTGTTCCTGCGCCGGTTGCATTCGATTCCTGTTTGTAATTGTCCTTTTAACAGCGATCGCGTATTTCGTCTCGCTCAGGCGCAATCACGAATGAATAACGGTTTGGTTGATGCGAGTGATTTTGATGACGAGCGTAATGGCTGGCCTGTTGAACAAGTCTGGAAAGAAATGCATAAGCTTTTGCCATTCTCACCGGATTCAGTCGTCACTCATGGTGATTTCTCACTTGATAACCTTATTTTTGACGAGGGGAAATTAATAGGTTGTATTGATGTTGGACGAGTCGGAATCGCAGACCGATACCAGGATCTTGCCATCCTATGGAACTGCCTCGGTGAGTTTTCTCCTTCATTACAGAAACGGCTTTTTCAAAAATATGGTATTGATAATCCTGATATGAATAAATTGCAGTTTCATTTGATGCTCGATGAGTTTTTCTAATCAGAATTTGTTAATTGGTTGTAACACTGGCAGAGCGGACGATTTGAAGCCCCCTACCCATTTACGCTGACTTGACGGGACGGCGGCTTTGTTGAATAGAGCGTCAGACCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGTTCTTTCCAGCGACATATGCCATACGCCGAACTGAACTCCACCTAATGTCTTGGAGATTGTCGGATAGCGCGGAATAGGATTGTGCCCTCTGAGCGGATAACAATTTCACACAGGAAACAGCTATGACCATGACACATGCATCCACCATCGCAGACTTATCATCACTGAGCCTCCACCTAGCCTCAAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAAATTGATAAGCAATGCTTTCTTATAATGCCAACTTTGTACAAGAAAGCT
GGGTGTTGGCATTATAAAAAAGCATTGCTCATCAATCTGTTGCAACGAACAGGTCACTATCAGTCAAAATAAAATCATTATTTGAGGTCAGGCGGAATGGCACTTCCTTATTCCAGATGCGTGCGGATTATGCATGACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGCTTGTGGTGGATCACAGTTATTGGAGACTTCCTTGGCCGTACGATGGGTTTTGTTTCAGGTATCCCAACCTGGACGTCTCGTATAATGCGACAAACAACAGATAAAACGAAAGGCCCAGTCTTTCGACTGAGCCTTTCGTTTTATTTGGATCCACTAGTTCTAGAGCGGCGGGGGAAAGCCACGTTGTGTCTCAAAATCTCTGATGTTACATTGCACAAGATAAAAATATATCATCATGAACAATAAAACTGTCTGCTTACATAAACAGTAATACAAGGGGTGTTATGAGCCATATTCAACGGGAAACGTCTTGCTCGAGGCCGCGATTAAATTCCAACATGGATGCTGATTTATATGGGTATAAATGGGCTCGCGATAATGTCGGGCAATCAGGTGCGACAATCTATCGATTGTATGGGAAGCCCGATGCGCCAGAGTTGTTTCTGAAACATGGCAAAGGTAGCGTTGCCAATGATGTTACAGATGAGATGGTCAGACTAAACTGGCTGACGGAATTTATGCCTCTTCCGACCATCAAGCATTTTATCCGTACTCCTGATGATGCATGGTTACTCACCACTGCGATCCCCGGGAAAACAGCATTCCAGGTATTAGAAGAATATCCTGATTCAGGTGAAAATATTGTTGATGCGCTGGCAGTGTTCCTGCGCCGGTTGCATTCGATTCCTGTTTGTAATTGTCCTTTTAACAGCGATCGCGTATTTCGTCTCGCTCAGGCGCAATCACGAATGAATAACGGTTTGGTTGATGCGAGTGATTTTGATGACGAGCGTAATGGCTGGCCTGTTGAACAAGTCTGGAAAGAAATGCATAAGCTTTTGCCATTCTCACCGGATTCAGTCGTCACTCATGGTGATTTCTCACTTGATAACCTTATTTTTGACGAGGGGAAATTAATAGGTTGTATTGATGTTGGACGAGTCGGAATCGCAGACCGATACCAGGATCTTGCCATCCTATGGAACTGCCTCGGTGAGTTTTCTCCTTCATTACAGAAACGGCTTTTTCAAAAATATGGTATTGATAATCCTGATATGAATAAATTGCAGTTTCATTTGATGCTCGATGAGTTTTTCTAATCAGAATTTGTTAATTGGTTGTAACACTGGCAGAGCGGACGATTTGAAGCCCCCTACCCATTTACGCTGACTTGACGGGACGGCGGCTTTGTTGAATAGAGCGTCAGACCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTAT
GGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGTTCTTTCCAGCGACATATGCCATACGCCGAACTGAACTCCACCTAATGTCTTGGAGATTGTCGGATAGCGCGGAATAGGATTGTGCCCTCTGAGCGGATAACAATTTCACACAGGAAACAGCTATGACCATGACACATGCATCCACCATCGCAGACTTATCATCACTGAGCCTCCACCTAGCCTCAAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAAATTGATAAGCAATGCTTTCTTATAATGCCAACAGCCTGCTTTTTTGTACAAAGTTGGCATTATAAAAAAGCATTGCTCATCAATCTGTTGCAACGAACAGGTCACTATCAGTCAAAATAAAATCATTATTTGAGGTCAGGCGGAATGGCACTTCCTTATTCCAGATGCGTGCGGATTATGCATGACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGCTTGTGGTGGATCACAGTTATTGGAGACTTCCTTGGCCGTACGATGGGTTTTGTTTCAGGTATCCCAACCTGGACGTCTCGTATAATGCGACAAACAACAGATAAAACGAAAGGCCCAGTCTTTCGACTGAGCCTTTCGTTTTATTTGGATCCACTAGTTCTAGAGCGG
--------------------------------------------------------------------------------
/test_data/workflow_glue/deconcatenate/barcode02.fasta:
--------------------------------------------------------------------------------
1 | >barcode02
2 | CGGGGGAAAGCCACGTTGTGTCTCAAAATCTCTGATGTTACATTGCACAAGATAAAAATATATCATCATGAACAATAAAACTGTCTGCTTACATAAACAGTAATACAAGGGGTGTTATGAGCCATATTCAACGGGAAACGTCTTGCTCGAGGCCGCGATTAAATTCCAACATGGATGCTGATTTATATGGGTATAAATGGGCTCGCGATAATGTCGGGCAATCAGGTGCGACAATCTATCGATTGTATGGGAAGCCCGATGCGCCAGAGTTGTTTCTGAAACATGGCAAAGGTAGCGTTGCCAATGATGTTACAGATGAGATGGTCAGACTAAACTGGCTGACGGAATTTATGCCTCTTCCGACCATCAAGCATTTTATCCGTACTCCTGATGATGCATGGTTACTCACCACTGCGATCCCCGGGAAAACAGCATTCCAGGTATTAGAAGAATATCCTGATTCAGGTGAAAATATTGTTGATGCGCTGGCAGTGTTCCTGCGCCGGTTGCATTCGATTCCTGTTTGTAATTGTCCTTTTAACAGCGATCGCGTATTTCGTCTCGCTCAGGCGCAATCACGAATGAATAACGGTTTGGTTGATGCGAGTGATTTTGATGACGAGCGTAATGGCTGGCCTGTTGAACAAGTCTGGAAAGAAATGCATAAGCTTTTGCCATTCTCACCGGATTCAGTCGTCACTCATGGTGATTTCTCACTTGATAACCTTATTTTTGACGAGGGGAAATTAATAGGTTGTATTGATGTTGGACGAGTCGGAATCGCAGACCGATACCAGGATCTTGCCATCCTATGGAACTGCCTCGGTGAGTTTTCTCCTTCATTACAGAAACGGCTTTTTCAAAAATATGGTATTGATAATCCTGATATGAATAAATTGCAGTTTCATTTGATGCTCGATGAGTTTTTCTAATCAGAATTTGTTAATTGGTTGTAACACTGGCAGAGCGGACGATTTGAAGCCCCCTACCCATTTACGCTGACTTGACGGGACGGCGGCTTTGTTGAATAGAGCGTCAGACCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTGTATTGATGTTGGACGAGTCGGAATCGCAGACCGATACCAGGATCTTGCCATCCTATGGAACTGCCTCGGTGAGTTTTCTCCTTCATTACAGAAACGGCTTTTTCAAAAATATGGTATTGATAATCCTGATATGAATAAATTGCAGTTTCATTTGATGCTCGATGAGTTTTTCTAATCAGAATTTGTTAATTGGTTGTAACACTGGCAGAGCGGACGATTTGAAGCCCCCTACCCATTTACGCTGACTTGACGGGACGGCGGCTTTGTTGAATAGAGCGTCAGACCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGA
TACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGTTCTTTCCAGCGACATATGCCATACGCCGAACTGAACTCCACCTAATGTCTTGGAGATTGTCGGATAGCGCGGAATAGGATTGTGCCCTCTGAGCGGATAACAATTTCACACAGGAAACAGCTATGACCATGACACATGCATCCACCATCGCAGACTTATCATCACTGAGCCTCCACCTAGCCTCAAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAAATTGATAAGCAATGCTTTCTTATAATGCCAACTTTGTACAAGAAAGCTGGGTGTTGGCATTATAAAAAAGCATTGCTCATCAATCTGTTGCAACGAACAGGTCACTATCAGTCAAAATAAAATCATTATTTGAGGTCAGGCGGAATGGCACTTCCTTATTCCAGATGCGTGCGGATTATGCATGACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGCTTGTGGTGGATCACAGTTATTGGAGACTTCCTTGGCCGTACGATGGGTTTTGTTTCAGGTATCCCAACCTGGACGTCTCGTATAATGCGACAAACAACAGATAAAACGAAAGGCCCAGTCTTTCGACTGAGCCTTTCGTTTTATTTGGATCCACTAGTTCTAGAGCGG
--------------------------------------------------------------------------------
/test_data/workflow_glue/deconcatenate/barcode02_expected.fasta:
--------------------------------------------------------------------------------
1 | >barcode02
2 | CGGGGGAAAGCCACGTTGTGTCTCAAAATCTCTGATGTTACATTGCACAAGATAAAAATATATCATCATGAACAATAAAACTGTCTGCTTACATAAACAGTAATACAAGGGGTGTTATGAGCCATATTCAACGGGAAACGTCTTGCTCGAGGCCGCGATTAAATTCCAACATGGATGCTGATTTATATGGGTATAAATGGGCTCGCGATAATGTCGGGCAATCAGGTGCGACAATCTATCGATTGTATGGGAAGCCCGATGCGCCAGAGTTGTTTCTGAAACATGGCAAAGGTAGCGTTGCCAATGATGTTACAGATGAGATGGTCAGACTAAACTGGCTGACGGAATTTATGCCTCTTCCGACCATCAAGCATTTTATCCGTACTCCTGATGATGCATGGTTACTCACCACTGCGATCCCCGGGAAAACAGCATTCCAGGTATTAGAAGAATATCCTGATTCAGGTGAAAATATTGTTGATGCGCTGGCAGTGTTCCTGCGCCGGTTGCATTCGATTCCTGTTTGTAATTGTCCTTTTAACAGCGATCGCGTATTTCGTCTCGCTCAGGCGCAATCACGAATGAATAACGGTTTGGTTGATGCGAGTGATTTTGATGACGAGCGTAATGGCTGGCCTGTTGAACAAGTCTGGAAAGAAATGCATAAGCTTTTGCCATTCTCACCGGATTCAGTCGTCACTCATGGTGATTTCTCACTTGATAACCTTATTTTTGACGAGGGGAAATTAATAGGTTGTATTGATGTTGGACGAGTCGGAATCGCAGACCGATACCAGGATCTTGCCATCCTATGGAACTGCCTCGGTGAGTTTTCTCCTTCATTACAGAAACGGCTTTTTCAAAAATATGGTATTGATAATCCTGATATGAATAAATTGCAGTTTCATTTGATGCTCGATGAGTTTTTCTAATCAGAATTTGTTAATTGGTTGTAACACTGGCAGAGCGGACGATTTGAAGCCCCCTACCCATTTACGCTGACTTGACGGGACGGCGGCTTTGTTGAATAGAGCGTCAGACCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGTTCTTTCCAGCGACATATGCCATACGCCGAACTGAACTCCACCTAATGTCTTGGAGATTGTCGGATAGCGCGGAATAGGATTGTGCCCTCTGAGCGGATAACAATTTCACACAGGAAACAGCTATGACCATGACACATGCATCCACCATCGCAGACTTATCATCACTGAGCCTCCACCTAGCCTCAAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAAATTGATAAGCAATGCTTTCTTATAATGCCAACTTTGTACAAGAAAGCT
GGGTGTTGGCATTATAAAAAAGCATTGCTCATCAATCTGTTGCAACGAACAGGTCACTATCAGTCAAAATAAAATCATTATTTGAGGTCAGGCGGAATGGCACTTCCTTATTCCAGATGCGTGCGGATTATGCATGACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGCTTGTGGTGGATCACAGTTATTGGAGACTTCCTTGGCCGTACGATGGGTTTTGTTTCAGGTATCCCAACCTGGACGTCTCGTATAATGCGACAAACAACAGATAAAACGAAAGGCCCAGTCTTTCGACTGAGCCTTTCGTTTTATTTGGATCCACTAGTTCTAGAGCGG
--------------------------------------------------------------------------------
/test_data/workflow_glue/deconcatenate/barcode03.fasta:
--------------------------------------------------------------------------------
1 | >barcode03
2 | TTGTATTGATGTTGGACGAGTCGGAATCGCAGACCGATACCAGGATCTTGCCATCCTATGGAACTGCCTCGGTGAGTTTTCTCCTTCATTACAGAAACGGCTTTTTCAAAAATATGGTATTGATAATCCTGATATGAATAAATTGCAGTTTCATTTGATGCTCGATGAGTTTTTCTAATCAGAATTTGTTAATTGGTTGTAACACTGGCAGAGCGGACGATTTGAAGCCCCCTACCCATTTACGCTGACTTGACGGGACGGCGGCTTTGTTGAATAGAGCGTCAGACCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTCGGGGGAAAGCCACGTTGTGTCTCAAAATCTCTGATGTTACATTGCACAAGATAAAAATATATCATCATGAACAATAAAACTGTCTGCTTACATAAACAGTAATACAAGGGGTGTTATGAGCCATATTCAACGGGAAACGTCTTGCTCGAGGCCGCGATTAAATTCCAACATGGATGCTGATTTATATGGGTATAAATGGGCTCGCGATAATGTCGGGCAATCAGGTGCGACAATCTATCGATTGTATGGGAAGCCCGATGCGCCAGAGTTGTTTCTGAAACATGGCAAAGGTAGCGTTGCCAATGATGTTACAGATGAGATGGTCAGACTAAACTGGCTGACGGAATTTATGCCTCTTCCGACCATCAAGCATTTTATCCGTACTCCTGATGATGCATGGTTACTCACCACTGCGATCCCCGGGAAAACAGCATTCCAGGTATTAGAAGAATATCCTGATTCAGGTGAAAATATTGTTGATGCGCTGGCAGTGTTCCTGCGCCGGTTGCATTCGATTCCTGTTTGTAATTGTCCTTTTAACAGCGATCGCGTATTTCGTCTCGCTCAGGCGCAATCACGAATGAATAACGGTTTGGTTGATGCGAGTGATTTTGATGACGAGCGTAATGGCTGGCCTGTTGAACAAGTCTGGAAAGAAATGCATAAGCTTTTGCCATTCTCACCGGATTCAGTCGTCACTCATGGTGATTTCTCACTTGATAACCTTATTTTTGACGAGGGGAAATTAATAGGTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGTTCTTTCCAGCGACATATGCCATACGCCGAACTGAACTCCACCTAATGTCTTGGAGATTGTCGGATAGCGCGGAATAGGATTGTGCCCTCTGAGCGGATAACAATTTCACACAGGAAACAGCTATGACCATGACACATGCATCCACCATCGCAGACTTATCATCACTGAGCCTCCACCTAGCCTCAAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAAATTGATAAGCAATGCTTTCTTATAATGCCAACTTTGTACAAGAAAGCT
GGGTGTTGGCATTATAAAAAAGCATTGCTCATCAATCTGTTGCAACGAACAGGTCACTATCAGTCAAAATAAAATCATTATTTGAGGTCAGGCGGAATGGCACTTCCTTATTCCAGATGCGTGCGGATTATGCATGACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGCTTGTGGTGGATCACAGTTATTGGAGACTTCCTTGGCCGTACGATGGGTTTTGTTTCAGGTATCCCAACCTGGACGTCTCGTATAATGCGACAAACAACAGATAAAACGAAAGGCCCAGTCTTTCGACTGAGCCTTTCGTTTTATTTGGATCCACTAGTTCTAGAGCGGTTGTATTGATGTTGGACGAGTCGGAATCGCAGACCGATACCAGGATCTTGCCATCCTATGGAACTGCCTCGGTGAGTTTTCTCCTTCATTACAGAAACGGCTTTTTCAAAAATATGGTATTGATAATCCTGATATGAATAAATTGCAGTTTCATTTGATGCTCGATGAGTTTTTCTAATCAGAATTTGTTAATTGGTTGTAACACTGGCAGAGCGGACGATTTGAAGCCCCCTACCCATTTACGCTGACTTGACGGGACGGCGGCTTTGTTGAATAGAGCGTCAGACCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGT
--------------------------------------------------------------------------------
/test_data/workflow_glue/deconcatenate/barcode03_expected.fasta:
--------------------------------------------------------------------------------
1 | >barcode03
2 | TTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGTTCTTTCCAGCGACATATGCCATACGCCGAACTGAACTCCACCTAATGTCTTGGAGATTGTCGGATAGCGCGGAATAGGATTGTGCCCTCTGAGCGGATAACAATTTCACACAGGAAACAGCTATGACCATGACACATGCATCCACCATCGCAGACTTATCATCACTGAGCCTCCACCTAGCCTCAAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAAATTGATAAGCAATGCTTTCTTATAATGCCAACTTTGTACAAGAAAGCTGGGTGTTGGCATTATAAAAAAGCATTGCTCATCAATCTGTTGCAACGAACAGGTCACTATCAGTCAAAATAAAATCATTATTTGAGGTCAGGCGGAATGGCACTTCCTTATTCCAGATGCGTGCGGATTATGCATGACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGCTTGTGGTGGATCACAGTTATTGGAGACTTCCTTGGCCGTACGATGGGTTTTGTTTCAGGTATCCCAACCTGGACGTCTCGTATAATGCGACAAACAACAGATAAAACGAAAGGCCCAGTCTTTCGACTGAGCCTTTCGTTTTATTTGGATCCACTAGTTCTAGAGCGGTTGTATTGATGTTGGACGAGTCGGAATCGCAGACCGATACCAGGATCTTGCCATCCTATGGAACTGCCTCGGTGAGTTTTCTCCTTCATTACAGAAACGGCTTTTTCAAAAATATGGTATTGATAATCCTGATATGAATAAATTGCAGTTTCATTTGATGCTCGATGAGTTTTTCTAATCAGAATTTGTTAATTGGTTGTAACACTGGCAGAGCGGACGATTTGAAGCCCCCTACCCATTTACGCTGACTTGACGGGACGGCGGCTTTGTTGAATAGAGCGTCAGACCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTCGGGGGAAAGCCACGTTGTGTCTCAAAATCTCTGATGTTACATTGCACAAGATAAAAATATATCATCATGAACAATAAAACTGTCTGCTTACATAAACAGTAATACAAGGGGTGTTATGAGCCATATTCAACGGGAAACGTCTTGCTCGAGGCCGCGATTAAATTCCAACATGGATGCTGATTTATATGGGTATAAATGGGCTCGCGATAATGTCGGGCAATCAGGTGCGACAATCTATCGATTGTATGGGAAGCCCGATGCGCCAGAGTTGTTTCTGAAACATGGCAAAGGTAGCGTTGCCAATGATGTTACAGATGAGATGGTCAGACTAAACTGGCTGACGGAATTTATGCCTCTTCCGACCATCAAGCATTTTATCCGTACTCCTGATGATGCATGGTTAC
TCACCACTGCGATCCCCGGGAAAACAGCATTCCAGGTATTAGAAGAATATCCTGATTCAGGTGAAAATATTGTTGATGCGCTGGCAGTGTTCCTGCGCCGGTTGCATTCGATTCCTGTTTGTAATTGTCCTTTTAACAGCGATCGCGTATTTCGTCTCGCTCAGGCGCAATCACGAATGAATAACGGTTTGGTTGATGCGAGTGATTTTGATGACGAGCGTAATGGCTGGCCTGTTGAACAAGTCTGGAAAGAAATGCATAAGCTTTTGCCATTCTCACCGGATTCAGTCGTCACTCATGGTGATTTCTCACTTGATAACCTTATTTTTGACGAGGGGAAATTAATAGG
--------------------------------------------------------------------------------
/test_data/workflow_glue/deconcatenate/barcode04.fasta:
--------------------------------------------------------------------------------
1 | >barcode04
2 | AAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGTTCTTTCCAGCGACATATGCCATACGCCGAACTGAACTCCACCTAATGTCTTGGAGATTGTCGGATAGCGCGGAATAGGATTGTGCCCTCTGAGCGGATAACAATTTCACACAGGAAACAGCTATGACCATGACACATGCATCCACCATCGCAGACTTATCATCACTGAGCCTCCACCTAGCCTCAAATAATGATTTTATTTTGACTGATACCCGGGAAAACAGCATTCCAGGTATTAGAAGAATATCCTGATTCAGGTGAAAATATTGTTGATGCGCTGGCAGTGTTCCTGCGCCGGTTGCATTCGATTCCTGTTTGTAATTGTCCTTTTAACAGCGATCGCGTATTTCGTCTCGCTCAGGCGCAATCACGAATGAATAACGGTTTGGTTGATGCGAGTGATTTTGATGACGAGCGTAATGGCTGGCCTGTTGAACAAGTCTGGAAAGAAATGCATAAGCTTTTGCCATTCTCACCGGATTCAGTCGTCACTCATGGTGATTTCTCACTTGATAACCTTATTTTTGACGAGGGGAAATTAATAGGTTGTATTGATGTTGGACGAGTCGGAATCGCAGACCGATACCAGGATCTTGCCATCCTATGGAACTGCCTCGGTGAGTTTTCTCCTTCATTACAGAAACGGCTTTTTCAAAAATATGGTATTGATAATCCTGATATGAATAAATTGCAGTTTCATTTGATGCTCGATGAGTTTTTCTAATCAGAATTTGTTAATTGGTTGTAACACTGGCAGAGCGGACGATTTGAAGCCCCCTACCCATTTACGCTGACTTGACGGGACGGCGGCTTTGTTGAATAGAGCGTCAGACCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGGTGACCTGTTCGTTGCAACAAATTGATAAGCAATGCTTTCTTATAATGCCAACTTTGTACAAGAAAGCTGGGTGTTGGCATTATAAAAAAGCATTGCTCATCAATCTGTTGCAACGAACAGGTCACTATCAGTCAAAATAAAATCATTATTTGAGGTCAGGCGGAATGGCACTTCCTTATTCCAGATGCGTGCGGATTATGCATGACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGCTTGTGGTGGATCACAGTTATTGGAGACTTCCTTGGCCGTACGATGGGTTTTGTTTCAGGTATCCCAACCTGGACGTCTCGTATAATGCGACAAACAACAGATAAAACGAAAGGCCCAGTCTTTCGACTGAGCCTTTCGTTTTATTTGGATCCACTAGTTCTAGAGCGGCGGGGGAAAGCCACGTTGTGTCTCAAAATCTCTGATGTTACATTGCACAAGATAAAAATATATCATCATG
AACAATAAAACTGTCTGCTTACATAAACAGTAATACAAGGGGTGTTATGAGCCATATTCAACGGGAAACGTCTTGCTCGAGGCCGCGATTAAATTCCAACATGGATGCTGATTTATATGGGTATAAATGGGCTCGCGATAATGTCGGGCAATCAGGTGCGACAATCTATCGATTGTATGGGAAGCCCGATGCGCCAGAGTTGTTTCTGAAACATGGCAAAGGTAGCGTTGCCAATGATGTTACAGATGAGATGGTCAGACTAAACTGGCTGACGGAATTTATGCCTCTTCCGACCATCAAGCATTTTATCCGTACTCCTGATGATGCATGGTTACTCACCACTGCGATCAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGTTCTTTCCAGCGACATATGCCATACGCCGAACTGAACTCCACCTAATGTCTTGGAGATTGTCGGATAGCGCGGAATAGGATTGTGCCCTCTGAGCGGATAACAATTTCACACAGGAAACAGCTATGACCATGACACATGCATCCACCATCGCAGACTTATCATCACTGAGCCTCCACCTAGCCTCAAATAATGATTTTATTTTGACTGATACCCGGGAAAACAGCATTCCAGGTATTAGAAGAATATCCTGATTCAGGTGAAAATATTGTTGATGCGCTGGCAGTGTTCCTGCGCCGGTTGCATTCGATTCCTGTTTGTAATTGTCCTTTTAACAGCGATCGCGTATTTCGTCTCGCTCAGGCGCAATCACGAATGAATAACGGTTTGGTTGATGCGAGTGATTTTGATGACGAGCGTAATGGCTGGCCTGTTGAACAAGTCTGGAAAGAAATGCATAAGCTTTTGCCATTCTCACCGGATTCAGTCGTCACTCATGGTGATTTCTCACTTGATAACCTTATTTTTGACGAGGGGAAATTAATAGGTTGTATTGATGTTGGACGAGTCGGAATCGCAGACCGATACCAGGATCTTGCCATCCTATGGAACTGCCTCGGTGAGTTTTCTCCTTCATTACAGAAACGGCTTTTTCAAAAATATGGTATTGATAATCCTGATATGAATAAATTGCAGTTTCATTTGATGCTCGATGAGTTTTTCTAATCAGAATTTGTTAATTGGTTGTAACACTGGCAGAGCGGACGATTTGAAGCCCCCTACCCATTTACGCTGACTTGACGGGACGGCGGCTTTGTTGAATAGAGCGTCAGACCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGGTGACCTGTTCGTTGCAACAAATTGATAAGCAATGCTTTCTTATAATGCCAACTTTGTACAAGAAAGCTGGGTGTTGGCATTATAAAAAAGCATTGCTCATCAATCTGTTGCAACGAACAGGTCACTATCAGTCAAAATAAAA
TCATTATTTGAGGTCAGGCGGAATGGCACTTCCTTATTCCAGATGCGTGCGGATTATGCATGACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGCTTGTGGTGGATCACAGTTATTGGAGACTTCCTTGGCCGTACGATGGGTTTTGTTTCAGGTATCCCAACCTGGACGTCTCGTATAATGCGACAAACAACAGATAAAACGAAAGGCCCAGTCTTTCGACTGAGCCTTTCGTTTTATTTGGATCCACTAGTTCTAGAGCGGCGGGGGAAAGCCACGTTGTGTCTCAAAATCTCTGATGTTACATTGCACAAGATAAAAATATATCATCATGAACAATAAAACTGTCTGCTTACATAAACAGTAATACAAGGGGTGTTATGAGCCATATTCAACGGGAAACGTCTTGCTCGAGGCCGCGATTAAATTCCAACATGGATGCTGATTTATATGGGTATAAATGGGCTCGCGATAATGTCGGGCAATCAGGTGCGACAATCTATCGATTGTATGGGAAGCCCGATGCGCCAGAGTTGTTTCTGAAACATGGCAAAGGTAGCGTTGCCAATGATGTTACAGATGAGATGGTCAGACTAAACTGGCTGACGGAATTTATGCCTCTTCCGACCATCAAGCATTTTATCCGTACTCCTGATGATGCATGGTTACTCACCACTGCGATCAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGTTCTTTCCAGCGACATATGCCATACGCCGAACTGAACTCCACCTAATGTCTTGGAGATTGTCGGATAGCGCGGAATAGGATTGTGCCCTCTGAGCGGATAACAATTTCACACAGGAAACAGCTATGACCATGACACATGCATCCACCATCGCAGACTTATCATCACTGAGCCTCCACCTAGCCTCAAATAATGATTTTATTTTGACTGATACCCGGGAAAACAGCATTCCAGGTATTAGAAGAATATCCTGATTCAGGTGAAAATATTGTTGATGCGCTGGCAGTGTTCCTGCGCCGGTTGCATTCGATTCCTGTTTGTAATTGTCCTTTTAACAGCGATCGCGTATTTCGTCTCGCTCAGGCGCAATCACGAATGAATAACGGTTTGGTTGATGCGAGTGATTTTGATGACGAGCGTAATGGCTGGCCTGTTGAACAAGTCTGGAAAGAAATGCATAAGCTTTTGCCATTCTCACCGGATTCAGTCGTCACTCATGGTGATTTCTCACTTGATAACCTTATTTTTGACGAGGGGAAATTAATAGGTTGTATTGATGTTGGACGAGTCGGAATCGCAGACCGATACCAGGATCTTGCCATCCTATGGAACTGCCTCGGTGAGTTTTCTCCTTCATTACAGAAACGGCTTTTTCAAAAATATGGTATTGATAATCCTGATATGAATAAATTGCAGTTTCATTTGATGCTCGATGAGTTTTTCTAATCAGAATTTGTTAATTGGTTGTAACACTGGCAGAGCGGA
CGATTTGAAGCCCCCTACCCATTTACGCTGACTTGACGGGACGGCGGCTTTGTTGAATAGAGCGTCAGACCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCG
--------------------------------------------------------------------------------
/test_data/workflow_glue/deconcatenate/barcode04_expected.fasta:
--------------------------------------------------------------------------------
1 | >barcode04
2 | AAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGTTCTTTCCAGCGACATATGCCATACGCCGAACTGAACTCCACCTAATGTCTTGGAGATTGTCGGATAGCGCGGAATAGGATTGTGCCCTCTGAGCGGATAACAATTTCACACAGGAAACAGCTATGACCATGACACATGCATCCACCATCGCAGACTTATCATCACTGAGCCTCCACCTAGCCTCAAATAATGATTTTATTTTGACTGATACCCGGGAAAACAGCATTCCAGGTATTAGAAGAATATCCTGATTCAGGTGAAAATATTGTTGATGCGCTGGCAGTGTTCCTGCGCCGGTTGCATTCGATTCCTGTTTGTAATTGTCCTTTTAACAGCGATCGCGTATTTCGTCTCGCTCAGGCGCAATCACGAATGAATAACGGTTTGGTTGATGCGAGTGATTTTGATGACGAGCGTAATGGCTGGCCTGTTGAACAAGTCTGGAAAGAAATGCATAAGCTTTTGCCATTCTCACCGGATTCAGTCGTCACTCATGGTGATTTCTCACTTGATAACCTTATTTTTGACGAGGGGAAATTAATAGGTTGTATTGATGTTGGACGAGTCGGAATCGCAGACCGATACCAGGATCTTGCCATCCTATGGAACTGCCTCGGTGAGTTTTCTCCTTCATTACAGAAACGGCTTTTTCAAAAATATGGTATTGATAATCCTGATATGAATAAATTGCAGTTTCATTTGATGCTCGATGAGTTTTTCTAATCAGAATTTGTTAATTGGTTGTAACACTGGCAGAGCGGACGATTTGAAGCCCCCTACCCATTTACGCTGACTTGACGGGACGGCGGCTTTGTTGAATAGAGCGTCAGACCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGGTGACCTGTTCGTTGCAACAAATTGATAAGCAATGCTTTCTTATAATGCCAACTTTGTACAAGAAAGCTGGGTGTTGGCATTATAAAAAAGCATTGCTCATCAATCTGTTGCAACGAACAGGTCACTATCAGTCAAAATAAAATCATTATTTGAGGTCAGGCGGAATGGCACTTCCTTATTCCAGATGCGTGCGGATTATGCATGACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGCTTGTGGTGGATCACAGTTATTGGAGACTTCCTTGGCCGTACGATGGGTTTTGTTTCAGGTATCCCAACCTGGACGTCTCGTATAATGCGACAAACAACAGATAAAACGAAAGGCCCAGTCTTTCGACTGAGCCTTTCGTTTTATTTGGATCCACTAGTTCTAGAGCGGCGGGGGAAAGCCACGTTGTGTCTCAAAATCTCTGATGTTACATTGCACAAGATAAAAATATATCATCATG
AACAATAAAACTGTCTGCTTACATAAACAGTAATACAAGGGGTGTTATGAGCCATATTCAACGGGAAACGTCTTGCTCGAGGCCGCGATTAAATTCCAACATGGATGCTGATTTATATGGGTATAAATGGGCTCGCGATAATGTCGGGCAATCAGGTGCGACAATCTATCGATTGTATGGGAAGCCCGATGCGCCAGAGTTGTTTCTGAAACATGGCAAAGGTAGCGTTGCCAATGATGTTACAGATGAGATGGTCAGACTAAACTGGCTGACGGAATTTATGCCTCTTCCGACCATCAAGCATTTTATCCGTACTCCTGATGATGCATGGTTACTCACCACTGCGATCAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGTTCTTTCCAGCGACATATGCCATACGCCGAACTGAACTCCACCTAATGTCTTGGAGATTGTCGGATAGCGCGGAATAGGATTGTGCCCTCTGAGCGGATAACAATTTCACACAGGAAACAGCTATGACCATGACACATGCATCCACCATCGCAGACTTATCATCACTGAGCCTCCACCTAGCCTCAAATAATGATTTTATTTTGACTGATACCCGGGAAAACAGCATTCCAGGTATTAGAAGAATATCCTGATTCAGGTGAAAATATTGTTGATGCGCTGGCAGTGTTCCTGCGCCGGTTGCATTCGATTCCTGTTTGTAATTGTCCTTTTAACAGCGATCGCGTATTTCGTCTCGCTCAGGCGCAATCACGAATGAATAACGGTTTGGTTGATGCGAGTGATTTTGATGACGAGCGTAATGGCTGGCCTGTTGAACAAGTCTGGAAAGAAATGCATAAGCTTTTGCCATTCTCACCGGATTCAGTCGTCACTCATGGTGATTTCTCACTTGATAACCTTATTTTTGACGAGGGGAAATTAATAGGTTGTATTGATGTTGGACGAGTCGGAATCGCAGACCGATACCAGGATCTTGCCATCCTATGGAACTGCCTCGGTGAGTTTTCTCCTTCATTACAGAAACGGCTTTTTCAAAAATATGGTATTGATAATCCTGATATGAATAAATTGCAGTTTCATTTGATGCTCGATGAGTTTTTCTAATCAGAATTTGTTAATTGGTTGTAACACTGGCAGAGCGGACGATTTGAAGCCCCCTACCCATTTACGCTGACTTGACGGGACGGCGGCTTTGTTGAATAGAGCGTCAGACCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCG
--------------------------------------------------------------------------------
/test_data/workflow_glue/deconcatenate/barcode05.fasta:
--------------------------------------------------------------------------------
1 | >barcode05
2 | CGGGGGAAAGCCACGTTGTGTCTCAAAATCTCTGATGTTACATTGCACAAGATAAAAATATATCATCATGAACAATAAAACTGTCTGCTTACATAAACAGTAATACAAGGGGTGTTATGAGCCATATTCAACGGGAAACGTCTTGCTCGAGGCCGCGATTAAATTCCAACATGGATGCTGATTTATATGGGTATAAATGGGCTCGCGATAATGTCGGGCAATCAGGTGCGACAATCTATCGATTGTATGGGAAGCCCGATGCGCCAGAGTTGTTTCTGAAACATGGCAAAGGTAGCGTTGCCAATGATGTTACAGATGAGATGGTCAGACTAAACTGGCTGACGGAATTTATGCCTCTTCCGACCATCAAGCATTTTATCCGTACTCCTGATGATGCATGGTTACTCACCACTGCGATCCCCGGGAAAACAGCATTCCAGGTATTAGAAGAATATCCTGATTCAGGTGAAAATATTGTTGATGCGCTGGCAGTGTTCCTGCGCCGGTTGCATTCGATTCCTGTTTGTAATTGTCCTTTTAACAGCGATCGCGTATTTCGTCTCGCTCAGGCGCAATCACGAATGAATAACGGTTTGGTTGATGCGAGTGATTTTGATGACGAGCGTAATGGCTGGCCTGTTGAACAAGTCTGGAAAGAAATGCATAAGCTTTTGCCATTCTCACCGGATTCAGTCGTCACTCATGGTGATTTCTCACTTGATAACCTTATTTTTGACGAGGGGAAATTAATAGGTTGTATTGATGTTGGACGAGTCGGAATCGCAGACCGATACCAGGATCTTGCCATCCTATGGAACTGCCTCGGTGAGTTTTCTCCTTCATTACAGAAACGGCTTTTTCAAAAATATGGTATTGATAATCCTGATATGAATAAATTGCAGTTTCATTTGATGCTCGATGAGTTTTTCTAATCAGAATTTGTTAATTGGTTGTAACACTGGCAGAGCGGACGATTTGAAGCCCCCTACCCATTTACGCTGACTTGACGGGACGGCGGCTTTGTTGAATAGAGCGTCAGACCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGTTCTTTCCAGCGACATATGCCATACGCCGAACTGAACTCCACCTAATGTCTTGGAGATTGTCGGATAGCGCGGAATAGGATTGTGCCCTCTGAGCGGATAACAATTTCACACAGGAAACAGCTATGACCATGACACATGCATCCACCATCGCAGACTTATCATCACTGAGCCTCCACCTAGCCTCAAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAAATTGATAAGCAATGCTTTCTTATAATGCCAACTTTGTACAAGAAAGCT
GGGTGTTGGCATTATAAAAAAGCATTGCTCATCAATCTGTTGCAACGAACAGGTCACTATCAGTCAAAATAAAATCATTATTTGAGGTCAGGCGGAATGGCACTTCCTTATTCCAGATGCGTGCGGATTATGCATGACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGCTTGTGGTGGATCACAGTTATTGGAGACTTCCTTGGCCGTACGATGGGTTTTGTTTCAGGTATCCCAACCTGGACGTCTCGTATAATGCGACAAACAACAGATAAAACGAAAGGCCCAGTCTTTCGACTGAGCCTTTCGTTTTATTTGGATCCACTAGTTCTAGAGCGG
--------------------------------------------------------------------------------
/test_data/workflow_glue/deconcatenate/barcode05_expected.fasta:
--------------------------------------------------------------------------------
1 | >barcode05
2 | CGGGGGAAAGCCACGTTGTGTCTCAAAATCTCTGATGTTACATTGCACAAGATAAAAATATATCATCATGAACAATAAAACTGTCTGCTTACATAAACAGTAATACAAGGGGTGTTATGAGCCATATTCAACGGGAAACGTCTTGCTCGAGGCCGCGATTAAATTCCAACATGGATGCTGATTTATATGGGTATAAATGGGCTCGCGATAATGTCGGGCAATCAGGTGCGACAATCTATCGATTGTATGGGAAGCCCGATGCGCCAGAGTTGTTTCTGAAACATGGCAAAGGTAGCGTTGCCAATGATGTTACAGATGAGATGGTCAGACTAAACTGGCTGACGGAATTTATGCCTCTTCCGACCATCAAGCATTTTATCCGTACTCCTGATGATGCATGGTTACTCACCACTGCGATCCCCGGGAAAACAGCATTCCAGGTATTAGAAGAATATCCTGATTCAGGTGAAAATATTGTTGATGCGCTGGCAGTGTTCCTGCGCCGGTTGCATTCGATTCCTGTTTGTAATTGTCCTTTTAACAGCGATCGCGTATTTCGTCTCGCTCAGGCGCAATCACGAATGAATAACGGTTTGGTTGATGCGAGTGATTTTGATGACGAGCGTAATGGCTGGCCTGTTGAACAAGTCTGGAAAGAAATGCATAAGCTTTTGCCATTCTCACCGGATTCAGTCGTCACTCATGGTGATTTCTCACTTGATAACCTTATTTTTGACGAGGGGAAATTAATAGGTTGTATTGATGTTGGACGAGTCGGAATCGCAGACCGATACCAGGATCTTGCCATCCTATGGAACTGCCTCGGTGAGTTTTCTCCTTCATTACAGAAACGGCTTTTTCAAAAATATGGTATTGATAATCCTGATATGAATAAATTGCAGTTTCATTTGATGCTCGATGAGTTTTTCTAATCAGAATTTGTTAATTGGTTGTAACACTGGCAGAGCGGACGATTTGAAGCCCCCTACCCATTTACGCTGACTTGACGGGACGGCGGCTTTGTTGAATAGAGCGTCAGACCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGTTCTTTCCAGCGACATATGCCATACGCCGAACTGAACTCCACCTAATGTCTTGGAGATTGTCGGATAGCGCGGAATAGGATTGTGCCCTCTGAGCGGATAACAATTTCACACAGGAAACAGCTATGACCATGACACATGCATCCACCATCGCAGACTTATCATCACTGAGCCTCCACCTAGCCTCAAATAATGATTTTATTTTGACTGATAGTGACCTGTTCGTTGCAACAAATTGATAAGCAATGCTTTCTTATAATGCCAACTTTGTACAAGAAAGCT
GGGTGTTGGCATTATAAAAAAGCATTGCTCATCAATCTGTTGCAACGAACAGGTCACTATCAGTCAAAATAAAATCATTATTTGAGGTCAGGCGGAATGGCACTTCCTTATTCCAGATGCGTGCGGATTATGCATGACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGCTTGTGGTGGATCACAGTTATTGGAGACTTCCTTGGCCGTACGATGGGTTTTGTTTCAGGTATCCCAACCTGGACGTCTCGTATAATGCGACAAACAACAGATAAAACGAAAGGCCCAGTCTTTCGACTGAGCCTTTCGTTTTATTTGGATCCACTAGTTCTAGAGCGG
--------------------------------------------------------------------------------
/test_data/workflow_glue/find_inserts/assemblies/barcode01.final.fasta:
--------------------------------------------------------------------------------
1 | >barcode01
2 | ACATCACCATGGCGCAGCGGCAGGCTCTTATTAATGGCCCAACGCTGTTTCGGGTTGTCGGTGCCTTGATCGAAGTAGATACCGCCCTGGTCAGAGCTATACGCGTATTCATAACCAGAATACATCGTATTCAGGGTTTTGATGCGAAAACCGCCCCAATCTTCGTACAGCACGAACTTAAACAGTTTACTGTGACCCAGCGTGGCCGCCGGCCAAGATTTACGGCTAACCACGGTCATATATTCCTTGCGTTCAGCATGTTTGATACGAATTTCGGTCTTGCCCAGGATAATCTGGCGACCAACGATAATTTTTGCACGGGAGGTGATCACTTGCGGAATACTCTGCGTACCGCCAATCGGGGTAACTTCATCGATCAGGTACATATATTCAATGTCAGCACCGCCGACATCTGCGTTCAGCTGCCAGCGCGTGAACTTCGAGGTCGGCGGGATCGTAATGGTGTGTTCAATCACTTTTGATTCGTAGACTTCCGTTTCGGTCATCGAAACTTGTGATTCTTCGTGGCTATGCGAATATGAAACTTCCACGGAGCCGATTTCAAAGGCGTCACCCGTGGAGATGGTACTGCCAATAGAATGCGTCGCGGTGACCGTACGGGTTTCGCTATTAACGTTCTTCATGCCTTTCGTGATGGTAATTTTTTGATCCACGGAGGTACTACCACGATTTTCATACACATAACCTTCCTTCCACACAGCAACCACATCAACTTCGATTTGTTCATAACCTTCAGCAGCCTTCGCACTCATATGTATATCTCCTTCTTATAGTTAAACAAAATTATTTCTAGAGGGAAACCGTTGTGGTCTCCCTACGACCAGTCTAAAAAGCGCCTGAATTCGCGACCTTCTCGTTACTGACAGGAAAATGGGCCATTGGCAACCAGGGAAAGATGAACGTGATGATGTTCACAATTTGCTGAATTGTGGTGGACGAATTCTCTAGATATCGCTCAATACTGACCATTTAAATCATACCTGACCTCCATAGCAGAAAGTCAAAAGCCTCCGACCGGAGGCTTTTGACTTGATCGGCACGTAAGAGGTTCCAACTTTCACCATAATGAAATAAGATCACTACCGGGCGTATTTTTTGAGTTATCGAGATTTTCAGGAGCTATGAGCCATATTCAACGGGAAACGTCTTGCTCGAGGCCGCGATTAAATTCCAACATGGATGCTGATTTATATGGGTATAAATGGGCTCGCGATAATGTCGGGCAATCAGGTGCGACAATCTATCGATTGTATGGGAAGCCCGATGCGCCAGAGTTGTTTCTGAAACATGGCAAAGGTAGCGTTGCCAATGATGTTACAGATGAGATGGTCAGGCTAAACTGGCTGACGGAATTTATGCCTCTTCCGACCATCAAGCATTTTATCCGTACTCCTGATGATGCATGGTTACTCACCACTGCGATCCCAGGGAAAACAGCATTCCAGGTATTAGAAGAATATCCTGATTCAGGTGAAAATATTGTTGATGCGCTGGCAGTGTTCCTGCGCCGGTTGCATTCGATTCCTGTTTGTAATTGTCCTTTTAACGGCGATCGCGTATTTCGTCTCGCTCAGGCGCAATCACGAATGAATAACGGTTTGGTTGGTGCGAGTGATTTTGATGACGAGCGTAATGGCTGGCCTGTTGAACAAGTCTGGAAAGAAATGCATAAGCTTTTGCCATTCTCACCGGATTCAGTCGTCACTCATGGTGATTTCTCACTTGATAACCTTATTTTTGACGAGGGGAAATTAATAGGTTGTATTGATGTTGGACGAGTCGGAATCGCAGACCGATACCAGGATCTTGCCATCCTATGGAACTGCCTCGGTGAGTTTTCTCCTTCATTACAGAAACGGCTTTTTCAAAAATATGGTATTGATAATCCTGATATGAATAAATTGCAGTTTCACTTGATGCTCGATGAGTTTTTCTAAGCTAGCGATCAAAGGATCTTCTTGAGATCCTTTTTTCT
GCGCGTAATCTTTTGCCCTGTAAACGAAAAACCACCTGGAGGTGGTTTGATCGAAGGTTAAGCAGTTCCCCAACTGACTTAACCTTCGAAAAAACCACCTGGAGGTGGTTTGATCGAAGGTTAAGTCAGTTGAGGAACTGCTTAACCGTGGTAACTGGCTTTCGCAGAGCACAGCAACCAAATCTGTCCTTCCAGTGTAGCCGGACTTTGGCGCACACTTCAAGAGCAACCGCGTGTTTAGCTAAACAAATCCTCTGCGAACTCCCAGTTACCAATGGCTGCTGCCAGTGGCGTTTTACCGTGCTTTTCCGGGTTGGACTCAAGTGAACAGTTACCGGATAAGGCGCAGCAGTCGGGCTGAACGGGGAGTTCTTGCTTACAGCCCAGCTTGGAGCGAACGACCTACACCGAGCCGAGATACCAGTGTGTGAGCTATGAGAAAGCGCCACACTTCCCGTAAGGGAGAAAGGCGGAACAGGTATCCGGTAAACGGCAGGGTCGGAACAGGAGAGCGCAAGAGGGAGCGACCCGCCGGAAACGGTGGGGATCTTTAAGTCCTGTCGGGTTTCGCCCGTACTGTCAGATTCATGGTTGAGCCTCACGGCTCCCACAGATGCACCGGAAAAGCGTCTGTTTATGTGAACTCTGGCAGGAGGGCGGAGCCTATGGAAAAACGCCACCGGCGCGGCCCTGCTGTTTTGCCTCACATGTTAGTCCCCTGCTTATCCACGGAATCTGTGGGTAACTTTGTATGTGTCCGCAGCGCGGATCCCGAAGAAAGGCCCACCCGTGAAGGTGAGCCAGTGAGTTGATTGCAGTCCAGTTACGCTGGAGTCTGAGGCTCGTCCTGAATGATATCAAGCTTGAATTCGTTACCCGGATATAGTTCCTCCTTTCAGCAAAAAACCCCTCAAGACCCGTTTAGAGGCCCCAAGGGGTTATGCTAGTTATTGCTCAGCGGTGGCTGCTGCTCATTATCCGACAACTTCCAGAATCCACTTGTCTTCGCGTTTATCCAGACAATACACATTGGTCGCCGGGCCGTCATCATAGCACAGACCAGAACGCGTGAAGTATTTGTTCATAAAGGTCACG
3 |
--------------------------------------------------------------------------------
/test_data/workflow_glue/find_inserts/assemblies/barcode02.final.fasta:
--------------------------------------------------------------------------------
1 | >barcode02
2 | ACATCACCATGGCGCAGCGGCAGGCTCTTATTAATGGCCCAACGCTGTTTCGGGTTGTCGGTGCCTTGATCGAAGTAGATACCGCCCTGGTCAGAGCTATACGCGTATTCATAACCAGAATACATCGTATTCAGGGTTTTGATGCGAAAACCGCCCCAATCTTCGTACAGCACGAACTTAAACAGTTTACTGTGACCCAGCGTGGCCGCCGGCCAAGATTTACGGCTAACCACGGTCATATATTCCTTGCGTTCAGCATGTTTGATACGAATTTCGGTCTTGCCCAGGATAATCTGGCGACCAACGATAATTTTTGCACGGGAGGTGATCACTTGCGGAATACTCTGCGTACCGCCAATCGGGGTAACTTCATCGATCAGGTACATATATTCAATGTCAGCACCGCCGACATCTGCGTTCAGCTGCCAGCGCGTGAACTTCGAGGTCGGCGGGATCGTAATGGTGTGTTCAATCACTTTTGATTCGTAGACTTCCGTTTCGGTCATCGAAACTTGTGATTCTTCGTGGCTATGCGAATATGAAACTTCCACGGAGCCGATTTCAAAGGCGTCACCCGTGGAGATGGTACTGCCAATAGAATGCGTCGCGGTGACCGTACGGGTTTCGCTATTAACGTTCTTCATGCCTTTCGTGATGGTAATTTTTTGATCCACGGAGGTACTACCACGATTTTCATACACATAACCTTCCTTCCACACAGCAACCACATCAACTTCGATTTGTTCATAACCTTCAGCAGCCTTCGCACTCATATGTATATCTCCTTCTTATAGTTAAACAAAATTATTTCTAGAGGGAAACCGTTGTGGTCTCCCTACGACCAGTCTAAAAAGCGCCTGAATTCGCGACCTTCTCGTTACTGACAGGAAAATGGGCCATTGGCAACCAGGGAAAGATGAACGTGATGATGTTCACAATTTGCTGAATTGTGGTGGACGAATTCTCTAGATATCGCTCAATACTGACCATTTAAATCATACCTGACCTCCATAGCAGAAAGTCAAAAGCCTCCGACCGGAGGCTTTTGACTTGATCGGCACGTAAGAGGTTCCAACTTTCACCATAATGAAATAAGATCACTACCGGGCGTATTTTTTGAGTTATCGAGATTTTCAGGAGCTATGAGCCATATTCAACGGGAAACGTCTTGCTCGAGGCCGCGATTAAATTCCAACATGGATGCTGATTTATATGGGTATAAATGGGCTCGCGATAATGTCGGGCAATCAGGTGCGACAATCTATCGATTGTATGGGAAGCCCGATGCGCCAGAGTTGTTTCTGAAACATGGCAAAGGTAGCGTTGCCAATGATGTTACAGATGAGATGGTCAGGCTAAACTGGCTGACGGAATTTATGCCTCTTCCGACCATCAAGCATTTTATCCGTACTCCTGATGATGCATGGTTACTCACCACTGCGATCCCAGGGAAAACAGCATTCCAGGTATTAGAAGAATATCCTGATTCAGGTGAAAATATTGTTGATGCGCTGGCAGTGTTCCTGCGCCGGTTGCATTCGATTCCTGTTTGTAATTGTCCTTTTAACGGCGATCGCGTATTTCGTCTCGCTCAGGCGCAATCACGAATGAATAACGGTTTGGTTGGTGCGAGTGATTTTGATGACGAGCGTAATGGCTGGCCTGTTGAACAAGTCTGGAAAGAAATGCATAAGCTTTTGCCATTCTCACCGGATTCAGTCGTCACTCATGGTGATTTCTCACTTGATAACCTTATTTTTGACGAGGGGAAATTAATAGGTTGTATTGATGTTGGACGAGTCGGAATCGCAGACCGATACCAGGATCTTGCCATCCTATGGAACTGCCTCGGTGAGTTTTCTCCTTCATTACAGAAACGGCTTTTTCAAAAATATGGTATTGATAATCCTGATATGAATAAATTGCAGTTTCACTTGATGCTCGATGAGTTTTTCTAAGCTAGCGATCAAAGGATCTTCTTGAGATCCTTTTTTCT
GCGCGTAATCTTTTGCCCTGTAAACGAAAAAACCACCTGGGGAGGTGGTTTGATCGAAGGTTAAGCAGTTCCCCAACTGACTTAACCTTCGAAAAAACCACCTGGGGAGGTGGTTTGATCGAAGGTTAAGTCAGTTGGGGAACTGCTTAACCGTGGTAACTGGCTTTCGCAGAGCACAGCAACCAAATCTGTCCTTCCAGTGTAGCCGGACTTTGGCGCACACTTCAAGAGCAACCGCGTGTTTAGCTAAACAAATCCTCTGCGAACTCCCAGTTACCAATGGCTGCTGCCAGTGGCGTTTTACCGTGCTTTTCCGGGTTGGACTCAAGTGAACAGTTACCGGATAAGGCGCAGCAGTCGGGCTGAACGGGGAGTTCTTGCTTACAGCCCAGCTTGGAGCGAACGACCTACACCGAGCCGAGATACCAGTGTGTGAGCTATGAGAAAGCGCCACACTTCCCGTAAGGGAGAAAGGCGGAACAGGTATCCGGTAAACGGCAGGGTCGGAACAGGAGAGCGCAAGAGGGAGCGACCCGCCGGAAACGGTGGGGATCTTTAAGTCCTGTCGGGTTTCGCCCGTACTGTCAGATTCATGGTTGAGCCTCACGGCTCCCACAGATGCACCGGAAAAGCGTCTGTTTATGTGAACTCTGGCAGGAGGGCGGAGCCTATGGAAAAACGCCACCGGCGCGGCCCTGCTGTTTTGCCTCACATGTTAGTCCCCTGCTTATCCACGGAATCTGTGGGTAACTTTGTATGTGTCCGCAGCGCGGATCCCGAAGAAAGGCCCACCCGTGAAGGTGAGCCAGTGAGTTGATTGCAGTCCAGTTACGCTGGAGTCTGAGGCTCGTCCTGAATGATATCAAGCTTGAATTCGTTACCCGGATATAGTTCCTCCTTTCAGCAAAAAACCCCTCAAGACCCGTTTAAGAGGCCCCAAGGGGTTATGCTAGTTATTGCTCAGCGGTGGCTGCTGCTCATTATCCGACAACTTCCAGAATCCACTTGTCTTCGCGTTTATCCAGACAATACACATTGGTCGCCGGGCCGTCATCATAGCACAGACCAGAACGCGTGAAGTATTTGTTCATAAAGGTCACG
3 |
--------------------------------------------------------------------------------
/test_data/workflow_glue/find_inserts/assemblies/barcode03.final.fasta:
--------------------------------------------------------------------------------
1 | >barcode03
2 | ACATCACCATGGCGCAGCGGCAGGCTCTTATTAATGGCCCAACGCTGTTTCGGGTTGTCGGTGCCTTGATCGAAGTAGATACCGCCCTGGTCAGAGCTATACGCGTATTCATAACCAGAATACATCGTATTCAGGGTTTTGATGCGAAAACCGCCCCAATCTTCGTACAGCACGAACTTAAACAGTTTACTGTGACCCAGCGTGGCCGCCGGCCAAGATTTACGGCTAACCACGGTCATATATTCCTTGCGTTCAGCATGTTTGATACGAATTTCGGTCTTGCCCAGGATAATCTGGCGACCAACGATAATTTTTGCACGGGAGGTGATCACTTGCGGAATACTCTGCGTACCGCCAATCGGGGTAACTTCATCGATCAGGTACATATATTCAATGTCAGCACCGCCGACATCTGCGTTCAGCTGCCAGCGCGTGAACTTCGAGGTCGGCGGGATCGTAATGGTGTGTTCAATCACTTTTGATTCGTAGACTTCCGTTTCGGTCATCGAAACTTGTGATTCTTCGTGGCTATGCGAATATGAAACTTCCACGGAGCCGATTTCAAAGGCGTCACCCGTGGAGATGGTACTGCCAATAGAATGCGTCGCGGTGACCGTACGGGTTTCGCTATTAACGTTCTTCATGCCTTTCGTGATGGTAATTTTTTGATCCACGGAGGTACTACCACGATTTTCATACACATAACCTTCCTTCCACACAGCAACCACATCAACTTCGATTTGTTCATAACCTTCAGCAGCCTTCGCACTCATATGTATATCTCCTTCTTATAGTTAAACAAAATTATTTCTAGAGGGAAACCGTTGTGGTCTCCCTACGACCAGTCTAAAAAGCGCCTGAATTCGCGACCTTCTCGTTACTGACAGGAAAATGGGCCATTGGCAACCAGGGAAAGATGAACGTGATGATGTTCACAATTTGCTGAATTGTGGTGGACGAATTCTCTAGATATCGCTCAATACTGACCATTTAAATCATACCTGACCTCCATAGCAGAAAGTCAAAAGCCTCCGACCGGAGGCTTTTGACTTGATCGGCACGTAAGAGGTTCCAACTTTCACCATAATGAAATAAGATCACTACCGGGCGTATTTTTTGAGTTATCGAGATTTTCAGGAGCTATGAGCCATATTCAACGGGAAACGTCTTGCTCGAGGCCGCGATTAAATTCCAACATGGATGCTGATTTATATGGGTATAAATGGGCTCGCGATAATGTCGGGCAATCAGGTGCGACAATCTATCGATTGTATGGGAAGCCCGATGCGCCAGAGTTGTTTCTGAAACATGGCAAAGGTAGCGTTGCCAATGATGTTACAGATGAGATGGTCAGGCTAAACTGGCTGACGGAATTTATGCCTCTTCCGACCATCAAGCATTTTATCCGTACTCCTGATGATGCATGGTTACTCACCACTGCGATCCCAGGGAAAACAGCATTCCAGGTATTAGAAGAATATCCTGATTCAGGTGAAAATATTGTTGATGCGCTGGCAGTGTTCCTGCGCCGGTTGCATTCGATTCCTGTTTGTAATTGTCCTTTTAACGGCGATCGCGTATTTCGTCTCGCTCAGGCGCAATCACGAATGAATAACGGTTTGGTTGGTGCGAGTGATTTTGATGACGAGCGTAATGGCTGGCCTGTTGAACAAGTCTGGAAAGAAATGCATAAGCTTTTGCCATTCTCACCGGATTCAGTCGTCACTCATGGTGATTTCTCACTTGATAACCTTATTTTTGACGAGGGGAAATTAATAGGTTGTATTGATGTTGGACGAGTCGGAATCGCAGACCGATACCAGGATCTTGCCATCCTATGGAACTGCCTCGGTGAGTTTTCTCCTTCATTACAGAAACGGCTTTTTCAAAAATATGGTATTGATAATCCTGATATGAATAAATTGCAGTTTCACTTGATGCTCGATGAGTTTTTCTAAGCTAGCGATCAAAGGATCTTCTTGAGATCCTTTTTTCT
GCGCGTAATCTTTTGCCCTGTAAACGAAAAAACCACCTGGGGAGGTGGTTTGATCGAAGGTTAAGCAGTTCCCCAACTGACTTAACCTTCGAAAAAACCACCTGGGGAGGTGGTTTGATCGAAGGTTAAGTCAGTTGGGGAACTGCTTAACCGTGGTAACTGGCTTTCGCAGAGCACAGCAACCAAATCTGTCCTTCCAGTGTAGCCGGACTTTGGCGCACACTTCAAGAGCAACCGCGTGTTTAGCTAAACAAATCCTCTGCGAACTCCCAGTTACCAATGGCTGCTGCCAGTGGCGTTTTACCGTGCTTTTCCGGGTTGGACTCAAGTGAACAGTTACCGGATAAGGCGCAGCAGTCGGGCTGAACGGGGAGTTCTTGCTTACAGCCCAGCTTGGAGCGAACGACCTACACCGAGCCGAGATACCAGTGTGTGAGCTATGAGAAAGCGCCACACTTCCCGTAAGGGAGAAAGGCGGAACAGGTATCCGGTAAACGGCAGGGTCGGAACAGGAGAGCGCAAGAGGGAGCGACCCGCCGGAAACGGTGGGGATCTTTAAGTCCTGTCGGGTTTCGCCCGTACTGTCAGATTCATGGTTGAGCCTCACGGCTCCCACAGATGCACCGGAAAAGCGTCTGTTTATGTGAACTCTGGCAGGAGGGCGGAGCCTATGGAAAAACGCCACCGGCGCGGCCCTGCTGTTTTGCCTCACATGTTAGTCCCCTGCTTATCCACGGAATCTGTGGGTAACTTTGTATGTGTCCGCAGCGCGGATCCCGAAGAAAGGCCCACCCGTGAAGGTGAGCCAGTGAGTTGATTGCAGTCCAGTTACGCTGGAGTCTGAGGCTCGTCCTGAATGATATCAAGCTTGAATTCGTTACCCGGATATAGTTCCTCCTTTCAGCAAAAAACCCCTCAAGACCCGTTTAGAGGCCCCAAGGGGTTATGCTAGTTATTGCTCAGCGGTGGCTGCTGCTCATTATCCGACAACTTCCAGAATCCACTTGTCTTCGCGTTTATCCAGACAATACACATTGGTCGCCGGGCCGTCATCATAGCACAGACCAGAACGCGTGAAGTATTTGTTCATAAAGGTCACG
3 |
--------------------------------------------------------------------------------
/test_data/workflow_glue/find_inserts/assemblies/barcode04.final.fasta:
--------------------------------------------------------------------------------
1 | >barcode04
2 | ACATCACCATGGCGCAGCGGCAGGCTCTTATTAATGGCCCAACGCTGTTTCGGGTTGTCGGTGCCTTGATCGAAGTAGATACCGCCCTGGTCAGAGCTATACGCGTATTCATAACCAGAATACATCGTATTCAGGGTTTTGATGCGAAAACCGCCCCAATCTTCGTACAGCACGAACTTAAACAGTTTACTGTGACCCAGCGTGGCCGCCGGCCAAGATTTACGGCTAACCACGGTCATATATTCCTTGCGTTCAGCATGTTTGATACGAATTTCGGTCTTGCCCAGGATAATCTGGCGACCAACGATAATTTTTGCACGGGAGGTGATCACTTGCGGAATACTCTGCGTACCGCCAATCGGGGTAACTTCATCGATCAGGTACATATATTCAATGTCAGCACCGCCGACATCTGCGTTCAGCTGCCAGCGCGTGAACTTCGAGGTCGGCGGGATCGTAATGGTGTGTTCAATCACTTTTGATTCGTAGACTTCCGTTTCGGTCATCGAAACTTGTGATTCTTCGTGGCTATGCGAATATGAAACTTCCACGGAGCCGATTTCAAAGGCGTCACCCGTGGAGATGGTACTGCCAATAGAATGCGTCGCGGTGACCGTACGGGTTTCGCTATTAACGTTCTTCATGCCTTTCGTGATGGTAATTTTTTGATCCACGGAGGTACTACCACGATTTTCATACACATAACCTTCCTTCCACACAGCAACCACATCAACTTCGATTTGTTCATAACCTTCAGCAGCCTTCGCACTCATATGTATATCTCCTTCTTATAGTTAAACAAAATTATTTCTAGAGGGAAACCGTTGTGGTCTCCCTACGACCAGTCTAAAAAGCGCCTGAATTCGCGACCTTCTCGTTACTGACAGGAAAATGGGCCATTGGCAACCAGGGAAAGATGAACGTGATGATGTTCACAATTTGCTGAATTGTGGTGGACGAATTCTCTAGATATCGCTCAATACTGACCATTTAAATCATACCTGACCTCCATAGCAGAAAGTCAAAAGCCTCCGACCGGAGGCTTTTGACTTGATCGGCACGTAAGAGGTTCCAACTTTCACCATAATGAAATAAGATCACTACCGGGCGTATTTTTTGAGTTATCGAGATTTTCAGGAGCTATGAGCCATATTCAACGGGAAACGTCTTGCTCGAGGCCGCGATTAAATTCCAACATGGATGCTGATTTATATGGGTATAAATGGGCTCGCGATAATGTCGGGCAATCAGGTGCGACAATCTATCGATTGTATGGGAAGCCCGATGCGCCAGAGTTGTTTCTGAAACATGGCAAAGGTAGCGTTGCCAATGATGTTACAGATGAGATGGTCAGGCTAAACTGGCTGACGGAATTTATGCCTCTTCCGACCATCAAGCATTTTATCCGTACTCCTGATGATGCATGGTTACTCACCACTGCGATCCCAGGGAAAACAGCATTCCAGGTATTAGAAGAATATCCTGATTCAGGTGAAAATATTGTTGATGCGCTGGCAGTGTTCCTGCGCCGGTTGCATTCGATTCCTGTTTGTAATTGTCCTTTTAACGGCGATCGCGTATTTCGTCTCGCTCAGGCGCAATCACGAATGAATAACGGTTTGGTTGGTGCGAGTGATTTTGATGACGAGCGTAATGGCTGGCCTGTTGAACAAGTCTGGAAAGAAATGCATAAGCTTTTGCCATTCTCACCGGATTCAGTCGTCACTCATGGTGATTTCTCACTTGATAACCTTATTTTTGACGAGGGGAAATTAATAGGTTGTATTGATGTTGGACGAGTCGGAATCGCAGACCGATACCAGGATCTTGCCATCCTATGGAACTGCCTCGGTGAGTTTTCTCCTTCATTACAGAAACGGCTTTTTCAAAAATATGGTATTGATAATCCTGATATGAATAAATTGCAGTTTCACTTGATGCTCGATGAGTTTTTCTAAGCTAGCGATCAAAGGATCTTCTTGAGATCCTTTTTTCT
GCGCGTAATCTTTTGCCCTGTAAACGAAAAAACCACCTGGGGAGGTGGTTTGATCGAAGGTTAAGCAGTTCCCCAACTGACTTAACCTTCGAAAAAACCACCTGGGGAGGTGGTTTGATCGAAGGTTAAGTCAGTTGGGGAACTGCTTAACCGTGGTAACTGGCTTTCGCAGAGCACAGCAACCAAATCTGTCCTTCCAGTGTAGCCGGACTTTGGCGCACACTTCAAGAGCAACCGCGTGTTTAGCTAAACAAATCCTCTGCGAACTCCCAGTTACCAATGGCTGCTGCCAGTGGCGTTTTACCGTGCTTTTCCGGGTTGGACTCAAGTGAACAGTTACCGGATAAGGCGCAGCAGTCGGGCTGAACGGGGAGTTCTTGCTTACAGCCCAGCTTGGAGCGAACGACCTACACCGAGCCGAGATACCAGTGTGTGAGCTATGAGAAAGCGCCACACTTCCCGTAAGGGAGAAAGGCGGAACAGGTATCCGGTAAACGGCAGGGTCGGAACAGGAGAGCGCAAGAGGGAGCGACCCGCCGGAAACGGTGGGGATCTTTAAGTCCTGTCGGGTTTCGCCCGTACTGTCAGATTCATGGTTGAGCCTCACGGCTCCCACAGATGCACCGGAAAAGCGTCTGTTTATGTGAACTCTGGCAGGAGGGCGGAGCCTATGGAAAAACGCCACCGGCGCGGCCCTGCTGTTTTGCCTCACATGTTAGTCCCCTGCTTATCCACGGAATCTGTGGGTAACTTTGTATGTGTCCGCAGCGCGGATCCCGAAGAAAGGCCCACCCGTGAAGGTGAGCCAGTGAGTTGATTGCAGTCCAGTTACGCTGGAGTCTGAGGCTCGTCCTGAATGATATCAAGCTTGAATTCGTTACCCGGATATAGTTCCTCCTTTCAGCAAAAAACCCCTCAAGACCCGTTTAGAGGCCCCAAGGGGTTATGCTAGTTATTGCTCAGCGGTGGCTGCTGCTCATTATCCGACAACTTCCAGAATCCACTTGTCTTCGCGTTTATCCAGACAATACACATTGGTCGCCGGGCCGTCATCATAGCACAGACCAGAACGCGTGAAGTATTTGTTCATAAAGGTCACG
3 |
--------------------------------------------------------------------------------
/test_data/workflow_glue/find_inserts/expected_df.csv:
--------------------------------------------------------------------------------
1 | Sample,start,end,primer,strand
2 | barcode01,436,2925,internal_reverse,-
3 | barcode02,436,2925,internal_forward,+
4 | barcode03,1777,1038,split_reverse,-
5 | barcode04,1777,1038,split_forward,+
--------------------------------------------------------------------------------
/test_data/workflow_glue/find_inserts/expected_insert/barcode01.insert.fasta:
--------------------------------------------------------------------------------
1 | >barcode01
2 | TAAACGGGTCTTGAGGGGTTTTTTGCTGAAAGGAGGAACTATATCCGGGTAACGAATTCAAGCTTGATATCATTCAGGACGAGCCTCAGACTCCAGCGTAACTGGACTGCAATCAACTCACTGGCTCACCTTCACGGGTGGGCCTTTCTTCGGGATCCGCGCTGCGGACACATACAAAGTTACCCACAGATTCCGTGGATAAGCAGGGGACTAACATGTGAGGCAAAACAGCAGGGCCGCGCCGGTGGCGTTTTTCCATAGGCTCCGCCCTCCTGCCAGAGTTCACATAAACAGACGCTTTTCCGGTGCATCTGTGGGAGCCGTGAGGCTCAACCATGAATCTGACAGTACGGGCGAAACCCGACAGGACTTAAAGATCCCCACCGTTTCCGGCGGGTCGCTCCCTCTTGCGCTCTCCTGTTCCGACCCTGCCGTTTACCGGATACCTGTTCCGCCTTTCTCCCTTACGGGAAGTGTGGCGCTTTCTCATAGCTCACACACTGGTATCTCGGCTCGGTGTAGGTCGTTCGCTCCAAGCTGGGCTGTAAGCAAGAACTCCCCGTTCAGCCCGACTGCTGCGCCTTATCCGGTAACTGTTCACTTGAGTCCAACCCGGAAAAGCACGGTAAAACGCCACTGGCAGCAGCCATTGGTAACTGGGAGTTCGCAGAGGATTTGTTTAGCTAAACACGCGGTTGCTCTTGAAGTGTGCGCCAAAGTCCGGCTACACTGGAAGGACAGATTTGGTTGCTGTGCTCTGCGAAAGCCAGTTACCACGGTTAAGCAGTTCCCCAACTGACTTAACCTTCGATCAAACCACCTCCCCAGGTGGTTTTTTCGAAGGTTAAGTCAGTTGGGGAACTGCTTAACCTTCGATCAAACCACCTCCCCAGGTGGTTTTTTCGTTTACAGGGCAAAAGATTACGCGCAGAAAAAAGGATCTCAAGAAGATCCTTTGATCGCTAGCTTAGAAAAACTCATCGAGCATCAAGTGAAACTGCAATTTATTCATATCAGGATTATCAATACCATATTTTTGAAAAAGCCGTTTCTGTAATGAAGGAGAAAACTCACCGAGGCAGTTCCATAGGATGGCAAGATCCTGGTATCGGTCTGCGATTCCGACTCGTCCAACATCAATACAACCTATTAATTTCCCCTCGTCAAAAATAAGGTTATCAAGTGAGAAATCACCATGAGTGACGACTGAATCCGGTGAGAATGGCAAAAGCTTATGCATTTCTTTCCAGACTTGTTCAACAGGCCAGCCATTACGCTCGTCATCAAAATCACTCGCACCAACCAAACCGTTATTCATTCGTGATTGCGCCTGAGCGAGACGAAATACGCGATCGCCGTTAAAAGGACAATTACAAACAGGAATCGAATGCAACCGGCGCAGGAACACTGCCAGCGCATCAACAATATTTTCACCTGAATCAGGATATTCTTCTAATACCTGGAATGCTGTTTTCCCTGGGATCGCAGTGGTGAGTAACCATGCATCATCAGGAGTACGGATAAAATGCTTGATGGTCGGAAGAGGCATAAATTCCGTCAGCCAGTTTAGCCTGACCATCTCATCTGTAACATCATTGGCAACGCTACCTTTGCCATGTTTCAGAAACAACTCTGGCGCATCGGGCTTCCCATACAATCGATAGATTGTCGCACCTGATTGCCCGACATTATCGCGAGCCCATTTATACCCATATAAATCAGCATCCATGTTGGAATTTAATCGCGGCCTCGAGCAAGACGTTTCCCGTTGAATATGGCTCATAGCTCCTGAAAATCTCGATAACTCAAAAAATACGCCCGGTAGTGATCTTATTTCATTATGGTGAAAGTTGGAACCTCTTACGTGCCGATCAAGTCAAAAGCCTCCGGTCGGAGGCTTTTGACTTTCTGCTATGGAGGTCAGGTATGATTTAAATGGTCAGTATTGAGCGATATCTAGAGAATTCGTCCACCACAATTCAGCAAATTGTGAACA
TCATCACGTTCATCTTTCCCTGGTTGCCAATGGCCCATTTTCCTGTCAGTAACGAGAAGGTCGCGAATTCAGGCGCTTTTTAGACTGGTCGTAGGGAGACCACAACGGTTTCCCTCTAGAAATAATTTTGTTTAACTATAAGAAGGAGATATACATATGAGTGCGAAGGCTGCTGAAGGTTATGAACAAATCGAAGTTGATGTGGTTGCTGTGTGGAAGGAAGGTTATGTGTATGAAAATCGTGGTAGTACCTCCGTGGATCAAAAAATTACCATCACGAAAGGCATGAAGAACGTTAATAGCGAAACCCGTACGGTCACCGCGACGCATTCTATTGGCAGTACCATCTCCACGGGTGACGCCTTTGAAATCGGCTCCGTGGAAGTTTCATATTCGCATAGCCACGAAGAATCACAAGTTTCGATGACCGAAACGGAAGTCTACGAATCAAAAGTGATTGAACACACCATTACGATCCCGCCGACCTCGAAGT
3 |
--------------------------------------------------------------------------------
/test_data/workflow_glue/find_inserts/expected_insert/barcode01.insert.fasta.fai:
--------------------------------------------------------------------------------
1 | barcode01 2489 11 2489 2490
2 |
--------------------------------------------------------------------------------
/test_data/workflow_glue/find_inserts/expected_insert/barcode02.insert.fasta:
--------------------------------------------------------------------------------
1 | >barcode02
2 | ACTTCGAGGTCGGCGGGATCGTAATGGTGTGTTCAATCACTTTTGATTCGTAGACTTCCGTTTCGGTCATCGAAACTTGTGATTCTTCGTGGCTATGCGAATATGAAACTTCCACGGAGCCGATTTCAAAGGCGTCACCCGTGGAGATGGTACTGCCAATAGAATGCGTCGCGGTGACCGTACGGGTTTCGCTATTAACGTTCTTCATGCCTTTCGTGATGGTAATTTTTTGATCCACGGAGGTACTACCACGATTTTCATACACATAACCTTCCTTCCACACAGCAACCACATCAACTTCGATTTGTTCATAACCTTCAGCAGCCTTCGCACTCATATGTATATCTCCTTCTTATAGTTAAACAAAATTATTTCTAGAGGGAAACCGTTGTGGTCTCCCTACGACCAGTCTAAAAAGCGCCTGAATTCGCGACCTTCTCGTTACTGACAGGAAAATGGGCCATTGGCAACCAGGGAAAGATGAACGTGATGATGTTCACAATTTGCTGAATTGTGGTGGACGAATTCTCTAGATATCGCTCAATACTGACCATTTAAATCATACCTGACCTCCATAGCAGAAAGTCAAAAGCCTCCGACCGGAGGCTTTTGACTTGATCGGCACGTAAGAGGTTCCAACTTTCACCATAATGAAATAAGATCACTACCGGGCGTATTTTTTGAGTTATCGAGATTTTCAGGAGCTATGAGCCATATTCAACGGGAAACGTCTTGCTCGAGGCCGCGATTAAATTCCAACATGGATGCTGATTTATATGGGTATAAATGGGCTCGCGATAATGTCGGGCAATCAGGTGCGACAATCTATCGATTGTATGGGAAGCCCGATGCGCCAGAGTTGTTTCTGAAACATGGCAAAGGTAGCGTTGCCAATGATGTTACAGATGAGATGGTCAGGCTAAACTGGCTGACGGAATTTATGCCTCTTCCGACCATCAAGCATTTTATCCGTACTCCTGATGATGCATGGTTACTCACCACTGCGATCCCAGGGAAAACAGCATTCCAGGTATTAGAAGAATATCCTGATTCAGGTGAAAATATTGTTGATGCGCTGGCAGTGTTCCTGCGCCGGTTGCATTCGATTCCTGTTTGTAATTGTCCTTTTAACGGCGATCGCGTATTTCGTCTCGCTCAGGCGCAATCACGAATGAATAACGGTTTGGTTGGTGCGAGTGATTTTGATGACGAGCGTAATGGCTGGCCTGTTGAACAAGTCTGGAAAGAAATGCATAAGCTTTTGCCATTCTCACCGGATTCAGTCGTCACTCATGGTGATTTCTCACTTGATAACCTTATTTTTGACGAGGGGAAATTAATAGGTTGTATTGATGTTGGACGAGTCGGAATCGCAGACCGATACCAGGATCTTGCCATCCTATGGAACTGCCTCGGTGAGTTTTCTCCTTCATTACAGAAACGGCTTTTTCAAAAATATGGTATTGATAATCCTGATATGAATAAATTGCAGTTTCACTTGATGCTCGATGAGTTTTTCTAAGCTAGCGATCAAAGGATCTTCTTGAGATCCTTTTTTCTGCGCGTAATCTTTTGCCCTGTAAACGAAAAAACCACCTGGGGAGGTGGTTTGATCGAAGGTTAAGCAGTTCCCCAACTGACTTAACCTTCGAAAAAACCACCTGGGGAGGTGGTTTGATCGAAGGTTAAGTCAGTTGGGGAACTGCTTAACCGTGGTAACTGGCTTTCGCAGAGCACAGCAACCAAATCTGTCCTTCCAGTGTAGCCGGACTTTGGCGCACACTTCAAGAGCAACCGCGTGTTTAGCTAAACAAATCCTCTGCGAACTCCCAGTTACCAATGGCTGCTGCCAGTGGCGTTTTACCGTGCTTTTCCGGGTTGGACTCAAGTGAACAGTTACCGGATAAGGCGCAGCAGTCGGGCTGAACGGGGAGTTCTTGCTTACAGCCCAGCTTGGAGCGAACGACCTACACCGAGCCGAGATACCAGTGTGTGA
GCTATGAGAAAGCGCCACACTTCCCGTAAGGGAGAAAGGCGGAACAGGTATCCGGTAAACGGCAGGGTCGGAACAGGAGAGCGCAAGAGGGAGCGACCCGCCGGAAACGGTGGGGATCTTTAAGTCCTGTCGGGTTTCGCCCGTACTGTCAGATTCATGGTTGAGCCTCACGGCTCCCACAGATGCACCGGAAAAGCGTCTGTTTATGTGAACTCTGGCAGGAGGGCGGAGCCTATGGAAAAACGCCACCGGCGCGGCCCTGCTGTTTTGCCTCACATGTTAGTCCCCTGCTTATCCACGGAATCTGTGGGTAACTTTGTATGTGTCCGCAGCGCGGATCCCGAAGAAAGGCCCACCCGTGAAGGTGAGCCAGTGAGTTGATTGCAGTCCAGTTACGCTGGAGTCTGAGGCTCGTCCTGAATGATATCAAGCTTGAATTCGTTACCCGGATATAGTTCCTCCTTTCAGCAAAAAACCCCTCAAGACCCGTTTA
3 |
--------------------------------------------------------------------------------
/test_data/workflow_glue/find_inserts/expected_insert/barcode02.insert.fasta.fai:
--------------------------------------------------------------------------------
1 | barcode02 2489 11 2489 2490
2 |
--------------------------------------------------------------------------------
/test_data/workflow_glue/find_inserts/expected_insert/barcode03.insert.fasta:
--------------------------------------------------------------------------------
1 | >barcode03
2 | CGGTCGGAGGCTTTTGACTTTCTGCTATGGAGGTCAGGTATGATTTAAATGGTCAGTATTGAGCGATATCTAGAGAATTCGTCCACCACAATTCAGCAAATTGTGAACATCATCACGTTCATCTTTCCCTGGTTGCCAATGGCCCATTTTCCTGTCAGTAACGAGAAGGTCGCGAATTCAGGCGCTTTTTAGACTGGTCGTAGGGAGACCACAACGGTTTCCCTCTAGAAATAATTTTGTTTAACTATAAGAAGGAGATATACATATGAGTGCGAAGGCTGCTGAAGGTTATGAACAAATCGAAGTTGATGTGGTTGCTGTGTGGAAGGAAGGTTATGTGTATGAAAATCGTGGTAGTACCTCCGTGGATCAAAAAATTACCATCACGAAAGGCATGAAGAACGTTAATAGCGAAACCCGTACGGTCACCGCGACGCATTCTATTGGCAGTACCATCTCCACGGGTGACGCCTTTGAAATCGGCTCCGTGGAAGTTTCATATTCGCATAGCCACGAAGAATCACAAGTTTCGATGACCGAAACGGAAGTCTACGAATCAAAAGTGATTGAACACACCATTACGATCCCGCCGACCTCGAAGTTCACGCGCTGGCAGCTGAACGCAGATGTCGGCGGTGCTGACATTGAATATATGTACCTGATCGATGAAGTTACCCCGATTGGCGGTACGCAGAGTATTCCGCAAGTGATCACCTCCCGTGCAAAAATTATCGTTGGTCGCCAGATTATCCTGGGCAAGACCGAAATTCGTATCAAACATGCTGAACGCAAGGAATATATGACCGTGGTTAGCCGTAAATCTTGGCCGGCGGCCACGCTGGGTCACAGTAAACTGTTTAAGTTCGTGCTGTACGAAGATTGGGGCGGTTTTCGCATCAAAACCCTGAATACGATGTATTCTGGTTATGAATACGCGTATAGCTCTGACCAGGGCGGTATCTACTTCGATCAAGGCACCGACAACCCGAAACAGCGTTGGGCCATTAATAAGAGCCTGCCGCTGCGCCATGGTGATGTCGTGACCTTTATGAACAAATACTTCACGCGTTCTGGTCTGTGCTATGATGACGGCCCGGCGACCAATGTGTATTGTCTGGATAAACGCGAAGACAAGTGGATTCTGGAAGTTGTCGGATAATGAGCAGCAGCCACCGCTGAGCAATAACTAGCATAACCCCTTGGGGCCTCTAAACGGGTCTTGAGGGGTTTTTTGCTGAAAGGAGGAACTATATCCGGGTAACGAATTCAAGCTTGATATCATTCAGGACGAGCCTCAGACTCCAGCGTAACTGGACTGCAATCAACTCACTGGCTCACCTTCACGGGTGGGCCTTTCTTCGGGATCCGCGCTGCGGACACATACAAAGTTACCCACAGATTCCGTGGATAAGCAGGGGACTAACATGTGAGGCAAAACAGCAGGGCCGCGCCGGTGGCGTTTTTCCATAGGCTCCGCCCTCCTGCCAGAGTTCACATAAACAGACGCTTTTCCGGTGCATCTGTGGGAGCCGTGAGGCTCAACCATGAATCTGACAGTACGGGCGAAACCCGACAGGACTTAAAGATCCCCACCGTTTCCGGCGGGTCGCTCCCTCTTGCGCTCTCCTGTTCCGACCCTGCCGTTTACCGGATACCTGTTCCGCCTTTCTCCCTTACGGGAAGTGTGGCGCTTTCTCATAGCTCACACACTGGTATCTCGGCTCGGTGTAGGTCGTTCGCTCCAAGCTGGGCTGTAAGCAAGAACTCCCCGTTCAGCCCGACTGCTGCGCCTTATCCGGTAACTGTTCACTTGAGTCCAACCCGGAAAAGCACGGTAAAACGCCACTGGCAGCAGCCATTGGTAACTGGGAGTTCGCAGAGGATTTGTTTAGCTAAACACGCGGTTGCTCTTGAAGTGTGCGCCAAAGTCCGGCTACACTGGAAGGACAGATTTGGTTGCTGTGCTCTGCGAAAGCCAGTTACCACGGTTAAGCAG
TTCCCCAACTGACTTAACCTTCGATCAAACCACCTCCCCAGGTGGTTTTTTCGAAGGTTAAGTCAGTTGGGGAACTGCTTAACCTTCGATCAAACCACCTCCCCAGGTGGTTTTTTCGTTTACAGGGCAAAAGATTACGCGCAGAAAAAAGGATCTCAAGAAGATCCTTTGATCGCTAGCTTAGAAAAACTCATCGAGCATCAAGTGAAACTGCAATTTATTCATATCAGGATTATCAATACCATATTTTTGAAAAAGCCGTTTCTGTAATGAAGGAGAAAACTCACCGAGGCAGTTCCATAGGATGGCAAGATCCTGGTATCGGTCTGCGATTCCGACTCGTCCAACATCAATACAACCT
3 |
--------------------------------------------------------------------------------
/test_data/workflow_glue/find_inserts/expected_insert/barcode03.insert.fasta.fai:
--------------------------------------------------------------------------------
1 | barcode03 2357 11 2357 2358
2 |
--------------------------------------------------------------------------------
/test_data/workflow_glue/find_inserts/expected_insert/barcode04.insert.fasta:
--------------------------------------------------------------------------------
1 | >barcode04
2 | AGGTTGTATTGATGTTGGACGAGTCGGAATCGCAGACCGATACCAGGATCTTGCCATCCTATGGAACTGCCTCGGTGAGTTTTCTCCTTCATTACAGAAACGGCTTTTTCAAAAATATGGTATTGATAATCCTGATATGAATAAATTGCAGTTTCACTTGATGCTCGATGAGTTTTTCTAAGCTAGCGATCAAAGGATCTTCTTGAGATCCTTTTTTCTGCGCGTAATCTTTTGCCCTGTAAACGAAAAAACCACCTGGGGAGGTGGTTTGATCGAAGGTTAAGCAGTTCCCCAACTGACTTAACCTTCGAAAAAACCACCTGGGGAGGTGGTTTGATCGAAGGTTAAGTCAGTTGGGGAACTGCTTAACCGTGGTAACTGGCTTTCGCAGAGCACAGCAACCAAATCTGTCCTTCCAGTGTAGCCGGACTTTGGCGCACACTTCAAGAGCAACCGCGTGTTTAGCTAAACAAATCCTCTGCGAACTCCCAGTTACCAATGGCTGCTGCCAGTGGCGTTTTACCGTGCTTTTCCGGGTTGGACTCAAGTGAACAGTTACCGGATAAGGCGCAGCAGTCGGGCTGAACGGGGAGTTCTTGCTTACAGCCCAGCTTGGAGCGAACGACCTACACCGAGCCGAGATACCAGTGTGTGAGCTATGAGAAAGCGCCACACTTCCCGTAAGGGAGAAAGGCGGAACAGGTATCCGGTAAACGGCAGGGTCGGAACAGGAGAGCGCAAGAGGGAGCGACCCGCCGGAAACGGTGGGGATCTTTAAGTCCTGTCGGGTTTCGCCCGTACTGTCAGATTCATGGTTGAGCCTCACGGCTCCCACAGATGCACCGGAAAAGCGTCTGTTTATGTGAACTCTGGCAGGAGGGCGGAGCCTATGGAAAAACGCCACCGGCGCGGCCCTGCTGTTTTGCCTCACATGTTAGTCCCCTGCTTATCCACGGAATCTGTGGGTAACTTTGTATGTGTCCGCAGCGCGGATCCCGAAGAAAGGCCCACCCGTGAAGGTGAGCCAGTGAGTTGATTGCAGTCCAGTTACGCTGGAGTCTGAGGCTCGTCCTGAATGATATCAAGCTTGAATTCGTTACCCGGATATAGTTCCTCCTTTCAGCAAAAAACCCCTCAAGACCCGTTTAGAGGCCCCAAGGGGTTATGCTAGTTATTGCTCAGCGGTGGCTGCTGCTCATTATCCGACAACTTCCAGAATCCACTTGTCTTCGCGTTTATCCAGACAATACACATTGGTCGCCGGGCCGTCATCATAGCACAGACCAGAACGCGTGAAGTATTTGTTCATAAAGGTCACGACATCACCATGGCGCAGCGGCAGGCTCTTATTAATGGCCCAACGCTGTTTCGGGTTGTCGGTGCCTTGATCGAAGTAGATACCGCCCTGGTCAGAGCTATACGCGTATTCATAACCAGAATACATCGTATTCAGGGTTTTGATGCGAAAACCGCCCCAATCTTCGTACAGCACGAACTTAAACAGTTTACTGTGACCCAGCGTGGCCGCCGGCCAAGATTTACGGCTAACCACGGTCATATATTCCTTGCGTTCAGCATGTTTGATACGAATTTCGGTCTTGCCCAGGATAATCTGGCGACCAACGATAATTTTTGCACGGGAGGTGATCACTTGCGGAATACTCTGCGTACCGCCAATCGGGGTAACTTCATCGATCAGGTACATATATTCAATGTCAGCACCGCCGACATCTGCGTTCAGCTGCCAGCGCGTGAACTTCGAGGTCGGCGGGATCGTAATGGTGTGTTCAATCACTTTTGATTCGTAGACTTCCGTTTCGGTCATCGAAACTTGTGATTCTTCGTGGCTATGCGAATATGAAACTTCCACGGAGCCGATTTCAAAGGCGTCACCCGTGGAGATGGTACTGCCAATAGAATGCGTCGCGGTGACCGTACGGGTTTCGCTATTAACGTTCTTCATGCCTTTCGTGATGGTAATTTTTTGATCCACGGA
GGTACTACCACGATTTTCATACACATAACCTTCCTTCCACACAGCAACCACATCAACTTCGATTTGTTCATAACCTTCAGCAGCCTTCGCACTCATATGTATATCTCCTTCTTATAGTTAAACAAAATTATTTCTAGAGGGAAACCGTTGTGGTCTCCCTACGACCAGTCTAAAAAGCGCCTGAATTCGCGACCTTCTCGTTACTGACAGGAAAATGGGCCATTGGCAACCAGGGAAAGATGAACGTGATGATGTTCACAATTTGCTGAATTGTGGTGGACGAATTCTCTAGATATCGCTCAATACTGACCATTTAAATCATACCTGACCTCCATAGCAGAAAGTCAAAAGCCTCCGACCG
3 |
--------------------------------------------------------------------------------
/test_data/workflow_glue/find_inserts/expected_insert/barcode04.insert.fasta.fai:
--------------------------------------------------------------------------------
1 | barcode04 2357 11 2357 2358
2 |
--------------------------------------------------------------------------------
/test_data/workflow_glue/find_inserts/insert_beds/internal_forward.bed:
--------------------------------------------------------------------------------
1 | barcode02 436 2925 internal_forward 0 + ACTTCGAGGTCGGCGGGATCGTAATGGTGTGTTCAATCACTTTTGATTCGTAGACTTCCGTTTCGGTCATCGAAACTTGTGATTCTTCGTGGCTATGCGAATATGAAACTTCCACGGAGCCGATTTCAAAGGCGTCACCCGTGGAGATGGTACTGCCAATAGAATGCGTCGCGGTGACCGTACGGGTTTCGCTATTAACGTTCTTCATGCCTTTCGTGATGGTAATTTTTTGATCCACGGAGGTACTACCACGATTTTCATACACATAACCTTCCTTCCACACAGCAACCACATCAACTTCGATTTGTTCATAACCTTCAGCAGCCTTCGCACTCATATGTATATCTCCTTCTTATAGTTAAACAAAATTATTTCTAGAGGGAAACCGTTGTGGTCTCCCTACGACCAGTCTAAAAAGCGCCTGAATTCGCGACCTTCTCGTTACTGACAGGAAAATGGGCCATTGGCAACCAGGGAAAGATGAACGTGATGATGTTCACAATTTGCTGAATTGTGGTGGACGAATTCTCTAGATATCGCTCAATACTGACCATTTAAATCATACCTGACCTCCATAGCAGAAAGTCAAAAGCCTCCGACCGGAGGCTTTTGACTTGATCGGCACGTAAGAGGTTCCAACTTTCACCATAATGAAATAAGATCACTACCGGGCGTATTTTTTGAGTTATCGAGATTTTCAGGAGCTATGAGCCATATTCAACGGGAAACGTCTTGCTCGAGGCCGCGATTAAATTCCAACATGGATGCTGATTTATATGGGTATAAATGGGCTCGCGATAATGTCGGGCAATCAGGTGCGACAATCTATCGATTGTATGGGAAGCCCGATGCGCCAGAGTTGTTTCTGAAACATGGCAAAGGTAGCGTTGCCAATGATGTTACAGATGAGATGGTCAGGCTAAACTGGCTGACGGAATTTATGCCTCTTCCGACCATCAAGCATTTTATCCGTACTCCTGATGATGCATGGTTACTCACCACTGCGATCCCAGGGAAAACAGCATTCCAGGTATTAGAAGAATATCCTGATTCAGGTGAAAATATTGTTGATGCGCTGGCAGTGTTCCTGCGCCGGTTGCATTCGATTCCTGTTTGTAATTGTCCTTTTAACGGCGATCGCGTATTTCGTCTCGCTCAGGCGCAATCACGAATGAATAACGGTTTGGTTGGTGCGAGTGATTTTGATGACGAGCGTAATGGCTGGCCTGTTGAACAAGTCTGGAAAGAAATGCATAAGCTTTTGCCATTCTCACCGGATTCAGTCGTCACTCATGGTGATTTCTCACTTGATAACCTTATTTTTGACGAGGGGAAATTAATAGGTTGTATTGATGTTGGACGAGTCGGAATCGCAGACCGATACCAGGATCTTGCCATCCTATGGAACTGCCTCGGTGAGTTTTCTCCTTCATTACAGAAACGGCTTTTTCAAAAATATGGTATTGATAATCCTGATATGAATAAATTGCAGTTTCACTTGATGCTCGATGAGTTTTTCTAAGCTAGCGATCAAAGGATCTTCTTGAGATCCTTTTTTCTGCGCGTAATCTTTTGCCCTGTAAACGAAAAAACCACCTGGGGAGGTGGTTTGATCGAAGGTTAAGCAGTTCCCCAACTGACTTAACCTTCGAAAAAACCACCTGGGGAGGTGGTTTGATCGAAGGTTAAGTCAGTTGGGGAACTGCTTAACCGTGGTAACTGGCTTTCGCAGAGCACAGCAACCAAATCTGTCCTTCCAGTGTAGCCGGACTTTGGCGCACACTTCAAGAGCAACCGCGTGTTTAGCTAAACAAATCCTCTGCGAACTCCCAGTTACCAATGGCTGCTGCCAGTGGCGTTTTACCGTGCTTTTCCGGGTTGGACTCAAGTGAACAGTTACCGGATAAGGCGCAGCAGTCGGGCTGAACGGGGAGTTCTTGCTTACAGCCCAGCTTG
GAGCGAACGACCTACACCGAGCCGAGATACCAGTGTGTGAGCTATGAGAAAGCGCCACACTTCCCGTAAGGGAGAAAGGCGGAACAGGTATCCGGTAAACGGCAGGGTCGGAACAGGAGAGCGCAAGAGGGAGCGACCCGCCGGAAACGGTGGGGATCTTTAAGTCCTGTCGGGTTTCGCCCGTACTGTCAGATTCATGGTTGAGCCTCACGGCTCCCACAGATGCACCGGAAAAGCGTCTGTTTATGTGAACTCTGGCAGGAGGGCGGAGCCTATGGAAAAACGCCACCGGCGCGGCCCTGCTGTTTTGCCTCACATGTTAGTCCCCTGCTTATCCACGGAATCTGTGGGTAACTTTGTATGTGTCCGCAGCGCGGATCCCGAAGAAAGGCCCACCCGTGAAGGTGAGCCAGTGAGTTGATTGCAGTCCAGTTACGCTGGAGTCTGAGGCTCGTCCTGAATGATATCAAGCTTGAATTCGTTACCCGGATATAGTTCCTCCTTTCAGCAAAAAACCCCTCAAGACCCGTTTA
--------------------------------------------------------------------------------
/test_data/workflow_glue/find_inserts/insert_beds/internal_reverse.bed:
--------------------------------------------------------------------------------
1 | barcode01 436 2925 internal_reverse 0 - TAAACGGGTCTTGAGGGGTTTTTTGCTGAAAGGAGGAACTATATCCGGGTAACGAATTCAAGCTTGATATCATTCAGGACGAGCCTCAGACTCCAGCGTAACTGGACTGCAATCAACTCACTGGCTCACCTTCACGGGTGGGCCTTTCTTCGGGATCCGCGCTGCGGACACATACAAAGTTACCCACAGATTCCGTGGATAAGCAGGGGACTAACATGTGAGGCAAAACAGCAGGGCCGCGCCGGTGGCGTTTTTCCATAGGCTCCGCCCTCCTGCCAGAGTTCACATAAACAGACGCTTTTCCGGTGCATCTGTGGGAGCCGTGAGGCTCAACCATGAATCTGACAGTACGGGCGAAACCCGACAGGACTTAAAGATCCCCACCGTTTCCGGCGGGTCGCTCCCTCTTGCGCTCTCCTGTTCCGACCCTGCCGTTTACCGGATACCTGTTCCGCCTTTCTCCCTTACGGGAAGTGTGGCGCTTTCTCATAGCTCACACACTGGTATCTCGGCTCGGTGTAGGTCGTTCGCTCCAAGCTGGGCTGTAAGCAAGAACTCCCCGTTCAGCCCGACTGCTGCGCCTTATCCGGTAACTGTTCACTTGAGTCCAACCCGGAAAAGCACGGTAAAACGCCACTGGCAGCAGCCATTGGTAACTGGGAGTTCGCAGAGGATTTGTTTAGCTAAACACGCGGTTGCTCTTGAAGTGTGCGCCAAAGTCCGGCTACACTGGAAGGACAGATTTGGTTGCTGTGCTCTGCGAAAGCCAGTTACCACGGTTAAGCAGTTCCCCAACTGACTTAACCTTCGATCAAACCACCTCCCCAGGTGGTTTTTTCGAAGGTTAAGTCAGTTGGGGAACTGCTTAACCTTCGATCAAACCACCTCCCCAGGTGGTTTTTTCGTTTACAGGGCAAAAGATTACGCGCAGAAAAAAGGATCTCAAGAAGATCCTTTGATCGCTAGCTTAGAAAAACTCATCGAGCATCAAGTGAAACTGCAATTTATTCATATCAGGATTATCAATACCATATTTTTGAAAAAGCCGTTTCTGTAATGAAGGAGAAAACTCACCGAGGCAGTTCCATAGGATGGCAAGATCCTGGTATCGGTCTGCGATTCCGACTCGTCCAACATCAATACAACCTATTAATTTCCCCTCGTCAAAAATAAGGTTATCAAGTGAGAAATCACCATGAGTGACGACTGAATCCGGTGAGAATGGCAAAAGCTTATGCATTTCTTTCCAGACTTGTTCAACAGGCCAGCCATTACGCTCGTCATCAAAATCACTCGCACCAACCAAACCGTTATTCATTCGTGATTGCGCCTGAGCGAGACGAAATACGCGATCGCCGTTAAAAGGACAATTACAAACAGGAATCGAATGCAACCGGCGCAGGAACACTGCCAGCGCATCAACAATATTTTCACCTGAATCAGGATATTCTTCTAATACCTGGAATGCTGTTTTCCCTGGGATCGCAGTGGTGAGTAACCATGCATCATCAGGAGTACGGATAAAATGCTTGATGGTCGGAAGAGGCATAAATTCCGTCAGCCAGTTTAGCCTGACCATCTCATCTGTAACATCATTGGCAACGCTACCTTTGCCATGTTTCAGAAACAACTCTGGCGCATCGGGCTTCCCATACAATCGATAGATTGTCGCACCTGATTGCCCGACATTATCGCGAGCCCATTTATACCCATATAAATCAGCATCCATGTTGGAATTTAATCGCGGCCTCGAGCAAGACGTTTCCCGTTGAATATGGCTCATAGCTCCTGAAAATCTCGATAACTCAAAAAATACGCCCGGTAGTGATCTTATTTCATTATGGTGAAAGTTGGAACCTCTTACGTGCCGATCAAGTCAAAAGCCTCCGGTCGGAGGCTTTTGACTTTCTGCTATGGAGGTCAGGTATGATTTAAATGGTCAGTATTGAGCGATAT
CTAGAGAATTCGTCCACCACAATTCAGCAAATTGTGAACATCATCACGTTCATCTTTCCCTGGTTGCCAATGGCCCATTTTCCTGTCAGTAACGAGAAGGTCGCGAATTCAGGCGCTTTTTAGACTGGTCGTAGGGAGACCACAACGGTTTCCCTCTAGAAATAATTTTGTTTAACTATAAGAAGGAGATATACATATGAGTGCGAAGGCTGCTGAAGGTTATGAACAAATCGAAGTTGATGTGGTTGCTGTGTGGAAGGAAGGTTATGTGTATGAAAATCGTGGTAGTACCTCCGTGGATCAAAAAATTACCATCACGAAAGGCATGAAGAACGTTAATAGCGAAACCCGTACGGTCACCGCGACGCATTCTATTGGCAGTACCATCTCCACGGGTGACGCCTTTGAAATCGGCTCCGTGGAAGTTTCATATTCGCATAGCCACGAAGAATCACAAGTTTCGATGACCGAAACGGAAGTCTACGAATCAAAAGTGATTGAACACACCATTACGATCCCGCCGACCTCGAAGT
--------------------------------------------------------------------------------
/test_data/workflow_glue/find_inserts/insert_beds/split_forward.bed:
--------------------------------------------------------------------------------
1 | barcode04 1777 1038 split_forward 0 +
2 |
--------------------------------------------------------------------------------
/test_data/workflow_glue/find_inserts/insert_beds/split_reverse.bed:
--------------------------------------------------------------------------------
1 | barcode03 1777 1038 split_reverse 0 -
--------------------------------------------------------------------------------
/test_data/workflow_glue/find_inserts/primers/internal_forward.tsv:
--------------------------------------------------------------------------------
1 | internal_forward ACTTCGAGGTCGGC TAAACGGGTCT
--------------------------------------------------------------------------------
/test_data/workflow_glue/find_inserts/primers/internal_reverse.tsv:
--------------------------------------------------------------------------------
1 | internal_reverse TAAACGGGTCT ACTTCGAGGTCGGC
--------------------------------------------------------------------------------
/test_data/workflow_glue/find_inserts/primers/split_forward.tsv:
--------------------------------------------------------------------------------
1 | split_forward AGGTTGTATTGATGTTGG CGGTCGGAGGCTTTTGACTTTC
--------------------------------------------------------------------------------
/test_data/workflow_glue/find_inserts/primers/split_reverse.tsv:
--------------------------------------------------------------------------------
1 | split_reverse CGGTCGGAGGCTTTTGACTTTC AGGTTGTATTGATGTTGG
--------------------------------------------------------------------------------
/test_data/workflow_glue/report/cut_sites.csv:
--------------------------------------------------------------------------------
1 | sample02,149,106
2 | sample01,169,146
3 | sample03,200,50
4 |
--------------------------------------------------------------------------------
/test_data/workflow_glue/run_plannotate/barcode01.annotations.gbk:
--------------------------------------------------------------------------------
1 | LOCUS plasmid 4972 bp DNA circular SYN {date}
2 | DEFINITION .
3 | ACCESSION .
4 | VERSION .
5 | KEYWORDS .
6 | SOURCE .
7 | ORGANISM .
8 | .
9 | COMMENT Annotated with pLannotate v1.2.0
10 | FEATURES Location/Qualifiers
11 | CDS complement(1411..2394)
12 | /note="pLannotate"
13 | /label="H"
14 | /database="swissprot"
15 | /identity="99.7"
16 | /match_length="100.0"
17 | /fragment="False"
18 | /other="CDS"
19 | CDS complement(3384..4346)
20 | /note="pLannotate"
21 | /label="F (fragment)"
22 | /database="swissprot"
23 | /identity="100.0"
24 | /match_length="75.2"
25 | /fragment="True"
26 | /other="CDS"
27 | CDS complement(4386..4499)
28 | /note="pLannotate"
29 | /label="J"
30 | /database="swissprot"
31 | /identity="100.0"
32 | /match_length="100.0"
33 | /fragment="False"
34 | /other="CDS"
35 | CDS complement(2406..2930)
36 | /note="pLannotate"
37 | /label="G"
38 | /database="swissprot"
39 | /identity="99.4"
40 | /match_length="100.0"
41 | /fragment="False"
42 | /other="CDS"
43 | CDS complement(4502..4924)
44 | /note="pLannotate"
45 | /label="D (fragment)"
46 | /database="swissprot"
47 | /identity="100.0"
48 | /match_length="92.8"
49 | /fragment="True"
50 | /other="CDS"
51 | CDS complement(610..1344)
52 | /note="pLannotate"
53 | /label="A (fragment)"
54 | /database="swissprot"
55 | /identity="99.6"
56 | /match_length="47.8"
57 | /fragment="True"
58 | /other="CDS"
59 | ORIGIN
60 | 1 ggtagtgcta gaacggtttc tgcctcatgg gaggataaat tgacatcgat tgtcacgtgt
61 | 61 aaattgtggt agttaagccg tgagtgtgtt acaacagtcc aaaggtacgc acgttcactg
62 | 121 gagaaaccgc cataatttac actatcgtgc catgagctct gcaggtacta ccgagctttt
63 | 181 tgaaatcgaa aacgaggctg cccccacttt ttaagcgtat gaactgggga taaaggtcca
64 | 241 ggccataaga ggcaagaaca tataaattaa agggacgcat tcatcgtaca gacattttga
65 | 301 tttcaccatg tccgctaaaa atagcgttgc caaccaaagg cctactggac gtgctaagtc
66 | 361 tagaatggct cgtttactgc gctacatagt tattctgttc ccaagcttgg cgcctggccc
67 | 421 taatcccgat attcgagtat ctcttgttaa cttggaagtc aaataattct tgccggtgtc
68 | 481 caattgacct gtcattgtac aaaatctgag gaagtgcccg ggccttatcc agtagacaat
69 | 541 tcgttagcag cgacgaataa ctctctctac ttgtccgcca tttggcagcc cagcttgacg
70 | 601 cgctgccacc tctgcatggc tatcattagc cttgcgaccc tcggcagcaa gaaccatacg
71 | 661 accaatatca cgaaaatagt cacgcaaagc attgggatta tcataaaacg cctctaatcg
72 | 721 gtcgtcagcc aacgtgagag tgtcaaaaac gataaaccaa ccatcagcat gagcctgtcg
73 | 781 cattgcattc atcaaacgct gaatagcaaa gcctctacgc gatttcatag tggaggcctc
74 | 841 cagcaatctt gaacactcat ccttaatacc tttctttttg gggtaattat actcatcgcg
75 | 901 aatatcctta agagggcgtt cagcagccag cttgcggcaa aactgcgtaa ccgtcttctc
76 | 961 gttctctaaa aaccattttt cgtccccttc ggggcggtgg tctatagtgt tattaatatc
77 | 1021 aagttggggg agcacattgt agcattgtgc caattcatcc attaacttct cagtaacaga
78 | 1081 tacaaactca tcacgaacgt cagaagcagc cttatggccg tcaacataca tatcaccatt
79 | 1141 atcgaactca acgccctgca tacgaaaaga cagaatctct tccaagagct tgatgcggtt
80 | 1201 atccatctgc ttatggaagc caagcattgg ggattgagaa agagtagaaa tgccacaagc
81 | 1261 ctcaatagca ggtttaagag cctcgatacg ctcaaagtca aaataatcag cgtgacattc
82 | 1321 agaagggtaa taagaacgaa ccataaaaaa gcctccaaga tttggaggca tgaaaacata
83 | 1381 caattgggag ggtgtcaatc ctgacggtta tttcctagac aaattagagc caataccatc
84 | 1441 agctttaccg tctttccaga aattgttcca agtatcggca acagctttat caataccatg
85 | 1501 aaaaatatca accacaccag aagcagcatc agtgacgaca ttagaaatat cctttgcagt
86 | 1561 agcgccaata tgagaagagc cataccgctg attctgcgtt tgctgatgaa ctaagtcaac
87 | 1621 ctcagcacta accttgcgag tcatttcttt gatttggtca ttggtaaaat actgaccagc
88 | 1681 cgtttgagct tgagtaagca tttggcgcat aatctcggaa acctgctgtt gcttggaaag
89 | 1741 attggtgttt tccataatag acgcaacgcg agcagtagac tccttctgtt gataagcaag
90 | 1801 catctcattt tgtgcatata cctggtcttt cgtattctgg cgtgaagtcg ccgactgaat
91 | 1861 gccagcaatc tctttttgag tctcattttg catctcggca atctctttct gattgtccag
92 | 1921 ttgcatttta gtaagctctt tttgattctc aaatccggcg tcaaccatac cagcagagga
93 | 1981 agcatcagca ccagcacgct cccaagcatt aagctcagga aatgcagcag caagataatc
94 | 2041 acgagtatcc tttcctttat cagcggcaga cttgccacca agtccaacca aatcaagcaa
95 | 2101 cttatcagaa acggcagaag tgccagcctg caacgtacct tcaagaagtc ctttaccagc
96 | 2161 tttagccata gcaccagaaa caaaactagg gacggcctca tcagggttag gaacattaga
97 | 2221 gccttgaatg gcagatttaa taccagcatc acccatgcct acagtattgt tatcggtagc
98 | 2281 aagcacatca ccttgaatgc caccggaggc ggctttttga ccgcctccaa acaatttaga
99 | 2341 catggcgcca ccagcaagag cagaagcaat accgccagca atagcaccaa acataaatca
100 | 2401 cctcacttaa gtggctggag acaaataatc tctttaataa cctgattcag cgaaaccaat
101 | 2461 ccgcggcatt tagtagcggt aaagttagac caaaccatga aaccaacata aacattattg
102 | 2521 cccggcgtac ggggaaggac gtcaatagtc acacagtcct tgacggtata ataaccacca
103 | 2581 tcatggcgac catccaaagg ataaacatca taggcagtcg ggagggtagt cggaaccgaa
104 | 2641 gaagactcaa agcgaaccaa acaggcaaaa aatttagggt cggcatcaaa agcaatatca
105 | 2701 gcaccaacag aaacaacctg attagcggcg ttgacagatg tatccatctg aatgcaatga
106 | 2761 agaaaaccac cattaccagc attaaccgtc aaactatcaa aatataacgt tgacgatgta
107 | 2821 gctttaggtg tctgtaaaac aggtgccgaa gaagctggag taacagaagt gagaaccagc
108 | 2881 ttatcagaaa aaaagtttga attatggcga gaaataaaag tctgaaacat gattaaactc
109 | 2941 ctaagcagaa aacctaccgc gcttcgcttg gtcaacccct cagcggcaaa aattaaaatt
110 | 3001 tttaccgctt cggcgttata acctcacact caatctttta tcacgaagtc atgattgaat
111 | 3061 cgcgagtggt cggcagattg cgataaacgg tcacattaaa tttaacctga ctattccact
112 | 3121 gcaacaactg aacggactgg aaacactggt cataatcatg gtggcgaata agtacgcgtt
113 | 3181 cttgcaaatc accagaaggc ggttcctgaa tgaatgggaa gccttcaaga aggtgataag
114 | 3241 caggagaaac atacgaaggc gcataacgat accactgacc ctcagcaatc ttaaacttct
115 | 3301 tagacgaatc accagaacgg aaaacatcct tcatagaaat ttcacgcggc ggcaagttgc
116 | 3361 cctagaggga aaccgttgtg gtcatacaaa acagggtcgc cagcaatatc ggtataagtc
117 | 3421 aaagcacctt tagcgttaag gtactgaatc tctttagtcg cagtaggcgg aaaacgaaca
118 | 3481 agcgcaagag taaacatagt gccatgctca ggaacaaaga aacgcggcac agaatgttta
119 | 3541 taggtctgtt gaacacgacc agaaaactgg cctaacgacg tttggtcagt tccatcaaca
120 | 3601 tcatagccag atgcccagag attagagcgc atgacaagta aaggacggtt gtcagcgtca
121 | 3661 taagaggttt tacctccaaa tgaagaaata acatcatggt aacgctgcat gaagtaatca
122 | 3721 cgttcttggt cagtatgcaa attagcataa gcagcttgca gacccataat gtcaatagat
123 | 3781 gtggtagaag tcgtcatttg gcgagaaagc tcagtctcag gaggaagcgg agcagtccaa
124 | 3841 atgtttttga gatggcagca acggaaacca taacgagcat catcttgatt aagctcatta
125 | 3901 gggttagcct cggtacggtc aggcatccac ggcgctttaa aatagttgtt atagatattc
126 | 3961 aaataaccct gaaacaaatg cttagggatt ttattggtat cagggttaat cgtgccaaga
127 | 4021 aaagcggcat ggtcaatata accagtagtg ttaacagtcg ggagaggagt ggcattaaca
128 | 4081 ccatccttca tgaacttaat ccactgttca ccataaacgt gacgatgagg gacataaaaa
129 | 4141 gtaaaaatgt ctacagtaga gtcaatagca aggccacgac gcaatggaga aagacggaga
130 | 4201 gcgccaacgg cgtccatctc gaaggagtcg ccagcgataa ccggagtagt tgaaatggta
131 | 4261 ataagacgac caatctgacc agcaaggaag ccaagatggg aaaggtcatg cggcatacgc
132 | 4321 tcggcgccag tttgaatatt agacataatt tatcctcaag taaggggccg aagcccctgc
133 | 4381 aattaaaatt gttgaccacc tacataccaa agacgagcgc ctttacgctt gcctttagta
134 | 4441 cctcgcaacg gctgcggacg accagggcga gcgccagaac gttttttacc tttagacatt
135 | 4501 acatcactcc ttctgcacgt aatttttgac gcacgttttc ttctgcgtca gtaagaacgt
136 | 4561 cagtgtttcc tgcgcgtaca cgcaaggtaa acgcgaacaa ttcagcggct ttaaccggac
137 | 4621 gctcgacgcc attaataatg ttttccgtaa attcagcgcc ttccatgatg agacaggccg
138 | 4681 tttgaatgtt gacgggatga acataataag caatgacggc agcaataaac tcaacaggag
139 | 4741 caggaaagcg agggtatcct acaaagtcca gcgtaccata aacgcaagcc tcaacgcagc
140 | 4801 gacgagcacg agagcggtca gtagcaatcc aaactttgtt actcgtcaga aaatcgaaat
141 | 4861 catcttcggt taaatccaaa acggcagaag cctgaatgag cttaatagag gccaaagcgg
142 | 4921 tctggattgc tacccgcttc atacgggggg ccgtgttagg aaatatacga ga
143 | //
144 |
--------------------------------------------------------------------------------
/test_data/workflow_glue/run_plannotate/barcode01.fasta:
--------------------------------------------------------------------------------
1 | >barcode01
2 | GGTAGTGCTAGAACGGTTTCTGCCTCATGGGAGGATAAATTGACATCGATTGTCACGTGTAAATTGTGGTAGTTAAGCCGTGAGTGTGTTACAACAGTCCAAAGGTACGCACGTTCACTGGAGAAACCGCCATAATTTACACTATCGTGCCATGAGCTCTGCAGGTACTACCGAGCTTTTTGAAATCGAAAACGAGGCTGCCCCCACTTTTTAAGCGTATGAACTGGGGATAAAGGTCCAGGCCATAAGAGGCAAGAACATATAAATTAAAGGGACGCATTCATCGTACAGACATTTTGATTTCACCATGTCCGCTAAAAATAGCGTTGCCAACCAAAGGCCTACTGGACGTGCTAAGTCTAGAATGGCTCGTTTACTGCGCTACATAGTTATTCTGTTCCCAAGCTTGGCGCCTGGCCCTAATCCCGATATTCGAGTATCTCTTGTTAACTTGGAAGTCAAATAATTCTTGCCGGTGTCCAATTGACCTGTCATTGTACAAAATCTGAGGAAGTGCCCGGGCCTTATCCAGTAGACAATTCGTTAGCAGCGACGAATAACTCTCTCTACTTGTCCGCCATTTGGCAGCCCAGCTTGACGCGCTGCCACCTCTGCATGGCTATCATTAGCCTTGCGACCCTCGGCAGCAAGAACCATACGACCAATATCACGAAAATAGTCACGCAAAGCATTGGGATTATCATAAAACGCCTCTAATCGGTCGTCAGCCAACGTGAGAGTGTCAAAAACGATAAACCAACCATCAGCATGAGCCTGTCGCATTGCATTCATCAAACGCTGAATAGCAAAGCCTCTACGCGATTTCATAGTGGAGGCCTCCAGCAATCTTGAACACTCATCCTTAATACCTTTCTTTTTGGGGTAATTATACTCATCGCGAATATCCTTAAGAGGGCGTTCAGCAGCCAGCTTGCGGCAAAACTGCGTAACCGTCTTCTCGTTCTCTAAAAACCATTTTTCGTCCCCTTCGGGGCGGTGGTCTATAGTGTTATTAATATCAAGTTGGGGGAGCACATTGTAGCATTGTGCCAATTCATCCATTAACTTCTCAGTAACAGATACAAACTCATCACGAACGTCAGAAGCAGCCTTATGGCCGTCAACATACATATCACCATTATCGAACTCAACGCCCTGCATACGAAAAGACAGAATCTCTTCCAAGAGCTTGATGCGGTTATCCATCTGCTTATGGAAGCCAAGCATTGGGGATTGAGAAAGAGTAGAAATGCCACAAGCCTCAATAGCAGGTTTAAGAGCCTCGATACGCTCAAAGTCAAAATAATCAGCGTGACATTCAGAAGGGTAATAAGAACGAACCATAAAAAAGCCTCCAAGATTTGGAGGCATGAAAACATACAATTGGGAGGGTGTCAATCCTGACGGTTATTTCCTAGACAAATTAGAGCCAATACCATCAGCTTTACCGTCTTTCCAGAAATTGTTCCAAGTATCGGCAACAGCTTTATCAATACCATGAAAAATATCAACCACACCAGAAGCAGCATCAGTGACGACATTAGAAATATCCTTTGCAGTAGCGCCAATATGAGAAGAGCCATACCGCTGATTCTGCGTTTGCTGATGAACTAAGTCAACCTCAGCACTAACCTTGCGAGTCATTTCTTTGATTTGGTCATTGGTAAAATACTGACCAGCCGTTTGAGCTTGAGTAAGCATTTGGCGCATAATCTCGGAAACCTGCTGTTGCTTGGAAAGATTGGTGTTTTCCATAATAGACGCAACGCGAGCAGTAGACTCCTTCTGTTGATAAGCAAGCATCTCATTTTGTGCATATACCTGGTCTTTCGTATTCTGGCGTGAAGTCGCCGACTGAATGCCAGCAATCTCTTTTTGAGTCTCATTTTGCATCTCGGCAATCTCTTTCTGATTGTCCAGTTGCATTTTAGTAAGCTCTTTTTGATTCTCAAATCCGGCGTCAACCATACCAGCAGAGGAAGCATCAGCACCAGCA
CGCTCCCAAGCATTAAGCTCAGGAAATGCAGCAGCAAGATAATCACGAGTATCCTTTCCTTTATCAGCGGCAGACTTGCCACCAAGTCCAACCAAATCAAGCAACTTATCAGAAACGGCAGAAGTGCCAGCCTGCAACGTACCTTCAAGAAGTCCTTTACCAGCTTTAGCCATAGCACCAGAAACAAAACTAGGGACGGCCTCATCAGGGTTAGGAACATTAGAGCCTTGAATGGCAGATTTAATACCAGCATCACCCATGCCTACAGTATTGTTATCGGTAGCAAGCACATCACCTTGAATGCCACCGGAGGCGGCTTTTTGACCGCCTCCAAACAATTTAGACATGGCGCCACCAGCAAGAGCAGAAGCAATACCGCCAGCAATAGCACCAAACATAAATCACCTCACTTAAGTGGCTGGAGACAAATAATCTCTTTAATAACCTGATTCAGCGAAACCAATCCGCGGCATTTAGTAGCGGTAAAGTTAGACCAAACCATGAAACCAACATAAACATTATTGCCCGGCGTACGGGGAAGGACGTCAATAGTCACACAGTCCTTGACGGTATAATAACCACCATCATGGCGACCATCCAAAGGATAAACATCATAGGCAGTCGGGAGGGTAGTCGGAACCGAAGAAGACTCAAAGCGAACCAAACAGGCAAAAAATTTAGGGTCGGCATCAAAAGCAATATCAGCACCAACAGAAACAACCTGATTAGCGGCGTTGACAGATGTATCCATCTGAATGCAATGAAGAAAACCACCATTACCAGCATTAACCGTCAAACTATCAAAATATAACGTTGACGATGTAGCTTTAGGTGTCTGTAAAACAGGTGCCGAAGAAGCTGGAGTAACAGAAGTGAGAACCAGCTTATCAGAAAAAAAGTTTGAATTATGGCGAGAAATAAAAGTCTGAAACATGATTAAACTCCTAAGCAGAAAACCTACCGCGCTTCGCTTGGTCAACCCCTCAGCGGCAAAAATTAAAATTTTTACCGCTTCGGCGTTATAACCTCACACTCAATCTTTTATCACGAAGTCATGATTGAATCGCGAGTGGTCGGCAGATTGCGATAAACGGTCACATTAAATTTAACCTGACTATTCCACTGCAACAACTGAACGGACTGGAAACACTGGTCATAATCATGGTGGCGAATAAGTACGCGTTCTTGCAAATCACCAGAAGGCGGTTCCTGAATGAATGGGAAGCCTTCAAGAAGGTGATAAGCAGGAGAAACATACGAAGGCGCATAACGATACCACTGACCCTCAGCAATCTTAAACTTCTTAGACGAATCACCAGAACGGAAAACATCCTTCATAGAAATTTCACGCGGCGGCAAGTTGCCCTAGAGGGAAACCGTTGTGGTCATACAAAACAGGGTCGCCAGCAATATCGGTATAAGTCAAAGCACCTTTAGCGTTAAGGTACTGAATCTCTTTAGTCGCAGTAGGCGGAAAACGAACAAGCGCAAGAGTAAACATAGTGCCATGCTCAGGAACAAAGAAACGCGGCACAGAATGTTTATAGGTCTGTTGAACACGACCAGAAAACTGGCCTAACGACGTTTGGTCAGTTCCATCAACATCATAGCCAGATGCCCAGAGATTAGAGCGCATGACAAGTAAAGGACGGTTGTCAGCGTCATAAGAGGTTTTACCTCCAAATGAAGAAATAACATCATGGTAACGCTGCATGAAGTAATCACGTTCTTGGTCAGTATGCAAATTAGCATAAGCAGCTTGCAGACCCATAATGTCAATAGATGTGGTAGAAGTCGTCATTTGGCGAGAAAGCTCAGTCTCAGGAGGAAGCGGAGCAGTCCAAATGTTTTTGAGATGGCAGCAACGGAAACCATAACGAGCATCATCTTGATTAAGCTCATTAGGGTTAGCCTCGGTACGGTCAGGCATCCACGGCGCTTTAAAATAGTTGTTATAGATATTCAAATAACCCTGAAACAAATGCTTAGGGATTTTATTG
GTATCAGGGTTAATCGTGCCAAGAAAAGCGGCATGGTCAATATAACCAGTAGTGTTAACAGTCGGGAGAGGAGTGGCATTAACACCATCCTTCATGAACTTAATCCACTGTTCACCATAAACGTGACGATGAGGGACATAAAAAGTAAAAATGTCTACAGTAGAGTCAATAGCAAGGCCACGACGCAATGGAGAAAGACGGAGAGCGCCAACGGCGTCCATCTCGAAGGAGTCGCCAGCGATAACCGGAGTAGTTGAAATGGTAATAAGACGACCAATCTGACCAGCAAGGAAGCCAAGATGGGAAAGGTCATGCGGCATACGCTCGGCGCCAGTTTGAATATTAGACATAATTTATCCTCAAGTAAGGGGCCGAAGCCCCTGCAATTAAAATTGTTGACCACCTACATACCAAAGACGAGCGCCTTTACGCTTGCCTTTAGTACCTCGCAACGGCTGCGGACGACCAGGGCGAGCGCCAGAACGTTTTTTACCTTTAGACATTACATCACTCCTTCTGCACGTAATTTTTGACGCACGTTTTCTTCTGCGTCAGTAAGAACGTCAGTGTTTCCTGCGCGTACACGCAAGGTAAACGCGAACAATTCAGCGGCTTTAACCGGACGCTCGACGCCATTAATAATGTTTTCCGTAAATTCAGCGCCTTCCATGATGAGACAGGCCGTTTGAATGTTGACGGGATGAACATAATAAGCAATGACGGCAGCAATAAACTCAACAGGAGCAGGAAAGCGAGGGTATCCTACAAAGTCCAGCGTACCATAAACGCAAGCCTCAACGCAGCGACGAGCACGAGAGCGGTCAGTAGCAATCCAAACTTTGTTACTCGTCAGAAAATCGAAATCATCTTCGGTTAAATCCAAAACGGCAGAAGCCTGAATGAGCTTAATAGAGGCCAAAGCGGTCTGGATTGCTACCCGCTTCATACGGGGGGCCGTGTTAGGAAATATACGAGA
3 |
--------------------------------------------------------------------------------