├── .gitignore
├── CHANGELOG.md
├── ECLIP-VERSION-0.7.0
├── LICENSE
├── README.md
├── bin
    ├── __init__.py
    ├── annotate_peaks_bedformat_wproxdistal_lncRNA.pl
    ├── barcodecollapsepe.py
    ├── bed_to_narrowpeak.py
    ├── calculate_entropy.py
    ├── combine_ReadsByLoc_files.pl
    ├── compress_l2foldenrpeakfi_for_replicate_overlapping_bedformat.pl
    ├── convert_ReadsByLoc_combined_significancecalls.pl
    ├── count_reads_broadfeatures_frombamfi_PEmap_lncRNA.pl
    ├── count_reads_broadfeatures_frombamfi_SEmap_lncRNA.pl
    ├── fix_bed_for_bigbed_conversion.py
    ├── generate_adaptertrim_fasta.ipynb
    ├── overlap_peakfi_with_bam.pl
    ├── overlap_peakfi_with_bam_PE.pl
    └── parsebarcodes.sh
├── cwl
    ├── barcodecollapse_pe.cwl
    ├── barcodecollapse_se.cwl
    ├── barcodecollapse_se_nostats.cwl
    ├── bed_to_bigbed.cwl
    ├── bed_to_narrowpeak.cwl
    ├── blacklist-remove.cwl
    ├── calculate_entropy.cwl
    ├── clipper.cwl
    ├── demux_pe.cwl
    ├── demux_se.cwl
    ├── demux_targeted_mir_se.cwl
    ├── fastqc.cwl
    ├── fastqsort.cwl
    ├── file2string.cwl
    ├── file2stringArray.cwl
    ├── fix_bed_for_bigbed_conversion.cwl
    ├── gzip.cwl
    ├── index.cwl
    ├── makebigwigfiles.cwl
    ├── makebigwigfiles_PE.cwl
    ├── makebigwigfiles_SE.cwl
    ├── namesort.cwl
    ├── overlap_peakfi_with_bam.cwl
    ├── overlap_peakfi_with_bam_PE.cwl
    ├── parsebarcodes.cwl
    ├── peakscompress.cwl
    ├── rename.cwl
    ├── samtools-index.cwl
    ├── samtools-mappedreadnum.cwl
    ├── samtools-merge.cwl
    ├── samtools-view.cwl
    ├── samtools-viewr2.cwl
    ├── sort-bed.cwl
    ├── sort.cwl
    ├── star-genome.cwl
    ├── star-repeatmapping.cwl
    ├── star.cwl
    ├── trim_pe.cwl
    ├── trim_se.cwl
    ├── trim_umi.cwl
    ├── wf_clipseqcore_chimeric_se_1barcode.cwl
    ├── wf_clipseqcore_nostats_se_1barcode.cwl
    ├── wf_clipseqcore_pe_1barcode.cwl
    ├── wf_clipseqcore_pe_1barcode_nodemux.cwl
    ├── wf_clipseqcore_pe_2barcodes.cwl
    ├── wf_clipseqcore_pe_2barcodes_nodemux.cwl
    ├── wf_clipseqcore_se_1barcode.cwl
    ├── wf_clipseqcore_trim_partial_se_1barcode.cwl
    ├── wf_demultiplex_pe.cwl
    ├── wf_demultiplex_se.cwl
    ├── wf_encode_se_full.cwl
    ├── wf_encode_se_full_nostats.cwl
    ├── wf_encode_se_full_scatter.cwl
    ├── wf_encode_se_full_scatter_nostats.cwl
    ├── wf_encode_se_just_repmap.cwl
    ├── wf_fastqc.cwl
    ├── wf_get_peaks_chimeric_se.cwl
    ├── wf_get_peaks_nostats_se.cwl
    ├── wf_get_peaks_pe.cwl
    ├── wf_get_peaks_scatter_chimeric_se.cwl
    ├── wf_get_peaks_scatter_pe.cwl
    ├── wf_get_peaks_scatter_se.cwl
    ├── wf_get_peaks_scatter_se_nostats.cwl
    ├── wf_get_peaks_se.cwl
    ├── wf_get_peaks_trim_partial_scatter_se.cwl
    ├── wf_get_peaks_trim_partial_se.cwl
    ├── wf_trim_and_map_chimeric_se.cwl
    ├── wf_trim_and_map_pe.cwl
    ├── wf_trim_and_map_se.cwl
    ├── wf_trim_and_map_se_nostats.cwl
    ├── wf_trim_partial_and_map_se.cwl
    └── wf_trim_partial_and_map_se_scatter.cwl
├── documentation
    ├── Repeat_mapping.pdf
    ├── Reproducible_peaks.pdf
    ├── Zero_to_peaks.pdf
    ├── eCLIP_analysisSOP_v2.0.pdf
    ├── eCLIP_analysisSOP_v2.2.1.docx
    ├── eCLIP_analysisSOP_v2.2.docx
    └── eCLIP_single_end_analysisSOP_v1.docx
├── eCLIP-flowchart.png
├── example
    ├── inputs
    │   ├── ENCFF039QTN.bed
    │   ├── ENCFF269URO.bed
    │   ├── InvRNA1_adapters.fasta
    │   ├── InvRNA2_adapters.fasta
    │   ├── InvRNA3_adapters.fasta
    │   ├── InvRNA4_adapters.fasta
    │   ├── InvRNA5_adapters.fasta
    │   ├── InvRNA6_adapters.fasta
    │   ├── InvRNA7_adapters.fasta
    │   ├── InvRNA8_adapters.fasta
    │   ├── InvRil19_adapters.yaml
    │   ├── example_fastqs
    │   │   ├── chrom19kbp550_clip1_r1.fastq.gz
    │   │   ├── chrom19kbp550_clip1_r2.fastq.gz
    │   │   ├── chrom19kbp550_clip2_r1.fastq.gz
    │   │   ├── chrom19kbp550_clip2_r2.fastq.gz
    │   │   ├── chrom19kbp550_input_r1.fastq.gz
    │   │   └── chrom19kbp550_input_r2.fastq.gz
    │   ├── hg113seqs_repbase_starindex
    │   │   ├── Genome
    │   │   ├── SA
    │   │   ├── SAindex
    │   │   ├── chrLength.txt
    │   │   ├── chrName.txt
    │   │   ├── chrNameLength.txt
    │   │   ├── chrStart.txt
    │   │   ├── genomeParameters.txt
    │   │   └── small_repelements.fa
    │   ├── hg19.chrom.sizes
    │   ├── hg19chr19.chrom.sizes
    │   ├── hg19chr19kbp550_starindex
    │   │   ├── Genome
    │   │   ├── SA
    │   │   ├── SAindex
    │   │   ├── chr19_550000bases.fa
    │   │   ├── chrLength.txt
    │   │   ├── chrName.txt
    │   │   ├── chrNameLength.txt
    │   │   ├── chrStart.txt
    │   │   └── genomeParameters.txt
    │   └── yeolabbarcodes_20170101.fasta
    ├── paired_end_clip.yaml
    └── single_end_clip.yaml
├── tests
    └── eCLIP-0.7.0
    │   ├── 01_umi_tools_extract
    │       └── run_demux_se.sh
    │   ├── 02_cutadapt_round1
    │       └── run_cutadapt.sh
    │   ├── 03_cutadapt_round2
    │       └── run_cutadapt.sh
    │   ├── 04_fastq_sort
    │       └── run_fastq-sort.sh
    │   ├── 05_star_repeat
    │       └── run_star.sh
    │   ├── 06_star_genome
    │       └── run_star.sh
    │   ├── 07_sort
    │       └── run_sort.sh
    │   ├── 08_umi_tools_dedup
    │       └── run_umitools.sh
    │   ├── 09_clipper
    │       ├── run_204_01_RBFOX2_clipper.sh
    │       ├── run_4020_CLIP1_clipper.sh
    │       └── run_clipper.sh
    │   ├── 10_normalize
    │       └── run_input_norm.sh
    │   ├── wf_clipseqcore_pe_2barcodes
    │       └── wf_clipseqcore_2bc.sh
    │   └── wf_clipseqcore_se_1barcode
    │       └── wf_clipseqcore_1bc.sh
├── wf
    ├── README.md
    ├── eCLIP_pairedend
    ├── eCLIP_pairedend_singlenode
    ├── eCLIP_singleend
    ├── eCLIP_singleend_encode
    └── eCLIP_singleend_singlenode
└── wf_debug
    ├── eCLIP_singleend_encode_nostats
    ├── eCLIP_singleend_nostats
    ├── eCLIP_singleend_singlenode_nostats
    └── eCLIP_singleend_trim_partial


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | 
 49 | # Translations
 50 | *.mo
 51 | *.pot
 52 | 
 53 | # Django stuff:
 54 | *.log
 55 | .static_storage/
 56 | .media/
 57 | local_settings.py
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 
106 | #
107 | archived*
108 | 
109 | #
110 | *DS_Store
111 | 
112 | #
113 | .ipynb_checkpoints/
114 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # Changelog
 2 | All notable changes to this project will be documented in this file.
 3 | 
 4 | The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
 5 | 
 6 | ## [0.7.0] - 2020-10-09
 7 | ### Added
 8 | - Extra step to calculate total entropy
 9 | 
10 | ### Changed
 11 | - Updated clipper.cwl to "Clipper3" and included the latest ENCODE annotations (GRCh38_v29e). Also removed pickle intermediates, although these never made it to the final outputs.
12 | - Updated star*.cwl STAR to version 2.7.6, fixes a bug that produces non-ascii characters
13 | 
14 | ### 
15 | 
16 | ## [0.6.0a] - 2020-08-20
17 | ### Added
18 | - The core pipeline (wf_get_peaks_scatter_se.cwl and wf_get_peaks_scatter_pe.cwl) *should* now be fully portable on AWS.
19 | - Slight modifications to README (updated references)
20 | 
21 | ### Fixed
22 | - Updated adapter examples (was missing one base in the last adapter)
23 | 
24 | ## [Unreleased 0.5.99] - 2020-06-24
25 | ### Added
26 | - Added docker requirement definitions to most commandlinetools.
27 | - Added the following companion workflows:
28 |   - wf_encode_se_full_nostats "full encode workflow (eCLIP + repeat mapping + region normalization)" minus umi_tools --stats (to save memory)
 29 |   - wf_encode_se_full_scatter_nostats "full encode workflow" minus umi_tools --stats (multiple samples)
30 | - Added the following commandlinetools:
31 |   - fastqc.cwl
32 | - Added the following subworkflows to the main workflow:
 33 |   - wf_fastqc.cwl essentially fastqc.cwl + rename.cwl (so fastqc files won't overwrite each other)
34 | - Added a 'blacklist_file' required param to the following workflows
35 |   - wf_get_peaks_scatter_se_nostats.cwl
36 |   - wf_get_peaks_trim_partial_scatter_se.cwl
37 |   - wf_get_peaks_trim_partial_se.cwl
38 | ### Fixed
 39 | - Updated workflows to report uniquely-named fastqc reports so they don't overwrite each other.
40 | - (unused in main pipeline) convert_ReadsByLoc_combined_significancecalls.pl now matches current region normalization script
41 | - (unused in main pipeline) duplicate_removal_inline_paired_count_region_other_reads_SE.pl now matches current repeat element scripts
42 | - (unused in main pipeline) split_bam_to_subfiles_SE.pl now matches current repeat element scripts
43 | 
44 | ## [Unreleased 0.5.0] - 2020-02-21
45 | ### Changed
46 | - Version bumped to 0.5.0
47 | 
48 | ### Added
49 | - Added the following steps to the main single-end pipeline:
50 |   - sort_bed (sorts input normalized bed file)
51 |   - blacklist remove (removes blacklisted regions from peak file)
52 |   - bed to narrowPeak (converts peak bed file to narrowPeak format)
53 |   - fix bed ("fixes" a peak bed file format such that it is compatible with bedToBigBed)
54 |   - bed to bigbed (calls bedToBigBed to convert peak bed file to bigBed format)
55 | - Added a 'nostats' workflow in 'wf/' to optionally run the pipeline without requiring umi_tools stats generation. This dramatically cuts down on runtime/mem reqs
56 | - Added pre/post processing scripts (annotate_peaks_bedformat_wproxdistal_lncRNA.pl & generate_adaptertrim_fasta.ipynb)
57 |   - annotate_peaks_bedformat_wproxdistal_lncRNA.pl (perl script that annotates bed files)
58 |   - generate_adaptertrim_fasta.ipynb (jupyter notebook that generates fasta files w/ partial adapter sequences to trim)
59 | 
60 | ## [0.4.0] - 2019-03-25
61 | ### Changed
62 | - YAML metadata changes slightly to account for each dataset to potentially have its own adapter sequences
63 | 
64 | ## [0.3.0] - 2019-03-05
65 | - There is some work done to make the SE pipeline outputs deterministic. Outputs should be the same every time.
66 | - Introducing a "wf_encode_full" workflow that combines the peak calling workflow, the repeat mapping workflow (hg19 only), and region-level normalization workflow
67 | - The previous manifests (eCLIP-0.2.2) for eCLIP_pairedend and eCLIP_singleend should still work.
68 | 
69 | ### Added
70 | - gzip step for all fastq files
71 | - added ```arguments: ["--random-seed", "1"]``` to barcodecollapse_se and demux_se definitions to decrease randomness in umi_tools outputs
 72 | - added "wf_encode_se_full" and "wf_encode_se_full_scatter" cwl definitions to run 1) peak finding, 2) region level normalization, 3) repeat mapping for SE reads.
73 | - region normalization subworkflow (regionnormalize/) cwl definitions to incorporate region level normalization
74 | - repeat mapping subworkflow (repmap/) cwl definitions to incorporate repeat mapping
75 | 
76 | ### Changed
77 | - makebigwigs script is now split into _PE and _SE due to strand flipping
78 | - repeat-mapped reads now are named dataset.readname.umi.r1.repeat-mapped.bam (instead of dataset.readname.umi.r1TrTr.sorted.STARAligned.out.bam)
79 | - repeat-unmapped reads are now named dataset.readname.umi.r1.repeat-unmapped.sorted.fq (instead of dataset.readname.umi.r1TrTr.sorted.STARUnmapped.out.sorted.fq.gz)
80 | - genome-mapped reads now are named dataset.readname.umi.r1.genome-mapped.bam (instead of dataset.readname.umi.r1TrTr.sorted.STARUnmapped.out.sorted.STARAligned.outSo.rmDupSo.bam)
81 | - wf_trim_and_map_se.cwl now outputs gzipped X_output_trim_first and X_output_trim_again fastq files.
82 | 
83 | [Unreleased]: https://github.com/yeolab/eclip...HEAD
84 | 
85 | 


--------------------------------------------------------------------------------
/ECLIP-VERSION-0.7.0:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/ECLIP-VERSION-0.7.0


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | 
 2 | This software is Copyright © 2022 The Regents of the University of California. 
 3 | All Rights Reserved. 
 4 | 
 5 | Permission to copy, modify, and distribute this software and its documentation 
 6 | for educational, research and non-profit purposes, without fee, and without a 
 7 | written agreement is hereby granted, provided that the above copyright notice, 
 8 | this paragraph and the following three paragraphs appear in all copies. 
 9 | 
10 | Permission to make commercial use of this software may be obtained by contacting:
11 | 
12 |    Office of Innovation & Commercialization
13 |    9500 Gilman Drive, Mail Code 0910
14 |    University of California
15 |    La Jolla, CA 92093-0910
16 |    (858) 534-5815
17 |    innovation@ucsd.edu
18 | 
19 | This software program and documentation are copyrighted by The Regents of the 
20 | University of California. The software program and documentation are supplied 
 21 | "as is", without any accompanying services from The Regents. The Regents does 
22 | not warrant that the operation of the program will be uninterrupted or 
23 | error-free. The end-user understands that the program was developed for 
24 | research purposes and is advised not to rely exclusively on the program for 
25 | any reason.
26 | 
27 | IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR 
28 | DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING 
29 | LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, 
30 | EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF 
31 | SUCH DAMAGE. THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY 
32 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 
33 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED 
 34 | HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO 
35 | OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR 
36 | MODIFICATIONS.
37 | 


--------------------------------------------------------------------------------
/bin/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/bin/__init__.py


--------------------------------------------------------------------------------
/bin/barcodecollapsepe.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # encoding: utf-8
  3 | 
  4 | """
  5 | barcodecollapsepe.py
  6 | 
  7 | Created by Gabriel Pratt
  8 | 
  9 | reads in a .bam file where the first 9 nt of the read name are the barcode
 10 | and merge reads mapped to the same position that have the same barcode
 11 | """
 12 | 
 13 | 
 14 | from __future__ import print_function
 15 | 
 16 | 
 17 | from collections import Counter
 18 | import itertools
 19 | from optparse import OptionParser  # TODO replace with argparse
 20 | import sys
 21 | import pysam
 22 | 
 23 | 
 24 | help_message = """
 25 | barcodecollapse_pe reads in a .bam file where the first 9 nt of the read name
 26 | are the barcode and merge reads mapped to the same position that have the same
 27 | barcode
 28 | """
 29 | 
 30 | 
 31 | def stranded_read_start(read):
 32 |     if read.is_reverse:
 33 |         return read.positions[-1]
 34 |     else:
 35 |         return read.pos
 36 | 
 37 | 
 38 | def output_metrics(metrics_file, total_count, removed_count):
 39 |     with open(metrics_file, 'w') as metrics:
 40 |         metrics.write("\t".join(["randomer",
 41 |                                  "total_count",
 42 |                                  "removed_count"])
 43 |                       + "\n")
 44 |         for barcode in total_count.keys():
 45 |             metrics.write("\t".join(map(str, [barcode,
 46 |                                               total_count[barcode],
 47 |                                               removed_count[barcode]]))
 48 |                           + "\n")
 49 | 
 50 | 
 51 | def barcode_collapse(in_bam, out_bam):
 52 |     number_of_unmapped_mate_pairs = 0
 53 |     different_chroms = 0
 54 |     removed_count = Counter()
 55 |     total_count = Counter()
 56 |     result_dict = {}
 57 | 
 58 |     # reads in 2 copies of in_bam
 59 |     with pysam.Samfile(in_bam, 'r') as samfile1:
 60 |         with pysam.Samfile(in_bam, 'r') as samfile2:
 61 | 
 62 |             samfile_read1 = itertools.islice(samfile1, 0, None, 2)
 63 |             samfile_read2 = itertools.islice(samfile2, 1, None, 2)
 64 |             for read1, read2 in itertools.izip(samfile_read1, samfile_read2):
 65 |                 if not read1.qname == read2.qname:
 66 |                     print(read1.qname, read2.qname)
 67 |                     raise Exception("Read Names don't match")
 68 |                 if read1.is_unmapped and read1.is_unmapped:
 69 |                     #Both reads don't map, don't even both saving them.
 70 |                     continue
 71 |                 if ((not read1.is_unmapped and read2.is_unmapped)
 72 |                     or (read1.is_unmapped and read2.is_unmapped)):
 73 |                     number_of_unmapped_mate_pairs += 1
 74 |                     continue
 75 |                 if read1.rname != read2.rname:
 76 |                     different_chroms += 1
 77 |                     continue
 78 | 
 79 |                 #if the read order is swapped swap everything before running.
 80 |                 if not read1.is_read1:
 81 |                     read1, read2 = read2, read1
 82 | 
 83 |                 randomer = read1.qname.split(":")[0]
 84 | 
 85 |                 start = stranded_read_start(read1)
 86 |                 stop = stranded_read_start(read2)
 87 |                 # read1.is_read1
 88 |                 strand = "-" if read1.is_reverse else "+"
 89 |                 unique_location = (read1.rname, start, stop, strand, randomer)
 90 | 
 91 |                 # increment appropriate counter
 92 |                 total_count[randomer] += 1
 93 |                 if unique_location in result_dict:
 94 |                     removed_count[randomer] += 1
 95 |                     continue
 96 | 
 97 |                 result_dict[(read1.rname, start, stop, strand, randomer)] = (read1, read2)
 98 | 
 99 |         # ouput barcode collapsed reads
100 |         with pysam.Samfile(out_bam, 'wb', template=samfile1) as out_bam:
101 |             for key, (read1, read2) in result_dict.items():
102 |                 out_bam.write(read1)
103 |                 out_bam.write(read2)
104 | 
105 |     return total_count, removed_count
106 | 
107 | 
108 | def main():
109 |     description = """Paired End randomer aware duplciate removal algorithm."""
110 |     usage  = """
111 | Assumes paired end reads are adjacent in output file (ie needs unsorted bams)
112 | Also assumes no multimappers in the bam file (otherwise behavior is undefined)
113 | """
114 |     parser = OptionParser(usage=usage, description=description)
115 |     parser.add_option("-b", "--bam",
116 |                       dest="bam",
117 |                       help="bam file to barcode collapse")
118 |     parser.add_option("-o", "--out_file",
119 |                       dest="out_file")
120 |     parser.add_option("-m", "--metrics_file",
121 |                       dest="metrics_file")
122 |     (options, args) = parser.parse_args()
123 | 
124 |     if not (options.bam.endswith(".bam")):
125 |         raise TypeError("%s, not bam file" % options.bam)
126 | 
127 |     total_count, removed_count = barcode_collapse(options.bam, options.out_file)
128 |     output_metrics(options.metrics_file, total_count, removed_count)
129 | 
130 |     sys.exit(0)
131 | 
132 | 
133 | if __name__ == "__main__":
134 |     main()
135 | 


--------------------------------------------------------------------------------
/bin/bed_to_narrowpeak.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | """
 3 | narrowPeak, 
 4 | cols 9 and 10 are just blank, 
 5 | col 5 is 1000 for things that meet the >=3 l2fc and l10pval cutoffs and 200 otherwise (its just for ucsc track coloring)
 6 | """
 7 | import numpy as np
 8 | import pandas as pd
 9 | import argparse
10 | import os
11 | 
12 | ECLIP_HEADER = [
13 |     'chrom','start','end','pValue','signalValue','strand'
14 | ]
15 | 
16 | def score_encode(row):
17 |     if row['pValue'] >= 3 and row['signalValue'] >= 3:
18 |         return 1000
19 |     else:
20 |         return 200
21 | 
22 | def return_narrowpeak_header(bed, species, visibility=3):
23 |     name = os.path.basename(bed)
24 |     description = name + " input-normalized peaks"
25 |     header = 'track type=narrowPeak visibility={} db={} name=\"{}\" description=\"{}\"'.format(
26 |         visibility, species, name, description
27 |     )
28 |     return header
29 |     
30 | def bed_to_narrowpeak(bed, species, narrowpeak):
31 |     
32 |     peaks = pd.read_csv(bed, names=ECLIP_HEADER, sep='\t')
33 |     peaks['name'] = '.'
34 |     peaks['score'] = peaks.apply(score_encode, axis=1)
35 |     peaks['qValue'] = -1
36 |     peaks['peak'] = -1
37 |     
38 |     with open(narrowpeak, 'w') as f:
39 |         f.write("{}\n".format(return_narrowpeak_header(bed, species)))
40 |     with open(narrowpeak, 'a') as f:
41 |         peaks[[
42 |             'chrom','start','end','name','score','strand','signalValue','pValue','qValue','peak'
43 |         ]].to_csv(
44 |             f,
45 |             sep='\t',
46 |             header=False,
47 |             index=False
48 |         )
49 |     
50 |     
51 | def main():
52 |     parser = argparse.ArgumentParser()
53 | 
54 |     parser.add_argument(
55 |         "--input_bed",
56 |         required=True,
57 |     )
58 |     parser.add_argument(
59 |         "--species",
60 |         required=True,
61 |     )
62 |     parser.add_argument(
63 |         "--output_narrowpeak",
64 |         required=True,
65 |     )
66 |     
67 |     # Process arguments
68 |     args = parser.parse_args()
69 |     bed = args.input_bed
70 |     species = args.species
71 |     narrowpeak = args.output_narrowpeak
72 |     
73 |     # Hack to get around the hg19/38 -> GRCh37/38 ucsc schema.
74 |     if species.upper() == 'GRCH37' or species.upper().startswith('GRCH37'):
75 |         species = 'hg19'
76 |     elif species.upper() == 'GRCH38' or species.upper().startswith('GRCH38'):
77 |         species = 'hg38'
78 |         
79 |     # main func
80 |     bed_to_narrowpeak(bed, species, narrowpeak)
81 |     
82 | if __name__ == "__main__":
83 |     main()
84 | 


--------------------------------------------------------------------------------
/bin/calculate_entropy.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import numpy as np
  4 | import pandas as pd
  5 | import argparse
  6 | import os
  7 | 
  8 | FULL_HEADER = [
  9 |     'chrom','start','end','peak','ip_count','input_count',
 10 |     'pvalue','chivalue','chitype','isenriched','l10p','l2fc'
 11 | ]
 12 | 
 13 | def entropy(row, ip_mapped_num, input_mapped_num):
 14 |     """
 15 |     Computes the entropy for a given peak (row).
 16 |     Uses the number of reads and number of total mapped reads 
 17 |     to 
 18 |     """
 19 |     pip = float(row['ip_count']/float(ip_mapped_num))
 20 |     pinp = float(row['input_count']/float(input_mapped_num))
 21 |     return pip * np.log2(pip/pinp)
 22 |     
 23 | def sum_entropy(full, ip_mapped, input_mapped, l10p, l2fc):
 24 |     """
 25 |     Computes the entropy 
 26 |     """
 27 |     try:
 28 |         with open(ip_mapped, 'r') as f:
 29 |             ip_mapped_num = int(f.readline().rstrip())
 30 |         with open(input_mapped, 'r') as f:
 31 |             input_mapped_num = int(f.readline().rstrip())
 32 | 
 33 |         peaks = pd.read_csv(full, names=FULL_HEADER, sep='\t')
 34 |         peaks = peaks[(peaks['l10p'] >= l10p) & (peaks['l2fc'] >= l2fc)]
 35 |         peaks['entropy'] = peaks.apply(entropy, args=(ip_mapped_num, input_mapped_num, ), axis=1)
 36 | 
 37 |         return peaks['entropy'].sum()
 38 |     except Exception as e:
 39 |         return e
 40 |     
 41 | def main():
 42 |     parser = argparse.ArgumentParser()
 43 | 
 44 |     parser.add_argument(
 45 |         "--full",
 46 |         required=True,
 47 |     )
 48 |     parser.add_argument(
 49 |         "--ip_mapped",
 50 |         required=True,
 51 |     )
 52 |     parser.add_argument(
 53 |         "--input_mapped",
 54 |         required=True,
 55 |     )
 56 |     parser.add_argument(
 57 |         "--l10p",
 58 |         required=False,
 59 |         default=3,
 60 |         help='Only consider peaks at or above this -log10p-value cutoff.'
 61 |     )
 62 |     parser.add_argument(
 63 |         "--l2fc",
 64 |         required=False,
 65 |         default=3,
 66 |         help='Only consider peaks at or above this log2 fold change cutoff.'
 67 |     )
 68 |     parser.add_argument(
 69 |         "--output",
 70 |         required=False,
 71 |         default=None,
 72 |         help='Write to file, default: stdout'
 73 |     )
 74 |     # Process arguments
 75 |     args = parser.parse_args()
 76 |     
 77 |     full = args.full
 78 |     ip_mapped = args.ip_mapped
 79 |     input_mapped = args.input_mapped
 80 |     l10p = args.l10p
 81 |     l2fc = args.l2fc
 82 |     output = args.output
 83 |     
 84 |     # main func
 85 |     summed_entropy = sum_entropy(
 86 |         full=full, 
 87 |         ip_mapped=ip_mapped, 
 88 |         input_mapped=input_mapped, 
 89 |         l10p=l10p, 
 90 |         l2fc=l2fc
 91 |     )
 92 |     if output is None:
 93 |         print(summed_entropy)
 94 |     else:
 95 |         with open(output, 'w') as o:
 96 |             o.write("{}".format(summed_entropy))
 97 |         
 98 | if __name__ == "__main__":
 99 |     main()
100 | 


--------------------------------------------------------------------------------
/bin/combine_ReadsByLoc_files.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use warnings;
 4 | use strict;
 5 | 
 6 | my $line_size = 14;	# data columns per row (ID excluded); updated if a row differs
 7 | my @files = @ARGV;
 8 | 
 9 | my %hash;	# $hash{row ID}{file} = that row's data columns, tab-joined
10 | for my $fi (@files) {
11 |     open(my $fh, '<', $fi) or die "cannot open $fi: $!\n";
12 |     # read line by line rather than slurping the whole file into a list
13 |     while (my $line = <$fh>) {
14 | 	chomp($line);
15 | 	my @tmp = split(/\t/,$line);
16 | 	my $ensg = shift(@tmp);
17 | 	next if ($ensg eq "all");
18 | 
19 | 	$hash{$ensg}{$fi} = join("\t",@tmp);
20 | 	if (scalar(@tmp) != $line_size) {
21 | 	    print STDERR "changing line_size to ".scalar(@tmp)."\n";
22 | 	    $line_size = scalar(@tmp);
23 | 	}
24 |     }
25 |     close($fh);
26 | }
27 | 
28 | print "ENSG\t";	# header row: one "file|<that file's ENSG row>" cell per input
29 | for my $fi (@files) {
30 |     print "$fi|".$hash{"ENSG"}{$fi}."\t";
31 | }
32 | print "\n";
33 | 
34 | for my $k (keys %hash) {
35 |     next if ($k eq "ENSG");
36 |     print "$k\t";
37 |     for my $fi (@files) {
38 | 	unless (exists $hash{$k}{$fi}) {	# ID absent from this file: pad with NaN columns
39 | 	    $hash{$k}{$fi} = "NaN";
40 | 	    for my $i (1..($line_size-1)) {
41 | 		$hash{$k}{$fi} .= "\tNaN";
42 | 	    }
43 | 	}
44 | #	$hash{$k}{$fi} = "0\t0\t0\t0\t0\t0\t0\t0" unless (exists $hash{$k}{$fi});
45 | 	print "$hash{$k}{$fi}\t";
46 |     }
47 |     print "\n";
48 | }
49 | 


--------------------------------------------------------------------------------
/bin/compress_l2foldenrpeakfi_for_replicate_overlapping_bedformat.pl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env perl
  2 | 
  3 | use warnings;
  4 | use strict;
  5 | 
  6 | ### Note 2015/11/12: fixed so that all bed formats are ucsc format (0-based, open ended)
  7 | 
  8 | ## this is the first version - keeps MOST significant peak if two overlap
  9 | my $hashing_value = 100000;	# coordinate bin width used below to index %read_hash
 10 | 
 11 | # uses l2foldenr peak files
 12 | die "usage: $0 <l2foldenr peak file> <output bed>\n" unless (scalar(@ARGV) >= 2);
 13 | my $fi = $ARGV[0];
 14 | my $output_fi = $ARGV[1];
 15 | # my $output_fi = $fi.".compressed.bed";
 16 | open(O,">$output_fi") or die "cannot write $output_fi: $!\n";
 17 | # lookup tables read by the main loop as {chr}{strand}{file}{...}; presumably filled by readfi
 18 | my %peaks2size;
 19 | my %peaks2l2fenr;
 20 | my %peaks2l10p;
 21 | my %peaks2start;
 22 | my %read_hash;
 23 | my %peak_hash;
 24 | &readfi($fi);
 25 | 
 26 | my %overlap_hash;	# not referenced anywhere in the loop below
 27 | #for my $chr ("chr10") {
 28 | for my $chr (keys %read_hash) {
 29 |     for my $str ("+","-") {
 30 | #	print STDERR "\non $chr $str\n";
 31 | 	# peaks are compared only within one chromosome and strand
 32 | 	my %deleted_peaks;
 33 | 	my %kept_peaks;	# declared but never filled in this loop
 34 | 	# rank peaks: l10p first, then l2 fold-enrichment, size and start, all descending
 35 | #	my @sorted_peaks = sort {$peaks2l10p{$chr}{$str}{$fi}{$b} <=> $peaks2l10p{$chr}{$str}{$fi}{$a}} keys %{$peaks2l10p{$chr}{$str}{$fi}};
 36 | 	my @sorted_peaks = sort {$peaks2l10p{$chr}{$str}{$fi}{$b} <=> $peaks2l10p{$chr}{$str}{$fi}{$a} or $peaks2l2fenr{$chr}{$str}{$fi}{$b} <=> $peaks2l2fenr{$chr}{$str}{$fi}{$a} or $peaks2size{$chr}{$str}{$fi}{$b} <=> $peaks2size{$chr}{$str}{$fi}{$a} or $peaks2start{$chr}{$str}{$fi}{$b} <=> $peaks2start{$chr}{$str}{$fi}{$a}} keys %{$peaks2l10p{$chr}{$str}{$fi}};
 37 | 
 38 | 	my $i=0;	# unused; only the commented-out while loop below refers to it
 39 | 
 40 | 	for my $peak1 (@sorted_peaks) {
 41 | 	    my $verbose_flag = 0;
 42 | #	while ($i < scalar(@sorted_peaks)) {
 43 | ## now take any peaks that overlap and merge them
 44 | #	    my $peak1 = $sorted_peaks[$i];
 45 | 
 46 | 	    next if (exists $deleted_peaks{$peak1});
 47 | #	    print STDERR "re-checking $peak1\r";
 48 | 
 49 | #	    my $peak_id = $chr.":".$start."-".$stop.":".$str.":".$vsinput_l10p.":".$vsinput_l2fenr;
 50 | 	    # a peak ID is split on ':' and its position field on '-'
 51 | 	    my ($p1chr,$p1pos,$p1str,$p1vsinput_l10p,$p1vsinput_l2fenr) = split(/\:/,$peak1);
 52 | 	    my ($p1start,$p1stop) = split(/\-/,$p1pos);
 53 | 	    # first and last $hashing_value-wide bin touched by this peak
 54 | 	    my $p1x = int($p1start / $hashing_value);
 55 | 	    my $p1y = int( $p1stop / $hashing_value);
 56 | 
 57 | 	    for my $p1i ($p1x..$p1y) {
 58 | 		for my $tocomp_peak (@{$read_hash{$chr}{$str}{$fi}{$p1i}}) {
 59 | 		    print STDERR "comparing $peak1 $tocomp_peak\n" if ($verbose_flag == 1);
 60 | 		    next if (exists $deleted_peaks{$tocomp_peak});
 61 | 		    next if ($tocomp_peak eq $peak1);
 62 | 
 63 | 		    my ($p2compchr,$p2comppos,$p2compstr,$p2compvsinput_l10p,$p2compvsinput_l2fenr) = split(/\:/,$tocomp_peak);
 64 | 		    my ($p2compstart,$p2compstop) = split(/\-/,$p2comppos);
 65 | 		    # skip pairs that do not overlap; intervals that merely touch count as disjoint
 66 | 		    next if ($p2compstop <= $p1start);
 67 | 		    next if ($p1stop <= $p2compstart);
 68 | 
 69 | 		    #peak2 overlaps with peak1 and has a lower l10pval - remove it!
 70 | 		    if ($p1vsinput_l10p >= $p2compvsinput_l10p) {
 71 | 			print STDERR "discarding $tocomp_peak vs $peak1\n" if ($verbose_flag == 1);
 72 | 			$deleted_peaks{$tocomp_peak} = 1;
 73 | 		    } elsif ($p1vsinput_l10p < $p2compvsinput_l10p) {
 74 | 			$deleted_peaks{$peak1} = 1;
 75 | 			print STDERR "discarding $peak1 vs $tocomp_peak\n" if ($verbose_flag == 1);
 76 | 		    } else {	# only reached if neither comparison above holds (e.g. a NaN value)
 77 | 			print STDERR "weird error shouldn't happen $peak1\n";
 78 | 		    }
 79 | 		}
 80 | 	    }
 81 | 	}
 82 | 	# write the surviving peaks in ranked order
 83 | 	for my $peak (@sorted_peaks) {
 84 | 	    next if (exists $deleted_peaks{$peak});
 85 | 
 86 | 	    my ($p1chr,$p1pos,$p1str,$p1vsinput_l10p,$p1vsinput_l2fenr) = split(/\:/,$peak);
 87 |             my ($p1start,$p1stop) = split(/\-/,$p1pos);
 88 | 	    print O "$p1chr\t$p1start\t$p1stop\t$p1vsinput_l10p\t$p1vsinput_l2fenr\t$p1str\n";	# chr, start, stop, l10p, l2fenr, strand
 89 | 
 90 | 	}
 91 |     }
 92 | }
 93 | close(O);
 94 | 
 95 | 
 96 | sub min {
 97 |     # Smaller of two numeric arguments; when they tie, the second is returned.
 98 |     my ($first, $second) = @_;
 99 | 
100 |     # Guard form: anything not strictly smaller falls through to $second.
101 |     if ($first < $second) {
102 | 	return($first);
103 |     }
104 |     return($second);
105 | }
106 | 
107 | sub max {
108 |     # Larger of two numeric arguments; when they tie, the second is returned.
109 |     my ($first, $second) = @_;
110 | 
111 |     # Guard form: anything not strictly larger falls through to $second.
112 |     if ($first > $second) {
113 | 	return($first);
114 |     }
115 |     return($second);
116 | }
117 | 
118 | sub readfi {
119 |     # Load a tab-separated peak file (columns: chr, start, stop, l10p, l2fenr, strand) into the
120 |     # file-level peak hashes and bin each peak into %read_hash for the overlap lookup.
121 |     my $fi = shift;
122 |     open(F,"<",$fi) or die "Cannot open peak file $fi: $!\n";
123 |     # Stream the file: the former for-loop over <F> read every line into a list first.
124 |     while (my $line = <F>) {
125 | 	chomp($line);
126 | 	next unless ($line =~ /\S/);    # skip blank lines instead of storing an empty peak
127 | 
128 | 	my @tmp = split(/\t/,$line);
129 | 
130 | 	my $chr = $tmp[0];
131 | 	my $str = $tmp[5];
132 | 	my $start = $tmp[1];
133 | 	my $stop = $tmp[2];
134 |         my $vsinput_l10p = $tmp[3];
135 |         my $vsinput_l2fenr = $tmp[4];
136 | #	my ($chr,$pos,$str,$orig_pval) = split(/\:/,$tmp[0]);
137 | #	my ($start,$stop) = split(/\-/,$pos);
138 | 
139 | #### Gabe's peaks are open-ended on right side; this fixes that issue (so peak from 1-10 actually covers bases 1-10, not 1-9
140 | # removed 2015/11/12
141 | #	$stop = $stop - 1;
142 | #	my ($chr,$start,$stop,$ens_id,$pval,$str,$start2,$stop2) = split(/\t/,$line);
143 | 
144 | 	# The id encodes every field; the main loop splits it on ":" and "-" to recover them.
145 | 	my $peak_id = $chr.":".$start."-".$stop.":".$str.":".$vsinput_l10p.":".$vsinput_l2fenr;
146 | 
147 | 	push @{$peak_hash{$chr}{$str}{$fi}},$peak_id;
148 | 	$peaks2start{$chr}{$str}{$fi}{$peak_id} = $start;
149 | 	$peaks2l10p{$chr}{$str}{$fi}{$peak_id} = $vsinput_l10p;
150 |         $peaks2l2fenr{$chr}{$str}{$fi}{$peak_id} = $vsinput_l2fenr;
151 | 	$peaks2size{$chr}{$str}{$fi}{$peak_id} = $stop-$start;
152 | 
153 | 	# Register the peak in every bin it touches so overlapping peaks share at least one bin.
154 | 	my $x = int($start / $hashing_value);
155 | 	my $y = int( $stop / $hashing_value);
156 | 	for my $i ($x..$y) {
157 | 	    push @{$read_hash{$chr}{$str}{$fi}{$i}},$peak_id;
158 | 	}
159 |     }
160 |     close(F);
161 | }
162 | 


--------------------------------------------------------------------------------
/bin/fix_bed_for_bigbed_conversion.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | """
 4 | Reformats an eCLIP input-normalized peak BED (chrom, start, end, pValue, signalValue, strand) as BED6:
 5 | col 4 (name) becomes 'pValue|signalValue',
 6 | col 5 (score) is set to 0 for every peak; chrom, start, end and strand are written unchanged
 7 | """
 8 | import numpy as np
 9 | import pandas as pd
10 | import argparse
11 | import os
12 | 
13 | ECLIP_HEADER = [
14 |     'chrom','start','end','pValue','signalValue','strand'
15 | ]
16 | 
17 | def combine_pvalue_fold(row):
18 |     return "{}|{}".format(row['pValue'], row['signalValue'])
19 |     
20 | def fix_bed(bed, fixed_bed):
21 |     
22 |     peaks = pd.read_csv(bed, names=ECLIP_HEADER, sep='\t')
23 |     peaks['name'] = peaks.apply(combine_pvalue_fold, axis=1)
24 |     peaks['score'] = 0
25 |     
26 |     peaks[[
27 |         'chrom','start','end','name','score','strand'
28 |     ]].to_csv(
29 |         fixed_bed,
30 |         sep='\t',
31 |         header=False,
32 |         index=False
33 |     )
34 |     
35 |     
36 | def main():
37 |     parser = argparse.ArgumentParser()
38 | 
39 |     parser.add_argument(
40 |         "--input_bed",
41 |         required=True,
42 |     )
43 |     parser.add_argument(
44 |         "--output_fixed_bed",
45 |         required=True,
46 |     )
47 |     
48 |     # Process arguments
49 |     args = parser.parse_args()
50 |     bed = args.input_bed
51 |     output_fixed_bed = args.output_fixed_bed
52 |         
53 |     # main func
54 |     fix_bed(bed, output_fixed_bed)
55 |     
56 | if __name__ == "__main__":
57 |     main()
58 | 


--------------------------------------------------------------------------------
/bin/generate_adaptertrim_fasta.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# This generates the fasta adapter files from a predefined list of adapters\n",
  8 |     "- This is a pre-processing step that generates input adapter seqs (NOT PART OF THE PIPELINE)\n",
  9 |     "- These adapters are to be split into incrementing tiles, which are then used to trim excess adapter sequences with cutadapt.\n",
 10 |     "- We will store these as fasta files for convenience."
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 1,
 16 |    "metadata": {},
 17 |    "outputs": [],
 18 |    "source": [
 19 |     "import os\n",
 20 |     "import glob\n",
 21 |     "import pandas as pd\n",
 22 |     "import numpy as np"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "code",
 27 |    "execution_count": 2,
 28 |    "metadata": {},
 29 |    "outputs": [],
 30 |    "source": [
 31 |     "output_dir = '/projects/ps-yeolab4/software/eclip/0.5.0/examples/inputs/'"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "markdown",
 36 |    "metadata": {},
 37 |    "source": [
 38 |     "# Notes from Eric:\n",
 39 |     "```\n",
 40 |     "You had tiles of:\n",
 41 |     "\n",
 42 |     "GATCGGAAGAGCACACGTCTGAACTCCAGTCAC\n",
 43 |     "\n",
 44 |     "Need to do tiles of:\n",
 45 |     "\n",
 46 |     "InvRNA1 NNAGCGCTAG A GATCGGAAGAGCACACGTCTGAACTCCAGTCAC\n",
 47 |     "InvRNA2 NNGATATCGA A GATCGGAAGAGCACACGTCTGAACTCCAGTCAC\n",
 48 |     "InvRNA3 NNCGCAGACG A GATCGGAAGAGCACACGTCTGAACTCCAGTCAC\n",
 49 |     "InvRNA4 NNTATGAGTA A GATCGGAAGAGCACACGTCTGAACTCCAGTCAC\n",
 50 |     "InvRNA5 NNAGGTGCGT A GATCGGAAGAGCACACGTCTGAACTCCAGTCAC\n",
 51 |     "InvRNA6 NNGAACATAC A GATCGGAAGAGCACACGTCTGAACTCCAGTCAC\n",
 52 |     "InvRNA7 NNACATAGCG A GATCGGAAGAGCACACGTCTGAACTCCAGTCAC\n",
 53 |     "InvRNA8 NNGTGCGATA A GATCGGAAGAGCACACGTCTGAACTCCAGTCAC\n",
 54 |     "```"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": 3,
 60 |    "metadata": {},
 61 |    "outputs": [],
 62 |    "source": [
 63 |     "adapter_sequences = {\n",
 64 |     "    'InvRNA1':\"NNAGCGCTAGAGATCGGAAGAGCACACGTCTGAACTCCAGTCAC\",\n",
 65 |     "    'InvRNA2':\"NNGATATCGAAGATCGGAAGAGCACACGTCTGAACTCCAGTCAC\",\n",
 66 |     "    'InvRNA3':\"NNCGCAGACGAGATCGGAAGAGCACACGTCTGAACTCCAGTCAC\",\n",
 67 |     "    'InvRNA4':\"NNTATGAGTAAGATCGGAAGAGCACACGTCTGAACTCCAGTCAC\",\n",
 68 |     "    'InvRNA5':\"NNAGGTGCGTAGATCGGAAGAGCACACGTCTGAACTCCAGTCAC\",\n",
 69 |     "    'InvRNA6':\"NNGAACATACAGATCGGAAGAGCACACGTCTGAACTCCAGTCAC\",\n",
 70 |     "    'InvRNA7':\"NNACATAGCGAGATCGGAAGAGCACACGTCTGAACTCCAGTCAC\",\n",
 71 |     "    'InvRNA8':\"NNGTGCGATAAGATCGGAAGAGCACACGTCTGAACTCCAGTCAC\"\n",
 72 |     "}"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "code",
 77 |    "execution_count": 4,
 78 |    "metadata": {},
 79 |    "outputs": [],
 80 |    "source": [
 81 |     "word_len = 15\n",
 82 |     "\n",
 83 |     "for name, sequence in adapter_sequences.items():\n",
 84 |     "    offset = 0\n",
 85 |     "    with open(os.path.join(output_dir, '{}_adapters.fasta'.format(name)), 'w') as f:\n",
 86 |     "        for counter in range(len(sequence) - word_len):\n",
 87 |     "            f.write(\n",
 88 |     "                \">{}_{}\\n{}\\n\".format(\n",
 89 |     "                    name,\n",
 90 |     "                    offset,\n",
 91 |     "                    sequence[offset:offset+word_len]\n",
 92 |     "                )\n",
 93 |     "            )\n",
 94 |     "            offset += 1"
 95 |    ]
 96 |   },
 97 |   {
 98 |    "cell_type": "code",
 99 |    "execution_count": null,
100 |    "metadata": {},
101 |    "outputs": [],
102 |    "source": []
103 |   }
104 |  ],
105 |  "metadata": {
106 |   "kernelspec": {
107 |    "display_name": "python3-essential",
108 |    "language": "python",
109 |    "name": "python3-essential"
110 |   },
111 |   "language_info": {
112 |    "codemirror_mode": {
113 |     "name": "ipython",
114 |     "version": 3
115 |    },
116 |    "file_extension": ".py",
117 |    "mimetype": "text/x-python",
118 |    "name": "python",
119 |    "nbconvert_exporter": "python",
120 |    "pygments_lexer": "ipython3",
121 |    "version": "3.6.7"
122 |   }
123 |  },
124 |  "nbformat": 4,
125 |  "nbformat_minor": 2
126 | }
127 | 


--------------------------------------------------------------------------------
/bin/overlap_peakfi_with_bam_PE.pl:
--------------------------------------------------------------------------------
1 | overlap_peakfi_with_bam.pl


--------------------------------------------------------------------------------
/cwl/barcodecollapse_pe.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwltool
 2 | 
 3 | ### doc: "collapses eCLIP barcodes to remove PCR duplicates" ###
 4 | 
 5 | cwlVersion: v1.0
 6 | 
 7 | class: CommandLineTool
 8 | 
 9 | requirements:
10 |   - class: ResourceRequirement
11 |     coresMin: 1
12 |     ramMin: 16000
13 |     
14 | hints:
15 |   - class: DockerRequirement
16 |     dockerPull: brianyee/eclip:0.7.0_python
17 |     
18 | baseCommand: [barcodecollapsepe.py]
19 | 
20 | arguments: [
21 |   "-o",
22 |   $(inputs.input_barcodecollapsepe_bam.nameroot).rmDup.bam,
23 |   "-m",
24 |   $(inputs.input_barcodecollapsepe_bam.nameroot).rmDup.metrics
25 |   ]
26 | 
27 | inputs:
28 | 
29 |   input_barcodecollapsepe_bam:
30 |     type: File
31 | 
32 |     inputBinding:
33 |       position: 1
34 |       prefix: -b
35 |     label: ""
36 |     doc: "input bam to barcode collapse. NOTE: no use for a bai index file!"
37 | 
38 | outputs:
39 | 
40 |   output_barcodecollapsepe_bam:
41 |     type: File
42 |     outputBinding:
43 |       glob: $(inputs.input_barcodecollapsepe_bam.nameroot).rmDup.bam
44 |     label: ""
45 |     doc: "barcode collapsed mappings bam "
46 | 
47 |   output_barcodecollapsepe_metrics:
48 |     type: File
49 |     outputBinding:
50 |       glob: $(inputs.input_barcodecollapsepe_bam.nameroot).rmDup.metrics
51 |     label: ""
52 |     doc: "barcode collapse metrics"
53 | 
54 | doc: |
55 |   This tool wraps barcodecollapsepe.py, a paired-end PCR duplicate removal script
56 |   which reads in a .bam file where the first string left of : split of the read name is the barcode
57 |   and merges reads mapped to the same position that have the same barcode.
58 |   Assumes paired end reads are adjacent in output file (ie needs unsorted bams)
59 |   Also assumes no multimappers in the bam file (otherwise behavior is undefined)
60 |     Usage: python barcodecollapsepe.py --bam BAM --out_file OUT_FILE --metrics_file METRICS_FILE
61 | 


--------------------------------------------------------------------------------
/cwl/barcodecollapse_se.cwl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env cwl-runner
  2 | 
  3 | ### doc: "collapses eCLIP barcodes to remove PCR duplicates" ###
  4 | 
  5 | cwlVersion: v1.0
  6 | 
  7 | class: CommandLineTool
  8 | 
  9 | requirements:
 10 |   - class: InlineJavascriptRequirement
 11 |   - class: ResourceRequirement
 12 |     coresMin: 1
 13 |     ramMin: 32000
 14 |     
 15 | hints:
 16 |   - class: DockerRequirement
 17 |     dockerPull: brianyee/umi_tools:1.0.0
 18 |     
 19 | baseCommand: [umi_tools, dedup]
 20 | 
 21 | arguments: ["--random-seed", "1"]
 22 | 
 23 | inputs:
 24 | 
 25 |   input_barcodecollapsese_bam:
 26 |     type: File
 27 |     inputBinding:
 28 |       position: 1
 29 |       prefix: -I
 30 |     label: ""
 31 |     doc: "input bam to barcode collapse. NOTE: its .bai index is expected alongside it (see secondaryFiles)"
 32 |     secondaryFiles: [.bai]
 33 | 
 34 |   output_stats:
 35 |     default: ""
 36 |     type: string
 37 |     inputBinding:
 38 |       position: 1
 39 |       prefix: --output-stats
 40 |       valueFrom: |
 41 |         ${
 42 |           if (inputs.output_stats == "") {
 43 |             return inputs.input_barcodecollapsese_bam.nameroot;
 44 |           }
 45 |           else {
 46 |             return inputs.output_stats;
 47 |           }
 48 |         }
 49 |     label: ""
 50 |     doc: "filename prefix for the --output-stats files; defaults to the input bam nameroot"
 51 | 
 52 |   method:
 53 |     default: "unique"
 54 |     type: string
 55 |     inputBinding:
 56 |       position: 1
 57 |       prefix: --method
 58 | 
 59 |   collapsed_bam:
 60 |     type: string
 61 |     default: ""
 62 |     inputBinding:
 63 |       position: 2
 64 |       prefix: -S
 65 |       valueFrom: |
 66 |         ${
 67 |           if (inputs.collapsed_bam == "") {
 68 |             return inputs.input_barcodecollapsese_bam.nameroot + ".rmDup.bam";
 69 |           }
 70 |           else {
 71 |             return inputs.collapsed_bam;
 72 |           }
 73 |         }
 74 |     label: ""
 75 |     doc: "filename for the deduplicated bam (-S); defaults to <input bam nameroot>.rmDup.bam"
 76 | 
 77 | outputs:
 78 | 
 79 |   output_barcodecollapsese_bam:
 80 |     type: File
 81 |     outputBinding:
 82 |       glob: |
 83 |         ${
 84 |           if (inputs.collapsed_bam == "") {
 85 |             return inputs.input_barcodecollapsese_bam.nameroot + ".rmDup.bam";
 86 |           }
 87 |           else {
 88 |             return inputs.collapsed_bam;
 89 |           }
 90 |         }
 91 |     label: ""
 92 |     doc: "barcode collapsed mappings bam "
 93 | 
 94 |   output_barcodecollapsese_metrics:
 95 |     type: File
 96 |     outputBinding:
 97 |       # umi_tools appends "_per_umi.tsv" to the --output-stats prefix, user-supplied or defaulted.
 98 |       glob: |
 99 |         ${
100 |           var prefix = inputs.output_stats;
101 |           if (prefix == "") {
102 |             prefix = inputs.input_barcodecollapsese_bam.nameroot;
103 |           }
104 |           return prefix + "_per_umi.tsv";
105 |         }
106 |     label: ""
107 |     doc: "barcode collapsed mappings stats "
108 | 
109 | doc: |
110 |   The purpose of this command is to deduplicate BAM files based
111 |   on the first mapping co-ordinate and the UMI attached to the read.
112 |   It is assumed that the FASTQ files were processed with extract_umi.py
113 |   before mapping and thus the UMI is the last word of the read name. e.g:
114 | 
115 |   @HISEQ:87:00000000_AATT
116 | 
117 |   where AATT is the UMI sequence.
118 | 
119 |     Usage: umi_tools dedup -I infile.bam -S deduped.bam -L dedup.log
120 | 


--------------------------------------------------------------------------------
/cwl/barcodecollapse_se_nostats.cwl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env cwltool
  2 | 
  3 | ### doc: "collapses eCLIP barcodes to remove PCR duplicates" ###
  4 | 
  5 | cwlVersion: v1.0
  6 | 
  7 | class: CommandLineTool
  8 | 
  9 | requirements:
 10 |   - class: InlineJavascriptRequirement
 11 |   - class: ResourceRequirement
 12 |     coresMin: 1
 13 |     ramMin: 32000
 14 |     
 15 | hints:
 16 |   - class: DockerRequirement
 17 |     dockerPull: brianyee/umi_tools:1.0.0
 18 |     
 19 | baseCommand: [umi_tools, dedup]
 20 | 
 21 | arguments: ["--random-seed", "1"]
 22 | 
 23 | inputs:
 24 | 
 25 |   input_barcodecollapsese_bam:
 26 |     type: File
 27 |     inputBinding:
 28 |       position: 1
 29 |       prefix: -I
 30 |     label: ""
 31 |     doc: "input bam to barcode collapse. NOTE: its .bai index is expected alongside it (see secondaryFiles)"
 32 |     secondaryFiles: [.bai]
 33 | 
 34 |   # output_stats:
 35 |   #   default: ""
 36 |   #   type: string
 37 |   #   inputBinding:
 38 |   #     position: 1
 39 |   #     prefix: --output-stats
 40 |   #     valueFrom: |
 41 |   #       ${
 42 |   #         if (inputs.output_stats == "") {
 43 |   #           return inputs.input_barcodecollapsese_bam.nameroot;
 44 |   #         }
 45 |   #         else {
 46 |   #           return inputs.output_stats;
 47 |   #         }
 48 |   #       }
 49 |   #   label: ""
 50 |   #   doc: "stats i guess"
 51 | 
 52 |   method:
 53 |     default: "unique"
 54 |     type: string
 55 |     inputBinding:
 56 |       position: 1
 57 |       prefix: --method
 58 | 
 59 |   collapsed_bam:
 60 |     type: string
 61 |     default: ""
 62 |     inputBinding:
 63 |       position: 2
 64 |       prefix: -S
 65 |       valueFrom: |
 66 |         ${
 67 |           if (inputs.collapsed_bam == "") {
 68 |             return inputs.input_barcodecollapsese_bam.nameroot + ".rmDup.bam";
 69 |           }
 70 |           else {
 71 |             return inputs.collapsed_bam;
 72 |           }
 73 |         }
 74 |     label: ""
 75 |     doc: "filename for the deduplicated bam (-S); defaults to <input bam nameroot>.rmDup.bam"
 76 | 
 77 | outputs:
 78 | 
 79 |   output_barcodecollapsese_bam:
 80 |     type: File
 81 |     outputBinding:
 82 |       glob: |
 83 |         ${
 84 |           if (inputs.collapsed_bam == "") {
 85 |             return inputs.input_barcodecollapsese_bam.nameroot + ".rmDup.bam";
 86 |           }
 87 |           else {
 88 |             return inputs.collapsed_bam;
 89 |           }
 90 |         }
 91 |     label: ""
 92 |     doc: "barcode collapsed mappings bam "
 93 | 
 94 | 
 95 |   # output_barcodecollapsese_metrics:
 96 |   #   type: File
 97 |   #   outputBinding:
 98 |   #     glob: |
 99 |   #       ${
100 |   #         if (inputs.output_stats == "") {
101 |   #           return inputs.input_barcodecollapsese_bam.nameroot + "_per_umi.tsv";
102 |   #         }
103 |   #         else {
104 |   #           return inputs.output_stats;
105 |   #         }
106 |   #       }
107 |   #   label: ""
108 |   #   doc: "barcode collapsed mappings stats "
109 | 
110 | doc: |
111 |   The purpose of this command is to deduplicate BAM files based
112 |   on the first mapping co-ordinate and the UMI attached to the read.
113 |   It is assumed that the FASTQ files were processed with extract_umi.py
114 |   before mapping and thus the UMI is the last word of the read name. e.g:
115 | 
116 |   @HISEQ:87:00000000_AATT
117 | 
118 |   where AATT is the UMI sequence.
119 | 
120 |     Usage: umi_tools dedup -I infile.bam -S deduped.bam -L dedup.log
121 | 


--------------------------------------------------------------------------------
/cwl/bed_to_bigbed.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwl-runner
 2 | 
 3 | ### doc: "Convert peak bed to bigBed" ###
 4 | 
 5 | cwlVersion: v1.0
 6 | 
 7 | class: CommandLineTool
 8 | 
 9 | requirements:
10 |   - class: ResourceRequirement
11 |     coresMin: 1
12 |     coresMax: 16
13 |   - class: InlineJavascriptRequirement
14 | hints:
15 |   - class: DockerRequirement
16 |     dockerPull: brianyee/ucsc-tools:377
17 |     
18 | baseCommand: [bedToBigBed]
19 | 
20 | inputs:
21 | 
22 |   input_bed:
23 |     type: File
24 |     inputBinding:
25 |       position: 1
26 |     label: ""
27 | 
28 |   chrom_sizes:
29 |     type: File
30 |     inputBinding:
31 |       position: 2
32 | 
33 |   output_bb_filename:
34 |     type: string
35 |     default: ""
36 |     inputBinding:
37 |       position: 3
38 |       valueFrom: |
39 |         ${
40 |           if (inputs.output_bb_filename == "") {
41 |             return inputs.input_bed.nameroot + ".bb";
42 |           }
43 |           else {
44 |             return inputs.output_bb_filename;
45 |           }
46 |         }
47 |         
48 | outputs:
49 | 
50 |   output_bigbed:
51 |     type: File
52 |     outputBinding:
53 |       glob: |
54 |         ${
55 |           if (inputs.output_bb_filename == "") {
56 |             return inputs.input_bed.nameroot + ".bb";
57 |           }
58 |           else {
59 |             return inputs.output_bb_filename;
60 |           }
61 |         }
62 |     label: ""
63 |     doc: ""
64 | 
65 | doc: |
66 |   This tool converts an input-normalized eCLIP peaks file (BED6) into a bigbed (bb) file.
67 | 


--------------------------------------------------------------------------------
/cwl/bed_to_narrowpeak.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwltool
 2 | 
 3 | cwlVersion: v1.0
 4 | class: CommandLineTool
 5 | 
 6 | requirements:
 7 |   - class: ResourceRequirement
 8 |     coresMin: 1
 9 |     ramMin: 8000
10 |     
11 | hints:
12 |   - class: DockerRequirement
13 |     dockerPull: brianyee/eclip:0.7.0_python
14 |     
15 | baseCommand: [bed_to_narrowpeak.py]
16 | 
17 | arguments: [
18 |   "--output_narrowpeak",
19 |   $(inputs.input_bed.nameroot).narrowPeak
20 | ]
21 | 
22 | inputs:
23 | 
24 |   input_bed:
25 |     type: File
26 |     inputBinding:
27 |       position: 1
28 |       prefix: --input_bed
29 |     label: ""
30 |     doc: "input bed to convert to narrowPeak format. Must be ECLIP-style input-normed format! (log10p in col4, log2fold in col5)"
31 | 
32 |   species:
33 |     type: string
34 |     inputBinding: 
35 |       position: 2
36 |       prefix: --species
37 | 
38 | outputs:
39 | 
40 |   output_narrowpeak:
41 |     type: File
42 |     outputBinding:
43 |       glob: $(inputs.input_bed.nameroot).narrowPeak
44 |     label: ""
45 |     doc: "eCLIP peaks in narrowPeak format"
46 | 
47 | doc: |
48 |   This tool converts an input-normalized eCLIP peaks file (BED6) into a narrowPeak format for encode DCC. 
49 |   cols 9 and 10 are just blank, col 5 is 1000 for things that meet the >=3 l2fc and l10pval cutoffs and 200 otherwise (it’s just for ucsc track coloring)
50 | 


--------------------------------------------------------------------------------
/cwl/blacklist-remove.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwltool
 2 | 
 3 | cwlVersion: v1.0
 4 | 
 5 | class: CommandLineTool
 6 | 
 7 | requirements:
 8 |   - class: InlineJavascriptRequirement
 9 |   - class: ResourceRequirement
10 |     coresMin: 1
11 |     ramMin: 8000
12 |     
13 | hints:
14 |   - class: DockerRequirement
15 |     dockerPull: brianyee/bedtools:2.27.1
16 |     
17 | baseCommand: [bedtools, intersect]
18 | 
19 | arguments: [
20 |   "-v",
21 |   "-s",
22 |   ]
23 | 
24 | inputs:
25 | 
26 |   input_bed:
27 |     type: File
28 |     inputBinding:
29 |       position: 1
30 |       prefix: -a
31 |       
32 |   blacklist_file:
33 |     type: File
34 |     inputBinding:
35 |       position: 2
36 |       prefix: -b
37 | 
38 | stdout: $(inputs.input_bed.nameroot).blacklist-removed.bed
39 | 
40 | outputs:
41 | 
42 |   output_blacklist_removed_bed:
43 |     type: File
44 |     outputBinding:
45 |       glob: $(inputs.input_bed.nameroot).blacklist-removed.bed
46 | 
47 | doc: |
48 |   Given a list of 'blacklist' regions, remove those regions from an input BED file
49 |   This tool wraps bedtools intersect -v to remove blacklist regions
50 | 


--------------------------------------------------------------------------------
/cwl/calculate_entropy.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwl-runner
 2 | 
 3 | ### doc: "Sums entropy values for significant eCLIP peaks" ###
 4 | 
 5 | cwlVersion: v1.0
 6 | class: CommandLineTool
 7 | 
 8 | requirements:
 9 |   - class: ResourceRequirement
10 |     coresMin: 1
11 |     ramMin: 1000
12 |     
13 | hints:
14 |   - class: DockerRequirement
15 |     dockerPull: brianyee/eclip:0.7.0_python
16 |     
17 | baseCommand: [calculate_entropy.py]
18 | 
19 | inputs:
20 | 
21 |   full:
22 |     type: File
23 |     inputBinding:
24 |       position: 1
25 |       prefix: --full
26 |     label: ""
27 |     doc: "output full file from overlap_peakfi_with_bam.pl (should contain number of reads per peak)"
28 |   ip_mapped:
29 |     type: File
30 |     inputBinding: 
31 |       position: 2
32 |       prefix: --ip_mapped
33 |     label: ""
34 |     doc: "File containing a single number corresponding to the number of mapped reads in IP"
35 |   input_mapped:
36 |     type: File
37 |     inputBinding: 
38 |       position: 3
39 |       prefix: --input_mapped
40 |     label: ""
41 |     doc: "File containing a single number corresponding to the number of mapped reads in INPUT"
42 |    
43 | arguments: [
44 |   "--output",
45 |   $(inputs.full.nameroot).entropynum
46 | ]
47 | 
48 | outputs:
49 | 
50 |   output_entropynum:
51 |     type: File
52 |     outputBinding:
53 |       glob: $(inputs.full.nameroot).entropynum
54 |     label: ""
55 |     doc: "File containing the sum entropy value"
56 | 
57 | doc: |
58 |   This tool computes and sums the entropy values for significant peaks (l10p >=3 and l2fc >=3).
59 |   Returns the number as a file.
60 | 


--------------------------------------------------------------------------------
/cwl/clipper.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwltool
 2 | 
 3 | cwlVersion: v1.0
 4 | 
 5 | class: CommandLineTool
 6 | 
 7 | requirements:
 8 |   - class: InlineJavascriptRequirement
 9 |   - class: ResourceRequirement
10 |     coresMin: 8
11 |     ramMin: 32000
12 | 
13 | hints:
14 |   - class: DockerRequirement
15 |     dockerPull: brianyee/clipper:5d865bb
16 |     
17 | baseCommand: [clipper]
18 | 
19 | inputs:
20 | 
21 |   species:
22 |     type: string
23 |     inputBinding:
24 |       position: 0
25 |       prefix: --species
26 |     doc: "species: one of ce10 ce11 dm3 hg19 GRCh38 mm9 mm10 GRCh38_pU6 GRCh38_v29 GRCh38_v29e hg19_VSV"
27 | 
28 |   bam:
29 |     type: File
30 |     inputBinding:
31 |       position: 1
32 |       prefix: --bam
33 | 
34 |   gene:
35 |     type: string?
36 |     inputBinding:
37 |      position: 8
38 |      prefix: --gene
39 | 
40 |   outfile:
41 |     type: string
42 |     default: ""
43 |     inputBinding:
44 |       position: 10
45 |       prefix: --outfile
46 |       valueFrom: |
47 |         ${
48 |           if (inputs.outfile == "") {
49 |             return inputs.bam.nameroot + ".peakClusters.bed";
50 |           }
51 |           else {
52 |             return inputs.outfile;
53 |           }
54 |         }
55 | 
56 | outputs:
57 | 
58 |   output_bed:
59 |     type: File
60 |     outputBinding:
61 |       glob: |
62 |         ${
63 |           if (inputs.outfile == "") {
64 |             return inputs.bam.nameroot + ".peakClusters.bed";
65 |           }
66 |           else {
67 |             return inputs.outfile;
68 |           }
69 |         }
70 | 
71 | doc: |
72 |   CLIPper is a tool to define peaks in your CLIP-seq dataset.
73 |   CLIPper was developed in the Yeo Lab at the University of California, San Diego.
74 |     Usage: clipper --bam CLIP-seq_reads.srt.bam --species hg19 --outfile CLIP-seq_reads.srt.peaks.bed
75 | 


--------------------------------------------------------------------------------
/cwl/demux_pe.cwl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env cwl-runner
  2 | 
  3 | ### doc: "demultiplexes a paired-end eCLIP set of reads according to the specified barcode and barcode file." ###
  4 | 
  5 | cwlVersion: v1.0
  6 | class: CommandLineTool
  7 | 
  8 | requirements:
  9 |   - class: ResourceRequirement
 10 |     coresMin: 1
 11 |     ramMin: 8000
 12 |     
 13 | hints:
 14 |   - class: DockerRequirement
 15 |     dockerPull: brianyee/eclipdemux:0.0.1
 16 | 
 17 | baseCommand: [eclipdemux]
 18 | 
 19 | arguments: ["--metrics",
 20 |   $(inputs.dataset).$(inputs.reads.name).---.--.metrics,
 21 |   "--expectedbarcodeida",
 22 |   "$(inputs.reads.barcodeids[0])",
 23 |   "--expectedbarcodeidb",
 24 |   "$(inputs.reads.barcodeids[1])"
 25 |   ]
 26 | 
 27 | inputs:
 28 | 
 29 |   barcodesfasta:
 30 |     type: File
 31 |     inputBinding:
 32 |       position: 6
 33 |       prefix: --barcodesfile
 34 | 
 35 |   randomer_length:
 36 |     type: string
 37 |     # default: "10"
 38 |     inputBinding:
 39 |       position: 7
 40 |       prefix: --length
 41 |     doc: "randomer length"
 42 | 
 43 |   dataset:
 44 |     type: string
 45 |     inputBinding:
 46 |       position: 5
 47 |       prefix: --dataset
 48 | 
 49 |   reads:
 50 |     type:
 51 |       type: record
 52 |       #name: reads
 53 |       fields:
 54 |         read1:
 55 |           type: File
 56 |           inputBinding:
 57 |             position: 1
 58 |             prefix: --fastq_1
 59 |         read2:
 60 |           type: File
 61 |           inputBinding:
 62 |             position: 2
 63 |             prefix: --fastq_2
 64 |         barcodeids:
 65 |           type: string[]
 66 |           #default: [NIL, NIL]
 67 |           #inputBinding:
 68 |           #  position: 3
 69 |           #  prefix: --expectedbarcodeids
 70 |         name:
 71 |           type: string
 72 |           inputBinding:
 73 |             position: 4
 74 |             prefix: --newname
 75 | 
 76 | 
 77 | outputs:
 78 | 
 79 |   output_dataset:
 80 |     type: string
 81 |     outputBinding:
 82 |       glob: $(inputs.dataset)
 83 |       loadContents: true
 84 |       outputEval: $(self[0].contents)
 85 |   name:
 86 |     type: string
 87 |     outputBinding:
 88 |       glob: $(inputs.reads.name)
 89 |       loadContents: true
 90 |       outputEval: $(self[0].contents)
 91 |   barcodeidA:
 92 |     type: string
 93 |     outputBinding:
 94 |       glob: $(inputs.reads.barcodeids[0])
 95 |       loadContents: true
 96 |       outputEval: $(self[0].contents)
 97 |   barcodeidB:
 98 |     type: string
 99 |     outputBinding:
100 |       glob: $(inputs.reads.barcodeids[1])
101 |       loadContents: true
102 |       outputEval: $(self[0].contents)
103 | 
104 |   demuxedAfwd:
105 |     type: File
106 |     outputBinding:
107 |       glob: $(inputs.dataset).$(inputs.reads.name).$(inputs.reads.barcodeids[0]).r1.fq.gz
108 |   demuxedArev:
109 |     type: File
110 |     outputBinding:
111 |       glob: $(inputs.dataset).$(inputs.reads.name).$(inputs.reads.barcodeids[0]).r2.fq.gz
112 |   demuxedBfwd:
113 |     type: File
114 |     outputBinding:
115 |       glob: $(inputs.dataset).$(inputs.reads.name).$(inputs.reads.barcodeids[1]).r1.fq.gz
116 |   demuxedBrev:
117 |     type: File
118 |     outputBinding:
119 |       glob: $(inputs.dataset).$(inputs.reads.name).$(inputs.reads.barcodeids[1]).r2.fq.gz
120 | 
121 |   output_demuxedpairedend_metrics:
122 |     type: File
123 |     outputBinding:
124 |       glob: $(inputs.dataset).$(inputs.reads.name).---.--.metrics
125 |     label: ""
126 |     doc: "demuxedpairedend metrics"
127 | 
128 | doc: |
129 |   demultiplex utility for paired-end eCLIP raw fastq files (process eCLIP barcodes and randomers)
130 |   See: https://github.com/YeoLab/eclipdemux for full code and documentation
131 |     Usage: eclipdemux --dataset DATASET_ID --metrics METRICS_FILE --fastq_1 READ_1 --fastq_2 READ_2 --expectedbarcodeida BARCODE_A --expectedbarcodeidb BARCODE_B --barcodesfile BARCODES_FASTA --length LENGTH
132 | 


--------------------------------------------------------------------------------
/cwl/demux_se.cwl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env cwltool
  2 | 
  3 | ### doc: "Doesn't actually demultiplex!!!" ###
  4 | ### just trims the first 10 bases, but named as such to match the demux_pe step ###
  5 | 
  6 | cwlVersion: v1.0
  7 | class: CommandLineTool
  8 | 
  9 | requirements:
 10 |   - class: InlineJavascriptRequirement
 11 |   - class: ResourceRequirement
 12 |     coresMin: 1
 13 |     ramMin: 8000
 14 | 
 15 | hints:
 16 |   - class: DockerRequirement
 17 |     dockerPull: brianyee/umi_tools:1.0.0
 18 | 
 19 | baseCommand: [umi_tools, extract]
 20 | # The same seed value is always passed to umi_tools.
 21 | arguments: ["--random-seed", "1"]
 22 | 
 23 | inputs:
 24 | 
 25 |   # stdin:
 26 |   #   type: File
 27 |   #   inputBinding:
 28 |   #     position: 1
 29 |   #     prefix: --stdin
 30 |   # (kept for reference; --stdin is bound through reads.read1 below)
 31 | 
 32 |   bc_pattern:
 33 |     type: string
 34 |     default: "NNNNNNNNNN"
 35 |     inputBinding:
 36 |       position: 2
 37 |       prefix: --bc-pattern
 38 |     doc: "10 nt randomer"
 39 | 
 40 |   # Log file name; an empty string selects <dataset>.<reads.name>.---.--.metrics
 41 |   log:
 42 |     type: string
 43 |     default: ""
 44 |     inputBinding:
 45 |       position: 3
 46 |       prefix: --log
 47 |       valueFrom: |
 48 |         ${
 49 |           if (inputs.log == "") {
 50 |             return inputs.dataset + "." + inputs.reads.name + ".---.--.metrics";
 51 |           }
 52 |           else {
 53 |             return inputs.log;
 54 |           }
 55 |         }
 56 | 
 57 |   # NOTE(review): bound without a prefix, so the dataset string is also placed
 58 |   # on the command line as a bare positional argument - confirm this is intended.
 59 |   dataset:
 60 |     type: string
 61 |     inputBinding:
 62 |       position: 4
 63 | 
 64 |   # Output fastq name; an empty string selects <dataset>.<reads.name>.umi.r1.fq
 65 |   stdout:
 66 |     type: string
 67 |     default: ""
 68 |     inputBinding:
 69 |       position: 4
 70 |       prefix: --stdout
 71 |       valueFrom: |
 72 |         ${
 73 |           if (inputs.stdout == "") {
 74 |             return inputs.dataset + "." + inputs.reads.name + ".umi.r1.fq";
 75 |           }
 76 |           else {
 77 |             return inputs.stdout;
 78 |           }
 79 |         }
 80 | 
 81 |   reads:
 82 |     type:
 83 |       type: record
 84 |       fields:
 85 |         read1:
 86 |           type: File
 87 |           inputBinding:
 88 |             position: 1
 89 |             prefix: --stdin
 90 |         name:
 91 |           type: string
 92 | 
 93 | outputs:
 94 | 
 95 |   # Mirrors the naming rule of the `stdout` input so that a caller-supplied
 96 |   # file name is collected too (a fixed glob only matches the default name).
 97 |   demuxedAfwd:
 98 |     type: File
 99 |     outputBinding:
100 |       glob: |
101 |         ${
102 |           if (inputs.stdout == "") {
103 |             return inputs.dataset + "." + inputs.reads.name + ".umi.r1.fq";
104 |           }
105 |           else {
106 |             return inputs.stdout;
107 |           }
108 |         }
109 | 
110 |   # Mirrors the naming rule of the `log` input, for the same reason.
111 |   output_demuxedsingleend_metrics:
112 |     type: File
113 |     outputBinding:
114 |       glob: |
115 |         ${
116 |           if (inputs.log == "") {
117 |             return inputs.dataset + "." + inputs.reads.name + ".---.--.metrics";
118 |           }
119 |           else {
120 |             return inputs.log;
121 |           }
122 |         }
123 |     label: ""
124 |     doc: "demuxed se metrics"
125 | 
126 |   # No glob: the value comes from outputEval alone.
127 |   output_dataset:
128 |     type: string
129 |     outputBinding:
130 |       loadContents: true
131 |       outputEval: $(inputs.dataset)
132 |     doc: "just passes output dataset string to output to match with PE demux"
133 | 
134 |   # No glob: the value comes from outputEval alone.
135 |   name:
136 |     type: string
137 |     outputBinding:
138 |       loadContents: true
139 |       outputEval: $(inputs.reads.name)
140 |     doc: "just passes output name string to output to match with PE demux"
141 | 
142 |   # prefix:
143 |   #   type: string
144 |   #   outputBinding:
145 |   #     loadContents: true
146 |   #     outputEval: $(inputs.dataset).$(inputs.reads.name)
147 |   #   doc: "added to make the renaming step easier"
148 | 
149 | doc: |
150 |   Extract UMI barcode from a read and add it to the read name, leaving
151 |   any sample barcode in place. Can deal with paired end reads and UMIs
152 |   split across the paired ends. For eCLIP single-end processing, this step just
153 |   trims the first 10 bases, but named as such to match the demux_pe step.
154 | 
155 |     Usage: umi_tools extract --bc-pattern=[PATTERN] -L extract.log [OPTIONS]
156 | 


--------------------------------------------------------------------------------
/cwl/demux_targeted_mir_se.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwltool
 2 | 
 3 | ### doc: "Doesn't actually demultiplex!!!" ###
 4 | ### just trims the first 10 bases, but named as such to match the demux_pe step ###
 5 | 
 6 | cwlVersion: v1.0
 7 | class: CommandLineTool
 8 | 
 9 | requirements:
10 |   - class: InlineJavascriptRequirement
11 |   - class: ResourceRequirement
12 |     coresMin: 1
13 | 
14 | hints:
15 |   - class: DockerRequirement
16 |     dockerPull: brianyee/umi_tools:1.0.0
17 |     
18 | baseCommand: [cat]
19 | inputs:
20 | 
21 |   dataset:
22 |     type: string
23 |     
24 |   reads:
25 |     type:
26 |       type: record
27 |       fields:
28 |         read1:
29 |           type: File
30 |           inputBinding:
31 |             position: 1
32 |         name:
33 |           type: string
34 |           
35 |   stdout:
36 |     type: string
37 |     default: ""
38 |     inputBinding:
39 |       position: 2
40 |       valueFrom: |
41 |         ${
42 |           if (inputs.stdout == "") {
43 |             return inputs.dataset + "." + inputs.reads.name + ".umi.r1.fq";
44 |           }
45 |           else {
46 |             return inputs.stdout;
47 |           }
48 |         }
49 | 
50 |   
51 | 
52 | outputs:
53 | 
54 |   demuxedAfwd:
55 |     type: File
56 |     outputBinding:
57 |       glob: $(inputs.dataset).$(inputs.reads.name).umi.r1.fq
58 | 
59 |   output_demuxedsingleend_metrics:
60 |     type: File
61 |     outputBinding:
62 |       glob: $(inputs.dataset).$(inputs.reads.name).---.--.metrics
63 |     label: ""
64 |     doc: "demuxed se metrics"
65 | 
66 |   output_dataset:
67 |     type: string
68 |     outputBinding:
69 |       loadContents: true
70 |       outputEval: $(inputs.dataset)
71 |     doc: "just passes output dataset string to output to match with PE demux"
72 | 
73 |   name:
74 |     type: string
75 |     outputBinding:
76 |       loadContents: true
77 |       outputEval: $(inputs.reads.name)
78 |     doc: "just passes output name string to output to match with PE demux"
79 | 
80 |   # prefix:
81 |   #   type: string
82 |   #   outputBinding:
83 |   #     loadContents: true
84 |   #     outputEval: $(inputs.dataset).$(inputs.reads.name)
85 |   #   doc: "added to make the renaming step easier"
86 | 
87 | doc: |
88 |   Extract UMI barcode from a read and add it to the read name, leaving
89 |   any sample barcode in place. Can deal with paired end reads and UMIs
90 |   split across the paired ends. For eCLIP single-end processing, this step just
91 |   trims the first 10 bases, but named as such to match the demux_pe step.
92 | 
93 |     Usage: umi_tools extract --bc-pattern=[PATTERN] -L extract.log [OPTIONS]
94 | 


--------------------------------------------------------------------------------
/cwl/fastqc.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwltool
 2 | 
 3 | # FastQC wrapper: runs fastqc on one reads file and collects
 4 | # */fastqc_report.html and */fastqc_data.txt.
 5 | 
 6 | class: CommandLineTool
 7 | cwlVersion: v1.0
 8 | 
 9 | hints:
10 |   DockerRequirement:
11 |     dockerPull: brianyee/fastqc:0.11.8
12 | 
13 | requirements:
14 |   ResourceRequirement:
15 |     ramMin: 8000
16 |     coresMin: 2
17 | 
18 | baseCommand:
19 |   - fastqc
20 |   - "-t"
21 |   - "2"
22 |   - "--extract"
23 |   - "-k"
24 |   - "7"
25 | 
26 | inputs:
27 | 
28 |   # Value handed to -o; defaults to "."
29 |   output_postfix:
30 |     label: ""
31 |     doc: ""
32 |     type: string
33 |     default: .
34 |     inputBinding:
35 |       prefix: -o
36 |       position: 1
37 | 
38 |   # File given to fastqc as a bare argument
39 |   reads:
40 |     label: ""
41 |     doc: ""
42 |     type: File
43 |     inputBinding:
44 |       position: 1
45 | 
46 | outputs:
47 | 
48 |   # Both globs look exactly one directory level down
49 |   output_qc_stats:
50 |     type: File
51 |     outputBinding:
52 |       glob: "*/fastqc_data.txt"
53 | 
54 |   output_qc_report:
55 |     type: File
56 |     outputBinding:
57 |       glob: "*/fastqc_report.html"
58 | 


--------------------------------------------------------------------------------
/cwl/fastqsort.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwltool
 2 | 
 3 | ### doc: "Sorts fastq file by read name." ###
 4 | # Runs `fastq-sort --id <fastq>` and captures its standard output as the sorted file.
 5 | cwlVersion: v1.0
 6 | class: CommandLineTool
 7 | 
 8 | requirements:
 9 |   - class: InlineJavascriptRequirement
10 |   - class: ResourceRequirement
11 |     coresMin: 1
12 |     ramMin: 8000
13 | 
14 | hints:
15 |   - class: DockerRequirement
16 |     dockerPull: brianyee/fastq-tools:0.8
17 |     
18 | baseCommand: [fastq-sort]
19 | 
20 | #hints:
21 | #
22 | #  - class: ex:ScriptRequirement
23 | #    scriptlines:
24 | #      - "#!/bin/bash"
25 | 
26 | inputs:
27 | 
28 |   input_fastqsort_fastq:
29 |     type: File
30 |     # format: http://edamontology.org/format_1930
31 |     inputBinding:
32 |       position: 1
33 |       prefix: --id
34 |     label: ""
35 |     doc: "input fastq"
36 |   # Output file name; has no inputBinding and is only read by the stdout/glob expressions.
37 |   output_fastqsort_fastq:
38 |     type: string
39 |     default: ""
40 | # An empty output_fastqsort_fastq selects <input nameroot>.sorted.fq as the captured stdout name.
41 | # stdout: $(inputs.input_fastqsort_fastq.basename)So.fq
42 | stdout: ${
43 |     if (inputs.output_fastqsort_fastq == "") {
44 |       return inputs.input_fastqsort_fastq.nameroot + ".sorted.fq";
45 |     }
46 |   else {
47 |       return inputs.output_fastqsort_fastq;
48 |     }
49 |   }
50 | 
51 | outputs:
52 |   # The glob below repeats the stdout naming rule so the captured file is found.
53 |   output_fastqsort_sortedfastq:
54 |     type: File
55 |     # format: http://edamontology.org/format_1930
56 |     outputBinding:
57 |       # glob: $(inputs.output_fastqsort_filename)
58 |       # glob: $(inputs.input_fastqsort_fastq.basename)So.fq
59 |       glob: |
60 |         ${
61 |           if (inputs.output_fastqsort_fastq == "") {
62 |             return inputs.input_fastqsort_fastq.nameroot + ".sorted.fq";
63 |           }
64 |           else {
65 |             return inputs.output_fastqsort_fastq;
66 |           }
67 |         }
68 |     label: ""
69 |     doc: "sorted fastq"
70 | 
71 | doc: |
72 |   Sorts FASTQ files by their read name. Sorted fastq files are required to keep mapping steps
73 |   deterministic.
74 | 
75 |     Usage: fastq-sort --id FASTQ_FILE > STDOUT
76 | 


--------------------------------------------------------------------------------
/cwl/file2string.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwltool
 2 | 
 3 | ### doc: "returns string expression based on file contents" ###
 4 | 
 5 | class: ExpressionTool
 6 | cwlVersion: v1.0
 7 | 
 8 | requirements:
 9 |   InlineJavascriptRequirement: {}
10 | 
11 | inputs:
12 |   # loadContents makes the file text available as inputs.file.contents
13 |   file:
14 |     inputBinding:
15 |       loadContents: true
16 |     type: File
17 | 
18 | outputs:
19 |   output:
20 |     type: string
21 | 
22 | # The loaded contents are handed back unchanged under the key 'output'.
23 | expression: "${ return {'output': inputs.file.contents}; }"
24 | 
25 | doc: |
26 |   Returns string expression based on file contents.


--------------------------------------------------------------------------------
/cwl/file2stringArray.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwltool
 2 | 
 3 | ### doc: "Returns string array expression based on lines in a file" ###
 4 | 
 5 | class: ExpressionTool
 6 | cwlVersion: v1.0
 7 | 
 8 | requirements:
 9 |   InlineJavascriptRequirement: {}
10 | 
11 | inputs:
12 |   # loadContents makes the file text available as inputs.file.contents
13 |   file:
14 |     inputBinding:
15 |       loadContents: true
16 |     type: File
17 | 
18 | outputs:
19 |   output:
20 |     type: string[]
21 | 
22 | # Split on newlines, then keep every line that is neither empty nor starts with '>'.
23 | expression: "${
24 |   var kept = inputs.file.contents.split('\\n').filter(function (entry) {
25 |     return entry.length > 0 && entry[0] != '>';
26 |   });
27 |   return {'output': kept};
28 | }"
29 | 
30 | doc: |
31 |   Returns string array expression based on lines in a fasta file (SKIPS >).


--------------------------------------------------------------------------------
/cwl/fix_bed_for_bigbed_conversion.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwl-runner
 2 | 
 3 | ### doc: "Fixes a BED file" ###
 4 | 
 5 | class: CommandLineTool
 6 | cwlVersion: v1.0
 7 | 
 8 | hints:
 9 |   DockerRequirement:
10 |     dockerPull: brianyee/eclip:0.7.0_python
11 | 
12 | requirements:
13 |   ResourceRequirement:
14 |     ramMin: 8000
15 |     coresMin: 1
16 | 
17 | baseCommand: fix_bed_for_bigbed_conversion.py
18 | 
19 | # The output name is derived from the input: <nameroot>.fx.bed
20 | arguments:
21 |   - "--output_fixed_bed"
22 |   - $(inputs.input_bed.nameroot).fx.bed
23 | 
24 | inputs:
25 | 
26 |   input_bed:
27 |     label: ""
28 |     doc: "input bed (eCLIP input-normalized format) to be fixed (ie. change col4 to string, col5 to integer) for bigbed conversion"
29 |     type: File
30 |     inputBinding:
31 |       prefix: --input_bed
32 |       position: 1
33 | 
34 | outputs:
35 | 
36 |   output_fixed_bed:
37 |     label: ""
38 |     doc: "eCLIP peaks in proper BED6 format"
39 |     type: File
40 |     outputBinding:
41 |       glob: $(inputs.input_bed.nameroot).fx.bed
42 | 
43 | doc: |
44 |   This tool fixes the eCLIP input-normalized format to the proper BED6 format prior to bigbed conversion.
45 | 


--------------------------------------------------------------------------------
/cwl/gzip.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwltool
 2 | 
 3 | # gzip -c <input>, with standard output captured as <input basename>.gz
 4 | 
 5 | class: CommandLineTool
 6 | cwlVersion: v1.0
 7 | 
 8 | baseCommand: gzip
 9 | 
10 | stdout: $(inputs.input.basename).gz
11 | 
12 | inputs:
13 | 
14 |   input:
15 |     type: File
16 |     inputBinding:
17 |       position: 2
18 | 
19 |   # Boolean switch: emits -c when true (the default)
20 |   stdout:
21 |     type: boolean
22 |     default: true
23 |     inputBinding:
24 |       prefix: -c
25 |       position: 1
26 | 
27 | outputs:
28 | 
29 |   gzipped:
30 |     type: File
31 |     outputBinding:
32 |       glob: $(inputs.input.basename).gz
33 | 


--------------------------------------------------------------------------------
/cwl/index.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwltool
 2 | 
 3 | cwlVersion: v1.0
 4 | 
 5 | class: CommandLineTool
 6 | 
 7 | requirements:
 8 |   - class: ResourceRequirement
 9 |     coresMin: 1
10 |     ramMin: 4000
11 | 
12 | hints:
13 |   - class: DockerRequirement
14 |     dockerPull: brianyee/samtools:1.6
15 | 
16 | baseCommand: [samtools, index]
17 | 
18 | inputs:
19 | 
20 |   # Position -1 sorts the BAM ahead of the `arguments` entry (default position 0).
21 |   input_index_bam:
22 |     type: File
23 |     inputBinding:
24 |       position: -1
25 |     label: ""
26 |     doc: "input bam to index"
27 | 
28 | # Second positional argument: the name of the index file to write.
29 | arguments: [ $(inputs.input_index_bam.basename).bai ]
30 | 
31 | outputs:
32 | 
33 |   output_index_bai:
34 |     type: File
35 |     outputBinding:
36 |       glob: $(inputs.input_index_bam.basename).bai
37 |     label: ""
38 |     doc: "index"
39 | 
40 | doc: |
41 |   Indexes a bam file (should be deprecated by samtools-index.cwl so kept for legacy),
42 |   with the difference being that this tool returns the *.bai index while the other
43 |   returns a BAM file object containing an index file as a secondaryFile.
44 | 
45 |   Usage: samtools index <input.bam> <output.bam.bai>
46 | 


--------------------------------------------------------------------------------
/cwl/makebigwigfiles.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwltool
 2 | 
 3 | # Strand-specific bigwig generation; both output names derive from the BAM nameroot.
 4 | 
 5 | class: CommandLineTool
 6 | cwlVersion: v1.0
 7 | 
 8 | hints:
 9 |   DockerRequirement:
10 |     dockerPull: brianyee/makebigwigfiles:0.0.3
11 | 
12 | requirements:
13 |   ResourceRequirement:
14 |     ramMin: 8000
15 |     coresMin: 1
16 |   # The BAM is staged into the working directory as a writable entry.
17 |   InitialWorkDirRequirement:
18 |     listing:
19 |       - writable: true
20 |         entry: $(inputs.bam)
21 | 
22 | baseCommand: makebigwigfiles
23 | 
24 | arguments:
25 |   - "--bw_pos"
26 |   - $(inputs.bam.nameroot).norm.pos.bw
27 |   - "--bw_neg"
28 |   - $(inputs.bam.nameroot).norm.neg.bw
29 | 
30 | inputs:
31 | 
32 |   bam:
33 |     type: File
34 |     inputBinding:
35 |       prefix: --bam
36 |       position: 1
37 |     # secondaryFiles: [.bai]
38 | 
39 |   chromsizes:
40 |     type: File
41 |     inputBinding:
42 |       prefix: --genome
43 |       position: 3
44 | 
45 |   # Passed as --direction; defaults to "f" in this variant
46 |   direction:
47 |     type: string
48 |     default: f
49 |     inputBinding:
50 |       prefix: --direction
51 |       position: 4
52 | 
53 | outputs:
54 | 
55 |   negbw:
56 |     type: File
57 |     outputBinding:
58 |       glob: $(inputs.bam.nameroot).norm.neg.bw
59 | 
60 |   posbw:
61 |     type: File
62 |     outputBinding:
63 |       glob: $(inputs.bam.nameroot).norm.pos.bw
64 | 
65 | doc: |
66 |   Creates strand-specific bigwig files from a BAM file.
67 |   See original script here: https://github.com/YeoLab/gscripts/blob/master/gscripts/general/make_bigwig_files_pe.py
68 |     Usage: makebigwigfiles --bam BAM --genome GENOME --dont_flip --bw_pos --bw_neg
69 | 


--------------------------------------------------------------------------------
/cwl/makebigwigfiles_PE.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwltool
 2 | 
 3 | # Strand-specific bigwig generation; both output names derive from the BAM nameroot.
 4 | 
 5 | class: CommandLineTool
 6 | cwlVersion: v1.0
 7 | 
 8 | hints:
 9 |   DockerRequirement:
10 |     dockerPull: brianyee/makebigwigfiles:0.0.3
11 | 
12 | requirements:
13 |   ResourceRequirement:
14 |     ramMin: 8000
15 |     coresMin: 1
16 |   # The BAM is staged into the working directory as a writable entry.
17 |   InitialWorkDirRequirement:
18 |     listing:
19 |       - writable: true
20 |         entry: $(inputs.bam)
21 | 
22 | baseCommand: makebigwigfiles
23 | 
24 | arguments:
25 |   - "--bw_pos"
26 |   - $(inputs.bam.nameroot).norm.pos.bw
27 |   - "--bw_neg"
28 |   - $(inputs.bam.nameroot).norm.neg.bw
29 | 
30 | inputs:
31 | 
32 |   bam:
33 |     type: File
34 |     inputBinding:
35 |       prefix: --bam
36 |       position: 1
37 |     # secondaryFiles: [.bai]
38 | 
39 |   chromsizes:
40 |     type: File
41 |     inputBinding:
42 |       prefix: --genome
43 |       position: 3
44 | 
45 |   # Passed as --direction; defaults to "r" in this variant
46 |   direction:
47 |     type: string
48 |     default: r
49 |     inputBinding:
50 |       prefix: --direction
51 |       position: 4
52 | 
53 | outputs:
54 | 
55 |   negbw:
56 |     type: File
57 |     outputBinding:
58 |       glob: $(inputs.bam.nameroot).norm.neg.bw
59 | 
60 |   posbw:
61 |     type: File
62 |     outputBinding:
63 |       glob: $(inputs.bam.nameroot).norm.pos.bw
64 | 
65 | doc: |
66 |   Creates strand-specific bigwig files from a BAM file.
67 |   See original script here: https://github.com/YeoLab/gscripts/blob/master/gscripts/general/make_bigwig_files_pe.py
68 |     Usage: makebigwigfiles --bam BAM --genome GENOME --dont_flip --bw_pos --bw_neg
69 | 


--------------------------------------------------------------------------------
/cwl/makebigwigfiles_SE.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwltool
 2 | 
 3 | # Strand-specific bigwig generation; both output names derive from the BAM nameroot.
 4 | 
 5 | class: CommandLineTool
 6 | cwlVersion: v1.0
 7 | 
 8 | hints:
 9 |   DockerRequirement:
10 |     dockerPull: brianyee/makebigwigfiles:0.0.3
11 | 
12 | requirements:
13 |   ResourceRequirement:
14 |     ramMin: 8000
15 |     coresMin: 1
16 |   # The BAM is staged into the working directory as a writable entry.
17 |   InitialWorkDirRequirement:
18 |     listing:
19 |       - writable: true
20 |         entry: $(inputs.bam)
21 | 
22 | baseCommand: makebigwigfiles
23 | 
24 | arguments:
25 |   - "--bw_pos"
26 |   - $(inputs.bam.nameroot).norm.pos.bw
27 |   - "--bw_neg"
28 |   - $(inputs.bam.nameroot).norm.neg.bw
29 | 
30 | inputs:
31 | 
32 |   bam:
33 |     type: File
34 |     inputBinding:
35 |       prefix: --bam
36 |       position: 1
37 |     # secondaryFiles: [.bai]
38 | 
39 |   chromsizes:
40 |     type: File
41 |     inputBinding:
42 |       prefix: --genome
43 |       position: 3
44 | 
45 |   # Passed as --direction; defaults to "f" in this variant
46 |   direction:
47 |     type: string
48 |     default: f
49 |     inputBinding:
50 |       prefix: --direction
51 |       position: 4
52 | 
53 | outputs:
54 | 
55 |   negbw:
56 |     type: File
57 |     outputBinding:
58 |       glob: $(inputs.bam.nameroot).norm.neg.bw
59 | 
60 |   posbw:
61 |     type: File
62 |     outputBinding:
63 |       glob: $(inputs.bam.nameroot).norm.pos.bw
64 | 
65 | doc: |
66 |   Creates strand-specific bigwig files from a BAM file.
67 |   See original script here: https://github.com/YeoLab/gscripts/blob/master/gscripts/general/make_bigwig_files_pe.py
68 |     Usage: makebigwigfiles --bam BAM --genome GENOME --dont_flip --bw_pos --bw_neg
69 | 


--------------------------------------------------------------------------------
/cwl/namesort.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwltool
 2 | 
 3 | ### doc: "samtools sort tool (sort by name)" ###
 4 | 
 5 | ### This is a copy of sort.cwl, ###
 6 | ### exists in case TOIL mistakes namesorting with regular sorting ###
 7 | ### Changes: name_sort flag is TRUE by default ###
 8 | 
 9 | class: CommandLineTool
10 | cwlVersion: v1.0
11 | 
12 | hints:
13 |   DockerRequirement:
14 |     dockerPull: brianyee/samtools:1.6
15 | 
16 | requirements:
17 |   InlineJavascriptRequirement: {}
18 |   ResourceRequirement:
19 |     ramMin: 16000
20 |     coresMin: 1
21 | 
22 | baseCommand:
23 |   - samtools
24 |   - sort
25 | 
26 | inputs:
27 | 
28 |   input_sort_bam:
29 |     label: ""
30 |     doc: "input bam"
31 |     type: File
32 |     inputBinding:
33 |       position: 3
34 | 
35 |   # Emits -n when true, which is the default here
36 |   name_sort:
37 |     type: boolean
38 |     default: true
39 |     inputBinding:
40 |       prefix: -n
41 |       position: 1
42 | 
43 |   # An empty string selects <input nameroot>So.bam
44 |   output_file:
45 |     type: string
46 |     default: ""
47 |     inputBinding:
48 |       prefix: -o
49 |       position: 2
50 |       valueFrom: |
51 |         ${
52 |           var chosen = inputs.output_file;
53 |           return chosen == "" ? inputs.input_sort_bam.nameroot + "So.bam" : chosen;
54 |         }
55 | 
56 | outputs:
57 | 
58 |   # Same naming rule as the -o value above
59 |   output_sort_bam:
60 |     label: ""
61 |     doc: "sorted bam"
62 |     type: File
63 |     outputBinding:
64 |       glob: |
65 |         ${
66 |           var chosen = inputs.output_file;
67 |           return chosen == "" ? inputs.input_sort_bam.nameroot + "So.bam" : chosen;
68 |         }
69 | 
70 | doc: |
71 |   This tool wraps samtools sort, setting the by-name (-n) flag to be True by default.
72 |     Usage: samtools sort -n <input.bam> <output.bam>
73 | 


--------------------------------------------------------------------------------
/cwl/overlap_peakfi_with_bam.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwltool
 2 | 
 3 | cwlVersion: v1.0
 4 | 
 5 | class: CommandLineTool
 6 | 
 7 | requirements:
 8 |   - class: InlineJavascriptRequirement
 9 |   - class: ResourceRequirement
10 |     coresMin: 1
11 |     ramMin: 8000
12 | 
13 | hints:
14 |   - class: DockerRequirement
15 |     dockerPull: brianyee/eclip:0.7.0_perl
16 | 
17 | baseCommand: [overlap_peakfi_with_bam.pl]
18 | 
19 | # Positions -5 .. 0 fix the order of the six positional arguments.
20 | inputs:
21 | 
22 |   # IP BAM file
23 |   clipBamFile:
24 |     type: File
25 |     inputBinding:
26 |       position: -5
27 | 
28 |   inputBamFile:
29 |     type: File
30 |     inputBinding:
31 |       position: -4
32 | 
33 |   peakFile:
34 |     type: File
35 |     inputBinding:
36 |       position: -3
37 | 
38 |   # mapped_read_num
39 |   clipReadnum:
40 |     type: File
41 |     inputBinding:
42 |       position: -2
43 | 
44 |   # mapped_read_num
45 |   inputReadnum:
46 |     type: File
47 |     inputBinding:
48 |       position: -1
49 | 
50 |   # Output BED name; an empty string selects <peakFile nameroot>.normed.bed
51 |   outputFile:
52 |     type: string
53 |     default: ""
54 |     inputBinding:
55 |       position: 0
56 |       valueFrom: |
57 |         ${
58 |           if (inputs.outputFile == "") {
59 |             return inputs.peakFile.nameroot + ".normed.bed";
60 |           }
61 |           else {
62 |             return inputs.outputFile;
63 |           }
64 |         }
65 | 
66 | outputs:
67 | 
68 |   inputnormedBed:
69 |     type: File
70 |     outputBinding:
71 |       glob: |
72 |         ${
73 |           if (inputs.outputFile == "") {
74 |             return inputs.peakFile.nameroot + ".normed.bed";
75 |           }
76 |           else {
77 |             return inputs.outputFile;
78 |           }
79 |         }
80 | 
81 |   # Companion ".full" file. With a custom outputFile the suffix must still be
82 |   # appended; otherwise this glob resolves to the same file as inputnormedBed.
83 |   inputnormedBedfull:
84 |     type: File
85 |     outputBinding:
86 |       glob: |
87 |         ${
88 |           if (inputs.outputFile == "") {
89 |             return inputs.peakFile.nameroot + ".normed.bed.full";
90 |           }
91 |           else {
92 |             return inputs.outputFile + ".full";
93 |           }
94 |         }
95 | 
96 | doc: |
97 |   This tool wraps overlap_peakfi_with_bam.pl
98 |     Usage: overlap_peakfi_with_bam.pl <clip.bam> <input.bam> <peaks.bed> <clip_readnum> <input_readnum> <output.bed>
99 | 


--------------------------------------------------------------------------------
/cwl/overlap_peakfi_with_bam_PE.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwltool
 2 | 
 3 | cwlVersion: v1.0
 4 | 
 5 | class: CommandLineTool
 6 | 
 7 | requirements:
 8 |   - class: InlineJavascriptRequirement
 9 |   - class: ResourceRequirement
10 |     coresMin: 1
11 |     ramMin: 8000
12 | 
13 | hints:
14 |   - class: DockerRequirement
15 |     dockerPull: brianyee/eclip:0.7.0_perl
16 | 
17 | baseCommand: [overlap_peakfi_with_bam_PE.pl]
18 | 
19 | # Positions -5 .. 0 fix the order of the six positional arguments.
20 | inputs:
21 | 
22 |   # IP BAM file
23 |   clipBamFile:
24 |     type: File
25 |     inputBinding:
26 |       position: -5
27 | 
28 |   inputBamFile:
29 |     type: File
30 |     inputBinding:
31 |       position: -4
32 | 
33 |   peakFile:
34 |     type: File
35 |     inputBinding:
36 |       position: -3
37 | 
38 |   # mapped_read_num
39 |   clipReadnum:
40 |     type: File
41 |     inputBinding:
42 |       position: -2
43 | 
44 |   # mapped_read_num
45 |   inputReadnum:
46 |     type: File
47 |     inputBinding:
48 |       position: -1
49 | 
50 |   # Output BED name; an empty string selects <peakFile nameroot>.normed.bed
51 |   outputFile:
52 |     type: string
53 |     default: ""
54 |     inputBinding:
55 |       position: 0
56 |       valueFrom: |
57 |         ${
58 |           if (inputs.outputFile == "") {
59 |             return inputs.peakFile.nameroot + ".normed.bed";
60 |           }
61 |           else {
62 |             return inputs.outputFile;
63 |           }
64 |         }
65 | 
66 | outputs:
67 | 
68 |   inputnormedBed:
69 |     type: File
70 |     outputBinding:
71 |       glob: |
72 |         ${
73 |           if (inputs.outputFile == "") {
74 |             return inputs.peakFile.nameroot + ".normed.bed";
75 |           }
76 |           else {
77 |             return inputs.outputFile;
78 |           }
79 |         }
80 | 
81 |   # Companion ".full" file. With a custom outputFile the suffix must still be
82 |   # appended; otherwise this glob resolves to the same file as inputnormedBed.
83 |   inputnormedBedfull:
84 |     type: File
85 |     outputBinding:
86 |       glob: |
87 |         ${
88 |           if (inputs.outputFile == "") {
89 |             return inputs.peakFile.nameroot + ".normed.bed.full";
90 |           }
91 |           else {
92 |             return inputs.outputFile + ".full";
93 |           }
94 |         }
95 | 
96 | doc: |
97 |   This tool wraps overlap_peakfi_with_bam_PE.pl
98 |     Usage: overlap_peakfi_with_bam_PE.pl <clip.bam> <input.bam> <peaks.bed> <clip_readnum> <input_readnum> <output.bed>
99 | 


--------------------------------------------------------------------------------
/cwl/parsebarcodes.cwl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env cwltool
  2 | # Runs parsebarcodes.sh with four positional arguments and collects seven files by fixed name.
  3 | cwlVersion: v1.0
  4 | 
  5 | class: CommandLineTool
  6 | 
  7 | requirements:
  8 |   - class: ResourceRequirement
  9 |     coresMin: 1
 10 |     ramMin: 1000
 11 | 
 12 | baseCommand: [parsebarcodes.sh]
 13 | 
 14 | hints: 
 15 |   - class: DockerRequirement
 16 |     dockerPull: brianyee/eclip:0.7.0_perl
 17 |     
 18 | inputs:
 19 | 
 20 | # these are now hard-coded in parser.sh (NOTE(review): presumably parsebarcodes.sh - confirm)
 21 | #  adapter3prime:
 22 | #     type: string
 23 | #     optional: true
 24 | #     default: AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC
 25 | #  adapter5prime:
 26 | #     type: string
 27 | #     optional: true
 28 | #     default : CTTCCGATCT
 29 |   # Positional argument 1 (typed as a string, default "10")
 30 |   randomer_length:
 31 |     type: string
 32 |     default: "10"
 33 |     inputBinding:
 34 |       position: 1
 35 |     doc: "randomer length: now normally 10, some old experiment used 5"
 36 |   # Positional argument 2
 37 |   barcodesfasta:
 38 |     type: File
 39 |     inputBinding:
 40 |       position: 2
 41 |   # Positional argument 3
 42 |   barcodeidA:
 43 |     type: string
 44 |     inputBinding:
 45 |       position: 3
 46 |   # Positional argument 4
 47 |   barcodeidB:
 48 |     type: string
 49 |     inputBinding:
 50 |       position: 4
 51 | 
 52 | outputs:
 53 |   # Every output below is collected by a fixed file name; none depends on the inputs.
 54 |   a_adapters_default:
 55 |     type: File
 56 |     outputBinding:
 57 |       glob: a_adapters_default.fasta
 58 | 
 59 |   g_adapters_default:
 60 |     type: File
 61 |     outputBinding:
 62 |       glob: g_adapters_default.fasta
 63 | 
 64 |   a_adapters:
 65 |     type: File
 66 |     outputBinding:
 67 |       glob: a_adapters.fasta
 68 | 
 69 |   g_adapters:
 70 |     type: File
 71 |     outputBinding:
 72 |       glob: g_adapters.fasta
 73 |   # Distinct from a_adapters above: output names differ only by letter case.
 74 |   A_adapters:
 75 |     type: File
 76 |     outputBinding:
 77 |       glob: A_adapters.fasta
 78 | 
 79 |   trimfirst_overlap_length:
 80 |     type: File
 81 |     outputBinding:
 82 |       glob: trimfirst_overlap_length.txt
 83 | 
 84 |   trimagain_overlap_length:
 85 |     type: File
 86 |     outputBinding:
 87 |       glob: trimagain_overlap_length.txt
 88 | 
 89 | doc: |
 90 |   This tool wraps parsebarcodes.sh.
 91 | 
 92 |   We have observed occasional double ligation events on the 5’ end of Read1, and we have found
 93 |   that to fix this requires we run cutadapt twice.  Additionally, because two adapters are used for
 94 |   each library (to ensure proper balancing on the Illumina sequencer), two separate barcodes may
 95 |   be ligated to the same Read1 5’ end (often with 5’ truncations).  To fix this we split the barcodes
 96 |   up into 15bp chunks so that cutadapt is able to deconvolute barcode adapters properly (as by
 97 |   default it will not find adapters missing the first N bases of the adapter sequence)
 98 | 
 99 |   parsebarcodes.sh writes the following files:
100 |   trimfirst_overlap_length.txt : file that always contains "1"
101 |   trimagain_overlap_length.txt : file that contains max((length of longest barcode - 2),5)
102 |   g_adapters_default.fasta : empty file (to be fed to cutadapt properly)
103 |   a_adapters_default.fasta : empty file (to be fed to cutadapt properly)
104 |   g_adapters.fasta :  fasta file containing sequences to be trimmed via cutadapt -g flag
105 |   a_adapters.fasta : fasta file containing sequences to be trimmed via cutadapt -a flag
106 |   A_adapters.fasta : fasta file containing sequences to be trimmed via cutadapt -A flag
107 | 
108 |     Usage: parsebarcodes.sh <randommer_length> <barcodes_fasta> <barcode_A> <barcode_B>
109 | 


--------------------------------------------------------------------------------
/cwl/peakscompress.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwltool
 2 | 
 3 | # Wrapper around compress_l2foldenrpeakfi_for_replicate_overlapping_bedformat.pl
 4 | 
 5 | class: CommandLineTool
 6 | cwlVersion: v1.0
 7 | 
 8 | # NOTE(review): the overlap_peakfi_with_bam and parsebarcodes tools pull
 9 | # brianyee/eclip:0.7.0_perl - confirm the older tag here is deliberate.
10 | hints:
11 |   DockerRequirement:
12 |     dockerPull: brianyee/eclip:0.6.0a_perl
13 | 
14 | requirements:
15 |   ResourceRequirement:
16 |     ramMin: 8000
17 |     coresMin: 1
18 | 
19 | baseCommand: compress_l2foldenrpeakfi_for_replicate_overlapping_bedformat.pl
20 | 
21 | inputs:
22 | 
23 |   # Position -1 keeps the input ahead of the output name (default position 0)
24 |   input_bed:
25 |     type: File
26 |     inputBinding:
27 |       position: -1
28 | 
29 | # Output name derived from the input: <nameroot>.compressed.bed
30 | arguments:
31 |   - $(inputs.input_bed.nameroot).compressed.bed
32 | 
33 | outputs:
34 | 
35 |   output_bed:
36 |     type: File
37 |     outputBinding:
38 |       glob: $(inputs.input_bed.nameroot).compressed.bed
39 | 
40 | doc: |
41 |   This tool wraps compress_l2foldenrpeakfi_for_replicate_overlapping_bedformat.pl,
42 |   which merges neighboring or overlapping regions in a BED file.
43 |     Usage:   perl compress_l2foldenrpeakfi_for_replicate_overlapping_bedformat.pl <in.bed> <out.compressed.bed>
44 | 


--------------------------------------------------------------------------------
/cwl/rename.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwltool
 2 | 
 3 | class: CommandLineTool
 4 | 
 5 | cwlVersion: v1.0
 6 | 
 7 | requirements:
 8 |   InitialWorkDirRequirement:
 9 |     listing:
10 |       - entryname: $(inputs.newname)$(inputs.suffix)  # staged copy is named newname followed by suffix
11 |         entry: $(inputs.srcfile)
12 | # Names are built by interpolating two parameter references, so no InlineJavascriptRequirement is needed.
13 | baseCommand: "true"  # no-op command; the rename happens when srcfile is staged under its new name
14 | 
15 | inputs:
16 |   srcfile: File
17 | 
18 |   suffix: string
19 | 
20 |   newname: string
21 | 
22 | outputs:
23 |   outfile:
24 |     type: File
25 |     outputBinding:
26 |       glob: $(inputs.newname)$(inputs.suffix)
27 | 


--------------------------------------------------------------------------------
/cwl/samtools-index.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwl-runner
 2 | 
 3 | ### doc: "Indexes input alignments and returns alignment with index." ###
 4 | ### Differs from index.cwl in that index.cwl returns just index ###
 5 | ### This tool returns alignments with index as secondaryFile    ###
 6 | 
 7 | cwlVersion: v1.0
 8 | 
 9 | class: CommandLineTool
10 | 
11 | hints:
12 |   - class: DockerRequirement
13 |     dockerPull: brianyee/samtools:1.6
14 | 
15 | requirements:
16 |   InitialWorkDirRequirement:
17 |     listing: [ $(inputs.alignments) ]
18 | 
19 | inputs:
20 |   alignments:
21 |     type: File
22 |     inputBinding:
23 |       position: 2
24 |       valueFrom: $(self.basename)
25 |     label: Input bam file.
26 | 
27 | baseCommand: [samtools, index, -b]
28 | 
29 | outputs:
30 |   alignments_with_index:
31 |     type: File
32 |     secondaryFiles: .bai
33 |     outputBinding:
34 |       glob: $(inputs.alignments.basename)
35 |     # The input is staged into the working directory (InitialWorkDirRequirement) and
36 |     # globbed by its basename; the .bai index is collected as a secondary file.
37 |     doc: The input alignments file, with its .bai index attached as a secondary file
38 | 
39 | # s:mainEntity:
40 | #   $import: samtools-metadata.yaml
41 | 
42 | s:downloadUrl: https://github.com/common-workflow-language/workflows/blob/master/tools/samtools-index.cwl
43 | s:codeRepository: https://github.com/common-workflow-language/workflows
44 | s:license: http://www.apache.org/licenses/LICENSE-2.0
45 | 
46 | s:isPartOf:
47 |   class: s:CreativeWork
48 |   s:name: Common Workflow Language
49 |   s:url: http://commonwl.org/
50 | 
51 | s:author:
52 |   class: s:Person
53 |   s:name: Andrey Kartashov
54 |   s:email: mailto:Andrey.Kartashov@cchmc.org
55 |   s:sameAs:
56 |   - id: http://orcid.org/0000-0001-9102-5681
57 |   s:worksFor:
58 |   - class: s:Organization
59 |     s:name: Cincinnati Children's Hospital Medical Center
60 |     s:location: 3333 Burnet Ave, Cincinnati, OH 45229-3026
61 |     s:department:
62 |     - class: s:Organization
63 |       s:name: Barski Lab
64 | doc: |
65 |   samtools-index.cwl is developed for CWL consortium
66 | 
67 | 


--------------------------------------------------------------------------------
/cwl/samtools-mappedreadnum.cwl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env cwl-runner
  2 | 
  3 | ### doc: "Returns a file containing the number of mapped reads in a BAM." ###
  4 | ### Copy of samtools-view.cwl, except for changes due to bugs in TOIL ###
  5 | ### readswithoutbits is by default 4 and NOT optional.                ###
  6 | ### count is by default set to true.                                  ###
  7 | 
  8 | cwlVersion: v1.0
  9 | class: CommandLineTool
 10 | 
 11 | requirements:
 12 |   - class: InlineJavascriptRequirement
 13 |   - class: ResourceRequirement
 14 |     coresMin: 1
 15 | 
 16 | hints:
 17 |   - class: DockerRequirement
 18 |     dockerPull: brianyee/samtools:1.6
 19 |     
 20 | inputs:
 21 |   isbam:
 22 |     type: boolean
 23 |     default: false
 24 |     inputBinding:
 25 |       position: 2
 26 |       prefix: -b
 27 |     doc: |
 28 |       output in BAM format
 29 |   readswithoutbits:
 30 |     type: int
 31 |     default: 4  # required with a default (see the header note about TOIL); emitted as "-F 4"
 32 |     inputBinding:
 33 |       position: 1
 34 |       prefix: -F
 35 |     doc: |
 36 |       only include reads with none of the bits set in INT set in FLAG [0]
 37 |   collapsecigar:
 38 |     type: boolean
 39 |     default: false
 40 |     inputBinding:
 41 |       position: 1
 42 |       prefix: -B
 43 |     doc: |
 44 |       collapse the backward CIGAR operation
 45 |   readsingroup:
 46 |     type: string?
 47 |     inputBinding:
 48 |       position: 1
 49 |       prefix: -r
 50 |     doc: |
 51 |       only include reads in read group STR [null]
 52 |   bedoverlap:
 53 |     type: File?
 54 |     inputBinding:
 55 |       position: 1
 56 |       prefix: -L
 57 |     doc: |
 58 |       only include reads overlapping this BED FILE [null]
 59 |   uncompressed:
 60 |     type: boolean
 61 |     default: false
 62 |     inputBinding:
 63 |       position: 1
 64 |       prefix: -u
 65 |     doc: |
 66 |       uncompressed BAM output (implies -b)
 67 |   readtagtostrip:
 68 |     type: ["null", {type: array, items: string, inputBinding: {prefix: "-x"}}]
 69 |     inputBinding:
 70 |       position: 1
 71 |     # -x is bound on the array items so that each tag is emitted as its own "-x TAG" pair.
 72 |     doc: |
 73 |       read tag to strip (repeatable) [null]
 74 |   input:
 75 |     type: File
 76 |     inputBinding:
 77 |       position: 4
 78 | 
 79 |     doc: |
 80 |       Input bam file.
 81 |   readsquality:
 82 |     type: int?
 83 |     inputBinding:
 84 |       position: 1
 85 |       prefix: -q
 86 |     doc: |
 87 |       only include reads with mapping quality >= INT [0]
 88 |   readswithbits:
 89 |     type: int?
 90 |     inputBinding:
 91 |       position: 1
 92 |       prefix: -f
 93 |     doc: |
 94 |       only include reads with all bits set in INT set in FLAG [0]
 95 |   cigar:
 96 |     type: int?
 97 |     inputBinding:
 98 |       position: 1
 99 |       prefix: -m
100 |     doc: |
101 |       only include reads with number of CIGAR operations
102 |       consuming query sequence >= INT [0]
103 |   iscram:
104 |     type: boolean
105 |     default: false
106 |     inputBinding:
107 |       position: 2
108 |       prefix: -C
109 |     doc: |
110 |       output in CRAM format
111 |   threads:
112 |     type: int?
113 |     inputBinding:
114 |       position: 1
115 |       prefix: -@
116 |     doc: |
117 |       number of BAM compression threads [0]
118 |   fastcompression:
119 |     type: boolean
120 |     default: false
121 |     inputBinding:
122 |       position: 1
123 |       prefix: '-1'
124 |     doc: |
125 |       use fast BAM compression (implies -b)
126 |   samheader:
127 |     type: boolean
128 |     default: false
129 |     inputBinding:
130 |       position: 1
131 |       prefix: -h
132 |     doc: |
133 |       include header in SAM output
134 |   count:
135 |     type: boolean
136 |     default: true
137 |     inputBinding:
138 |       position: 1
139 |       prefix: -c
140 |     doc: |
141 |       print only the count of matching records
142 |   randomseed:
143 |     type: float?
144 |     inputBinding:
145 |       position: 1
146 |       prefix: -s
147 |     doc: |
148 |       integer part sets seed of random number generator [0];
149 |       rest sets fraction of templates to subsample [no subsampling]
150 |   referencefasta:
151 |     type: File?
152 |     inputBinding:
153 |       position: 1
154 |       prefix: -T
155 |     doc: |
156 |       reference sequence FASTA FILE [null]
157 |   region:
158 |     type: string?
159 |     inputBinding:
160 |       position: 5
161 | 
162 |     doc: |
163 |       [region ...]
164 |   readsingroupfile:
165 |     type: File?
166 |     inputBinding:
167 |       position: 1
168 |       prefix: -R
169 |     doc: |
170 |       only include reads with read group listed in FILE [null]
171 |   readsinlibrary:
172 |     type: string?
173 |     inputBinding:
174 |       position: 1
175 |       prefix: -l
176 |     doc: |
177 |       only include reads in library STR [null]
178 |   output_name:
179 |     type: string
180 |     inputBinding:
181 |       position: 2
182 |       prefix: -o
183 | 
184 | outputs:
185 |   output:
186 |     type: File
187 |     outputBinding:
188 |       glob: $(inputs.output_name)
189 | 
190 | baseCommand: [samtools, view]
191 | 
192 | s:downloadUrl: https://github.com/common-workflow-language/workflows/blob/master/tools/samtools-view.cwl
193 | s:codeRepository: https://github.com/common-workflow-language/workflows
194 | s:license: http://www.apache.org/licenses/LICENSE-2.0
195 | 
196 | s:isPartOf:
197 |   class: s:CreativeWork
198 |   s:name: Common Workflow Language
199 |   s:url: http://commonwl.org/
200 | 
201 | s:author:
202 |   class: s:Person
203 |   s:name: Andrey Kartashov
204 |   s:email: mailto:Andrey.Kartashov@cchmc.org
205 |   s:sameAs:
206 |   - id: http://orcid.org/0000-0001-9102-5681
207 |   s:worksFor:
208 |   - class: s:Organization
209 |     s:name: Cincinnati Children's Hospital Medical Center
210 |     s:location: 3333 Burnet Ave, Cincinnati, OH 45229-3026
211 |     s:department:
212 |     - class: s:Organization
213 |       s:name: Barski Lab
214 | doc: |
215 |   samtools-view.cwl is developed for CWL consortium
216 |     Usage:   samtools view [options] <in.bam>|<in.sam>|<in.cram> [region ...]
217 | 
218 |     Options: -b       output BAM
219 |              -C       output CRAM (requires -T)
220 |              -1       use fast BAM compression (implies -b)
221 |              -u       uncompressed BAM output (implies -b)
222 |              -h       include header in SAM output
223 |              -H       print SAM header only (no alignments)
224 |              -c       print only the count of matching records
225 |              -o FILE  output file name [stdout]
226 |              -U FILE  output reads not selected by filters to FILE [null]
227 |              -t FILE  FILE listing reference names and lengths (see long help) [null]
228 |              -T FILE  reference sequence FASTA FILE [null]
229 |              -L FILE  only include reads overlapping this BED FILE [null]
230 |              -r STR   only include reads in read group STR [null]
231 |              -R FILE  only include reads with read group listed in FILE [null]
232 |              -q INT   only include reads with mapping quality >= INT [0]
233 |              -l STR   only include reads in library STR [null]
234 |              -m INT   only include reads with number of CIGAR operations
235 |                       consuming query sequence >= INT [0]
236 |              -f INT   only include reads with all bits set in INT set in FLAG [0]
237 |              -F INT   only include reads with none of the bits set in INT
238 |                       set in FLAG [0]
239 |              -x STR   read tag to strip (repeatable) [null]
240 |              -B       collapse the backward CIGAR operation
241 |              -s FLOAT integer part sets seed of random number generator [0];
242 |                       rest sets fraction of templates to subsample [no subsampling]
243 |              -@ INT   number of BAM compression threads [0]
244 | 
245 | 


--------------------------------------------------------------------------------
/cwl/samtools-merge.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwltool
 2 | 
 3 | cwlVersion: v1.0
 4 | 
 5 | class: CommandLineTool
 6 | 
 7 | requirements:
 8 |   - class: InlineJavascriptRequirement
 9 |   - class: ResourceRequirement
10 |     coresMin: 1
11 |     ramMin: 8000
12 |     
13 | baseCommand: [samtools, merge]
14 | 
15 | hints:
16 |   - class: DockerRequirement
17 |     dockerPull: brianyee/samtools:1.6
18 | 
19 | inputs:
20 | 
21 |   output_bam:
22 |     type: string
23 |     default: ""  # empty string means "derive the name": first input BAM's nameroot + ".merged.bam"
24 |     inputBinding:
25 |       position: 1  # the output name is placed ahead of the input BAMs (position 2)
26 |       valueFrom: |
27 |         ${
28 |           if (inputs.output_bam == "") {
29 |             return inputs.input_bam_files[0].nameroot + ".merged.bam";
30 |           }
31 |           else {
32 |             return inputs.output_bam;
33 |           }
34 |         }
35 |     label: ""
36 |     doc: "output merged bam file name"
37 | 
38 |   input_bam_files:
39 |     type: File[]
40 |     inputBinding:
41 |       position: 2
42 |     label: ""
43 |     doc: "input unmerged bam files"
44 | 
45 | outputs:
46 | 
47 |   output_bam_file:
48 |     type: File
49 |     outputBinding:
50 |       glob: |
51 |         ${
52 |           if (inputs.output_bam == "") {
53 |             return inputs.input_bam_files[0].nameroot + ".merged.bam";
54 |           }
55 |           else {
56 |             return inputs.output_bam;
57 |           }
58 |         }
59 |     label: ""
60 |     doc: "output merged bam file"
61 | 
62 | doc: |
63 |   samtools-merge.cwl takes in a list of input_bam_files and
64 |   returns a merged BAM file.
65 | 
66 |   Usage: samtools merge [-nurlf] [-h inh.sam] [-b <bamlist.fofn>] <out.bam> <in1.bam> [<in2.bam> ... <inN.bam>]
67 | 


--------------------------------------------------------------------------------
/cwl/samtools-view.cwl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env cwl-runner
  2 | 
  3 | ### doc: "Samtools view tool (https://github.com/common-workflow-language/workflows/blob/master/tools/samtools-view.cwl)" ###
  4 | 
  5 | cwlVersion: v1.0
  6 | class: CommandLineTool
  7 | 
  8 | requirements:
  9 |   - class: InlineJavascriptRequirement
 10 |   - class: ResourceRequirement
 11 |     coresMin: 1
 12 |     ramMin: 8000
 13 |     
 14 | hints:
 15 |   - class: DockerRequirement
 16 |     dockerPull: brianyee/samtools:1.6
 17 |     
 18 | inputs:
 19 |   isbam:
 20 |     type: boolean
 21 |     default: false
 22 |     inputBinding:
 23 |       position: 2
 24 |       prefix: -b
 25 |     doc: |
 26 |       output in BAM format
 27 |   readswithoutbits:
 28 |     type: int?
 29 |     inputBinding:
 30 |       position: 1
 31 |       prefix: -F
 32 |     doc: |
 33 |       only include reads with none of the bits set in INT set in FLAG [0]
 34 |   collapsecigar:
 35 |     type: boolean
 36 |     default: false
 37 |     inputBinding:
 38 |       position: 1
 39 |       prefix: -B
 40 |     doc: |
 41 |       collapse the backward CIGAR operation
 42 |   readsingroup:
 43 |     type: string?
 44 |     inputBinding:
 45 |       position: 1
 46 |       prefix: -r
 47 |     doc: |
 48 |       only include reads in read group STR [null]
 49 |   bedoverlap:
 50 |     type: File?
 51 |     inputBinding:
 52 |       position: 1
 53 |       prefix: -L
 54 |     doc: |
 55 |       only include reads overlapping this BED FILE [null]
 56 |   uncompressed:
 57 |     type: boolean
 58 |     default: false
 59 |     inputBinding:
 60 |       position: 1
 61 |       prefix: -u
 62 |     doc: |
 63 |       uncompressed BAM output (implies -b)
 64 |   readtagtostrip:
 65 |     type: ["null", {type: array, items: string, inputBinding: {prefix: "-x"}}]
 66 |     inputBinding:
 67 |       position: 1
 68 |     # -x is bound on the array items so that each tag is emitted as its own "-x TAG" pair.
 69 |     doc: |
 70 |       read tag to strip (repeatable) [null]
 71 |   input:
 72 |     type: File
 73 |     inputBinding:
 74 |       position: 4
 75 | 
 76 |     doc: |
 77 |       Input bam file.
 78 |   readsquality:
 79 |     type: int?
 80 |     inputBinding:
 81 |       position: 1
 82 |       prefix: -q
 83 |     doc: |
 84 |       only include reads with mapping quality >= INT [0]
 85 |   readswithbits:
 86 |     type: int?
 87 |     inputBinding:
 88 |       position: 1
 89 |       prefix: -f
 90 |     doc: |
 91 |       only include reads with all bits set in INT set in FLAG [0]
 92 |   cigar:
 93 |     type: int?
 94 |     inputBinding:
 95 |       position: 1
 96 |       prefix: -m
 97 |     doc: |
 98 |       only include reads with number of CIGAR operations
 99 |       consuming query sequence >= INT [0]
100 |   iscram:
101 |     type: boolean
102 |     default: false
103 |     inputBinding:
104 |       position: 2
105 |       prefix: -C
106 |     doc: |
107 |       output in CRAM format
108 |   threads:
109 |     type: int?
110 |     inputBinding:
111 |       position: 1
112 |       prefix: -@
113 |     doc: |
114 |       number of BAM compression threads [0]
115 |   fastcompression:
116 |     type: boolean
117 |     default: false
118 |     inputBinding:
119 |       position: 1
120 |       prefix: '-1'
121 |     doc: |
122 |       use fast BAM compression (implies -b)
123 |   samheader:
124 |     type: boolean
125 |     default: false
126 |     inputBinding:
127 |       position: 1
128 |       prefix: -h
129 |     doc: |
130 |       include header in SAM output
131 |   count:
132 |     type: boolean
133 |     default: false
134 |     inputBinding:
135 |       position: 1
136 |       prefix: -c
137 |     doc: |
138 |       print only the count of matching records
139 |   randomseed:
140 |     type: float?
141 |     inputBinding:
142 |       position: 1
143 |       prefix: -s
144 |     doc: |
145 |       integer part sets seed of random number generator [0];
146 |       rest sets fraction of templates to subsample [no subsampling]
147 |   referencefasta:
148 |     type: File?
149 |     inputBinding:
150 |       position: 1
151 |       prefix: -T
152 |     doc: |
153 |       reference sequence FASTA FILE [null]
154 |   region:
155 |     type: string?
156 |     inputBinding:
157 |       position: 5
158 | 
159 |     doc: |
160 |       [region ...]
161 |   readsingroupfile:
162 |     type: File?
163 |     inputBinding:
164 |       position: 1
165 |       prefix: -R
166 |     doc: |
167 |       only include reads with read group listed in FILE [null]
168 |   readsinlibrary:
169 |     type: string?
170 |     inputBinding:
171 |       position: 1
172 |       prefix: -l
173 |     doc: |
174 |       only include reads in library STR [null]
175 |   output_name:
176 |     type: string
177 |     inputBinding:
178 |       position: 2
179 |       prefix: -o
180 | outputs:
181 |   output:
182 |     type: File
183 |     outputBinding:
184 |       glob: $(inputs.output_name)
185 | 
186 | baseCommand: [samtools, view]
187 | 
188 | # s:mainEntity:
189 | #   $import: samtools-metadata.yaml
190 | 
191 | s:downloadUrl: https://github.com/common-workflow-language/workflows/blob/master/tools/samtools-view.cwl
192 | s:codeRepository: https://github.com/common-workflow-language/workflows
193 | s:license: http://www.apache.org/licenses/LICENSE-2.0
194 | 
195 | s:isPartOf:
196 |   class: s:CreativeWork
197 |   s:name: Common Workflow Language
198 |   s:url: http://commonwl.org/
199 | 
200 | s:author:
201 |   class: s:Person
202 |   s:name: Andrey Kartashov
203 |   s:email: mailto:Andrey.Kartashov@cchmc.org
204 |   s:sameAs:
205 |   - id: http://orcid.org/0000-0001-9102-5681
206 |   s:worksFor:
207 |   - class: s:Organization
208 |     s:name: Cincinnati Children's Hospital Medical Center
209 |     s:location: 3333 Burnet Ave, Cincinnati, OH 45229-3026
210 |     s:department:
211 |     - class: s:Organization
212 |       s:name: Barski Lab
213 | doc: |
214 |   samtools-view.cwl is developed for CWL consortium
215 |     Usage:   samtools view [options] <in.bam>|<in.sam>|<in.cram> [region ...]
216 | 
217 |     Options: -b       output BAM
218 |              -C       output CRAM (requires -T)
219 |              -1       use fast BAM compression (implies -b)
220 |              -u       uncompressed BAM output (implies -b)
221 |              -h       include header in SAM output
222 |              -H       print SAM header only (no alignments)
223 |              -c       print only the count of matching records
224 |              -o FILE  output file name [stdout]
225 |              -U FILE  output reads not selected by filters to FILE [null]
226 |              -t FILE  FILE listing reference names and lengths (see long help) [null]
227 |              -T FILE  reference sequence FASTA FILE [null]
228 |              -L FILE  only include reads overlapping this BED FILE [null]
229 |              -r STR   only include reads in read group STR [null]
230 |              -R FILE  only include reads with read group listed in FILE [null]
231 |              -q INT   only include reads with mapping quality >= INT [0]
232 |              -l STR   only include reads in library STR [null]
233 |              -m INT   only include reads with number of CIGAR operations
234 |                       consuming query sequence >= INT [0]
235 |              -f INT   only include reads with all bits set in INT set in FLAG [0]
236 |              -F INT   only include reads with none of the bits set in INT
237 |                       set in FLAG [0]
238 |              -x STR   read tag to strip (repeatable) [null]
239 |              -B       collapse the backward CIGAR operation
240 |              -s FLOAT integer part sets seed of random number generator [0];
241 |                       rest sets fraction of templates to subsample [no subsampling]
242 |              -@ INT   number of BAM compression threads [0]
243 | 
244 | 


--------------------------------------------------------------------------------
/cwl/samtools-viewr2.cwl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env cwl-runner
  2 | 
  3 | ### doc: "Samtools view (just read2) tool" ###
  4 | ### Copy of samtools-view.cwl, except for changes due to bugs in TOIL ###
  5 | ### readswithbits is by default 128 and NOT optional.                 ###
  6 | 
  7 | cwlVersion: v1.0
  8 | 
  9 | class: CommandLineTool
 10 | 
 11 | requirements:
 12 |   - class: InlineJavascriptRequirement
 13 |   - class: ResourceRequirement
 14 |     coresMin: 1
 15 |     ramMin: 8000
 16 |     
 17 | hints:
 18 |   - class: DockerRequirement
 19 |     dockerPull: brianyee/samtools:1.6
 20 |     
 21 | inputs:
 22 |   isbam:
 23 |     type: boolean
 24 |     default: false
 25 |     inputBinding:
 26 |       position: 2
 27 |       prefix: -b
 28 |     doc: |
 29 |       output in BAM format
 30 |   readswithoutbits:
 31 |     type: int?
 32 |     inputBinding:
 33 |       position: 1
 34 |       prefix: -F
 35 |     doc: |
 36 |       only include reads with none of the bits set in INT set in FLAG [0]
 37 |   collapsecigar:
 38 |     type: boolean
 39 |     default: false
 40 |     inputBinding:
 41 |       position: 1
 42 |       prefix: -B
 43 |     doc: |
 44 |       collapse the backward CIGAR operation
 45 |   readsingroup:
 46 |     type: string?
 47 |     inputBinding:
 48 |       position: 1
 49 |       prefix: -r
 50 |     doc: |
 51 |       only include reads in read group STR [null]
 52 |   bedoverlap:
 53 |     type: File?
 54 |     inputBinding:
 55 |       position: 1
 56 |       prefix: -L
 57 |     doc: |
 58 |       only include reads overlapping this BED FILE [null]
 59 |   uncompressed:
 60 |     type: boolean
 61 |     default: false
 62 |     inputBinding:
 63 |       position: 1
 64 |       prefix: -u
 65 |     doc: |
 66 |       uncompressed BAM output (implies -b)
 67 |   readtagtostrip:
 68 |     type: ["null", {type: array, items: string, inputBinding: {prefix: "-x"}}]
 69 |     inputBinding:
 70 |       position: 1
 71 |     # -x is bound on the array items so that each tag is emitted as its own "-x TAG" pair.
 72 |     doc: |
 73 |       read tag to strip (repeatable) [null]
 74 |   input:
 75 |     type: File
 76 |     inputBinding:
 77 |       position: 4
 78 | 
 79 |     doc: |
 80 |       Input bam file.
 81 |   readsquality:
 82 |     type: int?
 83 |     inputBinding:
 84 |       position: 1
 85 |       prefix: -q
 86 |     doc: |
 87 |       only include reads with mapping quality >= INT [0]
 88 |   readswithbits:
 89 |     type: int
 90 |     default: 128  # required with a default (see the header note about TOIL); emitted as "-f 128" for the read2-only view
 91 |     inputBinding:
 92 |       position: 1
 93 |       prefix: -f
 94 |     doc: |
 95 |       only include reads with all bits set in INT set in FLAG [0]
 96 |   cigar:
 97 |     type: int?
 98 |     inputBinding:
 99 |       position: 1
100 |       prefix: -m
101 |     doc: |
102 |       only include reads with number of CIGAR operations
103 |       consuming query sequence >= INT [0]
104 |   iscram:
105 |     type: boolean
106 |     default: false
107 |     inputBinding:
108 |       position: 2
109 |       prefix: -C
110 |     doc: |
111 |       output in CRAM format
112 |   threads:
113 |     type: int?
114 |     inputBinding:
115 |       position: 1
116 |       prefix: -@
117 |     doc: |
118 |       number of BAM compression threads [0]
119 |   fastcompression:
120 |     type: boolean
121 |     default: false
122 |     inputBinding:
123 |       position: 1
124 |       prefix: '-1'
125 |     doc: |
126 |       use fast BAM compression (implies -b)
127 |   samheader:
128 |     type: boolean
129 |     default: false
130 |     inputBinding:
131 |       position: 1
132 |       prefix: -h
133 |     doc: |
134 |       include header in SAM output
135 |   count:
136 |     type: boolean
137 |     default: false
138 |     inputBinding:
139 |       position: 1
140 |       prefix: -c
141 |     doc: |
142 |       print only the count of matching records
143 |   randomseed:
144 |     type: float?
145 |     inputBinding:
146 |       position: 1
147 |       prefix: -s
148 |     doc: |
149 |       integer part sets seed of random number generator [0];
150 |       rest sets fraction of templates to subsample [no subsampling]
151 |   referencefasta:
152 |     type: File?
153 |     inputBinding:
154 |       position: 1
155 |       prefix: -T
156 |     doc: |
157 |       reference sequence FASTA FILE [null]
158 |   region:
159 |     type: string?
160 |     inputBinding:
161 |       position: 5
162 | 
163 |     doc: |
164 |       [region ...]
165 |   readsingroupfile:
166 |     type: File?
167 |     inputBinding:
168 |       position: 1
169 |       prefix: -R
170 |     doc: |
171 |       only include reads with read group listed in FILE [null]
172 |   readsinlibrary:
173 |     type: string?
174 |     inputBinding:
175 |       position: 1
176 |       prefix: -l
177 |     doc: |
178 |       only include reads in library STR [null]
179 |   output_name:
180 |     type: string
181 |     default: ""  # empty string means "derive the name": input file's nameroot + ".r2.bam"
182 |     inputBinding:
183 |       position: 2
184 |       prefix: -o
185 |       valueFrom: |
186 |         ${
187 |           if (inputs.output_name == "") {
188 |             return inputs.input.nameroot + ".r2.bam";
189 |           }
190 |           else {
191 |             return inputs.output_name;
192 |           }
193 |         }
194 | outputs:
195 |   output:
196 |     type: File
197 |     outputBinding:
198 |       glob: |
199 |         ${
200 |           if (inputs.output_name == "") {
201 |             return inputs.input.nameroot + ".r2.bam";
202 |           }
203 |           else {
204 |             return inputs.output_name;
205 |           }
206 |         }
207 | 
208 | baseCommand: [samtools, view]
209 | 
210 | 
211 | # s:mainEntity:
212 | #   $import: samtools-metadata.yaml
213 | 
214 | s:downloadUrl: https://github.com/common-workflow-language/workflows/blob/master/tools/samtools-view.cwl
215 | s:codeRepository: https://github.com/common-workflow-language/workflows
216 | s:license: http://www.apache.org/licenses/LICENSE-2.0
217 | 
218 | s:isPartOf:
219 |   class: s:CreativeWork
220 |   s:name: Common Workflow Language
221 |   s:url: http://commonwl.org/
222 | 
223 | s:author:
224 |   class: s:Person
225 |   s:name: Andrey Kartashov
226 |   s:email: mailto:Andrey.Kartashov@cchmc.org
227 |   s:sameAs:
228 |   - id: http://orcid.org/0000-0001-9102-5681
229 |   s:worksFor:
230 |   - class: s:Organization
231 |     s:name: Cincinnati Children's Hospital Medical Center
232 |     s:location: 3333 Burnet Ave, Cincinnati, OH 45229-3026
233 |     s:department:
234 |     - class: s:Organization
235 |       s:name: Barski Lab
236 | doc: |
237 |   samtools-view.cwl is developed for CWL consortium
238 |     Usage:   samtools view [options] <in.bam>|<in.sam>|<in.cram> [region ...]
239 | 
240 |     Options: -b       output BAM
241 |              -C       output CRAM (requires -T)
242 |              -1       use fast BAM compression (implies -b)
243 |              -u       uncompressed BAM output (implies -b)
244 |              -h       include header in SAM output
245 |              -H       print SAM header only (no alignments)
246 |              -c       print only the count of matching records
247 |              -o FILE  output file name [stdout]
248 |              -U FILE  output reads not selected by filters to FILE [null]
249 |              -t FILE  FILE listing reference names and lengths (see long help) [null]
250 |              -T FILE  reference sequence FASTA FILE [null]
251 |              -L FILE  only include reads overlapping this BED FILE [null]
252 |              -r STR   only include reads in read group STR [null]
253 |              -R FILE  only include reads with read group listed in FILE [null]
254 |              -q INT   only include reads with mapping quality >= INT [0]
255 |              -l STR   only include reads in library STR [null]
256 |              -m INT   only include reads with number of CIGAR operations
257 |                       consuming query sequence >= INT [0]
258 |              -f INT   only include reads with all bits set in INT set in FLAG [0]
259 |              -F INT   only include reads with none of the bits set in INT
260 |                       set in FLAG [0]
261 |              -x STR   read tag to strip (repeatable) [null]
262 |              -B       collapse the backward CIGAR operation
263 |              -s FLOAT integer part sets seed of random number generator [0];
264 |                       rest sets fraction of templates to subsample [no subsampling]
265 |              -@ INT   number of BAM compression threads [0]
266 | 
267 | 


--------------------------------------------------------------------------------
/cwl/sort-bed.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwltool
 2 | 
 3 | cwlVersion: v1.0
 4 | 
 5 | class: CommandLineTool
 6 | 
 7 | requirements:
 8 |   - class: InlineJavascriptRequirement
 9 |   - class: ResourceRequirement
10 |     coresMin: 1
11 |     ramMin: 8000
12 |     
13 | hints:
14 |   - class: DockerRequirement
15 |     dockerPull: brianyee/bedtools:2.27.1
16 |     
17 | baseCommand: [sort]
18 | # Sort keys: field 1 first, then field 2 compared numerically (the "n" modifier).
19 | arguments: [
20 |   "-k1,1",
21 |   "-k2,2n"
22 |   ]
23 | 
24 | inputs:
25 | 
26 |   unsorted_bed:
27 |     type: File
28 |     inputBinding:
29 |       position: 1
30 | 
31 | stdout: $(inputs.unsorted_bed.nameroot).sorted.bed
32 | # The glob below must stay identical to the stdout filename above, which is where sort's output is captured.
33 | outputs:
34 | 
35 |   sorted_bed:
36 |     type: File
37 |     outputBinding:
38 |       glob: $(inputs.unsorted_bed.nameroot).sorted.bed
39 | 
40 | doc: |
41 |   This tool wraps unix sort to sort a BED file.
42 |   
43 |   Usage: sort -k1,1 -k2,2n unsorted.bed > sorted.bed
44 | 


--------------------------------------------------------------------------------
/cwl/sort.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwltool
 2 | 
 3 | ### doc: "samtools sort tool (sort by coordinate)" ###
 4 | 
 5 | ### This is a copy of namesort.cwl, ###
 6 | ### exists in case TOIL mistakes namesorting with regular sorting ###
 7 | ### Changes: name_sort flag is FALSE by default ###
 8 | 
 9 | cwlVersion: v1.0
10 | class: CommandLineTool
11 | 
12 | requirements:
13 |   - class: InlineJavascriptRequirement
14 |   - class: ResourceRequirement
15 |     coresMin: 1
16 |     ramMin: 8000
17 |     tmpdirMin: 8000
18 |     outdirMin: 8000
19 | 
20 | hints:
21 |   - class: DockerRequirement
22 |     dockerPull: brianyee/samtools:1.6
23 | 
24 | baseCommand: [samtools, sort]
25 | 
26 | inputs:
27 | 
28 |   name_sort:
29 |     type: boolean
30 |     inputBinding:
31 |       position: 1
32 |       prefix: -n
33 |     default: false
34 | 
35 |   output_file:
36 |     type: string
37 |     inputBinding:
38 |       position: 2
39 |       prefix: -o
40 |       valueFrom: |
41 |         ${
42 |           if (inputs.output_file == "") {
43 |             return inputs.input_sort_bam.nameroot + "So.bam";
44 |           }
45 |           else {
46 |             return inputs.output_file;
47 |           }
48 |         }
49 |     default: ""  # empty string means "derive the name": input BAM's nameroot + "So.bam"
50 | 
51 |   memory:  # string passed verbatim to `samtools sort -m`
52 |     default: 3G
53 |     type: string
54 |     inputBinding:
55 |       position: 3
56 |       prefix: -m
57 | 
58 |   input_sort_bam:  # BAM to sort; its nameroot also seeds the default output name
59 |     type: File
60 |     inputBinding:
61 |       position: 4  # placed last, after the -n / -o / -m options
62 |     label: ""
63 |     doc: "input bam"
64 | 
65 | outputs:
66 | 
67 |   output_sort_bam:  # the BAM written through `-o`
68 |     type: File
69 |     outputBinding:  # glob repeats the output_file valueFrom logic so it resolves to the same name
70 |       glob: |
71 |         ${
72 |           if (inputs.output_file == "") {
73 |             return inputs.input_sort_bam.nameroot + "So.bam";
74 |           }
75 |           else {
76 |             return inputs.output_file;
77 |           }
78 |         }
79 |     label: ""
80 |     doc: "sorted bam"
81 | 
82 | doc: |
83 |   This tool wraps samtools sort by coordinates (namesort flag is False by default).
84 |     Usage: samtools sort [options...] [in.bam]
85 | 


--------------------------------------------------------------------------------
/cwl/trim_pe.cwl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env cwltool
  2 | 
  3 | cwlVersion: v1.0
  4 | 
  5 | class: CommandLineTool
  6 | 
  7 | requirements:
  8 |   - class: ResourceRequirement
  9 |     coresMin: 2
 10 |   - class: StepInputExpressionRequirement
 11 |   - class: InlineJavascriptRequirement
 12 | 
 13 | hints:
 14 |   - class: DockerRequirement
 15 |     dockerPull: brianyee/cutadapt:1.14
 16 | 
 17 | baseCommand: [cutadapt]
 18 | 
 19 | inputs:
 20 | 
 21 |   input_trim_overlap_length:
 22 |     type: string
 23 |     default: "5"
 24 |     inputBinding:
 25 |       position: 0
 26 |       prefix: -O
 27 | 
 28 |   f:
 29 |     type: string
 30 |     default: "fastq"
 31 |     inputBinding:
 32 |       position: 1
 33 |       prefix: -f
 34 | 
 35 |   match_read_wildcards:
 36 |     type: boolean
 37 |     default: true
 38 |     inputBinding:
 39 |       position: 2
 40 |       prefix: --match-read-wildcards
 41 | 
 42 |   times:
 43 |     type: string
 44 |     default: "1"
 45 |     inputBinding:
 46 |       position: 3
 47 |       prefix: --times
 48 | 
 49 |   error_rate:
 50 |     type: string
 51 |     default: "0.1"
 52 |     inputBinding:
 53 |       position: 4
 54 |       prefix: -e
 55 | 
 56 |   quality_cutoff:
 57 |     type: string
 58 |     default: "6"
 59 |     inputBinding:
 60 |       position: 5
 61 |       prefix: --quality-cutoff
 62 | 
 63 |   minimum_length:
 64 |     type: string
 65 |     default: "18"
 66 |     inputBinding:
 67 |       position: 6
 68 |       prefix: -m
 69 | 
 70 |   output_r1:  # file name for the trimmed first read, passed to cutadapt via -o
 71 |     type: string
 72 |     inputBinding:
 73 |       position: 7
 74 |       prefix: -o
 75 |       valueFrom: |
 76 |         ${
 77 |           if (inputs.output_r1 == "") {
 78 |             return inputs.input_trim[0].nameroot + "Tr.fq";
 79 |           }
 80 |           else {
 81 |             return inputs.output_r1;
 82 |           }
 83 |         }
 84 |     default: ""  # empty string selects "<nameroot of input_trim[0]>Tr.fq"
 85 | 
 86 |   output_r2:  # file name for the trimmed second read, passed to cutadapt via -p
 87 |     type: string?
 88 |     inputBinding:
 89 |       position: 8
 90 |       prefix: -p
 91 |       valueFrom: |
 92 |         ${
 93 |           if (inputs.output_r2 == "") {
 94 |             return inputs.input_trim[1].nameroot + "Tr.fq";
 95 |           }
 96 |           else {
 97 |             return inputs.output_r2;
 98 |           }
 99 |         }
100 |     default: ""  # empty string selects "<nameroot of input_trim[1]>Tr.fq", independently of output_r1
101 | 
102 |   input_trim_b_adapters:
103 |     default: []
104 |     type:
105 |       type: array
106 |       items: string
107 |       inputBinding:
108 |         prefix: "-b "
109 |         separate: false
110 |         # prefix: "--anywhere=file:"
111 |         # prefix: "-b file:"
112 |     inputBinding:
113 |       position: 9
114 | 
115 |   input_trim_g_adapters:
116 |     type:
117 |       type: array
118 |       items: string
119 |       inputBinding:
120 |         prefix: "-g "
121 |         separate: false
122 |         # prefix: "--front=file:"
123 |         # prefix: "-g file:"
124 |     inputBinding:
125 |       position: 10
126 | 
127 |   input_trim_A_adapters:
128 |     type:
129 |       type: array
130 |       items: string
131 |       inputBinding:
132 |         prefix: "-A "
133 |         separate: false
134 |         # prefix: "--ADAPTER=file:"
135 |         # prefix: "-A file:"
136 |     inputBinding:
137 |       position: 11
138 | 
139 | 
140 |   input_trim_a_adapters:
141 |     type:
142 |       type: array
143 |       items: string
144 |       inputBinding:
145 |         prefix: "-a "
146 |         separate: false
147 |         # prefix: "--adapter=file:"
148 |         # prefix: "-a file:"
149 |     inputBinding:
150 |       position: 12
151 | 
152 |   input_trim:
153 |     type: File[]?
154 |     inputBinding:
155 |       position: 13
156 | 
157 | stdout: $(inputs.input_trim[0].nameroot)Tr.metrics
158 | 
159 | outputs:
160 | 
161 |   output_trim:
162 |     type: File[]?
163 |     outputBinding:
164 |       # Collect the trimmed R1/R2 fastq files written via -o / -p.
165 |       # Each name is resolved on its own, exactly as the output_r1 and
166 |       # output_r2 valueFrom expressions do: an empty name falls back to
167 |       # "<input nameroot>Tr.fq". Keying both names off output_r1 alone
168 |       # would miss a file whenever only one of the two names is supplied.
169 |       glob: |
170 |         ${
171 |           var r1 = inputs.output_r1;
172 |           var r2 = inputs.output_r2;
173 |           if (!r1) {
174 |             r1 = inputs.input_trim[0].nameroot + "Tr.fq";
175 |           }
176 |           if (!r2) {
177 |             r2 = inputs.input_trim[1].nameroot + "Tr.fq";
178 |           }
179 |           return [r1, r2];
180 |         }
181 | 
182 |   output_trim_report:
183 |     type: File
184 |     outputBinding:
185 |       # glob: "*Tr.metrics"
186 |       glob: "*.metrics"
187 | 
188 | doc: |
189 |   This tool wraps cutadapt with default parameters set to paired-end eCLIP processing defaults.
190 |     Usage: cutadapt -a ADAPT1 -A ADAPT2 [options] -o out1.fastq -p out2.fastq in1.fastq in2.fastq
191 | 


--------------------------------------------------------------------------------
/cwl/trim_se.cwl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env cwltool
  2 | 
  3 | cwlVersion: v1.0
  4 | 
  5 | class: CommandLineTool
  6 | 
  7 | requirements:
  8 |   - class: ResourceRequirement
  9 |     coresMin: 2
 10 |   - class: StepInputExpressionRequirement
 11 |   - class: InlineJavascriptRequirement
 12 | 
 13 | hints:
 14 |   - class: DockerRequirement
 15 |     dockerPull: brianyee/cutadapt:1.14
 16 | 
 17 | baseCommand: [cutadapt]
 18 | 
 19 | inputs:
 20 | 
 21 |   input_trim_overlap_length:
 22 |     type: string
 23 |     default: "5"
 24 |     inputBinding:
 25 |       position: 0
 26 |       prefix: -O
 27 | 
 28 |   f:
 29 |     type: string
 30 |     default: "fastq"
 31 |     inputBinding:
 32 |       position: 1
 33 |       prefix: -f
 34 | 
 35 |   match_read_wildcards:
 36 |     type: boolean
 37 |     default: true
 38 |     inputBinding:
 39 |       position: 2
 40 |       prefix: --match-read-wildcards
 41 | 
 42 |   times:
 43 |     type: string
 44 |     default: "1"
 45 |     inputBinding:
 46 |       position: 3
 47 |       prefix: --times
 48 | 
 49 |   error_rate:
 50 |     type: string
 51 |     default: "0.1"
 52 |     inputBinding:
 53 |       position: 4
 54 |       prefix: -e
 55 | 
 56 |   quality_cutoff:
 57 |     type: string
 58 |     default: "6"
 59 |     inputBinding:
 60 |       position: 5
 61 |       prefix: --quality-cutoff
 62 | 
 63 |   minimum_length:
 64 |     type: string
 65 |     default: "18"
 66 |     inputBinding:
 67 |       position: 6
 68 |       prefix: -m
 69 | 
 70 |   output_r1:  # file name for the trimmed reads, passed to cutadapt via -o
 71 |     type: string
 72 |     inputBinding:
 73 |       position: 7
 74 |       prefix: -o
 75 |       valueFrom: |
 76 |         ${
 77 |           if (inputs.output_r1 == "") {
 78 |             return inputs.input_trim[0].nameroot + "Tr.fq";
 79 |           }
 80 |           else {
 81 |             return inputs.output_r1;
 82 |           }
 83 |         }
 84 |     default: ""  # empty string selects "<nameroot of input_trim[0]>Tr.fq"
 85 | 
 86 |   input_trim_b_adapters:
 87 |     default: []
 88 |     type:
 89 |       type: array
 90 |       items: string
 91 |       inputBinding:
 92 |         prefix: "-b "
 93 |         separate: false
 94 |         # prefix: "--anywhere=file:"
 95 |         # prefix: "-b file:"
 96 |     inputBinding:
 97 |       position: 9
 98 | 
 99 |   input_trim_g_adapters:
100 |     default: []
101 |     type:
102 |       type: array
103 |       items: string
104 |       inputBinding:
105 |         prefix: "-g "
106 |         separate: false
107 |         # prefix: "--front=file:"
108 |         # prefix: "-g file:"
109 |     inputBinding:
110 |       position: 10
111 | 
112 |   input_trim_A_adapters:
113 |     default: []
114 |     type:
115 |       type: array
116 |       items: string
117 |       inputBinding:
118 |         prefix: "-A "
119 |         separate: false
120 |         # prefix: "--ADAPTER=file:"
121 |         # prefix: "-A file:"
122 |     inputBinding:
123 |       position: 11
124 | 
125 | 
126 |   input_trim_a_adapters:
127 |     type:
128 |       type: array
129 |       items: string
130 |       inputBinding:
131 |         prefix: "-a "
132 |         separate: false
133 |         # prefix: "--adapter=file:"
134 |         # prefix: "-a file:"
135 |     inputBinding:
136 |       position: 12
137 | 
138 |   input_trim:
139 |     type: File[]?
140 |     inputBinding:
141 |       position: 13
142 | 
143 | stdout: $(inputs.input_trim[0].nameroot)Tr.metrics
144 | 
145 | outputs:
146 | 
147 |   output_trim:  # one-element array holding the trimmed fastq written via -o
148 |     type: File[]?
149 |     outputBinding:
150 |       # glob: "*Tr.fq"
151 |       # If output_r1 was not specified, fall back to the input nameroot + "Tr.fq" (same rule as the -o valueFrom)
152 |       glob: |
153 |         ${
154 |           if (inputs.output_r1 == "") {
155 |             return [
156 |               inputs.input_trim[0].nameroot + "Tr.fq"
157 |             ];
158 |           }
159 |           else {
160 |             return [
161 |               inputs.output_r1
162 |             ];
163 |           }
164 |         }
165 | 
166 |   output_trim_report:
167 |     type: File
168 |     outputBinding:
169 |       # glob: "*Tr.metrics"
170 |       glob: "*.metrics"
171 | 
172 | doc: |
173 |   This tool wraps cutadapt with default parameters set to single-end eCLIP processing defaults.
174 |     Usage: cutadapt -a ADAPT1 [options] -o out1.fastq in1.fastq
175 | 


--------------------------------------------------------------------------------
/cwl/trim_umi.cwl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env cwltool
  2 | 
  3 | cwlVersion: v1.0
  4 | class: CommandLineTool
  5 | 
  6 | # , $overlap_length_option
  7 | # , $g_adapters_option
  8 | # , $A_adapters_option
  9 | # , $a_adapters_option
 10 | # , -o, out_fastq.fastq.gz
 11 | # , -p, out_pair.fastq.gz
 12 | # , in_fastq.fastq.gz
 13 | # , in_pair.fastq.gz
 14 | # > report
 15 | 
 16 | #$namespaces:
 17 | #  ex: http://example.com/
 18 | 
 19 | requirements:
 20 |   - class: ResourceRequirement
 21 |     coresMin: 2
 22 |     # ramMin: 30000
 23 |     # tmpdirMin: 4000
 24 |     # outdirMin: 4000
 25 |   - class: StepInputExpressionRequirement
 26 |   - class: InlineJavascriptRequirement
 27 | 
 28 | #hints:
 29 | #  - class: ex:PackageRequirement
 30 | #    packages:
 31 | #      - name: cutadapt
 32 | #        package_manager: pip
 33 | #        version: "1.10"
 34 | #  - class: ex:ScriptRequirement
 35 | #    scriptlines:
 36 | #      - "#!/bin/bash"
 37 | #  - class: ShellCommandRequirement
 38 | 
 39 | 
 40 | baseCommand: [cutadapt]
 41 | 
 42 | # arguments: [-f, fastq,
 43 | #   --match-read-wildcards,
 44 | #   --times, "2",
 45 | #   -e, "0.0",
 46 | #   --quality-cutoff, "6",
 47 | #   -m, "18",
 48 | #   -o, $(inputs.input_trim.nameroot)Tr.fqgz
 49 | #   ]
 50 | 
 51 | inputs:
 52 | 
 53 |   hard_trim_length:  # number of bases for cutadapt's unconditional trim (-u)
 54 |     type: int
 55 |     default: -9  # negative LENGTH: cutadapt removes bases from the read's end (here the last 9), i.e. the 3' trim described in this tool's doc
 56 |     inputBinding:
 57 |       position: 0
 58 |       prefix: -u
 59 | 
 60 |   # cores:
 61 |   #   type: int
 62 |   #   default: 4
 63 |   #   inputBinding:
 64 |   #     position: 1
 65 |   #     prefix: -cores
 66 | 
 67 |   output_r1:  # file name for the trimmed reads, passed to cutadapt via -o
 68 |     type: string
 69 |     inputBinding:
 70 |       position: 7
 71 |       prefix: -o
 72 |       valueFrom: |
 73 |         ${
 74 |           if (inputs.output_r1 == "") {
 75 |             return inputs.input_trim[0].nameroot + "Tr.fq";
 76 |           }
 77 |           else {
 78 |             return inputs.output_r1;
 79 |           }
 80 |         }
 81 |     default: ""  # empty string selects "<nameroot of input_trim[0]>Tr.fq"
 82 | 
 83 |   input_trim:
 84 |     type: File[]?
 85 |     inputBinding:
 86 |       position: 14
 87 | 
 88 | 
 89 | stdout: $(inputs.input_trim[0].nameroot)Tr.metrics
 90 | 
 91 | outputs:
 92 | 
 93 |   output_trim:  # one-element array holding the trimmed fastq written via -o
 94 |     type: File[]?
 95 |     outputBinding:
 96 |       # glob: "*Tr.fq"
 97 |       # If output_r1 was not specified, fall back to the input nameroot + "Tr.fq" (same rule as the -o valueFrom)
 98 |       glob: |
 99 |         ${
100 |           if (inputs.output_r1 == "") {
101 |             return [
102 |               inputs.input_trim[0].nameroot + "Tr.fq"
103 |             ];
104 |           }
105 |           else {
106 |             return [
107 |               inputs.output_r1
108 |             ];
109 |           }
110 |         }
111 | 
112 |   output_trim_report:
113 |     type: File
114 |     outputBinding:
115 |       # glob: "*Tr.metrics"
116 |       glob: "*.metrics"
117 | 
118 | doc: |
119 |   This tool wraps cutadapt to trim off the 3' end of R1 (may be UMIs) for eCLASH reads


--------------------------------------------------------------------------------
/cwl/wf_clipseqcore_chimeric_se_1barcode.cwl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env cwltool
  2 | 
  3 | ### Workflow for handling reads containing one barcode ###
  4 | ### Returns a bam file containing read2 only ###
  5 | 
  6 | cwlVersion: v1.0
  7 | class: Workflow
  8 | 
  9 | requirements:
 10 |   - class: StepInputExpressionRequirement
 11 |   - class: SubworkflowFeatureRequirement
 12 |   - class: ScatterFeatureRequirement      # TODO needed?
 13 |   - class: MultipleInputFeatureRequirement
 14 |   - class: InlineJavascriptRequirement
 15 | 
 16 | #hints:
 17 | #  - class: ex:ScriptRequirement
 18 | #    scriptlines:
 19 | #      - "#!/bin/bash"
 20 | 
 21 | 
 22 | inputs:
 23 |   dataset:
 24 |     type: string
 25 | 
 26 |   speciesGenomeDir:
 27 |     type: Directory
 28 | 
 29 |   repeatElementGenomeDir:
 30 |     type: Directory
 31 | 
 32 |   # TODO: remove, we don't use it here.
 33 |   species:
 34 |     type: string
 35 | 
 36 |   chrom_sizes:
 37 |     type: File
 38 | 
 39 |   # barcodesfasta:
 40 |   #   type: File
 41 | 
 42 |   # randomer_length:
 43 |   #   type: string
 44 | 
 45 |   read:
 46 |     type:
 47 |       type: record
 48 |       fields:
 49 |         read1:
 50 |           type: File
 51 |         # read2:
 52 |         #   type: File
 53 |         adapters:
 54 |           type: File
 55 |         name:
 56 |           type: string
 57 | 
 58 |   # r2_bam:
 59 |   #   type: string
 60 | 
 61 |   # output_bam:
 62 |   #   type: string
 63 |   
 64 |   # adapters:
 65 |   #   type: File
 66 | 
 67 |   ### Defaults ###
 68 |   
 69 |   # r2_bits:
 70 |   #   type: int
 71 |   #   default: 128
 72 |   # is_bam:
 73 |   #   type: boolean
 74 |   #   default: true
 75 |   
 76 | outputs:
 77 | 
 78 |   b1_demuxed_fastq_r1:
 79 |     type: File
 80 |     outputSource: demultiplex/A_output_demuxed_read1
 81 |   # b1_demuxed_fastq_r2:
 82 |   #   type: File
 83 |   #   outputSource: demultiplex/A_output_demuxed_read2
 84 | 
 85 |   b1_trimx1_fastq:
 86 |     type: File[]
 87 |     outputSource: b1_trim_and_map/X_output_trim_first
 88 |   b1_trimx1_metrics:
 89 |     type: File
 90 |     outputSource: b1_trim_and_map/X_output_trim_first_metrics
 91 |   b1_trimx1_fastqc_report:
 92 |     type: File
 93 |     outputSource: b1_trim_and_map/X_output_trim_first_fastqc_report
 94 |   b1_trimx1_fastqc_stats: 
 95 |     type: File
 96 |     outputSource: b1_trim_and_map/X_output_trim_first_fastqc_stats
 97 |   b1_trimx2_fastq:
 98 |     type: File[]
 99 |     outputSource: b1_trim_and_map/X_output_trim_again
100 |   b1_trimx2_metrics:
101 |     type: File
102 |     outputSource: b1_trim_and_map/X_output_trim_again_metrics
103 |   b1_trimx2_fastqc_report:
104 |     type: File
105 |     outputSource: b1_trim_and_map/X_output_trim_again_fastqc_report
106 |   b1_trimx2_fastqc_stats: 
107 |     type: File
108 |     outputSource: b1_trim_and_map/X_output_trim_again_fastqc_stats
109 |     
110 |   b1_maprepeats_mapped_to_genome:
111 |     type: File
112 |     outputSource: b1_trim_and_map/A_output_maprepeats_mapped_to_genome
113 |   b1_maprepeats_stats:
114 |     type: File
115 |     outputSource: b1_trim_and_map/A_output_maprepeats_stats
116 |   b1_maprepeats_star_settings:
117 |     type: File
118 |     outputSource: b1_trim_and_map/A_output_maprepeats_star_settings
119 |   b1_sorted_unmapped_fastq:
120 |     type: File
121 |     outputSource: b1_trim_and_map/A_output_sort_repunmapped_fastq
122 | 
123 |   b1_mapgenome_mapped_to_genome:
124 |     type: File
125 |     outputSource: b1_trim_and_map/A_output_mapgenome_mapped_to_genome
126 |   b1_mapgenome_stats:
127 |     type: File
128 |     outputSource: b1_trim_and_map/A_output_mapgenome_stats
129 |   b1_mapgenome_star_settings:
130 |     type: File
131 |     outputSource: b1_trim_and_map/A_output_mapgenome_star_settings
132 | 
133 |   b1_output_pre_rmdup_sorted_bam:
134 |     type: File
135 |     outputSource: b1_trim_and_map/A_output_sorted_bam
136 | 
137 |   b1_output_barcodecollapsese_metrics:
138 |     type: File
139 |     outputSource: b1_trim_and_map/X_output_barcodecollapsese_metrics
140 | 
141 |   b1_output_rmdup_sorted_bam:
142 |     type: File
143 |     outputSource: b1_trim_and_map/X_output_sorted_bam
144 | 
145 |   output_pos_bw:
146 |     type: File
147 |     outputSource: make_bigwigs/posbw
148 |   output_neg_bw:
149 |     type: File
150 |     outputSource: make_bigwigs/negbw
151 | 
152 | steps:
153 | 
154 | ###########################################################################
155 | # Upstream
156 | ###########################################################################
157 | 
158 |   demultiplex:  # first step: hands the dataset name and the read record to the SE demultiplex sub-workflow
159 |     run: wf_demultiplex_se.cwl
160 |     in:
161 |       dataset: dataset
162 |       read: read
163 |     out: [
164 |       A_output_demuxed_read1,  # exposed as b1_demuxed_fastq_r1 and fed to b1_trim_and_map as read1
165 |       read_name,  # forwarded to b1_trim_and_map
166 |       dataset_name  # forwarded to b1_trim_and_map
167 |     ]
168 | 
169 |   b1_trim_and_map:  # consumes the demultiplex outputs; produces every b1_* trim/map/rmdup workflow output
170 |     run: wf_trim_and_map_chimeric_se.cwl
171 |     in:
172 |       speciesGenomeDir: speciesGenomeDir
173 |       repeatElementGenomeDir: repeatElementGenomeDir
174 |       trimfirst_overlap_length:  # no source is connected, so the default below is always used
175 |         default: "1"
176 |       trimagain_overlap_length:  # no source is connected, so the default below is always used
177 |         default: "5"
178 |       a_adapters:  # pulls the `adapters` File field out of the `read` record
179 |         source: read
180 |         valueFrom: |
181 |           ${
182 |             return self.adapters;
183 |           }
184 |       read1: demultiplex/A_output_demuxed_read1
185 |       read_name: demultiplex/read_name
186 |       dataset_name: demultiplex/dataset_name
187 |     out: [
188 |       X_output_trim_first,
189 |       X_output_trim_first_metrics,
190 |       X_output_trim_first_fastqc_report,
191 |       X_output_trim_first_fastqc_stats,
192 |       X_output_trim_again,
193 |       X_output_trim_again_metrics,
194 |       X_output_trim_again_fastqc_report,
195 |       X_output_trim_again_fastqc_stats,
196 |       A_output_maprepeats_mapped_to_genome,
197 |       A_output_maprepeats_stats,
198 |       A_output_maprepeats_star_settings,
199 |       A_output_sort_repunmapped_fastq,
200 |       A_output_mapgenome_mapped_to_genome,
201 |       A_output_mapgenome_stats,
202 |       A_output_mapgenome_star_settings,
203 |       A_output_sorted_bam,
204 |       # A_output_sorted_bam_index,
205 |       X_output_barcodecollapsese_bam,  # requested here but not wired to any workflow output
206 |       X_output_barcodecollapsese_metrics,
207 |       X_output_sorted_bam  # also the BAM consumed by make_bigwigs
208 |     ]
209 | 
210 | 
211 | ###########################################################################
212 | # Downstream (candidate for merging with main pipeline)
213 | ###########################################################################
214 | 
215 |   make_bigwigs:  # produces the workflow's output_pos_bw / output_neg_bw
216 |     run: makebigwigfiles.cwl
217 |     in:
218 |       chromsizes: chrom_sizes
219 |       bam: b1_trim_and_map/X_output_sorted_bam  # same BAM that is exposed as b1_output_rmdup_sorted_bam
220 |     out:
221 |       [posbw, negbw]
222 | 


--------------------------------------------------------------------------------
/cwl/wf_clipseqcore_nostats_se_1barcode.cwl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env cwltool
  2 | 
  3 | ### Workflow for handling reads containing one barcode ###
  4 | ### Returns a bam file containing read2 only ###
  5 | 
  6 | cwlVersion: v1.0
  7 | class: Workflow
  8 | 
  9 | requirements:
 10 |   - class: StepInputExpressionRequirement
 11 |   - class: SubworkflowFeatureRequirement
 12 |   - class: ScatterFeatureRequirement      # TODO needed?
 13 |   - class: MultipleInputFeatureRequirement
 14 |   - class: InlineJavascriptRequirement
 15 | 
 16 | #hints:
 17 | #  - class: ex:ScriptRequirement
 18 | #    scriptlines:
 19 | #      - "#!/bin/bash"
 20 | 
 21 | 
 22 | inputs:
 23 |   dataset:
 24 |     type: string
 25 | 
 26 |   speciesGenomeDir:
 27 |     type: Directory
 28 | 
 29 |   repeatElementGenomeDir:
 30 |     type: Directory
 31 | 
 32 |   # TODO: remove, we don't use it here.
 33 |   species:
 34 |     type: string
 35 | 
 36 |   chrom_sizes:
 37 |     type: File
 38 | 
 39 |   # barcodesfasta:
 40 |   #   type: File
 41 | 
 42 |   # randomer_length:
 43 |   #   type: string
 44 | 
 45 |   read:
 46 |     type:
 47 |       type: record
 48 |       fields:
 49 |         read1:
 50 |           type: File
 51 |         # read2:
 52 |         #   type: File
 53 |         adapters:
 54 |           type: File
 55 |         name:
 56 |           type: string
 57 | 
 58 |   # r2_bam:
 59 |   #   type: string
 60 | 
 61 |   # output_bam:
 62 |   #   type: string
 63 |   
 64 |   # adapters:
 65 |   #   type: File
 66 | 
 67 |   ### Defaults ###
 68 |   
 69 |   # r2_bits:
 70 |   #   type: int
 71 |   #   default: 128
 72 |   # is_bam:
 73 |   #   type: boolean
 74 |   #   default: true
 75 |   
 76 | outputs:
 77 | 
 78 |   b1_demuxed_fastq_r1:
 79 |     type: File
 80 |     outputSource: demultiplex/A_output_demuxed_read1
 81 |   # b1_demuxed_fastq_r2:
 82 |   #   type: File
 83 |   #   outputSource: demultiplex/A_output_demuxed_read2
 84 | 
 85 |   b1_trimx1_fastq:
 86 |     type: File[]
 87 |     outputSource: b1_trim_and_map/X_output_trim_first
 88 |   b1_trimx1_metrics:
 89 |     type: File
 90 |     outputSource: b1_trim_and_map/X_output_trim_first_metrics
 91 |   b1_trimx1_fastqc_report:
 92 |     type: File
 93 |     outputSource: b1_trim_and_map/X_output_trim_first_fastqc_report
 94 |   b1_trimx1_fastqc_stats: 
 95 |     type: File
 96 |     outputSource: b1_trim_and_map/X_output_trim_first_fastqc_stats
 97 |   b1_trimx2_fastq:
 98 |     type: File[]
 99 |     outputSource: b1_trim_and_map/X_output_trim_again
100 |   b1_trimx2_metrics:
101 |     type: File
102 |     outputSource: b1_trim_and_map/X_output_trim_again_metrics
103 |   b1_trimx2_fastqc_report:
104 |     type: File
105 |     outputSource: b1_trim_and_map/X_output_trim_again_fastqc_report
106 |   b1_trimx2_fastqc_stats: 
107 |     type: File
108 |     outputSource: b1_trim_and_map/X_output_trim_again_fastqc_stats
109 |     
110 |   b1_maprepeats_mapped_to_genome:
111 |     type: File
112 |     outputSource: b1_trim_and_map/A_output_maprepeats_mapped_to_genome
113 |   b1_maprepeats_stats:
114 |     type: File
115 |     outputSource: b1_trim_and_map/A_output_maprepeats_stats
116 |   b1_maprepeats_star_settings:
117 |     type: File
118 |     outputSource: b1_trim_and_map/A_output_maprepeats_star_settings
119 |   b1_sorted_unmapped_fastq:
120 |     type: File
121 |     outputSource: b1_trim_and_map/A_output_sort_repunmapped_fastq
122 | 
123 |   b1_mapgenome_mapped_to_genome:
124 |     type: File
125 |     outputSource: b1_trim_and_map/A_output_mapgenome_mapped_to_genome
126 |   b1_mapgenome_stats:
127 |     type: File
128 |     outputSource: b1_trim_and_map/A_output_mapgenome_stats
129 |   b1_mapgenome_star_settings:
130 |     type: File
131 |     outputSource: b1_trim_and_map/A_output_mapgenome_star_settings
132 | 
133 |   b1_output_pre_rmdup_sorted_bam:
134 |     type: File
135 |     outputSource: b1_trim_and_map/A_output_sorted_bam
136 | 
137 |   # b1_output_barcodecollapsese_metrics:
138 |   #   type: File
139 |   #   outputSource: b1_trim_and_map/X_output_barcodecollapsese_metrics
140 | 
141 |   b1_output_rmdup_sorted_bam:
142 |     type: File
143 |     outputSource: b1_trim_and_map/X_output_sorted_bam
144 | 
145 |   output_pos_bw:
146 |     type: File
147 |     outputSource: make_bigwigs/posbw
148 |   output_neg_bw:
149 |     type: File
150 |     outputSource: make_bigwigs/negbw
151 | 
152 | steps:
153 | 
154 | ###########################################################################
155 | # Upstream
156 | ###########################################################################
157 | 
158 |   demultiplex:  # first step: hands the dataset name and the read record to the SE demultiplex sub-workflow
159 |     run: wf_demultiplex_se.cwl
160 |     in:
161 |       dataset: dataset
162 |       read: read
163 |     out: [
164 |       A_output_demuxed_read1,  # exposed as b1_demuxed_fastq_r1 and fed to b1_trim_and_map as read1
165 |       read_name,  # forwarded to b1_trim_and_map
166 |       dataset_name  # forwarded to b1_trim_and_map
167 |     ]
168 | 
169 |   b1_trim_and_map:  # consumes the demultiplex outputs; produces every b1_* trim/map/rmdup workflow output
170 |     run: wf_trim_and_map_se_nostats.cwl
171 |     in:
172 |       speciesGenomeDir: speciesGenomeDir
173 |       repeatElementGenomeDir: repeatElementGenomeDir
174 |       trimfirst_overlap_length:  # no source is connected, so the default below is always used
175 |         default: "1"
176 |       trimagain_overlap_length:  # no source is connected, so the default below is always used
177 |         default: "5"
178 |       a_adapters:  # pulls the `adapters` File field out of the `read` record
179 |         source: read
180 |         valueFrom: |
181 |           ${
182 |             return self.adapters;
183 |           }
184 |       read1: demultiplex/A_output_demuxed_read1
185 |       read_name: demultiplex/read_name
186 |       dataset_name: demultiplex/dataset_name
187 |     out: [
188 |       X_output_trim_first,
189 |       X_output_trim_first_metrics,
190 |       X_output_trim_first_fastqc_report,
191 |       X_output_trim_first_fastqc_stats,
192 |       X_output_trim_again,
193 |       X_output_trim_again_metrics,
194 |       X_output_trim_again_fastqc_report,
195 |       X_output_trim_again_fastqc_stats,
196 |       A_output_maprepeats_mapped_to_genome,
197 |       A_output_maprepeats_stats,
198 |       A_output_maprepeats_star_settings,
199 |       A_output_sort_repunmapped_fastq,
200 |       A_output_mapgenome_mapped_to_genome,
201 |       A_output_mapgenome_stats,
202 |       A_output_mapgenome_star_settings,
203 |       A_output_sorted_bam,
204 |       # A_output_sorted_bam_index,
205 |       X_output_barcodecollapsese_bam,  # requested here but not wired to any workflow output
206 |       # X_output_barcodecollapsese_metrics,
207 |       X_output_sorted_bam  # also the BAM consumed by make_bigwigs
208 |     ]
209 | 
210 | 
211 | ###########################################################################
212 | # Downstream (candidate for merging with main pipeline)
213 | ###########################################################################
214 | 
215 |   make_bigwigs:  # produces the workflow's output_pos_bw / output_neg_bw
216 |     run: makebigwigfiles.cwl
217 |     in:
218 |       chromsizes: chrom_sizes
219 |       bam: b1_trim_and_map/X_output_sorted_bam  # same BAM that is exposed as b1_output_rmdup_sorted_bam
220 |     out:
221 |       [posbw, negbw]
222 | 


--------------------------------------------------------------------------------
/cwl/wf_clipseqcore_pe_1barcode.cwl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env cwltool
  2 | 
  3 | doc: |
  4 |   Workflow for handling reads containing one barcode.
  5 |   Returns the bam file containing read2 only.
  6 |   
  7 |   Notes:
  8 | 
  9 |     runs the following steps: 
 10 |     - demultiplex
 11 |     - trimfirst_file2string
 12 |     - trimagain_file2string
 13 |     - b1_trim_and_map
 14 |     - view_r2
 15 |     - index_r2_bam
 16 |     - make_bigwigs
 17 | 
 18 | cwlVersion: v1.0
 19 | class: Workflow
 20 | 
 21 | requirements:
 22 |   - class: StepInputExpressionRequirement
 23 |   - class: SubworkflowFeatureRequirement
 24 |   - class: MultipleInputFeatureRequirement
 25 |   - class: InlineJavascriptRequirement
 26 | 
 27 | inputs:
 28 |   dataset:
 29 |     type: string
 30 | 
 31 |   speciesGenomeDir:
 32 |     type: Directory
 33 | 
 34 |   repeatElementGenomeDir:
 35 |     type: Directory
 36 | 
 37 |   chrom_sizes:
 38 |     type: File
 39 | 
 40 |   barcodesfasta:
 41 |     type: File
 42 | 
 43 |   randomer_length:
 44 |     type: string
 45 | 
 46 |   read:
 47 |     type:
 48 |       type: record
 49 |       fields:
 50 |         read1:
 51 |           type: File
 52 |         read2:
 53 |           type: File
 54 |         barcodeids:
 55 |           type: string[]
 56 |         name:
 57 |           type: string
 58 | 
 59 | outputs:
 60 | 
 61 | 
 62 |   ### DEMULTIPLEXED OUTPUTS ###
 63 | 
 64 | 
 65 |   b1_demuxed_fastq_r1:
 66 |     label: "Barcode1 read1 demultiplexed fastq"
 67 |     type: File
 68 |     outputSource: demultiplex/A_output_demuxed_read1
 69 |   b1_demuxed_fastq_r2:
 70 |     type: File
 71 |     outputSource: demultiplex/A_output_demuxed_read2
 72 | 
 73 | 
 74 |   ### TRIMMED OUTPUTS (ROUND 1) ###
 75 | 
 76 | 
 77 |   b1_trimx1_fastq:
 78 |     type: File[]
 79 |     outputSource: b1_trim_and_map/X_output_trim_first
 80 |   b1_trimx1_metrics:
 81 |     type: File
 82 |     outputSource: b1_trim_and_map/X_output_trim_first_metrics
 83 |   b1_trimx1_fastqc_report_R1:
 84 |     type: File
 85 |     outputSource: b1_trim_and_map/X_output_trim_first_fastqc_report_R1
 86 |   b1_trimx1_fastqc_stats_R1: 
 87 |     type: File
 88 |     outputSource: b1_trim_and_map/X_output_trim_first_fastqc_stats_R1
 89 |   b1_trimx1_fastqc_report_R2:
 90 |     type: File
 91 |     outputSource: b1_trim_and_map/X_output_trim_first_fastqc_report_R2
 92 |   b1_trimx1_fastqc_stats_R2: 
 93 |     type: File
 94 |     outputSource: b1_trim_and_map/X_output_trim_first_fastqc_stats_R2
 95 | 
 96 | 
 97 |   ### TRIMMED OUTPUTS (ROUND 2) ###
 98 | 
 99 | 
100 |   b1_trimx2_fastq:
101 |     type: File[]
102 |     outputSource: b1_trim_and_map/X_output_trim_again
103 |   b1_trimx2_metrics:
104 |     type: File
105 |     outputSource: b1_trim_and_map/X_output_trim_again_metrics
106 |   b1_trimx2_fastqc_report_R1:
107 |     type: File
108 |     outputSource: b1_trim_and_map/X_output_trim_again_fastqc_report_R1
109 |   b1_trimx2_fastqc_stats_R1: 
110 |     type: File
111 |     outputSource: b1_trim_and_map/X_output_trim_again_fastqc_stats_R1
112 |   b1_trimx2_fastqc_report_R2:
113 |     type: File
114 |     outputSource: b1_trim_and_map/X_output_trim_again_fastqc_report_R2
115 |   b1_trimx2_fastqc_stats_R2: 
116 |     type: File
117 |     outputSource: b1_trim_and_map/X_output_trim_again_fastqc_stats_R2
118 | 
119 | 
120 |   ### REPEAT MAPPING OUTPUTS ###
121 | 
122 | 
123 |   b1_maprepeats_mapped_to_genome:
124 |     type: File
125 |     outputSource: b1_trim_and_map/A_output_maprepeats_mapped_to_genome
126 |   b1_maprepeats_stats:
127 |     type: File
128 |     outputSource: b1_trim_and_map/A_output_maprepeats_stats
129 |   b1_maprepeats_star_settings:
130 |     type: File
131 |     outputSource: b1_trim_and_map/A_output_maprepeats_star_settings
132 |   b1_sorted_unmapped_fastq:
133 |     type: File[]
134 |     outputSource: b1_trim_and_map/A_output_sort_repunmapped_fastq
135 | 
136 | 
137 |   ### GENOME MAPPING OUTPUTS ###
138 | 
139 | 
140 |   b1_mapgenome_mapped_to_genome:
141 |     type: File
142 |     outputSource: b1_trim_and_map/A_output_mapgenome_mapped_to_genome
143 |   b1_mapgenome_stats:
144 |     type: File
145 |     outputSource: b1_trim_and_map/A_output_mapgenome_stats
146 |   b1_mapgenome_star_settings:
147 |     type: File
148 |     outputSource: b1_trim_and_map/A_output_mapgenome_star_settings
149 | 
150 | 
151 |   ### RMDUP BAM OUTPUTS ###
152 | 
153 | 
154 |   b1_output_prermdup_sorted_bam:
155 |     type: File
156 |     outputSource: b1_trim_and_map/A_output_sorted_bam
157 |   b1_output_barcodecollapsepe_bam:
158 |     type: File
159 |     outputSource: b1_trim_and_map/X_output_barcodecollapsepe_bam
160 |   b1_output_barcodecollapsepe_metrics:
161 |     type: File
162 |     outputSource: b1_trim_and_map/X_output_barcodecollapsepe_metrics
163 | 
164 | 
165 |   ### SORTED RMDUP BAM OUTPUTS ###
166 | 
167 | 
168 |   b1_output_sorted_bam:
169 |     type: File
170 |     outputSource: b1_trim_and_map/X_output_sorted_bam
171 | 
172 | 
173 |   ### READ2 MERGED BAM OUTPUTS ###
174 | 
175 | 
176 |   output_r2_bam:
177 |     type: File
178 |     outputSource: view_r2/output
179 | 
180 | 
181 |   ### BIGWIG FILES ###
182 | 
183 | 
184 |   output_pos_bw:
185 |     type: File
186 |     outputSource: make_bigwigs/posbw
187 |   output_neg_bw:
188 |     type: File
189 |     outputSource: make_bigwigs/negbw
190 | 
191 | steps:
192 | 
193 | ###########################################################################
194 | # Upstream
195 | ###########################################################################
196 | 
197 |   demultiplex:
198 |     # Paired-end demux sub-workflow; its adapter and overlap-length outputs feed the steps below.
199 |     run: wf_demultiplex_pe.cwl
200 |     in:
201 |       read: read
202 |       dataset: dataset
203 |       barcodesfasta: barcodesfasta
204 |       randomer_length: randomer_length
205 |     out:
206 |       - A_output_demuxed_read1
207 |       - A_output_demuxed_read2
208 |       - B_output_demuxed_read1
209 |       - B_output_demuxed_read2
210 |       - AB_output_trimfirst_overlap_length
211 |       - AB_output_trimagain_overlap_length
212 |       - AB_g_adapters
213 |       - AB_g_adapters_default
214 |       - AB_a_adapters
215 |       - AB_a_adapters_default
216 |       - AB_A_adapters
217 | 
218 | ###########################################################################
219 | # Main workflow
220 | ###########################################################################
221 | 
222 |   trimfirst_file2string:
223 |     # Feeds the first-trim overlap-length File from demultiplex through file2string.cwl;
224 |     # the result is wired to b1_trim_and_map/trimfirst_overlap_length.
225 |     run: file2string.cwl
226 |     in: {file: demultiplex/AB_output_trimfirst_overlap_length}
227 | 
228 |   trimagain_file2string:
229 |     # Feeds the second-trim overlap-length File from demultiplex through file2string.cwl;
230 |     # the result is wired to b1_trim_and_map/trimagain_overlap_length.
231 |     run: file2string.cwl
232 |     in: {file: demultiplex/AB_output_trimagain_overlap_length}
233 | 
234 |   b1_trim_and_map:
235 |     # Paired-end trim + map sub-workflow, run on the "A" read pair from demultiplex.
236 |     run: wf_trim_and_map_pe.cwl
237 |     in:
238 |       read1: demultiplex/A_output_demuxed_read1
239 |       read2: demultiplex/A_output_demuxed_read2
240 |       speciesGenomeDir: speciesGenomeDir
241 |       repeatElementGenomeDir: repeatElementGenomeDir
242 |       trimfirst_overlap_length: trimfirst_file2string/output
243 |       trimagain_overlap_length: trimagain_file2string/output
244 |       g_adapters: demultiplex/AB_g_adapters
245 |       g_adapters_default: demultiplex/AB_g_adapters_default
246 |       a_adapters: demultiplex/AB_a_adapters
247 |       a_adapters_default: demultiplex/AB_a_adapters_default
248 |       A_adapters: demultiplex/AB_A_adapters
249 |     out:
250 |       - X_output_trim_first
251 |       - X_output_trim_first_metrics
252 |       - X_output_trim_first_fastqc_report_R1
253 |       - X_output_trim_first_fastqc_stats_R1
254 |       - X_output_trim_first_fastqc_report_R2
255 |       - X_output_trim_first_fastqc_stats_R2
256 |       - X_output_trim_again
257 |       - X_output_trim_again_metrics
258 |       - X_output_trim_again_fastqc_report_R1
259 |       - X_output_trim_again_fastqc_stats_R1
260 |       - X_output_trim_again_fastqc_report_R2
261 |       - X_output_trim_again_fastqc_stats_R2
262 |       - A_output_maprepeats_mapped_to_genome
263 |       - A_output_maprepeats_stats
264 |       - A_output_maprepeats_star_settings
265 |       - A_output_sort_repunmapped_fastq
266 |       - A_output_mapgenome_mapped_to_genome
267 |       - A_output_mapgenome_stats
268 |       - A_output_mapgenome_star_settings
269 |       - A_output_sorted_bam
270 |       - X_output_barcodecollapsepe_bam
271 |       - X_output_barcodecollapsepe_metrics
272 |       - X_output_sorted_bam
273 | 
274 | ###########################################################################
275 | # Downstream (candidate for merging with main pipeline)
276 | ###########################################################################
277 | 
278 |   view_r2:
279 |     # Filters the rmdup sorted BAM with readswithbits=128 (presumably SAM flag 0x80, i.e. keep read2 — confirm in samtools-viewr2.cwl).
280 |     run: samtools-viewr2.cwl
281 |     in:
282 |       input: b1_trim_and_map/X_output_sorted_bam
283 |       readswithbits: {default: 128}
284 |       isbam: {default: true}
285 |     out:
286 |       - output
287 | 
288 |   index_r2_bam:
289 |     # Runs samtools-index.cwl on the view_r2 BAM; make_bigwigs consumes alignments_with_index.
290 |     run: samtools-index.cwl
291 |     in: {alignments: view_r2/output}
292 |     out: [alignments_with_index]
293 | 
294 |   make_bigwigs:
295 |     # Produces the two bigWig outputs (posbw, negbw) from the indexed view_r2 BAM and chrom_sizes.
296 |     run: makebigwigfiles_PE.cwl
297 |     in:
298 |       bam: index_r2_bam/alignments_with_index
299 |       chromsizes: chrom_sizes
300 |     out: [posbw, negbw]
301 | 


--------------------------------------------------------------------------------
/cwl/wf_clipseqcore_se_1barcode.cwl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env cwltool
  2 | 
  3 | ### Workflow for handling reads containing one barcode ###
  4 | ### Single-end: returns the duplicate-removed sorted bam plus pos/neg bigwig files ###
  5 | 
  6 | cwlVersion: v1.0
  7 | class: Workflow
  8 | 
  9 | requirements:
 10 |   - class: StepInputExpressionRequirement
 11 |   - class: SubworkflowFeatureRequirement
 12 |   - class: ScatterFeatureRequirement      # TODO needed?
 13 |   - class: MultipleInputFeatureRequirement
 14 |   - class: InlineJavascriptRequirement
 15 | 
 16 | #hints:
 17 | #  - class: ex:ScriptRequirement
 18 | #    scriptlines:
 19 | #      - "#!/bin/bash"
 20 | 
 21 | 
 22 | inputs:
 23 |   dataset:  # forwarded unchanged to the demultiplex step
 24 |     type: string
 25 | 
 26 |   speciesGenomeDir:  # forwarded to b1_trim_and_map
 27 |     type: Directory
 28 | 
 29 |   repeatElementGenomeDir:  # forwarded to b1_trim_and_map
 30 |     type: Directory
 31 | 
 32 |   # TODO: remove; no step or output in this workflow references it.
 33 |   species:
 34 |     type: string
 35 | 
 36 |   chrom_sizes:  # consumed by make_bigwigs as `chromsizes`
 37 |     type: File
 38 | 
 39 |   # barcodesfasta:
 40 |   #   type: File
 41 | 
 42 |   # randomer_length:
 43 |   #   type: string
 44 | 
 45 |   read:  # whole record goes to demultiplex; b1_trim_and_map reads only its `adapters` field
 46 |     type:
 47 |       type: record
 48 |       fields:
 49 |         read1:
 50 |           type: File
 51 |         # read2:
 52 |         #   type: File
 53 |         adapters:  # becomes the a_adapters input of b1_trim_and_map
 54 |           type: File
 55 |         name:
 56 |           type: string
 57 | 
 58 |   # r2_bam:
 59 |   #   type: string
 60 | 
 61 |   # output_bam:
 62 |   #   type: string
 63 |   
 64 |   # adapters:
 65 |   #   type: File
 66 | 
 67 |   ### Defaults ###
 68 |   
 69 |   # r2_bits:
 70 |   #   type: int
 71 |   #   default: 128
 72 |   # is_bam:
 73 |   #   type: boolean
 74 |   #   default: true
 75 |   
 76 | outputs:
 77 |   # Read1 as emitted by the demultiplex step.
 78 |   b1_demuxed_fastq_r1:
 79 |     type: File
 80 |     outputSource: demultiplex/A_output_demuxed_read1
 81 |   # b1_demuxed_fastq_r2:
 82 |   #   type: File
 83 |   #   outputSource: demultiplex/A_output_demuxed_read2
 84 |   # First (trimx1) and second (trimx2) trimming rounds, each with metrics and FastQC report/stats.
 85 |   b1_trimx1_fastq:
 86 |     type: File[]
 87 |     outputSource: b1_trim_and_map/X_output_trim_first
 88 |   b1_trimx1_metrics:
 89 |     type: File
 90 |     outputSource: b1_trim_and_map/X_output_trim_first_metrics
 91 |   b1_trimx1_fastqc_report:
 92 |     type: File
 93 |     outputSource: b1_trim_and_map/X_output_trim_first_fastqc_report
 94 |   b1_trimx1_fastqc_stats: 
 95 |     type: File
 96 |     outputSource: b1_trim_and_map/X_output_trim_first_fastqc_stats
 97 |   b1_trimx2_fastq:
 98 |     type: File[]
 99 |     outputSource: b1_trim_and_map/X_output_trim_again
100 |   b1_trimx2_metrics:
101 |     type: File
102 |     outputSource: b1_trim_and_map/X_output_trim_again_metrics
103 |   b1_trimx2_fastqc_report:
104 |     type: File
105 |     outputSource: b1_trim_and_map/X_output_trim_again_fastqc_report
106 |   b1_trimx2_fastqc_stats: 
107 |     type: File
108 |     outputSource: b1_trim_and_map/X_output_trim_again_fastqc_stats
109 |   # Repeat-element mapping outputs.
110 |   b1_maprepeats_mapped_to_genome:
111 |     type: File
112 |     outputSource: b1_trim_and_map/A_output_maprepeats_mapped_to_genome
113 |   b1_maprepeats_stats:
114 |     type: File
115 |     outputSource: b1_trim_and_map/A_output_maprepeats_stats
116 |   b1_maprepeats_star_settings:
117 |     type: File
118 |     outputSource: b1_trim_and_map/A_output_maprepeats_star_settings
119 |   b1_sorted_unmapped_fastq:
120 |     type: File
121 |     outputSource: b1_trim_and_map/A_output_sort_repunmapped_fastq
122 |   # Genome mapping outputs.
123 |   b1_mapgenome_mapped_to_genome:
124 |     type: File
125 |     outputSource: b1_trim_and_map/A_output_mapgenome_mapped_to_genome
126 |   b1_mapgenome_stats:
127 |     type: File
128 |     outputSource: b1_trim_and_map/A_output_mapgenome_stats
129 |   b1_mapgenome_star_settings:
130 |     type: File
131 |     outputSource: b1_trim_and_map/A_output_mapgenome_star_settings
132 |   # Sorted BAM from before duplicate removal.
133 |   b1_output_pre_rmdup_sorted_bam:
134 |     type: File
135 |     outputSource: b1_trim_and_map/A_output_sorted_bam
136 |   # Barcode-collapse metrics; the collapsed BAM (X_output_barcodecollapsese_bam) is not exported here.
137 |   b1_output_barcodecollapsese_metrics:
138 |     type: File
139 |     outputSource: b1_trim_and_map/X_output_barcodecollapsese_metrics
140 |   # Sorted BAM after duplicate removal; also the input to make_bigwigs.
141 |   b1_output_rmdup_sorted_bam:
142 |     type: File
143 |     outputSource: b1_trim_and_map/X_output_sorted_bam
144 |   # The two bigWig files produced by make_bigwigs.
145 |   output_pos_bw:
146 |     type: File
147 |     outputSource: make_bigwigs/posbw
148 |   output_neg_bw:
149 |     type: File
150 |     outputSource: make_bigwigs/negbw
151 | 
152 | steps:
153 | 
154 | ###########################################################################
155 | # Upstream
156 | ###########################################################################
157 | 
158 |   demultiplex:
159 |     # Single-end demux sub-workflow; supplies read1 plus the read and dataset names used downstream.
160 |     run: wf_demultiplex_se.cwl
161 |     in:
162 |       read: read
163 |       dataset: dataset
164 |     out:
165 |       - A_output_demuxed_read1
166 |       - read_name
167 |       - dataset_name
168 | 
169 |   b1_trim_and_map:
170 |     # Single-end trim + map sub-workflow. The overlap lengths are fixed defaults ("1", "5")
171 |     # rather than values produced by an upstream step, and the adapter file is pulled out
172 |     # of the `read` input record by the valueFrom expression below.
173 |     run: wf_trim_and_map_se.cwl
174 |     in:
175 |       read1: demultiplex/A_output_demuxed_read1
176 |       read_name: demultiplex/read_name
177 |       dataset_name: demultiplex/dataset_name
178 |       speciesGenomeDir: speciesGenomeDir
179 |       repeatElementGenomeDir: repeatElementGenomeDir
180 |       trimfirst_overlap_length: {default: "1"}
181 |       trimagain_overlap_length: {default: "5"}
182 |       a_adapters:
183 |         source: read
184 |         valueFrom: |
185 |           ${
186 |             return self.adapters;
187 |           }
188 |     out:
189 |       - X_output_trim_first
190 |       - X_output_trim_first_metrics
191 |       - X_output_trim_first_fastqc_report
192 |       - X_output_trim_first_fastqc_stats
193 |       - X_output_trim_again
194 |       - X_output_trim_again_metrics
195 |       - X_output_trim_again_fastqc_report
196 |       - X_output_trim_again_fastqc_stats
197 |       - A_output_maprepeats_mapped_to_genome
198 |       - A_output_maprepeats_stats
199 |       - A_output_maprepeats_star_settings
200 |       - A_output_sort_repunmapped_fastq
201 |       - A_output_mapgenome_mapped_to_genome
202 |       - A_output_mapgenome_stats
203 |       - A_output_mapgenome_star_settings
204 |       - A_output_sorted_bam
205 |       # - A_output_sorted_bam_index
206 |       - X_output_barcodecollapsese_bam
207 |       - X_output_barcodecollapsese_metrics
208 |       - X_output_sorted_bam
209 | 
210 | 
211 | ###########################################################################
212 | # Downstream (candidate for merging with main pipeline)
213 | ###########################################################################
214 | 
215 |   make_bigwigs:
216 |     # Produces the two bigWig outputs (posbw, negbw) from the rmdup sorted BAM and chrom_sizes.
217 |     run: makebigwigfiles.cwl
218 |     in:
219 |       bam: b1_trim_and_map/X_output_sorted_bam
220 |       chromsizes: chrom_sizes
221 |     out: [posbw, negbw]
222 | 


--------------------------------------------------------------------------------
/cwl/wf_clipseqcore_trim_partial_se_1barcode.cwl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env cwltool
  2 | 
  3 | ### Workflow for handling reads containing one barcode ###
  4 | ### Single-end, single trimming round: returns the duplicate-removed sorted bam plus pos/neg bigwig files ###
  5 | 
  6 | cwlVersion: v1.0
  7 | class: Workflow
  8 | 
  9 | requirements:
 10 |   - class: StepInputExpressionRequirement
 11 |   - class: SubworkflowFeatureRequirement
 12 |   - class: ScatterFeatureRequirement      # TODO needed?
 13 |   - class: MultipleInputFeatureRequirement
 14 |   - class: InlineJavascriptRequirement
 15 | 
 16 | #hints:
 17 | #  - class: ex:ScriptRequirement
 18 | #    scriptlines:
 19 | #      - "#!/bin/bash"
 20 | 
 21 | 
 22 | inputs:
 23 |   dataset:  # forwarded unchanged to the demultiplex step
 24 |     type: string
 25 | 
 26 |   speciesGenomeDir:  # forwarded to b1_trim_and_map
 27 |     type: Directory
 28 | 
 29 |   repeatElementGenomeDir:  # forwarded to b1_trim_and_map
 30 |     type: Directory
 31 | 
 32 |   # TODO: remove; no step or output in this workflow references it.
 33 |   species:
 34 |     type: string
 35 | 
 36 |   chrom_sizes:  # consumed by make_bigwigs as `chromsizes`
 37 |     type: File
 38 | 
 39 |   # barcodesfasta:
 40 |   #   type: File
 41 | 
 42 |   # randomer_length:
 43 |   #   type: string
 44 | 
 45 |   read:  # whole record goes to demultiplex; b1_trim_and_map reads only its `adapters` field
 46 |     type:
 47 |       type: record
 48 |       fields:
 49 |         read1:
 50 |           type: File
 51 |         # read2:
 52 |         #   type: File
 53 |         adapters:  # becomes the a_adapters input of b1_trim_and_map
 54 |           type: File
 55 |         name:
 56 |           type: string
 57 | 
 58 |   # r2_bam:
 59 |   #   type: string
 60 | 
 61 |   # output_bam:
 62 |   #   type: string
 63 |   
 64 |   # adapters:
 65 |   #   type: File
 66 | 
 67 |   ### Defaults ###
 68 |   
 69 |   # r2_bits:
 70 |   #   type: int
 71 |   #   default: 128
 72 |   # is_bam:
 73 |   #   type: boolean
 74 |   #   default: true
 75 |   
 76 | outputs:
 77 |   # Read1 as emitted by the demultiplex step.
 78 |   b1_demuxed_fastq_r1:
 79 |     type: File
 80 |     outputSource: demultiplex/A_output_demuxed_read1
 81 |   # b1_demuxed_fastq_r2:
 82 |   #   type: File
 83 |   #   outputSource: demultiplex/A_output_demuxed_read2
 84 |   # Only the first trimming round (trimx1) is exported; the trimx2 outputs are commented out below.
 85 |   b1_trimx1_fastq:
 86 |     type: File[]
 87 |     outputSource: b1_trim_and_map/X_output_trim_first
 88 |   b1_trimx1_metrics:
 89 |     type: File
 90 |     outputSource: b1_trim_and_map/X_output_trim_first_metrics
 91 |   b1_trimx1_fastqc_report:
 92 |     type: File
 93 |     outputSource: b1_trim_and_map/X_output_trim_first_fastqc_report
 94 |   b1_trimx1_fastqc_stats: 
 95 |     type: File
 96 |     outputSource: b1_trim_and_map/X_output_trim_first_fastqc_stats
 97 |   # b1_trimx2_fastq:
 98 |   #   type: File[]
 99 |   #   outputSource: b1_trim_and_map/X_output_trim_again
100 |   # b1_trimx2_metrics:
101 |   #   type: File
102 |   #   outputSource: b1_trim_and_map/X_output_trim_again_metrics
103 |   # b1_trimx2_fastqc_report:
104 |   #   type: File
105 |   #   outputSource: b1_trim_and_map/X_output_trim_again_fastqc_report
106 |   # b1_trimx2_fastqc_stats: 
107 |   #   type: File
108 |   #   outputSource: b1_trim_and_map/X_output_trim_again_fastqc_stats
109 |   # Repeat-element mapping outputs.
110 |   b1_maprepeats_mapped_to_genome:
111 |     type: File
112 |     outputSource: b1_trim_and_map/A_output_maprepeats_mapped_to_genome
113 |   b1_maprepeats_stats:
114 |     type: File
115 |     outputSource: b1_trim_and_map/A_output_maprepeats_stats
116 |   b1_maprepeats_star_settings:
117 |     type: File
118 |     outputSource: b1_trim_and_map/A_output_maprepeats_star_settings
119 |   b1_sorted_unmapped_fastq:
120 |     type: File
121 |     outputSource: b1_trim_and_map/A_output_sort_repunmapped_fastq
122 |   # Genome mapping outputs.
123 |   b1_mapgenome_mapped_to_genome:
124 |     type: File
125 |     outputSource: b1_trim_and_map/A_output_mapgenome_mapped_to_genome
126 |   b1_mapgenome_stats:
127 |     type: File
128 |     outputSource: b1_trim_and_map/A_output_mapgenome_stats
129 |   b1_mapgenome_star_settings:
130 |     type: File
131 |     outputSource: b1_trim_and_map/A_output_mapgenome_star_settings
132 |   # Sorted BAM from before duplicate removal.
133 |   b1_output_pre_rmdup_sorted_bam:
134 |     type: File
135 |     outputSource: b1_trim_and_map/A_output_sorted_bam
136 |   # Barcode-collapse metrics; the collapsed BAM (X_output_barcodecollapsese_bam) is not exported here.
137 |   b1_output_barcodecollapsese_metrics:
138 |     type: File
139 |     outputSource: b1_trim_and_map/X_output_barcodecollapsese_metrics
140 |   # Sorted BAM after duplicate removal; also the input to make_bigwigs.
141 |   b1_output_rmdup_sorted_bam:
142 |     type: File
143 |     outputSource: b1_trim_and_map/X_output_sorted_bam
144 |   # The two bigWig files produced by make_bigwigs.
145 |   output_pos_bw:
146 |     type: File
147 |     outputSource: make_bigwigs/posbw
148 |   output_neg_bw:
149 |     type: File
150 |     outputSource: make_bigwigs/negbw
151 | 
152 | steps:
153 | 
154 | ###########################################################################
155 | # Upstream
156 | ###########################################################################
157 | 
158 |   demultiplex:
159 |     # Single-end demux sub-workflow; supplies read1 plus the read and dataset names used downstream.
160 |     run: wf_demultiplex_se.cwl
161 |     in:
162 |       read: read
163 |       dataset: dataset
164 |     out:
165 |       - A_output_demuxed_read1
166 |       - read_name
167 |       - dataset_name
168 | 
169 |   b1_trim_and_map:
170 |     # Partial-trim variant of the single-end trim + map sub-workflow: only the trim_first
171 |     # outputs are collected. trimagain_overlap_length is still supplied (default "5").
172 |     # The adapter file is pulled out of the `read` input record by the valueFrom below.
173 |     run: wf_trim_partial_and_map_se.cwl
174 |     in:
175 |       read1: demultiplex/A_output_demuxed_read1
176 |       read_name: demultiplex/read_name
177 |       dataset_name: demultiplex/dataset_name
178 |       speciesGenomeDir: speciesGenomeDir
179 |       repeatElementGenomeDir: repeatElementGenomeDir
180 |       trimfirst_overlap_length: {default: "1"}
181 |       trimagain_overlap_length: {default: "5"}
182 |       a_adapters:
183 |         source: read
184 |         valueFrom: |
185 |           ${
186 |             return self.adapters;
187 |           }
188 |     out:
189 |       - X_output_trim_first
190 |       - X_output_trim_first_metrics
191 |       - X_output_trim_first_fastqc_report
192 |       - X_output_trim_first_fastqc_stats
193 |       - A_output_maprepeats_mapped_to_genome
194 |       - A_output_maprepeats_stats
195 |       - A_output_maprepeats_star_settings
196 |       - A_output_sort_repunmapped_fastq
197 |       - A_output_mapgenome_mapped_to_genome
198 |       - A_output_mapgenome_stats
199 |       - A_output_mapgenome_star_settings
200 |       - A_output_sorted_bam
201 |       # - A_output_sorted_bam_index
202 |       - X_output_barcodecollapsese_bam
203 |       - X_output_barcodecollapsese_metrics
204 |       - X_output_sorted_bam
205 | 
206 | 
207 | ###########################################################################
208 | # Downstream (candidate for merging with main pipeline)
209 | ###########################################################################
210 | 
211 |   make_bigwigs:
212 |     # Produces the two bigWig outputs (posbw, negbw) from the rmdup sorted BAM and chrom_sizes.
213 |     run: makebigwigfiles.cwl
214 |     in:
215 |       bam: b1_trim_and_map/X_output_sorted_bam
216 |       chromsizes: chrom_sizes
217 |     out: [posbw, negbw]
218 | 


--------------------------------------------------------------------------------
/cwl/wf_demultiplex_pe.cwl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env cwltool
  2 | 
  3 | 
  4 | cwlVersion: v1.0
  5 | class: Workflow
  6 | 
  7 | requirements:
  8 |   - class: StepInputExpressionRequirement
  9 |   - class: SubworkflowFeatureRequirement
 10 |   - class: ScatterFeatureRequirement      # TODO needed?
 11 |   - class: MultipleInputFeatureRequirement
 12 | 
 13 | 
 14 | #hints:
 15 | #  - class: ex:ScriptRequirement
 16 | #    scriptlines:
 17 | #      - "#!/bin/bash"
 18 | 
 19 | 
 20 | inputs:
 21 |   dataset:  # forwarded to AB_demux
 22 |     type: string
 23 |   randomer_length:  # forwarded to both AB_demux and AB_parsebarcodes
 24 |     type: string
 25 |   barcodesfasta:  # forwarded to both AB_demux and AB_parsebarcodes
 26 |     type: File
 27 | 
 28 |   read:  # whole record is passed to AB_demux as `reads`
 29 |     type:
 30 |       type: record
 31 |       fields:
 32 |         read1:
 33 |           type: File
 34 |         read2:
 35 |           type: File
 36 |         barcodeids:
 37 |           type: string[]
 38 |         name:
 39 |           type: string
 40 | outputs:
 41 |   # Every output below re-exports an output of the AB_demux or AB_parsebarcodes step.
 42 |   ### DEMUXED FILES ###
 43 |   A_output_demuxed_read1:
 44 |     type: File
 45 |     outputSource: AB_demux/demuxedAfwd
 46 |   A_output_demuxed_read2:
 47 |     type: File
 48 |     outputSource: AB_demux/demuxedArev
 49 |   B_output_demuxed_read1:
 50 |     type: File
 51 |     outputSource: AB_demux/demuxedBfwd
 52 |   B_output_demuxed_read2:
 53 |     type: File
 54 |     outputSource: AB_demux/demuxedBrev
 55 |   # From AB_parsebarcodes: two overlap-length files and five adapter files (all typed File).
 56 |   ### TRIM/CUTADAPT PARAMS ###
 57 |   AB_output_trimfirst_overlap_length:
 58 |     type: File
 59 |     outputSource: AB_parsebarcodes/trimfirst_overlap_length
 60 |   AB_output_trimagain_overlap_length:
 61 |     type: File
 62 |     outputSource: AB_parsebarcodes/trimagain_overlap_length
 63 |   AB_g_adapters_default:
 64 |     type: File
 65 |     outputSource: AB_parsebarcodes/g_adapters_default
 66 |   AB_a_adapters_default:
 67 |     type: File
 68 |     outputSource: AB_parsebarcodes/a_adapters_default
 69 |   AB_g_adapters:
 70 |     type: File
 71 |     outputSource: AB_parsebarcodes/g_adapters
 72 |   AB_a_adapters:
 73 |     type: File
 74 |     outputSource: AB_parsebarcodes/a_adapters
 75 |   AB_A_adapters:
 76 |     type: File
 77 |     outputSource: AB_parsebarcodes/A_adapters
 78 | 
 79 | 
 80 | steps:
 81 | 
 82 | ###########################################################################
 83 | # Upstream
 84 | ###########################################################################
 85 |   AB_demux:
 86 |     # Produces the A and B forward/reverse demuxed reads and the two barcode ids
 87 |     # (barcodeidA/barcodeidB) that AB_parsebarcodes takes as input.
 88 |     # The metrics, output_dataset and name outputs are declared but not exported by this workflow.
 89 |     run: demux_pe.cwl
 90 |     in:
 91 |       reads: read
 92 |       dataset: dataset
 93 |       barcodesfasta: barcodesfasta
 94 |       randomer_length: randomer_length
 95 |       # seqdatapath: seqdatapath
 96 |     out: [
 97 |       demuxedAfwd, demuxedArev, demuxedBfwd, demuxedBrev,
 98 |       output_demuxedpairedend_metrics, output_dataset,
 99 |       name, barcodeidA, barcodeidB
100 |     ]
101 | 
102 |   AB_parsebarcodes:
103 |     # Takes the barcode ids reported by AB_demux and emits the overlap-length and adapter files exported above.
104 |     run: parsebarcodes.cwl
105 |     in:
106 |       barcodeidA: AB_demux/barcodeidA
107 |       barcodeidB: AB_demux/barcodeidB
108 |       barcodesfasta: barcodesfasta
109 |       randomer_length: randomer_length
110 |     out: [
111 |       trimfirst_overlap_length, trimagain_overlap_length,
112 |       g_adapters_default, a_adapters_default, g_adapters, a_adapters, A_adapters
113 |     ]
114 | 
115 | ###########################################################################
116 | # Downstream
117 | ###########################################################################
118 | 
119 | 


--------------------------------------------------------------------------------
/cwl/wf_demultiplex_se.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwltool
 2 | 
 3 | ### This is kind of a worthless workflow, ###
 4 | ### but to keep consistent with the paired-end ###
 5 | ### pipeline, I'm keeping it here. ###
 6 | 
 7 | cwlVersion: v1.0
 8 | class: Workflow
 9 | 
10 | requirements:
11 |   - class: StepInputExpressionRequirement
12 |   - class: SubworkflowFeatureRequirement
13 |   - class: ScatterFeatureRequirement      # TODO needed?
14 |   - class: MultipleInputFeatureRequirement
15 | 
16 | 
17 | #hints:
18 | #  - class: ex:ScriptRequirement
19 | #    scriptlines:
20 | #      - "#!/bin/bash"
21 | 
22 | 
23 | inputs:
24 |   dataset:  # forwarded to AB_demux
25 |     type: string
26 |   # randomer_length:
27 |   #   type: string
28 |   # barcodesfasta:
29 |   #   type: File
30 | 
31 |   read:  # whole record is passed to AB_demux as `reads`
32 |     type:
33 |       type: record
34 |       fields:
35 |         read1:
36 |           type: File
37 |         # barcodeids:
38 |         #   type: string[]
39 |         name:
40 |           type: string
41 | outputs:
42 |   # read1 is exported after the gzip_demux step; the two names come straight from AB_demux.
43 |   ### DEMUXED FILES ###
44 |   A_output_demuxed_read1:
45 |     type: File
46 |     outputSource: gzip_demux/gzipped
47 |   read_name:
48 |     type: string
49 |     outputSource: AB_demux/name
50 |   dataset_name:
51 |     type: string
52 |     outputSource: AB_demux/output_dataset
53 |   ### TRIM/CUTADAPT PARAMS ###
54 | 
55 | 
56 | steps:
57 | 
58 | ###########################################################################
59 | # Upstream
60 | ###########################################################################
61 |   AB_demux:
62 |     # Single-end demux tool; output_demuxedsingleend_metrics is declared but not exported by this workflow.
63 |     run: demux_se.cwl
64 |     in:
65 |       dataset: dataset
66 |       reads: read
67 |     out:
68 |       - demuxedAfwd
69 |       - output_demuxedsingleend_metrics
70 |       - output_dataset
71 |       - name
72 | 
73 | ###########################################################################
74 | # Downstream
75 | ###########################################################################
76 |   gzip_demux:
77 |     # Passes the demuxed read1 file through gzip.cwl; `gzipped` is exported as A_output_demuxed_read1.
78 |     run: gzip.cwl
79 |     in:
80 |       input: AB_demux/demuxedAfwd
81 |     out: [gzipped]
82 |         
83 | doc: |
84 |   This workflow takes in single-end reads, and performs the following steps in order:
85 |   demux_se.cwl (does not actually demux for single end, but mirrors the paired-end processing protocol)
86 |   gzip.cwl (applied to the read1 output of demux_se.cwl)


--------------------------------------------------------------------------------
/cwl/wf_fastqc.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwltool
 2 | 
 3 | ### Fastqc annoyingly does not allow customized output filenames, so we need to rename each output so they don't overwrite each other.
 4 | 
 5 | cwlVersion: v1.0
 6 | class: Workflow
 7 | 
 8 | requirements:
 9 |   - class: InlineJavascriptRequirement
10 |   - class: StepInputExpressionRequirement
11 |   - class: SubworkflowFeatureRequirement
12 | 
13 | inputs:
14 |   reads:  # passed to step_fastqc; its nameroot also names the renamed report and stats files
15 |     type: File
16 | 
17 | outputs:
18 |   output_qc_report:  # renamed FastQC report (from step_rename_report)
19 |     type: File
20 |     outputSource: step_rename_report/outfile
21 |   output_qc_stats:  # renamed FastQC stats file (from step_rename_stats)
22 |     type: File
23 |     outputSource: step_rename_stats/outfile
24 | 
25 | 
26 | steps:
27 | 
28 | ###########################################################################
29 | # Upstream
30 | ###########################################################################
31 |   step_fastqc:
32 |     # Runs fastqc.cwl on the input reads; both outputs are renamed by the steps below.
33 |     run: fastqc.cwl
34 |     in:
35 |       reads: reads
36 |     out:
37 |       - output_qc_report
38 |       - output_qc_stats
39 | 
40 | ###########################################################################
41 | # Downstream
42 | ###########################################################################
43 |   step_rename_report:
44 |     # Renames the FastQC report using the input reads' nameroot plus ".fastqc_report"
45 |     # (suffix ".html"), so reports for different inputs do not share a filename.
46 |     run: rename.cwl
47 |     in:
48 |       srcfile: step_fastqc/output_qc_report
49 |       suffix: {default: ".html"}
50 |       newname:
51 |         source: reads
52 |         valueFrom: ${ return self.nameroot + ".fastqc_report"; }
53 |     out:
54 |       - outfile
55 |   step_rename_stats:
56 |     # Renames the FastQC stats file using the input reads' nameroot plus ".fastqc_data"
57 |     # (suffix ".txt"), so stats for different inputs do not share a filename.
58 |     run: rename.cwl
59 |     in:
60 |       srcfile: step_fastqc/output_qc_stats
61 |       suffix: {default: ".txt"}
62 |       newname:
63 |         source: reads
64 |         valueFrom: ${ return self.nameroot + ".fastqc_data"; }
65 |     out:
66 |       - outfile
67 |     
68 | doc: |
69 |   This workflow takes in a single reads file, and performs the following steps in order:
70 |   fastqc.cwl (produces the QC report and QC stats files)
71 |   rename.cwl (run once per FastQC output; names each file after the input's nameroot so results from different inputs do not collide)


--------------------------------------------------------------------------------
/cwl/wf_trim_partial_and_map_se_scatter.cwl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env cwltool
  2 | 
  3 | ### Scatter wrapper: runs wf_trim_partial_and_map_se.cwl (the variant of wf_trim_and_map_se.cwl that runs cutadapt only once) once per file in read1s. ###
  4 | 
  5 | cwlVersion: v1.0
  6 | class: Workflow
  7 | 
  8 | requirements:
  9 |   - class: InlineJavascriptRequirement
 10 |   - class: StepInputExpressionRequirement
 11 |   - class: SubworkflowFeatureRequirement
 12 |   - class: ScatterFeatureRequirement      # TODO needed?
 13 |   - class: MultipleInputFeatureRequirement
 14 | 
 15 | 
 16 | #hints:
 17 | #  - class: ex:ScriptRequirement
 18 | #    scriptlines:
 19 | #      - "#!/bin/bash"
 20 | 
 21 | 
 22 | inputs:
 23 |   speciesGenomeDir:
 24 |     type: Directory
 25 |   repeatElementGenomeDir:
 26 |     type: Directory
 27 |   trimfirst_overlap_length:
 28 |     type: string
 29 |   trimagain_overlap_length:
 30 |     type: string
 31 |   # g_adapters:
 32 |   #   type: File
 33 |   # g_adapters_default:
 34 |   #   type: File
 35 |   a_adapters:
 36 |     type: File
 37 |   # a_adapters_default:
 38 |   #   type: File
 39 |   # A_adapters:
 40 |   #   type: File
 41 |   read1s:  # scattered: the step below runs once per file in this array
 42 |     type: File[]
 43 |   read_name:  # the same value is passed to every scattered run
 44 |     type: string
 45 |   dataset_name:  # the same value is passed to every scattered run
 46 |     type: string
 47 | 
 48 |   ## Defaults (don't change unless we have a very good reason) ##
 49 |   # NOTE(review): none of the inputs below are wired into the step in this file — confirm they are still needed.
 50 |   sort_names:
 51 |     type: boolean
 52 |     default: true
 53 |   trim_times:
 54 |     type: string
 55 |     default: "1"
 56 |   trim_error_rate:
 57 |     type: string
 58 |     default: "0.1"
 59 | 
 60 |   fastq_suffix:
 61 |     type: string
 62 |     default: ".fq"
 63 |   bam_suffix:
 64 |     type: string
 65 |     default: ".bam"
 66 |     
 67 |   hard_trim_length:
 68 |     type: int
 69 |     default: -9
 70 |     
 71 | outputs:
 72 |   # All outputs are collected from the scattered step, so each is an array over read1s.
 73 |   X_output_trim_first:
 74 |     type: 
 75 |       type: array
 76 |       items:
 77 |         type: array
 78 |         items: File
 79 |     outputSource: step_wf_trim_partial_and_map/X_output_trim_first
 80 |   X_output_trim_first_metrics:
 81 |     type: File[]
 82 |     outputSource: step_wf_trim_partial_and_map/X_output_trim_first_metrics
 83 |   # Repeat-element mapping outputs.
 84 |   A_output_maprepeats_mapped_to_genome:
 85 |     type: File[]
 86 |     outputSource: step_wf_trim_partial_and_map/A_output_maprepeats_mapped_to_genome
 87 |   A_output_maprepeats_stats:
 88 |     type: File[]
 89 |     outputSource: step_wf_trim_partial_and_map/A_output_maprepeats_stats
 90 |   A_output_maprepeats_star_settings:
 91 |     type: File[]
 92 |     outputSource: step_wf_trim_partial_and_map/A_output_maprepeats_star_settings
 93 |   A_output_sort_repunmapped_fastq:
 94 |     type: File[]
 95 |     outputSource: step_wf_trim_partial_and_map/A_output_sort_repunmapped_fastq
 96 |   # Genome mapping outputs.
 97 |   A_output_mapgenome_mapped_to_genome:
 98 |     type: File[]
 99 |     outputSource: step_wf_trim_partial_and_map/A_output_mapgenome_mapped_to_genome
100 |   A_output_mapgenome_stats:
101 |     type: File[]
102 |     outputSource: step_wf_trim_partial_and_map/A_output_mapgenome_stats
103 |   A_output_mapgenome_star_settings:
104 |     type: File[]
105 |     outputSource: step_wf_trim_partial_and_map/A_output_mapgenome_star_settings
106 |   A_output_sorted_bam:
107 |     type: File[]
108 |     outputSource: step_wf_trim_partial_and_map/A_output_sorted_bam
109 |   # Barcode-collapse BAMs; the matching metrics output is commented out below.
110 |   X_output_barcodecollapsese_bam:
111 |     type: File[]
112 |     outputSource: step_wf_trim_partial_and_map/X_output_barcodecollapsese_bam
113 |   # X_output_barcodecollapsese_metrics:
114 |   #   type: File[]
115 |   #   outputSource: step_wf_trim_partial_and_map/X_output_barcodecollapsese_metrics
116 | 
117 |   X_output_sorted_bam:
118 |     type: File[]
119 |     outputSource: step_wf_trim_partial_and_map/X_output_sorted_bam
120 | 
121 | steps:
122 | 
123 |   step_wf_trim_partial_and_map:
124 |     # Runs wf_trim_partial_and_map_se.cwl once per element of read1s (scatter on read1);
125 |     # every other input is passed unchanged to each run, so each collected output
126 |     # becomes an array with one entry per input file.
127 |     # Only the inputs listed under `in` are forwarded; the workflow-level defaults
128 |     # (sort_names, trim_times, ...) are not wired into this step.
129 |     run: wf_trim_partial_and_map_se.cwl
130 |     scatter: read1
131 |     in:
132 |       read1: read1s
133 |       read_name: read_name
134 |       dataset_name: dataset_name
135 |       a_adapters: a_adapters
136 |       speciesGenomeDir: speciesGenomeDir
137 |       repeatElementGenomeDir: repeatElementGenomeDir
138 |       trimfirst_overlap_length: trimfirst_overlap_length
139 |       trimagain_overlap_length: trimagain_overlap_length
140 |     out: [
141 |       X_output_trim_first, X_output_trim_first_metrics,
142 |       A_output_maprepeats_mapped_to_genome, A_output_maprepeats_stats,
143 |       A_output_maprepeats_star_settings, A_output_sort_repunmapped_fastq,
144 |       A_output_mapgenome_mapped_to_genome, A_output_mapgenome_stats,
145 |       A_output_mapgenome_star_settings, A_output_sorted_bam,
146 |       X_output_barcodecollapsese_bam, X_output_sorted_bam
147 |       # X_output_barcodecollapsese_metrics
148 |     ]
149 | 


--------------------------------------------------------------------------------
/documentation/Repeat_mapping.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/documentation/Repeat_mapping.pdf


--------------------------------------------------------------------------------
/documentation/Reproducible_peaks.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/documentation/Reproducible_peaks.pdf


--------------------------------------------------------------------------------
/documentation/Zero_to_peaks.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/documentation/Zero_to_peaks.pdf


--------------------------------------------------------------------------------
/documentation/eCLIP_analysisSOP_v2.0.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/documentation/eCLIP_analysisSOP_v2.0.pdf


--------------------------------------------------------------------------------
/documentation/eCLIP_analysisSOP_v2.2.1.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/documentation/eCLIP_analysisSOP_v2.2.1.docx


--------------------------------------------------------------------------------
/documentation/eCLIP_analysisSOP_v2.2.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/documentation/eCLIP_analysisSOP_v2.2.docx


--------------------------------------------------------------------------------
/documentation/eCLIP_single_end_analysisSOP_v1.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/documentation/eCLIP_single_end_analysisSOP_v1.docx


--------------------------------------------------------------------------------
/eCLIP-flowchart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/eCLIP-flowchart.png


--------------------------------------------------------------------------------
/example/inputs/ENCFF039QTN.bed:
--------------------------------------------------------------------------------
 1 | chr1	185958734	185958753	skyscraper	0	+
 2 | chr22	38051278	38051282	skyscraper	0	+
 3 | chr22	38049865	38049880	skyscraper	0	+
 4 | chr10	27220229	27220248	skyscraper	0	-
 5 | chr8	38586651	38586671	skyscraper	0	+
 6 | chr5	179602225	179602245	skyscraper	0	-
 7 | chr3	49940108	49940127	skyscraper	0	-
 8 | chr12	54378855	54378873	skyscraper	0	+
 9 | chr13	24308537	24308555	skyscraper	0	-
10 | chr6	163123394	163123414	skyscraper	0	-
11 | chr9	127533506	127533527	skyscraper	0	+
12 | chr13	74014620	74014645	skyscraper	0	-
13 | chr2	219824456	219824478	skyscraper	0	+
14 | chr11	62019670	62019688	skyscraper	0	-
15 | chr7	39663474	39663497	skyscraper	0	+
16 | chr1	75210277	75210295	skyscraper	0	+
17 | chr9	77113739	77113772	skyscraper	0	+
18 | chr17	8090536	8090553	skyscraper	0	+
19 | chr19	48112258	48112284	skyscraper	0	+
20 | chr1	156186229	156186266	skyscraper	0	+
21 | chr19	36631938	36631974	skyscraper	0	+
22 | chr9	45727242	45727291	skyscraper	0	+
23 | chr1	237766308	237766764	rRNA pseudogene	0	+
24 | chr1	91852785	91853147	rRNA pseudogene	0	-
25 | chr10	68805210	68805496	rRNA pseudogene	0	-
26 | chr11	77597473	77597831	rRNA pseudogene	0	+
27 | chr12	20704357	20704522	rRNA pseudogene	0	+
28 | chr16	47538629	47539297	rRNA pseudogene	0	+
29 | chr17	22023344	22023500	rRNA pseudogene	0	+
30 | chr19	22877614	22877696	rRNA pseudogene	0	-
31 | chr19	43911665	43912167	rRNA pseudogene	0	+
32 | chr2	133011919	133013768	rRNA pseudogene	0	-
33 | chr2	230045487	230045734	rRNA pseudogene	0	-
34 | chr22	22210544	22210651	rRNA pseudogene	0	+
35 | chr22	22210670	22210856	rRNA pseudogene	0	+
36 | chr4	7584186	7584364	rRNA pseudogene	0	+
37 | chr6	133593944	133594166	rRNA pseudogene	0	+
38 | chr8	70602248	70602620	rRNA pseudogene	0	-
39 | chr6	31958260	31958278	skyscraper	0	+
40 | chr15	96826149	96826167	skyscraper	0	-
41 | chr6	10887836	10887855	skyscraper	0	+
42 | chr14	102706662	102706680	skyscraper	0	-
43 | chr14	102700509	102700527	skyscraper	0	-
44 | chr3	160122388	160122416	skyscraper	0	+
45 | chr7	101460733	101460756	skyscraper	0	+
46 | chr10	72577390	72577408	skyscraper	0	+
47 | chrM	10007	10047	skyscraper	0	+
48 | chrM	1616	1656	skyscraper	0	+
49 | chrX	117415458	117415476	skyscraper	0	+
50 | chr11	32165332	32165361	skyscraper	0	-
51 | chr4	13544063	13544089	skyscraper	0	-
52 | chr20	25840928	25840996	unreliably mapped satellite repeat	0	-
53 | chr20	25844275	25844365	unreliably mapped satellite repeat	0	-
54 | chr20	25846823	25846961	unreliably mapped satellite repeat	0	-
55 | chr20	25848666	25848731	unreliably mapped satellite repeat	0	-
56 | chr20	25848505	25848565	unreliably mapped satellite repeat	0	-
57 | chr14	103988910	103988945	low complexity skyscraper	0	-
58 | 


--------------------------------------------------------------------------------
/example/inputs/ENCFF269URO.bed:
--------------------------------------------------------------------------------
 1 | chr1	185989602	185989621	-	-	+
 2 | chr22	37655271	37655275	-	-	+
 3 | chr22	37653858	37653873	-	-	+
 4 | chr10	26931300	26931319	-	-	-
 5 | chr8	38729133	38729153	-	-	+
 6 | chr5	180175225	180175245	-	-	-
 7 | chr3	49902675	49902694	-	-	-
 8 | chr12	53985071	53985089	-	-	+
 9 | chr13	23734398	23734416	-	-	-
10 | chr6	162702362	162702382	-	-	-
11 | chr9	124771227	124771248	-	-	+
12 | chr13	73440483	73440508	-	-	-
13 | chr2	218959734	218959756	-	-	+
14 | chr11	62252198	62252216	-	-	-
15 | chr7	39623875	39623898	-	-	+
16 | chr1	74744593	74744611	-	-	+
17 | chr9	74498823	74498856	-	-	+
18 | chr17	8187218	8187235	-	-	+
19 | chr19	47609001	47609027	-	-	+
20 | chr1	156216438	156216475	-	-	+
21 | chr19	36141036	36141072	-	-	+
22 | chr1	237603008	237603464	-	-	+
23 | chr1	91387228	91387590	-	-	-
24 | chr10	67045452	67045738	-	-	-
25 | chr11	77886427	77886785	-	-	+
26 | chr12	20551423	20551588	-	-	+
27 | chr16	47504718	47505386	-	-	+
28 | chr17	22524018	22524174	-	-	+
29 | chr19	22694812	22694894	-	-	-
30 | chr19	43407513	43408015	-	-	+
31 | chr2	132254346	132256195	-	-	-
32 | chr2	229180771	229181018	-	-	-
33 | chr22	21856255	21856362	-	-	+
34 | chr22	21856381	21856567	-	-	+
35 | chr4	7582459	7582637	-	-	+
36 | chr6	133272806	133273028	-	-	+
37 | chr8	69690013	69690385	-	-	-
38 | chr6	31990483	31990501	-	-	+
39 | chr15	96282920	96282938	-	-	-
40 | chr6	10887603	10887622	-	-	+
41 | chr14	102240325	102240343	-	-	-
42 | chr14	102234172	102234190	-	-	-
43 | chr3	160404600	160404628	-	-	+
44 | chr7	101817453	101817476	-	-	+
45 | chr10	70817634	70817652	-	-	+
46 | chrM	10006	10046	-	-	+
47 | chrM	1614	1654	-	-	+
48 | chrX	118281495	118281513	-	-	+
49 | chr11	32143786	32143815	-	-	-
50 | chr4	13542439	13542465	-	-	-
51 | chr20	25860292	25860360	-	-	-
52 | chr20	25863639	25863729	-	-	-
53 | chr20	25866187	25866325	-	-	-
54 | chr20	25868030	25868095	-	-	-
55 | chr20	25867869	25867929	-	-	-
56 | chr14	103522573	103522608	-	-	-
57 | 


--------------------------------------------------------------------------------
/example/inputs/InvRNA1_adapters.fasta:
--------------------------------------------------------------------------------
 1 | >InvRNA1_0
 2 | NNAGCGCTAGAGATC
 3 | >InvRNA1_1
 4 | NAGCGCTAGAGATCG
 5 | >InvRNA1_2
 6 | AGCGCTAGAGATCGG
 7 | >InvRNA1_3
 8 | GCGCTAGAGATCGGA
 9 | >InvRNA1_4
10 | CGCTAGAGATCGGAA
11 | >InvRNA1_5
12 | GCTAGAGATCGGAAG
13 | >InvRNA1_6
14 | CTAGAGATCGGAAGA
15 | >InvRNA1_7
16 | TAGAGATCGGAAGAG
17 | >InvRNA1_8
18 | AGAGATCGGAAGAGC
19 | >InvRNA1_9
20 | GAGATCGGAAGAGCA
21 | >InvRNA1_10
22 | AGATCGGAAGAGCAC
23 | >InvRNA1_11
24 | GATCGGAAGAGCACA
25 | >InvRNA1_12
26 | ATCGGAAGAGCACAC
27 | >InvRNA1_13
28 | TCGGAAGAGCACACG
29 | >InvRNA1_14
30 | CGGAAGAGCACACGT
31 | >InvRNA1_15
32 | GGAAGAGCACACGTC
33 | >InvRNA1_16
34 | GAAGAGCACACGTCT
35 | >InvRNA1_17
36 | AAGAGCACACGTCTG
37 | >InvRNA1_18
38 | AGAGCACACGTCTGA
39 | >InvRNA1_19
40 | GAGCACACGTCTGAA
41 | >InvRNA1_20
42 | AGCACACGTCTGAAC
43 | >InvRNA1_21
44 | GCACACGTCTGAACT
45 | >InvRNA1_22
46 | CACACGTCTGAACTC
47 | >InvRNA1_23
48 | ACACGTCTGAACTCC
49 | >InvRNA1_24
50 | CACGTCTGAACTCCA
51 | >InvRNA1_25
52 | ACGTCTGAACTCCAG
53 | >InvRNA1_26
54 | CGTCTGAACTCCAGT
55 | >InvRNA1_27
56 | GTCTGAACTCCAGTC
57 | >InvRNA1_28
58 | TCTGAACTCCAGTCA
59 | >InvRNA1_29
60 | CTGAACTCCAGTCAC
61 | 


--------------------------------------------------------------------------------
/example/inputs/InvRNA2_adapters.fasta:
--------------------------------------------------------------------------------
 1 | >InvRNA2_0
 2 | NNGATATCGAAGATC
 3 | >InvRNA2_1
 4 | NGATATCGAAGATCG
 5 | >InvRNA2_2
 6 | GATATCGAAGATCGG
 7 | >InvRNA2_3
 8 | ATATCGAAGATCGGA
 9 | >InvRNA2_4
10 | TATCGAAGATCGGAA
11 | >InvRNA2_5
12 | ATCGAAGATCGGAAG
13 | >InvRNA2_6
14 | TCGAAGATCGGAAGA
15 | >InvRNA2_7
16 | CGAAGATCGGAAGAG
17 | >InvRNA2_8
18 | GAAGATCGGAAGAGC
19 | >InvRNA2_9
20 | AAGATCGGAAGAGCA
21 | >InvRNA2_10
22 | AGATCGGAAGAGCAC
23 | >InvRNA2_11
24 | GATCGGAAGAGCACA
25 | >InvRNA2_12
26 | ATCGGAAGAGCACAC
27 | >InvRNA2_13
28 | TCGGAAGAGCACACG
29 | >InvRNA2_14
30 | CGGAAGAGCACACGT
31 | >InvRNA2_15
32 | GGAAGAGCACACGTC
33 | >InvRNA2_16
34 | GAAGAGCACACGTCT
35 | >InvRNA2_17
36 | AAGAGCACACGTCTG
37 | >InvRNA2_18
38 | AGAGCACACGTCTGA
39 | >InvRNA2_19
40 | GAGCACACGTCTGAA
41 | >InvRNA2_20
42 | AGCACACGTCTGAAC
43 | >InvRNA2_21
44 | GCACACGTCTGAACT
45 | >InvRNA2_22
46 | CACACGTCTGAACTC
47 | >InvRNA2_23
48 | ACACGTCTGAACTCC
49 | >InvRNA2_24
50 | CACGTCTGAACTCCA
51 | >InvRNA2_25
52 | ACGTCTGAACTCCAG
53 | >InvRNA2_26
54 | CGTCTGAACTCCAGT
55 | >InvRNA2_27
56 | GTCTGAACTCCAGTC
57 | >InvRNA2_28
58 | TCTGAACTCCAGTCA
59 | >InvRNA2_29
60 | CTGAACTCCAGTCAC
61 | 


--------------------------------------------------------------------------------
/example/inputs/InvRNA3_adapters.fasta:
--------------------------------------------------------------------------------
 1 | >InvRNA3_0
 2 | NNCGCAGACGAGATC
 3 | >InvRNA3_1
 4 | NCGCAGACGAGATCG
 5 | >InvRNA3_2
 6 | CGCAGACGAGATCGG
 7 | >InvRNA3_3
 8 | GCAGACGAGATCGGA
 9 | >InvRNA3_4
10 | CAGACGAGATCGGAA
11 | >InvRNA3_5
12 | AGACGAGATCGGAAG
13 | >InvRNA3_6
14 | GACGAGATCGGAAGA
15 | >InvRNA3_7
16 | ACGAGATCGGAAGAG
17 | >InvRNA3_8
18 | CGAGATCGGAAGAGC
19 | >InvRNA3_9
20 | GAGATCGGAAGAGCA
21 | >InvRNA3_10
22 | AGATCGGAAGAGCAC
23 | >InvRNA3_11
24 | GATCGGAAGAGCACA
25 | >InvRNA3_12
26 | ATCGGAAGAGCACAC
27 | >InvRNA3_13
28 | TCGGAAGAGCACACG
29 | >InvRNA3_14
30 | CGGAAGAGCACACGT
31 | >InvRNA3_15
32 | GGAAGAGCACACGTC
33 | >InvRNA3_16
34 | GAAGAGCACACGTCT
35 | >InvRNA3_17
36 | AAGAGCACACGTCTG
37 | >InvRNA3_18
38 | AGAGCACACGTCTGA
39 | >InvRNA3_19
40 | GAGCACACGTCTGAA
41 | >InvRNA3_20
42 | AGCACACGTCTGAAC
43 | >InvRNA3_21
44 | GCACACGTCTGAACT
45 | >InvRNA3_22
46 | CACACGTCTGAACTC
47 | >InvRNA3_23
48 | ACACGTCTGAACTCC
49 | >InvRNA3_24
50 | CACGTCTGAACTCCA
51 | >InvRNA3_25
52 | ACGTCTGAACTCCAG
53 | >InvRNA3_26
54 | CGTCTGAACTCCAGT
55 | >InvRNA3_27
56 | GTCTGAACTCCAGTC
57 | >InvRNA3_28
58 | TCTGAACTCCAGTCA
59 | >InvRNA3_29
60 | CTGAACTCCAGTCAC
61 | 


--------------------------------------------------------------------------------
/example/inputs/InvRNA4_adapters.fasta:
--------------------------------------------------------------------------------
 1 | >InvRNA4_0
 2 | NNTATGAGTAAGATC
 3 | >InvRNA4_1
 4 | NTATGAGTAAGATCG
 5 | >InvRNA4_2
 6 | TATGAGTAAGATCGG
 7 | >InvRNA4_3
 8 | ATGAGTAAGATCGGA
 9 | >InvRNA4_4
10 | TGAGTAAGATCGGAA
11 | >InvRNA4_5
12 | GAGTAAGATCGGAAG
13 | >InvRNA4_6
14 | AGTAAGATCGGAAGA
15 | >InvRNA4_7
16 | GTAAGATCGGAAGAG
17 | >InvRNA4_8
18 | TAAGATCGGAAGAGC
19 | >InvRNA4_9
20 | AAGATCGGAAGAGCA
21 | >InvRNA4_10
22 | AGATCGGAAGAGCAC
23 | >InvRNA4_11
24 | GATCGGAAGAGCACA
25 | >InvRNA4_12
26 | ATCGGAAGAGCACAC
27 | >InvRNA4_13
28 | TCGGAAGAGCACACG
29 | >InvRNA4_14
30 | CGGAAGAGCACACGT
31 | >InvRNA4_15
32 | GGAAGAGCACACGTC
33 | >InvRNA4_16
34 | GAAGAGCACACGTCT
35 | >InvRNA4_17
36 | AAGAGCACACGTCTG
37 | >InvRNA4_18
38 | AGAGCACACGTCTGA
39 | >InvRNA4_19
40 | GAGCACACGTCTGAA
41 | >InvRNA4_20
42 | AGCACACGTCTGAAC
43 | >InvRNA4_21
44 | GCACACGTCTGAACT
45 | >InvRNA4_22
46 | CACACGTCTGAACTC
47 | >InvRNA4_23
48 | ACACGTCTGAACTCC
49 | >InvRNA4_24
50 | CACGTCTGAACTCCA
51 | >InvRNA4_25
52 | ACGTCTGAACTCCAG
53 | >InvRNA4_26
54 | CGTCTGAACTCCAGT
55 | >InvRNA4_27
56 | GTCTGAACTCCAGTC
57 | >InvRNA4_28
58 | TCTGAACTCCAGTCA
59 | >InvRNA4_29
60 | CTGAACTCCAGTCAC
61 | 


--------------------------------------------------------------------------------
/example/inputs/InvRNA5_adapters.fasta:
--------------------------------------------------------------------------------
 1 | >InvRNA5_0
 2 | NNAGGTGCGTAGATC
 3 | >InvRNA5_1
 4 | NAGGTGCGTAGATCG
 5 | >InvRNA5_2
 6 | AGGTGCGTAGATCGG
 7 | >InvRNA5_3
 8 | GGTGCGTAGATCGGA
 9 | >InvRNA5_4
10 | GTGCGTAGATCGGAA
11 | >InvRNA5_5
12 | TGCGTAGATCGGAAG
13 | >InvRNA5_6
14 | GCGTAGATCGGAAGA
15 | >InvRNA5_7
16 | CGTAGATCGGAAGAG
17 | >InvRNA5_8
18 | GTAGATCGGAAGAGC
19 | >InvRNA5_9
20 | TAGATCGGAAGAGCA
21 | >InvRNA5_10
22 | AGATCGGAAGAGCAC
23 | >InvRNA5_11
24 | GATCGGAAGAGCACA
25 | >InvRNA5_12
26 | ATCGGAAGAGCACAC
27 | >InvRNA5_13
28 | TCGGAAGAGCACACG
29 | >InvRNA5_14
30 | CGGAAGAGCACACGT
31 | >InvRNA5_15
32 | GGAAGAGCACACGTC
33 | >InvRNA5_16
34 | GAAGAGCACACGTCT
35 | >InvRNA5_17
36 | AAGAGCACACGTCTG
37 | >InvRNA5_18
38 | AGAGCACACGTCTGA
39 | >InvRNA5_19
40 | GAGCACACGTCTGAA
41 | >InvRNA5_20
42 | AGCACACGTCTGAAC
43 | >InvRNA5_21
44 | GCACACGTCTGAACT
45 | >InvRNA5_22
46 | CACACGTCTGAACTC
47 | >InvRNA5_23
48 | ACACGTCTGAACTCC
49 | >InvRNA5_24
50 | CACGTCTGAACTCCA
51 | >InvRNA5_25
52 | ACGTCTGAACTCCAG
53 | >InvRNA5_26
54 | CGTCTGAACTCCAGT
55 | >InvRNA5_27
56 | GTCTGAACTCCAGTC
57 | >InvRNA5_28
58 | TCTGAACTCCAGTCA
59 | >InvRNA5_29
60 | CTGAACTCCAGTCAC
61 | 


--------------------------------------------------------------------------------
/example/inputs/InvRNA6_adapters.fasta:
--------------------------------------------------------------------------------
 1 | >InvRNA6_0
 2 | NNGAACATACAGATC
 3 | >InvRNA6_1
 4 | NGAACATACAGATCG
 5 | >InvRNA6_2
 6 | GAACATACAGATCGG
 7 | >InvRNA6_3
 8 | AACATACAGATCGGA
 9 | >InvRNA6_4
10 | ACATACAGATCGGAA
11 | >InvRNA6_5
12 | CATACAGATCGGAAG
13 | >InvRNA6_6
14 | ATACAGATCGGAAGA
15 | >InvRNA6_7
16 | TACAGATCGGAAGAG
17 | >InvRNA6_8
18 | ACAGATCGGAAGAGC
19 | >InvRNA6_9
20 | CAGATCGGAAGAGCA
21 | >InvRNA6_10
22 | AGATCGGAAGAGCAC
23 | >InvRNA6_11
24 | GATCGGAAGAGCACA
25 | >InvRNA6_12
26 | ATCGGAAGAGCACAC
27 | >InvRNA6_13
28 | TCGGAAGAGCACACG
29 | >InvRNA6_14
30 | CGGAAGAGCACACGT
31 | >InvRNA6_15
32 | GGAAGAGCACACGTC
33 | >InvRNA6_16
34 | GAAGAGCACACGTCT
35 | >InvRNA6_17
36 | AAGAGCACACGTCTG
37 | >InvRNA6_18
38 | AGAGCACACGTCTGA
39 | >InvRNA6_19
40 | GAGCACACGTCTGAA
41 | >InvRNA6_20
42 | AGCACACGTCTGAAC
43 | >InvRNA6_21
44 | GCACACGTCTGAACT
45 | >InvRNA6_22
46 | CACACGTCTGAACTC
47 | >InvRNA6_23
48 | ACACGTCTGAACTCC
49 | >InvRNA6_24
50 | CACGTCTGAACTCCA
51 | >InvRNA6_25
52 | ACGTCTGAACTCCAG
53 | >InvRNA6_26
54 | CGTCTGAACTCCAGT
55 | >InvRNA6_27
56 | GTCTGAACTCCAGTC
57 | >InvRNA6_28
58 | TCTGAACTCCAGTCA
59 | >InvRNA6_29
60 | CTGAACTCCAGTCAC
61 | 


--------------------------------------------------------------------------------
/example/inputs/InvRNA7_adapters.fasta:
--------------------------------------------------------------------------------
 1 | >InvRNA7_0
 2 | NNACATAGCGAGATC
 3 | >InvRNA7_1
 4 | NACATAGCGAGATCG
 5 | >InvRNA7_2
 6 | ACATAGCGAGATCGG
 7 | >InvRNA7_3
 8 | CATAGCGAGATCGGA
 9 | >InvRNA7_4
10 | ATAGCGAGATCGGAA
11 | >InvRNA7_5
12 | TAGCGAGATCGGAAG
13 | >InvRNA7_6
14 | AGCGAGATCGGAAGA
15 | >InvRNA7_7
16 | GCGAGATCGGAAGAG
17 | >InvRNA7_8
18 | CGAGATCGGAAGAGC
19 | >InvRNA7_9
20 | GAGATCGGAAGAGCA
21 | >InvRNA7_10
22 | AGATCGGAAGAGCAC
23 | >InvRNA7_11
24 | GATCGGAAGAGCACA
25 | >InvRNA7_12
26 | ATCGGAAGAGCACAC
27 | >InvRNA7_13
28 | TCGGAAGAGCACACG
29 | >InvRNA7_14
30 | CGGAAGAGCACACGT
31 | >InvRNA7_15
32 | GGAAGAGCACACGTC
33 | >InvRNA7_16
34 | GAAGAGCACACGTCT
35 | >InvRNA7_17
36 | AAGAGCACACGTCTG
37 | >InvRNA7_18
38 | AGAGCACACGTCTGA
39 | >InvRNA7_19
40 | GAGCACACGTCTGAA
41 | >InvRNA7_20
42 | AGCACACGTCTGAAC
43 | >InvRNA7_21
44 | GCACACGTCTGAACT
45 | >InvRNA7_22
46 | CACACGTCTGAACTC
47 | >InvRNA7_23
48 | ACACGTCTGAACTCC
49 | >InvRNA7_24
50 | CACGTCTGAACTCCA
51 | >InvRNA7_25
52 | ACGTCTGAACTCCAG
53 | >InvRNA7_26
54 | CGTCTGAACTCCAGT
55 | >InvRNA7_27
56 | GTCTGAACTCCAGTC
57 | >InvRNA7_28
58 | TCTGAACTCCAGTCA
59 | >InvRNA7_29
60 | CTGAACTCCAGTCAC
61 | 


--------------------------------------------------------------------------------
/example/inputs/InvRNA8_adapters.fasta:
--------------------------------------------------------------------------------
 1 | >InvRNA8_0
 2 | NNGTGCGATAAGATC
 3 | >InvRNA8_1
 4 | NGTGCGATAAGATCG
 5 | >InvRNA8_2
 6 | GTGCGATAAGATCGG
 7 | >InvRNA8_3
 8 | TGCGATAAGATCGGA
 9 | >InvRNA8_4
10 | GCGATAAGATCGGAA
11 | >InvRNA8_5
12 | CGATAAGATCGGAAG
13 | >InvRNA8_6
14 | GATAAGATCGGAAGA
15 | >InvRNA8_7
16 | ATAAGATCGGAAGAG
17 | >InvRNA8_8
18 | TAAGATCGGAAGAGC
19 | >InvRNA8_9
20 | AAGATCGGAAGAGCA
21 | >InvRNA8_10
22 | AGATCGGAAGAGCAC
23 | >InvRNA8_11
24 | GATCGGAAGAGCACA
25 | >InvRNA8_12
26 | ATCGGAAGAGCACAC
27 | >InvRNA8_13
28 | TCGGAAGAGCACACG
29 | >InvRNA8_14
30 | CGGAAGAGCACACGT
31 | >InvRNA8_15
32 | GGAAGAGCACACGTC
33 | >InvRNA8_16
34 | GAAGAGCACACGTCT
35 | >InvRNA8_17
36 | AAGAGCACACGTCTG
37 | >InvRNA8_18
38 | AGAGCACACGTCTGA
39 | >InvRNA8_19
40 | GAGCACACGTCTGAA
41 | >InvRNA8_20
42 | AGCACACGTCTGAAC
43 | >InvRNA8_21
44 | GCACACGTCTGAACT
45 | >InvRNA8_22
46 | CACACGTCTGAACTC
47 | >InvRNA8_23
48 | ACACGTCTGAACTCC
49 | >InvRNA8_24
50 | CACGTCTGAACTCCA
51 | >InvRNA8_25
52 | ACGTCTGAACTCCAG
53 | >InvRNA8_26
54 | CGTCTGAACTCCAGT
55 | >InvRNA8_27
56 | GTCTGAACTCCAGTC
57 | >InvRNA8_28
58 | TCTGAACTCCAGTCA
59 | >InvRNA8_29
60 | CTGAACTCCAGTCAC
61 | 


--------------------------------------------------------------------------------
/example/inputs/InvRil19_adapters.yaml:
--------------------------------------------------------------------------------
 1 | >ril19_1
 2 | AGATCGGAAGAGCAC
 3 | >ril19_2
 4 | GATCGGAAGAGCACA
 5 | >ril19_3
 6 | ATCGGAAGAGCACAC
 7 | >ril19_4
 8 | TCGGAAGAGCACACG
 9 | >ril19_5
10 | CGGAAGAGCACACGT
11 | >ril19_6
12 | GGAAGAGCACACGTC
13 | >ril19_7
14 | GAAGAGCACACGTCT
15 | >ril19_8
16 | AAGAGCACACGTCTG
17 | >ril19_9
18 | AGAGCACACGTCTGA
19 | >ril19_10
20 | GAGCACACGTCTGAA
21 | >ril19_11
22 | AGCACACGTCTGAAC
23 | >ril19_12
24 | GCACACGTCTGAACT
25 | >ril19_13
26 | CACACGTCTGAACTC
27 | >ril19_14
28 | ACACGTCTGAACTCC
29 | >ril19_15
30 | CACGTCTGAACTCCA
31 | >ril19_16
32 | ACGTCTGAACTCCAG
33 | >ril19_17
34 | CGTCTGAACTCCAGT
35 | >ril19_18
36 | GTCTGAACTCCAGTC
37 | >ril19_19
38 | TCTGAACTCCAGTCA
39 | >ril19_20
40 | CTGAACTCCAGTCAC
41 | 
42 | 


--------------------------------------------------------------------------------
/example/inputs/example_fastqs/chrom19kbp550_clip1_r1.fastq.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/example/inputs/example_fastqs/chrom19kbp550_clip1_r1.fastq.gz


--------------------------------------------------------------------------------
/example/inputs/example_fastqs/chrom19kbp550_clip1_r2.fastq.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/example/inputs/example_fastqs/chrom19kbp550_clip1_r2.fastq.gz


--------------------------------------------------------------------------------
/example/inputs/example_fastqs/chrom19kbp550_clip2_r1.fastq.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/example/inputs/example_fastqs/chrom19kbp550_clip2_r1.fastq.gz


--------------------------------------------------------------------------------
/example/inputs/example_fastqs/chrom19kbp550_clip2_r2.fastq.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/example/inputs/example_fastqs/chrom19kbp550_clip2_r2.fastq.gz


--------------------------------------------------------------------------------
/example/inputs/example_fastqs/chrom19kbp550_input_r1.fastq.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/example/inputs/example_fastqs/chrom19kbp550_input_r1.fastq.gz


--------------------------------------------------------------------------------
/example/inputs/example_fastqs/chrom19kbp550_input_r2.fastq.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/example/inputs/example_fastqs/chrom19kbp550_input_r2.fastq.gz


--------------------------------------------------------------------------------
/example/inputs/hg113seqs_repbase_starindex/SA:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/example/inputs/hg113seqs_repbase_starindex/SA


--------------------------------------------------------------------------------
/example/inputs/hg113seqs_repbase_starindex/SAindex:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/example/inputs/hg113seqs_repbase_starindex/SAindex


--------------------------------------------------------------------------------
/example/inputs/hg113seqs_repbase_starindex/chrLength.txt:
--------------------------------------------------------------------------------
  1 | 80
  2 | 87
  3 | 81
  4 | 84
  5 | 3036
  6 | 129
  7 | 378
  8 | 968
  9 | 586
 10 | 5035
 11 | 273
 12 | 254
 13 | 507
 14 | 3565
 15 | 387
 16 | 287
 17 | 798
 18 | 519
 19 | 930
 20 | 1084
 21 | 353
 22 | 611
 23 | 335
 24 | 586
 25 | 357
 26 | 510
 27 | 83
 28 | 76
 29 | 77
 30 | 75
 31 | 74
 32 | 76
 33 | 75
 34 | 81
 35 | 78
 36 | 1869
 37 | 85
 38 | 85
 39 | 1464
 40 | 1871
 41 | 1902
 42 | 1995
 43 | 2090
 44 | 92
 45 | 1803
 46 | 2137
 47 | 86
 48 | 791
 49 | 75
 50 | 3788
 51 | 75
 52 | 77
 53 | 76
 54 | 76
 55 | 1804
 56 | 1798
 57 | 1647
 58 | 3798
 59 | 990
 60 | 75
 61 | 86
 62 | 86
 63 | 3375
 64 | 3509
 65 | 77
 66 | 1834
 67 | 85
 68 | 2690
 69 | 73
 70 | 85
 71 | 85
 72 | 89
 73 | 85
 74 | 3241
 75 | 3655
 76 | 3900
 77 | 85
 78 | 75
 79 | 92
 80 | 75
 81 | 85
 82 | 75
 83 | 94
 84 | 586
 85 | 81
 86 | 75
 87 | 77
 88 | 75
 89 | 76
 90 | 77
 91 | 76
 92 | 75
 93 | 76
 94 | 77
 95 | 77
 96 | 75
 97 | 76
 98 | 76
 99 | 76
100 | 1332
101 | 1332
102 | 83
103 | 471
104 | 254
105 | 259
106 | 896
107 | 198
108 | 198
109 | 712
110 | 198
111 | 595
112 | 712
113 | 712
114 | 


--------------------------------------------------------------------------------
/example/inputs/hg113seqs_repbase_starindex/chrName.txt:
--------------------------------------------------------------------------------
  1 | tRNAGlyGGC_CB
  2 | tRNAAlaAGC_CB
  3 | tRNAIleATT_CB
  4 | tRNAThr_CB
  5 | 5SrRNA-1_BG
  6 | 5SrRNA_AN
  7 | tRNASAT-1_ZM
  8 | BAGY2_HV_MRNA
  9 | MARNA
 10 | LSU-rRNA_Hsa
 11 | Talud
 12 | Talua
 13 | Dorna1cons
 14 | LSU-rRNA_Mfr
 15 | Taluc
 16 | Talub
 17 | MARINERNA10_MD
 18 | MARINERNA3_MD
 19 | MARINERNA7_MD
 20 | MARINERNA8_MD
 21 | MARINERNA4_MD
 22 | MARINERNA9_MD
 23 | MARINERNA1_ME
 24 | MARNA
 25 | MARINERNA1_MD
 26 | MARINERNA12_MD
 27 | RRNA45
 28 | TRNA_ALA
 29 | TRNA_ASN
 30 | TRNA_GLU
 31 | TRNA_GLY
 32 | TRNA_VAL
 33 | tRNA-His-CAY_
 34 | tRNAGlnTTG_CB
 35 | tRNA-Leu-TTA(m)
 36 | SSU-rRNA_Hsa
 37 | tRNA-Ser-TCG
 38 | tRNA-Ser-TCA_
 39 | SSU-rRNA_Giardia
 40 | SSU-rRNA_Ddi
 41 | SSU-rRNA_Ath
 42 | SSU-rRNA_Dme
 43 | SSU-rRNA_Pfa
 44 | tRNALeuCTT_CB
 45 | SSU-rRNA_Lvi
 46 | SSU-rRNA_Lma
 47 | tRNA-Leu-TTG
 48 | LSU-rRNA_Tps
 49 | tRNA-Gln-CAG
 50 | LSU-rRNA_Pfa
 51 | tRNA-Gln-CAA
 52 | tRNA-Arg-CGY
 53 | tRNA-Tyr-TAC
 54 | tRNA-Tyr-TAT
 55 | SSU-rRNA_Tps
 56 | SSU-rRNA_Sce
 57 | SSU-rRNA_Cel
 58 | LSU-rRNA_Sce
 59 | LSU-rRNA_Ldo
 60 | tRNA-Pro-CCA
 61 | tRNA-Leu-TTA
 62 | tRNA-Leu-CTG
 63 | LSU-rRNA_Ath
 64 | LSU-rRNA_Cel
 65 | tRNA-Ile-ATA
 66 | SSU-rRNA_Sme
 67 | tRNA-Leu-CTA_
 68 | LSU-rRNA_Giardia
 69 | tRNA-Ser-TCA(m)
 70 | tRNA-Leu-CTY
 71 | tRNA-Leu-CTA
 72 | tRNA-SeC(e)-TGA
 73 | tRNA-Ser-TCY
 74 | LSU-rRNA_Ddi
 75 | LSU-rRNA_Hca
 76 | LSU-rRNA_Dme
 77 | tRNA-Ser-AGY
 78 | tRNA-His-CAY
 79 | tRNASerTCT_CB
 80 | tRNA-Gln-CAA_
 81 | tRNA-Ser-TCA
 82 | tRNA-Pro-CCG
 83 | 4.5SRNA
 84 | MARNA
 85 | RNALUIII
 86 | tRNA-Ala-GCA
 87 | tRNA-Thr-ACG_
 88 | tRNA-Asp-GAY
 89 | tRNA-Lys-AAG
 90 | tRNA-Ile-ATC
 91 | tRNA-Val-GTA
 92 | tRNA-Ala-GCY_
 93 | tRNA-Met
 94 | tRNA-Ile-ATT
 95 | tRNA-Thr-ACY_
 96 | tRNA-Met-i
 97 | tRNA-Arg-CGA
 98 | tRNA-Met_
 99 | tRNA-Arg-CGG
100 | TN10MRNA_NA
101 | TN10MRNA_NA
102 | RRNA45
103 | MARNA
104 | Talua
105 | Talua
106 | MARINERNA11_MD
107 | MARINERNA6_MD
108 | MARINERNA6_MD
109 | MARINERNA6A_MD
110 | MARINERNA6_MD
111 | MARINERNA5_MD
112 | MARINERNA6A_MD
113 | MARINERNA6A_MD
114 | 


--------------------------------------------------------------------------------
/example/inputs/hg113seqs_repbase_starindex/chrNameLength.txt:
--------------------------------------------------------------------------------
  1 | tRNAGlyGGC_CB	80
  2 | tRNAAlaAGC_CB	87
  3 | tRNAIleATT_CB	81
  4 | tRNAThr_CB	84
  5 | 5SrRNA-1_BG	3036
  6 | 5SrRNA_AN	129
  7 | tRNASAT-1_ZM	378
  8 | BAGY2_HV_MRNA	968
  9 | MARNA	586
 10 | LSU-rRNA_Hsa	5035
 11 | Talud	273
 12 | Talua	254
 13 | Dorna1cons	507
 14 | LSU-rRNA_Mfr	3565
 15 | Taluc	387
 16 | Talub	287
 17 | MARINERNA10_MD	798
 18 | MARINERNA3_MD	519
 19 | MARINERNA7_MD	930
 20 | MARINERNA8_MD	1084
 21 | MARINERNA4_MD	353
 22 | MARINERNA9_MD	611
 23 | MARINERNA1_ME	335
 24 | MARNA	586
 25 | MARINERNA1_MD	357
 26 | MARINERNA12_MD	510
 27 | RRNA45	83
 28 | TRNA_ALA	76
 29 | TRNA_ASN	77
 30 | TRNA_GLU	75
 31 | TRNA_GLY	74
 32 | TRNA_VAL	76
 33 | tRNA-His-CAY_	75
 34 | tRNAGlnTTG_CB	81
 35 | tRNA-Leu-TTA(m)	78
 36 | SSU-rRNA_Hsa	1869
 37 | tRNA-Ser-TCG	85
 38 | tRNA-Ser-TCA_	85
 39 | SSU-rRNA_Giardia	1464
 40 | SSU-rRNA_Ddi	1871
 41 | SSU-rRNA_Ath	1902
 42 | SSU-rRNA_Dme	1995
 43 | SSU-rRNA_Pfa	2090
 44 | tRNALeuCTT_CB	92
 45 | SSU-rRNA_Lvi	1803
 46 | SSU-rRNA_Lma	2137
 47 | tRNA-Leu-TTG	86
 48 | LSU-rRNA_Tps	791
 49 | tRNA-Gln-CAG	75
 50 | LSU-rRNA_Pfa	3788
 51 | tRNA-Gln-CAA	75
 52 | tRNA-Arg-CGY	77
 53 | tRNA-Tyr-TAC	76
 54 | tRNA-Tyr-TAT	76
 55 | SSU-rRNA_Tps	1804
 56 | SSU-rRNA_Sce	1798
 57 | SSU-rRNA_Cel	1647
 58 | LSU-rRNA_Sce	3798
 59 | LSU-rRNA_Ldo	990
 60 | tRNA-Pro-CCA	75
 61 | tRNA-Leu-TTA	86
 62 | tRNA-Leu-CTG	86
 63 | LSU-rRNA_Ath	3375
 64 | LSU-rRNA_Cel	3509
 65 | tRNA-Ile-ATA	77
 66 | SSU-rRNA_Sme	1834
 67 | tRNA-Leu-CTA_	85
 68 | LSU-rRNA_Giardia	2690
 69 | tRNA-Ser-TCA(m)	73
 70 | tRNA-Leu-CTY	85
 71 | tRNA-Leu-CTA	85
 72 | tRNA-SeC(e)-TGA	89
 73 | tRNA-Ser-TCY	85
 74 | LSU-rRNA_Ddi	3241
 75 | LSU-rRNA_Hca	3655
 76 | LSU-rRNA_Dme	3900
 77 | tRNA-Ser-AGY	85
 78 | tRNA-His-CAY	75
 79 | tRNASerTCT_CB	92
 80 | tRNA-Gln-CAA_	75
 81 | tRNA-Ser-TCA	85
 82 | tRNA-Pro-CCG	75
 83 | 4.5SRNA	94
 84 | MARNA	586
 85 | RNALUIII	81
 86 | tRNA-Ala-GCA	75
 87 | tRNA-Thr-ACG_	77
 88 | tRNA-Asp-GAY	75
 89 | tRNA-Lys-AAG	76
 90 | tRNA-Ile-ATC	77
 91 | tRNA-Val-GTA	76
 92 | tRNA-Ala-GCY_	75
 93 | tRNA-Met	76
 94 | tRNA-Ile-ATT	77
 95 | tRNA-Thr-ACY_	77
 96 | tRNA-Met-i	75
 97 | tRNA-Arg-CGA	76
 98 | tRNA-Met_	76
 99 | tRNA-Arg-CGG	76
100 | TN10MRNA_NA	1332
101 | TN10MRNA_NA	1332
102 | RRNA45	83
103 | MARNA	471
104 | Talua	254
105 | Talua	259
106 | MARINERNA11_MD	896
107 | MARINERNA6_MD	198
108 | MARINERNA6_MD	198
109 | MARINERNA6A_MD	712
110 | MARINERNA6_MD	198
111 | MARINERNA5_MD	595
112 | MARINERNA6A_MD	712
113 | MARINERNA6A_MD	712
114 | 


--------------------------------------------------------------------------------
/example/inputs/hg113seqs_repbase_starindex/chrStart.txt:
--------------------------------------------------------------------------------
  1 | 0
  2 | 262144
  3 | 524288
  4 | 786432
  5 | 1048576
  6 | 1310720
  7 | 1572864
  8 | 1835008
  9 | 2097152
 10 | 2359296
 11 | 2621440
 12 | 2883584
 13 | 3145728
 14 | 3407872
 15 | 3670016
 16 | 3932160
 17 | 4194304
 18 | 4456448
 19 | 4718592
 20 | 4980736
 21 | 5242880
 22 | 5505024
 23 | 5767168
 24 | 6029312
 25 | 6291456
 26 | 6553600
 27 | 6815744
 28 | 7077888
 29 | 7340032
 30 | 7602176
 31 | 7864320
 32 | 8126464
 33 | 8388608
 34 | 8650752
 35 | 8912896
 36 | 9175040
 37 | 9437184
 38 | 9699328
 39 | 9961472
 40 | 10223616
 41 | 10485760
 42 | 10747904
 43 | 11010048
 44 | 11272192
 45 | 11534336
 46 | 11796480
 47 | 12058624
 48 | 12320768
 49 | 12582912
 50 | 12845056
 51 | 13107200
 52 | 13369344
 53 | 13631488
 54 | 13893632
 55 | 14155776
 56 | 14417920
 57 | 14680064
 58 | 14942208
 59 | 15204352
 60 | 15466496
 61 | 15728640
 62 | 15990784
 63 | 16252928
 64 | 16515072
 65 | 16777216
 66 | 17039360
 67 | 17301504
 68 | 17563648
 69 | 17825792
 70 | 18087936
 71 | 18350080
 72 | 18612224
 73 | 18874368
 74 | 19136512
 75 | 19398656
 76 | 19660800
 77 | 19922944
 78 | 20185088
 79 | 20447232
 80 | 20709376
 81 | 20971520
 82 | 21233664
 83 | 21495808
 84 | 21757952
 85 | 22020096
 86 | 22282240
 87 | 22544384
 88 | 22806528
 89 | 23068672
 90 | 23330816
 91 | 23592960
 92 | 23855104
 93 | 24117248
 94 | 24379392
 95 | 24641536
 96 | 24903680
 97 | 25165824
 98 | 25427968
 99 | 25690112
100 | 25952256
101 | 26214400
102 | 26476544
103 | 26738688
104 | 27000832
105 | 27262976
106 | 27525120
107 | 27787264
108 | 28049408
109 | 28311552
110 | 28573696
111 | 28835840
112 | 29097984
113 | 29360128
114 | 29622272
115 | 


--------------------------------------------------------------------------------
/example/inputs/hg113seqs_repbase_starindex/genomeParameters.txt:
--------------------------------------------------------------------------------
 1 | versionGenome	20201
 2 | genomeFastaFiles	small_repelements.fa 
 3 | genomeSAindexNbases	8
 4 | genomeChrBinNbits	18
 5 | genomeSAsparseD	1
 6 | sjdbOverhang	0
 7 | sjdbFileChrStartEnd	- 
 8 | sjdbGTFfile	-
 9 | sjdbGTFchrPrefix	-
10 | sjdbGTFfeatureExon	exon
11 | sjdbGTFtagExonParentTranscript	transcript_id
12 | sjdbGTFtagExonParentGene	gene_id
13 | 


--------------------------------------------------------------------------------
/example/inputs/hg19.chrom.sizes:
--------------------------------------------------------------------------------
 1 | chr1	249250621
 2 | chr2	243199373
 3 | chr3	198022430
 4 | chr4	191154276
 5 | chr5	180915260
 6 | chr6	171115067
 7 | chr7	159138663
 8 | chrX	155270560
 9 | chr8	146364022
10 | chr9	141213431
11 | chr10	135534747
12 | chr11	135006516
13 | chr12	133851895
14 | chr13	115169878
15 | chr14	107349540
16 | chr15	102531392
17 | chr16	90354753
18 | chr17	81195210
19 | chr18	78077248
20 | chr20	63025520
21 | chrY	59373566
22 | chr19	59128983
23 | chr22	51304566
24 | chr21	48129895
25 | chr6_ssto_hap7	4928567
26 | chr6_mcf_hap5	4833398
27 | chr6_cox_hap2	4795371
28 | chr6_mann_hap4	4683263
29 | chr6_apd_hap1	4622290
30 | chr6_qbl_hap6	4611984
31 | chr6_dbb_hap3	4610396
32 | chr17_ctg5_hap1	1680828
33 | chr4_ctg9_hap1	590426
34 | chr1_gl000192_random	547496
35 | chrUn_gl000225	211173
36 | chr4_gl000194_random	191469
37 | chr4_gl000193_random	189789
38 | chr9_gl000200_random	187035
39 | chrUn_gl000222	186861
40 | chrUn_gl000212	186858
41 | chr7_gl000195_random	182896
42 | chrUn_gl000223	180455
43 | chrUn_gl000224	179693
44 | chrUn_gl000219	179198
45 | chr17_gl000205_random	174588
46 | chrUn_gl000215	172545
47 | chrUn_gl000216	172294
48 | chrUn_gl000217	172149
49 | chr9_gl000199_random	169874
50 | chrUn_gl000211	166566
51 | chrUn_gl000213	164239
52 | chrUn_gl000220	161802
53 | chrUn_gl000218	161147
54 | chr19_gl000209_random	159169
55 | chrUn_gl000221	155397
56 | chrUn_gl000214	137718
57 | chrUn_gl000228	129120
58 | chrUn_gl000227	128374
59 | chr1_gl000191_random	106433
60 | chr19_gl000208_random	92689
61 | chr9_gl000198_random	90085
62 | chr17_gl000204_random	81310
63 | chrUn_gl000233	45941
64 | chrUn_gl000237	45867
65 | chrUn_gl000230	43691
66 | chrUn_gl000242	43523
67 | chrUn_gl000243	43341
68 | chrUn_gl000241	42152
69 | chrUn_gl000236	41934
70 | chrUn_gl000240	41933
71 | chr17_gl000206_random	41001
72 | chrUn_gl000232	40652
73 | chrUn_gl000234	40531
74 | chr11_gl000202_random	40103
75 | chrUn_gl000238	39939
76 | chrUn_gl000244	39929
77 | chrUn_gl000248	39786
78 | chr8_gl000196_random	38914
79 | chrUn_gl000249	38502
80 | chrUn_gl000246	38154
81 | chr17_gl000203_random	37498
82 | chr8_gl000197_random	37175
83 | chrUn_gl000245	36651
84 | chrUn_gl000247	36422
85 | chr9_gl000201_random	36148
86 | chrUn_gl000235	34474
87 | chrUn_gl000239	33824
88 | chr21_gl000210_random	27682
89 | chrUn_gl000231	27386
90 | chrUn_gl000229	19913
91 | chrM	16571
92 | chrUn_gl000226	15008
93 | chr18_gl000207_random	4262
94 | 


--------------------------------------------------------------------------------
/example/inputs/hg19chr19.chrom.sizes:
--------------------------------------------------------------------------------
1 | chr19	59128983
2 | 


--------------------------------------------------------------------------------
/example/inputs/hg19chr19kbp550_starindex/SA:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/example/inputs/hg19chr19kbp550_starindex/SA


--------------------------------------------------------------------------------
/example/inputs/hg19chr19kbp550_starindex/SAindex:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/example/inputs/hg19chr19kbp550_starindex/SAindex


--------------------------------------------------------------------------------
/example/inputs/hg19chr19kbp550_starindex/chrLength.txt:
--------------------------------------------------------------------------------
1 | 550000
2 | 


--------------------------------------------------------------------------------
/example/inputs/hg19chr19kbp550_starindex/chrName.txt:
--------------------------------------------------------------------------------
1 | chr19
2 | 


--------------------------------------------------------------------------------
/example/inputs/hg19chr19kbp550_starindex/chrNameLength.txt:
--------------------------------------------------------------------------------
1 | chr19	550000
2 | 


--------------------------------------------------------------------------------
/example/inputs/hg19chr19kbp550_starindex/chrStart.txt:
--------------------------------------------------------------------------------
1 | 0
2 | 786432
3 | 


--------------------------------------------------------------------------------
/example/inputs/hg19chr19kbp550_starindex/genomeParameters.txt:
--------------------------------------------------------------------------------
 1 | ### STAR   --runMode genomeGenerate   --runThreadN 8   --genomeDir chr19_550000bases_index   --genomeFastaFiles chr19_550000bases.fa      --genomeSAindexNbases 9
 2 | versionGenome	20201
 3 | genomeFastaFiles	chr19_550000bases.fa 
 4 | genomeSAindexNbases	9
 5 | genomeChrBinNbits	18
 6 | genomeSAsparseD	1
 7 | sjdbOverhang	0
 8 | sjdbFileChrStartEnd	- 
 9 | sjdbGTFfile	-
10 | sjdbGTFchrPrefix	-
11 | sjdbGTFfeatureExon	exon
12 | sjdbGTFtagExonParentTranscript	transcript_id
13 | sjdbGTFtagExonParentGene	gene_id
14 | 


--------------------------------------------------------------------------------
/example/inputs/yeolabbarcodes_20170101.fasta:
--------------------------------------------------------------------------------
 1 | >A01
 2 | AAGCAAT
 3 | >A03
 4 | ATGACCNNNNT
 5 | >A04
 6 | CAGCTTNNNNT
 7 | >B06
 8 | GGCTTGT
 9 | >C01
10 | ACAAGTT
11 | >D8f
12 | TGGTCCT
13 | >F05
14 | GGATACNNNNT
15 | >G07
16 | TCCTGTNNNNT
17 | >X1A
18 | NNNNNCCTATAT
19 | >X1B
20 | NNNNNTGCTATT
21 | >X2A
22 | NNNNNTATACTT
23 | >X2B
24 | NNNNNATCTTCT
25 | 
26 | 


--------------------------------------------------------------------------------
/example/paired_end_clip.yaml:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env eCLIP_pairedend_singlenode
 2 | # Job file for the paired-end eCLIP runner named in the shebang on line 1.
 3 | dataset: "204_01"
 4 | # Reference directories; presumably STAR indexes for the genome and for repeat elements (confirm).
 5 | speciesGenomeDir:
 6 |   class: Directory
 7 |   path: /home/centos/refs/STAR
 8 | 
 9 | repeatElementGenomeDir:
10 |   class: Directory
11 |   path: /home/centos/refs/STAR_repeat/
12 | 
13 | species: hg19
14 | # chrom_sizes points at chrNameLength.txt inside the speciesGenomeDir path above.
15 | chrom_sizes:
16 |   class: File
17 |   path: /home/centos/refs/STAR/chrNameLength.txt
18 | # FASTA of barcode sequences; the barcodeids used below (A01, B06, C01, D8f) are record names in that file.
19 | barcodesfasta:
20 |   class: File
21 |   path: /home/centos/eclip/example/inputs/yeolabbarcodes_20170101.fasta
22 | # Note: randomer_length is written as a quoted string, not as an integer.
23 | randomer_length: "5"
24 | # samples: one inner list per replicate, each holding an ip_read entry and an input_read entry.
25 | samples:
26 |   -
27 |     - ip_read:
28 |       name: rep1_clip
29 |       barcodeids: [A01, B06]
30 |       read1:
31 |         class: File
32 |         path: /home/centos/fastqs/RBFOX2-204-CLIP_S1_R1.fastq.gz
33 |       read2:
34 |         class: File
35 |         path: /home/centos/fastqs/RBFOX2-204-CLIP_S1_R2.fastq.gz
36 | # `ip_read:` / `input_read:` carry no value; name, barcodeids, read1 and read2 are sibling keys of the same mapping.
37 |     - input_read:
38 |       name: rep1_input
39 |       barcodeids: [NIL, NIL]
40 |       read1:
41 |         class: File
42 |         path: /home/centos/fastqs/RBFOX2-204-INPUT_S2_R1.fastq.gz
43 |       read2:
44 |         class: File
45 |         path: /home/centos/fastqs/RBFOX2-204-INPUT_S2_R2.fastq.gz
46 |   -
47 |     - ip_read:
48 |       name: rep2_clip
49 |       barcodeids: [C01, D8f]
50 |       read1:
51 |         class: File
52 |         path: /home/centos/fastqs/RBFOX2-204-CLIP_S1_R1.fastq.gz
53 |       read2:
54 |         class: File
55 |         path: /home/centos/fastqs/RBFOX2-204-CLIP_S1_R2.fastq.gz
56 | # rep2 points at the same FASTQ files as rep1; only the ip_read barcodeids differ (C01/D8f instead of A01/B06).
57 |     - input_read:
58 |       name: rep2_input
59 |       barcodeids: [NIL, NIL]
60 |       read1:
61 |         class: File
62 |         path: /home/centos/fastqs/RBFOX2-204-INPUT_S2_R1.fastq.gz
63 |       read2:
64 |         class: File
65 |         path: /home/centos/fastqs/RBFOX2-204-INPUT_S2_R2.fastq.gz
66 | 
67 | 


--------------------------------------------------------------------------------
/example/single_end_clip.yaml:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env eCLIP_singleend_singlenode
 2 | # Job file for the single-end eCLIP runner named in the shebang on line 1.
 3 | dataset: ENCODE4
 4 | 
 5 | species: hg19
 6 | # chrom_sizes points at chrNameLength.txt inside the speciesGenomeDir path given below.
 7 | chrom_sizes:
 8 |   class: File
 9 |   path: /home/centos/refs/STAR/chrNameLength.txt
10 | # Reference directories; presumably STAR indexes for the genome and for repeat elements (confirm).
11 | speciesGenomeDir:
12 |   class: Directory
13 |   path: /home/centos/refs/STAR
14 | 
15 | repeatElementGenomeDir:
16 |   class: Directory
17 |   path: /home/centos/refs/STAR_repeat/
18 | # samples: one inner list per replicate, holding an ip_read entry and an input_read entry.
19 | samples:
20 |   - 
21 |     - ip_read:
22 |       name: IP
23 |       read1:
24 |         class: File
25 |         path: /home/centos/seRBFOX2/INV_IP_B_S58_L005_R1_001.fastq.gz
26 |       adapters:
27 |         class: File
28 |         path: /home/centos/eclip/example/inputs/InvRil19_adapters.yaml
29 | # `ip_read:` / `input_read:` carry no value; name, read1 and adapters are sibling keys. Both entries use the same adapters file.
30 |     - input_read:
31 |       name: INPUT
32 |       read1:
33 |         class: File
34 |         path: /home/centos/seRBFOX2/INV_IN_B_S57_L005_R1_001.fastq.gz
35 |       adapters:
36 |         class: File
37 |         path: /home/centos/eclip/example/inputs/InvRil19_adapters.yaml
38 | # BED file of blacklisted regions; presumably consumed by the blacklist-removal step (confirm).
39 | blacklist_file:
40 |   class: File
41 |   path: /home/centos/eclip/example/inputs/ENCFF039QTN.bed
42 | 


--------------------------------------------------------------------------------
/tests/eCLIP-0.7.0/01_umi_tools_extract/run_demux_se.sh:
--------------------------------------------------------------------------------
 1 | # umi_tools 1.0.0
 2 | 
 3 | umi_tools extract \
 4 | --random-seed 1 \
 5 | --bc-pattern NNNNNNNNNN \
 6 | --stdin inputs/seRBFOX2/INV_IP_B_S58_L005_R1_001.fastq.gz \
 7 | --stdout rep1.IP.umi.r1.fq.gz \
 8 | --log rep1.IP.---.--.metrics
 9 | 
10 | umi_tools extract \
11 | --random-seed 1 \
12 | --bc-pattern NNNNNNNNNN \
13 | --stdin inputs/seRBFOX2/INV_IN_B_S57_L006_R1_001.fastq.gz \
14 | --stdout rep1.IN.umi.r1.fq.gz \
15 | --log rep1.IN.---.--.metrics
16 | 


--------------------------------------------------------------------------------
/tests/eCLIP-0.7.0/02_cutadapt_round1/run_cutadapt.sh:
--------------------------------------------------------------------------------
 1 | # cutadapt 1.14 
 2 | 
 3 | cutadapt -O 1 \
 4 | -f fastq \
 5 | --match-read-wildcards \
 6 | --times 1 \
 7 | -e 0.1 \
 8 | --quality-cutoff 6 \
 9 | -m 18 \
10 | -o rep1.IP.umi.r1.fqTr.fq.gz \
11 | -a AGATCGGAAGAGCAC \
12 | -a GATCGGAAGAGCACA \
13 | -a ATCGGAAGAGCACAC \
14 | -a TCGGAAGAGCACACG \
15 | -a CGGAAGAGCACACGT \
16 | -a GGAAGAGCACACGTC \
17 | -a GAAGAGCACACGTCT \
18 | -a AAGAGCACACGTCTG \
19 | -a AGAGCACACGTCTGA \
20 | -a GAGCACACGTCTGAA \
21 | -a AGCACACGTCTGAAC \
22 | -a GCACACGTCTGAACT \
23 | -a CACACGTCTGAACTC \
24 | -a ACACGTCTGAACTCC \
25 | -a CACGTCTGAACTCCA \
26 | -a ACGTCTGAACTCCAG \
27 | -a CGTCTGAACTCCAGT \
28 | -a GTCTGAACTCCAGTC \
29 | -a TCTGAACTCCAGTCA \
30 | -a CTGAACTCCAGTCAC \
31 | ../01_umi_tools_extract/rep1.IP.umi.r1.fq.gz > rep1.IP.umi.r1.fqTr.metrics
32 | 
33 | cutadapt -O 1 \
34 | -f fastq \
35 | --match-read-wildcards \
36 | --times 1 \
37 | -e 0.1 \
38 | --quality-cutoff 6 \
39 | -m 18 \
40 | -o rep1.IN.umi.r1.fqTr.fq.gz \
41 | -a AGATCGGAAGAGCAC \
42 | -a GATCGGAAGAGCACA \
43 | -a ATCGGAAGAGCACAC \
44 | -a TCGGAAGAGCACACG \
45 | -a CGGAAGAGCACACGT \
46 | -a GGAAGAGCACACGTC \
47 | -a GAAGAGCACACGTCT \
48 | -a AAGAGCACACGTCTG \
49 | -a AGAGCACACGTCTGA \
50 | -a GAGCACACGTCTGAA \
51 | -a AGCACACGTCTGAAC \
52 | -a GCACACGTCTGAACT \
53 | -a CACACGTCTGAACTC \
54 | -a ACACGTCTGAACTCC \
55 | -a CACGTCTGAACTCCA \
56 | -a ACGTCTGAACTCCAG \
57 | -a CGTCTGAACTCCAGT \
58 | -a GTCTGAACTCCAGTC \
59 | -a TCTGAACTCCAGTCA \
60 | -a CTGAACTCCAGTCAC \
61 | ../01_umi_tools_extract/rep1.IN.umi.r1.fq.gz > rep1.IN.umi.r1.fqTr.metrics
62 | 


--------------------------------------------------------------------------------
/tests/eCLIP-0.7.0/03_cutadapt_round2/run_cutadapt.sh:
--------------------------------------------------------------------------------
 1 | # cutadapt 1.14
 2 | 
 3 | cutadapt \
 4 | -O 5 \
 5 | -f fastq \
 6 | --match-read-wildcards \
 7 | --times 1 \
 8 | -e 0.1 \
 9 | --quality-cutoff 6 \
10 | -m 18 \
11 | -o rep1.IP.umi.r1.fqTrTr.fq.gz \
12 | -a AGATCGGAAGAGCAC \
13 | -a GATCGGAAGAGCACA \
14 | -a ATCGGAAGAGCACAC \
15 | -a TCGGAAGAGCACACG \
16 | -a CGGAAGAGCACACGT \
17 | -a GGAAGAGCACACGTC \
18 | -a GAAGAGCACACGTCT \
19 | -a AAGAGCACACGTCTG \
20 | -a AGAGCACACGTCTGA \
21 | -a GAGCACACGTCTGAA \
22 | -a AGCACACGTCTGAAC \
23 | -a GCACACGTCTGAACT \
24 | -a CACACGTCTGAACTC \
25 | -a ACACGTCTGAACTCC \
26 | -a CACGTCTGAACTCCA \
27 | -a ACGTCTGAACTCCAG \
28 | -a CGTCTGAACTCCAGT \
29 | -a GTCTGAACTCCAGTC \
30 | -a TCTGAACTCCAGTCA \
31 | -a CTGAACTCCAGTCAC \
32 | ../02_cutadapt_round1/rep1.IP.umi.r1.fqTr.fq.gz > rep1.IP.umi.r1.fqTrTr.metrics
33 | 
34 | cutadapt \
35 | -O 5 \
36 | -f fastq \
37 | --match-read-wildcards \
38 | --times 1 \
39 | -e 0.1 \
40 | --quality-cutoff 6 \
41 | -m 18 \
42 | -o rep1.IN.umi.r1.fqTrTr.fq.gz \
43 | -a AGATCGGAAGAGCAC \
44 | -a GATCGGAAGAGCACA \
45 | -a ATCGGAAGAGCACAC \
46 | -a TCGGAAGAGCACACG \
47 | -a CGGAAGAGCACACGT \
48 | -a GGAAGAGCACACGTC \
49 | -a GAAGAGCACACGTCT \
50 | -a AAGAGCACACGTCTG \
51 | -a AGAGCACACGTCTGA \
52 | -a GAGCACACGTCTGAA \
53 | -a AGCACACGTCTGAAC \
54 | -a GCACACGTCTGAACT \
55 | -a CACACGTCTGAACTC \
56 | -a ACACGTCTGAACTCC \
57 | -a CACGTCTGAACTCCA \
58 | -a ACGTCTGAACTCCAG \
59 | -a CGTCTGAACTCCAGT \
60 | -a GTCTGAACTCCAGTC \
61 | -a TCTGAACTCCAGTCA \
62 | -a CTGAACTCCAGTCAC \
63 | ../02_cutadapt_round1/rep1.IN.umi.r1.fqTr.fq.gz > rep1.IN.umi.r1.fqTrTr.metrics
64 | 


--------------------------------------------------------------------------------
/tests/eCLIP-0.7.0/04_fastq_sort/run_fastq-sort.sh:
--------------------------------------------------------------------------------
 1 | # fastqtools 0.8
 2 | 
 3 | echo $(date +%x,%r) > TIMES.txt;
 4 | zcat ../03_cutadapt_round2/rep1.IP.umi.r1.fqTrTr.fq.gz > rep1.IP.umi.r1.fqTrTr.fq
 5 | echo $(date +%x,%r) >> TIMES.txt;
 6 | fastq-sort --id rep1.IP.umi.r1.fqTrTr.fq > rep1.IP.umi.r1.fqTrTr.sorted.fq
 7 | echo $(date +%x,%r) >> TIMES.txt;
 8 | 
 9 | echo $(date +%x,%r) >> TIMES.txt;
10 | zcat ../03_cutadapt_round2/rep1.IN.umi.r1.fqTrTr.fq.gz > rep1.IN.umi.r1.fqTrTr.fq
11 | echo $(date +%x,%r) >> TIMES.txt;
12 | fastq-sort --id rep1.IN.umi.r1.fqTrTr.fq > rep1.IN.umi.r1.fqTrTr.sorted.fq
13 | echo $(date +%x,%r) >> TIMES.txt;
14 | 


--------------------------------------------------------------------------------
/tests/eCLIP-0.7.0/05_star_repeat/run_star.sh:
--------------------------------------------------------------------------------
 1 | # STAR 2.7.6a
 2 | echo $(date +%x,%r) > TIMES.txt;
 3 | STAR \
 4 | --alignEndsType EndToEnd \
 5 | --genomeDir repbase_STARindex \
 6 | --genomeLoad NoSharedMemory \
 7 | --outBAMcompression 10 \
 8 | --outFileNamePrefix rep1.IP.umi.r1.fqTrTr.sorted.STAR \
 9 | --outFilterMultimapNmax 30 \
10 | --outFilterMultimapScoreRange 1 \
11 | --outFilterScoreMin 10 \
12 | --outFilterType BySJout \
13 | --outReadsUnmapped Fastx \
14 | --outSAMattrRGline ID:foo \
15 | --outSAMattributes All \
16 | --outSAMmode Full \
17 | --outSAMtype BAM Unsorted \
18 | --outSAMunmapped Within \
19 | --outStd Log \
20 | --readFilesIn ../04_fastq_sort/rep1.IP.umi.r1.fqTrTr.sorted.fq \
21 | --runMode alignReads \
22 | --runThreadN 8
23 | echo $(date +%x,%r) >> TIMES.txt;
24 | 
25 | echo $(date +%x,%r) >> TIMES.txt;
26 | STAR \
27 | --alignEndsType EndToEnd \
28 | --genomeDir repbase_STARindex \
29 | --genomeLoad NoSharedMemory \
30 | --outBAMcompression 10 \
31 | --outFileNamePrefix rep1.IN.umi.r1.fqTrTr.sorted.STAR \
32 | --outFilterMultimapNmax 30 \
33 | --outFilterMultimapScoreRange 1 \
34 | --outFilterScoreMin 10 \
35 | --outFilterType BySJout \
36 | --outReadsUnmapped Fastx \
37 | --outSAMattrRGline ID:foo \
38 | --outSAMattributes All \
39 | --outSAMmode Full \
40 | --outSAMtype BAM Unsorted \
41 | --outSAMunmapped Within \
42 | --outStd Log \
43 | --readFilesIn ../04_fastq_sort/rep1.IN.umi.r1.fqTrTr.sorted.fq \
44 | --runMode alignReads \
45 | --runThreadN 8
46 | echo $(date +%x,%r) >> TIMES.txt;
47 | 


--------------------------------------------------------------------------------
/tests/eCLIP-0.7.0/06_star_genome/run_star.sh:
--------------------------------------------------------------------------------
 1 | # STAR 2.7.6a
 2 | echo $(date +%x,%r) > TIMES.txt;
 3 | STAR \
 4 | --alignEndsType EndToEnd \
 5 | --genomeDir star_2_7_6a_gencode19_sjdb \
 6 | --genomeLoad NoSharedMemory \
 7 | --outBAMcompression 10 \
 8 | --outFileNamePrefix rep1.IP.umi.r1.fq.genome-mapped \
 9 | --outFilterMultimapNmax 1 \
10 | --outFilterMultimapScoreRange 1 \
11 | --outFilterScoreMin 10 \
12 | --outFilterType BySJout \
13 | --outReadsUnmapped Fastx \
14 | --outSAMattrRGline ID:foo \
15 | --outSAMattributes All \
16 | --outSAMmode Full \
17 | --outSAMtype BAM Unsorted \
18 | --outSAMunmapped Within \
19 | --outStd Log \
20 | --readFilesIn ../05_star_repeat/rep1.IP.umi.r1.fqTrTr.sorted.STARUnmapped.out.mate1 \
21 | --runMode alignReads \
22 | --runThreadN 8
23 | echo $(date +%x,%r) >> TIMES.txt;
24 | echo $(date +%x,%r) >> TIMES.txt;
25 | STAR \
26 | --alignEndsType EndToEnd \
27 | --genomeDir star_2_7_6a_gencode19_sjdb \
28 | --genomeLoad NoSharedMemory \
29 | --outBAMcompression 10 \
30 | --outFileNamePrefix rep1.IN.umi.r1.fq.genome-mapped \
31 | --outFilterMultimapNmax 1 \
32 | --outFilterMultimapScoreRange 1 \
33 | --outFilterScoreMin 10 \
34 | --outFilterType BySJout \
35 | --outReadsUnmapped Fastx \
36 | --outSAMattrRGline ID:foo \
37 | --outSAMattributes All \
38 | --outSAMmode Full \
39 | --outSAMtype BAM Unsorted \
40 | --outSAMunmapped Within \
41 | --outStd Log \
42 | --readFilesIn ../05_star_repeat/rep1.IN.umi.r1.fqTrTr.sorted.STARUnmapped.out.mate1 \
43 | --runMode alignReads \
44 | --runThreadN 8
45 | echo $(date +%x,%r) >> TIMES.txt;
46 | 


--------------------------------------------------------------------------------
/tests/eCLIP-0.7.0/07_sort/run_sort.sh:
--------------------------------------------------------------------------------
 1 | # samtools 1.6
 2 | #
 3 | # For IP and then IN: sort the genome-mapped BAM by read name (-n), then sort
 4 | # that result again without -n (samtools' default coordinate order).
 5 | # A timestamp is appended to TIMES.txt before the first sort and after every
 6 | # sort, so each step's duration can be read from consecutive entries.
 7 | 
 8 | echo $(date +%x,%r) > TIMES.txt;
 9 | samtools \
10 | sort \
11 | -n \
12 | -o inputs/rep1.IP.umi.r1.fq.genome-mappedSo.bam \
13 | inputs/rep1.IP.umi.r1.fq.genome-mapped.bam
14 | echo $(date +%x,%r) >> TIMES.txt;
15 | samtools \
16 | sort \
17 | -o rep1.IP.umi.r1.fq.genome-mappedSoSo.bam \
18 | inputs/rep1.IP.umi.r1.fq.genome-mappedSo.bam
19 | echo $(date +%x,%r) >> TIMES.txt;
20 | samtools \
21 | sort \
22 | -n \
23 | -o inputs/rep1.IN.umi.r1.fq.genome-mappedSo.bam \
24 | inputs/rep1.IN.umi.r1.fq.genome-mapped.bam
25 | echo $(date +%x,%r) >> TIMES.txt;
26 | samtools \
27 | sort \
28 | -o rep1.IN.umi.r1.fq.genome-mappedSoSo.bam \
29 | inputs/rep1.IN.umi.r1.fq.genome-mappedSo.bam
30 | echo $(date +%x,%r) >> TIMES.txt;
31 | 


--------------------------------------------------------------------------------
/tests/eCLIP-0.7.0/08_umi_tools_dedup/run_umitools.sh:
--------------------------------------------------------------------------------
 1 | # umi_tools 1.0.0
 2 | 
 3 | echo $(date +%x,%r) > TIMES.txt;
 4 | umi_tools dedup \
 5 | --random-seed 1 \
 6 | -I inputs/rep1.IP.umi.r1.fq.genome-mappedSoSo.bam \
 7 | --method unique \
 8 | --output-stats IP.umi.r1.fq.genome-mappedSoSo \
 9 | -S rep1.IP.umi.r1.fq.genome-mappedSoSo.rmDup.bam
10 | echo $(date +%x,%r) >> TIMES.txt;
11 | echo $(date +%x,%r) >> TIMES.txt;
12 | umi_tools dedup \
13 | --random-seed 1 \
14 | -I inputs/rep1.IN.umi.r1.fq.genome-mappedSoSo.bam \
15 | --method unique \
16 | --output-stats IN.umi.r1.fq.genome-mappedSoSo \
17 | -S rep1.IN.umi.r1.fq.genome-mappedSoSo.rmDup.bam
18 | echo $(date +%x,%r) >> TIMES.txt;
19 | 


--------------------------------------------------------------------------------
/tests/eCLIP-0.7.0/09_clipper/run_204_01_RBFOX2_clipper.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | module load clipper/5d865bb;
4 | 
5 | cwltool \
6 | --no-container \
7 | /projects/ps-yeolab4/software/eclip/0.7.0/cwl/clipper.cwl \
8 | /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/clipper/204_01_RBFOX2_clipper.yaml
9 | 


--------------------------------------------------------------------------------
/tests/eCLIP-0.7.0/09_clipper/run_4020_CLIP1_clipper.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | module load clipper/5d865bb;
4 | 
5 | cwltool \
6 | --no-container \
7 | /projects/ps-yeolab4/software/eclip/0.7.0/cwl/clipper.cwl \
8 | /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/clipper/4020_CLIP1.yaml
9 | 


--------------------------------------------------------------------------------
/tests/eCLIP-0.7.0/09_clipper/run_clipper.sh:
--------------------------------------------------------------------------------
1 | # Runs clipper (species hg19) on the rmDupSo IP BAM and writes the peak clusters as BED.
2 | ip_prefix=rep1.IP.umi.r1.fq.genome-mappedSoSo.rmDupSo
3 | 
4 | clipper --species hg19 \
5 |     --bam "inputs/${ip_prefix}.bam" \
6 |     --outfile "${ip_prefix}.peakClusters.bed"
7 | 
8 | 


--------------------------------------------------------------------------------
/tests/eCLIP-0.7.0/10_normalize/run_input_norm.sh:
--------------------------------------------------------------------------------
 1 | # samtools 1.6
 2 | # overlap_peakfi_with_bam_PE.pl
 3 | 
 4 | samtools sort inputs/rep1.IN.umi.r1.fq.genome-mappedSoSo.rmDup.bam > inputs/rep1.IN.umi.r1.fq.genome-mappedSoSo.rmDupSo.bam
 5 | samtools sort inputs/rep1.IP.umi.r1.fq.genome-mappedSoSo.rmDup.bam > inputs/rep1.IP.umi.r1.fq.genome-mappedSoSo.rmDupSo.bam
 6 | 
 7 | samtools view -cF 4 inputs/rep1.IP.umi.r1.fq.genome-mappedSoSo.rmDupSo.bam > ip_mapped_readnum.txt
 8 | samtools view -cF 4 inputs/rep1.IN.umi.r1.fq.genome-mappedSoSo.rmDupSo.bam > input_mapped_readnum.txt
 9 | 
10 | perl /projects/ps-yeolab4/software/eclip/0.7.0/bin/overlap_peakfi_with_bam_PE.pl \
11 | inputs/rep1.IP.umi.r1.fq.genome-mappedSoSo.rmDupSo.bam \
12 | inputs/rep1.IN.umi.r1.fq.genome-mappedSoSo.rmDupSo.bam \
13 | inputs/rep1.IP.umi.r1.fq.genome-mappedSoSo.rmDupSo.peakClusters.bed \
14 | ip_mapped_readnum.txt \
15 | input_mapped_readnum.txt \
16 | rep1.IP.umi.r1.fq.genome-mappedSoSo.rmDupSo.peakClusters.normed.bed
17 | 
18 | 


--------------------------------------------------------------------------------
/tests/eCLIP-0.7.0/wf_clipseqcore_pe_2barcodes/wf_clipseqcore_2bc.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #PBS -N wf_clipseqcore_2bc
 3 | #PBS -o wf_clipseqcore_2bc.sh.out
 4 | #PBS -e wf_clipseqcore_2bc.sh.err
 5 | #PBS -V
 6 | #PBS -l walltime=24:00:00
 7 | #PBS -l nodes=1:ppn=7
 8 | #PBS -A yeo-group
 9 | #PBS -q home
10 | #PBS -t 1-6
11 | 
12 | # Go to the directory from which the script was called
13 | cd $PBS_O_WORKDIR
14 | cmd[1]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_pe_2barcodes/;./A01_B06.yaml"
15 | cmd[2]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_pe_2barcodes/;./A03_G07.yaml"
16 | cmd[3]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_pe_2barcodes/;./A04_F05.yaml"
17 | cmd[4]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_pe_2barcodes/;./C01_D8f.yaml"
18 | cmd[5]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_pe_2barcodes/;./X1A_X1B.yaml"
19 | cmd[6]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_pe_2barcodes/;./X2A_X2B.yaml"
20 | eval ${cmd[$PBS_ARRAYID]}
21 | 
22 | 


--------------------------------------------------------------------------------
/tests/eCLIP-0.7.0/wf_clipseqcore_se_1barcode/wf_clipseqcore_1bc.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #PBS -N wf_clipseqcore_1bc
 3 | #PBS -o wf_clipseqcore_1bc.sh.out
 4 | #PBS -e wf_clipseqcore_1bc.sh.err
 5 | #PBS -V
 6 | #PBS -l walltime=24:00:00
 7 | #PBS -l nodes=1:ppn=8
 8 | #PBS -A yeo-group
 9 | #PBS -q home
10 | #PBS -t 1-9
11 | 
12 | # Go to the directory from which the script was called
13 | cd $PBS_O_WORKDIR
14 | cmd[1]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_se_1barcode/;./InvRNA1.yaml"
15 | cmd[2]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_se_1barcode/;./InvRNA2.yaml"
16 | cmd[3]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_se_1barcode/;./InvRNA3.yaml"
17 | cmd[4]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_se_1barcode/;./InvRNA4.yaml"
18 | cmd[5]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_se_1barcode/;./InvRNA5.yaml"
19 | cmd[6]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_se_1barcode/;./InvRNA6.yaml"
20 | cmd[7]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_se_1barcode/;./InvRNA7.yaml"
21 | cmd[8]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_se_1barcode/;./InvRNA8.yaml"
22 | cmd[9]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_se_1barcode/;./InvRil19.yaml"
23 | eval ${cmd[$PBS_ARRAYID]}
24 | 
25 | 


--------------------------------------------------------------------------------
/wf/README.md:
--------------------------------------------------------------------------------
1 | ## This folder contains work-in-progress "metadata runners".
2 | 
3 | - The goal is to make it easier to switch between cwlref-runner, cwltoil (local), and cwltoil (torque).
4 | - Every 'single end' workflow must have ```#!/usr/bin/env eCLIP_singleend``` as the first line of its YAML document. This shebang invokes the eCLIP_singleend bash script, which selects the single-end-specific CWL workflow.
5 | - Likewise, every 'paired end' workflow must start with the shebang of the corresponding paired-end runner script.


--------------------------------------------------------------------------------