├── .gitignore ├── CHANGELOG.md ├── ECLIP-VERSION-0.7.0 ├── LICENSE ├── README.md ├── bin ├── __init__.py ├── annotate_peaks_bedformat_wproxdistal_lncRNA.pl ├── barcodecollapsepe.py ├── bed_to_narrowpeak.py ├── calculate_entropy.py ├── combine_ReadsByLoc_files.pl ├── compress_l2foldenrpeakfi_for_replicate_overlapping_bedformat.pl ├── convert_ReadsByLoc_combined_significancecalls.pl ├── count_reads_broadfeatures_frombamfi_PEmap_lncRNA.pl ├── count_reads_broadfeatures_frombamfi_SEmap_lncRNA.pl ├── fix_bed_for_bigbed_conversion.py ├── generate_adaptertrim_fasta.ipynb ├── overlap_peakfi_with_bam.pl ├── overlap_peakfi_with_bam_PE.pl └── parsebarcodes.sh ├── cwl ├── barcodecollapse_pe.cwl ├── barcodecollapse_se.cwl ├── barcodecollapse_se_nostats.cwl ├── bed_to_bigbed.cwl ├── bed_to_narrowpeak.cwl ├── blacklist-remove.cwl ├── calculate_entropy.cwl ├── clipper.cwl ├── demux_pe.cwl ├── demux_se.cwl ├── demux_targeted_mir_se.cwl ├── fastqc.cwl ├── fastqsort.cwl ├── file2string.cwl ├── file2stringArray.cwl ├── fix_bed_for_bigbed_conversion.cwl ├── gzip.cwl ├── index.cwl ├── makebigwigfiles.cwl ├── makebigwigfiles_PE.cwl ├── makebigwigfiles_SE.cwl ├── namesort.cwl ├── overlap_peakfi_with_bam.cwl ├── overlap_peakfi_with_bam_PE.cwl ├── parsebarcodes.cwl ├── peakscompress.cwl ├── rename.cwl ├── samtools-index.cwl ├── samtools-mappedreadnum.cwl ├── samtools-merge.cwl ├── samtools-view.cwl ├── samtools-viewr2.cwl ├── sort-bed.cwl ├── sort.cwl ├── star-genome.cwl ├── star-repeatmapping.cwl ├── star.cwl ├── trim_pe.cwl ├── trim_se.cwl ├── trim_umi.cwl ├── wf_clipseqcore_chimeric_se_1barcode.cwl ├── wf_clipseqcore_nostats_se_1barcode.cwl ├── wf_clipseqcore_pe_1barcode.cwl ├── wf_clipseqcore_pe_1barcode_nodemux.cwl ├── wf_clipseqcore_pe_2barcodes.cwl ├── wf_clipseqcore_pe_2barcodes_nodemux.cwl ├── wf_clipseqcore_se_1barcode.cwl ├── wf_clipseqcore_trim_partial_se_1barcode.cwl ├── wf_demultiplex_pe.cwl ├── wf_demultiplex_se.cwl ├── wf_encode_se_full.cwl ├── wf_encode_se_full_nostats.cwl 
├── wf_encode_se_full_scatter.cwl ├── wf_encode_se_full_scatter_nostats.cwl ├── wf_encode_se_just_repmap.cwl ├── wf_fastqc.cwl ├── wf_get_peaks_chimeric_se.cwl ├── wf_get_peaks_nostats_se.cwl ├── wf_get_peaks_pe.cwl ├── wf_get_peaks_scatter_chimeric_se.cwl ├── wf_get_peaks_scatter_pe.cwl ├── wf_get_peaks_scatter_se.cwl ├── wf_get_peaks_scatter_se_nostats.cwl ├── wf_get_peaks_se.cwl ├── wf_get_peaks_trim_partial_scatter_se.cwl ├── wf_get_peaks_trim_partial_se.cwl ├── wf_trim_and_map_chimeric_se.cwl ├── wf_trim_and_map_pe.cwl ├── wf_trim_and_map_se.cwl ├── wf_trim_and_map_se_nostats.cwl ├── wf_trim_partial_and_map_se.cwl └── wf_trim_partial_and_map_se_scatter.cwl ├── documentation ├── Repeat_mapping.pdf ├── Reproducible_peaks.pdf ├── Zero_to_peaks.pdf ├── eCLIP_analysisSOP_v2.0.pdf ├── eCLIP_analysisSOP_v2.2.1.docx ├── eCLIP_analysisSOP_v2.2.docx └── eCLIP_single_end_analysisSOP_v1.docx ├── eCLIP-flowchart.png ├── example ├── inputs │ ├── ENCFF039QTN.bed │ ├── ENCFF269URO.bed │ ├── InvRNA1_adapters.fasta │ ├── InvRNA2_adapters.fasta │ ├── InvRNA3_adapters.fasta │ ├── InvRNA4_adapters.fasta │ ├── InvRNA5_adapters.fasta │ ├── InvRNA6_adapters.fasta │ ├── InvRNA7_adapters.fasta │ ├── InvRNA8_adapters.fasta │ ├── InvRil19_adapters.yaml │ ├── example_fastqs │ │ ├── chrom19kbp550_clip1_r1.fastq.gz │ │ ├── chrom19kbp550_clip1_r2.fastq.gz │ │ ├── chrom19kbp550_clip2_r1.fastq.gz │ │ ├── chrom19kbp550_clip2_r2.fastq.gz │ │ ├── chrom19kbp550_input_r1.fastq.gz │ │ └── chrom19kbp550_input_r2.fastq.gz │ ├── hg113seqs_repbase_starindex │ │ ├── Genome │ │ ├── SA │ │ ├── SAindex │ │ ├── chrLength.txt │ │ ├── chrName.txt │ │ ├── chrNameLength.txt │ │ ├── chrStart.txt │ │ ├── genomeParameters.txt │ │ └── small_repelements.fa │ ├── hg19.chrom.sizes │ ├── hg19chr19.chrom.sizes │ ├── hg19chr19kbp550_starindex │ │ ├── Genome │ │ ├── SA │ │ ├── SAindex │ │ ├── chr19_550000bases.fa │ │ ├── chrLength.txt │ │ ├── chrName.txt │ │ ├── chrNameLength.txt │ │ ├── chrStart.txt │ │ └── 
genomeParameters.txt │ └── yeolabbarcodes_20170101.fasta ├── paired_end_clip.yaml └── single_end_clip.yaml ├── tests └── eCLIP-0.7.0 │ ├── 01_umi_tools_extract │ └── run_demux_se.sh │ ├── 02_cutadapt_round1 │ └── run_cutadapt.sh │ ├── 03_cutadapt_round2 │ └── run_cutadapt.sh │ ├── 04_fastq_sort │ └── run_fastq-sort.sh │ ├── 05_star_repeat │ └── run_star.sh │ ├── 06_star_genome │ └── run_star.sh │ ├── 07_sort │ └── run_sort.sh │ ├── 08_umi_tools_dedup │ └── run_umitools.sh │ ├── 09_clipper │ ├── run_204_01_RBFOX2_clipper.sh │ ├── run_4020_CLIP1_clipper.sh │ └── run_clipper.sh │ ├── 10_normalize │ └── run_input_norm.sh │ ├── wf_clipseqcore_pe_2barcodes │ └── wf_clipseqcore_2bc.sh │ └── wf_clipseqcore_se_1barcode │ └── wf_clipseqcore_1bc.sh ├── wf ├── README.md ├── eCLIP_pairedend ├── eCLIP_pairedend_singlenode ├── eCLIP_singleend ├── eCLIP_singleend_encode └── eCLIP_singleend_singlenode └── wf_debug ├── eCLIP_singleend_encode_nostats ├── eCLIP_singleend_nostats ├── eCLIP_singleend_singlenode_nostats └── eCLIP_singleend_trim_partial /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | .static_storage/ 56 | .media/ 57 | local_settings.py 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # 107 | archived* 108 | 109 | # 110 | *DS_Store 111 | 112 | # 113 | .ipynb_checkpoints/ 114 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | All notable changes to this project will be documented in this file. 3 | 4 | The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) 5 | 6 | ## [0.7.0] - 2020-10-09 7 | ### Added 8 | - Extra step to calculate total entropy 9 | 10 | ### Changed 11 | - Updated clipper.cwl to "Clipper3" () and include the latest ENCODE annotations (GRCh38_v29e). Also removed pickle intermediates, although this has never made it to the final outputs. 
12 | - Updated star*.cwl STAR to version 2.7.6, fixes a bug that produces non-ascii characters 13 | 14 | ### 15 | 16 | ## [0.6.0a] - 2020-08-20 17 | ### Added 18 | - The core pipeline (wf_get_peaks_scatter_se.cwl and wf_get_peaks_scatter_pe.cwl) *should* now be fully portable on AWS. 19 | - Slight modifications to README (updated references) 20 | 21 | ### Fixed 22 | - Updated adapter examples (was missing one base in the last adapter) 23 | 24 | ## [Unreleased 0.5.99] - 2020-06-24 25 | ### Added 26 | - Added docker requirement definitions to most commandlinetools. 27 | - Added the following companion workflows: 28 | - wf_encode_se_full_nostats "full encode workflow (eCLIP + repeat mapping + region normalization)" minus umi_tools --stats (to save memory) 29 | - wf_encode_se_full_scatter_nostats "full encode workflow" minus umi_tools --nostats (multiple samples) 30 | - Added the following commandlinetools: 31 | - fastqc.cwl 32 | - Added the following subworkflows to the main workflow: 33 | - wf_fastqc.cwl essentially fastqc.cwl + rename.cwl (so fastqc files won't override each other) 34 | - Added a 'blacklist_file' required param to the following workflows 35 | - wf_get_peaks_scatter_se_nostats.cwl 36 | - wf_get_peaks_trim_partial_scatter_se.cwl 37 | - wf_get_peaks_trim_partial_se.cwl 38 | ### Fixed 39 | - Updated workflows to report uniquely-named fastqc reports so they don't override each other. 
40 | - (unused in main pipeline) convert_ReadsByLoc_combined_significancecalls.pl now matches current region normalization script 41 | - (unused in main pipeline) duplicate_removal_inline_paired_count_region_other_reads_SE.pl now matches current repeat element scripts 42 | - (unused in main pipeline) split_bam_to_subfiles_SE.pl now matches current repeat element scripts 43 | 44 | ## [Unreleased 0.5.0] - 2020-02-21 45 | ### Changed 46 | - Version bumped to 0.5.0 47 | 48 | ### Added 49 | - Added the following steps to the main single-end pipeline: 50 | - sort_bed (sorts input normalized bed file) 51 | - blacklist remove (removes blacklisted regions from peak file) 52 | - bed to narrowPeak (converts peak bed file to narrowPeak format) 53 | - fix bed ("fixes" a peak bed file format such that it is compatible with bedToBigBed) 54 | - bed to bigbed (calls bedToBigBed to convert peak bed file to bigBed format) 55 | - Added a 'nostats' workflow in 'wf/' to optionally run the pipeline without requiring umi_tools stats generation. This dramatically cuts down on runtime/mem reqs 56 | - Added pre/post processing scripts (annotate_peaks_bedformat_wproxdistal_lncRNA.pl & generate_adaptertrim_fasta.ipynb) 57 | - annotate_peaks_bedformat_wproxdistal_lncRNA.pl (perl script that annotates bed files) 58 | - generate_adaptertrim_fasta.ipynb (jupyter notebook that generates fasta files w/ partial adapter sequences to trim) 59 | 60 | ## [0.4.0] - 2019-03-25 61 | ### Changed 62 | - YAML metadata changes slightly to account for each dataset to potentially have its own adapter sequences 63 | 64 | ## [0.3.0] - 2019-03-05 65 | - There is some work done to make the SE pipeline outputs deterministic. Outputs should be the same every time. 
66 | - Introducing a "wf_encode_full" workflow that combines the peak calling workflow, the repeat mapping workflow (hg19 only), and region-level normalization workflow 67 | - The previous manifests (eCLIP-0.2.2) for eCLIP_pairedend and eCLIP_singleend should still work. 68 | 69 | ### Added 70 | - gzip step for all fastq files 71 | - added ```arguments: ["--random-seed", "1"]``` to barcodecollapse_se and demux_se definitions to decrease randomness in umi_tools outputs 72 | - added an "wf_encode_se_full" and "wf_encode_se_full_scatter" cwl definitions to run 1) peak finding, 2) region level normalization, 3) repeat mapping for SE reads. 73 | - region normalization subworkflow (regionnormalize/) cwl definitions to incorporate region level normalization 74 | - repeat mapping subworkflow (repmap/) cwl definitions to incorporate repeat mapping 75 | 76 | ### Changed 77 | - makebigwigs script is now split into _PE and _SE due to strand flipping 78 | - repeat-mapped reads now are named dataset.readname.umi.r1.repeat-mapped.bam (instead of dataset.readname.umi.r1TrTr.sorted.STARAligned.out.bam) 79 | - repeat-unmapped reads are now named dataset.readname.umi.r1.repeat-unmapped.sorted.fq (instead of dataset.readname.umi.r1TrTr.sorted.STARUnmapped.out.sorted.fq.gz) 80 | - genome-mapped reads now are named dataset.readname.umi.r1.genome-mapped.bam (instead of dataset.readname.umi.r1TrTr.sorted.STARUnmapped.out.sorted.STARAligned.outSo.rmDupSo.bam) 81 | - wf_trim_and_map_se.cwl now outputs gzipped X_output_trim_first and X_output_trim_again fastq files. 
82 | 83 | [Unreleased]: https://github.com/yeolab/eclip...HEAD 84 | 85 | -------------------------------------------------------------------------------- /ECLIP-VERSION-0.7.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/ECLIP-VERSION-0.7.0 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | This software is Copyright © 2022 The Regents of the University of California. 3 | All Rights Reserved. 4 | 5 | Permission to copy, modify, and distribute this software and its documentation 6 | for educational, research and non-profit purposes, without fee, and without a 7 | written agreement is hereby granted, provided that the above copyright notice, 8 | this paragraph and the following three paragraphs appear in all copies. 9 | 10 | Permission to make commercial use of this software may be obtained by contacting: 11 | 12 | Office of Innovation & Commercialization 13 | 9500 Gilman Drive, Mail Code 0910 14 | University of California 15 | La Jolla, CA 92093-0910 16 | (858) 534-5815 17 | innovation@ucsd.edu 18 | 19 | This software program and documentation are copyrighted by The Regents of the 20 | University of California. The software program and documentation are supplied 21 | "as is", without any accompanying services from The Regents. The Regents does 22 | not warrant that the operation of the program will be uninterrupted or 23 | error-free. The end-user understands that the program was developed for 24 | research purposes and is advised not to rely exclusively on the program for 25 | any reason. 
26 | 27 | IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR 28 | DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING 29 | LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, 30 | EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF 31 | SUCH DAMAGE. THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY 32 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 33 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED 34 | HEREUNDER IS ON AN ìAS ISî BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO 35 | OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR 36 | MODIFICATIONS. 37 | -------------------------------------------------------------------------------- /bin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/bin/__init__.py -------------------------------------------------------------------------------- /bin/barcodecollapsepe.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | """ 5 | barcodecollapsepe.py 6 | 7 | Created by Gabriel Pratt 8 | 9 | reads in a .bam file where the first 9 nt of the read name are the barcode 10 | and merge reads mapped to the same position that have the same barcode 11 | """ 12 | 13 | 14 | from __future__ import print_function 15 | 16 | 17 | from collections import Counter 18 | import itertools 19 | from optparse import OptionParser # TODO replace with argparse 20 | import sys 21 | import pysam 22 | 23 | 24 | help_message = """ 25 | barcodecollapse_pe reads in a .bam file where the first 9 nt of the read name 26 | are the barcode and merge reads mapped to the same position that have the same 27 | barcode 28 | """ 29 | 30 | 31 | def stranded_read_start(read): 32 | if 
def stranded_read_start(read):
    """Return the strand-aware start coordinate of an aligned read.

    Reverse-strand reads report their last aligned reference position
    (``read.positions[-1]``); forward-strand reads report ``read.pos``.
    """
    if read.is_reverse:
        return read.positions[-1]
    return read.pos


def output_metrics(metrics_file, total_count, removed_count):
    """Write a tab-separated per-randomer summary to *metrics_file*.

    The file has a ``randomer / total_count / removed_count`` header followed
    by one row per key of *total_count*.  *removed_count* is indexed with the
    same keys, so it should be a ``Counter`` (missing keys then read as 0).
    """
    with open(metrics_file, 'w') as metrics:
        metrics.write("\t".join(["randomer",
                                 "total_count",
                                 "removed_count"])
                      + "\n")
        for barcode in total_count:
            row = [barcode, total_count[barcode], removed_count[barcode]]
            metrics.write("\t".join(map(str, row)) + "\n")


def collapse_read_pairs(read_pairs):
    """Collapse mate pairs that share position, strand and randomer.

    *read_pairs* is an iterable of ``(read1, read2)`` records exposing the
    attributes used below (``qname``, ``rname``, ``pos``, ``positions``,
    ``is_reverse``, ``is_read1``, ``is_unmapped``).

    Returns ``(result_dict, total_count, removed_count)`` where
    ``result_dict`` maps ``(rname, start, stop, strand, randomer)`` to the
    first pair seen at that location, and the two ``Counter`` objects are
    keyed by randomer (the text before the first ``:`` of the read name).

    Raises ``Exception`` when the two records of a pair have different names.
    """
    removed_count = Counter()
    total_count = Counter()
    result_dict = {}

    for read1, read2 in read_pairs:
        if read1.qname != read2.qname:
            print(read1.qname, read2.qname)
            raise Exception("Read Names don't match")

        # A pair is only usable when both mates are mapped.  (The original
        # tested read1 twice for the "both unmapped" case; the pairs skipped
        # are the same, only tallies that were never read were affected.)
        if read1.is_unmapped or read2.is_unmapped:
            continue
        # Mates on different references cannot be collapsed as one fragment.
        if read1.rname != read2.rname:
            continue

        # If the read order is swapped, swap everything before running.
        if not read1.is_read1:
            read1, read2 = read2, read1

        randomer = read1.qname.split(":")[0]
        start = stranded_read_start(read1)
        stop = stranded_read_start(read2)
        strand = "-" if read1.is_reverse else "+"
        unique_location = (read1.rname, start, stop, strand, randomer)

        total_count[randomer] += 1
        if unique_location in result_dict:
            removed_count[randomer] += 1
            continue
        result_dict[unique_location] = (read1, read2)

    return result_dict, total_count, removed_count


def barcode_collapse(in_bam, out_bam):
    """Remove randomer-aware PCR duplicates from a paired-end alignment file.

    *in_bam* is opened twice so that even-indexed and odd-indexed records can
    be walked in lock-step as mate pairs; mates must therefore be adjacent in
    the file.  The first pair seen per location/randomer is written to
    *out_bam*.

    Returns ``(total_count, removed_count)``, two ``Counter`` objects keyed by
    randomer.
    """
    # itertools.izip only exists on Python 2; the builtin zip is already lazy
    # on Python 3.
    lazy_zip = getattr(itertools, "izip", zip)

    with pysam.Samfile(in_bam, 'r') as samfile1:
        with pysam.Samfile(in_bam, 'r') as samfile2:
            samfile_read1 = itertools.islice(samfile1, 0, None, 2)
            samfile_read2 = itertools.islice(samfile2, 1, None, 2)
            result_dict, total_count, removed_count = collapse_read_pairs(
                lazy_zip(samfile_read1, samfile_read2))

        # Output barcode-collapsed reads, reusing the input header as template.
        # The handle gets its own name so the ``out_bam`` path is not shadowed.
        with pysam.Samfile(out_bam, 'wb', template=samfile1) as collapsed:
            for read1, read2 in result_dict.values():
                collapsed.write(read1)
                collapsed.write(read2)

    return total_count, removed_count


def main():
    """Command-line entry point: collapse a BAM file and write the metrics."""
    description = """Paired End randomer aware duplciate removal algorithm."""
    usage = """
    Assumes paired end reads are adjacent in output file (ie needs unsorted bams)
    Also assumes no multimappers in the bam file (otherwise behavior is undefined)
    """
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-b", "--bam",
                      dest="bam",
                      help="bam file to barcode collapse")
    parser.add_option("-o", "--out_file",
                      dest="out_file")
    parser.add_option("-m", "--metrics_file",
                      dest="metrics_file")
    (options, args) = parser.parse_args()

    # Fail before doing any work instead of crashing on a None option later
    # (a missing metrics path used to surface only after the whole collapse).
    if not (options.bam and options.out_file and options.metrics_file):
        parser.error("--bam, --out_file and --metrics_file are all required")

    if not options.bam.endswith(".bam"):
        raise TypeError("%s, not bam file" % options.bam)

    total_count, removed_count = barcode_collapse(options.bam, options.out_file)
    output_metrics(options.metrics_file, total_count, removed_count)

    sys.exit(0)


if __name__ == "__main__":
    main()
import numpy as np
import pandas as pd
import argparse
import os

# Column layout of the input-normalized eCLIP peak BED file.
ECLIP_HEADER = [
    'chrom', 'start', 'end', 'pValue', 'signalValue', 'strand'
]

# Column order written to the narrowPeak body.
NARROWPEAK_COLUMNS = [
    'chrom', 'start', 'end', 'name', 'score', 'strand',
    'signalValue', 'pValue', 'qValue', 'peak'
]


def score_encode(row):
    """Return the track score for one peak.

    1000 when both ``pValue`` and ``signalValue`` are >= 3, otherwise 200
    (the score is only used for UCSC track colouring).
    """
    if row['pValue'] >= 3 and row['signalValue'] >= 3:
        return 1000
    return 200


def return_narrowpeak_header(bed, species, visibility=3):
    """Build the UCSC ``track`` line for the narrowPeak file.

    The track name is the basename of *bed*; *species* fills ``db=``.
    """
    name = os.path.basename(bed)
    description = name + " input-normalized peaks"
    header = 'track type=narrowPeak visibility={} db={} name=\"{}\" description=\"{}\"'.format(
        visibility, species, name, description
    )
    return header


def bed_to_narrowpeak(bed, species, narrowpeak):
    """Convert an eCLIP peak BED file into a narrowPeak track file.

    ``name`` is set to ``.``, ``score`` comes from :func:`score_encode`, and
    ``qValue`` / ``peak`` are set to -1.  An input without any peaks yields a
    file that contains only the track line.
    """
    try:
        peaks = pd.read_csv(bed, names=ECLIP_HEADER, sep='\t')
    except pd.errors.EmptyDataError:
        peaks = pd.DataFrame(columns=ECLIP_HEADER)

    # Header and rows go through a single handle instead of reopening the
    # file in append mode.
    with open(narrowpeak, 'w') as f:
        f.write("{}\n".format(return_narrowpeak_header(bed, species)))

        # A row-wise apply on an empty frame returns a DataFrame, which cannot
        # be assigned to the single 'score' column; there is nothing to write.
        if peaks.empty:
            return

        peaks['name'] = '.'
        peaks['score'] = peaks.apply(score_encode, axis=1)
        peaks['qValue'] = -1
        peaks['peak'] = -1
        peaks[NARROWPEAK_COLUMNS].to_csv(
            f,
            sep='\t',
            header=False,
            index=False
        )


def main():
    """Parse the command line and run the BED -> narrowPeak conversion."""
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--input_bed",
        required=True,
    )
    parser.add_argument(
        "--species",
        required=True,
    )
    parser.add_argument(
        "--output_narrowpeak",
        required=True,
    )

    # Process arguments
    args = parser.parse_args()
    bed = args.input_bed
    species = args.species
    narrowpeak = args.output_narrowpeak

    # Hack to get around the hg19/38 -> GRCh37/38 ucsc schema.
    # startswith() already covers the exact-match case.
    assembly = species.upper()
    if assembly.startswith('GRCH37'):
        species = 'hg19'
    elif assembly.startswith('GRCH38'):
        species = 'hg38'

    # main func
    bed_to_narrowpeak(bed, species, narrowpeak)


if __name__ == "__main__":
    main()
import sys

import numpy as np
import pandas as pd
import argparse
import os

# Column layout of the "full" input-normalization table.
FULL_HEADER = [
    'chrom', 'start', 'end', 'peak', 'ip_count', 'input_count',
    'pvalue', 'chivalue', 'chitype', 'isenriched', 'l10p', 'l2fc'
]


def entropy(row, ip_mapped_num, input_mapped_num):
    """Compute the entropy contribution of a single peak (row).

    With ``p_ip = ip_count / ip_mapped_num`` and
    ``p_input = input_count / input_mapped_num`` the value is
    ``p_ip * log2(p_ip / p_input)``.
    """
    pip = float(row['ip_count'] / float(ip_mapped_num))
    pinp = float(row['input_count'] / float(input_mapped_num))
    return pip * np.log2(pip / pinp)


def sum_entropy(full, ip_mapped, input_mapped, l10p, l2fc):
    """Sum the per-peak entropy over all peaks that pass both cutoffs.

    *ip_mapped* and *input_mapped* are text files whose first line holds the
    number of mapped reads.  *full* is a headerless tab-separated table laid
    out as ``FULL_HEADER``.  Peaks are kept when ``l10p`` and ``l2fc`` are at
    or above the given cutoffs; the cutoffs may be numbers or numeric strings.

    Returns the summed entropy (0.0 when no peak passes).  This stays
    best-effort as before: on any failure the exception object is returned
    instead of raised, so callers that simply print the result keep working.
    The failure is now also reported on stderr.
    """
    try:
        with open(ip_mapped, 'r') as f:
            ip_mapped_num = int(f.readline().rstrip())
        with open(input_mapped, 'r') as f:
            input_mapped_num = int(f.readline().rstrip())

        # Command-line values arrive as strings; comparing a string against a
        # numeric column raises, which used to become the "result".
        l10p = float(l10p)
        l2fc = float(l2fc)

        peaks = pd.read_csv(full, names=FULL_HEADER, sep='\t')
        passing = peaks[(peaks['l10p'] >= l10p) & (peaks['l2fc'] >= l2fc)]

        # Column-wise form of entropy(); unlike a row-wise apply it also
        # handles the "no peak passes the cutoffs" case (sum of nothing = 0).
        pip = passing['ip_count'] / float(ip_mapped_num)
        pinp = passing['input_count'] / float(input_mapped_num)
        return (pip * np.log2(pip / pinp)).sum()
    except Exception as e:
        sys.stderr.write("calculate_entropy: {!r}\n".format(e))
        return e


def main():
    """Parse the command line, compute the summed entropy and emit it."""
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--full",
        required=True,
    )
    parser.add_argument(
        "--ip_mapped",
        required=True,
    )
    parser.add_argument(
        "--input_mapped",
        required=True,
    )
    parser.add_argument(
        "--l10p",
        required=False,
        type=float,
        default=3,
        help='Only consider peaks at or above this -log10p-value cutoff.'
    )
    parser.add_argument(
        "--l2fc",
        required=False,
        type=float,
        default=3,
        help='Only consider peaks at or above this log2 fold change cutoff.'
    )
    parser.add_argument(
        "--output",
        required=False,
        default=None,
        help='Write to file, default: stdout'
    )
    # Process arguments
    args = parser.parse_args()

    full = args.full
    ip_mapped = args.ip_mapped
    input_mapped = args.input_mapped
    l10p = args.l10p
    l2fc = args.l2fc
    output = args.output

    # main func
    summed_entropy = sum_entropy(
        full=full,
        ip_mapped=ip_mapped,
        input_mapped=input_mapped,
        l10p=l10p,
        l2fc=l2fc
    )
    if output is None:
        print(summed_entropy)
    else:
        with open(output, 'w') as o:
            o.write("{}".format(summed_entropy))


if __name__ == "__main__":
    main()
#!/usr/bin/env perl

use warnings;
use strict;

### Note 2015/11/12: fixed so that all bed formats are ucsc format (0-based, open ended)

## Compresses an l2foldenr peak file: when two peaks on the same chromosome and
## strand overlap, only the MOST significant one (highest l10p) is kept.
##
## usage: <this script> <l2foldenr peak bed> <output bed>

# Peaks are indexed into bins of this many bases so that overlap checks only
# compare peaks that share at least one bin.
my $hashing_value = 100000;

die "usage: $0 <l2foldenr_peak_file> <output_file>\n" unless (scalar(@ARGV) == 2);

# uses l2foldenr peak files
my $fi = $ARGV[0];
my $output_fi = $ARGV[1];
open(O, ">", $output_fi) or die "cannot open $output_fi for writing: $!\n";

my %peaks2size;
my %peaks2l2fenr;
my %peaks2l10p;
my %peaks2start;
my %read_hash;
&readfi($fi);

for my $chr (keys %read_hash) {
    for my $str ("+","-") {

        my %deleted_peaks;

        # Most significant first: l10p, then l2 fold enrichment, then size,
        # then start position (all descending).
        my @sorted_peaks = sort {$peaks2l10p{$chr}{$str}{$fi}{$b} <=> $peaks2l10p{$chr}{$str}{$fi}{$a} or $peaks2l2fenr{$chr}{$str}{$fi}{$b} <=> $peaks2l2fenr{$chr}{$str}{$fi}{$a} or $peaks2size{$chr}{$str}{$fi}{$b} <=> $peaks2size{$chr}{$str}{$fi}{$a} or $peaks2start{$chr}{$str}{$fi}{$b} <=> $peaks2start{$chr}{$str}{$fi}{$a}} keys %{$peaks2l10p{$chr}{$str}{$fi}};

        for my $peak1 (@sorted_peaks) {
            my $verbose_flag = 0;

            next if (exists $deleted_peaks{$peak1});

            # peak id layout: chr:start-stop:strand:l10p:l2fenr
            my ($p1chr,$p1pos,$p1str,$p1vsinput_l10p,$p1vsinput_l2fenr) = split(/\:/,$peak1);
            my ($p1start,$p1stop) = split(/\-/,$p1pos);

            my $p1x = int($p1start / $hashing_value);
            my $p1y = int( $p1stop / $hashing_value);

            for my $p1i ($p1x..$p1y) {
                for my $tocomp_peak (@{$read_hash{$chr}{$str}{$fi}{$p1i}}) {
                    print STDERR "comparing $peak1 $tocomp_peak\n" if ($verbose_flag == 1);
                    next if (exists $deleted_peaks{$tocomp_peak});
                    next if ($tocomp_peak eq $peak1);

                    my ($p2compchr,$p2comppos,$p2compstr,$p2compvsinput_l10p,$p2compvsinput_l2fenr) = split(/\:/,$tocomp_peak);
                    my ($p2compstart,$p2compstop) = split(/\-/,$p2comppos);

                    # half-open intervals: touching ends do not overlap
                    next if ($p2compstop <= $p1start);
                    next if ($p1stop <= $p2compstart);

                    #peak2 overlaps with peak1 and has a lower l10pval - remove it!
                    if ($p1vsinput_l10p >= $p2compvsinput_l10p) {
                        print STDERR "discarding $tocomp_peak vs $peak1\n" if ($verbose_flag == 1);
                        $deleted_peaks{$tocomp_peak} = 1;
                    } elsif ($p1vsinput_l10p < $p2compvsinput_l10p) {
                        $deleted_peaks{$peak1} = 1;
                        print STDERR "discarding $peak1 vs $tocomp_peak\n" if ($verbose_flag == 1);
                    } else {
                        print STDERR "weird error shouldn't happen $peak1\n";
                    }
                }
            }
        }

        # Emit the survivors in significance order.
        for my $peak (@sorted_peaks) {
            next if (exists $deleted_peaks{$peak});

            my ($p1chr,$p1pos,$p1str,$p1vsinput_l10p,$p1vsinput_l2fenr) = split(/\:/,$peak);
            my ($p1start,$p1stop) = split(/\-/,$p1pos);
            print O "$p1chr\t$p1start\t$p1stop\t$p1vsinput_l10p\t$p1vsinput_l2fenr\t$p1str\n";
        }
    }
}
close(O);


# Reads the peak file (chr, start, stop, l10p, l2fenr, strand) and fills the
# per-peak lookup hashes plus the binned %read_hash index.
sub readfi {
    my $fi = shift;
    open(F, "<", $fi) or die "cannot open $fi: $!\n";
    # The readline operator <F> is required here; an empty list would make the
    # loop a no-op and produce an empty output file.
    for my $line (<F>) {
        chomp($line);

        my @tmp = split(/\t/,$line);

        my $chr = $tmp[0];
        my $str = $tmp[5];
        my $start = $tmp[1];
        my $stop = $tmp[2];
        my $vsinput_l10p = $tmp[3];
        my $vsinput_l2fenr = $tmp[4];

        my $peak_id = $chr.":".$start."-".$stop.":".$str.":".$vsinput_l10p.":".$vsinput_l2fenr;

        $peaks2start{$chr}{$str}{$fi}{$peak_id} = $start;
        $peaks2l10p{$chr}{$str}{$fi}{$peak_id} = $vsinput_l10p;
        $peaks2l2fenr{$chr}{$str}{$fi}{$peak_id} = $vsinput_l2fenr;
        $peaks2size{$chr}{$str}{$fi}{$peak_id} = $stop-$start;

        my $x = int($start / $hashing_value);
        my $y = int( $stop / $hashing_value);

        # register the peak in every bin it touches
        for my $i ($x..$y) {
            push @{$read_hash{$chr}{$str}{$fi}{$i}},$peak_id;
        }
    }
    close(F);
}
import numpy as np
import pandas as pd
import argparse
import os

# Column layout of the input-normalized eCLIP peak BED file.
ECLIP_HEADER = [
    'chrom', 'start', 'end', 'pValue', 'signalValue', 'strand'
]


def combine_pvalue_fold(row):
    """Return ``"<pValue>|<signalValue>"`` for one peak row."""
    return "{}|{}".format(row['pValue'], row['signalValue'])


def fix_bed(bed, fixed_bed):
    """Rewrite an eCLIP peak BED file as chrom/start/end/name/score/strand.

    ``name`` becomes ``pValue|signalValue`` and ``score`` is set to 0, so the
    numeric 4th/5th columns no longer sit where a BED name and score belong.
    An input without any peaks produces an empty output file.
    """
    try:
        peaks = pd.read_csv(bed, names=ECLIP_HEADER, sep='\t')
    except pd.errors.EmptyDataError:
        peaks = pd.DataFrame(columns=ECLIP_HEADER)

    # A row-wise apply on an empty frame returns a DataFrame, which cannot be
    # assigned to the single 'name' column; write the empty result directly.
    if peaks.empty:
        with open(fixed_bed, 'w'):
            pass
        return

    peaks['name'] = peaks.apply(combine_pvalue_fold, axis=1)
    peaks['score'] = 0

    peaks[[
        'chrom', 'start', 'end', 'name', 'score', 'strand'
    ]].to_csv(
        fixed_bed,
        sep='\t',
        header=False,
        index=False
    )


def main():
    """Parse the command line and run the BED fix-up."""
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--input_bed",
        required=True,
    )
    parser.add_argument(
        "--output_fixed_bed",
        required=True,
    )

    # Process arguments
    args = parser.parse_args()
    bed = args.input_bed
    output_fixed_bed = args.output_fixed_bed

    # main func
    fix_bed(bed, output_fixed_bed)


if __name__ == "__main__":
    main()
11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "import os\n", 20 | "import glob\n", 21 | "import pandas as pd\n", 22 | "import numpy as np" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "output_dir = '/projects/ps-yeolab4/software/eclip/0.5.0/examples/inputs/'" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "# Notes from Eric:\n", 39 | "```\n", 40 | "You had tiles of:\n", 41 | "\n", 42 | "GATCGGAAGAGCACACGTCTGAACTCCAGTCAC\n", 43 | "\n", 44 | "Need to do tiles of:\n", 45 | "\n", 46 | "InvRNA1 NNAGCGCTAG A GATCGGAAGAGCACACGTCTGAACTCCAGTCAC\n", 47 | "InvRNA2 NNGATATCGA A GATCGGAAGAGCACACGTCTGAACTCCAGTCAC\n", 48 | "InvRNA3 NNCGCAGACG A GATCGGAAGAGCACACGTCTGAACTCCAGTCAC\n", 49 | "InvRNA4 NNTATGAGTA A GATCGGAAGAGCACACGTCTGAACTCCAGTCAC\n", 50 | "InvRNA5 NNAGGTGCGT A GATCGGAAGAGCACACGTCTGAACTCCAGTCAC\n", 51 | "InvRNA6 NNGAACATAC A GATCGGAAGAGCACACGTCTGAACTCCAGTCAC\n", 52 | "InvRNA7 NNACATAGCG A GATCGGAAGAGCACACGTCTGAACTCCAGTCAC\n", 53 | "InvRNA8 NNGTGCGATA A GATCGGAAGAGCACACGTCTGAACTCCAGTCAC\n", 54 | "```" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 3, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "adapter_sequences = {\n", 64 | " 'InvRNA1':\"NNAGCGCTAGAGATCGGAAGAGCACACGTCTGAACTCCAGTCAC\",\n", 65 | " 'InvRNA2':\"NNGATATCGAAGATCGGAAGAGCACACGTCTGAACTCCAGTCAC\",\n", 66 | " 'InvRNA3':\"NNCGCAGACGAGATCGGAAGAGCACACGTCTGAACTCCAGTCAC\",\n", 67 | " 'InvRNA4':\"NNTATGAGTAAGATCGGAAGAGCACACGTCTGAACTCCAGTCAC\",\n", 68 | " 'InvRNA5':\"NNAGGTGCGTAGATCGGAAGAGCACACGTCTGAACTCCAGTCAC\",\n", 69 | " 'InvRNA6':\"NNGAACATACAGATCGGAAGAGCACACGTCTGAACTCCAGTCAC\",\n", 70 | " 'InvRNA7':\"NNACATAGCGAGATCGGAAGAGCACACGTCTGAACTCCAGTCAC\",\n", 71 | " 'InvRNA8':\"NNGTGCGATAAGATCGGAAGAGCACACGTCTGAACTCCAGTCAC\"\n", 72 | "}" 73 | ] 74 | 
}, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 4, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "word_len = 15\n", 82 | "\n", 83 | "for name, sequence in adapter_sequences.items():\n", 84 | " offset = 0\n", 85 | " with open(os.path.join(output_dir, '{}_adapters.fasta'.format(name)), 'w') as f:\n", 86 | " for counter in range(len(sequence) - word_len):\n", 87 | " f.write(\n", 88 | " \">{}_{}\\n{}\\n\".format(\n", 89 | " name,\n", 90 | " offset,\n", 91 | " sequence[offset:offset+word_len]\n", 92 | " )\n", 93 | " )\n", 94 | " offset += 1" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [] 103 | } 104 | ], 105 | "metadata": { 106 | "kernelspec": { 107 | "display_name": "python3-essential", 108 | "language": "python", 109 | "name": "python3-essential" 110 | }, 111 | "language_info": { 112 | "codemirror_mode": { 113 | "name": "ipython", 114 | "version": 3 115 | }, 116 | "file_extension": ".py", 117 | "mimetype": "text/x-python", 118 | "name": "python", 119 | "nbconvert_exporter": "python", 120 | "pygments_lexer": "ipython3", 121 | "version": "3.6.7" 122 | } 123 | }, 124 | "nbformat": 4, 125 | "nbformat_minor": 2 126 | } 127 | -------------------------------------------------------------------------------- /bin/overlap_peakfi_with_bam_PE.pl: -------------------------------------------------------------------------------- 1 | overlap_peakfi_with_bam.pl -------------------------------------------------------------------------------- /cwl/barcodecollapse_pe.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | ### doc: "collapses eCLIP barcodes to remove PCR duplicates" ### 4 | 5 | cwlVersion: v1.0 6 | 7 | class: CommandLineTool 8 | 9 | requirements: 10 | - class: ResourceRequirement 11 | coresMin: 1 12 | ramMin: 16000 13 | 14 | hints: 15 | - class: DockerRequirement 16 | dockerPull: 
brianyee/eclip:0.7.0_python 17 | 18 | baseCommand: [barcodecollapsepe.py] 19 | 20 | arguments: [ 21 | "-o", 22 | $(inputs.input_barcodecollapsepe_bam.nameroot).rmDup.bam, 23 | "-m", 24 | $(inputs.input_barcodecollapsepe_bam.nameroot).rmDup.metrics 25 | ] 26 | 27 | inputs: 28 | 29 | input_barcodecollapsepe_bam: 30 | type: File 31 | 32 | inputBinding: 33 | position: 1 34 | prefix: -b 35 | label: "" 36 | doc: "input bam to barcode collapse. NOTE: no use for a bai index file!" 37 | 38 | outputs: 39 | 40 | output_barcodecollapsepe_bam: 41 | type: File 42 | outputBinding: 43 | glob: $(inputs.input_barcodecollapsepe_bam.nameroot).rmDup.bam 44 | label: "" 45 | doc: "barcode collapsed mappings bam " 46 | 47 | output_barcodecollapsepe_metrics: 48 | type: File 49 | outputBinding: 50 | glob: $(inputs.input_barcodecollapsepe_bam.nameroot).rmDup.metrics 51 | label: "" 52 | doc: "barcode collapse metrics" 53 | 54 | doc: | 55 | This tool wraps barcodecollapsepe.py, a paired-end PCR duplicate removal script 56 | which reads in a .bam file where the first string left of : split of the read name is the barcode 57 | and merges reads mapped to the same position that have the same barcode. 
58 | Assumes paired end reads are adjacent in output file (ie needs unsorted bams) 59 | Also assumes no multimappers in the bam file (otherwise behavior is undefined) 60 | Usage: python barcodecollapsepe.py --bam BAM --out_file OUT_FILE --metrics_file METRICS_FILE 61 | -------------------------------------------------------------------------------- /cwl/barcodecollapse_se.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | 3 | ### doc: "collapses eCLIP barcodes to remove PCR duplicates" ### 4 | 5 | cwlVersion: v1.0 6 | 7 | class: CommandLineTool 8 | 9 | requirements: 10 | - class: InlineJavascriptRequirement 11 | - class: ResourceRequirement 12 | coresMin: 1 13 | ramMin: 32000 14 | 15 | hints: 16 | - class: DockerRequirement 17 | dockerPull: brianyee/umi_tools:1.0.0 18 | 19 | baseCommand: [umi_tools, dedup] 20 | 21 | arguments: ["--random-seed", "1"] 22 | 23 | inputs: 24 | 25 | input_barcodecollapsese_bam: 26 | type: File 27 | inputBinding: 28 | position: 1 29 | prefix: -I 30 | label: "" 31 | doc: "input bam to barcode collapse. NOTE: no use for a bai index file!" 
32 | secondaryFiles: [.bai] 33 | 34 | output_stats: 35 | default: "" 36 | type: string 37 | inputBinding: 38 | position: 1 39 | prefix: --output-stats 40 | valueFrom: | 41 | ${ 42 | if (inputs.output_stats == "") { 43 | return inputs.input_barcodecollapsese_bam.nameroot; 44 | } 45 | else { 46 | return inputs.output_stats; 47 | } 48 | } 49 | label: "" 50 | doc: "stats i guess" 51 | 52 | method: 53 | default: "unique" 54 | type: string 55 | inputBinding: 56 | position: 1 57 | prefix: --method 58 | 59 | collapsed_bam: 60 | type: string 61 | default: "" 62 | inputBinding: 63 | position: 2 64 | prefix: -S 65 | valueFrom: | 66 | ${ 67 | if (inputs.collapsed_bam == "") { 68 | return inputs.input_barcodecollapsese_bam.nameroot + ".rmDup.bam"; 69 | } 70 | else { 71 | return inputs.collapsed_bam; 72 | } 73 | } 74 | label: "" 75 | doc: "input bam to barcode collapse. NOTE: no use for a bai index file!" 76 | 77 | outputs: 78 | 79 | output_barcodecollapsese_bam: 80 | type: File 81 | outputBinding: 82 | glob: | 83 | ${ 84 | if (inputs.collapsed_bam == "") { 85 | return inputs.input_barcodecollapsese_bam.nameroot + ".rmDup.bam"; 86 | } 87 | else { 88 | return inputs.collapsed_bam; 89 | } 90 | } 91 | label: "" 92 | doc: "barcode collapsed mappings bam " 93 | 94 | output_barcodecollapsese_metrics: 95 | type: File 96 | outputBinding: 97 | glob: | 98 | ${ 99 | if (inputs.output_stats == "") { 100 | return inputs.input_barcodecollapsese_bam.nameroot + "_per_umi.tsv"; 101 | } 102 | else { 103 | return inputs.output_stats; 104 | } 105 | } 106 | label: "" 107 | doc: "barcode collapsed mappings stats " 108 | 109 | doc: | 110 | The purpose of this command is to deduplicate BAM files based 111 | on the first mapping co-ordinate and the UMI attached to the read. 112 | It is assumed that the FASTQ files were processed with extract_umi.py 113 | before mapping and thus the UMI is the last word of the read name. 
e.g: 114 | 115 | @HISEQ:87:00000000_AATT 116 | 117 | where AATT is the UMI sequence. 118 | 119 | Usage: umi_tools dedup -I infile.bam -S deduped.bam -L dedup.log 120 | -------------------------------------------------------------------------------- /cwl/barcodecollapse_se_nostats.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | ### doc: "collapses eCLIP barcodes to remove PCR duplicates" ### 4 | 5 | cwlVersion: v1.0 6 | 7 | class: CommandLineTool 8 | 9 | requirements: 10 | - class: InlineJavascriptRequirement 11 | - class: ResourceRequirement 12 | coresMin: 1 13 | ramMin: 32000 14 | 15 | hints: 16 | - class: DockerRequirement 17 | dockerPull: brianyee/umi_tools:1.0.0 18 | 19 | baseCommand: [umi_tools, dedup] 20 | 21 | arguments: ["--random-seed", "1"] 22 | 23 | inputs: 24 | 25 | input_barcodecollapsese_bam: 26 | type: File 27 | inputBinding: 28 | position: 1 29 | prefix: -I 30 | label: "" 31 | doc: "input bam to barcode collapse. NOTE: no use for a bai index file!" 32 | secondaryFiles: [.bai] 33 | 34 | # output_stats: 35 | # default: "" 36 | # type: string 37 | # inputBinding: 38 | # position: 1 39 | # prefix: --output-stats 40 | # valueFrom: | 41 | # ${ 42 | # if (inputs.output_stats == "") { 43 | # return inputs.input_barcodecollapsese_bam.nameroot; 44 | # } 45 | # else { 46 | # return inputs.output_stats; 47 | # } 48 | # } 49 | # label: "" 50 | # doc: "stats i guess" 51 | 52 | method: 53 | default: "unique" 54 | type: string 55 | inputBinding: 56 | position: 1 57 | prefix: --method 58 | 59 | collapsed_bam: 60 | type: string 61 | default: "" 62 | inputBinding: 63 | position: 2 64 | prefix: -S 65 | valueFrom: | 66 | ${ 67 | if (inputs.collapsed_bam == "") { 68 | return inputs.input_barcodecollapsese_bam.nameroot + ".rmDup.bam"; 69 | } 70 | else { 71 | return inputs.collapsed_bam; 72 | } 73 | } 74 | label: "" 75 | doc: "input bam to barcode collapse. NOTE: no use for a bai index file!" 
76 | 77 | outputs: 78 | 79 | output_barcodecollapsese_bam: 80 | type: File 81 | outputBinding: 82 | glob: | 83 | ${ 84 | if (inputs.collapsed_bam == "") { 85 | return inputs.input_barcodecollapsese_bam.nameroot + ".rmDup.bam"; 86 | } 87 | else { 88 | return inputs.collapsed_bam; 89 | } 90 | } 91 | label: "" 92 | doc: "barcode collapsed mappings bam " 93 | 94 | 95 | # output_barcodecollapsese_metrics: 96 | # type: File 97 | # outputBinding: 98 | # glob: | 99 | # ${ 100 | # if (inputs.output_stats == "") { 101 | # return inputs.input_barcodecollapsese_bam.nameroot + "_per_umi.tsv"; 102 | # } 103 | # else { 104 | # return inputs.output_stats; 105 | # } 106 | # } 107 | # label: "" 108 | # doc: "barcode collapsed mappings stats " 109 | 110 | doc: | 111 | The purpose of this command is to deduplicate BAM files based 112 | on the first mapping co-ordinate and the UMI attached to the read. 113 | It is assumed that the FASTQ files were processed with extract_umi.py 114 | before mapping and thus the UMI is the last word of the read name. e.g: 115 | 116 | @HISEQ:87:00000000_AATT 117 | 118 | where AATT is the UMI sequence. 
119 | 120 | Usage: umi_tools dedup -I infile.bam -S deduped.bam -L dedup.log 121 | -------------------------------------------------------------------------------- /cwl/bed_to_bigbed.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | 3 | ### doc: "Convert peak bed to bigBed" ### 4 | 5 | cwlVersion: v1.0 6 | 7 | class: CommandLineTool 8 | 9 | requirements: 10 | - class: ResourceRequirement 11 | coresMin: 1 12 | coresMax: 16 13 | - class: InlineJavascriptRequirement 14 | hints: 15 | - class: DockerRequirement 16 | dockerPull: brianyee/ucsc-tools:377 17 | 18 | baseCommand: [bedToBigBed] 19 | 20 | inputs: 21 | 22 | input_bed: 23 | type: File 24 | inputBinding: 25 | position: 1 26 | label: "" 27 | 28 | chrom_sizes: 29 | type: File 30 | inputBinding: 31 | position: 2 32 | 33 | output_bb_filename: 34 | type: string 35 | default: "" 36 | inputBinding: 37 | position: 3 38 | valueFrom: | 39 | ${ 40 | if (inputs.output_bb_filename == "") { 41 | return inputs.input_bed.nameroot + ".bb"; 42 | } 43 | else { 44 | return inputs.output_bb_filename; 45 | } 46 | } 47 | 48 | outputs: 49 | 50 | output_bigbed: 51 | type: File 52 | outputBinding: 53 | glob: | 54 | ${ 55 | if (inputs.output_bb_filename == "") { 56 | return inputs.input_bed.nameroot + ".bb"; 57 | } 58 | else { 59 | return inputs.output_bb_filename; 60 | } 61 | } 62 | label: "" 63 | doc: "" 64 | 65 | doc: | 66 | This tool converts an input-normalized eCLIP peaks file (BED6) into a bigbed (bb) file. 
67 | -------------------------------------------------------------------------------- /cwl/bed_to_narrowpeak.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | cwlVersion: v1.0 4 | class: CommandLineTool 5 | 6 | requirements: 7 | - class: ResourceRequirement 8 | coresMin: 1 9 | ramMin: 8000 10 | 11 | hints: 12 | - class: DockerRequirement 13 | dockerPull: brianyee/eclip:0.7.0_python 14 | 15 | baseCommand: [bed_to_narrowpeak.py] 16 | 17 | arguments: [ 18 | "--output_narrowpeak", 19 | $(inputs.input_bed.nameroot).narrowPeak 20 | ] 21 | 22 | inputs: 23 | 24 | input_bed: 25 | type: File 26 | inputBinding: 27 | position: 1 28 | prefix: --input_bed 29 | label: "" 30 | doc: "input bed to convert to narrowPeak format. Must be ECLIP-style input-normed format! (log10p in col4, log2fold in col5)" 31 | 32 | species: 33 | type: string 34 | inputBinding: 35 | position: 2 36 | prefix: --species 37 | 38 | outputs: 39 | 40 | output_narrowpeak: 41 | type: File 42 | outputBinding: 43 | glob: $(inputs.input_bed.nameroot).narrowPeak 44 | label: "" 45 | doc: "eCLIP peaks in narrowPeak format" 46 | 47 | doc: | 48 | This tool converts an input-normalized eCLIP peaks file (BED6) into a narrowPeak format for encode DCC. 
49 | cols 9 and 10 are just blank, col 5 is 1000 for things that meet the >=3 l2fc and l10pval cutoffs and 200 otherwise (it’s just for ucsc track coloring) 50 | -------------------------------------------------------------------------------- /cwl/blacklist-remove.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | cwlVersion: v1.0 4 | 5 | class: CommandLineTool 6 | 7 | requirements: 8 | - class: InlineJavascriptRequirement 9 | - class: ResourceRequirement 10 | coresMin: 1 11 | ramMin: 8000 12 | 13 | hints: 14 | - class: DockerRequirement 15 | dockerPull: brianyee/bedtools:2.27.1 16 | 17 | baseCommand: [bedtools, intersect] 18 | 19 | arguments: [ 20 | "-v", 21 | "-s", 22 | ] 23 | 24 | inputs: 25 | 26 | input_bed: 27 | type: File 28 | inputBinding: 29 | position: 1 30 | prefix: -a 31 | 32 | blacklist_file: 33 | type: File 34 | inputBinding: 35 | position: 2 36 | prefix: -b 37 | 38 | stdout: $(inputs.input_bed.nameroot).blacklist-removed.bed 39 | 40 | outputs: 41 | 42 | output_blacklist_removed_bed: 43 | type: File 44 | outputBinding: 45 | glob: $(inputs.input_bed.nameroot).blacklist-removed.bed 46 | 47 | doc: | 48 | Given a list of 'blacklist' regions, remove those regions from an input BED file 49 | This tool wraps bedtools intersect -v to remove blacklist regions 50 | -------------------------------------------------------------------------------- /cwl/calculate_entropy.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | 3 | ### doc: "Computes and sums entropy values for significant peaks" ### 4 | 5 | cwlVersion: v1.0 6 | class: CommandLineTool 7 | 8 | requirements: 9 | - class: ResourceRequirement 10 | coresMin: 1 11 | ramMin: 1000 12 | 13 | hints: 14 | - class: DockerRequirement 15 | dockerPull: brianyee/eclip:0.7.0_python 16 | 17 | baseCommand: [calculate_entropy.py] 18 | 19 | inputs: 20 | 21 | full: 22 | type: File 23 | inputBinding: 24 | position: 1 25 | prefix: 
--full 26 | label: "" 27 | doc: "output full file from overlap_peakfi_with_bam.pl (should contain number of reads per peak)" 28 | ip_mapped: 29 | type: File 30 | inputBinding: 31 | position: 2 32 | prefix: --ip_mapped 33 | label: "" 34 | doc: "File containing a single number corresponding to the number of mapped reads in IP" 35 | input_mapped: 36 | type: File 37 | inputBinding: 38 | position: 3 39 | prefix: --input_mapped 40 | label: "" 41 | doc: "File containing a single number corresponding to the number of mapped reads in INPUT" 42 | 43 | arguments: [ 44 | "--output", 45 | $(inputs.full.nameroot).entropynum 46 | ] 47 | 48 | outputs: 49 | 50 | output_entropynum: 51 | type: File 52 | outputBinding: 53 | glob: $(inputs.full.nameroot).entropynum 54 | label: "" 55 | doc: "File containing the sum entropy value" 56 | 57 | doc: | 58 | This tool computes and sums the entropy values for significant peaks (l10p >=3 and l2fc >=3). 59 | Returns the number as a file. 60 | -------------------------------------------------------------------------------- /cwl/clipper.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | cwlVersion: v1.0 4 | 5 | class: CommandLineTool 6 | 7 | requirements: 8 | - class: InlineJavascriptRequirement 9 | - class: ResourceRequirement 10 | coresMin: 8 11 | ramMin: 32000 12 | 13 | hints: 14 | - class: DockerRequirement 15 | dockerPull: brianyee/clipper:5d865bb 16 | 17 | baseCommand: [clipper] 18 | 19 | inputs: 20 | 21 | species: 22 | type: string 23 | inputBinding: 24 | position: 0 25 | prefix: --species 26 | doc: "species: one of ce10 ce11 dm3 hg19 GRCh38 mm9 mm10 GRCh38_pU6 GRCh38_v29 GRCh38_v29e hg19_VSV" 27 | 28 | bam: 29 | type: File 30 | inputBinding: 31 | position: 1 32 | prefix: --bam 33 | 34 | gene: 35 | type: string? 
36 | inputBinding: 37 | position: 8 38 | prefix: --gene 39 | 40 | outfile: 41 | type: string 42 | default: "" 43 | inputBinding: 44 | position: 10 45 | prefix: --outfile 46 | valueFrom: | 47 | ${ 48 | if (inputs.outfile == "") { 49 | return inputs.bam.nameroot + ".peakClusters.bed"; 50 | } 51 | else { 52 | return inputs.outfile; 53 | } 54 | } 55 | 56 | outputs: 57 | 58 | output_bed: 59 | type: File 60 | outputBinding: 61 | glob: | 62 | ${ 63 | if (inputs.outfile == "") { 64 | return inputs.bam.nameroot + ".peakClusters.bed"; 65 | } 66 | else { 67 | return inputs.outfile; 68 | } 69 | } 70 | 71 | doc: | 72 | CLIPper is a tool to define peaks in your CLIP-seq dataset. 73 | CLIPper was developed in the Yeo Lab at the University of California, San Diego. 74 | Usage: clipper --bam CLIP-seq_reads.srt.bam --species hg19 --outfile CLIP-seq_reads.srt.peaks.bed 75 | -------------------------------------------------------------------------------- /cwl/demux_pe.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | 3 | ### doc: "demultiplexes a paired-end eCLIP set of reads according to the specified barcode and barcode file." 
### 4 | 5 | cwlVersion: v1.0 6 | class: CommandLineTool 7 | 8 | requirements: 9 | - class: ResourceRequirement 10 | coresMin: 1 11 | ramMin: 8000 12 | 13 | hints: 14 | - class: DockerRequirement 15 | dockerPull: brianyee/eclipdemux:0.0.1 16 | 17 | baseCommand: [eclipdemux] 18 | 19 | arguments: ["--metrics", 20 | $(inputs.dataset).$(inputs.reads.name).---.--.metrics, 21 | "--expectedbarcodeida", 22 | "$(inputs.reads.barcodeids[0])", 23 | "--expectedbarcodeidb", 24 | "$(inputs.reads.barcodeids[1])" 25 | ] 26 | 27 | inputs: 28 | 29 | barcodesfasta: 30 | type: File 31 | inputBinding: 32 | position: 6 33 | prefix: --barcodesfile 34 | 35 | randomer_length: 36 | type: string 37 | # default: "10" 38 | inputBinding: 39 | position: 7 40 | prefix: --length 41 | doc: "randomer length" 42 | 43 | dataset: 44 | type: string 45 | inputBinding: 46 | position: 5 47 | prefix: --dataset 48 | 49 | reads: 50 | type: 51 | type: record 52 | #name: reads 53 | fields: 54 | read1: 55 | type: File 56 | inputBinding: 57 | position: 1 58 | prefix: --fastq_1 59 | read2: 60 | type: File 61 | inputBinding: 62 | position: 2 63 | prefix: --fastq_2 64 | barcodeids: 65 | type: string[] 66 | #default: [NIL, NIL] 67 | #inputBinding: 68 | # position: 3 69 | # prefix: --expectedbarcodeids 70 | name: 71 | type: string 72 | inputBinding: 73 | position: 4 74 | prefix: --newname 75 | 76 | 77 | outputs: 78 | 79 | output_dataset: 80 | type: string 81 | outputBinding: 82 | glob: $(inputs.dataset) 83 | loadContents: true 84 | outputEval: $(self[0].contents) 85 | name: 86 | type: string 87 | outputBinding: 88 | glob: $(inputs.reads.name) 89 | loadContents: true 90 | outputEval: $(self[0].contents) 91 | barcodeidA: 92 | type: string 93 | outputBinding: 94 | glob: $(inputs.reads.barcodeids[0]) 95 | loadContents: true 96 | outputEval: $(self[0].contents) 97 | barcodeidB: 98 | type: string 99 | outputBinding: 100 | glob: $(inputs.reads.barcodeids[1]) 101 | loadContents: true 102 | outputEval: $(self[0].contents) 103 | 
104 | demuxedAfwd: 105 | type: File 106 | outputBinding: 107 | glob: $(inputs.dataset).$(inputs.reads.name).$(inputs.reads.barcodeids[0]).r1.fq.gz 108 | demuxedArev: 109 | type: File 110 | outputBinding: 111 | glob: $(inputs.dataset).$(inputs.reads.name).$(inputs.reads.barcodeids[0]).r2.fq.gz 112 | demuxedBfwd: 113 | type: File 114 | outputBinding: 115 | glob: $(inputs.dataset).$(inputs.reads.name).$(inputs.reads.barcodeids[1]).r1.fq.gz 116 | demuxedBrev: 117 | type: File 118 | outputBinding: 119 | glob: $(inputs.dataset).$(inputs.reads.name).$(inputs.reads.barcodeids[1]).r2.fq.gz 120 | 121 | output_demuxedpairedend_metrics: 122 | type: File 123 | outputBinding: 124 | glob: $(inputs.dataset).$(inputs.reads.name).---.--.metrics 125 | label: "" 126 | doc: "demuxedpairedend metrics" 127 | 128 | doc: | 129 | demultiplex utility for paired-end eCLIP raw fastq files (process eCLIP barcodes and randomers) 130 | See: https://github.com/YeoLab/eclipdemux for full code and documentation 131 | Usage: eclipdemux --dataset DATASET_ID --metrics METRICS_FILE --fastq_1 READ_1 --fastq_2 READ_2 --expectedbarcodeida BARCODE_A --expectedbarcodeidb BARCODE_B --barcodesfile BARCODES_FASTA --length LENGTH 132 | -------------------------------------------------------------------------------- /cwl/demux_se.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | ### doc: "Doesn't actually demultiplex!!!" 
### 4 | ### just trims the first 10 bases, but named as such to match the demux_pe step ### 5 | 6 | cwlVersion: v1.0 7 | class: CommandLineTool 8 | 9 | requirements: 10 | - class: InlineJavascriptRequirement 11 | - class: ResourceRequirement 12 | coresMin: 1 13 | ramMin: 8000 14 | 15 | hints: 16 | - class: DockerRequirement 17 | dockerPull: brianyee/umi_tools:1.0.0 18 | 19 | baseCommand: [umi_tools, extract] 20 | arguments: ["--random-seed", "1"] 21 | inputs: 22 | 23 | # stdin: 24 | # type: File 25 | # inputBinding: 26 | # position: 1 27 | # prefix: --stdin 28 | 29 | bc_pattern: 30 | type: string 31 | default: "NNNNNNNNNN" 32 | inputBinding: 33 | position: 2 34 | prefix: --bc-pattern 35 | doc: "10 nt randomer" 36 | 37 | log: 38 | type: string 39 | default: "" 40 | inputBinding: 41 | position: 3 42 | prefix: --log 43 | valueFrom: | 44 | ${ 45 | if (inputs.log == "") { 46 | return inputs.dataset + "." + inputs.reads.name + ".---.--.metrics"; 47 | } 48 | else { 49 | return inputs.log; 50 | } 51 | } 52 | 53 | dataset: 54 | type: string 55 | inputBinding: 56 | position: 4 57 | 58 | stdout: 59 | type: string 60 | default: "" 61 | inputBinding: 62 | position: 4 63 | prefix: --stdout 64 | valueFrom: | 65 | ${ 66 | if (inputs.stdout == "") { 67 | return inputs.dataset + "." 
+ inputs.reads.name + ".umi.r1.fq"; 68 | } 69 | else { 70 | return inputs.stdout; 71 | } 72 | } 73 | 74 | reads: 75 | type: 76 | type: record 77 | fields: 78 | read1: 79 | type: File 80 | inputBinding: 81 | position: 1 82 | prefix: --stdin 83 | name: 84 | type: string 85 | 86 | outputs: 87 | 88 | demuxedAfwd: 89 | type: File 90 | outputBinding: 91 | glob: $(inputs.dataset).$(inputs.reads.name).umi.r1.fq 92 | 93 | output_demuxedsingleend_metrics: 94 | type: File 95 | outputBinding: 96 | glob: $(inputs.dataset).$(inputs.reads.name).---.--.metrics 97 | label: "" 98 | doc: "demuxed se metrics" 99 | 100 | output_dataset: 101 | type: string 102 | outputBinding: 103 | loadContents: true 104 | outputEval: $(inputs.dataset) 105 | doc: "just passes output dataset string to output to match with PE demux" 106 | 107 | name: 108 | type: string 109 | outputBinding: 110 | loadContents: true 111 | outputEval: $(inputs.reads.name) 112 | doc: "just passes output name string to output to match with PE demux" 113 | 114 | # prefix: 115 | # type: string 116 | # outputBinding: 117 | # loadContents: true 118 | # outputEval: $(inputs.dataset).$(inputs.reads.name) 119 | # doc: "added to make the renaming step easier" 120 | 121 | doc: | 122 | Extract UMI barcode from a read and add it to the read name, leaving 123 | any sample barcode in place. Can deal with paired end reads and UMIs 124 | split across the paired ends. For eCLIP single-end processing, this step just 125 | trims the first 10 bases, but named as such to match the demux_pe step. 126 | 127 | Usage: umi_tools extract --bc-pattern=[PATTERN] -L extract.log [OPTIONS] 128 | -------------------------------------------------------------------------------- /cwl/demux_targeted_mir_se.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | ### doc: "Doesn't actually demultiplex!!!" 
### 4 | ### just trims the first 10 bases, but named as such to match the demux_pe step ### 5 | 6 | cwlVersion: v1.0 7 | class: CommandLineTool 8 | 9 | requirements: 10 | - class: InlineJavascriptRequirement 11 | - class: ResourceRequirement 12 | coresMin: 1 13 | 14 | hints: 15 | - class: DockerRequirement 16 | dockerPull: brianyee/umi_tools:1.0.0 17 | 18 | baseCommand: [cat] 19 | inputs: 20 | 21 | dataset: 22 | type: string 23 | 24 | reads: 25 | type: 26 | type: record 27 | fields: 28 | read1: 29 | type: File 30 | inputBinding: 31 | position: 1 32 | name: 33 | type: string 34 | 35 | stdout: 36 | type: string 37 | default: "" 38 | inputBinding: 39 | position: 2 40 | valueFrom: | 41 | ${ 42 | if (inputs.stdout == "") { 43 | return inputs.dataset + "." + inputs.reads.name + ".umi.r1.fq"; 44 | } 45 | else { 46 | return inputs.stdout; 47 | } 48 | } 49 | 50 | 51 | 52 | outputs: 53 | 54 | demuxedAfwd: 55 | type: File 56 | outputBinding: 57 | glob: $(inputs.dataset).$(inputs.reads.name).umi.r1.fq 58 | 59 | output_demuxedsingleend_metrics: 60 | type: File 61 | outputBinding: 62 | glob: $(inputs.dataset).$(inputs.reads.name).---.--.metrics 63 | label: "" 64 | doc: "demuxed se metrics" 65 | 66 | output_dataset: 67 | type: string 68 | outputBinding: 69 | loadContents: true 70 | outputEval: $(inputs.dataset) 71 | doc: "just passes output dataset string to output to match with PE demux" 72 | 73 | name: 74 | type: string 75 | outputBinding: 76 | loadContents: true 77 | outputEval: $(inputs.reads.name) 78 | doc: "just passes output name string to output to match with PE demux" 79 | 80 | # prefix: 81 | # type: string 82 | # outputBinding: 83 | # loadContents: true 84 | # outputEval: $(inputs.dataset).$(inputs.reads.name) 85 | # doc: "added to make the renaming step easier" 86 | 87 | doc: | 88 | Extract UMI barcode from a read and add it to the read name, leaving 89 | any sample barcode in place. Can deal with paired end reads and UMIs 90 | split across the paired ends. 
For eCLIP single-end processing, this step just 91 | trims the first 10 bases, but named as such to match the demux_pe step. 92 | 93 | Usage: umi_tools extract --bc-pattern=[PATTERN] -L extract.log [OPTIONS] 94 | -------------------------------------------------------------------------------- /cwl/fastqc.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | cwlVersion: v1.0 4 | 5 | class: CommandLineTool 6 | 7 | requirements: 8 | - class: ResourceRequirement 9 | coresMin: 2 10 | ramMin: 8000 11 | 12 | hints: 13 | - class: DockerRequirement 14 | dockerPull: brianyee/fastqc:0.11.8 15 | 16 | baseCommand: [fastqc, -t, "2", --extract, -k, "7"] 17 | 18 | inputs: 19 | 20 | output_postfix: 21 | type: string 22 | default: . 23 | inputBinding: 24 | position: 1 25 | prefix: -o 26 | label: "" 27 | doc: "" 28 | 29 | reads: 30 | type: File 31 | inputBinding: 32 | position: 1 33 | label: "" 34 | doc: "" 35 | 36 | outputs: 37 | 38 | output_qc_report: 39 | type: File 40 | outputBinding: 41 | glob: "*/fastqc_report.html" 42 | 43 | output_qc_stats: 44 | type: File 45 | outputBinding: 46 | glob: "*/fastqc_data.txt" 47 | -------------------------------------------------------------------------------- /cwl/fastqsort.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | ### doc: "Sorts fastq file by read name." 
### 4 | 5 | cwlVersion: v1.0 6 | class: CommandLineTool 7 | 8 | requirements: 9 | - class: InlineJavascriptRequirement 10 | - class: ResourceRequirement 11 | coresMin: 1 12 | ramMin: 8000 13 | 14 | hints: 15 | - class: DockerRequirement 16 | dockerPull: brianyee/fastq-tools:0.8 17 | 18 | baseCommand: [fastq-sort] 19 | 20 | #hints: 21 | # 22 | # - class: ex:ScriptRequirement 23 | # scriptlines: 24 | # - "#!/bin/bash" 25 | 26 | inputs: 27 | 28 | input_fastqsort_fastq: 29 | type: File 30 | # format: http://edamontology.org/format_1930 31 | inputBinding: 32 | position: 1 33 | prefix: --id 34 | label: "" 35 | doc: "input fastq" 36 | 37 | output_fastqsort_fastq: 38 | type: string 39 | default: "" 40 | 41 | # stdout: $(inputs.input_fastqsort_fastq.basename)So.fq 42 | stdout: ${ 43 | if (inputs.output_fastqsort_fastq == "") { 44 | return inputs.input_fastqsort_fastq.nameroot + ".sorted.fq"; 45 | } 46 | else { 47 | return inputs.output_fastqsort_fastq; 48 | } 49 | } 50 | 51 | outputs: 52 | 53 | output_fastqsort_sortedfastq: 54 | type: File 55 | # format: http://edamontology.org/format_1930 56 | outputBinding: 57 | # glob: $(inputs.output_fastqsort_filename) 58 | # glob: $(inputs.input_fastqsort_fastq.basename)So.fq 59 | glob: | 60 | ${ 61 | if (inputs.output_fastqsort_fastq == "") { 62 | return inputs.input_fastqsort_fastq.nameroot + ".sorted.fq"; 63 | } 64 | else { 65 | return inputs.output_fastqsort_fastq; 66 | } 67 | } 68 | label: "" 69 | doc: "sorted fastq" 70 | 71 | doc: | 72 | Sorts FASTQ files by their read name. Sorted fastq files are required to keep mapping steps 73 | deterministic. 
74 | 75 | Usage: fastq-sort --id FASTQ_FILE > STDOUT 76 | -------------------------------------------------------------------------------- /cwl/file2string.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | ### doc: "returns string expression based on file contents" ### 4 | 5 | cwlVersion: v1.0 6 | class: ExpressionTool 7 | 8 | requirements: 9 | - class: InlineJavascriptRequirement 10 | 11 | inputs: 12 | file: 13 | type: File 14 | inputBinding: 15 | loadContents: true 16 | 17 | outputs: 18 | output: 19 | type: string 20 | 21 | expression: "${return {'output':inputs.file.contents}; }" 22 | 23 | doc: | 24 | Returns string expression based on file contents. -------------------------------------------------------------------------------- /cwl/file2stringArray.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | ### doc: "Returns string array expression based on lines in a file" ### 4 | 5 | cwlVersion: v1.0 6 | class: ExpressionTool 7 | 8 | requirements: 9 | - class: InlineJavascriptRequirement 10 | 11 | inputs: 12 | file: 13 | type: File 14 | inputBinding: 15 | loadContents: true 16 | 17 | outputs: 18 | output: 19 | type: string[] 20 | 21 | expression: "${var lines=inputs.file.contents.split('\\n'); 22 | var seqs = []; 23 | for(var line = 0; line < lines.length; line++) { 24 | if(lines[line][0] != '>') { 25 | if (!lines[line] || 0 === lines[line].length) { 26 | 27 | } 28 | else { 29 | seqs.push(lines[line]); 30 | } 31 | } 32 | } 33 | return {'output':seqs}; 34 | }" 35 | 36 | doc: | 37 | Returns string array expression based on lines in a fasta file (SKIPS >). 
--------------------------------------------------------------------------------
/cwl/fix_bed_for_bigbed_conversion.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwl-runner
 2 |
 3 | ### doc: "Fixes a BED file" ###
 4 |
 5 | cwlVersion: v1.0
 6 | class: CommandLineTool
 7 |
 8 | requirements:
 9 |   ResourceRequirement:
10 |     coresMin: 1
11 |     ramMin: 8000
12 |
13 | hints:
14 |   DockerRequirement:
15 |     dockerPull: brianyee/eclip:0.7.0_python
16 |
17 | baseCommand: [fix_bed_for_bigbed_conversion.py]
18 |
19 | inputs:
20 |
21 |   input_bed:  # passed on the command line as: --input_bed <path>
22 |     type: File
23 |     inputBinding:
24 |       position: 1
25 |       prefix: --input_bed
26 |     label: ""
27 |     doc: "input bed (eCLIP input-normalized format) to be fixed (ie. change col4 to string, col5 to integer) for bigbed conversion"
28 |
29 | arguments:  # output name is derived from the input: <nameroot>.fx.bed
30 |   - "--output_fixed_bed"
31 |   - $(inputs.input_bed.nameroot).fx.bed
32 |
33 |
34 | outputs:
35 |
36 |   output_fixed_bed:  # collected by the same name that is passed to --output_fixed_bed
37 |     type: File
38 |     outputBinding:
39 |       glob: $(inputs.input_bed.nameroot).fx.bed
40 |     label: ""
41 |     doc: "eCLIP peaks in proper BED6 format"
42 |
43 | doc: |
44 |   This tool fixes the eCLIP input-normalized format to the proper BED6 format prior to bigbed conversion.
45 | -------------------------------------------------------------------------------- /cwl/gzip.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | cwlVersion: v1.0 4 | 5 | class: CommandLineTool 6 | 7 | baseCommand: [gzip] 8 | 9 | inputs: 10 | 11 | stdout: 12 | type: boolean 13 | inputBinding: 14 | position: 1 15 | prefix: -c 16 | default: true 17 | 18 | input: 19 | type: File 20 | inputBinding: 21 | position: 2 22 | 23 | stdout: $(inputs.input.basename).gz 24 | 25 | outputs: 26 | 27 | gzipped: 28 | type: File 29 | outputBinding: 30 | glob: $(inputs.input.basename).gz 31 | -------------------------------------------------------------------------------- /cwl/index.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | cwlVersion: v1.0 4 | 5 | class: CommandLineTool 6 | 7 | requirements: 8 | - class: ResourceRequirement 9 | coresMin: 1 10 | ramMin: 4000 11 | 12 | hints: 13 | - class: DockerRequirement 14 | dockerPull: brianyee/samtools:1.6 15 | 16 | baseCommand: [samtools, index] 17 | 18 | inputs: 19 | 20 | input_index_bam: 21 | type: File 22 | inputBinding: 23 | position: -1 24 | label: "" 25 | doc: "input bam to index" 26 | 27 | arguments: [ $(inputs.input_index_bam.basename).bai ] 28 | 29 | outputs: 30 | 31 | output_index_bai: 32 | type: File 33 | outputBinding: 34 | glob: $(inputs.input_index_bam.basename).bai 35 | label: "" 36 | doc: "index" 37 | 38 | doc: | 39 | Indexes a bam file (should be deprecated by samtools-index.cwl so kept for legacy), 40 | with the difference being that this tool returns the *.bai index while the other 41 | returns a BAM file object containing an index file as a secondaryFile. 
42 | 43 | Usage: samtools index 44 | -------------------------------------------------------------------------------- /cwl/makebigwigfiles.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | cwlVersion: v1.0 4 | 5 | class: CommandLineTool 6 | 7 | requirements: 8 | - class: ResourceRequirement 9 | coresMin: 1 10 | ramMin: 8000 11 | - class: InitialWorkDirRequirement 12 | listing: 13 | - entry: $(inputs.bam) 14 | writable: true 15 | hints: 16 | - class: DockerRequirement 17 | dockerPull: brianyee/makebigwigfiles:0.0.3 18 | 19 | baseCommand: [makebigwigfiles] 20 | 21 | arguments: [ 22 | --bw_pos, 23 | $(inputs.bam.nameroot).norm.pos.bw, 24 | --bw_neg, 25 | $(inputs.bam.nameroot).norm.neg.bw 26 | ] 27 | 28 | inputs: 29 | 30 | bam: 31 | type: File 32 | inputBinding: 33 | position: 1 34 | prefix: --bam 35 | # secondaryFiles: [.bai] 36 | 37 | chromsizes: 38 | type: File 39 | inputBinding: 40 | position: 3 41 | prefix: --genome 42 | 43 | direction: 44 | default: f 45 | type: string 46 | inputBinding: 47 | position: 4 48 | prefix: --direction 49 | 50 | outputs: 51 | 52 | posbw: 53 | type: File 54 | outputBinding: 55 | glob: $(inputs.bam.nameroot).norm.pos.bw 56 | 57 | negbw: 58 | type: File 59 | outputBinding: 60 | glob: $(inputs.bam.nameroot).norm.neg.bw 61 | 62 | doc: | 63 | Creates strand-specific bigwig files from a BAM file. 
64 | See original script here: https://github.com/YeoLab/gscripts/blob/master/gscripts/general/make_bigwig_files_pe.py 65 | Usage: makebigwigfiles --bam BAM --genome GENOME --dont_flip --bw_pos --bw_neg 66 | -------------------------------------------------------------------------------- /cwl/makebigwigfiles_PE.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | cwlVersion: v1.0 4 | 5 | class: CommandLineTool 6 | 7 | requirements: 8 | - class: ResourceRequirement 9 | coresMin: 1 10 | ramMin: 8000 11 | - class: InitialWorkDirRequirement 12 | listing: 13 | - entry: $(inputs.bam) 14 | writable: true 15 | 16 | hints: 17 | - class: DockerRequirement 18 | dockerPull: brianyee/makebigwigfiles:0.0.3 19 | 20 | baseCommand: [makebigwigfiles] 21 | 22 | arguments: [ 23 | --bw_pos, 24 | $(inputs.bam.nameroot).norm.pos.bw, 25 | --bw_neg, 26 | $(inputs.bam.nameroot).norm.neg.bw 27 | ] 28 | 29 | inputs: 30 | 31 | bam: 32 | type: File 33 | inputBinding: 34 | position: 1 35 | prefix: --bam 36 | # secondaryFiles: [.bai] 37 | 38 | chromsizes: 39 | type: File 40 | inputBinding: 41 | position: 3 42 | prefix: --genome 43 | 44 | direction: 45 | default: r 46 | type: string 47 | inputBinding: 48 | position: 4 49 | prefix: --direction 50 | 51 | outputs: 52 | 53 | posbw: 54 | type: File 55 | outputBinding: 56 | glob: $(inputs.bam.nameroot).norm.pos.bw 57 | 58 | negbw: 59 | type: File 60 | outputBinding: 61 | glob: $(inputs.bam.nameroot).norm.neg.bw 62 | 63 | doc: | 64 | Creates strand-specific bigwig files from a BAM file. 
65 | See original script here: https://github.com/YeoLab/gscripts/blob/master/gscripts/general/make_bigwig_files_pe.py 66 | Usage: makebigwigfiles --bam BAM --genome GENOME --dont_flip --bw_pos --bw_neg 67 | -------------------------------------------------------------------------------- /cwl/makebigwigfiles_SE.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | cwlVersion: v1.0 4 | 5 | class: CommandLineTool 6 | 7 | requirements: 8 | - class: ResourceRequirement 9 | coresMin: 1 10 | ramMin: 8000 11 | - class: InitialWorkDirRequirement 12 | listing: 13 | - entry: $(inputs.bam) 14 | writable: true 15 | 16 | hints: 17 | - class: DockerRequirement 18 | dockerPull: brianyee/makebigwigfiles:0.0.3 19 | 20 | baseCommand: [makebigwigfiles] 21 | 22 | arguments: [ 23 | --bw_pos, 24 | $(inputs.bam.nameroot).norm.pos.bw, 25 | --bw_neg, 26 | $(inputs.bam.nameroot).norm.neg.bw 27 | ] 28 | 29 | inputs: 30 | 31 | bam: 32 | type: File 33 | inputBinding: 34 | position: 1 35 | prefix: --bam 36 | # secondaryFiles: [.bai] 37 | 38 | chromsizes: 39 | type: File 40 | inputBinding: 41 | position: 3 42 | prefix: --genome 43 | 44 | direction: 45 | default: f 46 | type: string 47 | inputBinding: 48 | position: 4 49 | prefix: --direction 50 | 51 | outputs: 52 | 53 | posbw: 54 | type: File 55 | outputBinding: 56 | glob: $(inputs.bam.nameroot).norm.pos.bw 57 | 58 | negbw: 59 | type: File 60 | outputBinding: 61 | glob: $(inputs.bam.nameroot).norm.neg.bw 62 | 63 | doc: | 64 | Creates strand-specific bigwig files from a BAM file. 
65 | See original script here: https://github.com/YeoLab/gscripts/blob/master/gscripts/general/make_bigwig_files_pe.py 66 | Usage: makebigwigfiles --bam BAM --genome GENOME --dont_flip --bw_pos --bw_neg 67 | -------------------------------------------------------------------------------- /cwl/namesort.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | ### doc: "samtools sort tool (sort by name)" ### 4 | 5 | ### This is a copy of sort.cwl, ### 6 | ### exists in case TOIL mistakes namesorting with regular sorting ### 7 | ### Changes: name_sort flag is TRUE by default ### 8 | 9 | cwlVersion: v1.0 10 | class: CommandLineTool 11 | 12 | requirements: 13 | - class: InlineJavascriptRequirement 14 | - class: ResourceRequirement 15 | coresMin: 1 16 | ramMin: 16000 17 | 18 | hints: 19 | - class: DockerRequirement 20 | dockerPull: brianyee/samtools:1.6 21 | 22 | baseCommand: [samtools, sort] 23 | 24 | inputs: 25 | 26 | name_sort: 27 | type: boolean 28 | inputBinding: 29 | position: 1 30 | prefix: -n 31 | default: true 32 | 33 | output_file: 34 | type: string 35 | inputBinding: 36 | position: 2 37 | prefix: -o 38 | valueFrom: | 39 | ${ 40 | if (inputs.output_file == "") { 41 | return inputs.input_sort_bam.nameroot + "So.bam"; 42 | } 43 | else { 44 | return inputs.output_file; 45 | } 46 | } 47 | default: "" 48 | 49 | input_sort_bam: 50 | type: File 51 | inputBinding: 52 | position: 3 53 | label: "" 54 | doc: "input bam" 55 | 56 | outputs: 57 | 58 | output_sort_bam: 59 | type: File 60 | outputBinding: 61 | glob: | 62 | ${ 63 | if (inputs.output_file == "") { 64 | return inputs.input_sort_bam.nameroot + "So.bam"; 65 | } 66 | else { 67 | return inputs.output_file; 68 | } 69 | } 70 | label: "" 71 | doc: "sorted bam" 72 | 73 | doc: | 74 | This tool wraps samtools sort, setting the by-name (-n) flag to be True by default. 
75 |   Usage: samtools sort -n
76 |
--------------------------------------------------------------------------------
/cwl/overlap_peakfi_with_bam.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwltool
 2 |
 3 | cwlVersion: v1.0
 4 |
 5 | class: CommandLineTool
 6 |
 7 | requirements:
 8 |   - class: InlineJavascriptRequirement  # needed for the ${...} expressions below
 9 |   - class: ResourceRequirement
10 |     coresMin: 1
11 |     ramMin: 8000
12 |
13 | hints:
14 |   - class: DockerRequirement
15 |     dockerPull: brianyee/eclip:0.7.0_perl
16 |
17 | baseCommand: [overlap_peakfi_with_bam.pl]
18 |
19 | inputs:  # all arguments are positional; positions -5..0 fix their order on the command line
20 |
21 |   # IP BAM file
22 |   clipBamFile:
23 |     type: File
24 |     inputBinding:
25 |       position: -5
26 |
27 |   inputBamFile:
28 |     type: File
29 |     inputBinding:
30 |       position: -4
31 |
32 |   peakFile:
33 |     type: File
34 |     inputBinding:
35 |       position: -3
36 |
37 |   # mapped_read_num
38 |   clipReadnum:
39 |     type: File
40 |     inputBinding:
41 |       position: -2
42 |
43 |   # mapped_read_num
44 |   inputReadnum:
45 |     type: File
46 |     inputBinding:
47 |       position: -1
48 |
49 |   outputFile:  # output name; "" (the default) means <peakFile nameroot>.normed.bed
50 |     type: string
51 |     default: ""
52 |     inputBinding:
53 |       position: 0
54 |       valueFrom: |
55 |         ${
56 |           if (inputs.outputFile == "") {
57 |             return inputs.peakFile.nameroot + ".normed.bed";
58 |           }
59 |           else {
60 |             return inputs.outputFile;
61 |           }
62 |         }
63 |
64 | outputs:
65 |
66 |   inputnormedBed:  # the file named by the outputFile argument
67 |     type: File
68 |     outputBinding:
69 |       glob: |
70 |         ${
71 |           if (inputs.outputFile == "") {
72 |             return inputs.peakFile.nameroot + ".normed.bed";
73 |           }
74 |           else {
75 |             return inputs.outputFile;
76 |           }
77 |         }
78 |
79 |   inputnormedBedfull:  # companion file: the output name with ".full" appended
80 |     type: File
81 |     outputBinding:  # must not resolve to the same path as inputnormedBed
82 |       glob: |
83 |         ${
84 |           if (inputs.outputFile == "") {
85 |             return inputs.peakFile.nameroot + ".normed.bed.full";
86 |           }
87 |           else {
88 |             return inputs.outputFile + ".full";
89 |           }
90 |         }
91 |
92 | doc: |
93 |   This tool wraps overlap_peakfi_with_bam.pl
94 |   Usage: overlap_peakfi_with_bam.pl <clipBamFile> <inputBamFile> <peakFile> <clipReadnum> <inputReadnum> <outputFile>
95 |
--------------------------------------------------------------------------------
/cwl/overlap_peakfi_with_bam_PE.cwl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env cwltool
 2 |
 3 | cwlVersion: v1.0
 4 |
 5 | class: CommandLineTool
 6 |
 7 | requirements:
 8 |   - class: InlineJavascriptRequirement  # needed for the ${...} expressions below
 9 |   - class: ResourceRequirement
10 |     coresMin: 1
11 |     ramMin: 8000
12 |
13 | hints:
14 |   - class: DockerRequirement
15 |     dockerPull: brianyee/eclip:0.7.0_perl
16 |
17 | baseCommand: [overlap_peakfi_with_bam_PE.pl]
18 |
19 | inputs:  # all arguments are positional; positions -5..0 fix their order on the command line
20 |
21 |   # IP BAM file
22 |   clipBamFile:
23 |     type: File
24 |     inputBinding:
25 |       position: -5
26 |
27 |   inputBamFile:
28 |     type: File
29 |     inputBinding:
30 |       position: -4
31 |
32 |   peakFile:
33 |     type: File
34 |     inputBinding:
35 |       position: -3
36 |
37 |   # mapped_read_num
38 |   clipReadnum:
39 |     type: File
40 |     inputBinding:
41 |       position: -2
42 |
43 |   # mapped_read_num
44 |   inputReadnum:
45 |     type: File
46 |     inputBinding:
47 |       position: -1
48 |
49 |   outputFile:  # output name; "" (the default) means <peakFile nameroot>.normed.bed
50 |     type: string
51 |     default: ""
52 |     inputBinding:
53 |       position: 0
54 |       valueFrom: |
55 |         ${
56 |           if (inputs.outputFile == "") {
57 |             return inputs.peakFile.nameroot + ".normed.bed";
58 |           }
59 |           else {
60 |             return inputs.outputFile;
61 |           }
62 |         }
63 |
64 | outputs:
65 |
66 |   inputnormedBed:  # the file named by the outputFile argument
67 |     type: File
68 |     outputBinding:
69 |       glob: |
70 |         ${
71 |           if (inputs.outputFile == "") {
72 |             return inputs.peakFile.nameroot + ".normed.bed";
73 |           }
74 |           else {
75 |             return inputs.outputFile;
76 |           }
77 |         }
78 |
79 |   inputnormedBedfull:  # companion file: the output name with ".full" appended
80 |     type: File
81 |     outputBinding:  # must not resolve to the same path as inputnormedBed
82 |       glob: |
83 |         ${
84 |           if (inputs.outputFile == "") {
85 |             return inputs.peakFile.nameroot + ".normed.bed.full";
86 |           }
87 |           else {
88 |             return inputs.outputFile + ".full";
89 |           }
90 |         }
91 |
92 | doc: |
93 |   This tool wraps overlap_peakfi_with_bam_PE.pl
94 |   Usage: overlap_peakfi_with_bam_PE.pl <clipBamFile> <inputBamFile> <peakFile> <clipReadnum> <inputReadnum> <outputFile>
95 |
--------------------------------------------------------------------------------
/cwl/parsebarcodes.cwl:
--------------------------------------------------------------------------------
 1 | 
#!/usr/bin/env cwltool 2 | 3 | cwlVersion: v1.0 4 | 5 | class: CommandLineTool 6 | 7 | requirements: 8 | - class: ResourceRequirement 9 | coresMin: 1 10 | ramMin: 1000 11 | 12 | baseCommand: [parsebarcodes.sh] 13 | 14 | hints: 15 | - class: DockerRequirement 16 | dockerPull: brianyee/eclip:0.7.0_perl 17 | 18 | inputs: 19 | 20 | # these are now hard-coded in parser.sh 21 | # adapter3prime: 22 | # type: string 23 | # optional: true 24 | # default: AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC 25 | # adapter5prime: 26 | # type: string 27 | # optional: true 28 | # default : CTTCCGATCT 29 | 30 | randomer_length: 31 | type: string 32 | default: "10" 33 | inputBinding: 34 | position: 1 35 | doc: "randomer length: now normally 10, some old experiment used 5" 36 | 37 | barcodesfasta: 38 | type: File 39 | inputBinding: 40 | position: 2 41 | 42 | barcodeidA: 43 | type: string 44 | inputBinding: 45 | position: 3 46 | 47 | barcodeidB: 48 | type: string 49 | inputBinding: 50 | position: 4 51 | 52 | outputs: 53 | 54 | a_adapters_default: 55 | type: File 56 | outputBinding: 57 | glob: a_adapters_default.fasta 58 | 59 | g_adapters_default: 60 | type: File 61 | outputBinding: 62 | glob: g_adapters_default.fasta 63 | 64 | a_adapters: 65 | type: File 66 | outputBinding: 67 | glob: a_adapters.fasta 68 | 69 | g_adapters: 70 | type: File 71 | outputBinding: 72 | glob: g_adapters.fasta 73 | 74 | A_adapters: 75 | type: File 76 | outputBinding: 77 | glob: A_adapters.fasta 78 | 79 | trimfirst_overlap_length: 80 | type: File 81 | outputBinding: 82 | glob: trimfirst_overlap_length.txt 83 | 84 | trimagain_overlap_length: 85 | type: File 86 | outputBinding: 87 | glob: trimagain_overlap_length.txt 88 | 89 | doc: | 90 | This tool wraps parsebarcodes.sh. 91 | 92 | We have observed occasional double ligation events on the 5’ end of Read1, and we have found 93 | that to fix this requires we run cutadapt twice. 
Additionally, because two adapters are used for 94 | each library (to ensure proper balancing on the Illumina sequencer), two separate barcodes may 95 | be ligated to the same Read1 5’ end (often with 5’ truncations). To fix this we split the barcodes 96 | up into 15bp chunks so that cutadapt is able to deconvolute barcode adapters properly (as by 97 | default it will not find adapters missing the first N bases of the adapter sequence) 98 | 99 | parsebarcodes.sh writes the following files: 100 | trimfirst_overlap_length.txt : file that always contains "1" 101 | trimagain_overlap_length.txt : file that contains max((length of longest barcode - 2),5) 102 | g_adapters_default.fasta : empty file (to be fed to cutadapt properly) 103 | a_adapters_default.fasta : empty file (to be fed to cutadapt properly) 104 | g_adapters.fasta : fasta file containing sequences to be trimmed via cutadapt -g flag 105 | a_adapters.fasta : fasta file containing sequences to be trimmed via cutadapt -a flag 106 | A_adapters.fasta : fasta file containing sequences to be trimmed via cutadapt -A flag 107 | 108 | Usage: parsebarcodes.sh 109 | -------------------------------------------------------------------------------- /cwl/peakscompress.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | cwlVersion: v1.0 4 | 5 | class: CommandLineTool 6 | 7 | requirements: 8 | - class: ResourceRequirement 9 | coresMin: 1 10 | ramMin: 8000 11 | 12 | hints: 13 | - class: DockerRequirement 14 | dockerPull: brianyee/eclip:0.6.0a_perl 15 | 16 | baseCommand: [compress_l2foldenrpeakfi_for_replicate_overlapping_bedformat.pl] 17 | 18 | arguments: [ $(inputs.input_bed.nameroot).compressed.bed ] 19 | 20 | inputs: 21 | 22 | input_bed: 23 | type: File 24 | inputBinding: 25 | position: -1 26 | 27 | outputs: 28 | 29 | output_bed: 30 | type: File 31 | outputBinding: 32 | glob: $(inputs.input_bed.nameroot).compressed.bed 33 | 34 | doc: | 35 | This tool wraps 
compress_l2foldenrpeakfi_for_replicate_overlapping_bedformat.pl, 36 | which merges neighboring or overlapping regions in a BED file. 37 | Usage: perl compress_l2foldenrpeakfi_for_replicate_overlapping_bedformat.pl 38 | -------------------------------------------------------------------------------- /cwl/rename.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | class: CommandLineTool 4 | 5 | cwlVersion: v1.0 6 | 7 | requirements: 8 | InitialWorkDirRequirement: 9 | listing: 10 | - entryname: $(inputs.newname + inputs.suffix) 11 | entry: $(inputs.srcfile) 12 | 13 | baseCommand: "true" 14 | 15 | inputs: 16 | srcfile: File 17 | 18 | suffix: string 19 | 20 | newname: string 21 | 22 | outputs: 23 | outfile: 24 | type: File 25 | outputBinding: 26 | glob: $(inputs.newname + inputs.suffix) 27 | -------------------------------------------------------------------------------- /cwl/samtools-index.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | 3 | ### doc: "Indexes input alignments and returns alignment with index." ### 4 | ### Differs from index.cwl in that index.cwl returns just index ### 5 | ### This tool returns alignments with index as secondaryFile ### 6 | 7 | cwlVersion: v1.0 8 | 9 | class: CommandLineTool 10 | 11 | hints: 12 | - class: DockerRequirement 13 | dockerPull: brianyee/samtools:1.6 14 | 15 | requirements: 16 | InitialWorkDirRequirement: 17 | listing: [ $(inputs.alignments) ] 18 | 19 | inputs: 20 | alignments: 21 | type: File 22 | inputBinding: 23 | position: 2 24 | valueFrom: $(self.basename) 25 | label: Input bam file. 
26 | 27 | baseCommand: [samtools, index, -b] 28 | 29 | outputs: 30 | alignments_with_index: 31 | type: File 32 | secondaryFiles: .bai 33 | outputBinding: 34 | glob: $(inputs.alignments.basename) 35 | 36 | 37 | doc: The index file 38 | 39 | s:mainEntity: 40 | # $import: samtools-metadata.yaml 41 | 42 | s:downloadUrl: https://github.com/common-workflow-language/workflows/blob/master/tools/samtools-index.cwl 43 | s:codeRepository: https://github.com/common-workflow-language/workflows 44 | s:license: http://www.apache.org/licenses/LICENSE-2.0 45 | 46 | s:isPartOf: 47 | class: s:CreativeWork 48 | s:name: Common Workflow Language 49 | s:url: http://commonwl.org/ 50 | 51 | s:author: 52 | class: s:Person 53 | s:name: Andrey Kartashov 54 | s:email: mailto:Andrey.Kartashov@cchmc.org 55 | s:sameAs: 56 | - id: http://orcid.org/0000-0001-9102-5681 57 | s:worksFor: 58 | - class: s:Organization 59 | s:name: Cincinnati Children's Hospital Medical Center 60 | s:location: 3333 Burnet Ave, Cincinnati, OH 45229-3026 61 | s:department: 62 | - class: s:Organization 63 | s:name: Barski Lab 64 | doc: | 65 | samtools-index.cwl is developed for CWL consortium 66 | 67 | -------------------------------------------------------------------------------- /cwl/samtools-mappedreadnum.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | 3 | ### doc: "Returns a file containing the number of mapped reads in a BAM." ### 4 | ### Copy of samtools-view.cwl, except for changes due to bugs in TOIL ### 5 | ### readswithoutbits is by default 4 and NOT optional. ### 6 | ### count is by default set to true. 
### 7 | 8 | cwlVersion: v1.0 9 | class: CommandLineTool 10 | 11 | requirements: 12 | - class: InlineJavascriptRequirement 13 | - class: ResourceRequirement 14 | coresMin: 1 15 | 16 | hints: 17 | - class: DockerRequirement 18 | dockerPull: brianyee/samtools:1.6 19 | 20 | inputs: 21 | isbam: 22 | type: boolean 23 | default: false 24 | inputBinding: 25 | position: 2 26 | prefix: -b 27 | doc: | 28 | output in BAM format 29 | readswithoutbits: 30 | type: int 31 | default: 4 32 | inputBinding: 33 | position: 1 34 | prefix: -F 35 | doc: | 36 | only include reads with none of the bits set in INT set in FLAG [0] 37 | collapsecigar: 38 | type: boolean 39 | default: false 40 | inputBinding: 41 | position: 1 42 | prefix: -B 43 | doc: | 44 | collapse the backward CIGAR operation 45 | readsingroup: 46 | type: string? 47 | inputBinding: 48 | position: 1 49 | prefix: -r 50 | doc: | 51 | only include reads in read group STR [null] 52 | bedoverlap: 53 | type: File? 54 | inputBinding: 55 | position: 1 56 | prefix: -L 57 | doc: | 58 | only include reads overlapping this BED FILE [null] 59 | uncompressed: 60 | type: boolean 61 | default: false 62 | inputBinding: 63 | position: 1 64 | prefix: -u 65 | doc: | 66 | uncompressed BAM output (implies -b) 67 | readtagtostrip: 68 | type: string[]? 69 | inputBinding: 70 | position: 1 71 | 72 | doc: | 73 | read tag to strip (repeatable) [null] 74 | input: 75 | type: File 76 | inputBinding: 77 | position: 4 78 | 79 | doc: | 80 | Input bam file. 81 | readsquality: 82 | type: int? 83 | inputBinding: 84 | position: 1 85 | prefix: -q 86 | doc: | 87 | only include reads with mapping quality >= INT [0] 88 | readswithbits: 89 | type: int? 90 | inputBinding: 91 | position: 1 92 | prefix: -f 93 | doc: | 94 | only include reads with all bits set in INT set in FLAG [0] 95 | cigar: 96 | type: int? 
97 | inputBinding: 98 | position: 1 99 | prefix: -m 100 | doc: | 101 | only include reads with number of CIGAR operations 102 | consuming query sequence >= INT [0] 103 | iscram: 104 | type: boolean 105 | default: false 106 | inputBinding: 107 | position: 2 108 | prefix: -C 109 | doc: | 110 | output in CRAM format 111 | threads: 112 | type: int? 113 | inputBinding: 114 | position: 1 115 | prefix: -@ 116 | doc: | 117 | number of BAM compression threads [0] 118 | fastcompression: 119 | type: boolean 120 | default: false 121 | inputBinding: 122 | position: 1 123 | prefix: '-1' 124 | doc: | 125 | use fast BAM compression (implies -b) 126 | samheader: 127 | type: boolean 128 | default: false 129 | inputBinding: 130 | position: 1 131 | prefix: -h 132 | doc: | 133 | include header in SAM output 134 | count: 135 | type: boolean 136 | default: true 137 | inputBinding: 138 | position: 1 139 | prefix: -c 140 | doc: | 141 | print only the count of matching records 142 | randomseed: 143 | type: float? 144 | inputBinding: 145 | position: 1 146 | prefix: -s 147 | doc: | 148 | integer part sets seed of random number generator [0]; 149 | rest sets fraction of templates to subsample [no subsampling] 150 | referencefasta: 151 | type: File? 152 | inputBinding: 153 | position: 1 154 | prefix: -T 155 | doc: | 156 | reference sequence FASTA FILE [null] 157 | region: 158 | type: string? 159 | inputBinding: 160 | position: 5 161 | 162 | doc: | 163 | [region ...] 164 | readsingroupfile: 165 | type: File? 166 | inputBinding: 167 | position: 1 168 | prefix: -R 169 | doc: | 170 | only include reads with read group listed in FILE [null] 171 | readsinlibrary: 172 | type: string? 
173 | inputBinding: 174 | position: 1 175 | prefix: -l 176 | doc: | 177 | only include reads in library STR [null] 178 | output_name: 179 | type: string 180 | inputBinding: 181 | position: 2 182 | prefix: -o 183 | 184 | outputs: 185 | output: 186 | type: File 187 | outputBinding: 188 | glob: $(inputs.output_name) 189 | 190 | baseCommand: [samtools, view] 191 | 192 | s:downloadUrl: https://github.com/common-workflow-language/workflows/blob/master/tools/samtools-view.cwl 193 | s:codeRepository: https://github.com/common-workflow-language/workflows 194 | s:license: http://www.apache.org/licenses/LICENSE-2.0 195 | 196 | s:isPartOf: 197 | class: s:CreativeWork 198 | s:name: Common Workflow Language 199 | s:url: http://commonwl.org/ 200 | 201 | s:author: 202 | class: s:Person 203 | s:name: Andrey Kartashov 204 | s:email: mailto:Andrey.Kartashov@cchmc.org 205 | s:sameAs: 206 | - id: http://orcid.org/0000-0001-9102-5681 207 | s:worksFor: 208 | - class: s:Organization 209 | s:name: Cincinnati Children's Hospital Medical Center 210 | s:location: 3333 Burnet Ave, Cincinnati, OH 45229-3026 211 | s:department: 212 | - class: s:Organization 213 | s:name: Barski Lab 214 | doc: | 215 | samtools-view.cwl is developed for CWL consortium 216 | Usage: samtools view [options] || [region ...] 
217 | 218 | Options: -b output BAM 219 | -C output CRAM (requires -T) 220 | -1 use fast BAM compression (implies -b) 221 | -u uncompressed BAM output (implies -b) 222 | -h include header in SAM output 223 | -H print SAM header only (no alignments) 224 | -c print only the count of matching records 225 | -o FILE output file name [stdout] 226 | -U FILE output reads not selected by filters to FILE [null] 227 | -t FILE FILE listing reference names and lengths (see long help) [null] 228 | -T FILE reference sequence FASTA FILE [null] 229 | -L FILE only include reads overlapping this BED FILE [null] 230 | -r STR only include reads in read group STR [null] 231 | -R FILE only include reads with read group listed in FILE [null] 232 | -q INT only include reads with mapping quality >= INT [0] 233 | -l STR only include reads in library STR [null] 234 | -m INT only include reads with number of CIGAR operations 235 | consuming query sequence >= INT [0] 236 | -f INT only include reads with all bits set in INT set in FLAG [0] 237 | -F INT only include reads with none of the bits set in INT 238 | set in FLAG [0] 239 | -x STR read tag to strip (repeatable) [null] 240 | -B collapse the backward CIGAR operation 241 | -s FLOAT integer part sets seed of random number generator [0]; 242 | rest sets fraction of templates to subsample [no subsampling] 243 | -@ INT number of BAM compression threads [0] 244 | 245 | -------------------------------------------------------------------------------- /cwl/samtools-merge.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | cwlVersion: v1.0 4 | 5 | class: CommandLineTool 6 | 7 | requirements: 8 | - class: InlineJavascriptRequirement 9 | - class: ResourceRequirement 10 | coresMin: 1 11 | ramMin: 8000 12 | 13 | baseCommand: [samtools, merge] 14 | 15 | hints: 16 | - class: DockerRequirement 17 | dockerPull: brianyee/samtools:1.6 18 | 19 | inputs: 20 | 21 | output_bam: 22 | type: string 
23 | default: "" 24 | inputBinding: 25 | position: 1 26 | valueFrom: | 27 | ${ 28 | if (inputs.output_bam == "") { 29 | return inputs.input_bam_files[0].nameroot + ".merged.bam"; 30 | } 31 | else { 32 | return inputs.output_bam; 33 | } 34 | } 35 | label: "" 36 | doc: "output merged bam file name" 37 | 38 | input_bam_files: 39 | type: File[] 40 | inputBinding: 41 | position: 2 42 | label: "" 43 | doc: "input unmerged bam files" 44 | 45 | outputs: 46 | 47 | output_bam_file: 48 | type: File 49 | outputBinding: 50 | glob: | 51 | ${ 52 | if (inputs.output_bam == "") { 53 | return inputs.input_bam_files[0].nameroot + ".merged.bam"; 54 | } 55 | else { 56 | return inputs.output_bam; 57 | } 58 | } 59 | label: "" 60 | doc: "output merged bam file" 61 | 62 | doc: | 63 | samtools-merge.cwl takes in a list of input_bam_files and 64 | returns a merged BAM file. 65 | 66 | Usage: samtools merge [-nurlf] [-h inh.sam] [-b ] [ ... ] 67 | -------------------------------------------------------------------------------- /cwl/samtools-view.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | 3 | ### doc: "Samtools view tool (https://github.com/common-workflow-language/workflows/blob/master/tools/samtools-view.cwl)" ### 4 | 5 | cwlVersion: v1.0 6 | class: CommandLineTool 7 | 8 | requirements: 9 | - class: InlineJavascriptRequirement 10 | - class: ResourceRequirement 11 | coresMin: 1 12 | ramMin: 8000 13 | 14 | hints: 15 | - class: DockerRequirement 16 | dockerPull: brianyee/samtools:1.6 17 | 18 | inputs: 19 | isbam: 20 | type: boolean 21 | default: false 22 | inputBinding: 23 | position: 2 24 | prefix: -b 25 | doc: | 26 | output in BAM format 27 | readswithoutbits: 28 | type: int? 
29 | inputBinding: 30 | position: 1 31 | prefix: -F 32 | doc: | 33 | only include reads with none of the bits set in INT set in FLAG [0] 34 | collapsecigar: 35 | type: boolean 36 | default: false 37 | inputBinding: 38 | position: 1 39 | prefix: -B 40 | doc: | 41 | collapse the backward CIGAR operation 42 | readsingroup: 43 | type: string? 44 | inputBinding: 45 | position: 1 46 | prefix: -r 47 | doc: | 48 | only include reads in read group STR [null] 49 | bedoverlap: 50 | type: File? 51 | inputBinding: 52 | position: 1 53 | prefix: -L 54 | doc: | 55 | only include reads overlapping this BED FILE [null] 56 | uncompressed: 57 | type: boolean 58 | default: false 59 | inputBinding: 60 | position: 1 61 | prefix: -u 62 | doc: | 63 | uncompressed BAM output (implies -b) 64 | readtagtostrip: 65 | type: string[]? 66 | inputBinding: 67 | position: 1 68 | 69 | doc: | 70 | read tag to strip (repeatable) [null] 71 | input: 72 | type: File 73 | inputBinding: 74 | position: 4 75 | 76 | doc: | 77 | Input bam file. 78 | readsquality: 79 | type: int? 80 | inputBinding: 81 | position: 1 82 | prefix: -q 83 | doc: | 84 | only include reads with mapping quality >= INT [0] 85 | readswithbits: 86 | type: int? 87 | inputBinding: 88 | position: 1 89 | prefix: -f 90 | doc: | 91 | only include reads with all bits set in INT set in FLAG [0] 92 | cigar: 93 | type: int? 94 | inputBinding: 95 | position: 1 96 | prefix: -m 97 | doc: | 98 | only include reads with number of CIGAR operations 99 | consuming query sequence >= INT [0] 100 | iscram: 101 | type: boolean 102 | default: false 103 | inputBinding: 104 | position: 2 105 | prefix: -C 106 | doc: | 107 | output in CRAM format 108 | threads: 109 | type: int? 
110 | inputBinding: 111 | position: 1 112 | prefix: -@ 113 | doc: | 114 | number of BAM compression threads [0] 115 | fastcompression: 116 | type: boolean 117 | default: false 118 | inputBinding: 119 | position: 1 120 | prefix: '-1' 121 | doc: | 122 | use fast BAM compression (implies -b) 123 | samheader: 124 | type: boolean 125 | default: false 126 | inputBinding: 127 | position: 1 128 | prefix: -h 129 | doc: | 130 | include header in SAM output 131 | count: 132 | type: boolean 133 | default: false 134 | inputBinding: 135 | position: 1 136 | prefix: -c 137 | doc: | 138 | print only the count of matching records 139 | randomseed: 140 | type: float? 141 | inputBinding: 142 | position: 1 143 | prefix: -s 144 | doc: | 145 | integer part sets seed of random number generator [0]; 146 | rest sets fraction of templates to subsample [no subsampling] 147 | referencefasta: 148 | type: File? 149 | inputBinding: 150 | position: 1 151 | prefix: -T 152 | doc: | 153 | reference sequence FASTA FILE [null] 154 | region: 155 | type: string? 156 | inputBinding: 157 | position: 5 158 | 159 | doc: | 160 | [region ...] 161 | readsingroupfile: 162 | type: File? 163 | inputBinding: 164 | position: 1 165 | prefix: -R 166 | doc: | 167 | only include reads with read group listed in FILE [null] 168 | readsinlibrary: 169 | type: string? 
170 | inputBinding: 171 | position: 1 172 | prefix: -l 173 | doc: | 174 | only include reads in library STR [null] 175 | output_name: 176 | type: string 177 | inputBinding: 178 | position: 2 179 | prefix: -o 180 | outputs: 181 | output: 182 | type: File 183 | outputBinding: 184 | glob: $(inputs.output_name) 185 | 186 | baseCommand: [samtools, view] 187 | 188 | # s:mainEntity: 189 | # $import: samtools-metadata.yaml 190 | 191 | s:downloadUrl: https://github.com/common-workflow-language/workflows/blob/master/tools/samtools-view.cwl 192 | s:codeRepository: https://github.com/common-workflow-language/workflows 193 | s:license: http://www.apache.org/licenses/LICENSE-2.0 194 | 195 | s:isPartOf: 196 | class: s:CreativeWork 197 | s:name: Common Workflow Language 198 | s:url: http://commonwl.org/ 199 | 200 | s:author: 201 | class: s:Person 202 | s:name: Andrey Kartashov 203 | s:email: mailto:Andrey.Kartashov@cchmc.org 204 | s:sameAs: 205 | - id: http://orcid.org/0000-0001-9102-5681 206 | s:worksFor: 207 | - class: s:Organization 208 | s:name: Cincinnati Children's Hospital Medical Center 209 | s:location: 3333 Burnet Ave, Cincinnati, OH 45229-3026 210 | s:department: 211 | - class: s:Organization 212 | s:name: Barski Lab 213 | doc: | 214 | samtools-view.cwl is developed for CWL consortium 215 | Usage: samtools view [options] || [region ...] 
216 | 217 | Options: -b output BAM 218 | -C output CRAM (requires -T) 219 | -1 use fast BAM compression (implies -b) 220 | -u uncompressed BAM output (implies -b) 221 | -h include header in SAM output 222 | -H print SAM header only (no alignments) 223 | -c print only the count of matching records 224 | -o FILE output file name [stdout] 225 | -U FILE output reads not selected by filters to FILE [null] 226 | -t FILE FILE listing reference names and lengths (see long help) [null] 227 | -T FILE reference sequence FASTA FILE [null] 228 | -L FILE only include reads overlapping this BED FILE [null] 229 | -r STR only include reads in read group STR [null] 230 | -R FILE only include reads with read group listed in FILE [null] 231 | -q INT only include reads with mapping quality >= INT [0] 232 | -l STR only include reads in library STR [null] 233 | -m INT only include reads with number of CIGAR operations 234 | consuming query sequence >= INT [0] 235 | -f INT only include reads with all bits set in INT set in FLAG [0] 236 | -F INT only include reads with none of the bits set in INT 237 | set in FLAG [0] 238 | -x STR read tag to strip (repeatable) [null] 239 | -B collapse the backward CIGAR operation 240 | -s FLOAT integer part sets seed of random number generator [0]; 241 | rest sets fraction of templates to subsample [no subsampling] 242 | -@ INT number of BAM compression threads [0] 243 | 244 | -------------------------------------------------------------------------------- /cwl/samtools-viewr2.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | 3 | ### doc: "Samtools view (just read2) tool" ### 4 | ### Copy of samtools-view.cwl, except for changes due to bugs in TOIL ### 5 | ### readswithbits is by default 128 and NOT optional. 
### 6 | 7 | cwlVersion: v1.0 8 | 9 | class: CommandLineTool 10 | 11 | requirements: 12 | - class: InlineJavascriptRequirement 13 | - class: ResourceRequirement 14 | coresMin: 1 15 | ramMin: 8000 16 | 17 | hints: 18 | - class: DockerRequirement 19 | dockerPull: brianyee/samtools:1.6 20 | 21 | inputs: 22 | isbam: 23 | type: boolean 24 | default: false 25 | inputBinding: 26 | position: 2 27 | prefix: -b 28 | doc: | 29 | output in BAM format 30 | readswithoutbits: 31 | type: int? 32 | inputBinding: 33 | position: 1 34 | prefix: -F 35 | doc: | 36 | only include reads with none of the bits set in INT set in FLAG [0] 37 | collapsecigar: 38 | type: boolean 39 | default: false 40 | inputBinding: 41 | position: 1 42 | prefix: -B 43 | doc: | 44 | collapse the backward CIGAR operation 45 | readsingroup: 46 | type: string? 47 | inputBinding: 48 | position: 1 49 | prefix: -r 50 | doc: | 51 | only include reads in read group STR [null] 52 | bedoverlap: 53 | type: File? 54 | inputBinding: 55 | position: 1 56 | prefix: -L 57 | doc: | 58 | only include reads overlapping this BED FILE [null] 59 | uncompressed: 60 | type: boolean 61 | default: false 62 | inputBinding: 63 | position: 1 64 | prefix: -u 65 | doc: | 66 | uncompressed BAM output (implies -b) 67 | readtagtostrip: 68 | type: ["null", {type: array, items: string, inputBinding: {prefix: "-x"}}] 69 | inputBinding: 70 | position: 1 71 | # each tag is passed as its own "-x TAG" pair; without the prefix the tags were emitted as bare positionals 72 | doc: | 73 | read tag to strip (repeatable) [null] 74 | input: 75 | type: File 76 | inputBinding: 77 | position: 4 78 | 79 | doc: | 80 | Input bam file. 81 | readsquality: 82 | type: int? 83 | inputBinding: 84 | position: 1 85 | prefix: -q 86 | doc: | 87 | only include reads with mapping quality >= INT [0] 88 | readswithbits: 89 | type: int 90 | default: 128 91 | inputBinding: 92 | position: 1 93 | prefix: -f 94 | doc: | 95 | only include reads with all bits set in INT set in FLAG [128 = second in pair, i.e. read2] 96 | cigar: 97 | type: int? 
98 | inputBinding: 99 | position: 1 100 | prefix: -m 101 | doc: | 102 | only include reads with number of CIGAR operations 103 | consuming query sequence >= INT [0] 104 | iscram: 105 | type: boolean 106 | default: false 107 | inputBinding: 108 | position: 2 109 | prefix: -C 110 | doc: | 111 | output in CRAM format 112 | threads: 113 | type: int? 114 | inputBinding: 115 | position: 1 116 | prefix: -@ 117 | doc: | 118 | number of BAM compression threads [0] 119 | fastcompression: 120 | type: boolean 121 | default: false 122 | inputBinding: 123 | position: 1 124 | prefix: '-1' 125 | doc: | 126 | use fast BAM compression (implies -b) 127 | samheader: 128 | type: boolean 129 | default: false 130 | inputBinding: 131 | position: 1 132 | prefix: -h 133 | doc: | 134 | include header in SAM output 135 | count: 136 | type: boolean 137 | default: false 138 | inputBinding: 139 | position: 1 140 | prefix: -c 141 | doc: | 142 | print only the count of matching records 143 | randomseed: 144 | type: float? 145 | inputBinding: 146 | position: 1 147 | prefix: -s 148 | doc: | 149 | integer part sets seed of random number generator [0]; 150 | rest sets fraction of templates to subsample [no subsampling] 151 | referencefasta: 152 | type: File? 153 | inputBinding: 154 | position: 1 155 | prefix: -T 156 | doc: | 157 | reference sequence FASTA FILE [null] 158 | region: 159 | type: string? 160 | inputBinding: 161 | position: 5 162 | 163 | doc: | 164 | [region ...] 165 | readsingroupfile: 166 | type: File? 167 | inputBinding: 168 | position: 1 169 | prefix: -R 170 | doc: | 171 | only include reads with read group listed in FILE [null] 172 | readsinlibrary: 173 | type: string? 
174 | inputBinding: 175 | position: 1 176 | prefix: -l 177 | doc: | 178 | only include reads in library STR [null] 179 | output_name: 180 | type: string 181 | default: "" 182 | inputBinding: 183 | position: 2 184 | prefix: -o 185 | valueFrom: | 186 | ${ 187 | if (inputs.output_name == "") { 188 | return inputs.input.nameroot + ".r2.bam"; 189 | } 190 | else { 191 | return inputs.output_name; 192 | } 193 | } 194 | outputs: 195 | output: 196 | type: File 197 | outputBinding: 198 | glob: | 199 | ${ 200 | if (inputs.output_name == "") { 201 | return inputs.input.nameroot + ".r2.bam"; 202 | } 203 | else { 204 | return inputs.output_name; 205 | } 206 | } 207 | 208 | baseCommand: [samtools, view] 209 | 210 | 211 | # s:mainEntity: 212 | # $import: samtools-metadata.yaml 213 | 214 | s:downloadUrl: https://github.com/common-workflow-language/workflows/blob/master/tools/samtools-view.cwl 215 | s:codeRepository: https://github.com/common-workflow-language/workflows 216 | s:license: http://www.apache.org/licenses/LICENSE-2.0 217 | 218 | s:isPartOf: 219 | class: s:CreativeWork 220 | s:name: Common Workflow Language 221 | s:url: http://commonwl.org/ 222 | 223 | s:author: 224 | class: s:Person 225 | s:name: Andrey Kartashov 226 | s:email: mailto:Andrey.Kartashov@cchmc.org 227 | s:sameAs: 228 | - id: http://orcid.org/0000-0001-9102-5681 229 | s:worksFor: 230 | - class: s:Organization 231 | s:name: Cincinnati Children's Hospital Medical Center 232 | s:location: 3333 Burnet Ave, Cincinnati, OH 45229-3026 233 | s:department: 234 | - class: s:Organization 235 | s:name: Barski Lab 236 | doc: | 237 | samtools-view.cwl is developed for CWL consortium 238 | Usage: samtools view [options] || [region ...] 
239 | 240 | Options: -b output BAM 241 | -C output CRAM (requires -T) 242 | -1 use fast BAM compression (implies -b) 243 | -u uncompressed BAM output (implies -b) 244 | -h include header in SAM output 245 | -H print SAM header only (no alignments) 246 | -c print only the count of matching records 247 | -o FILE output file name [stdout] 248 | -U FILE output reads not selected by filters to FILE [null] 249 | -t FILE FILE listing reference names and lengths (see long help) [null] 250 | -T FILE reference sequence FASTA FILE [null] 251 | -L FILE only include reads overlapping this BED FILE [null] 252 | -r STR only include reads in read group STR [null] 253 | -R FILE only include reads with read group listed in FILE [null] 254 | -q INT only include reads with mapping quality >= INT [0] 255 | -l STR only include reads in library STR [null] 256 | -m INT only include reads with number of CIGAR operations 257 | consuming query sequence >= INT [0] 258 | -f INT only include reads with all bits set in INT set in FLAG [0] 259 | -F INT only include reads with none of the bits set in INT 260 | set in FLAG [0] 261 | -x STR read tag to strip (repeatable) [null] 262 | -B collapse the backward CIGAR operation 263 | -s FLOAT integer part sets seed of random number generator [0]; 264 | rest sets fraction of templates to subsample [no subsampling] 265 | -@ INT number of BAM compression threads [0] 266 | 267 | -------------------------------------------------------------------------------- /cwl/sort-bed.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | cwlVersion: v1.0 4 | 5 | class: CommandLineTool 6 | 7 | requirements: 8 | - class: InlineJavascriptRequirement 9 | - class: ResourceRequirement 10 | coresMin: 1 11 | ramMin: 8000 12 | 13 | hints: 14 | - class: DockerRequirement 15 | dockerPull: brianyee/bedtools:2.27.1 16 | 17 | baseCommand: [sort] 18 | 19 | arguments: [ 20 | "-k1,1", 21 | "-k2,2n" 22 | ] 23 | 24 | 
inputs: 25 | 26 | unsorted_bed: 27 | type: File 28 | inputBinding: 29 | position: 1 30 | 31 | stdout: $(inputs.unsorted_bed.nameroot).sorted.bed 32 | 33 | outputs: 34 | 35 | sorted_bed: 36 | type: File 37 | outputBinding: 38 | glob: $(inputs.unsorted_bed.nameroot).sorted.bed 39 | 40 | doc: | 41 | This tool wraps unix sort to sort a BED file. 42 | 43 | Usage: sort -k1,1 -k2,2n unsorted.bed > sorted.bed 44 | -------------------------------------------------------------------------------- /cwl/sort.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | ### doc: "samtools sort tool (sort by coordinate)" ### 4 | 5 | ### This is a copy of namesort.cwl, ### 6 | ### exists in case TOIL mistakes namesorting with regular sorting ### 7 | ### Changes: name_sort flag is FALSE by default ### 8 | 9 | cwlVersion: v1.0 10 | class: CommandLineTool 11 | 12 | requirements: 13 | - class: InlineJavascriptRequirement 14 | - class: ResourceRequirement 15 | coresMin: 1 16 | ramMin: 8000 17 | tmpdirMin: 8000 18 | outdirMin: 8000 19 | 20 | hints: 21 | - class: DockerRequirement 22 | dockerPull: brianyee/samtools:1.6 23 | 24 | baseCommand: [samtools, sort] 25 | 26 | inputs: 27 | 28 | name_sort: 29 | type: boolean 30 | inputBinding: 31 | position: 1 32 | prefix: -n 33 | default: false 34 | 35 | output_file: 36 | type: string 37 | inputBinding: 38 | position: 2 39 | prefix: -o 40 | valueFrom: | 41 | ${ 42 | if (inputs.output_file == "") { 43 | return inputs.input_sort_bam.nameroot + "So.bam"; 44 | } 45 | else { 46 | return inputs.output_file; 47 | } 48 | } 49 | default: "" 50 | 51 | memory: 52 | default: 3G 53 | type: string 54 | inputBinding: 55 | position: 3 56 | prefix: -m 57 | 58 | input_sort_bam: 59 | type: File 60 | inputBinding: 61 | position: 4 62 | label: "" 63 | doc: "input bam" 64 | 65 | outputs: 66 | 67 | output_sort_bam: 68 | type: File 69 | outputBinding: 70 | glob: | 71 | ${ 72 | if (inputs.output_file == "") { 73 
| return inputs.input_sort_bam.nameroot + "So.bam"; 74 | } 75 | else { 76 | return inputs.output_file; 77 | } 78 | } 79 | label: "" 80 | doc: "sorted bam" 81 | 82 | doc: | 83 | This tool wraps samtools sort by coordinates (namesort flag is False by default). 84 | Usage: samtools sort [options...] [in.bam] 85 | -------------------------------------------------------------------------------- /cwl/trim_pe.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | cwlVersion: v1.0 4 | 5 | class: CommandLineTool 6 | 7 | requirements: 8 | - class: ResourceRequirement 9 | coresMin: 2 10 | - class: StepInputExpressionRequirement 11 | - class: InlineJavascriptRequirement 12 | 13 | hints: 14 | - class: DockerRequirement 15 | dockerPull: brianyee/cutadapt:1.14 16 | 17 | baseCommand: [cutadapt] 18 | 19 | inputs: 20 | 21 | input_trim_overlap_length: 22 | type: string 23 | default: "5" 24 | inputBinding: 25 | position: 0 26 | prefix: -O 27 | 28 | f: 29 | type: string 30 | default: "fastq" 31 | inputBinding: 32 | position: 1 33 | prefix: -f 34 | 35 | match_read_wildcards: 36 | type: boolean 37 | default: true 38 | inputBinding: 39 | position: 2 40 | prefix: --match-read-wildcards 41 | 42 | times: 43 | type: string 44 | default: "1" 45 | inputBinding: 46 | position: 3 47 | prefix: --times 48 | 49 | error_rate: 50 | type: string 51 | default: "0.1" 52 | inputBinding: 53 | position: 4 54 | prefix: -e 55 | 56 | quality_cutoff: 57 | type: string 58 | default: "6" 59 | inputBinding: 60 | position: 5 61 | prefix: --quality-cutoff 62 | 63 | minimum_length: 64 | type: string 65 | default: "18" 66 | inputBinding: 67 | position: 6 68 | prefix: -m 69 | 70 | output_r1: 71 | type: string 72 | inputBinding: 73 | position: 7 74 | prefix: -o 75 | valueFrom: | 76 | ${ 77 | if (inputs.output_r1 == "") { 78 | return inputs.input_trim[0].nameroot + "Tr.fq"; 79 | } 80 | else { 81 | return inputs.output_r1; 82 | } 83 | } 84 | default: "" 85 
| 86 | output_r2: 87 | type: string? 88 | inputBinding: 89 | position: 8 90 | prefix: -p 91 | valueFrom: | 92 | ${ 93 | if (inputs.output_r2 == "") { 94 | return inputs.input_trim[1].nameroot + "Tr.fq"; 95 | } 96 | else { 97 | return inputs.output_r2; 98 | } 99 | } 100 | default: "" 101 | 102 | input_trim_b_adapters: 103 | default: [] 104 | type: 105 | type: array 106 | items: string 107 | inputBinding: 108 | prefix: "-b " 109 | separate: false 110 | # prefix: "--anywhere=file:" 111 | # prefix: "-b file:" 112 | inputBinding: 113 | position: 9 114 | 115 | input_trim_g_adapters: 116 | type: 117 | type: array 118 | items: string 119 | inputBinding: 120 | prefix: "-g " 121 | separate: false 122 | # prefix: "--front=file:" 123 | # prefix: "-g file:" 124 | inputBinding: 125 | position: 10 126 | 127 | input_trim_A_adapters: 128 | type: 129 | type: array 130 | items: string 131 | inputBinding: 132 | prefix: "-A " 133 | separate: false 134 | # prefix: "--ADAPTER=file:" 135 | # prefix: "-A file:" 136 | inputBinding: 137 | position: 11 138 | 139 | 140 | input_trim_a_adapters: 141 | type: 142 | type: array 143 | items: string 144 | inputBinding: 145 | prefix: "-a " 146 | separate: false 147 | # prefix: "--adapter=file:" 148 | # prefix: "-a file:" 149 | inputBinding: 150 | position: 12 151 | 152 | input_trim: 153 | type: File[]? 154 | inputBinding: 155 | position: 13 156 | 157 | stdout: $(inputs.input_trim[0].nameroot)Tr.metrics 158 | 159 | outputs: 160 | 161 | output_trim: 162 | type: File[]? 
163 | outputBinding: 164 | # glob: "*Tr.fq" 165 | # If output_r1 and output_r2 were not specified, look for input basename 166 | glob: | 167 | ${ 168 | // Resolve each mate independently, mirroring the output_r1/output_r2 valueFrom logic: 169 | // a custom name for one mate combined with a defaulted name for the other must still match both files. 170 | var r1 = inputs.output_r1; 171 | var r2 = inputs.output_r2; 172 | if (!r1) { 173 | r1 = inputs.input_trim[0].nameroot + "Tr.fq"; 174 | } 175 | if (!r2) { 176 | r2 = inputs.input_trim[1].nameroot + "Tr.fq"; 177 | } 178 | return [r1, r2]; 179 | } 180 | 181 | 182 | output_trim_report: 183 | type: File 184 | outputBinding: 185 | # glob: "*Tr.metrics" 186 | glob: "*.metrics" 187 | 188 | doc: | 189 | This tool wraps cutadapt with default parameters set to paired-end eCLIP processing defaults. 190 | Usage: cutadapt -a ADAPT1 -A ADAPT2 [options] -o out1.fastq -p out2.fastq in1.fastq in2.fastq 191 | -------------------------------------------------------------------------------- /cwl/trim_se.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | cwlVersion: v1.0 4 | 5 | class: CommandLineTool 6 | 7 | requirements: 8 | - class: ResourceRequirement 9 | coresMin: 2 10 | - class: StepInputExpressionRequirement 11 | - class: InlineJavascriptRequirement 12 | 13 | hints: 14 | - class: DockerRequirement 15 | dockerPull: brianyee/cutadapt:1.14 16 | 17 | baseCommand: [cutadapt] 18 | 19 | inputs: 20 | 21 | input_trim_overlap_length: 22 | type: string 23 | default: "5" 24 | inputBinding: 25 | position: 0 26 | prefix: -O 27 | 28 | f: 29 | type: string 30 | default: "fastq" 31 | inputBinding: 32 | position: 1 33 | prefix: -f 34 | 35 | match_read_wildcards: 36 | type: boolean 37 | default: true 38 | inputBinding: 39 | position: 2 40 | prefix: --match-read-wildcards 41 | 42 | times: 43 | type: string 44 | default: "1" 45 | inputBinding: 46 | position: 3 47 | prefix: --times 48 | 49 | error_rate: 50 | type: string 51 | default: "0.1" 52 | inputBinding: 53 | position: 4 54 | prefix: -e 55 | 56 | quality_cutoff: 57 | type: string 58 | default: "6" 59 | inputBinding: 60 | 
position: 5 61 | prefix: --quality-cutoff 62 | 63 | minimum_length: 64 | type: string 65 | default: "18" 66 | inputBinding: 67 | position: 6 68 | prefix: -m 69 | 70 | output_r1: 71 | type: string 72 | inputBinding: 73 | position: 7 74 | prefix: -o 75 | valueFrom: | 76 | ${ 77 | if (inputs.output_r1 == "") { 78 | return inputs.input_trim[0].nameroot + "Tr.fq"; 79 | } 80 | else { 81 | return inputs.output_r1; 82 | } 83 | } 84 | default: "" 85 | 86 | input_trim_b_adapters: 87 | default: [] 88 | type: 89 | type: array 90 | items: string 91 | inputBinding: 92 | prefix: "-b " 93 | separate: false 94 | # prefix: "--anywhere=file:" 95 | # prefix: "-b file:" 96 | inputBinding: 97 | position: 9 98 | 99 | input_trim_g_adapters: 100 | default: [] 101 | type: 102 | type: array 103 | items: string 104 | inputBinding: 105 | prefix: "-g " 106 | separate: false 107 | # prefix: "--front=file:" 108 | # prefix: "-g file:" 109 | inputBinding: 110 | position: 10 111 | 112 | input_trim_A_adapters: 113 | default: [] 114 | type: 115 | type: array 116 | items: string 117 | inputBinding: 118 | prefix: "-A " 119 | separate: false 120 | # prefix: "--ADAPTER=file:" 121 | # prefix: "-A file:" 122 | inputBinding: 123 | position: 11 124 | 125 | 126 | input_trim_a_adapters: 127 | type: 128 | type: array 129 | items: string 130 | inputBinding: 131 | prefix: "-a " 132 | separate: false 133 | # prefix: "--adapter=file:" 134 | # prefix: "-a file:" 135 | inputBinding: 136 | position: 12 137 | 138 | input_trim: 139 | type: File[]? 140 | inputBinding: 141 | position: 13 142 | 143 | stdout: $(inputs.input_trim[0].nameroot)Tr.metrics 144 | 145 | outputs: 146 | 147 | output_trim: 148 | type: File[]? 
149 | outputBinding: 150 | # glob: "*Tr.fq" 151 | # If output_r1 was not specified, look for input basename 152 | glob: | 153 | ${ 154 | if (inputs.output_r1 == "") { 155 | return [ 156 | inputs.input_trim[0].nameroot + "Tr.fq" 157 | ]; 158 | } 159 | else { 160 | return [ 161 | inputs.output_r1 162 | ]; 163 | } 164 | } 165 | 166 | output_trim_report: 167 | type: File 168 | outputBinding: 169 | # glob: "*Tr.metrics" 170 | glob: "*.metrics" 171 | 172 | doc: | 173 | This tool wraps cutadapt with default parameters set to single-end eCLIP processing defaults. 174 | Usage: cutadapt -a ADAPT1 [options] -o out1.fastq in1.fastq 175 | -------------------------------------------------------------------------------- /cwl/trim_umi.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | cwlVersion: v1.0 4 | class: CommandLineTool 5 | 6 | # , $overlap_length_option 7 | # , $g_adapters_option 8 | # , $A_adapters_option 9 | # , $a_adapters_option 10 | # , -o, out_fastq.fastq.gz 11 | # , -p, out_pair.fastq.gz 12 | # , in_fastq.fastq.gz 13 | # , in_pair.fastq.gz 14 | # > report 15 | 16 | #$namespaces: 17 | # ex: http://example.com/ 18 | 19 | requirements: 20 | - class: ResourceRequirement 21 | coresMin: 2 22 | # ramMin: 30000 23 | # tmpdirMin: 4000 24 | # outdirMin: 4000 25 | - class: StepInputExpressionRequirement 26 | - class: InlineJavascriptRequirement 27 | 28 | #hints: 29 | # - class: ex:PackageRequirement 30 | # packages: 31 | # - name: cutadapt 32 | # package_manager: pip 33 | # version: "1.10" 34 | # - class: ex:ScriptRequirement 35 | # scriptlines: 36 | # - "#!/bin/bash" 37 | # - class: ShellCommandRequirement 38 | 39 | 40 | baseCommand: [cutadapt] 41 | 42 | # arguments: [-f, fastq, 43 | # --match-read-wildcards, 44 | # --times, "2", 45 | # -e, "0.0", 46 | # --quality-cutoff, "6", 47 | # -m, "18", 48 | # -o, $(inputs.input_trim.nameroot)Tr.fqgz 49 | # ] 50 | 51 | inputs: 52 
| 53 | hard_trim_length: 54 | type: int 55 | default: -9 # negative value: passed to cutadapt -u to trim from the 3' end (see doc below) 56 | inputBinding: 57 | position: 0 58 | prefix: -u 59 | 60 | # cores: 61 | # type: int 62 | # default: 4 63 | # inputBinding: 64 | # position: 1 65 | # prefix: -cores 66 | 67 | output_r1: 68 | type: string 69 | inputBinding: 70 | position: 7 71 | prefix: -o 72 | valueFrom: | 73 | ${ 74 | if (inputs.output_r1 == "") { 75 | return inputs.input_trim[0].nameroot + "Tr.fq"; 76 | } 77 | else { 78 | return inputs.output_r1; 79 | } 80 | } 81 | default: "" 82 | 83 | input_trim: 84 | type: File[]? 85 | inputBinding: 86 | position: 14 87 | 88 | 89 | stdout: $(inputs.input_trim[0].nameroot)Tr.metrics 90 | 91 | outputs: 92 | 93 | output_trim: 94 | type: File[]? 95 | outputBinding: 96 | # glob: "*Tr.fq" 97 | # If output_r1 was not specified, look for input basename 98 | glob: | 99 | ${ 100 | if (inputs.output_r1 == "") { 101 | return [ 102 | inputs.input_trim[0].nameroot + "Tr.fq" 103 | ]; 104 | } 105 | else { 106 | return [ 107 | inputs.output_r1 108 | ]; 109 | } 110 | } 111 | 112 | output_trim_report: 113 | type: File 114 | outputBinding: 115 | # glob: "*Tr.metrics" 116 | glob: "*.metrics" 117 | 118 | doc: | 119 | This tool wraps cutadapt to trim off the 3' end of R1 (may be UMIs) for eCLASH reads -------------------------------------------------------------------------------- /cwl/wf_clipseqcore_chimeric_se_1barcode.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | ### Workflow for handling reads containing one barcode ### 4 | ### Single-end variant: takes read1 only and returns the rmdup sorted bam plus pos/neg bigwigs ### 5 | 6 | cwlVersion: v1.0 7 | class: Workflow 8 | 9 | requirements: 10 | - class: StepInputExpressionRequirement 11 | - class: SubworkflowFeatureRequirement 12 | - class: ScatterFeatureRequirement # TODO needed? 
13 | - class: MultipleInputFeatureRequirement 14 | - class: InlineJavascriptRequirement 15 | 16 | #hints: 17 | # - class: ex:ScriptRequirement 18 | # scriptlines: 19 | # - "#!/bin/bash" 20 | 21 | 22 | inputs: 23 | dataset: 24 | type: string 25 | 26 | speciesGenomeDir: 27 | type: Directory 28 | 29 | repeatElementGenomeDir: 30 | type: Directory 31 | 32 | # TODO: remove, we don't use it here. 33 | species: 34 | type: string 35 | 36 | chrom_sizes: 37 | type: File 38 | 39 | # barcodesfasta: 40 | # type: File 41 | 42 | # randomer_length: 43 | # type: string 44 | 45 | read: 46 | type: 47 | type: record 48 | fields: 49 | read1: 50 | type: File 51 | # read2: 52 | # type: File 53 | adapters: 54 | type: File 55 | name: 56 | type: string 57 | 58 | # r2_bam: 59 | # type: string 60 | 61 | # output_bam: 62 | # type: string 63 | 64 | # adapters: 65 | # type: File 66 | 67 | ### Defaults ### 68 | 69 | # r2_bits: 70 | # type: int 71 | # default: 128 72 | # is_bam: 73 | # type: boolean 74 | # default: true 75 | 76 | outputs: 77 | 78 | b1_demuxed_fastq_r1: 79 | type: File 80 | outputSource: demultiplex/A_output_demuxed_read1 81 | # b1_demuxed_fastq_r2: 82 | # type: File 83 | # outputSource: demultiplex/A_output_demuxed_read2 84 | 85 | b1_trimx1_fastq: 86 | type: File[] 87 | outputSource: b1_trim_and_map/X_output_trim_first 88 | b1_trimx1_metrics: 89 | type: File 90 | outputSource: b1_trim_and_map/X_output_trim_first_metrics 91 | b1_trimx1_fastqc_report: 92 | type: File 93 | outputSource: b1_trim_and_map/X_output_trim_first_fastqc_report 94 | b1_trimx1_fastqc_stats: 95 | type: File 96 | outputSource: b1_trim_and_map/X_output_trim_first_fastqc_stats 97 | b1_trimx2_fastq: 98 | type: File[] 99 | outputSource: b1_trim_and_map/X_output_trim_again 100 | b1_trimx2_metrics: 101 | type: File 102 | outputSource: b1_trim_and_map/X_output_trim_again_metrics 103 | b1_trimx2_fastqc_report: 104 | type: File 105 | outputSource: b1_trim_and_map/X_output_trim_again_fastqc_report 106 | 
b1_trimx2_fastqc_stats: 107 | type: File 108 | outputSource: b1_trim_and_map/X_output_trim_again_fastqc_stats 109 | 110 | b1_maprepeats_mapped_to_genome: 111 | type: File 112 | outputSource: b1_trim_and_map/A_output_maprepeats_mapped_to_genome 113 | b1_maprepeats_stats: 114 | type: File 115 | outputSource: b1_trim_and_map/A_output_maprepeats_stats 116 | b1_maprepeats_star_settings: 117 | type: File 118 | outputSource: b1_trim_and_map/A_output_maprepeats_star_settings 119 | b1_sorted_unmapped_fastq: 120 | type: File 121 | outputSource: b1_trim_and_map/A_output_sort_repunmapped_fastq 122 | 123 | b1_mapgenome_mapped_to_genome: 124 | type: File 125 | outputSource: b1_trim_and_map/A_output_mapgenome_mapped_to_genome 126 | b1_mapgenome_stats: 127 | type: File 128 | outputSource: b1_trim_and_map/A_output_mapgenome_stats 129 | b1_mapgenome_star_settings: 130 | type: File 131 | outputSource: b1_trim_and_map/A_output_mapgenome_star_settings 132 | 133 | b1_output_pre_rmdup_sorted_bam: 134 | type: File 135 | outputSource: b1_trim_and_map/A_output_sorted_bam 136 | 137 | b1_output_barcodecollapsese_metrics: 138 | type: File 139 | outputSource: b1_trim_and_map/X_output_barcodecollapsese_metrics 140 | 141 | b1_output_rmdup_sorted_bam: 142 | type: File 143 | outputSource: b1_trim_and_map/X_output_sorted_bam 144 | 145 | output_pos_bw: 146 | type: File 147 | outputSource: make_bigwigs/posbw 148 | output_neg_bw: 149 | type: File 150 | outputSource: make_bigwigs/negbw 151 | 152 | steps: 153 | 154 | ########################################################################### 155 | # Upstream 156 | ########################################################################### 157 | 158 | demultiplex: 159 | run: wf_demultiplex_se.cwl 160 | in: 161 | dataset: dataset 162 | read: read 163 | out: [ 164 | A_output_demuxed_read1, 165 | read_name, 166 | dataset_name 167 | ] 168 | 169 | b1_trim_and_map: 170 | run: wf_trim_and_map_chimeric_se.cwl 171 | in: 172 | speciesGenomeDir: speciesGenomeDir 173 
| repeatElementGenomeDir: repeatElementGenomeDir 174 | trimfirst_overlap_length: 175 | default: "1" 176 | trimagain_overlap_length: 177 | default: "5" 178 | a_adapters: 179 | source: read 180 | valueFrom: | 181 | ${ 182 | return self.adapters; 183 | } 184 | read1: demultiplex/A_output_demuxed_read1 185 | read_name: demultiplex/read_name 186 | dataset_name: demultiplex/dataset_name 187 | out: [ 188 | X_output_trim_first, 189 | X_output_trim_first_metrics, 190 | X_output_trim_first_fastqc_report, 191 | X_output_trim_first_fastqc_stats, 192 | X_output_trim_again, 193 | X_output_trim_again_metrics, 194 | X_output_trim_again_fastqc_report, 195 | X_output_trim_again_fastqc_stats, 196 | A_output_maprepeats_mapped_to_genome, 197 | A_output_maprepeats_stats, 198 | A_output_maprepeats_star_settings, 199 | A_output_sort_repunmapped_fastq, 200 | A_output_mapgenome_mapped_to_genome, 201 | A_output_mapgenome_stats, 202 | A_output_mapgenome_star_settings, 203 | A_output_sorted_bam, 204 | # A_output_sorted_bam_index, 205 | X_output_barcodecollapsese_bam, 206 | X_output_barcodecollapsese_metrics, 207 | X_output_sorted_bam 208 | ] 209 | 210 | 211 | ########################################################################### 212 | # Downstream (candidate for merging with main pipeline) 213 | ########################################################################### 214 | 215 | make_bigwigs: 216 | run: makebigwigfiles.cwl 217 | in: 218 | chromsizes: chrom_sizes 219 | bam: b1_trim_and_map/X_output_sorted_bam 220 | out: 221 | [posbw, negbw] 222 | -------------------------------------------------------------------------------- /cwl/wf_clipseqcore_nostats_se_1barcode.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | ### Workflow for handling reads containing one barcode ### 4 | ### Returns a bam file containing read2 only ### 5 | 6 | cwlVersion: v1.0 7 | class: Workflow 8 | 9 | requirements: 10 | - class: 
StepInputExpressionRequirement 11 | - class: SubworkflowFeatureRequirement 12 | - class: ScatterFeatureRequirement # TODO needed? 13 | - class: MultipleInputFeatureRequirement 14 | - class: InlineJavascriptRequirement 15 | 16 | #hints: 17 | # - class: ex:ScriptRequirement 18 | # scriptlines: 19 | # - "#!/bin/bash" 20 | 21 | 22 | inputs: 23 | dataset: 24 | type: string 25 | 26 | speciesGenomeDir: 27 | type: Directory 28 | 29 | repeatElementGenomeDir: 30 | type: Directory 31 | 32 | # TODO: remove, we don't use it here. 33 | species: 34 | type: string 35 | 36 | chrom_sizes: 37 | type: File 38 | 39 | # barcodesfasta: 40 | # type: File 41 | 42 | # randomer_length: 43 | # type: string 44 | 45 | read: 46 | type: 47 | type: record 48 | fields: 49 | read1: 50 | type: File 51 | # read2: 52 | # type: File 53 | adapters: 54 | type: File 55 | name: 56 | type: string 57 | 58 | # r2_bam: 59 | # type: string 60 | 61 | # output_bam: 62 | # type: string 63 | 64 | # adapters: 65 | # type: File 66 | 67 | ### Defaults ### 68 | 69 | # r2_bits: 70 | # type: int 71 | # default: 128 72 | # is_bam: 73 | # type: boolean 74 | # default: true 75 | 76 | outputs: 77 | 78 | b1_demuxed_fastq_r1: 79 | type: File 80 | outputSource: demultiplex/A_output_demuxed_read1 81 | # b1_demuxed_fastq_r2: 82 | # type: File 83 | # outputSource: demultiplex/A_output_demuxed_read2 84 | 85 | b1_trimx1_fastq: 86 | type: File[] 87 | outputSource: b1_trim_and_map/X_output_trim_first 88 | b1_trimx1_metrics: 89 | type: File 90 | outputSource: b1_trim_and_map/X_output_trim_first_metrics 91 | b1_trimx1_fastqc_report: 92 | type: File 93 | outputSource: b1_trim_and_map/X_output_trim_first_fastqc_report 94 | b1_trimx1_fastqc_stats: 95 | type: File 96 | outputSource: b1_trim_and_map/X_output_trim_first_fastqc_stats 97 | b1_trimx2_fastq: 98 | type: File[] 99 | outputSource: b1_trim_and_map/X_output_trim_again 100 | b1_trimx2_metrics: 101 | type: File 102 | outputSource: b1_trim_and_map/X_output_trim_again_metrics 103 | 
b1_trimx2_fastqc_report: 104 | type: File 105 | outputSource: b1_trim_and_map/X_output_trim_again_fastqc_report 106 | b1_trimx2_fastqc_stats: 107 | type: File 108 | outputSource: b1_trim_and_map/X_output_trim_again_fastqc_stats 109 | 110 | b1_maprepeats_mapped_to_genome: 111 | type: File 112 | outputSource: b1_trim_and_map/A_output_maprepeats_mapped_to_genome 113 | b1_maprepeats_stats: 114 | type: File 115 | outputSource: b1_trim_and_map/A_output_maprepeats_stats 116 | b1_maprepeats_star_settings: 117 | type: File 118 | outputSource: b1_trim_and_map/A_output_maprepeats_star_settings 119 | b1_sorted_unmapped_fastq: 120 | type: File 121 | outputSource: b1_trim_and_map/A_output_sort_repunmapped_fastq 122 | 123 | b1_mapgenome_mapped_to_genome: 124 | type: File 125 | outputSource: b1_trim_and_map/A_output_mapgenome_mapped_to_genome 126 | b1_mapgenome_stats: 127 | type: File 128 | outputSource: b1_trim_and_map/A_output_mapgenome_stats 129 | b1_mapgenome_star_settings: 130 | type: File 131 | outputSource: b1_trim_and_map/A_output_mapgenome_star_settings 132 | 133 | b1_output_pre_rmdup_sorted_bam: 134 | type: File 135 | outputSource: b1_trim_and_map/A_output_sorted_bam 136 | 137 | # b1_output_barcodecollapsese_metrics: 138 | # type: File 139 | # outputSource: b1_trim_and_map/X_output_barcodecollapsese_metrics 140 | 141 | b1_output_rmdup_sorted_bam: 142 | type: File 143 | outputSource: b1_trim_and_map/X_output_sorted_bam 144 | 145 | output_pos_bw: 146 | type: File 147 | outputSource: make_bigwigs/posbw 148 | output_neg_bw: 149 | type: File 150 | outputSource: make_bigwigs/negbw 151 | 152 | steps: 153 | 154 | ########################################################################### 155 | # Upstream 156 | ########################################################################### 157 | 158 | demultiplex: 159 | run: wf_demultiplex_se.cwl 160 | in: 161 | dataset: dataset 162 | read: read 163 | out: [ 164 | A_output_demuxed_read1, 165 | read_name, 166 | dataset_name 167 | ] 
168 | 169 | b1_trim_and_map: 170 | run: wf_trim_and_map_se_nostats.cwl 171 | in: 172 | speciesGenomeDir: speciesGenomeDir 173 | repeatElementGenomeDir: repeatElementGenomeDir 174 | trimfirst_overlap_length: 175 | default: "1" 176 | trimagain_overlap_length: 177 | default: "5" 178 | a_adapters: 179 | source: read 180 | valueFrom: | 181 | ${ 182 | return self.adapters; 183 | } 184 | read1: demultiplex/A_output_demuxed_read1 185 | read_name: demultiplex/read_name 186 | dataset_name: demultiplex/dataset_name 187 | out: [ 188 | X_output_trim_first, 189 | X_output_trim_first_metrics, 190 | X_output_trim_first_fastqc_report, 191 | X_output_trim_first_fastqc_stats, 192 | X_output_trim_again, 193 | X_output_trim_again_metrics, 194 | X_output_trim_again_fastqc_report, 195 | X_output_trim_again_fastqc_stats, 196 | A_output_maprepeats_mapped_to_genome, 197 | A_output_maprepeats_stats, 198 | A_output_maprepeats_star_settings, 199 | A_output_sort_repunmapped_fastq, 200 | A_output_mapgenome_mapped_to_genome, 201 | A_output_mapgenome_stats, 202 | A_output_mapgenome_star_settings, 203 | A_output_sorted_bam, 204 | # A_output_sorted_bam_index, 205 | X_output_barcodecollapsese_bam, 206 | # X_output_barcodecollapsese_metrics, 207 | X_output_sorted_bam 208 | ] 209 | 210 | 211 | ########################################################################### 212 | # Downstream (candidate for merging with main pipeline) 213 | ########################################################################### 214 | 215 | make_bigwigs: 216 | run: makebigwigfiles.cwl 217 | in: 218 | chromsizes: chrom_sizes 219 | bam: b1_trim_and_map/X_output_sorted_bam 220 | out: 221 | [posbw, negbw] 222 | -------------------------------------------------------------------------------- /cwl/wf_clipseqcore_pe_1barcode.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | doc: | 4 | Workflow for handling reads containing one barcode. 
5 | Returns the bam file containing read2 only. 6 | 7 | Notes: 8 | 9 | runs the following steps: 10 | - demultiplex 11 | - trimfirst_file2string 12 | - trimagain_file2string 13 | - b1_trim_and_map 14 | - view_r2 15 | - index_r2_bam 16 | - make_bigwigs 17 | 18 | cwlVersion: v1.0 19 | class: Workflow 20 | 21 | requirements: 22 | - class: StepInputExpressionRequirement 23 | - class: SubworkflowFeatureRequirement 24 | - class: MultipleInputFeatureRequirement 25 | - class: InlineJavascriptRequirement 26 | 27 | inputs: 28 | dataset: 29 | type: string 30 | 31 | speciesGenomeDir: 32 | type: Directory 33 | 34 | repeatElementGenomeDir: 35 | type: Directory 36 | 37 | chrom_sizes: 38 | type: File 39 | 40 | barcodesfasta: 41 | type: File 42 | 43 | randomer_length: 44 | type: string 45 | 46 | read: 47 | type: 48 | type: record 49 | fields: 50 | read1: 51 | type: File 52 | read2: 53 | type: File 54 | barcodeids: 55 | type: string[] 56 | name: 57 | type: string 58 | 59 | outputs: 60 | 61 | 62 | ### DEMULTIPLEXED OUTPUTS ### 63 | 64 | 65 | b1_demuxed_fastq_r1: 66 | label: "Barcode1 read1 demultiplexed fastq" 67 | type: File 68 | outputSource: demultiplex/A_output_demuxed_read1 69 | b1_demuxed_fastq_r2: 70 | type: File 71 | outputSource: demultiplex/A_output_demuxed_read2 72 | 73 | 74 | ### TRIMMED OUTPUTS (ROUND 1) ### 75 | 76 | 77 | b1_trimx1_fastq: 78 | type: File[] 79 | outputSource: b1_trim_and_map/X_output_trim_first 80 | b1_trimx1_metrics: 81 | type: File 82 | outputSource: b1_trim_and_map/X_output_trim_first_metrics 83 | b1_trimx1_fastqc_report_R1: 84 | type: File 85 | outputSource: b1_trim_and_map/X_output_trim_first_fastqc_report_R1 86 | b1_trimx1_fastqc_stats_R1: 87 | type: File 88 | outputSource: b1_trim_and_map/X_output_trim_first_fastqc_stats_R1 89 | b1_trimx1_fastqc_report_R2: 90 | type: File 91 | outputSource: b1_trim_and_map/X_output_trim_first_fastqc_report_R2 92 | b1_trimx1_fastqc_stats_R2: 93 | type: File 94 | outputSource: 
b1_trim_and_map/X_output_trim_first_fastqc_stats_R2 95 | 96 | 97 | ### TRIMMED OUTPUTS (ROUND 2) ### 98 | 99 | 100 | b1_trimx2_fastq: 101 | type: File[] 102 | outputSource: b1_trim_and_map/X_output_trim_again 103 | b1_trimx2_metrics: 104 | type: File 105 | outputSource: b1_trim_and_map/X_output_trim_again_metrics 106 | b1_trimx2_fastqc_report_R1: 107 | type: File 108 | outputSource: b1_trim_and_map/X_output_trim_again_fastqc_report_R1 109 | b1_trimx2_fastqc_stats_R1: 110 | type: File 111 | outputSource: b1_trim_and_map/X_output_trim_again_fastqc_stats_R1 112 | b1_trimx2_fastqc_report_R2: 113 | type: File 114 | outputSource: b1_trim_and_map/X_output_trim_again_fastqc_report_R2 115 | b1_trimx2_fastqc_stats_R2: 116 | type: File 117 | outputSource: b1_trim_and_map/X_output_trim_again_fastqc_stats_R2 118 | 119 | 120 | ### REPEAT MAPPING OUTPUTS ### 121 | 122 | 123 | b1_maprepeats_mapped_to_genome: 124 | type: File 125 | outputSource: b1_trim_and_map/A_output_maprepeats_mapped_to_genome 126 | b1_maprepeats_stats: 127 | type: File 128 | outputSource: b1_trim_and_map/A_output_maprepeats_stats 129 | b1_maprepeats_star_settings: 130 | type: File 131 | outputSource: b1_trim_and_map/A_output_maprepeats_star_settings 132 | b1_sorted_unmapped_fastq: 133 | type: File[] 134 | outputSource: b1_trim_and_map/A_output_sort_repunmapped_fastq 135 | 136 | 137 | ### GENOME MAPPING OUTPUTS ### 138 | 139 | 140 | b1_mapgenome_mapped_to_genome: 141 | type: File 142 | outputSource: b1_trim_and_map/A_output_mapgenome_mapped_to_genome 143 | b1_mapgenome_stats: 144 | type: File 145 | outputSource: b1_trim_and_map/A_output_mapgenome_stats 146 | b1_mapgenome_star_settings: 147 | type: File 148 | outputSource: b1_trim_and_map/A_output_mapgenome_star_settings 149 | 150 | 151 | ### RMDUP BAM OUTPUTS ### 152 | 153 | 154 | b1_output_prermdup_sorted_bam: 155 | type: File 156 | outputSource: b1_trim_and_map/A_output_sorted_bam 157 | b1_output_barcodecollapsepe_bam: 158 | type: File 159 | outputSource: 
b1_trim_and_map/X_output_barcodecollapsepe_bam 160 | b1_output_barcodecollapsepe_metrics: 161 | type: File 162 | outputSource: b1_trim_and_map/X_output_barcodecollapsepe_metrics 163 | 164 | 165 | ### SORTED RMDUP BAM OUTPUTS ### 166 | 167 | 168 | b1_output_sorted_bam: 169 | type: File 170 | outputSource: b1_trim_and_map/X_output_sorted_bam 171 | 172 | 173 | ### READ2 MERGED BAM OUTPUTS ### 174 | 175 | 176 | output_r2_bam: 177 | type: File 178 | outputSource: view_r2/output 179 | 180 | 181 | ### BIGWIG FILES ### 182 | 183 | 184 | output_pos_bw: 185 | type: File 186 | outputSource: make_bigwigs/posbw 187 | output_neg_bw: 188 | type: File 189 | outputSource: make_bigwigs/negbw 190 | 191 | steps: 192 | 193 | ########################################################################### 194 | # Upstream 195 | ########################################################################### 196 | 197 | demultiplex: 198 | run: wf_demultiplex_pe.cwl 199 | in: 200 | dataset: dataset 201 | randomer_length: randomer_length 202 | barcodesfasta: barcodesfasta 203 | read: read 204 | out: [ 205 | A_output_demuxed_read1, 206 | A_output_demuxed_read2, 207 | B_output_demuxed_read1, 208 | B_output_demuxed_read2, 209 | AB_output_trimfirst_overlap_length, 210 | AB_output_trimagain_overlap_length, 211 | AB_g_adapters, 212 | AB_g_adapters_default, 213 | AB_a_adapters, 214 | AB_a_adapters_default, 215 | AB_A_adapters 216 | ] 217 | 218 | ########################################################################### 219 | # Main workflow 220 | ########################################################################### 221 | 222 | trimfirst_file2string: 223 | run: file2string.cwl 224 | in: 225 | file: demultiplex/AB_output_trimfirst_overlap_length 226 | out: [output] 227 | 228 | trimagain_file2string: 229 | run: file2string.cwl 230 | in: 231 | file: demultiplex/AB_output_trimagain_overlap_length 232 | out: [output] 233 | 234 | b1_trim_and_map: 235 | run: wf_trim_and_map_pe.cwl 236 | in: 237 | 
speciesGenomeDir: speciesGenomeDir 238 | repeatElementGenomeDir: repeatElementGenomeDir 239 | trimfirst_overlap_length: trimfirst_file2string/output 240 | trimagain_overlap_length: trimagain_file2string/output 241 | g_adapters: demultiplex/AB_g_adapters 242 | g_adapters_default: demultiplex/AB_g_adapters_default 243 | a_adapters: demultiplex/AB_a_adapters 244 | a_adapters_default: demultiplex/AB_a_adapters_default 245 | A_adapters: demultiplex/AB_A_adapters 246 | read1: demultiplex/A_output_demuxed_read1 247 | read2: demultiplex/A_output_demuxed_read2 248 | out: [ 249 | X_output_trim_first, 250 | X_output_trim_first_metrics, 251 | X_output_trim_first_fastqc_report_R1, 252 | X_output_trim_first_fastqc_stats_R1, 253 | X_output_trim_first_fastqc_report_R2, 254 | X_output_trim_first_fastqc_stats_R2, 255 | X_output_trim_again, 256 | X_output_trim_again_metrics, 257 | X_output_trim_again_fastqc_report_R1, 258 | X_output_trim_again_fastqc_stats_R1, 259 | X_output_trim_again_fastqc_report_R2, 260 | X_output_trim_again_fastqc_stats_R2, 261 | A_output_maprepeats_mapped_to_genome, 262 | A_output_maprepeats_stats, 263 | A_output_maprepeats_star_settings, 264 | A_output_sort_repunmapped_fastq, 265 | A_output_mapgenome_mapped_to_genome, 266 | A_output_mapgenome_stats, 267 | A_output_mapgenome_star_settings, 268 | A_output_sorted_bam, 269 | X_output_barcodecollapsepe_bam, 270 | X_output_barcodecollapsepe_metrics, 271 | X_output_sorted_bam 272 | ] 273 | 274 | ########################################################################### 275 | # Downstream (candidate for merging with main pipeline) 276 | ########################################################################### 277 | 278 | view_r2: 279 | run: samtools-viewr2.cwl 280 | in: 281 | input: b1_trim_and_map/X_output_sorted_bam 282 | readswithbits: 283 | default: 128 284 | isbam: 285 | default: true 286 | out: [output] 287 | 288 | index_r2_bam: 289 | run: samtools-index.cwl 290 | in: 291 | alignments: view_r2/output 292 | 
out: [alignments_with_index] 293 | 294 | make_bigwigs: 295 | run: makebigwigfiles_PE.cwl 296 | in: 297 | chromsizes: chrom_sizes 298 | bam: index_r2_bam/alignments_with_index 299 | out: 300 | [posbw, negbw] 301 | -------------------------------------------------------------------------------- /cwl/wf_clipseqcore_se_1barcode.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | ### Workflow for handling reads containing one barcode ### 4 | ### Single-end (no read2): returns the rmdup sorted bam and pos/neg bigwig files ### 5 | 6 | cwlVersion: v1.0 7 | class: Workflow 8 | 9 | requirements: 10 | - class: StepInputExpressionRequirement 11 | - class: SubworkflowFeatureRequirement 12 | - class: ScatterFeatureRequirement # TODO needed? 13 | - class: MultipleInputFeatureRequirement 14 | - class: InlineJavascriptRequirement 15 | 16 | #hints: 17 | # - class: ex:ScriptRequirement 18 | # scriptlines: 19 | # - "#!/bin/bash" 20 | 21 | 22 | inputs: 23 | dataset: 24 | type: string 25 | 26 | speciesGenomeDir: 27 | type: Directory 28 | 29 | repeatElementGenomeDir: 30 | type: Directory 31 | 32 | # TODO: remove, we don't use it here. 
33 | species: 34 | type: string 35 | 36 | chrom_sizes: 37 | type: File 38 | 39 | # barcodesfasta: 40 | # type: File 41 | 42 | # randomer_length: 43 | # type: string 44 | 45 | read: 46 | type: 47 | type: record 48 | fields: 49 | read1: 50 | type: File 51 | # read2: 52 | # type: File 53 | adapters: 54 | type: File 55 | name: 56 | type: string 57 | 58 | # r2_bam: 59 | # type: string 60 | 61 | # output_bam: 62 | # type: string 63 | 64 | # adapters: 65 | # type: File 66 | 67 | ### Defaults ### 68 | 69 | # r2_bits: 70 | # type: int 71 | # default: 128 72 | # is_bam: 73 | # type: boolean 74 | # default: true 75 | 76 | outputs: 77 | 78 | b1_demuxed_fastq_r1: 79 | type: File 80 | outputSource: demultiplex/A_output_demuxed_read1 81 | # b1_demuxed_fastq_r2: 82 | # type: File 83 | # outputSource: demultiplex/A_output_demuxed_read2 84 | 85 | b1_trimx1_fastq: 86 | type: File[] 87 | outputSource: b1_trim_and_map/X_output_trim_first 88 | b1_trimx1_metrics: 89 | type: File 90 | outputSource: b1_trim_and_map/X_output_trim_first_metrics 91 | b1_trimx1_fastqc_report: 92 | type: File 93 | outputSource: b1_trim_and_map/X_output_trim_first_fastqc_report 94 | b1_trimx1_fastqc_stats: 95 | type: File 96 | outputSource: b1_trim_and_map/X_output_trim_first_fastqc_stats 97 | b1_trimx2_fastq: 98 | type: File[] 99 | outputSource: b1_trim_and_map/X_output_trim_again 100 | b1_trimx2_metrics: 101 | type: File 102 | outputSource: b1_trim_and_map/X_output_trim_again_metrics 103 | b1_trimx2_fastqc_report: 104 | type: File 105 | outputSource: b1_trim_and_map/X_output_trim_again_fastqc_report 106 | b1_trimx2_fastqc_stats: 107 | type: File 108 | outputSource: b1_trim_and_map/X_output_trim_again_fastqc_stats 109 | 110 | b1_maprepeats_mapped_to_genome: 111 | type: File 112 | outputSource: b1_trim_and_map/A_output_maprepeats_mapped_to_genome 113 | b1_maprepeats_stats: 114 | type: File 115 | outputSource: b1_trim_and_map/A_output_maprepeats_stats 116 | b1_maprepeats_star_settings: 117 | type: File 118 | 
outputSource: b1_trim_and_map/A_output_maprepeats_star_settings 119 | b1_sorted_unmapped_fastq: 120 | type: File 121 | outputSource: b1_trim_and_map/A_output_sort_repunmapped_fastq 122 | 123 | b1_mapgenome_mapped_to_genome: 124 | type: File 125 | outputSource: b1_trim_and_map/A_output_mapgenome_mapped_to_genome 126 | b1_mapgenome_stats: 127 | type: File 128 | outputSource: b1_trim_and_map/A_output_mapgenome_stats 129 | b1_mapgenome_star_settings: 130 | type: File 131 | outputSource: b1_trim_and_map/A_output_mapgenome_star_settings 132 | 133 | b1_output_pre_rmdup_sorted_bam: 134 | type: File 135 | outputSource: b1_trim_and_map/A_output_sorted_bam 136 | 137 | b1_output_barcodecollapsese_metrics: 138 | type: File 139 | outputSource: b1_trim_and_map/X_output_barcodecollapsese_metrics 140 | 141 | b1_output_rmdup_sorted_bam: 142 | type: File 143 | outputSource: b1_trim_and_map/X_output_sorted_bam 144 | 145 | output_pos_bw: 146 | type: File 147 | outputSource: make_bigwigs/posbw 148 | output_neg_bw: 149 | type: File 150 | outputSource: make_bigwigs/negbw 151 | 152 | steps: 153 | 154 | ########################################################################### 155 | # Upstream 156 | ########################################################################### 157 | 158 | demultiplex: 159 | run: wf_demultiplex_se.cwl 160 | in: 161 | dataset: dataset 162 | read: read 163 | out: [ 164 | A_output_demuxed_read1, 165 | read_name, 166 | dataset_name 167 | ] 168 | 169 | b1_trim_and_map: 170 | run: wf_trim_and_map_se.cwl 171 | in: 172 | speciesGenomeDir: speciesGenomeDir 173 | repeatElementGenomeDir: repeatElementGenomeDir 174 | trimfirst_overlap_length: 175 | default: "1" 176 | trimagain_overlap_length: 177 | default: "5" 178 | a_adapters: 179 | source: read 180 | valueFrom: | 181 | ${ 182 | return self.adapters; 183 | } 184 | read1: demultiplex/A_output_demuxed_read1 185 | read_name: demultiplex/read_name 186 | dataset_name: demultiplex/dataset_name 187 | out: [ 188 | 
X_output_trim_first, 189 | X_output_trim_first_metrics, 190 | X_output_trim_first_fastqc_report, 191 | X_output_trim_first_fastqc_stats, 192 | X_output_trim_again, 193 | X_output_trim_again_metrics, 194 | X_output_trim_again_fastqc_report, 195 | X_output_trim_again_fastqc_stats, 196 | A_output_maprepeats_mapped_to_genome, 197 | A_output_maprepeats_stats, 198 | A_output_maprepeats_star_settings, 199 | A_output_sort_repunmapped_fastq, 200 | A_output_mapgenome_mapped_to_genome, 201 | A_output_mapgenome_stats, 202 | A_output_mapgenome_star_settings, 203 | A_output_sorted_bam, 204 | # A_output_sorted_bam_index, 205 | X_output_barcodecollapsese_bam, 206 | X_output_barcodecollapsese_metrics, 207 | X_output_sorted_bam 208 | ] 209 | 210 | 211 | ########################################################################### 212 | # Downstream (candidate for merging with main pipeline) 213 | ########################################################################### 214 | 215 | make_bigwigs: 216 | run: makebigwigfiles.cwl 217 | in: 218 | chromsizes: chrom_sizes 219 | bam: b1_trim_and_map/X_output_sorted_bam 220 | out: 221 | [posbw, negbw] 222 | -------------------------------------------------------------------------------- /cwl/wf_clipseqcore_trim_partial_se_1barcode.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | ### Workflow for handling reads containing one barcode ### 4 | ### Single-end (no read2): returns the rmdup sorted bam and pos/neg bigwig files ### 5 | 6 | cwlVersion: v1.0 7 | class: Workflow 8 | 9 | requirements: 10 | - class: StepInputExpressionRequirement 11 | - class: SubworkflowFeatureRequirement 12 | - class: ScatterFeatureRequirement # TODO needed? 
13 | - class: MultipleInputFeatureRequirement 14 | - class: InlineJavascriptRequirement 15 | 16 | #hints: 17 | # - class: ex:ScriptRequirement 18 | # scriptlines: 19 | # - "#!/bin/bash" 20 | 21 | 22 | inputs: 23 | dataset: 24 | type: string 25 | 26 | speciesGenomeDir: 27 | type: Directory 28 | 29 | repeatElementGenomeDir: 30 | type: Directory 31 | 32 | # TODO: remove, we don't use it here. 33 | species: 34 | type: string 35 | 36 | chrom_sizes: 37 | type: File 38 | 39 | # barcodesfasta: 40 | # type: File 41 | 42 | # randomer_length: 43 | # type: string 44 | 45 | read: 46 | type: 47 | type: record 48 | fields: 49 | read1: 50 | type: File 51 | # read2: 52 | # type: File 53 | adapters: 54 | type: File 55 | name: 56 | type: string 57 | 58 | # r2_bam: 59 | # type: string 60 | 61 | # output_bam: 62 | # type: string 63 | 64 | # adapters: 65 | # type: File 66 | 67 | ### Defaults ### 68 | 69 | # r2_bits: 70 | # type: int 71 | # default: 128 72 | # is_bam: 73 | # type: boolean 74 | # default: true 75 | 76 | outputs: 77 | 78 | b1_demuxed_fastq_r1: 79 | type: File 80 | outputSource: demultiplex/A_output_demuxed_read1 81 | # b1_demuxed_fastq_r2: 82 | # type: File 83 | # outputSource: demultiplex/A_output_demuxed_read2 84 | 85 | b1_trimx1_fastq: 86 | type: File[] 87 | outputSource: b1_trim_and_map/X_output_trim_first 88 | b1_trimx1_metrics: 89 | type: File 90 | outputSource: b1_trim_and_map/X_output_trim_first_metrics 91 | b1_trimx1_fastqc_report: 92 | type: File 93 | outputSource: b1_trim_and_map/X_output_trim_first_fastqc_report 94 | b1_trimx1_fastqc_stats: 95 | type: File 96 | outputSource: b1_trim_and_map/X_output_trim_first_fastqc_stats 97 | # b1_trimx2_fastq: 98 | # type: File[] 99 | # outputSource: b1_trim_and_map/X_output_trim_again 100 | # b1_trimx2_metrics: 101 | # type: File 102 | # outputSource: b1_trim_and_map/X_output_trim_again_metrics 103 | # b1_trimx2_fastqc_report: 104 | # type: File 105 | # outputSource: b1_trim_and_map/X_output_trim_again_fastqc_report 106 | # 
b1_trimx2_fastqc_stats: 107 | # type: File 108 | # outputSource: b1_trim_and_map/X_output_trim_again_fastqc_stats 109 | 110 | b1_maprepeats_mapped_to_genome: 111 | type: File 112 | outputSource: b1_trim_and_map/A_output_maprepeats_mapped_to_genome 113 | b1_maprepeats_stats: 114 | type: File 115 | outputSource: b1_trim_and_map/A_output_maprepeats_stats 116 | b1_maprepeats_star_settings: 117 | type: File 118 | outputSource: b1_trim_and_map/A_output_maprepeats_star_settings 119 | b1_sorted_unmapped_fastq: 120 | type: File 121 | outputSource: b1_trim_and_map/A_output_sort_repunmapped_fastq 122 | 123 | b1_mapgenome_mapped_to_genome: 124 | type: File 125 | outputSource: b1_trim_and_map/A_output_mapgenome_mapped_to_genome 126 | b1_mapgenome_stats: 127 | type: File 128 | outputSource: b1_trim_and_map/A_output_mapgenome_stats 129 | b1_mapgenome_star_settings: 130 | type: File 131 | outputSource: b1_trim_and_map/A_output_mapgenome_star_settings 132 | 133 | b1_output_pre_rmdup_sorted_bam: 134 | type: File 135 | outputSource: b1_trim_and_map/A_output_sorted_bam 136 | 137 | b1_output_barcodecollapsese_metrics: 138 | type: File 139 | outputSource: b1_trim_and_map/X_output_barcodecollapsese_metrics 140 | 141 | b1_output_rmdup_sorted_bam: 142 | type: File 143 | outputSource: b1_trim_and_map/X_output_sorted_bam 144 | 145 | output_pos_bw: 146 | type: File 147 | outputSource: make_bigwigs/posbw 148 | output_neg_bw: 149 | type: File 150 | outputSource: make_bigwigs/negbw 151 | 152 | steps: 153 | 154 | ########################################################################### 155 | # Upstream 156 | ########################################################################### 157 | 158 | demultiplex: 159 | run: wf_demultiplex_se.cwl 160 | in: 161 | dataset: dataset 162 | read: read 163 | out: [ 164 | A_output_demuxed_read1, 165 | read_name, 166 | dataset_name 167 | ] 168 | 169 | b1_trim_and_map: 170 | run: wf_trim_partial_and_map_se.cwl 171 | in: 172 | speciesGenomeDir: speciesGenomeDir 
173 | repeatElementGenomeDir: repeatElementGenomeDir 174 | trimfirst_overlap_length: 175 | default: "1" 176 | trimagain_overlap_length: 177 | default: "5" 178 | a_adapters: 179 | source: read 180 | valueFrom: | 181 | ${ 182 | return self.adapters; 183 | } 184 | read1: demultiplex/A_output_demuxed_read1 185 | read_name: demultiplex/read_name 186 | dataset_name: demultiplex/dataset_name 187 | out: [ 188 | X_output_trim_first, 189 | X_output_trim_first_metrics, 190 | X_output_trim_first_fastqc_report, 191 | X_output_trim_first_fastqc_stats, 192 | A_output_maprepeats_mapped_to_genome, 193 | A_output_maprepeats_stats, 194 | A_output_maprepeats_star_settings, 195 | A_output_sort_repunmapped_fastq, 196 | A_output_mapgenome_mapped_to_genome, 197 | A_output_mapgenome_stats, 198 | A_output_mapgenome_star_settings, 199 | A_output_sorted_bam, 200 | # A_output_sorted_bam_index, 201 | X_output_barcodecollapsese_bam, 202 | X_output_barcodecollapsese_metrics, 203 | X_output_sorted_bam 204 | ] 205 | 206 | 207 | ########################################################################### 208 | # Downstream (candidate for merging with main pipeline) 209 | ########################################################################### 210 | 211 | make_bigwigs: 212 | run: makebigwigfiles.cwl 213 | in: 214 | chromsizes: chrom_sizes 215 | bam: b1_trim_and_map/X_output_sorted_bam 216 | out: 217 | [posbw, negbw] 218 | -------------------------------------------------------------------------------- /cwl/wf_demultiplex_pe.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | 4 | cwlVersion: v1.0 5 | class: Workflow 6 | 7 | requirements: 8 | - class: StepInputExpressionRequirement 9 | - class: SubworkflowFeatureRequirement 10 | - class: ScatterFeatureRequirement # TODO needed? 
11 | - class: MultipleInputFeatureRequirement 12 | 13 | 14 | #hints: 15 | # - class: ex:ScriptRequirement 16 | # scriptlines: 17 | # - "#!/bin/bash" 18 | 19 | 20 | inputs: 21 | dataset: 22 | type: string 23 | randomer_length: 24 | type: string 25 | barcodesfasta: 26 | type: File 27 | 28 | read: 29 | type: 30 | type: record 31 | fields: 32 | read1: 33 | type: File 34 | read2: 35 | type: File 36 | barcodeids: 37 | type: string[] 38 | name: 39 | type: string 40 | outputs: 41 | 42 | ### DEMUXED FILES ### 43 | A_output_demuxed_read1: 44 | type: File 45 | outputSource: AB_demux/demuxedAfwd 46 | A_output_demuxed_read2: 47 | type: File 48 | outputSource: AB_demux/demuxedArev 49 | B_output_demuxed_read1: 50 | type: File 51 | outputSource: AB_demux/demuxedBfwd 52 | B_output_demuxed_read2: 53 | type: File 54 | outputSource: AB_demux/demuxedBrev 55 | 56 | ### TRIM/CUTADAPT PARAMS ### 57 | AB_output_trimfirst_overlap_length: 58 | type: File 59 | outputSource: AB_parsebarcodes/trimfirst_overlap_length 60 | AB_output_trimagain_overlap_length: 61 | type: File 62 | outputSource: AB_parsebarcodes/trimagain_overlap_length 63 | AB_g_adapters_default: 64 | type: File 65 | outputSource: AB_parsebarcodes/g_adapters_default 66 | AB_a_adapters_default: 67 | type: File 68 | outputSource: AB_parsebarcodes/a_adapters_default 69 | AB_g_adapters: 70 | type: File 71 | outputSource: AB_parsebarcodes/g_adapters 72 | AB_a_adapters: 73 | type: File 74 | outputSource: AB_parsebarcodes/a_adapters 75 | AB_A_adapters: 76 | type: File 77 | outputSource: AB_parsebarcodes/A_adapters 78 | 79 | 80 | steps: 81 | 82 | ########################################################################### 83 | # Upstream 84 | ########################################################################### 85 | AB_demux: 86 | run: demux_pe.cwl 87 | in: 88 | barcodesfasta: barcodesfasta 89 | randomer_length: randomer_length 90 | dataset: dataset 91 | # seqdatapath: seqdatapath 92 | reads: read 93 | out: [demuxedAfwd, demuxedArev, 
94 | demuxedBfwd, demuxedBrev, 95 | output_demuxedpairedend_metrics, 96 | output_dataset, 97 | name, 98 | barcodeidA, 99 | barcodeidB 100 | ] 101 | 102 | AB_parsebarcodes: 103 | run: parsebarcodes.cwl 104 | in: 105 | randomer_length: randomer_length 106 | barcodeidA: AB_demux/barcodeidA 107 | barcodeidB: AB_demux/barcodeidB 108 | barcodesfasta: barcodesfasta 109 | out: [ 110 | trimfirst_overlap_length, trimagain_overlap_length, 111 | g_adapters_default, a_adapters_default, 112 | g_adapters, a_adapters, A_adapters 113 | ] 114 | 115 | ########################################################################### 116 | # Downstream 117 | ########################################################################### 118 | 119 | -------------------------------------------------------------------------------- /cwl/wf_demultiplex_se.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | ### This is kind of a worthless workflow, ### 4 | ### but to keep consistent with the paired-end ### 5 | ### pipeline, I'm keeping it here. ### 6 | 7 | cwlVersion: v1.0 8 | class: Workflow 9 | 10 | requirements: 11 | - class: StepInputExpressionRequirement 12 | - class: SubworkflowFeatureRequirement 13 | - class: ScatterFeatureRequirement # TODO needed? 
14 | - class: MultipleInputFeatureRequirement 15 | 16 | 17 | #hints: 18 | # - class: ex:ScriptRequirement 19 | # scriptlines: 20 | # - "#!/bin/bash" 21 | 22 | 23 | inputs: 24 | dataset: 25 | type: string 26 | # randomer_length: 27 | # type: string 28 | # barcodesfasta: 29 | # type: File 30 | 31 | read: 32 | type: 33 | type: record 34 | fields: 35 | read1: 36 | type: File 37 | # barcodeids: 38 | # type: string[] 39 | name: 40 | type: string 41 | outputs: 42 | 43 | ### DEMUXED FILES ### 44 | A_output_demuxed_read1: 45 | type: File 46 | outputSource: gzip_demux/gzipped 47 | read_name: 48 | type: string 49 | outputSource: AB_demux/name 50 | dataset_name: 51 | type: string 52 | outputSource: AB_demux/output_dataset 53 | ### TRIM/CUTADAPT PARAMS ### 54 | 55 | 56 | steps: 57 | 58 | ########################################################################### 59 | # Upstream 60 | ########################################################################### 61 | AB_demux: 62 | run: demux_se.cwl 63 | in: 64 | reads: read 65 | dataset: dataset 66 | out: [ 67 | demuxedAfwd, 68 | output_demuxedsingleend_metrics, 69 | output_dataset, 70 | name 71 | ] 72 | 73 | ########################################################################### 74 | # Downstream 75 | ########################################################################### 76 | gzip_demux: 77 | run: gzip.cwl 78 | in: 79 | input: AB_demux/demuxedAfwd 80 | out: 81 | - gzipped 82 | 83 | doc: | 84 | This workflow takes in single-end reads, and performs the following steps in order: 85 | demux_se.cwl (does not actually demux for single end, but mirrors the paired-end processing protocol) 86 | -------------------------------------------------------------------------------- /cwl/wf_fastqc.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | ### Fastqc annoyingly does not allow customized output filenames, so we need to re-name each so they don't overlap each other. 
4 | 5 | cwlVersion: v1.0 6 | class: Workflow 7 | 8 | requirements: 9 | - class: InlineJavascriptRequirement 10 | - class: StepInputExpressionRequirement 11 | - class: SubworkflowFeatureRequirement 12 | 13 | inputs: 14 | reads: 15 | type: File 16 | 17 | outputs: 18 | output_qc_report: 19 | type: File 20 | outputSource: step_rename_report/outfile 21 | output_qc_stats: 22 | type: File 23 | outputSource: step_rename_stats/outfile 24 | 25 | 26 | steps: 27 | 28 | ########################################################################### 29 | # Upstream 30 | ########################################################################### 31 | step_fastqc: 32 | run: fastqc.cwl 33 | in: 34 | reads: reads 35 | out: [ 36 | output_qc_report, 37 | output_qc_stats 38 | ] 39 | 40 | ########################################################################### 41 | # Downstream 42 | ########################################################################### 43 | step_rename_report: 44 | run: rename.cwl 45 | in: 46 | srcfile: step_fastqc/output_qc_report 47 | suffix: 48 | default: ".html" 49 | newname: 50 | source: reads 51 | valueFrom: ${ return self.nameroot + ".fastqc_report"; } 52 | out: [ 53 | outfile 54 | ] 55 | step_rename_stats: 56 | run: rename.cwl 57 | in: 58 | srcfile: step_fastqc/output_qc_stats 59 | suffix: 60 | default: ".txt" 61 | newname: 62 | source: reads 63 | valueFrom: ${ return self.nameroot + ".fastqc_data"; } 64 | out: [ 65 | outfile 66 | ] 67 | 68 | doc: | 69 | This workflow takes in a single reads file, and performs the following steps in order: 70 | fastqc.cwl, then rename.cwl on the QC report and on the QC stats file (each is renamed after the input file's nameroot so outputs from different inputs do not overlap) 71 | -------------------------------------------------------------------------------- /cwl/wf_trim_partial_and_map_se_scatter.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwltool 2 | 3 | ### This sub workflow should be identical to wf_trim_and_map_se.cwl except 
that it runs cutadapt only once. ### 4 | 5 | cwlVersion: v1.0 6 | class: Workflow 7 | 8 | requirements: 9 | - class: InlineJavascriptRequirement 10 | - class: StepInputExpressionRequirement 11 | - class: SubworkflowFeatureRequirement 12 | - class: ScatterFeatureRequirement # TODO needed? 13 | - class: MultipleInputFeatureRequirement 14 | 15 | 16 | #hints: 17 | # - class: ex:ScriptRequirement 18 | # scriptlines: 19 | # - "#!/bin/bash" 20 | 21 | 22 | inputs: 23 | speciesGenomeDir: 24 | type: Directory 25 | repeatElementGenomeDir: 26 | type: Directory 27 | trimfirst_overlap_length: 28 | type: string 29 | trimagain_overlap_length: 30 | type: string 31 | # g_adapters: 32 | # type: File 33 | # g_adapters_default: 34 | # type: File 35 | a_adapters: 36 | type: File 37 | # a_adapters_default: 38 | # type: File 39 | # A_adapters: 40 | # type: File 41 | read1s: 42 | type: File[] 43 | read_name: 44 | type: string 45 | dataset_name: 46 | type: string 47 | 48 | ## Defaults (don't change unless we have a very good reason) ## 49 | 50 | sort_names: 51 | type: boolean 52 | default: true 53 | trim_times: 54 | type: string 55 | default: "1" 56 | trim_error_rate: 57 | type: string 58 | default: "0.1" 59 | 60 | fastq_suffix: 61 | type: string 62 | default: ".fq" 63 | bam_suffix: 64 | type: string 65 | default: ".bam" 66 | 67 | hard_trim_length: 68 | type: int 69 | default: -9 70 | 71 | outputs: 72 | 73 | X_output_trim_first: 74 | type: 75 | type: array 76 | items: 77 | type: array 78 | items: File 79 | outputSource: step_wf_trim_partial_and_map/X_output_trim_first 80 | X_output_trim_first_metrics: 81 | type: File[] 82 | outputSource: step_wf_trim_partial_and_map/X_output_trim_first_metrics 83 | 84 | A_output_maprepeats_mapped_to_genome: 85 | type: File[] 86 | outputSource: step_wf_trim_partial_and_map/A_output_maprepeats_mapped_to_genome 87 | A_output_maprepeats_stats: 88 | type: File[] 89 | outputSource: step_wf_trim_partial_and_map/A_output_maprepeats_stats 90 | 
A_output_maprepeats_star_settings: 91 | type: File[] 92 | outputSource: step_wf_trim_partial_and_map/A_output_maprepeats_star_settings 93 | A_output_sort_repunmapped_fastq: 94 | type: File[] 95 | outputSource: step_wf_trim_partial_and_map/A_output_sort_repunmapped_fastq 96 | 97 | A_output_mapgenome_mapped_to_genome: 98 | type: File[] 99 | outputSource: step_wf_trim_partial_and_map/A_output_mapgenome_mapped_to_genome 100 | A_output_mapgenome_stats: 101 | type: File[] 102 | outputSource: step_wf_trim_partial_and_map/A_output_mapgenome_stats 103 | A_output_mapgenome_star_settings: 104 | type: File[] 105 | outputSource: step_wf_trim_partial_and_map/A_output_mapgenome_star_settings 106 | A_output_sorted_bam: 107 | type: File[] 108 | outputSource: step_wf_trim_partial_and_map/A_output_sorted_bam 109 | 110 | X_output_barcodecollapsese_bam: 111 | type: File[] 112 | outputSource: step_wf_trim_partial_and_map/X_output_barcodecollapsese_bam 113 | # X_output_barcodecollapsese_metrics: 114 | # type: File[] 115 | # outputSource: step_wf_trim_partial_and_map/X_output_barcodecollapsese_metrics 116 | 117 | X_output_sorted_bam: 118 | type: File[] 119 | outputSource: step_wf_trim_partial_and_map/X_output_sorted_bam 120 | 121 | steps: 122 | 123 | step_wf_trim_partial_and_map: 124 | run: wf_trim_partial_and_map_se.cwl 125 | scatter: read1 126 | in: 127 | read1: read1s 128 | read_name: read_name 129 | dataset_name: dataset_name 130 | speciesGenomeDir: speciesGenomeDir 131 | repeatElementGenomeDir: repeatElementGenomeDir 132 | trimfirst_overlap_length: trimfirst_overlap_length 133 | trimagain_overlap_length: trimagain_overlap_length 134 | a_adapters: a_adapters 135 | out: 136 | - X_output_trim_first 137 | - X_output_trim_first_metrics 138 | - A_output_maprepeats_mapped_to_genome 139 | - A_output_maprepeats_stats 140 | - A_output_maprepeats_star_settings 141 | - A_output_sort_repunmapped_fastq 142 | - A_output_mapgenome_mapped_to_genome 143 | - A_output_mapgenome_stats 144 | - 
A_output_mapgenome_star_settings 145 | - A_output_sorted_bam 146 | - X_output_barcodecollapsese_bam 147 | - X_output_sorted_bam 148 | # - X_output_barcodecollapsese_metrics 149 | -------------------------------------------------------------------------------- /documentation/Repeat_mapping.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/documentation/Repeat_mapping.pdf -------------------------------------------------------------------------------- /documentation/Reproducible_peaks.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/documentation/Reproducible_peaks.pdf -------------------------------------------------------------------------------- /documentation/Zero_to_peaks.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/documentation/Zero_to_peaks.pdf -------------------------------------------------------------------------------- /documentation/eCLIP_analysisSOP_v2.0.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/documentation/eCLIP_analysisSOP_v2.0.pdf -------------------------------------------------------------------------------- /documentation/eCLIP_analysisSOP_v2.2.1.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/documentation/eCLIP_analysisSOP_v2.2.1.docx -------------------------------------------------------------------------------- /documentation/eCLIP_analysisSOP_v2.2.docx: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/documentation/eCLIP_analysisSOP_v2.2.docx -------------------------------------------------------------------------------- /documentation/eCLIP_single_end_analysisSOP_v1.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/documentation/eCLIP_single_end_analysisSOP_v1.docx -------------------------------------------------------------------------------- /eCLIP-flowchart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/eCLIP-flowchart.png -------------------------------------------------------------------------------- /example/inputs/ENCFF039QTN.bed: -------------------------------------------------------------------------------- 1 | chr1 185958734 185958753 skyscraper 0 + 2 | chr22 38051278 38051282 skyscraper 0 + 3 | chr22 38049865 38049880 skyscraper 0 + 4 | chr10 27220229 27220248 skyscraper 0 - 5 | chr8 38586651 38586671 skyscraper 0 + 6 | chr5 179602225 179602245 skyscraper 0 - 7 | chr3 49940108 49940127 skyscraper 0 - 8 | chr12 54378855 54378873 skyscraper 0 + 9 | chr13 24308537 24308555 skyscraper 0 - 10 | chr6 163123394 163123414 skyscraper 0 - 11 | chr9 127533506 127533527 skyscraper 0 + 12 | chr13 74014620 74014645 skyscraper 0 - 13 | chr2 219824456 219824478 skyscraper 0 + 14 | chr11 62019670 62019688 skyscraper 0 - 15 | chr7 39663474 39663497 skyscraper 0 + 16 | chr1 75210277 75210295 skyscraper 0 + 17 | chr9 77113739 77113772 skyscraper 0 + 18 | chr17 8090536 8090553 skyscraper 0 + 19 | chr19 48112258 48112284 skyscraper 0 + 20 | chr1 156186229 156186266 skyscraper 0 + 21 | chr19 36631938 36631974 skyscraper 0 + 22 | chr9 45727242 45727291 
skyscraper 0 + 23 | chr1 237766308 237766764 rRNA pseudogene 0 + 24 | chr1 91852785 91853147 rRNA pseudogene 0 - 25 | chr10 68805210 68805496 rRNA pseudogene 0 - 26 | chr11 77597473 77597831 rRNA pseudogene 0 + 27 | chr12 20704357 20704522 rRNA pseudogene 0 + 28 | chr16 47538629 47539297 rRNA pseudogene 0 + 29 | chr17 22023344 22023500 rRNA pseudogene 0 + 30 | chr19 22877614 22877696 rRNA pseudogene 0 - 31 | chr19 43911665 43912167 rRNA pseudogene 0 + 32 | chr2 133011919 133013768 rRNA pseudogene 0 - 33 | chr2 230045487 230045734 rRNA pseudogene 0 - 34 | chr22 22210544 22210651 rRNA pseudogene 0 + 35 | chr22 22210670 22210856 rRNA pseudogene 0 + 36 | chr4 7584186 7584364 rRNA pseudogene 0 + 37 | chr6 133593944 133594166 rRNA pseudogene 0 + 38 | chr8 70602248 70602620 rRNA pseudogene 0 - 39 | chr6 31958260 31958278 skyscraper 0 + 40 | chr15 96826149 96826167 skyscraper 0 - 41 | chr6 10887836 10887855 skyscraper 0 + 42 | chr14 102706662 102706680 skyscraper 0 - 43 | chr14 102700509 102700527 skyscraper 0 - 44 | chr3 160122388 160122416 skyscraper 0 + 45 | chr7 101460733 101460756 skyscraper 0 + 46 | chr10 72577390 72577408 skyscraper 0 + 47 | chrM 10007 10047 skyscraper 0 + 48 | chrM 1616 1656 skyscraper 0 + 49 | chrX 117415458 117415476 skyscraper 0 + 50 | chr11 32165332 32165361 skyscraper 0 - 51 | chr4 13544063 13544089 skyscraper 0 - 52 | chr20 25840928 25840996 unreliably mapped satellite repeat 0 - 53 | chr20 25844275 25844365 unreliably mapped satellite repeat 0 - 54 | chr20 25846823 25846961 unreliably mapped satellite repeat 0 - 55 | chr20 25848666 25848731 unreliably mapped satellite repeat 0 - 56 | chr20 25848505 25848565 unreliably mapped satellite repeat 0 - 57 | chr14 103988910 103988945 low complexity skyscraper 0 - 58 | -------------------------------------------------------------------------------- /example/inputs/ENCFF269URO.bed: -------------------------------------------------------------------------------- 1 | chr1 185989602 185989621 - - + 2 | 
chr22 37655271 37655275 - - + 3 | chr22 37653858 37653873 - - + 4 | chr10 26931300 26931319 - - - 5 | chr8 38729133 38729153 - - + 6 | chr5 180175225 180175245 - - - 7 | chr3 49902675 49902694 - - - 8 | chr12 53985071 53985089 - - + 9 | chr13 23734398 23734416 - - - 10 | chr6 162702362 162702382 - - - 11 | chr9 124771227 124771248 - - + 12 | chr13 73440483 73440508 - - - 13 | chr2 218959734 218959756 - - + 14 | chr11 62252198 62252216 - - - 15 | chr7 39623875 39623898 - - + 16 | chr1 74744593 74744611 - - + 17 | chr9 74498823 74498856 - - + 18 | chr17 8187218 8187235 - - + 19 | chr19 47609001 47609027 - - + 20 | chr1 156216438 156216475 - - + 21 | chr19 36141036 36141072 - - + 22 | chr1 237603008 237603464 - - + 23 | chr1 91387228 91387590 - - - 24 | chr10 67045452 67045738 - - - 25 | chr11 77886427 77886785 - - + 26 | chr12 20551423 20551588 - - + 27 | chr16 47504718 47505386 - - + 28 | chr17 22524018 22524174 - - + 29 | chr19 22694812 22694894 - - - 30 | chr19 43407513 43408015 - - + 31 | chr2 132254346 132256195 - - - 32 | chr2 229180771 229181018 - - - 33 | chr22 21856255 21856362 - - + 34 | chr22 21856381 21856567 - - + 35 | chr4 7582459 7582637 - - + 36 | chr6 133272806 133273028 - - + 37 | chr8 69690013 69690385 - - - 38 | chr6 31990483 31990501 - - + 39 | chr15 96282920 96282938 - - - 40 | chr6 10887603 10887622 - - + 41 | chr14 102240325 102240343 - - - 42 | chr14 102234172 102234190 - - - 43 | chr3 160404600 160404628 - - + 44 | chr7 101817453 101817476 - - + 45 | chr10 70817634 70817652 - - + 46 | chrM 10006 10046 - - + 47 | chrM 1614 1654 - - + 48 | chrX 118281495 118281513 - - + 49 | chr11 32143786 32143815 - - - 50 | chr4 13542439 13542465 - - - 51 | chr20 25860292 25860360 - - - 52 | chr20 25863639 25863729 - - - 53 | chr20 25866187 25866325 - - - 54 | chr20 25868030 25868095 - - - 55 | chr20 25867869 25867929 - - - 56 | chr14 103522573 103522608 - - - 57 | -------------------------------------------------------------------------------- 
/example/inputs/InvRNA1_adapters.fasta: -------------------------------------------------------------------------------- 1 | >InvRNA1_0 2 | NNAGCGCTAGAGATC 3 | >InvRNA1_1 4 | NAGCGCTAGAGATCG 5 | >InvRNA1_2 6 | AGCGCTAGAGATCGG 7 | >InvRNA1_3 8 | GCGCTAGAGATCGGA 9 | >InvRNA1_4 10 | CGCTAGAGATCGGAA 11 | >InvRNA1_5 12 | GCTAGAGATCGGAAG 13 | >InvRNA1_6 14 | CTAGAGATCGGAAGA 15 | >InvRNA1_7 16 | TAGAGATCGGAAGAG 17 | >InvRNA1_8 18 | AGAGATCGGAAGAGC 19 | >InvRNA1_9 20 | GAGATCGGAAGAGCA 21 | >InvRNA1_10 22 | AGATCGGAAGAGCAC 23 | >InvRNA1_11 24 | GATCGGAAGAGCACA 25 | >InvRNA1_12 26 | ATCGGAAGAGCACAC 27 | >InvRNA1_13 28 | TCGGAAGAGCACACG 29 | >InvRNA1_14 30 | CGGAAGAGCACACGT 31 | >InvRNA1_15 32 | GGAAGAGCACACGTC 33 | >InvRNA1_16 34 | GAAGAGCACACGTCT 35 | >InvRNA1_17 36 | AAGAGCACACGTCTG 37 | >InvRNA1_18 38 | AGAGCACACGTCTGA 39 | >InvRNA1_19 40 | GAGCACACGTCTGAA 41 | >InvRNA1_20 42 | AGCACACGTCTGAAC 43 | >InvRNA1_21 44 | GCACACGTCTGAACT 45 | >InvRNA1_22 46 | CACACGTCTGAACTC 47 | >InvRNA1_23 48 | ACACGTCTGAACTCC 49 | >InvRNA1_24 50 | CACGTCTGAACTCCA 51 | >InvRNA1_25 52 | ACGTCTGAACTCCAG 53 | >InvRNA1_26 54 | CGTCTGAACTCCAGT 55 | >InvRNA1_27 56 | GTCTGAACTCCAGTC 57 | >InvRNA1_28 58 | TCTGAACTCCAGTCA 59 | >InvRNA1_29 60 | CTGAACTCCAGTCAC 61 | -------------------------------------------------------------------------------- /example/inputs/InvRNA2_adapters.fasta: -------------------------------------------------------------------------------- 1 | >InvRNA2_0 2 | NNGATATCGAAGATC 3 | >InvRNA2_1 4 | NGATATCGAAGATCG 5 | >InvRNA2_2 6 | GATATCGAAGATCGG 7 | >InvRNA2_3 8 | ATATCGAAGATCGGA 9 | >InvRNA2_4 10 | TATCGAAGATCGGAA 11 | >InvRNA2_5 12 | ATCGAAGATCGGAAG 13 | >InvRNA2_6 14 | TCGAAGATCGGAAGA 15 | >InvRNA2_7 16 | CGAAGATCGGAAGAG 17 | >InvRNA2_8 18 | GAAGATCGGAAGAGC 19 | >InvRNA2_9 20 | AAGATCGGAAGAGCA 21 | >InvRNA2_10 22 | AGATCGGAAGAGCAC 23 | >InvRNA2_11 24 | GATCGGAAGAGCACA 25 | >InvRNA2_12 26 | ATCGGAAGAGCACAC 27 | >InvRNA2_13 28 | TCGGAAGAGCACACG 29 | >InvRNA2_14 30 | CGGAAGAGCACACGT 
31 | >InvRNA2_15 32 | GGAAGAGCACACGTC 33 | >InvRNA2_16 34 | GAAGAGCACACGTCT 35 | >InvRNA2_17 36 | AAGAGCACACGTCTG 37 | >InvRNA2_18 38 | AGAGCACACGTCTGA 39 | >InvRNA2_19 40 | GAGCACACGTCTGAA 41 | >InvRNA2_20 42 | AGCACACGTCTGAAC 43 | >InvRNA2_21 44 | GCACACGTCTGAACT 45 | >InvRNA2_22 46 | CACACGTCTGAACTC 47 | >InvRNA2_23 48 | ACACGTCTGAACTCC 49 | >InvRNA2_24 50 | CACGTCTGAACTCCA 51 | >InvRNA2_25 52 | ACGTCTGAACTCCAG 53 | >InvRNA2_26 54 | CGTCTGAACTCCAGT 55 | >InvRNA2_27 56 | GTCTGAACTCCAGTC 57 | >InvRNA2_28 58 | TCTGAACTCCAGTCA 59 | >InvRNA2_29 60 | CTGAACTCCAGTCAC 61 | -------------------------------------------------------------------------------- /example/inputs/InvRNA3_adapters.fasta: -------------------------------------------------------------------------------- 1 | >InvRNA3_0 2 | NNCGCAGACGAGATC 3 | >InvRNA3_1 4 | NCGCAGACGAGATCG 5 | >InvRNA3_2 6 | CGCAGACGAGATCGG 7 | >InvRNA3_3 8 | GCAGACGAGATCGGA 9 | >InvRNA3_4 10 | CAGACGAGATCGGAA 11 | >InvRNA3_5 12 | AGACGAGATCGGAAG 13 | >InvRNA3_6 14 | GACGAGATCGGAAGA 15 | >InvRNA3_7 16 | ACGAGATCGGAAGAG 17 | >InvRNA3_8 18 | CGAGATCGGAAGAGC 19 | >InvRNA3_9 20 | GAGATCGGAAGAGCA 21 | >InvRNA3_10 22 | AGATCGGAAGAGCAC 23 | >InvRNA3_11 24 | GATCGGAAGAGCACA 25 | >InvRNA3_12 26 | ATCGGAAGAGCACAC 27 | >InvRNA3_13 28 | TCGGAAGAGCACACG 29 | >InvRNA3_14 30 | CGGAAGAGCACACGT 31 | >InvRNA3_15 32 | GGAAGAGCACACGTC 33 | >InvRNA3_16 34 | GAAGAGCACACGTCT 35 | >InvRNA3_17 36 | AAGAGCACACGTCTG 37 | >InvRNA3_18 38 | AGAGCACACGTCTGA 39 | >InvRNA3_19 40 | GAGCACACGTCTGAA 41 | >InvRNA3_20 42 | AGCACACGTCTGAAC 43 | >InvRNA3_21 44 | GCACACGTCTGAACT 45 | >InvRNA3_22 46 | CACACGTCTGAACTC 47 | >InvRNA3_23 48 | ACACGTCTGAACTCC 49 | >InvRNA3_24 50 | CACGTCTGAACTCCA 51 | >InvRNA3_25 52 | ACGTCTGAACTCCAG 53 | >InvRNA3_26 54 | CGTCTGAACTCCAGT 55 | >InvRNA3_27 56 | GTCTGAACTCCAGTC 57 | >InvRNA3_28 58 | TCTGAACTCCAGTCA 59 | >InvRNA3_29 60 | CTGAACTCCAGTCAC 61 | -------------------------------------------------------------------------------- 
/example/inputs/InvRNA4_adapters.fasta: -------------------------------------------------------------------------------- 1 | >InvRNA4_0 2 | NNTATGAGTAAGATC 3 | >InvRNA4_1 4 | NTATGAGTAAGATCG 5 | >InvRNA4_2 6 | TATGAGTAAGATCGG 7 | >InvRNA4_3 8 | ATGAGTAAGATCGGA 9 | >InvRNA4_4 10 | TGAGTAAGATCGGAA 11 | >InvRNA4_5 12 | GAGTAAGATCGGAAG 13 | >InvRNA4_6 14 | AGTAAGATCGGAAGA 15 | >InvRNA4_7 16 | GTAAGATCGGAAGAG 17 | >InvRNA4_8 18 | TAAGATCGGAAGAGC 19 | >InvRNA4_9 20 | AAGATCGGAAGAGCA 21 | >InvRNA4_10 22 | AGATCGGAAGAGCAC 23 | >InvRNA4_11 24 | GATCGGAAGAGCACA 25 | >InvRNA4_12 26 | ATCGGAAGAGCACAC 27 | >InvRNA4_13 28 | TCGGAAGAGCACACG 29 | >InvRNA4_14 30 | CGGAAGAGCACACGT 31 | >InvRNA4_15 32 | GGAAGAGCACACGTC 33 | >InvRNA4_16 34 | GAAGAGCACACGTCT 35 | >InvRNA4_17 36 | AAGAGCACACGTCTG 37 | >InvRNA4_18 38 | AGAGCACACGTCTGA 39 | >InvRNA4_19 40 | GAGCACACGTCTGAA 41 | >InvRNA4_20 42 | AGCACACGTCTGAAC 43 | >InvRNA4_21 44 | GCACACGTCTGAACT 45 | >InvRNA4_22 46 | CACACGTCTGAACTC 47 | >InvRNA4_23 48 | ACACGTCTGAACTCC 49 | >InvRNA4_24 50 | CACGTCTGAACTCCA 51 | >InvRNA4_25 52 | ACGTCTGAACTCCAG 53 | >InvRNA4_26 54 | CGTCTGAACTCCAGT 55 | >InvRNA4_27 56 | GTCTGAACTCCAGTC 57 | >InvRNA4_28 58 | TCTGAACTCCAGTCA 59 | >InvRNA4_29 60 | CTGAACTCCAGTCAC 61 | -------------------------------------------------------------------------------- /example/inputs/InvRNA5_adapters.fasta: -------------------------------------------------------------------------------- 1 | >InvRNA5_0 2 | NNAGGTGCGTAGATC 3 | >InvRNA5_1 4 | NAGGTGCGTAGATCG 5 | >InvRNA5_2 6 | AGGTGCGTAGATCGG 7 | >InvRNA5_3 8 | GGTGCGTAGATCGGA 9 | >InvRNA5_4 10 | GTGCGTAGATCGGAA 11 | >InvRNA5_5 12 | TGCGTAGATCGGAAG 13 | >InvRNA5_6 14 | GCGTAGATCGGAAGA 15 | >InvRNA5_7 16 | CGTAGATCGGAAGAG 17 | >InvRNA5_8 18 | GTAGATCGGAAGAGC 19 | >InvRNA5_9 20 | TAGATCGGAAGAGCA 21 | >InvRNA5_10 22 | AGATCGGAAGAGCAC 23 | >InvRNA5_11 24 | GATCGGAAGAGCACA 25 | >InvRNA5_12 26 | ATCGGAAGAGCACAC 27 | >InvRNA5_13 28 | TCGGAAGAGCACACG 29 | >InvRNA5_14 30 | CGGAAGAGCACACGT 
31 | >InvRNA5_15 32 | GGAAGAGCACACGTC 33 | >InvRNA5_16 34 | GAAGAGCACACGTCT 35 | >InvRNA5_17 36 | AAGAGCACACGTCTG 37 | >InvRNA5_18 38 | AGAGCACACGTCTGA 39 | >InvRNA5_19 40 | GAGCACACGTCTGAA 41 | >InvRNA5_20 42 | AGCACACGTCTGAAC 43 | >InvRNA5_21 44 | GCACACGTCTGAACT 45 | >InvRNA5_22 46 | CACACGTCTGAACTC 47 | >InvRNA5_23 48 | ACACGTCTGAACTCC 49 | >InvRNA5_24 50 | CACGTCTGAACTCCA 51 | >InvRNA5_25 52 | ACGTCTGAACTCCAG 53 | >InvRNA5_26 54 | CGTCTGAACTCCAGT 55 | >InvRNA5_27 56 | GTCTGAACTCCAGTC 57 | >InvRNA5_28 58 | TCTGAACTCCAGTCA 59 | >InvRNA5_29 60 | CTGAACTCCAGTCAC 61 | -------------------------------------------------------------------------------- /example/inputs/InvRNA6_adapters.fasta: -------------------------------------------------------------------------------- 1 | >InvRNA6_0 2 | NNGAACATACAGATC 3 | >InvRNA6_1 4 | NGAACATACAGATCG 5 | >InvRNA6_2 6 | GAACATACAGATCGG 7 | >InvRNA6_3 8 | AACATACAGATCGGA 9 | >InvRNA6_4 10 | ACATACAGATCGGAA 11 | >InvRNA6_5 12 | CATACAGATCGGAAG 13 | >InvRNA6_6 14 | ATACAGATCGGAAGA 15 | >InvRNA6_7 16 | TACAGATCGGAAGAG 17 | >InvRNA6_8 18 | ACAGATCGGAAGAGC 19 | >InvRNA6_9 20 | CAGATCGGAAGAGCA 21 | >InvRNA6_10 22 | AGATCGGAAGAGCAC 23 | >InvRNA6_11 24 | GATCGGAAGAGCACA 25 | >InvRNA6_12 26 | ATCGGAAGAGCACAC 27 | >InvRNA6_13 28 | TCGGAAGAGCACACG 29 | >InvRNA6_14 30 | CGGAAGAGCACACGT 31 | >InvRNA6_15 32 | GGAAGAGCACACGTC 33 | >InvRNA6_16 34 | GAAGAGCACACGTCT 35 | >InvRNA6_17 36 | AAGAGCACACGTCTG 37 | >InvRNA6_18 38 | AGAGCACACGTCTGA 39 | >InvRNA6_19 40 | GAGCACACGTCTGAA 41 | >InvRNA6_20 42 | AGCACACGTCTGAAC 43 | >InvRNA6_21 44 | GCACACGTCTGAACT 45 | >InvRNA6_22 46 | CACACGTCTGAACTC 47 | >InvRNA6_23 48 | ACACGTCTGAACTCC 49 | >InvRNA6_24 50 | CACGTCTGAACTCCA 51 | >InvRNA6_25 52 | ACGTCTGAACTCCAG 53 | >InvRNA6_26 54 | CGTCTGAACTCCAGT 55 | >InvRNA6_27 56 | GTCTGAACTCCAGTC 57 | >InvRNA6_28 58 | TCTGAACTCCAGTCA 59 | >InvRNA6_29 60 | CTGAACTCCAGTCAC 61 | -------------------------------------------------------------------------------- 
/example/inputs/InvRNA7_adapters.fasta: -------------------------------------------------------------------------------- 1 | >InvRNA7_0 2 | NNACATAGCGAGATC 3 | >InvRNA7_1 4 | NACATAGCGAGATCG 5 | >InvRNA7_2 6 | ACATAGCGAGATCGG 7 | >InvRNA7_3 8 | CATAGCGAGATCGGA 9 | >InvRNA7_4 10 | ATAGCGAGATCGGAA 11 | >InvRNA7_5 12 | TAGCGAGATCGGAAG 13 | >InvRNA7_6 14 | AGCGAGATCGGAAGA 15 | >InvRNA7_7 16 | GCGAGATCGGAAGAG 17 | >InvRNA7_8 18 | CGAGATCGGAAGAGC 19 | >InvRNA7_9 20 | GAGATCGGAAGAGCA 21 | >InvRNA7_10 22 | AGATCGGAAGAGCAC 23 | >InvRNA7_11 24 | GATCGGAAGAGCACA 25 | >InvRNA7_12 26 | ATCGGAAGAGCACAC 27 | >InvRNA7_13 28 | TCGGAAGAGCACACG 29 | >InvRNA7_14 30 | CGGAAGAGCACACGT 31 | >InvRNA7_15 32 | GGAAGAGCACACGTC 33 | >InvRNA7_16 34 | GAAGAGCACACGTCT 35 | >InvRNA7_17 36 | AAGAGCACACGTCTG 37 | >InvRNA7_18 38 | AGAGCACACGTCTGA 39 | >InvRNA7_19 40 | GAGCACACGTCTGAA 41 | >InvRNA7_20 42 | AGCACACGTCTGAAC 43 | >InvRNA7_21 44 | GCACACGTCTGAACT 45 | >InvRNA7_22 46 | CACACGTCTGAACTC 47 | >InvRNA7_23 48 | ACACGTCTGAACTCC 49 | >InvRNA7_24 50 | CACGTCTGAACTCCA 51 | >InvRNA7_25 52 | ACGTCTGAACTCCAG 53 | >InvRNA7_26 54 | CGTCTGAACTCCAGT 55 | >InvRNA7_27 56 | GTCTGAACTCCAGTC 57 | >InvRNA7_28 58 | TCTGAACTCCAGTCA 59 | >InvRNA7_29 60 | CTGAACTCCAGTCAC 61 | -------------------------------------------------------------------------------- /example/inputs/InvRNA8_adapters.fasta: -------------------------------------------------------------------------------- 1 | >InvRNA8_0 2 | NNGTGCGATAAGATC 3 | >InvRNA8_1 4 | NGTGCGATAAGATCG 5 | >InvRNA8_2 6 | GTGCGATAAGATCGG 7 | >InvRNA8_3 8 | TGCGATAAGATCGGA 9 | >InvRNA8_4 10 | GCGATAAGATCGGAA 11 | >InvRNA8_5 12 | CGATAAGATCGGAAG 13 | >InvRNA8_6 14 | GATAAGATCGGAAGA 15 | >InvRNA8_7 16 | ATAAGATCGGAAGAG 17 | >InvRNA8_8 18 | TAAGATCGGAAGAGC 19 | >InvRNA8_9 20 | AAGATCGGAAGAGCA 21 | >InvRNA8_10 22 | AGATCGGAAGAGCAC 23 | >InvRNA8_11 24 | GATCGGAAGAGCACA 25 | >InvRNA8_12 26 | ATCGGAAGAGCACAC 27 | >InvRNA8_13 28 | TCGGAAGAGCACACG 29 | >InvRNA8_14 30 | CGGAAGAGCACACGT 
31 | >InvRNA8_15 32 | GGAAGAGCACACGTC 33 | >InvRNA8_16 34 | GAAGAGCACACGTCT 35 | >InvRNA8_17 36 | AAGAGCACACGTCTG 37 | >InvRNA8_18 38 | AGAGCACACGTCTGA 39 | >InvRNA8_19 40 | GAGCACACGTCTGAA 41 | >InvRNA8_20 42 | AGCACACGTCTGAAC 43 | >InvRNA8_21 44 | GCACACGTCTGAACT 45 | >InvRNA8_22 46 | CACACGTCTGAACTC 47 | >InvRNA8_23 48 | ACACGTCTGAACTCC 49 | >InvRNA8_24 50 | CACGTCTGAACTCCA 51 | >InvRNA8_25 52 | ACGTCTGAACTCCAG 53 | >InvRNA8_26 54 | CGTCTGAACTCCAGT 55 | >InvRNA8_27 56 | GTCTGAACTCCAGTC 57 | >InvRNA8_28 58 | TCTGAACTCCAGTCA 59 | >InvRNA8_29 60 | CTGAACTCCAGTCAC 61 | -------------------------------------------------------------------------------- /example/inputs/InvRil19_adapters.yaml: -------------------------------------------------------------------------------- 1 | >ril19_1 2 | AGATCGGAAGAGCAC 3 | >ril19_2 4 | GATCGGAAGAGCACA 5 | >ril19_3 6 | ATCGGAAGAGCACAC 7 | >ril19_4 8 | TCGGAAGAGCACACG 9 | >ril19_5 10 | CGGAAGAGCACACGT 11 | >ril19_6 12 | GGAAGAGCACACGTC 13 | >ril19_7 14 | GAAGAGCACACGTCT 15 | >ril19_8 16 | AAGAGCACACGTCTG 17 | >ril19_9 18 | AGAGCACACGTCTGA 19 | >ril19_10 20 | GAGCACACGTCTGAA 21 | >ril19_11 22 | AGCACACGTCTGAAC 23 | >ril19_12 24 | GCACACGTCTGAACT 25 | >ril19_13 26 | CACACGTCTGAACTC 27 | >ril19_14 28 | ACACGTCTGAACTCC 29 | >ril19_15 30 | CACGTCTGAACTCCA 31 | >ril19_16 32 | ACGTCTGAACTCCAG 33 | >ril19_17 34 | CGTCTGAACTCCAGT 35 | >ril19_18 36 | GTCTGAACTCCAGTC 37 | >ril19_19 38 | TCTGAACTCCAGTCA 39 | >ril19_20 40 | CTGAACTCCAGTCAC 41 | 42 | -------------------------------------------------------------------------------- /example/inputs/example_fastqs/chrom19kbp550_clip1_r1.fastq.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/example/inputs/example_fastqs/chrom19kbp550_clip1_r1.fastq.gz -------------------------------------------------------------------------------- 
/example/inputs/example_fastqs/chrom19kbp550_clip1_r2.fastq.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/example/inputs/example_fastqs/chrom19kbp550_clip1_r2.fastq.gz -------------------------------------------------------------------------------- /example/inputs/example_fastqs/chrom19kbp550_clip2_r1.fastq.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/example/inputs/example_fastqs/chrom19kbp550_clip2_r1.fastq.gz -------------------------------------------------------------------------------- /example/inputs/example_fastqs/chrom19kbp550_clip2_r2.fastq.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/example/inputs/example_fastqs/chrom19kbp550_clip2_r2.fastq.gz -------------------------------------------------------------------------------- /example/inputs/example_fastqs/chrom19kbp550_input_r1.fastq.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/example/inputs/example_fastqs/chrom19kbp550_input_r1.fastq.gz -------------------------------------------------------------------------------- /example/inputs/example_fastqs/chrom19kbp550_input_r2.fastq.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/example/inputs/example_fastqs/chrom19kbp550_input_r2.fastq.gz -------------------------------------------------------------------------------- /example/inputs/hg113seqs_repbase_starindex/SA: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/example/inputs/hg113seqs_repbase_starindex/SA -------------------------------------------------------------------------------- /example/inputs/hg113seqs_repbase_starindex/SAindex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/example/inputs/hg113seqs_repbase_starindex/SAindex -------------------------------------------------------------------------------- /example/inputs/hg113seqs_repbase_starindex/chrLength.txt: -------------------------------------------------------------------------------- 1 | 80 2 | 87 3 | 81 4 | 84 5 | 3036 6 | 129 7 | 378 8 | 968 9 | 586 10 | 5035 11 | 273 12 | 254 13 | 507 14 | 3565 15 | 387 16 | 287 17 | 798 18 | 519 19 | 930 20 | 1084 21 | 353 22 | 611 23 | 335 24 | 586 25 | 357 26 | 510 27 | 83 28 | 76 29 | 77 30 | 75 31 | 74 32 | 76 33 | 75 34 | 81 35 | 78 36 | 1869 37 | 85 38 | 85 39 | 1464 40 | 1871 41 | 1902 42 | 1995 43 | 2090 44 | 92 45 | 1803 46 | 2137 47 | 86 48 | 791 49 | 75 50 | 3788 51 | 75 52 | 77 53 | 76 54 | 76 55 | 1804 56 | 1798 57 | 1647 58 | 3798 59 | 990 60 | 75 61 | 86 62 | 86 63 | 3375 64 | 3509 65 | 77 66 | 1834 67 | 85 68 | 2690 69 | 73 70 | 85 71 | 85 72 | 89 73 | 85 74 | 3241 75 | 3655 76 | 3900 77 | 85 78 | 75 79 | 92 80 | 75 81 | 85 82 | 75 83 | 94 84 | 586 85 | 81 86 | 75 87 | 77 88 | 75 89 | 76 90 | 77 91 | 76 92 | 75 93 | 76 94 | 77 95 | 77 96 | 75 97 | 76 98 | 76 99 | 76 100 | 1332 101 | 1332 102 | 83 103 | 471 104 | 254 105 | 259 106 | 896 107 | 198 108 | 198 109 | 712 110 | 198 111 | 595 112 | 712 113 | 712 114 | -------------------------------------------------------------------------------- /example/inputs/hg113seqs_repbase_starindex/chrName.txt: 
-------------------------------------------------------------------------------- 1 | tRNAGlyGGC_CB 2 | tRNAAlaAGC_CB 3 | tRNAIleATT_CB 4 | tRNAThr_CB 5 | 5SrRNA-1_BG 6 | 5SrRNA_AN 7 | tRNASAT-1_ZM 8 | BAGY2_HV_MRNA 9 | MARNA 10 | LSU-rRNA_Hsa 11 | Talud 12 | Talua 13 | Dorna1cons 14 | LSU-rRNA_Mfr 15 | Taluc 16 | Talub 17 | MARINERNA10_MD 18 | MARINERNA3_MD 19 | MARINERNA7_MD 20 | MARINERNA8_MD 21 | MARINERNA4_MD 22 | MARINERNA9_MD 23 | MARINERNA1_ME 24 | MARNA 25 | MARINERNA1_MD 26 | MARINERNA12_MD 27 | RRNA45 28 | TRNA_ALA 29 | TRNA_ASN 30 | TRNA_GLU 31 | TRNA_GLY 32 | TRNA_VAL 33 | tRNA-His-CAY_ 34 | tRNAGlnTTG_CB 35 | tRNA-Leu-TTA(m) 36 | SSU-rRNA_Hsa 37 | tRNA-Ser-TCG 38 | tRNA-Ser-TCA_ 39 | SSU-rRNA_Giardia 40 | SSU-rRNA_Ddi 41 | SSU-rRNA_Ath 42 | SSU-rRNA_Dme 43 | SSU-rRNA_Pfa 44 | tRNALeuCTT_CB 45 | SSU-rRNA_Lvi 46 | SSU-rRNA_Lma 47 | tRNA-Leu-TTG 48 | LSU-rRNA_Tps 49 | tRNA-Gln-CAG 50 | LSU-rRNA_Pfa 51 | tRNA-Gln-CAA 52 | tRNA-Arg-CGY 53 | tRNA-Tyr-TAC 54 | tRNA-Tyr-TAT 55 | SSU-rRNA_Tps 56 | SSU-rRNA_Sce 57 | SSU-rRNA_Cel 58 | LSU-rRNA_Sce 59 | LSU-rRNA_Ldo 60 | tRNA-Pro-CCA 61 | tRNA-Leu-TTA 62 | tRNA-Leu-CTG 63 | LSU-rRNA_Ath 64 | LSU-rRNA_Cel 65 | tRNA-Ile-ATA 66 | SSU-rRNA_Sme 67 | tRNA-Leu-CTA_ 68 | LSU-rRNA_Giardia 69 | tRNA-Ser-TCA(m) 70 | tRNA-Leu-CTY 71 | tRNA-Leu-CTA 72 | tRNA-SeC(e)-TGA 73 | tRNA-Ser-TCY 74 | LSU-rRNA_Ddi 75 | LSU-rRNA_Hca 76 | LSU-rRNA_Dme 77 | tRNA-Ser-AGY 78 | tRNA-His-CAY 79 | tRNASerTCT_CB 80 | tRNA-Gln-CAA_ 81 | tRNA-Ser-TCA 82 | tRNA-Pro-CCG 83 | 4.5SRNA 84 | MARNA 85 | RNALUIII 86 | tRNA-Ala-GCA 87 | tRNA-Thr-ACG_ 88 | tRNA-Asp-GAY 89 | tRNA-Lys-AAG 90 | tRNA-Ile-ATC 91 | tRNA-Val-GTA 92 | tRNA-Ala-GCY_ 93 | tRNA-Met 94 | tRNA-Ile-ATT 95 | tRNA-Thr-ACY_ 96 | tRNA-Met-i 97 | tRNA-Arg-CGA 98 | tRNA-Met_ 99 | tRNA-Arg-CGG 100 | TN10MRNA_NA 101 | TN10MRNA_NA 102 | RRNA45 103 | MARNA 104 | Talua 105 | Talua 106 | MARINERNA11_MD 107 | MARINERNA6_MD 108 | MARINERNA6_MD 109 | MARINERNA6A_MD 110 | MARINERNA6_MD 111 | 
MARINERNA5_MD 112 | MARINERNA6A_MD 113 | MARINERNA6A_MD 114 | -------------------------------------------------------------------------------- /example/inputs/hg113seqs_repbase_starindex/chrNameLength.txt: -------------------------------------------------------------------------------- 1 | tRNAGlyGGC_CB 80 2 | tRNAAlaAGC_CB 87 3 | tRNAIleATT_CB 81 4 | tRNAThr_CB 84 5 | 5SrRNA-1_BG 3036 6 | 5SrRNA_AN 129 7 | tRNASAT-1_ZM 378 8 | BAGY2_HV_MRNA 968 9 | MARNA 586 10 | LSU-rRNA_Hsa 5035 11 | Talud 273 12 | Talua 254 13 | Dorna1cons 507 14 | LSU-rRNA_Mfr 3565 15 | Taluc 387 16 | Talub 287 17 | MARINERNA10_MD 798 18 | MARINERNA3_MD 519 19 | MARINERNA7_MD 930 20 | MARINERNA8_MD 1084 21 | MARINERNA4_MD 353 22 | MARINERNA9_MD 611 23 | MARINERNA1_ME 335 24 | MARNA 586 25 | MARINERNA1_MD 357 26 | MARINERNA12_MD 510 27 | RRNA45 83 28 | TRNA_ALA 76 29 | TRNA_ASN 77 30 | TRNA_GLU 75 31 | TRNA_GLY 74 32 | TRNA_VAL 76 33 | tRNA-His-CAY_ 75 34 | tRNAGlnTTG_CB 81 35 | tRNA-Leu-TTA(m) 78 36 | SSU-rRNA_Hsa 1869 37 | tRNA-Ser-TCG 85 38 | tRNA-Ser-TCA_ 85 39 | SSU-rRNA_Giardia 1464 40 | SSU-rRNA_Ddi 1871 41 | SSU-rRNA_Ath 1902 42 | SSU-rRNA_Dme 1995 43 | SSU-rRNA_Pfa 2090 44 | tRNALeuCTT_CB 92 45 | SSU-rRNA_Lvi 1803 46 | SSU-rRNA_Lma 2137 47 | tRNA-Leu-TTG 86 48 | LSU-rRNA_Tps 791 49 | tRNA-Gln-CAG 75 50 | LSU-rRNA_Pfa 3788 51 | tRNA-Gln-CAA 75 52 | tRNA-Arg-CGY 77 53 | tRNA-Tyr-TAC 76 54 | tRNA-Tyr-TAT 76 55 | SSU-rRNA_Tps 1804 56 | SSU-rRNA_Sce 1798 57 | SSU-rRNA_Cel 1647 58 | LSU-rRNA_Sce 3798 59 | LSU-rRNA_Ldo 990 60 | tRNA-Pro-CCA 75 61 | tRNA-Leu-TTA 86 62 | tRNA-Leu-CTG 86 63 | LSU-rRNA_Ath 3375 64 | LSU-rRNA_Cel 3509 65 | tRNA-Ile-ATA 77 66 | SSU-rRNA_Sme 1834 67 | tRNA-Leu-CTA_ 85 68 | LSU-rRNA_Giardia 2690 69 | tRNA-Ser-TCA(m) 73 70 | tRNA-Leu-CTY 85 71 | tRNA-Leu-CTA 85 72 | tRNA-SeC(e)-TGA 89 73 | tRNA-Ser-TCY 85 74 | LSU-rRNA_Ddi 3241 75 | LSU-rRNA_Hca 3655 76 | LSU-rRNA_Dme 3900 77 | tRNA-Ser-AGY 85 78 | tRNA-His-CAY 75 79 | tRNASerTCT_CB 92 80 | tRNA-Gln-CAA_ 75 81 | 
tRNA-Ser-TCA 85 82 | tRNA-Pro-CCG 75 83 | 4.5SRNA 94 84 | MARNA 586 85 | RNALUIII 81 86 | tRNA-Ala-GCA 75 87 | tRNA-Thr-ACG_ 77 88 | tRNA-Asp-GAY 75 89 | tRNA-Lys-AAG 76 90 | tRNA-Ile-ATC 77 91 | tRNA-Val-GTA 76 92 | tRNA-Ala-GCY_ 75 93 | tRNA-Met 76 94 | tRNA-Ile-ATT 77 95 | tRNA-Thr-ACY_ 77 96 | tRNA-Met-i 75 97 | tRNA-Arg-CGA 76 98 | tRNA-Met_ 76 99 | tRNA-Arg-CGG 76 100 | TN10MRNA_NA 1332 101 | TN10MRNA_NA 1332 102 | RRNA45 83 103 | MARNA 471 104 | Talua 254 105 | Talua 259 106 | MARINERNA11_MD 896 107 | MARINERNA6_MD 198 108 | MARINERNA6_MD 198 109 | MARINERNA6A_MD 712 110 | MARINERNA6_MD 198 111 | MARINERNA5_MD 595 112 | MARINERNA6A_MD 712 113 | MARINERNA6A_MD 712 114 | -------------------------------------------------------------------------------- /example/inputs/hg113seqs_repbase_starindex/chrStart.txt: -------------------------------------------------------------------------------- 1 | 0 2 | 262144 3 | 524288 4 | 786432 5 | 1048576 6 | 1310720 7 | 1572864 8 | 1835008 9 | 2097152 10 | 2359296 11 | 2621440 12 | 2883584 13 | 3145728 14 | 3407872 15 | 3670016 16 | 3932160 17 | 4194304 18 | 4456448 19 | 4718592 20 | 4980736 21 | 5242880 22 | 5505024 23 | 5767168 24 | 6029312 25 | 6291456 26 | 6553600 27 | 6815744 28 | 7077888 29 | 7340032 30 | 7602176 31 | 7864320 32 | 8126464 33 | 8388608 34 | 8650752 35 | 8912896 36 | 9175040 37 | 9437184 38 | 9699328 39 | 9961472 40 | 10223616 41 | 10485760 42 | 10747904 43 | 11010048 44 | 11272192 45 | 11534336 46 | 11796480 47 | 12058624 48 | 12320768 49 | 12582912 50 | 12845056 51 | 13107200 52 | 13369344 53 | 13631488 54 | 13893632 55 | 14155776 56 | 14417920 57 | 14680064 58 | 14942208 59 | 15204352 60 | 15466496 61 | 15728640 62 | 15990784 63 | 16252928 64 | 16515072 65 | 16777216 66 | 17039360 67 | 17301504 68 | 17563648 69 | 17825792 70 | 18087936 71 | 18350080 72 | 18612224 73 | 18874368 74 | 19136512 75 | 19398656 76 | 19660800 77 | 19922944 78 | 20185088 79 | 20447232 80 | 20709376 81 | 20971520 82 | 21233664 83 
| 21495808 84 | 21757952 85 | 22020096 86 | 22282240 87 | 22544384 88 | 22806528 89 | 23068672 90 | 23330816 91 | 23592960 92 | 23855104 93 | 24117248 94 | 24379392 95 | 24641536 96 | 24903680 97 | 25165824 98 | 25427968 99 | 25690112 100 | 25952256 101 | 26214400 102 | 26476544 103 | 26738688 104 | 27000832 105 | 27262976 106 | 27525120 107 | 27787264 108 | 28049408 109 | 28311552 110 | 28573696 111 | 28835840 112 | 29097984 113 | 29360128 114 | 29622272 115 | -------------------------------------------------------------------------------- /example/inputs/hg113seqs_repbase_starindex/genomeParameters.txt: -------------------------------------------------------------------------------- 1 | versionGenome 20201 2 | genomeFastaFiles small_repelements.fa 3 | genomeSAindexNbases 8 4 | genomeChrBinNbits 18 5 | genomeSAsparseD 1 6 | sjdbOverhang 0 7 | sjdbFileChrStartEnd - 8 | sjdbGTFfile - 9 | sjdbGTFchrPrefix - 10 | sjdbGTFfeatureExon exon 11 | sjdbGTFtagExonParentTranscript transcript_id 12 | sjdbGTFtagExonParentGene gene_id 13 | -------------------------------------------------------------------------------- /example/inputs/hg19.chrom.sizes: -------------------------------------------------------------------------------- 1 | chr1 249250621 2 | chr2 243199373 3 | chr3 198022430 4 | chr4 191154276 5 | chr5 180915260 6 | chr6 171115067 7 | chr7 159138663 8 | chrX 155270560 9 | chr8 146364022 10 | chr9 141213431 11 | chr10 135534747 12 | chr11 135006516 13 | chr12 133851895 14 | chr13 115169878 15 | chr14 107349540 16 | chr15 102531392 17 | chr16 90354753 18 | chr17 81195210 19 | chr18 78077248 20 | chr20 63025520 21 | chrY 59373566 22 | chr19 59128983 23 | chr22 51304566 24 | chr21 48129895 25 | chr6_ssto_hap7 4928567 26 | chr6_mcf_hap5 4833398 27 | chr6_cox_hap2 4795371 28 | chr6_mann_hap4 4683263 29 | chr6_apd_hap1 4622290 30 | chr6_qbl_hap6 4611984 31 | chr6_dbb_hap3 4610396 32 | chr17_ctg5_hap1 1680828 33 | chr4_ctg9_hap1 590426 34 | chr1_gl000192_random 547496 35 | 
chrUn_gl000225 211173 36 | chr4_gl000194_random 191469 37 | chr4_gl000193_random 189789 38 | chr9_gl000200_random 187035 39 | chrUn_gl000222 186861 40 | chrUn_gl000212 186858 41 | chr7_gl000195_random 182896 42 | chrUn_gl000223 180455 43 | chrUn_gl000224 179693 44 | chrUn_gl000219 179198 45 | chr17_gl000205_random 174588 46 | chrUn_gl000215 172545 47 | chrUn_gl000216 172294 48 | chrUn_gl000217 172149 49 | chr9_gl000199_random 169874 50 | chrUn_gl000211 166566 51 | chrUn_gl000213 164239 52 | chrUn_gl000220 161802 53 | chrUn_gl000218 161147 54 | chr19_gl000209_random 159169 55 | chrUn_gl000221 155397 56 | chrUn_gl000214 137718 57 | chrUn_gl000228 129120 58 | chrUn_gl000227 128374 59 | chr1_gl000191_random 106433 60 | chr19_gl000208_random 92689 61 | chr9_gl000198_random 90085 62 | chr17_gl000204_random 81310 63 | chrUn_gl000233 45941 64 | chrUn_gl000237 45867 65 | chrUn_gl000230 43691 66 | chrUn_gl000242 43523 67 | chrUn_gl000243 43341 68 | chrUn_gl000241 42152 69 | chrUn_gl000236 41934 70 | chrUn_gl000240 41933 71 | chr17_gl000206_random 41001 72 | chrUn_gl000232 40652 73 | chrUn_gl000234 40531 74 | chr11_gl000202_random 40103 75 | chrUn_gl000238 39939 76 | chrUn_gl000244 39929 77 | chrUn_gl000248 39786 78 | chr8_gl000196_random 38914 79 | chrUn_gl000249 38502 80 | chrUn_gl000246 38154 81 | chr17_gl000203_random 37498 82 | chr8_gl000197_random 37175 83 | chrUn_gl000245 36651 84 | chrUn_gl000247 36422 85 | chr9_gl000201_random 36148 86 | chrUn_gl000235 34474 87 | chrUn_gl000239 33824 88 | chr21_gl000210_random 27682 89 | chrUn_gl000231 27386 90 | chrUn_gl000229 19913 91 | chrM 16571 92 | chrUn_gl000226 15008 93 | chr18_gl000207_random 4262 94 | -------------------------------------------------------------------------------- /example/inputs/hg19chr19.chrom.sizes: -------------------------------------------------------------------------------- 1 | chr19 59128983 2 | -------------------------------------------------------------------------------- 
/example/inputs/hg19chr19kbp550_starindex/SA: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/example/inputs/hg19chr19kbp550_starindex/SA -------------------------------------------------------------------------------- /example/inputs/hg19chr19kbp550_starindex/SAindex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YeoLab/eCLIP/c0fffc4979a92371dc0667a03e3d957bf7f77600/example/inputs/hg19chr19kbp550_starindex/SAindex -------------------------------------------------------------------------------- /example/inputs/hg19chr19kbp550_starindex/chrLength.txt: -------------------------------------------------------------------------------- 1 | 550000 2 | -------------------------------------------------------------------------------- /example/inputs/hg19chr19kbp550_starindex/chrName.txt: -------------------------------------------------------------------------------- 1 | chr19 2 | -------------------------------------------------------------------------------- /example/inputs/hg19chr19kbp550_starindex/chrNameLength.txt: -------------------------------------------------------------------------------- 1 | chr19 550000 2 | -------------------------------------------------------------------------------- /example/inputs/hg19chr19kbp550_starindex/chrStart.txt: -------------------------------------------------------------------------------- 1 | 0 2 | 786432 3 | -------------------------------------------------------------------------------- /example/inputs/hg19chr19kbp550_starindex/genomeParameters.txt: -------------------------------------------------------------------------------- 1 | ### STAR --runMode genomeGenerate --runThreadN 8 --genomeDir chr19_550000bases_index --genomeFastaFiles chr19_550000bases.fa --genomeSAindexNbases 9 2 | versionGenome 20201 3 | genomeFastaFiles 
chr19_550000bases.fa 4 | genomeSAindexNbases 9 5 | genomeChrBinNbits 18 6 | genomeSAsparseD 1 7 | sjdbOverhang 0 8 | sjdbFileChrStartEnd - 9 | sjdbGTFfile - 10 | sjdbGTFchrPrefix - 11 | sjdbGTFfeatureExon exon 12 | sjdbGTFtagExonParentTranscript transcript_id 13 | sjdbGTFtagExonParentGene gene_id 14 | -------------------------------------------------------------------------------- /example/inputs/yeolabbarcodes_20170101.fasta: -------------------------------------------------------------------------------- 1 | >A01 2 | AAGCAAT 3 | >A03 4 | ATGACCNNNNT 5 | >A04 6 | CAGCTTNNNNT 7 | >B06 8 | GGCTTGT 9 | >C01 10 | ACAAGTT 11 | >D8f 12 | TGGTCCT 13 | >F05 14 | GGATACNNNNT 15 | >G07 16 | TCCTGTNNNNT 17 | >X1A 18 | NNNNNCCTATAT 19 | >X1B 20 | NNNNNTGCTATT 21 | >X2A 22 | NNNNNTATACTT 23 | >X2B 24 | NNNNNATCTTCT 25 | 26 | -------------------------------------------------------------------------------- /example/paired_end_clip.yaml: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env eCLIP_pairedend_singlenode 2 | 3 | dataset: "204_01" 4 | 5 | speciesGenomeDir: 6 | class: Directory 7 | path: /home/centos/refs/STAR 8 | 9 | repeatElementGenomeDir: 10 | class: Directory 11 | path: /home/centos/refs/STAR_repeat/ 12 | 13 | species: hg19 14 | 15 | chrom_sizes: 16 | class: File 17 | path: /home/centos/refs/STAR/chrNameLength.txt 18 | 19 | barcodesfasta: 20 | class: File 21 | path: /home/centos/eclip/example/inputs/yeolabbarcodes_20170101.fasta 22 | 23 | randomer_length: "5" 24 | 25 | samples: 26 | - 27 | - ip_read: 28 | name: rep1_clip 29 | barcodeids: [A01, B06] 30 | read1: 31 | class: File 32 | path: /home/centos/fastqs/RBFOX2-204-CLIP_S1_R1.fastq.gz 33 | read2: 34 | class: File 35 | path: /home/centos/fastqs/RBFOX2-204-CLIP_S1_R2.fastq.gz 36 | 37 | - input_read: 38 | name: rep1_input 39 | barcodeids: [NIL, NIL] 40 | read1: 41 | class: File 42 | path: /home/centos/fastqs/RBFOX2-204-INPUT_S2_R1.fastq.gz 43 | read2: 44 | class: File 
45 | path: /home/centos/fastqs/RBFOX2-204-INPUT_S2_R2.fastq.gz 46 | - 47 | - ip_read: 48 | name: rep2_clip 49 | barcodeids: [C01, D8f] 50 | read1: 51 | class: File 52 | path: /home/centos/fastqs/RBFOX2-204-CLIP_S1_R1.fastq.gz 53 | read2: 54 | class: File 55 | path: /home/centos/fastqs/RBFOX2-204-CLIP_S1_R2.fastq.gz 56 | 57 | - input_read: 58 | name: rep2_input 59 | barcodeids: [NIL, NIL] 60 | read1: 61 | class: File 62 | path: /home/centos/fastqs/RBFOX2-204-INPUT_S2_R1.fastq.gz 63 | read2: 64 | class: File 65 | path: /home/centos/fastqs/RBFOX2-204-INPUT_S2_R2.fastq.gz 66 | 67 | -------------------------------------------------------------------------------- /example/single_end_clip.yaml: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env eCLIP_singleend_singlenode 2 | 3 | dataset: ENCODE4 4 | 5 | species: hg19 6 | 7 | chrom_sizes: 8 | class: File 9 | path: /home/centos/refs/STAR/chrNameLength.txt 10 | 11 | speciesGenomeDir: 12 | class: Directory 13 | path: /home/centos/refs/STAR 14 | 15 | repeatElementGenomeDir: 16 | class: Directory 17 | path: /home/centos/refs/STAR_repeat/ 18 | 19 | samples: 20 | - 21 | - ip_read: 22 | name: IP 23 | read1: 24 | class: File 25 | path: /home/centos/seRBFOX2/INV_IP_B_S58_L005_R1_001.fastq.gz 26 | adapters: 27 | class: File 28 | path: /home/centos/eclip/example/inputs/InvRil19_adapters.yaml 29 | 30 | - input_read: 31 | name: INPUT 32 | read1: 33 | class: File 34 | path: /home/centos/seRBFOX2/INV_IN_B_S57_L005_R1_001.fastq.gz 35 | adapters: 36 | class: File 37 | path: /home/centos/eclip/example/inputs/InvRil19_adapters.yaml 38 | 39 | blacklist_file: 40 | class: File 41 | path: /home/centos/eclip/example/inputs/ENCFF039QTN.bed 42 | -------------------------------------------------------------------------------- /tests/eCLIP-0.7.0/01_umi_tools_extract/run_demux_se.sh: -------------------------------------------------------------------------------- 1 | # umi_tools 1.0.0 2 | 3 | umi_tools 
extract \ 4 | --random-seed 1 \ 5 | --bc-pattern NNNNNNNNNN \ 6 | --stdin inputs/seRBFOX2/INV_IP_B_S58_L005_R1_001.fastq.gz \ 7 | --stdout rep1.IP.umi.r1.fq.gz \ 8 | --log rep1.IP.---.--.metrics 9 | 10 | umi_tools extract \ 11 | --random-seed 1 \ 12 | --bc-pattern NNNNNNNNNN \ 13 | --stdin inputs/seRBFOX2/INV_IN_B_S57_L006_R1_001.fastq.gz \ 14 | --stdout rep1.IN.umi.r1.fq.gz \ 15 | --log rep1.IN.---.--.metrics 16 | -------------------------------------------------------------------------------- /tests/eCLIP-0.7.0/02_cutadapt_round1/run_cutadapt.sh: -------------------------------------------------------------------------------- 1 | # cutadapt 1.14 2 | 3 | cutadapt -O 1 \ 4 | -f fastq \ 5 | --match-read-wildcards \ 6 | --times 1 \ 7 | -e 0.1 \ 8 | --quality-cutoff 6 \ 9 | -m 18 \ 10 | -o rep1.IP.umi.r1.fqTr.fq.gz \ 11 | -a AGATCGGAAGAGCAC \ 12 | -a GATCGGAAGAGCACA \ 13 | -a ATCGGAAGAGCACAC \ 14 | -a TCGGAAGAGCACACG \ 15 | -a CGGAAGAGCACACGT \ 16 | -a GGAAGAGCACACGTC \ 17 | -a GAAGAGCACACGTCT \ 18 | -a AAGAGCACACGTCTG \ 19 | -a AGAGCACACGTCTGA \ 20 | -a GAGCACACGTCTGAA \ 21 | -a AGCACACGTCTGAAC \ 22 | -a GCACACGTCTGAACT \ 23 | -a CACACGTCTGAACTC \ 24 | -a ACACGTCTGAACTCC \ 25 | -a CACGTCTGAACTCCA \ 26 | -a ACGTCTGAACTCCAG \ 27 | -a CGTCTGAACTCCAGT \ 28 | -a GTCTGAACTCCAGTC \ 29 | -a TCTGAACTCCAGTCA \ 30 | -a CTGAACTCCAGTCAC \ 31 | ../01_umi_tools_extract/rep1.IP.umi.r1.fq.gz > rep1.IP.umi.r1.fqTr.metrics 32 | 33 | cutadapt -O 1 \ 34 | -f fastq \ 35 | --match-read-wildcards \ 36 | --times 1 \ 37 | -e 0.1 \ 38 | --quality-cutoff 6 \ 39 | -m 18 \ 40 | -o rep1.IN.umi.r1.fqTr.fq.gz \ 41 | -a AGATCGGAAGAGCAC \ 42 | -a GATCGGAAGAGCACA \ 43 | -a ATCGGAAGAGCACAC \ 44 | -a TCGGAAGAGCACACG \ 45 | -a CGGAAGAGCACACGT \ 46 | -a GGAAGAGCACACGTC \ 47 | -a GAAGAGCACACGTCT \ 48 | -a AAGAGCACACGTCTG \ 49 | -a AGAGCACACGTCTGA \ 50 | -a GAGCACACGTCTGAA \ 51 | -a AGCACACGTCTGAAC \ 52 | -a GCACACGTCTGAACT \ 53 | -a CACACGTCTGAACTC \ 54 | -a ACACGTCTGAACTCC \ 55 | -a CACGTCTGAACTCCA \ 56 | 
-a ACGTCTGAACTCCAG \ 57 | -a CGTCTGAACTCCAGT \ 58 | -a GTCTGAACTCCAGTC \ 59 | -a TCTGAACTCCAGTCA \ 60 | -a CTGAACTCCAGTCAC \ 61 | ../01_umi_tools_extract/rep1.IN.umi.r1.fq.gz > rep1.IN.umi.r1.fqTr.metrics 62 | -------------------------------------------------------------------------------- /tests/eCLIP-0.7.0/03_cutadapt_round2/run_cutadapt.sh: -------------------------------------------------------------------------------- 1 | # cutadapt 1.14 2 | 3 | cutadapt \ 4 | -O 5 \ 5 | -f fastq \ 6 | --match-read-wildcards \ 7 | --times 1 \ 8 | -e 0.1 \ 9 | --quality-cutoff 6 \ 10 | -m 18 \ 11 | -o rep1.IP.umi.r1.fqTrTr.fq.gz \ 12 | -a AGATCGGAAGAGCAC \ 13 | -a GATCGGAAGAGCACA \ 14 | -a ATCGGAAGAGCACAC \ 15 | -a TCGGAAGAGCACACG \ 16 | -a CGGAAGAGCACACGT \ 17 | -a GGAAGAGCACACGTC \ 18 | -a GAAGAGCACACGTCT \ 19 | -a AAGAGCACACGTCTG \ 20 | -a AGAGCACACGTCTGA \ 21 | -a GAGCACACGTCTGAA \ 22 | -a AGCACACGTCTGAAC \ 23 | -a GCACACGTCTGAACT \ 24 | -a CACACGTCTGAACTC \ 25 | -a ACACGTCTGAACTCC \ 26 | -a CACGTCTGAACTCCA \ 27 | -a ACGTCTGAACTCCAG \ 28 | -a CGTCTGAACTCCAGT \ 29 | -a GTCTGAACTCCAGTC \ 30 | -a TCTGAACTCCAGTCA \ 31 | -a CTGAACTCCAGTCAC \ 32 | ../02_cutadapt_round1/rep1.IP.umi.r1.fqTr.fq.gz > rep1.IP.umi.r1.fqTrTr.metrics 33 | 34 | cutadapt \ 35 | -O 5 \ 36 | -f fastq \ 37 | --match-read-wildcards \ 38 | --times 1 \ 39 | -e 0.1 \ 40 | --quality-cutoff 6 \ 41 | -m 18 \ 42 | -o rep1.IN.umi.r1.fqTrTr.fq.gz \ 43 | -a AGATCGGAAGAGCAC \ 44 | -a GATCGGAAGAGCACA \ 45 | -a ATCGGAAGAGCACAC \ 46 | -a TCGGAAGAGCACACG \ 47 | -a CGGAAGAGCACACGT \ 48 | -a GGAAGAGCACACGTC \ 49 | -a GAAGAGCACACGTCT \ 50 | -a AAGAGCACACGTCTG \ 51 | -a AGAGCACACGTCTGA \ 52 | -a GAGCACACGTCTGAA \ 53 | -a AGCACACGTCTGAAC \ 54 | -a GCACACGTCTGAACT \ 55 | -a CACACGTCTGAACTC \ 56 | -a ACACGTCTGAACTCC \ 57 | -a CACGTCTGAACTCCA \ 58 | -a ACGTCTGAACTCCAG \ 59 | -a CGTCTGAACTCCAGT \ 60 | -a GTCTGAACTCCAGTC \ 61 | -a TCTGAACTCCAGTCA \ 62 | -a CTGAACTCCAGTCAC \ 63 | ../02_cutadapt_round1/rep1.IN.umi.r1.fqTr.fq.gz > 
rep1.IN.umi.r1.fqTrTr.metrics 64 | -------------------------------------------------------------------------------- /tests/eCLIP-0.7.0/04_fastq_sort/run_fastq-sort.sh: -------------------------------------------------------------------------------- 1 | # fastqtools 0.8 2 | 3 | echo $(date +%x,%r) > TIMES.txt; 4 | zcat ../03_cutadapt_round2/rep1.IP.umi.r1.fqTrTr.fq.gz > rep1.IP.umi.r1.fqTrTr.fq 5 | echo $(date +%x,%r) >> TIMES.txt; 6 | fastq-sort --id rep1.IP.umi.r1.fqTrTr.fq > rep1.IP.umi.r1.fqTrTr.sorted.fq 7 | echo $(date +%x,%r) >> TIMES.txt; 8 | 9 | echo $(date +%x,%r) >> TIMES.txt; 10 | zcat ../03_cutadapt_round2/rep1.IN.umi.r1.fqTrTr.fq.gz > rep1.IN.umi.r1.fqTrTr.fq 11 | echo $(date +%x,%r) >> TIMES.txt; 12 | fastq-sort --id rep1.IN.umi.r1.fqTrTr.fq > rep1.IN.umi.r1.fqTrTr.sorted.fq 13 | echo $(date +%x,%r) >> TIMES.txt; 14 | -------------------------------------------------------------------------------- /tests/eCLIP-0.7.0/05_star_repeat/run_star.sh: -------------------------------------------------------------------------------- 1 | # STAR 2.7.6a 2 | echo $(date +%x,%r) > TIMES.txt; 3 | STAR \ 4 | --alignEndsType EndToEnd \ 5 | --genomeDir repbase_STARindex \ 6 | --genomeLoad NoSharedMemory \ 7 | --outBAMcompression 10 \ 8 | --outFileNamePrefix rep1.IP.umi.r1.fqTrTr.sorted.STAR \ 9 | --outFilterMultimapNmax 30 \ 10 | --outFilterMultimapScoreRange 1 \ 11 | --outFilterScoreMin 10 \ 12 | --outFilterType BySJout \ 13 | --outReadsUnmapped Fastx \ 14 | --outSAMattrRGline ID:foo \ 15 | --outSAMattributes All \ 16 | --outSAMmode Full \ 17 | --outSAMtype BAM Unsorted \ 18 | --outSAMunmapped Within \ 19 | --outStd Log \ 20 | --readFilesIn ../04_fastq_sort/rep1.IP.umi.r1.fqTrTr.sorted.fq \ 21 | --runMode alignReads \ 22 | --runThreadN 8 23 | echo $(date +%x,%r) >> TIMES.txt; 24 | 25 | echo $(date +%x,%r) >> TIMES.txt; 26 | STAR \ 27 | --alignEndsType EndToEnd \ 28 | --genomeDir repbase_STARindex \ 29 | --genomeLoad NoSharedMemory \ 30 | --outBAMcompression 10 \ 31 | 
--outFileNamePrefix rep1.IN.umi.r1.fqTrTr.sorted.STAR \ 32 | --outFilterMultimapNmax 30 \ 33 | --outFilterMultimapScoreRange 1 \ 34 | --outFilterScoreMin 10 \ 35 | --outFilterType BySJout \ 36 | --outReadsUnmapped Fastx \ 37 | --outSAMattrRGline ID:foo \ 38 | --outSAMattributes All \ 39 | --outSAMmode Full \ 40 | --outSAMtype BAM Unsorted \ 41 | --outSAMunmapped Within \ 42 | --outStd Log \ 43 | --readFilesIn ../04_fastq_sort/rep1.IN.umi.r1.fqTrTr.sorted.fq \ 44 | --runMode alignReads \ 45 | --runThreadN 8 46 | echo $(date +%x,%r) >> TIMES.txt; 47 | -------------------------------------------------------------------------------- /tests/eCLIP-0.7.0/06_star_genome/run_star.sh: -------------------------------------------------------------------------------- 1 | # STAR 2.7.6a 2 | echo $(date +%x,%r) > TIMES.txt; 3 | STAR \ 4 | --alignEndsType EndToEnd \ 5 | --genomeDir star_2_7_6a_gencode19_sjdb \ 6 | --genomeLoad NoSharedMemory \ 7 | --outBAMcompression 10 \ 8 | --outFileNamePrefix rep1.IP.umi.r1.fq.genome-mapped \ 9 | --outFilterMultimapNmax 1 \ 10 | --outFilterMultimapScoreRange 1 \ 11 | --outFilterScoreMin 10 \ 12 | --outFilterType BySJout \ 13 | --outReadsUnmapped Fastx \ 14 | --outSAMattrRGline ID:foo \ 15 | --outSAMattributes All \ 16 | --outSAMmode Full \ 17 | --outSAMtype BAM Unsorted \ 18 | --outSAMunmapped Within \ 19 | --outStd Log \ 20 | --readFilesIn ../05_star_repeat/rep1.IP.umi.r1.fqTrTr.sorted.STARUnmapped.out.mate1 \ 21 | --runMode alignReads \ 22 | --runThreadN 8 23 | echo $(date +%x,%r) >> TIMES.txt; 24 | echo $(date +%x,%r) >> TIMES.txt; 25 | STAR \ 26 | --alignEndsType EndToEnd \ 27 | --genomeDir star_2_7_6a_gencode19_sjdb \ 28 | --genomeLoad NoSharedMemory \ 29 | --outBAMcompression 10 \ 30 | --outFileNamePrefix rep1.IN.umi.r1.fq.genome-mapped \ 31 | --outFilterMultimapNmax 1 \ 32 | --outFilterMultimapScoreRange 1 \ 33 | --outFilterScoreMin 10 \ 34 | --outFilterType BySJout \ 35 | --outReadsUnmapped Fastx \ 36 | --outSAMattrRGline ID:foo \ 37 | 
--outSAMattributes All \ 38 | --outSAMmode Full \ 39 | --outSAMtype BAM Unsorted \ 40 | --outSAMunmapped Within \ 41 | --outStd Log \ 42 | --readFilesIn ../05_star_repeat/rep1.IN.umi.r1.fqTrTr.sorted.STARUnmapped.out.mate1 \ 43 | --runMode alignReads \ 44 | --runThreadN 8 45 | echo $(date +%x,%r) >> TIMES.txt; 46 | -------------------------------------------------------------------------------- /tests/eCLIP-0.7.0/07_sort/run_sort.sh: -------------------------------------------------------------------------------- 1 | # samtools 1.6 2 | 3 | echo $(date +%x,%r) > TIMES.txt; 4 | samtools \ 5 | sort \ 6 | -n \ 7 | -o inputs/rep1.IP.umi.r1.fq.genome-mappedSo.bam \ 8 | inputs/rep1.IP.umi.r1.fq.genome-mapped.bam 9 | echo $(date +%x,%r) >> TIMES.txt; 10 | samtools \ 11 | sort \ 12 | -o rep1.IP.umi.r1.fq.genome-mappedSoSo.bam \ 13 | inputs/rep1.IP.umi.r1.fq.genome-mappedSo.bam 14 | echo $(date +%x,%r) >> TIMES.txt; 15 | samtools \ 16 | sort \ 17 | -n \ 18 | -o inputs/rep1.IN.umi.r1.fq.genome-mappedSo.bam \ 19 | inputs/rep1.IN.umi.r1.fq.genome-mapped.bam 20 | echo $(date +%x,%r) >> TIMES.txt; 21 | samtools \ 22 | sort \ 23 | -o rep1.IN.umi.r1.fq.genome-mappedSoSo.bam \ 24 | inputs/rep1.IN.umi.r1.fq.genome-mappedSo.bam 25 | -------------------------------------------------------------------------------- /tests/eCLIP-0.7.0/08_umi_tools_dedup/run_umitools.sh: -------------------------------------------------------------------------------- 1 | # umi_tools 1.0.0 2 | 3 | echo $(date +%x,%r) > TIMES.txt; 4 | umi_tools dedup \ 5 | --random-seed 1 \ 6 | -I inputs/rep1.IP.umi.r1.fq.genome-mappedSoSo.bam \ 7 | --method unique \ 8 | --output-stats IP.umi.r1.fq.genome-mappedSoSo \ 9 | -S rep1.IP.umi.r1.fq.genome-mappedSoSo.rmDup.bam 10 | echo $(date +%x,%r) >> TIMES.txt; 11 | echo $(date +%x,%r) >> TIMES.txt; 12 | umi_tools dedup \ 13 | --random-seed 1 \ 14 | -I inputs/rep1.IN.umi.r1.fq.genome-mappedSoSo.bam \ 15 | --method unique \ 16 | --output-stats IN.umi.r1.fq.genome-mappedSoSo \ 17 
| -S rep1.IN.umi.r1.fq.genome-mappedSoSo.rmDup.bam 18 | echo $(date +%x,%r) >> TIMES.txt; 19 | -------------------------------------------------------------------------------- /tests/eCLIP-0.7.0/09_clipper/run_204_01_RBFOX2_clipper.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | module load clipper/5d865bb; 4 | 5 | cwltool \ 6 | --no-container \ 7 | /projects/ps-yeolab4/software/eclip/0.7.0/cwl/clipper.cwl \ 8 | /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/clipper/204_01_RBFOX2_clipper.yaml 9 | -------------------------------------------------------------------------------- /tests/eCLIP-0.7.0/09_clipper/run_4020_CLIP1_clipper.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | module load clipper/5d865bb; 4 | 5 | cwltool \ 6 | --no-container \ 7 | /projects/ps-yeolab4/software/eclip/0.7.0/cwl/clipper.cwl \ 8 | /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/clipper/4020_CLIP1.yaml 9 | -------------------------------------------------------------------------------- /tests/eCLIP-0.7.0/09_clipper/run_clipper.sh: -------------------------------------------------------------------------------- 1 | clipper \ 2 | --species \ 3 | hg19 \ 4 | --bam inputs/rep1.IP.umi.r1.fq.genome-mappedSoSo.rmDupSo.bam \ 5 | --outfile rep1.IP.umi.r1.fq.genome-mappedSoSo.rmDupSo.peakClusters.bed 6 | 7 | -------------------------------------------------------------------------------- /tests/eCLIP-0.7.0/10_normalize/run_input_norm.sh: -------------------------------------------------------------------------------- 1 | # samtools 1.6 2 | # overlap_peakfi_with_bam_PE.pl 3 | 4 | samtools sort inputs/rep1.IN.umi.r1.fq.genome-mappedSoSo.rmDup.bam > inputs/rep1.IN.umi.r1.fq.genome-mappedSoSo.rmDupSo.bam 5 | samtools sort inputs/rep1.IP.umi.r1.fq.genome-mappedSoSo.rmDup.bam > inputs/rep1.IP.umi.r1.fq.genome-mappedSoSo.rmDupSo.bam 6 | 7 | samtools view -cF 4 
inputs/rep1.IP.umi.r1.fq.genome-mappedSoSo.rmDupSo.bam > ip_mapped_readnum.txt 8 | samtools view -cF 4 inputs/rep1.IN.umi.r1.fq.genome-mappedSoSo.rmDupSo.bam > input_mapped_readnum.txt 9 | 10 | perl /projects/ps-yeolab4/software/eclip/0.7.0/bin/overlap_peakfi_with_bam_PE.pl \ 11 | inputs/rep1.IP.umi.r1.fq.genome-mappedSoSo.rmDupSo.bam \ 12 | inputs/rep1.IN.umi.r1.fq.genome-mappedSoSo.rmDupSo.bam \ 13 | inputs/rep1.IP.umi.r1.fq.genome-mappedSoSo.rmDupSo.peakClusters.bed \ 14 | ip_mapped_readnum.txt \ 15 | input_mapped_readnum.txt \ 16 | rep1.IP.umi.r1.fq.genome-mappedSoSo.rmDupSo.peakClusters.normed.bed 17 | 18 | -------------------------------------------------------------------------------- /tests/eCLIP-0.7.0/wf_clipseqcore_pe_2barcodes/wf_clipseqcore_2bc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #PBS -N wf_clipseqcore_2bc 3 | #PBS -o wf_clipseqcore_2bc.sh.out 4 | #PBS -e wf_clipseqcore_2bc.sh.err 5 | #PBS -V 6 | #PBS -l walltime=24:00:00 7 | #PBS -l nodes=1:ppn=7 8 | #PBS -A yeo-group 9 | #PBS -q home 10 | #PBS -t 1-6 11 | 12 | # Go to the directory from which the script was called 13 | cd $PBS_O_WORKDIR 14 | cmd[1]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_pe_2barcodes/;./A01_B06.yaml" 15 | cmd[2]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_pe_2barcodes/;./A03_G07.yaml" 16 | cmd[3]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_pe_2barcodes/;./A04_F05.yaml" 17 | cmd[4]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_pe_2barcodes/;./C01_D8f.yaml" 18 | cmd[5]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_pe_2barcodes/;./X1A_X1B.yaml" 19 | cmd[6]="module load eclip/0.7.0;cd 
/home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_pe_2barcodes/;./X2A_X2B.yaml" 20 | eval ${cmd[$PBS_ARRAYID]} 21 | 22 | -------------------------------------------------------------------------------- /tests/eCLIP-0.7.0/wf_clipseqcore_se_1barcode/wf_clipseqcore_1bc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #PBS -N wf_clipseqcore_1bc 3 | #PBS -o wf_clipseqcore_1bc.sh.out 4 | #PBS -e wf_clipseqcore_1bc.sh.err 5 | #PBS -V 6 | #PBS -l walltime=24:00:00 7 | #PBS -l nodes=1:ppn=8 8 | #PBS -A yeo-group 9 | #PBS -q home 10 | #PBS -t 1-9 11 | 12 | # Go to the directory from which the script was called 13 | cd $PBS_O_WORKDIR 14 | cmd[1]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_se_1barcode/;./InvRNA1.yaml" 15 | cmd[2]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_se_1barcode/;./InvRNA2.yaml" 16 | cmd[3]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_se_1barcode/;./InvRNA3.yaml" 17 | cmd[4]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_se_1barcode/;./InvRNA4.yaml" 18 | cmd[5]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_se_1barcode/;./InvRNA5.yaml" 19 | cmd[6]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_se_1barcode/;./InvRNA6.yaml" 20 | cmd[7]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_se_1barcode/;./InvRNA7.yaml" 21 | cmd[8]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_se_1barcode/;./InvRNA8.yaml" 22 | cmd[9]="module load eclip/0.7.0;cd /home/bay001/projects/codebase/eclip/tests/eCLIP-0.7.0/wf_clipseqcore_se_1barcode/;./InvRil19.yaml" 23 | eval ${cmd[$PBS_ARRAYID]} 24 | 25 | 
-------------------------------------------------------------------------------- /wf/README.md: -------------------------------------------------------------------------------- 1 | ## This folder contains work-in-progress "metadata runners". 2 | 3 | - The idea is to make it easier to switch between cwlref-runner, cwltoil (local), and cwltoil (torque). 4 | - Any 'single end' workflow must have ```#!/usr/bin/env eCLIP_singleend``` at the top of its YAML document. This invokes the eCLIP_singleend bash script, which selects the single-end-specific CWL workflow. 5 | - Likewise for 'paired end' workflows. --------------------------------------------------------------------------------