├── .DS_Store
├── Examples
    ├── Cluster_config
    │   └── lsf
    │   │   └── cluster.json
    ├── Runs
    │   ├── Autism
    │   │   ├── NCBI_accession_list.txt
    │   │   └── config.yaml
    │   ├── COSMIC
    │   │   ├── config.yaml
    │   │   └── local_samples.tsv
    │   ├── C_elegans
    │   │   └── config.yaml
    │   ├── ENCODE
    │   │   ├── config.yaml
    │   │   └── sample_url.tsv
    │   ├── Parada_et_al
    │   │   ├── NCBI_accession_list.txt
    │   │   ├── config.yaml
    │   │   ├── sample_url.tsv
    │   │   └── whippet_delta.yaml
    │   ├── README.md
    │   └── Zebrafish
    │   │   ├── NCBI_accession_list.txt
    │   │   └── config.yaml
    └── Single_cell
    │   ├── run_metadata.super_clusters.tsv
    │   └── run_metadata.tsv
├── MicroExonator.smk
├── PWM
    ├── Human
    │   ├── hg19_GT_AG_U2_3.good.matrix
    │   └── hg19_GT_AG_U2_5.good.matrix
    └── Mouse
    │   ├── mm10_GT_AG_U2_3.good.matrix
    │   └── mm10_GT_AG_U2_5.good.matrix
├── README.md
├── config.py
├── docs
    ├── .DS_Store
    ├── Makefile
    ├── build
    │   ├── doctrees
    │   │   ├── differential_inclusion_analysis.doctree
    │   │   ├── discovery_and_quantification.doctree
    │   │   ├── environment.pickle
    │   │   ├── index.doctree
    │   │   ├── install.doctree
    │   │   ├── licence.doctree
    │   │   ├── setup.doctree
    │   │   └── support.doctree
    │   └── html
    │   │   ├── .buildinfo
    │   │   ├── _sources
    │   │       ├── differential_inclusion_analysis.rst.txt
    │   │       ├── discovery_and_quantification.rst.txt
    │   │       ├── index.rst.txt
    │   │       ├── install.rst.txt
    │   │       ├── licence.rst.txt
    │   │       ├── setup.rst.txt
    │   │       └── support.rst.txt
    │   │   ├── _static
    │   │       ├── basic.css
    │   │       ├── css
    │   │       │   ├── badge_only.css
    │   │       │   ├── fonts
    │   │       │   │   ├── Roboto-Slab-Bold.woff
    │   │       │   │   ├── Roboto-Slab-Bold.woff2
    │   │       │   │   ├── Roboto-Slab-Regular.woff
    │   │       │   │   ├── Roboto-Slab-Regular.woff2
    │   │       │   │   ├── fontawesome-webfont.eot
    │   │       │   │   ├── fontawesome-webfont.svg
    │   │       │   │   ├── fontawesome-webfont.ttf
    │   │       │   │   ├── fontawesome-webfont.woff
    │   │       │   │   ├── fontawesome-webfont.woff2
    │   │       │   │   ├── lato-bold-italic.woff
    │   │       │   │   ├── lato-bold-italic.woff2
    │   │       │   │   ├── lato-bold.woff
    │   │       │   │   ├── lato-bold.woff2
    │   │       │   │   ├── lato-normal-italic.woff
    │   │       │   │   ├── lato-normal-italic.woff2
    │   │       │   │   ├── lato-normal.woff
    │   │       │   │   └── lato-normal.woff2
    │   │       │   └── theme.css
    │   │       ├── doctools.js
    │   │       ├── documentation_options.js
    │   │       ├── file.png
    │   │       ├── jquery-3.5.1.js
    │   │       ├── jquery.js
    │   │       ├── js
    │   │       │   ├── badge_only.js
    │   │       │   ├── html5shiv-printshiv.min.js
    │   │       │   ├── html5shiv.min.js
    │   │       │   └── theme.js
    │   │       ├── language_data.js
    │   │       ├── minus.png
    │   │       ├── plus.png
    │   │       ├── pygments.css
    │   │       ├── searchtools.js
    │   │       ├── underscore-1.3.1.js
    │   │       └── underscore.js
    │   │   ├── differential_inclusion_analysis.html
    │   │   ├── discovery_and_quantification.html
    │   │   ├── genindex.html
    │   │   ├── index.html
    │   │   ├── install.html
    │   │   ├── licence.html
    │   │   ├── objects.inv
    │   │   ├── search.html
    │   │   ├── searchindex.js
    │   │   ├── setup.html
    │   │   └── support.html
    ├── make.bat
    └── source
    │   ├── conf.py
    │   ├── differential_inclusion_analysis.rst
    │   ├── discovery_and_quantification.rst
    │   ├── index.rst
    │   ├── install.rst
    │   ├── licence.rst
    │   ├── setup.rst
    │   ├── single_cell_analysis.rst
    │   └── support.rst
├── envs
    ├── MicroExonator.yaml
    ├── MicroExonator.yml
    ├── R.yaml
    ├── biopython_py3.yaml
    ├── core.yaml
    ├── core_py3.yaml
    ├── pybedtools.yaml
    └── snakemake.yaml
├── rules
    ├── Benchmark.smk
    ├── Get_data.smk
    ├── Round1.smk
    ├── Round1_post_processing.smk
    ├── Round2.smk
    ├── Round2_post_processing.smk
    ├── Snakepool.backup.py
    ├── Snakepool.py
    ├── Whippet_delta.smk
    ├── Whippet_quant.smk
    ├── init.smk
    ├── pseudo_pool.smk
    └── sashimi.smk
├── src
    ├── Filter1_round2.py
    ├── GTFtoBED12.py
    ├── GetPSI.py
    ├── Get_ME_matches.py
    ├── Get_annotated_microexons.py
    ├── Get_exons_from_sam.py
    ├── Get_fasta_from_bed12.py
    ├── Get_introns_from_sam.py
    ├── Get_splicing_PWMs.py
    ├── ME_SJ_coverage.py
    ├── ME_centric_table.py
    ├── ME_filter1.py
    ├── Micro_exons_tags.py
    ├── Replace_PSI_whippet.py
    ├── Report
    │   └── report_files
    │   │   └── figure-html
    │   │       ├── unnamed-chunk-4-1.png
    │   │       └── unnamed-chunk-5-1.png
    ├── SJ_tags_generator_for_micro_exons.py
    ├── Snakefile
    ├── Snakepool_BetaDist.R
    ├── alingment_pre_processing.py
    ├── alingment_pre_processing_round2_bowtie.py
    ├── counts_to_PSI.py
    ├── coverage_sample_filter.py
    ├── final_filters.R
    ├── final_filters.Rmd
    ├── final_filters2.Rmd
    ├── final_filters3.R
    ├── get_diff_ME_single_cell.py
    ├── get_isoforms2.py
    ├── high_confident_list.py
    ├── merge_pairs.py
    ├── merge_quant.py
    ├── round2_ME_reads_fastq.py
    ├── round2_ME_reads_fastq2.py
    ├── row_ME2.py
    ├── sashimi-plot.py
    ├── sashimi_input_generator.py
    ├── split_coverage.py
    ├── split_paired_end.py
    ├── stats
    │   └── discovery_stats.py
    ├── validate_fastq.py
    ├── whippet_delta_to_ME.py
    ├── write_bam_tsv.py
    └── write_sig_node_files.py
└── touch
    ├── MicroExonator
        └── github_clone
    ├── VastDb.bed12
    ├── gencode.vM16.annotation.bed12
    ├── gencode.vM16.annotation.gtf
    ├── miniconda
        └── envs
        │   └── julia_0.6.1
        │       └── share
        │           └── julia
        │               └── site
        │                   └── v0.6
        │                       └── Whippet
        │                           └── bin
        │                               └── whipet_scripts
    ├── mm10.60way.phyloP60way.bw
    ├── mm10.fa
    ├── mm10_GT_AG_U2_3.good.matrix
    └── mm10_GT_AG_U2_5.good.matrix


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/.DS_Store


--------------------------------------------------------------------------------
/Examples/Cluster_config/lsf/cluster.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "__default__" :
 3 |     {
 4 |         "queue"     : "normal",
 5 |         "nCPUs"     : "1",
 6 |         "memory"    : 10000,
 7 |         "resources" : "\"select[mem>10000] rusage[mem=10000] span[hosts=1]\"",
 8 |         "name"      : "JOBNAME.{rule}.{wildcards}",
 9 |         "output"    : "logs/{rule}.{wildcards}.out",
10 |         "error"     : "logs/{rule}.{wildcards}.err",
11 |         "Group"     : "team_hemberg",
12 |         "tCPU"      : "99999"
13 |     },
14 | 
15 |     "Round1_bwa_mem_to_tags" :
16 |     {
17 |         "nCPUs"    : 5
18 |     },
19 | 
20 | 
21 |     "hisat2_genome_index" :
22 |     {
23 |         "nCPUs"     : 5
24 |     },
25 | 
26 |     "whippet_quant" :
27 |     {
28 |         "nCPUs"     : 1,
29 |         "memory"    : 2000,
30 |         "resources" : "\"select[mem>2000] rusage[mem=2000] span[hosts=1]\""
31 | 
32 |     },
33 | 
34 | 
35 |     "Round2_bowtie_to_tags" :
36 |     {
37 |         "nCPUs"    : 5
38 |     },
39 | 
40 |     "bowtie_genome_index" :
41 |     {
42 |         "memory"    : 30000,
43 |         "resources" : "\"select[mem>30000] rusage[mem=30000] span[hosts=1]\""
44 |     },
45 |     
46 |     "bowtie_to_genome" :
47 |     {
48 |         "nCPUs"    : 2
49 |     },
50 | 
51 |     "Output" :
52 |     {
53 |         "nCPUs"    : 2,
54 |         "memory"    : 30000,
55 |         "resources" : "\"select[mem>30000] rusage[mem=30000] span[hosts=1]\""
56 |     },
57 | 
58 |     "total_hisat2_to_genome" :
59 |     {
60 |         "nCPUs"    : 5
61 |     },
62 | 
63 | }
64 | 


--------------------------------------------------------------------------------
/Examples/Runs/Autism/NCBI_accession_list.txt:
--------------------------------------------------------------------------------
 1 | SRR309144
 2 | SRR309143
 3 | SRR309142
 4 | SRR309141
 5 | SRR309136
 6 | SRR309135
 7 | SRR309134
 8 | SRR309133
 9 | SRR309140
10 | SRR309139
11 | SRR309138
12 | SRR309137
13 | 


--------------------------------------------------------------------------------
/Examples/Runs/Autism/config.yaml:
--------------------------------------------------------------------------------
 1 | Genome_fasta : /lustre/scratch117/cellgen/team218/gp7/Genome/hg19/hg19.fa
 2 | Gene_anontation_bed12 : /lustre/scratch117/cellgen/team218/gp7/Genome/hg19/Tracks/Gene_annotation/gencode.v19.chr_patch_hapl_scaff.annotation.bed12
 3 | GT_AG_U2_5 : /lustre/scratch117/cellgen/team218/gp7/Genome/hg19/Tracks/SpliceRack/hg19_GT_AG_U2_5.good.matrix
 4 | GT_AG_U2_3 : /lustre/scratch117/cellgen/team218/gp7/Genome/hg19/Tracks/SpliceRack/hg19_GT_AG_U2_3.good.matrix
 5 | conservation_bigwig : /lustre/scratch117/cellgen/team218/gp7/Genome/hg19/Tracks/Phylop/hg19.100way.phyloP100way.bw  
 6 | working_directory : /lustre/scratch117/cellgen/team218/gp7/Micro-exons/Runs/Test_Martin/Autism/MicroExonator/
 7 | ME_DB : /lustre/scratch117/cellgen/team218/gp7/Genome/hg19/Tracks/Gene_annotation/hg19.VastDb.bed12
 8 | ME_len : 30
 9 | Optimize_hard_drive : F
10 | min_number_files_detected : 3
11 | 
12 | 
13 | # Whippet
14 | 
15 | downstream_only : T
16 | whippet_bin_folder : /lustre/scratch117/cellgen/team218/gp7/miniconda/envs/julia_0.6.1/share/julia/site/v0.6/Whippet/bin
17 | Gene_anontation_GTF : /lustre/scratch117/cellgen/team218/gp7/Genome/hg19/Tracks/Gene_annotation/gencode.v19.chr_patch_hapl_scaff.annotation.gtf
18 | 
19 | whippet_delta:
20 |     Control_vs_Autism-temporal_cortex :
21 |         A : SRR309144,SRR309143,SRR309142,SRR309141
22 |         B : SRR309136,SRR309135,SRR309134,SRR309133
23 |     Control_vs_Autism-frontal_cortex :
24 |         A : SRR309140,SRR309139
25 |         B : SRR309138,SRR309137
26 | 


--------------------------------------------------------------------------------
/Examples/Runs/COSMIC/config.yaml:
--------------------------------------------------------------------------------
 1 | Genome_fasta : /lustre/scratch117/cellgen/team218/gp7/Genome/hg19/hg19.fa
 2 | Gene_anontation_bed12 : /lustre/scratch117/cellgen/team218/gp7/Genome/hg19/Tracks/Gene_annotation/gencode.v19.chr_patch_hapl_scaff.annotation.bed12
 3 | Gene_anontation_fasta : /lustre/scratch117/cellgen/team218/gp7/Genome/hg19/Tracks/Gene_annotation/gencode.v19.pc_transcripts.fa
 4 | GT_AG_U2_5 : /lustre/scratch117/cellgen/team218/gp7/Genome/hg19/Tracks/SpliceRack/hg19_GT_AG_U2_5.good.matrix
 5 | GT_AG_U2_3 : /lustre/scratch117/cellgen/team218/gp7/Genome/hg19/Tracks/SpliceRack/hg19_GT_AG_U2_3.good.matrix
 6 | conservation_bigwig : /lustre/scratch117/cellgen/team218/gp7/Genome/hg19/Tracks/Phylop/hg19.100way.phyloP100way.bw
 7 | ME_len : 30
 8 | working_directory : /lustre/scratch117/cellgen/team218/igs_gp7/MicroExons/COSMIC/
 9 | ME_DB : /lustre/scratch117/cellgen/team218/gp7/Genome/hg19/Tracks/Gene_annotation/hg19.VastDb.bed12
10 | Optimize_hard_drive : T
11 | 


--------------------------------------------------------------------------------
/Examples/Runs/C_elegans/config.yaml:
--------------------------------------------------------------------------------
 1 | 
 2 | Genome_fasta : /lustre/scratch117/cellgen/team218/gp7/Fabian/Caenorhabditis_elegans.WBcel235.dna.fa
 3 | Gene_anontation_bed12 : /lustre/scratch117/cellgen/team218/gp7/Fabian/ce11.Ensembl.genes.bed12.Ensembl
 4 | GT_AG_U2_5 : NA
 5 | GT_AG_U2_3 : NA
 6 | conservation_bigwig : NA
 7 | working_directory : /lustre/scratch117/cellgen/team218/gp7/Fabian/MicroExonator/
 8 | ME_DB : /lustre/scratch117/cellgen/team218/gp7/Fabian/empty_DB
 9 | ME_len : 30
10 | Optimize_hard_drive : F
11 | min_number_files_detected : 2
12 | 
13 | #Whippet
14 | 
15 | downstream_only : F
16 | whippet_bin_folder : /lustre/scratch117/cellgen/team218/gp7/miniconda/envs/julia_0.6.1/share/julia/site/v0.6/Whippet/bin
17 | Gene_anontation_GTF : /lustre/scratch117/cellgen/team218/gp7/Genome/danRer11/Danio_rerio.GRCz11.96.chr.gtf.UCSC_style
18 | 
19 | 
20 | whippet_delta:
21 |     WT_vs_SID1:
22 |         A : FB264-RNA-01_S1,FB264-RNA-06_S6,FB264-RNA-11_S11
23 |         B : FB264-RNA-02_S2,FB264-RNA-07_S7,FB264-RNA-12_S12,FB264-RNA-04_S4,FB264-RNA-09_S9,FB264-RNA-14_S14
24 |     WT_vs_SID1_qt129:
25 |         A : FB264-RNA-01_S1,FB264-RNA-06_S6,FB264-RNA-11_S11
26 |         B : FB264-RNA-02_S2,FB264-RNA-07_S7,FB264-RNA-12_S12
27 |     WT_vs_SID1_mj444:   
28 |         A : FB264-RNA-01_S1,FB264-RNA-06_S6,FB264-RNA-11_S11
29 |         B : FB264-RNA-04_S4,FB264-RNA-09_S9,FB264-RNA-14_S14
30 |     WT_vs_SID2:
31 |         A : FB264-RNA-01_S1,FB264-RNA-06_S6,FB264-RNA-11_S11
32 |         B : FB264-RNA-03_S3,FB264-RNA-08_S8,FB264-RNA-13_S13,FB264-RNA-05_S5,FB264-RNA-10_S10,FB264-RNA-15_S15
33 |     WT_vs_SID2_qt142:
34 |         A : FB264-RNA-01_S1,FB264-RNA-06_S6,FB264-RNA-11_S11
35 |         B : FB264-RNA-03_S3,FB264-RNA-08_S8,FB264-RNA-13_S13
36 |     WT_vs_SID2_mj465:
37 |         A : FB264-RNA-01_S1,FB264-RNA-06_S6,FB264-RNA-11_S11
38 |         B : FB264-RNA-05_S5,FB264-RNA-10_S10,FB264-RNA-15_S15
39 | 


--------------------------------------------------------------------------------
/Examples/Runs/ENCODE/config.yaml:
--------------------------------------------------------------------------------
 1 | Genome_fasta : /touch/mm10.fa
 2 | Gene_anontation_bed12 : /touch/gencode.vM16.annotation.bed12
 3 | GT_AG_U2_5 : /touch/mm10_GT_AG_U2_5.good.matrix
 4 | GT_AG_U2_3 : /touch/mm10_GT_AG_U2_3.good.matrix
 5 | conservation_bigwig : /touch/mm10.60way.phyloP60way.bw
 6 | working_directory : /touch/MicroExonator/
 7 | ME_DB : /touch/VastDb.bed12
 8 | ME_len : 30
 9 | Optimize_hard_drive : F
10 | min_number_files_detected : 2
11 | 
12 | # Whippet
13 | 
14 | whippet_bin_folder : /touch/miniconda/envs/julia_0.6.1/share/julia/site/v0.6/Whippet/bin/
15 | Gene_anontation_GTF : /touch/gencode.vM16.annotation.gtf
16 | condition1 : ENCFF920CNZ,ENCFF320FJX,ENCFF528EVC,ENCFF663SNC
17 | condition2 : ENCFF270GKY,ENCFF460TCF,ENCFF126IRS,ENCFF748SRJ
18 | comparison_name : forebrain_10.5_vs_forebrain_14.5
19 | 


--------------------------------------------------------------------------------
/Examples/Runs/ENCODE/sample_url.tsv:
--------------------------------------------------------------------------------
 1 | url	sample
 2 | https://www.encodeproject.org/files/ENCFF270GKY/@@download/ENCFF920CNZ.fastq.gz	ENCFF920CNZ
 3 | https://www.encodeproject.org/files/ENCFF270GKY/@@download/ENCFF320FJX.fastq.gz	ENCFF320FJX
 4 | https://www.encodeproject.org/files/ENCFF270GKY/@@download/ENCFF528EVC.fastq.gz	ENCFF528EVC
 5 | https://www.encodeproject.org/files/ENCFF270GKY/@@download/ENCFF663SNC.fastq.gz	ENCFF663SNC
 6 | https://www.encodeproject.org/files/ENCFF270GKY/@@download/ENCFF270GKY.fastq.gz	ENCFF270GKY
 7 | https://www.encodeproject.org/files/ENCFF270GKY/@@download/ENCFF460TCF.fastq.gz	ENCFF460TCF
 8 | https://www.encodeproject.org/files/ENCFF270GKY/@@download/ENCFF126IRS.fastq.gz	ENCFF126IRS
 9 | https://www.encodeproject.org/files/ENCFF270GKY/@@download/ENCFF748SRJ.fastq.gz	ENCFF748SRJ
10 | 


--------------------------------------------------------------------------------
/Examples/Runs/Parada_et_al/config.yaml:
--------------------------------------------------------------------------------
 1 | Genome_fasta : /lustre/scratch117/cellgen/team218/gp7/Genome/mm10/mm10.fa
 2 | Gene_anontation_bed12 : /lustre/scratch117/cellgen/team218/gp7/Genome/mm10/Tracks/Gene_annotation/gencode.vM16.annotation.bed12
 3 | GT_AG_U2_5 : /lustre/scratch117/cellgen/team218/MH/MicroExonator/PWM/Mouse/mm10_GT_AG_U2_5.good.matrix
 4 | GT_AG_U2_3 : /lustre/scratch117/cellgen/team218/MH/MicroExonator/PWM/Mouse/mm10_GT_AG_U2_3.good.matrix
 5 | conservation_bigwig : /lustre/scratch117/cellgen/team218/gp7/Genome/mm10/Tracks/Phylop/mm10.60way.phyloP60way.bw
 6 | working_directory : /lustre/scratch117/cellgen/team218/gp7/Micro-exons/Runs/Paper/MicroExonator/
 7 | ME_DB : /lustre/scratch117/cellgen/team218/gp7/Genome/mm10/Tracks/Gene_annotation/VastDb.bed12
 8 | ME_len : 30
 9 | Optimize_hard_drive : T
10 | min_number_files_detected : 2
11 | 
12 | # Whippet
13 | 
14 | #whippet_bin_folder : /lustre/scratch117/cellgen/team218/gp7/miniconda/envs/julia_0.6.1/share/julia/site/v0.6/Whippet/bin
15 | #Gene_anontation_GTF : /lustre/scratch117/cellgen/team218/gp7/Genome/mm10/Tracks/Gene_annotation/gencode.vM16.annotation.gtf
16 | #condition1 : ENCFF920CNZ,ENCFF320FJX,ENCFF528EVC,ENCFF663SNC
17 | #condition2 : ENCFF270GKY,ENCFF460TCF,ENCFF126IRS,ENCFF748SRJ
18 | #comparison_name : forebrain_10.5_vs_forebrain_14.5
19 | 


--------------------------------------------------------------------------------
/Examples/Runs/README.md:
--------------------------------------------------------------------------------
 1 | Here we provide example runs that we have implemented for two different projects:
 2 | 
 3 | # Zebrafish
 4 | 
 5 | Small project that were ran using SRA accession codes as input. These accession codes are inputed inside `NCBI_accession_list.txt` file.
 6 | 
 7 | # COSMIC
 8 | 
 9 | Large cancer cell-lines project, where we used a local copy of the input fastq.gz files as an input. The paths and the name of the samples needs to be provided inside a `desing.tvs` file.
10 | 
11 | 
12 | # Running under lsf
13 | 
14 | `snakemake -s MicroExonator.smk  --cluster-config cluster.json --cluster "bsub -n {cluster.nCPUs} -R {cluster.resources} -c {cluster.tCPU} -G {cluster.Group} -q {cluster.queue} -o {cluster.output} -e {cluster.error} -M {cluster.memory}" --use-conda -k  -j 1000000`
15 | 


--------------------------------------------------------------------------------
/Examples/Runs/Zebrafish/NCBI_accession_list.txt:
--------------------------------------------------------------------------------
 1 | SRR6652888
 2 | SRR6652889
 3 | SRR6652890
 4 | SRR6652891
 5 | SRR6652892
 6 | SRR6652893
 7 | SRR6652894
 8 | SRR6652895
 9 | SRR6652896
10 | SRR6652897
11 | SRR6652898
12 | SRR6652899
13 | SRR6652900
14 | SRR6652901
15 | SRR6652902
16 | SRR6652903
17 | SRR6652904
18 | SRR6652905
19 | SRR6652906
20 | SRR6652907
21 | SRR6652908
22 | SRR6652909
23 | SRR6652910
24 | 


--------------------------------------------------------------------------------
/Examples/Runs/Zebrafish/config.yaml:
--------------------------------------------------------------------------------
 1 | Genome_fasta : /lustre/scratch117/cellgen/team218/gp7/Genome/danRer11/danRer11.fa
 2 | Gene_anontation_bed12 : /lustre/scratch117/cellgen/team218/gp7/Genome/danRer11/darRer11.Ensembl.genes.bed12
 3 | GT_AG_U2_5 : /lustre/scratch117/cellgen/team218/igs_gp7/Zebrafish/Data/danRer11_GT_AG_U2_5.good.matrix
 4 | GT_AG_U2_3 : /lustre/scratch117/cellgen/team218/igs_gp7/Zebrafish/Data/danRer11_GT_AG_U2_3.good.matrix
 5 | conservation_bigwig : NA
 6 | ME_len : 30
 7 | working_directory : /lustre/scratch117/cellgen/team218/gp7/Micro-exons/Runs/Zebrafish/MicroExonator/
 8 | ME_DB : /lustre/scratch117/cellgen/team218/igs_gp7/Zebrafish/Data/VastDb.bed12
 9 | Optimize_hard_drive : F
10 | min_number_files_detected : 2
11 | 
12 | #Whippet
13 | 
14 | downstream_only : F
15 | whippet_bin_folder : /lustre/scratch117/cellgen/team218/gp7/miniconda/envs/julia_0.6.1/share/julia/site/v0.6/Whippet/bin
16 | Gene_anontation_GTF : /lustre/scratch117/cellgen/team218/gp7/Genome/danRer11/Danio_rerio.GRCz11.96.chr.gtf.UCSC_style
17 | 
18 | whippet_delta:
19 |     ZT16_vs_ZT4-20Mo:
20 |         A : SRR6652910,SRR6652909,SRR6652908,SRR6652907
21 |         B : SRR6652906,SRR6652905,SRR6652904,SRR6652903
22 |     ZT16_vs_ZT4-16Mo :
23 |         A : SRR6652902,SRR6652901,SRR6652900
24 |         B : SRR6652899,SRR6652898,SRR6652897,SRR6652896
25 |     ZT16_vs_ZT4-4Mo:
26 |         A : SRR6652895,SRR6652894,SRR6652893,SRR6652892
27 |         B : SRR6652891,SRR6652890,SRR6652889,SRR6652888
28 |         
29 |     20Mo_vs_16Mo-ZT16:
30 |         A : SRR6652910,SRR6652909,SRR6652908,SRR6652907
31 |         B : SRR6652902,SRR6652901,SRR6652900
32 |     20Mo_vs_4M-ZT16:
33 |         A : SRR6652910,SRR6652909,SRR6652908,SRR6652907
34 |         B : SRR6652895,SRR6652894,SRR6652893,SRR6652892
35 |     16Mo_vs_4Mo-ZT16:
36 |         A : SRR6652902,SRR6652901,SRR6652900
37 |         B : SRR6652895,SRR6652894,SRR6652893,SRR6652892
38 |         
39 |     20Mo_vs_16Mo-ZT4:
40 |         A : SRR6652906,SRR6652905,SRR6652904,SRR6652903
41 |         B : SRR6652899,SRR6652898,SRR6652897,SRR6652896
42 |     20Mo_vs_4M-ZT4:
43 |         A : SRR6652906,SRR6652905,SRR6652904,SRR6652903
44 |         B : SRR6652891,SRR6652890,SRR6652889,SRR6652888
45 |     16Mo_vs_4Mo-ZT4:
46 |         A : SRR6652899,SRR6652898,SRR6652897,SRR6652896
47 |         B : SRR6652891,SRR6652890,SRR6652889,SRR6652888
48 |         
49 |     20Mo_vs_16Mo-TOTAL:
50 |         A : SRR6652910,SRR6652909,SRR6652908,SRR6652907,SRR6652906,SRR6652905,SRR6652904,SRR6652903
51 |         B : SRR6652902,SRR6652901,SRR6652900,SRR6652899,SRR6652898,SRR6652897,SRR6652896
52 |     20Mo_vs_4M-TOTAL:
53 |         A : SRR6652910,SRR6652909,SRR6652908,SRR6652907,SRR6652906,SRR6652905,SRR6652904,SRR6652903
54 |         B : SRR6652895,SRR6652894,SRR6652893,SRR6652892,SRR6652891,SRR6652890,SRR6652889,SRR6652888
55 |     16Mo_vs_4Mo-TOTAL:
56 |         A : SRR6652902,SRR6652901,SRR6652900,SRR6652899,SRR6652898,SRR6652897,SRR6652896
57 |         B : SRR6652895,SRR6652894,SRR6652893,SRR6652892,SRR6652891,SRR6652890,SRR6652889,SRR6652888
58 | 


--------------------------------------------------------------------------------
/Examples/Single_cell/run_metadata.super_clusters.tsv:
--------------------------------------------------------------------------------
1 | Compare_ID	A.cluster_names	A.number_of_pools	B.cluster_names	B.number_of_pools	Repeat
2 | Mesoderm_vs_Neuroectoderm_10	Ect_caudal_neuroectoderm,Ect_For_mid_hindbrain,Ect_Neural_crest	10	M_Mesoderm_unknown,M_Mesenchyme,M_Paraxial_Mesoderm,M_Intermediate_Mesoderm	10	50
3 | Mesoderm_vs_Neuroectoderm_15	Ect_caudal_neuroectoderm,Ect_For_mid_hindbrain,Ect_Neural_crest	15	M_Mesoderm_unknown,M_Mesenchyme,M_Paraxial_Mesoderm,M_Intermediate_Mesoderm	15	50
4 | Mesoderm_vs_Neuroectoderm_20	Ect_caudal_neuroectoderm,Ect_For_mid_hindbrain,Ect_Neural_crest	20	M_Mesoderm_unknown,M_Mesenchyme,M_Paraxial_Mesoderm,M_Intermediate_Mesoderm	20	50
5 | Mesoderm_vs_Neuroectoderm_25	Ect_caudal_neuroectoderm,Ect_For_mid_hindbrain,Ect_Neural_crest	25	M_Mesoderm_unknown,M_Mesenchyme,M_Paraxial_Mesoderm,M_Intermediate_Mesoderm	25	50
6 | Mesoderm_vs_Neuroectoderm_30	Ect_caudal_neuroectoderm,Ect_For_mid_hindbrain,Ect_Neural_crest	30	M_Mesoderm_unknown,M_Mesenchyme,M_Paraxial_Mesoderm,M_Intermediate_Mesoderm	30	50
7 | 


--------------------------------------------------------------------------------
/Examples/Single_cell/run_metadata.tsv:
--------------------------------------------------------------------------------
 1 | Compare_ID	A.cluster_names	A.number_of_pools	B.cluster_names	B.number_of_pools	Repeat
 2 | E85_NMP_5_vs_E85_SC_5	E85_NMP	5	E85_SC	5	50
 3 | E85_NMP_10_vs_E85_SC_10	E85_NMP	10	E85_SC	10	50
 4 | E85_NMP_15_vs_E85_SC_15	E85_NMP	15	E85_SC	15	50
 5 | E85_NMP_20_vs_E85_SC_20	E85_NMP	20	E85_SC	20	50
 6 | E85_NMP_35_vs_E85_SC_35	E85_NMP	35	E85_SC	35	50
 7 | E85_NMP_35_vs_E85_SC_40	E85_NMP	40	E85_SC	40	50
 8 | E85_NMP_35_vs_E85_SC_45	E85_NMP	45	E85_SC	45	50
 9 | E85_NMP_35_vs_E85_SC_50	E85_NMP	50	E85_SC	50	50
10 | E85_NMP_35_vs_E85_SC_55	E85_NMP	55	E85_SC	55	50
11 | E85_NMP_35_vs_E85_SC_60	E85_NMP	60	E85_SC	60	50
12 | 


--------------------------------------------------------------------------------
/MicroExonator.smk:
--------------------------------------------------------------------------------
  1 | #version 0.9.0
  2 | 
  3 | import yaml
  4 | from collections import defaultdict
  5 | import csv
  6 | 
  7 | configfile : "config.yaml"
  8 | DATA = set([])
  9 | 
 10 | def str2bool(v):
 11 |   if v==True:
 12 |     return True
 13 |   elif v==False:
 14 |     return False
 15 |   else:
 16 |     return v.lower() in ("yes", "true", "t", "1")
 17 | 
 18 | rule quant:
 19 |     input:
 20 |         "Report/out.high_quality.txt",
 21 |         "Report/out_filtered_ME.PSI.txt",        
 22 |         #"Report/stats/Microexons.not_consensus",
 23 |         #"Report/stats/Microexons.annotation.stats"        
 24 |         
 25 |         #"Report/out_filtered_ME.txt"
 26 |         #expand("Genome_aligments/{Software}/TOTAL.exons.{Software}", Software=["Hisat2", "STAR", "Olego"])
 27 |         # expand("Genome_aligments/{Software}/{sample}.sam.SJ_count", sample=DATA, Software=["Hisat2", "STAR"]),
 28 |         #expand("Whippet/Quant/{sample}.psi.gz", sample=DATA),
 29 |         #expand("Ground_Truth/{sample}.GT.SJ_count", sample=DATA)
 30 | 
 31 | 
 32 | 
 33 | 
 34 | if 'cluster_metadata' in config:
 35 | 
 36 |     cluster_files = defaultdict(list)
 37 |     cluster_files_metadata = defaultdict(list)
 38 |     single_cell_files = set([])
 39 | 
 40 |     with open(config["cluster_metadata"]) as Single_Cell:
 41 | 
 42 |         Single_Cell_clustering = csv.DictReader(Single_Cell, delimiter="\t")
 43 | 
 44 |         for row in Single_Cell_clustering:
 45 | 
 46 |             cluster_files[row[config["cluster_name"]].replace(" ", "_")].append(row[config["file_basename"]])
 47 |             single_cell_files.add(row[config["file_basename"]])
 48 | 
 49 | 
 50 | 
 51 | #### MicroExonator ####
 52 | 
 53 | if ("deletion_penalty" in config)==False:
 54 |     config["deletion_penalty"]="6"
 55 |     
 56 | if ("insertion_penalty" in config)==False:
 57 |     config["insertion_penalty"]="2"
 58 | 
 59 | config["indel_penalty"] = ",".join([str(config["deletion_penalty"]), str(config["insertion_penalty"])])
 60 | 
 61 | if ("ME_DB" in config)==False:
 62 |     config["ME_DB"]="touch/VastDb.bed12"
 63 | 
 64 | if ("paired_samples" in config)==False:
 65 |     config["paired_samples"]="F"
 66 | 
 67 | if ("min_reads_PSI" in config)==False:
 68 |     config["min_reads_PSI"]="5"
 69 | 
 70 | 
 71 | include : "rules/init.smk"
 72 | include : "rules/Get_data.smk"
 73 | 
 74 | 
 75 | rule bamfiles:
 76 |     input:
 77 |         expand("Whippet/BAM/{samples}.bam", samples=DATA), 
 78 |         expand("Whippet/BAM/{samples}.bam.bai", samples=DATA)
 79 | 
 80 | 
 81 | if str2bool(config.get("downstream_only", False)):
 82 |     pass
 83 | elif str2bool(config.get("skip_discovery_and_quant", False)):
 84 |     include : "rules/Round2_post_processing.smk"
 85 | elif str2bool(config.get("skip_discovery", False)):
 86 |     include : "rules/Round2.smk"
 87 |     include : "rules/Round2_post_processing.smk"
 88 | else:
 89 |     include : "rules/Round1.smk"
 90 |     include : "rules/Round1_post_processing.smk"
 91 |     include : "rules/Round2.smk"
 92 |     include : "rules/Round2_post_processing.smk"
 93 | 
 94 | rule discovery:
 95 |     input:
 96 |         expand("Round1/{sample}.sam.row_ME.filter1", sample=DATA )
 97 | #        "Round2/ME_canonical_SJ_tags.de_novo.fa"
 98 | 
 99 | ##### Downstream Analysis ####
100 | 
101 | if "whippet_bin_folder" in config:
102 |    include : "rules/Whippet_quant.smk"
103 | 
104 | if "whippet_delta" in config:
105 |    with open(config["whippet_delta"], 'r') as stream:
106 |       whippet_delta = yaml.safe_load(stream)
107 |    include : "rules/Whippet_delta.smk"
108 | 
109 | 
110 | #### Single Cell ###
111 | 
112 | if not "Single_Cell" in config:
113 |    config["Single_Cell"]="F"
114 | 
115 | if str2bool(config["Single_Cell"]):
116 |    include : "rules/Snakepool.py"
117 |    include : "rules/pseudo_pool.smk"
118 | 
119 | #### Benchmark ####
120 | 
121 | #include : "rules/Benchmark.smk
122 | 
123 | 
124 | 
125 | #### Re-run incomplete round1 ####
126 | 
127 | import os
128 | 
129 | round1_incomplete = []
130 | 
131 | for file in DATA:
132 |     if os.path.isfile('./Round1/' + file  + '.sam.row_ME.filter1')!=True:
133 |         round1_incomplete.append(file)
134 |   
135 | rule rerun_incomplete_round1:
136 |     input:
137 |         expand("Round1/{sample}.sam.row_ME.filter1", sample=round1_incomplete )
138 |         
139 |         
140 |         
141 | round2_incomplete = []
142 | 
143 | for file in DATA:
144 |     if os.path.isfile('./Round2/' + file  + '.sam.pre_processed.filter1.ME_SJ_coverage')!=True:
145 |         round2_incomplete.append(file)
146 |   
147 | rule rerun_incomplete_round2:
148 |     input:
149 |         expand("Round2/{sample}.sam.pre_processed.filter1.ME_SJ_coverage", sample=round2_incomplete )
150 |     
151 |     
152 | include : "rules/sashimi.smk"
153 | 


--------------------------------------------------------------------------------
/PWM/Human/hg19_GT_AG_U2_3.good.matrix:
--------------------------------------------------------------------------------
 1 | A	C	G	T
 2 | 0.113282768418731	0.28224866187244	0.124165705041168	0.480302864667662
 3 | 0.102437941129138	0.27905292210387	0.117779669694435	0.500729467072556
 4 | 0.0934659153393549	0.280898502651647	0.108372108672138	0.51726347333686
 5 | 0.0858168278183322	0.261479075471941	0.103641107208957	0.54906298950077
 6 | 0.0878692876015532	0.284682214984111	0.109673270179273	0.517775227235063
 7 | 0.0987522242239905	0.296523329118078	0.114725478876432	0.489998967781499
 8 | 0.108900195141561	0.324043711622541	0.105410469091044	0.461645624144854
 9 | 0.113745524603276	0.336391135464306	0.0922355283075035	0.457627811624914
10 | 0.0869165542804294	0.343577466800782	0.0651125717027097	0.504393407216078
11 | 0.0898074193862394	0.298875219373652	0.063942070765329	0.547375290474779
12 | 0.239419215945119	0.273510736270133	0.205616237711646	0.281453810073102
13 | 0.0597772651044163	0.646824316628888	0.00363677363339388	0.289761644633302
14 | 0.999999836674288	5.44419040642188e-08	5.44419040642188e-08	5.44419040642188e-08
15 | 5.44419040642188e-08	5.44419040642188e-08	0.999999836674288	5.44419040642188e-08
16 | 0.256470420298032	0.143568799649655	0.487298649339914	0.112662130712398
17 | 0.245941356052012	0.190350727812039	0.195789474028054	0.367918442107895
18 | 0.258746091887917	0.234448670104056	0.236626346266625	0.270178891741403
19 | 


--------------------------------------------------------------------------------
/PWM/Human/hg19_GT_AG_U2_5.good.matrix:
--------------------------------------------------------------------------------
 1 | A	C	G	T
 2 | 0.334191682540111	0.362272816656435	0.184356674174568	0.119178826628885
 3 | 0.636937666850825	0.107185275163538	0.115226344393823	0.140650713591813
 4 | 0.0994980783096703	0.0267038083854034	0.805261145836577	0.0685369674683491
 5 | 5.44419040642188e-08	5.44419040642188e-08	0.999999836674288	5.44419040642188e-08
 6 | 5.44419040642188e-08	5.44419040642188e-08	5.44419040642188e-08	0.999999836674288
 7 | 0.59709708145663	0.027112122665885	0.349772955483291	0.0260178403941942
 8 | 0.699219205100292	0.0712155091483087	0.118830398442874	0.110734887308525
 9 | 0.0886695835912972	0.0543439630788073	0.781791240994492	0.0751952123354031
10 | 0.180088428895933	0.149170871577864	0.193464804724512	0.477275894801691
11 | 0.295799251794024	0.193960226051496	0.294514422858109	0.215726099296371
12 | 0.225972065641257	0.250378371233246	0.236942109310197	0.2867074538153
13 | 0.223870608144378	0.262028938702989	0.242048759911421	0.272051693241212
14 | 0.226886689629536	0.237960172916198	0.255446912501625	0.279706224952641
15 | 


--------------------------------------------------------------------------------
/PWM/Mouse/mm10_GT_AG_U2_3.good.matrix:
--------------------------------------------------------------------------------
 1 | A	C	G	T
 2 | 0.108773468722263	0.285810783269321	0.1286274787743	0.476788269234116
 3 | 0.100375016373261	0.281768992295637	0.122713640139999	0.495142351191103
 4 | 0.0920681626865521	0.277984822559651	0.112780910197587	0.517166104556209
 5 | 0.0808874009703962	0.263214538264883	0.108618895979643	0.547279164785077
 6 | 0.0832460665244445	0.279496200487488	0.113914443643465	0.523343289344602
 7 | 0.0920681626865521	0.297793033280542	0.119072593313847	0.491066210719059
 8 | 0.105246920223977	0.331054797525737	0.108573096648497	0.455125185601789
 9 | 0.110473768891079	0.336401869437099	0.094152032253721	0.458972329418101
10 | 0.088713361680066	0.347376534163095	0.064456890921565	0.499453213235274
11 | 0.0863604210424111	0.301279507364074	0.0613081869052384	0.551051884688276
12 | 0.252234148622493	0.268109341781173	0.201986557438315	0.277669952158019
13 | 0.0596823106495353	0.644299322902688	0.00347508149990978	0.292543284947867
14 | 0.999999828252508	5.72491639332099e-08	5.72491639332099e-08	5.72491639332099e-08
15 | 5.72491639332099e-08	5.72491639332099e-08	0.999999828252508	5.72491639332099e-08
16 | 0.256545010666664	0.142596274774003	0.487110293491274	0.113748421068059
17 | 0.245404323365262	0.1904279512402	0.19521970626141	0.368948019133129
18 | 0.260712749801002	0.235322745596623	0.231847721345877	0.272116783256497
19 | 


--------------------------------------------------------------------------------
/PWM/Mouse/mm10_GT_AG_U2_5.good.matrix:
--------------------------------------------------------------------------------
 1 | A	C	G	T
 2 | 0.336585066761685	0.360131647887414	0.184525562438686	0.118757722912215
 3 | 0.637057303665137	0.107680009691138	0.11313013009758	0.142132556546144
 4 | 0.100048696138842	0.0261285756682809	0.803160027900953	0.0706627002919249
 5 | 5.72491639332099e-08	5.72491639332099e-08	0.999999828252508	5.72491639332099e-08
 6 | 5.72491639332099e-08	5.72491639332099e-08	5.72491639332099e-08	0.999999828252508
 7 | 0.608776216682132	0.0282009954026631	0.336224397028906	0.0267983908862995
 8 | 0.701914881485071	0.0713496902591235	0.115213999664749	0.111521428591057
 9 | 0.0799542395982849	0.0569458006135278	0.793198673376574	0.0699012864116132
10 | 0.173985991358582	0.156794067429439	0.188699026489417	0.480520914722561
11 | 0.297627010705136	0.199450419476074	0.293304698828178	0.209617870990612
12 | 0.223231722173929	0.250115929556965	0.239272937908015	0.287379410361091
13 | 0.213465014806924	0.270422208004074	0.24902247052584	0.267090306663161
14 | 0.220403613475629	0.247516817514397	0.256676683743711	0.275402885266264
15 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Introduction
 2 | 
 3 | MicroExonator is a fully-integrated computational pipeline that allows for systematic de novo discovery and quantification of microexons using raw RNA-seq data for any organism with a gene annotation. Compared to other available methods MicroExonator is more sensitive for discovering smaller microexons and it provides higher specificity for all lengths. Moreover, MicroExonator provides integrated downstream comparative analysis between cell types or tissues using [Whippet](https://github.com/timbitz/Whippet.jl) ([Sterne-Weiler et al. 2018](https://doi.org/10.1016/j.molcel.2018.08.018)).
 4 | 
 5 | 
 6 | # Installation
 7 | 
 8 | Start by cloning MicroExonator
 9 | 
10 |     git clone https://github.com/hemberg-lab/MicroExonator
11 | 
12 | Install [Miniconda 3](https://docs.conda.io/en/latest/miniconda.html)
13 | 
14 |     wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
15 |     chmod +x Miniconda3-latest-Linux-x86_64.sh 
16 |     ./Miniconda3-latest-Linux-x86_64.sh
17 | 
18 | 
19 | Finally, create an enviroment to run [snakemake](https://snakemake.readthedocs.io/en/stable/)
20 | 
21 |     conda create -n snakemake_env -c bioconda -c conda-forge snakemake
22 |     
23 |     
24 | # Documentation 
25 | 
26 | Extended documentation can be found at https://microexonator.readthedocs.io.
27 | 
28 | 
29 | # Contact
30 | 
31 | For questions, ideas, feature requests and potential bug reports please contact geparada@utoronto.ca
32 | 
33 | 
34 | 


--------------------------------------------------------------------------------
/docs/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/.DS_Store


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = source
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/docs/build/doctrees/differential_inclusion_analysis.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/doctrees/differential_inclusion_analysis.doctree


--------------------------------------------------------------------------------
/docs/build/doctrees/discovery_and_quantification.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/doctrees/discovery_and_quantification.doctree


--------------------------------------------------------------------------------
/docs/build/doctrees/environment.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/doctrees/environment.pickle


--------------------------------------------------------------------------------
/docs/build/doctrees/index.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/doctrees/index.doctree


--------------------------------------------------------------------------------
/docs/build/doctrees/install.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/doctrees/install.doctree


--------------------------------------------------------------------------------
/docs/build/doctrees/licence.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/doctrees/licence.doctree


--------------------------------------------------------------------------------
/docs/build/doctrees/setup.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/doctrees/setup.doctree


--------------------------------------------------------------------------------
/docs/build/doctrees/support.doctree:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/doctrees/support.doctree


--------------------------------------------------------------------------------
/docs/build/html/.buildinfo:
--------------------------------------------------------------------------------
1 | # Sphinx build info version 1
2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
3 | config: de1274e5a5ba83764ffdbcdd6538995f
4 | tags: 645f666f9bcd5a90fca523b33c5a78b7
5 | 


--------------------------------------------------------------------------------
/docs/build/html/_sources/differential_inclusion_analysis.rst.txt:
--------------------------------------------------------------------------------
 1 | .. differential_inclusion_analysis
 2 | 
 3 | 
 4 | ===============================
 5 | Differential inclusion analysis
 6 | ===============================
 7 | 
 8 | 
 9 | On this secction we descrive the a downstream module that was developed to perform alternative splicing analysis between sample groups. To quantify and assess differential inclusion of novel and annotated microexons, on this moudle we have integrated `Whippet <https://github.com/timbitz/Whippet.jl>`_, which enables a fast and accurate assesment of alterntive splicing events across user-defined sample groups.
10 | 
11 | Install
12 | =======
13 | 
14 | To run this downstream module for the first time you need to create a environment that has `snakemake` and the version of `julia` that is compatible with `Whipet v0.11`. To creat this enviroment execute the following command inside ``MicroExonator/`` folder:
15 | 
16 | .. code-block:: bash
17 | 
18 |     conda env create -f Whippet/julia_0.6.1.yaml
19 | 
20 | Then, activate the newly created enviroment:
21 | 
22 | .. code-block:: bash
23 | 
24 |     source activate julia_0.6.1
25 | 
26 | Enter julia's interactive mode:
27 | 
28 | .. code-block:: bash
29 | 
30 |     julia
31 | 
32 | Install Whippet by excecuting the following command on the interactive session:
33 | 
34 | .. code-block:: bash
35 | 
36 |     Pkg.add("Whippet")
37 | 
38 | .. note::
39 | 
40 |     To exit julia interactive session press ``control + d``.
41 | 
42 | 
43 | Configure
44 | =========
45 | 
46 | Here there is an list of the additonal keys that need to be incorporated as a part of config.yaml:
47 | 
48 | .. code-block:: bash
49 |     
50 |     whippet_bin_folder : /path/to/miniconda/envs/julia_0.6.1/share/julia/site/v0.6/Whippet/bin
51 |     Gene_anontation_GTF : /path/to/gene.annotation.gtf
52 |     whippet_delta : /path/to/whippet_delta.yaml
53 | 
54 | * ``whippet_bin_folder`` correspodn t the path of whippet binary folder (``Whippet/bin``) that is located inside ``julia_0.6.1`` virtual enviroment folder. The specific routh to ``Whippet/bin`` may variate, so it is important that you manually identify the correct path.
55 | 
56 | * ``Gene_anontation_GTF`` corresponds the path of a gene annotation file as Gene Transfer Format (`GTF <https://en.wikipedia.org/wiki/Gene_transfer_format#:~:text=The%20Gene%20transfer%20format%20(GTF,conventions%20specific%20to%20gene%20information.>`_). Working with the same annotation data base than the one used on the previous steps is recommended. 
57 | 
58 | * ``whippet_delta`` indicate the path of a `YAML <https://en.wikipedia.org/wiki/YAML#:~:text=Open%20format%3F&text=YAML%20(a%20recursive%20acronym%20for,is%20being%20stored%20or%20transmitted.>`_ file you need to create to provide information about the desired comparisons between groups of samples.
59 | 
60 | 
61 | whippet_delta YAML file
62 | -----------------------
63 | 
64 | This file can contain the information to schedule any number of comparison between sample groups of any size. Every comparison should have the following structure inside the YAML file:
65 | 
66 | .. code-block:: bash
67 | 
68 |     comparison_ID:
69 |       A : sample1,sample2,sample3
70 |       B : sample4,sample5,sample6
71 | 
72 | Where ``sample1 ... sample6`` correspond to base names given to each RNA-seq samples at the corresponding input files (See :doc:`setup`) and `comparison_ID` to any given name for the sheduled comparison. As an example see the :download:`YAML file <../../Examples/Runs/Parada_et_al/whippet_delta.yaml>` we used in our publication. 
73 | 
74 | .. warning::
75 | 
76 |     Inside this YAML file sample groups must be named ``A`` and ``B``.
77 | 
78 | 
79 | Optional parameters
80 | -------------------
81 | 
82 | If you just want to skip Discovery and Quantification modules and just asses alternative splicing events annotated at the provided GTF file, then include the following like at the configuratio file:
83 | 
84 | .. code-block:: bash
85 | 
86 |     downstream_only : T
87 | 
88 | Output
89 | ======
90 | 
91 | Quantification files generated per each sample can be found at ``Whipet/Quant``. Differentially included microexon analyses that can be obtained with Whippet, are reported at ``Whippet/Delta`` folder. MicroExonator performs these analyses using both PSI values calculated internally by the pipeline and PSI values directly calculated with Whippet. These results are reported under the same format than the ``diff.gz`` descrived at the `Whippet's GitHub page <https://github.com/timbitz/Whippet.jl#output-formats>`_. However, to provide easier interpretation, we filter the Whippet splicing nodes that correspond to microexon inclusion events, these are reported as ``.microexons`` files, where ``.diff.ME.microexons`` files correspond to the output when MicroExonator PSI values are taken as input and ``.diff.microexons`` when Whippet PSI  values are taken as input.
92 | 
93 | 


--------------------------------------------------------------------------------
/docs/build/html/_sources/index.rst.txt:
--------------------------------------------------------------------------------
 1 | ========
 2 | Overview
 3 | ========
 4 | 
 5 | MicroExonator is a fully-integrated computational pipeline that allows for systematic de novo discovery and quantification
 6 | of microexons using raw RNA-seq data for any organism with a gene annotation. Compared to other available methods MicroExonator
 7 | is more sensitive for discovering smaller microexons and it provides higher specificity for all lengths. Moreover, MicroExonator
 8 | provides integrated downstream comparative analysis between cell types or tissues using
 9 | `Whippet <https://github.com/timbitz/Whippet.jl>`_. (`Sterne-Weiler et al. 2018 <https://doi.org/10.1016/j.molcel.2018.08.018>`_). 
10 | As a proof of principle MicroExonator  identified X novel microexons in Y RNA-seq samples from mouse early development to provide a systematic characterization 
11 | based on time and tissue specificity.
12 | 
13 | MicroExonator pipeline is divided in several modules:
14 |     * Discover
15 |     * Quantification
16 |     * Differential Inclusion
17 |     * Single cell analysis
18 | 
19 | **Support**
20 | 
21 |     For questions, ideas, feature requests and potential bug reports submit an issue on our GitHub page or write us at gp7@sanger.ac.uk.
22 | 
23 | .. toctree::
24 |     :name: MicroExonator-install
25 |     :maxdepth: 1
26 |     :hidden:
27 | 
28 |     install
29 | 
30 | .. toctree::
31 |     :name: MicroExonator-setup
32 |     :maxdepth: 1
33 |     :hidden:
34 | 
35 |     setup
36 | 
37 | .. toctree::
38 |     :name: MicroExonator-discovery-and-quantification
39 |     :maxdepth: 3
40 |     :hidden:
41 | 
42 |     discovery_and_quantification
43 | 
44 | .. toctree::
45 |     :name: MicroExonator-differential_inclusion_analysis
46 |     :maxdepth: 3
47 |     :hidden:
48 | 
49 |     differential_inclusion_analysis
50 | 
51 | .. toctree::
52 |     :name: MicroExonator-single_cell_analysis
53 |     :maxdepth: 3
54 |     :hidden:
55 | 
56 |     single_cell_analysis
57 | 
58 | .. toctree::
59 |     :name: MicroExonator-Licence
60 |     :maxdepth: 1
61 |     :hidden:
62 | 
63 |     licence
64 | 
65 | .. toctree::
66 |     :name: MicroExonator-Support
67 |     :maxdepth: 1
68 |     :hidden:
69 | 
70 |     support


--------------------------------------------------------------------------------
/docs/build/html/_sources/install.rst.txt:
--------------------------------------------------------------------------------
 1 | .. _Installation:
 2 |   
 3 | =====================
 4 | Installation
 5 | =====================
 6 | 
 7 | To install MicroExonator follow these instructions:
 8 | 
 9 | Clone repository
10 | =================
11 | Clone the github repository
12 | 
13 | .. code-block:: bash
14 | 
15 |   git clone https://github.com/hemberg-lab/MicroExonator
16 | 
17 | Install Miniconda 3
18 | 
19 | .. code-block:: bash
20 | 
21 |    wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
22 |    chmod +x Miniconda3-latest-Linux-x86_64.sh ./Miniconda3-latest-Linux-x86_64.sh
23 |   
24 | 
25 | 
26 | Set up a master virtual environment
27 | ===================================
28 | 
29 | Create a conda virtual enviroment with the necesary dependencies
30 | 
31 | .. code-block:: bash
32 | 
33 |   conda create -n snakemake_env -c bioconda -c conda-forge snakemake
34 | 
35 | 
36 | 


--------------------------------------------------------------------------------
/docs/build/html/_sources/licence.rst.txt:
--------------------------------------------------------------------------------
 1 | .. _Licence:
 2 |   
 3 | =====================
 4 | MIT License (MIT)
 5 | =====================
 6 | 
 7 | Copyright (c) 2020 Guillermo Parada
 8 | 
 9 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/docs/build/html/_sources/setup.rst.txt:
--------------------------------------------------------------------------------
 1 | .. _input_files:
 2 | 
 3 | ===========  
 4 | Setup
 5 | ===========
 6 | 
 7 | Before runnung MicroExonator there are several files that needs to be created inside ``MicroExonator/`` root folder:
 8 | 
 9 | RNA-seq samples
10 | ===============
11 | 
12 | Input RNA-seq data either a ``local_samples.tsv``, ``NCBI_accession_list.txt`` or ``sample_url.tsv`` needs to be defined.
13 | If you want to run MicroExonator over RNA-seq samples that are locally stored, they need to be defined inside ``local_samples.tsv``.
14 | MicroExonator can also download and run samples from NCBI if the corresponding SRA accession names are defined inside of ``NCBI_accession_list.txt``,
15 | in addition any ``fastq.gz`` that can be directly download from a URL can be included into the aalysis by defining them inside a ``sample_url.tsv``.
16 | You can find examples of these files inside the ``Examples/`` folder.
17 | Is posible to combine different types of input sources, but at least one of these files needs to be defined inside ``MicroExonator/`` root folder. 
18 | 
19 | Cluster configuration
20 | =====================
21 | 
22 | If you are working on a high performace cluster, then it is very likely that you need to submit jobs to queueing systems such as lsf, qsub, SLURM, etc.
23 | To make MicroExonator work with these queueing systems, you need to create a `cluster.json` file. 
24 | We currently provide in the Examples folder a ``cluster.json`` file to run MicroExonator with `lsf <https://www.ibm.com/support/knowledgecenter/en/SSETD4/product_welcome_platform_lsf.html>`_.
25 | To adapt MicroExonator to other quequing systems please see the `SnakeMake documentation <https://snakemake.readthedocs.io/en/stable/snakefiles/configuration.html?highlight=cluster.json#cluster-configuration>`_.
26 | 
27 | Config file
28 | ===========
29 | 
30 | Each MicroExonator's module has certain compulsory and optional parameters that needs to be defined inside a ``config.yaml`` file.
31 | The necesary content of ``config.yaml`` is described on each moudle section and examples can be found at the ``Examples/`` folder.
32 | 


--------------------------------------------------------------------------------
/docs/build/html/_sources/support.rst.txt:
--------------------------------------------------------------------------------
 1 | .. support
 2 |   
 3 | ========
 4 | Support
 5 | ========
 6 | 
 7 | For questions, ideas, feature requests and potential bug reports please contact gp7@sanger.ac.uk.
 8 | 
 9 | 
10 | 


--------------------------------------------------------------------------------
/docs/build/html/_static/css/badge_only.css:
--------------------------------------------------------------------------------
1 | .fa:before{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:after,.clearfix:before{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-style:normal;font-weight:400;src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713?#iefix) format("embedded-opentype"),url(fonts/fontawesome-webfont.woff2?af7ae505a9eed503f8b8e6982036873e) format("woff2"),url(fonts/fontawesome-webfont.woff?fee66e712a8a08eef5805a46892932ad) format("woff"),url(fonts/fontawesome-webfont.ttf?b06871f281fee6b241d60582ae9369b9) format("truetype"),url(fonts/fontawesome-webfont.svg?912ec66d7572ff821749319396470bde#FontAwesome) format("svg")}.fa:before{font-family:FontAwesome;font-style:normal;font-weight:400;line-height:1}.fa:before,a .fa{text-decoration:inherit}.fa:before,a .fa,li .fa{display:inline-block}li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-.8em}ul.fas li .fa{width:.8em}ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before,.icon-book:before{content:"\f02d"}.fa-caret-down:before,.icon-caret-down:before{content:"\f0d7"}.fa-caret-up:before,.icon-caret-up:before{content:"\f0d8"}.fa-caret-left:before,.icon-caret-left:before{content:"\f0d9"}.fa-caret-right:before,.icon-caret-right:before{content:"\f0da"}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;z-index:400}.rst-versions a{color:#2980b9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27ae60}.rst-versions .rst-current-version:after{clear:both;content:"";display:block}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#e74c3c;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#f1c40f;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:grey;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:1px solid #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge .icon-book{float:none;line-height:30px}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book,.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge>.rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width:768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}}


--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/Roboto-Slab-Bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/html/_static/css/fonts/Roboto-Slab-Bold.woff


--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/Roboto-Slab-Bold.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/html/_static/css/fonts/Roboto-Slab-Bold.woff2


--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/Roboto-Slab-Regular.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/html/_static/css/fonts/Roboto-Slab-Regular.woff


--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/Roboto-Slab-Regular.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/html/_static/css/fonts/Roboto-Slab-Regular.woff2


--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/fontawesome-webfont.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/html/_static/css/fonts/fontawesome-webfont.eot


--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/fontawesome-webfont.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/html/_static/css/fonts/fontawesome-webfont.ttf


--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/fontawesome-webfont.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/html/_static/css/fonts/fontawesome-webfont.woff


--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/fontawesome-webfont.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/html/_static/css/fonts/fontawesome-webfont.woff2


--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/lato-bold-italic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/html/_static/css/fonts/lato-bold-italic.woff


--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/lato-bold-italic.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/html/_static/css/fonts/lato-bold-italic.woff2


--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/lato-bold.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/html/_static/css/fonts/lato-bold.woff


--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/lato-bold.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/html/_static/css/fonts/lato-bold.woff2


--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/lato-normal-italic.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/html/_static/css/fonts/lato-normal-italic.woff


--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/lato-normal-italic.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/html/_static/css/fonts/lato-normal-italic.woff2


--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/lato-normal.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/html/_static/css/fonts/lato-normal.woff


--------------------------------------------------------------------------------
/docs/build/html/_static/css/fonts/lato-normal.woff2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/html/_static/css/fonts/lato-normal.woff2


--------------------------------------------------------------------------------
/docs/build/html/_static/documentation_options.js:
--------------------------------------------------------------------------------
 1 | var DOCUMENTATION_OPTIONS = {
 2 |     URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'),
 3 |     VERSION: '',
 4 |     LANGUAGE: 'None',
 5 |     COLLAPSE_INDEX: false,
 6 |     BUILDER: 'html',
 7 |     FILE_SUFFIX: '.html',
 8 |     LINK_SUFFIX: '.html',
 9 |     HAS_SOURCE: true,
10 |     SOURCELINK_SUFFIX: '.txt',
11 |     NAVIGATION_WITH_KEYS: false
12 | };


--------------------------------------------------------------------------------
/docs/build/html/_static/file.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/html/_static/file.png


--------------------------------------------------------------------------------
/docs/build/html/_static/js/badge_only.js:
--------------------------------------------------------------------------------
1 | !function(e){var t={};function r(n){if(t[n])return t[n].exports;var o=t[n]={i:n,l:!1,exports:{}};return e[n].call(o.exports,o,o.exports,r),o.l=!0,o.exports}r.m=e,r.c=t,r.d=function(e,t,n){r.o(e,t)||Object.defineProperty(e,t,{enumerable:!0,get:n})},r.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},r.t=function(e,t){if(1&t&&(e=r(e)),8&t)return e;if(4&t&&"object"==typeof e&&e&&e.__esModule)return e;var n=Object.create(null);if(r.r(n),Object.defineProperty(n,"default",{enumerable:!0,value:e}),2&t&&"string"!=typeof e)for(var o in e)r.d(n,o,function(t){return e[t]}.bind(null,o));return n},r.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return r.d(t,"a",t),t},r.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r.p="",r(r.s=4)}({4:function(e,t,r){}});


--------------------------------------------------------------------------------
/docs/build/html/_static/js/html5shiv-printshiv.min.js:
--------------------------------------------------------------------------------
1 | /**
2 | * @preserve HTML5 Shiv 3.7.3-pre | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed
3 | */
4 | !function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x<style>"+b+"</style>",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=y.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=y.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),y.elements=c+" "+a,j(b)}function f(a){var b=x[a[v]];return b||(b={},w++,a[v]=w,x[w]=b),b}function g(a,c,d){if(c||(c=b),q)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():u.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||t.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),q)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return y.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(y,b.frag)}function j(a){a||(a=b);var d=f(a);return!y.shivCSS||p||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),q||i(a,d),a}function k(a){for(var b,c=a.getElementsByTagName("*"),e=c.length,f=RegExp("^(?:"+d().join("|")+")$","i"),g=[];e--;)b=c[e],f.test(b.nodeName)&&g.push(b.applyElement(l(b)));return g}function l(a){for(var b,c=a.attributes,d=c.length,e=a.ownerDocument.createElement(A+":"+a.nodeName);d--;)b=c[d],b.specified&&e.setAttribute(b.nodeName,b.nodeValue);return e.style.cssText=a.style.cssText,e}function m(a){for(var b,c=a.split("{"),e=c.length,f=RegExp("(^|[\\s,>+~])("+d().join("|")+")(?=[[\\s,>+~#.:]|$)","gi"),g="$1"+A+"\\:$2";e--;)b=c[e]=c[e].split("}"),b[b.length-1]=b[b.length-1].replace(f,g),c[e]=b.join("}");return c.join("{")}function n(a){for(var b=a.length;b--;)a[b].removeNode()}function o(a){function b(){clearTimeout(g._removeSheetTimer),d&&d.removeNode(!0),d=null}var d,e,g=f(a),h=a.namespaces,i=a.parentWindow;return!B||a.printShived?a:("undefined"==typeof h[A]&&h.add(A),i.attachEvent("onbeforeprint",function(){b();for(var f,g,h,i=a.styleSheets,j=[],l=i.length,n=Array(l);l--;)n[l]=i[l];for(;h=n.pop();)if(!h.disabled&&z.test(h.media)){try{f=h.imports,g=f.length}catch(o){g=0}for(l=0;g>l;l++)n.push(f[l]);try{j.push(h.cssText)}catch(o){}}j=m(j.reverse().join("")),e=k(a),d=c(a,j)}),i.attachEvent("onafterprint",function(){n(e),clearTimeout(g._removeSheetTimer),g._removeSheetTimer=setTimeout(b,500)}),a.printShived=!0,a)}var p,q,r="3.7.3",s=a.html5||{},t=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,u=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,v="_html5shiv",w=0,x={};!function(){try{var a=b.createElement("a");a.innerHTML="<xyz></xyz>",p="hidden"in a,q=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){p=!0,q=!0}}();var y={elements:s.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:r,shivCSS:s.shivCSS!==!1,supportsUnknownElements:q,shivMethods:s.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=y,j(b);var z=/^$|\b(?:all|print)\b/,A="html5shiv",B=!q&&function(){var c=b.documentElement;return!("undefined"==typeof b.namespaces||"undefined"==typeof b.parentWindow||"undefined"==typeof c.applyElement||"undefined"==typeof c.removeNode||"undefined"==typeof a.attachEvent)}();y.type+=" print",y.shivPrint=o,o(b),"object"==typeof module&&module.exports&&(module.exports=y)}("undefined"!=typeof window?window:this,document);


--------------------------------------------------------------------------------
/docs/build/html/_static/js/html5shiv.min.js:
--------------------------------------------------------------------------------
1 | /**
2 | * @preserve HTML5 Shiv 3.7.3 | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed
3 | */
4 | !function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x<style>"+b+"</style>",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=t.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=t.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),t.elements=c+" "+a,j(b)}function f(a){var b=s[a[q]];return b||(b={},r++,a[q]=r,s[r]=b),b}function g(a,c,d){if(c||(c=b),l)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():p.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||o.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),l)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return t.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(t,b.frag)}function j(a){a||(a=b);var d=f(a);return!t.shivCSS||k||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),l||i(a,d),a}var k,l,m="3.7.3-pre",n=a.html5||{},o=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,p=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,q="_html5shiv",r=0,s={};!function(){try{var a=b.createElement("a");a.innerHTML="<xyz></xyz>",k="hidden"in a,l=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){k=!0,l=!0}}();var t={elements:n.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:m,shivCSS:n.shivCSS!==!1,supportsUnknownElements:l,shivMethods:n.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=t,j(b),"object"==typeof module&&module.exports&&(module.exports=t)}("undefined"!=typeof window?window:this,document);


--------------------------------------------------------------------------------
/docs/build/html/_static/js/theme.js:
--------------------------------------------------------------------------------
1 | !function(n){var e={};function t(i){if(e[i])return e[i].exports;var o=e[i]={i:i,l:!1,exports:{}};return n[i].call(o.exports,o,o.exports,t),o.l=!0,o.exports}t.m=n,t.c=e,t.d=function(n,e,i){t.o(n,e)||Object.defineProperty(n,e,{enumerable:!0,get:i})},t.r=function(n){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(n,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(n,"__esModule",{value:!0})},t.t=function(n,e){if(1&e&&(n=t(n)),8&e)return n;if(4&e&&"object"==typeof n&&n&&n.__esModule)return n;var i=Object.create(null);if(t.r(i),Object.defineProperty(i,"default",{enumerable:!0,value:n}),2&e&&"string"!=typeof n)for(var o in n)t.d(i,o,function(e){return n[e]}.bind(null,o));return i},t.n=function(n){var e=n&&n.__esModule?function(){return n.default}:function(){return n};return t.d(e,"a",e),e},t.o=function(n,e){return Object.prototype.hasOwnProperty.call(n,e)},t.p="",t(t.s=0)}([function(n,e,t){t(1),n.exports=t(3)},function(n,e,t){(function(){var e="undefined"!=typeof window?window.jQuery:t(2);n.exports.ThemeNav={navBar:null,win:null,winScroll:!1,winResize:!1,linkScroll:!1,winPosition:0,winHeight:null,docHeight:null,isRunning:!1,enable:function(n){var t=this;void 0===n&&(n=!0),t.isRunning||(t.isRunning=!0,e((function(e){t.init(e),t.reset(),t.win.on("hashchange",t.reset),n&&t.win.on("scroll",(function(){t.linkScroll||t.winScroll||(t.winScroll=!0,requestAnimationFrame((function(){t.onScroll()})))})),t.win.on("resize",(function(){t.winResize||(t.winResize=!0,requestAnimationFrame((function(){t.onResize()})))})),t.onResize()})))},enableSticky:function(){this.enable(!0)},init:function(n){n(document);var e=this;this.navBar=n("div.wy-side-scroll:first"),this.win=n(window),n(document).on("click","[data-toggle='wy-nav-top']",(function(){n("[data-toggle='wy-nav-shift']").toggleClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift")})).on("click",".wy-menu-vertical .current ul li a",(function(){var t=n(this);n("[data-toggle='wy-nav-shift']").removeClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift"),e.toggleCurrent(t),e.hashChange()})).on("click","[data-toggle='rst-current-version']",(function(){n("[data-toggle='rst-versions']").toggleClass("shift-up")})),n("table.docutils:not(.field-list,.footnote,.citation)").wrap("<div class='wy-table-responsive'></div>"),n("table.docutils.footnote").wrap("<div class='wy-table-responsive footnote'></div>"),n("table.docutils.citation").wrap("<div class='wy-table-responsive citation'></div>"),n(".wy-menu-vertical ul").not(".simple").siblings("a").each((function(){var t=n(this);expand=n('<span class="toctree-expand"></span>'),expand.on("click",(function(n){return e.toggleCurrent(t),n.stopPropagation(),!1})),t.prepend(expand)}))},reset:function(){var n=encodeURI(window.location.hash)||"#";try{var e=$(".wy-menu-vertical"),t=e.find('[href="'+n+'"]');if(0===t.length){var i=$('.document [id="'+n.substring(1)+'"]').closest("div.section");0===(t=e.find('[href="#'+i.attr("id")+'"]')).length&&(t=e.find('[href="#"]'))}t.length>0&&($(".wy-menu-vertical .current").removeClass("current"),t.addClass("current"),t.closest("li.toctree-l1").addClass("current"),t.closest("li.toctree-l1").parent().addClass("current"),t.closest("li.toctree-l1").addClass("current"),t.closest("li.toctree-l2").addClass("current"),t.closest("li.toctree-l3").addClass("current"),t.closest("li.toctree-l4").addClass("current"),t.closest("li.toctree-l5").addClass("current"),t[0].scrollIntoView())}catch(n){console.log("Error expanding nav for anchor",n)}},onScroll:function(){this.winScroll=!1;var n=this.win.scrollTop(),e=n+this.winHeight,t=this.navBar.scrollTop()+(n-this.winPosition);n<0||e>this.docHeight||(this.navBar.scrollTop(t),this.winPosition=n)},onResize:function(){this.winResize=!1,this.winHeight=this.win.height(),this.docHeight=$(document).height()},hashChange:function(){this.linkScroll=!0,this.win.one("hashchange",(function(){this.linkScroll=!1}))},toggleCurrent:function(n){var e=n.closest("li");e.siblings("li.current").removeClass("current"),e.siblings().find("li.current").removeClass("current"),e.find("> ul li.current").removeClass("current"),e.toggleClass("current")}},"undefined"!=typeof window&&(window.SphinxRtdTheme={Navigation:n.exports.ThemeNav,StickyNav:n.exports.ThemeNav}),function(){for(var n=0,e=["ms","moz","webkit","o"],t=0;t<e.length&&!window.requestAnimationFrame;++t)window.requestAnimationFrame=window[e[t]+"RequestAnimationFrame"],window.cancelAnimationFrame=window[e[t]+"CancelAnimationFrame"]||window[e[t]+"CancelRequestAnimationFrame"];window.requestAnimationFrame||(window.requestAnimationFrame=function(e,t){var i=(new Date).getTime(),o=Math.max(0,16-(i-n)),r=window.setTimeout((function(){e(i+o)}),o);return n=i+o,r}),window.cancelAnimationFrame||(window.cancelAnimationFrame=function(n){clearTimeout(n)})}()}).call(window)},function(n,e){n.exports=jQuery},function(n,e,t){}]);


--------------------------------------------------------------------------------
/docs/build/html/_static/minus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/html/_static/minus.png


--------------------------------------------------------------------------------
/docs/build/html/_static/plus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/html/_static/plus.png


--------------------------------------------------------------------------------
/docs/build/html/_static/pygments.css:
--------------------------------------------------------------------------------
 1 | .highlight .hll { background-color: #ffffcc }
 2 | .highlight  { background: #f8f8f8; }
 3 | .highlight .c { color: #408080; font-style: italic } /* Comment */
 4 | .highlight .err { border: 1px solid #FF0000 } /* Error */
 5 | .highlight .k { color: #008000; font-weight: bold } /* Keyword */
 6 | .highlight .o { color: #666666 } /* Operator */
 7 | .highlight .ch { color: #408080; font-style: italic } /* Comment.Hashbang */
 8 | .highlight .cm { color: #408080; font-style: italic } /* Comment.Multiline */
 9 | .highlight .cp { color: #BC7A00 } /* Comment.Preproc */
10 | .highlight .cpf { color: #408080; font-style: italic } /* Comment.PreprocFile */
11 | .highlight .c1 { color: #408080; font-style: italic } /* Comment.Single */
12 | .highlight .cs { color: #408080; font-style: italic } /* Comment.Special */
13 | .highlight .gd { color: #A00000 } /* Generic.Deleted */
14 | .highlight .ge { font-style: italic } /* Generic.Emph */
15 | .highlight .gr { color: #FF0000 } /* Generic.Error */
16 | .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */
17 | .highlight .gi { color: #00A000 } /* Generic.Inserted */
18 | .highlight .go { color: #888888 } /* Generic.Output */
19 | .highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */
20 | .highlight .gs { font-weight: bold } /* Generic.Strong */
21 | .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
22 | .highlight .gt { color: #0044DD } /* Generic.Traceback */
23 | .highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */
24 | .highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */
25 | .highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */
26 | .highlight .kp { color: #008000 } /* Keyword.Pseudo */
27 | .highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */
28 | .highlight .kt { color: #B00040 } /* Keyword.Type */
29 | .highlight .m { color: #666666 } /* Literal.Number */
30 | .highlight .s { color: #BA2121 } /* Literal.String */
31 | .highlight .na { color: #7D9029 } /* Name.Attribute */
32 | .highlight .nb { color: #008000 } /* Name.Builtin */
33 | .highlight .nc { color: #0000FF; font-weight: bold } /* Name.Class */
34 | .highlight .no { color: #880000 } /* Name.Constant */
35 | .highlight .nd { color: #AA22FF } /* Name.Decorator */
36 | .highlight .ni { color: #999999; font-weight: bold } /* Name.Entity */
37 | .highlight .ne { color: #D2413A; font-weight: bold } /* Name.Exception */
38 | .highlight .nf { color: #0000FF } /* Name.Function */
39 | .highlight .nl { color: #A0A000 } /* Name.Label */
40 | .highlight .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */
41 | .highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */
42 | .highlight .nv { color: #19177C } /* Name.Variable */
43 | .highlight .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */
44 | .highlight .w { color: #bbbbbb } /* Text.Whitespace */
45 | .highlight .mb { color: #666666 } /* Literal.Number.Bin */
46 | .highlight .mf { color: #666666 } /* Literal.Number.Float */
47 | .highlight .mh { color: #666666 } /* Literal.Number.Hex */
48 | .highlight .mi { color: #666666 } /* Literal.Number.Integer */
49 | .highlight .mo { color: #666666 } /* Literal.Number.Oct */
50 | .highlight .sa { color: #BA2121 } /* Literal.String.Affix */
51 | .highlight .sb { color: #BA2121 } /* Literal.String.Backtick */
52 | .highlight .sc { color: #BA2121 } /* Literal.String.Char */
53 | .highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */
54 | .highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */
55 | .highlight .s2 { color: #BA2121 } /* Literal.String.Double */
56 | .highlight .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */
57 | .highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */
58 | .highlight .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */
59 | .highlight .sx { color: #008000 } /* Literal.String.Other */
60 | .highlight .sr { color: #BB6688 } /* Literal.String.Regex */
61 | .highlight .s1 { color: #BA2121 } /* Literal.String.Single */
62 | .highlight .ss { color: #19177C } /* Literal.String.Symbol */
63 | .highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */
64 | .highlight .fm { color: #0000FF } /* Name.Function.Magic */
65 | .highlight .vc { color: #19177C } /* Name.Variable.Class */
66 | .highlight .vg { color: #19177C } /* Name.Variable.Global */
67 | .highlight .vi { color: #19177C } /* Name.Variable.Instance */
68 | .highlight .vm { color: #19177C } /* Name.Variable.Magic */
69 | .highlight .il { color: #666666 } /* Literal.Number.Integer.Long */


--------------------------------------------------------------------------------
/docs/build/html/genindex.html:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | <!DOCTYPE html>
  4 | <html class="writer-html5" lang="en" >
  5 | <head>
  6 |   <meta charset="utf-8">
  7 |   
  8 |   <meta name="viewport" content="width=device-width, initial-scale=1.0">
  9 |   
 10 |   <title>Index &mdash; MicroExonator  documentation</title>
 11 |   
 12 | 
 13 |   
 14 |   <link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
 15 |   <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
 16 |   <link rel="stylesheet" href="_static/theme_overrides.css" type="text/css" />
 17 | 
 18 |   
 19 |   
 20 |   
 21 |   
 22 | 
 23 |   
 24 |   <!--[if lt IE 9]>
 25 |     <script src="_static/js/html5shiv.min.js"></script>
 26 |   <![endif]-->
 27 |   
 28 |     
 29 |       <script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script>
 30 |         <script src="_static/jquery.js"></script>
 31 |         <script src="_static/underscore.js"></script>
 32 |         <script src="_static/doctools.js"></script>
 33 |         <script src="_static/language_data.js"></script>
 34 |     
 35 |     <script type="text/javascript" src="_static/js/theme.js"></script>
 36 | 
 37 |     
 38 |     <link rel="index" title="Index" href="#" />
 39 |     <link rel="search" title="Search" href="search.html" /> 
 40 | </head>
 41 | 
 42 | <body class="wy-body-for-nav">
 43 | 
 44 |    
 45 |   <div class="wy-grid-for-nav">
 46 |     
 47 |     <nav data-toggle="wy-nav-shift" class="wy-nav-side">
 48 |       <div class="wy-side-scroll">
 49 |         <div class="wy-side-nav-search" >
 50 |           
 51 | 
 52 |           
 53 |             <a href="index.html" class="icon icon-home" alt="Documentation Home"> MicroExonator
 54 |           
 55 | 
 56 |           
 57 |           </a>
 58 | 
 59 |           
 60 |             
 61 |             
 62 |           
 63 | 
 64 |           
 65 | <div role="search">
 66 |   <form id="rtd-search-form" class="wy-form" action="search.html" method="get">
 67 |     <input type="text" name="q" placeholder="Search docs" />
 68 |     <input type="hidden" name="check_keywords" value="yes" />
 69 |     <input type="hidden" name="area" value="default" />
 70 |   </form>
 71 | </div>
 72 | 
 73 |           
 74 |         </div>
 75 | 
 76 |         
 77 |         <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
 78 |           
 79 |             
 80 |             
 81 |               
 82 |             
 83 |             
 84 |               <ul>
 85 | <li class="toctree-l1"><a class="reference internal" href="install.html">Installation</a></li>
 86 | </ul>
 87 | <ul>
 88 | <li class="toctree-l1"><a class="reference internal" href="setup.html">Setup</a></li>
 89 | </ul>
 90 | <ul>
 91 | <li class="toctree-l1"><a class="reference internal" href="discovery_and_quantification.html">Discovery and Quantification</a></li>
 92 | </ul>
 93 | <ul>
 94 | <li class="toctree-l1"><a class="reference internal" href="differential_inclusion_analysis.html">Differential inclusion analysis</a></li>
 95 | </ul>
 96 | <ul>
 97 | <li class="toctree-l1"><a class="reference internal" href="single_cell_analysis.html">Single cell analysis</a></li>
 98 | </ul>
 99 | <ul>
100 | <li class="toctree-l1"><a class="reference internal" href="licence.html">MIT License (MIT)</a></li>
101 | </ul>
102 | <ul>
103 | <li class="toctree-l1"><a class="reference internal" href="support.html">Support</a></li>
104 | </ul>
105 | 
106 |             
107 |           
108 |         </div>
109 |         
110 |       </div>
111 |     </nav>
112 | 
113 |     <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
114 | 
115 |       
116 |       <nav class="wy-nav-top" aria-label="top navigation">
117 |         
118 |           <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
119 |           <a href="index.html">MicroExonator</a>
120 |         
121 |       </nav>
122 | 
123 | 
124 |       <div class="wy-nav-content">
125 |         
126 |         <div class="rst-content">
127 |         
128 |           
129 | 
130 | 
131 | 
132 | 
133 | 
134 | 
135 | 
136 | 
137 | 
138 | 
139 | 
140 | 
141 | 
142 | 
143 | 
144 | <div role="navigation" aria-label="breadcrumbs navigation">
145 | 
146 |   <ul class="wy-breadcrumbs">
147 |     
148 |       <li><a href="index.html" class="icon icon-home"></a> &raquo;</li>
149 |         
150 |       <li>Index</li>
151 |     
152 |     
153 |       <li class="wy-breadcrumbs-aside">
154 |         
155 |             
156 |         
157 |       </li>
158 |     
159 |   </ul>
160 | 
161 |   
162 |   <hr/>
163 | </div>
164 |           <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
165 |            <div itemprop="articleBody">
166 |             
167 | 
168 | <h1 id="index">Index</h1>
169 | 
170 | <div class="genindex-jumpbox">
171 |  
172 | </div>
173 | 
174 | 
175 |            </div>
176 |            
177 |           </div>
178 |           <footer>
179 |   
180 | 
181 |   <hr/>
182 | 
183 |   <div role="contentinfo">
184 |     <p>
185 |         
186 |         &copy; Copyright 2020, Guillermo E. Parada
187 | 
188 |     </p>
189 |   </div>
190 |     
191 |     
192 |     
193 |     Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a
194 |     
195 |     <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a>
196 |     
197 |     provided by <a href="https://readthedocs.org">Read the Docs</a>. 
198 | 
199 | </footer>
200 | 
201 |         </div>
202 |       </div>
203 | 
204 |     </section>
205 | 
206 |   </div>
207 |   
208 | 
209 |   <script type="text/javascript">
210 |       jQuery(function () {
211 |           SphinxRtdTheme.Navigation.enable(true);
212 |       });
213 |   </script>
214 | 
215 |   
216 |   
217 |     
218 |    
219 | 
220 | </body>
221 | </html>


--------------------------------------------------------------------------------
/docs/build/html/objects.inv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/docs/build/html/objects.inv


--------------------------------------------------------------------------------
/docs/build/html/search.html:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | <!DOCTYPE html>
  4 | <html class="writer-html5" lang="en" >
  5 | <head>
  6 |   <meta charset="utf-8">
  7 |   
  8 |   <meta name="viewport" content="width=device-width, initial-scale=1.0">
  9 |   
 10 |   <title>Search &mdash; MicroExonator  documentation</title>
 11 |   
 12 | 
 13 |   
 14 |   <link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
 15 |   <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
 16 |   <link rel="stylesheet" href="_static/theme_overrides.css" type="text/css" />
 17 | 
 18 |   
 19 |   
 20 |   
 21 |   
 22 | 
 23 |   
 24 |     
 25 |   <!--[if lt IE 9]>
 26 |     <script src="_static/js/html5shiv.min.js"></script>
 27 |   <![endif]-->
 28 |   
 29 |     
 30 |       <script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script>
 31 |         <script src="_static/jquery.js"></script>
 32 |         <script src="_static/underscore.js"></script>
 33 |         <script src="_static/doctools.js"></script>
 34 |         <script src="_static/language_data.js"></script>
 35 |     
 36 |     <script type="text/javascript" src="_static/js/theme.js"></script>
 37 | 
 38 |     
 39 |     <script type="text/javascript" src="_static/searchtools.js"></script>
 40 |     <link rel="index" title="Index" href="genindex.html" />
 41 |     <link rel="search" title="Search" href="#" /> 
 42 | </head>
 43 | 
 44 | <body class="wy-body-for-nav">
 45 | 
 46 |    
 47 |   <div class="wy-grid-for-nav">
 48 |     
 49 |     <nav data-toggle="wy-nav-shift" class="wy-nav-side">
 50 |       <div class="wy-side-scroll">
 51 |         <div class="wy-side-nav-search" >
 52 |           
 53 | 
 54 |           
 55 |             <a href="index.html" class="icon icon-home" alt="Documentation Home"> MicroExonator
 56 |           
 57 | 
 58 |           
 59 |           </a>
 60 | 
 61 |           
 62 |             
 63 |             
 64 |           
 65 | 
 66 |           
 67 | <div role="search">
 68 |   <form id="rtd-search-form" class="wy-form" action="#" method="get">
 69 |     <input type="text" name="q" placeholder="Search docs" />
 70 |     <input type="hidden" name="check_keywords" value="yes" />
 71 |     <input type="hidden" name="area" value="default" />
 72 |   </form>
 73 | </div>
 74 | 
 75 |           
 76 |         </div>
 77 | 
 78 |         
 79 |         <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
 80 |           
 81 |             
 82 |             
 83 |               
 84 |             
 85 |             
 86 |               <ul>
 87 | <li class="toctree-l1"><a class="reference internal" href="install.html">Installation</a></li>
 88 | </ul>
 89 | <ul>
 90 | <li class="toctree-l1"><a class="reference internal" href="setup.html">Setup</a></li>
 91 | </ul>
 92 | <ul>
 93 | <li class="toctree-l1"><a class="reference internal" href="discovery_and_quantification.html">Discovery and Quantification</a></li>
 94 | </ul>
 95 | <ul>
 96 | <li class="toctree-l1"><a class="reference internal" href="differential_inclusion_analysis.html">Differential inclusion analysis</a></li>
 97 | </ul>
 98 | <ul>
 99 | <li class="toctree-l1"><a class="reference internal" href="single_cell_analysis.html">Single cell analysis</a></li>
100 | </ul>
101 | <ul>
102 | <li class="toctree-l1"><a class="reference internal" href="licence.html">MIT License (MIT)</a></li>
103 | </ul>
104 | <ul>
105 | <li class="toctree-l1"><a class="reference internal" href="support.html">Support</a></li>
106 | </ul>
107 | 
108 |             
109 |           
110 |         </div>
111 |         
112 |       </div>
113 |     </nav>
114 | 
115 |     <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
116 | 
117 |       
118 |       <nav class="wy-nav-top" aria-label="top navigation">
119 |         
120 |           <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
121 |           <a href="index.html">MicroExonator</a>
122 |         
123 |       </nav>
124 | 
125 | 
126 |       <div class="wy-nav-content">
127 |         
128 |         <div class="rst-content">
129 |         
130 |           
131 | 
132 | 
133 | 
134 | 
135 | 
136 | 
137 | 
138 | 
139 | 
140 | 
141 | 
142 | 
143 | 
144 | 
145 | 
146 | <div role="navigation" aria-label="breadcrumbs navigation">
147 | 
148 |   <ul class="wy-breadcrumbs">
149 |     
150 |       <li><a href="index.html" class="icon icon-home"></a> &raquo;</li>
151 |         
152 |       <li>Search</li>
153 |     
154 |     
155 |       <li class="wy-breadcrumbs-aside">
156 |         
157 |             
158 |         
159 |       </li>
160 |     
161 |   </ul>
162 | 
163 |   
164 |   <hr/>
165 | </div>
166 |           <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
167 |            <div itemprop="articleBody">
168 |             
169 |   <noscript>
170 |   <div id="fallback" class="admonition warning">
171 |     <p class="last">
172 |       Please activate JavaScript to enable the search functionality.
173 |     </p>
174 |   </div>
175 |   </noscript>
176 | 
177 |   
178 |   <div id="search-results">
179 |   
180 |   </div>
181 | 
182 |            </div>
183 |            
184 |           </div>
185 |           <footer>
186 |   
187 | 
188 |   <hr/>
189 | 
190 |   <div role="contentinfo">
191 |     <p>
192 |         
193 |         &copy; Copyright 2020, Guillermo E. Parada
194 | 
195 |     </p>
196 |   </div>
197 |     
198 |     
199 |     
200 |     Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a
201 |     
202 |     <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a>
203 |     
204 |     provided by <a href="https://readthedocs.org">Read the Docs</a>. 
205 | 
206 | </footer>
207 | 
208 |         </div>
209 |       </div>
210 | 
211 |     </section>
212 | 
213 |   </div>
214 |   
215 | 
216 |   <script type="text/javascript">
217 |       jQuery(function () {
218 |           SphinxRtdTheme.Navigation.enable(true);
219 |       });
220 |   </script>
221 | 
222 |   
223 |   
224 |     
225 |   
226 |   <script type="text/javascript">
227 |     jQuery(function() { Search.loadIndex("searchindex.js"); });
228 |   </script>
229 |   
230 |   <script type="text/javascript" id="searchindexloader"></script>
231 |    
232 | 
233 | 
234 | </body>
235 | </html>


--------------------------------------------------------------------------------
/docs/build/html/support.html:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | <!DOCTYPE html>
  4 | <html class="writer-html5" lang="en" >
  5 | <head>
  6 |   <meta charset="utf-8">
  7 |   
  8 |   <meta name="viewport" content="width=device-width, initial-scale=1.0">
  9 |   
 10 |   <title>Support &mdash; MicroExonator  documentation</title>
 11 |   
 12 | 
 13 |   
 14 |   <link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
 15 |   <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
 16 |   <link rel="stylesheet" href="_static/theme_overrides.css" type="text/css" />
 17 | 
 18 |   
 19 |   
 20 |   
 21 |   
 22 | 
 23 |   
 24 |   <!--[if lt IE 9]>
 25 |     <script src="_static/js/html5shiv.min.js"></script>
 26 |   <![endif]-->
 27 |   
 28 |     
 29 |       <script type="text/javascript" id="documentation_options" data-url_root="./" src="_static/documentation_options.js"></script>
 30 |         <script src="_static/jquery.js"></script>
 31 |         <script src="_static/underscore.js"></script>
 32 |         <script src="_static/doctools.js"></script>
 33 |         <script src="_static/language_data.js"></script>
 34 |     
 35 |     <script type="text/javascript" src="_static/js/theme.js"></script>
 36 | 
 37 |     
 38 |     <link rel="index" title="Index" href="genindex.html" />
 39 |     <link rel="search" title="Search" href="search.html" />
 40 |     <link rel="prev" title="MIT License (MIT)" href="licence.html" /> 
 41 | </head>
 42 | 
 43 | <body class="wy-body-for-nav">
 44 | 
 45 |    
 46 |   <div class="wy-grid-for-nav">
 47 |     
 48 |     <nav data-toggle="wy-nav-shift" class="wy-nav-side">
 49 |       <div class="wy-side-scroll">
 50 |         <div class="wy-side-nav-search" >
 51 |           
 52 | 
 53 |           
 54 |             <a href="index.html" class="icon icon-home" alt="Documentation Home"> MicroExonator
 55 |           
 56 | 
 57 |           
 58 |           </a>
 59 | 
 60 |           
 61 |             
 62 |             
 63 |           
 64 | 
 65 |           
 66 | <div role="search">
 67 |   <form id="rtd-search-form" class="wy-form" action="search.html" method="get">
 68 |     <input type="text" name="q" placeholder="Search docs" />
 69 |     <input type="hidden" name="check_keywords" value="yes" />
 70 |     <input type="hidden" name="area" value="default" />
 71 |   </form>
 72 | </div>
 73 | 
 74 |           
 75 |         </div>
 76 | 
 77 |         
 78 |         <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
 79 |           
 80 |             
 81 |             
 82 |               
 83 |             
 84 |             
 85 |               <ul>
 86 | <li class="toctree-l1"><a class="reference internal" href="install.html">Installation</a></li>
 87 | </ul>
 88 | <ul>
 89 | <li class="toctree-l1"><a class="reference internal" href="setup.html">Setup</a></li>
 90 | </ul>
 91 | <ul>
 92 | <li class="toctree-l1"><a class="reference internal" href="discovery_and_quantification.html">Discovery and Quantification</a></li>
 93 | </ul>
 94 | <ul>
 95 | <li class="toctree-l1"><a class="reference internal" href="differential_inclusion_analysis.html">Differential inclusion analysis</a></li>
 96 | </ul>
 97 | <ul>
 98 | <li class="toctree-l1"><a class="reference internal" href="single_cell_analysis.html">Single cell analysis</a></li>
 99 | </ul>
100 | <ul>
101 | <li class="toctree-l1"><a class="reference internal" href="licence.html">MIT License (MIT)</a></li>
102 | </ul>
103 | <ul class="current">
104 | <li class="toctree-l1 current"><a class="current reference internal" href="#">Support</a></li>
105 | </ul>
106 | 
107 |             
108 |           
109 |         </div>
110 |         
111 |       </div>
112 |     </nav>
113 | 
114 |     <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">
115 | 
116 |       
117 |       <nav class="wy-nav-top" aria-label="top navigation">
118 |         
119 |           <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
120 |           <a href="index.html">MicroExonator</a>
121 |         
122 |       </nav>
123 | 
124 | 
125 |       <div class="wy-nav-content">
126 |         
127 |         <div class="rst-content">
128 |         
129 |           
130 | 
131 | 
132 | 
133 | 
134 | 
135 | 
136 | 
137 | 
138 | 
139 | 
140 | 
141 | 
142 | 
143 | 
144 | 
145 | <div role="navigation" aria-label="breadcrumbs navigation">
146 | 
147 |   <ul class="wy-breadcrumbs">
148 |     
149 |       <li><a href="index.html" class="icon icon-home"></a> &raquo;</li>
150 |         
151 |       <li>Support</li>
152 |     
153 |     
154 |       <li class="wy-breadcrumbs-aside">
155 |         
156 |             
157 |             <a href="_sources/support.rst.txt" rel="nofollow"> View page source</a>
158 |           
159 |         
160 |       </li>
161 |     
162 |   </ul>
163 | 
164 |   
165 |   <hr/>
166 | </div>
167 |           <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
168 |            <div itemprop="articleBody">
169 |             
170 |   <div class="section" id="support">
171 | <h1>Support<a class="headerlink" href="#support" title="Permalink to this headline">¶</a></h1>
172 | <p>For questions, ideas, feature requests and potential bug reports please contact <a class="reference external" href="mailto:gp7&#37;&#52;&#48;sanger&#46;ac&#46;uk">gp7<span>&#64;</span>sanger<span>&#46;</span>ac<span>&#46;</span>uk</a>.</p>
173 | </div>
174 | 
175 | 
176 |            </div>
177 |            
178 |           </div>
179 |           <footer>
180 |   
181 |     <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
182 |       
183 |       
184 |         <a href="licence.html" class="btn btn-neutral float-left" title="MIT License (MIT)" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
185 |       
186 |     </div>
187 |   
188 | 
189 |   <hr/>
190 | 
191 |   <div role="contentinfo">
192 |     <p>
193 |         
194 |         &copy; Copyright 2020, Guillermo E. Parada
195 | 
196 |     </p>
197 |   </div>
198 |     
199 |     
200 |     
201 |     Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a
202 |     
203 |     <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a>
204 |     
205 |     provided by <a href="https://readthedocs.org">Read the Docs</a>. 
206 | 
207 | </footer>
208 | 
209 |         </div>
210 |       </div>
211 | 
212 |     </section>
213 | 
214 |   </div>
215 |   
216 | 
217 |   <script type="text/javascript">
218 |       jQuery(function () {
219 |           SphinxRtdTheme.Navigation.enable(true);
220 |       });
221 |   </script>
222 | 
223 |   
224 |   
225 |     
226 |    
227 | 
228 | </body>
229 | </html>


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 | 
13 | if "%1" == "" goto help
14 | 
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | 	echo.
18 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | 	echo.installed, then set the SPHINXBUILD environment variable to point
20 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | 	echo.may add the Sphinx directory to PATH.
22 | 	echo.
23 | 	echo.If you don't have Sphinx installed, grab it from
24 | 	echo.http://sphinx-doc.org/
25 | 	exit /b 1
26 | )
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd
36 | 


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
 1 | # Configuration file for the Sphinx documentation builder.
 2 | #
 3 | # This file only contains a selection of the most common options. For a full
 4 | # list see the documentation:
 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
 6 | 
 7 | # -- Path setup --------------------------------------------------------------
 8 | 
 9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | # import os
14 | # import sys
15 | # sys.path.insert(0, os.path.abspath('.'))
16 | 
17 | 
18 | # -- Project information -----------------------------------------------------
19 | 
20 | project = 'MicroExonator'
21 | copyright = '2020, Guillermo E. Parada'
22 | author = 'Guillermo E. Parada'
23 | 
24 | 
25 | # -- General configuration ---------------------------------------------------
26 | 
27 | # Add any Sphinx extension module names here, as strings. They can be
28 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
29 | # ones.
30 | extensions = [
31 |     'sphinx_rtd_theme',
32 | ]
33 | 
34 | # Add any paths that contain templates here, relative to this directory.
35 | templates_path = ['_templates']
36 | 
37 | # List of patterns, relative to source directory, that match files and
38 | # directories to ignore when looking for source files.
39 | # This pattern also affects html_static_path and html_extra_path.
40 | exclude_patterns = []
41 | 
42 | 
43 | # -- Options for HTML output -------------------------------------------------
44 | 
45 | # The theme to use for HTML and HTML Help pages.  See the documentation for
46 | # a list of builtin themes.
47 | #
48 | #html_theme = 'alabaster'
49 | html_theme = "sphinx_rtd_theme"
50 | 
51 | # Add any paths that contain custom static files (such as style sheets) here,
52 | # relative to this directory. They are copied after the builtin static files,
53 | # so a file named "default.css" will overwrite the builtin "default.css".
54 | html_static_path = ['_static']
55 | 
56 | html_context = {
57 |     'css_files': [
58 |         '_static/theme_overrides.css',  # override wide tables in RTD theme
59 |         ],
60 |      }
61 | 
62 | master_doc = 'index'
63 | 


--------------------------------------------------------------------------------
/docs/source/differential_inclusion_analysis.rst:
--------------------------------------------------------------------------------
  1 | .. differential_inclusion_analysis
  2 | 
  3 | 
  4 | ===============================
  5 | Differential inclusion analysis
  6 | ===============================
  7 | 
  8 | 
  9 | On this secction we descrive the a downstream module that was developed to perform alternative splicing analysis between sample groups. To quantify and assess differential inclusion of novel and annotated microexons, on this moudle we have integrated `Whippet <https://github.com/timbitz/Whippet.jl>`_, which enables a fast and accurate assesment of alterntive splicing events across user-defined sample groups.
 10 | 
 11 | Install
 12 | =======
 13 | 
 14 | To run this downstream module for the first time you need to create a environment that has `snakemake` and the version of `julia` that is compatible with `Whipet v0.11`. To creat this enviroment execute the following command inside ``MicroExonator/`` folder:
 15 | 
 16 | .. code-block:: bash
 17 | 
 18 |     conda env create -f Whippet/julia_0.6.1.yaml
 19 | 
 20 | Then, activate the newly created enviroment:
 21 | 
 22 | .. code-block:: bash
 23 | 
 24 |     source activate julia_0.6.1
 25 | 
 26 | Enter julia's interactive mode:
 27 | 
 28 | .. code-block:: bash
 29 | 
 30 |     julia
 31 | 
 32 | Install Whippet by excecuting the following command on the interactive session:
 33 | 
 34 | .. code-block:: bash
 35 | 
 36 |     Pkg.add("Whippet")
 37 | 
 38 | .. note::
 39 | 
 40 |     To exit julia interactive session press ``control + d``.
 41 | 
 42 | 
 43 | Configure
 44 | =========
 45 | 
 46 | Here there is an list of the additonal keys that need to be incorporated as a part of config.yaml:
 47 | 
 48 | .. code-block:: bash
 49 |     
 50 |     whippet_bin_folder : /path/to/miniconda/envs/julia_0.6.1/share/julia/site/v0.6/Whippet/bin
 51 |     Gene_anontation_GTF : /path/to/gene.annotation.gtf
 52 |     whippet_delta : /path/to/whippet_delta.yaml
 53 | 
 54 | * ``whippet_bin_folder`` correspodn t the path of whippet binary folder (``Whippet/bin``) that is located inside ``julia_0.6.1`` virtual enviroment folder. The specific routh to ``Whippet/bin`` may variate, so it is important that you manually identify the correct path.
 55 | 
 56 | * ``Gene_anontation_GTF`` corresponds the path of a gene annotation file as Gene Transfer Format (`GTF <https://en.wikipedia.org/wiki/Gene_transfer_format#:~:text=The%20Gene%20transfer%20format%20(GTF,conventions%20specific%20to%20gene%20information.>`_). Working with the same annotation data base than the one used on the previous steps is recommended. 
 57 | 
 58 | * ``whippet_delta`` indicate the path of a `YAML <https://en.wikipedia.org/wiki/YAML#:~:text=Open%20format%3F&text=YAML%20(a%20recursive%20acronym%20for,is%20being%20stored%20or%20transmitted.>`_ file you need to create to provide information about the desired comparisons between groups of samples.
 59 | 
 60 | 
 61 | whippet_delta YAML file
 62 | -----------------------
 63 | 
 64 | This file can contain the information to schedule any number of comparison between sample groups of any size. Every comparison should have the following structure inside the YAML file:
 65 | 
 66 | .. code-block:: bash
 67 | 
 68 |     comparison_ID:
 69 |       A : sample1,sample2,sample3
 70 |       B : sample4,sample5,sample6
 71 | 
 72 | Where ``sample1 ... sample6`` correspond to base names given to each RNA-seq samples at the corresponding input files (See :doc:`setup`) and `comparison_ID` to any given name for the sheduled comparison. As an example see the :download:`YAML file <../../Examples/Runs/Parada_et_al/whippet_delta.yaml>` we used in our publication. 
 73 | 
 74 | .. warning::
 75 | 
 76 |     Inside this YAML file sample groups must be named ``A`` and ``B``.
 77 | 
 78 | 
 79 | Optional parameters
 80 | -------------------
 81 | 
 82 | If you just want to skip Discovery and Quantification modules and just asses alternative splicing events annotated at the provided GTF file, then include the following like at the configuratio file:
 83 | 
 84 | .. code-block:: bash
 85 | 
 86 |     downstream_only : T
 87 | 
 88 | Run
 89 | ===
 90 | 
 91 | In order to run this module you need to run the standar MicroExonator command, but providing ``differential_inclusion`` as a target. If you have not run previous ``discovery`` and ``quantification`` modules, MicroExonator will include them into the job plan (unless ``downstream_only`` is set as ``T``)   
 92 | 
 93 | .. code-block:: bash
 94 | 
 95 |     snakemake -s MicroExonator.smk  --cluster-config cluster.json --cluster {cluster system params} --use-conda -k  -j {number of parallel jobs} differential_inclusion
 96 | 
 97 | 
 98 | 
 99 | Output
100 | ======
101 | 
102 | Quantification files generated per each sample can be found at ``Whipet/Quant``. Differentially included microexon analyses that can be obtained with Whippet, are reported at ``Whippet/Delta`` folder. MicroExonator performs these analyses using both PSI values calculated internally by the pipeline and PSI values directly calculated with Whippet. These results are reported under the same format than the ``diff.gz`` descrived at the `Whippet's GitHub page <https://github.com/timbitz/Whippet.jl#output-formats>`_. However, to provide easier interpretation, we filter the Whippet splicing nodes that correspond to microexon inclusion events, these are reported as ``.microexons`` files, where ``.diff.ME.microexons`` files correspond to the output when MicroExonator PSI values are taken as input and ``.diff.microexons`` when Whippet PSI  values are taken as input.
103 | 
104 | 


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | ========
 2 | Overview
 3 | ========
 4 | 
 5 | MicroExonator is a fully-integrated computational pipeline that allows for systematic de novo discovery and quantification
 6 | of microexons using raw RNA-seq data for any organism with a gene annotation. Compared to other available methods MicroExonator
 7 | is more sensitive for discovering smaller microexons and it provides higher specificity for all lengths. Moreover, MicroExonator
 8 | provides integrated downstream comparative analysis between cell types or tissues using
 9 | `Whippet <https://github.com/timbitz/Whippet.jl>`_. (`Sterne-Weiler et al. 2018 <https://doi.org/10.1016/j.molcel.2018.08.018>`_).
10 | 
11 | MicroExonator pipeline is divided in several modules:
12 |     * Discover
13 |     * Quantification
14 |     * Differential Inclusion
15 |     * Single cell analysis
16 | 
17 | **Support**
18 | 
19 |     For questions, ideas, feature requests and potential bug reports submit an issue on our GitHub page or write us at gp7@sanger.ac.uk.
20 | 
21 | .. toctree::
22 |     :name: MicroExonator-install
23 |     :maxdepth: 1
24 |     :hidden:
25 | 
26 |     install
27 | 
28 | .. toctree::
29 |     :name: MicroExonator-setup
30 |     :maxdepth: 1
31 |     :hidden:
32 | 
33 |     setup
34 | 
35 | .. toctree::
36 |     :name: MicroExonator-discovery-and-quantification
37 |     :maxdepth: 3
38 |     :hidden:
39 | 
40 |     discovery_and_quantification
41 | 
42 | .. toctree::
43 |     :name: MicroExonator-differential_inclusion_analysis
44 |     :maxdepth: 3
45 |     :hidden:
46 | 
47 |     differential_inclusion_analysis
48 | 
49 | .. toctree::
50 |     :name: MicroExonator-single_cell_analysis
51 |     :maxdepth: 3
52 |     :hidden:
53 | 
54 |     single_cell_analysis
55 | 
56 | .. toctree::
57 |     :name: MicroExonator-Licence
58 |     :maxdepth: 1
59 |     :hidden:
60 | 
61 |     licence
62 | 
63 | .. toctree::
64 |     :name: MicroExonator-Support
65 |     :maxdepth: 1
66 |     :hidden:
67 | 
68 |     support
69 | 


--------------------------------------------------------------------------------
/docs/source/install.rst:
--------------------------------------------------------------------------------
 1 | .. _Installation:
 2 |   
 3 | =====================
 4 | Installation
 5 | =====================
 6 | 
 7 | To install MicroExonator follow these instructions:
 8 | 
 9 | Clone repository
10 | =================
11 | Clone the github repository
12 | 
13 | .. code-block:: bash
14 | 
15 |   git clone https://github.com/hemberg-lab/MicroExonator
16 | 
17 | Install Miniconda 3
18 | 
19 | .. code-block:: bash
20 | 
21 |    wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
22 |    chmod +x Miniconda3-latest-Linux-x86_64.sh
23 |    ./Miniconda3-latest-Linux-x86_64.sh
24 | 
25 | Start using conda by opening a new terminal or just running:
26 | 
27 | .. code-block:: bash
28 | 
29 |    bash
30 | 
31 | 
32 | Set up a master virtual environment
33 | ===================================
34 | 
35 | Create a conda virtual enviroment with the necesary dependencies
36 | 
37 | .. code-block:: bash
38 | 
39 |   conda create -n snakemake_env -c bioconda -c conda-forge snakemake
40 | 
41 | 
42 | 


--------------------------------------------------------------------------------
/docs/source/licence.rst:
--------------------------------------------------------------------------------
 1 | .. _Licence:
 2 |   
 3 | =====================
 4 | MIT License (MIT)
 5 | =====================
 6 | 
 7 | Copyright (c) 2020 Guillermo Parada
 8 | 
 9 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/docs/source/setup.rst:
--------------------------------------------------------------------------------
 1 | .. _input_files:
 2 | 
 3 | ===========  
 4 | Setup
 5 | ===========
 6 | 
 7 | Before runnung MicroExonator there are several files that needs to be created inside ``MicroExonator/`` root folder:
 8 | 
 9 | RNA-seq samples
10 | ===============
11 | 
12 | Input RNA-seq data either a ``local_samples.tsv``, ``NCBI_accession_list.txt`` or ``sample_url.tsv`` needs to be defined.
13 | If you want to run MicroExonator over RNA-seq samples that are locally stored, they need to be defined inside ``local_samples.tsv``.
14 | MicroExonator can also download and run samples from NCBI if the corresponding SRA accession names are defined inside of ``NCBI_accession_list.txt``,
15 | in addition any ``fastq.gz`` that can be directly download from a URL can be included into the aalysis by defining them inside a ``sample_url.tsv``.
16 | You can find examples of these files inside the ``Examples/`` folder.
17 | Is posible to combine different types of input sources, but at least one of these files needs to be defined inside ``MicroExonator/`` root folder. 
18 | 
19 | Cluster configuration
20 | =====================
21 | 
22 | If you are working on a high performace cluster, then it is very likely that you need to submit jobs to queueing systems such as lsf, qsub, SLURM, etc.
23 | To make MicroExonator work with these queueing systems, you need to create a `cluster.json` file. 
24 | We currently provide in the Examples folder a ``cluster.json`` file to run MicroExonator with `lsf <https://www.ibm.com/support/knowledgecenter/en/SSETD4/product_welcome_platform_lsf.html>`_.
25 | To adapt MicroExonator to other quequing systems please see the `SnakeMake documentation <https://snakemake.readthedocs.io/en/stable/snakefiles/configuration.html?highlight=cluster.json#cluster-configuration>`_.
26 | 
27 | Config file
28 | ===========
29 | 
30 | Each MicroExonator's module has certain compulsory and optional parameters that needs to be defined inside a ``config.yaml`` file.
31 | The necesary content of ``config.yaml`` is described on each moudle section and examples can be found at the ``Examples/`` folder.
32 | 


--------------------------------------------------------------------------------
/docs/source/support.rst:
--------------------------------------------------------------------------------
 1 | .. support
 2 |   
 3 | ========
 4 | Support
 5 | ========
 6 | 
 7 | For questions, ideas, feature requests and potential bug reports please contact gp7@sanger.ac.uk.
 8 | 
 9 | 
10 | 


--------------------------------------------------------------------------------
/envs/MicroExonator.yaml:
--------------------------------------------------------------------------------
  1 | #name: Micro-Exonator
  2 | channels:
  3 |   - conda-forge
  4 |   - bioconda
  5 |   - defaults
  6 |   - cgat
  7 | dependencies:
  8 |   - r-stringi
  9 |   - bcftools=1.7=0
 10 |   - bedtools=2.27.1=he941832_2
 11 |   - hisat2=2.1.0=py36pl5.22.0_0
 12 |   - htslib=1.7=0
 13 |   - libdeflate=1.0=h470a237_0
 14 |   - perl-threaded=5.22.0=10
 15 |   - pybedtools=0.7.10=py36_2
 16 |   - pysam=0.14.1=py36hae42fb6_1
 17 |   - samtools=1.9=h8ee4bcc_1
 18 |   - snakemake=5.2.2=py36_1
 19 |   - snakemake-minimal=5.2.2=py36_1
 20 |   - sra-tools=2.9.1_1=h470a237_0
 21 |   - r-reshape2
 22 |   - aioeasywebdav=2.2.0=py36_0
 23 |   - aiohttp=3.3.2=py36h470a237_1
 24 |   - appdirs=1.4.3=py_1
 25 |   - asn1crypto=0.24.0=py36_0
 26 |   - async-timeout=3.0.0=py36_0
 27 |   - attrs=18.1.0=py_1
 28 |   - biopython=1.72=py36_0
 29 |   - boto3=1.7.76=py_0
 30 |   - botocore=1.10.77=py_0
 31 |   - ca-certificates=2018.8.24=ha4d7672_0
 32 |   - cachetools=2.1.0=py_0
 33 |   - certifi=2018.8.24=py36_1001
 34 |   - cffi=1.11.5=py36_0
 35 |   - chardet=3.0.4=py36_0
 36 |   - configargparse=0.13.0=py_1
 37 |   - cryptography=2.1.4=py36_0
 38 |   - curl=7.61.0=h93b3f91_2
 39 |   - decorator=4.3.0=py_0
 40 |   - docutils=0.14=py36_0
 41 |   - dropbox=8.7.1=py_0
 42 |   - expat=2.2.5=hfc679d8_1
 43 |   - filechunkio=1.8=py36_1
 44 |   - freetype=2.8.1=hfa320df_1
 45 |   - ftputil=3.4=py_0
 46 |   - google-api-core=0.1.4=py_0
 47 |   - google-auth=1.5.1=py_0
 48 |   - google-cloud-core=0.28.1=py_0
 49 |   - google-cloud-storage=1.10.0=py_0
 50 |   - google-resumable-media=0.3.1=py_0
 51 |   - googleapis-common-protos=1.5.3=py_1
 52 |   - graphite2=1.3.12=hfc679d8_0
 53 |   - graphviz=2.38.0=7
 54 |   - icu=58.2=0
 55 |   - idna=2.6=py36_1
 56 |   - idna_ssl=1.0.0=0
 57 |   - jinja2=2.10=py_1
 58 |   - jmespath=0.9.3=py_1
 59 |   - jpeg=9c=h470a237_1
 60 |   - jsonschema=2.6.0=py36_1
 61 |   - krb5=1.14.6=0
 62 |   - libgfortran=3.0.0=1
 63 |   - libpng=1.6.34=0
 64 |   - libprotobuf=3.6.0=hd28b015_0
 65 |   - libtiff=4.0.9
 66 |   - libtool=2.4.6=h470a237_1
 67 |   - libuuid=1.0.3=1
 68 |   - libxcb=1.13=0
 69 |   - markupsafe=1.0=py36_0
 70 |   - multidict=4.3.1=py36h470a237_0
 71 |   - ncurses=6.1=hfc679d8_1
 72 |   - networkx=2.1=py_1
 73 |   - openssl=1.0.2p=h470a237_0
 74 |   - packaging=17.1=py_0
 75 |   - pandas=0.23.4=py36hf8a1672_0
 76 |   - pango=1.40.14=0
 77 |   - paramiko=2.4.1=py36_0
 78 |   - perl=5.22.0.1=0
 79 |   - pip=9.0.*
 80 |   - prettytable=0.7.2=py_2
 81 |   - protobuf=3.6.0=py36hfc679d8_0
 82 |   - psutil=5.4.3=py36_0
 83 |   - pyasn1=0.4.2=py_0
 84 |   - pyasn1-modules=0.2.1=py_0
 85 |   - pycparser=2.18=py36_0
 86 |   - pygraphviz=1.4rc1=py36h470a237_0
 87 |   - pynacl=1.1.2=py36_0
 88 |   - pyopenssl=17.5.0
 89 |   - pyparsing=2.2.0=py36_0
 90 |   - pysftp=0.2.9=py36_0
 91 |   - pysocks=1.6.8=py36_1
 92 |   - python=3.6.6=h5001a0f_0
 93 |   - python-dateutil=2.7.3=py_0
 94 |   - python-irodsclient=0.7.0=py_0
 95 |   - pytz=2018.5=py_0
 96 |   - pyyaml=3.12=py36_1
 97 |   - r=3.4.1=r3.4.1_0
 98 |   - r-assertthat=0.2.0=r3.4.1_0
 99 |   - r-backports=1.0.5=r3.4.1_0
100 |   - r-base
101 |   - r-base64enc
102 |   - r-bitops=1.0_6=r3.4.1_0
103 |   - r-boot=1.3_20=r3.4.1_0
104 |   - r-catools=1.17.1=r3.4.1_0
105 |   - r-class=7.3_14=r3.4.1_0
106 |   - r-cli=1.0.0=r3.4.1_0
107 |   - r-cluster=2.0.6=r3.4.1_0
108 |   - r-codetools=0.2_15=r3.4.1_0
109 |   - r-colorspace=1.3_2=r3.4.1_0
110 |   - r-crayon=1.3.4=r3.4.1_0
111 |   - r-data.table=1.10.4=r3.4.1_0
112 |   - r-dichromat=2.0_0=r3.4.1_0
113 |   - r-digest=0.6.12=r3.4.1_0
114 |   - r-evaluate=0.10.1=r3.4.1_0
115 |   - r-foreign=0.8_67=r3.4.1_0
116 |   - r-formatr=1.5=r3.4.1_0
117 |   - r-ggplot2=2.2.1=r3.4.1_0
118 |   - r-glue=1.2.0=r3.4.1_0
119 |   - r-gtable=0.2.0=r3.4.1_0
120 |   - r-highr=0.6=r3.4.1_0
121 |   - r-htmltools=0.3.6=r3.4.1_0
122 |   - r-jsonlite=1.5=r3.4.1_0
123 |   - r-kernsmooth=2.23_15=r3.4.1_0
124 |   - r-knitr=1.20=r3.4.1_0
125 |   - r-labeling=0.3=r3.4.1_0
126 |   - r-lattice=0.20_34=r3.4.1_0
127 |   - r-lazyeval=0.2.1=r3.4.1_0
128 |   - r-magrittr=1.5=r3.4.1_0
129 |   - r-markdown=0.8=r3.4.1_1
130 |   - r-mass=7.3_48=r3.4.1_0
131 |   - r-matrix=1.2_12=r3.4.1_0
132 |   - r-mgcv=1.8_17=r3.4.1_0
133 |   - r-mime=0.5=r3.4.1_0
134 |   - r-mixtools=1.1.0=r3.4.1_0
135 |   - r-munsell=0.4.3=r3.4.1_0
136 |   - r-nlme=3.1_131=r3.4.1_0
137 |   - r-nnet=7.3_12=r3.4.1_0
138 |   - r-pillar=1.2.1=r3.4.1_0
139 |   - r-plyr=1.8.4=r3.4.1_0
140 |   - r-r6=2.2.2=r3.4.1_0
141 |   - r-rcolorbrewer=1.1_2=r3.4.1_0
142 |   - r-rcpp=0.12.15=r3.4.1_0
143 |   - r-recommended=3.4.1=r3.4.1_0
144 |   - r-rlang=0.2.0=r3.4.1_0
145 |   - r-rmarkdown=1.8=r3.4.1_0
146 |   - r-rpart=4.1_13=r3.4.1_0
147 |   - r-rprojroot=1.2=r3.4.1_0
148 |   - r-scales=0.5.0=r3.4.1_0
149 |   - r-segmented=0.5_2.1=r3.4.1_0
150 |   - r-spatial=7.3_11=r3.4.1_0
151 |   #- r-stringi
152 |   - r-stringr
153 |   - r-survival=2.40_1=r3.4.1_0
154 |   - r-tibble=1.4.2=r3.4.1_0
155 |   - r-utf8=1.1.3=r3.4.1_0
156 |   - r-viridislite=0.2.0=r3.4.1_0
157 |   - r-yaml=2.1.14=r3.4.1_0
158 |   - ratelimiter=1.2.0=py36_0
159 |   - readline=7.0=haf1bffa_1
160 |   - requests=2.18.4=py36_1
161 |   - rsa=3.4.2=py_1
162 |   - s3transfer=0.1.13=py36_0
163 |   - setuptools=39.0.1=py36_0
164 |   - six=1.11.0=py36_1
165 |   - sqlite=3.25.2=hb1c47c0_0
166 |   - tk=8.6.8=ha92aebf_0
167 |   - urllib3=1.22=py36_0
168 |   - wheel=0.30.0=py36_2
169 |   - wrapt=1.10.11=py36_0
170 |   - xmlrunner=1.7.7=py_0
171 |   - xorg-kbproto=1.0.7=1
172 |   - xorg-libice=1.0.9=2
173 |   - xorg-libsm=1.2.2=2
174 |   - xorg-libx11=1.6.5=0
175 |   - xorg-libxau=1.0.8=3
176 |   - xorg-libxdmcp=1.1.2=3
177 |   - xorg-libxext=1.3.3=2
178 |   - xorg-libxrender=0.9.10=0
179 |   - xorg-libxt=1.1.5=h470a237_2
180 |   - xorg-renderproto=0.11.1=1
181 |   - xorg-xextproto=7.3.0=1
182 |   - xorg-xproto=7.0.31=6
183 |   - xz=5.2.4=h470a237_1
184 |   - yarl=1.2.6=py36h470a237_0
185 |   - zlib=1.2.11=0
186 |   - bcrypt=3.1.4=py36h621fe67_0
187 |   - bzip2=1.0.6=3
188 |   - cairo=1.14.12=h77bcde2_0
189 |   - datrie=0.7.1=py36_0
190 |   - fontconfig=2.12.4=h88586e7_1
191 |   - glib=2.53.6=h5d9569c_2
192 |   - gmp=6.1.0=0
193 |   - gsl=2.2.1=h0c605f7_3
194 |   - harfbuzz=1.7.6=hc5b324e_0
195 |   - jbig=2.1=0
196 |   - libffi=3.2.1=1
197 |   - libgcc=5.2.0=0
198 |   - libgcc-ng=7.2.0=hdf63c60_3
199 |   - libgfortran-ng=7.2.0=hdf63c60_3
200 |   - libiconv=1.14=0
201 |   - libopenblas=0.2.20=h9ac9557_7
202 |   - libssh2=1.8.0=0
203 |   - libstdcxx-ng=7.2.0=hdf63c60_3
204 |   - libxml2=2.9.8=h26e45fe_1
205 |   - numpy=1.14.3
206 |   - numpy-base=1.14.3=py36h0ea5e3f_1
207 |   - pandoc=1.15.0.6=0
208 |   - pcre=8.39=1
209 |   - pixman=0.34.0=0
210 |   - yaml=0.1.6=0
211 | #prefix: /lustre/scratch117/cellgen/team218/gp7/miniconda/envs/Micro-Exonator
212 | 


--------------------------------------------------------------------------------
/envs/MicroExonator.yml:
--------------------------------------------------------------------------------
  1 | #name: Micro-Exonator
  2 | channels:
  3 |   - conda-forge
  4 |   - bioconda
  5 |   - defaults
  6 |   - cgat
  7 | dependencies:
  8 |   - bcftools=1.7=0
  9 |   - bedtools=2.27.1=he941832_2
 10 |   - hisat2=2.1.0=py36pl5.22.0_0
 11 |   - htslib=1.7=0
 12 |   - libdeflate=1.0=h470a237_0
 13 |   - perl-threaded=5.22.0=10
 14 |   - pybedtools=0.7.10=py36_2
 15 |   - pysam=0.14.1=py36hae42fb6_1
 16 |   - samtools=1.9=h8ee4bcc_1
 17 |   - snakemake=5.2.2=py36_1
 18 |   - snakemake-minimal=5.2.2=py36_1
 19 |   - sra-tools=2.9.1_1=h470a237_0
 20 |   - r-reshape2=1.4.1=0
 21 |   - aioeasywebdav=2.2.0=py36_0
 22 |   - aiohttp=3.3.2=py36h470a237_1
 23 |   - appdirs=1.4.3=py_1
 24 |   - asn1crypto=0.24.0=py36_0
 25 |   - async-timeout=3.0.0=py36_0
 26 |   - attrs=18.1.0=py_1
 27 |   - biopython=1.72=py36_0
 28 |   - boto3=1.7.76=py_0
 29 |   - botocore=1.10.77=py_0
 30 |   - ca-certificates=2018.8.24=ha4d7672_0
 31 |   - cachetools=2.1.0=py_0
 32 |   - certifi=2018.8.24=py36_1001
 33 |   - cffi=1.11.5=py36_0
 34 |   - chardet=3.0.4=py36_0
 35 |   - configargparse=0.13.0=py_1
 36 |   - cryptography=2.1.4=py36_0
 37 |   - curl=7.61.0=h93b3f91_2
 38 |   - decorator=4.3.0=py_0
 39 |   - docutils=0.14=py36_0
 40 |   - dropbox=8.7.1=py_0
 41 |   - expat=2.2.5=hfc679d8_1
 42 |   - filechunkio=1.8=py36_1
 43 |   - freetype=2.8.1=hfa320df_1
 44 |   - ftputil=3.4=py_0
 45 |   - google-api-core=0.1.4=py_0
 46 |   - google-auth=1.5.1=py_0
 47 |   - google-cloud-core=0.28.1=py_0
 48 |   - google-cloud-storage=1.10.0=py_0
 49 |   - google-resumable-media=0.3.1=py_0
 50 |   - googleapis-common-protos=1.5.3=py_1
 51 |   - graphite2=1.3.12=hfc679d8_0
 52 |   - graphviz=2.38.0=7
 53 |   - icu=58.2=0
 54 |   - idna=2.6=py36_1
 55 |   - idna_ssl=1.0.0=0
 56 |   - jinja2=2.10=py_1
 57 |   - jmespath=0.9.3=py_1
 58 |   - jpeg=9c=h470a237_1
 59 |   - jsonschema=2.6.0=py36_1
 60 |   - krb5=1.14.6=0
 61 |   - libgfortran=3.0.0=1
 62 |   - libpng=1.6.34=0
 63 |   - libprotobuf=3.6.0=hd28b015_0
 64 |   - libtiff=4.0.9=0
 65 |   - libtool=2.4.6=h470a237_1
 66 |   - libuuid=1.0.3=1
 67 |   - libxcb=1.13=0
 68 |   - markupsafe=1.0=py36_0
 69 |   - multidict=4.3.1=py36h470a237_0
 70 |   - ncurses=6.1=hfc679d8_1
 71 |   - networkx=2.1=py_1
 72 |   - openssl=1.0.2p=h470a237_0
 73 |   - packaging=17.1=py_0
 74 |   - pandas=0.23.4=py36hf8a1672_0
 75 |   - pango=1.40.14=0
 76 |   - paramiko=2.4.1=py36_0
 77 |   - perl=5.22.0.1=0
 78 |   - pip=9.0.2=py36_0
 79 |   - prettytable=0.7.2=py_2
 80 |   - protobuf=3.6.0=py36hfc679d8_0
 81 |   - psutil=5.4.3=py36_0
 82 |   - pyasn1=0.4.2=py_0
 83 |   - pyasn1-modules=0.2.1=py_0
 84 |   - pycparser=2.18=py36_0
 85 |   - pygraphviz=1.4rc1=py36h470a237_0
 86 |   - pynacl=1.1.2=py36_0
 87 |   - pyopenssl=17.5.0=py36_0
 88 |   - pyparsing=2.2.0=py36_0
 89 |   - pysftp=0.2.9=py36_0
 90 |   - pysocks=1.6.8=py36_1
 91 |   - python=3.6.6=h5001a0f_0
 92 |   - python-dateutil=2.7.3=py_0
 93 |   - python-irodsclient=0.7.0=py_0
 94 |   - pytz=2018.5=py_0
 95 |   - pyyaml=3.12=py36_1
 96 |   - r=3.4.1=r3.4.1_0
 97 |   - r-assertthat=0.2.0=r3.4.1_0
 98 |   - r-backports=1.0.5=r3.4.1_0
 99 |   - r-base=3.4.1=h4fe35fd_8
100 |   - r-base64enc=0.1_3=r3.4.1_0
101 |   - r-bitops=1.0_6=r3.4.1_0
102 |   - r-boot=1.3_20=r3.4.1_0
103 |   - r-catools=1.17.1=r3.4.1_0
104 |   - r-class=7.3_14=r3.4.1_0
105 |   - r-cli=1.0.0=r3.4.1_0
106 |   - r-cluster=2.0.6=r3.4.1_0
107 |   - r-codetools=0.2_15=r3.4.1_0
108 |   - r-colorspace=1.3_2=r3.4.1_0
109 |   - r-crayon=1.3.4=r3.4.1_0
110 |   - r-data.table=1.10.4=r3.4.1_0
111 |   - r-dichromat=2.0_0=r3.4.1_0
112 |   - r-digest=0.6.12=r3.4.1_0
113 |   - r-evaluate=0.10.1=r3.4.1_0
114 |   - r-foreign=0.8_67=r3.4.1_0
115 |   - r-formatr=1.5=r3.4.1_0
116 |   - r-ggplot2=2.2.1=r3.4.1_0
117 |   - r-glue=1.2.0=r3.4.1_0
118 |   - r-gtable=0.2.0=r3.4.1_0
119 |   - r-highr=0.6=r3.4.1_0
120 |   - r-htmltools=0.3.6=r3.4.1_0
121 |   - r-jsonlite=1.5=r3.4.1_0
122 |   - r-kernsmooth=2.23_15=r3.4.1_0
123 |   - r-knitr=1.20=r3.4.1_0
124 |   - r-labeling=0.3=r3.4.1_0
125 |   - r-lattice=0.20_34=r3.4.1_0
126 |   - r-lazyeval=0.2.1=r3.4.1_0
127 |   - r-magrittr=1.5=r3.4.1_0
128 |   - r-markdown=0.8=r3.4.1_1
129 |   - r-mass=7.3_48=r3.4.1_0
130 |   - r-matrix=1.2_12=r3.4.1_0
131 |   - r-mgcv=1.8_17=r3.4.1_0
132 |   - r-mime=0.5=r3.4.1_0
133 |   - r-mixtools=1.1.0=r3.4.1_0
134 |   - r-munsell=0.4.3=r3.4.1_0
135 |   - r-nlme=3.1_131=r3.4.1_0
136 |   - r-nnet=7.3_12=r3.4.1_0
137 |   - r-pillar=1.2.1=r3.4.1_0
138 |   - r-plyr=1.8.4=r3.4.1_0
139 |   - r-r6=2.2.2=r3.4.1_0
140 |   - r-rcolorbrewer=1.1_2=r3.4.1_0
141 |   - r-rcpp=0.12.15=r3.4.1_0
142 |   - r-recommended=3.4.1=r3.4.1_0
143 |   - r-rlang=0.2.0=r3.4.1_0
144 |   - r-rmarkdown=1.8=r3.4.1_0
145 |   - r-rpart=4.1_13=r3.4.1_0
146 |   - r-rprojroot=1.2=r3.4.1_0
147 |   - r-scales=0.5.0=r3.4.1_0
148 |   - r-segmented=0.5_2.1=r3.4.1_0
149 |   - r-spatial=7.3_11=r3.4.1_0
150 |   - r-stringi=1.1.6=r3.4.1_0
151 |   - r-stringr=1.3.0=r3.4.1_0
152 |   - r-survival=2.40_1=r3.4.1_0
153 |   - r-tibble=1.4.2=r3.4.1_0
154 |   - r-utf8=1.1.3=r3.4.1_0
155 |   - r-viridislite=0.2.0=r3.4.1_0
156 |   - r-yaml=2.1.14=r3.4.1_0
157 |   - ratelimiter=1.2.0=py36_0
158 |   - readline=7.0=haf1bffa_1
159 |   - requests=2.18.4=py36_1
160 |   - rsa=3.4.2=py_1
161 |   - s3transfer=0.1.13=py36_0
162 |   - setuptools=39.0.1=py36_0
163 |   - six=1.11.0=py36_1
164 |   - sqlite=3.25.2=hb1c47c0_0
165 |   - tk=8.6.8=ha92aebf_0
166 |   - urllib3=1.22=py36_0
167 |   - wheel=0.30.0=py36_2
168 |   - wrapt=1.10.11=py36_0
169 |   - xmlrunner=1.7.7=py_0
170 |   - xorg-kbproto=1.0.7=1
171 |   - xorg-libice=1.0.9=2
172 |   - xorg-libsm=1.2.2=2
173 |   - xorg-libx11=1.6.5=0
174 |   - xorg-libxau=1.0.8=3
175 |   - xorg-libxdmcp=1.1.2=3
176 |   - xorg-libxext=1.3.3=2
177 |   - xorg-libxrender=0.9.10=0
178 |   - xorg-libxt=1.1.5=h470a237_2
179 |   - xorg-renderproto=0.11.1=1
180 |   - xorg-xextproto=7.3.0=1
181 |   - xorg-xproto=7.0.31=6
182 |   - xz=5.2.4=h470a237_1
183 |   - yarl=1.2.6=py36h470a237_0
184 |   - zlib=1.2.11=0
185 |   - bcrypt=3.1.4=py36h621fe67_0
186 |   - bzip2=1.0.6=3
187 |   - cairo=1.14.12=h77bcde2_0
188 |   - datrie=0.7.1=py36_0
189 |   - fontconfig=2.12.4=h88586e7_1
190 |   - glib=2.53.6=h5d9569c_2
191 |   - gmp=6.1.0=0
192 |   - gsl=2.2.1=h0c605f7_3
193 |   - harfbuzz=1.7.6=hc5b324e_0
194 |   - jbig=2.1=0
195 |   - libffi=3.2.1=1
196 |   - libgcc=5.2.0=0
197 |   - libgcc-ng=7.2.0=hdf63c60_3
198 |   - libgfortran-ng=7.2.0=hdf63c60_3
199 |   - libiconv=1.14=0
200 |   - libopenblas=0.2.20=h9ac9557_7
201 |   - libssh2=1.8.0=0
202 |   - libstdcxx-ng=7.2.0=hdf63c60_3
203 |   - libxml2=2.9.8=h26e45fe_1
204 |   - numpy=1.14.3=py36h28100ab_2
205 |   - numpy-base=1.14.3=py36h0ea5e3f_1
206 |   - pandoc=1.15.0.6=0
207 |   - pcre=8.39=1
208 |   - pixman=0.34.0=0
209 |   - yaml=0.1.6=0
210 | #prefix: /lustre/scratch117/cellgen/team218/gp7/miniconda/envs/Micro-Exonator
211 | 


--------------------------------------------------------------------------------
/envs/R.yaml:
--------------------------------------------------------------------------------
 1 | channels:
 2 |   - bioconda
 3 |   - conda-forge
 4 |   - defaults
 5 | dependencies:
 6 |   - r-base =3.6.3
 7 |   - r-ggplot2
 8 |   - r-mixtools
 9 |   - r-data.table
10 | 


--------------------------------------------------------------------------------
/envs/biopython_py3.yaml:
--------------------------------------------------------------------------------
1 | 
2 | channels:
3 |   - bioconda
4 | dependencies:
5 |   - biopython
6 |   - python=3
7 | 


--------------------------------------------------------------------------------
/envs/core.yaml:
--------------------------------------------------------------------------------
 1 | channels:
 2 |   - conda-forge
 3 |   - bioconda
 4 |   - defaults
 5 |   - cgat
 6 | dependencies:
 7 |   - bcftools=1.7
 8 |   - bedtools=2.27.1
 9 |   - hisat2=2.1.0
10 |   - htslib=1.7
11 |   - pybedtools=0.7.10
12 |   - pysam=0.14.1
13 |   - samtools=1.9
14 |   - sra-tools
15 |   - biopython
16 |   - pandas=0.23.4
17 |   - python=2.7
18 |   - numpy=1.14.3
19 |   - numpy-base=1.14.3
20 |   - pybedtools
21 |   - pyBigWig
22 |   - bwa=0.7.15
23 |   - bowtie
24 |   - cramtools
25 |   - tbb=2020.2
26 | 


--------------------------------------------------------------------------------
/envs/core_py3.yaml:
--------------------------------------------------------------------------------
 1 | channels:
 2 |   - conda-forge
 3 |   - bioconda
 4 |   - defaults
 5 |   - cgat
 6 | dependencies:
 7 |   - bcftools=1.7=0
 8 |   - bedtools=2.27.1=he941832_2
 9 |   - hisat2=2.1.0=py36pl5.22.0_0
10 |   - htslib=1.7=0
11 |   - pybedtools=0.7.10=py36_2
12 |   - pysam=0.14.1=py36hae42fb6_1
13 |   - samtools=1.9=h8ee4bcc_1
14 |   - sra-tools=2.9.1_1
15 |   - biopython
16 |   - pandas=0.23.4=py36hf8a1672_0
17 |   - python=3.6.6=h5001a0f_0
18 |   - numpy=1.14.3
19 |   - numpy-base=1.14.3=py36h0ea5e3f_1
20 |   - pybedtools
21 |   - pyBigWig
22 |   - bwa=0.7.15
23 |   - bowtie
24 | 


--------------------------------------------------------------------------------
/envs/pybedtools.yaml:
--------------------------------------------------------------------------------
 1 | channels:
 2 |   - conda-forge
 3 |   - bioconda
 4 |   - defaults
 5 |   - cgat
 6 | dependencies:
 7 |  - pybedtools=0.8.0
 8 |  - bedtools=2.27.*
 9 |  - biopython
10 |  - pyBigWig
11 |  - python=2.7.*
12 | 


--------------------------------------------------------------------------------
/envs/snakemake.yaml:
--------------------------------------------------------------------------------
 1 | channels:
 2 |   -conda-forge
 3 |   -bioconda
 4 |   -defaults
 5 |   -cgat
 6 | dependencies:
 7 |  -snakemake
 8 |  -pybedtools=0.8.0
 9 |  -bedtools=2.27.1
10 |  -biopython
11 |  -pyBigWig
12 |  -python=3.*
13 | 


--------------------------------------------------------------------------------
/rules/Benchmark.smk:
--------------------------------------------------------------------------------
  1 | 
  2 | ############## Gene Count #######
  3 | 
  4 | # rule generate_star_olego:
  5 | #     input:
  6 | #         "config["Genome_fasta"]",
  7 | #
  8 | #     shell:
  9 | #         "start --runThreadN 5 --runMode genomeGenerate --genomeDir data/ "
 10 | #
 11 | #
 12 | #
 13 | # rule generate_star_index:
 14 | #     input:
 15 | #         "config["Genome_fasta"]",
 16 | #
 17 | #     shell:
 18 | #         "start --runThreadN 5 --runMode genomeGenerate --genomeDir data/ "
 19 | #
 20 | 
 21 | 
 22 | 
 23 | rule total_hisat2_to_genome:
 24 |      input:
 25 |          "FASTQ/{sample}.fastq",
 26 |          "data/Genome.1.ht2"
 27 |      output:
 28 |          "Genome_aligments/Hisat2/{sample}.sam"
 29 |      threads: 5
 30 |      shell:
 31 |          "hisat2 -x data/Genome -U {input[0]} -p 5 > {output}"
 32 | 
 33 | rule total_olego_to_Genome:
 34 |     input:
 35 |         "FASTQ/{sample}.fastq"
 36 |     output:
 37 |         "Genome_aligments/Olego/{sample}.sam"
 38 |     threads: 10
 39 |     shell:
 40 |         "/lustre/scratch117/cellgen/team218/gp7/olego/olego  -t 10 data/Genome_olego {input} > {output}"
 41 | 
 42 | 
 43 | rule total_STAR_to_Genome:
 44 |     input:
 45 |         "FASTQ/{sample}.fastq"
 46 |     output:
 47 |         "Genome_aligments/STAR/{sample}.samAligned.out.sam"
 48 |     threads: 5
 49 |     shell:
 50 |         "STAR --genomeDir data --readFilesIn {input} --runThreadN 5 --outFileNamePrefix  {output}"
 51 | 
 52 | rule mv_STAR:
 53 |     input:
 54 |         "Genome_aligments/STAR/{sample}.samAligned.out.sam"
 55 |     output:
 56 |         "Genome_aligments/STAR/{sample}.sam"
 57 |     threads: 5
 58 |     shell:
 59 |         "mv {input} {output}"
 60 | 
 61 | 
 62 | 
 63 | 
 64 | rule total_tophat_to_Genome:
 65 |     input:
 66 |         "FASTQ/{sample}.fastq"
 67 |     output:
 68 |         dir = "Genome_aligments/Tophat2/{sample}",
 69 |         sam = "Genome_aligments/Tophat2/{sample}.sam"
 70 |     threads: 5
 71 |     shell:
 72 |         "tophat2 -p 5 --no-convert-bam --microexon-search -o {output.dir} data/Genome_bowtie2 {input} && mv {output.dir}/accepted_hit.sam {output}"
 73 | 
 74 | 
 75 | rule SJ_count:
 76 |     input:
 77 |         "Genome_aligments/{Software}/{sample}.sam"
 78 |     output:
 79 |         "Genome_aligments/{Software}/{sample}.sam.SJ_count"
 80 |     shell:
 81 |         "python2 src/Get_introns_from_sam.py {input} Rd1 40 1000000 8 > {output}"
 82 | 
 83 | 
 84 | rule sam_merge:
 85 |     input:
 86 |         ["Genome_aligments/{Software}/" + x for x in expand("{sample}.sam", sample=DATA ) ]
 87 |     output:
 88 |         temp("Genome_aligments/{Software}/TOTAL.sam")
 89 |     shell:
 90 |         "samtools merge {output} {input}"
 91 | 
 92 | 
 93 | rule get_exons:
 94 |     input:
 95 |         "Genome_aligments/{Software}/TOTAL.sam"
 96 |     output:
 97 |         "Genome_aligments/{Software}/TOTAL.exons.{Software}"
 98 |     shell:
 99 |         "python2 Get_exons_from_sam.py {input} > {output}"
100 | 
101 | 
102 | 
103 | 
104 | rule SJ_ground_count:
105 |     input:
106 |         config["fastq_path"] + '{sample}.fastq.gz'
107 |     output:
108 |         "Ground_Truth/{sample}.GT.SJ_count"
109 |     shell:
110 |         "python2 SJ_count_truth.py /lustre/scratch117/cellgen/team218/gp7/Genome/mm10/Tracks/Gene_annotation/gencode.vM11.annotation.bed12 simulated_ME_isoforms.bed12 {input}  > {output}"
111 | 
112 | 
113 | 
114 | rule gene_count:
115 |     input:
116 |         "/lustre/scratch117/cellgen/team218/gp7/Genome/mm10/Tracks/Gene_annotation/gencode.vM11.annotation.gtf",
117 |         "Genome_aligments/{sample}.sam"
118 |     output:
119 |         "Genome_aligments/{sample}.gene_count.txt"
120 |     threads: 1
121 |     shell:
122 |         "featureCounts -a {input[0]} -o {output} {input[1]}"
123 | 
124 | 
125 | 
126 | rule done_gene_count:
127 |     input:
128 |         expand("Genome_aligments/{sample}.gene_count.txt", sample=DATA )
129 |     output:
130 |         "Round2/done.txt"
131 |     shell:
132 |         "echo done > {output}"
133 | #####
134 | 


--------------------------------------------------------------------------------
/rules/Get_data.smk:
--------------------------------------------------------------------------------
  1 | if str2bool(config.get("Keep_fastq_gz", False)):
  2 |     rule download_fastq:
  3 |         input:
  4 |             "download/{sample}.download.sh"
  5 |         output:
  6 |             "FASTQ/{sample}.fastq.gz"
  7 |         resources:
  8 |             get_data = 1 
  9 |         conda:
 10 |             "../envs/core.yaml"
 11 |         priority: -10
 12 |         shell:
 13 |             "bash {input}"
 14 | 
 15 | else:
 16 |     rule download_fastq:
 17 |         input:
 18 |             "download/{sample}.download.sh"
 19 |         output:
 20 |             temp("FASTQ/{sample}.fastq.gz")
 21 |         resources:
 22 |             get_data = 1 
 23 |         conda:
 24 |             "../envs/core.yaml"
 25 |         priority: -10
 26 |         shell:
 27 |             "bash {input}"
 28 | 
 29 | rule unzip:
 30 |    input:
 31 |        "FASTQ/{sample}.fastq.gz"
 32 |    output:
 33 |        temp("FASTQ/{sample}.fastq")
 34 |    shell:
 35 |        "zcat {input} > {output}"
 36 | 
 37 | rule get_fastq:
 38 |     input:
 39 |         expand("FASTQ/{sample}.fastq.gz", sample=DATA)	        
 40 | 
 41 | if "Gene_anontation_bed12" in config:
 42 |     pass
 43 | else:
 44 |     rule generate_bed12:
 45 |         input:
 46 |             config["Gene_anontation_GTF"]
 47 |         output:
 48 |             "data/transcriptome.bed12"
 49 |         shell:
 50 |             "python2 src/GTFtoBED12.py {input} > {output}"
 51 |   
 52 |     config["Gene_anontation_bed12"] = "data/transcriptome.bed12"
 53 |             
 54 |         
 55 | rule generate_fasta_from_bed12:
 56 |     input:
 57 |         config["Genome_fasta"],
 58 |         config["Gene_anontation_bed12"]
 59 |     output:
 60 |         "data/transcripts.fa"
 61 |     conda:
 62 |         "../envs/pybedtools.yaml"
 63 |     shell:
 64 |         "python2 src/Get_fasta_from_bed12.py {input} > {output}"
 65 | 
 66 | if not "ME_len" in config:
 67 |     config["ME_len"] = 30
 68 |     
 69 | if not "max_read_len" in config:
 70 |     config["max_read_len"] = 100
 71 |     
 72 | rule Splice_Junction_Library:
 73 |     input:
 74 |         config["Genome_fasta"],
 75 |         "data/transcripts.fa",
 76 |         config["Gene_anontation_bed12"]
 77 |     params:
 78 |         ME_len = config["ME_len"],
 79 |         max_read_len = config["max_read_len"]
 80 |     output:
 81 |         "Round1/ME_TAGs.fa"
 82 |     conda:
 83 |         "../envs/core.yaml"
 84 |     shell:
 85 |         "python2 src/SJ_tags_generator_for_micro_exons.py {input} {params.ME_len} {params.max_read_len} > {output}"
 86 | 
 87 | 
 88 | rule GetPWM:
 89 |     input:
 90 |         config["Genome_fasta"],
 91 |         config["Gene_anontation_bed12"]
 92 |     params:
 93 |         config["GT_AG_U2_5"],
 94 |         config["GT_AG_U2_3"]
 95 |     output:
 96 |         "data/GT_AG_U2_5.pwm",
 97 |         "data/GT_AG_U2_3.pwm"
 98 |     conda:
 99 |         "../envs/biopython_py3.yaml"
100 |     shell:
101 |         "python3 src/Get_splicing_PWMs.py {input} {params} {output}"
102 | 
103 | #if str2bool(config.get("Only_whippet", False))==False:
104 | #    rule gzip_fastq:
105 | #        input:
106 | #            "FASTQ/{sample}.fastq"
107 | #        output:
108 | #            temp("FASTQ/{sample}.fastq.gz")
109 | #        priority: 100
110 | #        shell:
111 | #            "gzip -c {input} > {output}"
112 |           
113 | #else:
114 | #    rule gzip_fastq:
115 | #        input:
116 | #            "FASTQ/{sample}.fastq"
117 | #        output:
118 | #            temp("FASTQ/{sample}.fastq.gz")
119 | #        priority: 100
120 | #        shell:
121 | #            "gzip {input}"            
122 |          
123 | 
124 | 
125 | # rule sra_to_fastq:
126 | #     input:
127 | #         config["input_dir"] + "/{sample}.sra"
128 | #     output:
129 | #         temp("data/fastq_paired/{sample}.fastq")
130 | #     shell:
131 | #         "fastq-dump {input} -O data/fastq_paired/"
132 | 
133 | 
134 | # rule fastq_gz_to_fastq:
135 | #     input:
136 | #         config["input_dir"] + "/{sample}.fastq.gz"
137 | #     output:
138 | #         temp("data/fastq/{sample}.fastq")
139 | #     shell:
140 | #         "gzip -dc {input} > {output}"
141 | #
142 | # rule fastq_input:
143 | #     input:
144 | #         config["input_dir"] + "/{sample}.fastq"
145 | #     output:
146 | #         "data/fastq/{sample}.fastq"
147 | #     shell:
148 | #         "ln -s {input} {output}"
149 | 
150 | #rule download_to_fastq:
151 | #    input:
152 | #        "download/{sample}.download.sh"
153 | #    output:
154 | #        "data/fastq/{sample}.fastq"
155 | #    shell:
156 | #        "bash {input}"
157 | 
158 | 
159 | # rule split_fastq:
160 | #     input:
161 | #         "data/fastq_paired/{sample}.fastq"
162 | #     output:
163 | #         temp("data/fastq/{sample}.fastq")
164 | #     shell:
165 | #         "python2 src/split_paired_end.py {input} > {output}"
166 | 


--------------------------------------------------------------------------------
/rules/Round1.smk:
--------------------------------------------------------------------------------
 1 | 
 2 | rule bwa_index:
 3 |     input:
 4 |         "Round1/ME_TAGs.fa"
 5 |     output:
 6 |         "Round1/ME_TAGs.fa.amb"
 7 |     conda:
 8 |         "../envs/core.yaml"
 9 |     shell:
10 |         "bwa index {input}"
11 | 
12 | rule Round1_bwa_mem_to_tags:
13 |     input:
14 |         "Round1/ME_TAGs.fa",
15 |         "FASTQ/{sample}.fastq.gz",
16 |         "Round1/ME_TAGs.fa.amb"
17 |     output:
18 |         temp("Round1/{sample}.sam")
19 |     threads: 5
20 |     priority: 100
21 |     params: 
22 |         indel = config["indel_penalty"]
23 |     conda:
24 |         "../envs/core.yaml"
25 |     shell:
26 |         "bwa mem -t {threads} -O {params.indel} -L 25 {input[0]} {input[1]} | awk '$6 ~ /I/' > {output}"
27 | 
28 | 
29 | rule Round1_alingment_pre_processing:
30 |     input:
31 |         "Round1/{sample}.sam"
32 |     output:
33 |         temp("Round1/{sample}.sam.pre_processed")
34 |     priority: 100
35 |     conda:
36 |         "../envs/core.yaml"
37 |     shell:
38 |         "python2 src/alingment_pre_processing.py {input} F > {output}"
39 | 


--------------------------------------------------------------------------------
/rules/Round1_post_processing.smk:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | rule row_Micro_Exon_reads:
 4 |     input:
 5 |         config["Genome_fasta"],
 6 |         "Round1/{sample}.sam.pre_processed",
 7 | 	"FASTQ/{sample}.fastq.gz"
 8 |     output:
 9 |         temp("Round1/{sample}.sam.row_ME"),
10 |         temp("Round1/{sample}.sam.row_ME.fastq")
11 |     conda:
12 |         "../envs/core.yaml"
13 |     shell:
14 |         "python2 src/row_ME2.py {input} > {output[0]}"
15 | 
16 | 
17 | rule hisat2_genome_index:
18 |     input:
19 |         config["Genome_fasta"]
20 |     output:
21 |         "data/Genome.1.ht2"
22 |     threads: 5
23 |     conda:
24 |         "../envs/core.yaml"
25 |     shell:
26 |         "hisat2-build {input} data/Genome"
27 | 
28 | if str2bool(config.get("skip_genome_alignment", False)):
29 | 
30 | 	rule hisat2_to_Genome:
31 | 	    input:
32 | 	        "Round1/{sample}.sam.row_ME.fastq",
33 | 	        "data/Genome.1.ht2"
34 | 	    output:
35 | 	        temp("Round1/{sample}.sam.row_ME.Genome.Aligned.out.sam")
36 | 	    threads: 1
37 | 	    conda:
38 | 	        "../envs/core.yaml"
39 | 	    shell:
40 | 	        "touch {output}"
41 | else:
42 | 
43 | 	rule hisat2_to_Genome:
44 | 	    input:
45 | 	        "Round1/{sample}.sam.row_ME.fastq",
46 | 	        "data/Genome.1.ht2"
47 | 	    output:
48 | 	        temp("Round1/{sample}.sam.row_ME.Genome.Aligned.out.sam")
49 | 	    threads: 1
50 | 	    conda:
51 | 	        "../envs/core.yaml"
52 | 	    shell:
53 | 	        "hisat2 -x data/Genome -U {input[0]} > {output}"
54 | 
55 | 
56 | rule Round1_filter:
57 |     input:
58 |         config["Genome_fasta"],
59 |         "Round1/{sample}.sam.row_ME",
60 |         "Round1/{sample}.sam.row_ME.Genome.Aligned.out.sam",
61 |         "data/GT_AG_U2_5.pwm",
62 |         "data/GT_AG_U2_3.pwm"
63 |     params:
64 |         bw = config["conservation_bigwig"],
65 |         ME_len = config["ME_len"]
66 |     output:
67 |         protected("Round1/{sample}.sam.row_ME.filter1")
68 |     conda:
69 |         "../envs/pybedtools.yaml"
70 |     shell:
71 |         "python2 src/ME_filter1.py {input} {params.bw} {params.ME_len} > {output}"
72 | 
73 | 
74 | rule Micro_Exon_table:
75 |     input:
76 |         expand("Round1/{sample}.sam.row_ME.filter1", sample=DATA )
77 |     output:
78 |         protected("Round1/TOTAL/TOTAL.sam.row_ME.filter1.ME_centric")
79 |     conda:
80 |         "../envs/core.yaml"
81 |     shell:
82 |         "cat Round1/*.sam.row_ME.filter1 | awk 'NF==16' > Round1/TOTAL/TOTAL.sam.row_ME.filter1  &&"
83 |         "python2 src/ME_centric_table.py Round1/TOTAL/TOTAL.sam.row_ME.filter1 > {output}"
84 | 
85 | 


--------------------------------------------------------------------------------
/rules/Round2.smk:
--------------------------------------------------------------------------------
  1 | rule Micro_Exon_Tags:
  2 |     input:
  3 |         "Round1/ME_TAGs.fa",
  4 |         "Round1/TOTAL/TOTAL.sam.row_ME.filter1.ME_centric"
  5 |     output:
  6 |         "Round2/ME_canonical_SJ_tags.de_novo.fa"
  7 |     conda:
  8 |         "../envs/core.yaml"
  9 |     shell:
 10 |         "python2 src/Micro_exons_tags.py  {input} > {output}"
 11 | 
 12 | rule Get_ME_from_annotation:
 13 |     input:
 14 |         config["Genome_fasta"],
 15 |         "Round1/TOTAL/TOTAL.sam.row_ME.filter1.ME_centric",
 16 |         config["Gene_anontation_bed12"],
 17 |         "data/GT_AG_U2_5.pwm",
 18 |         "data/GT_AG_U2_3.pwm",
 19 |         config["ME_DB"]
 20 |     params:
 21 |         bw = config["conservation_bigwig"],
 22 |         ME_len = config["ME_len"]
 23 |     output:
 24 |         "data/ME_canonical_SJ_tags.DB.fa",
 25 |         "data/DB.ME_centric"
 26 |     conda:
 27 |         "../envs/pybedtools.yaml"
 28 |     shell:
 29 |         "python2 src/Get_annotated_microexons.py  {input[0]} {input[1]} {input[2]} {input[3]} {input[4]} {params.bw} {params.ME_len} {input[5]} "
 30 | 
 31 | 
 32 | rule merge_tags:
 33 |     input:
 34 |         "Round2/ME_canonical_SJ_tags.de_novo.fa",
 35 |         "data/ME_canonical_SJ_tags.DB.fa"
 36 |     output:
 37 |         "Round2/ME_canonical_SJ_tags.fa"
 38 |     conda:
 39 |         "../envs/core.yaml"
 40 |     shell:
 41 |         "cat {input[0]} {input[1]} > {output}"
 42 | 
 43 | 
 44 | rule merge_ME_centric:
 45 |     input:
 46 |         "Round1/TOTAL/TOTAL.sam.row_ME.filter1.ME_centric",
 47 |         "data/DB.ME_centric"
 48 |     output:
 49 |         "Round2/TOTAL.ME_centric.txt"
 50 |     conda:
 51 |         "../envs/core.yaml"
 52 |     shell:
 53 |         "cat {input[0]} {input[1]} > {output}"
 54 | 
 55 | 
 56 | rule Round2_bowtie_tags_index:
 57 |     input:
 58 |         "Round2/ME_canonical_SJ_tags.fa"
 59 |     output:
 60 |         "Round2/ME_canonical_SJ_tags.fa.1.ebwt"
 61 |     conda:
 62 |         "../envs/core.yaml"
 63 |     shell:
 64 |         "bowtie-build {input} {input}"
 65 | 
 66 | rule download_fastq2:
 67 |     input:
 68 |         "download/{sample}.download.sh",
 69 |         "Round2/TOTAL.ME_centric.txt"
 70 |     params:
 71 |         "FASTQ/{sample}.fastq"
 72 |     output:
 73 |         temp("FASTQ/round2/{sample}.fastq")
 74 |     priority: -10
 75 |     resources: 
 76 |         get_data = 1
 77 |     conda:
 78 |         "../envs/core.yaml"
 79 |     shell:
 80 |         #"bash {input[0]}"
 81 |         "bash {input[0]} && mv {params} {output}"
 82 | 
 83 | def hard_drive_behavior(fastq):
 84 |     if config.get("Optimize_hard_drive", False)=="T":
 85 |     
 86 |         if "validate_fastq_list" in config:
 87 |         
 88 |             to_validate = set[()]
 89 |             
 90 |             with open(config["validate_fastq_list"]) as fastq_list:
 91 |                 reader = csv.reader(fastq_list, delimiter="\t")
 92 |                 for row in reader:
 93 |                     to_validate.add(row[0])
 94 |                     
 95 |             if fastq in to_validate:
 96 |                 return("FASTQ/round2/" + fastq + ".fastq.gz.valid")
 97 |             else:
 98 |                 return(  "FASTQ/round2/" + fastq + ".fastq.gz")
 99 |                 
100 |         else:
101 |             return(  "FASTQ/round2/" + fastq + ".fastq.gz")
102 |     else:
103 | 
104 |         if "validate_fastq_list" in config:
105 |         
106 |             to_validate = set([])
107 |             
108 |             with open(config["validate_fastq_list"]) as fastq_list:
109 |                 reader = csv.reader(fastq_list, delimiter="\t")
110 |                 for row in reader:
111 |                     to_validate.add(row[0])
112 |                     
113 |             if fastq in to_validate:
114 |                 return("FASTQ/" + fastq + ".fastq.gz.valid")
115 |             else:
116 |                 return(  "FASTQ/" + fastq + ".fastq.gz")
117 |         else:
118 | 
119 |             return("FASTQ/" + fastq + ".fastq.gz")
120 | 
121 | 
122 | rule validate_fastq:
123 |     input:
124 |         "FASTQ/{sample}.fastq.gz"
125 |     output:
126 |         "FASTQ/{sample}.fastq.gz.valid"
127 |     shell:
128 |         "python3 src/validate_fastq.py {input}"
129 |     
130 | rule validate_fastq2:
131 |     input:
132 |         "FASTQ/round2/{sample}.fastq.gz"
133 |     output:
134 |         "FASTQ/round2/{sample}.fastq.gz.valid"
135 |     shell:
136 |         "python3 src/validate_fastq.py {input}"
137 | 
138 | rule Round2_bowtie_to_tags:
139 |     input:
140 |         "Round2/ME_canonical_SJ_tags.fa",
141 |         hard_drive_behavior("{sample}"),
142 |         "Round2/ME_canonical_SJ_tags.fa.1.ebwt"
143 |     output:
144 |         temp("Round2/{sample}.sam")
145 |     threads: 5
146 |     priority: 100
147 |     conda:
148 |         "../envs/core.yaml"
149 |     shell:
150 |         "gzip -dc {input[1]} |  bowtie {input[0]} -p {threads} -q - -S -v 2 --seed 123 | awk '!($6 ~ /I/) && !($6 ~ /D/) && !($6 ~ /S/) && !($6 ~ /*/)' > {output}"
151 | 
152 | 
153 | rule Round2_alingment_pre_processing:
154 |     input:
155 |         "Round2/{sample}.sam"
156 |     output:
157 |         temp("Round2/{sample}.sam.pre_processed")
158 |     priority: 100
159 |     conda:
160 |         "../envs/core.yaml"
161 |     shell:
162 |         "python2 src/alingment_pre_processing_round2_bowtie.py {input} F > {output}"
163 | 


--------------------------------------------------------------------------------
/rules/Round2_post_processing.smk:
--------------------------------------------------------------------------------
  1 | 
  2 | rule ME_reads:
  3 |     input:
  4 |         "Round2/{sample}.sam.pre_processed",
  5 | 	"FASTQ/{sample}.fastq.gz"
  6 |     output:
  7 |         temp("Round2/{sample}.sam.pre_processed.fastq")
  8 |     priority: 100
  9 |     conda:
 10 |         "../envs/core.yaml"
 11 |     shell:
 12 |         "python2 src/round2_ME_reads_fastq2.py {input}"
 13 |         
 14 | rule Get_Genome:
 15 |     input:
 16 |         config["Genome_fasta"]
 17 |     output:
 18 |         "data/Genome"
 19 |     priority: 100
 20 |     shell:
 21 |         "cp {input} {output}"
 22 | 
 23 | rule bowtie_genome_index:
 24 |     input:
 25 |         "data/Genome"
 26 |     output:
 27 |         "data/Genome" + ".1.ebwt"
 28 |     priority: 100
 29 |     conda:
 30 |         "../envs/core.yaml"
 31 |     shell:
 32 |         "bowtie-build {input} {input}"
 33 |         
 34 | if str2bool(config.get("skip_genome_alignment", False)):
 35 | 
 36 |     rule bowtie_to_genome:
 37 |         input:
 38 |             "Round2/{sample}.sam.pre_processed.fastq",
 39 |             "data/Genome",
 40 |             "data/Genome" + ".1.ebwt"
 41 |         output:
 42 |             temp("Round2/{sample}.sam.pre_processed.hg19.sam")
 43 |         priority: 100
 44 |         conda:
 45 |             "../envs/core.yaml"
 46 |         shell:
 47 |             "touch {output}"
 48 | else:
 49 | 
 50 |     rule bowtie_to_genome:
 51 |         input:
 52 |             "Round2/{sample}.sam.pre_processed.fastq",
 53 |             "data/Genome",
 54 |             "data/Genome" + ".1.ebwt"
 55 |         output:
 56 |             temp("Round2/{sample}.sam.pre_processed.hg19.sam")
 57 |         priority: 100
 58 |         conda:
 59 |             "../envs/core.yaml"
 60 |         shell:
 61 |             "bowtie {input[1]} -p 1 -q {input[0]} -S -v 2 --seed 123| awk '$2==0 || $2==16'> {output}"
 62 | 
 63 | 
 64 | rule Round2_filter:
 65 |     input:
 66 |         "Round2/{sample}.sam.pre_processed",
 67 |         "Round2/{sample}.sam.pre_processed.hg19.sam",
 68 |     output:
 69 |         temp("Round2/{sample}.sam.pre_processed.filter1")
 70 |     priority: 100
 71 |     conda:
 72 |         "../envs/core.yaml"
 73 |     shell:
 74 |         "python2 src/Filter1_round2.py {input} > {output}"
 75 | 
 76 | 
 77 | rule ME_SJ_coverage:
 78 |     input:
 79 |         "Round2/ME_canonical_SJ_tags.fa",
 80 |         "Round2/TOTAL.ME_centric.txt",
 81 |         config["Gene_anontation_bed12"],
 82 |         "Round2/{sample}.sam.pre_processed.filter1"
 83 |     params:
 84 |         ME_len = config["ME_len"]
 85 |     output:
 86 |         protected("Round2/{sample}.sam.pre_processed.filter1.ME_SJ_coverage")
 87 |     priority: 100
 88 |     conda:
 89 |         "../envs/core.yaml"
 90 |     shell:
 91 |         "python2 src/ME_SJ_coverage.py {input} {params.ME_len} > {output}"
 92 | 
 93 | 
 94 | rule Total_sample_exon_counts:
 95 |     input:
 96 |         expand("Round2/{sample}.sam.pre_processed.filter1.ME_SJ_coverage", sample=DATA )
 97 |     output:
 98 |         "Round2/TOTAL.filter1.ME_SJ_coverage"
 99 |     conda:
100 |         "../envs/core.yaml"
101 |     shell:
102 |       "cat Round2/*.filter1.ME_SJ_coverage > {output}"
103 | 
104 | rule write_ME_matches:
105 |     input:
106 |         "Round2/TOTAL.ME_centric.txt"
107 |     output:
108 |         "Round2/TOTAL.ME_centric.ME_matches.txt"
109 |     conda:
110 |         "../envs/core_py3.yaml"
111 |     shell:
112 |         "python3 src/Get_ME_matches.py {input} > {output}"
113 | 
114 | 
115 | def get_min_reads():
116 |     if 'min_reads_PSI' in config:
117 |         return(int(config['min_reads_PSI']))
118 |     else:
119 |         return(5)
120 | 
121 | 
122 | rule coverage_filter:
123 |     input:
124 |        "Round2/TOTAL.filter1.ME_SJ_coverage"
125 |     params:
126 |         min_reads_sample = get_min_reads()
127 |     output:
128 |         "Round2/TOTAL.sample_cov_filter.txt"
129 |     script:
130 |         "../src/coverage_sample_filter.py"
131 | 
132 | def get_min_conservation():
133 |     if "min_conservation" in config:
134 |         return(int(config["min_conservation"]))
135 |     else:
136 |         return(2) #default value for min_conservation is 2
137 | 	
138 | rule Output:
139 |     input:
140 |         ME_table = "Round2/TOTAL.ME_centric.txt",
141 |         ME_coverage = "Round2/TOTAL.sample_cov_filter.txt",
142 |         ME_matches_file = "Round2/TOTAL.ME_centric.ME_matches.txt"
143 |     params:
144 |         wd = config["working_directory"],
145 |         min_number_files_detected = config["min_number_files_detected"],
146 |         skip_mixture = str(str2bool(config.get("skip_mixture_model_filter", False))),
147 |         min_conservation = get_min_conservation()
148 |     output:
149 |         out_filtered_ME = "Report/out_filtered_ME.txt",
150 |         out_low_scored_ME = "Report/out_low_scored_ME.txt",
151 |         out_shorter_than_3_ME = "Report/out_shorter_than_3_ME.txt",
152 |         #"Report/report.html",
153 |         #out_filtered_ME_cov = "Report/out_filtered_ME.cov.txt"
154 |     log:
155 |         "logs/Output.log"
156 |     conda:
157 |         "../envs/R.yaml"
158 |     script:
159 |         "../src/final_filters3.R"        
160 |         
161 | #    shell:
162 | #        '''R -e  'rmarkdown::render("src/final_filters2.Rmd",params = list(ME_table="{params.wd}{input[0]}", ME_coverage="{params.wd}{input[1]}", ME_matches_file="{params.wd}{input[2]}", out_filtered_ME="{params.wd}{output[0]}", out_low_scored_ME="{params.wd}{output[1]}", out_shorter_than_3_ME="{params.wd}{output[2]}", min_number_files_detected={params.min_number_files_detected}, out_filtered_ME_cov="{params.wd}{output[4]}" ), output_file="{params.wd}{output[3]}")' 2> {log} '''
163 | 
164 | 
165 | rule high_confident_filters:
166 |     input:
167 |         config["Genome_fasta"],
168 |         config["Gene_anontation_bed12"],
169 |         "Round2/TOTAL.filter1.ME_SJ_coverage",
170 |         "Report/out_filtered_ME.txt",
171 |         "Report/out_low_scored_ME.txt"
172 |     output:
173 |         "Report/out.high_quality.txt"
174 |     conda:
175 |         "../envs/core_py3.yaml"
176 |     shell:
177 |         "python src/high_confident_list.py {input}  > {output}"
178 | 
179 | 
180 | rule coverage_to_PSI:
181 |     input:
182 |         "Round2/TOTAL.filter1.ME_SJ_coverage"
183 |     params:
184 |         config["min_reads_PSI"],
185 |         config["paired_samples"]    
186 |     output:
187 |         "Report/out_filtered_ME.PSI.txt"
188 |     conda:
189 |         "../envs/core_py3.yaml"
190 |     shell:
191 |         "python src/counts_to_PSI.py {input} {params} > {output}"
192 | 
193 | 
194 | rule annotation_stats:
195 |     input:
196 |         config["Gene_anontation_bed12"],
197 |         "Report/out.high_quality.txt",
198 |     params:
199 |         30
200 |     output:
201 |         "Report/stats/Microexons.not_consensus",
202 |         "Report/stats/Microexons.annotation.stats"
203 |     conda:
204 |         "../envs/core_py3.yaml"
205 |     shell:
206 |         "python3 src/stats/discovery_stats.py {input} {params}"
207 | 


--------------------------------------------------------------------------------
/rules/Whippet_delta.smk:
--------------------------------------------------------------------------------
 1 | 
 2 | comparison_names = whippet_delta.keys()
 3 | 
 4 | 
 5 | if "whippet_delta" in config:
 6 | 
 7 |       if str2bool(config.get("Only_whippet", False)):
 8 |             rule differential_inclusion:
 9 |                 input:
10 |                     expand("Whippet/Delta/{comparison_name}.diff.gz", comparison_name=comparison_names)
11 |       else:
12 |             rule differential_inclusion:
13 |                 input:
14 |                     expand("Whippet/Delta/{comparison_name}.diff.microexons", comparison_name=comparison_names),
15 |                     expand("Whippet/Delta/{comparison_name}.diff.ME.microexons", comparison_name=comparison_names)
16 | 
17 | 
18 | rule whippet_delta:
19 |     input:
20 |         lambda wildcards : expand("Whippet/Quant/{sample}.psi.gz", sample= whippet_delta[wildcards.comparison_name]["A"].split(",")),
21 |         lambda wildcards : expand("Whippet/Quant/{sample}.psi.gz", sample= whippet_delta[wildcards.comparison_name]["B"].split(","))
22 |     output:
23 |         "Whippet/Delta/{comparison_name}.diff.gz"
24 |     params:
25 |         bin = config["whippet_bin_folder"],
26 |         a = lambda wildcards : ",".join(expand("Whippet/Quant/{sample}.psi.gz", sample= whippet_delta[wildcards.comparison_name]["A"].split(","))),
27 |         b = lambda wildcards : ",".join(expand("Whippet/Quant/{sample}.psi.gz", sample= whippet_delta[wildcards.comparison_name]["B"].split(","))),
28 |         o = lambda wildcards : "Whippet/Delta/" + wildcards.comparison_name,
29 |         julia = config["julia"]
30 |     shell:
31 |         "{params.julia} {params.bin}/whippet-delta.jl -a {params.a} -b {params.b} -o {params.o}"
32 | 
33 | 
34 | 
35 | rule whippet_delta_ME:
36 |     input:
37 |         lambda wildcards : expand("Whippet/Quant/{sample}.psi.ME.gz", sample= whippet_delta[wildcards.comparison_name]["A"].split(",")),
38 |         lambda wildcards : expand("Whippet/Quant/{sample}.psi.ME.gz", sample= whippet_delta[wildcards.comparison_name]["B"].split(","))
39 |     output:
40 |         "Whippet/Delta/{comparison_name}.ME.diff.gz"
41 |     params:
42 |         bin = config["whippet_bin_folder"],
43 |         a = lambda wildcards : ",".join(expand("Whippet/Quant/{sample}.psi.ME.gz", sample= whippet_delta[wildcards.comparison_name]["A"].split(","))),
44 |         b = lambda wildcards : ",".join(expand("Whippet/Quant/{sample}.psi.ME.gz", sample= whippet_delta[wildcards.comparison_name]["B"].split(","))),
45 |         o = lambda wildcards : "Whippet/Delta/" + wildcards.comparison_name + ".ME",
46 |         julia = config["julia"]
47 |     shell:
48 |         "{params.julia} {params.bin}/whippet-delta.jl -a {params.a} -b {params.b} -o {params.o} "
49 | 
50 | 
51 | 


--------------------------------------------------------------------------------
/rules/pseudo_pool.smk:
--------------------------------------------------------------------------------
  1 | import glob, os
  2 | import random
  3 | import csv
  4 | import gzip
  5 | from collections import defaultdict
  6 | 
  7 | 
  8 | def partition (list_in, n):  # Function to do random pooling
  9 |     random.shuffle(list_in)
 10 |     return [list_in[i::n] for i in range(n)]
 11 | 
 12 | #n_sb = 5
 13 | if "n_pseudo_bulks" in config:
 14 |     n_cells = int(config["n_pseudo_bulks"])
 15 | else:
 16 |     n_cells = 15 
 17 | 
 18 | cluster_files_pb = dict()
 19 | sb_IDs = set()
 20 | 
 21 | for cluster, files in cluster_files.items():
 22 |     sb = 1
 23 |     n_sb = round(len(files)/n_cells)
 24 |     if n_sb<3:
 25 |         n_sb=3
 26 |         
 27 |     for pool in partition(files, n_sb):
 28 |         cluster_files_pb[(cluster, sb)] = pool
 29 |         sb_IDs.add(cluster + "_" + str(sb))
 30 |         sb += 1
 31 | 
 32 | rule get_pseudo_bulk_membership:
 33 |     output:
 34 |         table = "Whippet/Quant/Single_Cell/Pseudo_bulks/pseudo_bulk_membership.tsv"
 35 |     run:
 36 |         with open(output.table, "w") as out:
 37 |             writer = csv.writer(out, delimiter='\t')
 38 |             writer.writerow(["pseudo_bulk_ID", "samples"])
 39 |             
 40 |             for key in cluster_files_pb:
 41 |                 cluster, sb = key
 42 |                 sb_ID = cluster + "_" + str(sb)
 43 |                 samples = ",".join(cluster_files_pb[key])
 44 |                 writer.writerow([sb_ID, samples])
 45 |                 
 46 | 
 47 | def get_files_by_cluster_pb(cluster, pool_ID):
 48 |     ext = ".fastq.gz"
 49 |     path="FASTQ/"
 50 |     return([path + x + ext for x in cluster_files_pb[(cluster, int(pool_ID))]])
 51 | 
 52 | rule quant_pool_pb:
 53 |     input:
 54 |         fastq = lambda w: get_files_by_cluster_pb(w.cluster, w.pool_ID),
 55 | 	index = "Whippet/Index/whippet.jls"
 56 |     output:
 57 |         temp("Whippet/Quant/Single_Cell/Pseudo_bulks/{cluster}_{pool_ID}.gene.tpm.gz"),
 58 | 	temp("Whippet/Quant/Single_Cell/Pseudo_bulks/{cluster}_{pool_ID}.isoform.tpm.gz"),
 59 |         temp("Whippet/Quant/Single_Cell/Pseudo_bulks/{cluster}_{pool_ID}.jnc.gz"),
 60 |         temp("Whippet/Quant/Single_Cell/Pseudo_bulks/{cluster}_{pool_ID}.map.gz"),
 61 |         temp("Whippet/Quant/Single_Cell/Pseudo_bulks/{cluster}_{pool_ID}.psi.gz")
 62 |     params:
 63 |         bin = config["whippet_bin_folder"],
 64 |         output = "Whippet/Quant/Single_Cell/Pseudo_bulks/{cluster}_{pool_ID}"
 65 |     priority: 10
 66 |     shell:
 67 |         "julia {params.bin}/whippet-quant.jl <( cat {input.fastq} ) --force-gz -x {input.index}  -o {params.output}"
 68 |         
 69 | #print(expand("Whippet/Quant/Single_Cell/Pseudo_bulks/{cluster}_{pool_ID}.psi.gz", cluster=cluster_files.keys(), pool_ID=list(range(1, n_sb+1  ))))
 70 |         
 71 | rule get_pseudo_pools:
 72 |     input:
 73 |         expand("Whippet/Quant/Single_Cell/Pseudo_bulks/{pseudo_pool}.psi.gz", pseudo_pool=sb_IDs)
 74 | 
 75 | 	
 76 | 	
 77 | rule collapse_pseudo_pools:
 78 |   input: 
 79 |       gene = "Whippet/Quant/Single_Cell/Pseudo_bulks/pseudo_bulks.gene.tpm.tsv",
 80 |       isoform = "Whippet/Quant/Single_Cell/Pseudo_bulks/pseudo_bulks.isoform.tpm.tsv",
 81 |       psi =  "Whippet/Quant/Single_Cell/Pseudo_bulks/pseudo_bulks.psi.tsv",
 82 |       table =  "Whippet/Quant/Single_Cell/Pseudo_bulks/pseudo_bulk_membership.tsv"
 83 | 
 84 | 
 85 | rule merge_quant_gene_sp:
 86 |     input:
 87 |         files = expand("Whippet/Quant/Single_Cell/Pseudo_bulks/{pseudo_pool}.gene.tpm.gz",  pseudo_pool=sb_IDs),
 88 |         jnc =  expand("Whippet/Quant/Single_Cell/Pseudo_bulks/{pseudo_pool}.jnc.gz", pseudo_pool=sb_IDs),
 89 |         mapf =  expand("Whippet/Quant/Single_Cell/Pseudo_bulks/{pseudo_pool}.map.gz", pseudo_pool=sb_IDs),
 90 |         psi =  expand("Whippet/Quant/Single_Cell/Pseudo_bulks/{pseudo_pool}.psi.gz", pseudo_pool=sb_IDs)
 91 |     params:
 92 |         feature = "Gene"
 93 |     output:
 94 |         merged = "Whippet/Quant/Single_Cell/Pseudo_bulks/pseudo_bulks.gene.tpm.tsv"
 95 |     script:
 96 |         "../src/merge_quant.py"
 97 | 
 98 | 
 99 | rule merge_quant_isoform_sp:
100 |     input:
101 |         files = expand("Whippet/Quant/Single_Cell/Pseudo_bulks/{pseudo_pool}.isoform.tpm.gz", pseudo_pool=sb_IDs)
102 |     params:
103 |         feature = "Isoform"
104 |     output:
105 |         merged = "Whippet/Quant/Single_Cell/Pseudo_bulks/pseudo_bulks.isoform.tpm.tsv"
106 |     script:
107 |         "../src/merge_quant.py"
108 | 	
109 | rule merge_quant_PSI_sp:
110 |     input:
111 |         files = expand("Whippet/Quant/Single_Cell/Pseudo_bulks/{pseudo_pool}.psi.gz", pseudo_pool=sb_IDs)
112 |     params:
113 |         feature = "PSI"
114 |     output:
115 |         merged = "Whippet/Quant/Single_Cell/Pseudo_bulks/pseudo_bulks.psi.tsv"
116 |     script:
117 |         "../src/merge_quant.py"
118 | 


--------------------------------------------------------------------------------
/rules/sashimi.smk:
--------------------------------------------------------------------------------
 1 | import csv
 2 | 
 3 | def get_sasshimi_window(ME, w):
 4 | 
 5 | 	ME_strand, ME_start, ME_end = ME.split("_")[-3:]
 6 | 	chrom =  "_".join(ME.split("_")[:-3])
 7 | 					
 8 | 	start = str(int(ME_start)-w)
 9 | 	end = str(int(ME_start)+w)
10 | 	return(chrom + ":" + start + "-" + end)
11 | 
12 | if "sashimi_tsv" in config:
13 | 
14 |     target_ME = set([])
15 | 	
16 |     with open("Report/novel_highly_included.tsv") as tsv:
17 | 					 
18 |         reader = csv.DictReader(tsv, delimiter="\t")
19 |         for row in reader:
20 |             #print(row)
21 |             if float(row["mean_PSI"])>0.9:
22 |                 target_ME.add(row["ME"])
23 | 
24 |         rule ggsashmi_bulk_scripts:
25 |             params:
26 |                 gtf = config["Gene_anontation_GTF"],
27 |                 tsv = config["sashimi_tsv"],
28 |                 region = lambda w: get_sasshimi_window(w.ME, 10000),
29 |                 out = "ggsashimi/{ME}",
30 |                 pallete = config["sashimi_pallete"]
31 |             output:
32 |                 "ggsashimi/{ME}.sh"
33 |                 #"ggsashimi/{ME}.pdf"
34 |             shell:
35 |                 "echo python src/sashimi-plot.py -b {params.tsv} -c {params.region} -g {params.gtf} -o {params.out} -P {params.pallete} -C 3 -O 3 -A mean > {output}"
36 | 
37 |         rule run_sashimi:
38 |             input:
39 |                 "ggsashimi/{ME}.sh"
40 |             output:
41 |                 "ggsashimi/{ME}.pdf"
42 |             shell:
43 |                 "bash {input}"
44 | 
45 |         rule get_sashimis_bulk:
46 |             input:
47 |                 expand("ggsashimi/{ME}.pdf", ME=target_ME)
48 | 


--------------------------------------------------------------------------------
/src/Filter1_round2.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import csv
 3 | from collections import defaultdict
 4 | 
 5 | csv.field_size_limit(100000000)
 6 | 
 7 | def main(pre_processed, genome_sam):
 8 | 
 9 | 	read_SJ = defaultdict(set)
10 | 	black_list = set([])
11 | 
12 | 	# for row in csv.reader(open(dust), delimiter = '>'):
13 | 
14 | 	# 	black_list.add(row[1])
15 | 
16 | 	# for row in csv.reader(open(repbase), delimiter = '\t'):
17 | 
18 | 	# 	black_list.add(row[9])
19 | 
20 | 	for row in csv.reader(open(genome_sam), delimiter = '\t'):
21 |                 
22 |             try:
23 | 	        if row[1]=="0" or row[1]=="16":
24 | 	            black_list.add(row[0])
25 |             except ValueError:
26 |                 pass
27 | 
28 | 	for row in csv.reader(open(pre_processed), delimiter = '\t'):
29 |             
30 |             try:
31 | 	        read, flag, tag, start, cigar, seq, qual = row
32 | 
33 | 		SJ = tag.split("|")[0]
34 | 		read_SJ[read].add(SJ)
35 |             except ValueError:
36 |                 pass
37 | 
38 | 	for row in csv.reader(open(pre_processed), delimiter = '\t'):
39 |             try:        
40 | 		read, flag, tag, start, cigar, seq, qual = row
41 | 
42 | 		#if (read in black_list)==False and len(read_SJ[read])==1:
43 | 		if (read in black_list)==False:
44 | 		    print "\t".join(row)
45 |             except ValueError:
46 |                 pass
47 | 		#print black_list
48 | 
49 | 
50 | main(sys.argv[1], sys.argv[2]) #, sys.argv[3], sys.argv[4])
51 | 


--------------------------------------------------------------------------------
/src/GTFtoBED12.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import csv
 3 | from collections import defaultdict
 4 | 
 5 | def main(gtf_file):
 6 |     
 7 |     with open(gtf_file) as gtf:
 8 |         
 9 |         transcript_coords = dict()
10 |         transcript_qstarts_blocksize = defaultdict(list)
11 |         reader = csv.reader(gtf, delimiter="\t")
12 |         
13 |         for row in reader:
14 |             
15 |             if row[0][0]!="#":
16 |                 
17 |                 chrom = row[0]
18 |                 group = row[1]
19 |                 blocktype = row[2]
20 |                 block_start = int(row[3]) - 1
21 |                 block_end = int(row[4])
22 |                 block_size = block_end - block_start
23 |                 strand = row[6]
24 | 
25 |                 tags = row[8].strip(" ").split(";")
26 | 
27 |                 for t in tags:
28 |                     pair =  t.strip(" ").split(" ")
29 |                     if len(pair)==2:
30 |                         ID_type, ID  = pair
31 |                         if ID_type == "transcript_id":
32 |                             transcript = ID.strip('"')
33 | 
34 |                 if blocktype == 'transcript':
35 | 
36 |                     transcript_coords[transcript] = (chrom, block_start, block_end, strand)
37 | 
38 |                 if blocktype == 'exon':
39 | 
40 |                     exon_size = block_end - block_start
41 | 
42 | 
43 |                     transcript_qstarts_blocksize[transcript].append((block_start, exon_size))
44 | 
45 | 
46 |         for transcript in transcript_coords:
47 | 
48 | 
49 |             chrom, start, end, strand = transcript_coords[transcript]
50 | 
51 |             n_blocks = len(transcript_qstarts_blocksize[transcript])
52 | 
53 | 
54 |             q_b_tuples = sorted(transcript_qstarts_blocksize[transcript] , key=lambda x: x[0])
55 | 
56 |             qstarts_list = [x[0] for x in q_b_tuples ]
57 |             blocksizes_list = [x[1] for x in q_b_tuples ]
58 | 
59 |             qstarts = ",".join(map(str, [x - start for x in qstarts_list] ))
60 |             blocksizes = ",".join(map(str, blocksizes_list))
61 | 
62 | 
63 |             bed12 = [chrom, start, end, transcript, "0", strand, start, end, "0", n_blocks, blocksizes, qstarts]
64 | 
65 |             print( "\t".join(map(str, bed12)))
66 | 
67 | if __name__ == '__main__':
68 | 	main(sys.argv[1])
69 | 


--------------------------------------------------------------------------------
/src/Get_ME_matches.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import sys
 3 | import csv
 4 | 
 5 | 
 6 | def main(ME_centric):
 7 |     
 8 |     header = ["ME", "U2_score", "Vertebrate_conservation", "ME_len", "ME_max_U2"]
 9 |     
10 |     print("\t".join(header))
11 | 
12 |     for row in csv.reader(open(ME_centric), delimiter = '\t'):
13 | 
14 |         ME, transcript, sum_total_coverage, total_SJs, total_coverages, len_micro_exon_seq_found, micro_exon_seq_found, total_number_of_micro_exons_matches, U2_scores, mean_conservations, P_MEs, total_ME = row
15 | 
16 |         ME_strand, ME_start, ME_end = ME.split("_")[-3:]
17 |         ME_chrom =  "_".join(ME.split("_")[:-3])
18 |         
19 |         #print(total_ME.split("|"))
20 | 		
21 |         for ME_match in total_ME.split(","):
22 |             
23 |             print("\t".join(ME_match.split("|") + [len_micro_exon_seq_found, U2_scores]))
24 | 
25 | if __name__ == '__main__':
26 |         main (sys.argv[1])
27 | 


--------------------------------------------------------------------------------
/src/Get_exons_from_sam.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import csv
  3 | from Bio import SeqIO
  4 | from Bio.Seq import Seq
  5 | from Bio.Alphabet import generic_dna
  6 | from collections import defaultdict
  7 | 
  8 | 
  9 | SeqTable = []
 10 | 
 11 | #dicionario de fag [Rd1/Rd2, +/-]
 12 | flag_dict = {'73':[1,1], '89':[1,1], '121':[1,-1], '153':[-1,-1], '185':[-1,-1], '137':[-1,1], '99':[1,1], '147':[-1,-1], '83':[1,-1], '163':[-1,1], '67':[1,1], '115':[1,-1], '179':[-1,-1], '81':[1,-1], "161":[-1,1], '97':[1,1], '145':[-1,-1], '65':[1,1], '129':[-1,1], '113':[1,-1], '177':[-1,-1] }
 13 | 
 14 | 
 15 | 
 16 | def ascii_classifier(c): 
 17 | 
 18 | 	ascii = ord(c)
 19 | 	if ascii >= 48 and ascii <= 57:
 20 | 		return 'number'
 21 | 	else:
 22 | 		return 'letter'
 23 | 			
 24 | 
 25 | def main(sam):          #hay que indicar si forward es Rd1 o Rd2
 26 | 	reader = csv.reader(open(sam), delimiter = '\t')
 27 | 
 28 | 	exon_count = defaultdict(int)
 29 | 	
 30 | 	forward = "Rd1"
 31 | 	
 32 | 	pair_ori = 0
 33 | 	if forward == "Rd1":
 34 | 		pair_ori = 1
 35 | 	elif forward == "Rd2":
 36 | 		pair_ori = -1
 37 | 	
 38 | 	
 39 | 	for row in reader:
 40 | 		if row[0][0] != "@":
 41 | 			if "N" in row[5]:
 42 | 				read = row[0]
 43 | 				flag = row[1]
 44 | 				chr = row[2]
 45 | 				start = int(row[3]) - 1             #Sam es 1 referenciado y es mas comodo trabajar en cordeneadas 0 refereciadas
 46 | 				cigar = row[5]
 47 | 				seq = row[9]
 48 | 				
 49 | 				pair_ori = 0
 50 | 				if forward == "Rd1":
 51 | 					pair_ori = 1
 52 | 				elif forward == "Rd2":
 53 | 					pair_ori = -1
 54 | 						
 55 | 				self_strand = 1
 56 | 				pair_strand = '+'
 57 | 							
 58 | 				#Si no se tiene el flag XS:A:- se tienen que implementar las operaciones a nivel de bits:
 59 | 				
 60 | 				if (1 & int(flag)):    #paired end
 61 | 					pair_number = flag_dict[flag][0]
 62 | 					self_strand = flag_dict[flag][1]
 63 | 					if pair_ori*self_strand*pair_number==-1:
 64 | 						pair_strand = '-'
 65 | 											
 66 | 				elif (16 & int(flag)):   #single end      
 67 | 					self_strand = -1
 68 | 					pair_strand = '-'
 69 | 				
 70 | 				if self_strand == -1:
 71 | 					seq = str(Seq(seq).reverse_complement())
 72 | 						
 73 | 				aux_str = ''
 74 | 				cigar_vars = [] 
 75 | 							
 76 | 				for c in cigar:
 77 | 					c_type = ascii_classifier(c)
 78 | 
 79 | 					if c_type == 'number':
 80 | 						aux_str += c
 81 | 
 82 | 					elif c_type == 'letter':
 83 | 						cigar_vars.append((c, int(aux_str)))
 84 | 						aux_str = ''
 85 | 
 86 | 				Exon_starts = [start]
 87 | 				Exon_ends = []
 88 | 				
 89 | 				block = 0
 90 | 				var_index = 0
 91 | 						
 92 | 				for var in cigar_vars:
 93 | 					var_type = var[0]
 94 | 					var_value = var[1]
 95 | 					var_index += 1
 96 | 					
 97 | 					if var_type == 'M':
 98 | 						block += var_value						
 99 | 						
100 | 					if var_type == 'D':
101 | 						block += var_value
102 | 											
103 | 					if var_type == 'I':
104 | 						block += 0
105 | 						
106 | 					if var_type == 'N':
107 | 						Exon_ends.append(Exon_starts[-1] + block)
108 | 						Exon_starts.append(Exon_ends[-1] + var_value)
109 | 						block = 0
110 | 						
111 | 					if var_index == len(cigar_vars):
112 | 						Exon_ends.append(Exon_starts[-1] + block)
113 | 
114 | 				if len(Exon_starts)>=3:
115 | 
116 | 				
117 | 					#for e5s, e5e, e3s, e3e  in zip(Exon_starts, Exon_ends, Exon_starts[1:], Exon_ends[1:]):
118 | 					for estart, eend in zip(Exon_starts[1:-1], Exon_ends[1:-1]):
119 | 
120 | 						exon = "_".join(map(str, [chr, estart, eend]))
121 | 
122 | 						exon_count[exon] += 1
123 | 
124 | 
125 | 	for exon, count in 	exon_count.items():
126 | 
127 | 		print exon, count
128 | 							
129 | 			
130 | 					
131 | 			
132 | 
133 | 
134 | 
135 | if __name__ == '__main__':
136 | 	main(sys.argv[1])
137 | 


--------------------------------------------------------------------------------
/src/Get_fasta_from_bed12.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import csv
 3 | from collections import defaultdict
 4 | from Bio import SeqIO
 5 | from Bio.Seq import Seq
 6 | from Bio.Alphabet import generic_dna
 7 | 
 8 | Genome = {}
 9 | 
10 | def Genomictabulator(fasta):
11 | 
12 | 	print >> sys.stderr, "Loading the genome into RAM memory ...",
13 | 
14 | 	f = open(fasta)
15 | 
16 | 	for chrfa in SeqIO.parse(f, "fasta"):
17 | 		Genome[chrfa.id] = chrfa.seq
18 | 
19 | 	print >> sys.stderr, "OK"
20 | 
21 | 	f.close()
22 | 
23 | 
24 | def main(bed12):
25 | 
26 |     transcripts_seq = defaultdict(str)
27 | 	
28 | 
29 |     for row in csv.reader(open(bed12), delimiter = '\t'):
30 |         start = int(row[1])
31 |         end = int(row[2])
32 |         strand = row[5]
33 |         bn = int(row[9])
34 |         chrom = row[0]
35 |         transcript = row[3]
36 |         blocksizes = map(int, row[10].strip(",").split(","))
37 |         qstarts = map (int, row[11].strip(",").split(","))
38 |         
39 | 	seq = ""
40 | 	
41 |         if chrom in Genome:
42 | 
43 |             for q, b in zip(qstarts, blocksizes):
44 | 
45 |                 estart = start + q
46 |                 eend = start + q + b
47 |                 elength = eend - estart
48 | 
49 |                 exon_seq = Genome[chrom][estart:eend]
50 | 
51 |                 seq += exon_seq
52 | 		
53 | 	
54 |         if strand=="-":
55 |             seq = seq.reverse_complement()
56 | 
57 |         seq = str(seq).upper()
58 | 
59 |         print(">" + transcript)
60 |         print(seq)
61 | 	
62 | 
63 | if __name__ == '__main__':
64 |     Genomictabulator(sys.argv[1])
65 |     main(sys.argv[2])
66 | 


--------------------------------------------------------------------------------
/src/Get_introns_from_sam.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import csv
  3 | from Bio import SeqIO
  4 | from Bio.Seq import Seq
  5 | from Bio.Alphabet import generic_dna
  6 | from collections import defaultdict
  7 | 
  8 | 
  9 | SeqTable = []
 10 | 
 11 | #dicionario de fag [Rd1/Rd2, +/-]
 12 | flag_dict = {'73':[1,1], '89':[1,1], '121':[1,-1], '153':[-1,-1], '185':[-1,-1], '137':[-1,1], '99':[1,1], '147':[-1,-1], '83':[1,-1], '163':[-1,1], '67':[1,1], '115':[1,-1], '179':[-1,-1], '81':[1,-1], "161":[-1,1], '97':[1,1], '145':[-1,-1], '65':[1,1], '129':[-1,1], '113':[1,-1], '177':[-1,-1] }
 13 | 
 14 | def ascii_classifier(c):
 15 | 
 16 |         ascii = ord(c)
 17 |         if ascii >= 48 and ascii <= 57:
 18 |                 return 'number'
 19 |         else:
 20 |                 return 'letter'
 21 | 
 22 | 
 23 | def main(sam, forward, min_ilen, max_ilen, anchor):          #hay que indicar si forward es Rd1 o Rd2
 24 | #	reader = csv.reader(open(sam), delimiter = '\t')
 25 | 
 26 | 	intron_count = defaultdict(int)
 27 | 
 28 | 	pair_ori = 0
 29 | 	if forward == "Rd1":
 30 | 		pair_ori = 1
 31 | 	elif forward == "Rd2":
 32 | 		pair_ori = -1
 33 | 
 34 | 
 35 | 	reader = csv.reader(open(sam), delimiter = '\t')
 36 | 
 37 | 
 38 | 	for row in reader:
 39 | 		if row[0][0] != "@":
 40 | 			if "N" in row[5]:
 41 | 				read = row[0]
 42 | 				flag = row[1]
 43 | 				chr = row[2]
 44 | 				start = int(row[3]) - 1             #Sam es 1 referenciado y es mas comodo trabajar en cordeneadas 0 refereciadas
 45 | 				cigar = row[5]
 46 | 				seq = row[9]
 47 | 
 48 | 				pair_ori = 0
 49 | 				if forward == "Rd1":
 50 | 					pair_ori = 1
 51 | 				elif forward == "Rd2":
 52 | 					pair_ori = -1
 53 | 
 54 | 				self_strand = 1
 55 | 				pair_strand = '+'
 56 | 
 57 | 				#Si no se tiene el flag XS:A:- se tienen que implementar las operaciones a nivel de bits:
 58 | 
 59 | 				if (1 & int(flag)):    #paired end
 60 | 					pair_number = flag_dict[flag][0]
 61 | 					self_strand = flag_dict[flag][1]
 62 | 					if pair_ori*self_strand*pair_number==-1:
 63 | 						pair_strand = '-'
 64 | 
 65 | 				elif (16 & int(flag)):   #single end
 66 | 					self_strand = -1
 67 | 					pair_strand = '-'
 68 | 
 69 | 				if self_strand == -1:
 70 | 					seq = str(Seq(seq).reverse_complement())
 71 | 
 72 | 				aux_str = ''
 73 | 				cigar_vars = []
 74 | 
 75 | 				for c in cigar:
 76 | 					c_type = ascii_classifier(c)
 77 | 
 78 | 					if c_type == 'number':
 79 | 						aux_str += c
 80 | 
 81 | 					elif c_type == 'letter':
 82 | 						cigar_vars.append((c, int(aux_str)))
 83 | 						aux_str = ''
 84 | 
 85 | 				Exon_starts = [start]
 86 | 				Exon_ends = []
 87 | 
 88 | 				block = 0
 89 | 				var_index = 0
 90 | 
 91 | 				for var in cigar_vars:
 92 | 					var_type = var[0]
 93 | 					var_value = var[1]
 94 | 					var_index += 1
 95 | 
 96 | 					if var_type == 'M':
 97 | 						block += var_value
 98 | 
 99 | 					if var_type == 'D':
100 | 						block += var_value
101 | 
102 | 					if var_type == 'I':
103 | 						block += 0
104 | 
105 | 					if var_type == 'N':
106 | 						Exon_ends.append(Exon_starts[-1] + block)
107 | 						Exon_starts.append(Exon_ends[-1] + var_value)
108 | 						block = 0
109 | 
110 | 					if var_index == len(cigar_vars):
111 | 						Exon_ends.append(Exon_starts[-1] + block)
112 | 
113 | 
114 | 				for e5s, e5e, e3s, e3e  in zip(Exon_starts, Exon_ends, Exon_starts[1:], Exon_ends[1:]):
115 | 					e5len= e5e - e5s
116 | 					e3len = e3e - e3s
117 | 					istart = e5e
118 | 					iend = e3s
119 | 					ilen = iend - istart
120 | 
121 | 					intron = "_".join(map(str, [chr, pair_strand, istart, iend]) )
122 | 
123 | 					#intron = chr + ":" +  str(istart) + pair_strand + str(iend)
124 | 
125 | 					intron_count[intron]+=1
126 | 
127 | 
128 | 
129 | 					#if max_ilen >= ilen >=min_ilen and e5len >= anchor <= e3len:                #filtro tamano de intrones y anchor
130 | 					#print read, chr, istart, iend, pair_strand, ilen, intron, dn, start, cigar, e5s, e5e, e3s, e3e, seq
131 | 
132 | 
133 | 	for intron, count in intron_count.items():
134 | 
135 | 		print intron, count
136 | 
137 | 
138 | 
139 | 
140 | 
141 | 
142 | 
143 | 
144 | 
145 | 
146 | if __name__ == '__main__':
147 | 	main(sys.argv[1], sys.argv[2], int(sys.argv[3]), int(sys.argv[4]), int(sys.argv[5]))
148 | 


--------------------------------------------------------------------------------
/src/Get_splicing_PWMs.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import csv
  3 | from Bio import SeqIO
  4 | from Bio.Seq import Seq
  5 | from collections import defaultdict
  6 | from shutil import copyfile
  7 | 
  8 | Genome = {}
  9 | 
 10 | def Genomictabulator(fasta):
 11 | 
 12 |     f = open(fasta)
 13 | 
 14 |     for chrfa in SeqIO.parse(f, "fasta"):
 15 |         Genome[chrfa.id] = chrfa.seq
 16 | 
 17 | 
 18 |     f.close()
 19 | 
 20 | def main(bed12, in_GT_AG_U2_5, in_GT_AG_U2_3, out_GT_AG_U2_5, out_GT_AG_U2_3):
 21 | 
 22 |     if in_GT_AG_U2_5=="NA" and in_GT_AG_U2_3=="NA":
 23 | 
 24 | 
 25 |         GT_AG_U2_5 = defaultdict(int)   #intro-centric
 26 |         GT_AG_U2_3 = defaultdict(int)
 27 | 
 28 |         for row in csv.reader(open(bed12), delimiter = '\t'):
 29 | 
 30 |                 csv.field_size_limit(1000000000)
 31 | 
 32 |                 qstarts = list(map (int, row[11].strip(",").split(",")))[1:-1]
 33 |                 blocksizes = list(map(int, row[10].strip(",").split(",")))[1:-1]
 34 | 
 35 |                 start = int(row[1])
 36 |                 strand = row[5]
 37 |                 bn = int(row[9])
 38 |                 chrom = row[0]
 39 |                 
 40 |                 if chrom in Genome:
 41 | 
 42 |                     for q1, b in zip(qstarts, blocksizes):
 43 |                         estart = start + q1
 44 |                         eend = start + q1 + b
 45 |                         elenght = eend - estart
 46 | 
 47 | 
 48 |                         ME5 = str(Genome[chrom][estart-14:estart+3]).upper()  #exon-centric
 49 |                         ME3 = str(Genome[chrom][eend-3:eend+10]).upper()
 50 | 
 51 | 
 52 |                         if strand == "-":
 53 | 
 54 |                             ME5 = str(Genome[chrom][eend-3:eend+14].reverse_complement()).upper()
 55 |                             ME3 = str(Genome[chrom][estart-10:estart+3].reverse_complement()).upper()
 56 | 
 57 | 
 58 |                         dn = ME3[3:5] + ME5[-5:-3]
 59 | 
 60 | 
 61 |                         if dn=="GTAG":
 62 | 
 63 |                             for pos, nt in enumerate(ME3):
 64 | 
 65 |                                 GT_AG_U2_5[(pos, nt)] += 1
 66 | 
 67 |                             for pos, nt in enumerate(ME5):
 68 | 
 69 |                                 GT_AG_U2_3[(pos, nt)] += 1
 70 | 
 71 | 
 72 | 
 73 |         with open(out_GT_AG_U2_5, "w") as GT_AG_U2_5_out:
 74 | 
 75 | 
 76 |             GT_AG_U2_5_out.write( "\t".join(["A", "C", "G", "T"]) +"\n")
 77 | 
 78 |             for i in range(len(GT_AG_U2_5)): #This range is about 4 times biger
 79 |                 A = GT_AG_U2_5[(i, "A")]
 80 |                 G = GT_AG_U2_5[(i, "G")]
 81 |                 C = GT_AG_U2_5[(i, "C")]
 82 |                 T = GT_AG_U2_5[(i, "T")]
 83 | 
 84 |                 TOTAL = A + G + C + T
 85 | 
 86 |                 if TOTAL >0:
 87 |                     GT_AG_U2_5_out.write("\t".join(map(str, [ x/TOTAL for x in [A, C, G, T]])) + "\n" )
 88 | 
 89 | 
 90 | 
 91 |         with open(out_GT_AG_U2_3, "w") as GT_AG_U2_3_out:
 92 | 
 93 | 
 94 |             GT_AG_U2_3_out.write( "\t".join(["A", "C", "G", "T"]) +"\n")
 95 | 
 96 |             for i in range(len(GT_AG_U2_3)): #This range is about 4 times biger
 97 |                 A = GT_AG_U2_3[(i, "A")]
 98 |                 G = GT_AG_U2_3[(i, "G")]
 99 |                 C = GT_AG_U2_3[(i, "C")]
100 |                 T = GT_AG_U2_3[(i, "T")]
101 | 
102 |                 TOTAL = A + G + C + T
103 | 
104 |                 if TOTAL >0:
105 |                     GT_AG_U2_3_out.write("\t".join(map(str, [ x/TOTAL for x in [A, C, G, T]])) + "\n" )
106 | 
107 | 
108 |     else:
109 | 
110 |         copyfile(in_GT_AG_U2_5, out_GT_AG_U2_5)
111 |         copyfile(in_GT_AG_U2_3, out_GT_AG_U2_3)
112 | 
113 | 
114 | 
115 | 
116 | if __name__ == '__main__':
117 |     Genomictabulator(sys.argv[1])
118 |     main(sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5], sys.argv[6])
119 | 


--------------------------------------------------------------------------------
/src/ME_centric_table.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import csv
  3 | from collections import defaultdict
  4 | import re
  5 | from Bio import SeqIO
  6 | from Bio.Seq import Seq
  7 | from Bio.Alphabet import generic_dna
  8 | 
  9 | csv.field_size_limit(1000000000)
 10 | 
 11 | 
 12 | def main(row_ME_filter1):
 13 | 
 14 | 	ME_reads = defaultdict(set)
 15 | 	ME_SJs = defaultdict(set)
 16 | 	SJ_info = {}
 17 | 
 18 | 	SJ_SJ = defaultdict(set)
 19 | 	SJ_same_ME = set([])
 20 | 
 21 | 	for row in csv.reader(open(row_ME_filter1), delimiter = ' '):
 22 | 
 23 | 		read, seq, qual, tag_alingment, t_score, genome_alingment, g_score, same_ME, len_micro_exon_seq_found, micro_exon_seq_found, number_of_micro_exons_matches, max_U2_scores, max_mean_conservations, micro_exons_coords, U2_scores, mean_conservations = row
 24 | 		SJ, transcript, anchors, cigar =  tag_alingment.split("|")
 25 | 		info = " ".join([SJ, transcript, len_micro_exon_seq_found, micro_exon_seq_found, number_of_micro_exons_matches, max_U2_scores, max_mean_conservations, micro_exons_coords, U2_scores, mean_conservations])
 26 | 
 27 | 		ME_reads[info].add(seq)
 28 | 
 29 | 
 30 | 		# if max_mean_conservations_primates != "None" and max_mean_conservations !="None": #I need to fix this
 31 | 
 32 | 		for ME in  micro_exons_coords.split(","):
 33 | 			ME_SJs[ME].add(SJ + "_" + micro_exon_seq_found)
 34 | 
 35 | 
 36 | ###Cheking if any ME is in two or more SJ
 37 | 
 38 | 	for i in ME_SJs.items():
 39 | 		ME, SJs = i
 40 | 
 41 | 		for SJ_A in SJs:
 42 | 
 43 | 			for SJ_B in SJs:
 44 | 
 45 | 				SJ_SJ[SJ_A].add(SJ_B)
 46 | 
 47 | 
 48 | 	for i in SJ_SJ.items():
 49 | 
 50 | 		SJ_same_ME.add(" ".join(sorted(list(i[1]))))
 51 | 
 52 | ####Coverage dict #####
 53 | 
 54 | 	for i in ME_reads.items():
 55 | 
 56 | 		SJ, transcript, len_micro_exon_seq_found, micro_exon_seq_found, number_of_micro_exons_matches, max_U2_scores, max_mean_conservations,  micro_exons_coords, U2_scores, mean_conservations, = i[0].split(" ")
 57 | 
 58 | 		coverage = len(i[1])
 59 | 
 60 | 		info = " ".join([str(coverage), transcript, len_micro_exon_seq_found, micro_exon_seq_found, number_of_micro_exons_matches, max_U2_scores, max_mean_conservations,  micro_exons_coords, U2_scores, mean_conservations])
 61 | 		SJ_MEseq = "_".join([SJ, micro_exon_seq_found])
 62 | 
 63 | 		SJ_info[SJ_MEseq] = info
 64 | 
 65 | 
 66 | ####
 67 | 
 68 | 	for i in SJ_same_ME:
 69 | 
 70 | 		#if len(i.split(" ")) > 1:
 71 | 
 72 | 		SJs = []
 73 | 		SJ_Coverages = []
 74 | 		SJ_number_of_micro_exons_matches = []
 75 | 		SJ_max_U2_scores = []
 76 | 		SJ_max_mean_conservations = []
 77 | 		ME = []
 78 | 
 79 | 
 80 | 		P_MEs  = []
 81 | 
 82 | 		info = set([])
 83 | 
 84 | 		for SJ_MEseq in i.split(" "):
 85 | 
 86 | 			SJ = "_".join(SJ_MEseq.split("_")[:-1])
 87 | 
 88 | 			coverage, transcript, len_micro_exon_seq_found, micro_exon_seq_found, number_of_micro_exons_matches, max_U2_scores, max_mean_conservations,  micro_exons_coords, U2_scores, mean_conservations = SJ_info[SJ_MEseq].split(" ")
 89 | 
 90 | 			SJs.append(SJ)
 91 | 			SJ_Coverages.append(int(coverage))
 92 | 			info.add((transcript, len_micro_exon_seq_found, micro_exon_seq_found))
 93 | 			SJ_number_of_micro_exons_matches.append(int(number_of_micro_exons_matches))
 94 | 			SJ_max_U2_scores.append(float(max_U2_scores))
 95 | 
 96 | 
 97 | 			# print SJ_info
 98 | 
 99 | 			# if max_mean_conservations_primates == "None":
100 | 
101 | 			# 	print SJ_info
102 | 
103 | 			try:
104 | 				SJ_max_mean_conservations.append(float(max_mean_conservations))
105 | 			except ValueError:
106 | 				SJ_max_mean_conservations.append(0)
107 | 
108 | 
109 | 			#print SJ_MEseq, SJ
110 | 			SJ_chr = "_".join((re.findall(r"[\w']+", SJ)[:-2]))
111 | 			SJ_istart, SJ_iend = re.findall(r"[\w']+", SJ)[-2:]
112 | 			SJ_istart = int(SJ_istart)
113 | 			SJ_iend = int(SJ_iend)
114 | 
115 | 			len_micro_exon_seq_found = int(len_micro_exon_seq_found)
116 | 
117 | 			SJ_len = SJ_iend - SJ_istart
118 | 			Kmer = SJ_len - (len_micro_exon_seq_found+4)
119 | 			#P_ME = 1 - ( 1 - (float(1)/float(4**len_micro_exon_seq_found+4)))**Kmer
120 | 			#P_ME = 	1 - ( 1 - (float(1)/float(4**len_micro_exon_seq_found+4 )))**( SJ_len - (len_micro_exon_seq_found+4))
121 | 
122 | 			P_ME = 	1 - ( 1 - (float(1)/float(4**(len_micro_exon_seq_found+4) )))**( SJ_len - (len_micro_exon_seq_found+4))
123 | 
124 | 			P_MEs.append(P_ME)
125 | 
126 | 			set_ME = set([])
127 | 
128 | 			for a, b, c in zip(micro_exons_coords.split(","), U2_scores.split(","), mean_conservations.split(",")):
129 | 
130 | 				set_ME.add("|".join([a,b,c]))
131 | 
132 | 			ME.append(set_ME)
133 | 
134 | 
135 | 
136 | 		sum_total_coverage = sum(SJ_Coverages)
137 | 		total_SJs = ",".join(SJs)
138 | 		total_coverages = ",".join(map(str, SJ_Coverages))
139 | 
140 | 		# total_max_U2_scores = min(SJ_max_U2_scores)
141 | 		# total_max_mean_conservations = min(SJ_max_mean_conservations)
142 | 		# total_max_mean_conservations_primates = min(SJ_max_mean_conservations_primates)
143 | 
144 | 
145 | 		total_ME = ",".join(set.intersection(*ME))
146 | 		total_number_of_micro_exons_matches = len(total_ME.split(","))
147 | 
148 | 		transcript, len_micro_exon_seq_found, micro_exon_seq_found = list(info)[0]
149 | 
150 | 
151 | 		if total_ME!="":   #The empty fields refeclts no interesection between micro-exons present on the splice junctions
152 | 
153 | 			true_ME =  max([i.split("|")  for i in total_ME.split(",")], key=lambda item:float(item[1]))
154 | 
155 | 
156 | 			ME, U2_scores, mean_conservations = true_ME
157 | 
158 | 			#if 6 >= len(micro_exon_seq_found) >= 3:
159 | 
160 | 			#### Probabilidad ###
161 | 
162 | 			# if total_ME!="":
163 | 
164 | 			#out =  map(str, [ME, transcript, sum_total_coverage, total_SJs, total_coverages, len_micro_exon_seq_found, micro_exon_seq_found, total_number_of_micro_exons_matches, total_max_U2_scores, total_max_mean_conservations, total_max_mean_conservations_primates, min(P_MEs), total_ME])
165 | 
166 | 			out =  map(str, [ME, transcript, sum_total_coverage, total_SJs, total_coverages, len_micro_exon_seq_found, micro_exon_seq_found, total_number_of_micro_exons_matches, U2_scores, mean_conservations, min(P_MEs), total_ME])
167 | 
168 | 
169 | 
170 | 		print "\t".join(out)
171 | 
172 | 
173 | 
174 | 
175 | 
176 | if __name__ == '__main__':
177 | 	main(sys.argv[1])
178 | 
179 | 
180 | #python ~/my_src/ME/Pipeline/ME_centric_table.py _clip1.trim.sam.row_ME.filter1
181 | 


--------------------------------------------------------------------------------
/src/Micro_exons_tags.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import csv
 3 | import re
 4 | from Bio import SeqIO
 5 | from Bio.Seq import Seq
 6 | from Bio.Alphabet import generic_dna
 7 | 
 8 | SJ_Tags_seq = {}
 9 | SJ_Tags_info = {}
10 | 
11 | def Tagloader(fasta):
12 | 
13 | 	print >> sys.stderr, "Loading SJ Tags in RAM memory ...",
14 | 
15 | 	f = open(fasta)
16 | 
17 | 	for tag in SeqIO.parse(f, "fasta"):
18 | 		SJ_Tags_seq[tag.id.split("|")[0]] = tag.seq
19 | 		SJ_Tags_info[tag.id.split("|")[0]] = tag.id
20 | 
21 | 		print ">" + tag.id
22 | 		print tag.seq
23 | 
24 | 	print >> sys.stderr, "OK"
25 | 
26 | 	f.close()
27 | 
28 | 
29 | 
30 | 
31 | def main(ME_centric):
32 | 
33 | 	for row in csv.reader(open(ME_centric), delimiter = '\t'):
34 | 
35 | 		#ME, sum_total_coverage, total_SJs, total_coverages, len_micro_exon_seq_found, micro_exon_seq_found, total_number_of_micro_exons_matches, total_max_U2_scores, total_max_mean_conservations, total_max_mean_conservations_primates, min_P_ME, total_ME = row   #, true_ME, score, is_annotated = row
36 | 		ME, transcript, sum_total_coverage, total_SJs, total_coverages, len_micro_exon_seq_found, micro_exon_seq_found, total_number_of_micro_exons_matches, U2_scores, mean_conservations, P_MEs, total_ME = row
37 | 
38 | 		for SJ in total_SJs.split(","):
39 | 
40 | 			SJ_Tag_seq = SJ_Tags_seq[SJ]
41 | 			up_block, down_block =  SJ_Tags_info[SJ].split("|")[-1].split("_")
42 | 
43 | 			ME_Tag_ID = "|".join(SJ_Tags_info[SJ].split("|")[:-1] + ["_".join([up_block, micro_exon_seq_found, down_block])] )
44 | 			ME_Tag_seq = SJ_Tag_seq[:int(up_block)] + micro_exon_seq_found + SJ_Tag_seq[int(up_block):]
45 | 			#
46 | 			print ">" + ME_Tag_ID
47 | 			print ME_Tag_seq
48 | 
49 | 
50 | 
51 | if __name__ == '__main__':
52 | 	Tagloader(sys.argv[1])
53 | 	main(sys.argv[2])
54 | 


--------------------------------------------------------------------------------
/src/Replace_PSI_whippet.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import csv
 3 | import gzip
 4 | csv.field_size_limit(300000000000000)
 5 | 
 6 | 
 7 | def main(ME_PSI, whippet_PSI):
 8 | 
 9 |     Coord_info = dict()
10 | 
11 |     reader1 = csv.reader(open(ME_PSI), delimiter="\t")
12 |     header1 = next(reader1)
13 | 
14 |     for row in reader1:
15 | 
16 | 
17 |         ME, Coord, PSI, CI_Lo, CI_Hi, Class = row
18 | 
19 |         Coord_info[Coord] = row
20 | 
21 |     reader2 = csv.reader(gzip.open(whippet_PSI, mode="rt"), delimiter="\t")
22 |     header2 = next(reader2)
23 | 
24 |     print( "\t".join(header2) )
25 | 
26 |     for row in reader2:
27 | 
28 |         Gene, Node, Coord, Strand, Type, Psi, CI_Width, CI_Lo_Hi, Total_Reads, Complexity, Entropy, Inc_Paths, Exc_Paths, Edges = row
29 | 
30 |         if Coord in Coord_info:
31 | 
32 |             ME, Coord, ME_PSI, CI_Lo, CI_Hi, Class = Coord_info[Coord]
33 | 
34 |             if ME_PSI == "NA":
35 | 
36 |                 print("\t".join(row))
37 | 
38 |             else:
39 | 
40 |                 ME_CI_Lo_Hi = ",".join([ CI_Lo, CI_Hi])
41 | 
42 |                 ME_CI_Width = str(float(CI_Hi) - float(CI_Lo))
43 | 
44 |                 out = [Gene, Node, Coord, Strand, Type, ME_PSI, ME_CI_Width, ME_CI_Lo_Hi, Total_Reads, Complexity, Entropy, Inc_Paths, Exc_Paths, Edges]
45 | 
46 |                 print("\t".join(out))
47 | 
48 |         else:
49 | 
50 |             print("\t".join(row))
51 | 
52 | 
53 | if __name__ == '__main__':
54 | 	main(sys.argv[1], sys.argv[2]  )
55 | 


--------------------------------------------------------------------------------
/src/Report/report_files/figure-html/unnamed-chunk-4-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/src/Report/report_files/figure-html/unnamed-chunk-4-1.png


--------------------------------------------------------------------------------
/src/Report/report_files/figure-html/unnamed-chunk-5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/src/Report/report_files/figure-html/unnamed-chunk-5-1.png


--------------------------------------------------------------------------------
/src/SJ_tags_generator_for_micro_exons.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import csv
  3 | from Bio import SeqIO
  4 | from Bio.Seq import Seq
  5 | from random import randint, sample
  6 | from operator import itemgetter
  7 | from collections import defaultdict
  8 | from operator import itemgetter
  9 | 
 10 | 
 11 | Transcriptome = {}
 12 | Genome = {}
 13 | 
 14 | def Genomictabulator(fasta):
 15 | 
 16 | 	print >> sys.stderr, "Loading the genome into RAM memory ...",
 17 | 
 18 | 	f = open(fasta)
 19 | 
 20 | 	for chrfa in SeqIO.parse(f, "fasta"):
 21 | 		Genome[chrfa.id] = chrfa.seq
 22 | 
 23 | 	print >> sys.stderr, "OK"
 24 | 
 25 | 	f.close()
 26 | 
 27 | 
 28 | 
 29 | def Transcriptometabulator(genecode_fasta):
 30 | 
 31 | 	print >> sys.stderr, "Loading the genome into RAM memory ...",
 32 | 
 33 | 	for record in SeqIO.parse(genecode_fasta, "fasta"):
 34 | 		id = str(record.id).split("|")[0].split(" ")[0]
 35 | 		Transcriptome[id] = record.seq
 36 | 
 37 | 		#print(id)
 38 | 
 39 | 	print >> sys.stderr, "OK"
 40 | 
 41 | 
 42 | def main(bed12, ME_len, max_read_len):
 43 | 
 44 | 	n = max_read_len
 45 | 
 46 | 	transcript_intron_info = defaultdict(list)
 47 | 
 48 | 	min_intron_lenght = 80
 49 | 
 50 | 	for row in csv.reader(open(bed12), delimiter = '\t'):
 51 | 
 52 | 		try:
 53 | 
 54 | 
 55 | 			qName = row[3]
 56 | 			seq = Transcriptome[qName]
 57 | 
 58 | 			qstarts = map (int, row[11].strip(",").split(","))
 59 | 			blocksizes = map(int, row[10].strip(",").split(","))
 60 | 
 61 | 			start = int(row[1])
 62 | 			strand = row[5]
 63 | 			bn = int(row[9])
 64 | 			chr = row[0]
 65 | 			qstart = 0
 66 | 
 67 | 			for q1, q2, b, b2 in zip(qstarts, qstarts[1:], blocksizes, blocksizes[1:]):
 68 | 
 69 | 				qstart = qstart + b
 70 | 				tag_start = qstart - n
 71 | 				tag_end = qstart + n
 72 | 
 73 | 				#if tag_start <= 0:
 74 | 				#	print tag_start, qstart, tag_end, strand
 75 | 
 76 | 				istart = start + q1 + b
 77 | 				iend = start + q2
 78 | 				ilen = iend - istart
 79 | 				intron = row[0] + ":" +  str(istart) + row[5] + str(iend)
 80 | 				intron = chr + ":" + str(istart) + strand + str(iend)
 81 | 				ilength = iend - istart
 82 | 
 83 | 				block_up = n
 84 | 				block_down = n
 85 | 				dn = str(Genome[chr][istart:(istart+2)] + Genome[chr][(iend-2):iend]).upper()
 86 | 
 87 | 
 88 | 				if strand == '+' :                          #Para los que aliniean en la hebra +
 89 | 
 90 | 					if tag_start<0:                             #Precausiones generar buenos tag del primer y ultimo tag
 91 | 						tag_start = 0
 92 | 						block_up = qstart
 93 | 
 94 | 					if tag_end>len(seq):
 95 | 						tag_end=len(seq)
 96 | 						block_down = tag_end - qstart
 97 | 
 98 | 
 99 | 					tag = seq[tag_start:tag_end]
100 | 
101 | 
102 | 				if strand == '-' :
103 | 
104 | 					dn = str((Genome[chr][istart:(istart+2)] + Genome[chr][(iend-2):iend]).reverse_complement()).upper()
105 | 
106 | 					if tag_end>len(seq):                 #Para los que alinian en la hebra - es todo al inverso
107 | 						tag_end=len(seq)
108 | 						block_up = tag_end - qstart
109 | 
110 | 					tag = seq[-tag_end:-tag_start]
111 | 
112 | 					if tag_start<=0:
113 | 
114 | 						tag = seq[-tag_end:]
115 | 						block_down = qstart
116 | 
117 | 
118 | 				if b > ME_len and b2 > ME_len and ilength >= min_intron_lenght and (dn=="GTAG" or dn=="GCAG" or dn=="ATAC"):  # hay que agregarle el filtro de los micro exones!!
119 | 
120 | 
121 | 					info = qName, tag, chr, istart, iend, strand, block_up, block_down, block_up + block_down
122 | 					transcript_intron_info[intron].append(info)
123 | 
124 | 
125 | 		except KeyError:
126 | 			pass
127 | 
128 | 
129 | 	for i in transcript_intron_info.items():
130 | 
131 | 		infos = i[1]
132 | 		intron = i[0]
133 | 
134 | 		qName, tag, chr, istart, iend, strand, block_up, block_down, sum_blocks = max(infos, key=itemgetter(8))
135 | 
136 | 
137 | 		ID = ">" + intron + "|" + qName + "|" + str(block_up) + "_" + str(block_down)
138 | 
139 | 		print ID
140 | 		print tag
141 | 
142 | 
143 | #>chr12:3701518+3702264|ENST00000562877.1|100_19
144 | #AGCTTTCTGTTTAGTTGTGTCAATCGCAGGCCACTCTGCTGAGCATCTTCTCCCAGGAGTACCAGAAACACATTAAAAGAACACATGCCAAACATCATACTTCGGAAGCAATTGAAAGT
145 | 
146 | 
147 | if __name__ == '__main__':
148 | 	Genomictabulator(sys.argv[1])
149 | 	Transcriptometabulator(sys.argv[2])
150 | 	main (sys.argv[3], int(sys.argv[4]), int(sys.argv[5]))
151 | 
152 | 
153 | 
154 | #El filtro del los intrones canonicos fue anadido despues
155 | 


--------------------------------------------------------------------------------
/src/Snakepool_BetaDist.R:
--------------------------------------------------------------------------------
  1 | log <- file(snakemake@log[[1]], open="wt")
  2 | 
  3 | cdf_t = snakemake@params[["ct"]]
  4 | min_rep = snakemake@params[["mr"]]
  5 | min.p.mean = snakemake@params[["mm"]]
  6 | path_run_metatda = snakemake@params[["pm"]]
  7 | path_delta = snakemake@params[["path_delta"]]
  8 | path_out = snakemake@params[["path_out"]]
  9 | min_delta = snakemake@params[["min_delta"]]
 10 | 
 11 | 
 12 | 
 13 | library(data.table)
 14 | library(distributions3)
 15 | 
 16 | 
 17 | 
 18 | 
 19 | 
 20 | get_rep_table <- function( file_path, rep){
 21 | 
 22 | 
 23 |   
 24 |   comparison <- data.table()
 25 |   
 26 |   
 27 |   for ( i in seq(1:rep)){
 28 |     
 29 |     print(i)
 30 |     
 31 |     path <- paste0(file_path, i, ".diff")
 32 |     file <- fread(path)
 33 |     file[, Rep:=i]
 34 |     comparison <- rbind(comparison, file)
 35 |   
 36 |   }
 37 |   
 38 |   
 39 |   colnames(comparison) <- c( "Gene", "Node", "Coord", "Strand", "Type", "Psi_A", "Psi_B", "DeltaPsi", "Probability", "Complexity", "Entropy", "V1", "Rep")
 40 |   comparison[ , V1:=NULL]
 41 |   
 42 |   comparison
 43 | 
 44 | }
 45 | 
 46 | 
 47 | 
 48 | 
 49 | 
 50 | 
 51 | cdf.beta <- function(mu, var, p) {
 52 |   alpha <- ((1 - mu) / var - 1 / mu) * mu ^ 2
 53 |   beta <- alpha * (1 / mu - 1)
 54 |   
 55 |   return(cdf( Beta(alpha, beta), p))
 56 | }
 57 | 
 58 | 
 59 | 
 60 | 
 61 | 
 62 | 
 63 | 
 64 | get_diff_nodes <- function (path, comp_name, reps, beta_t, min.p.mean, min_number_reps){
 65 |   
 66 |   
 67 |     print(comp_name)
 68 | 
 69 |     comp <- get_rep_table( paste0(path, comp_name, "_rep_") , reps)
 70 |     
 71 |     
 72 |     
 73 |     comp.stats <- comp[ , .(Psi_A.mean =mean(Psi_A) , Psi_B.mean =mean(Psi_B) ,
 74 |                                                                                         DeltaPsi.mean = mean(DeltaPsi), DeltaPsi.sd = sd(DeltaPsi),
 75 |                                                                                         Probability.mean=mean(Probability, na.rm=T), Probability.sd=sd(Probability, na.rm=T),
 76 |                                                                                         Probability.var = var(Probability, na.rm=T),  Number=.N),
 77 |                                                                                     by=c( "Gene", "Node", "Coord", "Strand", "Type")]
 78 |     
 79 |     
 80 |     comp.stats[ ,  cdf.beta:=cdf.beta( Probability.mean, Probability.var  , beta_t) ]
 81 |     
 82 |     
 83 |     comp.stats[ , diff:=(abs(DeltaPsi.mean)>=min_delta & cdf.beta  < 0.05 & Probability.mean>=min.p.mean & ! Type %in% c("TE", "TS") & Number > min_number_reps)  ]
 84 |     
 85 | 
 86 | 
 87 | }
 88 | 
 89 | 
 90 | snakepool_BetaDist <-function(beta_t, min.p.mean, min_number_reps, path_metadata, path_delta, out_dir){
 91 |   
 92 |   
 93 | 
 94 | 
 95 | run_metadata <-  fread(path_metadata)
 96 | 
 97 | 
 98 | 
 99 | for (i in 1:nrow(run_metadata)) {
100 |   
101 |   #print(run_metadata[i, Compare_ID])
102 |   #print(run_metadata[i, Repeat])
103 |   
104 |   out <- get_diff_nodes(path_delta, run_metadata[i, Compare_ID], run_metadata[i, Repeat], beta_t, min.p.mean, min_number_reps )
105 |   
106 |   fwrite(out, file= paste0(out_dir, run_metadata[i, Compare_ID], ".txt") , append = FALSE, quote = "auto", sep = "\t",  row.names = FALSE, col.names = TRUE)
107 | 
108 |   }
109 |   
110 | }
111 | 
112 | snakepool_BetaDist(cdf_t, min.p.mean, min_rep,  
113 |                    path_run_metatda,
114 |                    path_delta,
115 |                    path_out)
116 | 
117 | 
118 | 
119 | 


--------------------------------------------------------------------------------
/src/coverage_sample_filter.py:
--------------------------------------------------------------------------------
 1 | from snakemake.utils import min_version
 2 | import csv
 3 | from collections import defaultdict
 4 | import sys
 5 | 
 6 | 
 7 | 
 8 | 
 9 | with open(snakemake.output[0], "w") as out:
10 | #with open("test.txt", "w") as out:
11 | 
12 |     min_read_per_sample = int(snakemake.params[0])
13 |     #min_read_per_sample = 5
14 |     ME_n_samples = defaultdict(int)
15 |     all_ME = set([])
16 | 
17 |     for file in snakemake.input:
18 |     #for file in sys.argv[1:]:
19 | 
20 |         with open(file) as cov_file:
21 | 
22 |             reader = csv.reader(cov_file, delimiter="\t")
23 |            
24 | 
25 |             for row in reader:
26 | 
27 | 
28 |                 FILE_NAME, ME, total_SJs, ME_SJ_coverages, sum_ME_coverage, sum_ME_SJ_coverage_up_down_uniq, sum_ME_SJ_coverage_up, sum_ME_SJ_coverage_down, SJ_coverages, sum_SJ_coverage, is_alternative_5, is_alternative_3, alternatives_5, cov_alternatives_5, total_cov_alternatives_5, alternatives_3, cov_alternatives_3, total_cov_alternatives_3 = row
29 | 
30 |                 all_ME.add(ME) 
31 |                 if int(sum_ME_SJ_coverage_up_down_uniq)>=min_read_per_sample:
32 |                     ME_n_samples[ME] += 1
33 |                     
34 |     out.write("\t".join(["ME", "N_samples" ]) + "\n")
35 |     
36 |     for ME in all_ME:
37 |         
38 |         n = ME_n_samples[ME]
39 |         
40 |         out.write("\t".join([ME, str(n)]) + "\n")
41 | 


--------------------------------------------------------------------------------
/src/final_filters.R:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env Rscript
  2 | 
  3 | #' ---
  4 | #' title: Micro-exon final filtering report
  5 | #' author: Guillermo Parada
  6 | #' output:
  7 | #'    html_document:
  8 | #'      toc: true
  9 | #'      highlight: zenburn
 10 | #' ---
 11 | #' 
 12 | #' ## Loading libraries
 13 | #' 
 14 | #' The libraries which are used by this script are:"
 15 | #' *ggplot2
 16 | #' *reshape2
 17 | #' *stringr
 18 | #' *mixtools
 19 | #' *simecol
 20 | #' *data.table
 21 | #' *optparse
 22 | 
 23 | 
 24 | library(ggplot2)
 25 | library(reshape2)
 26 | library(stringr)
 27 | library(mixtools)
 28 | library(simecol)
 29 | library(data.table)
 30 | #library(optparse)
 31 | 
 32 | 
 33 | #' ## Input micro-exon profiling
 34 | #' 
 35 | #' De-novo discovery of micro-exons by uExonator relies on the detection of inserted sequenses over
 36 | #' exon-exon junctions, which then are re-mapped inside the cognate introns. Inserted sequences smaller
 37 | #' than 6 nucleotides are very likely to be mapped by chance, therefore detected micro-exons smaller than
 38 | #' 6 nt are prone to be artefacts by sequencing error or genomic variations. The following plot shows the
 39 | #' spurious micro-exon/intron match probability distribution for micro-exon in between 1-15 nt.
 40 | 
 41 | 
 42 | 
 43 | 
 44 | #option_list <- list(
 45 | #  make_option(c("-met", "--micro_exon_table"), type="character", default=NULL, 
 46 | #              help="Micro-exon centric table", metavar="character"),
 47 | #  make_option(c("-c", "--micro_exon_coverages"), type="character", default=NULL, 
 48 | #              help="Micro-exon coverage table", metavar="character"),  
 49 | #  make_option(c("-o", "--out"), type="character", default="out.txt", 
 50 | #              help="output file name [default= %default]", metavar="character")
 51 | #); 
 52 | 
 53 | #opt_parser <- OptionParser(option_list=option_list);
 54 | #opt <- parse_args(opt_parser);
 55 | 
 56 | 
 57 | 
 58 | ME_centric_raw <- read.delim("~/Google_Drive/Results/ME/Single_cell/TOTAL.sam.row_ME.filter1.ME_centric", header=FALSE, stringsAsFactors=FALSE)
 59 | colnames(ME_centric_raw) <- c('ME', 'transcript', 'sum_total_coverage', 'total_SJs', 'total_coverages', 'len_micro_exon_seq_found', 'micro_exon_seq_found', 'total_number_of_micro_exons_matches', 'U2_scores', 'mean_conservations_vertebrates', 'P_MEs', 'total_ME')
 60 | 
 61 | ME_centric_raw <- data.table(ME_centric_raw)
 62 | 
 63 | ggplot(ME_centric_raw[len_micro_exon_seq_found<=15, ],
 64 |        aes(x=factor(len_micro_exon_seq_found), y=P_MEs) ) +
 65 |   geom_violin(scale = "width") +
 66 |   xlab("Micro-exon leght") +
 67 |   ylab("Spurious micro-exon/intron match probability") +
 68 |   theme(panel.background = element_rect(fill = 'white', colour = 'black'))
 69 | 
 70 | #' The higher spurious micro-exon/intron match probability is reflected on the number of micro-exon/intron
 71 | #' matches inside
 72 | 
 73 | ggplot(ME_centric_raw[len_micro_exon_seq_found<=15, ],
 74 |        aes(x=factor(len_micro_exon_seq_found), y=total_number_of_micro_exons_matches) ) + 
 75 |   geom_jitter() +
 76 |   ylim(0,100) +
 77 |   theme(panel.background = element_rect(fill = 'white', colour = 'black'))
 78 | 
 79 | 
 80 | #'     True 
 81 | #' splicing events relies on splicing signals, therefore false micro-exons will have weaker splicing signals
 82 | #' than the true micro-exons. The following plot show the distribution of U2/GT-AG splicing signal strengh
 83 | #' (U2_score) for population the total micro-exons and longer or equal then 3, 6, and 9 nt. Micro-exons equal
 84 | #' or longer than 9 nt are less prone to be artefacts, therefore have a U2_score distribution which is expected
 85 | #' from real splicng events.
 86 | 
 87 | 
 88 | #ME_matches <- unlist(strsplit(ME_centric_raw$total_ME, "[,]"))
 89 | #ME_matches <- read.table(text=ME_matches, sep="|")
 90 | #colnames(ME_matches) <- c("ME", "U2_score", "Vertebrate_conservation", "Primate_conservation")
 91 | #ME_matches$Filter = "None"
 92 | #ME_centric_raw_longer_3 <- subset(ME_centric_raw, len_micro_exon_seq_found>=3)
 93 | #ME_centric_raw_longer_6 <- subset(ME_centric_raw, len_micro_exon_seq_found>=6)
 94 | #ME_centric_raw_longer_9 <- subset(ME_centric_raw, len_micro_exon_seq_found>=9)
 95 | #ME_matches_3 <- unlist(strsplit(ME_centric_raw_longer_3$total_ME, "[,]"))
 96 | #ME_matches_3 <- read.table(text=ME_matches_3, sep="|")
 97 | #colnames(ME_matches_3) <- c("ME", "U2_score", "Vertebrate_conservation", "Primate_conservation")
 98 | #ME_matches_3$Filter = ">=3"
 99 | #ME_matches_6 <- unlist(strsplit(ME_centric_raw_longer_6$total_ME, "[,]"))
100 | #ME_matches_6 <- read.table(text=ME_matches_6, sep="|")
101 | #colnames(ME_matches_6) <- c("ME", "U2_score", "Vertebrate_conservation", "Primate_conservation")
102 | #ME_matches_6$Filter = ">=6"
103 | #ME_matches_9 <- unlist(strsplit(ME_centric_raw_longer_9$total_ME, "[,]"))
104 | #ME_matches_9 <- read.table(text=ME_matches_9, sep="|")
105 | #colnames(ME_matches_9) <- c("ME", "U2_score", "Vertebrate_conservation", "Primate_conservation")
106 | #ME_matches_9$Filter = ">=9"
107 | #ME_matches_Filters <- rbind(ME_matches, ME_matches_3, ME_matches_6, ME_matches_9)
108 | #ggplot(ME_matches_Filters, aes(x=U2_score, ..density.., colour=Filter)) +
109 | #  geom_freqpoly(binwidth=5) +
110 | #  xlim(40, 100) +
111 | #  theme(panel.background = element_rect(fill = 'white', colour = 'black'))
112 | 
113 | 
114 | R -e "rmarkdown::render('../../../Software/Micro-Exonator/src/final_filters.R',output_file='output.html')"


--------------------------------------------------------------------------------
/src/get_diff_ME_single_cell.py:
--------------------------------------------------------------------------------
  1 | import csv, sys
  2 | from collections import defaultdict
  3 | import gzip
  4 | 
  5 | 
  6 | 
  7 | def main(jls_exons_tab, delta, high_qual_ME ):
  8 | 
  9 |     node_exons = dict()
 10 |     MEs = set([])
 11 |     ME_info = dict()
 12 |     
 13 |     header_out = ["Gene", "Node", "Coord", "Strand", "Type", "Psi_A.mean", "Psi_B.mean", "DeltaPsi.mean", "DeltaPsi.sd", "Probability.mean", "Probability.sd", "Probability.var", "N.detected.reps", "cdf.beta", "is.diff", "microexon_ID"]
 14 | 
 15 |     header = ["Gene", "Node", "Coord", "Strand", "Type", "Psi_A.mean", "Psi_B.mean", "DeltaPsi.mean", "DeltaPsi.sd", "Probability.mean", "Probability.sd", "Probability.var", "Number", "cdf.beta", "diff"]
 16 | 
 17 |     print("\t".join(header_out))
 18 | 
 19 | 
 20 |     with open(high_qual_ME) as F:
 21 | 
 22 |         reader = csv.DictReader(F, delimiter="\t")
 23 | 
 24 |         for row in reader:
 25 |             MEs.add(row["ME"])
 26 |     
 27 |     
 28 |     with gzip.open(jls_exons_tab, mode="rt") as F:
 29 | 
 30 |         reader = csv.DictReader(F, delimiter="\t")
 31 |         
 32 |         for row in reader:
 33 |             chrom, locus, strand = row["Potential_Exon"].split(":")
 34 |             estart, eend = locus.split("-")
 35 | 
 36 |             for node in row["Whippet_Nodes"].split(","):
 37 |                 node_exons[(row["Gene"], node)] = [row["Potential_Exon"], row["Is_Annotated"]]
 38 | 
 39 | 
 40 |     with open(delta) as F: 
 41 | 
 42 |         reader = csv.DictReader(F, delimiter="\t")
 43 | 
 44 |         for row in reader:
 45 |             
 46 |             chrom, pos = row["Coord"].split(":")
 47 |             estart, eend = pos.split("-")
 48 |             estart = str(int(estart)-1)
 49 |             exon_ID = "_".join([chrom, row["Strand"], estart, eend])
 50 |             
 51 |             #if exon_ID == "chr10_+_127272438_127272444":
 52 |             #if "12727243" in exon_ID: 
 53 |             #    print(row, exon_ID)
 54 |             
 55 | 
 56 | 
 57 |             if (row["Gene"], row["Node"] ) in node_exons:
 58 |                 
 59 | 
 60 |                 
 61 |                 Potential_Exon, Is_Annotated = node_exons[(row["Gene"], row["Node"] )]
 62 |                 
 63 |                 out =  [row[x] for x in header] + node_exons[(row["Gene"], row["Node"] )]
 64 |  
 65 | 
 66 |                 
 67 |                 if row["Type"]=="AD":
 68 |             
 69 |                     nchrom, nstrand, nstart, nend = exon_ID.split("_")
 70 | 
 71 | 
 72 |                     echrom, eloci, estrand =  Potential_Exon.split(":")
 73 | 
 74 |                     estart, eend =  eloci.split("-")
 75 | 
 76 |                     if estrand == "+" and eend == nend:
 77 | 
 78 |                         new_exon_ID = "_".join([echrom, estrand, str(int(estart)-1), eend ])
 79 | 
 80 |                         exon_ID = new_exon_ID
 81 | 
 82 |                     if estrand == "-" and str(int(estart)-1) == nstart:
 83 | 
 84 |                         new_exon_ID = "_".join([echrom, estrand, str(int(estart)-1), eend ])
 85 | 
 86 |                         exon_ID = new_exon_ID
 87 | 
 88 | 
 89 |                 elif row["Type"]=="AA":
 90 | 
 91 |                     nchrom, nstrand, nstart, nend = exon_ID.split("_")
 92 | 
 93 | 
 94 |                     echrom, eloci, estrand =  Potential_Exon.split(":")
 95 | 
 96 |                     estart, eend =  eloci.split("-")
 97 | 
 98 |                     if estrand == "-" and eend == nend:
 99 | 
100 |                         new_exon_ID = "_".join([echrom, estrand, str(int(estart)-1), eend ])
101 | 
102 | 
103 |                         exon_ID = new_exon_ID
104 | 
105 | 
106 |                     if estrand == "+" and str(int(estart)-1) == nstart:
107 | 
108 |                         new_exon_ID = "_".join([echrom, estrand, str(int(estart)-1), eend ])
109 | 
110 |                         exon_ID = new_exon_ID
111 |                         
112 |                         #if "12727243" in exon_ID: 
113 |                         #    print(row, exon_ID, Potential_Exon)
114 |            
115 | 
116 |                 if exon_ID in MEs:
117 |                    print("\t".join( [row[x] for x in header] + [exon_ID] ))
118 |           
119 |                 else:
120 |                    print("\t".join( [row[x] for x in header] + ["NA"] ))
121 |             
122 |             
123 | if __name__ == '__main__':
124 |     main(sys.argv[1], sys.argv[2], sys.argv[3])
125 | 


--------------------------------------------------------------------------------
/src/merge_pairs.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import sys
 3 | from Bio import SeqIO
 4 | from Bio.SeqRecord import SeqRecord
 5 | 
 6 | def main(fastq1, fastq2, fastq12):
 7 | 
 8 |     r1 = open(fastq1)
 9 |     r2 = open(fastq2)
10 |     out = open(fastq12, 'w')
11 | 
12 |     for read in SeqIO.parse(r1, "fastq"):
13 | 
14 |         rd_id = read.id + "_1"
15 |         rd_Q = read.letter_annotations["phred_quality"]
16 |         reads_rd = SeqRecord( read.seq, rd_id, description = "" )
17 |         reads_rd.letter_annotations["phred_quality"] = rd_Q
18 |         out.write(reads_rd.format("fastq"))
19 | 
20 |     for read in SeqIO.parse(r2, "fastq"):
21 | 
22 |         rd_id = read.id + "_2"
23 |         rd_Q = read.letter_annotations["phred_quality"]
24 |         reads_rd = SeqRecord( read.seq, rd_id, description = "" )
25 |         reads_rd.letter_annotations["phred_quality"] = rd_Q
26 |         out.write(reads_rd.format("fastq"))
27 | 
28 | 
29 | if __name__ == '__main__':
30 |     main(sys.argv[1], sys.argv[2], sys.argv[3])  


--------------------------------------------------------------------------------
/src/merge_quant.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import csv
 3 | import gzip
 4 | from snakemake.utils import min_version
 5 | 
 6 | csv.field_size_limit(100000000)
 7 | csv.field_size_limit()
 8 | 
 9 | 
10 | 
11 | def main(mode, out_file, file_list  ):
12 |     with gzip.open(out_file, 'wt') as out:
13 | 
14 |         for file in file_list:
15 | 
16 |             with gzip.open(file, mode="rt") as f:
17 | 
18 |                 if mode=="Isoform" or mode=="Gene":
19 |                     header = ["Sample", mode, "TpM", "Read_Counts"]
20 |                 elif mode=="PSI":
21 |                     header = ['Sample', 'Gene', 'Node', 'Coord', 'Strand', 'Type', 'Psi', 'CI_Width', 'CI_Lo,Hi', 'Total_Reads', 'Complexity', 'Entropy', 'Inc_Paths', 'Exc_Paths', 'Edges']
22 | 
23 |                 writer = csv.DictWriter(out, fieldnames=header, extrasaction='ignore', delimiter="\t")
24 |                 writer.writeheader()
25 | 
26 |                 sample = file.strip(snakemake.params["trim"])
27 | 
28 |                 #sample = "".join(file.split("/")[-1].split(".")[:-2])  #files needs to finish with *.fastq.gz
29 |                 reader = csv.DictReader(f, delimiter="\t")
30 | 
31 |                 for row in reader:
32 | 
33 |                     row["Sample"] = sample
34 |                     writer.writerow(row)
35 | 
36 | #print(snakemake.input)
37 | 
38 | if __name__ == '__main__':
39 |     main(snakemake.params["feature"], snakemake.output["merged"],  snakemake.input["files"])
40 | 


--------------------------------------------------------------------------------
/src/round2_ME_reads_fastq.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import csv
 3 | 
 4 | 
 5 | def main(alingment_pre_processed_round2):
 6 | 	
 7 | 	for row in csv.reader(open(alingment_pre_processed_round2), delimiter = '\t'):
 8 | 
 9 | 		try:
10 | 			read, flag, tag, start, cigar, seq, qual = row
11 | 
12 | 			if len(seq)>len(qual):
13 | 				qual = qual + qual[ -(len(seq) - len(qual)) : ]
14 | 
15 | 			elif len(seq)<len(qual):
16 | 				qual = qual[:len(seq)]
17 | 
18 | 			print "@" + read
19 | 			print seq
20 | 			print "+"
21 | 			print qual
22 | 
23 | 		except ValueError: #minor fraction of lines dont have 7 columns due to errors.
24 | 			pass
25 | 
26 | 
27 | if __name__ == '__main__':
28 | 	main(sys.argv[1])
29 | 


--------------------------------------------------------------------------------
/src/round2_ME_reads_fastq2.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import csv
 3 | from Bio import SeqIO
 4 | from Bio import SeqIO
 5 | from Bio.Seq import Seq
 6 | from Bio.Alphabet import generic_dna
 7 | from Bio.SeqRecord import SeqRecord
 8 | import gzip
 9 | 
10 | csv.field_size_limit(100000000)
11 | 
12 | def main(alingment_pre_processed_round2, row_fastq):
13 | 	
14 |     ME_reads = set([])
15 |     fastq_out = open( alingment_pre_processed_round2 + ".fastq", 'w')
16 | 
17 |     for row in csv.reader(open(alingment_pre_processed_round2), delimiter = '\t'):
18 | 
19 | 
20 | 	#print row, len(row)
21 | 
22 | 
23 | 	read, flag, tag, start, cigar, seq, qual = row[:7]
24 |         ME_reads.add(read)
25 | 
26 |     with gzip.open(row_fastq) as f:
27 | 
28 |         for read in SeqIO.parse(f, "fastq"):
29 | 
30 |             if read.id in ME_reads:
31 |                 
32 |                 f_out = SeqRecord( read.seq, read.id, description = "" )
33 |                 f_out.letter_annotations["phred_quality"] = read.letter_annotations["phred_quality"]
34 | 
35 |                 fastq_out.write(f_out.format("fastq"))
36 | 
37 | #		try:
38 | #			read, flag, tag, start, cigar, seq, qual = row
39 | #c
40 | #			if len(seq)>len(qual):
41 | #				qual = qual + qual[ -(len(seq) - len(qual)) : ]
42 | #
43 | #			elif len(seq)<len(qual):
44 | #				qual = qual[:len(seq)]
45 | #
46 | #			print "@" + read
47 | #			print seq
48 | #			print "+"
49 | #			print qual
50 | #
51 | #		except ValueError: #minor fraction of lines dont have 7 columns due to errors.
52 | #			pass
53 | 
54 | 
55 | if __name__ == '__main__':
56 | 	main(sys.argv[1], sys.argv[2])
57 | 


--------------------------------------------------------------------------------
/src/row_ME2.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import csv
  3 | from collections import defaultdict
  4 | from Bio import SeqIO
  5 | from Bio import SeqIO
  6 | from Bio.Seq import Seq
  7 | from Bio.Alphabet import generic_dna
  8 | from Bio.SeqRecord import SeqRecord
  9 | from random import randint, sample
 10 | from operator import itemgetter
 11 | import re
 12 | import gzip
 13 | 
 14 | Genome = {}
 15 | 
 16 | def Genomictabulator(fasta):
 17 | 
 18 | 	print >> sys.stderr, "Loading the genome into RAM memory ...",
 19 | 
 20 | 	f = open(fasta)
 21 | 
 22 | 	for chrfa in SeqIO.parse(f, "fasta"):
 23 | 		Genome[chrfa.id] = chrfa.seq
 24 | 
 25 | 	print >> sys.stderr, "OK"
 26 | 
 27 | 	f.close()
 28 | 
 29 | 
 30 | def main(sam_pre_processed, row_fastq):
 31 | 
 32 | 	fastq_out = open( ".".join(sys.argv[2].split(".")[:-1]) + ".row_ME.fastq", 'w')
 33 | 	ME_reads = set([])
 34 | 
 35 | 	for row in csv.reader(open(sam_pre_processed), delimiter = '\t'):
 36 | 
 37 | 		if len(row)==14: #To avoid rare errors (like SRR2138604.sam.pre_processed)
 38 | 			read, flag, tag, start, cigar, seq, qual, q_block_starts, q_block_ends,  micro_exon_seq_found, I_pos_tag, DRU, DRD, DR_corrected_micro_exon_seq_found = row
 39 | 			intron_tag, transcript_ID, anchors = tag.split("|")
 40 | 
 41 | 			chr = "_".join(re.findall(r"[\w']+", intron_tag)[:-2])
 42 | 			istart, iend = re.findall(r"[\w']+", intron_tag)[-2:]
 43 | 
 44 | 			istart = int(istart)
 45 | 			iend = int(iend)
 46 | 			
 47 | 			try:
 48 | 
 49 | 				intron_seq = str(Genome[chr][istart:iend]).upper()
 50 | 
 51 | 				micro_exons_coords = []
 52 | 
 53 | 				island = "AG" + DR_corrected_micro_exon_seq_found + "GT"
 54 | 				rev_island = str(Seq(island).reverse_complement())
 55 | 
 56 | 				strand = "+"
 57 | 
 58 | 				if "-" in intron_tag:
 59 | 					strand = "-"
 60 | 
 61 | 				if strand == "+" and island in intron_seq:
 62 | 
 63 | 					for i in [i for i in range(len(intron_seq)) if intron_seq.startswith(island, i)]:
 64 | 
 65 | 						ME_start = i + 2 + istart
 66 | 						ME_end = ME_start + len(DR_corrected_micro_exon_seq_found)
 67 | 						ME_chr = chr
 68 | 						ME_strand = strand
 69 | 
 70 | 						micro_exons_coords.append("_".join((map(str, [ME_chr, ME_strand, ME_start, ME_end]))))
 71 | 
 72 | 
 73 | 				elif strand == "-" and rev_island in intron_seq:
 74 | 
 75 | 
 76 | 					for i in [i for i in range(len(intron_seq)) if intron_seq.startswith(rev_island, i)]:
 77 | 
 78 | 						ME_start = i + 2 + istart
 79 | 						ME_end = ME_start + len(DR_corrected_micro_exon_seq_found)
 80 | 						ME_chr = chr
 81 | 						ME_strand = strand
 82 | 
 83 | 						micro_exons_coords.append("_".join((map(str, [ME_chr, ME_strand, ME_start, ME_end]))))
 84 | 
 85 | 				micro_exons_coords = ",".join(micro_exons_coords)
 86 | 
 87 | 				if micro_exons_coords!="":
 88 | 					print "\t".join(row) + "\t" + micro_exons_coords
 89 | 					ME_reads.add(read)
 90 |                     
 91 | # 					if flag==16:
 92 | # 						seq = str(Seq(island).reverse_complement())
 93 | # 						qual = qual[::-1]
 94 | 
 95 | # 					if len(seq)==len(qual):
 96 | 
 97 | # 						fastq_out.write("@" + read + "\n")
 98 | # 						fastq_out.write(seq + "\n")
 99 | # 						fastq_out.write("+" + "\n")
100 | # 						fastq_out.write(qual + "\n")
101 | 
102 | # 					elif len(seq)>len(qual):  ## preventing errors with hisat
103 | 
104 | # 						qual2 = qual + qual[ -(len(seq) - len(qual)) : ]
105 | 
106 | # 						fastq_out.write("@" + read + "\n")
107 | # 						fastq_out.write(seq + "\n")
108 | # 						fastq_out.write("+" + "\n")
109 | # 						fastq_out.write(qual2 + "\n")
110 | 
111 | # 					elif len(seq)<len(qual):
112 | 
113 | # 						qual2 = qual[:len(seq)]
114 | 
115 | # 						fastq_out.write("@" + read + "\n")
116 | # 						fastq_out.write(seq + "\n")
117 | # 						fastq_out.write("+" + "\n")
118 | # 						fastq_out.write(qual2 + "\n")
119 |                         
120 | 					# ME_fastq = SeqRecord( seq, id = read, description = "" )
121 | 					# ME_fastq.letter_annotations["phred_quality"] = qual
122 | 
123 | 					# fastq_out.write(ME_fastq.format("fastq"))
124 | 
125 | 
126 | 					# print "@" + read.qname
127 | 					# print seq
128 | 					# print "+"
129 | 					# print q
130 | 
131 | 					
132 | 			except KeyError:
133 | 				pass 
134 | 
135 | 			
136 | 	with gzip.open(row_fastq) as f:
137 | 
138 | 		for read in SeqIO.parse(f, "fastq"):
139 | 
140 | 			if read.id in ME_reads:
141 |                 
142 | 				f_out = SeqRecord( read.seq, read.id, description = "" )
143 | 				f_out.letter_annotations["phred_quality"] = read.letter_annotations["phred_quality"]
144 | 
145 | 				fastq_out.write(f_out.format("fastq"))
146 | 			
147 | 			
148 | if __name__ == '__main__':
149 | 	Genomictabulator(sys.argv[1])
150 | 	main(sys.argv[2], sys.argv[3])
151 | 


--------------------------------------------------------------------------------
/src/sashimi_input_generator.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import csv
 3 | import argparse
 4 | 
 5 | 
 6 | 
 7 | 
 8 | def main():
 9 | 
10 | 
11 | 
12 |     parser = argparse.ArgumentParser()
13 |     parser.add_argument("-f", "--files",  help="Comma separated list of file names", required=True)
14 |     parser.add_argument("-p", "--paths",  help="Comma separated list of file paths", required=True)
15 |     parser.add_argument("-c", "--conditions",  help="Comma separated list of condition names", required=True)
16 |     parser.add_argument("-wd", "--working_directory",  help="full path to the snakemake directory", required=True)
17 | 
18 | 
19 | 
20 |     args = parser.parse_args()
21 | 
22 |     for file, path, condition  in zip(args.files.split(","), args.paths.split(","), args.conditions.split(",")):
23 |         print( "\t".join([ file, args.working_directory + path, condition]) )
24 | 
25 | main()
26 | 


--------------------------------------------------------------------------------
/src/split_coverage.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import csv
 3 | 
 4 | def main(ME_SJ_coverage):
 5 | 
 6 |     with open(ME_SJ_coverage) as F:
 7 | 
 8 |         reader = csv.reader(F, delimiter="\t")
 9 | 
10 |         #print( "\t".join( [ "ME", "Coord", "PSI", "CI_Lo", "CI_Hi", "is_alternative_5", "alternatives_5",  "is_alternative_3", "alternatives_3" ]) )
11 | 
12 |         for row in reader:
13 | 
14 |             FILE, ME, total_SJs, ME_SJ_coverages, sum_ME_coverage, sum_ME_SJ_coverage_up_down_uniq, sum_ME_SJ_coverage_up, sum_ME_SJ_coverage_down, SJ_coverages, sum_SJ_coverage, is_alternative_5, is_alternative_3, alternatives_5, cov_alternatives_5, total_cov_alternatives_5, alternatives_3, cov_alternatives_3,  total_cov_alternatives_3 = row
15 |             
16 |             with open( FILE + ".sam.pre_processed.filter1.ME_SJ_coverage", "a") as out:
17 |                 out.write( "\t".join(row) + "\n")
18 | 
19 | if __name__ == '__main__':
20 | 	main(sys.argv[1]  )
21 | 


--------------------------------------------------------------------------------
/src/split_paired_end.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import csv
 3 | from Bio import SeqIO
 4 | from Bio import SeqIO
 5 | from Bio.Seq import Seq
 6 | from Bio.Alphabet import generic_dna
 7 | from Bio.SeqRecord import SeqRecord
 8 | 
 9 | 
10 | 
11 | def main (fastq):
12 | 	""" Splits the reads at N position. It generate two files rd1 and rd2 """
13 | 		
14 | 	#N = 80	
15 | 
16 | 	f = open(fastq)
17 | 	# rd1 = open(fastq + '.rd1', 'w')
18 | 	# rd2 = open(fastq + '.rd2', 'w')
19 | 		
20 | 	for read in SeqIO.parse(f, "fastq"):
21 | 		
22 | 		N = len(read.seq)/2
23 | 
24 | 		# if len(read.seq) == 155:
25 | 		# 	N = 80 
26 | 		
27 | 		rd1_seq = read.seq[:N]
28 | 		rd2_seq = read.seq[N:]
29 | 		
30 | 		rd1_id = read.id + "_1"
31 | 		rd2_id = read.id + "_2"
32 | 				
33 | 		rd1_Q = read.letter_annotations["phred_quality"][:N]
34 | 		rd2_Q = read.letter_annotations["phred_quality"][N:]
35 | 		
36 | 		reads_rd1 = SeqRecord( rd1_seq, rd1_id, description = "" )
37 | 		reads_rd1.letter_annotations["phred_quality"] = rd1_Q	
38 | 
39 | 		reads_rd2 = SeqRecord( rd2_seq, rd2_id, description = "" )
40 | 		reads_rd2.letter_annotations["phred_quality"] = rd2_Q	
41 | 		
42 | 		# rd1.write(reads_rd1.format("fastq"))
43 | 		# rd2.write(reads_rd2.format("fastq"))
44 | 
45 | 		print reads_rd1.format("fastq")
46 | 		print reads_rd2.format("fastq")
47 | 
48 | 
49 | if __name__ == '__main__':
50 | 	main(sys.argv[1])
51 | 


--------------------------------------------------------------------------------
/src/stats/discovery_stats.py:
--------------------------------------------------------------------------------
 1 | import sys,csv,pdb
 2 | 
 3 | ##### Use as #####
 4 |     #python bed12 MicroExonator_high_qual ME_len
 5 |     #Compatible with python2 and python3
 6 | ######
 7 | 
 8 | #def main(bed12, ME_high_cual , ME_len):
 9 | 
10 | def bed12_microexon_extract(bed12_path,ME_len):
11 | 	ME_annotated = set()
12 | 	for row in csv.reader(open(bed12_path), delimiter = '\t'):
13 | 		blocksizes = list(map(int, row[10].strip(",").split(",")))
14 | 		qstarts = list(map(int, row[11].strip(",").split(",")))
15 | 
16 | 		start = int(row[1])
17 | 		end = int(row[2])
18 | 		strand = row[5]
19 | 		bn = int(row[9])
20 | 		chrom = row[0]
21 | 
22 | 		for q1, q2, q3, b1, b2, b3 in zip(qstarts, qstarts[1:] , qstarts[2:], blocksizes, blocksizes[1:], blocksizes[2:]):
23 | 
24 | 			estart = start + q2
25 | 			eend = start + q2 + b2
26 | 			elength = eend - estart
27 | 			exon = (chrom, strand, str(estart), str(eend))
28 | 			transcript = row[3]
29 | 			SJ_start = start + q1 + b1
30 | 			SJ_end = start + q3
31 | 			ME_intron = " ".join([chrom, str(SJ_start), str(SJ_end), "SJ", "0", strand])
32 | 
33 | 			if elength <= ME_len:
34 |                			ME_annotated.add(exon)
35 | 
36 | 	return ME_annotated
37 | 
38 | def microexonator_output_reader(high_qual_filters):
39 | 	ME_MicroExonator = set()
40 | 	reader = csv.reader(open(high_qual_filters), delimiter = '\t')
41 | 	next(reader)
42 | 	for row in reader:
43 | 		ME = row[0]
44 | 		chrom = "_".join(ME.split("_")[:-3])  # To avoid errors with chrom_
45 | 		strand, start, end = ME.split("_")[-3:]
46 | 		ME = (chrom, strand, start, end)
47 | 		ME_MicroExonator.add(ME)
48 | 	return ME_MicroExonator
49 | 
50 | def compare(set_Microexonator,set_consensus):
51 | 	set_novel = set_Microexonator-set_consensus
52 | 	datafile=open("Report/stats/Microexons.not_consensus","w")
53 | 	for line in set_novel:
54 | 		datafile.write('\t'.join([str(x) for x in line])+'\n')
55 | 	datafile.close()
56 | 
57 | 	set_total = set_Microexonator | set_consensus
58 | 	datafile=open("Report/stats/Microexons.annotation.stats","w")
59 | 	for line in set_total:
60 | 		if line in set_novel:
61 | 			datafile.write('\t'.join([str(x) for x in line])+'\t'+str(1)+'\n')
62 | 		else:
63 | 			datafile.write('\t'.join([str(x) for x in line])+'\t'+str(0)+'\n')
64 | 	datafile.close()
65 | 	return
66 | 
67 | 
68 | if __name__ == '__main__':
69 | 	consensus_file = sys.argv[1]
70 | 	MicroExonator_file = sys.argv[2]
71 | 	length_limit =int(sys.argv[3]) 
72 | 	Consensus_microexons = bed12_microexon_extract(consensus_file,length_limit)
73 | 	MicroExonator_microexons=microexonator_output_reader(MicroExonator_file)
74 | 	compare(MicroExonator_microexons,Consensus_microexons)
75 | 


--------------------------------------------------------------------------------
/src/validate_fastq.py:
--------------------------------------------------------------------------------
 1 | from Bio.SeqRecord import SeqRecord
 2 | import gzip
 3 | from Bio import bgzf
 4 | 
 5 | def main(row_fastq):
 6 | 	
 7 |     with gzip.open(row_fastq, "rt") as f, gzip.open(row_fastq + ".gz", 'wt') as out:
 8 | 
 9 |         for read in SeqIO.parse(f, "fastq"):
10 |  
11 |             fastq_out = SeqRecord( read.seq, read.id, description = "" )
12 |             fastq_out.letter_annotations["phred_quality"] = read.letter_annotations["phred_quality"][ :len(read.seq)]
13 |             
14 |             if len(read.seq)==len(read.letter_annotations["phred_quality"]):
15 |                 out.write(fastq_out.format("fastq"))
16 |                 
17 |                 
18 | if __name__ == '__main__':
19 |     main(sys.argv[1])
20 | 


--------------------------------------------------------------------------------
/src/whippet_delta_to_ME.py:
--------------------------------------------------------------------------------
  1 | import csv, sys
  2 | from collections import defaultdict
  3 | import gzip
  4 | 
  5 | 
  6 | def main(jls_exons_tab, delta, high_qual_ME ):
  7 | 
  8 |     node_exons = dict()
  9 |     MEs = set([])
 10 |     ME_info = dict()
 11 |     
 12 |     #header  = ["Gene", "Node", "Coord", "Strand", "Type", "Psi_A", "Psi_B", "DeltaPsi", "Probability" ,"Complexity", "Entropy"]
 13 | 
 14 |     header = []
 15 | 
 16 |     with gzip.open(delta, mode="rt") as F:
 17 | 
 18 |         reader = csv.reader(F, delimiter="\t")
 19 | 
 20 |         header = next(reader)
 21 | 
 22 | 
 23 |     print("\t".join(["exon_ID"] + header))
 24 | 
 25 | 
 26 |     with open(high_qual_ME) as F:
 27 | 
 28 |         reader = csv.DictReader(F, delimiter="\t")
 29 | 
 30 |         for row in reader:
 31 |             MEs.add(row["ME"])
 32 |     
 33 |     
 34 |     with gzip.open(jls_exons_tab, mode="rt") as F:
 35 | 
 36 |         reader = csv.DictReader(F, delimiter="\t")
 37 |         
 38 |         for row in reader:
 39 |             chrom, locus, strand = row["Potential_Exon"].split(":")
 40 |             estart, eend = locus.split("-")
 41 | 
 42 |             for node in row["Whippet_Nodes"].split(","):
 43 |                 node_exons[(row["Gene"], node)] = [row["Potential_Exon"], row["Is_Annotated"]]
 44 | 
 45 | 
 46 |     with gzip.open(delta, mode="rt") as F: 
 47 | 
 48 |         reader = csv.DictReader(F, delimiter="\t")
 49 | 
 50 |         for row in reader:
 51 |             
 52 |             chrom, pos = row["Coord"].split(":")
 53 |             estart, eend = pos.split("-")
 54 |             estart = str(int(estart)-1)
 55 |             exon_ID = "_".join([chrom, row["Strand"], estart, eend])
 56 |             
 57 |             #if exon_ID == "chr10_+_127272438_127272444":
 58 |             #if "12727243" in exon_ID: 
 59 |             #    print(row, exon_ID)
 60 |             
 61 | 
 62 | 
 63 |             if (row["Gene"], row["Node"] ) in node_exons:
 64 |                 
 65 | 
 66 |                 
 67 |                 Potential_Exon, Is_Annotated = node_exons[(row["Gene"], row["Node"] )]
 68 |                 
 69 |                 out =  [row[x] for x in header] + node_exons[(row["Gene"], row["Node"] )]
 70 |  
 71 | 
 72 |                 
 73 |                 if row["Type"]=="AD":
 74 |             
 75 |                     nchrom, nstrand, nstart, nend = exon_ID.split("_")
 76 | 
 77 | 
 78 |                     echrom, eloci, estrand =  Potential_Exon.split(":")
 79 | 
 80 |                     estart, eend =  eloci.split("-")
 81 | 
 82 |                     if estrand == "+" and eend == nend:
 83 | 
 84 |                         new_exon_ID = "_".join([echrom, estrand, str(int(estart)-1), eend ])
 85 | 
 86 |                         exon_ID = new_exon_ID
 87 | 
 88 |                     if estrand == "-" and str(int(estart)-1) == nstart:
 89 | 
 90 |                         new_exon_ID = "_".join([echrom, estrand, str(int(estart)-1), eend ])
 91 | 
 92 |                         exon_ID = new_exon_ID
 93 | 
 94 | 
 95 |                 elif row["Type"]=="AA":
 96 | 
 97 |                     nchrom, nstrand, nstart, nend = exon_ID.split("_")
 98 | 
 99 | 
100 |                     echrom, eloci, estrand =  Potential_Exon.split(":")
101 | 
102 |                     estart, eend =  eloci.split("-")
103 | 
104 |                     if estrand == "-" and eend == nend:
105 | 
106 |                         new_exon_ID = "_".join([echrom, estrand, str(int(estart)-1), eend ])
107 | 
108 | 
109 |                         exon_ID = new_exon_ID
110 | 
111 | 
112 |                     if estrand == "+" and str(int(estart)-1) == nstart:
113 | 
114 |                         new_exon_ID = "_".join([echrom, estrand, str(int(estart)-1), eend ])
115 | 
116 |                         exon_ID = new_exon_ID
117 |                         
118 |                         #if "12727243" in exon_ID: 
119 |                         #    print(row, exon_ID, Potential_Exon)
120 |            
121 | 
122 |                 if exon_ID in MEs:
123 |                 #if exon_ID!="":
124 |           
125 |                    print("\t".join([exon_ID] + [row[x] for x in header] ))
126 |           
127 | 
128 | if __name__ == '__main__':
129 |     main(sys.argv[1], sys.argv[2], sys.argv[3])
130 | 


--------------------------------------------------------------------------------
/src/write_bam_tsv.py:
--------------------------------------------------------------------------------
 1 | from snakemake.utils import min_version
 2 | import csv
 3 | 
 4 | 
 5 | with open(snakemake.input[0]) as run:   #Populating the dictionaries
 6 | 
 7 |     run_metadata = csv.DictReader(run, delimiter="\t")
 8 | 
 9 |     for row in run_metadata:
10 |         
11 |         with open("Whippet/ggsashimi/" + row["Compare_ID"] + "/" + row["Compare_ID"] + ".tvs", "w") as out:
12 | 
13 |             A_cluster_names = []
14 |             B_cluster_names = []
15 |             
16 |             for c in row["A.cluster_names"].split(","):
17 |                 
18 |                 out.write(c + "\t" + "../../../Whippet/BAM/Merge/" + c + ".sort.bam" + "\n")
19 | 
20 |             for c in row["B.cluster_names"].split(","):
21 | 
22 |                 out.write(c + "\t" + "../../../Whippet/BAM/Merge/" + c + ".sort.bam" + "\n")
23 | 


--------------------------------------------------------------------------------
/src/write_sig_node_files.py:
--------------------------------------------------------------------------------
 1 | from snakemake.utils import min_version
 2 | import glob, os, csv
 3 | 
 4 | 
 5 | for path in glob.glob(snakemake.params[0] + "/*.*"):
 6 |   
 7 |   compare_name = path.split("/")[-1].split(".")[0]
 8 |   
 9 |   with open(path) as file  :
10 |     
11 |     reader = csv.DictReader(file, delimiter="\t")
12 |     
13 |     for row in reader:
14 |       
15 |       with open("Whippet/ggsashimi/" + compare_name + "/" + "_".join([row["Gene"], row["Node"], row["Strand"]] ) + ".txt", "w") as output:
16 |         
17 |         output.write(" ".join([row["Gene"], row["Node"], row["Strand"]] ))
18 |       
19 |         
20 |   
21 |   
22 | 


--------------------------------------------------------------------------------
/touch/MicroExonator/github_clone:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/touch/MicroExonator/github_clone


--------------------------------------------------------------------------------
/touch/VastDb.bed12:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/touch/VastDb.bed12


--------------------------------------------------------------------------------
/touch/gencode.vM16.annotation.bed12:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/touch/gencode.vM16.annotation.bed12


--------------------------------------------------------------------------------
/touch/gencode.vM16.annotation.gtf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/touch/gencode.vM16.annotation.gtf


--------------------------------------------------------------------------------
/touch/miniconda/envs/julia_0.6.1/share/julia/site/v0.6/Whippet/bin/whipet_scripts:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/touch/miniconda/envs/julia_0.6.1/share/julia/site/v0.6/Whippet/bin/whipet_scripts


--------------------------------------------------------------------------------
/touch/mm10.60way.phyloP60way.bw:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/touch/mm10.60way.phyloP60way.bw


--------------------------------------------------------------------------------
/touch/mm10.fa:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/touch/mm10.fa


--------------------------------------------------------------------------------
/touch/mm10_GT_AG_U2_3.good.matrix:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/touch/mm10_GT_AG_U2_3.good.matrix


--------------------------------------------------------------------------------
/touch/mm10_GT_AG_U2_5.good.matrix:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hemberg-lab/MicroExonator/3f8d4aa9c8ace8d1fcc99e8e5cb14f782eccb5c6/touch/mm10_GT_AG_U2_5.good.matrix


--------------------------------------------------------------------------------