├── bamliquidator_internal ├── __init__.py ├── debian │ ├── compat │ ├── bamliquidator.dirs │ ├── source │ │ └── format │ ├── rules │ ├── control │ └── copyright ├── bamliquidatorbatch │ ├── __init__.py │ └── flattener.py ├── .gitignore ├── python-bamliquidatorbatch.preinst ├── python-bamliquidatorbatch.control ├── test_install.sh ├── make_deb.sh ├── bamliquidator_util.cpp ├── Dockerfile ├── bamliquidator.m.cpp ├── bamliquidator.h ├── bamliquidator_util.h ├── makefile ├── bamliquidator.cpp └── bamliquidator_bins.m.cpp ├── .gitignore ├── docs ├── Fast.png ├── README.md ├── bamliquidator-packaging.md ├── bamplot-turbo.md └── collaboration-workflow.md ├── bamliquidator_batch.py ├── hockeysticks ├── css │ ├── asc.gif │ ├── bg.gif │ ├── desc.gif │ ├── gene.css │ ├── slider.css │ ├── clustering.css │ └── main.css ├── hierarch_clustering.R ├── license.md ├── clustering.html ├── gene.html ├── hockeystick.html └── js │ ├── clustering.js │ └── gene.js ├── .travis.yml ├── README.md ├── LICENSE ├── annotation ├── mm10.chrom.sizes └── hg19.chrom.sizes ├── ROSE2_stitchOpt.R ├── tophatTemplate.sh ├── GPL16043.sh ├── pythonTemplate.py ├── bamPlotExample.sh ├── dynamicEnhancer_rank.R ├── python_template.py ├── bamToGFFExample.sh ├── commandline_template.py ├── heatMapOrdered.R ├── dynamicEnhancer_plot.R ├── RNA_SEQ_PIPELINE_README.txt ├── bamTableUpdate.py ├── pipeline_template.py ├── bamPlot.R ├── extractGuides.py ├── .pylintrc ├── GPL16043.r ├── bamToGFF_turbo.py └── makeBamMeta.py /bamliquidator_internal/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /bamliquidator_internal/debian/compat: -------------------------------------------------------------------------------- 1 | 9 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *~ 3 | *.sif 4 | __pycache__ 5 | -------------------------------------------------------------------------------- /bamliquidator_internal/bamliquidatorbatch/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /bamliquidator_internal/debian/bamliquidator.dirs: -------------------------------------------------------------------------------- 1 | usr/bin 2 | -------------------------------------------------------------------------------- /bamliquidator_internal/debian/source/format: -------------------------------------------------------------------------------- 1 | 3.0 (quilt) 2 | -------------------------------------------------------------------------------- /docs/Fast.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BradnerLab/pipeline/HEAD/docs/Fast.png -------------------------------------------------------------------------------- /bamliquidator_batch.py: -------------------------------------------------------------------------------- 1 | bamliquidator_internal/bamliquidatorbatch/bamliquidator_batch.py -------------------------------------------------------------------------------- /hockeysticks/css/asc.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BradnerLab/pipeline/HEAD/hockeysticks/css/asc.gif 
-------------------------------------------------------------------------------- /hockeysticks/css/bg.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BradnerLab/pipeline/HEAD/hockeysticks/css/bg.gif -------------------------------------------------------------------------------- /bamliquidator_internal/.gitignore: -------------------------------------------------------------------------------- 1 | bamliquidator_bins 2 | bamliquidator_regions 3 | bamliquidator 4 | *.o 5 | -------------------------------------------------------------------------------- /hockeysticks/css/desc.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BradnerLab/pipeline/HEAD/hockeysticks/css/desc.gif -------------------------------------------------------------------------------- /bamliquidator_internal/debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | %: 3 | dh $@ 4 | 5 | override_dh_auto_install: 6 | $(MAKE) DESTDIR=$$(pwd)/debian/bamliquidator prefix=/usr install 7 | -------------------------------------------------------------------------------- /bamliquidator_internal/python-bamliquidatorbatch.preinst: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | 4 | if [ -d "/usr/local/lib/python2.7/dist-packages/bamliquidatorbatch" ]; then 5 | pip uninstall -y BamLiquidatorBatch 6 | fi 7 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Bamliquidator 2 | 3 | This is the documentation base for bamliquidator. You can read about any of the 4 | following: 5 | 6 | * [bamliquidator](bamliquidator.md) 7 | * [Collaboration Workflow](collaboration-workflow.md) 8 | * [Packaging](bamliquidator-packaging.md) 9 | * [Bamplot Turbo](bamplot-turbo.md) 10 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 4 | - "3.6" 5 | notifications: 6 | email: false 7 | install: 8 | - "pip install pep8 pylint" 9 | - "pip install tables scipy" 10 | # TODO: Move tables, etc. to requirements.txt, specify versions. 
11 | script: 12 | - "pylint --rcfile=.pylintrc --errors-only bamliquidator_internal/" 13 | -------------------------------------------------------------------------------- /hockeysticks/css/gene.css: -------------------------------------------------------------------------------- 1 | .bar { 2 | fill: steelblue; 3 | } 4 | 5 | .bar:hover { 6 | fill: brown; 7 | } 8 | 9 | .axis { 10 | font: 10px sans-serif; 11 | } 12 | 13 | .axis path, 14 | .axis line { 15 | fill: none; 16 | stroke: #000; 17 | shape-rendering: crispEdges; 18 | } 19 | 20 | #bar_chart_sort { 21 | position: absolute; 22 | width: 100px; 23 | left: 20px; 24 | top: 100px; 25 | font-size: 12px; 26 | font-family: sans-serif; 27 | } -------------------------------------------------------------------------------- /hockeysticks/hierarch_clustering.R: -------------------------------------------------------------------------------- 1 | matrix_a <- read.table(file.choose(), sep='\t', header=T); 2 | 3 | t <- cor(matrix_a, method="pearson") 4 | 5 | t <- 1-t 6 | 7 | matrix_d <- dist(t); 8 | 9 | hc <- hclust(matrix_d,"average"); 10 | 11 | plot(hc) 12 | hmcols<-colorRampPalette(c("red","white"))(256) 13 | 14 | hclust.ave <- function(x) hclust(x, method="complete") # note: uses complete linkage despite the "ave" name 15 | 16 | heatmap(t, Colv=T,Rowv=T, scale='none', col=hmcols, hclustfun=hclust.ave) 17 | 18 | write.table(t, file="corr.csv") 19 | 20 | svg("mymap.svg") 21 | heatmap(t, Colv=T,Rowv=T, scale='none', col=hmcols, hclustfun=hclust.ave) # redraw the same heatmap into the svg device 22 | dev.off() -------------------------------------------------------------------------------- /bamliquidator_internal/python-bamliquidatorbatch.control: -------------------------------------------------------------------------------- 1 | Source: bamliquidatorbatch 2 | Maintainer: John DiMatteo 3 | Section: python 4 | Priority: optional 5 | Build-Depends: python-setuptools (>= 0.6b3), python-all (>= 2.6.6-3), debhelper (>= 7.4.3), dh-python 6 | Standards-Version: 3.9.1 7 | 8 | Package: python-bamliquidatorbatch 9 | Architecture: all 10 | Depends: ${misc:Depends}, ${python:Depends}, python-pip 11 | Description: Python wrapper around bamliquidator for analyzing the density of short DNA sequence read alignments in the BAM file format. 12 | 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | pipeline 2 | ======== 3 | 4 | No longer actively maintained :( 5 | Bradner Lab 4 Lyfe! 6 | 7 | bradner lab computation pipeline scripts 8 | 9 | [![Build Status](https://travis-ci.org/BradnerLab/pipeline.svg)](https://travis-ci.org/BradnerLab/pipeline) 10 | 11 | For documentation including usage, containers, and other supported functionality, see 12 | the [documentation](docs) provided with the repository.
13 | 14 | Copyright (c) 2013 Charles Lin and collaborators: 15 | - John DiMatteo 16 | - Nick Semenkovich 17 | - Xin Zhong 18 | 19 | License: MIT (see LICENSE for details) 20 | -------------------------------------------------------------------------------- /bamliquidator_internal/debian/control: -------------------------------------------------------------------------------- 1 | Source: bamliquidator 2 | Maintainer: John DiMatteo 3 | Section: science 4 | Priority: optional 5 | Standards-Version: 3.9.2 6 | Build-Depends: debhelper (>= 9), libbam-dev, libhdf5-serial-dev, libboost-dev, libgoogle-perftools-dev, samtools, libtbb-dev 7 | 8 | Package: bamliquidator 9 | Architecture: any 10 | Depends: ${shlibs:Depends}, ${misc:Depends}, samtools, python-bamliquidatorbatch (>= 1.3.0) 11 | Description: Analyzes the density of short DNA sequence read alignments in the BAM file format. 12 | Read counts across one or more BAM files are grouped, normalized, summarized, and graphed in interactive html files and stored in HDF5 tables and optionally copied into tab delimited text files. 13 | -------------------------------------------------------------------------------- /bamliquidator_internal/test_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | # this script assumes you have $TMP configured with a few GBs of space 5 | # available 6 | 7 | # create a tmpdir for easy cleanup 8 | TMPDIR=$(mktemp -d -t bamliquidator-XXXXXXXX) 9 | cd $TMPDIR 10 | 11 | # download the data 12 | wget https://www.dropbox.com/s/bu75ojqr2ibkf57/04032013_D1L57ACXX_4.TTAGGC.hg18.bwt.sorted.bam 13 | wget https://www.dropbox.com/s/a71ngagu2k8pgiv/04032013_D1L57ACXX_4.TTAGGC.hg18.bwt.sorted.bam.bai 14 | wget https://www.dropbox.com/s/g7rcde76jya11y0/04032013_D1L57ACXX_4.TTAGGC.hg18.summary_chr1.tab 15 | 16 | # run the test 17 | bamliquidator_batch --flatten 04032013_D1L57ACXX_4.TTAGGC.hg18.bwt.sorted.bam 18 | 19 | # check the results 20 | diff -qs 04032013_D1L57ACXX_4.TTAGGC.hg18.summary_chr1.tab output/summary_chr1.tab 21 | 22 | ECODE=$? 23 | 24 | # cleanup 25 | rm -r $TMPDIR 26 | 27 | if [ $ECODE -eq 0 ]; then 28 | echo -e "\033[0;32mOK: \033[0mtest produces expected results" 29 | else 30 | echo -e "\033[0;31mFAIL: \033[0mtest script failed or produced unexpected results" 31 | fi 32 | 33 | -------------------------------------------------------------------------------- /bamliquidator_internal/debian/copyright: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013 Xin Zhong, Charles Lin, and John DiMatteo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 10 | -------------------------------------------------------------------------------- /hockeysticks/license.md: -------------------------------------------------------------------------------- 1 | hockey-sticks 2 | ============= 3 | The MIT License (MIT) 4 | 5 | Copyright (c) 2014 Angela Fan 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Charles Lin (Bradner Lab) 4 | and Collaborators: 5 | John DiMatteo 6 | Nick Semenkovich | semenko@alum.mit.edu | https://nick.semenkovich.com 7 | Xin Zhong 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy of 10 | this software and associated documentation files (the "Software"), to deal in 11 | the Software without restriction, including without limitation the rights to 12 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 13 | the Software, and to permit persons to whom the Software is furnished to do so, 14 | subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in all 17 | copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 21 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 22 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 23 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 24 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 | -------------------------------------------------------------------------------- /annotation/mm10.chrom.sizes: -------------------------------------------------------------------------------- 1 | chr1 195471971 2 | chr2 182113224 3 | chrX 171031299 4 | chr3 160039680 5 | chr4 156508116 6 | chr5 151834684 7 | chr6 149736546 8 | chr7 145441459 9 | chr10 130694993 10 | chr8 129401213 11 | chr14 124902244 12 | chr9 124595110 13 | chr11 122082543 14 | chr13 120421639 15 | chr12 120129022 16 | chr15 104043685 17 | chr16 98207768 18 | chr17 94987271 19 | chrY 91744698 20 | chr18 90702639 21 | chr19 61431566 22 | chr5_JH584299_random 953012 23 | chrX_GL456233_random 336933 24 | chrY_JH584301_random 259875 25 | chr1_GL456211_random 241735 26 | chr4_GL456350_random 227966 27 | chr4_JH584293_random 207968 28 | chr1_GL456221_random 206961 29 | chr5_JH584297_random 205776 30 | chr5_JH584296_random 199368 31 | chr5_GL456354_random 195993 32 | chr4_JH584294_random 191905 33 | chr5_JH584298_random 184189 34 | chrY_JH584300_random 182347 35 | chr7_GL456219_random 175968 36 | chr1_GL456210_random 169725 37 | chrY_JH584303_random 158099 38 | chrY_JH584302_random 155838 39 | chr1_GL456212_random 153618 40 | chrUn_JH584304 114452 41 | chrUn_GL456379 72385 42 | chr4_GL456216_random 66673 43 | chrUn_GL456393 55711 44 | chrUn_GL456366 47073 45 | chrUn_GL456367 42057 46 | chrUn_GL456239 40056 47 | chr1_GL456213_random 39340 48 | chrUn_GL456383 38659 49 | chrUn_GL456385 35240 50 | chrUn_GL456360 31704 51 | chrUn_GL456378 31602 52 | chrUn_GL456389 28772 53 | chrUn_GL456372 28664 54 | chrUn_GL456370 26764 55 | chrUn_GL456381 25871 56 | chrUn_GL456387 24685 57 | chrUn_GL456390 24668 58 | chrUn_GL456394 24323 59 | chrUn_GL456392 23629 60 | chrUn_GL456382 23158 61 | chrUn_GL456359 22974 62 | chrUn_GL456396 21240 63 | chrUn_GL456368 20208 64 | chrM 16299 65 | chr4_JH584292_random 14945 66 | chr4_JH584295_random 1976 67 | -------------------------------------------------------------------------------- /ROSE2_stitchOpt.R: -------------------------------------------------------------------------------- 1 | stitchFile = commandArgs(TRUE)[1] 2 | 3 | outFolder = commandArgs(TRUE)[2] 4 | 5 | name = commandArgs(TRUE)[3] 6 | 7 | #print(stitchFile) 8 | #print(outFolder) 9 | #print(name) 10 | 11 | 12 | stitchTable = read.table(stitchFile,header=TRUE) 13 | 14 | 15 | 16 | 17 | 18 | xTitle = 'Number of enhancer regions' 19 | xVector =stitchTable$NUM_REGIONS 20 | 21 | yVector=stitchTable$MEAN_CONSTIT/stitchTable$MEAN_REGION 22 | yTitle = 'Average fraction of \nenhancer sequence in each region' 23 | yDerivTitle = 'Decrease in fraction of \nenhancer sequence per region' 24 | yDeriv = diff(yVector) 25 | 26 | # print('x vector has a length of') 27 | # print(length(xVector)) 28 | # print(xVector) 29 | # print(xVector[2:length(xVector)]) 30 | # print(length(xVector[2:length(xVector)])) 31 | # print('y spline vector has a length of') 32 | # print(length(yDeriv)) 33 | # print(yVector) 34 | # print(yDeriv) 35 | 36 | yDerviFit = smooth.spline(xVector[2:length(xVector)],yDeriv) 37 | optX= yDerviFit$x[which.min(yDerviFit$y)] 38 | 39 | #get the optimal stitching parameter 40 | optStitch = stitchTable[which(stitchTable[,2]==optX),1] 41 | 42 | 43 | stitchPDFName = paste(outFolder,name,'_stitch_parameter.pdf',sep='') 44 | #print(stitchPDFName) 45 | 46 | pdf(file=stitchPDFName,width=8.5,height=11) 47 | par(mai=c(1,1.5,.2,.5)) 48 | par(mfrow=c(2,1)) 49 | plot(xVector,yVector,type='l',xlab=xTitle,ylab=yTitle) 50 | abline(v= optX) 51 | stitchText = 
paste('OPTIMUM STITCHING AT: ',optStitch,'bp',sep='') 52 | text(min(xVector),.8*max(yVector),stitchText,pos=4) 53 | 54 | plot(xVector[2:length(xVector)],yDeriv,xlab=xTitle,ylab=yDerivTitle) 55 | lines(yDerviFit,col='blue') 56 | abline(v= optX) 57 | 58 | dev.off() 59 | write(optStitch,stdout()) -------------------------------------------------------------------------------- /hockeysticks/css/slider.css: -------------------------------------------------------------------------------- 1 | .d3-slider { 2 | position: relative; 3 | font-family: Verdana,Arial,sans-serif; 4 | font-size: 1.1em; 5 | border: 1px solid #aaaaaa; 6 | z-index: 2; 7 | } 8 | 9 | .d3-slider-horizontal { 10 | height: .8em; 11 | } 12 | 13 | .d3-slider-range { 14 | background:#2980b9; 15 | left:0px; 16 | right:0px; 17 | height: 0.8em; 18 | position: absolute; 19 | } 20 | 21 | .d3-slider-range-vertical { 22 | background:#2980b9; 23 | left:0px; 24 | right:0px; 25 | position: absolute; 26 | top:0; 27 | } 28 | 29 | .d3-slider-vertical { 30 | width: .8em; 31 | height: 100px; 32 | } 33 | 34 | .d3-slider-handle { 35 | position: absolute; 36 | width: 1.2em; 37 | height: 1.2em; 38 | border: 1px solid #d3d3d3; 39 | border-radius: 4px; 40 | background: #eee; 41 | background: linear-gradient(to bottom, #eee 0%, #ddd 100%); 42 | z-index: 3; 43 | } 44 | 45 | .d3-slider-handle:hover { 46 | border: 1px solid #999999; 47 | } 48 | 49 | .d3-slider-horizontal .d3-slider-handle { 50 | top: -.3em; 51 | margin-left: -.6em; 52 | } 53 | 54 | .d3-slider-axis { 55 | position: relative; 56 | z-index: 1; 57 | } 58 | 59 | .d3-slider-axis-bottom { 60 | top: .8em; 61 | } 62 | 63 | .d3-slider-axis-right { 64 | left: .8em; 65 | } 66 | 67 | .d3-slider-axis path { 68 | stroke-width: 0; 69 | fill: none; 70 | } 71 | 72 | .d3-slider-axis line { 73 | fill: none; 74 | stroke: #aaa; 75 | shape-rendering: crispEdges; 76 | } 77 | 78 | .d3-slider-axis text { 79 | font-size: 11px; 80 | } 81 | 82 | .d3-slider-vertical .d3-slider-handle { 83 | left: -.25em; 84 | margin-left: 0; 85 | margin-bottom: -.6em; 86 | } -------------------------------------------------------------------------------- /hockeysticks/css/clustering.css: -------------------------------------------------------------------------------- 1 | /*default tooltip styles*/ 2 | .d3-tip { 3 | z-index: 101; 4 | line-height: 1; 5 | padding: 5px; 6 | background: rgba(0, 0, 0, 0.8); 7 | color: #fff; 8 | border-radius: 1px; 9 | text-align: center; 10 | pointer-events: none; 11 | } 12 | 13 | .d3-tip.n:after { 14 | /*content: "\25BC";*/ 15 | margin: -1px 0 0 0; 16 | top: 100%; 17 | left: 0; 18 | text-align: center; 19 | } 20 | 21 | .d3-tip.s:after { 22 | margin: 0 0 1px 0; 23 | top: -8px; 24 | left: 0; 25 | text-align: center; 26 | } 27 | 28 | .d3-tip.e:after { 29 | margin: -4px 0 0 0; 30 | top: 50%; 31 | left: -8px; 32 | } 33 | 34 | .d3-tip.w:after { 35 | margin: -4px 0 0 -1px; 36 | top: 50%; 37 | left: 100%; 38 | } 39 | 40 | /*page styles*/ 41 | body { 42 | font-family: Trebuchet, sans-serif; 43 | color: black; 44 | font-size: 14px; 45 | font-weight: normal; 46 | -webkit-touch-callout: none; 47 | -webkit-user-select: none; 48 | -khtml-user-select: none; 49 | -moz-user-select: none; 50 | -ms-user-select: none; 51 | user-select: none; 52 | } 53 | 54 | label { 55 | font-weight: normal; 56 | } 57 | 58 | #page_header { 59 | background-color: #a6a6a6; 60 | height: 50px; 61 | margin: 0px auto 20px auto; 62 | border-bottom: 2px black solid; 63 | } 64 | 65 | #page_title { 66 | margin: 0px auto 0px auto; 67 | line-height: 50px; 
68 | font-size: 24px; 69 | text-align: center; 70 | color: black; 71 | font-family: Trebuchet, sans-serif; 72 | } 73 | 74 | #page_description { 75 | margin: 0px auto 0px auto; 76 | left: 0; 77 | right: 0; 78 | font-size: 12px; 79 | line-height: 25px; 80 | text-align: center; 81 | color: black; 82 | font-family: Trebuchet, sans-serif; 83 | } 84 | 85 | #del_button { 86 | position: relative; 87 | bottom: 15px; 88 | } 89 | 90 | #clear_button { 91 | position: relative; 92 | bottom: 15px; 93 | } -------------------------------------------------------------------------------- /bamliquidator_internal/make_deb.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # this script is intended to be run by the makefile in this directory 4 | # it used to be part of the makefile, but it resulted in git being a dependency of running "make" 5 | # which was not permitted by the ppa build 6 | 7 | git diff --quiet 8 | if [ $? -eq 0 ]; then 9 | CLEAN=clean 10 | else 11 | CLEAN=unclean 12 | fi 13 | 14 | set -ex 15 | 16 | CHANGELOG="%s ($VERSION-0ppa1~%s) %s; urgency=low 17 | 18 | * Auto generated from makefile 19 | * $(git config --get remote.origin.url) 20 | * $(git rev-parse HEAD) ($CLEAN) 21 | 22 | -- $UPLOADER <$UPLOADER_EMAIL> $(date +"%a, %d %b %G %H:%M:%S %z") 23 | 24 | " 25 | 26 | py2dsc -m "$UPLOADER <$UPLOADER_EMAIL>" bamliquidatorbatch_$VERSION.orig.tar.gz 27 | cp python-bamliquidatorbatch.preinst deb_dist/bamliquidatorbatch-$VERSION/debian/ 28 | cp python-bamliquidatorbatch.control deb_dist/bamliquidatorbatch-$VERSION/debian/control 29 | 30 | for ubuntu_version in "trusty" "xenial" "bionic" 31 | do 32 | cp -R deb_dist/bamliquidatorbatch-$VERSION deb_dist/bamliquidatorbatch-$VERSION-$ubuntu_version 33 | pushd deb_dist/bamliquidatorbatch-$VERSION-$ubuntu_version 34 | printf "$CHANGELOG" bamliquidatorbatch $ubuntu_version $ubuntu_version > debian/changelog 35 | debuild $debuild_args 36 | popd 37 | done 38 | 39 | mv bamliquidator-$VERSION.tar.gz bamliquidator_$VERSION.orig.tar.gz 40 | tar xf bamliquidator_$VERSION.orig.tar.gz 41 | 42 | cp -R debian bamliquidator-$VERSION 43 | for ubuntu_version in "trusty" "xenial" "bionic" 44 | do 45 | cp -R bamliquidator-$VERSION bamliquidator-$VERSION-$ubuntu_version 46 | printf "$CHANGELOG" bamliquidator $ubuntu_version $ubuntu_version > bamliquidator-$VERSION-$ubuntu_version/debian/changelog 47 | pushd bamliquidator-$VERSION-$ubuntu_version/debian 48 | debuild $debuild_args 49 | popd 50 | done 51 | rm -rf bamliquidator-$VERSION 52 | -------------------------------------------------------------------------------- /tophatTemplate.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | # The MIT License (MIT) 4 | 5 | # Copyright (c) 2013 Charles Lin 6 | 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the following conditions: 13 | 14 | # The above copyright notice and this permission notice shall be included in 15 | # all copies or substantial portions of the Software. 
16 | 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | # THE SOFTWARE. 24 | 25 | 26 | PROJECT_DIR='/ark/home/cl512/projects/ezhi/' 27 | NAME='K422_cmpd5_rep1' 28 | FASTQ='/ark/home/cl512/ressrv19/raw/130410Bra/D13-1625/130410Bra_D13-1625_2_sequence.fastq' 29 | GENOME='hg18' 30 | 31 | mkdir $PROJECT_DIR$NAME 32 | tophat -p 4 -o $PROJECT_DIR$NAME/ --transcriptome-index=/ark/home/cl512/ressrv19/genomes/transcriptome_data/hg18_genes /ark/home/cl512/ressrv19/genomes/hg18_withERCC/hg18_ercc_noRand $FASTQ 33 | 34 | samtools sort $PROJECT_DIR$NAME/accepted_hits.bam $PROJECT_DIR$NAME/$NAME.$GENOME.tophat.sorted 35 | samtools index $PROJECT_DIR$NAME/$NAME.$GENOME.tophat.sorted.bam 36 | 37 | 38 | 39 | python /usr/local/bin/RPKM_count.py -r '/ark/home/cl512/ressrv19/genomes/ERCC_Technical_Data/ERCC92.bed' -i $PROJECT_DIR$NAME/$NAME.$GENOME.tophat.sorted.bam -e -o $PROJECT_DIR$NAME/$NAME\_ERCC 40 | 41 | cufflinks -p 4 -G /ark/home/cl512/ressrv19/annotations/hg18_genes.gtf -o $PROJECT_DIR$NAME $PROJECT_DIR$NAME/$NAME.$GENOME.tophat.sorted.bam -------------------------------------------------------------------------------- /hockeysticks/clustering.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | HockeyStick Viz 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 20 | 21 |
49 | 50 | 51 | -------------------------------------------------------------------------------- /GPL16043.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | # The MIT License (MIT) 3 | 4 | # Copyright (c) 2013 Charles Lin 5 | 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy 7 | # of this software and associated documentation files (the "Software"), to deal 8 | # in the Software without restriction, including without limitation the rights 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | # copies of the Software, and to permit persons to whom the Software is 11 | # furnished to do so, subject to the following conditions: 12 | 13 | # The above copyright notice and this permission notice shall be included in 14 | # all copies or substantial portions of the Software. 15 | 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | # THE SOFTWARE. 23 | 24 | 25 | #first make temp directory 26 | 27 | #get the zip of the cel files and the name of the zip 28 | CEL_ZIP=$1 29 | NAME=$2 30 | 31 | #CEL_ZIP=/ark/home/cl512/ressrv19/raw/expression/JB20130926st.zip 32 | #NAME=AFFY 33 | ID=$RANDOM 34 | #ID=1234 35 | TEMP_DIR_ROOT=/ark/temp/ 36 | TEMP_DIR=$TEMP_DIR_ROOT$NAME\_tmp\_$ID 37 | INITIAL_DIR=`pwd` 38 | 39 | #making the temp directory 40 | mkdir $TEMP_DIR 41 | 42 | #copy and unzip stuff 43 | cp $CEL_ZIP $TEMP_DIR/$NAME\_tmp_$ID.zip 44 | cd $TEMP_DIR 45 | unzip $TEMP_DIR/$NAME\_tmp_$ID.zip 46 | 47 | #make an analysis output directory 48 | mkdir $TEMP_DIR/output 49 | 50 | #run the spikey normy 51 | R --no-save $TEMP_DIR/ $NAME < $INITIAL_DIR/GPL16043.r 52 | 53 | #run the GPL gene level script 54 | python $INITIAL_DIR/GPL16043.py -i $TEMP_DIR/output/$NAME\_all_mas5_probe_exprs_raw.txt 55 | python $INITIAL_DIR/GPL16043.py -i $TEMP_DIR/output/$NAME\_all_mas5_probe_exprs_norm.txt 56 | 57 | #zip up the output 58 | cd $TEMP_DIR 59 | zip -r $NAME\_output.zip output 60 | 61 | echo "WROTE OUTPUT TO: " $TEMP_DIR/$NAME\_output.zip 62 | -------------------------------------------------------------------------------- /annotation/hg19.chrom.sizes: -------------------------------------------------------------------------------- 1 | chr1 249250621 2 | chr2 243199373 3 | chr3 198022430 4 | chr4 191154276 5 | chr5 180915260 6 | chr6 171115067 7 | chr7 159138663 8 | chrX 155270560 9 | chr8 146364022 10 | chr9 141213431 11 | chr10 135534747 12 | chr11 135006516 13 | chr12 133851895 14 | chr13 115169878 15 | chr14 107349540 16 | chr15 102531392 17 | chr16 90354753 18 | chr17 81195210 19 | chr18 78077248 20 | chr20 63025520 21 | chrY 59373566 22 | chr19 59128983 23 | chr22 51304566 24 | chr21 48129895 25 | chr6_ssto_hap7 4928567 26 | chr6_mcf_hap5 4833398 27 | chr6_cox_hap2 4795371 28 | chr6_mann_hap4 4683263 29 | chr6_apd_hap1 4622290 30 | chr6_qbl_hap6 4611984 31 | chr6_dbb_hap3 4610396 32 | chr17_ctg5_hap1 1680828 33 | chr4_ctg9_hap1 590426 34 | chr1_gl000192_random 547496 35 | chrUn_gl000225 211173 36 | chr4_gl000194_random 191469 37 | chr4_gl000193_random 189789 38 | 
chr9_gl000200_random 187035 39 | chrUn_gl000222 186861 40 | chrUn_gl000212 186858 41 | chr7_gl000195_random 182896 42 | chrUn_gl000223 180455 43 | chrUn_gl000224 179693 44 | chrUn_gl000219 179198 45 | chr17_gl000205_random 174588 46 | chrUn_gl000215 172545 47 | chrUn_gl000216 172294 48 | chrUn_gl000217 172149 49 | chr9_gl000199_random 169874 50 | chrUn_gl000211 166566 51 | chrUn_gl000213 164239 52 | chrUn_gl000220 161802 53 | chrUn_gl000218 161147 54 | chr19_gl000209_random 159169 55 | chrUn_gl000221 155397 56 | chrUn_gl000214 137718 57 | chrUn_gl000228 129120 58 | chrUn_gl000227 128374 59 | chr1_gl000191_random 106433 60 | chr19_gl000208_random 92689 61 | chr9_gl000198_random 90085 62 | chr17_gl000204_random 81310 63 | chrUn_gl000233 45941 64 | chrUn_gl000237 45867 65 | chrUn_gl000230 43691 66 | chrUn_gl000242 43523 67 | chrUn_gl000243 43341 68 | chrUn_gl000241 42152 69 | chrUn_gl000236 41934 70 | chrUn_gl000240 41933 71 | chr17_gl000206_random 41001 72 | chrUn_gl000232 40652 73 | chrUn_gl000234 40531 74 | chr11_gl000202_random 40103 75 | chrUn_gl000238 39939 76 | chrUn_gl000244 39929 77 | chrUn_gl000248 39786 78 | chr8_gl000196_random 38914 79 | chrUn_gl000249 38502 80 | chrUn_gl000246 38154 81 | chr17_gl000203_random 37498 82 | chr8_gl000197_random 37175 83 | chrUn_gl000245 36651 84 | chrUn_gl000247 36422 85 | chr9_gl000201_random 36148 86 | chrUn_gl000235 34474 87 | chrUn_gl000239 33824 88 | chr21_gl000210_random 27682 89 | chrUn_gl000231 27386 90 | chrUn_gl000229 19913 91 | chrM 16571 92 | chrUn_gl000226 15008 93 | chr18_gl000207_random 4262 94 | -------------------------------------------------------------------------------- /pythonTemplate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | #pythonTemplate.py <- change to title of your script 4 | #130801 <- date 5 | #Name 6 | 7 | 8 | #Description: 9 | 10 | #This is a generic python template that has functions from utils.py imported and can be used on CFCE1 11 | 12 | 13 | 14 | #================================================================================ 15 | #=============================DEPENDENCIES======================================= 16 | #================================================================================ 17 | 18 | import sys 19 | 20 | print "Using python version %s" % sys.version 21 | 22 | 23 | #importing utils package 24 | sys.path.append('/home/cl512/src/pipeline/') 25 | import utils 26 | 27 | 28 | 29 | #================================================================================ 30 | #============================GLOBAL PARAMETERS=================================== 31 | #================================================================================ 32 | 33 | #add locations of files and global parameters in this section 34 | 35 | 36 | dataFile ='/location/file.txt' 37 | genome = 'hg18' 38 | 39 | 40 | #================================================================================ 41 | #===================================CLASSES====================================== 42 | #================================================================================ 43 | 44 | #user defined classes here 45 | 46 | #================================================================================ 47 | #=================================FUNCTIONS====================================== 48 | #================================================================================ 49 | 50 | #write your specific functions here 51 | 52 | 53 | def returnGenome(genome): 54 | 55 
| ''' 56 | prints the genome being used 57 | ''' 58 | 59 | print "Using genome %s for analysis" % (genome) 60 | 61 | 62 | 63 | 64 | 65 | #================================================================================ 66 | #===============================MAIN RUN========================================= 67 | #================================================================================ 68 | 69 | 70 | #write the actual script here 71 | 72 | 73 | def main(): 74 | 75 | ''' 76 | this is the main run function for the script 77 | all of the work should occur here, but no functions should be defined here 78 | ''' 79 | 80 | 81 | returnGenome(genome) 82 | 83 | 84 | 85 | 86 | 87 | 88 | main() 89 | 90 | -------------------------------------------------------------------------------- /docs/bamliquidator-packaging.md: -------------------------------------------------------------------------------- 1 | # Bamliquidator Packaging 2 | 3 | ### Overview 4 | 5 | * `make dput` needs to be run from a proper environment to update the bamliquidator ppa version 6 | * prerequisites in addition to [developer setup](https://github.com/BradnerLab/pipeline/wiki/bamliquidator#check-list) on Ubuntu 18.04: `sudo apt install devscripts python-all python-stdeb dput` 7 | * the Bradner Lab main EC2 instance is configured for packaging with the user `packager` 8 | * you can use the ubuntu ssh key to ssh in as packager 9 | * if you need the gpg passphrase, contact jdimatteo@gmail.com 10 | * Ubuntu Launchpad account name is dfbradnerlab@gmail.com 11 | * Upload errors and acknowledgments are emailed to dfbradnerlab@gmail.com 12 | * it takes a few minutes for new versions to be published 13 | * it takes a couple minutes for the upload acknowledgment to be emailed 14 | * next, it takes a couple more minutes to go from status "Pending" to "Published" at https://launchpad.net/~bradner-computation/+archive/ubuntu/pipeline/+packages 15 | 16 | ### Example Updating the Package Version 17 | 18 | ```bash 19 | $ ssh packager@ec2 20 | $ cd pipeline/bamliquidator_internal/ 21 | $ git pull 22 | $ make dput 23 | ``` 24 | 25 | ### Example Testing Package Before Uploading 26 | 27 | ```bash 28 | $ ssh packager@ec2 29 | $ cd pipeline/bamliquidator_internal/ 30 | $ git pull 31 | $ make deb 32 | $ scp deb_dist/python-bamliquidatorbatch_*trusty*.deb bamliquidator_*trusty*.deb ubuntu@some-other-ec2-machine:/home/ubuntu 33 | ``` 34 | 35 | ```bash 36 | $ ssh ubuntu@some-other-ec2-machine 37 | $ sudo apt-get install gdebi-core 38 | $ sudo gdebi python-bamliquidatorbatch*.deb 39 | $ sudo gdebi bamliquidator*.deb 40 | $ # do testing 41 | $ sudo dpkg --remove bamliquidator python-bamliquidatorbatch 42 | ``` 43 | 44 | ### Environment Setup Notes 45 | 46 | 1. Follow the steps on https://github.com/BradnerLab/pipeline/wiki/bamliquidator#check-list 47 | 2. `sudo apt-get install devscripts debhelper gnupg-agent pinentry-curses python-stdeb python-all-dev` 48 | 3. generate a gpg key, e.g. `gpg --gen-key` 49 | 4. register the gpg key, e.g. see https://help.ubuntu.com/community/GnuPrivacyGuardHowto#Uploading_the_key_to_Ubuntu_keyserver 50 | 5. 
import the gpg key at https://launchpad.net/~bradner-computation/+editpgpkeys 51 | * you will get an email "Launchpad: Confirm your OpenPGP Key" 52 | * copy from "-----BEGIN PGP MESSAGE-----" to "-----END PGP MESSAGE-----" (including the BEGIN/END PGP MESSAGE lines) to a file doc.encrypted on the system with the gpg key 53 | * run `gpg --decrypt doc.encrypted` and enter your passphrase when prompted 54 | * follow the instructions in the decrypted message 55 | 6. to reduce number of times entering the gpg passphrase, configure gpg-agent (e.g. see http://unix.stackexchange.com/questions/46960/how-to-configure-gpg-to-enter-passphrase-only-once-per-session) 56 | -------------------------------------------------------------------------------- /bamPlotExample.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | cd /home/cl512/pipeline/ 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2013 Charles Lin 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | # THE SOFTWARE. 25 | 26 | #bamPlot.py makes publication quality tracks of ChIP-Seq data that can be edited in adobe illustrator 27 | 28 | 29 | #HOW TO USE: 30 | 31 | #1. Make a copy of this script and rename it for each job. 32 | #2. Edit variables in the parameters 33 | #3. cd to folder containing script 34 | #4. run by typing 'bash ./script.sh where script.sh is the name of your script 35 | 36 | 37 | #==================================================== 38 | #====================PARAMETERS====================== 39 | #==================================================== 40 | 41 | #replace BAM1,BAM2 with file paths of sorted bam files 42 | #can add BAM3 and NAME3 etc... using same convention 43 | 44 | BAM1='sample1.sorted.bam' 45 | BAM2='sample2.sorted.bam' 46 | 47 | #Edit the names to give each bam a title 48 | NAME1='sample1' 49 | NAME2='sample2' 50 | 51 | #edit these variables to specify region, genome, title etc... 52 | REGION='chr1:+:1001000-1002000' 53 | GENOME='HG18' 54 | OUTPUT='OUTPUTFOLDER' 55 | TITLE='TITLE' 56 | YAXIS='UNIFORM' #use either UNIFORM or RELATIVE 57 | SENSE='both' #sense of reads plotted. 
use either '+', '-', or 'both' 58 | COLOR='255,0,0:0,0,255' #use a colon separated list of RGB colors w/ values from 0 to 255 59 | 60 | 61 | echo 62 | echo Using $BAM1 as BAM1 63 | echo Using $BAM2 as BAM2 64 | echo Calling bamPlot.py on region $REGION in genome $GENOME and directing output to $TITLE in $OUTPUT 65 | echo 66 | echo Running the following command: 67 | #COMMAND 68 | echo python bamPlot.py -b $BAM1,$BAM2 -i $REGION -t $TITLE -y UNIFORM -o $OUTPUT -n $NAME1,$NAME2 -p single -c $COLOR -s $SENSE 69 | python bamPlot.py -b $BAM1,$BAM2 -i $REGION -t $TITLE -y UNIFORM -o $OUTPUT -n $NAME1,$NAME2 -p single -c $COLOR -s $SENSE 70 | -------------------------------------------------------------------------------- /bamliquidator_internal/bamliquidator_util.cpp: -------------------------------------------------------------------------------- 1 | #include "bamliquidator_util.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace 9 | { 10 | std::ofstream log_file; 11 | 12 | bool include_warnings(true); 13 | } 14 | 15 | void Logger::configure(const std::string& log_file_path, bool include_warnings_in_stderr) 16 | { 17 | log_file.open(log_file_path.c_str(), std::ios::app); 18 | include_warnings = include_warnings_in_stderr; 19 | } 20 | 21 | Logger Logger::warn() 22 | { 23 | return Logger("WARNING", include_warnings); 24 | } 25 | 26 | Logger Logger::error() 27 | { 28 | return Logger("ERROR", true); 29 | } 30 | 31 | Logger::Logger(const std::string& a_level, bool a_write_to_stderr): 32 | level(a_level), 33 | write_to_stderr(a_write_to_stderr), 34 | copied(false) 35 | {} 36 | 37 | Logger::~Logger() 38 | { 39 | // rvo should make the copied check unnecessary, but just in case... 40 | if (copied) return; 41 | 42 | try 43 | { 44 | if (write_to_stderr) 45 | { 46 | std::cerr << level << '\t' << ss.str() << std::endl; 47 | } 48 | 49 | std::time_t t = std::time(NULL); 50 | const char datefmt[] = "%Y-%m-%d %H:%M:%S "; 51 | char buffer[sizeof(datefmt)*4]; 52 | const size_t written_bytes = std::strftime(buffer, sizeof(buffer), datefmt, std::localtime(&t)); 53 | 54 | log_file << (written_bytes == 0 ? "" : buffer) << level << '\t' << ss.str() << std::endl; 55 | } 56 | catch(...) {} // don't let destructor throw 57 | } 58 | 59 | Logger::Logger(const Logger& logger): 60 | level(logger.level), 61 | write_to_stderr(logger.write_to_stderr) 62 | { 63 | ss << logger.ss.str(); 64 | logger.copied = true; 65 | } 66 | 67 | /* The MIT License (MIT) 68 | 69 | Copyright (c) 2014 John DiMatteo (jdimatteo@gmail.com) 70 | 71 | Permission is hereby granted, free of charge, to any person obtaining a copy 72 | of this software and associated documentation files (the "Software"), to deal 73 | in the Software without restriction, including without limitation the rights 74 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 75 | copies of the Software, and to permit persons to whom the Software is 76 | furnished to do so, subject to the following conditions: 77 | 78 | The above copyright notice and this permission notice shall be included in 79 | all copies or substantial portions of the Software. 80 | 81 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 82 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 83 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 84 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 85 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 86 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 87 | THE SOFTWARE. 88 | */ 89 | -------------------------------------------------------------------------------- /bamliquidator_internal/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:bionic as builder 2 | 3 | RUN apt-get -y update && DEBIAN_FRONTEND=noninteractive apt-get install -y libbam-dev libhdf5-serial-dev libboost-dev \ 4 | libboost-timer-dev libgoogle-perftools-dev libtbb-dev samtools build-essential 5 | 6 | COPY . /opt/liquidator 7 | 8 | WORKDIR /opt/liquidator 9 | RUN make -j6 10 | 11 | 12 | FROM ubuntu:bionic as runner 13 | 14 | RUN apt-get -y update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends python3 \ 15 | python3-tables python3-scipy \ 16 | && rm -rf /var/lib/apt/lists/* 17 | 18 | WORKDIR /opt/liquidator 19 | COPY --from=builder /usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4 \ 20 | /usr/lib/x86_64-linux-gnu/libtbb.so.2 \ 21 | /usr/lib/x86_64-linux-gnu/libsz.so.2 \ 22 | /usr/lib/x86_64-linux-gnu/libhdf5_serial.so.100 \ 23 | /usr/lib/x86_64-linux-gnu/libhdf5_serial_hl.so.100 \ 24 | /usr/lib/x86_64-linux-gnu/libboost_system.so.1.65.1 \ 25 | /usr/lib/x86_64-linux-gnu/libaec.so.0 \ 26 | /usr/lib/x86_64-linux-gnu/libhts.so.2 \ 27 | /usr/lib/x86_64-linux-gnu/libcurl-gnutls.so.4 \ 28 | /usr/lib/x86_64-linux-gnu/libnghttp2.so.14 \ 29 | /usr/lib/x86_64-linux-gnu/librtmp.so.1 \ 30 | /usr/lib/x86_64-linux-gnu/libpsl.so.5 \ 31 | /usr/lib/x86_64-linux-gnu/libgssapi_krb5.so.2 \ 32 | /usr/lib/x86_64-linux-gnu/libkrb5.so.3 \ 33 | /usr/lib/x86_64-linux-gnu/libk5crypto.so.3 \ 34 | /usr/lib/x86_64-linux-gnu/libkrb5support.so.0 \ 35 | /lib/x86_64-linux-gnu/libkeyutils.so.1 \ 36 | /usr/lib/x86_64-linux-gnu/libldap_r-2.4.so.2 \ 37 | /usr/lib/x86_64-linux-gnu/liblber-2.4.so.2 \ 38 | /usr/lib/x86_64-linux-gnu/libsasl2.so.2 \ 39 | /usr/lib/x86_64-linux-gnu/libgssapi.so.3 \ 40 | /usr/lib/x86_64-linux-gnu/libheimntlm.so.0 \ 41 | /usr/lib/x86_64-linux-gnu/libkrb5.so.26 \ 42 | /usr/lib/x86_64-linux-gnu/libasn1.so.8 \ 43 | /usr/lib/x86_64-linux-gnu/libhcrypto.so.4 \ 44 | /usr/lib/x86_64-linux-gnu/libroken.so.18 \ 45 | /usr/lib/x86_64-linux-gnu/libwind.so.0 \ 46 | /usr/lib/x86_64-linux-gnu/libheimbase.so.1 \ 47 | /usr/lib/x86_64-linux-gnu/libhx509.so.5 \ 48 | /lib/x86_64-linux-gnu/ 49 | COPY --from=builder /usr/bin/samtools /usr/bin/ 50 | COPY --from=builder /opt/liquidator/bamliquidator \ 51 | /opt/liquidator/bamliquidator_bins \ 52 | /opt/liquidator/bamliquidator_regions \ 53 | ./ 54 | COPY --from=builder /opt/liquidator/bamliquidatorbatch /opt/liquidator/bamliquidatorbatch 55 | 56 | ENV PATH="$PATH:/opt/liquidator" 57 | 58 | RUN python3 bamliquidatorbatch/test.py 59 | 60 | ARG GIT_COMMIT 61 | LABEL git_commit=$GIT_COMMIT 62 | 63 | ENTRYPOINT ["/usr/bin/python3","/opt/liquidator/bamliquidatorbatch/bamliquidator_batch.py"] 64 | -------------------------------------------------------------------------------- /dynamicEnhancer_rank.R: -------------------------------------------------------------------------------- 1 | #dynamicEnhancer_rank.R 2 | 3 | #produces a pair of hockey sticks that are red/green labeled 4 | 5 | # The MIT License (MIT) 6 | 7 | # Copyright (c) 2013 Charles Lin 8 | 9 | # Permission is hereby granted, free of charge, to any person 
obtaining a copy 10 | # of this software and associated documentation files (the "Software"), to deal 11 | # in the Software without restriction, including without limitation the rights 12 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | # copies of the Software, and to permit persons to whom the Software is 14 | # furnished to do so, subject to the following conditions: 15 | 16 | # The above copyright notice and this permission notice shall be included in 17 | # all copies or substantial portions of the Software. 18 | 19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | # THE SOFTWARE. 26 | 27 | #enhancerFile ='mergeTest/EC_BRD4_CON_ROSE/HG18_EC_MERGED_SUPERS_-0_+0_0KB_STITCHED_ENHANCER_DELTA_ENHANCER_TO_GENE_100KB_RANK.txt' 28 | 29 | #nSuper1 = 347 30 | #nSuper2 = 271 31 | #=========================================================== 32 | #===============READING IN ARGUMNETS======================== 33 | #=========================================================== 34 | 35 | args <- commandArgs() 36 | 37 | print(args[3:7]) 38 | 39 | enhancerFile = args[3] 40 | 41 | name1 = args[4] 42 | name2 = args[5] 43 | 44 | nSuper1 = as.numeric(args[6]) 45 | nSuper2 = as.numeric(args[7]) 46 | 47 | 48 | 49 | 50 | #=========================================================== 51 | #==================GETTING IN TABLES======================== 52 | #=========================================================== 53 | 54 | enhancerTable = read.delim(enhancerFile) 55 | 56 | name1Supers = which(enhancerTable[,15]<=nSuper1) 57 | name2Supers = which(enhancerTable[,16]<=nSuper2) 58 | 59 | superRows = union(name1Supers,name2Supers) 60 | 61 | conservedSupers = intersect(name1Supers,name2Supers) 62 | 63 | plotName = gsub('RANK','RANK_PLOT',enhancerFile) 64 | plotName = gsub('txt','png',plotName) 65 | 66 | 67 | png(filename=plotName,width = 800,height = 800) 68 | 69 | plot(enhancerTable[name1Supers,15],enhancerTable[name1Supers,16],col='green',pch=16,xlim = c(1,max(enhancerTable[,15:16])),ylim = c(1,max(enhancerTable[15:16])),log='xy',ylab=paste('Rank in',name2),xlab= paste('Rank in',name1)) 70 | points(enhancerTable[name2Supers,15],enhancerTable[name2Supers,16],col='red',pch=16) 71 | 72 | points(enhancerTable[conservedSupers,15],enhancerTable[conservedSupers,16],col='grey',pch=16) 73 | abline(h=nSuper2) 74 | abline(v=nSuper1) 75 | text(1,1,paste(length(conservedSupers),'conserved supers'),pos=4) 76 | 77 | text(1,nSuper1+100,paste(nSuper1,name1,'\nonly supers'),pos=4) 78 | text(nSuper2+100,1,paste(nSuper2,name2,'\nonly supers'),pos=4) 79 | 80 | dev.off() -------------------------------------------------------------------------------- /docs/bamplot-turbo.md: -------------------------------------------------------------------------------- 1 | # Bamplot Turbo 2 | 3 | * [Overview](#Overview) 4 | * [Install](#Install) 5 | * [Usage](#Usage) 6 | * [Examples](#Examples) 7 | * [Plotting a single locus](#single_locus_example) 8 | 9 | # Overview 10 | * bamPlot_turbo is a set of scripts designed to create publication quality vector graphic plots of read density from 
nextgen sequencing data across specific genomic loci. 11 | * Read data must be in the form of sorted and indexed [BAM](http://samtools.sourceforge.net/) files 12 | * Input regions can either be specified individually e.g. `chr1:+:1-1000` or as a batch via a [.gff](http://genome.ucsc.edu/FAQ/FAQformat.html#format3) file. 13 | * Reference gene annotations are also plotted. (Currently, only HG18,HG19,MM8,MM9 are supported) 14 | * Additional reference regions can be plotted when provided in [.bed](http://genome.ucsc.edu/FAQ/FAQformat.html#format1) format 15 | 16 | # Install 17 | * bamPlot_turbo is part of the bradnerLab pipeline module. Install instructions under construction 18 | 19 | # Usage 20 | * bamPlot_turbo is called from the command line using python ./bamPlot_turbo.py 21 | 22 | ```bash 23 | Usage: bamPlot_turbo.py [options] -g [GENOME] -b [SORTED BAMFILE(S)] -i [INPUTFILE] -o [OUTPUTFOLDER] 24 | 25 | Options: 26 | -h, --help show this help message and exit 27 | -b BAM, --bam=BAM Enter a comma separated list of .bam files to be 28 | processed. 29 | -i INPUT, --input=INPUT 30 | Enter .gff or genomic region e.g. chr1:+:1-1000. 31 | -g GENOME, --genome=GENOME 32 | specify a genome, HG18,HG19,MM8,MM9 are currently 33 | supported 34 | -o OUTPUT, --output=OUTPUT 35 | Enter the output folder. 36 | -c COLOR, --color=COLOR 37 | Enter a colon separated list of colors e.g. 38 | 255,0,0:255,125,0, default samples the rainbow 39 | -s SENSE, --sense=SENSE 40 | Map to '+','-' or 'both' strands. Default maps to 41 | both. 42 | -e EXTENSION, --extension=EXTENSION 43 | Extends reads by n bp. Default value is 200bp 44 | -r, --rpm Normalizes density to reads per million (rpm) Default 45 | is True 46 | -y YSCALE, --yScale=YSCALE 47 | Choose either relative or uniform y axis scaling. 48 | options = 'relative,uniform' Default is relative 49 | scaling 50 | -n NAMES, --names=NAMES 51 | Enter a comma separated list of names for your bams 52 | -p PLOT, --plot=PLOT Choose either all lines on a single plot or multiple 53 | plots. 
options = 'single,multiple' 54 | -t TITLE, --title=TITLE 55 | Specify a title for the output plot(s), default will 56 | be the coordinate region 57 | --save-temp If flagged will save temporary files made by bamPlot 58 | --bed=BED Add a comma separated list of bam files to plot 59 | ``` 60 | 61 | # Examples 62 | 63 | 64 | #### Single locus plot 65 | * plotting reads from two datasets at a single genomic region 66 | 67 | ```bash 68 | python ./bamPlot_turbo.py -g 'hg18' -b $BAM1,$BAM2 -i 'chr1:+:100900000-100980000' -r -y 'UNIFORM' -t 'VCAM1' --bed $BED1 -o './' 69 | ``` 70 | -------------------------------------------------------------------------------- /bamliquidator_internal/bamliquidator.m.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | #include "bamliquidator.h" 8 | 9 | /* The MIT License (MIT) 10 | 11 | Copyright (c) 2013 Xin Zhong and Charles Lin 12 | 13 | Permission is hereby granted, free of charge, to any person obtaining a copy 14 | of this software and associated documentation files (the "Software"), to deal 15 | in the Software without restriction, including without limitation the rights 16 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 | copies of the Software, and to permit persons to whom the Software is 18 | furnished to do so, subject to the following conditions: 19 | 20 | The above copyright notice and this permission notice shall be included in 21 | all copies or substantial portions of the Software. 22 | 23 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 | THE SOFTWARE. 30 | */ 31 | 32 | int parseArgs(std::string& bamfile, std::string& chromosome, 33 | unsigned int& start, unsigned int& stop, 34 | char& strand, unsigned int& spnum, 35 | unsigned int& extendlen, 36 | const int argc, char* argv[]) 37 | { 38 | if(argc!=8) 39 | { 40 | printf("[ bamliquidator ] output to stdout\n1. bam file (.bai file has to be at same location)\n2. chromosome\n3. start\n4. stop\n5. strand +/-, use dot (.) for both strands\n6. number of summary points\n7. 
extension length\n\nNote that each summary point is floor((stop-start)/(number of summary points)) long,\nand if it doesn't divide evenly then the range is truncated.\n"); 41 | return 1; 42 | } 43 | 44 | bamfile=argv[1]; 45 | chromosome=argv[2]; 46 | 47 | char* tail=NULL; 48 | start=strtol(argv[3],&tail,10); 49 | if(tail[0]!='\0') 50 | { 51 | fprintf(stderr, "wrong start (%s)\n", argv[3]); 52 | return 1; 53 | } 54 | stop=strtol(argv[4],&tail,10); 55 | if(tail[0]!='\0' || stop<=start) 56 | { 57 | fprintf(stderr, "wrong stop (%s)\n", argv[4]); 58 | return 1; 59 | } 60 | strand=argv[5][0]; 61 | if(strand!='+' && strand!='-' && strand!='.') 62 | { 63 | fputs("wrong strand, must be +/-/.\n",stderr); 64 | return 1; 65 | } 66 | spnum=strtol(argv[6],&tail,10); 67 | if(tail[0]!='\0' || spnum<=0) 68 | { 69 | fprintf(stderr, "wrong spnum (%s)\n", argv[6]); 70 | return 1; 71 | } 72 | extendlen=(unsigned short)strtol(argv[7],&tail,10); 73 | if(tail[0]!='\0') 74 | { 75 | fprintf(stderr, "wrong extension length (%s)\n", argv[7]); 76 | return 1; 77 | } 78 | 79 | return 0; 80 | } 81 | 82 | int main(int argc, char* argv[]) 83 | { 84 | std::string bamfile; 85 | std::string chromosome; 86 | unsigned int start = 0; 87 | unsigned int stop = 0; 88 | char strand = 0; 89 | unsigned int spnum = 0; 90 | unsigned int extendlen = 0; 91 | if (parseArgs(bamfile, chromosome, start, stop, strand, spnum, extendlen, argc, argv) != 0) 92 | { 93 | return 1; 94 | } 95 | 96 | const std::vector counts = liquidate(bamfile, chromosome, start, stop, 97 | strand, spnum, extendlen); 98 | 99 | for(double count : counts) 100 | { 101 | printf("%d\n", (int) count); 102 | } 103 | 104 | return 0; 105 | } 106 | -------------------------------------------------------------------------------- /python_template.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | ''' 4 | The MIT License (MIT) 5 | 6 | Copyright (c) 2015 Charles Lin 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 
25 | ''' 26 | 27 | #pythonTemplate.py <- change to title of your script 28 | #130801 <- date 29 | #Name 30 | 31 | 32 | #Description: 33 | 34 | #This is a generic python template that has functions from utils.py imported and can be used on CFCE1 35 | 36 | 37 | 38 | #================================================================================ 39 | #=============================DEPENDENCIES======================================= 40 | #================================================================================ 41 | 42 | import sys 43 | 44 | print "Using python version %s" % sys.version 45 | 46 | 47 | #importing utils package 48 | sys.path.append('/ark/home/cl512/src/pipeline/') 49 | import utils 50 | 51 | 52 | 53 | #================================================================================ 54 | #============================GLOBAL PARAMETERS=================================== 55 | #================================================================================ 56 | 57 | #add locations of files and global parameters in this section 58 | 59 | 60 | dataFile ='/location/file.txt' 61 | genome = 'hg18' 62 | projectFolder = '/grail/projects/gordon/' 63 | 64 | #================================================================================ 65 | #===================================CLASSES====================================== 66 | #================================================================================ 67 | 68 | #user defined classes here 69 | 70 | #================================================================================ 71 | #=================================FUNCTIONS====================================== 72 | #================================================================================ 73 | 74 | #write your specific functions here 75 | 76 | 77 | def returnGenome(genome): 78 | 79 | ''' 80 | prints the genome being used 81 | ''' 82 | 83 | print "Using genome %s for analysis" % (genome) 84 | 85 | 86 | 87 | 88 | 89 | #================================================================================ 90 | #===============================MAIN RUN========================================= 91 | #================================================================================ 92 | 93 | 94 | #write the actual script here 95 | 96 | 97 | def main(): 98 | 99 | ''' 100 | this is the main run function for the script 101 | all of the work should occur here, but no functions should be defined here 102 | ''' 103 | 104 | returnGenome(genome) 105 | 106 | 107 | 108 | 109 | 110 | main() 111 | 112 | -------------------------------------------------------------------------------- /bamliquidator_internal/bamliquidator.h: -------------------------------------------------------------------------------- 1 | #ifndef PIPELINE_BAMLIQUIDATORINTERNAL_BAMLIQUIDATOR_H 2 | #define PIPELINE_BAMLIQUIDATORINTERNAL_BAMLIQUIDATOR_H 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | /** 10 | * Count the number of reads in a chromosome between start and stop. This function 11 | * is thread safe. This function throws if an error is encountered opening or parsing 12 | * the bamfile with samtools. 13 | * 14 | * @param bamfile the path of the bamfile (which is opened readonly), 15 | * e.g. "../tmp/04032013_D1L57ACXX_4.TTAGGC.hg18.bwt.sorted.bam" 16 | * @param chromsome the chromosome to count on, e.g. "chr1" or "chrX" 17 | * @param start the first base pair index to count on (inclusive), e.g. 0 to start 18 | at the beginning 19 | * @param stop the last base pair index to count on (exclusive), e.g. 
247249719 20 | * @param strand '+' for the forward strand, '-' for the reverse strand, and '.' for both 21 | * @param spnum number of summary points, e.g. if 4 with start of 0 and stop of 99, 22 | the returned vector will have four counts, the first for the range 23 | [0, 24], the second for [25, 49], third for [50, 74], and the last for 24 | [75, 99] -- use 1 for a single summary point 25 | * @param extenlen extension length, e.g. 0 is usually used 26 | * 27 | * @return the read counts for the range [start, stop], split into spnum pieces 28 | */ 29 | // todo: why is this a vector of doubles instead of a vector of integers? 30 | std::vector liquidate(const std::string& bamfile, const std::string& chromosome, 31 | unsigned int start, unsigned int stop, 32 | char strand, unsigned int spnum, 33 | unsigned int extendlen); 34 | 35 | /** 36 | * Same as above function, except this is not thread safe (as bamfile/bamidx cannot be 37 | * used simultaneously in different threads). This variant should be preferred when 38 | * looping over many start/stop values for the same bamfile in a single thread, since 39 | * opening the file/index can take more time than the liquidation. 40 | */ 41 | std::vector liquidate(const samfile_t* bamfile, const bam_index_t* bamidx, 42 | const std::string& chromosome, 43 | unsigned int start, unsigned int stop, 44 | char strand, unsigned int spnum, 45 | unsigned int extendlen); 46 | 47 | /* The MIT License (MIT) 48 | 49 | Copyright (c) 2013 Xin Zhong and Charles Lin 50 | 51 | Permission is hereby granted, free of charge, to any person obtaining a copy 52 | of this software and associated documentation files (the "Software"), to deal 53 | in the Software without restriction, including without limitation the rights 54 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 55 | copies of the Software, and to permit persons to whom the Software is 56 | furnished to do so, subject to the following conditions: 57 | 58 | The above copyright notice and this permission notice shall be included in 59 | all copies or substantial portions of the Software. 60 | 61 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 62 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 63 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 64 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 65 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 66 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 67 | THE SOFTWARE. 
68 | */ 69 | 70 | #endif // PIPELINE_BAMLIQUIDATORINTERNAL_BAMLIQUIDATOR_H 71 | -------------------------------------------------------------------------------- /bamToGFFExample.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | cd /home/cl512/pipeline/ 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2013 Charles Lin 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | # THE SOFTWARE. 25 | 26 | #bamToGFF_turbo.py quickly identifies read density in specified genomic regions and provides output in 27 | #normalized units of reads per million per basepair 28 | 29 | #only works on sorted bam files 30 | #to sort a bam file 31 | #run these commands 32 | #samtools -sort unsorted.bam sorted. 33 | 34 | 35 | #HOW TO USE: 36 | 37 | #1. Make a copy of this script and rename it for each job. 38 | #2. Edit variables in the parameters 39 | #3. cd to folder containing script 40 | #4. Use only code from 1 example. Use # to comment out lines from a different example 41 | #5. run by typing 'bash ./script.sh where script.sh is the name of your script 42 | 43 | 44 | 45 | 46 | #Example 1 - determining density using fixed bin sizes 47 | 48 | #==================================================== 49 | #====================PARAMETERS====================== 50 | #==================================================== 51 | 52 | #replace BAM1 with file path of a sorted bam file 53 | BAM1='sample1.sorted.bam' 54 | 55 | 56 | 57 | #edit these variables to specify input regions etc... 58 | GFF='regions.gff' #visit https://genome.ucsc.edu/FAQ/FAQformat.html#format3 for a description of gff format 59 | EXTENSION='200' #length each read is extended 60 | OUTPUT='OUTPUT_FILE' 61 | SENSE='both' #sense of reads plotted. use either +,-,both 62 | BINSIZE=50 #size in bp of bins. E.g. 
a 200bp gff region will be broken up into 4x50bp bins 63 | 64 | 65 | echo 66 | echo Calling bamToGFF_turbo.py using $BAM1 on gff $GFF and directing output to $OUTPUT 67 | echo 68 | #COMMAND 69 | echo Running the following command: 70 | echo python bamToGFF_turbo.py -b $BAM1 -i $GFF -o $OUTPUT -s $SENSE -e $EXTENSION -c $BINSIZE -r 71 | python bamToGFF_turbo.py -b $BAM1 -i $GFF -o $OUTPUT -s $SENSE -e $EXTENSION -c $BINSIZE -r 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | #Example 2 - determining density using variable binning 81 | #In this version, regions are broken up into a fixed number of bins regardless of size 82 | 83 | #==================================================== 84 | #====================PARAMETERS====================== 85 | #==================================================== 86 | 87 | #replace BAM1 with file path of a sorted bam file 88 | BAM1='sample1.sorted.bam' 89 | 90 | 91 | 92 | #edit these variables to specify input regions etc... 93 | GFF='regions.gff' #visit https://genome.ucsc.edu/FAQ/FAQformat.html#format3 for a description of gff format 94 | EXTENSION='200' #length each read is extended 95 | OUTPUT='OUTPUT_FILE' 96 | SENSE='both' #sense of reads plotted. use either +,-,both 97 | NBINS=10 #breaks each region into N bins. E.g. a 2000bp region with NBINS=10 will be broken up into 10 200bp bins 98 | 99 | 100 | echo 101 | echo Calling bamToGFF_turbo.py using $BAM1 on gff $GFF and directing output to $OUTPUT 102 | echo 103 | #COMMAND 104 | echo Running the following command: 105 | echo python bamToGFF_turbo.py -b $BAM1 -i $GFF -o $OUTPUT -s $SENSE -e $EXTENSION -m $NBINS -r 106 | python bamToGFF_turbo.py -b $BAM1 -i $GFF -o $OUTPUT -s $SENSE -e $EXTENSION -m $NBINS -r 107 | 108 | 109 | -------------------------------------------------------------------------------- /hockeysticks/gene.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | HockeyStick Viz 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 24 | 25 |
26-95 | [gene.html body: markup stripped in extraction; recoverable text is a "Sorting Options" control offering "Rank", "Signal", and "Super"]
96 | 97 | 98 | -------------------------------------------------------------------------------- /commandline_template.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | ''' 4 | The MIT License (MIT) 5 | 6 | Copyright (c) 2013 Charles Lin 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | ''' 26 | 27 | #pythonTemplate.py <- change to title of your script 28 | #130801 <- date 29 | #Name 30 | 31 | 32 | #Description: 33 | 34 | #This is a generic python template that has functions from utils.py imported and can be used on CFCE1 35 | 36 | 37 | 38 | #================================================================================ 39 | #=============================DEPENDENCIES======================================= 40 | #================================================================================ 41 | 42 | import sys 43 | 44 | print "Using python version %s" % sys.version 45 | 46 | 47 | #importing utils package 48 | sys.path.append('/ark/home/cl512/src/pipeline/') 49 | import utils 50 | 51 | 52 | 53 | #================================================================================ 54 | #============================GLOBAL PARAMETERS=================================== 55 | #================================================================================ 56 | 57 | #add locations of files and global parameters in this section 58 | 59 | 60 | dataFile ='/location/file.txt' 61 | genome = 'hg18' 62 | projectFolder = '/grail/projects/gordon/' 63 | 64 | #================================================================================ 65 | #===================================CLASSES====================================== 66 | #================================================================================ 67 | 68 | #user defined classes here 69 | 70 | #================================================================================ 71 | #=================================FUNCTIONS====================================== 72 | #================================================================================ 73 | 74 | #write your specific functions here 75 | 76 | 77 | def returnGenome(genome): 78 | 79 | ''' 80 | prints the genome being used 81 | ''' 82 | 83 | print "Using genome %s for analysis" % (genome) 84 | 85 | 86 | 87 | 88 | 89 | #================================================================================ 90 | #===============================MAIN RUN========================================= 91 
| #================================================================================ 92 | 93 | 94 | #write the actual script here 95 | 96 | 97 | def main(): 98 | 99 | 100 | ''' 101 | main run function 102 | ''' 103 | 104 | from optparse import OptionParser 105 | 106 | usage = "usage: %prog [options] -f [FASTQFILE] -g [GENOME] -u [UNIQUEID] -o [OUTPUTFOLDER]" 107 | parser = OptionParser(usage = usage) 108 | #required flags 109 | parser.add_option("-f","--fastq", dest="fastq",nargs = 1, default=None, 110 | help = "Enter the full path of a fastq file to be mapped") 111 | 112 | 113 | #optional arguments 114 | parser.add_option("-N","--mismatch",dest="mismatchN",nargs =1, default = 0, 115 | help = "Specify 0 or 1 for allowed mismatches") 116 | parser.add_option("-r","--rpm", dest="rpm",action = 'store_true', default=True, 117 | help = "Normalizes density to reads per million (rpm) Default is True") 118 | 119 | 120 | (options,args) = parser.parse_args() 121 | 122 | if not options.fastq: 123 | parser.print_help() 124 | exit() 125 | 126 | 127 | 128 | 129 | if __name__ == "__main__": 130 | main() 131 | 132 | 133 | 134 | -------------------------------------------------------------------------------- /bamliquidator_internal/bamliquidatorbatch/flattener.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import csv 5 | import os 6 | import tables 7 | 8 | def write_tab(table, file_names, output_directory, log=False): 9 | chromosome_to_file_writer_pair = {} 10 | 11 | columns = [col for col in table.colnames if col != "chromosome"] 12 | columns = [col if col != "file_key" else "file_name" for col in columns] 13 | 14 | for row in table: 15 | chromosome = row["chromosome"].decode() 16 | if chromosome not in chromosome_to_file_writer_pair: 17 | tab_file_path = os.path.join(output_directory, table.name + "_" + chromosome + ".tab") 18 | if log: 19 | print("Writing", tab_file_path) 20 | 21 | tab_file = open(tab_file_path, 'w') 22 | writer = csv.writer(tab_file, delimiter='\t') 23 | writer.writerow(columns) 24 | chromosome_to_file_writer_pair[chromosome] = (tab_file, writer) 25 | else: 26 | _, writer = chromosome_to_file_writer_pair[chromosome] 27 | 28 | # pickup here: translate file_key to file_name 29 | row_list = [] 30 | for col in columns: 31 | if col == "file_name": 32 | row_list.append(file_names[row["file_key"]]) 33 | else: 34 | row_list.append(row[col]) 35 | 36 | writer.writerow(row_list) 37 | 38 | for tab_file, _ in list(chromosome_to_file_writer_pair.values()): 39 | tab_file.close() 40 | 41 | def write_tab_for_all(h5_file, output_directory, log=False): 42 | for table in h5_file.root: 43 | if table.name not in ("files", "file_names"): 44 | write_tab(table, h5_file.root.file_names, output_directory, log) 45 | 46 | def main(): 47 | parser = argparse.ArgumentParser(description='Writes bamliquidator_batch.py hdf5 tables into tab delimited ' 48 | 'text files, one for each chromosome. Note that this is provided as a convenience, but it is hoped that ' 49 | 'the hdf5 files will be used directly since they are much more efficient to work with -- e.g. please see ' 50 | 'http://www.pytables.org/ for easy to use Python APIs and ' 51 | 'http://www.hdfgroup.org/products/java/hdf-java-html/hdfview/ for an easy to use GUI for browsing HDF5 ' 52 | 'files. 
For more info, please see https://github.com/BradnerLab/pipeline/wiki/bamliquidator .') 53 | parser.add_argument('-t', '--table', default=None, help='the table to write to hdf5, e.g. "region_counts" for ' 54 | 'a regions counts.h5 file, or one of the following for a uniform bins counts.h5 file: "bin_counts", ' 55 | '"normalized_counts", "sorted_summary", or "summary". If none specified flattens every table in the h5 file, ' 56 | 'using the table name as a file prefix.') 57 | parser.add_argument('h5_file', help='the hdf5 file generated by bamliquidator_batch.py') 58 | parser.add_argument('output_directory', help='directory to store the tab files (must already exist)') 59 | args = parser.parse_args() 60 | 61 | h5_file = tables.open_file(args.h5_file, mode = "r") 62 | 63 | log = True 64 | 65 | if args.table: 66 | table = h5_file.get_node("/" + args.table) 67 | write_tab(table, h5_file.root.file_names, args.output_directory, log) 68 | else: 69 | write_tab_for_all(h5_file, args.output_directory, log) 70 | 71 | h5_file.close() 72 | 73 | if __name__ == "__main__": 74 | main() 75 | 76 | ''' 77 | The MIT License (MIT) 78 | 79 | Copyright (c) 2013 John DiMatteo (jdimatteo@gmail.com) 80 | 81 | Permission is hereby granted, free of charge, to any person obtaining a copy 82 | of this software and associated documentation files (the "Software"), to deal 83 | in the Software without restriction, including without limitation the rights 84 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 85 | copies of the Software, and to permit persons to whom the Software is 86 | furnished to do so, subject to the following conditions: 87 | 88 | The above copyright notice and this permission notice shall be included in 89 | all copies or substantial portions of the Software. 90 | 91 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 92 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 93 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 94 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 95 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 96 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 97 | THE SOFTWARE. 98 | ''' 99 | -------------------------------------------------------------------------------- /heatMapOrdered.R: -------------------------------------------------------------------------------- 1 | # The MIT License (MIT) 2 | 3 | # Copyright (c) 2013 Charles Lin 4 | 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 22 | 23 | library(graphics) 24 | 25 | 26 | args <- commandArgs() 27 | 28 | print(args[3:9]) 29 | 30 | referenceGFF = args[3] 31 | mappedGFF = args[4] 32 | colorVector = as.numeric(unlist(strsplit(args[5],','))) 33 | color = rgb(colorVector[1],colorVector[2],colorVector[3],maxColorValue=255) 34 | output = args[6] 35 | geneListFile = args[7] 36 | relative = as.numeric(args[8]) 37 | backgroundGFF = args[9] 38 | 39 | 40 | #getting the reference order and color spectrum 41 | referenceData <- read.delim(file=referenceGFF,sep="\t",header=TRUE) 42 | 43 | 44 | if(nchar(geneListFile) > 5){ 45 | geneListTable = read.delim(geneListFile,header=FALSE) 46 | geneList = as.vector(geneListTable[,1])}else{geneList = as.vector(seq(1,nrow(referenceData),1))} 47 | 48 | #loading mappedGFF 49 | mappedData <- read.delim(file=mappedGFF,sep="\t",header=TRUE) 50 | mappedData <- as.matrix(mappedData[geneList,3:ncol(mappedData)]) #remove GENE_ID & locusLine and force to matrix 51 | colnames(mappedData) <- NULL 52 | 53 | #performing background correction 54 | if(backgroundGFF != 'NONE' | nchar(backgroundGFF) > 5){ 55 | 56 | print('PERFORMING BACKGROUND CORRECTION') 57 | backgroundData <- read.delim(file=backgroundGFF,sep="\t",header=TRUE) 58 | backgroundData <- as.matrix(backgroundData[geneList,3:ncol(backgroundData)]) #remove GENE_ID & locusLine and force to matrix 59 | colnames(backgroundData) <- NULL 60 | mappedData = mappedData - backgroundData 61 | mappedData[which(mappedData < 0)] <- 0 62 | } 63 | 64 | 65 | 66 | 67 | referenceData <- as.matrix(referenceData[geneList,3:ncol(referenceData)]) #remove GENE_ID & locusLine and force to matrix 68 | colnames(referenceData) <- NULL 69 | referenceOrder = order(apply(referenceData,1,mean,na.rm=TRUE)) 70 | 71 | 72 | 73 | #if scaling by reference 74 | colorSpectrum <- colorRampPalette(c("white",color))(100) 75 | minValue <- quantile(referenceData,na.rm=TRUE,prob=0.6,names=FALSE) 76 | print('min value is') 77 | print(minValue) 78 | #maxValue <- quantile(referenceData,na.rm=TRUE,prob=0.95,names=FALSE) 79 | maxValue = 3 80 | print('max value is') 81 | print(maxValue) 82 | color_cuts <- seq(minValue,maxValue,length=100) 83 | 84 | #add extreme points and one extra min color 85 | #color_cuts <- c(min(referenceData,na.rm=TRUE), color_cuts,max(referenceData,na.rm=TRUE)) 86 | color_cuts <- c(min(referenceData,na.rm=TRUE), color_cuts,max(5,max(referenceData,na.rm=TRUE))) 87 | colorSpectrum <- c(colorSpectrum[1],colorSpectrum) 88 | 89 | #if relative scaling 90 | if(relative==1){ 91 | colorSpectrum <- colorRampPalette(c("white",color))(100) 92 | minValue <- quantile(mappedData,na.rm=TRUE,prob=0.6,names=FALSE) 93 | print('min value is') 94 | print(minValue) 95 | maxValue <- quantile(mappedData,na.rm=TRUE,prob=0.95,names=FALSE) 96 | print('max value is') 97 | print(maxValue) 98 | color_cuts <- seq(minValue,maxValue,length=100) 99 | 100 | #add extreme points and one extra min color 101 | color_cuts <- c(min(mappedData,na.rm=TRUE), color_cuts,max(mappedData,na.rm=TRUE)) 102 | colorSpectrum <- c(colorSpectrum[1],colorSpectrum) 103 | } 104 | 105 | 106 | #reorder by reference 107 | mappedData <- mappedData[referenceOrder,] 108 | 109 | png(filename = output,width = 480,height = 1600) 110 | 
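#layout() splits the device into one row of six cells: the heatmap occupies the first five (panel 1) and the color key the sixth (panel 2)
#the first image() call draws the reference-ordered matrix (bins along x, regions along y) using the color_cuts breaks computed above
#the second image() call draws the color key itself so signal values can be read off the scale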
layout(matrix(data=c(1,1,1,1,1,2),nrow=1)) 111 | image(1:ncol(mappedData),1:nrow(mappedData),t(mappedData),breaks=color_cuts,col=colorSpectrum,xaxt="n",yaxt="n",xlab="",ylab="") 112 | image(1:2,color_cuts[2:101],t(matrix(data=color_cuts[2:101],ncol=2,nrow=100)),breaks=color_cuts,col=colorSpectrum,xaxt="n",xlab="",ylab="") 113 | dev.off() 114 | -------------------------------------------------------------------------------- /bamliquidator_internal/bamliquidator_util.h: -------------------------------------------------------------------------------- 1 | #ifndef PIPELINE_BAMLIQUIDATORINTERNAL_BAMLIQUIDATOR_UTIL_H 2 | #define PIPELINE_BAMLIQUIDATORINTERNAL_BAMLIQUIDATOR_UTIL_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | // todo: use a namespace here 15 | 16 | // copies str to dest 17 | // precondition: dest_size > 0 18 | // postcondition: dest is null terminated, dest isn't overflowed, and any excess in dest is filled with \0 characters 19 | inline void copy(char* dest, const std::string& str, size_t dest_size) 20 | { 21 | // my hdf5/pytables usage seems to require that any excess space in dest be 22 | // filled with null terminators (not random garbage), so strncpy is ideal 23 | strncpy(dest, str.c_str(), dest_size - 1); 24 | dest[dest_size - 1] = '\0'; 25 | } 26 | 27 | // The logger class is intended to be used to match bamliquidator_batch.py logging output style 28 | class Logger 29 | { 30 | public: 31 | // Configures logging. If not configured, logs are just written to stderr. 32 | // If include_warnings_in_stderr is false, also disables all other writing to stderr 33 | // (e.g. so other library call writes to stderr go to /dev/null). 34 | static void configure(const std::string& log_file_path, 35 | bool include_warnings_in_stderr); 36 | 37 | /* e.g. Logger::warn() << "oops " << 123 results in a logged line like the following written to the log file: 38 | * 39 | * 2014-08-05 13:25:06 WARNING oops 123 40 | * 41 | * and if configured to log to stderr, the following line written there as well: 42 | * 43 | * WARNING oops 123 44 | * 45 | * Since copy constructor is private, returned value must be used as an anonymous temporary as in the example. 46 | */ 47 | static Logger warn(); 48 | 49 | /* e.g. Logger::error() << "oops " << 123 results in a logged line like the following written to the log file: 50 | * 51 | * 2014-08-05 13:25:06 ERROR oops 123 52 | * 53 | * and the following written to stderr as well: 54 | * 55 | * ERROR oops 123 56 | * 57 | * Since copy constructor is private, returned value must be used as an anonymous temporary as in the example. 58 | */ 59 | static Logger error(); 60 | 61 | template 62 | Logger& operator<<(const T& v) 63 | { 64 | ss << v; 65 | return *this; 66 | } 67 | 68 | ~Logger(); 69 | 70 | private: 71 | Logger(const std::string& level, bool write_to_stderr); 72 | 73 | // Prevent copy construction so callers of warn() and error() can only use returned object as an anonymous 74 | // temporary with <<, which is appropriate since the actual logging occurs on destruction. 
75 | Logger(const Logger& logger); 76 | 77 | Logger& operator=( const Logger& ) = delete; 78 | 79 | const std::string level; 80 | const bool write_to_stderr; 81 | std::stringstream ss; 82 | mutable bool copied; 83 | }; 84 | 85 | inline std::vector> 86 | extract_chromosome_lengths(int argc, char* argv[], int chr1_arg) 87 | { 88 | std::vector> chromosome_lengths; 89 | for (int arg = chr1_arg; arg < argc && arg + 1 < argc; arg += 2) 90 | { 91 | chromosome_lengths.push_back( 92 | std::make_pair(argv[arg], boost::lexical_cast(argv[arg+1]))); 93 | } 94 | return chromosome_lengths; 95 | } 96 | 97 | /* The MIT License (MIT) 98 | 99 | Copyright (c) 2014 John DiMatteo (jdimatteo@gmail.com) 100 | 101 | Permission is hereby granted, free of charge, to any person obtaining a copy 102 | of this software and associated documentation files (the "Software"), to deal 103 | in the Software without restriction, including without limitation the rights 104 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 105 | copies of the Software, and to permit persons to whom the Software is 106 | furnished to do so, subject to the following conditions: 107 | 108 | The above copyright notice and this permission notice shall be included in 109 | all copies or substantial portions of the Software. 110 | 111 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 112 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 113 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 114 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 115 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 116 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 117 | THE SOFTWARE. 118 | */ 119 | 120 | #endif 121 | -------------------------------------------------------------------------------- /hockeysticks/hockeystick.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | HockeyStick Viz 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 27 | 28 | 31 | 32 |
33-110 | [hockeystick.html body: markup stripped in extraction; recoverable text is a "genes" input, a "Data Table" section ("Click on table headers to sort the rows.", "Hovering over a row will highlight the corresponding circle above.", an "Export" control), and table headers: Select, Dataset, Peak Number, Rank, Super?, Chromosome, Start, End, Nearby Genes, Functional Categories]
111 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /dynamicEnhancer_plot.R: -------------------------------------------------------------------------------- 1 | #dynamicEnhancer_plot.R 2 | 3 | #produces a ranked table from the region map table 4 | 5 | # The MIT License (MIT) 6 | 7 | # Copyright (c) 2013 Charles Lin 8 | 9 | # Permission is hereby granted, free of charge, to any person obtaining a copy 10 | # of this software and associated documentation files (the "Software"), to deal 11 | # in the Software without restriction, including without limitation the rights 12 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | # copies of the Software, and to permit persons to whom the Software is 14 | # furnished to do so, subject to the following conditions: 15 | 16 | # The above copyright notice and this permission notice shall be included in 17 | # all copies or substantial portions of the Software. 18 | 19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | # THE SOFTWARE. 26 | 27 | 28 | #=========================================================== 29 | #======================DEBUG PARAM========================== 30 | #=========================================================== 31 | #name1 = 'EC_CON_BRD4' 32 | #name2 = 'EC_TNF_BRD4' 33 | #stitchedFile = 'mergeTest/EC_BRD4_CON_ROSE/HG18_EC_MERGED_SUPERS_-0_+0_0KB_STITCHED_ENHANCER_REGION_MAP.txt' 34 | #outFile = gsub('REGION_MAP','DELTA',stitchedFile) 35 | 36 | #setwd('/Volumes/bradnerlab/projects/athero') 37 | 38 | 39 | #stitched_regions= read.delim('mergeTest/EC_BRD4_CON_ROSE/HG18_EC_MERGED_SUPERS_-0_+0_0KB_STITCHED_ENHANCER_REGION_MAP.txt',header=TRUE,sep='\t') 40 | 41 | #=========================================================== 42 | #===============READING IN ARGUMNETS======================== 43 | #=========================================================== 44 | 45 | args <- commandArgs() 46 | 47 | print(args[3:7]) 48 | 49 | stitchedFile = args[3] 50 | name1 = args[4] 51 | name2 = args[5] 52 | median1 = as.numeric(args[6]) 53 | median2 = as.numeric(args[7]) 54 | 55 | #=========================================================== 56 | #=================MAKING DELTA TABLE======================== 57 | #=========================================================== 58 | outFile = gsub('REGION_MAP','DELTA',stitchedFile) 59 | stitched_regions = read.delim(stitchedFile,header=TRUE) 60 | if(ncol(stitched_regions) == 10){ 61 | 62 | factor1 = stitched_regions[,7] - stitched_regions[,8] 63 | factor2 = stitched_regions[,9] - stitched_regions[,10] 64 | }else{ 65 | 66 | factor1 = stitched_regions[,7] 67 | factor2 = stitched_regions[,8] 68 | } 69 | 70 | 71 | 72 | #prevent negative values and 0 division 73 | factor1[factor1 <= 1] <- 1 74 | factor2[factor2 <= 1] <- 1 75 | 76 | factor1 = factor1/median1 77 | factor2 = factor2/median2 78 | 79 | deltaFactor = log2(factor2/factor1) 80 | 81 | deltaFactor[deltaFactor < -8] <- -8 82 | deltaFactor[deltaFactor > 8] <- 8 83 | 84 | deltaOrder = order(deltaFactor,decreasing=TRUE) 85 | 86 | 87 | #write the 
new table 88 | newTable = cbind(stitched_regions[deltaOrder,1:6],deltaFactor[deltaOrder],factor1[deltaOrder],factor2[deltaOrder],1:length(deltaFactor),1) 89 | colnames(newTable)[7:11] = c(paste('LOG2_',name2,'_VS_',name1,'_CHANGE',sep=''),name1,name2,'RANK','IS_SUPER') 90 | 91 | write.table(newTable,outFile,quote=FALSE,sep='\t',row.names=FALSE) 92 | 93 | 94 | #=========================================================== 95 | #===============PLOTTING WATERFALL========================== 96 | #=========================================================== 97 | 98 | 99 | colorSpectrum <- colorRampPalette(c("green","black","black","red"))(100) 100 | 101 | #setting a color data range 102 | minValue <- -1.5 103 | maxValue <- 1.5 104 | color_cuts <- seq(minValue,maxValue,length=100) 105 | color_cuts <- c(min(deltaFactor,na.rm=TRUE), color_cuts,max(deltaFactor,na.rm=TRUE)) 106 | 107 | 108 | #add one extra min color to even out sampling 109 | colorSpectrum <- c(colorSpectrum[1],colorSpectrum[1],colorSpectrum) 110 | 111 | colorVector = c() 112 | for(i in deltaOrder){ 113 | delta = deltaFactor[i] 114 | color = colorSpectrum[max(which(color_cuts <= delta))] 115 | colorVector =c(colorVector,color) 116 | 117 | 118 | 119 | } 120 | 121 | outPlot = gsub('txt','pdf',outFile) 122 | pdf(file=outPlot,width =10,height=5) 123 | plot(1:length(deltaFactor),deltaFactor[deltaOrder],ylim =c(-1.2*max(abs(deltaFactor)),1.2*max(abs(deltaFactor))),type='l',lwd=2,ylab=paste('Log2 change in signal ',name2,' over ',name1,sep=''),xlab=paste('Super-enhancer regions in either ',name1,' or ',name2,sep='')) 124 | lines(1:length(deltaFactor),deltaFactor[deltaOrder],type='h',lwd=3,col=colorVector) 125 | dev.off() 126 | -------------------------------------------------------------------------------- /RNA_SEQ_PIPELINE_README.txt: -------------------------------------------------------------------------------- 1 | #RNA-SEQ PIPELINE README 2 | 3 | ‘’’ 4 | Description of processing steps and output steps in Bradner Lab RNA-SEQ pipeline 5 | ‘’’ 6 | I. Summary 7 | II. Input Data 8 | III. Normalization and Gene Expression Quantification 9 | IV. Data Files 10 | V. Quality Control 11 | VI. Pairwise Analysis 12 | 13 | I.Summary: 14 | 15 | This pipeline takes transcriptome aligned RNA-Seq data and quantifies common gene name level expression values with or without cell count normalized ERCC spike-in addition. Additional replicate quality control metrics as well as pair-wise sample comparisons are performed. 16 | 17 | II. Input Data: 18 | 19 | Currently pipeline supports any plain text input table with header where the first column is a gene name and subsequent columns are individual samples (e.g. genes.fpkm.table out table from Cufflinks). Units are in coverage and transcript length normalized units (e.g. FPKM or RPKM). 20 | 21 | A name for the project is used in the analysis and provides the prefix for all output files (replacing * in the subsequent text) 22 | 23 | Sample names must reflect a group structure (e.g., A_1, A_2, B_1, B_2) where all samples in a group share a prefix. These will be treated as replicates (A_1 and A_2) and (B_1 and B_2). 24 | 25 | III. Normalization and Gene Expression Quantification: 26 | 27 | 1. To aid in transformations and normalization, a lower bound of 0.01 is set on all expression levels 28 | 2. If ERCC spike-ins are added, a loess normalization is applied using all ERCC probes as a subset. The file ERCC_Controls_Analysis.txt contains information about the ERCC mix including individual spike-in concentrations. 
29 | 3. Expression values are then filtered based on expression level, keeping only genes with an expression >1 FPKM in at least one sample. 30 | 4. Mean expression values are summarized across all genes across group. 31 | 32 | IV. Data Files: 33 | 34 | 1. *_all_fpkm_exprs_raw.txt - This is the raw input 35 | 2. *_all_fpkm_exprs_norm.txt - If ERCC normalizations are used, this is cell count normalized data 36 | 3. *_all_fpkm_means.txt - Mean summarized expression data across groups (if ERCC spike-ins are used, then derived from *_all_fpkm_exprs_norm.txt) 37 | 4. *_exprs_fpkm_means.txt - Mean summarized expression data across groups filtered for expressed genes (if ERCC spike-ins are used, then derived from *_all_fpkm_exprs_norm.txt) 38 | 39 | V. Quality Control: 40 | 41 | 1. *_all_fpkm_exprs_raw_scatter.png - all sample pairwise scatter plots of raw expression data 42 | 2. *_all_fpkm_exprs_norm_scatter.png - all sample pairwise scatter plots of ERCC normalized expression data 43 | 3. *_spike_raw.pdf - scatter plot showing expression levels of ERCC spike-ins (y-axis) versus their input concentration (x-axis). A loess regression for each sample is plotted as a line. This should illustrate differences in sample level spike-in abundance. 44 | 4. *_spike_norm.pdf - scatter plot showing expression levels of ERCC spike-ins (y-axis) versus their input concentration (x-axis) post Loess Normalization. A loess regression for each sample is plotted as a line. If normalization is performed correctly, all lines should overlap. 45 | 5. *_exprs_boxplot.pdf - Boxplots showing the distribution of all expression values in each sample for raw data (left) or ERCC cell count normalized data (right). Helpful in quickly determining if spike-ins were added in equivalent amoutns to replicates and in identifying global changes in gene expression. 46 | 6. *_replicate correlations.pdf - Replicate scatter plots of normalized expression values filtered by expressed genes. All pairwise comparisons of replicates are plotted by group. Helpful in quickly identifying poor quality samples. 47 | 48 | VI. Pairwise Analysis: 49 | 50 | For each pairwise comparison of groups (e.g. group A vs. group B), the following outputs are produced. 51 | 52 | 1. *_A_vs_B_exprs_matrix.txt - Table of all expressed genes with the mean expression in group A and B, the log2 fold change, and the p-Value of their difference by a two-tailed t-test. 53 | 2. *_A_vs_B.cls and *_A_vs_B.gct - GSEA formatted input files (.cls and .gct) to perform leading edge analysis on any pairwise comparison 54 | 3. *_A_vs_B.pdf - Multipage analysis summary to identify differential gene expression and global changes in gene expression. 55 | i. Volcano scatter plot of all expressed genes with the log2 fold change in expression (x-axis) plotted vs. the significance of the difference in gene expression (y-axis). Genes that exceed a log2 1 fold change and a p-value of 0.05 are colored blue and red. For each direction, top 10 most differential genes in terms of magnitude of fold change and significance are highlighted. 56 | ii. Scatter plot of expressed genes in A (y-axis) vs. B (x-axis). Differential genes from same criteria as i are colored 57 | iii. Waterfall plot of all genes ranked by fold change in A vs. B. No significance criteria is applied here. 58 | iv. Rank ordered plots of gene expression to detect global change in gene expression or normalization artifacts. Left: Genes are ranked by expression in A and plotted (A: grey, B: red). 
A loess regression is shown for expression values in B (red line). Right: the opposite pairwise comparison. Genes are ranked by expression in B and plotted (B: grey, A: red). A loess regression is shown for expression values in A (red line). 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /docs/collaboration-workflow.md: -------------------------------------------------------------------------------- 1 | # Collaborative Workflow 2 | 3 | ![open_source_collaboration_workflow.png](http://jdimatteo.github.io/images/open_source_collaboration_workflow.png) 4 | 5 | ### Objectives 6 | 7 | * stable and usable master branch 8 | * code review and testing so that master branch code is understandable and of high quality 9 | * someone other than original developer verifying test plan passes 10 | * as simple a a process as could possibly work (with the understanding that it can be made more complicated as needed) 11 | 12 | ### Example Hello World github usage 13 | 14 | Bar will update hello.py, and Foo will code review it. 15 | 16 | 1. issue created with objective 17 | * e.g. Foo opens the [HelloWorld github repo page](https://github.com/FooBar11112222/HelloWorld), clicks [Issues](https://github.com/FooBar11112222/HelloWorld/issues), and then [New Issue](https://github.com/FooBar11112222/HelloWorld/issues/new) 18 | ![NewIssue.png](http://jdimatteo.github.io/images/NewIssue.png) 19 | 2. issue assignee creates feature branch or fork from master (branch preferred) 20 | * e.g. Bar [creates a branch](https://github.com/blog/1377-create-and-delete-branches) 21 | * ![CreateBranch.png](http://jdimatteo.github.io/images/CreateBranch.png) 22 | 3. objective completed in branch 23 | * clone the branch, e.g. `$ git clone -b greet-user https://github.com/FooBar11112222/HelloWorld.git` 24 | * develop code, pushing regularly to branch, e.g. `$ git commit -a -m "Added name prompt, for #1"; git push` 25 | * note that if you [mention](https://github.com/blog/957-introducing-issue-mentions) the issue number followed by a # in the commit message like above, a comment will automatically be added to the issue with a link to the commit 26 | * add test plan, verify test plan passing 27 | * ![TestPlanAdded.png](http://jdimatteo.github.io/images/TestPlanAdded.png) 28 | * ideally also add automated test so that it will continue to automatically be tested in the future (e.g. a diff test, like [the Bam_Liquidator_Bin_Counter_Test Jenkins test](http://tod.dfci.harvard.edu/jenkins/job/Bam_Liquidator_Bin_Counter_Test/), which runs automatically on every commit, and verifies that the counts have not changed since a prior run's calculated counts for a specific bam file) 29 | 4. merge request created 30 | * click the green "Compare & review" button 31 | * ![PullRequestButton.png](http://jdimatteo.github.io/images/PullRequestButton.png) 32 | * click the "Click to create a pull request for this comparison" header button 33 | * ![CreatePullRequestHeaderButton.png](http://jdimatteo.github.io/images/CreatePullRequestHeaderButton.png) 34 | * Update the pull request as you see fit, then click the "Send pull request" button 35 | * ![SendPullRequestButton.png](http://jdimatteo.github.io/images/SendPullRequestButton.png) 36 | 5. 
peer code review 37 | * Foo opens the pull request and reviews the code changes under the "Files Changed" tab 38 | * ![FilesChanged.png](http://jdimatteo.github.io/images/FilesChanged.png) 39 | * Foo adds a comment to the diff: 40 | * ![Feedback.png](http://jdimatteo.github.io/images/Feedback.png) 41 | 6. assignee responds to peer comments 42 | * Bar reads the reviewer comments, and pushes commits to the greet-user branch 43 | * github automatically adds those commits to the pull request 44 | * Foo reviews the changes, and signs off on it 45 | * ![GoAhead.png](http://jdimatteo.github.io/images/GoAhead.png) 46 | 7. assignee merges branch with master 47 | * Bar clicks the "Merge pull request" button 48 | * ![MergePullRequest.png](http://jdimatteo.github.io/images/MergePullRequest.png) 49 | * if the pull request can't be merged automatically due to conflicts, read [documentation](https://help.github.com/articles/resolving-a-merge-conflict-from-the-command-line/) or ask another developer for help 50 | * Bar clicks the "Confirm merge" button 51 | * the pull request is now marked as merged 52 | * Bar clicks the "Delete branch" button 53 | * Note that all branch history is preserved, deleting it just indicates that it is no longer being developed 54 | * Bar can go ahead and close the issue 55 | 8. Jenkins GUI wrapper jobs (and anything else using master) automatically uses updated master 56 | 57 | ### Prior Drafts 58 | 59 | * initial diagram, more complicated than current draft, with stable master but named release branch for a dedicated testing phase: 60 | ![open_source_collaboration_workflow_with_release_phase.png](http://jdimatteo.github.io/images/open_source_collaboration_workflow_with_release_phase.png) 61 | 62 | ### References 63 | 64 | * code reviews: http://blog.codinghorror.com/code-reviews-just-do-it/ 65 | * git: 66 | * http://scottchacon.com/2011/08/31/github-flow.html 67 | * https://github.com/blog/1557-github-flow-in-the-browser 68 | * https://help.github.com/articles/using-pull-requests 69 | * https://github.com/blog/1377-create-and-delete-branches 70 | * https://github.com/blog/957-introducing-issue-mentions 71 | * https://help.github.com/articles/how-do-i-set-up-a-team 72 | * http://nvie.com/posts/a-successful-git-branching-model/ 73 | * https://help.github.com/articles/resolving-a-merge-conflict-from-the-command-line/ 74 | * https://git-scm.com/book/en/v2/Git-Branching-Basic-Branching-and-Merging#Basic-Merge-Conflicts 75 | * [Version Control with Git](http://www.amazon.com/Version-Control-Git-collaborative-development/dp/1449316387) 76 | -------------------------------------------------------------------------------- /hockeysticks/js/clustering.js: -------------------------------------------------------------------------------- 1 | //Angela Fan 2 | 3 | var margin = {top: 50, right: 50, bottom: 50, left: 50}, 4 | width = 720, 5 | height = 720; 6 | 7 | var x = d3.scale.ordinal().rangeBands([0, width]), 8 | z = d3.scale.linear().domain([0, 4]).clamp(true), 9 | c = d3.scale.category10().domain(d3.range(10)); 10 | 11 | var svg = d3.select("body").append("svg") 12 | .attr("width", width + margin.left + margin.right) 13 | .attr("height", height + margin.top + margin.bottom) 14 | .style("margin-left", -margin.left + "px") 15 | .append("g") 16 | .attr("transform", "translate(" + margin.left + "," + margin.top + ")"); 17 | 18 | d3.csv("/Documents/Bradner_work/hockey-sticks/corr.csv", function(data) { 19 | 20 | //console.log(data) 21 | 22 | d = {nodes: [], links: []} 23 | 24 | for(var k in 
data[0]) d.nodes.push(k); 25 | 26 | console.log(d) 27 | 28 | data.forEach(function(d) { 29 | 30 | //d.nodes.append("") 31 | }) 32 | // var matrix = [], 33 | // nodes = miserables.nodes, 34 | // n = nodes.length; 35 | 36 | // // Compute index per node. 37 | // nodes.forEach(function(node, i) { 38 | // node.index = i; 39 | // node.count = 0; 40 | // matrix[i] = d3.range(n).map(function(j) { return {x: j, y: i, z: 0}; }); 41 | // }); 42 | 43 | // // Convert links to matrix; count character occurrences. 44 | // miserables.links.forEach(function(link) { 45 | // matrix[link.source][link.target].z += link.value; 46 | // matrix[link.target][link.source].z += link.value; 47 | // matrix[link.source][link.source].z += link.value; 48 | // matrix[link.target][link.target].z += link.value; 49 | // nodes[link.source].count += link.value; 50 | // nodes[link.target].count += link.value; 51 | // }); 52 | 53 | // // Precompute the orders. 54 | // var orders = { 55 | // name: d3.range(n).sort(function(a, b) { return d3.ascending(nodes[a].name, nodes[b].name); }), 56 | // count: d3.range(n).sort(function(a, b) { return nodes[b].count - nodes[a].count; }), 57 | // group: d3.range(n).sort(function(a, b) { return nodes[b].group - nodes[a].group; }) 58 | // }; 59 | 60 | // // The default sort order. 61 | // x.domain(orders.name); 62 | 63 | // svg.append("rect") 64 | // .attr("class", "background") 65 | // .attr("width", width) 66 | // .attr("height", height); 67 | 68 | // var row = svg.selectAll(".row") 69 | // .data(matrix) 70 | // .enter().append("g") 71 | // .attr("class", "row") 72 | // .attr("transform", function(d, i) { return "translate(0," + x(i) + ")"; }) 73 | // .each(row); 74 | 75 | // row.append("line") 76 | // .attr("x2", width); 77 | 78 | // row.append("text") 79 | // .attr("x", -6) 80 | // .attr("y", x.rangeBand() / 2) 81 | // .attr("dy", ".32em") 82 | // .attr("text-anchor", "end") 83 | // .text(function(d, i) { return nodes[i].name; }); 84 | 85 | // var column = svg.selectAll(".column") 86 | // .data(matrix) 87 | // .enter().append("g") 88 | // .attr("class", "column") 89 | // .attr("transform", function(d, i) { return "translate(" + x(i) + ")rotate(-90)"; }); 90 | 91 | // column.append("line") 92 | // .attr("x1", -width); 93 | 94 | // column.append("text") 95 | // .attr("x", 6) 96 | // .attr("y", x.rangeBand() / 2) 97 | // .attr("dy", ".32em") 98 | // .attr("text-anchor", "start") 99 | // .text(function(d, i) { return nodes[i].name; }); 100 | 101 | function row(row) { 102 | var cell = d3.select(this).selectAll(".cell") 103 | .data(row.filter(function(d) { return d.z; })) 104 | .enter().append("rect") 105 | .attr("class", "cell") 106 | .attr("x", function(d) { return x(d.x); }) 107 | .attr("width", x.rangeBand()) 108 | .attr("height", x.rangeBand()) 109 | .style("fill-opacity", function(d) { return z(d.z); }) 110 | .style("fill", function(d) { return nodes[d.x].group == nodes[d.y].group ? 
c(nodes[d.x].group) : null; }) 111 | .on("mouseover", mouseover) 112 | .on("mouseout", mouseout); 113 | } 114 | 115 | function mouseover(p) { 116 | d3.selectAll(".row text").classed("active", function(d, i) { return i == p.y; }); 117 | d3.selectAll(".column text").classed("active", function(d, i) { return i == p.x; }); 118 | } 119 | 120 | function mouseout() { 121 | d3.selectAll("text").classed("active", false); 122 | } 123 | 124 | d3.select("#order").on("change", function() { 125 | clearTimeout(timeout); 126 | order(this.value); 127 | }); 128 | 129 | function order(value) { 130 | x.domain(orders[value]); 131 | 132 | var t = svg.transition().duration(2500); 133 | 134 | t.selectAll(".row") 135 | .delay(function(d, i) { return x(i) * 4; }) 136 | .attr("transform", function(d, i) { return "translate(0," + x(i) + ")"; }) 137 | .selectAll(".cell") 138 | .delay(function(d) { return x(d.x) * 4; }) 139 | .attr("x", function(d) { return x(d.x); }); 140 | 141 | t.selectAll(".column") 142 | .delay(function(d, i) { return x(i) * 4; }) 143 | .attr("transform", function(d, i) { return "translate(" + x(i) + ")rotate(-90)"; }); 144 | } 145 | 146 | // var timeout = setTimeout(function() { 147 | // order("group"); 148 | // d3.select("#order").property("selectedIndex", 2).node().focus(); 149 | // }, 5000); 150 | }); -------------------------------------------------------------------------------- /bamTableUpdate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | ''' 4 | The MIT License (MIT) 5 | 6 | Copyright (c) 2013 Charles Lin 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 
25 | ''' 26 | 27 | #bamTableUpdate.py 28 | #131216 29 | #Jaime 30 | 31 | 32 | #Description: 33 | 34 | #when called updates the /grail/projects/masterBamTable.txt 35 | 36 | 37 | 38 | #================================================================================ 39 | #=============================DEPENDENCIES======================================= 40 | #================================================================================ 41 | 42 | import sys 43 | import subprocess 44 | import string 45 | 46 | #print "Using python version %s" % sys.version 47 | 48 | 49 | #importing utils package 50 | sys.path.append('/home/cl512/src/pipeline/') 51 | import utils 52 | 53 | 54 | 55 | #================================================================================ 56 | #============================GLOBAL PARAMETERS=================================== 57 | #================================================================================ 58 | 59 | #add locations of files and global parameters in this section 60 | 61 | 62 | dataFile ='/location/file.txt' 63 | genome = 'hg18' 64 | 65 | 66 | #================================================================================ 67 | #===================================CLASSES====================================== 68 | #================================================================================ 69 | 70 | #user defined classes here 71 | 72 | #================================================================================ 73 | #=================================FUNCTIONS====================================== 74 | #================================================================================ 75 | 76 | #write your specific functions here 77 | 78 | 79 | def getUniqueIDList(): 80 | 81 | ''' 82 | function that gets all uniqueIDs 83 | ''' 84 | 85 | cmd = "mysql -u youngcompread --password='Fg78$Dr' -e 'SELECT uniqueID FROM baseExp' seqDB" 86 | 87 | sqlOut = subprocess.Popen(cmd,stdin = subprocess.PIPE,stderr = subprocess.PIPE,stdout = subprocess.PIPE,shell = True) 88 | 89 | sqlText = sqlOut.communicate() 90 | sqlText =sqlText[0] 91 | uniqueIDList =sqlText.split('\n')[1:-1] 92 | return uniqueIDList 93 | 94 | 95 | 96 | def getTonyInfo(uniqueIDList,colList): 97 | 98 | ''' 99 | pass this a uniqueID List and a list of columns 100 | 101 | ''' 102 | 103 | uniqueIDString = string.join(uniqueIDList,',') 104 | 105 | columnString = string.join([str(x) for x in colList],',') 106 | 107 | cmd = "perl /ark/tony/admin/getDB_Data.pl -i %s -c %s -o TAB" % (uniqueIDString,columnString) 108 | 109 | sqlOut = subprocess.Popen(cmd,stdin = subprocess.PIPE,stderr = subprocess.PIPE,stdout = subprocess.PIPE,shell = True) 110 | 111 | sqlText = sqlOut.communicate() 112 | 113 | sqlText = sqlText[0] 114 | 115 | sqlTable = sqlText.split('\n') 116 | sqlTable = [x for x in sqlTable if len(x) > 0] 117 | 118 | sqlTable = [x.split('\t') for x in sqlTable] 119 | 120 | header = [x.split(':')[-1] for x in sqlTable[0][1:]] 121 | header= [str.upper(x) for x in header] 122 | header = ['GENOME', 'SOURCE', 'CELL_TYPE', 'NAME', 'BAMFILE'] 123 | tonyDict = {} 124 | for line in sqlTable[1:]: 125 | uniqueID = line[0] 126 | tonyDict[uniqueID] = {} 127 | for i in range(len(header)): 128 | tonyDict[uniqueID][header[i]] = line[(i+1)] 129 | newTable = [] 130 | newTable.append(header) 131 | 132 | for key in tonyDict.keys(): 133 | newLine = [] 134 | newLine.append(str.upper(tonyDict[key]['GENOME'])) 135 | newLine.append(tonyDict[key]['SOURCE']) 136 | newLine.append(tonyDict[key]['CELL_TYPE']) 137 | 
newLine.append(tonyDict[key]['NAME']) 138 | newLine.append(tonyDict[key]['BAMFILE']) 139 | newTable.append(newLine) 140 | 141 | #print newTable 142 | 143 | utils.unParseTable(newTable, '/grail/projects/masterBamTable.txt', '\t') 144 | 145 | 146 | 147 | 148 | #================================================================================ 149 | #===============================MAIN RUN========================================= 150 | #================================================================================ 151 | 152 | #write the actual script here 153 | 154 | 155 | def main(): 156 | 157 | ''' 158 | this is the main run function for the script 159 | all of the work should occur here, but no functions should be defined here 160 | ''' 161 | colList = [48,6,7,3,47] 162 | 163 | #uniqueIDList = ['20130724_73','20130726_83'] 164 | uniqueIDList = getUniqueIDList() 165 | getTonyInfo(uniqueIDList,colList) 166 | 167 | 168 | main() 169 | 170 | -------------------------------------------------------------------------------- /pipeline_template.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | #pipeline_template.py 3 | 4 | ''' 5 | The MIT License (MIT) 6 | 7 | Copyright (c) 2015 Charles Lin 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in 17 | all copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | THE SOFTWARE. 
26 | ''' 27 | 28 | #generic pipeline template for human data 29 | 30 | 31 | #========================================================================== 32 | #=============================DEPENDENCIES================================= 33 | #========================================================================== 34 | 35 | 36 | import sys 37 | sys.path.append('/ark/home/cl512/src/pipeline/') 38 | 39 | import pipeline_dfci 40 | 41 | #========================================================================== 42 | #============================PARAMETERS==================================== 43 | #========================================================================== 44 | 45 | 46 | 47 | projectName = 'projectName' 48 | dataFile = '/grail/projects/%s/DATA_TABLE.txt' % (projectName) 49 | genome ='hg19' 50 | annotFile = '/ark/home/cl512/src/pipeline/annotation/%s_refseq.ucsc' % (genome) 51 | 52 | #project folders 53 | projectFolder = '/grail/projects/%s/' % (projectName) #PATH TO YOUR PROJECT FOLDER 54 | 55 | #standard folder names 56 | gffFolder ='%sgff/' % (projectFolder) 57 | macsFolder = '%smacsFolder/' % (projectFolder) 58 | macsEnrichedFolder = '%smacsEnriched/' % (projectFolder) 59 | mappedEnrichedFolder = '%smappedEnriched/' % (projectFolder) 60 | mappedFolder = '%smappedFolder/' % (projectFolder) 61 | wiggleFolder = '%swiggles/' % (projectFolder) 62 | metaFolder = '%smeta/' % (projectFolder) 63 | 64 | #making folders 65 | folderList = [gffFolder,macsFolder,macsEnrichedFolder,mappedEnrichedFolder,mappedFolder,wiggleFolder,metaFolder] 66 | 67 | for folder in folderList: 68 | pipeline_dfci.formatFolder(folder,True) 69 | 70 | 71 | 72 | #========================================================================== 73 | #========================FORMATTING SAMPLE TABLE=========================== 74 | #========================================================================== 75 | 76 | ##THIS SECTION CREATES A DATA TABLE FROM A WHITEHEAD ANNOTATION SPREADSHEET 77 | 78 | ##give full path 79 | ##sampleTableFile = 'YOUR_WIGTC_ANNOTATION.xls' #<- the .xls file in the seq data folder provided by WI 80 | 81 | #dirpath = '' <- provide full path of folder containing raw seq files 82 | ##e.g. /ark/home/jr246/raw/130925_..../QualityScore/ 83 | 84 | ##bamPath <- where we store our bams. Must have write access if you want to call bowtie 85 | ##e.g. /ark/home/jr246/bam/ 86 | #bamPath = '/ark/home/jr246/bam/' 87 | 88 | #pipeline_dfci.makePipelineTable(sampleTableFile,dirPath,bamPath,dataFile) 89 | 90 | #dataDict = pipeline_dfci.loadDataTable(dataFile) 91 | 92 | #namesList = dataDict.keys() 93 | 94 | #print(namesList) 95 | 96 | #========================================================================== 97 | #=======================LOADING DATA ANNOTATION============================ 98 | #========================================================================== 99 | 100 | ##THIS SECTION LOADS A DATA TABLE. 
MUST BE UNCOMMENTED FOR REST OF CODE TO WORK 101 | 102 | 103 | #LOADING THE DATA TABLE 104 | dataDict = pipeline_dfci.loadDataTable(dataFile) 105 | print(dataDict.keys()) 106 | 107 | pipeline_dfci.summary(dataFile) 108 | 109 | #========================================================================== 110 | #==========================CALLING BOWTIE================================== 111 | #========================================================================== 112 | 113 | ##THIS SECTION CALLS BOWTIE ON RAW READ FILES TO GENERATE SORTED AND INDEXED BAMS IN THE BAM FOLDER 114 | 115 | 116 | #namesList = [] <- fill this in if you want to only map a subset of the data. otherwise leave blank 117 | 118 | ##SET LAUNCH TO False to debug 119 | #pipeline_dfci.makeBowtieBashJobs(dataFile,namesList,launch=True) 120 | 121 | #========================================================================== 122 | #=============================CALL MACS==================================== 123 | #========================================================================== 124 | 125 | ##THIS SECTION CALLS THE MACS ERROR MODEL 126 | 127 | 128 | #namesList = dataDict.keys() 129 | 130 | #print(namesList) 131 | #pipeline_dfci.callMacs(dataFile,macsFolder,namesList,overwrite=False,pvalue='1e-9') 132 | 133 | 134 | #========================================================================== 135 | #=======================FORMAT MACS OUTPUT================================= 136 | #========================================================================== 137 | 138 | ##THIS SECTION FORMATS THE OUTPUT FROM MACS, CREATES THE MACSENRICHED FOLDER AND MOVES WIGGLES TO THE DESTINATION 139 | 140 | #pipeline_dfci.formatMacsOutput(dataFile,macsFolder,macsEnrichedFolder,wiggleFolder,wigLink='/ark/wiggles/') 141 | 142 | 143 | #========================================================================== 144 | #====================ADDITIONAL PIPELINE ANALYSIS========================== 145 | #========================================================================== 146 | 147 | 148 | 149 | 150 | -------------------------------------------------------------------------------- /bamPlot.R: -------------------------------------------------------------------------------- 1 | #setwd('/Volumes/young_ata4/myc_111311/bamPlot/') 2 | library(graphics) 3 | 4 | # The MIT License (MIT) 5 | 6 | # Copyright (c) 2013 Charles Lin 7 | 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | 15 | # The above copyright notice and this permission notice shall be included in 16 | # all copies or substantial portions of the Software. 17 | 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | # THE SOFTWARE. 
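# Usage note (a hedged sketch of the positional arguments read just below; the
# exact commandArgs() indices depend on how R is invoked, so treat this as a
# summary rather than a definitive command line):
#   args[3]  name table file     (tab-delimited, no header)
#   args[4]  diagram table file  (tab-delimited, no header)
#   args[5]  plot table file     (tab-delimited, with header)
#   args[6]  y-axis scaling      ('RELATIVE' or 'UNIFORM')
#   args[7]  plot style          ('SINGLE' or 'MULTIPLE')
#   args[8]  output file name    (optional; a name is derived from the input if blank)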
25 | 26 | 27 | 28 | args <- commandArgs() 29 | 30 | print(args[3:7]) 31 | 32 | 33 | nameTable = read.delim(args[3],header=FALSE) 34 | diagramTable = read.delim(args[4],header=FALSE) 35 | plotTable = read.delim(args[5]) 36 | yScale = args[6] 37 | plotStyle = args[7] 38 | fileName = args[8] 39 | 40 | plotHeight = (nrow(plotTable)+1)*3 41 | 42 | nameFile = unlist(strsplit(args[3],'/')) 43 | outFolder = paste(as.vector(nameFile[2:(length(nameFile)-1)]),collapse='/') 44 | inputName = nameFile[length(nameFile)] 45 | start = unlist(strsplit(inputName,'_'))[3] 46 | sense = unlist(strsplit(inputName,'_'))[2] 47 | end = unlist(strsplit(inputName,'_'))[4] 48 | chrom = unlist(strsplit(inputName,'_'))[1] 49 | 50 | print('fileName is') 51 | print(fileName) 52 | print(length(fileName)) 53 | if(nchar(fileName) > 1){ 54 | outputName = fileName}else{outputName = paste(chrom,sense,start,end,'plot.pdf',sep='_')} 55 | outFile = paste('',outFolder,outputName,sep='/') 56 | 57 | print(outFile) 58 | 59 | 60 | 61 | 62 | 63 | 64 | nBins = length(plotTable[1,])-7 65 | yMinDiagram = min(diagramTable[,2]-3) 66 | 67 | #first bring in the colors 68 | colorVector = c() 69 | for(i in 1:nrow(plotTable)){ 70 | color = rgb(plotTable[i,5],plotTable[i,6],plotTable[i,7],maxColorValue=255) 71 | colorVector = c(colorVector,color) 72 | } 73 | 74 | 75 | #do the plotting 76 | pdf(file=outFile,width = 8.5,height =plotHeight) 77 | 78 | if(plotStyle == 'SINGLE'){ 79 | m = matrix(c(2,2,2,2,2,2,2,2,1,1,1),nrow=11,ncol=8) 80 | layout(m) 81 | #plotting the diagram 82 | plot(0,0,xlim = c(0,nBins),ylim = c(yMinDiagram,2),col=rgb(1,1,1),xaxt='n',yaxt='n',ylab='',xlab='',main ='') 83 | for(i in 2:nrow(diagramTable)){ 84 | rect(diagramTable[i,1],diagramTable[i,2],diagramTable[i,3],diagramTable[i,4],col='black') 85 | 86 | 87 | } 88 | 89 | #plotting the names 90 | for(i in 2:nrow(nameTable)){ 91 | text(nameTable[i,2],nameTable[i,3],nameTable[i,1],cex=1) 92 | } 93 | 94 | #for all on the same plot 95 | 96 | 97 | yMax = 1.2*max(plotTable[1,(8:(nBins+7))]) 98 | 99 | if(yScale =='RELATIVE'){ 100 | color = colorVector[1] 101 | plot(spline(1:nBins,as.numeric(plotTable[1,(8:(nBins+7))]),n=nBins),ylim = c(0.1,yMax),type='l',col= color,lwd=2.5,xaxt='n',yaxt='n',xlab='',ylab='Relative peak heights') 102 | if(sense =='-'){ 103 | axis(1,at = c(0,nBins),labels= c(paste(chrom,end,sep=':'),paste(chrom,start,sep=':'))) 104 | }else{ 105 | axis(1,at = c(0,nBins),labels= c(paste(chrom,start,sep=':'),paste(chrom,end,sep=':'))) 106 | } 107 | legend(0,yMax,as.vector(plotTable[,3]),col=colorVector,lwd=2.5,cex=1.2) 108 | 109 | for(i in 2:nrow(plotTable)){ 110 | scaleFactor = max(plotTable[1,(8:(nBins+7))])/(1.2*max(plotTable[i,(8:(nBins+7))])) 111 | 112 | print(scaleFactor) 113 | #scaleFactor = 1 114 | color = colorVector[i] 115 | lines(spline(1:nBins,scaleFactor*as.numeric(plotTable[i,(8:(nBins+7))]),n=3*nBins),lwd=2,col = color) 116 | } 117 | 118 | }else{ 119 | color = colorVector[1] ## BJA tweaked to style of RELATIVE 120 | plot(spline(1:nBins,as.numeric(plotTable[1,(8:(nBins+7))]),n=nBins),ylim = c(0.1,yMax),type='l',col= color,lwd=2.5,xaxt='n',xlab='',ylab='ChIP-Seq reads') 121 | if(sense =='-'){ 122 | axis(1,at = c(0,nBins),labels= c(paste(chrom,end,sep=':'),paste(chrom,start,sep=':'))) 123 | }else{ 124 | axis(1,at = c(0,nBins),labels= c(paste(chrom,start,sep=':'),paste(chrom,end,sep=':'))) 125 | } 126 | legend(0,yMax,as.vector(plotTable[,3]),col=colorVector,lwd=2.5,cex=1.2) 127 | 128 | for(i in 2:nrow(plotTable)){ 129 | color = colorVector[i] ## BJA tweaked to 
style of RELATIVE 130 | # color = rgb(plotTable[i,5],plotTable[i,6],plotTable[i,7],maxColorValue=255) 131 | lines(spline(1:nBins,as.numeric(plotTable[i,(8:(nBins+7))]),n=3*nBins),lwd=2,col = color) 132 | } 133 | } 134 | 135 | } 136 | 137 | 138 | 139 | 140 | 141 | #for different plots 142 | if(plotStyle == 'MULTIPLE'){ 143 | par(mfrow = c(nrow(plotTable)+1,1)) 144 | if(yScale == 'UNIFORM'){ 145 | yMax = 1.2*max(plotTable[,(8:(nBins+7))]) 146 | } 147 | for(i in 1:nrow(plotTable)){ 148 | if(yScale == 'RELATIVE'){ 149 | yMax = 1.2*max(plotTable[i,(8:(nBins+7))]) 150 | } 151 | color = colorVector[i] 152 | plot(spline(1:nBins,as.numeric(plotTable[i,(8:(nBins+7))]),n=2*nBins),ylim = c(0.05*yMax,yMax),type='l',col= color,lwd=2,xlab='',ylab='ChIP-Seq Reads',xaxt = 'n') 153 | legend(0,yMax,as.vector(plotTable[i,3]),col=colorVector[i],lwd=2.5,cex=1.2) 154 | if(sense =='-'){ 155 | axis(1,at = c(0,nBins),labels= c(paste(chrom,end,sep=':'),paste(chrom,start,sep=':'))) 156 | }else{ 157 | axis(1,at = c(0,nBins),labels= c(paste(chrom,start,sep=':'),paste(chrom,end,sep=':'))) 158 | } 159 | } 160 | 161 | plot(0,0,xlim = c(0,nBins),ylim = c(yMinDiagram,2),col=rgb(1,1,1),xaxt='n',yaxt='n',ylab='',xlab='',main ='') 162 | for(i in 2:nrow(diagramTable)){ 163 | rect(diagramTable[i,1],diagramTable[i,2],diagramTable[i,3],diagramTable[i,4],col='black') 164 | } 165 | for(i in 2:nrow(nameTable)){ 166 | text(nameTable[i,2],nameTable[i,3],nameTable[i,1],cex=1) 167 | } 168 | 169 | 170 | } 171 | 172 | dev.off() 173 | -------------------------------------------------------------------------------- /bamliquidator_internal/makefile: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # The MIT License (MIT) 3 | # 4 | # Copyright (c) 2013 John DiMatteo, Xin Zhong, and Charles Lin 5 | # 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy 7 | # of this software and associated documentation files (the "Software"), to deal 8 | # in the Software without restriction, including without limitation the rights 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | # copies of the Software, and to permit persons to whom the Software is 11 | # furnished to do so, subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included in 14 | # all copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | # THE SOFTWARE. 
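# Quick reference (a sketch based only on the targets defined below, not a
# complete interface): `make` builds the three executables, `make install`
# copies them into $(DESTDIR)$(bindir), `make archive`/`make deb`/`make dput`
# drive source tarball and Debian/PPA packaging, and `make docker` builds the
# Docker image tagged with the current version.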
23 | # 24 | ############################################################################### 25 | 26 | # For dependencies, please see https://github.com/BradnerLab/pipeline/wiki/bamliquidator 27 | 28 | VERSION := $(shell grep "__version__ =" bamliquidatorbatch/bamliquidator_batch.py | cut -d' ' -f 3 | sed "s/'//g") 29 | export VERSION 30 | 31 | GIT_COMMIT := $(shell git rev-parse HEAD) 32 | export GIT_COMMIT 33 | 34 | # The directory to install in: 35 | prefix = /usr/local 36 | bindir = $(prefix)/bin 37 | 38 | # I prefer clang++, but g++ is more easily available, so using that instead 39 | #CC=clang++ 40 | CC=g++ 41 | 42 | # CPPFLAGS used to include march=native, but was removed so executables would 43 | # work on platforms besides the one used for the build 44 | CPPFLAGS := -std=c++0x -O3 -g -Wall -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -I/usr/include/hdf5/serial -I/usr/local/include 45 | LDFLAGS := -O3 -g -Wall -L/usr/lib/x86_64-linux-gnu/hdf5/serial 46 | LDLIBS := -lbam -lz -lpthread 47 | ADDITIONAL_LDLIBS := -lhdf5 -lhdf5_hl -ltcmalloc_minimal -ltbb 48 | 49 | # if someone else does a ppa dput upload, first change the name/email and commit it 50 | UPLOADER := John DiMatteo 51 | UPLOADER_EMAIL := jdimatteo@gmail.com 52 | export UPLOADER 53 | export UPLOADER_EMAIL 54 | 55 | define SETUP_PY 56 | from setuptools import setup 57 | 58 | setup( 59 | name='BamLiquidatorBatch', 60 | version='$(VERSION)', 61 | description='Python wrapper around bamliquidator for analyzing the density of short DNA sequence read alignments in the BAM file format.', 62 | maintainer='$(UPLOADER)', 63 | maintainer_email='$(UPLOADER_EMAIL)', 64 | packages=['bamliquidatorbatch'], 65 | url='https://github.com/BradnerLab/pipeline/wiki/bamliquidator', 66 | license='The MIT License (MIT)', 67 | entry_points = { 68 | 'console_scripts': [ 69 | 'bamliquidator_batch = bamliquidatorbatch.bamliquidator_batch:main', 70 | 'bamliquidator_flattener = bamliquidatorbatch.flattener:main' 71 | ] 72 | }, 73 | install_requires=[ 74 | 'numpy', 75 | 'pandas', 76 | 'redis', 77 | 'tables', 78 | 'numexpr' 79 | ] 80 | ) 81 | 82 | endef 83 | export SETUP_PY 84 | 85 | all: bamliquidator bamliquidator_bins bamliquidator_regions 86 | 87 | bamliquidator: bamliquidator.m.o bamliquidator.o 88 | $(CC) $(LDFLAGS) -o bamliquidator bamliquidator.o bamliquidator.m.o $(LDLIBS) 89 | 90 | bamliquidator_bins: bamliquidator_bins.m.o bamliquidator.o bamliquidator_util.o 91 | $(CC) $(LDFLAGS) -o bamliquidator_bins bamliquidator.o bamliquidator_bins.m.o bamliquidator_util.o \ 92 | $(LDLIBS) $(ADDITIONAL_LDLIBS) 93 | 94 | bamliquidator_regions: bamliquidator_regions.m.o bamliquidator.o bamliquidator_util.o 95 | $(CC) $(LDFLAGS) -o bamliquidator_regions bamliquidator.o bamliquidator_regions.m.o bamliquidator_util.o \ 96 | $(LDLIBS) $(ADDITIONAL_LDLIBS) 97 | 98 | bamliquidator.m.o: bamliquidator.m.cpp 99 | $(CC) $(CPPFLAGS) -c bamliquidator.m.cpp 100 | 101 | bamliquidator_bins.m.o: bamliquidator_bins.m.cpp 102 | $(CC) $(CPPFLAGS) -c bamliquidator_bins.m.cpp 103 | 104 | bamliquidator_regions.m.o: bamliquidator_regions.m.cpp 105 | $(CC) $(CPPFLAGS) -c bamliquidator_regions.m.cpp 106 | 107 | bamliquidator.o: bamliquidator.cpp bamliquidator.h 108 | $(CC) $(CPPFLAGS) -pthread -c bamliquidator.cpp 109 | 110 | bamliquidator_util.o: bamliquidator_util.cpp bamliquidator_util.h 111 | $(CC) $(CPPFLAGS) -c bamliquidator_util.cpp 112 | 113 | EXECUTABLES = bamliquidator bamliquidator_bins bamliquidator_regions 114 | 115 | archive: 116 | mkdir -p 
bamliquidator-$(VERSION)/bamliquidatorbatch 117 | cp *.h *.cpp makefile bamliquidator-$(VERSION) 118 | cp bamliquidatorbatch/bamliquidator_batch.py bamliquidator-$(VERSION)/bamliquidatorbatch 119 | tar -czf bamliquidator-$(VERSION).tar.gz bamliquidator-$(VERSION) 120 | rm -rf bamliquidator-$(VERSION) 121 | mkdir bamliquidatorbatch_$(VERSION) 122 | cp -r bamliquidatorbatch bamliquidatorbatch_$(VERSION) 123 | rm bamliquidatorbatch_$(VERSION)/bamliquidatorbatch/test.py 124 | echo "$$SETUP_PY" > bamliquidatorbatch_$(VERSION)/setup.py 125 | tar -czf bamliquidatorbatch_$(VERSION).orig.tar.gz bamliquidatorbatch_$(VERSION) 126 | 127 | deb: clean archive 128 | ./make_deb.sh 129 | 130 | dput: clean archive 131 | debuild_args=-S ./make_deb.sh 132 | for ubuntu_version in "trusty" "xenial" "bionic" ; do \ 133 | dput ppa:bradner-computation/pipeline deb_dist/bamliquidatorbatch_$(VERSION)-*$$(ubuntu_version)_source.changes ; \ 134 | dput ppa:bradner-computation/pipeline bamliquidator_$(VERSION)-*$$(ubuntu_version)_source.changes ; \ 135 | done 136 | 137 | clean: 138 | rm -f $(EXECUTABLES) *.o MANIFEST setup.py bamliquidator*.tar.gz 139 | rm -rf bamliquidator*precise* bamliquidator*trusty* BamLiquidatorBatch.egg-info dist bamliquidatorbatch_* deb_dist 140 | 141 | install: all 142 | install $(EXECUTABLES) $(DESTDIR)$(bindir) 143 | 144 | docker: 145 | docker build . --build-arg GIT_COMMIT=$(GIT_COMMIT) -t bioliquidator/bamliquidator:latest -t bioliquidator/bamliquidator:$(VERSION) 146 | 147 | docker-hub: docker 148 | docker login --username bioliquidator 149 | docker push bioliquidator/bamliquidator:latest 150 | docker push bioliquidator/bamliquidator:$(VERSION) 151 | -------------------------------------------------------------------------------- /bamliquidator_internal/bamliquidator.cpp: -------------------------------------------------------------------------------- 1 | #include "bamliquidator.h" 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | /* The MIT License (MIT) 11 | 12 | Copyright (c) 2013 Xin Zhong and Charles Lin 13 | 14 | Permission is hereby granted, free of charge, to any person obtaining a copy 15 | of this software and associated documentation files (the "Software"), to deal 16 | in the Software without restriction, including without limitation the rights 17 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 18 | copies of the Software, and to permit persons to whom the Software is 19 | furnished to do so, subject to the following conditions: 20 | 21 | The above copyright notice and this permission notice shall be included in 22 | all copies or substantial portions of the Software. 23 | 24 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 25 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 26 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 27 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 28 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 29 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 30 | THE SOFTWARE. 
31 | */ 32 | 33 | struct ReadItem 34 | { 35 | unsigned int start; 36 | /* read stop is start + strlen(seq) 37 | this *stop* will only be used for computing density 38 | will not be reported to js for bed plotting 39 | the actual stop need to be determined by cigar 40 | */ 41 | unsigned int stop; 42 | uint32_t flag; // flag from bam 43 | char strand; 44 | std::vector cigar; 45 | }; 46 | 47 | 48 | int intMin(int a, int b) 49 | { 50 | if(a < b) return a; 51 | return b; 52 | } 53 | 54 | int intMax(int a, int b) 55 | { 56 | if(a > b) return a; 57 | return b; 58 | } 59 | 60 | 61 | 62 | struct UserData 63 | { 64 | std::deque readItems; 65 | char strand; 66 | unsigned int extendlen; 67 | }; 68 | 69 | 70 | 71 | 72 | static int bam_fetch_func(const bam1_t* b,void* data) 73 | { 74 | if (b->core.tid < 0) return 0; 75 | 76 | UserData *udata=(UserData *)data; 77 | 78 | const bam1_core_t* c = &b->core; 79 | 80 | char strand= (c->flag&BAM_FREVERSE)?'-':'+'; 81 | if(udata->strand=='+') 82 | { 83 | if(strand!='+') return 0; 84 | } 85 | else if(udata->strand=='-') 86 | { 87 | if(strand!='-') return 0; 88 | } 89 | 90 | ReadItem r; 91 | r.strand=strand; 92 | 93 | uint32_t* cigar = bam1_cigar(b); 94 | 95 | // get read length 96 | int i, readlen; 97 | if (b->core.tid < 0) return 0; 98 | for (i = readlen = 0; i < c->n_cigar; ++i) 99 | { 100 | int op = cigar[i]&0xf; 101 | if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CREF_SKIP) 102 | readlen += cigar[i]>>4; 103 | } 104 | 105 | r.cigar = std::vector(cigar, cigar+c->n_cigar); 106 | 107 | r.start=c->pos; 108 | r.stop=c->pos+readlen; 109 | 110 | //printf("%d\t%d\t%c\t", r.start, r.stop, strand); 111 | 112 | // extend 113 | if(udata->extendlen>0) 114 | { 115 | if(strand=='+') 116 | { 117 | r.stop+=udata->extendlen; 118 | } 119 | else 120 | { 121 | r.start=intMax(0,r.start-udata->extendlen); 122 | } 123 | } 124 | 125 | //printf("%d\t%d\n", r.start, r.stop); 126 | 127 | r.flag=c->flag; 128 | udata->readItems.push_back(r); 129 | return 0; 130 | } 131 | 132 | std::deque bamQuery_region(const samfile_t* fp, const bam_index_t* idx, const std::string& coord, char strand, unsigned int extendlen) 133 | { 134 | // will not fill chromidx 135 | int ref,beg,end; 136 | int rc = bam_parse_region(fp->header,coord.c_str(),&ref,&beg,&end); 137 | if (rc != 0) 138 | { 139 | std::stringstream error_msg; 140 | error_msg << "bam_parse_region failed with return code " << rc; 141 | throw std::runtime_error(error_msg.str()); 142 | } 143 | if(ref<0) 144 | { 145 | return std::deque(); 146 | } 147 | UserData d; 148 | d.strand=strand; 149 | d.extendlen=extendlen; 150 | bam_fetch(fp->x.bam,idx,ref,beg,end,&d,bam_fetch_func); 151 | return d.readItems; 152 | } 153 | 154 | 155 | std::vector liquidate(const std::string& bamfile, const std::string& chromosome, 156 | const unsigned int start, const unsigned int stop, 157 | const char strand, const unsigned int spnum, 158 | const unsigned int extendlen) 159 | { 160 | samfile_t* fp=NULL; 161 | fp=samopen(bamfile.c_str(),"rb",0); 162 | if(fp == NULL) 163 | { 164 | throw std::runtime_error("samopen() error with " + bamfile); 165 | } 166 | 167 | bam_index_t* bamidx=NULL; 168 | bamidx=bam_index_load(bamfile.c_str()); 169 | if (bamidx == NULL) 170 | { 171 | throw std::runtime_error("bam_index_load() error with " + bamfile); 172 | } 173 | 174 | std::vector counts = liquidate(fp, bamidx, chromosome, start, stop, strand, spnum, extendlen); 175 | 176 | bam_index_destroy(bamidx); 177 | samclose(fp); 178 | 179 | return counts; 180 | } 181 | 182 | std::vector 
liquidate(const samfile_t* fp, const bam_index_t* bamidx, 183 | const std::string& chromosome, 184 | const unsigned int start, const unsigned int stop, 185 | const char strand, const unsigned int spnum, 186 | const unsigned int extendlen) 187 | { 188 | std::vector data(spnum, 0); 189 | 190 | std::string coord; 191 | { 192 | std::stringstream ss; 193 | ss << chromosome << ':' << start << '-' << stop; 194 | coord = ss.str(); 195 | } 196 | 197 | /* fetch bed items for a region and compute density 198 | only deal with coord, so use generic item 199 | */ 200 | int startArr[spnum], stopArr[spnum]; 201 | int pieceLength = (stop-start) / spnum; 202 | for(int i=0; i items = bamQuery_region(fp,bamidx,coord,strand,extendlen); 209 | 210 | for(const ReadItem& item : items) 211 | { 212 | // collapse this bed item onto the density counter 213 | for(int i=0; i stopArr[i]) continue; 216 | if(item.stop < startArr[i]) break; 217 | int start=intMax(item.start,startArr[i]); 218 | int stop=intMin(item.stop,stopArr[i]); 219 | if(start alt : test.pdf ' ) 93 | 94 | 95 | } 96 | } 97 | 98 | }); 99 | 100 | //console.log(ranking_array) 101 | 102 | // console.log(number) 103 | // console.log(file_name_array[0].length) 104 | 105 | if (number == 0) { 106 | 107 | tip = d3.tip().attr('class', 'd3-tip'); 108 | 109 | var margin = {top: 50, right: 50, bottom: 50, left: 100}, 110 | width = 1100 - margin.left - margin.right, 111 | height = 350 - margin.top - margin.bottom; 112 | 113 | var x = d3.scale.ordinal() 114 | .rangeRoundBands([0, width], .1); 115 | 116 | var y = d3.scale.linear() 117 | .range([height, 0]); 118 | 119 | var xAxis = d3.svg.axis() 120 | .scale(x) 121 | .orient("bottom") 122 | .tickFormat(""); 123 | 124 | var yAxis = d3.svg.axis() 125 | .scale(y) 126 | .orient("left") 127 | .outerTickSize([0]); 128 | 129 | var svg = d3.select("#bars").append("svg") 130 | .attr("width", width + margin.left + margin.right) 131 | .attr("height", height + margin.top + margin.bottom) 132 | .append("g") 133 | .attr("transform", "translate(" + (margin.left+100) + "," + margin.top+ ")"); 134 | 135 | svg.call(tip) 136 | 137 | svg.append("text") 138 | .attr("x", 350) 139 | .attr("y", -20) 140 | .text("Comparison across all datasets") 141 | .attr("font-size", "18px") 142 | .attr("font-weight", "bold") 143 | 144 | 145 | function range(start, end) { 146 | var foo = []; 147 | for (var i = start; i <= end; i++) { 148 | foo.push(i); 149 | } 150 | return foo; 151 | } 152 | 153 | 154 | x.domain(range(0,ranking_array.length-1)); 155 | y.domain([0, d3.max(ranking_array, function(d) { return d.rank; })]); 156 | 157 | // console.log(x.domain()) 158 | // console.log(x.range()) 159 | 160 | svg.append("g") 161 | .attr("class", "x axis") 162 | .attr("transform", "translate(0," + height + ")") 163 | .call(xAxis) 164 | .append("text") 165 | .attr("x", 1000) 166 | .attr("y", 15) 167 | .style("text-anchor", "middle") 168 | .text("Files"); 169 | 170 | svg.append("g") 171 | .attr("class", "y axis") 172 | .call(yAxis) 173 | .append("text") 174 | .attr("transform", "rotate(-90)") 175 | .attr("dy", "-3.71em") 176 | .style("text-anchor", "end") 177 | .text("Super-enhancer rank"); 178 | 179 | svg.selectAll(".bar") 180 | .data(ranking_array) 181 | .enter().append("rect") 182 | .attr("class", "bar") 183 | .attr("x", function(d, j) { 184 | 185 | d.number = j 186 | //console.log(d) 187 | 188 | // console.log(j) 189 | // console.log(x(j)) 190 | return x(j) 191 | }) 192 | .attr("width", x.rangeBand()) 193 | .attr("y", function(d) { 194 | 195 | return y(d.rank); 196 | 
197 | }) 198 | .attr("height", function(d) { 199 | 200 | return height - y(d.rank); 201 | }) 202 | .on("mouseover", function(d) { 203 | 204 | //console.log(d) 205 | 206 | tip.html("File: " + d.filename + "
<br/>Rank: " + d.rank + "<br/>
Signal: " + d.signal) 207 | 208 | return tip.show(d) 209 | 210 | }) 211 | .on("mouseout", function(d) { 212 | return tip.hide(d) 213 | }); 214 | 215 | function type(d) { 216 | d.frequency = +d.rank; 217 | return d; 218 | } 219 | 220 | 221 | d3.select("input#by_rank").on("change", change_rank); 222 | d3.select("input#by_signal").on("change", change_signal); 223 | d3.select("input#by_super").on("change", change_super); 224 | 225 | function change_rank() { 226 | 227 | // Copy-on-write since tweens are evaluated after a delay. 228 | var x0 = x.domain(ranking_array.sort(this.checked 229 | ? function(a, b) { return a.rank - b.rank; } 230 | : function(a, b) { return d3.descending(a.number, b.number); }) 231 | .map(function(d) { 232 | //console.log(d) 233 | return d.number; 234 | })) 235 | .copy(); 236 | 237 | var transition = svg.transition().duration(750), 238 | delay = function(d, i) { return i * 50; }; 239 | 240 | transition.selectAll(".bar") 241 | .delay(delay) 242 | .attr("x", function(d) { return x0(d.number); }); 243 | 244 | transition.select(".x.axis") 245 | .call(xAxis) 246 | .selectAll("g") 247 | .delay(delay); 248 | 249 | } 250 | 251 | function change_signal() { 252 | 253 | // Copy-on-write since tweens are evaluated after a delay. 254 | var x0 = x.domain(ranking_array.sort(this.checked 255 | ? function(a, b) { return b.signal - a.signal; } 256 | : function(a, b) { return d3.descending(a.number, b.number); }) 257 | .map(function(d) { 258 | //console.log(d) 259 | return d.number; 260 | })) 261 | .copy(); 262 | 263 | var transition = svg.transition().duration(750), 264 | delay = function(d, i) { return i * 50; }; 265 | 266 | transition.selectAll(".bar") 267 | .delay(delay) 268 | .attr("x", function(d) { return x0(d.number); }); 269 | 270 | transition.select(".x.axis") 271 | .call(xAxis) 272 | .selectAll("g") 273 | .delay(delay); 274 | 275 | } 276 | 277 | function change_super() { 278 | 279 | // Copy-on-write since tweens are evaluated after a delay. 280 | var x0 = x.domain(ranking_array.sort(this.checked 281 | ? 
function(a, b) { return a.super - b.super; } 282 | : function(a, b) { return d3.descending(a.number, b.number); }) 283 | .map(function(d) { 284 | //console.log(d) 285 | return d.number; 286 | })) 287 | .copy(); 288 | 289 | var transition = svg.transition().duration(750), 290 | delay = function(d, i) { return i * 50; }; 291 | 292 | transition.selectAll(".bar") 293 | .delay(delay) 294 | .attr("x", function(d) { return x0(d.number); }); 295 | 296 | transition.select(".x.axis") 297 | .call(xAxis) 298 | .selectAll("g") 299 | .delay(delay); 300 | 301 | } 302 | 303 | } 304 | 305 | 306 | }) 307 | } 308 | 309 | 310 | } 311 | 312 | }); 313 | 314 | -------------------------------------------------------------------------------- /GPL16043.r: -------------------------------------------------------------------------------- 1 | library(affy) 2 | library(makecdfenv) 3 | library(graphics) 4 | 5 | 6 | # The MIT License (MIT) 7 | 8 | # Copyright (c) 2013 Charles Lin 9 | 10 | # Permission is hereby granted, free of charge, to any person obtaining a copy 11 | # of this software and associated documentation files (the "Software"), to deal 12 | # in the Software without restriction, including without limitation the rights 13 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 | # copies of the Software, and to permit persons to whom the Software is 15 | # furnished to do so, subject to the following conditions: 16 | 17 | # The above copyright notice and this permission notice shall be included in 18 | # all copies or substantial portions of the Software. 19 | 20 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 23 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 26 | # THE SOFTWARE. 
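# Usage note (hedged: the script takes its inputs from commandArgs(), and the
# exact indices depend on how R is launched):
#   args[3]  directory containing the .CEL files (an output/ subfolder is
#            expected for the result tables and plots)
#   args[4]  run name, used as the prefix for the output files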
27 | 28 | 29 | 30 | #======================================================================== 31 | #===========================JOB PARAMETERS=============================== 32 | #======================================================================== 33 | 34 | args = commandArgs() 35 | cel_file_directory = args[3] 36 | name = args[4] 37 | 38 | print(cel_file_directory) 39 | print(name) 40 | #quit() 41 | #name = "JB20130926st" 42 | #cel_file_directory <- "/ark/home/cl512/ressrv19/raw/expression/JB20130926st/" 43 | 44 | #======================================================================== 45 | #=========================HARD CODED STUFF=============================== 46 | #======================================================================== 47 | erccTable = read.delim("/grail/genomes/ERCC_Technical_Data/ERCC_Controls_Analysis.txt") 48 | 49 | #primeviewcdf_env <- make.cdf.env(filename="PrimeView_withERCC_binary.cdf", cdf.path="/ark/home/cl512/ressrv19/annotations/platforms/GPL16043/annotation/",compress=FALSE) 50 | 51 | primeviewcdf_env <- make.cdf.env(filename="PrimeView_withERCC_binary.cdf", cdf.path="/grail/annotations/platforms/GPL16043/annotation/",compress=FALSE) 52 | 53 | #======================================================================== 54 | #=============================FUNCTIONS================================== 55 | #======================================================================== 56 | ## Add an alpha value to a colour 57 | add.alpha <- function(col, alpha=1){ 58 | if(missing(col)) 59 | stop("Please provide a vector of colours.") 60 | apply(sapply(col, col2rgb)/255, 2, 61 | function(x) 62 | rgb(x[1], x[2], x[3], alpha=alpha)) 63 | } 64 | 65 | #panel function to do a scatter with a red diagonal line 66 | panel.awesome <- function(x, y, col = par("col"), bg = NA, pch = par("pch"), 67 | cex = 1, col.smooth = "red", span = 2/3, iter = 3, ...) 68 | { 69 | points(x, y, pch = pch, col = col, bg = bg, cex = cex,ylab='log2 expression (a.u.)',xlab='log2 expression (a.u.)') 70 | ok <- is.finite(x) & is.finite(y) 71 | if (any(ok)) 72 | #lines(stats::lowess(x[ok], y[ok], f = span, iter = iter),col = 'red', ...) 
73 | abline(a=0,b=1,lwd=2,col='red') 74 | } 75 | 76 | #panel function to do correlation 77 | #adapted from http://www.r-bloggers.com/five-ways-to-visualize-your-pairwise-comparisons/ 78 | panel.cor <- function(x,y,digits=2,prefix="",...){ 79 | usr <- par("usr"); on.exit(par(usr)) 80 | par(usr=c(0,1,0,1)) 81 | r <- abs(cor(x,y,method='spearman')) 82 | txt <- round(r,2) 83 | txt <- paste(prefix,txt,sep="") 84 | cex <- 2 85 | test <- cor.test(x,y,method='spearman') 86 | Signif <- symnum(test$p.value,corr=FALSE,na=FALSE,cutpoints = c(0,0.001,0.01,0.05,0.1,1),symbols = c("***","**","*",".","N.S")) 87 | text(0.5,0.5,txt,cex=cex*r) 88 | text(.8,.8,Signif,cex=cex,col=2) 89 | 90 | } 91 | 92 | 93 | 94 | #returns a vector of the concentrations for each ercc probe 95 | plot_ercc <- function(erccTable,all_mas5_exprs,tag){ 96 | #first get the erccRows 97 | erccRows = grep("ERCC-",rownames(all_mas5_exprs)) 98 | erccList = rownames(all_mas5_exprs)[erccRows] 99 | exprsRowVector = c() 100 | concVector = c() 101 | for(i in 1:length(erccList)){ 102 | erccProbe = erccList[i] 103 | erccName = substr(erccProbe,1,10) 104 | #print(erccName) 105 | row = which(erccTable[,2] ==erccName) 106 | #print(row) 107 | if(length(row) >0){ 108 | concentration = as.numeric(erccTable[row,4]) 109 | concVector = c(concVector,concentration) 110 | exprsRowVector = c(exprsRowVector,erccRows[i]) 111 | } 112 | } 113 | 114 | #now let's do some cute plotting 115 | plot(log10(concVector),log2(all_mas5_exprs[exprsRowVector]),cex=0,xlab='log10 attomoles/ul',ylab='log2 expression (a.u.)',main=paste(tag,' spike-in expression',sep="")) 116 | palette = rainbow(ncol(all_mas5_exprs),alpha=0.3) 117 | for(i in 1:ncol(all_mas5_exprs)){ 118 | #color = add.alpha(i,0.2) 119 | points(log10(concVector),log2(all_mas5_exprs[exprsRowVector,i]),pch=19,col =add.alpha(i,0.2),cex=0.4) 120 | lines(loess.smooth(log10(concVector),log2(all_mas5_exprs[exprsRowVector,i])),lwd=2,col=i) 121 | 122 | 123 | } 124 | legend(-1.5,.95*max(log2(all_mas5_exprs[exprsRowVector])),colnames(all_mas5_exprs),col=1:ncol(all_mas5_exprs),lwd=2) 125 | 126 | } 127 | 128 | 129 | 130 | 131 | 132 | 133 | #======================================================================== 134 | #============================DATA PROCESSING============================= 135 | #======================================================================== 136 | 137 | 138 | cel_files <- list.celfiles(path= cel_file_directory,full.names=TRUE) 139 | raw_data <- read.affybatch(cel_files, cdfname="primeviewcdf_env") 140 | 141 | mas5Result <- expresso(raw_data,bgcorrect.method="mas",normalize=TRUE,pmcorrect.method="pmonly",summary.method="mas") 142 | 143 | 144 | all_mas5_exprs <- exprs(mas5Result) 145 | all_mas5_exprs_norm <- loess.normalize(all_mas5_exprs,subset=grep("ERCC-",rownames(all_mas5_exprs))) 146 | 147 | #write probe level expression raw 148 | filename_raw = paste(cel_file_directory,'output/',name,'_all_mas5_probe_exprs_raw.txt',sep='') 149 | write.table(all_mas5_exprs,file=filename_raw,quote=FALSE,sep='\t') 150 | 151 | #write probe level expression spikey normy 152 | filename_raw = paste(cel_file_directory,'output/',name,'_all_mas5_probe_exprs_norm.txt',sep='') 153 | write.table(all_mas5_exprs_norm,file=filename_raw,quote=FALSE,sep='\t') 154 | 155 | #======================================================================== 156 | #================================ANALYSIS================================ 157 | #======================================================================== 158 | 159 | #plotting 
spike-ins raw 160 | filename_spike = paste(cel_file_directory,'output/',name,'_spike_raw.pdf',sep='') 161 | pdf(file=filename_spike,width = 8,height =8) 162 | plot_ercc(erccTable,all_mas5_exprs,'Raw') 163 | dev.off() 164 | 165 | 166 | #plotting spike-ins raw 167 | filename_spike = paste(cel_file_directory,'output/',name,'_spike_norm.pdf',sep='') 168 | pdf(file=filename_spike,width = 8,height =8) 169 | plot_ercc(erccTable,all_mas5_exprs_norm,'Normalized') 170 | dev.off() 171 | 172 | 173 | #identify expressed probes 174 | #at least 1 probe above 50 175 | expressedProbesRaw = which(apply(all_mas5_exprs,1,max)>100) 176 | expressedProbesNorm = which(apply(all_mas5_exprs_norm,1,max)>100) 177 | 178 | #provide a size scaling factor for the pngs 179 | png_size = 200 * ncol(all_mas5_exprs) 180 | 181 | #now do a pairwise scatter plot either raw or norm 182 | axisMinRaw = min(log2(all_mas5_exprs[expressedProbesRaw,])) 183 | axisMaxRaw = max(log2(all_mas5_exprs[expressedProbesRaw,])) 184 | filename_raw = paste(cel_file_directory,'output/',name,'_all_mas5_probe_exprs_raw_scatter.png',sep='') 185 | 186 | png(filename=filename_raw,width =png_size,height =png_size,pointsize=24) 187 | pairs(log2(all_mas5_exprs[expressedProbesRaw[1:1000],]),lower.panel=panel.awesome,upper.panel=panel.cor,cex.labels=0.8,xlim =c(axisMinRaw,axisMaxRaw),ylim = c(axisMinRaw,axisMaxRaw),pch=19,col=rgb(0.5,0.5,0.5,0.4),cex=1,main='Unnormalized log2 expression (a.u.)') 188 | dev.off() 189 | 190 | #now do a pairwise scatter plot either raw or norm 191 | axisMinNorm = min(log2(all_mas5_exprs_norm[expressedProbesNorm,])) 192 | axisMaxNorm = max(log2(all_mas5_exprs_norm[expressedProbesNorm,])) 193 | filename_norm = paste(cel_file_directory,'output/',name,'_all_mas5_probe_exprs_norm_scatter.png',sep='') 194 | png(filename=filename_norm,width =png_size,height =png_size,pointsize=24) 195 | pairs(log2(all_mas5_exprs_norm[expressedProbesNorm[1:1000],]),lower.panel=panel.awesome,upper.panel=panel.cor,cex.labels=0.8,xlim =c(axisMinNorm,axisMaxNorm),ylim = c(axisMinRaw,axisMaxRaw),pch=19,col=rgb(0.5,0.5,0.5,0.4),cex=1,main='Spike-in normalized log2 expression (a.u.)') 196 | dev.off() 197 | 198 | 199 | 200 | #now make some boxplots 201 | 202 | filename_box = paste(cel_file_directory,'output/',name,'_probe_exprs_boxplot.pdf',sep='') 203 | pdf(file=filename_box,width = 10,height = 8) 204 | par(mfrow=c(1,2)) 205 | par(mar=c(12,6,3,1)) 206 | axisMinBox = min(axisMinRaw,axisMinNorm) 207 | axisMaxBox = max(axisMaxRaw,axisMaxNorm) 208 | boxplot(log2(all_mas5_exprs[expressedProbesRaw[1:1000],]),cex=0,main='Unnormalized expression',ylab='log2 expression (a.u.)',las=3,ylim = c(axisMinBox,axisMaxBox)) 209 | 210 | boxplot(log2(all_mas5_exprs_norm[expressedProbesNorm[1:1000],]),cex=0,main='Spike-in normalized expression',ylab='log2 expression (a.u.)',las=3,ylim = c(axisMinBox,axisMaxBox)) 211 | dev.off() 212 | 213 | -------------------------------------------------------------------------------- /bamToGFF_turbo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | #bamToGFF_turbo.py 3 | 4 | ''' 5 | The MIT License (MIT) 6 | 7 | Copyright (c) 2013 Charles Lin 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the 
Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in 17 | all copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | THE SOFTWARE. 26 | ''' 27 | 28 | #20130716 29 | 30 | #script to grab reads from a bam that align to a .gff file 31 | 32 | #uses the bamliquidator super fast uber thingy written by Xin Zhou 33 | 34 | import os 35 | import string 36 | import subprocess 37 | import utils 38 | 39 | def mapBamToGFF(bamFile,gff,sense = '.',extension = 200,rpm = False,clusterGram = None,matrix = None): 40 | '''maps reads from a bam to a gff''' 41 | 42 | #creating a new gff to output 43 | newGFF = [] 44 | #reading in the bam 45 | bam = utils.Bam(bamFile) 46 | 47 | #getting RPM normalization 48 | if rpm: 49 | MMR= round(float(bam.getTotalReads('mapped'))/1000000,4) 50 | else: 51 | MMR = 1 52 | 53 | print('using a MMR value of %s' % (MMR)) 54 | 55 | #creating a sense trans 56 | senseTrans = string.maketrans('-+.','+-+') 57 | 58 | #reading in the gff 59 | if type(gff) == str: 60 | gff = utils.parseTable(gff,'\t') 61 | 62 | #setting up a clustergram table 63 | if clusterGram: 64 | binSize = int(clusterGram) 65 | binSizeList = [] 66 | #now go through each line of the gff and make sure they're all the same length 67 | for i in range(0,len(gff),1): 68 | line = gff[i] 69 | gffLocus = utils.Locus(line[0],int(line[3]),int(line[4]),line[6],line[1]) 70 | binSizeList.append(gffLocus.len()/binSize) 71 | binSizeList = utils.uniquify(binSizeList) 72 | if len(binSizeList) > 1: 73 | print('WARNING: lines in gff are of different length. Output clustergram will have variable row length') 74 | newGFF.append(['GENE_ID','locusLine'] + [str(x*binSize)+'_'+bamFile.split('/')[-1] for x in range(1,max(binSizeList)+1,1)]) 75 | 76 | #setting up a maxtrix table 77 | if matrix: 78 | newGFF.append(['GENE_ID','locusLine'] + ['bin_'+str(n)+'_'+bamFile.split('/')[-1] for n in range(1,int(matrix)+1,1)]) 79 | nBin = int(matrix) 80 | 81 | # Try to use the bamliquidatior script on cluster, otherwise, failover to local (in path), otherwise fail. 
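    # For reference, the per-locus command assembled further below follows the
    # argument order (a sketch inferred from this script, not official
    # bamliquidator documentation):
    #   bamliquidator <bam> <chrom> <start> <stop> <strand> <nBins> <extension>
    # and its stdout is parsed as one raw count per bin, one value per line.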
82 | bamliquidatorString = '/usr/bin/bamliquidator' 83 | if not os.path.isfile(bamliquidatorString): 84 | bamliquidatorString = './bamliquidator' 85 | if not os.path.isfile(bamliquidatorString): 86 | raise ValueError('bamliquidator not found in path') 87 | 88 | #getting and processing reads for gff lines 89 | ticker = 0 90 | print('Number lines processed') 91 | for line in gff: 92 | line = line[0:9] 93 | if ticker%100 == 0: 94 | print(ticker) 95 | ticker+=1 96 | gffLocus = utils.Locus(line[0],int(line[3]),int(line[4]),line[6],line[1]) 97 | 98 | #get the nBin and binSize 99 | if clusterGram: 100 | nBin =gffLocus.len()/int(clusterGram) 101 | binSize = int(clusterGram) 102 | if matrix: 103 | nBin = int(matrix) 104 | binSize = gffLocus.len()/nBin 105 | #some regions will be too short to get info on 106 | if binSize == 0: 107 | clusterLine = [gffLocus.ID(),gffLocus.__str__()] + ['NA']*nBin 108 | newGFF.append(clusterLine) 109 | continue 110 | 111 | 112 | #flippy flip if sense is negative 113 | if sense == '-': 114 | bamSense = string.translate(gffLocus.sense(),senseTrans) 115 | elif sense == '+': 116 | bamSense = gffLocus.sense() 117 | else: 118 | bamSense = '.' 119 | #using the bamLiquidator to get the readstring 120 | #print('using nBin of %s' % nBin) 121 | bamCommand = "%s %s %s %s %s %s %s %s" % (bamliquidatorString,bamFile,line[0],gffLocus.start(),gffLocus.end(),bamSense,nBin,extension) 122 | #print(bamCommand) 123 | getReads = subprocess.Popen(bamCommand,stdin = subprocess.PIPE,stderr = subprocess.PIPE,stdout = subprocess.PIPE,shell = True) 124 | readString, stderr = getReads.communicate() 125 | if stderr: 126 | print("STDERR out: %s" % (stderr)) 127 | denList = readString.split('\n')[:-1] 128 | #print("denlist is: %s" % denList) 129 | #flip the denList if the actual gff region is - 130 | if gffLocus.sense() == '-': 131 | denList = denList[::-1] 132 | 133 | #converting from units of total bp of read sequence per bin to rpm/bp 134 | 135 | denList = [round(float(x)/binSize/MMR,4) for x in denList] 136 | 137 | #if the gff region is - strand, flip the 138 | 139 | clusterLine = [gffLocus.ID(),gffLocus.__str__()] + denList 140 | newGFF.append(clusterLine) 141 | 142 | return newGFF 143 | 144 | 145 | 146 | def convertEnrichedRegionsToGFF(enrichedRegionFile): 147 | '''converts a young lab enriched regions file into a gff''' 148 | newGFF = [] 149 | enrichedRegions = open(enrichedRegionFile,'r') 150 | header = enrichedRegions.readline() 151 | i = 0 152 | for line in enrichedRegions: 153 | line = line[:-1].split('\t') 154 | newLine = ['chr'+line[0],'row_'+str(i),line[4],line[1],line[2],'','.','','row_'+str(i),''] 155 | newGFF.append(newLine) 156 | i+=1 157 | return newGFF 158 | 159 | 160 | #python bamToGFF.py --density --floor 0 -b test.sam.sorted.bam -g pol2_sample.gff -o pol2_sample_mapped.gff 161 | 162 | def main(): 163 | from optparse import OptionParser 164 | usage = "usage: %prog [options] -b [SORTED BAMFILE] -i [INPUTFILE] -o [OUTPUTFILE]" 165 | parser = OptionParser(usage = usage) 166 | #required flags 167 | parser.add_option("-b","--bam", dest="bam",nargs = 1, default=None, 168 | help = "Enter .bam file to be processed.") 169 | parser.add_option("-i","--input", dest="input",nargs = 1, default=None, 170 | help = "Enter .gff or ENRICHED REGION file to be processed.") 171 | #output flag 172 | parser.add_option("-o","--output", dest="output",nargs = 1, default=None, 173 | help = "Enter the output filename.") 174 | #additional options 175 | parser.add_option("-s","--sense", dest="sense",nargs = 
1, default='.', 176 | help = "Map to '+','-' or 'both' strands. Default maps to both.") 177 | parser.add_option("-e","--extension", dest="extension",nargs = 1, default=200, 178 | help = "Extends reads by n bp. Default value is 200bp") 179 | parser.add_option("-r","--rpm", dest="rpm",action = 'store_true', default=False, 180 | help = "Normalizes density to reads per million (rpm)") 181 | parser.add_option("-c","--cluster", dest="cluster",nargs = 1, default=None, 182 | help = "Outputs a fixed bin size clustergram. user must specify bin size.") 183 | parser.add_option("-m","--matrix", dest="matrix",nargs = 1, default=None, 184 | help = "Outputs a variable bin sized matrix. User must specify number of bins.") 185 | (options,args) = parser.parse_args() 186 | 187 | print(options) 188 | print(args) 189 | 190 | 191 | if options.sense: 192 | if ['+','-','.','both'].count(options.sense) == 0: 193 | print('ERROR: sense flag must be followed by +,-,.,both') 194 | parser.print_help() 195 | exit() 196 | 197 | if options.cluster and options.matrix: 198 | print('ERROR: Cannot specify both matrix and clustergram flags.') 199 | parser.print_help() 200 | exit() 201 | 202 | if options.matrix: 203 | try: 204 | int(options.matrix) 205 | except: 206 | print('ERROR: User must specify an integer bin number for matrix (try 50)') 207 | parser.print_help() 208 | exit() 209 | 210 | if options.cluster: 211 | try: 212 | int(options.cluster) 213 | except: 214 | print('ERROR: User must specify an integer bin size for clustergram (try 25)') 215 | parser.print_help() 216 | exit() 217 | 218 | 219 | 220 | if options.input and options.bam: 221 | inputFile = options.input 222 | if inputFile.split('.')[-1] != 'gff': 223 | print('converting file to a .gff') 224 | gffFile = convertEnrichedRegionsToGFF(inputFile) 225 | else: 226 | gffFile = inputFile 227 | 228 | bamFile = options.bam 229 | 230 | if options.output == None: 231 | output = os.getcwd() + inputFile.split('/')[-1]+'.mapped' 232 | else: 233 | output = options.output 234 | if options.cluster: 235 | print('mapping to GFF and making clustergram with fixed bin width') 236 | newGFF = mapBamToGFF(bamFile,gffFile,options.sense,int(options.extension),options.rpm,int(options.cluster),None) 237 | elif options.matrix: 238 | print('mapping to GFF and making a matrix with fixed bin number') 239 | newGFF = mapBamToGFF(bamFile,gffFile,options.sense,int(options.extension),options.rpm,None,int(options.matrix)) 240 | 241 | print('bamToGFF_turbo writing output to: %s' % (output)) 242 | # Hackjob to make subdirectories for ROSE integration 243 | try: 244 | os.mkdir(os.path.dirname(output)) 245 | except OSError: 246 | pass 247 | utils.unParseTable(newGFF,output,'\t') 248 | 249 | else: 250 | parser.print_help() 251 | 252 | 253 | 254 | if __name__ == "__main__": 255 | main() 256 | -------------------------------------------------------------------------------- /makeBamMeta.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | #makeBamMeta.py 3 | 4 | ''' 5 | The MIT License (MIT) 6 | 7 | Copyright (c) 2013 Charles Lin 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, 
subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in 17 | all copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | THE SOFTWARE. 26 | ''' 27 | 28 | #makes a plus and minus strand meta from 3 gffs (tss,txn,ttr) and a bam 29 | 30 | 31 | #parameters to set here 32 | #Size of bins in the upstream region (bp) 33 | tssBins = 50 34 | 35 | #Number of bins in the txn region (nBins) 36 | txnBins = 200 37 | 38 | #Size of bins in the ttr region (bp) 39 | 40 | ttrBins =50 41 | import sys 42 | print(sys.version) 43 | 44 | 45 | sys.path.append('/ark/home/cl512/pipeline/') 46 | 47 | 48 | from utils import * 49 | import time 50 | 51 | def main(): 52 | 53 | from optparse import OptionParser 54 | usage ="usage: %prog [options] -g [COMMA_SEPARATED_GFFS_IN_ORDER] -b [SORTED_BAM_FILE] -o [OUTPUT_FOLDER]" 55 | parser = OptionParser(usage = usage) 56 | 57 | parser.add_option("-g", "--gff", dest = "gffList", nargs = 1,default = None, 58 | help = "Comma separated list of gffs in order e.g. tssGFF,txnGFF,ttr,GFF") 59 | parser.add_option("-b", "--bam", dest = "bam", nargs = 1, default = None, 60 | help = "Sorted bam file with a .bai in the same folder") 61 | parser.add_option("-o", "--out", dest = "output", nargs =1, default = None, 62 | help = "Path of the output FOLDER") 63 | parser.add_option("-e", "--ext", dest = "ext", nargs =1, default = 200, 64 | help = "Extension of reads") 65 | 66 | parser.add_option("-n", "--name", dest = "name", nargs = 1,default = None, 67 | help = "Specify a name for the output files. default uses the bam name") 68 | parser.add_option("-f", "--finish", dest = "finish", action='store_true',default = False, 69 | help = "If true, tries to finish the script and stitch gffs together") 70 | parser.add_option("-c", "--combine", dest = "combine", action = 'store_true',default = False, 71 | help = "If true, combines the sense and antisense output into one single combined meta") 72 | (options,args) = parser.parse_args() 73 | 74 | print(options) 75 | print(args) 76 | 77 | if options.bam and options.gffList and options.output and not options.finish: 78 | 79 | jobIDRoot = options.name+ '_meta_'+join(str(time.time()).split('.'),'') 80 | bamFile = options.bam 81 | gffList = options.gffList 82 | outFolder = options.output 83 | extension = options.ext 84 | combine = options.combine 85 | 86 | gffList = gffList.split(',') 87 | if len(gffList) != 3: 88 | print('Must give 3 gffs in order TSS,TXN,TTR') 89 | exit() 90 | 91 | try: 92 | #check to make sure the output directory exists 93 | foo = os.listdir(outFolder) 94 | except OSError: 95 | print('OUTPUT DIRECTORY DOES NOT EXIST. 
CREATING IT NOW') 96 | os.system('mkdir %s' % (outFolder)) 97 | 98 | [tssGFF,txnGFF,ttrGFF] = gffList 99 | bamName = join(bamFile.split('/')[-1].split('.')[0:2],'.') 100 | 101 | #next set get the name 102 | if options.name: 103 | name = options.name 104 | else: 105 | #use the txn gff as the root 106 | name = '%s_%s' % (bamName,txnGFF.split('/')[-1]) 107 | print('using %s as the file name root' % (name)) 108 | 109 | metaSettings = [['%s_JOB_ID' % (name)]] 110 | 111 | 112 | 113 | bashFileName = '%s/mapBamMeta_%s.sh' % (outFolder,jobIDRoot) 114 | bashFile = open(bashFileName,'w') 115 | 116 | sense = '+' 117 | tssOutfile = '%s%s_%s_%s' % (outFolder,bamName,sense,tssGFF.split('/')[-1]) 118 | txnOutfile = '%s%s_%s_%s' % (outFolder,bamName,sense,txnGFF.split('/')[-1]) 119 | ttrOutfile = '%s%s_%s_%s' % (outFolder,bamName,sense,ttrGFF.split('/')[-1]) 120 | for outFile in [tssOutfile,txnOutfile,ttrOutfile]: 121 | metaSettings.append([outFile.split('/')[-1]]) 122 | 123 | 124 | 125 | job1ID = jobIDRoot + '_1' 126 | job2ID = jobIDRoot + '_2' 127 | job3ID = jobIDRoot + '_3' 128 | 129 | 130 | 131 | cmd1 = "python /ark/home/cl512/pipeline/bamToGFF_turbo.py -r -e %s -c %s -s %s -b %s -i %s -o %s &" % (extension,tssBins,sense,bamFile,tssGFF,tssOutfile) 132 | cmd2 = "python /ark/home/cl512/pipeline/bamToGFF_turbo.py -r -e %s -m %s -s %s -b %s -i %s -o %s &" % (extension,txnBins,sense,bamFile,txnGFF,txnOutfile) 133 | cmd3 = "python /ark/home/cl512/pipeline/bamToGFF_turbo.py -r -e %s -c %s -s %s -b %s -i %s -o %s &" % (extension,ttrBins,sense,bamFile,ttrGFF,ttrOutfile) 134 | 135 | for cmd in [cmd1,cmd2,cmd3]: 136 | bashFile.write(cmd) 137 | bashFile.write('\n') 138 | 139 | #launch jobs in the antisense strand 140 | 141 | sense = '-' 142 | tssOutfile = '%s%s_%s_%s' % (outFolder,bamName,sense,tssGFF.split('/')[-1]) 143 | txnOutfile = '%s%s_%s_%s' % (outFolder,bamName,sense,txnGFF.split('/')[-1]) 144 | ttrOutfile = '%s%s_%s_%s' % (outFolder,bamName,sense,ttrGFF.split('/')[-1]) 145 | for outFile in [tssOutfile,txnOutfile,ttrOutfile]: 146 | metaSettings.append([outFile.split('/')[-1]]) 147 | 148 | job4ID = jobIDRoot + '_4' 149 | job5ID = jobIDRoot + '_5' 150 | job6ID = jobIDRoot + '_6' 151 | 152 | 153 | cmd4 = "python /ark/home/cl512/pipeline/bamToGFF_turbo.py -r -e %s -c %s -s %s -b %s -i %s -o %s &" % (extension,tssBins,sense,bamFile,tssGFF,tssOutfile) 154 | cmd5 = "python /ark/home/cl512/pipeline/bamToGFF_turbo.py -r -e %s -m %s -s %s -b %s -i %s -o %s &" % (extension,txnBins,sense,bamFile,txnGFF,txnOutfile) 155 | cmd6 = "python /ark/home/cl512/pipeline/bamToGFF_turbo.py -r -e %s -c %s -s %s -b %s -i %s -o %s &" % (extension,ttrBins,sense,bamFile,ttrGFF,ttrOutfile) 156 | 157 | 158 | 159 | for cmd in [cmd4,cmd5,cmd6]: 160 | bashFile.write(cmd) 161 | bashFile.write('\n') 162 | 163 | 164 | unParseTable(metaSettings,outFolder+'%s_metaSettings.txt' % (name),'\t') 165 | 166 | #now launch the finishing jobs. waits until all 6 jobs are done. 
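        # Note (hedged): the finish command written below re-invokes this
        # script with -f so that the six per-strand bamToGFF outputs listed in
        # the metaSettings file get stitched into the final meta table(s).
        # Because the six mapping commands above are written with a trailing
        # '&', they run in the background; for the stitching to see complete
        # output, the generated bash script presumably needs a barrier (e.g. a
        # bash 'wait') before this command -- treat that as an assumption
        # about intent rather than documented behavior.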
--------------------------------------------------------------------------------
/bamliquidator_internal/bamliquidator_bins.m.cpp:
--------------------------------------------------------------------------------
#include "bamliquidator.h"
#include "bamliquidator_util.h"

#include <cmath>
#include <cstdint>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include <boost/lexical_cast.hpp>

#include <hdf5.h>
#include <hdf5_hl.h>

#include <tbb/blocked_range.h>
#include <tbb/enumerable_thread_specific.h>
#include <tbb/parallel_for.h>
#include <tbb/task_scheduler_init.h>

// this CountH5Record must match exactly the structure in HDF5
// -- see bamliquidator_batch.py function create_count_table
struct CountH5Record
{
  uint32_t bin_number;
  char cell_type[16];
  char chromosome[64];
  uint64_t count;
  uint32_t bam_file_key;
};

void write(hid_t& file,
           const std::vector<CountH5Record>& records)
{
  const size_t record_size = sizeof(CountH5Record);

  size_t record_offset[] = { HOFFSET(CountH5Record, bin_number),
                             HOFFSET(CountH5Record, cell_type),
                             HOFFSET(CountH5Record, chromosome),
                             HOFFSET(CountH5Record, count),
                             HOFFSET(CountH5Record, bam_file_key) };

  size_t field_sizes[] = { sizeof(CountH5Record::bin_number),
                           sizeof(CountH5Record::cell_type),
                           sizeof(CountH5Record::chromosome),
                           sizeof(CountH5Record::count),
                           sizeof(CountH5Record::bam_file_key) };

  herr_t status = H5TBappend_records(file, "bin_counts", records.size(), record_size,
                                     record_offset, field_sizes, records.data());
  if (status != 0)
  {
    std::stringstream ss;
    ss << "Failed to append records, status = " << status;
    throw std::runtime_error(ss.str());
  }
}

class Liquidator
{
public:
  Liquidator(const std::string& bam_file_path):
    bam_file_path(bam_file_path),
    fp(nullptr),
    bamidx(nullptr)
  {
    init();
  }

  Liquidator(const Liquidator& other):
    bam_file_path(other.bam_file_path),
    fp(nullptr),
    bamidx(nullptr)
  {
    init();
  }

  Liquidator& operator=(const Liquidator& other) = delete;

  ~Liquidator()
  {
    bam_index_destroy(bamidx);
    samclose(fp);
  }

  double liquidate(const std::string& chromosome, int start, int stop, char strand, unsigned int extension)
  {
    std::vector<double> counts = ::liquidate(fp, bamidx, chromosome, start, stop, strand, 1, extension);
    if (counts.size() != 1)
    {
      throw std::runtime_error("liquidate failed to provide exactly one count (count is " +
                               boost::lexical_cast<std::string>(counts.size()) + ")");
    }
    return counts[0];
  }

private:
  std::string bam_file_path;
  samfile_t* fp;
  bam_index_t* bamidx;

  void init()
  {
    fp = samopen(bam_file_path.c_str(), "rb", 0);
    if (fp == NULL)
    {
      throw std::runtime_error("samopen() error with " + bam_file_path);
    }

    bamidx = bam_index_load(bam_file_path.c_str());
    if (bamidx == NULL)
    {
      throw std::runtime_error("bam_index_load() error with " + bam_file_path);
    }
  }
};

// my testing doesn't show using ets keys significantly improving performance,
// but it doesn't hurt and I guess might help with the right hardware
typedef tbb::enumerable_thread_specific<Liquidator,
                                        tbb::cache_aligned_allocator<Liquidator>,
                                        tbb::ets_key_per_instance>
        Liquidators;

void liquidate_bins(std::vector<CountH5Record>& counts, const std::string& bam_file_path,
                    size_t region_begin, size_t region_end, const size_t bin_size,
                    unsigned int extension, const char strand,
                    Liquidators& liquidators)
{
  Liquidator& liquidator = liquidators.local();

  for (size_t i = region_begin; i < region_end; ++i)
  {
    try
    {
      const size_t start = counts[i].bin_number * bin_size;
      const size_t stop = start + bin_size;
      counts[i].count = liquidator.liquidate(counts[i].chromosome,
                                             start,
                                             stop,
                                             strand,
                                             extension);
    } catch (const std::exception& e)
    {
      Logger::warn() << "Skipping " << counts[i].chromosome
                     << " bin " << i << " due to error: " << e.what();
    }
  }
}

void batch_liquidate(std::vector<CountH5Record>& counts,
                     const unsigned int bin_size,
                     const unsigned int extension,
                     const char strand,
                     const std::string& bam_file_path)
{
  Liquidators liquidators((Liquidator(bam_file_path)));

  tbb::parallel_for(
    tbb::blocked_range<size_t>(0, counts.size(), 1),
    [&](const tbb::blocked_range<size_t>& range)
    {
      liquidate_bins(counts, bam_file_path, range.begin(), range.end(), bin_size, extension, strand, liquidators);
    },
    tbb::auto_partitioner());
}

std::vector<CountH5Record> count_placeholders(
  const std::vector<std::pair<std::string, size_t>>& chromosome_lengths,
  const std::string& cell_type,
  const unsigned int bam_file_key,
  const unsigned int bin_size)
{
  size_t num_records = 0;
  for (auto& chr_length : chromosome_lengths)
  {
    int bins = std::ceil(chr_length.second / (double) bin_size);
    num_records += bins;
  }

  CountH5Record empty_record;
  empty_record.bam_file_key = bam_file_key;
  empty_record.bin_number = 0;
  empty_record.count = 0;
  copy(empty_record.cell_type, cell_type, sizeof(CountH5Record::cell_type));
  copy(empty_record.chromosome, "", sizeof(CountH5Record::chromosome));

  std::vector<CountH5Record> records(num_records, empty_record);

  size_t i = 0;
  for (auto& chr_length : chromosome_lengths)
  {
    int bins = std::ceil(chr_length.second / (double) bin_size);
    for (int j = 0; j < bins; ++j, ++i)
    {
      records[i].bin_number = j;
      copy(records[i].chromosome, chr_length.first, sizeof(CountH5Record::chromosome));
    }
  }

  return records;
}

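// Worked example of the sizing arithmetic above: with the bin_size of 100000 and the
// chr1 length of 247249719 used in the usage example in main() below,
// std::ceil(247249719 / 100000.0) == 2473, so chr1 contributes 2473 placeholder
// records, and the last bin (bin_number 2472, covering 247200000-247300000) extends
// past the end of the chromosome.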
int main(int argc, char* argv[])
{
  try
  {
    if (argc < 13 || argc % 2 != 1)
    {
      std::cerr << "usage: " << argv[0]
                << " number_of_threads cell_type bin_size extension strand bam_file bam_file_key hdf5_file log_file write_warnings_to_stderr chr1 length1 ... \n"
                << "\ne.g. " << argv[0] << " 0 mm1s 100000 0 . /ifs/hg18/mm1s/04032013_D1L57ACXX_4.TTAGGC.hg18.bwt.sorted.bam "
                << "137 counts.hdf5 output/log.txt 1 chr1 247249719 chr2 242951149 chr3 199501827"
                << "\nnumber of threads <= 0 means use a number of threads equal to the number of logical cpus."
                << "\nnote that this application is intended to be run from bamliquidator_batch.py -- see"
                << "\nhttps://github.com/BradnerLab/pipeline/wiki for more information"
                << std::endl;
      return 1;
    }

    const int number_of_threads = boost::lexical_cast<int>(argv[1]);
    const std::string cell_type = argv[2];
    const unsigned int bin_size = boost::lexical_cast<unsigned int>(argv[3]);
    const unsigned int extension = boost::lexical_cast<unsigned int>(argv[4]);
    const char strand = boost::lexical_cast<char>(argv[5]);
    const std::string bam_file_path = argv[6];
    const unsigned int bam_file_key = boost::lexical_cast<unsigned int>(argv[7]);
    const std::string hdf5_file_path = argv[8];
    const std::string log_file_path = argv[9];
    const bool write_warnings_to_stderr = boost::lexical_cast<bool>(argv[10]);
    const std::vector<std::pair<std::string, size_t>> chromosome_lengths = extract_chromosome_lengths(argc, argv, 11);

    tbb::task_scheduler_init init( number_of_threads <= 0
                                 ? tbb::task_scheduler_init::automatic
                                 : number_of_threads);

    Logger::configure(log_file_path, write_warnings_to_stderr);

    if (bin_size == 0)
    {
      Logger::error() << "Bin size cannot be zero";
      return 2;
    }

    hid_t h5file = H5Fopen(hdf5_file_path.c_str(), H5F_ACC_RDWR, H5P_DEFAULT);
    if (h5file < 0)
    {
      Logger::error() << "Failed to open H5 file " << hdf5_file_path;
      return 3;
    }

    std::vector<CountH5Record> counts = count_placeholders(chromosome_lengths, cell_type, bam_file_key, bin_size);
    batch_liquidate(counts, bin_size, extension, strand, bam_file_path);
    write(h5file, counts);

    H5Fclose(h5file);

    return 0;
  }
  catch (const std::exception& e)
  {
    Logger::error() << "Unhandled exception: " << e.what();

    return 4;
  }
}

/* The MIT License (MIT)

Copyright (c) 2013 John DiMatteo (jdimatteo@gmail.com)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
--------------------------------------------------------------------------------
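The CountH5Record struct above must match the bin_counts table that bamliquidator_batch.py builds in its create_count_table function. That function is not part of this excerpt, so the following is only an illustrative PyTables sketch of a table description whose field order, integer widths, and string sizes line up with the struct; the class name BinCount and the file name counts.hdf5 are placeholders.

import tables  # PyTables

class BinCount(tables.IsDescription):
    bin_number   = tables.UInt32Col(pos=0)      # uint32_t bin_number
    cell_type    = tables.StringCol(16, pos=1)  # char cell_type[16]
    chromosome   = tables.StringCol(64, pos=2)  # char chromosome[64]
    count        = tables.UInt64Col(pos=3)      # uint64_t count
    bam_file_key = tables.UInt32Col(pos=4)      # uint32_t bam_file_key

# "bin_counts" at the file root is the table name that H5TBappend_records targets above.
with tables.open_file("counts.hdf5", mode="w") as h5:
    h5.create_table("/", "bin_counts", BinCount)

Because bamliquidator_bins only opens the HDF5 file read-write and appends rows, the table must already exist in the file passed on its command line before the binary runs.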