├── .gitignore ├── Dockerfile ├── LICENSE ├── Makefile ├── R ├── BAMscale_peak_density_version2 │ └── PlotXY_version2.R ├── BAMscale_plot_peak_density │ ├── FPKM_normalized_coverages.tsv │ ├── peakDensity.command │ └── plotXY │ │ └── PlotXY.R ├── OKseq_switches.R ├── Plot_heatmap │ ├── Heatmap_with_clustering.R │ ├── Heatmapper.R │ ├── Heatmapper_functions.R │ └── heatmap_functions_clean.R └── Replication_timing_segmenter.R ├── README.md ├── bamscale-cov.cwl ├── bamscale-scale.cwl ├── bamscale.yml ├── doc ├── ATAC-seq_peak_quant_benchmark.png ├── ATAC-seq_peak_validation_CB450h.png ├── ATAC-seq_peak_validation_CB452h.png ├── ATAC-seq_peak_validation_CB454h.png ├── ATAC-seq_peak_validation_CEM0h.png ├── ATAC-seq_peak_validation_CEM2h.png ├── ATAC-seq_peak_validation_CEM4h.png └── images │ ├── ATAC-seq_coverage_benchmark.png │ ├── ATAC-seq_coverage_comparison.png │ ├── ATAC-seq_peak_quant_benchmark.png │ ├── ATAC-seq_peak_validation.png │ ├── BMAscale_RNAseq_coverage.png │ ├── Detailed_usage_ATAC_exampleTOP1.png │ ├── Detailed_usage_ENDseq.png │ ├── Detailed_usage_OKseq.png │ ├── Detailed_usage_RepTime_closer.png │ ├── Detailed_usage_RepTime_full.png │ ├── Detailed_usage_peakQuant_CB45_2h_vs_0h.png │ ├── Detailed_usage_peakQuant_CB45_4h_vs_0h.png │ ├── Detailed_usage_peakQuant_CEM_2h_vs_0h.png │ ├── Detailed_usage_peakQuant_CEM_4h_vs_0h.png │ ├── ENDseq_stranded.png │ ├── FPKM_formula.png │ ├── Libsize_formula.png │ ├── MAIN.png │ ├── MAIN_figure.png │ ├── OKseq_segmenter_figure.png │ ├── OKseq_switch_example1.png │ ├── OKseq_switch_example2.png │ ├── Read_count_matrix_example.png │ ├── Replication_timing_script_example.png │ ├── TPM_formula.png │ ├── XY_Empty_page.png │ ├── XY_drop_file.png │ ├── XY_dropdown.png │ ├── XY_example.png │ ├── XY_exec.png │ ├── XY_explanation.png │ └── XY_rstudio.png ├── includes ├── BAMcoverage.h ├── BAMstructs.h ├── BEDstruct.h ├── CHROMstruct.h ├── Definitions.h ├── Inputs.h ├── Writer.h ├── binning.h ├── main.h ├── multithreads.h ├── scale.h └── segmenter.h ├── nbproject ├── Makefile-Release.mk ├── Makefile-impl.mk ├── Makefile-variables.mk ├── Package-Release.bash ├── configurations.xml ├── private │ ├── Makefile-variables.mk │ ├── c_standard_headers_indexer.c │ ├── configurations.xml │ ├── cpp_standard_headers_indexer.cpp │ ├── launcher.properties │ └── private.xml └── project.xml └── src ├── BAMcoverage.c ├── BAMstructs.c ├── BEDstruct.c ├── CHROMstruct.c ├── Inputs.c ├── Writer.c ├── binning.c ├── main.c ├── multithreads.c ├── scale.c └── segmenter.c /.gitignore: -------------------------------------------------------------------------------- 1 | /bin 2 | /build 3 | /dist 4 | .dep.inc 5 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Base Image 2 | FROM continuumio/miniconda3 3 | 4 | # Metadata 5 | LABEL base.image="continuumio/miniconda3" 6 | LABEL version="1" 7 | LABEL software="BAMscale" 8 | LABEL software.version="0.0.7" 9 | LABEL description="BAMscale is a one-step tool for either 1) quantifying and normalizing the coverage of peaks or 2) generated scaled BigWig files for easy visualization of commonly used DNA-seq capture based methods." 10 | LABEL tags="BAM" 11 | LABEL website="https://github.com/ncbi/BAMscale" 12 | 13 | # Maintainer 14 | MAINTAINER Roberto Vera Alvarez 15 | 16 | USER root 17 | 18 | RUN apt-get update && \ 19 | apt-get install -y apt-utils wget bzip2 sudo gcc make && \ 20 | apt-get clean && \ 21 | apt-get purge && \ 22 | rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 23 | 24 | # Updating Anaconda packages 25 | RUN conda update conda 26 | RUN conda update --all 27 | RUN conda config --add channels defaults 28 | RUN conda config --add channels bioconda 29 | RUN conda config --add channels conda-forge 30 | RUN conda install htslib libbigwig 31 | 32 | # Add user ubuntu with no password, add to sudo group 33 | RUN adduser --disabled-password --gecos '' ubuntu 34 | RUN chmod a+rwx /home/ubuntu/ 35 | RUN mkdir /home/ubuntu/bin 36 | RUN chown -R ubuntu /home/ubuntu 37 | USER ubuntu 38 | 39 | ENV URL=https://github.com/ncbi/BAMscale 40 | ENV FOLDER=BAMscale 41 | ENV PATH="/home/ubuntu/bin:${PATH}" 42 | ENV CONDA_DIR="/opt/conda/" 43 | ENV CPPFLAGS="-I $CONDA_DIR/include" 44 | ENV LDFLAGS="-L $CONDA_DIR/lib -Wl,-rpath,$CONDA_DIR/lib" 45 | 46 | RUN cd /home/ubuntu/ && \ 47 | git clone $URL && \ 48 | cd $FOLDER && \ 49 | make && \ 50 | mv bin/BAMscale /home/ubuntu/bin/ && \ 51 | cd .. && \ 52 | rm -rf $FOLDER 53 | 54 | WORKDIR /data 55 | 56 | CMD ["/home/ubuntu/bin/BAMscale"] 57 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | # Public Domain notice 2 | 3 | National Center for Biotechnology Information. 4 | 5 | This software is a "United States Government Work" under the terms of the United States 6 | Copyright Act. It was written as part of the authors' official duties as United States 7 | Government employees and thus cannot be copyrighted. This software is freely available 8 | to the public for use. The National Library of Medicine and the U.S. Government have not 9 | placed any restriction on its use or reproduction. 10 | 11 | Although all reasonable efforts have been taken to ensure the accuracy and reliability 12 | of the software and data, the NLM and the U.S. Government do not and cannot warrant the 13 | performance or results that may be obtained by using this software or data. The NLM and 14 | the U.S. Government disclaim all warranties, express or implied, including warranties 15 | of performance, merchantability or fitness for any particular purpose. 16 | 17 | Please cite NCBI in any work or product based on this material. 18 | 19 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # There exist several targets which are by default empty and which can be 3 | # used for execution of your targets. These targets are usually executed 4 | # before and after some main targets. They are: 5 | # 6 | # .build-pre: called before 'build' target 7 | # .build-post: called after 'build' target 8 | # .clean-pre: called before 'clean' target 9 | # .clean-post: called after 'clean' target 10 | # .clobber-pre: called before 'clobber' target 11 | # .clobber-post: called after 'clobber' target 12 | # .all-pre: called before 'all' target 13 | # .all-post: called after 'all' target 14 | # .help-pre: called before 'help' target 15 | # .help-post: called after 'help' target 16 | # 17 | # Targets beginning with '.' are not intended to be called on their own. 18 | # 19 | # Main targets can be executed directly, and they are: 20 | # 21 | # build build a specific configuration 22 | # clean remove built files from a configuration 23 | # clobber remove all built files 24 | # all build all configurations 25 | # help print help mesage 26 | # 27 | # Targets .build-impl, .clean-impl, .clobber-impl, .all-impl, and 28 | # .help-impl are implemented in nbproject/makefile-impl.mk. 29 | # 30 | # Available make variables: 31 | # 32 | # CND_BASEDIR base directory for relative paths 33 | # CND_DISTDIR default top distribution directory (build artifacts) 34 | # CND_BUILDDIR default top build directory (object files, ...) 35 | # CONF name of current configuration 36 | # CND_PLATFORM_${CONF} platform name (current configuration) 37 | # CND_ARTIFACT_DIR_${CONF} directory of build artifact (current configuration) 38 | # CND_ARTIFACT_NAME_${CONF} name of build artifact (current configuration) 39 | # CND_ARTIFACT_PATH_${CONF} path to build artifact (current configuration) 40 | # CND_PACKAGE_DIR_${CONF} directory of package (current configuration) 41 | # CND_PACKAGE_NAME_${CONF} name of package (current configuration) 42 | # CND_PACKAGE_PATH_${CONF} path to package (current configuration) 43 | # 44 | # NOCDDL 45 | 46 | 47 | # Environment 48 | MKDIR=mkdir 49 | CP=cp 50 | CCADMIN=CCadmin 51 | 52 | 53 | # build 54 | build: .build-post 55 | 56 | .build-pre: 57 | # Add your pre 'build' code here... 58 | 59 | .build-post: .build-impl 60 | # Add your post 'build' code here... 61 | 62 | 63 | # clean 64 | clean: .clean-post 65 | 66 | .clean-pre: 67 | # Add your pre 'clean' code here... 68 | 69 | .clean-post: .clean-impl 70 | # Add your post 'clean' code here... 71 | 72 | 73 | # clobber 74 | clobber: .clobber-post 75 | 76 | .clobber-pre: 77 | # Add your pre 'clobber' code here... 78 | 79 | .clobber-post: .clobber-impl 80 | # Add your post 'clobber' code here... 81 | 82 | 83 | # all 84 | all: .all-post 85 | 86 | .all-pre: 87 | # Add your pre 'all' code here... 88 | 89 | .all-post: .all-impl 90 | # Add your post 'all' code here... 91 | 92 | 93 | # build tests 94 | build-tests: .build-tests-post 95 | 96 | .build-tests-pre: 97 | # Add your pre 'build-tests' code here... 98 | 99 | .build-tests-post: .build-tests-impl 100 | # Add your post 'build-tests' code here... 101 | 102 | 103 | # run tests 104 | test: .test-post 105 | 106 | .test-pre: build-tests 107 | # Add your pre 'test' code here... 108 | 109 | .test-post: .test-impl 110 | # Add your post 'test' code here... 111 | 112 | 113 | # help 114 | help: .help-post 115 | 116 | .help-pre: 117 | # Add your pre 'help' code here... 118 | 119 | .help-post: .help-impl 120 | # Add your post 'help' code here... 121 | 122 | 123 | 124 | # include project implementation makefile 125 | include nbproject/Makefile-impl.mk 126 | 127 | # include project make variables 128 | include nbproject/Makefile-variables.mk 129 | -------------------------------------------------------------------------------- /R/BAMscale_peak_density_version2/PlotXY_version2.R: -------------------------------------------------------------------------------- 1 | library(shiny) 2 | library(GenomicRanges) 3 | library(data.table) 4 | library(ggplot2) 5 | 6 | options(shiny.maxRequestSize=300*1024^2) 7 | 8 | 9 | subsetQuantwithBed = function(peaksfile, bedfile) { 10 | coords = read.table(bedfile)[,1:3] 11 | colnames(coords) = c("chr", "start", "end") 12 | coords = makeGRangesFromDataFrame(coords) 13 | 14 | peaks = read.table(peaksfile, 15 | header = T, 16 | sep = "\t") 17 | 18 | peakcoords = GRanges(peaks[,1]) 19 | 20 | overs = findOverlaps(query = peakcoords, subject = coords) 21 | peakoverlaps = unique(queryHits(overs)) 22 | 23 | return (peaks[peakoverlaps,]) 24 | 25 | } 26 | 27 | 28 | ui <- fluidPage( 29 | fluidRow( 30 | column( 31 | 6, 32 | align = "center", 33 | tags$img(src = "https://healthtech.upenn.edu/wp-content/uploads/2018/09/nci_-1200x600.jpg", align = "left", height = "100px", width = "200px") #NIH-NCI 34 | ) 35 | ), 36 | sidebarLayout( 37 | sidebarPanel( 38 | fileInput('datafile', 'Choose quantified peak file', 39 | accept = c('text/csv', 'text/comma-separated-values,text/plain', '.csv')), 40 | 41 | fileInput("bedfile", "Choose a BED file to subset peaks", 42 | multiple = FALSE, 43 | accept = c(".bed")), 44 | 45 | conditionalPanel( 46 | # use a server side condition 47 | # placeholders will be replaced from the server 48 | condition = "output.fileUploaded", 49 | selectInput("xAxis", "X-axis sample", ""), 50 | selectInput("yAxis", "Y-axis sample", ""), 51 | numericInput("slidelimit", "Axis limit", 1), 52 | numericInput("hexcount", "No. of hex bins", 200)) 53 | 54 | 55 | 56 | ), 57 | mainPanel( 58 | plotOutput("plotXY") 59 | ) 60 | ) 61 | ) 62 | 63 | server <- function(input, output, session){ 64 | # create reactive version of the dataset (a data.frame object) 65 | filedata <- reactive({ 66 | infile <- input$datafile 67 | if (is.null(infile)) 68 | # User has not uploaded a file yet. Use NULL to prevent observeEvent from triggering 69 | return(NULL) 70 | 71 | if(is.null(input$bedfile)) { 72 | temp <- read.table(input$datafile$datapath, 73 | header = T, 74 | sep = "\t", check.names = F) 75 | } else { 76 | temp = subsetQuantwithBed(input$datafile$datapath, input$bedfile$datapath) 77 | } 78 | }) 79 | 80 | output$fileUploaded <- reactive({ 81 | return(!is.null(filedata())) 82 | }) 83 | outputOptions(output, 'fileUploaded', suspendWhenHidden=FALSE) 84 | 85 | observeEvent(filedata(), { 86 | snames = names(filedata()) 87 | snames = snames[2:length(snames)] 88 | updateSelectInput(session, "xAxis", choices =snames, selected=snames[1]) 89 | updateSelectInput(session, "yAxis", choices =snames, selected = snames[2]) 90 | xv = input$yAxis 91 | yv = input$xAxi 92 | if (is.null(xv) || is.null(yv)){ 93 | xv = snames[1] 94 | yv = snames[2] 95 | } 96 | 97 | df = as.data.frame(t(rbind(filedata()[, xv], filedata()[, yv]))) 98 | axislim = round(min(quantile(df[,1], .99)[1], quantile(df[,2], .99)[1])) 99 | axismax = max(max(df[,1]), max(df[,2])) 100 | baxislim = max(quantile(df[,1], .999)[1], quantile(df[,2], .999)[1]) 101 | updateNumericInput(session, "slidelimit", value = axislim) 102 | }) 103 | 104 | output$plotXY <- renderPlot({ 105 | colpal = rev(rainbow(5)) 106 | colpal[1] = "#562188FF" 107 | colpal[3] = "#225A16FF" 108 | 109 | gp1 <- NULL 110 | gp2 = NULL 111 | 112 | dat = filedata() 113 | if (!is.null(dat)){ 114 | xv <- input$xAxis 115 | yv <- input$yAxis 116 | if (!is.null(xv) & !is.null(yv)){ 117 | df = as.data.frame(t(rbind(filedata()[, input$xAxis], filedata()[, input$yAxis]))) 118 | colnames(df) = c(input$xAxis, input$yAxis) 119 | axislim = min(quantile(df[,1], .99)[1], quantile(df[,2], .99)[1]) 120 | axismax = max(max(df[,1]), max(df[,2])) 121 | baxislim = max(quantile(df[,1], .999)[1], quantile(df[,2], .999)[1]) 122 | 123 | gp1 = ggplot(df, aes(x = df[,1], y = df[,2])) + 124 | geom_hex(bins = input$hexcount) + 125 | scale_fill_gradientn(colours = colpal, name = "log2\n(Density of Peaks)", trans = "log2") + 126 | scale_x_continuous(expand = c(0, 0), limits = c(0, input$slidelimit)) + 127 | scale_y_continuous(expand = c(0, 0), limits = c(0, input$slidelimit)) + 128 | xlab(input$xAxis) + 129 | ylab(input$yAxis) + 130 | coord_equal(ratio = 1) + 131 | geom_abline(intercept = 0, slope = 1, colour="black", size=1.00, linetype = "twodash") + 132 | theme_classic() + 133 | theme(text = element_text(size = 14)) 134 | } 135 | } 136 | 137 | return (gp1) 138 | }) 139 | } 140 | 141 | shinyApp(ui, server) 142 | -------------------------------------------------------------------------------- /R/BAMscale_plot_peak_density/peakDensity.command: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | export SCPATH=$(dirname "$BASH_SOURCE") 4 | Rscript -e 'library(methods); shiny::runApp(paste0(Sys.getenv("SCPATH"), "/plotXY/PlotXY.R"), launch.browser = TRUE)' 5 | -------------------------------------------------------------------------------- /R/BAMscale_plot_peak_density/plotXY/PlotXY.R: -------------------------------------------------------------------------------- 1 | list.of.packages <- c("shiny", "ggplot2", "tidyr", "ggrepel", "gridExtra") 2 | new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])] 3 | if(length(new.packages)) install.packages(new.packages) 4 | 5 | 6 | library(shiny) 7 | library(ggplot2) 8 | library(tidyr) 9 | library(ggrepel) 10 | library(gridExtra) 11 | 12 | options(shiny.maxRequestSize=150*1024^2) # Current Max File Size: 150MB (For larger files increase this accordingly) 13 | 14 | # Define UI for application that draws a histogram 15 | ui <- fluidPage( 16 | tags$style(type="text/css", 17 | ".shiny-output-error { visibility: hidden; }", 18 | ".shiny-output-error:before { visibility: hidden; }" 19 | ), 20 | fluidRow( 21 | column( 22 | 6, 23 | align = "center", 24 | tags$img(src = "https://healthtech.upenn.edu/wp-content/uploads/2018/09/nci_-1200x600.jpg", align = "left", height = "100px", width = "200px") #NIH-NCI 25 | ) 26 | ), 27 | fluidRow( 28 | column(12, 29 | align = "center", 30 | titlePanel( 31 | title = "Application for Plotting Peak-Density Figures", 32 | windowTitle = "Application for Plotting Peak Density Figures" 33 | ), 34 | fluidRow( 35 | column( 36 | 3, 37 | align = "center", 38 | fileInput( 39 | "file1", 40 | "Choose Normalized Coverage File", 41 | accept = c(".tsv")), 42 | tags$hr() 43 | ), 44 | column( 45 | 6, 46 | align = "center", 47 | uiOutput("slider") 48 | ) 49 | ), 50 | fluidRow( 51 | column( 52 | 6, 53 | align = "center", 54 | uiOutput("xAxis") 55 | ), 56 | column( 57 | 6, 58 | align = "center", 59 | uiOutput("yAxis") 60 | ) 61 | ), 62 | fluidRow( 63 | column( 64 | 12, 65 | align = "center", 66 | plotOutput("plotXY", width = "700px", height = "700px") 67 | ) 68 | ), 69 | fluidRow( 70 | column( 71 | 12, 72 | align = "right", 73 | "Developed by Jacob M Gross & Lorinc S Pongor" 74 | ) 75 | ) 76 | ) 77 | ) 78 | ) 79 | 80 | # Define server logic required to draw a histogram 81 | server <- function(input, output) { 82 | 83 | raw_data = eventReactive(input$file1, { 84 | a = read.table(input$file1$datapath, stringsAsFactors = F, check.names = F, sep = "\t", header = T, fill = T) 85 | a = separate(a, 1, c("chr", "position"), sep = ":") 86 | a = separate(a, 2, c("Start", "Stop"), sep = "-") 87 | }) 88 | 89 | output$slider = renderUI({ 90 | dat = as.data.frame(t(rbind(raw_data()[, input$xaxis], raw_data()[, input$yaxis]))) 91 | sliderInput( 92 | "slider1", 93 | label = h3("Axis Limit Slider"), 94 | min = 0, 95 | max = round(max(dat)+1), 96 | value = max(quantile(dat[,1], .99), quantile(dat[,2], .99)) 97 | ) 98 | }) 99 | 100 | output$xAxis = renderUI({ 101 | dat1 = as.data.frame(raw_data()[,4:ncol(raw_data())]) 102 | items = names(dat1) 103 | selectInput("xaxis", "X-axis:", items, selected = items[1]) 104 | }) 105 | 106 | output$yAxis = renderUI({ 107 | dat1 = as.data.frame(raw_data()[,4:ncol(raw_data())]) 108 | items = names(dat1) 109 | #names(items) = items 110 | selectInput("yaxis", "Y-axis:", items, selected = items[2]) 111 | }) 112 | 113 | output$plotXY <- renderPlot({ 114 | dat = as.data.frame(t(rbind(raw_data()[, input$xaxis], raw_data()[, input$yaxis]))) 115 | colnames(dat) = c(paste0("1: ", input$xaxis), paste0("2: ", input$yaxis)) 116 | 117 | colpal = rev(rainbow(5)) 118 | colpal[1] = "#562188FF" 119 | colpal[3] = "#225A16FF" 120 | 121 | p1 = ggplot(dat, aes(x = dat[,1], y = dat[,2])) + 122 | labs(x = paste0(colnames(dat)[1], " (Reads Per Peak)") , y = paste0(colnames(dat)[2], " (Reads Per Peak)")) + 123 | ggtitle(label = paste0(colnames(dat)[1]," vs ",colnames(dat)[2], " LibSize Filtered")) + 124 | geom_hex(bins = 200) + 125 | scale_fill_gradientn(colours = colpal, name = "log2\n(Density of Peaks)", trans = "log2") + 126 | scale_x_continuous(expand = c(0, 0), limits = c(0, input$slider1)) + 127 | scale_y_continuous(expand = c(0, 0), limits = c(0, input$slider1)) + 128 | geom_abline(intercept = 0, slope = c(0.15579, 0.3193, 0.7208, 1.38742, 3.1315, 6.4188), colour="#8B8B8B", size = 0.5) + 129 | geom_abline(intercept = 0, slope = c( 0.5, 2), colour="#7E7D7D", size = 0.85)+ 130 | geom_abline(intercept = 0, slope = 1, colour="black", size=0.8, linetype = "twodash") + 131 | scale_colour_gradient(low = "red", high = "blue") + 132 | coord_equal(ratio = 1) + 133 | theme(plot.title = element_text(size = 10), legend.title = element_text(size=8), axis.title = element_text(size=8.5), 134 | panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_rect(fill = "#F3F3F3", colour = "black"), 135 | axis.text = element_text(size = 16), axis.title.x = element_text(size = 20), axis.title.y = element_text(size = 20)) 136 | 137 | grid.arrange(p1) 138 | }) 139 | 140 | } 141 | 142 | # Run the application 143 | shinyApp(ui = ui, server = server) 144 | 145 | -------------------------------------------------------------------------------- /R/OKseq_switches.R: -------------------------------------------------------------------------------- 1 | #args = commandArgs(trailingOnly=TRUE) 2 | 3 | if (!require(argparse)) { 4 | PrintErrorMessage() 5 | stop("Please install package names \"rtracklayer\"") 6 | } 7 | 8 | 9 | 10 | library(argparse) 11 | parser <- ArgumentParser() 12 | parser$add_argument("-i", "--input", type="character", default="test.bw", 13 | help="Input OK-seq bigWig file.") 14 | 15 | parser$add_argument("-b", "--binsize", type="integer", default=50000, 16 | help="Bin size [default %(default)s]") 17 | 18 | parser$add_argument("-l", "--leftmin", type="double", default=-0.15, 19 | help="Mean value of bin on left side [default %(default)s]") 20 | 21 | parser$add_argument("-r", "--rightmin", type="double", default=0.15, 22 | help="Mean value of bin on right side [default %(default)s]") 23 | 24 | parser$add_argument("-d", "--diff", type="double", default=0.4, 25 | help="Minimum difference between left and right side means [default %(default)s]") 26 | 27 | parser$add_argument("-s", "--slidesize", type="double", default=50000, 28 | help="Minimum difference between left and right side means [default %(default)s]") 29 | 30 | 31 | args <- parser$parse_args() 32 | 33 | min_left_value = args$leftmin 34 | min_right_value = args$rightmin 35 | min_diff = args$diff 36 | windowsize = args$binsize 37 | infile = args$input 38 | sliding_window = args$slidesize 39 | 40 | 41 | if(length(args) < 1){ 42 | parser$print_help() 43 | stop("Not enough arguments. Replication timing bigwig file has to be specified (created with BAMscale).") 44 | } 45 | 46 | if(!file.exists(infile)) { 47 | parser$print_help() 48 | stop(paste0("Input file specified does not exist: \"", infile, "\"")) 49 | } 50 | 51 | if (!require(rtracklayer)) { 52 | parser$print_help() 53 | stop("Please install package names \"rtracklayer\"") 54 | } 55 | 56 | library(rtracklayer) 57 | 58 | GetMergedElements = function(fbw, typeseg) { 59 | fbw = fbw[fbw$segments == typeseg,] 60 | fbw = reduce(fbw) 61 | fbw$length = width(fbw) 62 | fbw = fbw[fbw$length > min_segment_length] 63 | fbw$segments = typeseg 64 | return(fbw) 65 | } 66 | 67 | outfile = gsub(pattern ="\\.bw|\\.bigWig|\\.bigwig", replacement = "", x = infile) 68 | outfile = paste0(outfile, ".OKseq_switches.bed") 69 | 70 | include_zeroes_segment_thresholds = 0 71 | 72 | bw = import(infile) 73 | 74 | dat_bw = as.data.frame(bw) 75 | 76 | chrnames = as.character(unique(dat_bw$seqnames)) 77 | chrnames = chrnames[grepl(pattern = "_", chrnames) == F] 78 | 79 | df <- data.frame(chr=character(), 80 | start=numeric(), 81 | end=numeric(), 82 | stringsAsFactors=FALSE) 83 | 84 | binsize = dat_bw[1, "end"] - dat_bw[1, 'start'] + 1 85 | windowsize = windowsize / binsize 86 | rsize = args$binsize 87 | 88 | 89 | for(j in 1:length(chrnames)) { 90 | print(chrnames[j]) 91 | dat = dat_bw[dat_bw$seqnames == chrnames[j],] 92 | endcoord = dat[nrow(dat), "end"] 93 | 94 | nbins = (endcoord - rsize) / sliding_window 95 | lastpos = -1 96 | reslist = list() 97 | 98 | for(i in 1:nbins) { 99 | startcoord = as.integer(((i-1)*sliding_window)) - 1 100 | endcoord = as.integer(startcoord + rsize*3) + 2 101 | 102 | tdat = dat[dat$start >= (startcoord-1) & dat$end <= (endcoord + 2),] 103 | tscore = rep(tdat$score, tdat$width) 104 | 105 | if(length(tscore) >= rsize*2) { 106 | lscore = mean(tscore[0:rsize]) 107 | rscore = mean(tscore[(length(tscore)-rsize):length(tscore)]) 108 | 109 | if(rscore > lscore) { 110 | if(rscore - lscore >= min_diff & lscore <= min_left_value & rscore >= min_right_value) { 111 | reslist = append(reslist, list(chrnames[j], startcoord, endcoord)) 112 | lastpos = endcoord 113 | } 114 | } 115 | } 116 | } 117 | 118 | if(length(reslist) > 0) { 119 | leftpos = data.frame(matrix(unlist(reslist), nrow=length(reslist) / 3, byrow=T),stringsAsFactors=FALSE) 120 | colnames(leftpos) = c("chr", "start", "end") 121 | leftpos$start = as.numeric(leftpos$start) 122 | leftpos$end = as.numeric(leftpos$end) 123 | 124 | if(nrow(df) == 0) { 125 | df = leftpos 126 | } else { 127 | df = merge(df, leftpos, all = T, no.dup = F) 128 | } 129 | } 130 | } 131 | 132 | leftpos = df 133 | leftpos$tval = "." 134 | leftpos$nval = "." 135 | leftpos$color = "77,0,0" 136 | 137 | rightpos = leftpos 138 | rightpos$start = rightpos$start + 2*rsize 139 | rightpos$end = rightpos$start + rsize 140 | rightpos$color = "19,108,0" 141 | leftpos$end = leftpos$start + rsize 142 | merged = rbind(leftpos, rightpos) 143 | 144 | interpos = leftpos 145 | interpos$start = interpos$start + rsize 146 | interpos$end = interpos$start + rsize 147 | interpos$color = "240,230,140" 148 | merged = rbind(merged, interpos) 149 | 150 | merged = merged[order(merged$chr, merged$start),] 151 | merged = merged[,c("chr", "start", "end", "tval", "tval", "nval", "start", "end", "color")] 152 | write.table(merged, outfile, quote = F, sep = "\t", col.names = F, row.names = F) 153 | 154 | -------------------------------------------------------------------------------- /R/Plot_heatmap/Heatmap_with_clustering.R: -------------------------------------------------------------------------------- 1 | library(rtracklayer) 2 | library(ggplot2) 3 | library(ComplexHeatmap) 4 | require(reshape2) 5 | library(data.table) 6 | library(circlize) 7 | library(RColorBrewer) 8 | source("heatmap_functions_clean.R") 9 | 10 | # If you use this script, please cite BAMscale, rtracklayer and ComplexHeatmap!!! 11 | # Disclaimer/FYI: this is still under development! 12 | 13 | #Author: Lorinc Pongor (pongorlorinc@gmail.com) 14 | 15 | ## INPUTS ### 16 | 17 | plot_pdf_name = "Heatmap_plot.pdf" 18 | 19 | # downsample peaks to these many coordinates (separately each peak) 20 | subset_peaks = 0 # set to 0 to stop 21 | extend = 5000 #extend peaks by these many bases 22 | nbin = 100 #number of bins in heatmap, the default 100 is pretty good 23 | 24 | # Number of clusters in k-means clustering. 25 | # Set to 0 to turn it off 26 | k_means_clusters = 3 27 | 28 | # Numeric values to set for clustering, or ordering of peaks based on intensity 29 | # Please give numeric values. Eg: 30 | # samples_used_clustering_arranging = c(5,6) for samples 5 and 6 31 | samples_used_clustering_arranging = NULL # set to NULL to use all samples 32 | 33 | 34 | #Z-score data for k-means clustering or sorting peaks? 0: no, 1: yes 35 | z_score_data_clustering_arranging = 1 36 | 37 | # Scale samples separately? 38 | # Yes: plot_scale_samples_separate = 1 39 | # No: plot_scale_samples_separate = 0 40 | # It is encouraged to set to 1 (No) when comparing bigwigs from different sources. 41 | # If the data is from one experiment (eg. same antibody, same time), and bigwig is normalized, this can be set to 0 42 | plot_scale_samples_separate = 1 43 | 44 | # one or multiple bigwigs 45 | bwfile = c("data/Sample_MCF7_RecQ1_2_020_C_HCVW3BGX2.dd.bam.scaled.bw", 46 | "data/Sample_MCF7_ER_2_020_C_HCVW3BGX2.dd.bam.scaled.bw", 47 | "data/FOXA1_ENCFF255FPM.bigWig", 48 | "data/GATA3_ENCFF477GZL.bigWig", 49 | "data/H3K27ac_ENCFF411FCW.bigWig", 50 | "data/H3K4me1_ENCFF983TTS.bigWig", 51 | "data/H3K4Me3_ENCFF862CKA.bigWig", 52 | "data/H3K9Me3_ENCFF688REP.bigWig") 53 | 54 | #one or multiple peaks 55 | peaks = c("data/MCF7_RecQ1_2_vs_MCF7_IgG_2_MACS2_brdpks.bed") 56 | 57 | # Name of bigwigs to be used for the subtitle of each heatmap (column) 58 | # Same number of names have to be specified as the number of bigwig files 59 | # Set to bwnames = NULL to use file name 60 | bwnames = c("RECQ1", "ERa", "FOXA1", "GATA3", "H3K27ac", "H3K4me1", "H3K4Me3", "H3K9Me3") 61 | 62 | # Name of peaks to be used for the rowname of each heatmap 63 | # Same number of names have to be specified as the number of peak files 64 | # Set to pnames = NULL to use file name 65 | pnames = NULL 66 | 67 | ## END OF INPUTS ### 68 | 69 | 70 | z_score_data = 0 # please don't use this for now 71 | 72 | peakds = list() 73 | peakds$peaks = peaks 74 | peakds$bwfile = bwfile 75 | peakds$bwnames = bwnames 76 | peakds$peaknames = pnames 77 | peakds$subset_peaks = subset_peaks 78 | peakds$extend = extend 79 | peakds$nbin = nbin 80 | peakds$k_means_clusters = k_means_clusters 81 | peakds$samples_used_clustering_arranging = samples_used_clustering_arranging 82 | peakds$plot_scale_samples_separate = plot_scale_samples_separate 83 | peakds$z_score_data_clustering_arranging = z_score_data_clustering_arranging 84 | peakds$bed = GRanges() 85 | peakds$peak_quants = data.frame() 86 | peakds$raw_binmats = list() 87 | peakds$binmats = list() 88 | peakds$order = NULL 89 | peakds$clusters = NULL 90 | peakds$errors = 0 91 | peakds$plotlist = list() 92 | peakds$z_score_data = z_score_data 93 | peakds$heatmap_lineplot_means = list() 94 | peakds$max_ann = list() 95 | peakds$min_ann = list() 96 | peakds$heatmap_max = list() 97 | peakds$heatmap_min = list() 98 | source("heatmap_functions_clean.R") 99 | start.time <- Sys.time() 100 | peakds = ImportHeatmapData(peakds) 101 | 102 | end.time <- Sys.time() 103 | time.taken <- end.time - start.time 104 | time.taken 105 | 106 | if(peakds$errors > 0) { 107 | stop("Check inputs!!!") 108 | } 109 | 110 | peakds = PrepareDataForPlotting(peakds) 111 | peakds = PlotHeatmaps(peakds) 112 | draw(peakds$combined_plot, ht_gap = unit(1, "cm")) 113 | 114 | pdf(plot_pdf_name, 115 | width = (length(peakds$bwfile) * 4 + (length(peakds$bwfile)-1)) * 0.393701, 116 | height = 18 * 0.393701) 117 | draw(peakds$combined_plot, ht_gap = unit(1, "cm")) 118 | dev.off() 119 | -------------------------------------------------------------------------------- /R/Plot_heatmap/Heatmapper.R: -------------------------------------------------------------------------------- 1 | library(GenomicRanges) 2 | library(dynamicTreeCut) 3 | library(ComplexHeatmap) 4 | library(circlize) 5 | library(RColorBrewer) 6 | library(ChIPseeker) 7 | library(TxDb.Hsapiens.UCSC.hg19.knownGene) 8 | library(ggplot2) 9 | library(data.table) 10 | 11 | source("Heatmapper_functions.R") 12 | 13 | 14 | bwfile = c("../../SCLC_NAPY_ChIP/NA_chip/bigwigs/GSM1700639_H889_ASCL1.scaled.bw", 15 | "../../SCLC_NAPY_ChIP/NA_chip/bigwigs/GSM1700641_H82_NEUROD1.scaled.bw", 16 | "/Volumes/LMP/ngs/chip/SCLC_cell_lines/bigwigs_hg19_clean/NCI-H1048_POU2F3_rep1.hg19_clean.bam.scaled.bw", 17 | "/Volumes/LMP/ngs/chip/SCLC_cell_lines/bigwigs_hg19_clean/NCI-H889_H3K27ac_rep1.hg19_clean.bam.scaled.bw", 18 | "/Volumes/LMP/ngs/chip/SCLC_cell_lines/bigwigs_hg19_clean/NCI-H82_H3K27ac_rep1.hg19_clean.bam.scaled.bw", 19 | "/Volumes/LMP/ngs/chip/SCLC_cell_lines/bigwigs_hg19_clean/NCI-H1048_H3K27ac_rep1.hg19_clean.bam.scaled.bw", 20 | "/Volumes/LMP/ngs/chip/SCLC_cell_lines/bigwigs_hg19_clean/DMS114_H3K27ac_rep1.hg19_clean.bam.scaled.bw") 21 | 22 | peaks = c("Intervene_results/sets/100_ASCL1.bed", 23 | "Intervene_results/sets/010_NEUROD1.bed", 24 | "Intervene_results/sets/001_POU2F3.bed") 25 | 26 | bwnames = c("H889_ASCL1", "H82_NEUROD1", "H1048_POU2F3", "H889", "H82", "H1048", "DMS114") 27 | pnames = c("A", "N", "P") 28 | 29 | heatobj = ImportHeatMapperData(bedfiles = peaks, 30 | bednames = pnames, 31 | bwfiles = bwfile, 32 | bwnames = bwnames, 33 | extend_peaks = c(2500), 34 | subset_peaks = 1500, 35 | individual_subsetting = 1) 36 | 37 | heatobj = PrepBEDheatmapdata(obj = heatobj, cluster = "no") 38 | heatobj = PrepHeatmapPlots(obj = heatobj, 39 | color_palette = c("Reds", "Greens", "Blues", "Purples", "Purples", "Purples", "Purples"), 40 | #lineplot_max_value = c(20), lineplot_min_value = c(10), 41 | split_colors = c("red", "green", "blue"), 42 | raster_quality = 1, same_scale = 0) 43 | draw(heatobj@combined_heatmaps, ht_gap = unit(1, "cm")) 44 | 45 | -------------------------------------------------------------------------------- /R/Plot_heatmap/heatmap_functions_clean.R: -------------------------------------------------------------------------------- 1 | library(data.table) 2 | library(rtracklayer) 3 | 4 | ScoreGrangesBWmean = function(coords, bw) { 5 | overs = as.data.frame(findOverlaps(coords, bw)) 6 | overs$score = bw[overs$subjectHits]$score 7 | overs.dt = data.table(overs) 8 | oversum = overs.dt[,list(score = mean(score)), by='queryHits'] 9 | meanvals = rep(NA, length(coords)) 10 | meanvals[oversum$queryHits] = oversum$score 11 | return (meanvals) 12 | } 13 | 14 | Quantify_peaks = function(bed, bwfile) { 15 | print("Quantifying peaks:") 16 | peak_quants = as.data.frame(matrix(ncol = length(bwfile) + 1, nrow = length(bed))) 17 | colnames(peak_quants) = c(as.character(seq(1,length(bwfile))), "mean") 18 | rownames(peak_quants) = bed$pname 19 | 20 | for(i in 1:length(bwfile)) { 21 | cat("\tQuantifying peaks for:", bwfile[i],"\n") 22 | bw = import(bwfile[i], which = bed) 23 | peak_quants[,i] = ScoreGrangesBWmean(bed, bw) 24 | } 25 | 26 | if(length(bwfile) > 1) { 27 | peak_quants$mean = rowMeans(peak_quants[,1:length(bwfile)]) 28 | } else { 29 | peak_quants$mean = as.numeric(peak_quants[,1]) 30 | } 31 | 32 | peak_quants$id = bed$id 33 | cat("\t", "Done importing peaks","\n") 34 | return(peak_quants) 35 | } 36 | 37 | CheckBWfiles = function(peakds) { 38 | file_not_exist = 0 39 | if(is.null(length(peakds$bwfile)) | length(peakds$bwfile) < 1) { 40 | print(paste0("ERROR: no BW files were specified")) 41 | } 42 | 43 | for(i in 1:length(peakds$bwfile)) { 44 | if(!file.exists(peakds$bwfile[i])) { 45 | print(paste0("ERROR: file \"", peakds$bwfile[i], "\" does not exist")) 46 | file_not_exist = 1 47 | } 48 | } 49 | 50 | if(file_not_exist == 1) { 51 | peakds$errors = 1 52 | return (peakds) 53 | } 54 | 55 | if(is.null(peakds$bwnames)) { 56 | peakds$bwnames = basename(peakds$bwfile) 57 | } else { 58 | if(length(peakds$bwnames) != length(peakds$bwfile)) { 59 | print(paste0("ERROR: number of specified BW names (",length(peakds$bwnames),") is not equal to number of BW files(",length(peakds$bwfile),")")) 60 | peakds$errors = 1 61 | } 62 | } 63 | 64 | if(file_not_exist == 1) { 65 | peakds$errors = 1 66 | return (peakds) 67 | } 68 | 69 | return (peakds) 70 | } 71 | 72 | 73 | 74 | CheckBEDfiles = function(peakds) { 75 | print("Importing BED coordinates") 76 | file_not_exist = 0 77 | if(is.null(length(peakds$peaks)) | length(peakds$peaks) < 1) { 78 | print(paste0("ERROR: no peak files were specified")) 79 | } 80 | 81 | for(i in 1:length(peakds$peaks)) { 82 | if(!file.exists(peakds$peaks[i])) { 83 | print(paste0("ERROR: file \"", peakds$peaks[i], "\" does not exist")) 84 | file_not_exist = 1 85 | } 86 | } 87 | 88 | if(file_not_exist == 1) { 89 | peakds$errors = 1 90 | return (peakds) 91 | } 92 | 93 | if(is.null(peakds$peaknames)) { 94 | peakds$peaknames = basename(peakds$peaks) 95 | } else { 96 | if(length(peakds$peaknames) != length(peakds$peaks)) { 97 | print(paste0("ERROR: number of specified peak names (",length(peakds$peaknames),") is not equal to number of peak files(",length(peakds$peaks),")")) 98 | peakds$errors = 1 99 | } 100 | } 101 | 102 | if(file_not_exist == 1) { 103 | peakds$errors = 1 104 | return (peakds) 105 | } 106 | 107 | for(i in 1:length(peakds$peaks)) { 108 | cat("\tImporting BED:", peakds$peaks[i],"\n") 109 | tbed = read.table(peakds$peaks[i], sep = "\t")[,1:3] 110 | colnames(tbed) = c("chr", "start", "end") 111 | tbed$id = i 112 | tbed = makeGRangesFromDataFrame(tbed, keep.extra.columns = T) 113 | tbed = tbed + extend 114 | 115 | if(peakds$subset_peaks > 0 & length(tbed) > peakds$subset_peaks) { 116 | tbed = tbed[sample(seq(1,length(tbed)), peakds$subset_peaks)] 117 | } 118 | 119 | if(i == 1) { 120 | bed = tbed 121 | } else { 122 | bed = c(bed, tbed) 123 | } 124 | } 125 | 126 | bed$pname = seq(1, length(bed)) 127 | names(bed) = bed$pname 128 | peakds$bed = bed 129 | cat("\t", "Done reading BED files","\n") 130 | 131 | return (peakds) 132 | } 133 | 134 | BinCoordinates = function(bw, coord, nbins) { 135 | coord$length = width(coord) 136 | overs = as.data.frame(findOverlaps(coord, bw)) 137 | overs$width = coord[overs$queryHits]$length 138 | 139 | dfcoord = as.data.frame(coord) 140 | 141 | dfcoord = dfcoord[overs$queryHits,] 142 | overs$strand = as.character(dfcoord$strand) 143 | overs$tss = ifelse(overs$strand == "+", dfcoord$end, dfcoord$start) 144 | overs$probe_pos = start(bw[overs$subjectHits]) 145 | overs$dist = ifelse(overs$strand == "+", overs$tss - overs$probe_pos, overs$probe_pos - overs$tss) 146 | overs$bin = round(overs$dist / (overs$width / (nbins-1))) 147 | overs$bin = overs$bin + 1 148 | overs$score = bw[overs$subjectHits]$score 149 | overs.dt = data.table(overs) 150 | oversum = overs.dt[,list(score = mean(score)), by=c('queryHits', 'bin')] 151 | 152 | return (oversum) 153 | } 154 | 155 | Subset_Binned_to_common_peaks = function(raw_binmats) { 156 | qhits = list() 157 | 158 | for(i in 1:length(raw_binmats)) { 159 | if(i == 1) { 160 | qhits = rownames(raw_binmats[[i]]) 161 | } else { 162 | qhits = intersect(qhits, rownames(raw_binmats[[i]])) 163 | } 164 | } 165 | 166 | for(i in 1:length(raw_binmats)) { 167 | raw_binmats[[i]] = raw_binmats[[i]][qhits,] 168 | } 169 | 170 | return (raw_binmats) 171 | } 172 | 173 | 174 | Bin_peaks = function(bed, bwfile, nbin) { 175 | print("Importing and binning peaks:") 176 | raw_binmats = list() 177 | 178 | for(i in 1:length(bwfile)) { 179 | cat("\tImporting peaks for:", bwfile[i],"\n") 180 | bw = import(bwfile[i], which = bed) 181 | df = BinCoordinates(bw, bed, nbin) 182 | raw_binmats[[i]] = as.data.frame(acast(df,queryHits~bin, value.var = "score")) 183 | raw_binmats[[i]] = raw_binmats[[i]][,as.character(seq(1:nbin))] 184 | } 185 | 186 | raw_binmats = Subset_Binned_to_common_peaks(raw_binmats) 187 | cat("\t", "Done quantifying peaks","\n") 188 | return (raw_binmats) 189 | } 190 | 191 | ImportHeatmapData = function(peakds) { 192 | peakds = CheckBEDfiles(peakds) 193 | 194 | if(peakds$errors > 0) { 195 | return (peakds) 196 | } 197 | 198 | peakds = CheckBWfiles(peakds) 199 | 200 | if(peakds$errors > 0) { 201 | return (peakds) 202 | } 203 | 204 | peakds$peak_quants = Quantify_peaks(peakds$bed, peakds$bwfile) 205 | peakds$raw_binmats = Bin_peaks(bed = peakds$bed, bwfile = peakds$bwfile, nbin = peakds$nbin) 206 | peakds$bed = peakds$bed[rownames(peakds$raw_binmats[[1]])] 207 | 208 | return (peakds) 209 | } 210 | 211 | Scale_binned_matrices = function(raw_binmats, z_score_data) { 212 | binmats = list() 213 | 214 | for(i in 1:length(raw_binmats)) { 215 | if(z_score_data == 1) { 216 | binmats[[i]] = as.data.frame(t(scale(t(raw_binmats[[i]])))) 217 | } else if (z_score_data == -1) { 218 | binmats[[i]] = as.data.frame(scale(raw_binmats[[i]])) 219 | } else { 220 | binmats[[i]] = raw_binmats[[i]] 221 | } 222 | } 223 | 224 | return (binmats) 225 | } 226 | 227 | Arrange_peaks_for_plotting = function(peak_quants, bwfile, k_means_clusters, z_score_data_clustering_arranging, samples_used_clustering_arranging) { 228 | mean_table = data.frame(mean = peak_quants$mean, id = peak_quants$id) 229 | tmp_peak_quants = peak_quants # used for ordering temporarily 230 | 231 | if(z_score_data_clustering_arranging == 1) { 232 | for(i in 1:length(bwfile)) { 233 | tmp_peak_quants[,i] = as.numeric(scale(tmp_peak_quants[,i])) 234 | } 235 | } 236 | 237 | if(is.null(samples_used_clustering_arranging)) { 238 | if(length(bwfile) == 1) { 239 | tmp_peak_quants$mean = as.numeric(tmp_peak_quants[,1]) 240 | } else { 241 | tmp_peak_quants$mean = rowMeans(tmp_peak_quants[,1:length(bwfile)], na.rm = T) 242 | } 243 | } else { 244 | if(length(samples_used_clustering_arranging) == 1) { 245 | tmp_peak_quants$mean = as.numeric(tmp_peak_quants[,samples_used_clustering_arranging]) 246 | } else { 247 | tmp_peak_quants$mean = rowMeans(tmp_peak_quants[,samples_used_clustering_arranging], na.rm = T) 248 | } 249 | } 250 | 251 | if(k_means_clusters > 0) { 252 | clusters = NULL 253 | 254 | if(length(bwfile) == 1) { 255 | clusters = kmeans(tmp_peak_quants[,1], centers = k_means_clusters) 256 | } else if (!is.null(samples_used_clustering_arranging)){ 257 | clusters = kmeans(tmp_peak_quants[,samples_used_clustering_arranging], centers = k_means_clusters) 258 | } else { 259 | clusters = kmeans(tmp_peak_quants[,1:length(bwfile)], centers = k_means_clusters) 260 | } 261 | 262 | tmp_peak_quants$split = clusters$cluster 263 | clusterids = unique(as.numeric(clusters$cluster)) 264 | clustermeans = rep(0, length(clusterids)) 265 | names(clustermeans) = clusterids 266 | 267 | for(i in 1:length(clustermeans)) { 268 | clustermeans[i]=mean(tmp_peak_quants[tmp_peak_quants$split == clusterids[i], "mean"]) 269 | } 270 | 271 | clustermeans = sort(clustermeans, decreasing = T) 272 | peak_quants_reordered = data.frame() 273 | for(i in 1:length(clustermeans)) { 274 | tmp = tmp_peak_quants[tmp_peak_quants$split == names(clustermeans[i]),] 275 | tmp = tmp[order(tmp$mean, decreasing = T),] 276 | tmp$split = i 277 | peak_quants_reordered = rbind(peak_quants_reordered, tmp) 278 | } 279 | tmp_peak_quants = peak_quants_reordered 280 | peak_quants = peak_quants[rownames(tmp_peak_quants),] 281 | peak_quants$split = tmp_peak_quants$split 282 | 283 | } else { 284 | peak_quants$split = peak_quants$id 285 | tmp_peak_quants = tmp_peak_quants[order(tmp_peak_quants$split, tmp_peak_quants$mean, decreasing = T),] 286 | peak_quants = peak_quants[rownames(tmp_peak_quants),] 287 | } 288 | 289 | return (peak_quants) 290 | } 291 | 292 | Calc_mean_values_for_plots = function(bwfile, binmats, peak_quants) { 293 | cmeans = list() 294 | heatmap_min = c(rep(0, length(bwfile))) 295 | for(i in 1:length(bwfile)) { 296 | cmeans[[i]] = as.data.frame(matrix(ncol = length(unique(peak_quants$split)), nrow = nbin)) 297 | colnames(cmeans[[i]]) = unique(peak_quants$split) 298 | for(j in 1:length(unique(peak_quants$split))) { 299 | cmeans[[i]][,j] = colMeans(as.matrix(binmats[[i]][rownames(peak_quants[peak_quants$split == unique(peak_quants$split)[j],]),]), na.rm = T) 300 | } 301 | } 302 | 303 | return (cmeans) 304 | } 305 | 306 | Calc_max_line_plot = function(cmeans, plot_scale_samples_separate) { 307 | max_ann = c(rep(0, length(cmeans))) 308 | 309 | for(i in 1:length(cmeans)) { 310 | max_ann[i] = max(cmeans[[i]], na.rm = T) 311 | } 312 | 313 | if(plot_scale_samples_separate == 0) { 314 | max_ann = c(rep(max(max_ann), length(cmeans))) 315 | } 316 | 317 | return (max_ann) 318 | } 319 | 320 | Calc_min_line_plot = function(cmeans, plot_scale_samples_separate) { 321 | min_ann = c(rep(0, length(cmeans))) 322 | 323 | for(i in 1:length(cmeans)) { 324 | min_ann[i] = min(0, min(cmeans[[i]], na.rm = T)) 325 | } 326 | 327 | if(plot_scale_samples_separate == 0) { 328 | min_ann = c(rep(min(min_ann), length(cmeans))) 329 | } 330 | 331 | return (min_ann) 332 | } 333 | 334 | Calc_heatmap_max_values = function(binmats, plot_scale_samples_separate) { 335 | heatmap_max = c(rep(0, length(binmats))) 336 | 337 | for(i in 1:length(binmats)) { 338 | heatmap_max[i] = quantile(binmats[[i]],.95, na.rm=T)[[1]] 339 | } 340 | 341 | if(plot_scale_samples_separate == 0) { 342 | heatmap_max = c(rep(max(heatmap_max), length(binmats))) 343 | } 344 | 345 | return (heatmap_max) 346 | } 347 | 348 | Calc_heatmap_min_values = function(binmats, plot_scale_samples_separate) { 349 | heatmap_min = c(rep(0, length(binmats))) 350 | 351 | for(i in 1:length(binmats)) { 352 | heatmap_min[i] = quantile(binmats[[i]],.05, na.rm=T)[[1]] 353 | } 354 | 355 | if(plot_scale_samples_separate == 0) { 356 | heatmap_min = c(rep(max(heatmap_min), length(binmats))) 357 | } 358 | 359 | return (heatmap_min) 360 | } 361 | 362 | 363 | PrepareDataForPlotting = function(peakds) { 364 | peakds$binmats = Scale_binned_matrices(raw_binmats = peakds$raw_binmats, z_score_data = peakds$z_score_data) 365 | peakds$peak_quants = Arrange_peaks_for_plotting(peak_quants = peakds$peak_quants, 366 | bwfile = peakds$bwfile, 367 | k_means_clusters = peakds$k_means_clusters, 368 | z_score_data_clustering_arranging = peakds$z_score_data_clustering_arranging, 369 | samples_used_clustering_arranging = peakds$samples_used_clustering_arranging) 370 | 371 | 372 | peakds$heatmap_lineplot_means = Calc_mean_values_for_plots(peakds$bwfile, peakds$binmats, peakds$peak_quants) 373 | peakds$max_ann = Calc_max_line_plot(peakds$heatmap_lineplot_means, peakds$plot_scale_samples_separate) 374 | peakds$min_ann = Calc_min_line_plot(peakds$heatmap_lineplot_means, peakds$plot_scale_samples_separate) 375 | peakds$heatmap_max = Calc_heatmap_max_values(binmats = peakds$binmats, peakds$plot_scale_samples_separate) 376 | peakds$heatmap_min = Calc_heatmap_min_values(binmats = peakds$binmats, peakds$plot_scale_samples_separate) 377 | 378 | return (peakds) 379 | } 380 | 381 | PlotHeatmaps = function(peakds) { 382 | peakds$plotlist = list() 383 | for(i in 1:length(peakds$bwfile)) { 384 | ptable = peakds$binmats[[i]] 385 | ha = HeatmapAnnotation(mean = anno_lines(peakds$heatmap_lineplot_means[[i]], 386 | ylim = c(peakds$min_ann[i],peakds$max_ann[i]), 387 | height = unit(2, "cm"), 388 | gp = gpar(col = 1:length(unique(peakds$peak_quants$split)))), 389 | show_annotation_name = c(mean = FALSE)) 390 | 391 | col_fun = colorRamp2(c(peakds$heatmap_min[i], 392 | peakds$heatmap_min[i] + peakds$heatmap_max[i]*0.15, 393 | peakds$heatmap_min[i] + peakds$heatmap_max[i]*0.3, 394 | peakds$heatmap_min[i] + peakds$heatmap_max[i]*0.6, 395 | peakds$heatmap_min[i] + peakds$heatmap_max[i], 396 | peakds$heatmap_min[i] + peakds$heatmap_max[i]*1.15), brewer.pal(n = 11, name = "RdYlBu")[c(1,4,6,8,10,11)]) 397 | 398 | peakds$plotlist[[i]] = Heatmap(ptable[rownames(peakds$peak_quants),], 399 | name = peakds$bwnames[i], 400 | column_title = peakds$bwnames[i], 401 | show_row_names = F, 402 | show_column_names = F, 403 | cluster_rows = F, 404 | cluster_columns = F, 405 | use_raster = T, 406 | width = unit(3, "cm"), 407 | height = unit(14, "cm"), 408 | top_annotation = ha, 409 | col = col_fun, 410 | row_split = peakds$peak_quants$split, 411 | row_title_gp = gpar(col = 1:length(unique(peakds$peak_quants$split)), font = 2)) 412 | 413 | 414 | if(i == 1) { 415 | oplot = peakds$plotlist[[i]] 416 | } else { 417 | oplot = oplot + peakds$plotlist[[i]] 418 | } 419 | } 420 | peakds$combined_plot = oplot 421 | return (peakds) 422 | } -------------------------------------------------------------------------------- /R/Replication_timing_segmenter.R: -------------------------------------------------------------------------------- 1 | args = commandArgs(trailingOnly=TRUE) 2 | 3 | PrintErrorMessage = function() { 4 | cat("\nUsage: Rscrip Replication_timing_segmenter.R \n") 5 | cat("\nTakes as input a replication timing log2 ratio file in bigwig format, and outputs a BED file with the timing segments\n") 6 | cat("\ttiming segments are created by splitting the log2 ratios into 4 quartiles:\n") 7 | cat("\t\t1) upper (early replication)\n") 8 | cat("\t\t2) mid-upper (mid-early replication)\n") 9 | cat("\t\t3) mid-lower (mid-late replication)\n") 10 | cat("\t\t4) lower (late replication)\n") 11 | cat("\nThis is a simple adaptation of a messy script originally created that uses the following packages: rtracklayer\n\tPlease cite rtracklayer when using this script\n\n") 12 | } 13 | 14 | if(length(args) < 1){ 15 | PrintErrorMessage() 16 | stop("Not enough arguments. Replication timing bigwig file has to be specified (created with BAMscale).") 17 | } 18 | 19 | infile = args[1] 20 | 21 | if(!file.exists(infile)) { 22 | PrintErrorMessage() 23 | stop(paste0("Input file specified does not exist: \"", infile, "\"")) 24 | } 25 | 26 | if (!require(rtracklayer)) { 27 | PrintErrorMessage() 28 | stop("Please install package names \"rtracklayer\"") 29 | } 30 | 31 | library(rtracklayer) 32 | 33 | outfile = gsub(pattern ="\\.bw|\\.bigWig|\\.bigwig", replacement = "", x = infile) 34 | outfile = paste0(outfile, ".replication_timings.bed") 35 | 36 | options(warn=-1) 37 | GetMergedElements = function(fbw, typeseg) { 38 | fbw = fbw[fbw$segments == typeseg,] 39 | fbw = reduce(fbw) 40 | fbw$length = width(fbw) 41 | fbw = fbw[fbw$length > min_segment_length] 42 | fbw$segments = typeseg 43 | return(fbw) 44 | } 45 | 46 | include_zeroes_segment_thresholds = 0 47 | min_segment_length = 5000 48 | 49 | bw = import(infile) 50 | 51 | if(include_zeroes_segment_thresholds == 0) { 52 | medval = quantile(bw$score[bw$score != 0], .5, na.rm = T)[[1]] 53 | upper = quantile(bw$score[bw$score != 0], .75, na.rm = T)[[1]] 54 | lower = quantile(bw$score[bw$score != 0], .25, na.rm = T)[[1]] 55 | } else { 56 | medval = quantile(bw$score, .5, na.rm = T)[[1]] 57 | upper = quantile(bw$score, .75, na.rm = T)[[1]] 58 | lower = quantile(bw$score, .25, na.rm = T)[[1]] 59 | } 60 | 61 | 62 | bw$segments = 0 63 | bw$segments = ifelse(bw$score < lower, -2, bw$segments) 64 | bw$segments = ifelse(bw$score < medval & bw$segments == 0, -1, bw$segments) 65 | bw$segments = ifelse(bw$score > medval & bw$segments == 0, 1, bw$segments) 66 | bw$segments = ifelse(bw$score > upper, 2, bw$segments) 67 | bw = resize(bw, width(bw) + 1, fix="start") 68 | 69 | segments = GetMergedElements(bw, 2) 70 | segments = c(segments, GetMergedElements(bw, 1)) 71 | segments = c(segments, GetMergedElements(bw, -1)) 72 | segments = c(segments, GetMergedElements(bw, -2)) 73 | segments = sort(segments) 74 | segments = resize(segments, width(segments) - 1, fix="end") 75 | 76 | seg_gaps = setdiff(as(seqinfo(segments), "GRanges"), segments) 77 | seg_gaps$length = width(seg_gaps) 78 | seg_gaps$segments = 0 79 | segments = c(segments, seg_gaps) 80 | segments = sort(segments) 81 | 82 | if(segments$segments[1] == 0) { 83 | segments$segments[1] = segments$segments[2] 84 | } 85 | 86 | if(segments$segments[length(segments)] == 0) { 87 | segments$segments[length(segments)] = segments$segments[length(segments)-1] 88 | } 89 | 90 | segments$final_seg = sapply(seq(1, length(segments), 1), function(i) { 91 | if(i > 1 & i < length(segments)) { 92 | if(segments$segments[i] == 0) { 93 | if(segments$length[i-1] > segments$length[i+1]) { 94 | segments$segments[i-1] 95 | } else { 96 | segments$segments[i+1] 97 | } 98 | } else { 99 | segments$segments[i] 100 | } 101 | } else { 102 | segments$segments[i] 103 | } 104 | }) 105 | 106 | segments = resize(segments, width(segments) + 1, fix="start") 107 | segments$segments = segments$final_seg 108 | 109 | segments_final = GetMergedElements(segments, 2) 110 | segments_final = c(segments_final, GetMergedElements(segments, 1)) 111 | segments_final = c(segments_final, GetMergedElements(segments, -1)) 112 | segments_final = c(segments_final, GetMergedElements(segments, -2)) 113 | segments_final = sort(segments_final) 114 | segments_final = resize(segments_final, width(segments_final) - 1, fix="end") 115 | 116 | segments_final = as.data.frame(segments_final) 117 | segments_final$width = NULL 118 | segments_final$strand = NULL 119 | segments_final$length = NULL 120 | 121 | segments_final$color = "126,0,21" 122 | segments_final$color = ifelse(segments_final$segments == -1, "153,89,31", segments_final$color) 123 | segments_final$color = ifelse(segments_final$segments == 1, "154,161,14", segments_final$color) 124 | segments_final$color = ifelse(segments_final$segments == 2, "20,155,3", segments_final$color) 125 | 126 | segments_final$time = "late" 127 | segments_final$time = ifelse(segments_final$segments == -1, "mid-late", segments_final$time) 128 | segments_final$time = ifelse(segments_final$segments == 1, "mid-early", segments_final$time) 129 | segments_final$time = ifelse(segments_final$segments == 2, "early", segments_final$time) 130 | 131 | segments_final$tval = 0 132 | segments_final$nval = "." 133 | 134 | segments_final = segments_final[,c("seqnames", "start", "end", "time", "tval", "nval", "start", "end", "color")] 135 | write.table(segments_final, outfile, sep = "\t", quote = F, col.names = F, row.names = F) 136 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | BAMscale 2 | === 3 | 4 | **Overview of BAMscale applications** 5 | 6 | 7 |

8 | 9 |

10 | 11 | 12 | BAMscale is a one-step tool to 13 | 14 | 1) quantify/normalize peak coverages from multiple BAM files 15 | 2) Create scaled BigWig files for easy visualization 16 | 17 | In the [wiki](https://github.com/ncbi/BAMscale/wiki) pages we have more detailed tutorials for creating bigWig files and quantifying peaks 18 | 19 | ## Update 20 | 20210510: Added support for BAM indexes "file.bam.bai" and "file.bai". Modified the bigwig writing to decrease file size: blocks of 25 bins are written, if non-empty. ***Modified the default bin size to 20bp*** 21 | 22 | 20200918: We are working on a heatmap plotting script in R to help visualization. The script (under development) is available in "R/Plot_heatmap" folder. Please use Rstudio or something similar, as you have to set the paths in the script. Meanwhile, we will work on developing a simple GUI for this. 23 | 24 | 20200423: The full manuscript has been published in [Epigenetics & Chromatin](https://epigeneticsandchromatin.biomedcentral.com/articles/10.1186/s13072-020-00343-x) 25 | 26 | 27 | 20200326: We added the visualization script app written in R. The scripts are available in the "R" sub-folder, with a detailed [manual](https://github.com/ncbi/BAMscale/wiki/Pairwise-comparison-of-peaks-%5Binteractive-plotting-using-R%5D) available in the wiki->visualization section 28 | 29 | 20190821: We recently added support for [RNA-seq](https://github.com/ncbi/BAMscale/wiki/Detailed-usage:-RNA-seq-coverage-tracks) data as well to create coverage tracks. The new method enables accurate representations of exon-intron boundaries (splicing). 30 | 31 | ## Manuals 32 | 33 | In the [wiki](https://github.com/ncbi/BAMscale/wiki) page we have more detailed tutorials for creating bigWig files and quantifying peaks: 34 | 35 | 1. [OK-seq and RFD Track Generation](https://github.com/ncbi/BAMscale/wiki/Detailed-Use:-OKseq-RFD-(Replication-Fork-Directionality)-Track-Generation) 36 | 2. [Quantifying Peaks](https://github.com/ncbi/BAMscale/wiki/Detailed-Use:-Quantifying-Peak-Coverages-from-Multiple-BAM-Files#comparing-atac-seq-changes-induced-from-treatment) 37 | 3. [Generating Scaled Coverage Tracks](https://github.com/ncbi/BAMscale/wiki/Detailed-Use:-Generating-Scaled-Coverage-Tracks#preparing-input-data-for-bamscale) 38 | 4. [END-seq data](https://github.com/ncbi/BAMscale/wiki/Detailed-Use:-Processing-END-seq-Data) 39 | 5. [Log2 Coverage Tracks for Replication Timing Data](https://github.com/ncbi/BAMscale/wiki/Detailed-Use:-Replication-Timing-log2-Coverage-Ratio-from-Two-BAM-Files) 40 | 6. [Smoothening Function for Coverage Tracks](https://github.com/ncbi/BAMscale/wiki/Detailed-Use:-Smooth-Coverage-Tracks) 41 | 42 | 43 | We also added a few R scripts that might be helpful for basic visualizations: 44 | 1. [Creating density plots of quantified peaks](https://github.com/ncbi/BAMscale/wiki/Pairwise-comparison-of-peaks-%5Binteractive-plotting-using-R%5D) 45 | 46 | 2. [Segmenting replication timing bigwigs](https://github.com/ncbi/BAMscale/wiki/Replication-timing-BED-segments-from-bigwig) 47 | 3. [Identifying OK-seq strand switches](https://github.com/ncbi/BAMscale/wiki/Finding-OK-seq-strand-switched-from-the-RFD-track) 48 | 49 | For additional information, visit the [wiki](https://github.com/ncbi/BAMscale/wiki) page. 50 | 51 | For any other requests, or if you need help either open an issue, or feel free to email me: *pongorlorinc@gmail.com* 52 | 53 | 54 | ## Usage for the impatient 55 | 56 | These examples assume you have 4 processing threads, so we set '-t 4' for multithreading. 57 | 58 | #### Peak quantification 59 | 60 | BAMscale cov -t 4 --bed --bam --bam --bam ... --bam 61 | 62 | #### Generating scaled coverage tracks 63 | 64 | ***Creating scaled coverage tracks*** 65 | 66 | BAMscale scale -t 4 --bam [--bam .. --bam ] 67 | 68 | ***Creating stranded RNA-seq coverage tracks*** 69 | 70 | BAMscale scale --operation strandrna --bam 71 | 72 | ***Creating unstranded coverage from RNA-seq*** 73 | 74 | BAMscale scale --operation rna --bam 75 | 76 | ***Getting RFD score from OKseq data*** 77 | 78 | BAMscale scale -t 4 --operation rfd --binsize 1000 --bam 79 | 80 | ***Processing replication timing and Repli-seq data*** 81 | 82 | BAMscale scale -t 4 --operation reptime --bam --bam 83 | 84 | ***Creating stranded END-seq coverages*** 85 | 86 | BAMscale scale -t 4 --operation endseq --bam 87 | 88 | 89 | ## Reference 90 | 91 | BAMscale can be found at **bioRχiv** ([https://doi.org/10.1101/669275](https://www.biorxiv.org/content/10.1101/669275v1)) 92 | 93 | ## Bioconda instalation 94 | 95 | [BAMscale](https://bioconda.github.io/recipes/bamscale/README.html) is available through [Bioconda](https://bioconda.github.io/). Read the Bioconda [Getting Started](https://bioconda.github.io/user/install.html#install-conda) page for a detailed description on how to get Bioconda installed. 96 | 97 | Once Bioconda is available you can install BAMscale using this command. 98 | 99 | conda install bamscale 100 | 101 | ## Docker 102 | 103 | BAMscale docker image is available in [quay.io/biocontainers/bamscale](https://quay.io/repository/biocontainers/bamscale). 104 | 105 | ### Pulling the image 106 | 107 | docker pull quay.io/biocontainers/bamscale:0.0.5--ha85820d_0 108 | 109 | ### Using the Docker image 110 | 111 | #### Peak quantification with Docker 112 | 113 | docker run -v `pwd`:/data bamscale BAMscale cov --bed --bam --bam --bam ... --bam 114 | 115 | #### Generating scaled coverage tracks with Docker 116 | 117 | docker run -v `pwd`:/data bamscale BAMscale scale --bam [--bam .. --bam ] 118 | 119 | ### Creating a custom docker image 120 | 121 | docker build -t bamscale https://raw.githubusercontent.com/pongorlorinc/BAMscale/master/Dockerfile 122 | 123 | ## Local compilation 124 | 125 | ### Requirements 126 | 127 | We have a detailed installation for [Linux](https://github.com/ncbi/BAMscale/wiki/Installation#detailed-installation-for-linux-based-os) and [MAC](https://github.com/ncbi/BAMscale/wiki/Installation#detailed-installation-for-mac-os-with-homebrew) (with homebrew) based systems or through [conda](https://github.com/ncbi/BAMscale/wiki/Installation#detailed-installation-for-mac-os-with-conda). There is also a precompiled version for linux ready for usage available at the [releases](https://github.com/ncbi/BAMscale/releases). 128 | 129 | #### samtools 130 | http://www.htslib.org/ 131 | 132 | #### libBigWig 133 | Clone the libBigWig repository from GitHub: https://github.com/dpryan79/libBigWig 134 | 135 | git clone https://github.com/dpryan79/libBigWig.git 136 | 137 | Compile it and set the environment variables for BAMscale 138 | 139 | cd libBigWig/ 140 | make 141 | export LIBBIGWIG_DIR=`pwd` 142 | export CPPFLAGS="-I $LIBBIGWIG_DIR" 143 | export LDFLAGS="-L $LIBBIGWIG_DIR -Wl,-rpath,$LIBBIGWIG_DIR" 144 | 145 | Optionally (and if you have permission), the libbigwig can also be installed 146 | 147 | make install 148 | 149 | In this case, the flags don't have to be set in the terminal. 150 | 151 | ### Installation 152 | 153 | After compiling the libBigWig library and samtools (if not already installed) clone the BAMscale from GitHub 154 | 155 | git clone https://github.com/ncbi/BAMscale.git 156 | 157 | and go to the BAMscale folder to compile the program: 158 | 159 | cd BAMscale/ 160 | make 161 | 162 | A bin folder will be created with the BAMscale executable. 163 | 164 | # Public Domain notice 165 | 166 | National Center for Biotechnology Information. 167 | 168 | This software is a "United States Government Work" under the terms of the United States 169 | Copyright Act. It was written as part of the authors' official duties as United States 170 | Government employees and thus cannot be copyrighted. This software is freely available 171 | to the public for use. The National Library of Medicine and the U.S. Government have not 172 | placed any restriction on its use or reproduction. 173 | 174 | Although all reasonable efforts have been taken to ensure the accuracy and reliability 175 | of the software and data, the NLM and the U.S. Government do not and cannot warrant the 176 | performance or results that may be obtained by using this software or data. The NLM and 177 | the U.S. Government disclaim all warranties, express or implied, including warranties 178 | of performance, merchantability or fitness for any particular purpose. 179 | 180 | Please cite NCBI in any work or product based on this material. 181 | -------------------------------------------------------------------------------- /bamscale-cov.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | cwlVersion: v1.0 3 | class: CommandLineTool 4 | 5 | label: BAMscale-cov 6 | doc: Calculate coverage of BED coordinates in BAM file(s) 7 | 8 | requirements: 9 | InlineJavascriptRequirement: {} 10 | 11 | hints: 12 | - $import: bamscale.yml 13 | 14 | inputs: 15 | l: 16 | type: string? 17 | inputBinding: 18 | position: 1 19 | prefix: -l 20 | doc: | 21 | Sequencing type to be used. Can be: single, paired, and auto (default: autodetect) 22 | f: 23 | type: string? 24 | inputBinding: 25 | position: 1 26 | prefix: -f 27 | doc: | 28 | Compute coverage using fragments instead of reads (default: no) 29 | s: 30 | type: string? 31 | inputBinding: 32 | position: 1 33 | prefix: -s 34 | doc: | 35 | Reads need to have same orientation of peaks (default: unstranded) 36 | r: 37 | type: string? 38 | inputBinding: 39 | position: 1 40 | prefix: -r 41 | doc: | 42 | Reads need to have reverse orientation of peaks (default: unstranded) 43 | e: 44 | type: int? 45 | inputBinding: 46 | position: 2 47 | prefix: -e 48 | doc: | 49 | Compute sequencing coverage from BAM file quickly using the index (option '0'), 50 | or count number of reads by parsing entire BAM file (slower, but more accurate; set to '1' [default]) 51 | c: 52 | type: File? 53 | inputBinding: 54 | position: 2 55 | prefix: -c 56 | doc: | 57 | Input file with list of chromosomes to blacklist when computing coverage for normalization 58 | u: 59 | type: int? 60 | inputBinding: 61 | position: 2 62 | prefix: -u 63 | doc: | 64 | BED file with regions to subtract when computing coverage for normalization 65 | These coordinates should not overlap so reads are not counted multiple times 66 | q: 67 | type: int? 68 | inputBinding: 69 | position: 3 70 | prefix: -q 71 | doc: | 72 | Minimum (at least) mapping quality (default: 0) 73 | d: 74 | type: string? 75 | inputBinding: 76 | position: 3 77 | prefix: -d 78 | doc: | 79 | Keep duplicated reads (default: no) 80 | p: 81 | type: string? 82 | inputBinding: 83 | position: 3 84 | prefix: -p 85 | doc: | 86 | Do not filter un-proper alignments (default: filter) 87 | m: 88 | type: string? 89 | inputBinding: 90 | position: 3 91 | prefix: -m 92 | doc: | 93 | Do not remove reads with unmapped pairs 94 | g: 95 | type: int? 96 | inputBinding: 97 | position: 3 98 | prefix: -g 99 | doc: | 100 | Minimum fragment size for read pairs (default: 0) 101 | x: 102 | type: int? 103 | inputBinding: 104 | position: 3 105 | prefix: -x 106 | doc: | 107 | Maximum fragment size for read pairs (default: 2000) 108 | w: 109 | type: int? 110 | inputBinding: 111 | position: 3 112 | prefix: -w 113 | doc: | 114 | Filter reads based on fragment size (default: no) 115 | t: 116 | type: int? 117 | inputBinding: 118 | position: 4 119 | prefix: -t 120 | doc: | 121 | No. of threads to use (default: 1) 122 | n: 123 | type: string 124 | inputBinding: 125 | position: 4 126 | prefix: -n 127 | doc: | 128 | Output prefix for file names (default: none) 129 | bed: 130 | type: File 131 | inputBinding: 132 | position: 5 133 | prefix: --bed 134 | doc: | 135 | Input BED file 136 | bam: 137 | type: 138 | type: array 139 | items: File 140 | inputBinding: 141 | prefix: --bam 142 | separate: true 143 | secondaryFiles: .bai 144 | inputBinding: 145 | position: 6 146 | doc: | 147 | Input BAM file. This can be specified multiple times in case of multiple BAM files 148 | 149 | outputs: 150 | output: 151 | type: File[] 152 | outputBinding: 153 | glob: $(inputs.n)* 154 | 155 | baseCommand: ["BAMscale", "cov"] 156 | 157 | s:author: 158 | - class: s:Person 159 | s:identifier: https://orcid.org/0000-0002-4108-5982 160 | s:email: mailto:r78v10a07@gmail.com 161 | s:name: Roberto Vera Alvarez 162 | 163 | s:codeRepository: https://github.com/ncbi/BAMscale 164 | s:license: https://spdx.org/licenses/OPL-1.0 165 | 166 | $namespaces: 167 | s: http://schema.org/ 168 | 169 | $schemas: 170 | - https://schema.org/version/latest/schema.rdf 171 | 172 | -------------------------------------------------------------------------------- /bamscale-scale.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | cwlVersion: v1.0 3 | class: CommandLineTool 4 | 5 | label: BAMscale-scale 6 | doc: Scale one or multiple BAM files 7 | 8 | requirements: 9 | InlineJavascriptRequirement: {} 10 | 11 | hints: 12 | - $import: bamscale.yml 13 | 14 | inputs: 15 | l: 16 | type: string? 17 | inputBinding: 18 | position: 1 19 | prefix: -l 20 | doc: | 21 | Sequencing type to be used. Can be: single, paired, and auto (default: autodetect) 22 | f: 23 | type: string? 24 | inputBinding: 25 | position: 1 26 | prefix: -f 27 | doc: | 28 | Compute coverage using fragments instead of reads (default: no) 29 | a: 30 | type: int? 31 | inputBinding: 32 | position: 1 33 | prefix: -a 34 | doc: | 35 | Fragment size to be used to extend single-end library reads 36 | y: 37 | type: string? 38 | inputBinding: 39 | position: 2 40 | prefix: -y 41 | doc: | 42 | Type of normalization. (default: base) 43 | If no normalization is needed, set '--scale no' argument, the program will disregard this option. 44 | Options: 45 | 1) reads: No. of mapped reads/fragments 46 | 2) base: Sum of per-base coverage of reads/fragments 47 | k: 48 | type: string? 49 | inputBinding: 50 | position: 2 51 | prefix: -k 52 | doc: | 53 | Method to scale samples together. (default: genome) 54 | Options are: 55 | 1) no: no scaling, just calculate coverage 56 | 2) smallest: scale reads to smallest library (multiple-samples only) 57 | 3) genome: scale samples to 1x genome coverage (only possible with 'base' normalization type) 58 | r: 59 | type: string? 60 | inputBinding: 61 | position: 1 62 | prefix: -r 63 | doc: | 64 | Operation to perform when scaling samples. Default: scaled 65 | Options are: 66 | 1) scaled: output scaled tracks. 67 | 2) unscaled: do not scale files in any way. 68 | 2) log2: log2 transform against first BAM file. 69 | 3) ratio: coverage ratio against first BAM file. 70 | 4) subtract: subtract coverage against first BAM file. 71 | 5) rfd: OK-seq RFD calculation 72 | z: 73 | type: int? 74 | inputBinding: 75 | position: 2 76 | prefix: -z 77 | doc: | 78 | Size of bins for output bigWig/bedgraph generation (default: 5) 79 | e: 80 | type: int? 81 | inputBinding: 82 | position: 3 83 | prefix: -e 84 | doc: | 85 | Compute sequencing coverage from BAM file. (default: '1', count reads while parsing BAM) 86 | Options are: 87 | 1) 0: use reads in index (only if normalization is set to 'reads') 88 | 2) 1: count reads while parsing BAM(s) 89 | WARNING: this option is only useful when 'reads' are used for normalization 90 | c: 91 | type: File? 92 | inputBinding: 93 | position: 3 94 | prefix: -c 95 | doc: | 96 | Input file with list of chromosomes to blacklist when computing coverage for normalization 97 | u: 98 | type: int? 99 | inputBinding: 100 | position: 3 101 | prefix: -u 102 | doc: | 103 | BED file with regions to subtract when computing coverage for normalization 104 | These coordinates should not overlap so reads are not counted multiple times 105 | j: 106 | type: int? 107 | inputBinding: 108 | position: 3 109 | prefix: -j 110 | doc: | 111 | Smoothen signal by calculating mean of N bins flanking both sides of each bin (default: 0) 112 | If set to '0', the signal is not smoothened. To turn on specify a value greater than '0'. 113 | For replication timing, a good value is to smoothen to 100k bases. If binSize is 100bp, this would be '1000' 114 | b: 115 | type: int? 116 | inputBinding: 117 | position: 3 118 | prefix: -b 119 | doc: | 120 | Which tracks should be smoothened when performing smoothening (default: '1' meaning only binned track). 121 | Options are: 122 | 1) 0: Smoothen scaled and transformed tracks (log2, ratio or subtracted) 123 | 2) 1: Smoothen only the scaled sequencing track 124 | 3) 2: Smoothen only the transformed (log2, ratio or subtract) track 125 | q: 126 | type: int? 127 | inputBinding: 128 | position: 4 129 | prefix: -q 130 | doc: | 131 | Minimum (at least) mapping quality (default: 0) 132 | d: 133 | type: string? 134 | inputBinding: 135 | position: 4 136 | prefix: -d 137 | doc: | 138 | Keep duplicated reads (default: no) 139 | p: 140 | type: string? 141 | inputBinding: 142 | position: 4 143 | prefix: -p 144 | doc: | 145 | Do not filter un-proper alignments (default: filter) 146 | m: 147 | type: string? 148 | inputBinding: 149 | position: 4 150 | prefix: -m 151 | doc: | 152 | Do not remove reads with unmapped pairs 153 | g: 154 | type: int? 155 | inputBinding: 156 | position: 4 157 | prefix: -g 158 | doc: | 159 | Minimum fragment size for read pairs (default: 0) 160 | x: 161 | type: int? 162 | inputBinding: 163 | position: 4 164 | prefix: -x 165 | doc: | 166 | Maximum fragment size for read pairs (default: 2000) 167 | w: 168 | type: int? 169 | inputBinding: 170 | position: 4 171 | prefix: -w 172 | doc: | 173 | Filter reads based on fragment size (default: no) 174 | t: 175 | type: int? 176 | inputBinding: 177 | position: 5 178 | prefix: -t 179 | doc: | 180 | No. of threads to use (default: 1) 181 | bam: 182 | type: 183 | type: array 184 | items: File 185 | inputBinding: 186 | prefix: --bam 187 | separate: true 188 | secondaryFiles: .bai 189 | inputBinding: 190 | position: 6 191 | doc: | 192 | Input BAM file. This can be specified multiple times in case of multiple BAM files 193 | 194 | outputs: 195 | output: 196 | type: File[] 197 | outputBinding: 198 | glob: "*.bw" 199 | 200 | baseCommand: ["BAMscale", "scale"] 201 | 202 | s:author: 203 | - class: s:Person 204 | s:identifier: https://orcid.org/0000-0002-4108-5982 205 | s:email: mailto:r78v10a07@gmail.com 206 | s:name: Roberto Vera Alvarez 207 | 208 | s:codeRepository: https://github.com/ncbi/BAMscale 209 | s:license: https://spdx.org/licenses/OPL-1.0 210 | 211 | $namespaces: 212 | s: http://schema.org/ 213 | 214 | $schemas: 215 | - https://schema.org/version/latest/schema.rdf 216 | 217 | -------------------------------------------------------------------------------- /bamscale.yml: -------------------------------------------------------------------------------- 1 | class: DockerRequirement 2 | dockerImageId: bamscale 3 | dockerFile: 4 | $include: https://raw.githubusercontent.com/ncbi/BAMscale/master/Dockerfile 5 | -------------------------------------------------------------------------------- /doc/ATAC-seq_peak_quant_benchmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/ATAC-seq_peak_quant_benchmark.png -------------------------------------------------------------------------------- /doc/ATAC-seq_peak_validation_CB450h.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/ATAC-seq_peak_validation_CB450h.png -------------------------------------------------------------------------------- /doc/ATAC-seq_peak_validation_CB452h.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/ATAC-seq_peak_validation_CB452h.png -------------------------------------------------------------------------------- /doc/ATAC-seq_peak_validation_CB454h.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/ATAC-seq_peak_validation_CB454h.png -------------------------------------------------------------------------------- /doc/ATAC-seq_peak_validation_CEM0h.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/ATAC-seq_peak_validation_CEM0h.png -------------------------------------------------------------------------------- /doc/ATAC-seq_peak_validation_CEM2h.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/ATAC-seq_peak_validation_CEM2h.png -------------------------------------------------------------------------------- /doc/ATAC-seq_peak_validation_CEM4h.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/ATAC-seq_peak_validation_CEM4h.png -------------------------------------------------------------------------------- /doc/images/ATAC-seq_coverage_benchmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/ATAC-seq_coverage_benchmark.png -------------------------------------------------------------------------------- /doc/images/ATAC-seq_coverage_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/ATAC-seq_coverage_comparison.png -------------------------------------------------------------------------------- /doc/images/ATAC-seq_peak_quant_benchmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/ATAC-seq_peak_quant_benchmark.png -------------------------------------------------------------------------------- /doc/images/ATAC-seq_peak_validation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/ATAC-seq_peak_validation.png -------------------------------------------------------------------------------- /doc/images/BMAscale_RNAseq_coverage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/BMAscale_RNAseq_coverage.png -------------------------------------------------------------------------------- /doc/images/Detailed_usage_ATAC_exampleTOP1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/Detailed_usage_ATAC_exampleTOP1.png -------------------------------------------------------------------------------- /doc/images/Detailed_usage_ENDseq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/Detailed_usage_ENDseq.png -------------------------------------------------------------------------------- /doc/images/Detailed_usage_OKseq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/Detailed_usage_OKseq.png -------------------------------------------------------------------------------- /doc/images/Detailed_usage_RepTime_closer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/Detailed_usage_RepTime_closer.png -------------------------------------------------------------------------------- /doc/images/Detailed_usage_RepTime_full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/Detailed_usage_RepTime_full.png -------------------------------------------------------------------------------- /doc/images/Detailed_usage_peakQuant_CB45_2h_vs_0h.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/Detailed_usage_peakQuant_CB45_2h_vs_0h.png -------------------------------------------------------------------------------- /doc/images/Detailed_usage_peakQuant_CB45_4h_vs_0h.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/Detailed_usage_peakQuant_CB45_4h_vs_0h.png -------------------------------------------------------------------------------- /doc/images/Detailed_usage_peakQuant_CEM_2h_vs_0h.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/Detailed_usage_peakQuant_CEM_2h_vs_0h.png -------------------------------------------------------------------------------- /doc/images/Detailed_usage_peakQuant_CEM_4h_vs_0h.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/Detailed_usage_peakQuant_CEM_4h_vs_0h.png -------------------------------------------------------------------------------- /doc/images/ENDseq_stranded.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/ENDseq_stranded.png -------------------------------------------------------------------------------- /doc/images/FPKM_formula.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/FPKM_formula.png -------------------------------------------------------------------------------- /doc/images/Libsize_formula.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/Libsize_formula.png -------------------------------------------------------------------------------- /doc/images/MAIN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/MAIN.png -------------------------------------------------------------------------------- /doc/images/MAIN_figure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/MAIN_figure.png -------------------------------------------------------------------------------- /doc/images/OKseq_segmenter_figure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/OKseq_segmenter_figure.png -------------------------------------------------------------------------------- /doc/images/OKseq_switch_example1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/OKseq_switch_example1.png -------------------------------------------------------------------------------- /doc/images/OKseq_switch_example2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/OKseq_switch_example2.png -------------------------------------------------------------------------------- /doc/images/Read_count_matrix_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/Read_count_matrix_example.png -------------------------------------------------------------------------------- /doc/images/Replication_timing_script_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/Replication_timing_script_example.png -------------------------------------------------------------------------------- /doc/images/TPM_formula.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/TPM_formula.png -------------------------------------------------------------------------------- /doc/images/XY_Empty_page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/XY_Empty_page.png -------------------------------------------------------------------------------- /doc/images/XY_drop_file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/XY_drop_file.png -------------------------------------------------------------------------------- /doc/images/XY_dropdown.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/XY_dropdown.png -------------------------------------------------------------------------------- /doc/images/XY_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/XY_example.png -------------------------------------------------------------------------------- /doc/images/XY_exec.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/XY_exec.png -------------------------------------------------------------------------------- /doc/images/XY_explanation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/XY_explanation.png -------------------------------------------------------------------------------- /doc/images/XY_rstudio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/XY_rstudio.png -------------------------------------------------------------------------------- /includes/BAMcoverage.h: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | /* 8 | * File: BAMcoverage.h 9 | * Author: pongorls 10 | * 11 | * Created on November 28, 2018, 12:27 PM 12 | */ 13 | 14 | #ifndef BAMCOVERAGE_H 15 | #define BAMCOVERAGE_H 16 | 17 | #include "Definitions.h" 18 | #include "main.h" 19 | #include 20 | 21 | #ifdef __cplusplus 22 | extern "C" { 23 | #endif 24 | int ReadStrand(bam1_t *read, int paired_end); 25 | int DetectLibraryType(BAMFILES *bhead); 26 | int Read_filter(bam1_t *read, CMDINPUT *cmd); 27 | CHROMOSOMES *AddIDXcoverage(char *name, int coverage, int id, CHROMOSOMES *head); 28 | void GetChromosomeCoveragesIDX(CHROMOSOMES *head, BAMFILES *bhead); 29 | void GetGenomeCoveragesIDX(CHROMOSOMES *head, BAMFILES *bhead); 30 | void CalculateCoverageOfReads(samFile *fp_in, hts_itr_t *iter, bam1_t *aln, int chrsize, char *chrname, CMDINPUT *cmd, BAMFILES *bamcurr); 31 | void *GetGenomeReadCoveragemultithread(void * voidA); 32 | void MultiGenomeReadCoverage(CMDINPUT *cmd, CHROMOSOMES *chr); 33 | void GetChromosomeCoveragesBAM(CHROMOSOMES *head, BAMFILES *bhead, CMDINPUT *cmd); 34 | char *BEDentryToCoord(char *input); 35 | char *BEDentryChr(char *input); 36 | void SubtractBlacklistedBEDS(char *filename, CHROMOSOMES *head, BAMFILES *bhead, int paired_end); 37 | int *CalculateCoverage(samFile *fp_in, hts_itr_t *iter, bam1_t *aln, int chrsize, char *chrname, CMDINPUT *cmd, BAMFILES *bamcurr); 38 | void GetGenomeCoverageRNA(CMDINPUT *cmd, CHROMOSOMES *head, char *outfile); 39 | void *GetGenomeCoveragemultithread(void * voidA); 40 | void MultiGenomeCoverage(CMDINPUT *cmd, CHROMOSOMES *chr); 41 | void *GetGenomeBaseCoveragemultithread(void * voidA); 42 | void MultiGenomeBaseCoverage(CMDINPUT *cmd, CHROMOSOMES *chr); 43 | void *ScaleBinsmultithread(void * voidA); 44 | void MultiGenomeScaler(CMDINPUT *cmd, CHROMOSOMES *chr); 45 | void *SmoothBinsmultithread(void * voidA); 46 | void MultiGenomeSmoother(CMDINPUT *cmd, CHROMOSOMES *chr); 47 | void *TransformBinsmultithread(void * voidA); 48 | void MultiGenomeTransform(CMDINPUT *cmd, CHROMOSOMES *chr); 49 | void CalculateCoverageOfChromosomeBins(CHROMOSOMES *head, BAMFILES *bhead, int paired_end, int bin_size, int pseudocount, CMDINPUT *cmd); 50 | void SmoothenAllChromosomeBins(CHROMOSOMES *head, BAMFILES *bhead, int smoothBinNum); 51 | #ifdef __cplusplus 52 | } 53 | #endif 54 | #endif /* BAMCOVERAGE_H */ 55 | -------------------------------------------------------------------------------- /includes/BAMstructs.h: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | /* 8 | * File: BAMstructs.h 9 | * Author: pongorls 10 | * 11 | * Created on November 28, 2018, 11:58 AM 12 | */ 13 | 14 | #ifndef BAMSTRUCTS_H 15 | #define BAMSTRUCTS_H 16 | #include "Definitions.h" 17 | 18 | #ifdef __cplusplus 19 | extern "C" { 20 | #endif 21 | int CheckIndexShortFile(char *fname); 22 | int CheckIndexFile(char *fname); 23 | void DestroyBAMstruct(BAMFILES *head); 24 | BAMFILES *AddBAMstruct(char *BAMname, BAMFILES *head); 25 | void PrintBAMstructs(BAMFILES *head); 26 | #ifdef __cplusplus 27 | } 28 | #endif 29 | #endif /* BAMSTRUCTS_H */ 30 | -------------------------------------------------------------------------------- /includes/BEDstruct.h: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | /* 8 | * File: BEDstruct.h 9 | * Author: pongorls 10 | * 11 | * Created on December 10, 2018, 8:02 AM 12 | */ 13 | 14 | #ifndef BEDSTRUCT_H 15 | #define BEDSTRUCT_H 16 | 17 | #include "Definitions.h" 18 | #include "main.h" 19 | 20 | #ifdef __cplusplus 21 | extern "C" { 22 | #endif 23 | char *BEDtoString(char *chr, int start, int end); 24 | int Read_filter_MultiCov(bam1_t *read, int paired_end); 25 | void DeleteBEDs(PEAK *head); 26 | PEAK *CreateBEDentry(void); 27 | void AllocateReadCovs(PEAK *head, int no_of_samples); 28 | void AllocateCovs(PEAK *head); 29 | PEAK *AddBEDentry(PEAK *curr, char *BEDentry, int tid); 30 | PEAK *ReadBED(char *BEDfilename, int nthreads); 31 | void GetBEDCoveragesBAM(BAMFILES *bhead, PEAK *beds, int paired_end); 32 | void *GetBEDFragmentCoveragesBAMmultithread(void * voidA); 33 | void *GetBEDCoveragesBAMmultithread(void * voidA); 34 | void MultiCoverage(BAMFILES *bhead, PEAK *head, CMDINPUT *cmd); 35 | void CalculateFPKM(BAMFILES *bhead, PEAK *head); 36 | void CalculateLibScaled(BAMFILES *bhead, PEAK *head); 37 | void CalculateTPM(BAMFILES *bhead, PEAK *head); 38 | void WriteMultiCovsRaw(BAMFILES *bhead, PEAK *head, int no_of_samples, char *outfile); 39 | void WriteMultiCovsNormalized(BAMFILES *bhead, PEAK *head, int no_of_samples, char *outfile); 40 | #ifdef __cplusplus 41 | } 42 | #endif 43 | 44 | #endif /* BEDSTRUCT_H */ 45 | -------------------------------------------------------------------------------- /includes/CHROMstruct.h: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | /* 8 | * File: CHROMstruct.h 9 | * Author: pongorls 10 | * 11 | * Created on November 28, 2018, 12:34 PM 12 | */ 13 | 14 | #ifndef CHROMSTRUCT_H 15 | #define CHROMSTRUCT_H 16 | 17 | #include "Definitions.h" 18 | #include "main.h" 19 | 20 | #ifdef __cplusplus 21 | extern "C" { 22 | #endif 23 | uint32_t *GetChrLens(CHROMOSOMES *head, int no_of_chrs); 24 | int CountNumberOfChromosomes(CHROMOSOMES *head); 25 | char **GetChromosomeNames(CHROMOSOMES *head, int no_of_chrs); 26 | float CalculateGenomeSize(CHROMOSOMES *head); 27 | CHROMOSOMES *AddCHROMstruct(CHROMOSOMES *head, char *name, int length, int no_of_samples, int threadID); 28 | void DestroyCHROMstruct(CHROMOSOMES *head, int no_of_samples); 29 | CHROMOSOMES *ImportChromosomeDataFromBAM(char *bamfile, int no_of_samples, int threads); 30 | void PrintChromosomes(CHROMOSOMES *head, int no_of_samples); 31 | void PrintBlacklistedChromosomes(CHROMOSOMES *head, int no_of_samples); 32 | CHROMOSOMES *ComputeBins(CHROMOSOMES *head, int binSize); 33 | CHROMOSOMES *AllocateBins(CHROMOSOMES *head, int no_of_samples); 34 | CHROMOSOMES *BlacklistChromosome(CHROMOSOMES *head, char *name); 35 | void BlacklistChromosomeFiles(CHROMOSOMES *head, char *filename); 36 | void DestroyChromCovStruct(CHRCOV *head); 37 | CHRCOV *CreateChromCovStruct(char *name, int id, int nbins); 38 | void DestroyRatioStruct(RATIOS *ptr); 39 | RATIOS *CreateRatioStruct(char *s1, char *s2, int id1, int id2); 40 | #ifdef __cplusplus 41 | } 42 | #endif 43 | #endif /* CHROMSTRUCT_H */ 44 | -------------------------------------------------------------------------------- /includes/Definitions.h: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | /* 8 | * File: Definitions.h 9 | * Author: pongorls 10 | * 11 | * Created on November 28, 2018, 11:56 AM 12 | */ 13 | 14 | #ifndef DEFINITIONS_H 15 | #define DEFINITIONS_H 16 | 17 | #include 18 | 19 | #ifdef __cplusplus 20 | extern "C" { 21 | #endif 22 | typedef struct segment { 23 | int start; 24 | int end; 25 | int value; 26 | 27 | struct segment *next; 28 | struct segment *prev; 29 | } SEGMENTS; 30 | 31 | 32 | typedef struct peak { 33 | char *coord; 34 | int id; 35 | char *chr; 36 | char *start_str; 37 | char *end_str; 38 | int strand; 39 | 40 | int start; 41 | int end; 42 | int length; 43 | 44 | int nbins; 45 | int binSize; 46 | int noSamples; 47 | 48 | int tid; 49 | 50 | int *read_cov; 51 | float *normalized; 52 | float **cov; 53 | struct peak *next; 54 | } PEAK; 55 | 56 | typedef struct chrcov { 57 | char *name; 58 | int id; 59 | int nbins; 60 | float *ratio; 61 | 62 | struct chrcov *next; 63 | } CHRCOV; 64 | 65 | typedef struct ratios { 66 | char *sample1; 67 | char *sample2; 68 | int s1; 69 | int s2; 70 | 71 | CHRCOV *chrcovs; 72 | struct ratios *next; 73 | } RATIOS; 74 | 75 | typedef struct BEDcoords { 76 | char *coord; 77 | char *chr; 78 | char *start_str; 79 | char *end_str; 80 | float *normcov; 81 | 82 | int start; 83 | int end; 84 | int summit; 85 | int strand; 86 | struct BEDcoords *next; 87 | } BEDCOORDS; 88 | 89 | typedef struct BAMfiles{ 90 | char *name; 91 | char *shortname; 92 | int id; 93 | int read_coverage; 94 | int filtered_reads; 95 | double base_coverage; 96 | float scale; 97 | float genome_scale; 98 | struct BAMfiles *next; 99 | } BAMFILES; 100 | 101 | typedef struct chromosomes { 102 | char *name; 103 | int id; 104 | int length; 105 | int accept; 106 | int blacklist; 107 | float **coverages; 108 | int *idxreads; 109 | int numberOfBins; 110 | int allocated; 111 | int nonzerobins; 112 | int tid; 113 | struct chromosomes *next; 114 | } CHROMOSOMES; 115 | 116 | typedef struct cmdinput { 117 | char *bedfile; 118 | int no_of_samples; 119 | BAMFILES *bamfiles; 120 | CHROMOSOMES *chr; 121 | int mapq; 122 | int removeduplicates; 123 | int nounproper; 124 | int remove_unmapped_pair; 125 | int fragment_count_mode; 126 | int fragment_size_filter; 127 | int fragment_size; 128 | int strand; 129 | int libtype; 130 | int threads; 131 | int min_insert_size; 132 | int max_insert_size; 133 | char *outdir; 134 | char *outprefix; 135 | char *blacklist_file; 136 | char *blacklist_bed; 137 | char **argv; 138 | int argc; 139 | int genome_coverage; 140 | int strandsplit; 141 | char *custom_scale; 142 | 143 | char *normtype; 144 | char *scale; 145 | char *operation; 146 | 147 | int filtDiffChr; 148 | int filtInsSize; 149 | 150 | int binSize; 151 | int binSizeChange; 152 | int smoothBin; 153 | int smoothBinChange; 154 | int tracksmooth; 155 | } CMDINPUT; 156 | 157 | typedef struct threads { 158 | int pid; 159 | char *chrname; 160 | char *sample; 161 | int sample_id; 162 | int paired_end; 163 | float scale; 164 | int binSize; 165 | int pseudocount; 166 | int strand; 167 | CHROMOSOMES *chr; 168 | PEAK *phead; 169 | CMDINPUT *cmd; 170 | BAMFILES *bamfile; 171 | struct threads *next; 172 | } THREADS; 173 | #ifdef __cplusplus 174 | } 175 | #endif 176 | 177 | #endif /* DEFINITIONS_H */ 178 | 179 | -------------------------------------------------------------------------------- /includes/Inputs.h: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | /* 8 | * File: Inputs.h 9 | * Author: pongorls 10 | * 11 | * Created on December 11, 2018, 8:00 AM 12 | */ 13 | 14 | #ifndef INPUTS_H 15 | #define INPUTS_H 16 | 17 | #include "Definitions.h" 18 | #include "main.h" 19 | 20 | #define INPUTS_BASE "base" 21 | #define INPUTS_GENOME "genome" 22 | #define INPUTS_SCALED "scaled" 23 | #define INPUTS_CUSTOM "custom" 24 | #define INPUTS_SCALE "scale" 25 | #define INPUTS_UNSCALED "unscaled" 26 | #define INPUTS_LOG2 "log2" 27 | #define INPUTS_RFD "rfd" 28 | #define INPUTS_END "endseq" 29 | #define INPUTS_ENDR "endseqr" 30 | #define INPUTS_RNA "rna" 31 | #define INPUTS_STRRNA "strandrna" 32 | #define INPUTS_RSTRRNA "strandrnaR" 33 | #define INPUTS_REP "reptime" 34 | #define INPUTS_RATIO "ratio" 35 | #define INPUTS_SUBSTRACT "subtract" 36 | #define INPUTS_READS "reads" 37 | #define INPUTS_SINGLE "single" 38 | #define INPUTS_PAIRED "paired" 39 | #define INPUTS_AUTO "auto" 40 | #define INPUTS_NO "no" 41 | #define INPUTS_SMALLEST "smallest" 42 | #define INPUTS_COV "cov" 43 | 44 | #ifdef __cplusplus 45 | extern "C" { 46 | #endif 47 | CMDINPUT *CreateCMDinput(void); 48 | int ParseCustomScaling(CMDINPUT *cmd, char *scales); 49 | void PrintScaleMessage(char *pname); 50 | CMDINPUT *ScaleParser(int argc, char **argv); 51 | void PrintMultiCovMessage(char *pname); 52 | CMDINPUT *MultiCovParser(int argc, char **argv); 53 | void DestroyCMDinput(CMDINPUT *ptr); 54 | #ifdef __cplusplus 55 | } 56 | #endif 57 | 58 | #endif /* INPUTS_H */ 59 | -------------------------------------------------------------------------------- /includes/Writer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | /* 8 | * File: Writer.h 9 | * Author: pongorls 10 | * 11 | * Created on December 19, 2018, 2:27 PM 12 | */ 13 | 14 | #ifndef WRITER_H 15 | #define WRITER_H 16 | 17 | #include "Definitions.h" 18 | #include "main.h" 19 | 20 | #ifdef __cplusplus 21 | extern "C" { 22 | #endif 23 | char *returnRNAfilename(CMDINPUT *cmd); 24 | void PrintScaledBigWig(CMDINPUT *cmd, BAMFILES *curr, char *sfile); 25 | #ifdef __cplusplus 26 | } 27 | #endif 28 | 29 | #endif /* WRITER_H */ 30 | -------------------------------------------------------------------------------- /includes/binning.h: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | /* 8 | * File: binning.h 9 | * Author: pongorls 10 | * 11 | * Created on November 28, 2018, 5:09 PM 12 | */ 13 | 14 | #ifndef BINNING_H 15 | #define BINNING_H 16 | #include "Definitions.h" 17 | #include "main.h" 18 | 19 | #ifdef __cplusplus 20 | extern "C" { 21 | #endif 22 | float *QuicksmoothenBins(float *carray, int smoothBins, int numBins); 23 | float *smoothenBins(float **carray, int smoothBins, int numBins); 24 | float *AddPseudoToZeroCov(float *coverage, int len); 25 | float *BinCoverage(int *coverage, int chr_len, int binSize, int nbins); 26 | #ifdef __cplusplus 27 | } 28 | #endif 29 | #endif /* BINNING_H */ 30 | -------------------------------------------------------------------------------- /includes/main.h: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | /* 8 | * File: main.h 9 | * Author: pongorls 10 | * 11 | * Created on November 28, 2018, 1:21 PM 12 | */ 13 | 14 | #ifndef MAIN_H 15 | #define MAIN_H 16 | #include "Definitions.h" 17 | 18 | #ifdef __cplusplus 19 | extern "C" { 20 | #endif 21 | void FreeAllocatedData(void); 22 | void ComputeCoverageChIPpeak(CMDINPUT *cmd); 23 | void NormalizeBAMSrna(CMDINPUT *cmd); 24 | void NormalizeBAMS(CMDINPUT *cmd); 25 | void PrintUsage(char *pname); 26 | int main(); 27 | 28 | 29 | 30 | #ifdef __cplusplus 31 | } 32 | #endif 33 | 34 | #endif /* MAIN_H */ 35 | 36 | -------------------------------------------------------------------------------- /includes/multithreads.h: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | /* 8 | * File: multithreads.h 9 | * Author: pongorls 10 | * 11 | * Created on November 30, 2018, 12:04 PM 12 | */ 13 | 14 | #ifndef MULTITHREADS_H 15 | #define MULTITHREADS_H 16 | #include "Definitions.h" 17 | #include "main.h" 18 | 19 | #ifdef __cplusplus 20 | extern "C" { 21 | #endif 22 | void DestroyThreadStruct(THREADS **head, int no_of_threads); 23 | THREADS *CreateThreadStruct(char *chrname); 24 | THREADS *AddElement(THREADS *head, char *chrname); 25 | #ifdef __cplusplus 26 | } 27 | #endif 28 | #endif /* MULTITHREADS_H */ 29 | -------------------------------------------------------------------------------- /includes/scale.h: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | /* 8 | * File: scale.h 9 | * Author: pongorls 10 | * 11 | * Created on November 28, 2018, 3:54 PM 12 | */ 13 | 14 | #ifndef SCALE_H 15 | #define SCALE_H 16 | #include "Definitions.h" 17 | #include "main.h" 18 | 19 | #ifdef __cplusplus 20 | extern "C" { 21 | #endif 22 | float *scaleBins(float *carray, float scale, int nbins, float pseudocount); 23 | void ScaleToSmallest(BAMFILES *head); 24 | void NoScale(BAMFILES *head); 25 | void ScaleToGenomeSize(BAMFILES *head, CHROMOSOMES *chead); 26 | void ScaleGenomeCoverage(BAMFILES *head, CHROMOSOMES *chead); 27 | BAMFILES *ComputeSamplescales(BAMFILES *head, CHROMOSOMES *chead, int scale); 28 | float *logTwoCoverageRatio(float *cov1, float *cov2, int nbins, float min_per_bin_cov); 29 | float *OKseqRFD(float *cov1, float *cov2, int nbins, float min_per_bin_cov); 30 | float *SubtractCoverage(float *cov1, float *cov2, int nbins, float min_per_bin_cov); 31 | float *CoverageRatio(float *cov1, float *cov2, int nbins, float min_per_bin_cov); 32 | float *SignedCoverageRatio(float *cov1, float *cov2, int nbins, int min_per_bin_cov); 33 | CHRCOV *CalculateChromosomeRatio(CHROMOSOMES *curr, CHRCOV *chead, int s1, int s2, int ratioType, int min_per_bin_cov); 34 | void PrintBedgraph(RATIOS *ptr, int binSize); 35 | char *returnChrName(char *input); 36 | void PrintBedgraphOrdered(RATIOS *ptr, int binSize, char *chromfile); 37 | void PrintBigWigOrdered(RATIOS *ptr, int binSize, char *chromfile); 38 | RATIOS *CalculateRatiosAll(RATIOS *head, CHROMOSOMES *chead, BAMFILES *bhead, int no_of_samples, int min_per_bin_cov, int smoothbin, int binSize, char *chromsizes); 39 | #ifdef __cplusplus 40 | } 41 | #endif 42 | #endif /* SCALE_H */ 43 | -------------------------------------------------------------------------------- /includes/segmenter.h: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | /* 8 | * File: segmenter.h 9 | * Author: pongorls 10 | * 11 | * Created on November 29, 2018, 1:27 PM 12 | */ 13 | 14 | #ifndef SEGMENTER_H 15 | #define SEGMENTER_H 16 | #include "Definitions.h" 17 | #include "main.h" 18 | 19 | #ifdef __cplusplus 20 | extern "C" { 21 | #endif 22 | void DestroySegments(SEGMENTS *head); 23 | SEGMENTS *createSegment(void); 24 | int compare_float (const void * a, const void * b); 25 | int64_t CalculateGenSize(CHROMOSOMES *head); 26 | int64_t CalculateNonZeroBins(CHROMOSOMES *head, int sampleid); 27 | float *ConcatenateGenome(CHROMOSOMES *head, int64_t gensize, int sampleid); 28 | void Segmenting(CHROMOSOMES *head, CMDINPUT *cmd, int sampleid, float upper, float median, float lower); 29 | void Quantiles(CHROMOSOMES *head, int sampleid, CMDINPUT* cmd); 30 | #ifdef __cplusplus 31 | } 32 | #endif 33 | #endif /* SEGMENTER_H */ 34 | -------------------------------------------------------------------------------- /nbproject/Makefile-Release.mk: -------------------------------------------------------------------------------- 1 | # 2 | # Generated Makefile - do not edit! 3 | # 4 | # Edit the Makefile in the project folder instead (../Makefile). Each target 5 | # has a -pre and a -post target defined where you can add customized code. 6 | # 7 | # This makefile implements configuration specific macros and targets. 8 | 9 | 10 | # Environment 11 | MKDIR=mkdir 12 | CP=cp 13 | GREP=grep 14 | NM=nm 15 | CCADMIN=CCadmin 16 | RANLIB=ranlib 17 | CC=gcc 18 | CCC=g++ 19 | CXX=g++ 20 | FC=gfortran 21 | AS=as 22 | 23 | # Macros 24 | CND_PLATFORM=GNU-MacOSX 25 | CND_DLIB_EXT=dylib 26 | CND_CONF=Release 27 | CND_DISTDIR=dist 28 | CND_BUILDDIR=build 29 | 30 | # Include project Makefile 31 | include Makefile 32 | 33 | # Object Directory 34 | OBJECTDIR=${CND_BUILDDIR}/${CND_CONF}/${CND_PLATFORM} 35 | 36 | # Object Files 37 | OBJECTFILES= \ 38 | ${OBJECTDIR}/src/BAMcoverage.o \ 39 | ${OBJECTDIR}/src/BAMstructs.o \ 40 | ${OBJECTDIR}/src/BEDstruct.o \ 41 | ${OBJECTDIR}/src/CHROMstruct.o \ 42 | ${OBJECTDIR}/src/Inputs.o \ 43 | ${OBJECTDIR}/src/Writer.o \ 44 | ${OBJECTDIR}/src/binning.o \ 45 | ${OBJECTDIR}/src/main.o \ 46 | ${OBJECTDIR}/src/multithreads.o \ 47 | ${OBJECTDIR}/src/scale.o \ 48 | ${OBJECTDIR}/src/segmenter.o 49 | 50 | 51 | # C Compiler Flags 52 | CFLAGS= 53 | 54 | # CC Compiler Flags 55 | CCFLAGS= 56 | CXXFLAGS= 57 | 58 | # Fortran Compiler Flags 59 | FFLAGS= 60 | 61 | # Assembler Flags 62 | ASFLAGS= 63 | 64 | # Link Libraries and Options 65 | LDLIBSOPTIONS= 66 | 67 | # Build Targets 68 | .build-conf: ${BUILD_SUBPROJECTS} 69 | "${MAKE}" -f nbproject/Makefile-${CND_CONF}.mk bin/BAMscale 70 | 71 | bin/BAMscale: ${OBJECTFILES} 72 | ${MKDIR} -p bin 73 | ${LINK.c} -o bin/BAMscale ${OBJECTFILES} ${LDLIBSOPTIONS} -lBigWig -lhts -lz -lm -lbz2 -llzma -lcurl -ldl -lpthread 74 | 75 | ${OBJECTDIR}/src/BAMcoverage.o: src/BAMcoverage.c 76 | ${MKDIR} -p ${OBJECTDIR}/src 77 | ${RM} "$@.d" 78 | $(COMPILE.c) -O2 -Iincludes -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/BAMcoverage.o src/BAMcoverage.c 79 | 80 | ${OBJECTDIR}/src/BAMstructs.o: src/BAMstructs.c 81 | ${MKDIR} -p ${OBJECTDIR}/src 82 | ${RM} "$@.d" 83 | $(COMPILE.c) -O2 -Iincludes -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/BAMstructs.o src/BAMstructs.c 84 | 85 | ${OBJECTDIR}/src/BEDstruct.o: src/BEDstruct.c 86 | ${MKDIR} -p ${OBJECTDIR}/src 87 | ${RM} "$@.d" 88 | $(COMPILE.c) -O2 -Iincludes -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/BEDstruct.o src/BEDstruct.c 89 | 90 | ${OBJECTDIR}/src/CHROMstruct.o: src/CHROMstruct.c 91 | ${MKDIR} -p ${OBJECTDIR}/src 92 | ${RM} "$@.d" 93 | $(COMPILE.c) -O2 -Iincludes -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/CHROMstruct.o src/CHROMstruct.c 94 | 95 | ${OBJECTDIR}/src/Inputs.o: src/Inputs.c 96 | ${MKDIR} -p ${OBJECTDIR}/src 97 | ${RM} "$@.d" 98 | $(COMPILE.c) -O2 -Iincludes -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/Inputs.o src/Inputs.c 99 | 100 | ${OBJECTDIR}/src/Writer.o: src/Writer.c 101 | ${MKDIR} -p ${OBJECTDIR}/src 102 | ${RM} "$@.d" 103 | $(COMPILE.c) -O2 -Iincludes -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/Writer.o src/Writer.c 104 | 105 | ${OBJECTDIR}/src/binning.o: src/binning.c 106 | ${MKDIR} -p ${OBJECTDIR}/src 107 | ${RM} "$@.d" 108 | $(COMPILE.c) -O2 -Iincludes -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/binning.o src/binning.c 109 | 110 | ${OBJECTDIR}/src/main.o: src/main.c 111 | ${MKDIR} -p ${OBJECTDIR}/src 112 | ${RM} "$@.d" 113 | $(COMPILE.c) -O2 -Iincludes -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/main.o src/main.c 114 | 115 | ${OBJECTDIR}/src/multithreads.o: src/multithreads.c 116 | ${MKDIR} -p ${OBJECTDIR}/src 117 | ${RM} "$@.d" 118 | $(COMPILE.c) -O2 -Iincludes -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/multithreads.o src/multithreads.c 119 | 120 | ${OBJECTDIR}/src/scale.o: src/scale.c 121 | ${MKDIR} -p ${OBJECTDIR}/src 122 | ${RM} "$@.d" 123 | $(COMPILE.c) -O2 -Iincludes -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/scale.o src/scale.c 124 | 125 | ${OBJECTDIR}/src/segmenter.o: src/segmenter.c 126 | ${MKDIR} -p ${OBJECTDIR}/src 127 | ${RM} "$@.d" 128 | $(COMPILE.c) -O2 -Iincludes -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/segmenter.o src/segmenter.c 129 | 130 | # Subprojects 131 | .build-subprojects: 132 | 133 | # Clean Targets 134 | .clean-conf: ${CLEAN_SUBPROJECTS} 135 | ${RM} -r ${CND_BUILDDIR}/${CND_CONF} 136 | 137 | # Subprojects 138 | .clean-subprojects: 139 | 140 | # Enable dependency checking 141 | .dep.inc: .depcheck-impl 142 | 143 | include .dep.inc 144 | -------------------------------------------------------------------------------- /nbproject/Makefile-impl.mk: -------------------------------------------------------------------------------- 1 | # 2 | # Generated Makefile - do not edit! 3 | # 4 | # Edit the Makefile in the project folder instead (../Makefile). Each target 5 | # has a pre- and a post- target defined where you can add customization code. 6 | # 7 | # This makefile implements macros and targets common to all configurations. 8 | # 9 | # NOCDDL 10 | 11 | 12 | # Building and Cleaning subprojects are done by default, but can be controlled with the SUB 13 | # macro. If SUB=no, subprojects will not be built or cleaned. The following macro 14 | # statements set BUILD_SUB-CONF and CLEAN_SUB-CONF to .build-reqprojects-conf 15 | # and .clean-reqprojects-conf unless SUB has the value 'no' 16 | SUB_no=NO 17 | SUBPROJECTS=${SUB_${SUB}} 18 | BUILD_SUBPROJECTS_=.build-subprojects 19 | BUILD_SUBPROJECTS_NO= 20 | BUILD_SUBPROJECTS=${BUILD_SUBPROJECTS_${SUBPROJECTS}} 21 | CLEAN_SUBPROJECTS_=.clean-subprojects 22 | CLEAN_SUBPROJECTS_NO= 23 | CLEAN_SUBPROJECTS=${CLEAN_SUBPROJECTS_${SUBPROJECTS}} 24 | 25 | 26 | # Project Name 27 | PROJECTNAME=BAMscale 28 | 29 | # Active Configuration 30 | DEFAULTCONF=Release 31 | CONF=${DEFAULTCONF} 32 | 33 | # All Configurations 34 | ALLCONFS=Release 35 | 36 | 37 | # build 38 | .build-impl: .build-pre .validate-impl .depcheck-impl 39 | @#echo "=> Running $@... Configuration=$(CONF)" 40 | "${MAKE}" -f nbproject/Makefile-${CONF}.mk QMAKE=${QMAKE} SUBPROJECTS=${SUBPROJECTS} .build-conf 41 | 42 | 43 | # clean 44 | .clean-impl: .clean-pre .validate-impl .depcheck-impl 45 | @#echo "=> Running $@... Configuration=$(CONF)" 46 | "${MAKE}" -f nbproject/Makefile-${CONF}.mk QMAKE=${QMAKE} SUBPROJECTS=${SUBPROJECTS} .clean-conf 47 | 48 | 49 | # clobber 50 | .clobber-impl: .clobber-pre .depcheck-impl 51 | @#echo "=> Running $@..." 52 | for CONF in ${ALLCONFS}; \ 53 | do \ 54 | "${MAKE}" -f nbproject/Makefile-$${CONF}.mk QMAKE=${QMAKE} SUBPROJECTS=${SUBPROJECTS} .clean-conf; \ 55 | done 56 | 57 | # all 58 | .all-impl: .all-pre .depcheck-impl 59 | @#echo "=> Running $@..." 60 | for CONF in ${ALLCONFS}; \ 61 | do \ 62 | "${MAKE}" -f nbproject/Makefile-$${CONF}.mk QMAKE=${QMAKE} SUBPROJECTS=${SUBPROJECTS} .build-conf; \ 63 | done 64 | 65 | # build tests 66 | .build-tests-impl: .build-impl .build-tests-pre 67 | @#echo "=> Running $@... Configuration=$(CONF)" 68 | "${MAKE}" -f nbproject/Makefile-${CONF}.mk SUBPROJECTS=${SUBPROJECTS} .build-tests-conf 69 | 70 | # run tests 71 | .test-impl: .build-tests-impl .test-pre 72 | @#echo "=> Running $@... Configuration=$(CONF)" 73 | "${MAKE}" -f nbproject/Makefile-${CONF}.mk SUBPROJECTS=${SUBPROJECTS} .test-conf 74 | 75 | # dependency checking support 76 | .depcheck-impl: 77 | @echo "# This code depends on make tool being used" >.dep.inc 78 | @if [ -n "${MAKE_VERSION}" ]; then \ 79 | echo "DEPFILES=\$$(wildcard \$$(addsuffix .d, \$${OBJECTFILES} \$${TESTOBJECTFILES}))" >>.dep.inc; \ 80 | echo "ifneq (\$${DEPFILES},)" >>.dep.inc; \ 81 | echo "include \$${DEPFILES}" >>.dep.inc; \ 82 | echo "endif" >>.dep.inc; \ 83 | else \ 84 | echo ".KEEP_STATE:" >>.dep.inc; \ 85 | echo ".KEEP_STATE_FILE:.make.state.\$${CONF}" >>.dep.inc; \ 86 | fi 87 | 88 | # configuration validation 89 | .validate-impl: 90 | @if [ ! -f nbproject/Makefile-${CONF}.mk ]; \ 91 | then \ 92 | echo ""; \ 93 | echo "Error: can not find the makefile for configuration '${CONF}' in project ${PROJECTNAME}"; \ 94 | echo "See 'make help' for details."; \ 95 | echo "Current directory: " `pwd`; \ 96 | echo ""; \ 97 | fi 98 | @if [ ! -f nbproject/Makefile-${CONF}.mk ]; \ 99 | then \ 100 | exit 1; \ 101 | fi 102 | 103 | 104 | # help 105 | .help-impl: .help-pre 106 | @echo "This makefile supports the following configurations:" 107 | @echo " ${ALLCONFS}" 108 | @echo "" 109 | @echo "and the following targets:" 110 | @echo " build (default target)" 111 | @echo " clean" 112 | @echo " clobber" 113 | @echo " all" 114 | @echo " help" 115 | @echo "" 116 | @echo "Makefile Usage:" 117 | @echo " make [CONF=] [SUB=no] build" 118 | @echo " make [CONF=] [SUB=no] clean" 119 | @echo " make [SUB=no] clobber" 120 | @echo " make [SUB=no] all" 121 | @echo " make help" 122 | @echo "" 123 | @echo "Target 'build' will build a specific configuration and, unless 'SUB=no'," 124 | @echo " also build subprojects." 125 | @echo "Target 'clean' will clean a specific configuration and, unless 'SUB=no'," 126 | @echo " also clean subprojects." 127 | @echo "Target 'clobber' will remove all built files from all configurations and," 128 | @echo " unless 'SUB=no', also from subprojects." 129 | @echo "Target 'all' will will build all configurations and, unless 'SUB=no'," 130 | @echo " also build subprojects." 131 | @echo "Target 'help' prints this message." 132 | @echo "" 133 | 134 | -------------------------------------------------------------------------------- /nbproject/Makefile-variables.mk: -------------------------------------------------------------------------------- 1 | # 2 | # Generated - do not edit! 3 | # 4 | # NOCDDL 5 | # 6 | CND_BASEDIR=`pwd` 7 | CND_BUILDDIR=build 8 | CND_DISTDIR=dist 9 | # Release configuration 10 | CND_PLATFORM_Release=GNU-MacOSX 11 | CND_ARTIFACT_DIR_Release=bin 12 | CND_ARTIFACT_NAME_Release=BAMscale 13 | CND_ARTIFACT_PATH_Release=bin/BAMscale 14 | CND_PACKAGE_DIR_Release=dist/Release/GNU-MacOSX/package 15 | CND_PACKAGE_NAME_Release=bamscale.tar 16 | CND_PACKAGE_PATH_Release=dist/Release/GNU-MacOSX/package/bamscale.tar 17 | # 18 | # include compiler specific variables 19 | # 20 | # dmake command 21 | ROOT:sh = test -f nbproject/private/Makefile-variables.mk || \ 22 | (mkdir -p nbproject/private && touch nbproject/private/Makefile-variables.mk) 23 | # 24 | # gmake command 25 | .PHONY: $(shell test -f nbproject/private/Makefile-variables.mk || (mkdir -p nbproject/private && touch nbproject/private/Makefile-variables.mk)) 26 | # 27 | include nbproject/private/Makefile-variables.mk 28 | -------------------------------------------------------------------------------- /nbproject/Package-Release.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | 3 | # 4 | # Generated - do not edit! 5 | # 6 | 7 | # Macros 8 | TOP=`pwd` 9 | CND_PLATFORM=GNU-MacOSX 10 | CND_CONF=Release 11 | CND_DISTDIR=dist 12 | CND_BUILDDIR=build 13 | CND_DLIB_EXT=dylib 14 | NBTMPDIR=${CND_BUILDDIR}/${CND_CONF}/${CND_PLATFORM}/tmp-packaging 15 | TMPDIRNAME=tmp-packaging 16 | OUTPUT_PATH=bin/BAMscale 17 | OUTPUT_BASENAME=BAMscale 18 | PACKAGE_TOP_DIR=bamscale/ 19 | 20 | # Functions 21 | function checkReturnCode 22 | { 23 | rc=$? 24 | if [ $rc != 0 ] 25 | then 26 | exit $rc 27 | fi 28 | } 29 | function makeDirectory 30 | # $1 directory path 31 | # $2 permission (optional) 32 | { 33 | mkdir -p "$1" 34 | checkReturnCode 35 | if [ "$2" != "" ] 36 | then 37 | chmod $2 "$1" 38 | checkReturnCode 39 | fi 40 | } 41 | function copyFileToTmpDir 42 | # $1 from-file path 43 | # $2 to-file path 44 | # $3 permission 45 | { 46 | cp "$1" "$2" 47 | checkReturnCode 48 | if [ "$3" != "" ] 49 | then 50 | chmod $3 "$2" 51 | checkReturnCode 52 | fi 53 | } 54 | 55 | # Setup 56 | cd "${TOP}" 57 | mkdir -p ${CND_DISTDIR}/${CND_CONF}/${CND_PLATFORM}/package 58 | rm -rf ${NBTMPDIR} 59 | mkdir -p ${NBTMPDIR} 60 | 61 | # Copy files and create directories and links 62 | cd "${TOP}" 63 | makeDirectory "${NBTMPDIR}/bamscale/bin" 64 | copyFileToTmpDir "${OUTPUT_PATH}" "${NBTMPDIR}/${PACKAGE_TOP_DIR}bin/${OUTPUT_BASENAME}" 0755 65 | 66 | 67 | # Generate tar file 68 | cd "${TOP}" 69 | rm -f ${CND_DISTDIR}/${CND_CONF}/${CND_PLATFORM}/package/bamscale.tar 70 | cd ${NBTMPDIR} 71 | tar -vcf ../../../../${CND_DISTDIR}/${CND_CONF}/${CND_PLATFORM}/package/bamscale.tar * 72 | checkReturnCode 73 | 74 | # Cleanup 75 | cd "${TOP}" 76 | rm -rf ${NBTMPDIR} 77 | -------------------------------------------------------------------------------- /nbproject/configurations.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | includes/BAMcoverage.h 8 | includes/BAMstructs.h 9 | includes/BEDstruct.h 10 | includes/CHROMstruct.h 11 | includes/Definitions.h 12 | includes/Inputs.h 13 | includes/Writer.h 14 | includes/binning.h 15 | includes/main.h 16 | includes/multithreads.h 17 | includes/scale.h 18 | includes/segmenter.h 19 | 20 | 23 | 24 | 27 | src/BAMcoverage.c 28 | src/BAMstructs.c 29 | src/BEDstruct.c 30 | src/CHROMstruct.c 31 | src/Inputs.c 32 | src/Writer.c 33 | src/binning.c 34 | src/main.c 35 | src/multithreads.c 36 | src/scale.c 37 | src/segmenter.c 38 | 39 | 43 | 44 | 48 | Makefile 49 | 50 | .gitignore 51 | Dockerfile 52 | LICENSE 53 | README.md 54 | 55 | 56 | src 57 | 58 | Makefile 59 | 60 | 61 | 62 | default 63 | true 64 | false 65 | 66 | 67 | 68 | 5 69 | 70 | includes 71 | 72 | 73 | 74 | 5 75 | 11 76 | 77 | 78 | 5 79 | 80 | 81 | 5 82 | 83 | 84 | bin/BAMscale 85 | 3 86 | -lBigWig -lhts -lz -lm -lbz2 -llzma -lcurl -ldl -lpthread 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | -------------------------------------------------------------------------------- /nbproject/private/Makefile-variables.mk: -------------------------------------------------------------------------------- 1 | # 2 | # Generated - do not edit! 3 | # 4 | # NOCDDL 5 | # 6 | # Release configuration 7 | -------------------------------------------------------------------------------- /nbproject/private/c_standard_headers_indexer.c: -------------------------------------------------------------------------------- 1 | /* 2 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 3 | * 4 | * Copyright (c) 2016 Oracle and/or its affiliates. All rights reserved. 5 | * 6 | * Oracle and Java are registered trademarks of Oracle and/or its affiliates. 7 | * Other names may be trademarks of their respective owners. 8 | * 9 | * The contents of this file are subject to the terms of either the GNU 10 | * General Public License Version 2 only ("GPL") or the Common 11 | * Development and Distribution License("CDDL") (collectively, the 12 | * "License"). You may not use this file except in compliance with the 13 | * License. You can obtain a copy of the License at 14 | * http://www.netbeans.org/cddl-gplv2.html 15 | * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the 16 | * specific language governing permissions and limitations under the 17 | * License. When distributing the software, include this License Header 18 | * Notice in each file and include the License file at 19 | * nbbuild/licenses/CDDL-GPL-2-CP. Oracle designates this 20 | * particular file as subject to the "Classpath" exception as provided 21 | * by Oracle in the GPL Version 2 section of the License file that 22 | * accompanied this code. If applicable, add the following below the 23 | * License Header, with the fields enclosed by brackets [] replaced by 24 | * your own identifying information: 25 | * "Portions Copyrighted [year] [name of copyright owner]" 26 | * 27 | * If you wish your version of this file to be governed by only the CDDL 28 | * or only the GPL Version 2, indicate your decision by adding 29 | * "[Contributor] elects to include this software in this distribution 30 | * under the [CDDL or GPL Version 2] license." If you do not indicate a 31 | * single choice of license, a recipient has the option to distribute 32 | * your version of this file under either the CDDL, the GPL Version 2 or 33 | * to extend the choice of license to its licensees as provided above. 34 | * However, if you add GPL Version 2 code and therefore, elected the GPL 35 | * Version 2 license, then the option applies only if the new code is 36 | * made subject to such option by the copyright holder. 37 | * 38 | * Contributor(s): 39 | */ 40 | 41 | // List of standard headers was taken in http://en.cppreference.com/w/c/header 42 | 43 | #include // Conditionally compiled macro that compares its argument to zero 44 | #include // Functions to determine the type contained in character data 45 | #include // Macros reporting error conditions 46 | #include // Limits of float types 47 | #include // Sizes of basic types 48 | #include // Localization utilities 49 | #include // Common mathematics functions 50 | #include // Nonlocal jumps 51 | #include // Signal handling 52 | #include // Variable arguments 53 | #include // Common macro definitions 54 | #include // Input/output 55 | #include // String handling 56 | #include // General utilities: memory management, program utilities, string conversions, random numbers 57 | #include // Time/date utilities 58 | #include // (since C95) Alternative operator spellings 59 | #include // (since C95) Extended multibyte and wide character utilities 60 | #include // (since C95) Wide character classification and mapping utilities 61 | #ifdef _STDC_C99 62 | #include // (since C99) Complex number arithmetic 63 | #include // (since C99) Floating-point environment 64 | #include // (since C99) Format conversion of integer types 65 | #include // (since C99) Boolean type 66 | #include // (since C99) Fixed-width integer types 67 | #include // (since C99) Type-generic math (macros wrapping math.h and complex.h) 68 | #endif 69 | #ifdef _STDC_C11 70 | #include // (since C11) alignas and alignof convenience macros 71 | #include // (since C11) Atomic types 72 | #include // (since C11) noreturn convenience macros 73 | #include // (since C11) Thread library 74 | #include // (since C11) UTF-16 and UTF-32 character utilities 75 | #endif 76 | -------------------------------------------------------------------------------- /nbproject/private/configurations.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | Makefile 4 | 5 | 6 | 7 | localhost 8 | 4 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | gdb 24 | 25 | 26 | 27 | "${OUTPUT_PATH}" scale --bam tmp.bam -t 8 -r unscaled 28 | "${OUTPUT_PATH}" scale --bam tmp.bam -t 8 -r unscaled -z 15 29 | "${OUTPUT_PATH}" scale --bam tmp.bam -t 8 -r unscaled -z 25 30 | "${OUTPUT_PATH}" scale --bam tmp.bam -t 8 -k no-z 25 31 | "${OUTPUT_PATH}" scale --bam tmp.bam -t 8 -k no -z 25 32 | "${OUTPUT_PATH}" scale --bam tmp.bam -t 8 -k no -r unscaled -z 25 33 | "${OUTPUT_PATH}" scale --bam tmp.bam -t 8 -k no -r unscaled -z 15 34 | "${OUTPUT_PATH}" scale --bam tmp.bam -t 8 -k no -r unscaled 35 | "${OUTPUT_PATH}" scale --bam /Users/pongorls/locus.bam 36 | "${OUTPUT_PATH}" scale --bam /Volumes/LMP/ngs/chip/SCLC_cell_lines/alignments/NCI-H69_H3K27ac_rep1.hg19_clean.bam 37 | 38 | "${OUTPUT_PATH}" scale --bam /Volumes/LMP/ngs/chip/SCLC_cell_lines/alignments/NCI-H69_H3K27ac_rep1.hg19_clean.bam 39 | 40 | true 41 | 0 42 | 0 43 | 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /nbproject/private/cpp_standard_headers_indexer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 3 | * 4 | * Copyright (c) 2016 Oracle and/or its affiliates. All rights reserved. 5 | * 6 | * Oracle and Java are registered trademarks of Oracle and/or its affiliates. 7 | * Other names may be trademarks of their respective owners. 8 | * 9 | * The contents of this file are subject to the terms of either the GNU 10 | * General Public License Version 2 only ("GPL") or the Common 11 | * Development and Distribution License("CDDL") (collectively, the 12 | * "License"). You may not use this file except in compliance with the 13 | * License. You can obtain a copy of the License at 14 | * http://www.netbeans.org/cddl-gplv2.html 15 | * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the 16 | * specific language governing permissions and limitations under the 17 | * License. When distributing the software, include this License Header 18 | * Notice in each file and include the License file at 19 | * nbbuild/licenses/CDDL-GPL-2-CP. Oracle designates this 20 | * particular file as subject to the "Classpath" exception as provided 21 | * by Oracle in the GPL Version 2 section of the License file that 22 | * accompanied this code. If applicable, add the following below the 23 | * License Header, with the fields enclosed by brackets [] replaced by 24 | * your own identifying information: 25 | * "Portions Copyrighted [year] [name of copyright owner]" 26 | * 27 | * If you wish your version of this file to be governed by only the CDDL 28 | * or only the GPL Version 2, indicate your decision by adding 29 | * "[Contributor] elects to include this software in this distribution 30 | * under the [CDDL or GPL Version 2] license." If you do not indicate a 31 | * single choice of license, a recipient has the option to distribute 32 | * your version of this file under either the CDDL, the GPL Version 2 or 33 | * to extend the choice of license to its licensees as provided above. 34 | * However, if you add GPL Version 2 code and therefore, elected the GPL 35 | * Version 2 license, then the option applies only if the new code is 36 | * made subject to such option by the copyright holder. 37 | * 38 | * Contributor(s): 39 | */ 40 | 41 | // List of standard headers was taken in http://en.cppreference.com/w/cpp/header 42 | 43 | #include // General purpose utilities: program control, dynamic memory allocation, random numbers, sort and search 44 | #include // Functions and macro constants for signal management 45 | #include // Macro (and function) that saves (and jumps) to an execution context 46 | #include // Handling of variable length argument lists 47 | #include // Runtime type information utilities 48 | #include // std::bitset class template 49 | #include // Function objects, designed for use with the standard algorithms 50 | #include // Various utility components 51 | #include // C-style time/date utilites 52 | #include // typedefs for types such as size_t, NULL and others 53 | #include // Low-level memory management utilities 54 | #include // Higher level memory management utilities 55 | #include // limits of integral types 56 | #include // limits of float types 57 | #include // standardized way to query properties of arithmetic types 58 | #include // Exception handling utilities 59 | #include // Standard exception objects 60 | #include // Conditionally compiled macro that compares its argument to zero 61 | #include // Macro containing the last error number 62 | #include // functions to determine the type contained in character data 63 | #include // functions for determining the type of wide character data 64 | #include // various narrow character string handling functions 65 | #include // various wide and multibyte string handling functions 66 | #include // std::basic_string class template 67 | #include // std::vector container 68 | #include // std::deque container 69 | #include // std::list container 70 | #include // std::set and std::multiset associative containers 71 | #include // std::map and std::multimap associative containers 72 | #include // std::stack container adaptor 73 | #include // std::queue and std::priority_queue container adaptors 74 | #include // Algorithms that operate on containers 75 | #include // Container iterators 76 | #include // Common mathematics functions 77 | #include // Complex number type 78 | #include // Class for representing and manipulating arrays of values 79 | #include // Numeric operations on values in containers 80 | #include // forward declarations of all classes in the input/output library 81 | #include // std::ios_base class, std::basic_ios class template and several typedefs 82 | #include // std::basic_istream class template and several typedefs 83 | #include // std::basic_ostream, std::basic_iostream class templates and several typedefs 84 | #include // several standard stream objects 85 | #include // std::basic_fstream, std::basic_ifstream, std::basic_ofstream class templates and several typedefs 86 | #include // std::basic_stringstream, std::basic_istringstream, std::basic_ostringstream class templates and several typedefs 87 | #include // std::strstream, std::istrstream, std::ostrstream(deprecated) 88 | #include // Helper functions to control the format or input and output 89 | #include // std::basic_streambuf class template 90 | #include // C-style input-output functions 91 | #include // Localization utilities 92 | #include // C localization utilities 93 | #include // empty header. The macros that appear in iso646.h in C are keywords in C++ 94 | #if __cplusplus >= 201103L 95 | #include // (since C++11) std::type_index 96 | #include // (since C++11) Compile-time type information 97 | #include // (since C++11) C++ time utilites 98 | #include // (since C++11) std::initializer_list class template 99 | #include // (since C++11) std::tuple class template 100 | #include // (since C++11) Nested allocator class 101 | #include // (since C++11) fixed-size types and limits of other types 102 | #include // (since C++11) formatting macros , intmax_t and uintmax_t math and conversions 103 | #include // (since C++11) defines std::error_code, a platform-dependent error code 104 | #include // (since C++11) C-style Unicode character conversion functions 105 | #include // (since C++11) std::array container 106 | #include // (since C++11) std::forward_list container 107 | #include // (since C++11) std::unordered_set and std::unordered_multiset unordered associative containers 108 | #include // (since C++11) std::unordered_map and std::unordered_multimap unordered associative containers 109 | #include // (since C++11) Random number generators and distributions 110 | #include // (since C++11) Compile-time rational arithmetic 111 | #include // (since C++11) Floating-point environment access functions 112 | #include // (since C++11) Unicode conversion facilities 113 | #include // (since C++11) Classes, algorithms and iterators to support regular expression processing 114 | #include // (since C++11) Atomic operations library 115 | #include // (since C++11)(deprecated in C++17) simply includes the header 116 | #include // (since C++11)(deprecated in C++17) simply includes the headers (until C++17) (since C++17) and : the overloads equivalent to the contents of the C header tgmath.h are already provided by those headers 117 | #include // (since C++11)(deprecated in C++17) defines one compatibility macro constant 118 | #include // (since C++11)(deprecated in C++17) defines one compatibility macro constant 119 | #include // (since C++11) std::thread class and supporting functions 120 | #include // (since C++11) mutual exclusion primitives 121 | #include // (since C++11) primitives for asynchronous computations 122 | #include // (since C++11) thread waiting conditions 123 | #endif 124 | #if __cplusplus >= 201300L 125 | #include // (since C++14) shared mutual exclusion primitives 126 | #endif 127 | #if __cplusplus >= 201500L 128 | #include // (since C++17) std::any class template 129 | #include // (since C++17) std::optional class template 130 | #include // (since C++17) std::variant class template 131 | #include // (since C++17) Polymorphic allocators and memory resources 132 | #include // (since C++17) std::basic_string_view class template 133 | #include // (since C++17) Predefined execution policies for parallel versions of the algorithms 134 | #include // (since C++17) std::path class and supporting functions 135 | #endif 136 | -------------------------------------------------------------------------------- /nbproject/private/launcher.properties: -------------------------------------------------------------------------------- 1 | # Launchers File syntax: 2 | # 3 | # [Must-have property line] 4 | # launcher1.runCommand= 5 | # [Optional extra properties] 6 | # launcher1.displayName= 7 | # launcher1.hide= 8 | # launcher1.buildCommand= 9 | # launcher1.runDir= 10 | # launcher1.runInOwnTab= 11 | # launcher1.symbolFiles= 12 | # launcher1.env.= 13 | # (If this value is quoted with ` it is handled as a native command which execution result will become the value) 14 | # [Common launcher properties] 15 | # common.runDir= 16 | # (This value is overwritten by a launcher specific runDir value if the latter exists) 17 | # common.env.= 18 | # (Environment variables from common launcher are merged with launcher specific variables) 19 | # common.symbolFiles= 20 | # (This value is overwritten by a launcher specific symbolFiles value if the latter exists) 21 | # 22 | # In runDir, symbolFiles and env fields you can use these macroses: 23 | # ${PROJECT_DIR} - project directory absolute path 24 | # ${OUTPUT_PATH} - linker output path (relative to project directory path) 25 | # ${OUTPUT_BASENAME}- linker output filename 26 | # ${TESTDIR} - test files directory (relative to project directory path) 27 | # ${OBJECTDIR} - object files directory (relative to project directory path) 28 | # ${CND_DISTDIR} - distribution directory (relative to project directory path) 29 | # ${CND_BUILDDIR} - build directory (relative to project directory path) 30 | # ${CND_PLATFORM} - platform name 31 | # ${CND_CONF} - configuration name 32 | # ${CND_DLIB_EXT} - dynamic library extension 33 | # 34 | # All the project launchers must be listed in the file! 35 | # 36 | # launcher1.runCommand=... 37 | # launcher2.runCommand=... 38 | # ... 39 | # common.runDir=... 40 | # common.env.KEY=VALUE 41 | 42 | # launcher1.runCommand= -------------------------------------------------------------------------------- /nbproject/private/private.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 1 5 | 0 6 | 7 | 8 | 9 | 10 | file:/Users/pongorls/NetBeansProjects/BAMscale/BAMscale/src/Writer.c 11 | file:/Users/pongorls/NetBeansProjects/BAMscale/BAMscale/src/Inputs.c 12 | file:/Users/pongorls/NetBeansProjects/BAMscale/BAMscale/includes/BAMstructs.h 13 | file:/Users/pongorls/NetBeansProjects/BAMscale/BAMscale/src/BAMstructs.c 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /nbproject/project.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | org.netbeans.modules.cnd.makeproject 4 | 5 | 6 | BAMscale 7 | c 8 | 9 | h 10 | UTF-8 11 | 12 | 13 | src 14 | 15 | 16 | 17 | Release 18 | 1 19 | 20 | 21 | 22 | false 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /src/BAMstructs.c: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | /* 8 | * File: BAMstructs.c 9 | * Author: pongorls 10 | * 11 | * Created on November 28, 2018, 11:58 AM 12 | */ 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include "Definitions.h" 25 | #include "BAMstructs.h" 26 | 27 | int CheckIndexShortFile(char *fname) { 28 | if(fname == NULL) 29 | return 0; 30 | 31 | char *idx = (char *)calloc(strlen(fname) + 1, sizeof(char)); 32 | strcpy(idx, fname); 33 | idx[strlen(idx)-1] = 'i'; 34 | 35 | printf("%s\n", idx); 36 | 37 | if(access( idx, F_OK ) == -1) { 38 | if(idx) 39 | free(idx); 40 | 41 | return 0; 42 | 43 | } 44 | 45 | if(idx) 46 | free(idx); 47 | 48 | return 1; 49 | } 50 | 51 | int CheckIndexFile(char *fname) { 52 | if(fname == NULL) 53 | return 0; 54 | 55 | char *idx = (char *)calloc(strlen(fname) + 5, sizeof(char)); 56 | strcpy(idx, fname); 57 | strcat(idx, ".bai"); 58 | 59 | if(access( idx, F_OK ) == -1) { 60 | if(idx) 61 | free(idx); 62 | return CheckIndexShortFile(fname); 63 | } 64 | 65 | if(idx) 66 | free(idx); 67 | 68 | return 1; 69 | } 70 | 71 | void DestroyBAMstruct(BAMFILES *head) { 72 | BAMFILES *curr = head; 73 | 74 | while (head != NULL) { 75 | curr = head; 76 | head = head->next; 77 | 78 | if (curr->name) 79 | free(curr->name); 80 | 81 | if (curr) 82 | free(curr); 83 | } 84 | 85 | head = NULL; 86 | curr = NULL; 87 | } 88 | 89 | BAMFILES *AddBAMstruct(char *BAMname, BAMFILES *head) { 90 | BAMFILES *ptr = (BAMFILES *) calloc (1, sizeof (BAMFILES)); 91 | BAMFILES *curr = head; 92 | ptr->name = NULL; 93 | ptr->shortname = NULL; 94 | ptr->read_coverage = -1; 95 | ptr->scale = 1; 96 | ptr->next = NULL; 97 | ptr->filtered_reads = 0; 98 | ptr->base_coverage = 0; 99 | ptr->genome_scale = 0.0; 100 | 101 | ptr->name = strdup(BAMname); 102 | char *p = strrchr(BAMname, '/'); 103 | 104 | if(p) { 105 | p++; 106 | ptr->shortname = strdup(p); 107 | } 108 | 109 | else { 110 | ptr->shortname = strdup(BAMname); 111 | } 112 | 113 | if (head == NULL) { 114 | ptr->id = 0; 115 | head = ptr; 116 | } else { 117 | while (curr->next != NULL) { 118 | curr = curr->next; 119 | } 120 | 121 | curr->next = ptr; 122 | ptr->id = curr->id + 1; 123 | } 124 | 125 | return head; 126 | } 127 | 128 | void PrintBAMstructs(BAMFILES *head) { 129 | BAMFILES *curr = head; 130 | 131 | while (curr != NULL) { 132 | printf("File: %s\n\tID:%d\n\tNo. of reads: %d\n\tScale: %f\n", curr->name, curr->id, curr->read_coverage, curr->scale); 133 | curr = curr->next; 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /src/CHROMstruct.c: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | /* 8 | * File: CHROMstruct.c 9 | * Author: pongorls 10 | * 11 | * Created on November 28, 2018, 12:34 PM 12 | */ 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "Definitions.h" 19 | #include "CHROMstruct.h" 20 | #include "main.h" 21 | 22 | uint32_t *GetChrLens(CHROMOSOMES *head, int no_of_chrs) { 23 | uint32_t *chrlens = (uint32_t *) malloc(sizeof (uint32_t) * no_of_chrs); 24 | CHROMOSOMES *curr = head; 25 | int i = 0; 26 | 27 | while (curr != NULL) { 28 | chrlens[i] = (uint32_t) curr->length; 29 | i++; 30 | curr = curr->next; 31 | } 32 | 33 | return chrlens; 34 | } 35 | 36 | int CountNumberOfChromosomes(CHROMOSOMES *head) { 37 | CHROMOSOMES *curr = head; 38 | int i = 0; 39 | 40 | while (curr != NULL) { 41 | i++; 42 | curr = curr->next; 43 | } 44 | 45 | return i; 46 | } 47 | 48 | char **GetChromosomeNames(CHROMOSOMES *head, int no_of_chrs) { 49 | char **chrnames = NULL; 50 | int i = 0; 51 | CHROMOSOMES *curr = head; 52 | 53 | if (no_of_chrs < 1) 54 | return NULL; 55 | 56 | chrnames = (char **) calloc(no_of_chrs+1, sizeof (char *)); 57 | 58 | while (curr != NULL) { 59 | if (i < no_of_chrs) { 60 | chrnames[i] = strdup(curr->name); 61 | i++; 62 | } else { 63 | fprintf(stderr, "WARNINGS: disregarding %s, there are more chromosomes than specified?\n", curr->name); 64 | } 65 | 66 | curr = curr->next; 67 | } 68 | 69 | return chrnames; 70 | } 71 | 72 | float CalculateGenomeSize(CHROMOSOMES *head) { 73 | float genome_size = 0; 74 | 75 | while (head != NULL) { 76 | if (head->blacklist == 0 && head->length > 0) 77 | genome_size += (float) head->length; 78 | 79 | head = head->next; 80 | } 81 | 82 | return genome_size; 83 | } 84 | 85 | /** 86 | * Creates new CHROMOSOME structure at end of HEAD. 87 | * @param head is the CHROMOSOME linked list head, name is the chr name 88 | * @return HEAD pointer to the CHROMOSOME structure head 89 | */ 90 | CHROMOSOMES *AddCHROMstruct(CHROMOSOMES *head, char *name, int length, int no_of_samples, int threadID) { 91 | CHROMOSOMES *ptr = (CHROMOSOMES *) malloc(sizeof (CHROMOSOMES)); 92 | CHROMOSOMES *curr = head; 93 | 94 | ptr->blacklist = 0; 95 | ptr->length = length; 96 | ptr->numberOfBins = -1; 97 | ptr->id = -1; 98 | ptr->allocated = 0; 99 | 100 | ptr->name = NULL; 101 | ptr->next = NULL; 102 | ptr->coverages = NULL; 103 | ptr->tid = threadID; 104 | ptr->name = strdup(name); 105 | ptr->idxreads = (int *) malloc(no_of_samples * sizeof (int)); 106 | 107 | for(int i =0 ; i < no_of_samples; i++){ 108 | ptr->idxreads[i] = 0; 109 | } 110 | 111 | if (head == NULL) { 112 | ptr->id = 0; 113 | head = ptr; 114 | } else { 115 | while (curr->next != NULL) { 116 | curr = curr->next; 117 | } 118 | 119 | curr->next = ptr; 120 | ptr->id = curr->id + 1; 121 | } 122 | 123 | return head; 124 | } 125 | 126 | /* 127 | * 128 | */ 129 | void DestroyCHROMstruct(CHROMOSOMES *head, int no_of_samples) { 130 | CHROMOSOMES *curr = head; 131 | int i = 0; 132 | 133 | while (head != NULL) { 134 | curr = head; 135 | head = head->next; 136 | curr->next = NULL; 137 | 138 | if (curr->coverages) { 139 | for (i = 0; i < no_of_samples; i++) { 140 | if (curr->coverages[i]){ 141 | free(curr->coverages[i]); 142 | } 143 | } 144 | free(curr->coverages); 145 | } 146 | 147 | if (curr->idxreads) 148 | free(curr->idxreads); 149 | 150 | if (curr->name) 151 | free(curr->name); 152 | 153 | if (curr) 154 | free(curr); 155 | } 156 | } 157 | 158 | CHROMOSOMES *ImportChromosomeDataFromBAM(char *bamfile, int no_of_samples, int threads) { 159 | CHROMOSOMES *head = NULL; 160 | samFile *fp_in = hts_open(bamfile, "r"); 161 | bam_hdr_t *hdr = sam_hdr_read(fp_in); 162 | int i = 0; 163 | int j = 0; 164 | 165 | for (i = 0; i < hdr->n_targets; i++) { 166 | head = AddCHROMstruct(head, hdr->target_name[i], (int) hdr->target_len[i], no_of_samples, j); 167 | 168 | j++; 169 | 170 | if (j >= threads) { 171 | j = 0; 172 | } 173 | } 174 | 175 | bam_hdr_destroy(hdr); 176 | sam_close(fp_in); 177 | return head; 178 | } 179 | 180 | void PrintChromosomes(CHROMOSOMES *head, int no_of_samples) { 181 | int i; 182 | CHROMOSOMES *curr = head; 183 | 184 | while (curr != NULL) { 185 | printf("Name: %s\n", curr->name); 186 | printf("\tID: %d\n", curr->id); 187 | printf("\tLength: %d\n", curr->length); 188 | printf("\tBlacklisted: %d\n", curr->blacklist); 189 | printf("\tNumber of bins: %d\n", curr->numberOfBins); 190 | printf("\tAllocated sample bins: %d\n", curr->allocated); 191 | printf("\tThread ID: %d\n", curr->tid); 192 | 193 | if (curr->idxreads) { 194 | for (i = 0; i < no_of_samples; i++) 195 | printf("\t\t[ %d ] = %d\n", i, curr->idxreads[i]); 196 | } 197 | curr = curr->next; 198 | } 199 | } 200 | 201 | void PrintBlacklistedChromosomes(CHROMOSOMES *head, int no_of_samples) { 202 | int i; 203 | CHROMOSOMES *curr = head; 204 | 205 | while (curr != NULL) { 206 | if (curr->blacklist == 1) { 207 | printf("Name: %s\n", curr->name); 208 | printf("\tID: %d\n", curr->id); 209 | printf("\tLength: %d\n", curr->length); 210 | printf("\tBlacklisted: %d\n", curr->blacklist); 211 | printf("\tNumber of bins: %d\n", curr->numberOfBins); 212 | printf("\tAllocated sample bins: %d\n", curr->allocated); 213 | 214 | if (curr->idxreads) { 215 | for (i = 0; i < no_of_samples; i++) 216 | printf("\t\t[ %d ] = %d\n", i, curr->idxreads[i]); 217 | } 218 | } 219 | curr = curr->next; 220 | } 221 | } 222 | 223 | CHROMOSOMES *ComputeBins(CHROMOSOMES *head, int binSize) { 224 | CHROMOSOMES *curr = head; 225 | 226 | while (curr != NULL) { 227 | curr->numberOfBins = curr->length / binSize; 228 | curr = curr->next; 229 | } 230 | 231 | return head; 232 | } 233 | 234 | CHROMOSOMES *AllocateBins(CHROMOSOMES *head, int no_of_samples) { 235 | CHROMOSOMES *curr = head; 236 | int i = 0; 237 | 238 | if (no_of_samples <= 0) { 239 | printf("ERROR: no samples were specified??"); 240 | FreeAllocatedData(); 241 | exit(0); 242 | } 243 | 244 | while (curr != NULL) { 245 | if (curr->numberOfBins > -1 && curr->blacklist == 0) { 246 | curr->coverages = (float **) calloc(no_of_samples+1, sizeof (float *)); 247 | 248 | if (curr->coverages == NULL) { 249 | printf("ERROR: could not allocate memory for bins at chr: %s\n", curr->name); 250 | FreeAllocatedData(); 251 | exit(0); 252 | } 253 | 254 | for (i = 0; i < no_of_samples; i++) { 255 | curr->allocated++; 256 | } 257 | } else { 258 | curr->coverages = NULL; 259 | } 260 | 261 | curr = curr->next; 262 | } 263 | 264 | return head; 265 | } 266 | 267 | CHROMOSOMES *BlacklistChromosome(CHROMOSOMES *head, char *name) { 268 | CHROMOSOMES *curr = head; 269 | int found = 0; 270 | 271 | while (curr != NULL) { 272 | if (strcmp(name, curr->name) == 0) { 273 | curr->blacklist = 1; 274 | curr->tid = -1; 275 | found++; 276 | } 277 | 278 | curr = curr->next; 279 | } 280 | 281 | if (found == 0) { 282 | printf("WARNING: \"%s\" chromosome not found and could not be blacklisted\n", name); 283 | } 284 | 285 | return head; 286 | } 287 | 288 | void BlacklistChromosomeFiles(CHROMOSOMES *head, char *filename) { 289 | FILE *handler = fopen(filename, "r"); 290 | char line[BUFSIZ]; 291 | char *pos; 292 | 293 | while (fgets(line, sizeof (line), handler)) { 294 | if ((pos = strchr(line, '\n')) != NULL) 295 | *pos = '\0'; 296 | 297 | head = BlacklistChromosome(head, line); 298 | } 299 | 300 | fclose(handler); 301 | } 302 | 303 | void DestroyChromCovStruct(CHRCOV *head) { 304 | CHRCOV *curr = head; 305 | 306 | while (head != NULL) { 307 | curr = head; 308 | head = head->next; 309 | 310 | if (curr->name) 311 | free(curr->name); 312 | 313 | if (curr->ratio) 314 | free(curr->ratio); 315 | 316 | if (curr) 317 | free(curr); 318 | } 319 | } 320 | 321 | CHRCOV *CreateChromCovStruct(char *name, int id, int nbins) { 322 | CHRCOV *ptr = (CHRCOV *) malloc(sizeof (CHRCOV)); 323 | 324 | ptr->name = strdup(name); 325 | 326 | ptr->id = id; 327 | ptr->nbins = nbins; 328 | 329 | ptr->next = NULL; 330 | ptr->ratio = NULL; 331 | 332 | return ptr; 333 | } 334 | 335 | void DestroyRatioStruct(RATIOS *ptr) { 336 | if (ptr->sample1) 337 | free(ptr->sample1); 338 | 339 | if (ptr->sample2) 340 | free(ptr->sample2); 341 | 342 | if (ptr) 343 | free(ptr); 344 | 345 | if (ptr->chrcovs) 346 | DestroyChromCovStruct(ptr->chrcovs); 347 | 348 | ptr = NULL; 349 | } 350 | 351 | RATIOS *CreateRatioStruct(char *s1, char *s2, int id1, int id2) { 352 | RATIOS *ptr = (RATIOS *) malloc(sizeof (RATIOS)); 353 | ptr->chrcovs = NULL; 354 | ptr->s1 = id1; 355 | ptr->s2 = id2; 356 | 357 | ptr->sample1 = strdup(s1); 358 | 359 | ptr->sample2 = strdup(s2); 360 | 361 | return ptr; 362 | } -------------------------------------------------------------------------------- /src/Writer.c: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | /* 8 | * File: Writer.c 9 | * Author: pongorls 10 | * 11 | * Created on December 19, 2018, 2:27 PM 12 | */ 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include 21 | 22 | #include "Writer.h" 23 | #include "Definitions.h" 24 | #include "main.h" 25 | #include "scale.h" 26 | #include "CHROMstruct.h" 27 | #include "segmenter.h" 28 | #include "binning.h" 29 | #include "Inputs.h" 30 | 31 | char *returnRNAfilename(CMDINPUT *cmd) { 32 | int fnamelen = 0; 33 | char *outfile = NULL; 34 | 35 | if (cmd->outdir != NULL) 36 | fnamelen += strlen(cmd->outdir); 37 | 38 | fnamelen += strlen(cmd->bamfiles->shortname); 39 | 40 | if(cmd->strandsplit == 1) 41 | fnamelen += strlen(".positive"); 42 | 43 | fnamelen += 50; 44 | 45 | outfile = (char *) calloc((fnamelen*2 + 1), sizeof (char)); 46 | 47 | if (cmd->outdir != NULL) 48 | strcpy(outfile, cmd->outdir); 49 | 50 | else 51 | strcpy(outfile, "./"); 52 | 53 | strcat(outfile, "/"); 54 | 55 | strcat(outfile, cmd->bamfiles->shortname); 56 | 57 | if(cmd->strandsplit == 1) { 58 | if(cmd->strand == 1) 59 | strcat(outfile, ".positive"); 60 | 61 | if(cmd->strand == -1) 62 | strcat(outfile, ".negative"); 63 | } 64 | 65 | strcat(outfile, "."); 66 | strcat(outfile, cmd->operation); 67 | 68 | strcat(outfile, ".bw"); 69 | 70 | return outfile; 71 | } 72 | 73 | void PrintScaledBigWig(CMDINPUT *cmd, BAMFILES *curr, char *sfile) { 74 | char **chrnames = NULL; 75 | uint32_t *chrlens = NULL; 76 | uint32_t start = 0; 77 | int no_of_chrs = CountNumberOfChromosomes(cmd->chr); 78 | char *outfile = NULL; 79 | int fnamelen = 0; 80 | bigWigFile_t *fp = NULL; 81 | int i, j = 0; 82 | CHROMOSOMES *chr = cmd->chr; 83 | float *intervals = NULL; 84 | int blocksize = 25; 85 | int end, currblocksize, non_empty = 0; 86 | 87 | chrnames = GetChromosomeNames(cmd->chr, no_of_chrs); 88 | chrlens = GetChrLens(cmd->chr, no_of_chrs); 89 | 90 | if (cmd->outdir != NULL) 91 | fnamelen += strlen(cmd->outdir); 92 | 93 | fnamelen += strlen(curr->shortname); 94 | 95 | if(cmd->strandsplit == 1) 96 | fnamelen += strlen(".positive"); 97 | 98 | if (sfile != NULL) 99 | fnamelen += strlen(sfile); 100 | 101 | fnamelen += 50; 102 | 103 | outfile = (char *) calloc((fnamelen*2 + 1), sizeof (char)); 104 | 105 | if (cmd->outdir != NULL) 106 | strcpy(outfile, cmd->outdir); 107 | 108 | else 109 | strcpy(outfile, "./"); 110 | 111 | strcat(outfile, "/"); 112 | 113 | if (sfile == NULL) { 114 | strcat(outfile, curr->shortname); 115 | 116 | if(cmd->strandsplit == 1) { 117 | if(cmd->strand == 1) 118 | strcat(outfile, ".positive"); 119 | 120 | if(cmd->strand == -1) 121 | strcat(outfile, ".negative"); 122 | } 123 | } 124 | else { 125 | strcat(outfile, curr->shortname); 126 | 127 | if(strcmp(cmd->operation, INPUTS_END) != 0 && strcmp(cmd->operation, INPUTS_ENDR) != 0 && strcmp(cmd->operation, INPUTS_RFD) != 0) { 128 | strcat(outfile, "_vs_"); 129 | strcat(outfile, sfile); 130 | } 131 | 132 | if(strcmp(cmd->operation, INPUTS_END) == 0 || strcmp(cmd->operation, INPUTS_ENDR) == 0) { 133 | strcat(outfile, ".log2"); 134 | } 135 | } 136 | 137 | strcat(outfile, "."); 138 | strcat(outfile, cmd->operation); 139 | 140 | strcat(outfile, ".bw"); 141 | 142 | fp = bwOpen(outfile, NULL, "w"); 143 | bwCreateHdr(fp, cmd->binSize); 144 | fp->cl = bwCreateChromList(chrnames, chrlens, no_of_chrs); 145 | bwWriteHdr(fp); 146 | 147 | while (chr != NULL) { 148 | if (chr->blacklist == 0 && chr->length > cmd->binSize) { //&& strcmp(chr->name, "chr1") == 0 149 | if (chr->length > 10000000) 150 | printf("Writing: %s\n", chr->name); 151 | 152 | int startwrite = 0; 153 | 154 | if(cmd->strand == -1 && strcmp(cmd->operation, INPUTS_ENDR) == 0) { 155 | for(i = 0; i <= chr->numberOfBins - 1; i++) { 156 | if(intervals[i] > 0) 157 | intervals[i] = -intervals[i]; 158 | } 159 | } 160 | 161 | //bwAddIntervalSpanSteps(fp, chr->name, start, (uint32_t)cmd->binSize, (uint32_t)cmd->binSize, chr->coverages[curr->id], (uint32_t)chr->numberOfBins-1); 162 | 163 | for(int i = 0; i < chr->numberOfBins - 1; i = i + blocksize) { 164 | start = (uint32_t)(i * cmd->binSize); 165 | end = i + blocksize; 166 | currblocksize = blocksize; 167 | non_empty = 0; 168 | 169 | if(end > chr->numberOfBins - 1) { 170 | end = chr->numberOfBins - 1; 171 | currblocksize = end - i; 172 | } 173 | 174 | for(j = i; j < end; j++) { 175 | if(chr->coverages[curr->id][j] != 0) { 176 | non_empty = 1; 177 | } 178 | } 179 | 180 | if(non_empty == 1) { 181 | if(startwrite == 0) { 182 | bwAddIntervalSpanSteps(fp, chr->name, start, (uint32_t)cmd->binSize, (uint32_t)cmd->binSize, chr->coverages[curr->id] + i, (uint32_t)currblocksize); 183 | } else { 184 | bwAppendIntervalSpanSteps(fp, chr->coverages[curr->id] + i, (uint32_t)currblocksize); 185 | } 186 | startwrite++; 187 | } else { 188 | startwrite = 0; 189 | } 190 | } 191 | 192 | if(cmd->strand == -1 && strcmp(cmd->operation, INPUTS_ENDR) == 0) { 193 | for(i = 0; i <= chr->numberOfBins - 1; i++) { 194 | if(intervals[i] < 0) { 195 | intervals[i] = -intervals[i]; 196 | } 197 | } 198 | } 199 | } 200 | 201 | chr = chr->next; 202 | } 203 | 204 | bwClose(fp); 205 | bwCleanup(); 206 | 207 | if (chrnames) { 208 | for (i = 0; i < no_of_chrs; i++) { 209 | if (chrnames[i]) 210 | free(chrnames[i]); 211 | } 212 | free(chrnames); 213 | } 214 | if (chrlens) 215 | free(chrlens); 216 | 217 | if (outfile) 218 | free(outfile); 219 | } -------------------------------------------------------------------------------- /src/binning.c: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | /* 8 | * File: binning.c 9 | * Author: pongorls 10 | * 11 | * Created on November 28, 2018, 5:09 PM 12 | */ 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include "BAMcoverage.h" 21 | #include "main.h" 22 | #include "CHROMstruct.h" 23 | #include "BAMstructs.h" 24 | #include "binning.h" 25 | 26 | float *QuicksmoothenBins(float *carray, int smoothBins, int numBins) { 27 | float *smoothed = (float *)calloc(numBins + 1, sizeof(float)); 28 | int i = 0; 29 | int j = 0; 30 | int binstart = 0; 31 | int binend = 0; 32 | float binmean = 0; 33 | int runsum_state = 0; 34 | float runsum = -1; 35 | 36 | for(i = 0; i < numBins; i++) { 37 | binmean = 0; 38 | binstart = i - smoothBins; 39 | binend = i + smoothBins; 40 | runsum_state = 1; 41 | 42 | if(binstart < 1) { 43 | if(binstart < 0) 44 | binstart = 0; 45 | 46 | runsum_state = 0; 47 | } 48 | 49 | if(binend >= numBins) { 50 | binend = numBins - 1; 51 | runsum_state = 0; 52 | } 53 | 54 | if(binend - binstart > 0) { 55 | if(runsum_state == 0) { 56 | for(j = binstart; j <= binend; j++) { 57 | binmean += carray[j]; 58 | } 59 | } 60 | 61 | else { 62 | binmean = runsum + carray[binend] - carray[binstart - 1]; 63 | } 64 | 65 | runsum = binmean; 66 | 67 | if(binmean != 0) { 68 | binmean = binmean / (float)(binend - binstart); 69 | } 70 | 71 | smoothed[i] = binmean; 72 | } 73 | } 74 | 75 | if(carray) 76 | free(carray); 77 | 78 | return smoothed; 79 | } 80 | 81 | 82 | float *smoothenBins(float **carray, int smoothBins, int numBins) { 83 | float *smoothed = (float *) calloc(numBins, sizeof (float)); 84 | int i = 0; 85 | int j = 0; 86 | int binstart = 0; 87 | int binend = 0; 88 | float binmean = 0; 89 | 90 | for (i = 0; i < numBins; i++) { 91 | binmean = 0; 92 | binstart = i - smoothBins; 93 | binend = i + smoothBins; 94 | 95 | if (binstart < 0) 96 | binstart = 0; 97 | 98 | if (binend >= numBins) 99 | binend = numBins - 1; 100 | 101 | if (binend - binstart > 0) { 102 | for (j = binstart; j <= binend; j++) { 103 | binmean += (*carray)[j]; 104 | } 105 | 106 | if (binmean != 0) { 107 | binmean = binmean / (float) (binend - binstart); 108 | } 109 | 110 | smoothed[i] = binmean; 111 | } else { 112 | smoothed[i] = 0; 113 | } 114 | } 115 | 116 | if (*carray) 117 | free(*carray); 118 | 119 | return smoothed; 120 | } 121 | 122 | float *AddPseudoToZeroCov(float *coverage, int len) { 123 | int i = 0; 124 | 125 | for (i = 0; i < len; i++) { 126 | coverage[i] += 1; 127 | } 128 | 129 | return coverage; 130 | } 131 | 132 | float *BinCoverage(int *coverage, int chr_len, int binSize, int nbins) { 133 | float *bincov = (float *) calloc(nbins+1, sizeof (float)); 134 | int i = 0; 135 | int j = 0; 136 | int binstart = 0; 137 | int binend = 0; 138 | 139 | for (i = 0; i < nbins; i++) { 140 | if (i == 0) { 141 | binstart = 0; 142 | binend = binSize; 143 | } else { 144 | binstart = i*binSize; 145 | binend = (i + 1) * binSize; 146 | } 147 | 148 | if (binstart >= chr_len) { 149 | binstart = chr_len-1; 150 | } 151 | 152 | if (binend >= chr_len) { 153 | binend = chr_len-1; 154 | } 155 | 156 | for (j = binstart; j < binend; j++) { 157 | bincov[i] += (float) coverage[j]; 158 | } 159 | 160 | if(bincov[i] > 0) 161 | bincov[i] = bincov[i] / ((float) (binend - binstart)); 162 | } 163 | 164 | return bincov; 165 | } -------------------------------------------------------------------------------- /src/main.c: -------------------------------------------------------------------------------- 1 | /* 2 | * File: main.c 3 | * Author: pongorls 4 | * 5 | * Created on November 28, 2018, 11:55 AM 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include 19 | 20 | #include "main.h" 21 | #include "Definitions.h" 22 | #include "BAMstructs.h" 23 | #include "CHROMstruct.h" 24 | #include "BAMcoverage.h" 25 | #include "scale.h" 26 | #include "segmenter.h" 27 | #include "multithreads.h" 28 | #include "BEDstruct.h" 29 | #include "Inputs.h" 30 | #include "Writer.h" 31 | #include 32 | 33 | BAMFILES *BAMhead = NULL; 34 | BAMFILES *BAMcurr = NULL; 35 | 36 | CHROMOSOMES *CHROMhead = NULL; 37 | CHROMOSOMES *CHROMcurr = NULL; 38 | 39 | CHRCOV *CCOVhead = NULL; 40 | CHRCOV *CCOVcurr = NULL; 41 | 42 | RATIOS *rhead = NULL; 43 | RATIOS *rcurr = NULL; 44 | 45 | int no_of_samples; 46 | int bamcoverage = 0; //0: quick (index), 1: count all reads 47 | 48 | void FreeAllocatedData(void) { 49 | DestroyBAMstruct(BAMhead); 50 | DestroyCHROMstruct(CHROMhead, no_of_samples); 51 | //DestroyRatioStruct(rhead); 52 | } 53 | 54 | void ComputeCoverageChIPpeak(CMDINPUT *cmd) { 55 | PEAK *head = NULL; 56 | BAMFILES *curr = NULL; 57 | char *ofile = NULL; 58 | int ofile_len = 0; 59 | 60 | CHROMhead = ImportChromosomeDataFromBAM(cmd->bamfiles->name, cmd->no_of_samples, cmd->threads); 61 | 62 | if (cmd->libtype == -1) { 63 | fprintf(stderr, "Detecting library type\n"); 64 | cmd->libtype = DetectLibraryType(cmd->bamfiles); 65 | 66 | if (cmd->libtype == 0) { 67 | fprintf(stderr, "\tLibrary seems single-end\n"); 68 | 69 | if (cmd->fragment_count_mode == 1 && cmd->fragment_size == 0) { 70 | fprintf(stderr, "ERROR: fragment mode counting is enable, library is single-end, but fragment size is set to 0."); 71 | fprintf(stderr, "WARNING: Please re-run program without enabling fragment-counting mode, or set fragment size"); 72 | PrintMultiCovMessage(cmd->argv[0]); 73 | return; 74 | } 75 | } 76 | else 77 | fprintf(stderr, "\tLibrary seems paired-end\n"); 78 | } 79 | 80 | if (cmd->blacklist_file) 81 | BlacklistChromosomeFiles(CHROMhead, cmd->blacklist_file); 82 | 83 | PrintBlacklistedChromosomes(CHROMhead, cmd->no_of_samples); 84 | 85 | if (cmd->genome_coverage == 1) { 86 | MultiGenomeReadCoverage(cmd, CHROMhead); 87 | } 88 | else { 89 | fprintf(stderr, "\nComputing coverage from the idx of BAM files\n"); 90 | GetChromosomeCoveragesIDX(CHROMhead, cmd->bamfiles); 91 | GetGenomeCoveragesIDX(CHROMhead, cmd->bamfiles); 92 | } 93 | 94 | if (cmd->blacklist_bed) { 95 | fprintf(stderr, "Subtracting reads from blaklist BED file ( %s )\n", cmd->blacklist_bed); 96 | SubtractBlacklistedBEDS(cmd->blacklist_bed, CHROMhead, cmd->bamfiles, cmd->libtype); 97 | } 98 | 99 | ComputeSamplescales(cmd->bamfiles, CHROMhead, 1); 100 | head = ReadBED(cmd->bedfile, cmd->threads); 101 | AllocateReadCovs(head, cmd->no_of_samples); 102 | 103 | curr = cmd->bamfiles; 104 | 105 | while (curr != NULL) { 106 | fprintf(stderr, "\nSample: %s\n", curr->shortname); 107 | fprintf(stderr, "\tTotal no. of reads: %d\n", curr->read_coverage); 108 | fprintf(stderr, "\tLibrary size scale: %.2f\n", curr->scale); 109 | curr = curr->next; 110 | } 111 | 112 | fprintf(stderr, "\nProcessing BAM files\n"); 113 | MultiCoverage(cmd->bamfiles, head, cmd); 114 | 115 | if (cmd->outdir) 116 | ofile_len = strlen(cmd->outdir); 117 | 118 | if (cmd->outprefix) 119 | ofile_len += strlen(cmd->outprefix); 120 | 121 | ofile_len += strlen("raw_coverages.tsv") + 50; 122 | ofile = (char *) calloc(ofile_len, sizeof (char)); 123 | 124 | if (cmd->outdir) { 125 | strcat(ofile, cmd->outdir); 126 | strcat(ofile, "/"); 127 | } 128 | 129 | if (cmd->outprefix) { 130 | strcat(ofile, cmd->outprefix); 131 | strcat(ofile, "."); 132 | } 133 | 134 | strcat(ofile, "raw_coverages.tsv"); 135 | 136 | WriteMultiCovsRaw(cmd->bamfiles, head, cmd->no_of_samples, ofile); 137 | 138 | if (ofile) 139 | free(ofile); 140 | 141 | ofile_len += strlen("TPM_normalized_coverages.tsv" + 1); 142 | ofile = (char *) calloc(ofile_len + 1, sizeof (char)); 143 | 144 | if (cmd->outdir) { 145 | strcat(ofile, cmd->outdir); 146 | strcat(ofile, "/"); 147 | } 148 | 149 | if (cmd->outprefix) { 150 | strcat(ofile, cmd->outprefix); 151 | strcat(ofile, "."); 152 | } 153 | 154 | strcat(ofile, "TPM_normalized_coverages.tsv"); 155 | 156 | CalculateTPM(cmd->bamfiles, head); 157 | WriteMultiCovsNormalized(cmd->bamfiles, head, cmd->no_of_samples, ofile); 158 | 159 | if (ofile) 160 | free(ofile); 161 | 162 | ofile_len += strlen("FPKM_normalized_coverages.tsv" + 1); 163 | ofile = (char *) calloc(ofile_len + 1, sizeof (char)); 164 | 165 | if (cmd->outdir) { 166 | strcat(ofile, cmd->outdir); 167 | strcat(ofile, "/"); 168 | } 169 | 170 | if (cmd->outprefix) { 171 | strcat(ofile, cmd->outprefix); 172 | strcat(ofile, "."); 173 | } 174 | 175 | strcat(ofile, "FPKM_normalized_coverages.tsv"); 176 | 177 | CalculateFPKM(cmd->bamfiles, head); 178 | WriteMultiCovsNormalized(cmd->bamfiles, head, cmd->no_of_samples, ofile); 179 | 180 | if (ofile) 181 | free(ofile); 182 | 183 | ofile_len += strlen("Library_normalized_coverages.tsv" + 1); 184 | ofile = (char *) calloc(ofile_len + 1, sizeof (char)); 185 | 186 | if (cmd->outdir) { 187 | strcat(ofile, cmd->outdir); 188 | strcat(ofile, "/"); 189 | } 190 | 191 | if (cmd->outprefix) { 192 | strcat(ofile, cmd->outprefix); 193 | strcat(ofile, "."); 194 | } 195 | 196 | strcat(ofile, "Library_normalized_coverages.tsv"); 197 | 198 | CalculateLibScaled(cmd->bamfiles, head); 199 | WriteMultiCovsNormalized(cmd->bamfiles, head, cmd->no_of_samples, ofile); 200 | 201 | if (ofile) 202 | free(ofile); 203 | 204 | DeleteBEDs(head); 205 | } 206 | 207 | void NormalizeBAMSrna(CMDINPUT *cmd) { 208 | BAMFILES *curr = NULL; 209 | 210 | cmd->fragment_count_mode = 0; 211 | fprintf(stderr, "Allocating BINS of size %d for chromosomes\n", cmd->binSize); 212 | CHROMhead = ImportChromosomeDataFromBAM(cmd->bamfiles->name, cmd->no_of_samples, cmd->threads); 213 | 214 | if (cmd->blacklist_file) 215 | BlacklistChromosomeFiles(CHROMhead, cmd->blacklist_file); 216 | 217 | CHROMhead = ComputeBins(CHROMhead, cmd->binSize); 218 | CHROMhead = AllocateBins(CHROMhead, cmd->no_of_samples); 219 | 220 | cmd->chr = CHROMhead; 221 | 222 | if (cmd->libtype == -1) { 223 | fprintf(stderr, "Detecting library type\n"); 224 | cmd->libtype = DetectLibraryType(cmd->bamfiles); 225 | 226 | if (cmd->libtype == 0) { 227 | fprintf(stderr, "\tLibrary seems single-end\n"); 228 | 229 | if (cmd->fragment_count_mode == 1 && cmd->fragment_size == 0) { 230 | fprintf(stderr, "ERROR: fragment mode counting is enable, library is single-end, but fragment size is set to 0."); 231 | fprintf(stderr, "WARNING: Please re-run program without enabling fragment-counting mode, or set fragment size"); 232 | PrintMultiCovMessage(cmd->argv[0]); 233 | return; 234 | } 235 | } else 236 | fprintf(stderr, "\tLibrary seems paired-end\n"); 237 | } 238 | 239 | //fprintf(stderr, "\nComputing coverage from the idx of BAM files\n"); 240 | //GetChromosomeCoveragesIDX(CHROMhead, cmd->bamfiles); 241 | if(strcmp(cmd->scale, INPUTS_CUSTOM) != 0) { 242 | cmd->bamfiles->scale = 1; 243 | cmd->bamfiles->genome_scale = 1; 244 | } 245 | 246 | if (cmd->genome_coverage > 0 && strcmp(cmd->scale, INPUTS_NO) != 0 && strcmp(cmd->scale, INPUTS_CUSTOM) != 0) { 247 | fprintf(stderr, "\nComputing coverage from BAM file\n"); 248 | MultiGenomeCoverage(cmd, CHROMhead); 249 | ComputeSamplescales(cmd->bamfiles, CHROMhead, 1); 250 | ScaleGenomeCoverage(cmd->bamfiles, CHROMhead); 251 | } 252 | 253 | curr = cmd->bamfiles; 254 | 255 | while (curr != NULL) { 256 | fprintf(stderr, "\nSample: %s\n", curr->shortname); 257 | fprintf(stderr, "\tTotal no. of reads: %d\n", curr->read_coverage); 258 | fprintf(stderr, "\tLibrary size scale: %.2f\n", curr->scale); 259 | fprintf(stderr, "\tTotal number of filtered reads: %d\n", curr->filtered_reads); 260 | fprintf(stderr, "\tBases sequenced: %f\n", curr->base_coverage); 261 | fprintf(stderr, "\tGenome size: %.f\n", CalculateGenomeSize(CHROMhead)); 262 | fprintf(stderr, "\tGenome scale: %f\n", curr->genome_scale); 263 | 264 | if(strcmp(cmd->scale, INPUTS_SMALLEST) == 0) 265 | curr->genome_scale = curr->scale; 266 | 267 | curr = curr->next; 268 | } 269 | 270 | if(cmd->strandsplit == 0) { 271 | fprintf(stderr, "\nCreating coverage track for: %s\n", cmd->bamfiles->shortname); 272 | GetGenomeCoverageRNA(cmd, CHROMhead, returnRNAfilename(cmd)); 273 | } 274 | else { 275 | fprintf(stderr, "\nCreating positive coverage track for: %s\n", cmd->bamfiles->shortname); 276 | cmd->strand = 1; 277 | GetGenomeCoverageRNA(cmd, CHROMhead, returnRNAfilename(cmd)); 278 | 279 | fprintf(stderr, "\nCreating negative coverage track for: %s\n", cmd->bamfiles->shortname); 280 | cmd->strand = -1; 281 | GetGenomeCoverageRNA(cmd, CHROMhead, returnRNAfilename(cmd)); 282 | } 283 | } 284 | 285 | void NormalizeBAMS(CMDINPUT *cmd) { 286 | BAMFILES *curr = NULL; 287 | 288 | cmd->fragment_count_mode = 0; 289 | fprintf(stderr, "Allocating BINS of size %d for chromosomes\n", cmd->binSize); 290 | CHROMhead = ImportChromosomeDataFromBAM(cmd->bamfiles->name, cmd->no_of_samples, cmd->threads); 291 | 292 | if (cmd->blacklist_file) 293 | BlacklistChromosomeFiles(CHROMhead, cmd->blacklist_file); 294 | 295 | CHROMhead = ComputeBins(CHROMhead, cmd->binSize); 296 | CHROMhead = AllocateBins(CHROMhead, cmd->no_of_samples); 297 | cmd->chr = CHROMhead; 298 | 299 | if (cmd->libtype == -1) { 300 | fprintf(stderr, "Detecting library type\n"); 301 | cmd->libtype = DetectLibraryType(cmd->bamfiles); 302 | 303 | if (cmd->libtype == 0) { 304 | fprintf(stderr, "\tLibrary seems single-end\n"); 305 | 306 | if (cmd->fragment_count_mode == 1 && cmd->fragment_size == 0) { 307 | fprintf(stderr, "ERROR: fragment mode counting is enable, library is single-end, but fragment size is set to 0."); 308 | fprintf(stderr, "WARNING: Please re-run program without enabling fragment-counting mode, or set fragment size"); 309 | PrintMultiCovMessage(cmd->argv[0]); 310 | return; 311 | } 312 | } else 313 | fprintf(stderr, "\tLibrary seems paired-end\n"); 314 | } 315 | 316 | fprintf(stderr, "\nComputing coverage from the idx of BAM files\n"); 317 | GetChromosomeCoveragesIDX(CHROMhead, cmd->bamfiles); 318 | MultiGenomeBaseCoverage(cmd, CHROMhead); 319 | 320 | if(cmd->strandsplit == 1 && strcmp(cmd->scale, INPUTS_CUSTOM) != 0) { 321 | if(strcmp(cmd->operation, INPUTS_RSTRRNA) == 0 || strcmp(cmd->operation, INPUTS_STRRNA) == 0) { 322 | cmd->bamfiles->base_coverage = cmd->bamfiles->base_coverage + cmd->bamfiles->next->base_coverage; 323 | cmd->bamfiles->next->base_coverage = cmd->bamfiles->base_coverage; 324 | } 325 | } 326 | 327 | if(strcmp(cmd->scale, INPUTS_CUSTOM) != 0) 328 | ScaleGenomeCoverage(cmd->bamfiles, CHROMhead); 329 | 330 | if (cmd->genome_coverage == 0 && strcmp(cmd->scale, INPUTS_CUSTOM) != 0) { 331 | GetGenomeCoveragesIDX(CHROMhead, cmd->bamfiles); 332 | ComputeSamplescales(cmd->bamfiles, CHROMhead, 1); 333 | } 334 | 335 | curr = cmd->bamfiles; 336 | 337 | while (curr != NULL) { 338 | fprintf(stderr, "\nSample: %s\n", curr->shortname); 339 | fprintf(stderr, "\tTotal no. of reads: %d\n", curr->read_coverage); 340 | fprintf(stderr, "\tLibrary size scale: %.2f\n", curr->scale); 341 | fprintf(stderr, "\tTotal number of filtered reads: %d\n", curr->filtered_reads); 342 | fprintf(stderr, "\tBases sequenced: %f\n", curr->base_coverage); 343 | fprintf(stderr, "\tGenome size: %.f\n", CalculateGenomeSize(CHROMhead)); 344 | fprintf(stderr, "\tGenome scale: %f\n", curr->genome_scale); 345 | 346 | if(strcmp(cmd->scale, INPUTS_SMALLEST) == 0) 347 | curr->genome_scale = curr->scale; 348 | 349 | curr = curr->next; 350 | } 351 | 352 | if (strcmp(cmd->scale, "no") != 0) { 353 | fprintf(stderr, "\nScaling sample(s)\n"); 354 | MultiGenomeScaler(cmd, CHROMhead); 355 | } 356 | 357 | if (cmd->smoothBin > 0) { 358 | if (cmd->tracksmooth == 0 || cmd->tracksmooth == 1) { 359 | fprintf(stderr, "\nSmoothening signal\n"); 360 | MultiGenomeSmoother(cmd, CHROMhead); 361 | } 362 | } 363 | 364 | fprintf(stderr, "Printing output BigWig files\n"); 365 | 366 | curr = cmd->bamfiles; 367 | if(cmd->strandsplit == 1) 368 | cmd->strand = -1; 369 | 370 | while (curr != NULL) { 371 | PrintScaledBigWig(cmd, curr, NULL); 372 | curr = curr->next; 373 | 374 | if(cmd->strandsplit == 1) 375 | cmd->strand = 1; 376 | } 377 | 378 | if (strcmp(cmd->operation, "scaled") != 0 && strcmp(cmd->operation, "unscaled") != 0) { 379 | fprintf(stderr, "Transforming coverage tracks: %s\n", cmd->operation); 380 | MultiGenomeTransform(cmd, CHROMhead); 381 | cmd->strand = 0; 382 | 383 | if (cmd->smoothBin > 0) { 384 | if (cmd->tracksmooth == 0 || cmd->tracksmooth == 2) { 385 | fprintf(stderr, "\nSmoothening transformed signal\n"); 386 | MultiGenomeSmoother(cmd, CHROMhead); 387 | } 388 | } 389 | 390 | curr = cmd->bamfiles->next; 391 | 392 | while (curr != NULL) { 393 | PrintScaledBigWig(cmd, curr, cmd->bamfiles->shortname); 394 | curr = curr->next; 395 | } 396 | } 397 | } 398 | 399 | void PrintUsage(char *pname) { 400 | char *ptr = strrchr(pname, '/'); 401 | ptr = ptr ? ptr + 1 : (char *) pname; 402 | 403 | fprintf(stderr, "\nBAMscale: a tool to quantify peaks, and scale sequencing data\n"); 404 | fprintf(stderr, "Version: %s\n", "v0.0.9"); 405 | 406 | fprintf(stderr, "\nUsage: %s \n", ptr); 407 | fprintf(stderr, "\n\tCommands\tDescription\n"); 408 | fprintf(stderr, "\t========\t===========\n"); 409 | 410 | fprintf(stderr, "\t cov\t\tCalculate coverage of BED coordinates in BAM file(s). Outputs are raw read counts, FPKM and TPM normalized values.\n"); 411 | fprintf(stderr, "\t scale\tConvert BAM files to BigWigs; scale one or multiple files to genome size or to each other.\n"); 412 | } 413 | 414 | /* 415 | * 416 | */ 417 | int main(int argc, char **argv) { 418 | CMDINPUT *cmd = NULL; 419 | int found = 0; 420 | 421 | if (argc > 1) { 422 | if (strcmp(argv[1], INPUTS_COV) == 0) { 423 | found++; 424 | cmd = MultiCovParser(argc, argv); 425 | 426 | if (cmd != NULL) { 427 | no_of_samples = cmd->no_of_samples; 428 | 429 | ComputeCoverageChIPpeak(cmd); 430 | } 431 | } 432 | 433 | if (strcmp(argv[1], INPUTS_SCALE) == 0) { 434 | found++; 435 | cmd = ScaleParser(argc, argv); 436 | 437 | if (cmd != NULL) { 438 | no_of_samples = cmd->no_of_samples; 439 | 440 | if(strcmp(cmd->operation, INPUTS_RSTRRNA) == 0 || strcmp(cmd->operation, INPUTS_RNA) == 0 || strcmp(cmd->operation, INPUTS_STRRNA) == 0) 441 | NormalizeBAMSrna(cmd); 442 | 443 | else 444 | NormalizeBAMS(cmd); 445 | } 446 | } 447 | 448 | if (found == 0) 449 | PrintUsage(argv[0]); 450 | } 451 | else { 452 | PrintUsage(argv[0]); 453 | } 454 | 455 | if (cmd){ 456 | DestroyCMDinput(cmd); 457 | FreeAllocatedData(); 458 | free(cmd); 459 | } 460 | 461 | return (EXIT_SUCCESS); 462 | } 463 | 464 | -------------------------------------------------------------------------------- /src/multithreads.c: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | /* 8 | * File: multithreads.c 9 | * Author: pongorls 10 | * 11 | * Created on November 30, 2018, 12:04 PM 12 | */ 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #define __STDC_FORMAT_MACROS 19 | #include 20 | #include 21 | #include 22 | 23 | 24 | #include "Definitions.h" 25 | #include "main.h" 26 | #include "scale.h" 27 | #include "CHROMstruct.h" 28 | #include "segmenter.h" 29 | #include "BAMcoverage.h" 30 | #include "multithreads.h" 31 | #include "binning.h" 32 | 33 | void DestroyThreadStruct(THREADS **head, int no_of_threads) { 34 | int i = 0; 35 | 36 | for (i = 0; i < no_of_threads; i++) { 37 | if ((*head)[i].chrname) 38 | free((*head)[i].chrname); 39 | } 40 | 41 | if (*head) 42 | free(*head); 43 | } 44 | 45 | THREADS *CreateThreadStruct(char *chrname) { 46 | THREADS *ptr = (THREADS *) malloc(sizeof (THREADS)); 47 | 48 | ptr->chrname = strdup(chrname); 49 | 50 | ptr->sample_id = -1; 51 | ptr->paired_end = 0; 52 | ptr->scale = 1.00; 53 | ptr->binSize = 0; 54 | ptr->pseudocount = 1; 55 | ptr->sample = NULL; 56 | ptr->next = NULL; 57 | 58 | return ptr; 59 | } 60 | 61 | THREADS *AddElement(THREADS *head, char *chrname) { 62 | THREADS *curr = head; 63 | 64 | if (head == NULL) { 65 | head = CreateThreadStruct(chrname); 66 | curr = head; 67 | } else { 68 | while (curr->next != NULL) { 69 | curr = curr->next; 70 | } 71 | 72 | curr->next = CreateThreadStruct(chrname); 73 | } 74 | 75 | return head; 76 | } 77 | 78 | THREADS **AssignChrToThreads(CHROMOSOMES *head, int no_of_threads) { 79 | THREADS **ptr = (THREADS **) malloc(no_of_threads * sizeof (THREADS *)); 80 | CHROMOSOMES *curr = head; 81 | int i = 0; 82 | 83 | while (curr != NULL) { 84 | if (curr->blacklist == 0) { 85 | ptr[i] = AddElement(ptr[i], curr->name); 86 | i++; 87 | 88 | if (i >= no_of_threads) { 89 | i = 0; 90 | } 91 | } 92 | 93 | curr = curr->next; 94 | } 95 | 96 | return ptr; 97 | } 98 | 99 | CHROMOSOMES *FindChrStruct(CHROMOSOMES * head, char *chrname) { 100 | while (head != NULL) { 101 | if (strcmp(chrname, head->name) == 0) { 102 | return head; 103 | } 104 | 105 | head = head->next; 106 | } 107 | 108 | return NULL; 109 | } 110 | 111 | RATIOS *CalculateRatiosAllMultithreaded(RATIOS *head, CHROMOSOMES *chead, BAMFILES *bhead, int no_of_samples, int min_per_bin_cov, int smoothbin, int binSize, char *chromsizes) { 112 | RATIOS *curr = head; 113 | CHROMOSOMES *ccurr = chead; 114 | BAMFILES *bcurr = bhead; 115 | CHRCOV *ptr; 116 | 117 | if (no_of_samples < 2) 118 | return NULL; 119 | 120 | bcurr = bhead->next; 121 | 122 | while (bcurr != NULL) { 123 | printf("\nComparing samples:\n\t%s\n\t%s\n\n", bhead->name, bcurr->name); 124 | ccurr = chead; 125 | 126 | if (curr == NULL) 127 | curr = CreateRatioStruct(bhead->name, bcurr->name, bhead->id, bcurr->id); 128 | 129 | else { 130 | curr->next = CreateRatioStruct(bhead->name, bcurr->name, bhead->id, bcurr->id); 131 | curr = curr->next; 132 | } 133 | 134 | while (ccurr != NULL) { 135 | if (ccurr->blacklist == 0) { 136 | curr->chrcovs = CalculateChromosomeRatio(ccurr, curr->chrcovs, bhead->id, bcurr->id, 1, min_per_bin_cov); 137 | } 138 | 139 | ccurr = ccurr->next; 140 | } 141 | 142 | if (curr->chrcovs != NULL) { 143 | ptr = curr->chrcovs; 144 | 145 | while (ptr != NULL) { 146 | ptr->ratio = smoothenBins(&ptr->ratio, smoothbin, ptr->nbins); 147 | ptr = ptr->next; 148 | } 149 | } 150 | 151 | if (chromsizes) 152 | PrintBigWigOrdered(curr, binSize, chromsizes); 153 | 154 | else 155 | PrintBedgraph(curr, binSize); 156 | 157 | bcurr = bcurr->next; 158 | } 159 | 160 | return head; 161 | } -------------------------------------------------------------------------------- /src/scale.c: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | /* 8 | * File: scale.c 9 | * Author: pongorls 10 | * 11 | * Created on November 28, 2018, 3:54 PM 12 | */ 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include 21 | 22 | #include "Definitions.h" 23 | #include "main.h" 24 | #include "scale.h" 25 | #include "CHROMstruct.h" 26 | #include "segmenter.h" 27 | #include "binning.h" 28 | 29 | float *scaleBins(float *carray, float scale, int nbins, float pseudocount) { 30 | int i = 0; 31 | 32 | for (i = 0; i < nbins; i++) { 33 | if (carray[i] > 0) { 34 | carray[i] = carray[i] * scale; 35 | carray[i] = roundf(carray[i] * 100.0) / 100.0; 36 | } else 37 | carray[i] = 0; 38 | } 39 | 40 | return carray; 41 | } 42 | 43 | void ScaleToSmallest(BAMFILES *head) { 44 | BAMFILES *curr = head; 45 | 46 | int smallestBAM = -1; 47 | 48 | while (curr != NULL) { 49 | if (curr->read_coverage != -1) { 50 | if (smallestBAM == -1) { 51 | smallestBAM = curr->read_coverage; 52 | } 53 | else { 54 | if (smallestBAM > curr->read_coverage && curr->read_coverage > 0) { 55 | smallestBAM = curr->read_coverage; 56 | } 57 | } 58 | } 59 | 60 | curr = curr->next; 61 | } 62 | 63 | curr = head; 64 | 65 | while (curr != NULL) { 66 | if (curr->read_coverage > 0) { 67 | curr->scale = (float) curr->read_coverage / (float) smallestBAM; 68 | curr->scale = 1 / curr->scale; 69 | } 70 | 71 | curr = curr->next; 72 | } 73 | } 74 | 75 | void NoScale(BAMFILES *head) { 76 | BAMFILES *curr = head; 77 | 78 | while (curr != NULL) { 79 | if (curr->read_coverage != -1) { 80 | curr->scale = (float) 1; 81 | } 82 | 83 | curr = curr->next; 84 | } 85 | 86 | curr = head; 87 | } 88 | 89 | void ScaleToGenomeSize(BAMFILES *head, CHROMOSOMES *chead) { 90 | BAMFILES *curr = head; 91 | 92 | float genome_size = CalculateGenomeSize(chead) / 100; 93 | 94 | while (curr != NULL) { 95 | if (curr->read_coverage > 0) { 96 | curr->scale = 1 / ((float) curr->read_coverage / genome_size); 97 | } 98 | 99 | curr = curr->next; 100 | } 101 | } 102 | 103 | void ScaleGenomeCoverage(BAMFILES *head, CHROMOSOMES *chead) { 104 | float genome_size = CalculateGenomeSize(chead); 105 | BAMFILES *curr = head; 106 | 107 | while (curr != NULL) { 108 | curr->genome_scale = (float) 1 / (curr->base_coverage / genome_size); 109 | curr = curr->next; 110 | } 111 | } 112 | 113 | BAMFILES *ComputeSamplescales(BAMFILES *head, CHROMOSOMES *chead, int scale) { 114 | if (scale == 0) { 115 | NoScale(head); 116 | } 117 | 118 | if (scale == 1) { 119 | ScaleToSmallest(head); 120 | } 121 | 122 | if (scale == 2) { 123 | ScaleToGenomeSize(head, chead); 124 | } 125 | 126 | return head; 127 | } 128 | 129 | float *logTwoCoverageRatio(float *cov1, float *cov2, int nbins, float min_per_bin_cov) { 130 | int i = 0; 131 | float *carray = (float *) calloc(nbins + 1, sizeof (float)); 132 | 133 | for (i = 0; i < nbins; i++) { 134 | if (cov1[i] >= min_per_bin_cov && cov2[i] >= min_per_bin_cov) { 135 | carray[i] = log2(((cov1[i]) / (cov2[i]))); 136 | if (carray[i] > 1000) 137 | carray[i] = 1000; 138 | 139 | if (carray[i] < -1000) 140 | carray[i] = -1000; 141 | } 142 | } 143 | 144 | if (cov1) 145 | free(cov1); 146 | 147 | return carray; 148 | } 149 | 150 | float *OKseqRFD(float *cov1, float *cov2, int nbins, float min_per_bin_cov) { 151 | int i = 0; 152 | float *carray = (float *) calloc(nbins + 1, sizeof (float)); 153 | 154 | for (i = 0; i < nbins; i++) { 155 | if (cov1[i] >= min_per_bin_cov || cov2[i] >= min_per_bin_cov) { 156 | carray[i] = ( (cov1[i]+0.0001) - (cov2[i]+0.0001)) / ((cov2[i]+0.0001) + (cov1[i]+0.0001)); 157 | } 158 | } 159 | 160 | if (cov1) 161 | free(cov1); 162 | 163 | return carray; 164 | } 165 | 166 | float *SubtractCoverage(float *cov1, float *cov2, int nbins, float min_per_bin_cov) { 167 | int i = 0; 168 | float *carray = (float *) malloc(nbins * sizeof (float)); 169 | 170 | for (i = 0; i < nbins; i++) { 171 | if (cov1[i] >= min_per_bin_cov || cov2[i] >= min_per_bin_cov) { 172 | carray[i] = cov1[i] - cov2[i]; 173 | } 174 | } 175 | 176 | return carray; 177 | } 178 | 179 | float *CoverageRatio(float *cov1, float *cov2, int nbins, float min_per_bin_cov) { 180 | int i = 0; 181 | float *carray = (float *) malloc(nbins * sizeof (float)); 182 | 183 | for (i = 0; i < nbins; i++) { 184 | if (cov1[i] >= min_per_bin_cov || cov2[i] >= min_per_bin_cov) { 185 | carray[i] = (cov1[i] / cov2[i]); 186 | } 187 | } 188 | 189 | return carray; 190 | } 191 | 192 | float *SignedCoverageRatio(float *cov1, float *cov2, int nbins, int min_per_bin_cov) { 193 | int i = 0; 194 | float *carray = (float *) malloc(nbins * sizeof (float)); 195 | 196 | for (i = 0; i < nbins; i++) { 197 | if (cov1[i] >= min_per_bin_cov || cov2[i] >= min_per_bin_cov) { 198 | if (cov1[i] > cov2[i]) { 199 | carray[i] = (cov1[i] / cov2[i]); 200 | } 201 | else { 202 | carray[i] = -(cov2[i] / cov1[i]); 203 | } 204 | 205 | } 206 | } 207 | 208 | return carray; 209 | } 210 | 211 | CHRCOV *CalculateChromosomeRatio(CHROMOSOMES *curr, CHRCOV *chead, int s1, int s2, int ratioType, int min_per_bin_cov) { 212 | CHRCOV *ccurr = chead; 213 | 214 | if (ccurr == NULL) { 215 | chead = CreateChromCovStruct(curr->name, curr->id, curr->numberOfBins); 216 | ccurr = chead; 217 | } 218 | else { 219 | while (ccurr->next != NULL) { 220 | ccurr = ccurr->next; 221 | } 222 | 223 | ccurr->next = CreateChromCovStruct(curr->name, curr->id, curr->numberOfBins); 224 | ccurr = ccurr->next; 225 | } 226 | 227 | if (ratioType == 1) { 228 | ccurr->ratio = logTwoCoverageRatio(curr->coverages[s1], curr->coverages[s2], curr->numberOfBins, min_per_bin_cov); 229 | } 230 | else if (ratioType == 2) { 231 | ccurr->ratio = SubtractCoverage(curr->coverages[s1], curr->coverages[s2], curr->numberOfBins, min_per_bin_cov); 232 | } 233 | else if (ratioType == 3) { 234 | ccurr->ratio = CoverageRatio(curr->coverages[s1], curr->coverages[s2], curr->numberOfBins, min_per_bin_cov); 235 | } 236 | else if (ratioType == 4) { 237 | ccurr->ratio = SignedCoverageRatio(curr->coverages[s1], curr->coverages[s2], curr->numberOfBins, min_per_bin_cov); 238 | } 239 | 240 | return chead; 241 | } 242 | 243 | void PrintBedgraph(RATIOS *ptr, int binSize) { 244 | char outfile[250]; 245 | int i = 0; 246 | CHRCOV *p = ptr->chrcovs; 247 | FILE * fp; 248 | 249 | strcpy(outfile, basename(ptr->sample1)); 250 | strcat(outfile, "_vs_"); 251 | strcat(outfile, basename(ptr->sample2)); 252 | strcat(outfile, ".bedgraph"); 253 | 254 | fp = fopen(outfile, "w+"); 255 | 256 | while (p != NULL) { 257 | for (i = 0; i < p->nbins - 1; i++) 258 | fprintf(fp, "%s\t%d\t%d\t%.3f\n", p->name, i * binSize, (i + 1) * binSize, p->ratio[i]); 259 | p = p->next; 260 | } 261 | 262 | fclose(fp); 263 | } 264 | 265 | char *returnChrName(char *input) { 266 | char *ptr = strtok(input, "\t"); 267 | 268 | while (ptr != NULL) { 269 | return ptr; 270 | } 271 | 272 | return NULL; 273 | } 274 | 275 | void PrintBedgraphOrdered(RATIOS *ptr, int binSize, char *chromfile) { 276 | FILE *handler = fopen(chromfile, "r"); 277 | char line[BUFSIZ]; 278 | char outfile[250]; 279 | char *chrname = NULL; 280 | char *pos; 281 | int i = 0; 282 | CHRCOV *p = ptr->chrcovs; 283 | FILE * fp; 284 | 285 | strcpy(outfile, basename(ptr->sample1)); 286 | strcat(outfile, "_vs_"); 287 | strcat(outfile, basename(ptr->sample2)); 288 | strcat(outfile, ".bedgraph"); 289 | 290 | fp = fopen(outfile, "w+"); 291 | 292 | while (fgets(line, sizeof (line), handler)) { 293 | if ((pos = strchr(line, '\n')) != NULL) 294 | *pos = '\0'; 295 | 296 | chrname = returnChrName(line); 297 | p = ptr->chrcovs; 298 | 299 | if (chrname != NULL) { 300 | while (p != NULL) { 301 | if (strcmp(chrname, p->name) == 0) { 302 | for (i = 0; i < p->nbins - 1; i++) 303 | fprintf(fp, "%s\t%d\t%d\t%.3f\n", p->name, i * binSize, (i + 1) * binSize, p->ratio[i]); 304 | } 305 | 306 | p = p->next; 307 | } 308 | } 309 | } 310 | 311 | while (p != NULL) { 312 | for (i = 0; i < p->nbins - 1; i++) 313 | fprintf(fp, "%s\t%d\t%d\t%.3f\n", p->name, i * binSize, (i + 1) * binSize, p->ratio[i]); 314 | p = p->next; 315 | } 316 | 317 | fclose(fp); 318 | fclose(handler); 319 | } 320 | 321 | void PrintBigWigOrdered(RATIOS *ptr, int binSize, char *chromfile) { 322 | FILE *handler = NULL; 323 | char line[BUFSIZ]; 324 | char outfile[250]; 325 | char *pos, *chrname; 326 | int i = 0; 327 | char **chrnames = NULL; 328 | uint32_t *chrlens = NULL; 329 | uint32_t start = 0; 330 | uint32_t end = 0; 331 | int no_of_chrs = 0; 332 | CHRCOV *p = ptr->chrcovs; 333 | bigWigFile_t *fp; 334 | 335 | handler = fopen(chromfile, "r"); 336 | 337 | while (fgets(line, sizeof (line), handler)) { 338 | if ((pos = strchr(line, '\n')) != NULL) 339 | *pos = '\0'; 340 | 341 | chrname = returnChrName(line); 342 | p = ptr->chrcovs; 343 | 344 | if (chrname != NULL) { 345 | while (p != NULL) { 346 | if (strcmp(chrname, p->name) == 0) { 347 | no_of_chrs++; 348 | } 349 | 350 | p = p->next; 351 | } 352 | } 353 | } 354 | 355 | fclose(handler); 356 | 357 | chrnames = (char **) malloc(no_of_chrs * sizeof (char *)); 358 | chrlens = (uint32_t *) malloc(no_of_chrs * sizeof (uint32_t)); 359 | no_of_chrs = 0; 360 | 361 | handler = fopen(chromfile, "r"); 362 | 363 | while (fgets(line, sizeof (line), handler)) { 364 | if ((pos = strchr(line, '\n')) != NULL) 365 | *pos = '\0'; 366 | 367 | chrname = returnChrName(line); 368 | p = ptr->chrcovs; 369 | 370 | if (chrname != NULL) { 371 | while (p != NULL) { 372 | if (strcmp(chrname, p->name) == 0) { 373 | chrnames[no_of_chrs] = strdup(chrname); 374 | chrlens[no_of_chrs] = (uint32_t) (p->nbins * binSize); 375 | no_of_chrs++; 376 | } 377 | 378 | p = p->next; 379 | } 380 | } 381 | } 382 | 383 | fclose(handler); 384 | 385 | strcpy(outfile, basename(ptr->sample1)); 386 | strcat(outfile, "_vs_"); 387 | strcat(outfile, basename(ptr->sample2)); 388 | strcat(outfile, ".bw"); 389 | 390 | fp = bwOpen(outfile, NULL, "w"); 391 | bwCreateHdr(fp, 10); 392 | fp->cl = bwCreateChromList(chrnames, chrlens, no_of_chrs); 393 | bwWriteHdr(fp); 394 | 395 | handler = fopen(chromfile, "r"); 396 | 397 | while (fgets(line, sizeof (line), handler)) { 398 | if ((pos = strchr(line, '\n')) != NULL) 399 | *pos = '\0'; 400 | 401 | chrname = returnChrName(line); 402 | p = ptr->chrcovs; 403 | 404 | if (chrname != NULL) { 405 | while (p != NULL) { 406 | if (strcmp(chrname, p->name) == 0) { 407 | for (i = 0; i < p->nbins - 1; i++) { 408 | start = (uint32_t) (i * binSize); 409 | end = (uint32_t) ((i + 1) * binSize); 410 | bwAddIntervals(fp, &p->name, &start, &end, &p->ratio[i], (uint32_t) 1); 411 | } 412 | } 413 | 414 | p = p->next; 415 | } 416 | } 417 | } 418 | 419 | fclose(handler); 420 | bwClose(fp); 421 | bwCleanup(); 422 | 423 | for (i = 0; i < no_of_chrs; i++) { 424 | if (chrnames[i]) { 425 | free(chrnames[i]); 426 | } 427 | } 428 | 429 | if (chrnames) 430 | free(chrnames); 431 | 432 | if (chrlens) 433 | free(chrlens); 434 | 435 | } 436 | 437 | RATIOS *CalculateRatiosAll(RATIOS *head, CHROMOSOMES *chead, BAMFILES *bhead, int no_of_samples, int min_per_bin_cov, int smoothbin, int binSize, char *chromsizes) { 438 | RATIOS *curr = head; 439 | CHROMOSOMES *ccurr = chead; 440 | BAMFILES *bcurr = bhead; 441 | CHRCOV *ptr; 442 | 443 | if (no_of_samples < 2) 444 | return NULL; 445 | 446 | bcurr = bhead->next; 447 | 448 | while (bcurr != NULL) { 449 | printf("\nComparing samples:\n\t%s\n\t%s\n\n", bhead->name, bcurr->name); 450 | ccurr = chead; 451 | 452 | if (curr == NULL) 453 | curr = CreateRatioStruct(bhead->name, bcurr->name, bhead->id, bcurr->id); 454 | 455 | else { 456 | curr->next = CreateRatioStruct(bhead->name, bcurr->name, bhead->id, bcurr->id); 457 | curr = curr->next; 458 | } 459 | 460 | while (ccurr != NULL) { 461 | if (ccurr->blacklist == 0) { 462 | curr->chrcovs = CalculateChromosomeRatio(ccurr, curr->chrcovs, bhead->id, bcurr->id, 1, min_per_bin_cov); 463 | } 464 | 465 | ccurr = ccurr->next; 466 | } 467 | 468 | if (curr->chrcovs != NULL) { 469 | ptr = curr->chrcovs; 470 | 471 | while (ptr != NULL) { 472 | ptr->ratio = smoothenBins(&ptr->ratio, smoothbin, ptr->nbins); 473 | ptr = ptr->next; 474 | } 475 | } 476 | 477 | if (chromsizes) 478 | PrintBigWigOrdered(curr, binSize, chromsizes); 479 | 480 | else 481 | PrintBedgraph(curr, binSize); 482 | 483 | bcurr = bcurr->next; 484 | } 485 | 486 | return head; 487 | } -------------------------------------------------------------------------------- /src/segmenter.c: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | /* 8 | * File: segmenter.c 9 | * Author: pongorls 10 | * 11 | * Created on November 29, 2018, 1:27 PM 12 | */ 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #define __STDC_FORMAT_MACROS 19 | #include 20 | 21 | #include "Definitions.h" 22 | #include "main.h" 23 | #include "scale.h" 24 | #include "CHROMstruct.h" 25 | #include "segmenter.h" 26 | 27 | void DestroySegments(SEGMENTS *head) { 28 | SEGMENTS *curr = head; 29 | 30 | while(head != NULL) { 31 | curr = head; 32 | head = head->next; 33 | 34 | free(curr); 35 | } 36 | } 37 | SEGMENTS *createSegment(void) { 38 | SEGMENTS *ptr = (SEGMENTS *) malloc(sizeof(SEGMENTS)); 39 | ptr->next = NULL; 40 | ptr->prev = NULL; 41 | ptr->start = -1; 42 | ptr->end = -1; 43 | 44 | return ptr; 45 | } 46 | 47 | int compare_float (const void * a, const void * b) { 48 | float fa = *(const float*) a; 49 | float fb = *(const float*) b; 50 | return (fa > fb) - (fa < fb); 51 | } 52 | 53 | 54 | int64_t CalculateGenSize(CHROMOSOMES *head) { 55 | int64_t gensize = 0; 56 | CHROMOSOMES *curr = head; 57 | 58 | while(curr != NULL) { 59 | if(curr->blacklist == 0) { 60 | gensize += (int64_t)curr->numberOfBins; 61 | } 62 | 63 | curr = curr->next; 64 | } 65 | return gensize; 66 | } 67 | 68 | int64_t CalculateNonZeroBins(CHROMOSOMES *head, int sampleid) { 69 | int64_t nzbins = 0; 70 | CHROMOSOMES *curr = head; 71 | int i; 72 | 73 | while(curr != NULL) { 74 | if(curr->blacklist == 0) { 75 | curr->nonzerobins = 0; 76 | 77 | for(i = 0; i < curr->numberOfBins; i++) { 78 | if(curr->coverages[sampleid][i] != 0) { 79 | curr->nonzerobins++; 80 | } 81 | } 82 | } 83 | 84 | curr = curr->next; 85 | } 86 | 87 | curr = head; 88 | 89 | while(curr != NULL) { 90 | if(curr->blacklist == 0) { 91 | nzbins += curr->nonzerobins; 92 | } 93 | 94 | curr = curr->next; 95 | } 96 | 97 | return nzbins; 98 | } 99 | 100 | float *ConcatenateGenome(CHROMOSOMES *head, int64_t gensize, int sampleid) { 101 | CHROMOSOMES *curr = head; 102 | int64_t i = 0; 103 | int64_t j = 0; 104 | float *genbins = NULL; 105 | 106 | 107 | if(gensize < 1) 108 | return NULL; 109 | 110 | genbins = (float *) malloc(gensize * sizeof(float)); 111 | 112 | if(genbins == NULL) { 113 | printf("ERROR: could not allocate memory for genome (at quantile calculation)\n"); 114 | FreeAllocatedData(); 115 | exit(0); 116 | } 117 | 118 | while(curr != NULL) { 119 | if(curr->blacklist == 0) { 120 | 121 | for(i = 0; i < curr->numberOfBins; i++) { 122 | if(curr->coverages[sampleid][i] != 0) { 123 | genbins[j] = curr->coverages[sampleid][i]; 124 | j++; 125 | } 126 | } 127 | } 128 | 129 | curr = curr->next; 130 | } 131 | 132 | 133 | 134 | return genbins; 135 | } 136 | 137 | void Segmenting(CHROMOSOMES *head, CMDINPUT *cmd, int sampleid, float upper, float median, float lower) { 138 | SEGMENTS *segment = NULL; 139 | SEGMENTS *segmenthead = NULL; 140 | SEGMENTS *tmp = NULL; 141 | SEGMENTS *rtmp = NULL; 142 | CHROMOSOMES *curr = head; 143 | int prevstate = -1; 144 | int currstate = -1; 145 | int start; 146 | int end; 147 | int i = 0; 148 | FILE *fp_s = fopen("strong.bed", "w+"); 149 | FILE *fp_sm = fopen("med_strong.bed", "w+"); 150 | FILE *fp_wm = fopen("med_weak.bed", "w+"); 151 | FILE *fp_w = fopen("weak.bed", "w+"); 152 | int minsize = 200; 153 | printf("Minsize: %d\n", minsize); 154 | 155 | while(curr != NULL) { 156 | if(curr->blacklist == 0) { 157 | currstate = -1; 158 | prevstate = -1; 159 | start = 0; 160 | end = 0; 161 | segment = NULL; 162 | 163 | for(i = 0; i < curr->numberOfBins; i++) { 164 | if(curr->coverages[sampleid][i] == 0) { 165 | currstate = 0; 166 | } 167 | 168 | else if(curr->coverages[sampleid][i] > upper) { 169 | currstate = 4; 170 | } 171 | 172 | else if(curr->coverages[sampleid][i] > median) { 173 | currstate = 3; 174 | } 175 | 176 | else if(curr->coverages[sampleid][i] > lower) { 177 | currstate = 2; 178 | } 179 | 180 | else { 181 | currstate = 1; 182 | } 183 | 184 | if(prevstate == -1) { 185 | start = 0; 186 | prevstate = currstate; 187 | } 188 | 189 | if(currstate == prevstate) { 190 | end = i; 191 | } 192 | 193 | else { 194 | if(segmenthead == NULL) { 195 | segmenthead = createSegment(); 196 | segment = segmenthead; 197 | } 198 | 199 | else { 200 | segment->next = createSegment(); 201 | segment->next->prev = segment; 202 | 203 | segment = segment->next; 204 | } 205 | 206 | segment->value = prevstate; 207 | segment->start = start; 208 | segment->end = end; 209 | 210 | prevstate = currstate; 211 | start = i-1; 212 | end = i; 213 | } 214 | } 215 | 216 | segment = segmenthead; 217 | 218 | while(segment != NULL) { 219 | if(segment->end - segment->start >= minsize) { 220 | tmp = segment->next; 221 | 222 | while(tmp != NULL && tmp->end - tmp->start < minsize) { 223 | rtmp = tmp; 224 | tmp = tmp->next; 225 | free(rtmp); 226 | } 227 | 228 | if(tmp == NULL) { 229 | segment->end = curr->numberOfBins; 230 | segment->next = NULL; 231 | } 232 | 233 | else { 234 | if(tmp != segment->next) { 235 | segment->next = tmp; 236 | tmp->prev = segment; 237 | } 238 | } 239 | } 240 | 241 | segment = segment->next; 242 | 243 | } 244 | 245 | segment = segmenthead; 246 | 247 | while(segment != NULL) { 248 | if(segment->next) { 249 | if(segment->end != segment->next->start) { 250 | if(segment->end - segment->start > segment->next->end - segment->next->start) 251 | segment->end = segment->next->start; 252 | 253 | else 254 | segment->next->start = segment->end; 255 | } 256 | } 257 | 258 | segment = segment->next; 259 | } 260 | 261 | segment = segmenthead; 262 | 263 | while(segment != NULL) { 264 | if(segment->value == 4) 265 | fprintf(fp_s, "%s\t%d\t%d\n", curr->name, segment->start*cmd->binSize, segment->end*cmd->binSize); 266 | 267 | if(segment->value == 3) 268 | fprintf(fp_sm, "%s\t%d\t%d\n", curr->name, segment->start*cmd->binSize, segment->end*cmd->binSize); 269 | 270 | if(segment->value == 2) 271 | fprintf(fp_wm, "%s\t%d\t%d\n", curr->name, segment->start*cmd->binSize, segment->end*cmd->binSize); 272 | 273 | if(segment->value == 1) 274 | fprintf(fp_w, "%s\t%d\t%d\n", curr->name, segment->start*cmd->binSize, segment->end*cmd->binSize); 275 | 276 | segment = segment->next; 277 | } 278 | 279 | DestroySegments(segmenthead); 280 | segment = NULL; 281 | segmenthead = NULL; 282 | } 283 | 284 | curr = curr->next; 285 | } 286 | 287 | fclose(fp_s); 288 | fclose(fp_sm); 289 | fclose(fp_wm); 290 | fclose(fp_w); 291 | } 292 | 293 | void Quantiles(CHROMOSOMES *head, int sampleid, CMDINPUT *cmd) { 294 | int64_t gensize = CalculateNonZeroBins(head, sampleid); 295 | fprintf(stderr, "Genome size: %" PRIu64 "\n", gensize); 296 | 297 | float *genbin = ConcatenateGenome(head, gensize, sampleid); 298 | 299 | float upper = -1; 300 | float median = -1; 301 | float lower = -1; 302 | 303 | fprintf(stderr, "Sorting genome ( %d ) \n", sampleid); 304 | 305 | qsort(genbin, gensize, sizeof(float), compare_float); 306 | 307 | upper = genbin[(int)round(gensize*0.75)]; 308 | median = genbin[(int)round(gensize*0.5)]; 309 | lower = genbin[(int)round(gensize*0.25)]; 310 | 311 | fprintf(stderr, "Quantiles:\n"); 312 | fprintf(stderr, "\tUpper: %f\n", upper); 313 | fprintf(stderr, "\tMedian: %f\n", median); 314 | fprintf(stderr, "\tLower: %f\n", lower); 315 | 316 | Segmenting(head, cmd, sampleid, upper, median, lower); 317 | 318 | if(genbin) 319 | free(genbin); 320 | } --------------------------------------------------------------------------------