├── .gitignore
├── Dockerfile
├── LICENSE
├── Makefile
├── R
    ├── BAMscale_peak_density_version2
    │   └── PlotXY_version2.R
    ├── BAMscale_plot_peak_density
    │   ├── FPKM_normalized_coverages.tsv
    │   ├── peakDensity.command
    │   └── plotXY
    │   │   └── PlotXY.R
    ├── OKseq_switches.R
    ├── Plot_heatmap
    │   ├── Heatmap_with_clustering.R
    │   ├── Heatmapper.R
    │   ├── Heatmapper_functions.R
    │   └── heatmap_functions_clean.R
    └── Replication_timing_segmenter.R
├── README.md
├── bamscale-cov.cwl
├── bamscale-scale.cwl
├── bamscale.yml
├── doc
    ├── ATAC-seq_peak_quant_benchmark.png
    ├── ATAC-seq_peak_validation_CB450h.png
    ├── ATAC-seq_peak_validation_CB452h.png
    ├── ATAC-seq_peak_validation_CB454h.png
    ├── ATAC-seq_peak_validation_CEM0h.png
    ├── ATAC-seq_peak_validation_CEM2h.png
    ├── ATAC-seq_peak_validation_CEM4h.png
    └── images
    │   ├── ATAC-seq_coverage_benchmark.png
    │   ├── ATAC-seq_coverage_comparison.png
    │   ├── ATAC-seq_peak_quant_benchmark.png
    │   ├── ATAC-seq_peak_validation.png
    │   ├── BMAscale_RNAseq_coverage.png
    │   ├── Detailed_usage_ATAC_exampleTOP1.png
    │   ├── Detailed_usage_ENDseq.png
    │   ├── Detailed_usage_OKseq.png
    │   ├── Detailed_usage_RepTime_closer.png
    │   ├── Detailed_usage_RepTime_full.png
    │   ├── Detailed_usage_peakQuant_CB45_2h_vs_0h.png
    │   ├── Detailed_usage_peakQuant_CB45_4h_vs_0h.png
    │   ├── Detailed_usage_peakQuant_CEM_2h_vs_0h.png
    │   ├── Detailed_usage_peakQuant_CEM_4h_vs_0h.png
    │   ├── ENDseq_stranded.png
    │   ├── FPKM_formula.png
    │   ├── Libsize_formula.png
    │   ├── MAIN.png
    │   ├── MAIN_figure.png
    │   ├── OKseq_segmenter_figure.png
    │   ├── OKseq_switch_example1.png
    │   ├── OKseq_switch_example2.png
    │   ├── Read_count_matrix_example.png
    │   ├── Replication_timing_script_example.png
    │   ├── TPM_formula.png
    │   ├── XY_Empty_page.png
    │   ├── XY_drop_file.png
    │   ├── XY_dropdown.png
    │   ├── XY_example.png
    │   ├── XY_exec.png
    │   ├── XY_explanation.png
    │   └── XY_rstudio.png
├── includes
    ├── BAMcoverage.h
    ├── BAMstructs.h
    ├── BEDstruct.h
    ├── CHROMstruct.h
    ├── Definitions.h
    ├── Inputs.h
    ├── Writer.h
    ├── binning.h
    ├── main.h
    ├── multithreads.h
    ├── scale.h
    └── segmenter.h
├── nbproject
    ├── Makefile-Release.mk
    ├── Makefile-impl.mk
    ├── Makefile-variables.mk
    ├── Package-Release.bash
    ├── configurations.xml
    ├── private
    │   ├── Makefile-variables.mk
    │   ├── c_standard_headers_indexer.c
    │   ├── configurations.xml
    │   ├── cpp_standard_headers_indexer.cpp
    │   ├── launcher.properties
    │   └── private.xml
    └── project.xml
└── src
    ├── BAMcoverage.c
    ├── BAMstructs.c
    ├── BEDstruct.c
    ├── CHROMstruct.c
    ├── Inputs.c
    ├── Writer.c
    ├── binning.c
    ├── main.c
    ├── multithreads.c
    ├── scale.c
    └── segmenter.c


/.gitignore:
--------------------------------------------------------------------------------
1 | /bin
2 | /build
3 | /dist
4 | .dep.inc
5 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Base Image
 2 | FROM continuumio/miniconda3
 3 | 
 4 | # Metadata
 5 | LABEL base.image="continuumio/miniconda3"
 6 | LABEL version="1"
 7 | LABEL software="BAMscale"
 8 | LABEL software.version="0.0.7"
 9 | LABEL description="BAMscale is a one-step tool for either 1) quantifying and normalizing the coverage of peaks or 2) generated scaled BigWig files for easy visualization of commonly used DNA-seq capture based methods."
10 | LABEL tags="BAM"
11 | LABEL website="https://github.com/ncbi/BAMscale"
12 | 
13 | # Maintainer
14 | MAINTAINER Roberto Vera Alvarez <r78v10a07@gmail.com>
15 | 
16 | USER root
17 | 
18 | RUN apt-get update && \
19 |     apt-get install -y apt-utils wget bzip2 sudo gcc make && \
20 |     apt-get clean && \
21 |     apt-get purge && \
22 |     rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
23 | 
24 | # Updating Anaconda packages
25 | RUN conda update conda
26 | RUN conda update --all
27 | RUN conda config --add channels defaults
28 | RUN conda config --add channels bioconda
29 | RUN conda config --add channels conda-forge
30 | RUN conda install htslib libbigwig
31 | 
32 | # Add user ubuntu with no password, add to sudo group
33 | RUN adduser --disabled-password --gecos '' ubuntu
34 | RUN chmod a+rwx /home/ubuntu/
35 | RUN mkdir /home/ubuntu/bin
36 | RUN chown -R ubuntu /home/ubuntu
37 | USER ubuntu
38 | 
39 | ENV URL=https://github.com/ncbi/BAMscale
40 | ENV FOLDER=BAMscale
41 | ENV PATH="/home/ubuntu/bin:${PATH}"
42 | ENV CONDA_DIR="/opt/conda/"
43 | ENV CPPFLAGS="-I $CONDA_DIR/include"
44 | ENV LDFLAGS="-L $CONDA_DIR/lib -Wl,-rpath,$CONDA_DIR/lib"
45 | 
46 | RUN cd /home/ubuntu/ && \
47 |         git clone $URL && \
48 |         cd $FOLDER && \
49 | 	make && \
50 | 	mv bin/BAMscale /home/ubuntu/bin/ && \
51 |         cd .. && \
52 |         rm -rf $FOLDER
53 | 
54 | WORKDIR /data
55 | 
56 | CMD ["/home/ubuntu/bin/BAMscale"]
57 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | # Public Domain notice
 2 | 
 3 | National Center for Biotechnology Information.
 4 | 
 5 | This software is a "United States Government Work" under the terms of the United States
 6 | Copyright Act. It was written as part of the authors' official duties as United States
 7 | Government employees and thus cannot be copyrighted. This software is freely available
 8 | to the public for use. The National Library of Medicine and the U.S. Government have not
 9 |  placed any restriction on its use or reproduction.
10 | 
11 | Although all reasonable efforts have been taken to ensure the accuracy and reliability
12 | of the software and data, the NLM and the U.S. Government do not and cannot warrant the
13 | performance or results that may be obtained by using this software or data. The NLM and
14 | the U.S. Government disclaim all warranties, express or implied, including warranties
15 | of performance, merchantability or fitness for any particular purpose.
16 | 
17 | Please cite NCBI in any work or product based on this material.
18 | 
19 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
  1 | #
  2 | #  There exist several targets which are by default empty and which can be 
  3 | #  used for execution of your targets. These targets are usually executed 
  4 | #  before and after some main targets. They are: 
  5 | #
  6 | #     .build-pre:              called before 'build' target
  7 | #     .build-post:             called after 'build' target
  8 | #     .clean-pre:              called before 'clean' target
  9 | #     .clean-post:             called after 'clean' target
 10 | #     .clobber-pre:            called before 'clobber' target
 11 | #     .clobber-post:           called after 'clobber' target
 12 | #     .all-pre:                called before 'all' target
 13 | #     .all-post:               called after 'all' target
 14 | #     .help-pre:               called before 'help' target
 15 | #     .help-post:              called after 'help' target
 16 | #
 17 | #  Targets beginning with '.' are not intended to be called on their own.
 18 | #
 19 | #  Main targets can be executed directly, and they are:
 20 | #  
 21 | #     build                    build a specific configuration
 22 | #     clean                    remove built files from a configuration
 23 | #     clobber                  remove all built files
 24 | #     all                      build all configurations
  25 | #     help                     print help message
 26 | #  
 27 | #  Targets .build-impl, .clean-impl, .clobber-impl, .all-impl, and
  28 | #  .help-impl are implemented in nbproject/Makefile-impl.mk.
 29 | #
 30 | #  Available make variables:
 31 | #
 32 | #     CND_BASEDIR                base directory for relative paths
 33 | #     CND_DISTDIR                default top distribution directory (build artifacts)
 34 | #     CND_BUILDDIR               default top build directory (object files, ...)
 35 | #     CONF                       name of current configuration
 36 | #     CND_PLATFORM_${CONF}       platform name (current configuration)
 37 | #     CND_ARTIFACT_DIR_${CONF}   directory of build artifact (current configuration)
 38 | #     CND_ARTIFACT_NAME_${CONF}  name of build artifact (current configuration)
 39 | #     CND_ARTIFACT_PATH_${CONF}  path to build artifact (current configuration)
 40 | #     CND_PACKAGE_DIR_${CONF}    directory of package (current configuration)
 41 | #     CND_PACKAGE_NAME_${CONF}   name of package (current configuration)
 42 | #     CND_PACKAGE_PATH_${CONF}   path to package (current configuration)
 43 | #
 44 | # NOCDDL
 45 | 
 46 | 
 47 | # Environment 
 48 | MKDIR=mkdir
 49 | CP=cp
 50 | CCADMIN=CCadmin
 51 | 
 52 | 
 53 | # build
 54 | build: .build-post
 55 | 
 56 | .build-pre:
 57 | # Add your pre 'build' code here...
 58 | 
 59 | .build-post: .build-impl
 60 | # Add your post 'build' code here...
 61 | 
 62 | 
 63 | # clean
 64 | clean: .clean-post
 65 | 
 66 | .clean-pre:
 67 | # Add your pre 'clean' code here...
 68 | 
 69 | .clean-post: .clean-impl
 70 | # Add your post 'clean' code here...
 71 | 
 72 | 
 73 | # clobber
 74 | clobber: .clobber-post
 75 | 
 76 | .clobber-pre:
 77 | # Add your pre 'clobber' code here...
 78 | 
 79 | .clobber-post: .clobber-impl
 80 | # Add your post 'clobber' code here...
 81 | 
 82 | 
 83 | # all
 84 | all: .all-post
 85 | 
 86 | .all-pre:
 87 | # Add your pre 'all' code here...
 88 | 
 89 | .all-post: .all-impl
 90 | # Add your post 'all' code here...
 91 | 
 92 | 
 93 | # build tests
 94 | build-tests: .build-tests-post
 95 | 
 96 | .build-tests-pre:
 97 | # Add your pre 'build-tests' code here...
 98 | 
 99 | .build-tests-post: .build-tests-impl
100 | # Add your post 'build-tests' code here...
101 | 
102 | 
103 | # run tests
104 | test: .test-post
105 | 
106 | .test-pre: build-tests
107 | # Add your pre 'test' code here...
108 | 
109 | .test-post: .test-impl
110 | # Add your post 'test' code here...
111 | 
112 | 
113 | # help
114 | help: .help-post
115 | 
116 | .help-pre:
117 | # Add your pre 'help' code here...
118 | 
119 | .help-post: .help-impl
120 | # Add your post 'help' code here...
121 | 
122 | 
123 | 
124 | # include project implementation makefile
125 | include nbproject/Makefile-impl.mk
126 | 
127 | # include project make variables
128 | include nbproject/Makefile-variables.mk
129 | 


--------------------------------------------------------------------------------
/R/BAMscale_peak_density_version2/PlotXY_version2.R:
--------------------------------------------------------------------------------
  1 | library(shiny)
  2 | library(GenomicRanges)
  3 | library(data.table)
  4 | library(ggplot2)
  5 | 
  6 | options(shiny.maxRequestSize=300*1024^2)
  7 | 
  8 | 
  9 | subsetQuantwithBed = function(peaksfile, bedfile) {
 10 |   coords = read.table(bedfile)[,1:3]
 11 |   colnames(coords) = c("chr", "start", "end")
 12 |   coords = makeGRangesFromDataFrame(coords)
 13 |   
 14 |   peaks = read.table(peaksfile,
 15 |                      header = T,
 16 |                      sep = "\t")
 17 |   
 18 |   peakcoords = GRanges(peaks[,1])
 19 |   
 20 |   overs = findOverlaps(query = peakcoords, subject = coords)
 21 |   peakoverlaps = unique(queryHits(overs))
 22 |   
 23 |   return (peaks[peakoverlaps,])
 24 |   
 25 | }
 26 | 
 27 | 
 28 | ui <- fluidPage(
 29 |   fluidRow(
 30 |     column(
 31 |       6, 
 32 |       align = "center",
 33 |       tags$img(src = "https://healthtech.upenn.edu/wp-content/uploads/2018/09/nci_-1200x600.jpg", align = "left", height = "100px", width = "200px")   #NIH-NCI
 34 |     )
 35 |   ),
 36 |   sidebarLayout(
 37 |     sidebarPanel(
 38 |       fileInput('datafile', 'Choose quantified peak file',
 39 |                 accept = c('text/csv', 'text/comma-separated-values,text/plain', '.csv')),
 40 |       
 41 |       fileInput("bedfile", "Choose a BED file to subset peaks",
 42 |                 multiple = FALSE,
 43 |                 accept = c(".bed")),
 44 | 
 45 |       conditionalPanel(
 46 |         # use a server side condition
 47 |         # placeholders will be replaced from the server
 48 |         condition = "output.fileUploaded",
 49 |         selectInput("xAxis", "X-axis sample", ""),
 50 |         selectInput("yAxis", "Y-axis sample", ""),
 51 |         numericInput("slidelimit", "Axis limit", 1),
 52 |         numericInput("hexcount", "No. of hex bins", 200))
 53 |       
 54 |       
 55 |       
 56 |     ),
 57 |     mainPanel(
 58 |       plotOutput("plotXY")
 59 |     )
 60 |   )
 61 | )
 62 | 
 63 | server <- function(input, output, session){
 64 |   # create reactive version of the dataset (a data.frame object)
 65 |   filedata <- reactive({
 66 |     infile <- input$datafile
 67 |     if (is.null(infile))
 68 |       # User has not uploaded a file yet. Use NULL to prevent observeEvent from triggering
 69 |       return(NULL)
 70 |     
 71 |     if(is.null(input$bedfile)) {
 72 |       temp <- read.table(input$datafile$datapath,
 73 |                        header = T,
 74 |                        sep = "\t", check.names = F)
 75 |     } else {
 76 |       temp = subsetQuantwithBed(input$datafile$datapath, input$bedfile$datapath)
 77 |     }
 78 |   })
 79 |   
 80 |   output$fileUploaded <- reactive({
 81 |     return(!is.null(filedata()))
 82 |   })
 83 |   outputOptions(output, 'fileUploaded', suspendWhenHidden=FALSE)
 84 |   
 85 |   observeEvent(filedata(), {
 86 |     snames = names(filedata())
 87 |     snames = snames[2:length(snames)]
 88 |     updateSelectInput(session, "xAxis", choices =snames, selected=snames[1])
 89 |     updateSelectInput(session, "yAxis", choices =snames, selected = snames[2])
 90 |     xv = input$yAxis
 91 |     yv = input$xAxi
 92 |     if (is.null(xv) || is.null(yv)){
 93 |       xv = snames[1]
 94 |       yv = snames[2]
 95 |     } 
 96 |     
 97 |     df = as.data.frame(t(rbind(filedata()[, xv], filedata()[, yv])))
 98 |     axislim = round(min(quantile(df[,1], .99)[1], quantile(df[,2], .99)[1]))
 99 |     axismax = max(max(df[,1]), max(df[,2]))
100 |     baxislim =  max(quantile(df[,1], .999)[1], quantile(df[,2], .999)[1])
101 |     updateNumericInput(session, "slidelimit", value = axislim)
102 |   })
103 |  
104 |   output$plotXY <- renderPlot({
105 |     colpal = rev(rainbow(5))
106 |     colpal[1] = "#562188FF"
107 |     colpal[3] = "#225A16FF"
108 |     
109 |     gp1 <- NULL
110 |     gp2 = NULL
111 |     
112 |     dat = filedata()
113 |     if (!is.null(dat)){
114 |       xv <- input$xAxis
115 |       yv <- input$yAxis
116 |       if (!is.null(xv) & !is.null(yv)){
117 |         df = as.data.frame(t(rbind(filedata()[, input$xAxis], filedata()[, input$yAxis])))
118 |         colnames(df) = c(input$xAxis, input$yAxis)
119 |         axislim = min(quantile(df[,1], .99)[1], quantile(df[,2], .99)[1])
120 |         axismax = max(max(df[,1]), max(df[,2]))
121 |         baxislim =  max(quantile(df[,1], .999)[1], quantile(df[,2], .999)[1])
122 |         
123 |         gp1 = ggplot(df, aes(x = df[,1], y = df[,2])) +
124 |           geom_hex(bins = input$hexcount) +
125 |           scale_fill_gradientn(colours = colpal, name = "log2\n(Density of Peaks)", trans = "log2") +
126 |           scale_x_continuous(expand = c(0, 0), limits = c(0, input$slidelimit)) +
127 |           scale_y_continuous(expand = c(0, 0), limits = c(0, input$slidelimit)) +
128 |           xlab(input$xAxis) +
129 |           ylab(input$yAxis) +
130 |           coord_equal(ratio = 1) +
131 |           geom_abline(intercept = 0, slope = 1, colour="black", size=1.00, linetype = "twodash") +
132 |           theme_classic() +
133 |           theme(text = element_text(size = 14))
134 |         }
135 |     }
136 |     
137 |     return (gp1)
138 |     })
139 | }
140 | 
141 | shinyApp(ui, server)
142 | 


--------------------------------------------------------------------------------
/R/BAMscale_plot_peak_density/peakDensity.command:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

# Launches the peak-density Shiny app stored in the plotXY folder next to this
# script. The script's own directory is exported as SCPATH so that the R
# one-liner can build the path to PlotXY.R from it.
SCPATH=$(dirname "$BASH_SOURCE")
export SCPATH

Rscript -e 'library(methods); shiny::runApp(paste0(Sys.getenv("SCPATH"), "/plotXY/PlotXY.R"), launch.browser = TRUE)'
5 | 


--------------------------------------------------------------------------------
/R/BAMscale_plot_peak_density/plotXY/PlotXY.R:
--------------------------------------------------------------------------------
  1 | list.of.packages <- c("shiny", "ggplot2", "tidyr", "ggrepel", "gridExtra")
  2 | new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
  3 |                       if(length(new.packages)) install.packages(new.packages)
  4 |                       
  5 | 
  6 | library(shiny)
  7 | library(ggplot2)
  8 | library(tidyr)
  9 | library(ggrepel)
 10 | library(gridExtra)
 11 | 
 12 | options(shiny.maxRequestSize=150*1024^2)  # Current Max File Size: 150MB  (For larger files increase this accordingly)
 13 | 
  14 | # Define UI for the application that draws the peak-density (hexbin) plot
 15 | ui <- fluidPage(
 16 |   tags$style(type="text/css",
 17 |              ".shiny-output-error { visibility: hidden; }",
 18 |              ".shiny-output-error:before { visibility: hidden; }"
 19 |   ),
 20 |   fluidRow(
 21 |     column(
 22 |       6, 
 23 |       align = "center",
 24 |       tags$img(src = "https://healthtech.upenn.edu/wp-content/uploads/2018/09/nci_-1200x600.jpg", align = "left", height = "100px", width = "200px")   #NIH-NCI
 25 |     )
 26 |   ),
 27 |   fluidRow(
 28 |     column(12,
 29 |            align = "center",
 30 |            titlePanel(
 31 |              title  = "Application for Plotting Peak-Density Figures",
 32 |              windowTitle = "Application for Plotting Peak Density Figures"
 33 |              ),
 34 |            fluidRow(
 35 |              column(
 36 |                3,
 37 |                align = "center",
 38 |                fileInput(
 39 |                  "file1", 
 40 |                  "Choose Normalized Coverage File",
 41 |                  accept = c(".tsv")),
 42 |                tags$hr()
 43 |              ),
 44 |              column(
 45 |                6,
 46 |                align = "center",
 47 |                uiOutput("slider")
 48 |              )
 49 |            ),
 50 |            fluidRow(
 51 |              column(
 52 |                6,
 53 |                align = "center",
 54 |                uiOutput("xAxis")
 55 |              ),
 56 |              column(
 57 |                6, 
 58 |                align = "center",
 59 |                uiOutput("yAxis")
 60 |              )
 61 |            ),
 62 |            fluidRow(
 63 |              column(
 64 |                12,
 65 |                align = "center",
 66 |                plotOutput("plotXY", width = "700px", height = "700px")
 67 |              )
 68 |            ),
 69 |            fluidRow(
 70 |              column(
 71 |                12,
 72 |                align = "right",
 73 |                "Developed by Jacob M Gross & Lorinc S Pongor"
 74 |              )
 75 |            )
 76 |     )
 77 |   )
 78 | )
 79 | 
  80 | # Define server logic required to draw the peak-density plot
 81 | server <- function(input, output) {
 82 |   
 83 |   raw_data = eventReactive(input$file1, {
 84 |     a = read.table(input$file1$datapath, stringsAsFactors = F, check.names = F, sep = "\t", header = T, fill = T)
 85 |     a = separate(a, 1, c("chr", "position"), sep = ":")
 86 |     a = separate(a, 2, c("Start", "Stop"), sep = "-")
 87 |   })
 88 |   
 89 |   output$slider = renderUI({
 90 |     dat = as.data.frame(t(rbind(raw_data()[, input$xaxis], raw_data()[, input$yaxis])))
 91 |     sliderInput(
 92 |       "slider1", 
 93 |       label = h3("Axis Limit Slider"), 
 94 |       min = 0, 
 95 |       max = round(max(dat)+1), 
 96 |       value = max(quantile(dat[,1], .99), quantile(dat[,2], .99)) 
 97 |     )
 98 |   })
 99 |   
100 |   output$xAxis = renderUI({
101 |     dat1 = as.data.frame(raw_data()[,4:ncol(raw_data())])
102 |     items = names(dat1)
103 |     selectInput("xaxis", "X-axis:", items, selected = items[1])
104 |   })
105 |   
106 |   output$yAxis = renderUI({
107 |     dat1 = as.data.frame(raw_data()[,4:ncol(raw_data())])
108 |     items = names(dat1)
109 |     #names(items) = items
110 |     selectInput("yaxis", "Y-axis:", items, selected = items[2])
111 |   })
112 |    
113 |    output$plotXY <- renderPlot({
114 |      dat = as.data.frame(t(rbind(raw_data()[, input$xaxis], raw_data()[, input$yaxis])))
115 |      colnames(dat) = c(paste0("1: ", input$xaxis), paste0("2: ", input$yaxis))
116 |      
117 |      colpal = rev(rainbow(5))
118 |      colpal[1] = "#562188FF"
119 |      colpal[3] = "#225A16FF"
120 |      
121 |      p1 = ggplot(dat, aes(x = dat[,1], y = dat[,2])) +
122 |        labs(x = paste0(colnames(dat)[1], " (Reads Per Peak)") , y = paste0(colnames(dat)[2], " (Reads Per Peak)")) +
123 |        ggtitle(label = paste0(colnames(dat)[1]," vs ",colnames(dat)[2], " LibSize Filtered")) +
124 |        geom_hex(bins = 200) +
125 |        scale_fill_gradientn(colours = colpal, name = "log2\n(Density of Peaks)", trans = "log2") +
126 |        scale_x_continuous(expand = c(0, 0), limits = c(0, input$slider1)) +
127 |        scale_y_continuous(expand = c(0, 0), limits = c(0, input$slider1)) +
128 |        geom_abline(intercept = 0, slope = c(0.15579, 0.3193, 0.7208, 1.38742, 3.1315, 6.4188), colour="#8B8B8B", size = 0.5) +
129 |        geom_abline(intercept = 0, slope = c( 0.5, 2), colour="#7E7D7D", size = 0.85)+
130 |        geom_abline(intercept = 0, slope = 1, colour="black", size=0.8, linetype = "twodash") +
131 |        scale_colour_gradient(low = "red", high = "blue") +
132 |        coord_equal(ratio = 1) +
133 |        theme(plot.title = element_text(size = 10), legend.title = element_text(size=8), axis.title = element_text(size=8.5),
134 |              panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_rect(fill = "#F3F3F3", colour = "black"), 
135 |              axis.text = element_text(size = 16), axis.title.x = element_text(size = 20), axis.title.y = element_text(size = 20))
136 |      
137 |      grid.arrange(p1)
138 |    })
139 |  
140 | }
141 | 
142 | # Run the application 
143 | shinyApp(ui = ui, server = server)
144 | 
145 | 


--------------------------------------------------------------------------------
/R/OKseq_switches.R:
--------------------------------------------------------------------------------
  1 | #args = commandArgs(trailingOnly=TRUE)
  2 | 
  3 | if (!require(argparse)) {
  4 |   PrintErrorMessage()
  5 |   stop("Please install package names \"rtracklayer\"")
  6 | }
  7 | 
  8 | 
  9 | 
 10 | library(argparse)
 11 | parser <- ArgumentParser()
 12 | parser$add_argument("-i", "--input", type="character", default="test.bw", 
 13 |                     help="Input OK-seq bigWig file.")
 14 | 
 15 | parser$add_argument("-b", "--binsize", type="integer", default=50000, 
 16 |                     help="Bin size [default %(default)s]")
 17 | 
 18 | parser$add_argument("-l", "--leftmin", type="double", default=-0.15, 
 19 |                     help="Mean value of bin on left side [default %(default)s]")
 20 | 
 21 | parser$add_argument("-r", "--rightmin", type="double", default=0.15, 
 22 |                     help="Mean value of bin on right side [default %(default)s]")
 23 | 
 24 | parser$add_argument("-d", "--diff", type="double", default=0.4, 
 25 |                     help="Minimum difference between left and right side means [default %(default)s]")
 26 | 
 27 | parser$add_argument("-s", "--slidesize", type="double", default=50000, 
 28 |                     help="Minimum difference between left and right side means [default %(default)s]")
 29 | 
 30 | 
 31 | args <- parser$parse_args()
 32 | 
 33 | min_left_value = args$leftmin
 34 | min_right_value = args$rightmin
 35 | min_diff = args$diff
 36 | windowsize = args$binsize
 37 | infile = args$input 
 38 | sliding_window = args$slidesize
 39 | 
 40 | 
 41 | if(length(args) < 1){
 42 |   parser$print_help()
 43 |   stop("Not enough arguments. Replication timing bigwig file has to be specified (created with BAMscale).")
 44 | }
 45 | 
 46 | if(!file.exists(infile)) {
 47 |   parser$print_help()
 48 |   stop(paste0("Input file specified does not exist: \"", infile, "\""))
 49 | }
 50 | 
 51 | if (!require(rtracklayer)) {
 52 |   parser$print_help()
 53 |   stop("Please install package names \"rtracklayer\"")
 54 | }
 55 | 
 56 | library(rtracklayer)
 57 | 
 58 | GetMergedElements = function(fbw, typeseg) {
 59 |   fbw = fbw[fbw$segments == typeseg,]
 60 |   fbw = reduce(fbw)
 61 |   fbw$length = width(fbw)
 62 |   fbw = fbw[fbw$length > min_segment_length]
 63 |   fbw$segments = typeseg
 64 |   return(fbw)
 65 | }
 66 | 
 67 | outfile = gsub(pattern ="\\.bw|\\.bigWig|\\.bigwig", replacement = "", x = infile)
 68 | outfile = paste0(outfile, ".OKseq_switches.bed")
 69 | 
 70 | include_zeroes_segment_thresholds = 0
 71 | 
 72 | bw = import(infile)
 73 | 
 74 | dat_bw = as.data.frame(bw)
 75 | 
 76 | chrnames = as.character(unique(dat_bw$seqnames))
 77 | chrnames = chrnames[grepl(pattern = "_", chrnames) == F]
 78 | 
 79 | df <- data.frame(chr=character(),
 80 |                  start=numeric(), 
 81 |                  end=numeric(), 
 82 |                  stringsAsFactors=FALSE) 
 83 | 
 84 | binsize = dat_bw[1, "end"] - dat_bw[1, 'start'] + 1
 85 | windowsize = windowsize / binsize
 86 | rsize = args$binsize
 87 | 
 88 | 
 89 | for(j in 1:length(chrnames)) {
 90 |   print(chrnames[j])
 91 |   dat = dat_bw[dat_bw$seqnames == chrnames[j],]
 92 |   endcoord = dat[nrow(dat), "end"]
 93 |   
 94 |   nbins = (endcoord - rsize) / sliding_window
 95 |   lastpos = -1
 96 |   reslist = list()
 97 |   
 98 |   for(i in 1:nbins) {
 99 |     startcoord = as.integer(((i-1)*sliding_window)) - 1
100 |     endcoord = as.integer(startcoord + rsize*3) + 2
101 |     
102 |     tdat = dat[dat$start >= (startcoord-1) & dat$end <= (endcoord + 2),]
103 |     tscore = rep(tdat$score, tdat$width)
104 |     
105 |     if(length(tscore) >= rsize*2) {
106 |       lscore = mean(tscore[0:rsize])
107 |       rscore = mean(tscore[(length(tscore)-rsize):length(tscore)])
108 |       
109 |       if(rscore > lscore) {
110 |         if(rscore - lscore >= min_diff & lscore <= min_left_value & rscore >= min_right_value) {
111 |           reslist = append(reslist, list(chrnames[j], startcoord, endcoord))
112 |           lastpos = endcoord
113 |         }
114 |       }
115 |     }
116 |   }
117 |   
118 |   if(length(reslist) > 0) {
119 |     leftpos = data.frame(matrix(unlist(reslist), nrow=length(reslist) / 3, byrow=T),stringsAsFactors=FALSE)
120 |     colnames(leftpos) = c("chr", "start", "end")
121 |     leftpos$start = as.numeric(leftpos$start)
122 |     leftpos$end = as.numeric(leftpos$end)
123 | 
124 |     if(nrow(df) == 0) {
125 |       df = leftpos
126 |     } else {
127 |       df = merge(df, leftpos, all = T, no.dup = F)
128 |     }
129 |   }
130 | }
131 | 
132 | leftpos = df
133 | leftpos$tval = "."
134 | leftpos$nval = "."
135 | leftpos$color = "77,0,0"
136 | 
137 | rightpos = leftpos
138 | rightpos$start = rightpos$start + 2*rsize
139 | rightpos$end = rightpos$start + rsize
140 | rightpos$color = "19,108,0"
141 | leftpos$end = leftpos$start + rsize
142 | merged = rbind(leftpos, rightpos)
143 | 
144 | interpos = leftpos
145 | interpos$start = interpos$start + rsize
146 | interpos$end = interpos$start + rsize
147 | interpos$color = "240,230,140"
148 | merged = rbind(merged, interpos)
149 | 
150 | merged = merged[order(merged$chr, merged$start),]
151 | merged = merged[,c("chr", "start", "end", "tval", "tval", "nval", "start", "end", "color")]
152 | write.table(merged, outfile, quote = F, sep = "\t", col.names = F, row.names = F)
153 | 
154 | 


--------------------------------------------------------------------------------
/R/Plot_heatmap/Heatmap_with_clustering.R:
--------------------------------------------------------------------------------
  1 | library(rtracklayer)
  2 | library(ggplot2)
  3 | library(ComplexHeatmap)
  4 | require(reshape2)
  5 | library(data.table)
  6 | library(circlize)
  7 | library(RColorBrewer)
  8 | source("heatmap_functions_clean.R")
  9 | 
 10 | # If you use this script, please cite BAMscale, rtracklayer and ComplexHeatmap!!!
 11 | # Disclaimer/FYI: this is still under development!
 12 | 
 13 | #Author: Lorinc Pongor (pongorlorinc@gmail.com)
 14 | 
 15 | ## INPUTS ###
 16 | 
 17 | plot_pdf_name = "Heatmap_plot.pdf"
 18 | 
 19 | # downsample peaks to these many coordinates (separately each peak)
 20 | subset_peaks = 0 # set to 0 to stop
 21 | extend = 5000 #extend peaks by these many bases
 22 | nbin = 100 #number of bins in heatmap, the default 100 is pretty good
 23 | 
 24 | # Number of clusters in k-means clustering.
 25 | # Set to 0 to turn it off
 26 | k_means_clusters = 3
 27 | 
 28 | # Numeric values to set for clustering, or ordering of peaks based on intensity
 29 | # Please give numeric values. Eg:
 30 | # samples_used_clustering_arranging = c(5,6) for samples 5 and 6
 31 | samples_used_clustering_arranging = NULL # set to NULL to use all samples
 32 | 
 33 | 
 34 | #Z-score data for k-means clustering or sorting peaks? 0: no, 1: yes
 35 | z_score_data_clustering_arranging = 1 
 36 | 
 37 | # Scale samples separately?
 38 | # Yes: plot_scale_samples_separate = 1
 39 | # No: plot_scale_samples_separate = 0
 40 | # It is encouraged to set to 1 (Yes) when comparing bigwigs from different sources.
 41 | # If the data is from one experiment (eg. same antibody, same time), and bigwig is normalized, this can be set to 0 
 42 | plot_scale_samples_separate = 1
 43 | 
 44 | # one or multiple bigwigs
 45 | bwfile = c("data/Sample_MCF7_RecQ1_2_020_C_HCVW3BGX2.dd.bam.scaled.bw",
 46 |            "data/Sample_MCF7_ER_2_020_C_HCVW3BGX2.dd.bam.scaled.bw",
 47 |            "data/FOXA1_ENCFF255FPM.bigWig",
 48 |            "data/GATA3_ENCFF477GZL.bigWig",
 49 |            "data/H3K27ac_ENCFF411FCW.bigWig",
 50 |            "data/H3K4me1_ENCFF983TTS.bigWig",
 51 |            "data/H3K4Me3_ENCFF862CKA.bigWig",
 52 |            "data/H3K9Me3_ENCFF688REP.bigWig")
 53 | 
 54 | #one or multiple peaks
 55 | peaks = c("data/MCF7_RecQ1_2_vs_MCF7_IgG_2_MACS2_brdpks.bed")
 56 | 
 57 | # Name of bigwigs to be used for the subtitle of each heatmap (column)
 58 | # Same number of names have to be specified as the number of bigwig files
 59 | # Set to bwnames = NULL to use file name
 60 | bwnames = c("RECQ1", "ERa", "FOXA1", "GATA3", "H3K27ac", "H3K4me1", "H3K4Me3", "H3K9Me3")
 61 | 
 62 | # Name of peaks to be used for the rowname of each heatmap
 63 | # Same number of names have to be specified as the number of peak files
 64 | # Set to pnames = NULL to use file name
 65 | pnames = NULL
 66 | 
 67 | ## END OF INPUTS ###
 68 | 
 69 | 
 70 | z_score_data = 0 # please don't use this for now
 71 | 
 72 | peakds = list()
 73 | peakds$peaks = peaks
 74 | peakds$bwfile = bwfile
 75 | peakds$bwnames = bwnames
 76 | peakds$peaknames = pnames
 77 | peakds$subset_peaks = subset_peaks
 78 | peakds$extend = extend
 79 | peakds$nbin = nbin
 80 | peakds$k_means_clusters = k_means_clusters
 81 | peakds$samples_used_clustering_arranging = samples_used_clustering_arranging
 82 | peakds$plot_scale_samples_separate = plot_scale_samples_separate
 83 | peakds$z_score_data_clustering_arranging = z_score_data_clustering_arranging
 84 | peakds$bed = GRanges()
 85 | peakds$peak_quants = data.frame()
 86 | peakds$raw_binmats = list()
 87 | peakds$binmats = list()
 88 | peakds$order = NULL
 89 | peakds$clusters = NULL
 90 | peakds$errors = 0
 91 | peakds$plotlist = list()
 92 | peakds$z_score_data = z_score_data
 93 | peakds$heatmap_lineplot_means = list()
 94 | peakds$max_ann = list()
 95 | peakds$min_ann = list()
 96 | peakds$heatmap_max = list()
 97 | peakds$heatmap_min = list()
 98 | source("heatmap_functions_clean.R")
 99 | start.time <- Sys.time()
100 | peakds = ImportHeatmapData(peakds)
101 | 
102 | end.time <- Sys.time()
103 | time.taken <- end.time - start.time
104 | time.taken
105 | 
106 | if(peakds$errors > 0) {
107 |   stop("Check inputs!!!")
108 | }
109 | 
110 | peakds = PrepareDataForPlotting(peakds)
111 | peakds = PlotHeatmaps(peakds)
112 | draw(peakds$combined_plot, ht_gap = unit(1, "cm"))
113 | 
114 | pdf(plot_pdf_name, 
115 |     width = (length(peakds$bwfile) * 4 + (length(peakds$bwfile)-1)) * 0.393701, 
116 |     height = 18 * 0.393701)
117 | draw(peakds$combined_plot, ht_gap = unit(1, "cm"))
118 | dev.off()
119 | 


--------------------------------------------------------------------------------
/R/Plot_heatmap/Heatmapper.R:
--------------------------------------------------------------------------------
 1 | library(GenomicRanges)
 2 | library(dynamicTreeCut)
 3 | library(ComplexHeatmap)
 4 | library(circlize)
 5 | library(RColorBrewer)
 6 | library(ChIPseeker)
 7 | library(TxDb.Hsapiens.UCSC.hg19.knownGene)
 8 | library(ggplot2)
 9 | library(data.table)
10 | 
11 | source("Heatmapper_functions.R")
12 | 
13 | 
14 | bwfile = c("../../SCLC_NAPY_ChIP/NA_chip/bigwigs/GSM1700639_H889_ASCL1.scaled.bw",  # bigWig files to plot
15 |            "../../SCLC_NAPY_ChIP/NA_chip/bigwigs/GSM1700641_H82_NEUROD1.scaled.bw",
16 |            "/Volumes/LMP/ngs/chip/SCLC_cell_lines/bigwigs_hg19_clean/NCI-H1048_POU2F3_rep1.hg19_clean.bam.scaled.bw",
17 |            "/Volumes/LMP/ngs/chip/SCLC_cell_lines/bigwigs_hg19_clean/NCI-H889_H3K27ac_rep1.hg19_clean.bam.scaled.bw",
18 |            "/Volumes/LMP/ngs/chip/SCLC_cell_lines/bigwigs_hg19_clean/NCI-H82_H3K27ac_rep1.hg19_clean.bam.scaled.bw",
19 |            "/Volumes/LMP/ngs/chip/SCLC_cell_lines/bigwigs_hg19_clean/NCI-H1048_H3K27ac_rep1.hg19_clean.bam.scaled.bw",
20 |            "/Volumes/LMP/ngs/chip/SCLC_cell_lines/bigwigs_hg19_clean/DMS114_H3K27ac_rep1.hg19_clean.bam.scaled.bw")
21 | # BED files with the regions to plot (three peak sets)
22 | peaks = c("Intervene_results/sets/100_ASCL1.bed",
23 |           "Intervene_results/sets/010_NEUROD1.bed",
24 |           "Intervene_results/sets/001_POU2F3.bed")
25 | # Display names: bwnames has one entry per bwfile (7), pnames one entry per peak file (3)
26 | bwnames = c("H889_ASCL1", "H82_NEUROD1", "H1048_POU2F3", "H889", "H82", "H1048", "DMS114")
27 | pnames = c("A", "N", "P")
28 | # Import -> prepare -> build plots. The three functions below are not defined in this file; presumably they come from Heatmapper_functions.R
29 | heatobj = ImportHeatMapperData(bedfiles = peaks,
30 |                                bednames = pnames,
31 |                                bwfiles = bwfile, 
32 |                                bwnames = bwnames, 
33 |                                extend_peaks = c(2500),
34 |                                subset_peaks = 1500, 
35 |                                individual_subsetting = 1)
36 | # cluster = "no" is passed, i.e. no clustering is requested from PrepBEDheatmapdata
37 | heatobj = PrepBEDheatmapdata(obj = heatobj, cluster = "no")
38 | heatobj = PrepHeatmapPlots(obj = heatobj,
39 |                            color_palette = c("Reds", "Greens", "Blues", "Purples", "Purples", "Purples", "Purples"),  # one palette name per bigWig
40 |                            #lineplot_max_value = c(20), lineplot_min_value = c(10),
41 |                            split_colors = c("red", "green", "blue"), 
42 |                            raster_quality = 1, same_scale = 0)
43 | draw(heatobj@combined_heatmaps, ht_gap = unit(1, "cm"))  # heatobj exposes the result in the combined_heatmaps slot
44 | 
45 | 


--------------------------------------------------------------------------------
/R/Plot_heatmap/heatmap_functions_clean.R:
--------------------------------------------------------------------------------
  1 | library(data.table)
  2 | library(rtracklayer)
  3 | 
  4 | ScoreGrangesBWmean = function(coords, bw) {
  5 |   # Mean bw score per region of coords; regions without any overlapping bw interval stay NA
  6 |   hits = as.data.frame(findOverlaps(coords, bw))
  7 |   hits$score = bw[hits$subjectHits]$score
  8 |   per_region = data.table(hits)[, list(score = mean(score)), by = 'queryHits']
  9 |   region_means = rep(NA, length(coords))
 10 |   region_means[per_region$queryHits] = per_region$score
 11 |   return (region_means)
 12 | }
 13 | 
 14 | Quantify_peaks = function(bed, bwfile) {  # Mean bigWig score of every region in bed, one column per file in bwfile
 15 |   print("Quantifying peaks:")
 16 |   peak_quants = as.data.frame(matrix(ncol = length(bwfile) + 1, nrow = length(bed)))
 17 |   colnames(peak_quants) = c(as.character(seq(1,length(bwfile))), "mean")
 18 |   rownames(peak_quants) = bed$pname
 19 |   # Column i receives the per-region mean score of bwfile[i]; import() is restricted to bed through `which`
 20 |   for(i in 1:length(bwfile)) {
 21 |     cat("\tQuantifying peaks for:", bwfile[i],"\n")
 22 |     bw = import(bwfile[i], which = bed)
 23 |     peak_quants[,i] = ScoreGrangesBWmean(bed, bw)
 24 |   }
 25 |   # Row-wise mean over all samples; rowMeans() is called without na.rm, so one NA sample makes the mean NA
 26 |   if(length(bwfile) > 1) {
 27 |     peak_quants$mean = rowMeans(peak_quants[,1:length(bwfile)])
 28 |   } else {
 29 |     peak_quants$mean = as.numeric(peak_quants[,1])
 30 |   }
 31 |   # Copy the id metadata column of bed next to the scores
 32 |   peak_quants$id = bed$id
 33 |   cat("\t", "Done importing peaks","\n")
 34 |   return(peak_quants)
 35 | }
 36 | 
 37 | CheckBWfiles = function(peakds) {
 38 |   # Validates peakds$bwfile / peakds$bwnames before any bigWig is imported.
 39 |   #   peakds: list with bwfile (character vector of paths), bwnames (labels or NULL) and errors
 40 |   # Returns peakds; errors is set to 1 when no file is given, a file is missing, or the
 41 |   # number of names differs from the number of files. bwnames defaults to the file basenames.
 42 |   if(length(peakds$bwfile) < 1) {
 43 |     print(paste0("ERROR: no BW files were specified"))
 44 |     peakds$errors = 1   # previously this case only printed and then fell into the 1:0 loop
 45 |     return (peakds)
 46 |   }
 47 |   
 48 |   file_not_exist = 0
 49 |   for(i in seq_along(peakds$bwfile)) {
 50 |     if(!file.exists(peakds$bwfile[i])) {
 51 |       print(paste0("ERROR: file \"", peakds$bwfile[i], "\" does not exist"))
 52 |       file_not_exist = 1
 53 |     }
 54 |   }
 55 |   
 56 |   if(file_not_exist == 1) {
 57 |     peakds$errors = 1
 58 |     return (peakds)
 59 |   }
 60 |   
 61 |   if(is.null(peakds$bwnames)) {
 62 |     peakds$bwnames = basename(peakds$bwfile)
 63 |   } else if(length(peakds$bwnames) != length(peakds$bwfile)) {
 64 |     print(paste0("ERROR: number of specified BW names (",length(peakds$bwnames),") is not equal to number of BW files(",length(peakds$bwfile),")"))
 65 |     peakds$errors = 1
 66 |   }
 67 |   
 68 |   # A second, unreachable file_not_exist check that used to follow here was removed.
 69 |   return (peakds)
 70 | }
 71 | 
 72 | 
 73 | 
 74 | CheckBEDfiles = function(peakds) {
 75 |   # Validates peakds$peaks / peakds$peaknames and imports the BED regions into peakds$bed.
 76 |   #   peakds: list with peaks (BED paths), peaknames (labels or NULL), extend, subset_peaks, errors
 77 |   # Returns peakds. On invalid input errors is set to 1 and nothing is imported. Otherwise bed is
 78 |   # a GRanges of all regions with metadata id (index of the source file) and pname (running number).
 79 |   print("Importing BED coordinates")
 80 |   if(length(peakds$peaks) < 1) {
 81 |     print(paste0("ERROR: no peak files were specified"))
 82 |     peakds$errors = 1
 83 |     return (peakds)
 84 |   }
 85 |   
 86 |   file_not_exist = 0
 87 |   for(i in seq_along(peakds$peaks)) {
 88 |     if(!file.exists(peakds$peaks[i])) {
 89 |       print(paste0("ERROR: file \"", peakds$peaks[i], "\" does not exist"))
 90 |       file_not_exist = 1
 91 |     }
 92 |   }
 93 |   
 94 |   if(file_not_exist == 1) {
 95 |     peakds$errors = 1
 96 |     return (peakds)
 97 |   }
 98 |   
 99 |   if(is.null(peakds$peaknames)) {
100 |     peakds$peaknames = basename(peakds$peaks)
101 |   } else if(length(peakds$peaknames) != length(peakds$peaks)) {
102 |     print(paste0("ERROR: number of specified peak names (",length(peakds$peaknames),") is not equal to number of peak files(",length(peakds$peaks),")"))
103 |     peakds$errors = 1
104 |     return (peakds)   # stop here instead of importing with inconsistent names
105 |   }
106 |   
107 |   for(i in seq_along(peakds$peaks)) {
108 |     cat("\tImporting BED:", peakds$peaks[i],"\n")
109 |     tbed = read.table(peakds$peaks[i], sep = "\t")[,1:3]
110 |     colnames(tbed) = c("chr", "start", "end")
111 |     tbed$id = i
112 |     tbed = makeGRangesFromDataFrame(tbed, keep.extra.columns = T)
113 |     # Widen every region; the amount is read from peakds rather than from a global `extend`
114 |     tbed = tbed + peakds$extend
115 |     # Randomly down-sample large peak sets (subset_peaks <= 0 disables this)
116 |     if(peakds$subset_peaks > 0 && length(tbed) > peakds$subset_peaks) {
117 |       tbed = tbed[sample(seq(1,length(tbed)), peakds$subset_peaks)]
118 |     }
119 |     
120 |     if(i == 1) {
121 |       bed = tbed
122 |     } else {
123 |       bed = c(bed, tbed)
124 |     }
125 |   }
126 |   
127 |   bed$pname = seq(1, length(bed))
128 |   names(bed) = bed$pname
129 |   peakds$bed = bed
130 |   cat("\t", "Done reading BED files","\n")
131 |   return (peakds)
132 | }
133 | 
134 | BinCoordinates = function(bw, coord, nbins) {  # Mean bw score per (region of coord, position bin)
135 |   coord$length = width(coord)
136 |   overs = as.data.frame(findOverlaps(coord, bw))
137 |   overs$width = coord[overs$queryHits]$length
138 |   # One row of dfcoord per overlap, so region columns line up with overs
139 |   dfcoord = as.data.frame(coord)
140 |   
141 |   dfcoord = dfcoord[overs$queryHits,]
142 |   overs$strand = as.character(dfcoord$strand)
143 |   overs$tss = ifelse(overs$strand == "+", dfcoord$end, dfcoord$start)  # reference position: region end for "+", region start otherwise
144 |   overs$probe_pos = start(bw[overs$subjectHits])
145 |   overs$dist = ifelse(overs$strand == "+", overs$tss - overs$probe_pos, overs$probe_pos - overs$tss)
146 |   overs$bin = round(overs$dist / (overs$width / (nbins-1)))  # distance in units of width/(nbins-1), rounded
147 |   overs$bin = overs$bin + 1  # shift to 1-based bin numbers
148 |   overs$score = bw[overs$subjectHits]$score
149 |   overs.dt = data.table(overs)
150 |   oversum = overs.dt[,list(score = mean(score)), by=c('queryHits', 'bin')]
151 |   # Result: data.table with columns queryHits, bin and score (mean score of the bw intervals falling in that bin)
152 |   return (oversum)
153 | }
154 | 
155 | Subset_Binned_to_common_peaks = function(raw_binmats) {
156 |   # Restricts every binned matrix to the rows (peaks) that are present in all of them.
157 |   #   raw_binmats: list of data frames / matrices whose row names identify peaks
158 |   # Returns the list with each element subset to the shared row names, in the row
159 |   # order of the first element. An empty list is returned unchanged.
160 |   if(length(raw_binmats) == 0) {
161 |     return (raw_binmats)
162 |   }
163 |   
164 |   # Row names common to all matrices
165 |   qhits = Reduce(intersect, lapply(raw_binmats, rownames))
166 |   # drop = FALSE keeps the two-dimensional shape even when only one bin column exists
167 |   for(i in seq_along(raw_binmats)) {
168 |     raw_binmats[[i]] = raw_binmats[[i]][qhits, , drop = FALSE]
169 |   }
170 |   return (raw_binmats)
171 | }
172 | 
173 | 
174 | Bin_peaks = function(bed, bwfile, nbin) {
175 |   # Per file in bwfile: peaks x nbin data frame of mean scores per bin; returns the list, limited to peaks present in all files
176 |   print("Importing and binning peaks:")
177 |   raw_binmats = list()
178 |   bins = as.character(seq_len(nbin))
179 |   for(i in 1:length(bwfile)) {
180 |     cat("\tImporting peaks for:", bwfile[i],"\n")
181 |     df = BinCoordinates(import(bwfile[i], which = bed), bed, nbin)
182 |     mat = as.data.frame(reshape2::acast(df, queryHits~bin, value.var = "score"))  # acast comes from reshape2, which this file does not attach
183 |     for(b in setdiff(bins, colnames(mat))) mat[[b]] = NA_real_  # a bin without any signal would otherwise be a missing column
184 |     raw_binmats[[i]] = mat[, bins, drop = FALSE]  # also discards bin numbers outside 1..nbin
185 |   }
186 |   raw_binmats = Subset_Binned_to_common_peaks(raw_binmats)
187 |   cat("\t", "Done quantifying peaks","\n")
188 |   return (raw_binmats)
189 | }
190 | 
191 | ImportHeatmapData = function(peakds) {  # Validates the inputs in peakds, then quantifies and bins every bigWig over the peaks
192 |   peakds = CheckBEDfiles(peakds)
193 |   # Return early on any validation error; the caller inspects peakds$errors
194 |   if(peakds$errors > 0) {
195 |     return (peakds)
196 |   }
197 |   
198 |   peakds = CheckBWfiles(peakds)
199 |   
200 |   if(peakds$errors > 0) {
201 |     return (peakds)
202 |   }
203 |   # peak_quants: one mean score per peak and bigWig; raw_binmats: one peaks x nbin table per bigWig
204 |   peakds$peak_quants = Quantify_peaks(peakds$bed, peakds$bwfile)
205 |   peakds$raw_binmats = Bin_peaks(bed = peakds$bed, bwfile = peakds$bwfile, nbin = peakds$nbin)
206 |   peakds$bed = peakds$bed[rownames(peakds$raw_binmats[[1]])]
207 |   # bed now holds only the peaks that are rows of the first binned table (Bin_peaks reduces all tables to common peaks)
208 |   return (peakds)
209 | }
210 | 
211 | Scale_binned_matrices = function(raw_binmats, z_score_data) {
212 |   # z_score_data == 1: z-score each row; == -1: z-score each column; anything else: copy unchanged
213 |   scaled = list()
214 |   for(idx in 1:length(raw_binmats)) {
215 |     current = raw_binmats[[idx]]
216 |     if(z_score_data == 1) {
217 |       current = as.data.frame(t(scale(t(current))))
218 |     } else if (z_score_data == -1) {
219 |       current = as.data.frame(scale(current))
220 |     }
221 |     scaled[[idx]] = current
222 |   }
223 |   
224 |   return (scaled)
225 | }
226 | 
227 | Arrange_peaks_for_plotting = function(peak_quants, bwfile, k_means_clusters, z_score_data_clustering_arranging, samples_used_clustering_arranging) {
228 |   # Orders the rows of peak_quants for plotting and adds a `split` column (heatmap row groups).
229 |   #   peak_quants: one column per bigWig (in bwfile order) followed by `mean` and `id`
230 |   #   k_means_clusters: > 0 groups peaks by k-means; otherwise peaks are grouped by source file (id)
231 |   #   z_score_data_clustering_arranging: 1 = z-score each sample column before clustering/ordering
232 |   #   samples_used_clustering_arranging: columns used for clustering/ordering (NULL = all samples)
233 |   # Returns peak_quants (original, unscaled values) reordered, with the `split` column added.
234 |   tmp_peak_quants = peak_quants # used for ordering temporarily
235 |   if(z_score_data_clustering_arranging == 1) {
236 |     for(i in 1:length(bwfile)) {
237 |       tmp_peak_quants[,i] = as.numeric(scale(tmp_peak_quants[,i]))
238 |     }
239 |   }
240 |   
241 |   # Columns whose (row-wise) mean drives the ordering
242 |   used = if(is.null(samples_used_clustering_arranging)) 1:length(bwfile) else samples_used_clustering_arranging
243 |   if(length(used) == 1) {
244 |     tmp_peak_quants$mean = as.numeric(tmp_peak_quants[,used])
245 |   } else {
246 |     tmp_peak_quants$mean = rowMeans(tmp_peak_quants[,used], na.rm = T)
247 |   }
248 |   
249 |   if(k_means_clusters > 0) {
250 |     if(length(bwfile) == 1) {
251 |       clusters = kmeans(tmp_peak_quants[,1], centers = k_means_clusters)
252 |     } else if (!is.null(samples_used_clustering_arranging)){
253 |       clusters = kmeans(tmp_peak_quants[,samples_used_clustering_arranging], centers = k_means_clusters)
254 |     } else {
255 |       clusters = kmeans(tmp_peak_quants[,1:length(bwfile)], centers = k_means_clusters)
256 |     }
257 |     
258 |     tmp_peak_quants$split = clusters$cluster
259 |     clusterids = unique(as.numeric(clusters$cluster))
260 |     clustermeans = rep(0, length(clusterids))
261 |     names(clustermeans) = clusterids
262 |     
263 |     for(i in 1:length(clustermeans)) {
264 |       clustermeans[i]=mean(tmp_peak_quants[tmp_peak_quants$split == clusterids[i], "mean"])
265 |     }
266 |     
267 |     # Renumber the clusters so that split 1 is the cluster with the highest mean signal
268 |     clustermeans = sort(clustermeans, decreasing = T)
269 |     peak_quants_reordered = data.frame()
270 |     for(i in 1:length(clustermeans)) {
271 |       tmp = tmp_peak_quants[tmp_peak_quants$split == names(clustermeans[i]),]
272 |       tmp = tmp[order(tmp$mean, decreasing = T),]
273 |       tmp$split = i
274 |       peak_quants_reordered = rbind(peak_quants_reordered, tmp)
275 |     }
276 |     tmp_peak_quants = peak_quants_reordered
277 |     peak_quants = peak_quants[rownames(tmp_peak_quants),]
278 |     peak_quants$split = tmp_peak_quants$split
279 |   } else {
280 |     # No clustering: group by source peak file and sort each group by decreasing mean.
281 |     # The split column has to exist on tmp_peak_quants as well; it used to be set only on
282 |     # peak_quants, so order() received a NULL key and failed.
283 |     tmp_peak_quants$split = tmp_peak_quants$id
284 |     tmp_peak_quants = tmp_peak_quants[order(tmp_peak_quants$split, -tmp_peak_quants$mean),]
285 |     peak_quants = peak_quants[rownames(tmp_peak_quants),]
286 |     peak_quants$split = tmp_peak_quants$split
287 |   }
288 |   
289 |   return (peak_quants)
290 | }
291 | 
292 | Calc_mean_values_for_plots = function(bwfile, binmats, peak_quants) {
293 |   # Per bigWig: data frame (bins x split groups) of mean binned signal; bin count comes from binmats, not a global nbin
294 |   cmeans = list()
295 |   groups = unique(peak_quants$split)
296 |   for(i in 1:length(bwfile)) {
297 |     cmeans[[i]] = as.data.frame(matrix(ncol = length(groups), nrow = ncol(binmats[[i]])))
298 |     colnames(cmeans[[i]]) = groups
299 |     for(j in 1:length(groups)) {
300 |       cmeans[[i]][,j] = colMeans(as.matrix(binmats[[i]][rownames(peak_quants[peak_quants$split == groups[j],]),]), na.rm = T)
301 |     }
302 |   }
303 |   return (cmeans)
304 | }
305 | 
306 | Calc_max_line_plot = function(cmeans, plot_scale_samples_separate) {
307 |   # Upper y-limit of the line-plot annotation for every sample: the largest group mean.
308 |   # With plot_scale_samples_separate == 0 all samples share the overall maximum.
309 |   upper = vapply(1:length(cmeans), function(k) {
310 |     max(cmeans[[k]], na.rm = T)
311 |   }, numeric(1))
312 |   
313 |   if(plot_scale_samples_separate == 0) {
314 |     upper = rep(max(upper), length(cmeans))
315 |   }
316 |   
317 |   return (upper)
318 | }
319 | 
320 | Calc_min_line_plot = function(cmeans, plot_scale_samples_separate) {
321 |   # Lower y-limit of the line-plot annotation for every sample: the smallest group mean, capped at 0.
322 |   # With plot_scale_samples_separate == 0 all samples share the overall minimum.
323 |   lower = vapply(1:length(cmeans), function(k) {
324 |     min(0, min(cmeans[[k]], na.rm = T))
325 |   }, numeric(1))
326 |   
327 |   if(plot_scale_samples_separate == 0) {
328 |     lower = rep(min(lower), length(cmeans))
329 |   }
330 |   
331 |   return (lower)
332 | }
333 | 
334 | Calc_heatmap_max_values = function(binmats, plot_scale_samples_separate) {
335 |   # Upper colour limit per sample: the 95th percentile of all binned values of that sample.
336 |   # With plot_scale_samples_separate == 0 every sample gets the largest of these limits.
337 |   upper = vapply(1:length(binmats), function(k) {
338 |     quantile(binmats[[k]],.95, na.rm=T)[[1]]
339 |   }, numeric(1))
340 |   
341 |   if(plot_scale_samples_separate == 0) {
342 |     upper = rep(max(upper), length(binmats))
343 |   }
344 |   
345 |   return (upper)
346 | }
347 | 
348 | Calc_heatmap_min_values  = function(binmats, plot_scale_samples_separate) {
349 |   # Lower colour limit per sample: the 5th percentile of all binned values of that sample.
350 |   heatmap_min = c(rep(0, length(binmats)))
351 |   for(i in 1:length(binmats)) {
352 |     heatmap_min[i] = quantile(binmats[[i]],.05, na.rm=T)[[1]]
353 |   }
354 |   # Shared scale (plot_scale_samples_separate == 0): every sample gets the smallest limit.
355 |   # max() was used here before, unlike the min() used by Calc_min_line_plot.
356 |   if(plot_scale_samples_separate == 0) {
357 |     heatmap_min = c(rep(min(heatmap_min), length(binmats)))
358 |   }
359 |   return (heatmap_min)
360 | }
361 | 
362 | 
363 | PrepareDataForPlotting = function(peakds) {  # Scales the binned tables, orders/groups the peaks and derives the limits read by PlotHeatmaps
364 |   peakds$binmats = Scale_binned_matrices(raw_binmats = peakds$raw_binmats, z_score_data = peakds$z_score_data)
365 |   peakds$peak_quants = Arrange_peaks_for_plotting(peak_quants = peakds$peak_quants, 
366 |                                            bwfile = peakds$bwfile,
367 |                                            k_means_clusters = peakds$k_means_clusters, 
368 |                                            z_score_data_clustering_arranging = peakds$z_score_data_clustering_arranging, 
369 |                                            samples_used_clustering_arranging = peakds$samples_used_clustering_arranging)
370 |   # Mean profile per row group and sample, then y-limits for the line plots and colour limits for the heatmaps
371 |   
372 |   peakds$heatmap_lineplot_means = Calc_mean_values_for_plots(peakds$bwfile, peakds$binmats, peakds$peak_quants)
373 |   peakds$max_ann = Calc_max_line_plot(peakds$heatmap_lineplot_means, peakds$plot_scale_samples_separate)
374 |   peakds$min_ann = Calc_min_line_plot(peakds$heatmap_lineplot_means, peakds$plot_scale_samples_separate)
375 |   peakds$heatmap_max = Calc_heatmap_max_values(binmats = peakds$binmats, peakds$plot_scale_samples_separate)
376 |   peakds$heatmap_min = Calc_heatmap_min_values(binmats = peakds$binmats, peakds$plot_scale_samples_separate)
377 |   
378 |   return (peakds)
379 | }
380 | 
381 | PlotHeatmaps = function(peakds) {  # Builds one Heatmap per bigWig (kept in peakds$plotlist) and their sum in peakds$combined_plot
382 |   peakds$plotlist = list()
383 |   for(i in 1:length(peakds$bwfile)) {
384 |     ptable = peakds$binmats[[i]]
385 |     ha = HeatmapAnnotation(mean = anno_lines(peakds$heatmap_lineplot_means[[i]], 
386 |                                              ylim = c(peakds$min_ann[i],peakds$max_ann[i]), 
387 |                                              height = unit(2, "cm"),
388 |                                              gp = gpar(col = 1:length(unique(peakds$peak_quants$split)))),
389 |                            show_annotation_name = c(mean = FALSE))
390 |     # Six colour breakpoints derived from this sample's heatmap_min / heatmap_max, mapped to six RdYlBu colours
391 |     col_fun = colorRamp2(c(peakds$heatmap_min[i], 
392 |                            peakds$heatmap_min[i] + peakds$heatmap_max[i]*0.15, 
393 |                            peakds$heatmap_min[i] + peakds$heatmap_max[i]*0.3, 
394 |                            peakds$heatmap_min[i] + peakds$heatmap_max[i]*0.6, 
395 |                            peakds$heatmap_min[i] + peakds$heatmap_max[i], 
396 |                            peakds$heatmap_min[i] + peakds$heatmap_max[i]*1.15), brewer.pal(n = 11, name = "RdYlBu")[c(1,4,6,8,10,11)])
397 |     # Rows follow the order of peak_quants and are split by its split column; row/column clustering is switched off
398 |     peakds$plotlist[[i]] = Heatmap(ptable[rownames(peakds$peak_quants),],
399 |                             name = peakds$bwnames[i],
400 |                             column_title = peakds$bwnames[i],
401 |                             show_row_names = F,
402 |                             show_column_names = F,
403 |                             cluster_rows = F,
404 |                             cluster_columns = F,
405 |                             use_raster = T,
406 |                             width = unit(3, "cm"), 
407 |                             height = unit(14, "cm"),
408 |                             top_annotation = ha,
409 |                             col = col_fun,
410 |                             row_split = peakds$peak_quants$split,
411 |                             row_title_gp = gpar(col = 1:length(unique(peakds$peak_quants$split)), font = 2))
412 |     # Accumulate the heatmaps with + into one combined object
413 |   
414 |     if(i == 1) {
415 |       oplot =  peakds$plotlist[[i]]
416 |     } else {
417 |       oplot = oplot +  peakds$plotlist[[i]]
418 |     }  
419 |   }
420 |   peakds$combined_plot = oplot
421 |   return (peakds)
422 | }


--------------------------------------------------------------------------------
/R/Replication_timing_segmenter.R:
--------------------------------------------------------------------------------
  1 | args = commandArgs(trailingOnly=TRUE)
  2 | 
  3 | PrintErrorMessage = function() {  # Prints the usage/help text to stdout; takes no arguments and returns nothing useful
  4 |   cat("\nUsage: Rscript Replication_timing_segmenter.R <replication_timing.bw>\n")
  5 |   cat("\nTakes as input a replication timing log2 ratio file in bigwig format, and outputs a BED file with the timing segments\n")
  6 |   cat("\ttiming segments are created by splitting the log2 ratios into 4 quartiles:\n")
  7 |   cat("\t\t1) upper (early replication)\n")
  8 |   cat("\t\t2) mid-upper (mid-early replication)\n")
  9 |   cat("\t\t3) mid-lower (mid-late replication)\n")
 10 |   cat("\t\t4) lower (late replication)\n")
 11 |   cat("\nThis is a simple adaptation of a messy script originally created that uses the following packages: rtracklayer\n\tPlease cite rtracklayer when using this script\n\n")
 12 | }
 13 | 
 14 | if(length(args) < 1){  # the bigWig path is the only (required) command line argument
 15 |   PrintErrorMessage()
 16 |   stop("Not enough arguments. Replication timing bigwig file has to be specified (created with BAMscale).")
 17 | }
 18 | 
 19 | infile = args[1]
 20 | 
 21 | if(!file.exists(infile)) {
 22 |   PrintErrorMessage()
 23 |   stop(paste0("Input file specified does not exist: \"", infile, "\""))
 24 | }
 25 | 
 26 | if (!require(rtracklayer)) {
 27 |   PrintErrorMessage()
 28 |   stop("Please install package named \"rtracklayer\"")
 29 | }
 30 | 
 31 | library(rtracklayer)
 32 | # Output name: input name with only a trailing .bw/.bigWig/.bigwig removed (anchored with $, so ".bw" inside the path is kept)
 33 | outfile = gsub(pattern ="\\.(bw|bigWig|bigwig)$", replacement = "", x = infile)
 34 | outfile = paste0(outfile, ".replication_timings.bed")
 35 | 
 36 | options(warn=-1)
 37 | GetMergedElements = function(fbw, typeseg) {  # Merged ranges of fbw labelled typeseg, keeping only those longer than min_segment_length
 38 |   fbw = fbw[fbw$segments == typeseg,]
 39 |   fbw = reduce(fbw)  # collapse the selected ranges; length and segments are (re-)assigned on the result below
 40 |   fbw$length = width(fbw)
 41 |   fbw = fbw[fbw$length > min_segment_length]  # min_segment_length is a global assigned later in this script, before the first call
 42 |   fbw$segments = typeseg
 43 |   return(fbw)
 44 | }
 45 | 
 46 | include_zeroes_segment_thresholds = 0  # 0 = leave bins with score 0 out when computing the quartile thresholds
 47 | min_segment_length = 5000  # merged segments must be longer than this to be kept by GetMergedElements
 48 | 
 49 | bw = import(infile)
 50 | # Quartile thresholds of the score: lower (25%), medval (50%), upper (75%)
 51 | if(include_zeroes_segment_thresholds == 0) {
 52 |   medval = quantile(bw$score[bw$score != 0], .5, na.rm = T)[[1]]
 53 |   upper = quantile(bw$score[bw$score != 0], .75, na.rm = T)[[1]]
 54 |   lower = quantile(bw$score[bw$score != 0], .25, na.rm = T)[[1]]
 55 | } else {
 56 |   medval = quantile(bw$score, .5, na.rm = T)[[1]]
 57 |   upper = quantile(bw$score, .75, na.rm = T)[[1]]
 58 |   lower = quantile(bw$score, .25, na.rm = T)[[1]]
 59 | }
 60 | 
 61 | # Label every bin: -2 below lower, -1 from lower up to medval, 1 above medval, 2 above upper; a score equal to medval keeps label 0
 62 | bw$segments = 0
 63 | bw$segments = ifelse(bw$score < lower, -2, bw$segments)
 64 | bw$segments = ifelse(bw$score < medval & bw$segments == 0, -1, bw$segments)
 65 | bw$segments = ifelse(bw$score > medval & bw$segments == 0, 1, bw$segments)
 66 | bw$segments = ifelse(bw$score > upper, 2, bw$segments)
 67 | bw = resize(bw, width(bw) + 1, fix="start")  # lengthen each bin by 1 bp at its end; undone after merging. Presumably helps reduce() join neighbouring bins - TODO confirm
 68 | # Merge bins of equal label into segments (one label at a time) and drop segments that are too short
 69 | segments = GetMergedElements(bw, 2)
 70 | segments = c(segments, GetMergedElements(bw, 1))
 71 | segments = c(segments, GetMergedElements(bw, -1))
 72 | segments = c(segments, GetMergedElements(bw, -2))
 73 | segments = sort(segments)
 74 | segments = resize(segments, width(segments) - 1, fix="end")
 75 | # Everything on the sequences not covered by a kept segment becomes a gap element with label 0
 76 | seg_gaps = setdiff(as(seqinfo(segments), "GRanges"), segments)
 77 | seg_gaps$length = width(seg_gaps)
 78 | seg_gaps$segments = 0
 79 | segments = c(segments, seg_gaps)
 80 | segments = sort(segments)
 81 | # A gap that is the first / last element of the sorted set copies the label of its only neighbour
 82 | if(segments$segments[1] == 0) {
 83 |   segments$segments[1] = segments$segments[2]
 84 | }
 85 | 
 86 | if(segments$segments[length(segments)] == 0) {
 87 |   segments$segments[length(segments)] = segments$segments[length(segments)-1]
 88 | }
 89 | # Interior gaps take the label of the longer neighbouring element (ties go to the following one). NOTE(review): neighbours are taken by index only, so they may lie on another sequence
 90 | segments$final_seg = sapply(seq(1, length(segments), 1), function(i) {
 91 |   if(i > 1 & i < length(segments)) {
 92 |     if(segments$segments[i] == 0) {
 93 |       if(segments$length[i-1] > segments$length[i+1]) {
 94 |         segments$segments[i-1]
 95 |       } else {
 96 |         segments$segments[i+1]
 97 |       }
 98 |     } else {
 99 |       segments$segments[i]
100 |     }
101 |   } else {
102 |     segments$segments[i]
103 |   }
104 | })
105 | # Merge again with the gap labels filled in, using the same +1 / -1 bp resize around the merge
106 | segments = resize(segments, width(segments) + 1, fix="start")
107 | segments$segments = segments$final_seg
108 | 
109 | segments_final = GetMergedElements(segments, 2)
110 | segments_final = c(segments_final, GetMergedElements(segments, 1))
111 | segments_final = c(segments_final, GetMergedElements(segments, -1))
112 | segments_final = c(segments_final, GetMergedElements(segments, -2))
113 | segments_final = sort(segments_final)
114 | segments_final = resize(segments_final, width(segments_final) - 1, fix="end")
115 | # Convert to a data frame and drop the columns that are not written out
116 | segments_final = as.data.frame(segments_final)
117 | segments_final$width = NULL
118 | segments_final$strand = NULL
119 | segments_final$length = NULL
120 | # Colour string per label; the initial value remains only for label -2, the other three labels are overwritten below
121 | segments_final$color = "126,0,21"
122 | segments_final$color = ifelse(segments_final$segments == -1, "153,89,31", segments_final$color)
123 | segments_final$color = ifelse(segments_final$segments == 1, "154,161,14", segments_final$color)
124 | segments_final$color = ifelse(segments_final$segments == 2, "20,155,3", segments_final$color)
125 | # Text name per label, with "late" remaining for label -2
126 | segments_final$time = "late"
127 | segments_final$time = ifelse(segments_final$segments == -1, "mid-late", segments_final$time)
128 | segments_final$time = ifelse(segments_final$segments == 1, "mid-early", segments_final$time)
129 | segments_final$time = ifelse(segments_final$segments == 2, "early", segments_final$time)
130 | # Constant filler columns for the output table
131 | segments_final$tval = 0
132 | segments_final$nval = "."
133 | # Nine output columns (start and end are repeated), written tab-separated without header or row names
134 | segments_final = segments_final[,c("seqnames", "start", "end", "time", "tval", "nval", "start", "end", "color")]
135 | write.table(segments_final, outfile, sep = "\t", quote = F, col.names = F, row.names = F)
136 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | BAMscale
  2 | ===
  3 | 
  4 | **Overview of BAMscale applications**
  5 | 
  6 | 
  7 | <p align="center">
  8 | <img src="https://github.com/ncbi/BAMscale/blob/master/doc/images/MAIN_figure.png"  width="800" height="220" />
  9 | </p>
 10 | 
 11 | 
 12 | BAMscale is a one-step tool to 
 13 | 
 14 |     1) quantify/normalize peak coverages from multiple BAM files 
 15 |     2) Create scaled BigWig files for easy visualization
 16 | 
 17 | In the [wiki](https://github.com/ncbi/BAMscale/wiki) pages we have more detailed tutorials for creating bigWig files and quantifying peaks
 18 | 
 19 | ## Update
 20 | 20210510: Added support for BAM indexes "file.bam.bai" and "file.bai". Modified the bigwig writing to decrease file size: blocks of 25 bins are written, if non-empty. ***Modified the default bin size to 20bp***
 21 | 
 22 | 20200918: We are working on a heatmap plotting script in R to help visualization. The script (under development) is available in "R/Plot_heatmap" folder. Please use Rstudio or something similar, as you have to set the paths in the script. Meanwhile, we will work on developing a simple GUI for this.
 23 | 
 24 | 20200423: The full manuscript has been published in [Epigenetics & Chromatin](https://epigeneticsandchromatin.biomedcentral.com/articles/10.1186/s13072-020-00343-x)
 25 | 
 26 | 
 27 | 20200326: We added the visualization script app written in R. The scripts are available in the "R" sub-folder, with a detailed [manual](https://github.com/ncbi/BAMscale/wiki/Pairwise-comparison-of-peaks-%5Binteractive-plotting-using-R%5D) available in the wiki->visualization section
 28 | 
 29 | 20190821: We recently added support for [RNA-seq](https://github.com/ncbi/BAMscale/wiki/Detailed-usage:-RNA-seq-coverage-tracks) data as well to create coverage tracks. The new method enables accurate representations of exon-intron boundaries (splicing). 
 30 | 
 31 | ## Manuals
 32 | 
 33 | In the [wiki](https://github.com/ncbi/BAMscale/wiki) page we have more detailed tutorials for creating bigWig files and quantifying peaks:
 34 | 
 35 | 1. [OK-seq and RFD Track Generation](https://github.com/ncbi/BAMscale/wiki/Detailed-Use:-OKseq-RFD-(Replication-Fork-Directionality)-Track-Generation)
 36 | 2. [Quantifying Peaks](https://github.com/ncbi/BAMscale/wiki/Detailed-Use:-Quantifying-Peak-Coverages-from-Multiple-BAM-Files#comparing-atac-seq-changes-induced-from-treatment)
 37 | 3. [Generating Scaled Coverage Tracks](https://github.com/ncbi/BAMscale/wiki/Detailed-Use:-Generating-Scaled-Coverage-Tracks#preparing-input-data-for-bamscale)
 38 | 4. [END-seq data](https://github.com/ncbi/BAMscale/wiki/Detailed-Use:-Processing-END-seq-Data)
 39 | 5. [Log2 Coverage Tracks for Replication Timing Data](https://github.com/ncbi/BAMscale/wiki/Detailed-Use:-Replication-Timing-log2-Coverage-Ratio-from-Two-BAM-Files)
 40 | 6. [Smoothening Function for Coverage Tracks](https://github.com/ncbi/BAMscale/wiki/Detailed-Use:-Smooth-Coverage-Tracks)
 41 | 
 42 | 
 43 | We also added a few R scripts that might be helpful for basic visualizations:
 44 | 1. [Creating density plots of quantified peaks](https://github.com/ncbi/BAMscale/wiki/Pairwise-comparison-of-peaks-%5Binteractive-plotting-using-R%5D)
 45 | 
 46 | 2. [Segmenting replication timing bigwigs](https://github.com/ncbi/BAMscale/wiki/Replication-timing-BED-segments-from-bigwig)
 47 | 3. [Identifying OK-seq strand switches](https://github.com/ncbi/BAMscale/wiki/Finding-OK-seq-strand-switched-from-the-RFD-track)
 48 | 
 49 | For additional information, visit the [wiki](https://github.com/ncbi/BAMscale/wiki) page.
 50 | 
 51 | For any other requests, or if you need help, either open an issue or feel free to email me: *pongorlorinc@gmail.com*
 52 | 
 53 | 
 54 | ## Usage for the impatient
 55 | 
 56 | These examples assume you have 4 processing threads, so we set '-t 4' for multithreading.
 57 | 
 58 | #### Peak quantification
 59 | 
 60 |     BAMscale cov -t 4 --bed <BED_FILE> --bam <BAM1> --bam <BAM2> --bam <BAM3> ... --bam <BAMn>
 61 | 
 62 | #### Generating scaled coverage tracks
 63 | 
 64 | ***Creating scaled coverage tracks***
 65 | 
 66 |     BAMscale scale -t 4 --bam <BAM_FILE> [--bam <BAM2> .. --bam <BAMn>]
 67 | 
 68 | ***Creating stranded RNA-seq coverage tracks***
 69 | 
 70 |     BAMscale scale --operation strandrna --bam <RNAseq.bam>
 71 |     
 72 | ***Creating unstranded coverage from RNA-seq***
 73 | 
 74 |     BAMscale scale --operation rna --bam <RNAseq.bam>
 75 | 
 76 | ***Getting RFD score from OKseq data***
 77 | 
 78 |     BAMscale scale -t 4 --operation rfd --binsize 1000 --bam <BAM_FILE>
 79 |     
 80 | ***Processing replication timing and Repli-seq data***
 81 | 
 82 |     BAMscale scale -t 4 --operation reptime --bam <G1_phase.bam> --bam <S_phase.bam>
 83 |     
 84 | ***Creating stranded END-seq coverages***
 85 | 
 86 |     BAMscale scale -t 4 --operation endseq --bam <ENDseq.bam>
 87 | 
 88 | 
 89 | ## Reference
 90 | 
 91 | BAMscale can be found at **bioR&chi;iv** ([https://doi.org/10.1101/669275](https://www.biorxiv.org/content/10.1101/669275v1))
 92 | 
 93 | ## Bioconda installation
 94 | 
 95 | [BAMscale](https://bioconda.github.io/recipes/bamscale/README.html) is available through [Bioconda](https://bioconda.github.io/). Read the Bioconda [Getting Started](https://bioconda.github.io/user/install.html#install-conda) page for a detailed description on how to get Bioconda installed.
 96 | 
 97 | Once Bioconda is available you can install BAMscale using this command.
 98 | 
 99 |     conda install bamscale
100 | 
101 | ## Docker
102 | 
103 | BAMscale docker image is available in [quay.io/biocontainers/bamscale](https://quay.io/repository/biocontainers/bamscale).
104 | 
105 | ### Pulling the image
106 | 
107 |     docker pull quay.io/biocontainers/bamscale:0.0.5--ha85820d_0
108 |     
109 | ### Using the Docker image
110 | 
111 | #### Peak quantification with Docker
112 | 
113 |     docker run -v `pwd`:/data bamscale BAMscale cov --bed <BED_FILE> --bam <BAM1> --bam <BAM2> --bam <BAM3> ... --bam <BAMn>
114 | 
115 | #### Generating scaled coverage tracks with Docker
116 | 
117 |     docker run -v `pwd`:/data bamscale BAMscale scale --bam <BAM_FILE> [--bam <BAM2> .. --bam <BAMn>]
118 | 
119 | ### Creating a custom docker image
120 | 
121 |     docker build -t bamscale https://raw.githubusercontent.com/pongorlorinc/BAMscale/master/Dockerfile
122 | 
123 | ## Local compilation
124 | 
125 | ### Requirements
126 | 
127 | We have detailed installation instructions for [Linux](https://github.com/ncbi/BAMscale/wiki/Installation#detailed-installation-for-linux-based-os) and [Mac](https://github.com/ncbi/BAMscale/wiki/Installation#detailed-installation-for-mac-os-with-homebrew) (with Homebrew) based systems, or through [conda](https://github.com/ncbi/BAMscale/wiki/Installation#detailed-installation-for-mac-os-with-conda). There is also a precompiled Linux version, ready to use, available on the [releases](https://github.com/ncbi/BAMscale/releases) page.
128 | 
129 | #### samtools
130 | http://www.htslib.org/
131 | 
132 | #### libBigWig
133 | Clone the libBigWig repository from GitHub: https://github.com/dpryan79/libBigWig
134 | 
135 |     git clone https://github.com/dpryan79/libBigWig.git
136 | 
137 | Compile it and set the environment variables for BAMscale
138 | 
139 |     cd libBigWig/
140 |     make
141 |     export LIBBIGWIG_DIR=`pwd`
142 |     export CPPFLAGS="-I $LIBBIGWIG_DIR"
143 |     export LDFLAGS="-L $LIBBIGWIG_DIR -Wl,-rpath,$LIBBIGWIG_DIR"
144 |     
145 | Optionally (and if you have permission), libBigWig can also be installed:
146 | 
147 |     make install
148 |     
149 | In this case, the flags don't have to be set in the terminal.
150 | 
151 | ### Installation
152 | 
153 | After compiling the libBigWig library and samtools (if not already installed), clone the BAMscale repository from GitHub
154 | 
155 |     git clone https://github.com/ncbi/BAMscale.git
156 |     
157 | and go to the BAMscale folder to compile the program:
158 | 
159 |     cd BAMscale/
160 |     make
161 |     
162 | A bin folder will be created with the BAMscale executable.
163 | 
164 | # Public Domain notice
165 | 
166 | National Center for Biotechnology Information.
167 | 
168 | This software is a "United States Government Work" under the terms of the United States
169 | Copyright Act. It was written as part of the authors' official duties as United States
170 | Government employees and thus cannot be copyrighted. This software is freely available
171 | to the public for use. The National Library of Medicine and the U.S. Government have not
172 |  placed any restriction on its use or reproduction.
173 | 
174 | Although all reasonable efforts have been taken to ensure the accuracy and reliability
175 | of the software and data, the NLM and the U.S. Government do not and cannot warrant the
176 | performance or results that may be obtained by using this software or data. The NLM and
177 | the U.S. Government disclaim all warranties, express or implied, including warranties
178 | of performance, merchantability or fitness for any particular purpose.
179 | 
180 | Please cite NCBI in any work or product based on this material.
181 | 


--------------------------------------------------------------------------------
/bamscale-cov.cwl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env cwl-runner
  2 | cwlVersion: v1.0
  3 | class: CommandLineTool
  4 | 
  5 | label: BAMscale-cov
  6 | doc: Calculate coverage of BED coordinates in BAM file(s)
  7 | 
  8 | requirements:
  9 |   InlineJavascriptRequirement: {}
 10 | 
 11 | hints:
 12 |   - $import: bamscale.yml
 13 | 
 14 | inputs:
 15 |   l:
 16 |     type: string?
 17 |     inputBinding:
 18 |       position: 1
 19 |       prefix: -l
 20 |     doc: |
 21 |       Sequencing type to be used. Can be: single, paired, and auto (default: autodetect)
 22 |   f:
 23 |     type: string?
 24 |     inputBinding:
 25 |       position: 1
 26 |       prefix: -f
 27 |     doc: |
 28 |       Compute coverage using fragments instead of reads (default: no)
 29 |   s:
 30 |     type: string?
 31 |     inputBinding:
 32 |       position: 1
 33 |       prefix: -s
 34 |     doc: |
 35 |       Reads need to have same orientation of peaks (default: unstranded)
 36 |   r:
 37 |     type: string?
 38 |     inputBinding:
 39 |       position: 1
 40 |       prefix: -r
 41 |     doc: |
 42 |       Reads need to have reverse orientation of peaks (default: unstranded)
 43 |   e:
 44 |     type: int?
 45 |     inputBinding:
 46 |       position: 2
 47 |       prefix: -e
 48 |     doc: |
 49 |       Compute sequencing coverage from BAM file quickly using the index (option '0'),
 50 |       or count number of reads by parsing entire BAM file (slower, but more accurate; set to '1' [default])
 51 |   c:
 52 |     type: File?
 53 |     inputBinding:
 54 |       position: 2
 55 |       prefix: -c
 56 |     doc: |
 57 |       Input file with list of chromosomes to blacklist when computing coverage for normalization
 58 |   u:
 59 |     type: File?  # -u receives the path of a BED file, so the input must be a File rather than an int
 60 |     inputBinding:
 61 |       position: 2
 62 |       prefix: -u
 63 |     doc: |
 64 |       BED file with regions to subtract when computing coverage for normalization
 65 |       These coordinates should not overlap so reads are not counted multiple times
 66 |   q:
 67 |     type: int?
 68 |     inputBinding:
 69 |       position: 3
 70 |       prefix: -q
 71 |     doc: |
 72 |       Minimum (at least) mapping quality (default: 0)
 73 |   d:
 74 |     type: string?
 75 |     inputBinding:
 76 |       position: 3
 77 |       prefix: -d
 78 |     doc: |
 79 |       Keep duplicated reads (default: no)
 80 |   p:
 81 |     type: string?
 82 |     inputBinding:
 83 |       position: 3
 84 |       prefix: -p
 85 |     doc: |
 86 |       Do not filter un-proper alignments (default: filter)
 87 |   m:
 88 |     type: string?
 89 |     inputBinding:
 90 |       position: 3
 91 |       prefix: -m
 92 |     doc: |
 93 |       Do not remove reads with unmapped pairs
 94 |   g:
 95 |     type: int?
 96 |     inputBinding:
 97 |       position: 3
 98 |       prefix: -g
 99 |     doc: |
100 |       Minimum fragment size for read pairs (default: 0)
101 |   x:
102 |     type: int?
103 |     inputBinding:
104 |       position: 3
105 |       prefix: -x
106 |     doc: |
107 |       Maximum fragment size for read pairs (default: 2000)
108 |   w:
109 |     type: int?
110 |     inputBinding:
111 |       position: 3
112 |       prefix: -w
113 |     doc: |
114 |       Filter reads based on fragment size (default: no)
115 |   t:
116 |     type: int?
117 |     inputBinding:
118 |       position: 4
119 |       prefix: -t
120 |     doc: |
121 |       No. of threads to use (default: 1)
122 |   n:
123 |     type: string
124 |     inputBinding:
125 |       position: 4
126 |       prefix: -n
127 |     doc: |
128 |       Output prefix for file names (default: none)
129 |   bed:
130 |     type: File
131 |     inputBinding:
132 |       position: 5
133 |       prefix: --bed
134 |     doc: |
135 |       Input BED file
136 |   bam:
137 |     type:
138 |       type: array
139 |       items: File
140 |       inputBinding:
141 |         prefix: --bam
142 |         separate: true
143 |     secondaryFiles: .bai
144 |     inputBinding:
145 |       position: 6
146 |     doc: |
147 |       Input BAM file. This can be specified multiple times in case of multiple BAM files
148 | 
149 | outputs:
150 |   output:
151 |     type: File[]
152 |     outputBinding:
153 |       glob: $(inputs.n)*
154 | 
155 | baseCommand: ["BAMscale", "cov"]
156 | 
157 | s:author:
158 |   - class: s:Person
159 |     s:identifier: https://orcid.org/0000-0002-4108-5982
160 |     s:email: mailto:r78v10a07@gmail.com
161 |     s:name: Roberto Vera Alvarez
162 | 
163 | s:codeRepository: https://github.com/ncbi/BAMscale
164 | s:license: https://spdx.org/licenses/OPL-1.0
165 | 
166 | $namespaces:
167 |   s: http://schema.org/
168 | 
169 | $schemas:
170 |   - https://schema.org/version/latest/schema.rdf
171 | 
172 | 


--------------------------------------------------------------------------------
/bamscale-scale.cwl:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env cwl-runner
  2 | cwlVersion: v1.0
  3 | class: CommandLineTool
  4 | 
  5 | label: BAMscale-scale
  6 | doc: Scale one or multiple BAM files
  7 | 
  8 | requirements:
  9 |   InlineJavascriptRequirement: {}
 10 | 
 11 | hints:
 12 |   - $import: bamscale.yml
 13 | 
 14 | inputs:
 15 |   l:
 16 |     type: string?
 17 |     inputBinding:
 18 |       position: 1
 19 |       prefix: -l
 20 |     doc: |
 21 |       Sequencing type to be used. Can be: single, paired, and auto (default: autodetect)
 22 |   f:
 23 |     type: string?
 24 |     inputBinding:
 25 |       position: 1
 26 |       prefix: -f
 27 |     doc: |
 28 |       Compute coverage using fragments instead of reads (default: no)
 29 |   a:
 30 |     type: int?
 31 |     inputBinding:
 32 |       position: 1
 33 |       prefix: -a
 34 |     doc: |
 35 |       Fragment size to be used to extend single-end library reads
 36 |   y:
 37 |     type: string?
 38 |     inputBinding:
 39 |       position: 2
 40 |       prefix: -y
 41 |     doc: |
 42 |       Type of normalization. (default: base)
 43 |       If no normalization is needed, set '--scale no' argument, the program will disregard this option.
 44 |       Options:
 45 |         1) reads: No. of mapped reads/fragments
 46 |         2) base: Sum of per-base coverage of reads/fragments
 47 |   k:
 48 |     type: string?
 49 |     inputBinding:
 50 |       position: 2
 51 |       prefix: -k
 52 |     doc: |
 53 |       Method to scale samples together. (default: genome)
 54 |       Options are:
 55 |         1) no: no scaling, just calculate coverage
 56 |         2) smallest: scale reads to smallest library (multiple-samples only)
 57 |         3) genome: scale samples to 1x genome coverage (only possible with 'base' normalization type)
 58 |   r:
 59 |     type: string?
 60 |     inputBinding:
 61 |       position: 1
 62 |       prefix: -r
 63 |     doc: |
 64 |       Operation to perform when scaling samples. Default: scaled
 65 |       Options are:
 66 |         1) scaled: output scaled tracks.
 67 |         2) unscaled: do not scale files in any way.
 68 |         3) log2: log2 transform against first BAM file.
 69 |         4) ratio: coverage ratio against first BAM file.
 70 |         5) subtract: subtract coverage against first BAM file.
 71 |         6) rfd: OK-seq RFD calculation
 72 |   z:
 73 |     type: int?
 74 |     inputBinding:
 75 |       position: 2
 76 |       prefix: -z
 77 |     doc: |
 78 |       Size of bins for output bigWig/bedgraph generation (default: 5)
 79 |   e:
 80 |     type: int?
 81 |     inputBinding:
 82 |       position: 3
 83 |       prefix: -e
 84 |     doc: |
 85 |       Compute sequencing coverage from BAM file. (default: '1', count reads while parsing BAM)
 86 |       Options are:
 87 |         1) 0: use reads in index (only if normalization is set to 'reads')
 88 |         2) 1: count reads while parsing BAM(s)
 89 |       WARNING: this option is only useful when 'reads' are used for normalization
 90 |   c:
 91 |     type: File?
 92 |     inputBinding:
 93 |       position: 3
 94 |       prefix: -c
 95 |     doc: |
 96 |       Input file with list of chromosomes to blacklist when computing coverage for normalization
 97 |   u:
 98 |     type: File?  # -u receives the path of a BED file, so the input must be a File rather than an int
 99 |     inputBinding:
100 |       position: 3
101 |       prefix: -u
102 |     doc: |
103 |       BED file with regions to subtract when computing coverage for normalization
104 |       These coordinates should not overlap so reads are not counted multiple times
105 |   j:
106 |     type: int?
107 |     inputBinding:
108 |       position: 3
109 |       prefix: -j
110 |     doc: |
111 |       Smoothen signal by calculating mean of N bins flanking both sides of each bin (default: 0)
112 |       If set to '0', the signal is not smoothened. To turn on specify a value greater than '0'.
113 |       For replication timing, a good value is to smoothen to 100k bases. If binSize is 100bp, this would be '1000'
114 |   b:
115 |     type: int?
116 |     inputBinding:
117 |       position: 3
118 |       prefix: -b
119 |     doc: |
120 |       Which tracks should be smoothened when performing smoothening (default: '1' meaning only binned track).
121 |       Options are:
122 |         1) 0: Smoothen scaled and transformed tracks (log2, ratio or subtracted)
123 |         2) 1: Smoothen only the scaled sequencing track
124 |         3) 2: Smoothen only the transformed (log2, ratio or subtract) track
125 |   q:
126 |     type: int?
127 |     inputBinding:
128 |       position: 4
129 |       prefix: -q
130 |     doc: |
131 |       Minimum (at least) mapping quality (default: 0)
132 |   d:
133 |     type: string?
134 |     inputBinding:
135 |       position: 4
136 |       prefix: -d
137 |     doc: |
138 |       Keep duplicated reads (default: no)
139 |   p:
140 |     type: string?
141 |     inputBinding:
142 |       position: 4
143 |       prefix: -p
144 |     doc: |
145 |       Do not filter un-proper alignments (default: filter)
146 |   m:
147 |     type: string?
148 |     inputBinding:
149 |       position: 4
150 |       prefix: -m
151 |     doc: |
152 |       Do not remove reads with unmapped pairs
153 |   g:
154 |     type: int?
155 |     inputBinding:
156 |       position: 4
157 |       prefix: -g
158 |     doc: |
159 |       Minimum fragment size for read pairs (default: 0)
160 |   x:
161 |     type: int?
162 |     inputBinding:
163 |       position: 4
164 |       prefix: -x
165 |     doc: |
166 |       Maximum fragment size for read pairs (default: 2000)
167 |   w:
168 |     type: int?
169 |     inputBinding:
170 |       position: 4
171 |       prefix: -w
172 |     doc: |
173 |       Filter reads based on fragment size (default: no)
174 |   t:
175 |     type: int?
176 |     inputBinding:
177 |       position: 5
178 |       prefix: -t
179 |     doc: |
180 |       No. of threads to use (default: 1)
181 |   bam:
182 |     type:
183 |       type: array
184 |       items: File
185 |       inputBinding:
186 |         prefix: --bam
187 |         separate: true
188 |     secondaryFiles: .bai
189 |     inputBinding:
190 |       position: 6
191 |     doc: |
192 |       Input BAM file. This can be specified multiple times in case of multiple BAM files
193 | 
194 | outputs:
195 |   output:
196 |     type: File[]
197 |     outputBinding:
198 |        glob: "*.bw"
199 | 
200 | baseCommand: ["BAMscale", "scale"]
201 | 
202 | s:author:
203 |   - class: s:Person
204 |     s:identifier: https://orcid.org/0000-0002-4108-5982
205 |     s:email: mailto:r78v10a07@gmail.com
206 |     s:name: Roberto Vera Alvarez
207 | 
208 | s:codeRepository: https://github.com/ncbi/BAMscale
209 | s:license: https://spdx.org/licenses/OPL-1.0
210 | 
211 | $namespaces:
212 |   s: http://schema.org/
213 | 
214 | $schemas:
215 |   - https://schema.org/version/latest/schema.rdf
216 | 
217 | 


--------------------------------------------------------------------------------
/bamscale.yml:
--------------------------------------------------------------------------------
1 |   class: DockerRequirement
2 |   dockerImageId: bamscale
3 |   dockerFile:
4 |     $include: https://raw.githubusercontent.com/ncbi/BAMscale/master/Dockerfile
5 | 


--------------------------------------------------------------------------------
/doc/ATAC-seq_peak_quant_benchmark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/ATAC-seq_peak_quant_benchmark.png


--------------------------------------------------------------------------------
/doc/ATAC-seq_peak_validation_CB450h.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/ATAC-seq_peak_validation_CB450h.png


--------------------------------------------------------------------------------
/doc/ATAC-seq_peak_validation_CB452h.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/ATAC-seq_peak_validation_CB452h.png


--------------------------------------------------------------------------------
/doc/ATAC-seq_peak_validation_CB454h.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/ATAC-seq_peak_validation_CB454h.png


--------------------------------------------------------------------------------
/doc/ATAC-seq_peak_validation_CEM0h.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/ATAC-seq_peak_validation_CEM0h.png


--------------------------------------------------------------------------------
/doc/ATAC-seq_peak_validation_CEM2h.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/ATAC-seq_peak_validation_CEM2h.png


--------------------------------------------------------------------------------
/doc/ATAC-seq_peak_validation_CEM4h.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/ATAC-seq_peak_validation_CEM4h.png


--------------------------------------------------------------------------------
/doc/images/ATAC-seq_coverage_benchmark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/ATAC-seq_coverage_benchmark.png


--------------------------------------------------------------------------------
/doc/images/ATAC-seq_coverage_comparison.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/ATAC-seq_coverage_comparison.png


--------------------------------------------------------------------------------
/doc/images/ATAC-seq_peak_quant_benchmark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/ATAC-seq_peak_quant_benchmark.png


--------------------------------------------------------------------------------
/doc/images/ATAC-seq_peak_validation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/ATAC-seq_peak_validation.png


--------------------------------------------------------------------------------
/doc/images/BMAscale_RNAseq_coverage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/BMAscale_RNAseq_coverage.png


--------------------------------------------------------------------------------
/doc/images/Detailed_usage_ATAC_exampleTOP1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/Detailed_usage_ATAC_exampleTOP1.png


--------------------------------------------------------------------------------
/doc/images/Detailed_usage_ENDseq.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/Detailed_usage_ENDseq.png


--------------------------------------------------------------------------------
/doc/images/Detailed_usage_OKseq.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/Detailed_usage_OKseq.png


--------------------------------------------------------------------------------
/doc/images/Detailed_usage_RepTime_closer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/Detailed_usage_RepTime_closer.png


--------------------------------------------------------------------------------
/doc/images/Detailed_usage_RepTime_full.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/Detailed_usage_RepTime_full.png


--------------------------------------------------------------------------------
/doc/images/Detailed_usage_peakQuant_CB45_2h_vs_0h.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/Detailed_usage_peakQuant_CB45_2h_vs_0h.png


--------------------------------------------------------------------------------
/doc/images/Detailed_usage_peakQuant_CB45_4h_vs_0h.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/Detailed_usage_peakQuant_CB45_4h_vs_0h.png


--------------------------------------------------------------------------------
/doc/images/Detailed_usage_peakQuant_CEM_2h_vs_0h.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/Detailed_usage_peakQuant_CEM_2h_vs_0h.png


--------------------------------------------------------------------------------
/doc/images/Detailed_usage_peakQuant_CEM_4h_vs_0h.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/Detailed_usage_peakQuant_CEM_4h_vs_0h.png


--------------------------------------------------------------------------------
/doc/images/ENDseq_stranded.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/ENDseq_stranded.png


--------------------------------------------------------------------------------
/doc/images/FPKM_formula.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/FPKM_formula.png


--------------------------------------------------------------------------------
/doc/images/Libsize_formula.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/Libsize_formula.png


--------------------------------------------------------------------------------
/doc/images/MAIN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/MAIN.png


--------------------------------------------------------------------------------
/doc/images/MAIN_figure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/MAIN_figure.png


--------------------------------------------------------------------------------
/doc/images/OKseq_segmenter_figure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/OKseq_segmenter_figure.png


--------------------------------------------------------------------------------
/doc/images/OKseq_switch_example1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/OKseq_switch_example1.png


--------------------------------------------------------------------------------
/doc/images/OKseq_switch_example2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/OKseq_switch_example2.png


--------------------------------------------------------------------------------
/doc/images/Read_count_matrix_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/Read_count_matrix_example.png


--------------------------------------------------------------------------------
/doc/images/Replication_timing_script_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/Replication_timing_script_example.png


--------------------------------------------------------------------------------
/doc/images/TPM_formula.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/TPM_formula.png


--------------------------------------------------------------------------------
/doc/images/XY_Empty_page.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/XY_Empty_page.png


--------------------------------------------------------------------------------
/doc/images/XY_drop_file.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/XY_drop_file.png


--------------------------------------------------------------------------------
/doc/images/XY_dropdown.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/XY_dropdown.png


--------------------------------------------------------------------------------
/doc/images/XY_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/XY_example.png


--------------------------------------------------------------------------------
/doc/images/XY_exec.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/XY_exec.png


--------------------------------------------------------------------------------
/doc/images/XY_explanation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/XY_explanation.png


--------------------------------------------------------------------------------
/doc/images/XY_rstudio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ncbi/BAMscale/faf0bf4af6804f2332c7c1fc58a291faa383bfc8/doc/images/XY_rstudio.png


--------------------------------------------------------------------------------
/includes/BAMcoverage.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * To change this license header, choose License Headers in Project Properties.
 3 |  * To change this template file, choose Tools | Templates
 4 |  * and open the template in the editor.
 5 |  */
 6 | 
 7 | /* 
 8 |  * File:   BAMcoverage.h
 9 |  * Author: pongorls
10 |  *
11 |  * Created on November 28, 2018, 12:27 PM
12 |  */
13 | 
14 | #ifndef BAMCOVERAGE_H
15 | #define BAMCOVERAGE_H
16 | 
17 | #include "Definitions.h"
18 | #include "main.h"
19 | #include <htslib/sam.h>
20 | 
21 | #ifdef __cplusplus /* give the declarations below C linkage when this header is included from C++ */
22 | extern "C" {
23 | #endif
24 |     int ReadStrand(bam1_t *read, int paired_end); /* NOTE(review): presumably returns a strand code for one alignment; the encoding is not visible in this header */
25 |     int DetectLibraryType(BAMFILES *bhead); /* NOTE(review): meaning of the int result is defined in the implementation - confirm there */
26 |     int Read_filter(bam1_t *read, CMDINPUT *cmd); /* NOTE(review): filter decision depends on the options carried in cmd; confirm which return value means "keep" */
27 |     CHROMOSOMES *AddIDXcoverage(char *name, int coverage, int id, CHROMOSOMES *head); /* takes and returns a CHROMOSOMES pointer; presumably the (possibly new) list head - TODO confirm */
28 |     void GetChromosomeCoveragesIDX(CHROMOSOMES *head, BAMFILES *bhead);
29 |     void GetGenomeCoveragesIDX(CHROMOSOMES *head, BAMFILES *bhead);
30 |     void CalculateCoverageOfReads(samFile *fp_in, hts_itr_t *iter, bam1_t *aln, int chrsize, char *chrname, CMDINPUT *cmd, BAMFILES *bamcurr); /* same parameter list as CalculateCoverage below, but returns nothing */
31 |     void *GetGenomeReadCoveragemultithread(void * voidA); /* void *(void *) shape: presumably a thread start routine - confirm against the caller */
32 |     void MultiGenomeReadCoverage(CMDINPUT *cmd, CHROMOSOMES *chr);
33 |     void GetChromosomeCoveragesBAM(CHROMOSOMES *head, BAMFILES *bhead, CMDINPUT *cmd);
34 |     char *BEDentryToCoord(char *input); /* NOTE(review): ownership of the returned char * is not stated here; check whether the caller must free it */
35 |     char *BEDentryChr(char *input); /* NOTE(review): ownership of the returned char * is not stated here; check whether the caller must free it */
36 |     void SubtractBlacklistedBEDS(char *filename, CHROMOSOMES *head, BAMFILES *bhead, int paired_end);
37 |     int *CalculateCoverage(samFile *fp_in, hts_itr_t *iter, bam1_t *aln, int chrsize, char *chrname, CMDINPUT *cmd, BAMFILES *bamcurr); /* NOTE(review): returns an int array; length and ownership are not visible here (presumably chrsize entries - confirm) */
38 |     void GetGenomeCoverageRNA(CMDINPUT *cmd, CHROMOSOMES *head, char *outfile);
39 |     void *GetGenomeCoveragemultithread(void * voidA); /* void *(void *) shape: presumably a thread start routine - confirm against the caller */
40 |     void MultiGenomeCoverage(CMDINPUT *cmd, CHROMOSOMES *chr);
41 |     void *GetGenomeBaseCoveragemultithread(void * voidA); /* void *(void *) shape: presumably a thread start routine - confirm against the caller */
42 |     void MultiGenomeBaseCoverage(CMDINPUT *cmd, CHROMOSOMES *chr);
43 |     void *ScaleBinsmultithread(void * voidA); /* void *(void *) shape: presumably a thread start routine - confirm against the caller */
44 |     void MultiGenomeScaler(CMDINPUT *cmd, CHROMOSOMES *chr);
45 |     void *SmoothBinsmultithread(void * voidA); /* void *(void *) shape: presumably a thread start routine - confirm against the caller */
46 |     void MultiGenomeSmoother(CMDINPUT *cmd, CHROMOSOMES *chr);
47 |     void *TransformBinsmultithread(void * voidA); /* void *(void *) shape: presumably a thread start routine - confirm against the caller */
48 |     void MultiGenomeTransform(CMDINPUT *cmd, CHROMOSOMES *chr);
49 |     void CalculateCoverageOfChromosomeBins(CHROMOSOMES *head, BAMFILES *bhead, int paired_end, int bin_size, int pseudocount, CMDINPUT *cmd);
50 |     void SmoothenAllChromosomeBins(CHROMOSOMES *head, BAMFILES *bhead, int smoothBinNum);
51 | #ifdef __cplusplus /* closes the extern "C" block opened above */
52 | }
53 | #endif
54 | #endif /* BAMCOVERAGE_H */
55 | 


--------------------------------------------------------------------------------
/includes/BAMstructs.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * To change this license header, choose License Headers in Project Properties.
 3 |  * To change this template file, choose Tools | Templates
 4 |  * and open the template in the editor.
 5 |  */
 6 | 
 7 | /* 
 8 |  * File:   BAMstructs.h
 9 |  * Author: pongorls
10 |  *
11 |  * Created on November 28, 2018, 11:58 AM
12 |  */
13 | 
14 | #ifndef BAMSTRUCTS_H
15 | #define BAMSTRUCTS_H
16 | #include "Definitions.h"
17 | 
18 | #ifdef __cplusplus /* Declarations for the BAMFILES list module; the BAMFILES type comes from Definitions.h. */
19 | extern "C" { /* C linkage when this header is included from C++ */
20 | #endif
21 |     int CheckIndexShortFile(char *fname); /* NOTE(review): presumably checks for an index file belonging to fname; the meaning of the int result is not visible in this header */
22 |     int CheckIndexFile(char *fname); /* NOTE(review): same signature as CheckIndexShortFile; how the two differ must be read from BAMstructs.c */
23 |     void DestroyBAMstruct(BAMFILES *head); /* presumably releases the list starting at head — confirm ownership of name/shortname in the .c file */
24 |     BAMFILES *AddBAMstruct(char *BAMname, BAMFILES *head); /* takes the current list head and returns a BAMFILES pointer (presumably the updated list — confirm) */
25 |     void PrintBAMstructs(BAMFILES *head);
26 | #ifdef __cplusplus
27 | }
28 | #endif
29 | #endif /* BAMSTRUCTS_H */
30 | 


--------------------------------------------------------------------------------
/includes/BEDstruct.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * To change this license header, choose License Headers in Project Properties.
 3 |  * To change this template file, choose Tools | Templates
 4 |  * and open the template in the editor.
 5 |  */
 6 | 
 7 | /* 
 8 |  * File:   BEDstruct.h
 9 |  * Author: pongorls
10 |  *
11 |  * Created on December 10, 2018, 8:02 AM
12 |  */
13 | 
14 | #ifndef BEDSTRUCT_H
15 | #define BEDSTRUCT_H
16 | 
17 | #include "Definitions.h"
18 | #include "main.h"
19 | 
20 | #ifdef __cplusplus /* Declarations for BED/peak handling and per-peak coverage; PEAK, BAMFILES and CMDINPUT come from Definitions.h. */
21 | extern "C" { /* C linkage when this header is included from C++ */
22 | #endif
23 |     char *BEDtoString(char *chr, int start, int end); /* returns a char*; NOTE(review): who frees the result is not visible here */
24 |     int Read_filter_MultiCov(bam1_t *read, int paired_end); /* NOTE(review): bam1_t is not declared by Definitions.h (which includes only <stdint.h>) or by main.h, so the including file must pull in the htslib header that defines it before this header */
25 |     void DeleteBEDs(PEAK *head);
26 |     PEAK *CreateBEDentry(void);
27 |     void AllocateReadCovs(PEAK *head, int no_of_samples); /* presumably sizes PEAK.read_cov by no_of_samples — confirm */
28 |     void AllocateCovs(PEAK *head);
29 |     PEAK *AddBEDentry(PEAK *curr, char *BEDentry, int tid);
30 |     PEAK *ReadBED(char *BEDfilename, int nthreads);
31 |     void GetBEDCoveragesBAM(BAMFILES *bhead, PEAK *beds, int paired_end);
32 |     void *GetBEDFragmentCoveragesBAMmultithread(void * voidA); /* void *(*)(void *) shape, i.e. usable as a pthread start routine; NOTE(review): presumably voidA is a THREADS* — confirm */
33 |     void *GetBEDCoveragesBAMmultithread(void * voidA); /* same start-routine shape as the function above */
34 |     void MultiCoverage(BAMFILES *bhead, PEAK *head, CMDINPUT *cmd);
35 |     void CalculateFPKM(BAMFILES *bhead, PEAK *head);
36 |     void CalculateLibScaled(BAMFILES *bhead, PEAK *head);
37 |     void CalculateTPM(BAMFILES *bhead, PEAK *head);
38 |     void WriteMultiCovsRaw(BAMFILES *bhead, PEAK *head, int no_of_samples, char *outfile);
39 |     void WriteMultiCovsNormalized(BAMFILES *bhead, PEAK *head, int no_of_samples, char *outfile);
40 | #ifdef __cplusplus
41 | }
42 | #endif
43 | 
44 | #endif /* BEDSTRUCT_H */
45 | 


--------------------------------------------------------------------------------
/includes/CHROMstruct.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * To change this license header, choose License Headers in Project Properties.
 3 |  * To change this template file, choose Tools | Templates
 4 |  * and open the template in the editor.
 5 |  */
 6 | 
 7 | /* 
 8 |  * File:   CHROMstruct.h
 9 |  * Author: pongorls
10 |  *
11 |  * Created on November 28, 2018, 12:34 PM
12 |  */
13 | 
14 | #ifndef CHROMSTRUCT_H
15 | #define CHROMSTRUCT_H
16 | 
17 | #include "Definitions.h"
18 | #include "main.h"
19 | 
20 | #ifdef __cplusplus /* Declarations for the CHROMOSOMES list and the CHRCOV/RATIOS helper structures (types from Definitions.h). */
21 | extern "C" { /* C linkage when this header is included from C++ */
22 | #endif
23 |     uint32_t *GetChrLens(CHROMOSOMES *head, int no_of_chrs); /* returns uint32_t values although CHROMOSOMES.length is declared as int */
24 |     int CountNumberOfChromosomes(CHROMOSOMES *head);
25 |     char **GetChromosomeNames(CHROMOSOMES *head, int no_of_chrs);
26 |     float CalculateGenomeSize(CHROMOSOMES *head); /* NOTE(review): a float cannot represent every integer above 2^24 exactly; segmenter.h declares an int64_t CalculateGenSize for comparison */
27 |     CHROMOSOMES *AddCHROMstruct(CHROMOSOMES *head, char *name, int length, int no_of_samples, int threadID);
28 |     void DestroyCHROMstruct(CHROMOSOMES *head, int no_of_samples);
29 |     CHROMOSOMES *ImportChromosomeDataFromBAM(char *bamfile, int no_of_samples, int threads);
30 |     void PrintChromosomes(CHROMOSOMES *head, int no_of_samples);
31 |     void PrintBlacklistedChromosomes(CHROMOSOMES *head, int no_of_samples);
32 |     CHROMOSOMES *ComputeBins(CHROMOSOMES *head, int binSize); /* presumably fills CHROMOSOMES.numberOfBins from length and binSize — confirm */
33 |     CHROMOSOMES *AllocateBins(CHROMOSOMES *head, int no_of_samples);
34 |     CHROMOSOMES *BlacklistChromosome(CHROMOSOMES *head, char *name);
35 |     void BlacklistChromosomeFiles(CHROMOSOMES *head, char *filename);
36 |     void DestroyChromCovStruct(CHRCOV *head);
37 |     CHRCOV *CreateChromCovStruct(char *name, int id, int nbins);
38 |     void DestroyRatioStruct(RATIOS *ptr);
39 |     RATIOS *CreateRatioStruct(char *s1, char *s2, int id1, int id2);
40 | #ifdef __cplusplus
41 | }
42 | #endif
43 | #endif /* CHROMSTRUCT_H */
44 | 


--------------------------------------------------------------------------------
/includes/Definitions.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * To change this license header, choose License Headers in Project Properties.
  3 |  * To change this template file, choose Tools | Templates
  4 |  * and open the template in the editor.
  5 |  */
  6 | 
  7 | /* 
  8 |  * File:   Definitions.h
  9 |  * Author: pongorls
 10 |  *
 11 |  * Created on November 28, 2018, 11:56 AM
 12 |  */
 13 | 
 14 | #ifndef DEFINITIONS_H
 15 | #define DEFINITIONS_H
 16 | 
 17 | #include <stdint.h>
 18 | 
 19 | #ifdef __cplusplus
 20 | extern "C" {
 21 | #endif
 22 |     typedef struct segment { /* Node of a doubly linked list holding a (start, end, value) integer triple. */
 23 |         int start; /* NOTE(review): coordinate unit (bases vs. bins) is not visible here — confirm in segmenter.c */
 24 |         int end;
 25 |         int value;
 26 |         
 27 |         struct segment *next; /* link to the following node */
 28 |         struct segment *prev; /* link to the preceding node */
 29 |     } SEGMENTS;
 30 |     
 31 |     
 32 |     typedef struct peak { /* Singly linked list node describing one interval (peak) and its per-sample coverage buffers. */
 33 |         char *coord; /* NOTE(review): presumably a printable coordinate string for the interval — confirm format in BEDstruct.c */
 34 |         int id;
 35 |         char *chr;
 36 |         char *start_str; /* string counterparts of start/end below (presumably kept as read from the input) */
 37 |         char *end_str;
 38 |         int strand;
 39 |         
 40 |         int start; /* numeric interval bounds; NOTE(review): 0- or 1-based and open/closed end not visible here */
 41 |         int end;
 42 |         int length;
 43 |         
 44 |         int nbins;
 45 |         int binSize;
 46 |         int noSamples;
 47 |         
 48 |         int tid; /* NOTE(review): could be a thread id or a target (chromosome) id — confirm against AddBEDentry's tid argument */
 49 |         
 50 |         int *read_cov; /* pointer to int counts; presumably one element per sample — confirm in AllocateReadCovs */
 51 |         float *normalized; /* pointer to float values; presumably normalized counterparts of read_cov — confirm */
 52 |         float **cov; /* two-level float buffer; dimension order is not visible in this header */
 53 |         struct peak *next; /* link to the following node */
 54 |     } PEAK;
 55 |     
 56 |     typedef struct chrcov { /* Singly linked list node carrying a float array for one named entry (see RATIOS.chrcovs). */
 57 |         char *name;
 58 |         int id;
 59 |         int nbins; /* presumably the number of elements in ratio — confirm in CreateChromCovStruct */
 60 |         float *ratio;
 61 |         
 62 |         struct chrcov *next; /* link to the following node */
 63 |     } CHRCOV;
 64 |     
 65 |     typedef struct ratios { /* Singly linked list node pairing two samples with a list of CHRCOV entries. */
 66 |         char *sample1; /* names of the two samples */
 67 |         char *sample2;
 68 |         int s1; /* NOTE(review): presumably the ids of sample1/sample2 (cf. CreateRatioStruct's id1/id2) — confirm */
 69 |         int s2;
 70 |         
 71 |         CHRCOV *chrcovs; /* head of the associated CHRCOV list */
 72 |         struct ratios *next; /* link to the following node */
 73 |     } RATIOS;
 74 |     
 75 |     typedef struct BEDcoords { /* Singly linked list node for one interval; no function in the headers shown here takes this type. */
 76 |         char *coord;
 77 |         char *chr;
 78 |         char *start_str; /* string counterparts of start/end below */
 79 |         char *end_str;
 80 |         float *normcov; /* pointer to float values; length is not recorded in this struct */
 81 |         
 82 |         int start;
 83 |         int end;
 84 |         int summit;
 85 |         int strand;
 86 |         struct BEDcoords *next; /* link to the following node */
 87 |     } BEDCOORDS;
 88 |     
 89 |     typedef struct BAMfiles{ /* Singly linked list node holding per-input-file names, counters and scale factors. */
 90 |         char *name;
 91 |         char *shortname; /* NOTE(review): presumably name without its directory part — confirm in BAMstructs.c */
 92 |         int id;
 93 |         int read_coverage; /* plain int counter; NOTE(review): confirm it cannot overflow for very large inputs */
 94 |         int filtered_reads; /* plain int counter, same caveat as read_coverage */
 95 |         double base_coverage; /* stored as double, unlike the two int counters above */
 96 |         float scale; /* NOTE(review): presumably set by the Scale* functions declared in scale.h — confirm */
 97 |         float genome_scale;
 98 |         struct BAMfiles *next; /* link to the following node */
 99 |     } BAMFILES;
100 |     
101 |     typedef struct chromosomes { /* Singly linked list node describing one chromosome and its coverage buffers. */
102 |         char *name;
103 |         int id;
104 |         int length; /* declared int here; GetChrLens in CHROMstruct.h returns lengths as uint32_t */
105 |         int accept;
106 |         int blacklist; /* NOTE(review): presumably a flag set by BlacklistChromosome — confirm */
107 |         float  **coverages; /* two-level float buffer; NOTE(review): presumably indexed [sample][bin] — confirm in AllocateBins */
108 |         int *idxreads;
109 |         int numberOfBins;
110 |         int allocated;
111 |         int nonzerobins;
112 |         int tid; /* NOTE(review): could be a thread id (cf. AddCHROMstruct's threadID) — confirm */
113 |         struct chromosomes *next; /* link to the following node */
114 |     } CHROMOSOMES;
115 | 
116 |     typedef struct cmdinput { /* Aggregate of run settings; returned by ScaleParser/MultiCovParser (Inputs.h) and passed to most worker functions. */
117 |         char *bedfile;
118 |         int no_of_samples;
119 |         BAMFILES *bamfiles; /* head of the BAMFILES list */
120 |         CHROMOSOMES *chr; /* head of the CHROMOSOMES list */
121 |         int mapq; /* NOTE(review): the int fields below look like thresholds and on/off flags; their valid values and defaults are set in Inputs.c, not visible here */
122 |         int removeduplicates;
123 |         int nounproper;
124 |         int remove_unmapped_pair;
125 |         int fragment_count_mode;
126 |         int fragment_size_filter;
127 |         int fragment_size;
128 |         int strand;
129 |         int libtype;
130 |         int threads;
131 |         int min_insert_size;
132 |         int max_insert_size;
133 |         char *outdir;
134 |         char *outprefix;
135 |         char *blacklist_file; /* two separate blacklist inputs; cf. BlacklistChromosomeFiles (CHROMstruct.h) and SubtractBlacklistedBEDS (BAMcoverage.h) — which field feeds which is not visible here */
136 |         char *blacklist_bed;
137 |         char **argv; /* presumably the program's command-line vector and count — confirm whether copied or borrowed */
138 |         int argc;
139 |         int genome_coverage;
140 |         int strandsplit;
141 |         char *custom_scale; /* NOTE(review): presumably the raw string handed to ParseCustomScaling — confirm */
142 |         
143 |         char *normtype; /* string-valued options; NOTE(review): presumably compared with the INPUTS_* literals in Inputs.h — confirm */
144 |         char *scale;
145 |         char *operation;
146 |         
147 |         int filtDiffChr;
148 |         int filtInsSize;
149 |         
150 |         int binSize;
151 |         int binSizeChange; /* NOTE(review): presumably records that binSize was set explicitly — confirm */
152 |         int smoothBin;
153 |         int smoothBinChange; /* NOTE(review): presumably records that smoothBin was set explicitly — confirm */
154 |         int tracksmooth;
155 |     } CMDINPUT;
156 |     
157 |     typedef struct threads { /* Singly linked list node bundling the arguments for one unit of threaded work. */
158 |         int pid; /* NOTE(review): presumably a worker index rather than an OS process id — confirm in multithreads.c */
159 |         char *chrname;
160 |         char *sample;
161 |         int sample_id;
162 |         int paired_end;
163 |         float scale;
164 |         int binSize;
165 |         int pseudocount;
166 |         int strand;
167 |         CHROMOSOMES *chr; /* pointers into structures defined above; ownership is not visible in this header */
168 |         PEAK *phead;
169 |         CMDINPUT *cmd;
170 |         BAMFILES *bamfile;
171 |         struct threads *next; /* link to the following node */
172 |     } THREADS;
173 | #ifdef __cplusplus
174 | }
175 | #endif
176 | 
177 | #endif /* DEFINITIONS_H */
178 | 
179 | 


--------------------------------------------------------------------------------
/includes/Inputs.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * To change this license header, choose License Headers in Project Properties.
 3 |  * To change this template file, choose Tools | Templates
 4 |  * and open the template in the editor.
 5 |  */
 6 | 
 7 | /* 
 8 |  * File:   Inputs.h
 9 |  * Author: pongorls
10 |  *
11 |  * Created on December 11, 2018, 8:00 AM
12 |  */
13 | 
14 | #ifndef INPUTS_H
15 | #define INPUTS_H
16 | 
17 | #include "Definitions.h"
18 | #include "main.h"
19 | 
20 | #define INPUTS_BASE "base" /* String constants for option values; NOTE(review): presumably matched against command-line arguments by the parsers declared in this header — confirm in Inputs.c */
21 | #define INPUTS_GENOME "genome"
22 | #define INPUTS_SCALED "scaled"
23 | #define INPUTS_CUSTOM "custom"
24 | #define INPUTS_SCALE "scale"
25 | #define INPUTS_UNSCALED "unscaled"
26 | #define INPUTS_LOG2 "log2"
27 | #define INPUTS_RFD "rfd"
28 | #define INPUTS_END "endseq"
29 | #define INPUTS_ENDR "endseqr"
30 | #define INPUTS_RNA "rna"
31 | #define INPUTS_STRRNA "strandrna"
32 | #define INPUTS_RSTRRNA "strandrnaR" /* ends in a capital R: the only value in this list that is not all lowercase */
33 | #define INPUTS_REP "reptime"
34 | #define INPUTS_RATIO "ratio"
35 | #define INPUTS_SUBSTRACT "subtract" /* macro name is spelled SUBSTRACT while the value is "subtract"; renaming the macro would require updating every user */
36 | #define INPUTS_READS "reads"
37 | #define INPUTS_SINGLE "single"
38 | #define INPUTS_PAIRED "paired"
39 | #define INPUTS_AUTO "auto"
40 | #define INPUTS_NO "no"
41 | #define INPUTS_SMALLEST "smallest"
42 | #define INPUTS_COV "cov"
43 | 
44 | #ifdef __cplusplus /* Declarations for creating, filling and destroying CMDINPUT (type from Definitions.h). */
45 | extern "C" { /* C linkage when this header is included from C++ */
46 | #endif
47 |     CMDINPUT *CreateCMDinput(void);
48 |     int ParseCustomScaling(CMDINPUT *cmd, char *scales); /* NOTE(review): meaning of the int result and the format of scales are not visible here */
49 |     void PrintScaleMessage(char *pname);
50 |     CMDINPUT *ScaleParser(int argc, char **argv); /* takes main-style argc/argv and returns a CMDINPUT pointer; NOTE(review): behavior on invalid arguments is not visible here */
51 |     void PrintMultiCovMessage(char *pname);
52 |     CMDINPUT *MultiCovParser(int argc, char **argv); /* same shape as ScaleParser */
53 |     void DestroyCMDinput(CMDINPUT *ptr);
54 | #ifdef __cplusplus
55 | }
56 | #endif
57 | 
58 | #endif /* INPUTS_H */
59 | 


--------------------------------------------------------------------------------
/includes/Writer.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * To change this license header, choose License Headers in Project Properties.
 3 |  * To change this template file, choose Tools | Templates
 4 |  * and open the template in the editor.
 5 |  */
 6 | 
 7 | /* 
 8 |  * File:   Writer.h
 9 |  * Author: pongorls
10 |  *
11 |  * Created on December 19, 2018, 2:27 PM
12 |  */
13 | 
14 | #ifndef WRITER_H
15 | #define WRITER_H
16 | 
17 | #include "Definitions.h"
18 | #include "main.h"
19 | 
20 | #ifdef __cplusplus /* Declarations for the output-writing module (CMDINPUT and BAMFILES come from Definitions.h). */
21 | extern "C" { /* C linkage when this header is included from C++ */
22 | #endif
23 |     char *returnRNAfilename(CMDINPUT *cmd); /* returns a char*; NOTE(review): who frees the result is not visible here */
24 |     void PrintScaledBigWig(CMDINPUT *cmd, BAMFILES *curr, char *sfile); /* NOTE(review): presumably writes a bigWig track for curr (the build links -lBigWig) — confirm in Writer.c */
25 | #ifdef __cplusplus
26 | }
27 | #endif
28 | 
29 | #endif /* WRITER_H */
30 | 


--------------------------------------------------------------------------------
/includes/binning.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * To change this license header, choose License Headers in Project Properties.
 3 |  * To change this template file, choose Tools | Templates
 4 |  * and open the template in the editor.
 5 |  */
 6 | 
 7 | /* 
 8 |  * File:   binning.h
 9 |  * Author: pongorls
10 |  *
11 |  * Created on November 28, 2018, 5:09 PM
12 |  */
13 | 
14 | #ifndef BINNING_H
15 | #define BINNING_H
16 | #include "Definitions.h"
17 | #include "main.h"
18 | 
19 | #ifdef __cplusplus /* Declarations for bin-level helpers operating on float/int arrays. */
20 | extern "C" { /* C linkage when this header is included from C++ */
21 | #endif
22 |     float *QuicksmoothenBins(float *carray, int smoothBins, int numBins); /* takes a float*; NOTE(review): whether the result is carray itself or a new buffer is not visible here */
23 |     float *smoothenBins(float **carray, int smoothBins, int numBins); /* takes a float** (pointer to the array pointer), unlike QuicksmoothenBins */
24 |     float *AddPseudoToZeroCov(float *coverage, int len);
25 |     float *BinCoverage(int *coverage, int chr_len, int binSize, int nbins); /* int array in, float array out; NOTE(review): presumably per-base values aggregated into nbins bins — confirm in binning.c */
26 | #ifdef __cplusplus
27 | }
28 | #endif
29 | #endif /* BINNING_H */
30 | 


--------------------------------------------------------------------------------
/includes/main.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * To change this license header, choose License Headers in Project Properties.
 3 |  * To change this template file, choose Tools | Templates
 4 |  * and open the template in the editor.
 5 |  */
 6 | 
 7 | /* 
 8 |  * File:   main.h
 9 |  * Author: pongorls
10 |  *
11 |  * Created on November 28, 2018, 1:21 PM
12 |  */
13 | 
14 | #ifndef MAIN_H
15 | #define MAIN_H
16 | #include "Definitions.h"
17 | 
18 | #ifdef __cplusplus /* Declarations for the program entry point and top-level drivers (CMDINPUT comes from Definitions.h). */
19 | extern "C" { /* C linkage when this header is included from C++ */
20 | #endif
21 |     void FreeAllocatedData(void);
22 |     void ComputeCoverageChIPpeak(CMDINPUT *cmd);
23 |     void NormalizeBAMSrna(CMDINPUT *cmd);
24 |     void NormalizeBAMS(CMDINPUT *cmd);
25 |     void PrintUsage(char *pname);
26 |     int main(); /* empty parentheses: before C23 this declares main without a prototype, so its parameters are unspecified; NOTE(review): consider spelling out the parameter list used in main.c */
27 | 
28 | 
29 | 
30 | #ifdef __cplusplus
31 | }
32 | #endif
33 | 
34 | #endif /* MAIN_H */
35 | 
36 | 


--------------------------------------------------------------------------------
/includes/multithreads.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * To change this license header, choose License Headers in Project Properties.
 3 |  * To change this template file, choose Tools | Templates
 4 |  * and open the template in the editor.
 5 |  */
 6 | 
 7 | /* 
 8 |  * File:   multithreads.h
 9 |  * Author: pongorls
10 |  *
11 |  * Created on November 30, 2018, 12:04 PM
12 |  */
13 | 
14 | #ifndef MULTITHREADS_H
15 | #define MULTITHREADS_H
16 | #include "Definitions.h"
17 | #include "main.h"
18 | 
19 | #ifdef __cplusplus /* Declarations for building and destroying THREADS lists (type from Definitions.h). */
20 | extern "C" { /* C linkage when this header is included from C++ */
21 | #endif
22 |     void DestroyThreadStruct(THREADS **head, int no_of_threads); /* takes THREADS**; NOTE(review): presumably an array of no_of_threads list heads — confirm in multithreads.c */
23 |     THREADS *CreateThreadStruct(char *chrname);
24 |     THREADS *AddElement(THREADS *head, char *chrname); /* takes the current head and returns a THREADS pointer (presumably the updated list — confirm) */
25 | #ifdef __cplusplus
26 | }
27 | #endif
28 | #endif /* MULTITHREADS_H */
29 | 


--------------------------------------------------------------------------------
/includes/scale.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * To change this license header, choose License Headers in Project Properties.
 3 |  * To change this template file, choose Tools | Templates
 4 |  * and open the template in the editor.
 5 |  */
 6 | 
 7 | /* 
 8 |  * File:   scale.h
 9 |  * Author: pongorls
10 |  *
11 |  * Created on November 28, 2018, 3:54 PM
12 |  */
13 | 
14 | #ifndef SCALE_H
15 | #define SCALE_H
16 | #include "Definitions.h"
17 | #include "main.h"
18 | 
19 | #ifdef __cplusplus /* Declarations for sample scaling, two-sample bin operations and track output. */
20 | extern "C" { /* C linkage when this header is included from C++ */
21 | #endif
22 |     float *scaleBins(float *carray, float scale, int nbins, float pseudocount);
23 |     void ScaleToSmallest(BAMFILES *head);
24 |     void NoScale(BAMFILES *head);
25 |     void ScaleToGenomeSize(BAMFILES *head, CHROMOSOMES *chead);
26 |     void ScaleGenomeCoverage(BAMFILES *head, CHROMOSOMES *chead);
27 |     BAMFILES *ComputeSamplescales(BAMFILES *head, CHROMOSOMES *chead, int scale); /* scale is an int here (presumably a mode selector — confirm), not the float BAMFILES.scale */
28 |     float *logTwoCoverageRatio(float *cov1, float *cov2, int nbins, float min_per_bin_cov); /* this and the next three functions share one signature: two float arrays, a bin count and a float threshold */
29 |     float *OKseqRFD(float *cov1, float *cov2, int nbins, float min_per_bin_cov);
30 |     float *SubtractCoverage(float *cov1, float *cov2, int nbins, float min_per_bin_cov);
31 |     float *CoverageRatio(float *cov1, float *cov2, int nbins, float min_per_bin_cov);
32 |     float *SignedCoverageRatio(float *cov1, float *cov2, int nbins, int min_per_bin_cov); /* NOTE(review): min_per_bin_cov is int here but float in the four sibling functions above; confirm this is intended */
33 |     CHRCOV *CalculateChromosomeRatio(CHROMOSOMES *curr, CHRCOV *chead, int s1, int s2, int ratioType, int min_per_bin_cov); /* min_per_bin_cov is int at this level too */
34 |     void PrintBedgraph(RATIOS *ptr, int binSize);
35 |     char *returnChrName(char *input);
36 |     void PrintBedgraphOrdered(RATIOS *ptr, int binSize, char *chromfile);
37 |     void PrintBigWigOrdered(RATIOS *ptr, int binSize, char *chromfile);
38 |     RATIOS *CalculateRatiosAll(RATIOS *head, CHROMOSOMES *chead, BAMFILES *bhead, int no_of_samples, int min_per_bin_cov, int smoothbin, int binSize, char *chromsizes);
39 | #ifdef __cplusplus
40 | }
41 | #endif
42 | #endif /* SCALE_H */
43 | 


--------------------------------------------------------------------------------
/includes/segmenter.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * To change this license header, choose License Headers in Project Properties.
 3 |  * To change this template file, choose Tools | Templates
 4 |  * and open the template in the editor.
 5 |  */
 6 | 
 7 | /* 
 8 |  * File:   segmenter.h
 9 |  * Author: pongorls
10 |  *
11 |  * Created on November 29, 2018, 1:27 PM
12 |  */
13 | 
14 | #ifndef SEGMENTER_H
15 | #define SEGMENTER_H
16 | #include "Definitions.h"
17 | #include "main.h"
18 | 
19 | #ifdef __cplusplus /* Declarations for the segmentation module (SEGMENTS, CHROMOSOMES and CMDINPUT come from Definitions.h). */
20 | extern "C" { /* C linkage when this header is included from C++ */
21 | #endif
22 |     void DestroySegments(SEGMENTS *head);
23 |     SEGMENTS *createSegment(void);
24 |     int compare_float (const void * a, const void * b); /* signature matches the comparator type expected by qsort/bsearch; NOTE(review): ordering direction is not visible here */
25 |     int64_t CalculateGenSize(CHROMOSOMES *head); /* 64-bit integer result, unlike the float CalculateGenomeSize declared in CHROMstruct.h */
26 |     int64_t CalculateNonZeroBins(CHROMOSOMES *head, int sampleid);
27 |     float *ConcatenateGenome(CHROMOSOMES *head, int64_t gensize, int sampleid); /* returns a float*; NOTE(review): presumably a newly allocated buffer of gensize elements that the caller frees — confirm */
28 |     void Segmenting(CHROMOSOMES *head, CMDINPUT *cmd, int sampleid, float upper, float median, float lower);
29 |     void Quantiles(CHROMOSOMES *head, int sampleid, CMDINPUT* cmd);
30 | #ifdef __cplusplus
31 | }
32 | #endif
33 | #endif /* SEGMENTER_H */
34 | 


--------------------------------------------------------------------------------
/nbproject/Makefile-Release.mk:
--------------------------------------------------------------------------------
  1 | #
  2 | # Generated Makefile - do not edit!
  3 | #
  4 | # Edit the Makefile in the project folder instead (../Makefile). Each target
  5 | # has a -pre and a -post target defined where you can add customized code.
  6 | #
  7 | # This makefile implements configuration specific macros and targets.
  8 | 
  9 | 
 10 | # Environment
 11 | MKDIR=mkdir
 12 | CP=cp
 13 | GREP=grep
 14 | NM=nm
 15 | CCADMIN=CCadmin
 16 | RANLIB=ranlib
 17 | CC=gcc
 18 | CCC=g++
 19 | CXX=g++
 20 | FC=gfortran
 21 | AS=as
 22 | 
 23 | # Macros
 24 | CND_PLATFORM=GNU-MacOSX
 25 | CND_DLIB_EXT=dylib
 26 | CND_CONF=Release
 27 | CND_DISTDIR=dist
 28 | CND_BUILDDIR=build
 29 | 
 30 | # Include project Makefile
 31 | include Makefile
 32 | 
 33 | # Object Directory
 34 | OBJECTDIR=${CND_BUILDDIR}/${CND_CONF}/${CND_PLATFORM}
 35 | 
 36 | # Object Files
 37 | OBJECTFILES= \
 38 | 	${OBJECTDIR}/src/BAMcoverage.o \
 39 | 	${OBJECTDIR}/src/BAMstructs.o \
 40 | 	${OBJECTDIR}/src/BEDstruct.o \
 41 | 	${OBJECTDIR}/src/CHROMstruct.o \
 42 | 	${OBJECTDIR}/src/Inputs.o \
 43 | 	${OBJECTDIR}/src/Writer.o \
 44 | 	${OBJECTDIR}/src/binning.o \
 45 | 	${OBJECTDIR}/src/main.o \
 46 | 	${OBJECTDIR}/src/multithreads.o \
 47 | 	${OBJECTDIR}/src/scale.o \
 48 | 	${OBJECTDIR}/src/segmenter.o
 49 | 
 50 | 
 51 | # C Compiler Flags
 52 | CFLAGS=
 53 | 
 54 | # CC Compiler Flags
 55 | CCFLAGS=
 56 | CXXFLAGS=
 57 | 
 58 | # Fortran Compiler Flags
 59 | FFLAGS=
 60 | 
 61 | # Assembler Flags
 62 | ASFLAGS=
 63 | 
 64 | # Link Libraries and Options
 65 | LDLIBSOPTIONS=
 66 | 
 67 | # Build Targets
 68 | .build-conf: ${BUILD_SUBPROJECTS}
 69 | 	"${MAKE}"  -f nbproject/Makefile-${CND_CONF}.mk bin/BAMscale
 70 | 
 71 | bin/BAMscale: ${OBJECTFILES}
 72 | 	${MKDIR} -p bin
 73 | 	${LINK.c} -o bin/BAMscale ${OBJECTFILES} ${LDLIBSOPTIONS} -lBigWig -lhts -lz -lm -lbz2 -llzma -lcurl -ldl -lpthread
 74 | 
 75 | ${OBJECTDIR}/src/BAMcoverage.o: src/BAMcoverage.c
 76 | 	${MKDIR} -p ${OBJECTDIR}/src
 77 | 	${RM} "$@.d"
 78 | 	$(COMPILE.c) -O2 -Iincludes -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/BAMcoverage.o src/BAMcoverage.c
 79 | 
 80 | ${OBJECTDIR}/src/BAMstructs.o: src/BAMstructs.c
 81 | 	${MKDIR} -p ${OBJECTDIR}/src
 82 | 	${RM} "$@.d"
 83 | 	$(COMPILE.c) -O2 -Iincludes -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/BAMstructs.o src/BAMstructs.c
 84 | 
 85 | ${OBJECTDIR}/src/BEDstruct.o: src/BEDstruct.c
 86 | 	${MKDIR} -p ${OBJECTDIR}/src
 87 | 	${RM} "$@.d"
 88 | 	$(COMPILE.c) -O2 -Iincludes -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/BEDstruct.o src/BEDstruct.c
 89 | 
 90 | ${OBJECTDIR}/src/CHROMstruct.o: src/CHROMstruct.c
 91 | 	${MKDIR} -p ${OBJECTDIR}/src
 92 | 	${RM} "$@.d"
 93 | 	$(COMPILE.c) -O2 -Iincludes -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/CHROMstruct.o src/CHROMstruct.c
 94 | 
 95 | ${OBJECTDIR}/src/Inputs.o: src/Inputs.c
 96 | 	${MKDIR} -p ${OBJECTDIR}/src
 97 | 	${RM} "$@.d"
 98 | 	$(COMPILE.c) -O2 -Iincludes -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/Inputs.o src/Inputs.c
 99 | 
100 | ${OBJECTDIR}/src/Writer.o: src/Writer.c
101 | 	${MKDIR} -p ${OBJECTDIR}/src
102 | 	${RM} "$@.d"
103 | 	$(COMPILE.c) -O2 -Iincludes -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/Writer.o src/Writer.c
104 | 
105 | ${OBJECTDIR}/src/binning.o: src/binning.c
106 | 	${MKDIR} -p ${OBJECTDIR}/src
107 | 	${RM} "$@.d"
108 | 	$(COMPILE.c) -O2 -Iincludes -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/binning.o src/binning.c
109 | 
110 | ${OBJECTDIR}/src/main.o: src/main.c
111 | 	${MKDIR} -p ${OBJECTDIR}/src
112 | 	${RM} "$@.d"
113 | 	$(COMPILE.c) -O2 -Iincludes -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/main.o src/main.c
114 | 
115 | ${OBJECTDIR}/src/multithreads.o: src/multithreads.c
116 | 	${MKDIR} -p ${OBJECTDIR}/src
117 | 	${RM} "$@.d"
118 | 	$(COMPILE.c) -O2 -Iincludes -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/multithreads.o src/multithreads.c
119 | 
120 | ${OBJECTDIR}/src/scale.o: src/scale.c
121 | 	${MKDIR} -p ${OBJECTDIR}/src
122 | 	${RM} "$@.d"
123 | 	$(COMPILE.c) -O2 -Iincludes -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/scale.o src/scale.c
124 | 
125 | ${OBJECTDIR}/src/segmenter.o: src/segmenter.c
126 | 	${MKDIR} -p ${OBJECTDIR}/src
127 | 	${RM} "$@.d"
128 | 	$(COMPILE.c) -O2 -Iincludes -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/segmenter.o src/segmenter.c
129 | 
130 | # Subprojects
131 | .build-subprojects:
132 | 
133 | # Clean Targets
134 | .clean-conf: ${CLEAN_SUBPROJECTS}
135 | 	${RM} -r ${CND_BUILDDIR}/${CND_CONF}
136 | 
137 | # Subprojects
138 | .clean-subprojects:
139 | 
140 | # Enable dependency checking
141 | .dep.inc: .depcheck-impl
142 | 
143 | include .dep.inc
144 | 


--------------------------------------------------------------------------------
/nbproject/Makefile-impl.mk:
--------------------------------------------------------------------------------
  1 | # 
  2 | # Generated Makefile - do not edit! 
  3 | # 
  4 | # Edit the Makefile in the project folder instead (../Makefile). Each target
  5 | # has a pre- and a post- target defined where you can add customization code.
  6 | #
  7 | # This makefile implements macros and targets common to all configurations.
  8 | #
  9 | # NOCDDL
 10 | 
 11 | 
 12 | # Building and Cleaning subprojects are done by default, but can be controlled with the SUB
 13 | # macro. If SUB=no, subprojects will not be built or cleaned. The following macro
 14 | # statements set BUILD_SUB-CONF and CLEAN_SUB-CONF to .build-reqprojects-conf
 15 | # and .clean-reqprojects-conf unless SUB has the value 'no'
 16 | SUB_no=NO
 17 | SUBPROJECTS=${SUB_${SUB}}
 18 | BUILD_SUBPROJECTS_=.build-subprojects
 19 | BUILD_SUBPROJECTS_NO=
 20 | BUILD_SUBPROJECTS=${BUILD_SUBPROJECTS_${SUBPROJECTS}}
 21 | CLEAN_SUBPROJECTS_=.clean-subprojects
 22 | CLEAN_SUBPROJECTS_NO=
 23 | CLEAN_SUBPROJECTS=${CLEAN_SUBPROJECTS_${SUBPROJECTS}}
 24 | 
 25 | 
 26 | # Project Name
 27 | PROJECTNAME=BAMscale
 28 | 
 29 | # Active Configuration
 30 | DEFAULTCONF=Release
 31 | CONF=${DEFAULTCONF}
 32 | 
 33 | # All Configurations
 34 | ALLCONFS=Release 
 35 | 
 36 | 
 37 | # build
 38 | .build-impl: .build-pre .validate-impl .depcheck-impl
 39 | 	@#echo "=> Running $@... Configuration=$(CONF)"
 40 | 	"${MAKE}" -f nbproject/Makefile-${CONF}.mk QMAKE=${QMAKE} SUBPROJECTS=${SUBPROJECTS} .build-conf
 41 | 
 42 | 
 43 | # clean
 44 | .clean-impl: .clean-pre .validate-impl .depcheck-impl
 45 | 	@#echo "=> Running $@... Configuration=$(CONF)"
 46 | 	"${MAKE}" -f nbproject/Makefile-${CONF}.mk QMAKE=${QMAKE} SUBPROJECTS=${SUBPROJECTS} .clean-conf
 47 | 
 48 | 
 49 | # clobber 
 50 | .clobber-impl: .clobber-pre .depcheck-impl
 51 | 	@#echo "=> Running $@..."
 52 | 	for CONF in ${ALLCONFS}; \
 53 | 	do \
 54 | 	    "${MAKE}" -f nbproject/Makefile-$${CONF}.mk QMAKE=${QMAKE} SUBPROJECTS=${SUBPROJECTS} .clean-conf; \
 55 | 	done
 56 | 
 57 | # all 
 58 | .all-impl: .all-pre .depcheck-impl
 59 | 	@#echo "=> Running $@..."
 60 | 	for CONF in ${ALLCONFS}; \
 61 | 	do \
 62 | 	    "${MAKE}" -f nbproject/Makefile-$${CONF}.mk QMAKE=${QMAKE} SUBPROJECTS=${SUBPROJECTS} .build-conf; \
 63 | 	done
 64 | 
 65 | # build tests
 66 | .build-tests-impl: .build-impl .build-tests-pre
 67 | 	@#echo "=> Running $@... Configuration=$(CONF)"
 68 | 	"${MAKE}" -f nbproject/Makefile-${CONF}.mk SUBPROJECTS=${SUBPROJECTS} .build-tests-conf
 69 | 
 70 | # run tests
 71 | .test-impl: .build-tests-impl .test-pre
 72 | 	@#echo "=> Running $@... Configuration=$(CONF)"
 73 | 	"${MAKE}" -f nbproject/Makefile-${CONF}.mk SUBPROJECTS=${SUBPROJECTS} .test-conf
 74 | 
 75 | # dependency checking support
 76 | .depcheck-impl:
 77 | 	@echo "# This code depends on make tool being used" >.dep.inc
 78 | 	@if [ -n "${MAKE_VERSION}" ]; then \
 79 | 	    echo "DEPFILES=\$$(wildcard \$$(addsuffix .d, \$${OBJECTFILES} \$${TESTOBJECTFILES}))" >>.dep.inc; \
 80 | 	    echo "ifneq (\$${DEPFILES},)" >>.dep.inc; \
 81 | 	    echo "include \$${DEPFILES}" >>.dep.inc; \
 82 | 	    echo "endif" >>.dep.inc; \
 83 | 	else \
 84 | 	    echo ".KEEP_STATE:" >>.dep.inc; \
 85 | 	    echo ".KEEP_STATE_FILE:.make.state.\$${CONF}" >>.dep.inc; \
 86 | 	fi
 87 | 
 88 | # configuration validation
 89 | .validate-impl:
 90 | 	@if [ ! -f nbproject/Makefile-${CONF}.mk ]; \
 91 | 	then \
 92 | 	    echo ""; \
 93 | 	    echo "Error: can not find the makefile for configuration '${CONF}' in project ${PROJECTNAME}"; \
 94 | 	    echo "See 'make help' for details."; \
 95 | 	    echo "Current directory: " `pwd`; \
 96 | 	    echo ""; \
 97 | 	fi
 98 | 	@if [ ! -f nbproject/Makefile-${CONF}.mk ]; \
 99 | 	then \
100 | 	    exit 1; \
101 | 	fi
102 | 
103 | 
104 | # help
105 | .help-impl: .help-pre # Print the supported configurations, targets and usage after running the .help-pre hook.
106 | 	@echo "This makefile supports the following configurations:"
107 | 	@echo "    ${ALLCONFS}"
108 | 	@echo ""
109 | 	@echo "and the following targets:"
110 | 	@echo "    build  (default target)"
111 | 	@echo "    clean"
112 | 	@echo "    clobber"
113 | 	@echo "    all"
114 | 	@echo "    help"
115 | 	@echo ""
116 | 	@echo "Makefile Usage:"
117 | 	@echo "    make [CONF=<CONFIGURATION>] [SUB=no] build"
118 | 	@echo "    make [CONF=<CONFIGURATION>] [SUB=no] clean"
119 | 	@echo "    make [SUB=no] clobber"
120 | 	@echo "    make [SUB=no] all"
121 | 	@echo "    make help"
122 | 	@echo ""
123 | 	@echo "Target 'build' will build a specific configuration and, unless 'SUB=no',"
124 | 	@echo "    also build subprojects."
125 | 	@echo "Target 'clean' will clean a specific configuration and, unless 'SUB=no',"
126 | 	@echo "    also clean subprojects."
127 | 	@echo "Target 'clobber' will remove all built files from all configurations and,"
128 | 	@echo "    unless 'SUB=no', also from subprojects."
129 | 	@echo "Target 'all' will build all configurations and, unless 'SUB=no',"
130 | 	@echo "    also build subprojects."
131 | 	@echo "Target 'help' prints this message."
132 | 	@echo ""
133 | 
134 | 


--------------------------------------------------------------------------------
/nbproject/Makefile-variables.mk:
--------------------------------------------------------------------------------
 1 | #
 2 | # Generated - do not edit!
 3 | #
 4 | # NOCDDL
 5 | #
 6 | CND_BASEDIR=`pwd`
 7 | CND_BUILDDIR=build
 8 | CND_DISTDIR=dist
 9 | # Release configuration
10 | CND_PLATFORM_Release=GNU-MacOSX
11 | CND_ARTIFACT_DIR_Release=bin
12 | CND_ARTIFACT_NAME_Release=BAMscale
13 | CND_ARTIFACT_PATH_Release=bin/BAMscale
14 | CND_PACKAGE_DIR_Release=dist/Release/GNU-MacOSX/package
15 | CND_PACKAGE_NAME_Release=bamscale.tar
16 | CND_PACKAGE_PATH_Release=dist/Release/GNU-MacOSX/package/bamscale.tar
17 | #
18 | # include compiler specific variables
19 | #
20 | # dmake command
21 | ROOT:sh = test -f nbproject/private/Makefile-variables.mk || \
22 | 	(mkdir -p nbproject/private && touch nbproject/private/Makefile-variables.mk)
23 | #
24 | # gmake command
25 | .PHONY: $(shell test -f nbproject/private/Makefile-variables.mk || (mkdir -p nbproject/private && touch nbproject/private/Makefile-variables.mk))
26 | #
27 | include nbproject/private/Makefile-variables.mk
28 | 


--------------------------------------------------------------------------------
/nbproject/Package-Release.bash:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -x
 2 | 
 3 | #
 4 | # Generated - do not edit!
 5 | #
 6 | 
 7 | # Macros
 8 | TOP=`pwd` # directory the script was started from; every later step cd's back here first
 9 | CND_PLATFORM=GNU-MacOSX
10 | CND_CONF=Release
11 | CND_DISTDIR=dist
12 | CND_BUILDDIR=build
13 | CND_DLIB_EXT=dylib
14 | NBTMPDIR=${CND_BUILDDIR}/${CND_CONF}/${CND_PLATFORM}/tmp-packaging # staging directory, four path components below TOP
15 | TMPDIRNAME=tmp-packaging
16 | OUTPUT_PATH=bin/BAMscale # file that gets packaged
17 | OUTPUT_BASENAME=BAMscale
18 | PACKAGE_TOP_DIR=bamscale/ # top-level directory inside the archive; the trailing slash is relied on where it is concatenated with "bin/"
19 | 
20 | # Functions
21 | function checkReturnCode
22 | { # Abort the script with the exit status of the command that ran immediately before this call, if that status is non-zero.
23 |     rc=$? # must stay the first statement: any later command would overwrite $?; rc is a global, not a local
24 |     if [ $rc != 0 ]
25 |     then
26 |         exit $rc # propagate the failing command's status as the script's status
27 |     fi
28 | }
29 | function makeDirectory
30 | # $1 directory path (created with mkdir -p, so missing parents are created too)
31 | # $2 permission (optional; when non-empty it is applied with chmod after creation)
32 | {
33 |     mkdir -p "$1"
34 |     checkReturnCode # exits the script if mkdir failed
35 |     if [ "$2" != "" ]
36 |     then
37 |       chmod $2 "$1" # $2 is expanded unquoted here
38 |       checkReturnCode # exits the script if chmod failed
39 |     fi
40 | }
41 | function copyFileToTmpDir
42 | # $1 from-file path
43 | # $2 to-file path
44 | # $3 permission (applied to the copy with chmod only when non-empty)
45 | {
46 |     cp "$1" "$2"
47 |     checkReturnCode # exits the script if cp failed
48 |     if [ "$3" != "" ]
49 |     then
50 |         chmod $3 "$2" # $3 is expanded unquoted here
51 |         checkReturnCode # exits the script if chmod failed
52 |     fi
53 | }
54 | 
55 | # Setup: create the package output directory and an empty staging directory
56 | cd "${TOP}"
57 | mkdir -p ${CND_DISTDIR}/${CND_CONF}/${CND_PLATFORM}/package
58 | rm -rf ${NBTMPDIR} # discard anything left over in the staging directory
59 | mkdir -p ${NBTMPDIR}
60 | 
61 | # Copy files and create directories and links
62 | cd "${TOP}"
63 | makeDirectory "${NBTMPDIR}/bamscale/bin" # literal "bamscale/" here equals ${PACKAGE_TOP_DIR} used on the next line
64 | copyFileToTmpDir "${OUTPUT_PATH}" "${NBTMPDIR}/${PACKAGE_TOP_DIR}bin/${OUTPUT_BASENAME}" 0755
65 | 
66 | 
67 | # Generate tar file
68 | cd "${TOP}"
69 | rm -f ${CND_DISTDIR}/${CND_CONF}/${CND_PLATFORM}/package/bamscale.tar
70 | cd ${NBTMPDIR} # this cd is not followed by checkReturnCode; the relative path on the next line assumes it succeeded
71 | tar -vcf ../../../../${CND_DISTDIR}/${CND_CONF}/${CND_PLATFORM}/package/bamscale.tar * # ../../../../ climbs the four components of NBTMPDIR back to TOP; * archives everything staged
72 | checkReturnCode # exits the script if tar failed
73 | 
74 | # Cleanup: remove the staging directory
75 | cd "${TOP}"
76 | rm -rf ${NBTMPDIR}
77 | 


--------------------------------------------------------------------------------
/nbproject/configurations.xml:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <configurationDescriptor version="100">
  3 |   <logicalFolder name="root" displayName="root" projectFiles="true" kind="ROOT">
  4 |     <logicalFolder name="HeaderFiles"
  5 |                    displayName="Header Files"
  6 |                    projectFiles="true">
  7 |       <itemPath>includes/BAMcoverage.h</itemPath>
  8 |       <itemPath>includes/BAMstructs.h</itemPath>
  9 |       <itemPath>includes/BEDstruct.h</itemPath>
 10 |       <itemPath>includes/CHROMstruct.h</itemPath>
 11 |       <itemPath>includes/Definitions.h</itemPath>
 12 |       <itemPath>includes/Inputs.h</itemPath>
 13 |       <itemPath>includes/Writer.h</itemPath>
 14 |       <itemPath>includes/binning.h</itemPath>
 15 |       <itemPath>includes/main.h</itemPath>
 16 |       <itemPath>includes/multithreads.h</itemPath>
 17 |       <itemPath>includes/scale.h</itemPath>
 18 |       <itemPath>includes/segmenter.h</itemPath>
 19 |     </logicalFolder>
 20 |     <logicalFolder name="ResourceFiles"
 21 |                    displayName="Resource Files"
 22 |                    projectFiles="true">
 23 |     </logicalFolder>
 24 |     <logicalFolder name="SourceFiles"
 25 |                    displayName="Source Files"
 26 |                    projectFiles="true">
 27 |       <itemPath>src/BAMcoverage.c</itemPath>
 28 |       <itemPath>src/BAMstructs.c</itemPath>
 29 |       <itemPath>src/BEDstruct.c</itemPath>
 30 |       <itemPath>src/CHROMstruct.c</itemPath>
 31 |       <itemPath>src/Inputs.c</itemPath>
 32 |       <itemPath>src/Writer.c</itemPath>
 33 |       <itemPath>src/binning.c</itemPath>
 34 |       <itemPath>src/main.c</itemPath>
 35 |       <itemPath>src/multithreads.c</itemPath>
 36 |       <itemPath>src/scale.c</itemPath>
 37 |       <itemPath>src/segmenter.c</itemPath>
 38 |     </logicalFolder>
 39 |     <logicalFolder name="TestFiles"
 40 |                    displayName="Test Files"
 41 |                    projectFiles="false"
 42 |                    kind="TEST_LOGICAL_FOLDER">
 43 |     </logicalFolder>
 44 |     <logicalFolder name="ExternalFiles"
 45 |                    displayName="Important Files"
 46 |                    projectFiles="false"
 47 |                    kind="IMPORTANT_FILES_FOLDER">
 48 |       <itemPath>Makefile</itemPath>
 49 |     </logicalFolder>
 50 |     <itemPath>.gitignore</itemPath>
 51 |     <itemPath>Dockerfile</itemPath>
 52 |     <itemPath>LICENSE</itemPath>
 53 |     <itemPath>README.md</itemPath>
 54 |   </logicalFolder>
 55 |   <sourceRootList>
 56 |     <Elem>src</Elem>
 57 |   </sourceRootList>
 58 |   <projectmakefile>Makefile</projectmakefile>
 59 |   <confs>
 60 |     <conf name="Release" type="1">
 61 |       <toolsSet>
 62 |         <compilerSet>default</compilerSet>
 63 |         <dependencyChecking>true</dependencyChecking>
 64 |         <rebuildPropChanged>false</rebuildPropChanged>
 65 |       </toolsSet>
 66 |       <compileType>
 67 |         <cTool>
 68 |           <developmentMode>5</developmentMode>
 69 |           <incDir>
 70 |             <pElem>includes</pElem>
 71 |           </incDir>
 72 |         </cTool>
 73 |         <ccTool>
 74 |           <developmentMode>5</developmentMode>
 75 |           <standard>11</standard>
 76 |         </ccTool>
 77 |         <fortranCompilerTool>
 78 |           <developmentMode>5</developmentMode>
 79 |         </fortranCompilerTool>
 80 |         <asmTool>
 81 |           <developmentMode>5</developmentMode>
 82 |         </asmTool>
 83 |         <linkerTool>
 84 |           <output>bin/BAMscale</output>
 85 |           <linkerDepDynSerchKind>3</linkerDepDynSerchKind>
 86 |           <commandLine>-lBigWig -lhts -lz -lm -lbz2 -llzma -lcurl -ldl -lpthread</commandLine>
 87 |         </linkerTool>
 88 |       </compileType>
 89 |       <item path=".gitignore" ex="false" tool="3" flavor2="0">
 90 |       </item>
 91 |       <item path="Dockerfile" ex="false" tool="3" flavor2="0">
 92 |       </item>
 93 |       <item path="LICENSE" ex="false" tool="3" flavor2="0">
 94 |       </item>
 95 |       <item path="README.md" ex="false" tool="3" flavor2="0">
 96 |       </item>
 97 |       <item path="includes/BAMcoverage.h" ex="false" tool="3" flavor2="0">
 98 |       </item>
 99 |       <item path="includes/BAMstructs.h" ex="false" tool="3" flavor2="0">
100 |       </item>
101 |       <item path="includes/BEDstruct.h" ex="false" tool="3" flavor2="0">
102 |       </item>
103 |       <item path="includes/CHROMstruct.h" ex="false" tool="3" flavor2="0">
104 |       </item>
105 |       <item path="includes/Definitions.h" ex="false" tool="3" flavor2="0">
106 |       </item>
107 |       <item path="includes/Inputs.h" ex="false" tool="3" flavor2="0">
108 |       </item>
109 |       <item path="includes/Writer.h" ex="false" tool="3" flavor2="0">
110 |       </item>
111 |       <item path="includes/binning.h" ex="false" tool="3" flavor2="0">
112 |       </item>
113 |       <item path="includes/main.h" ex="false" tool="3" flavor2="0">
114 |       </item>
115 |       <item path="includes/multithreads.h" ex="false" tool="3" flavor2="0">
116 |       </item>
117 |       <item path="includes/scale.h" ex="false" tool="3" flavor2="0">
118 |       </item>
119 |       <item path="includes/segmenter.h" ex="false" tool="3" flavor2="0">
120 |       </item>
121 |       <item path="src/BAMcoverage.c" ex="false" tool="0" flavor2="0">
122 |       </item>
123 |       <item path="src/BAMstructs.c" ex="false" tool="0" flavor2="0">
124 |       </item>
125 |       <item path="src/BEDstruct.c" ex="false" tool="0" flavor2="0">
126 |       </item>
127 |       <item path="src/CHROMstruct.c" ex="false" tool="0" flavor2="0">
128 |       </item>
129 |       <item path="src/Inputs.c" ex="false" tool="0" flavor2="0">
130 |       </item>
131 |       <item path="src/Writer.c" ex="false" tool="0" flavor2="0">
132 |       </item>
133 |       <item path="src/binning.c" ex="false" tool="0" flavor2="0">
134 |       </item>
135 |       <item path="src/main.c" ex="false" tool="0" flavor2="0">
136 |       </item>
137 |       <item path="src/multithreads.c" ex="false" tool="0" flavor2="0">
138 |       </item>
139 |       <item path="src/scale.c" ex="false" tool="0" flavor2="0">
140 |       </item>
141 |       <item path="src/segmenter.c" ex="false" tool="0" flavor2="0">
142 |       </item>
143 |     </conf>
144 |   </confs>
145 | </configurationDescriptor>
146 | 


--------------------------------------------------------------------------------
/nbproject/private/Makefile-variables.mk:
--------------------------------------------------------------------------------
1 | #
2 | # Generated - do not edit!
3 | #
4 | # NOCDDL
5 | #
6 | # Release configuration
7 | 


--------------------------------------------------------------------------------
/nbproject/private/c_standard_headers_indexer.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 3 |  *
 4 |  * Copyright (c) 2016 Oracle and/or its affiliates. All rights reserved.
 5 |  *
 6 |  * Oracle and Java are registered trademarks of Oracle and/or its affiliates.
 7 |  * Other names may be trademarks of their respective owners.
 8 |  *
 9 |  * The contents of this file are subject to the terms of either the GNU
10 |  * General Public License Version 2 only ("GPL") or the Common
11 |  * Development and Distribution License("CDDL") (collectively, the
12 |  * "License"). You may not use this file except in compliance with the
13 |  * License. You can obtain a copy of the License at
14 |  * http://www.netbeans.org/cddl-gplv2.html
15 |  * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
16 |  * specific language governing permissions and limitations under the
17 |  * License.  When distributing the software, include this License Header
18 |  * Notice in each file and include the License file at
19 |  * nbbuild/licenses/CDDL-GPL-2-CP.  Oracle designates this
20 |  * particular file as subject to the "Classpath" exception as provided
21 |  * by Oracle in the GPL Version 2 section of the License file that
22 |  * accompanied this code. If applicable, add the following below the
23 |  * License Header, with the fields enclosed by brackets [] replaced by
24 |  * your own identifying information:
25 |  * "Portions Copyrighted [year] [name of copyright owner]"
26 |  *
27 |  * If you wish your version of this file to be governed by only the CDDL
28 |  * or only the GPL Version 2, indicate your decision by adding
29 |  * "[Contributor] elects to include this software in this distribution
30 |  * under the [CDDL or GPL Version 2] license." If you do not indicate a
31 |  * single choice of license, a recipient has the option to distribute
32 |  * your version of this file under either the CDDL, the GPL Version 2 or
33 |  * to extend the choice of license to its licensees as provided above.
34 |  * However, if you add GPL Version 2 code and therefore, elected the GPL
35 |  * Version 2 license, then the option applies only if the new code is
36 |  * made subject to such option by the copyright holder.
37 |  *
38 |  * Contributor(s):
39 |  */
40 | 
41 | // List of standard headers was taken in http://en.cppreference.com/w/c/header
42 | 
43 | #include <assert.h> 	 // Conditionally compiled macro that compares its argument to zero
44 | #include <ctype.h> 	 // Functions to determine the type contained in character data
45 | #include <errno.h> 	 // Macros reporting error conditions
46 | #include <float.h> 	 // Limits of float types
47 | #include <limits.h> 	 // Sizes of basic types
48 | #include <locale.h> 	 // Localization utilities
49 | #include <math.h> 	 // Common mathematics functions
50 | #include <setjmp.h> 	 // Nonlocal jumps
51 | #include <signal.h> 	 // Signal handling
52 | #include <stdarg.h> 	 // Variable arguments
53 | #include <stddef.h> 	 // Common macro definitions
54 | #include <stdio.h> 	 // Input/output
55 | #include <string.h> 	 // String handling
56 | #include <stdlib.h> 	 // General utilities: memory management, program utilities, string conversions, random numbers
57 | #include <time.h> 	 // Time/date utilities
58 | #include <iso646.h>      // (since C95) Alternative operator spellings
59 | #include <wchar.h>       // (since C95) Extended multibyte and wide character utilities
60 | #include <wctype.h>      // (since C95) Wide character classification and mapping utilities
61 | #ifdef _STDC_C99
62 | #include <complex.h>     // (since C99) Complex number arithmetic
63 | #include <fenv.h>        // (since C99) Floating-point environment
64 | #include <inttypes.h>    // (since C99) Format conversion of integer types
65 | #include <stdbool.h>     // (since C99) Boolean type
66 | #include <stdint.h>      // (since C99) Fixed-width integer types
67 | #include <tgmath.h>      // (since C99) Type-generic math (macros wrapping math.h and complex.h)
68 | #endif
69 | #ifdef _STDC_C11
70 | #include <stdalign.h>    // (since C11) alignas and alignof convenience macros
71 | #include <stdatomic.h>   // (since C11) Atomic types
72 | #include <stdnoreturn.h> // (since C11) noreturn convenience macros
73 | #include <threads.h>     // (since C11) Thread library
74 | #include <uchar.h>       // (since C11) UTF-16 and UTF-32 character utilities
75 | #endif
76 | 


--------------------------------------------------------------------------------
/nbproject/private/configurations.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <configurationDescriptor version="100">
 3 |   <projectmakefile>Makefile</projectmakefile>
 4 |   <confs>
 5 |     <conf name="Release" type="1">
 6 |       <toolsSet>
 7 |         <developmentServer>localhost</developmentServer>
 8 |         <platform>4</platform>
 9 |       </toolsSet>
10 |       <dbx_gdbdebugger version="1">
11 |         <gdb_pathmaps>
12 |         </gdb_pathmaps>
13 |         <gdb_interceptlist>
14 |           <gdbinterceptoptions gdb_all="false" gdb_unhandled="true" gdb_unexpected="true"/>
15 |         </gdb_interceptlist>
16 |         <gdb_options>
17 |           <DebugOptions>
18 |           </DebugOptions>
19 |         </gdb_options>
20 |         <gdb_buildfirst gdb_buildfirst_overriden="false" gdb_buildfirst_old="false"/>
21 |       </dbx_gdbdebugger>
22 |       <nativedebugger version="1">
23 |         <engine>gdb</engine>
24 |       </nativedebugger>
25 |       <runprofile version="9">
26 |         <runcommandpicklist>
27 |           <runcommandpicklistitem>"${OUTPUT_PATH}" scale --bam tmp.bam -t 8 -r unscaled</runcommandpicklistitem>
28 |           <runcommandpicklistitem>"${OUTPUT_PATH}" scale --bam tmp.bam -t 8 -r unscaled -z 15</runcommandpicklistitem>
29 |           <runcommandpicklistitem>"${OUTPUT_PATH}" scale --bam tmp.bam -t 8 -r unscaled -z 25</runcommandpicklistitem>
30 |           <runcommandpicklistitem>"${OUTPUT_PATH}" scale --bam tmp.bam -t 8 -k no-z 25</runcommandpicklistitem>
31 |           <runcommandpicklistitem>"${OUTPUT_PATH}" scale --bam tmp.bam -t 8 -k no -z 25</runcommandpicklistitem>
32 |           <runcommandpicklistitem>"${OUTPUT_PATH}" scale --bam tmp.bam -t 8 -k no -r unscaled -z 25</runcommandpicklistitem>
33 |           <runcommandpicklistitem>"${OUTPUT_PATH}" scale --bam tmp.bam -t 8 -k no -r unscaled -z 15</runcommandpicklistitem>
34 |           <runcommandpicklistitem>"${OUTPUT_PATH}" scale --bam tmp.bam -t 8 -k no -r unscaled</runcommandpicklistitem>
35 |           <runcommandpicklistitem>"${OUTPUT_PATH}" scale --bam /Users/pongorls/locus.bam</runcommandpicklistitem>
36 |           <runcommandpicklistitem>"${OUTPUT_PATH}" scale --bam /Volumes/LMP/ngs/chip/SCLC_cell_lines/alignments/NCI-H69_H3K27ac_rep1.hg19_clean.bam</runcommandpicklistitem>
37 |         </runcommandpicklist>
38 |         <runcommand>"${OUTPUT_PATH}" scale --bam /Volumes/LMP/ngs/chip/SCLC_cell_lines/alignments/NCI-H69_H3K27ac_rep1.hg19_clean.bam</runcommand>
39 |         <rundir></rundir>
40 |         <buildfirst>true</buildfirst>
41 |         <terminal-type>0</terminal-type>
42 |         <remove-instrumentation>0</remove-instrumentation>
43 |         <environment>
44 |         </environment>
45 |       </runprofile>
46 |     </conf>
47 |   </confs>
48 | </configurationDescriptor>
49 | 


--------------------------------------------------------------------------------
/nbproject/private/cpp_standard_headers_indexer.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
  3 |  *
  4 |  * Copyright (c) 2016 Oracle and/or its affiliates. All rights reserved.
  5 |  *
  6 |  * Oracle and Java are registered trademarks of Oracle and/or its affiliates.
  7 |  * Other names may be trademarks of their respective owners.
  8 |  *
  9 |  * The contents of this file are subject to the terms of either the GNU
 10 |  * General Public License Version 2 only ("GPL") or the Common
 11 |  * Development and Distribution License("CDDL") (collectively, the
 12 |  * "License"). You may not use this file except in compliance with the
 13 |  * License. You can obtain a copy of the License at
 14 |  * http://www.netbeans.org/cddl-gplv2.html
 15 |  * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
 16 |  * specific language governing permissions and limitations under the
 17 |  * License.  When distributing the software, include this License Header
 18 |  * Notice in each file and include the License file at
 19 |  * nbbuild/licenses/CDDL-GPL-2-CP.  Oracle designates this
 20 |  * particular file as subject to the "Classpath" exception as provided
 21 |  * by Oracle in the GPL Version 2 section of the License file that
 22 |  * accompanied this code. If applicable, add the following below the
 23 |  * License Header, with the fields enclosed by brackets [] replaced by
 24 |  * your own identifying information:
 25 |  * "Portions Copyrighted [year] [name of copyright owner]"
 26 |  *
 27 |  * If you wish your version of this file to be governed by only the CDDL
 28 |  * or only the GPL Version 2, indicate your decision by adding
 29 |  * "[Contributor] elects to include this software in this distribution
 30 |  * under the [CDDL or GPL Version 2] license." If you do not indicate a
 31 |  * single choice of license, a recipient has the option to distribute
 32 |  * your version of this file under either the CDDL, the GPL Version 2 or
 33 |  * to extend the choice of license to its licensees as provided above.
 34 |  * However, if you add GPL Version 2 code and therefore, elected the GPL
 35 |  * Version 2 license, then the option applies only if the new code is
 36 |  * made subject to such option by the copyright holder.
 37 |  *
 38 |  * Contributor(s):
 39 |  */
 40 | 
 41 | // List of standard headers was taken in http://en.cppreference.com/w/cpp/header
 42 | 
 43 | #include <cstdlib> 	    // General purpose utilities: program control, dynamic memory allocation, random numbers, sort and search
 44 | #include <csignal> 	    // Functions and macro constants for signal management
 45 | #include <csetjmp> 	    // Macro (and function) that saves (and jumps) to an execution context
 46 | #include <cstdarg> 	    // Handling of variable length argument lists
 47 | #include <typeinfo> 	    // Runtime type information utilities
 48 | #include <bitset> 	    // std::bitset class template
 49 | #include <functional> 	    // Function objects, designed for use with the standard algorithms
 50 | #include <utility> 	    // Various utility components
 51 | #include <ctime> 	    // C-style time/date utilites
 52 | #include <cstddef> 	    // typedefs for types such as size_t, NULL and others
 53 | #include <new> 	            // Low-level memory management utilities
 54 | #include <memory> 	    // Higher level memory management utilities
 55 | #include <climits>          // limits of integral types
 56 | #include <cfloat> 	    // limits of float types
 57 | #include <limits> 	    // standardized way to query properties of arithmetic types
 58 | #include <exception> 	    // Exception handling utilities
 59 | #include <stdexcept> 	    // Standard exception objects
 60 | #include <cassert> 	    // Conditionally compiled macro that compares its argument to zero
 61 | #include <cerrno>           // Macro containing the last error number
 62 | #include <cctype>           // functions to determine the type contained in character data
 63 | #include <cwctype>          // functions for determining the type of wide character data
 64 | #include <cstring> 	    // various narrow character string handling functions
 65 | #include <cwchar> 	    // various wide and multibyte string handling functions
 66 | #include <string> 	    // std::basic_string class template
 67 | #include <vector> 	    // std::vector container
 68 | #include <deque> 	    // std::deque container
 69 | #include <list> 	    // std::list container
 70 | #include <set> 	            // std::set and std::multiset associative containers
 71 | #include <map> 	            // std::map and std::multimap associative containers
 72 | #include <stack> 	    // std::stack container adaptor
 73 | #include <queue> 	    // std::queue and std::priority_queue container adaptors
 74 | #include <algorithm> 	    // Algorithms that operate on containers
 75 | #include <iterator> 	    // Container iterators
 76 | #include <cmath>            // Common mathematics functions
 77 | #include <complex>          // Complex number type
 78 | #include <valarray>         // Class for representing and manipulating arrays of values
 79 | #include <numeric>          // Numeric operations on values in containers
 80 | #include <iosfwd>           // forward declarations of all classes in the input/output library
 81 | #include <ios>              // std::ios_base class, std::basic_ios class template and several typedefs
 82 | #include <istream>          // std::basic_istream class template and several typedefs
 83 | #include <ostream>          // std::basic_ostream, std::basic_iostream class templates and several typedefs
 84 | #include <iostream>         // several standard stream objects
 85 | #include <fstream>          // std::basic_fstream, std::basic_ifstream, std::basic_ofstream class templates and several typedefs
 86 | #include <sstream>          // std::basic_stringstream, std::basic_istringstream, std::basic_ostringstream class templates and several typedefs
 87 | #include <strstream>        // std::strstream, std::istrstream, std::ostrstream(deprecated)
 88 | #include <iomanip>          // Helper functions to control the format or input and output
 89 | #include <streambuf>        // std::basic_streambuf class template
 90 | #include <cstdio>           // C-style input-output functions
 91 | #include <locale>           // Localization utilities
 92 | #include <clocale>          // C localization utilities
 93 | #include <ciso646>          // empty header. The macros that appear in iso646.h in C are keywords in C++
 94 | #if __cplusplus >= 201103L
 95 | #include <typeindex>        // (since C++11) 	std::type_index
 96 | #include <type_traits>      // (since C++11) 	Compile-time type information
 97 | #include <chrono>           // (since C++11) 	C++ time utilites
 98 | #include <initializer_list> // (since C++11) 	std::initializer_list class template
 99 | #include <tuple>            // (since C++11) 	std::tuple class template
100 | #include <scoped_allocator> // (since C++11) 	Nested allocator class
101 | #include <cstdint>          // (since C++11) 	fixed-size types and limits of other types
102 | #include <cinttypes>        // (since C++11) 	formatting macros , intmax_t and uintmax_t math and conversions
103 | #include <system_error>     // (since C++11) 	defines std::error_code, a platform-dependent error code
104 | #include <cuchar>           // (since C++11) 	C-style Unicode character conversion functions
105 | #include <array>            // (since C++11) 	std::array container
106 | #include <forward_list>     // (since C++11) 	std::forward_list container
107 | #include <unordered_set>    // (since C++11) 	std::unordered_set and std::unordered_multiset unordered associative containers
108 | #include <unordered_map>    // (since C++11) 	std::unordered_map and std::unordered_multimap unordered associative containers
109 | #include <random>           // (since C++11) 	Random number generators and distributions
110 | #include <ratio>            // (since C++11) 	Compile-time rational arithmetic
111 | #include <cfenv>            // (since C++11) 	Floating-point environment access functions
112 | #include <codecvt>          // (since C++11) 	Unicode conversion facilities
113 | #include <regex>            // (since C++11) 	Classes, algorithms and iterators to support regular expression processing
114 | #include <atomic>           // (since C++11) 	Atomic operations library
115 | #include <ccomplex>         // (since C++11)(deprecated in C++17) 	simply includes the header <complex>
116 | #include <ctgmath>          // (since C++11)(deprecated in C++17) 	simply includes the headers <ccomplex> (until C++17)<complex> (since C++17) and <cmath>: the overloads equivalent to the contents of the C header tgmath.h are already provided by those headers
117 | #include <cstdalign>        // (since C++11)(deprecated in C++17) 	defines one compatibility macro constant
118 | #include <cstdbool>         // (since C++11)(deprecated in C++17) 	defines one compatibility macro constant
119 | #include <thread>           // (since C++11) 	std::thread class and supporting functions
120 | #include <mutex>            // (since C++11) 	mutual exclusion primitives
121 | #include <future>           // (since C++11) 	primitives for asynchronous computations
122 | #include <condition_variable> // (since C++11) 	thread waiting conditions
123 | #endif
124 | #if __cplusplus >= 201300L
125 | #include <shared_mutex>     // (since C++14) 	shared mutual exclusion primitives
126 | #endif
127 | #if __cplusplus >= 201500L
128 | #include <any>              // (since C++17) 	std::any class template
129 | #include <optional>         // (since C++17) 	std::optional class template
130 | #include <variant>          // (since C++17) 	std::variant class template
131 | #include <memory_resource>  // (since C++17) 	Polymorphic allocators and memory resources
132 | #include <string_view>      // (since C++17) 	std::basic_string_view class template
133 | #include <execution>        // (since C++17) 	Predefined execution policies for parallel versions of the algorithms
134 | #include <filesystem>       // (since C++17) 	std::path class and supporting functions
135 | #endif
136 | 


--------------------------------------------------------------------------------
/nbproject/private/launcher.properties:
--------------------------------------------------------------------------------
 1 | # Launchers File syntax:
 2 | #
 3 | # [Must-have property line] 
 4 | # launcher1.runCommand=<Run Command>
 5 | # [Optional extra properties] 
 6 | # launcher1.displayName=<Display Name, runCommand by default>
 7 | # launcher1.hide=<true if launcher is not visible in menu, false by default>
 8 | # launcher1.buildCommand=<Build Command, Build Command specified in project properties by default>
 9 | # launcher1.runDir=<Run Directory, ${PROJECT_DIR} by default>
10 | # launcher1.runInOwnTab=<false if launcher reuse common "Run" output tab, true by default>
11 | # launcher1.symbolFiles=<Symbol Files loaded by debugger, ${OUTPUT_PATH} by default>
12 | # launcher1.env.<Environment variable KEY>=<Environment variable VALUE>
13 | # (If this value is quoted with ` it is handled as a native command which execution result will become the value)
14 | # [Common launcher properties]
15 | # common.runDir=<Run Directory>
16 | # (This value is overwritten by a launcher specific runDir value if the latter exists)
17 | # common.env.<Environment variable KEY>=<Environment variable VALUE>
18 | # (Environment variables from common launcher are merged with launcher specific variables)
19 | # common.symbolFiles=<Symbol Files loaded by debugger>
20 | # (This value is overwritten by a launcher specific symbolFiles value if the latter exists)
21 | #
22 | # In runDir, symbolFiles and env fields you can use these macros:
23 | # ${PROJECT_DIR}    -   project directory absolute path
24 | # ${OUTPUT_PATH}    -   linker output path (relative to project directory path)
25 | # ${OUTPUT_BASENAME}-   linker output filename
26 | # ${TESTDIR}        -   test files directory (relative to project directory path)
27 | # ${OBJECTDIR}      -   object files directory (relative to project directory path)
28 | # ${CND_DISTDIR}    -   distribution directory (relative to project directory path)
29 | # ${CND_BUILDDIR}   -   build directory (relative to project directory path)
30 | # ${CND_PLATFORM}   -   platform name
31 | # ${CND_CONF}       -   configuration name
32 | # ${CND_DLIB_EXT}   -   dynamic library extension
33 | #
34 | # All the project launchers must be listed in the file!
35 | #
36 | # launcher1.runCommand=...
37 | # launcher2.runCommand=...
38 | # ...
39 | # common.runDir=...
40 | # common.env.KEY=VALUE
41 | 
42 | # launcher1.runCommand=<type your run command here>


--------------------------------------------------------------------------------
/nbproject/private/private.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project-private xmlns="http://www.netbeans.org/ns/project-private/1">
 3 |     <data xmlns="http://www.netbeans.org/ns/make-project-private/1">
 4 |         <activeConfTypeElem>1</activeConfTypeElem>
 5 |         <activeConfIndexElem>0</activeConfIndexElem>
 6 |     </data>
 7 |     <editor-bookmarks xmlns="http://www.netbeans.org/ns/editor-bookmarks/2" lastBookmarkId="0"/>
 8 |     <open-files xmlns="http://www.netbeans.org/ns/projectui-open-files/2">
 9 |         <group>
10 |             <file>file:/Users/pongorls/NetBeansProjects/BAMscale/BAMscale/src/Writer.c</file>
11 |             <file>file:/Users/pongorls/NetBeansProjects/BAMscale/BAMscale/src/Inputs.c</file>
12 |             <file>file:/Users/pongorls/NetBeansProjects/BAMscale/BAMscale/includes/BAMstructs.h</file>
13 |             <file>file:/Users/pongorls/NetBeansProjects/BAMscale/BAMscale/src/BAMstructs.c</file>
14 |         </group>
15 |     </open-files>
16 | </project-private>
17 | 


--------------------------------------------------------------------------------
/nbproject/project.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project xmlns="http://www.netbeans.org/ns/project/1">
 3 |     <type>org.netbeans.modules.cnd.makeproject</type>
 4 |     <configuration>
 5 |         <data xmlns="http://www.netbeans.org/ns/make-project/1">
 6 |             <name>BAMscale</name>
 7 |             <c-extensions>c</c-extensions>
 8 |             <cpp-extensions/>
 9 |             <header-extensions>h</header-extensions>
10 |             <sourceEncoding>UTF-8</sourceEncoding>
11 |             <make-dep-projects/>
12 |             <sourceRootList>
13 |                 <sourceRootElem>src</sourceRootElem>
14 |             </sourceRootList>
15 |             <confList>
16 |                 <confElem>
17 |                     <name>Release</name>
18 |                     <type>1</type>
19 |                 </confElem>
20 |             </confList>
21 |             <formatting>
22 |                 <project-formatting-style>false</project-formatting-style>
23 |             </formatting>
24 |         </data>
25 |     </configuration>
26 | </project>
27 | 


--------------------------------------------------------------------------------
/src/BAMstructs.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * To change this license header, choose License Headers in Project Properties.
  3 |  * To change this template file, choose Tools | Templates
  4 |  * and open the template in the editor.
  5 |  */
  6 | 
  7 | /* 
  8 |  * File:   BAMstructs.c
  9 |  * Author: pongorls
 10 |  * 
 11 |  * Created on November 28, 2018, 11:58 AM
 12 |  */
 13 | 
 14 | #include <stdio.h>
 15 | #include <stdlib.h>
 16 | #include <string.h>
 17 | #include <htslib/sam.h>
 18 | #include <errno.h>
 19 | #include <fcntl.h>
 20 | #include <sys/stat.h>
 21 | #include <sys/types.h>
 22 | #include <unistd.h>
 23 | 
 24 | #include "Definitions.h"
 25 | #include "BAMstructs.h"
 26 | 
 27 | int CheckIndexShortFile(char *fname) {
 28 |     if(fname == NULL)
 29 |         return 0;
 30 |     
 31 |     char *idx = (char *)calloc(strlen(fname) + 1, sizeof(char));
 32 |     strcpy(idx, fname);
 33 |     idx[strlen(idx)-1] = 'i';
 34 |     
 35 |     printf("%s\n", idx);
 36 |     
 37 |     if(access( idx, F_OK ) == -1) {
 38 |         if(idx)
 39 |             free(idx);
 40 |         
 41 |         return 0;
 42 |         
 43 |     }
 44 |     
 45 |     if(idx)
 46 |         free(idx);
 47 |     
 48 |     return 1;
 49 | }
 50 | 
 51 | int CheckIndexFile(char *fname) {
 52 |     if(fname == NULL)
 53 |         return 0;
 54 |     
 55 |     char *idx = (char *)calloc(strlen(fname) + 5, sizeof(char));
 56 |     strcpy(idx, fname);
 57 |     strcat(idx, ".bai");
 58 |     
 59 |     if(access( idx, F_OK ) == -1) {
 60 |         if(idx)
 61 |             free(idx);
 62 |         return CheckIndexShortFile(fname);
 63 |     }
 64 |     
 65 |     if(idx)
 66 |         free(idx);
 67 |     
 68 |     return 1;
 69 | }
 70 | 
 71 | void DestroyBAMstruct(BAMFILES *head) {
 72 |     BAMFILES *curr = head;
 73 | 
 74 |     while (head != NULL) {
 75 |         curr = head;
 76 |         head = head->next;
 77 | 
 78 |         if (curr->name)
 79 |             free(curr->name);
 80 | 
 81 |         if (curr)
 82 |             free(curr);
 83 |     }
 84 | 
 85 |     head = NULL;
 86 |     curr = NULL;
 87 | }
 88 | 
 89 | BAMFILES *AddBAMstruct(char *BAMname, BAMFILES *head) {
 90 |     BAMFILES *ptr = (BAMFILES *) calloc (1, sizeof (BAMFILES));
 91 |     BAMFILES *curr = head;
 92 |     ptr->name = NULL;
 93 |     ptr->shortname = NULL;
 94 |     ptr->read_coverage = -1;
 95 |     ptr->scale = 1;
 96 |     ptr->next = NULL;
 97 |     ptr->filtered_reads = 0;
 98 |     ptr->base_coverage = 0;
 99 |     ptr->genome_scale = 0.0;
100 | 
101 |     ptr->name = strdup(BAMname);
102 |     char *p = strrchr(BAMname, '/');
103 |         
104 |     if(p) {
105 |         p++;
106 |         ptr->shortname = strdup(p);
107 |     }
108 |     
109 |     else {
110 |         ptr->shortname = strdup(BAMname);
111 |     }
112 |     
113 |     if (head == NULL) {
114 |         ptr->id = 0;
115 |         head = ptr;
116 |     } else {
117 |         while (curr->next != NULL) {
118 |             curr = curr->next;
119 |         }
120 | 
121 |         curr->next = ptr;
122 |         ptr->id = curr->id + 1;
123 |     }
124 | 
125 |     return head;
126 | }
127 | 
128 | void PrintBAMstructs(BAMFILES *head) {
129 |     BAMFILES *curr = head;
130 | 
131 |     while (curr != NULL) {
132 |         printf("File: %s\n\tID:%d\n\tNo. of reads: %d\n\tScale: %f\n", curr->name, curr->id, curr->read_coverage, curr->scale);
133 |         curr = curr->next;
134 |     }
135 | }
136 | 


--------------------------------------------------------------------------------
/src/CHROMstruct.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * To change this license header, choose License Headers in Project Properties.
  3 |  * To change this template file, choose Tools | Templates
  4 |  * and open the template in the editor.
  5 |  */
  6 | 
  7 | /* 
  8 |  * File:   CHROMstruct.c
  9 |  * Author: pongorls
 10 |  * 
 11 |  * Created on November 28, 2018, 12:34 PM
 12 |  */
 13 | #include <stdio.h>
 14 | #include <stdlib.h>
 15 | #include <string.h>
 16 | #include <htslib/sam.h>
 17 | 
 18 | #include "Definitions.h"
 19 | #include "CHROMstruct.h"
 20 | #include "main.h"
 21 | 
 22 | uint32_t *GetChrLens(CHROMOSOMES *head, int no_of_chrs) {
 23 |     uint32_t *chrlens = (uint32_t *) malloc(sizeof (uint32_t) * no_of_chrs);
 24 |     CHROMOSOMES *curr = head;
 25 |     int i = 0;
 26 | 
 27 |     while (curr != NULL) {
 28 |         chrlens[i] = (uint32_t) curr->length;
 29 |         i++;
 30 |         curr = curr->next;
 31 |     }
 32 | 
 33 |     return chrlens;
 34 | }
 35 | 
 36 | int CountNumberOfChromosomes(CHROMOSOMES *head) {
 37 |     CHROMOSOMES *curr = head;
 38 |     int i = 0;
 39 | 
 40 |     while (curr != NULL) {
 41 |         i++;
 42 |         curr = curr->next;
 43 |     }
 44 | 
 45 |     return i;
 46 | }
 47 | 
 48 | char **GetChromosomeNames(CHROMOSOMES *head, int no_of_chrs) {
 49 |     char **chrnames = NULL;
 50 |     int i = 0;
 51 |     CHROMOSOMES *curr = head;
 52 | 
 53 |     if (no_of_chrs < 1)
 54 |         return NULL;
 55 | 
 56 |     chrnames = (char **) calloc(no_of_chrs+1, sizeof (char *));
 57 | 
 58 |     while (curr != NULL) {
 59 |         if (i < no_of_chrs) {
 60 |             chrnames[i] = strdup(curr->name);
 61 |             i++;
 62 |         } else {
 63 |             fprintf(stderr, "WARNINGS: disregarding %s, there are more chromosomes than specified?\n", curr->name);
 64 |         }
 65 | 
 66 |         curr = curr->next;
 67 |     }
 68 | 
 69 |     return chrnames;
 70 | }
 71 | 
 72 | float CalculateGenomeSize(CHROMOSOMES *head) {
 73 |     float genome_size = 0;
 74 | 
 75 |     while (head != NULL) {
 76 |         if (head->blacklist == 0 && head->length > 0)
 77 |             genome_size += (float) head->length;
 78 | 
 79 |         head = head->next;
 80 |     }
 81 | 
 82 |     return genome_size;
 83 | }
 84 | 
 85 | /**
 86 |  * Creates new CHROMOSOME structure at end of HEAD.
 87 |  * @param <b>head</b> is the CHROMOSOME linked list head, <b>name</b> is the chr name
 88 |  * @return <b>HEAD</b> pointer to the CHROMOSOME structure head
 89 |  */
 90 | CHROMOSOMES *AddCHROMstruct(CHROMOSOMES *head, char *name, int length, int no_of_samples, int threadID) {
 91 |     CHROMOSOMES *ptr = (CHROMOSOMES *) malloc(sizeof (CHROMOSOMES));
 92 |     CHROMOSOMES *curr = head;
 93 | 
 94 |     ptr->blacklist = 0;
 95 |     ptr->length = length;
 96 |     ptr->numberOfBins = -1;
 97 |     ptr->id = -1;
 98 |     ptr->allocated = 0;
 99 | 
100 |     ptr->name = NULL;
101 |     ptr->next = NULL;
102 |     ptr->coverages = NULL;
103 |     ptr->tid = threadID;
104 |     ptr->name = strdup(name);
105 |     ptr->idxreads = (int *) malloc(no_of_samples * sizeof (int));
106 |     
107 |     for(int i =0 ; i < no_of_samples; i++){
108 |         ptr->idxreads[i] = 0;
109 |     }
110 | 
111 |     if (head == NULL) {
112 |         ptr->id = 0;
113 |         head = ptr;
114 |     } else {
115 |         while (curr->next != NULL) {
116 |             curr = curr->next;
117 |         }
118 | 
119 |         curr->next = ptr;
120 |         ptr->id = curr->id + 1;
121 |     }
122 | 
123 |     return head;
124 | }
125 | 
126 | /*
127 |  * 
128 |  */
129 | void DestroyCHROMstruct(CHROMOSOMES *head, int no_of_samples) {
130 |     CHROMOSOMES *curr = head;
131 |     int i = 0;
132 | 
133 |     while (head != NULL) {
134 |         curr = head;
135 |         head = head->next;
136 |         curr->next = NULL;
137 | 
138 |         if (curr->coverages) {
139 |             for (i = 0; i < no_of_samples; i++) {
140 |                 if (curr->coverages[i]){
141 |                     free(curr->coverages[i]);
142 |                 }
143 |             }
144 |             free(curr->coverages);
145 |         }
146 | 
147 |         if (curr->idxreads)
148 |             free(curr->idxreads);
149 | 
150 |         if (curr->name)
151 |             free(curr->name);
152 | 
153 |         if (curr)
154 |             free(curr);
155 |     }
156 | }
157 | 
158 | CHROMOSOMES *ImportChromosomeDataFromBAM(char *bamfile, int no_of_samples, int threads) {
159 |     CHROMOSOMES *head = NULL;
160 |     samFile *fp_in = hts_open(bamfile, "r");
161 |     bam_hdr_t *hdr = sam_hdr_read(fp_in);
162 |     int i = 0;
163 |     int j = 0;
164 | 
165 |     for (i = 0; i < hdr->n_targets; i++) {
166 |         head = AddCHROMstruct(head, hdr->target_name[i], (int) hdr->target_len[i], no_of_samples, j);
167 | 
168 |         j++;
169 | 
170 |         if (j >= threads) {
171 |             j = 0;
172 |         }
173 |     }
174 | 
175 |     bam_hdr_destroy(hdr);
176 |     sam_close(fp_in);
177 |     return head;
178 | }
179 | 
180 | void PrintChromosomes(CHROMOSOMES *head, int no_of_samples) {
181 |     int i;
182 |     CHROMOSOMES *curr = head;
183 | 
184 |     while (curr != NULL) {
185 |         printf("Name: %s\n", curr->name);
186 |         printf("\tID: %d\n", curr->id);
187 |         printf("\tLength: %d\n", curr->length);
188 |         printf("\tBlacklisted: %d\n", curr->blacklist);
189 |         printf("\tNumber of bins: %d\n", curr->numberOfBins);
190 |         printf("\tAllocated sample bins: %d\n", curr->allocated);
191 |         printf("\tThread ID: %d\n", curr->tid);
192 | 
193 |         if (curr->idxreads) {
194 |             for (i = 0; i < no_of_samples; i++)
195 |                 printf("\t\t[ %d ] = %d\n", i, curr->idxreads[i]);
196 |         }
197 |         curr = curr->next;
198 |     }
199 | }
200 | 
201 | void PrintBlacklistedChromosomes(CHROMOSOMES *head, int no_of_samples) {
202 |     int i;
203 |     CHROMOSOMES *curr = head;
204 | 
205 |     while (curr != NULL) {
206 |         if (curr->blacklist == 1) {
207 |             printf("Name: %s\n", curr->name);
208 |             printf("\tID: %d\n", curr->id);
209 |             printf("\tLength: %d\n", curr->length);
210 |             printf("\tBlacklisted: %d\n", curr->blacklist);
211 |             printf("\tNumber of bins: %d\n", curr->numberOfBins);
212 |             printf("\tAllocated sample bins: %d\n", curr->allocated);
213 | 
214 |             if (curr->idxreads) {
215 |                 for (i = 0; i < no_of_samples; i++)
216 |                     printf("\t\t[ %d ] = %d\n", i, curr->idxreads[i]);
217 |             }
218 |         }
219 |         curr = curr->next;
220 |     }
221 | }
222 | 
223 | CHROMOSOMES *ComputeBins(CHROMOSOMES *head, int binSize) {
224 |     CHROMOSOMES *curr = head;
225 | 
226 |     while (curr != NULL) {
227 |         curr->numberOfBins = curr->length / binSize;
228 |         curr = curr->next;
229 |     }
230 | 
231 |     return head;
232 | }
233 | 
234 | CHROMOSOMES *AllocateBins(CHROMOSOMES *head, int no_of_samples) {
235 |     CHROMOSOMES *curr = head;
236 |     int i = 0;
237 | 
238 |     if (no_of_samples <= 0) {
239 |         printf("ERROR: no samples were specified??");
240 |         FreeAllocatedData();
241 |         exit(0);
242 |     }
243 | 
244 |     while (curr != NULL) {
245 |         if (curr->numberOfBins > -1 && curr->blacklist == 0) {
246 |             curr->coverages = (float **) calloc(no_of_samples+1, sizeof (float *));
247 | 
248 |             if (curr->coverages == NULL) {
249 |                 printf("ERROR: could not allocate memory for bins at chr: %s\n", curr->name);
250 |                 FreeAllocatedData();
251 |                 exit(0);
252 |             }
253 | 
254 |             for (i = 0; i < no_of_samples; i++) {
255 |                 curr->allocated++;
256 |             }
257 |         } else {
258 |             curr->coverages = NULL;
259 |         }
260 | 
261 |         curr = curr->next;
262 |     }
263 | 
264 |     return head;
265 | }
266 | 
267 | CHROMOSOMES *BlacklistChromosome(CHROMOSOMES *head, char *name) {
268 |     CHROMOSOMES *curr = head;
269 |     int found = 0;
270 | 
271 |     while (curr != NULL) {
272 |         if (strcmp(name, curr->name) == 0) {
273 |             curr->blacklist = 1;
274 |             curr->tid = -1;
275 |             found++;
276 |         }
277 | 
278 |         curr = curr->next;
279 |     }
280 | 
281 |     if (found == 0) {
282 |         printf("WARNING: \"%s\" chromosome not found and could not be blacklisted\n", name);
283 |     }
284 | 
285 |     return head;
286 | }
287 | 
288 | void BlacklistChromosomeFiles(CHROMOSOMES *head, char *filename) {
289 |     FILE *handler = fopen(filename, "r");
290 |     char line[BUFSIZ];
291 |     char *pos;
292 | 
293 |     while (fgets(line, sizeof (line), handler)) {
294 |         if ((pos = strchr(line, '\n')) != NULL)
295 |             *pos = '\0';
296 | 
297 |         head = BlacklistChromosome(head, line);
298 |     }
299 | 
300 |     fclose(handler);
301 | }
302 | 
303 | void DestroyChromCovStruct(CHRCOV *head) {
304 |     CHRCOV *curr = head;
305 | 
306 |     while (head != NULL) {
307 |         curr = head;
308 |         head = head->next;
309 | 
310 |         if (curr->name)
311 |             free(curr->name);
312 | 
313 |         if (curr->ratio)
314 |             free(curr->ratio);
315 | 
316 |         if (curr)
317 |             free(curr);
318 |     }
319 | }
320 | 
321 | CHRCOV *CreateChromCovStruct(char *name, int id, int nbins) {
322 |     CHRCOV *ptr = (CHRCOV *) malloc(sizeof (CHRCOV));
323 | 
324 |     ptr->name = strdup(name);
325 | 
326 |     ptr->id = id;
327 |     ptr->nbins = nbins;
328 | 
329 |     ptr->next = NULL;
330 |     ptr->ratio = NULL;
331 | 
332 |     return ptr;
333 | }
334 | 
335 | void DestroyRatioStruct(RATIOS *ptr) {
336 |     if (ptr->sample1)
337 |         free(ptr->sample1);
338 | 
339 |     if (ptr->sample2)
340 |         free(ptr->sample2);
341 | 
342 |     if (ptr)
343 |         free(ptr);
344 | 
345 |     if (ptr->chrcovs)
346 |         DestroyChromCovStruct(ptr->chrcovs);
347 | 
348 |     ptr = NULL;
349 | }
350 | 
351 | RATIOS *CreateRatioStruct(char *s1, char *s2, int id1, int id2) {
352 |     RATIOS *ptr = (RATIOS *) malloc(sizeof (RATIOS));
353 |     ptr->chrcovs = NULL;
354 |     ptr->s1 = id1;
355 |     ptr->s2 = id2;
356 | 
357 |     ptr->sample1 = strdup(s1);
358 | 
359 |     ptr->sample2 = strdup(s2);
360 | 
361 |     return ptr;
362 | }


--------------------------------------------------------------------------------
/src/Writer.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * To change this license header, choose License Headers in Project Properties.
  3 |  * To change this template file, choose Tools | Templates
  4 |  * and open the template in the editor.
  5 |  */
  6 | 
  7 | /* 
  8 |  * File:   Writer.c
  9 |  * Author: pongorls
 10 |  * 
 11 |  * Created on December 19, 2018, 2:27 PM
 12 |  */
 13 | 
 14 | #include <stdio.h>
 15 | #include <stdlib.h>
 16 | #include <string.h>
 17 | #include <math.h>
 18 | #include <libgen.h>
 19 | 
 20 | #include <bigWig.h>
 21 | 
 22 | #include "Writer.h"
 23 | #include "Definitions.h"
 24 | #include "main.h"
 25 | #include "scale.h"
 26 | #include "CHROMstruct.h"
 27 | #include "segmenter.h"
 28 | #include "binning.h"
 29 | #include "Inputs.h"
 30 | 
 31 | char *returnRNAfilename(CMDINPUT *cmd) {
 32 |     int fnamelen = 0;
 33 |     char *outfile = NULL;
 34 |     
 35 |     if (cmd->outdir != NULL)
 36 |         fnamelen += strlen(cmd->outdir);
 37 | 
 38 |     fnamelen += strlen(cmd->bamfiles->shortname);
 39 | 
 40 |     if(cmd->strandsplit == 1)
 41 |         fnamelen += strlen(".positive");
 42 | 
 43 |     fnamelen += 50;
 44 | 
 45 |     outfile = (char *) calloc((fnamelen*2 + 1), sizeof (char));
 46 | 
 47 |     if (cmd->outdir != NULL)
 48 |         strcpy(outfile, cmd->outdir);
 49 | 
 50 |     else
 51 |         strcpy(outfile, "./");
 52 | 
 53 |     strcat(outfile, "/");
 54 | 
 55 |     strcat(outfile, cmd->bamfiles->shortname);
 56 |         
 57 |     if(cmd->strandsplit == 1) {
 58 |         if(cmd->strand == 1)
 59 |             strcat(outfile, ".positive");
 60 |             
 61 |         if(cmd->strand == -1)
 62 |             strcat(outfile, ".negative");
 63 |     }
 64 |     
 65 |     strcat(outfile, ".");
 66 |     strcat(outfile, cmd->operation);
 67 |     
 68 |     strcat(outfile, ".bw");
 69 |     
 70 |     return outfile;
 71 | }
 72 | 
 73 | void PrintScaledBigWig(CMDINPUT *cmd, BAMFILES *curr, char *sfile) {
 74 |     char **chrnames = NULL;
 75 |     uint32_t *chrlens = NULL;
 76 |     uint32_t start = 0;
 77 |     int no_of_chrs = CountNumberOfChromosomes(cmd->chr);
 78 |     char *outfile = NULL;
 79 |     int fnamelen = 0;
 80 |     bigWigFile_t *fp = NULL;
 81 |     int i, j = 0;
 82 |     CHROMOSOMES *chr = cmd->chr;
 83 |     float *intervals = NULL;
 84 |     int blocksize = 25;
 85 |     int end, currblocksize, non_empty = 0;
 86 |     
 87 |     chrnames = GetChromosomeNames(cmd->chr, no_of_chrs);
 88 |     chrlens = GetChrLens(cmd->chr, no_of_chrs);
 89 | 
 90 |     if (cmd->outdir != NULL)
 91 |         fnamelen += strlen(cmd->outdir);
 92 | 
 93 |     fnamelen += strlen(curr->shortname);
 94 | 
 95 |     if(cmd->strandsplit == 1)
 96 |         fnamelen += strlen(".positive");
 97 |     
 98 |     if (sfile != NULL)
 99 |         fnamelen += strlen(sfile);
100 | 
101 |     fnamelen += 50;
102 | 
103 |     outfile = (char *) calloc((fnamelen*2 + 1), sizeof (char));
104 | 
105 |     if (cmd->outdir != NULL)
106 |         strcpy(outfile, cmd->outdir);
107 | 
108 |     else
109 |         strcpy(outfile, "./");
110 | 
111 |     strcat(outfile, "/");
112 | 
113 |     if (sfile == NULL) {
114 |         strcat(outfile, curr->shortname);
115 |         
116 |         if(cmd->strandsplit == 1) {
117 |             if(cmd->strand == 1)
118 |                 strcat(outfile, ".positive");
119 |             
120 |             if(cmd->strand == -1)
121 |                 strcat(outfile, ".negative");
122 |         }
123 |     }
124 |     else {
125 |         strcat(outfile, curr->shortname);
126 |         
127 |         if(strcmp(cmd->operation, INPUTS_END) != 0 && strcmp(cmd->operation, INPUTS_ENDR) != 0 && strcmp(cmd->operation, INPUTS_RFD) != 0) {
128 |             strcat(outfile, "_vs_");
129 |             strcat(outfile, sfile);
130 |         }
131 |         
132 |         if(strcmp(cmd->operation, INPUTS_END) == 0 || strcmp(cmd->operation, INPUTS_ENDR) == 0) {
133 |             strcat(outfile, ".log2");
134 |         }
135 |     }
136 | 
137 |     strcat(outfile, ".");
138 |     strcat(outfile, cmd->operation);
139 |     
140 |     strcat(outfile, ".bw");
141 | 
142 |     fp = bwOpen(outfile, NULL, "w");
143 |     bwCreateHdr(fp, cmd->binSize);
144 |     fp->cl = bwCreateChromList(chrnames, chrlens, no_of_chrs);
145 |     bwWriteHdr(fp);
146 | 
147 |     while (chr != NULL) {       
148 |         if (chr->blacklist == 0 && chr->length > cmd->binSize) { //&& strcmp(chr->name, "chr1") == 0
149 |             if (chr->length > 10000000)
150 |                 printf("Writing: %s\n", chr->name);
151 |             
152 |             int startwrite = 0;
153 |             
154 |             if(cmd->strand == -1 && strcmp(cmd->operation, INPUTS_ENDR) == 0) {
155 |                 for(i = 0; i <= chr->numberOfBins - 1; i++) {
156 |                     if(intervals[i] > 0)
157 |                         intervals[i] = -intervals[i]; 
158 |                 }
159 |             }
160 |             
161 |             //bwAddIntervalSpanSteps(fp, chr->name, start, (uint32_t)cmd->binSize, (uint32_t)cmd->binSize, chr->coverages[curr->id], (uint32_t)chr->numberOfBins-1);
162 |                         
163 |             for(int i = 0; i < chr->numberOfBins - 1; i = i + blocksize) {
164 |                 start = (uint32_t)(i * cmd->binSize);
165 |                 end = i + blocksize;
166 |                 currblocksize = blocksize;
167 |                 non_empty = 0;
168 |                 
169 |                 if(end > chr->numberOfBins - 1) {
170 |                     end  = chr->numberOfBins - 1;
171 |                     currblocksize = end - i;
172 |                 }
173 |                 
174 |                 for(j = i; j < end; j++) {
175 |                     if(chr->coverages[curr->id][j] != 0) {
176 |                         non_empty = 1;
177 |                     }
178 |                 }
179 |                 
180 |                 if(non_empty == 1) {
181 |                     if(startwrite == 0) {
182 |                         bwAddIntervalSpanSteps(fp, chr->name, start, (uint32_t)cmd->binSize, (uint32_t)cmd->binSize, chr->coverages[curr->id] + i, (uint32_t)currblocksize);
183 |                     } else {
184 |                         bwAppendIntervalSpanSteps(fp, chr->coverages[curr->id] + i, (uint32_t)currblocksize);
185 |                     }
186 |                     startwrite++;
187 |                 } else {
188 |                     startwrite = 0;
189 |                 }
190 |             }
191 |             
192 |             if(cmd->strand == -1 && strcmp(cmd->operation, INPUTS_ENDR) == 0) {
193 |                 for(i = 0; i <= chr->numberOfBins - 1; i++) {
194 |                     if(intervals[i] < 0) {
195 |                         intervals[i] = -intervals[i];
196 |                     }
197 |                 }
198 |             }
199 |         }
200 | 
201 |         chr = chr->next;
202 |     }
203 |     
204 |     bwClose(fp);
205 |     bwCleanup();
206 | 
207 |     if (chrnames) {
208 |         for (i = 0; i < no_of_chrs; i++) {
209 |             if (chrnames[i])
210 |                 free(chrnames[i]);
211 |         }
212 |         free(chrnames);
213 |     }
214 |     if (chrlens)
215 |         free(chrlens);
216 | 
217 |     if (outfile)
218 |         free(outfile);
219 | }


--------------------------------------------------------------------------------
/src/binning.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * To change this license header, choose License Headers in Project Properties.
  3 |  * To change this template file, choose Tools | Templates
  4 |  * and open the template in the editor.
  5 |  */
  6 | 
  7 | /* 
  8 |  * File:   binning.c
  9 |  * Author: pongorls
 10 |  * 
 11 |  * Created on November 28, 2018, 5:09 PM
 12 |  */
 13 | #include <stdio.h>
 14 | #include <stdlib.h>
 15 | #include <string.h>
 16 | #include <math.h>
 17 | #include <unistd.h>
 18 | #include <htslib/sam.h>
 19 | 
 20 | #include "BAMcoverage.h"
 21 | #include "main.h"
 22 | #include "CHROMstruct.h"
 23 | #include "BAMstructs.h"
 24 | #include "binning.h"
 25 | 
 26 | float *QuicksmoothenBins(float *carray, int smoothBins, int numBins) {
 27 |     float *smoothed = (float *)calloc(numBins + 1, sizeof(float));
 28 |     int i = 0;
 29 |     int j = 0;
 30 |     int binstart = 0;
 31 |     int binend = 0;
 32 |     float binmean = 0;
 33 |     int runsum_state = 0;
 34 |     float runsum = -1;
 35 |     
 36 |     for(i = 0; i < numBins; i++) {
 37 |         binmean = 0;
 38 |         binstart = i - smoothBins;
 39 |         binend = i + smoothBins;
 40 |         runsum_state = 1;
 41 |         
 42 |         if(binstart < 1) {
 43 |             if(binstart < 0)
 44 |                 binstart = 0;
 45 |             
 46 |             runsum_state = 0;
 47 |         }
 48 |         
 49 |         if(binend >= numBins) {
 50 |             binend = numBins - 1;
 51 |             runsum_state = 0;
 52 |         }
 53 |         
 54 |         if(binend - binstart > 0) {           
 55 |             if(runsum_state == 0) {
 56 |                 for(j = binstart; j <= binend; j++) {
 57 |                     binmean += carray[j];
 58 |                 }
 59 |             }
 60 |             
 61 |             else {
 62 |                 binmean = runsum + carray[binend] - carray[binstart - 1];
 63 |             }
 64 |             
 65 |             runsum = binmean;
 66 |             
 67 |             if(binmean != 0) {
 68 |                 binmean = binmean / (float)(binend - binstart);
 69 |             }
 70 |             
 71 |             smoothed[i] = binmean;
 72 |         }
 73 |     }
 74 |     
 75 |     if(carray)
 76 |         free(carray);
 77 |     
 78 |     return smoothed;
 79 | }
 80 | 
 81 | 
 82 | float *smoothenBins(float **carray, int smoothBins, int numBins) {
 83 |     float *smoothed = (float *) calloc(numBins, sizeof (float));
 84 |     int i = 0;
 85 |     int j = 0;
 86 |     int binstart = 0;
 87 |     int binend = 0;
 88 |     float binmean = 0;
 89 | 
 90 |     for (i = 0; i < numBins; i++) {
 91 |         binmean = 0;
 92 |         binstart = i - smoothBins;
 93 |         binend = i + smoothBins;
 94 | 
 95 |         if (binstart < 0)
 96 |             binstart = 0;
 97 | 
 98 |         if (binend >= numBins)
 99 |             binend = numBins - 1;
100 | 
101 |         if (binend - binstart > 0) {
102 |             for (j = binstart; j <= binend; j++) {
103 |                 binmean += (*carray)[j];
104 |             }
105 | 
106 |             if (binmean != 0) {
107 |                 binmean = binmean / (float) (binend - binstart);
108 |             }
109 | 
110 |             smoothed[i] = binmean;
111 |         } else {
112 |             smoothed[i] = 0;
113 |         }
114 |     }
115 | 
116 |     if (*carray)
117 |         free(*carray);
118 | 
119 |     return smoothed;
120 | }
121 | 
/**
 * Adds 1 to each of the first `len` entries of the coverage array, in place.
 *
 * @param coverage array to modify
 * @param len      number of entries to update
 * @return the same pointer that was passed in
 */
float *AddPseudoToZeroCov(float *coverage, int len) {
    float *cursor = coverage;
    int remaining;

    for (remaining = len; remaining > 0; remaining--) {
        *cursor += 1;
        cursor++;
    }

    return coverage;
}
131 | 
/**
 * Averages per-base coverage into fixed-width bins.
 *
 * Bin i covers bases [i * binSize, (i + 1) * binSize), clipped to chr_len.
 * Its value is the sum of the covered bases divided by the number of bases
 * actually covered; bins whose sum is not positive stay 0.
 *
 * The clip is now at chr_len rather than chr_len - 1. Before, a bin ending
 * exactly at the chromosome end lost its last base.
 *
 * @param coverage per-base coverage; must hold at least chr_len entries
 * @param chr_len  number of valid entries in coverage
 * @param binSize  width of a bin in bases
 * @param nbins    number of bins to compute
 * @return newly allocated array with nbins + 1 entries (the extra entry is
 *         0), or NULL if memory cannot be allocated
 */
float *BinCoverage(int *coverage, int chr_len, int binSize, int nbins) {
    float *bincov = (float *) calloc(nbins+1, sizeof (float));
    int i = 0;
    int j = 0;

    if (bincov == NULL)
        return NULL;

    for (i = 0; i < nbins; i++) {
        int binstart = i * binSize;
        int binend = (i + 1) * binSize;

        if (binstart > chr_len)
            binstart = chr_len;

        /* binend is exclusive, so chr_len is the largest valid value. */
        if (binend > chr_len)
            binend = chr_len;

        for (j = binstart; j < binend; j++) {
            bincov[i] += (float) coverage[j];
        }

        if (bincov[i] > 0)
            bincov[i] = bincov[i] / ((float) (binend - binstart));
    }

    return bincov;
}


--------------------------------------------------------------------------------
/src/main.c:
--------------------------------------------------------------------------------
  1 | /* 
  2 |  * File:   main.c
  3 |  * Author: pongorls
  4 |  *
  5 |  * Created on November 28, 2018, 11:55 AM
  6 |  */
  7 | 
  8 | #include <stdio.h>
  9 | #include <stdlib.h>
 10 | #include <string.h>
 11 | #include <math.h>
 12 | #include <unistd.h>
 13 | #include <pthread.h>
 14 | #include <ctype.h>
 15 | #include <getopt.h>
 16 | #include <inttypes.h>
 17 | 
 18 | #include <bigWig.h>
 19 | 
 20 | #include "main.h"
 21 | #include "Definitions.h"
 22 | #include "BAMstructs.h"
 23 | #include "CHROMstruct.h"
 24 | #include "BAMcoverage.h"
 25 | #include "scale.h"
 26 | #include "segmenter.h"
 27 | #include "multithreads.h"
 28 | #include "BEDstruct.h"
 29 | #include "Inputs.h"
 30 | #include "Writer.h"
 31 | #include <htslib/sam.h>
 32 | 
 33 | BAMFILES *BAMhead = NULL;
 34 | BAMFILES *BAMcurr = NULL;
 35 | 
 36 | CHROMOSOMES *CHROMhead = NULL;
 37 | CHROMOSOMES *CHROMcurr = NULL;
 38 | 
 39 | CHRCOV *CCOVhead = NULL;
 40 | CHRCOV *CCOVcurr = NULL;
 41 | 
 42 | RATIOS *rhead = NULL;
 43 | RATIOS *rcurr = NULL;
 44 | 
 45 | int no_of_samples;
 46 | int bamcoverage = 0; //0: quick (index), 1: count all reads
 47 | 
 48 | void FreeAllocatedData(void) {
 49 |     DestroyBAMstruct(BAMhead);
 50 |     DestroyCHROMstruct(CHROMhead, no_of_samples);
 51 |     //DestroyRatioStruct(rhead);
 52 | }
 53 | 
 54 | void ComputeCoverageChIPpeak(CMDINPUT *cmd) {
 55 |     PEAK *head = NULL;
 56 |     BAMFILES *curr = NULL;
 57 |     char *ofile = NULL;
 58 |     int ofile_len = 0;
 59 | 
 60 |     CHROMhead = ImportChromosomeDataFromBAM(cmd->bamfiles->name, cmd->no_of_samples, cmd->threads);
 61 | 
 62 |     if (cmd->libtype == -1) {
 63 |         fprintf(stderr, "Detecting library type\n");
 64 |         cmd->libtype = DetectLibraryType(cmd->bamfiles);
 65 | 
 66 |         if (cmd->libtype == 0) {
 67 |             fprintf(stderr, "\tLibrary seems single-end\n");
 68 | 
 69 |             if (cmd->fragment_count_mode == 1 && cmd->fragment_size == 0) {
 70 |                 fprintf(stderr, "ERROR: fragment mode counting is enable, library is single-end, but fragment size is set to 0.");
 71 |                 fprintf(stderr, "WARNING: Please re-run program without enabling fragment-counting mode, or set fragment size");
 72 |                 PrintMultiCovMessage(cmd->argv[0]);
 73 |                 return;
 74 |             }
 75 |         }
 76 |         else
 77 |             fprintf(stderr, "\tLibrary seems paired-end\n");
 78 |     }
 79 | 
 80 |     if (cmd->blacklist_file)
 81 |         BlacklistChromosomeFiles(CHROMhead, cmd->blacklist_file);
 82 | 
 83 |     PrintBlacklistedChromosomes(CHROMhead, cmd->no_of_samples);
 84 | 
 85 |     if (cmd->genome_coverage == 1) {
 86 |         MultiGenomeReadCoverage(cmd, CHROMhead);
 87 |     }
 88 |     else {
 89 |         fprintf(stderr, "\nComputing coverage from the idx of BAM files\n");
 90 |         GetChromosomeCoveragesIDX(CHROMhead, cmd->bamfiles);
 91 |         GetGenomeCoveragesIDX(CHROMhead, cmd->bamfiles);
 92 |     }
 93 | 
 94 |     if (cmd->blacklist_bed) {
 95 |         fprintf(stderr, "Subtracting reads from blaklist BED file ( %s )\n", cmd->blacklist_bed);
 96 |         SubtractBlacklistedBEDS(cmd->blacklist_bed, CHROMhead, cmd->bamfiles, cmd->libtype);
 97 |     }
 98 | 
 99 |     ComputeSamplescales(cmd->bamfiles, CHROMhead, 1);
100 |     head = ReadBED(cmd->bedfile, cmd->threads);
101 |     AllocateReadCovs(head, cmd->no_of_samples);
102 | 
103 |     curr = cmd->bamfiles;
104 | 
105 |     while (curr != NULL) {
106 |         fprintf(stderr, "\nSample: %s\n", curr->shortname);
107 |         fprintf(stderr, "\tTotal no. of reads: %d\n", curr->read_coverage);
108 |         fprintf(stderr, "\tLibrary size scale: %.2f\n", curr->scale);
109 |         curr = curr->next;
110 |     }
111 | 
112 |     fprintf(stderr, "\nProcessing BAM files\n");
113 |     MultiCoverage(cmd->bamfiles, head, cmd);
114 | 
115 |     if (cmd->outdir)
116 |         ofile_len = strlen(cmd->outdir);
117 | 
118 |     if (cmd->outprefix)
119 |         ofile_len += strlen(cmd->outprefix);
120 | 
121 |     ofile_len += strlen("raw_coverages.tsv") + 50;
122 |     ofile = (char *) calloc(ofile_len, sizeof (char));
123 | 
124 |     if (cmd->outdir) {
125 |         strcat(ofile, cmd->outdir);
126 |         strcat(ofile, "/");
127 |     }
128 | 
129 |     if (cmd->outprefix) {
130 |         strcat(ofile, cmd->outprefix);
131 |         strcat(ofile, ".");
132 |     }
133 | 
134 |     strcat(ofile, "raw_coverages.tsv");
135 | 
136 |     WriteMultiCovsRaw(cmd->bamfiles, head, cmd->no_of_samples, ofile);
137 | 
138 |     if (ofile)
139 |         free(ofile);
140 | 
141 |     ofile_len += strlen("TPM_normalized_coverages.tsv" + 1);
142 |     ofile = (char *) calloc(ofile_len + 1, sizeof (char));
143 | 
144 |     if (cmd->outdir) {
145 |         strcat(ofile, cmd->outdir);
146 |         strcat(ofile, "/");
147 |     }
148 | 
149 |     if (cmd->outprefix) {
150 |         strcat(ofile, cmd->outprefix);
151 |         strcat(ofile, ".");
152 |     }
153 | 
154 |     strcat(ofile, "TPM_normalized_coverages.tsv");
155 | 
156 |     CalculateTPM(cmd->bamfiles, head);
157 |     WriteMultiCovsNormalized(cmd->bamfiles, head, cmd->no_of_samples, ofile);
158 | 
159 |     if (ofile)
160 |         free(ofile);
161 | 
162 |     ofile_len += strlen("FPKM_normalized_coverages.tsv" + 1);
163 |     ofile = (char *) calloc(ofile_len + 1, sizeof (char));
164 | 
165 |     if (cmd->outdir) {
166 |         strcat(ofile, cmd->outdir);
167 |         strcat(ofile, "/");
168 |     }
169 | 
170 |     if (cmd->outprefix) {
171 |         strcat(ofile, cmd->outprefix);
172 |         strcat(ofile, ".");
173 |     }
174 | 
175 |     strcat(ofile, "FPKM_normalized_coverages.tsv");
176 | 
177 |     CalculateFPKM(cmd->bamfiles, head);
178 |     WriteMultiCovsNormalized(cmd->bamfiles, head, cmd->no_of_samples, ofile);
179 | 
180 |     if (ofile)
181 |         free(ofile);
182 | 
183 |     ofile_len += strlen("Library_normalized_coverages.tsv" + 1);
184 |     ofile = (char *) calloc(ofile_len + 1, sizeof (char));
185 | 
186 |     if (cmd->outdir) {
187 |         strcat(ofile, cmd->outdir);
188 |         strcat(ofile, "/");
189 |     }
190 | 
191 |     if (cmd->outprefix) {
192 |         strcat(ofile, cmd->outprefix);
193 |         strcat(ofile, ".");
194 |     }
195 | 
196 |     strcat(ofile, "Library_normalized_coverages.tsv");
197 | 
198 |     CalculateLibScaled(cmd->bamfiles, head);
199 |     WriteMultiCovsNormalized(cmd->bamfiles, head, cmd->no_of_samples, ofile);
200 | 
201 |     if (ofile)
202 |         free(ofile);
203 | 
204 |     DeleteBEDs(head);
205 | }
206 | 
/*
 * Create coverage track(s) for the RNA-seq style operations.
 *
 * Steps, in order: import chromosome data from the first BAM file, apply the
 * optional blacklist, compute and allocate bins, optionally auto-detect the
 * library type, set up scaling factors, log per-sample statistics to stderr,
 * and finally call GetGenomeCoverageRNA() once (no strand split) or twice
 * (positive, then negative strand).
 *
 * cmd: parsed command-line input. Modified in place: fragment_count_mode,
 *      chr, libtype, strand, and the scale / genome_scale fields of the
 *      BAMFILES list.
 *
 * CHROMhead is declared outside this function and is overwritten here.
 */
void NormalizeBAMSrna(CMDINPUT *cmd) {
    BAMFILES *curr = NULL;

    /* Fragment-count mode is switched off unconditionally on this path. */
    cmd->fragment_count_mode = 0;
    fprintf(stderr, "Allocating BINS of size %d for chromosomes\n", cmd->binSize);
    CHROMhead = ImportChromosomeDataFromBAM(cmd->bamfiles->name, cmd->no_of_samples, cmd->threads);

    if (cmd->blacklist_file)
        BlacklistChromosomeFiles(CHROMhead, cmd->blacklist_file);
    
    CHROMhead = ComputeBins(CHROMhead, cmd->binSize);
    CHROMhead = AllocateBins(CHROMhead, cmd->no_of_samples);
    
    cmd->chr = CHROMhead;

    /* libtype == -1 means "not specified": detect it from the BAM files. */
    if (cmd->libtype == -1) {
        fprintf(stderr, "Detecting library type\n");
        cmd->libtype = DetectLibraryType(cmd->bamfiles);

        if (cmd->libtype == 0) {
            fprintf(stderr, "\tLibrary seems single-end\n");

            /* NOTE(review): fragment_count_mode was set to 0 at the top of
             * this function, so this branch looks unreachable unless one of
             * the calls above changes it - confirm. */
            if (cmd->fragment_count_mode == 1 && cmd->fragment_size == 0) {
                fprintf(stderr, "ERROR: fragment mode counting is enable, library is single-end, but fragment size is set to 0.");
                fprintf(stderr, "WARNING: Please re-run program without enabling fragment-counting mode, or set fragment size");
                PrintMultiCovMessage(cmd->argv[0]);
                return;
            }
        } else
            fprintf(stderr, "\tLibrary seems paired-end\n");
    }

    //fprintf(stderr, "\nComputing coverage from the idx of BAM files\n");
    //GetChromosomeCoveragesIDX(CHROMhead, cmd->bamfiles);
    /* Unless the scale mode is INPUTS_CUSTOM, reset the scale factors of the
     * first BAMFILES entry (only the first one is touched here). */
    if(strcmp(cmd->scale, INPUTS_CUSTOM) != 0) {
        cmd->bamfiles->scale = 1;
        cmd->bamfiles->genome_scale = 1;
    }
    
    /* Scaling factors are computed only when genome_coverage > 0 and the
     * scale mode is neither INPUTS_NO nor INPUTS_CUSTOM. */
    if (cmd->genome_coverage > 0 && strcmp(cmd->scale, INPUTS_NO) != 0 && strcmp(cmd->scale, INPUTS_CUSTOM) != 0) {
        fprintf(stderr, "\nComputing coverage from BAM file\n");
        MultiGenomeCoverage(cmd, CHROMhead);
        ComputeSamplescales(cmd->bamfiles, CHROMhead, 1);
        ScaleGenomeCoverage(cmd->bamfiles, CHROMhead);
    }
    
    curr = cmd->bamfiles;

    /* Log the statistics of every sample. */
    while (curr != NULL) {
        fprintf(stderr, "\nSample: %s\n", curr->shortname);
        fprintf(stderr, "\tTotal no. of reads: %d\n", curr->read_coverage);
        fprintf(stderr, "\tLibrary size scale: %.2f\n", curr->scale);
        fprintf(stderr, "\tTotal number of filtered reads: %d\n", curr->filtered_reads);
        fprintf(stderr, "\tBases sequenced: %f\n", curr->base_coverage);
        fprintf(stderr, "\tGenome size: %.f\n", CalculateGenomeSize(CHROMhead));
        fprintf(stderr, "\tGenome scale: %f\n", curr->genome_scale);
        
        /* In INPUTS_SMALLEST mode the library-size scale replaces the genome
         * scale; this happens after the values above have been logged. */
        if(strcmp(cmd->scale, INPUTS_SMALLEST) == 0)
            curr->genome_scale = curr->scale;
        
        curr = curr->next;
    }

    /* NOTE(review): ownership of the string returned by returnRNAfilename()
     * is not visible here - confirm whether it has to be freed. */
    if(cmd->strandsplit == 0) {
        fprintf(stderr, "\nCreating coverage track for: %s\n", cmd->bamfiles->shortname);
        GetGenomeCoverageRNA(cmd, CHROMhead, returnRNAfilename(cmd));
    }
    else {
        /* One track per strand: cmd->strand selects it (1, then -1). */
        fprintf(stderr, "\nCreating positive coverage track for: %s\n", cmd->bamfiles->shortname);
        cmd->strand = 1;
        GetGenomeCoverageRNA(cmd, CHROMhead, returnRNAfilename(cmd));
        
        fprintf(stderr, "\nCreating negative coverage track for: %s\n", cmd->bamfiles->shortname);
        cmd->strand = -1;
        GetGenomeCoverageRNA(cmd, CHROMhead, returnRNAfilename(cmd));
    }
}
284 | 
/*
 * Convert the BAM files of a "scale" run into BigWig coverage tracks.
 *
 * Steps, in order: import chromosome data from the first BAM file, apply the
 * optional blacklist, compute and allocate bins, optionally auto-detect the
 * library type, compute coverages and scaling factors, log per-sample
 * statistics to stderr, scale and optionally smooth the signal, write one
 * BigWig per sample, and - for operations other than "scaled"/"unscaled" -
 * transform the tracks and write BigWigs for every sample after the first.
 *
 * cmd: parsed command-line input. Modified in place: fragment_count_mode,
 *      chr, libtype, strand, and fields of the BAMFILES list.
 *
 * CHROMhead is declared outside this function and is overwritten here.
 */
void NormalizeBAMS(CMDINPUT *cmd) {
    BAMFILES *curr = NULL;

    /* Fragment-count mode is switched off unconditionally on this path. */
    cmd->fragment_count_mode = 0;
    fprintf(stderr, "Allocating BINS of size %d for chromosomes\n", cmd->binSize);
    CHROMhead = ImportChromosomeDataFromBAM(cmd->bamfiles->name, cmd->no_of_samples, cmd->threads);

    if (cmd->blacklist_file)
        BlacklistChromosomeFiles(CHROMhead, cmd->blacklist_file);

    CHROMhead = ComputeBins(CHROMhead, cmd->binSize);
    CHROMhead = AllocateBins(CHROMhead, cmd->no_of_samples);
    cmd->chr = CHROMhead;

    /* libtype == -1 means "not specified": detect it from the BAM files. */
    if (cmd->libtype == -1) {
        fprintf(stderr, "Detecting library type\n");
        cmd->libtype = DetectLibraryType(cmd->bamfiles);

        if (cmd->libtype == 0) {
            fprintf(stderr, "\tLibrary seems single-end\n");

            /* NOTE(review): fragment_count_mode was set to 0 at the top of
             * this function, so this branch looks unreachable unless one of
             * the calls above changes it - confirm. */
            if (cmd->fragment_count_mode == 1 && cmd->fragment_size == 0) {
                fprintf(stderr, "ERROR: fragment mode counting is enable, library is single-end, but fragment size is set to 0.");
                fprintf(stderr, "WARNING: Please re-run program without enabling fragment-counting mode, or set fragment size");
                PrintMultiCovMessage(cmd->argv[0]);
                return;
            }
        } else
            fprintf(stderr, "\tLibrary seems paired-end\n");
    }

    fprintf(stderr, "\nComputing coverage from the idx of BAM files\n");
    GetChromosomeCoveragesIDX(CHROMhead, cmd->bamfiles);
    MultiGenomeBaseCoverage(cmd, CHROMhead);
        
    /* For the stranded RNA operations the base coverage of the first two
     * samples is summed and the sum is stored in both entries.
     * NOTE(review): cmd->bamfiles->next is dereferenced without a NULL check;
     * presumably the parser guarantees two samples in this mode - confirm. */
    if(cmd->strandsplit == 1 && strcmp(cmd->scale, INPUTS_CUSTOM) != 0) {
        if(strcmp(cmd->operation, INPUTS_RSTRRNA) == 0 || strcmp(cmd->operation, INPUTS_STRRNA) == 0) {
            cmd->bamfiles->base_coverage = cmd->bamfiles->base_coverage + cmd->bamfiles->next->base_coverage;
            cmd->bamfiles->next->base_coverage = cmd->bamfiles->base_coverage;
        }
    }
    
    /* Scaling factors are left untouched when the scale mode is INPUTS_CUSTOM. */
    if(strcmp(cmd->scale, INPUTS_CUSTOM) != 0)
        ScaleGenomeCoverage(cmd->bamfiles, CHROMhead);

    if (cmd->genome_coverage == 0 && strcmp(cmd->scale, INPUTS_CUSTOM) != 0) {
        GetGenomeCoveragesIDX(CHROMhead, cmd->bamfiles);
        ComputeSamplescales(cmd->bamfiles, CHROMhead, 1);
    }

    curr = cmd->bamfiles;

    /* Log the statistics of every sample. */
    while (curr != NULL) {
        fprintf(stderr, "\nSample: %s\n", curr->shortname);
        fprintf(stderr, "\tTotal no. of reads: %d\n", curr->read_coverage);
        fprintf(stderr, "\tLibrary size scale: %.2f\n", curr->scale);
        fprintf(stderr, "\tTotal number of filtered reads: %d\n", curr->filtered_reads);
        fprintf(stderr, "\tBases sequenced: %f\n", curr->base_coverage);
        fprintf(stderr, "\tGenome size: %.f\n", CalculateGenomeSize(CHROMhead));
        fprintf(stderr, "\tGenome scale: %f\n", curr->genome_scale);
        
        /* In INPUTS_SMALLEST mode the library-size scale replaces the genome
         * scale; this happens after the values above have been logged. */
        if(strcmp(cmd->scale, INPUTS_SMALLEST) == 0)
            curr->genome_scale = curr->scale;
        
        curr = curr->next;
    }

    /* NOTE(review): the literal "no" is compared here, whereas
     * NormalizeBAMSrna compares against INPUTS_NO - presumably the same
     * value; confirm. */
    if (strcmp(cmd->scale, "no") != 0) {
        fprintf(stderr, "\nScaling sample(s)\n");
        MultiGenomeScaler(cmd, CHROMhead);
    }

    /* First smoothing pass: runs for tracksmooth 0 or 1. */
    if (cmd->smoothBin > 0) {
        if (cmd->tracksmooth == 0 || cmd->tracksmooth == 1) {
            fprintf(stderr, "\nSmoothening signal\n");
            MultiGenomeSmoother(cmd, CHROMhead);
        }
    }
    
    fprintf(stderr, "Printing output BigWig files\n");

    /* With strand splitting, the first sample is written with strand = -1
     * and every following sample with strand = 1. */
    curr = cmd->bamfiles;
    if(cmd->strandsplit == 1)
        cmd->strand = -1;
    
    while (curr != NULL) {
        PrintScaledBigWig(cmd, curr, NULL);
        curr = curr->next;
        
        if(cmd->strandsplit == 1)
            cmd->strand = 1;
    }

    /* Any operation other than "scaled"/"unscaled" additionally transforms
     * the tracks and writes them again. */
    if (strcmp(cmd->operation, "scaled") != 0 && strcmp(cmd->operation, "unscaled") != 0) {
        fprintf(stderr, "Transforming coverage tracks: %s\n", cmd->operation);
        MultiGenomeTransform(cmd, CHROMhead);
        cmd->strand = 0;
        
        /* Second smoothing pass: runs for tracksmooth 0 or 2. */
        if (cmd->smoothBin > 0) {
            if (cmd->tracksmooth == 0 || cmd->tracksmooth == 2) {
                fprintf(stderr, "\nSmoothening transformed signal\n");
                MultiGenomeSmoother(cmd, CHROMhead);
            }
        }
        
        /* Transformed tracks are written for every sample after the first;
         * the first sample's shortname is passed as the third argument. */
        curr = cmd->bamfiles->next;

        while (curr != NULL) {
            PrintScaledBigWig(cmd, curr, cmd->bamfiles->shortname);
            curr = curr->next;
        }
    }
}
398 | 
/*
 * Print the top-level usage message to stderr.
 *
 * pname: program path as invoked; only the part after the last '/' is shown.
 */
void PrintUsage(char *pname) {
    char *slash = strrchr(pname, '/');
    char *progname = pname;

    /* Drop leading directory components, if any. */
    if (slash != NULL)
        progname = slash + 1;

    fputs("\nBAMscale: a tool to quantify peaks, and scale sequencing data\n", stderr);
    fprintf(stderr, "Version: %s\n", "v0.0.9");

    fprintf(stderr, "\nUsage: %s <command>\n", progname);
    fputs("\n\tCommands\tDescription\n", stderr);
    fputs("\t========\t===========\n", stderr);

    fputs("\t   cov\t\tCalculate coverage of BED coordinates in BAM file(s). Outputs are raw read counts, FPKM and TPM normalized values.\n", stderr);
    fputs("\t   scale\tConvert BAM files to BigWigs; scale one or multiple files to genome size or to each other.\n", stderr);
}
413 | 
414 | /*
415 |  * 
416 |  */
417 | int main(int argc, char **argv) {
418 |     CMDINPUT *cmd = NULL;
419 |     int found = 0;
420 | 
421 |     if (argc > 1) {
422 |         if (strcmp(argv[1], INPUTS_COV) == 0) {
423 |             found++;
424 |             cmd = MultiCovParser(argc, argv);
425 | 
426 |             if (cmd != NULL) {
427 |                 no_of_samples = cmd->no_of_samples;
428 |                 
429 |                 ComputeCoverageChIPpeak(cmd);                    
430 |             }
431 |         }
432 | 
433 |         if (strcmp(argv[1], INPUTS_SCALE) == 0) {
434 |             found++;
435 |             cmd = ScaleParser(argc, argv);
436 | 
437 |             if (cmd != NULL) {
438 |                 no_of_samples = cmd->no_of_samples;
439 |            
440 |                 if(strcmp(cmd->operation, INPUTS_RSTRRNA) == 0 || strcmp(cmd->operation, INPUTS_RNA) == 0 || strcmp(cmd->operation, INPUTS_STRRNA) == 0)
441 |                     NormalizeBAMSrna(cmd);
442 |                 
443 |                 else
444 |                    NormalizeBAMS(cmd); 
445 |             }
446 |         }
447 | 
448 |         if (found == 0)
449 |             PrintUsage(argv[0]);
450 |     }
451 |     else {
452 |         PrintUsage(argv[0]);
453 |     }
454 | 
455 |     if (cmd){
456 |         DestroyCMDinput(cmd);
457 |         FreeAllocatedData();
458 |         free(cmd);
459 |     }
460 | 
461 |     return (EXIT_SUCCESS);
462 | }
463 | 
464 | 


--------------------------------------------------------------------------------
/src/multithreads.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * To change this license header, choose License Headers in Project Properties.
  3 |  * To change this template file, choose Tools | Templates
  4 |  * and open the template in the editor.
  5 |  */
  6 | 
  7 | /* 
  8 |  * File:   multithreads.c
  9 |  * Author: pongorls
 10 |  * 
 11 |  * Created on November 30, 2018, 12:04 PM
 12 |  */
 13 | 
 14 | #include <stdio.h>
 15 | #include <stdlib.h>
 16 | #include <string.h>
 17 | #include <math.h>
 18 | #define __STDC_FORMAT_MACROS
 19 | #include <inttypes.h>
 20 | #include <pthread.h>
 21 | #include <htslib/sam.h>
 22 | 
 23 | 
 24 | #include "Definitions.h"
 25 | #include "main.h"
 26 | #include "scale.h"
 27 | #include "CHROMstruct.h"
 28 | #include "segmenter.h"
 29 | #include "BAMcoverage.h"
 30 | #include "multithreads.h"
 31 | #include "binning.h"
 32 | 
 33 | void DestroyThreadStruct(THREADS **head, int no_of_threads) {
 34 |     int i = 0;
 35 | 
 36 |     for (i = 0; i < no_of_threads; i++) {
 37 |         if ((*head)[i].chrname)
 38 |             free((*head)[i].chrname);
 39 |     }
 40 | 
 41 |     if (*head)
 42 |         free(*head);
 43 | }
 44 | 
 45 | THREADS *CreateThreadStruct(char *chrname) {
 46 |     THREADS *ptr = (THREADS *) malloc(sizeof (THREADS));
 47 | 
 48 |     ptr->chrname = strdup(chrname);
 49 | 
 50 |     ptr->sample_id = -1;
 51 |     ptr->paired_end = 0;
 52 |     ptr->scale = 1.00;
 53 |     ptr->binSize = 0;
 54 |     ptr->pseudocount = 1;
 55 |     ptr->sample = NULL;
 56 |     ptr->next = NULL;
 57 | 
 58 |     return ptr;
 59 | }
 60 | 
 61 | THREADS *AddElement(THREADS *head, char *chrname) {
 62 |     THREADS *curr = head;
 63 | 
 64 |     if (head == NULL) {
 65 |         head = CreateThreadStruct(chrname);
 66 |         curr = head;
 67 |     } else {
 68 |         while (curr->next != NULL) {
 69 |             curr = curr->next;
 70 |         }
 71 | 
 72 |         curr->next = CreateThreadStruct(chrname);
 73 |     }
 74 | 
 75 |     return head;
 76 | }
 77 | 
 78 | THREADS **AssignChrToThreads(CHROMOSOMES *head, int no_of_threads) {
 79 |     THREADS **ptr = (THREADS **) malloc(no_of_threads * sizeof (THREADS *));
 80 |     CHROMOSOMES *curr = head;
 81 |     int i = 0;
 82 | 
 83 |     while (curr != NULL) {
 84 |         if (curr->blacklist == 0) {
 85 |             ptr[i] = AddElement(ptr[i], curr->name);
 86 |             i++;
 87 | 
 88 |             if (i >= no_of_threads) {
 89 |                 i = 0;
 90 |             }
 91 |         }
 92 | 
 93 |         curr = curr->next;
 94 |     }
 95 | 
 96 |     return ptr;
 97 | }
 98 | 
 99 | CHROMOSOMES *FindChrStruct(CHROMOSOMES * head, char *chrname) {
100 |     while (head != NULL) {
101 |         if (strcmp(chrname, head->name) == 0) {
102 |             return head;
103 |         }
104 | 
105 |         head = head->next;
106 |     }
107 | 
108 |     return NULL;
109 | }
110 | 
111 | RATIOS *CalculateRatiosAllMultithreaded(RATIOS *head, CHROMOSOMES *chead, BAMFILES *bhead, int no_of_samples, int min_per_bin_cov, int smoothbin, int binSize, char *chromsizes) {
112 |     RATIOS *curr = head;
113 |     CHROMOSOMES *ccurr = chead;
114 |     BAMFILES *bcurr = bhead;
115 |     CHRCOV *ptr;
116 | 
117 |     if (no_of_samples < 2)
118 |         return NULL;
119 | 
120 |     bcurr = bhead->next;
121 | 
122 |     while (bcurr != NULL) {
123 |         printf("\nComparing samples:\n\t%s\n\t%s\n\n", bhead->name, bcurr->name);
124 |         ccurr = chead;
125 | 
126 |         if (curr == NULL)
127 |             curr = CreateRatioStruct(bhead->name, bcurr->name, bhead->id, bcurr->id);
128 | 
129 |         else {
130 |             curr->next = CreateRatioStruct(bhead->name, bcurr->name, bhead->id, bcurr->id);
131 |             curr = curr->next;
132 |         }
133 | 
134 |         while (ccurr != NULL) {
135 |             if (ccurr->blacklist == 0) {
136 |                 curr->chrcovs = CalculateChromosomeRatio(ccurr, curr->chrcovs, bhead->id, bcurr->id, 1, min_per_bin_cov);
137 |             }
138 | 
139 |             ccurr = ccurr->next;
140 |         }
141 | 
142 |         if (curr->chrcovs != NULL) {
143 |             ptr = curr->chrcovs;
144 | 
145 |             while (ptr != NULL) {
146 |                 ptr->ratio = smoothenBins(&ptr->ratio, smoothbin, ptr->nbins);
147 |                 ptr = ptr->next;
148 |             }
149 |         }
150 | 
151 |         if (chromsizes)
152 |             PrintBigWigOrdered(curr, binSize, chromsizes);
153 | 
154 |         else
155 |             PrintBedgraph(curr, binSize);
156 | 
157 |         bcurr = bcurr->next;
158 |     }
159 | 
160 |     return head;
161 | }


--------------------------------------------------------------------------------
/src/scale.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * To change this license header, choose License Headers in Project Properties.
  3 |  * To change this template file, choose Tools | Templates
  4 |  * and open the template in the editor.
  5 |  */
  6 | 
  7 | /* 
  8 |  * File:   scale.c
  9 |  * Author: pongorls
 10 |  * 
 11 |  * Created on November 28, 2018, 3:54 PM
 12 |  */
 13 | 
 14 | #include <stdio.h>
 15 | #include <stdlib.h>
 16 | #include <string.h>
 17 | #include <math.h>
 18 | #include <libgen.h>
 19 | 
 20 | #include <bigWig.h>
 21 | 
 22 | #include "Definitions.h"
 23 | #include "main.h"
 24 | #include "scale.h"
 25 | #include "CHROMstruct.h"
 26 | #include "segmenter.h"
 27 | #include "binning.h"
 28 | 
 29 | float *scaleBins(float *carray, float scale, int nbins, float pseudocount) {
 30 |     int i = 0;
 31 |     
 32 |     for (i = 0; i < nbins; i++) {
 33 |         if (carray[i] > 0) {
 34 |             carray[i] = carray[i] * scale;
 35 |             carray[i] = roundf(carray[i] * 100.0) / 100.0;
 36 |         } else
 37 |             carray[i] = 0;
 38 |     }
 39 |     
 40 |     return carray;
 41 | }
 42 | 
 43 | void ScaleToSmallest(BAMFILES *head) {
 44 |     BAMFILES *curr = head;
 45 | 
 46 |     int smallestBAM = -1;
 47 | 
 48 |     while (curr != NULL) {
 49 |         if (curr->read_coverage != -1) {
 50 |             if (smallestBAM == -1) {
 51 |                 smallestBAM = curr->read_coverage;
 52 |             }
 53 |             else {
 54 |                 if (smallestBAM > curr->read_coverage && curr->read_coverage > 0) {
 55 |                     smallestBAM = curr->read_coverage;
 56 |                 }
 57 |             }
 58 |         }
 59 | 
 60 |         curr = curr->next;
 61 |     }
 62 | 
 63 |     curr = head;
 64 | 
 65 |     while (curr != NULL) {
 66 |         if (curr->read_coverage > 0) {
 67 |             curr->scale = (float) curr->read_coverage / (float) smallestBAM;
 68 |             curr->scale = 1 / curr->scale;
 69 |         }
 70 | 
 71 |         curr = curr->next;
 72 |     }
 73 | }
 74 | 
 75 | void NoScale(BAMFILES *head) {
 76 |     BAMFILES *curr = head;
 77 | 
 78 |     while (curr != NULL) {
 79 |         if (curr->read_coverage != -1) {
 80 |             curr->scale = (float) 1;
 81 |         }
 82 | 
 83 |         curr = curr->next;
 84 |     }
 85 | 
 86 |     curr = head;
 87 | }
 88 | 
 89 | void ScaleToGenomeSize(BAMFILES *head, CHROMOSOMES *chead) {
 90 |     BAMFILES *curr = head;
 91 | 
 92 |     float genome_size = CalculateGenomeSize(chead) / 100;
 93 | 
 94 |     while (curr != NULL) {
 95 |         if (curr->read_coverage > 0) {
 96 |             curr->scale = 1 / ((float) curr->read_coverage / genome_size);
 97 |         }
 98 | 
 99 |         curr = curr->next;
100 |     }
101 | }
102 | 
103 | void ScaleGenomeCoverage(BAMFILES *head, CHROMOSOMES *chead) {
104 |     float genome_size = CalculateGenomeSize(chead);
105 |     BAMFILES *curr = head;
106 | 
107 |     while (curr != NULL) {
108 |         curr->genome_scale = (float) 1 / (curr->base_coverage / genome_size);
109 |         curr = curr->next;
110 |     }
111 | }
112 | 
113 | BAMFILES *ComputeSamplescales(BAMFILES *head, CHROMOSOMES *chead, int scale) {
114 |     if (scale == 0) {
115 |         NoScale(head);
116 |     }
117 | 
118 |     if (scale == 1) {
119 |         ScaleToSmallest(head);
120 |     }
121 | 
122 |     if (scale == 2) {
123 |         ScaleToGenomeSize(head, chead);
124 |     }
125 | 
126 |     return head;
127 | }
128 | 
129 | float *logTwoCoverageRatio(float *cov1, float *cov2, int nbins, float min_per_bin_cov) {
130 |     int i = 0;
131 |     float *carray = (float *) calloc(nbins + 1, sizeof (float));
132 | 
133 |     for (i = 0; i < nbins; i++) {
134 |         if (cov1[i] >= min_per_bin_cov && cov2[i] >= min_per_bin_cov) {
135 |             carray[i] = log2(((cov1[i]) / (cov2[i])));
136 |             if (carray[i] > 1000)
137 |                 carray[i] = 1000;
138 | 
139 |             if (carray[i] < -1000)
140 |                 carray[i] = -1000;
141 |         }
142 |     }
143 |        
144 |     if (cov1)
145 |         free(cov1);
146 | 
147 |     return carray;
148 | }
149 | 
/*
 * Compute the per-bin value (cov1 - cov2) / (cov1 + cov2), with 0.0001
 * added to each coverage before the calculation.
 *
 * A bin is computed when at least one of the two inputs reaches
 * min_per_bin_cov; all other bins stay 0.
 *
 * cov1:            first coverage array; FREED by this function.
 * cov2:            second coverage array; left untouched.
 * nbins:           number of bins in both arrays.
 * min_per_bin_cov: minimum coverage at least one sample needs in a bin.
 *
 * Returns a newly allocated, zero-initialised array of nbins + 1 floats.
 */
float *OKseqRFD(float *cov1, float *cov2, int nbins, float min_per_bin_cov) {
    float *result = (float *) calloc(nbins + 1, sizeof (float));
    int bin;

    for (bin = 0; bin < nbins; bin++) {
        if (cov1[bin] >= min_per_bin_cov || cov2[bin] >= min_per_bin_cov) {
            /* The small offset keeps the denominator away from zero. */
            double first = cov1[bin] + 0.0001;
            double second = cov2[bin] + 0.0001;

            result[bin] = (first - second) / (second + first);
        }
    }

    /* The first buffer is released here; free(NULL) is a no-op. */
    free(cov1);

    return result;
}
165 | 
/*
 * Compute the per-bin difference cov1 - cov2.
 *
 * A bin is computed when at least one of the two inputs reaches
 * min_per_bin_cov; all other bins are 0.
 *
 * cov1, cov2:      coverage arrays of nbins entries; neither is modified
 *                  or freed.
 * nbins:           number of bins in both arrays.
 * min_per_bin_cov: minimum coverage at least one sample needs in a bin.
 *
 * Returns a newly allocated array (caller frees), or NULL when the
 * allocation fails.
 */
float *SubtractCoverage(float *cov1, float *cov2, int nbins, float min_per_bin_cov) {
    int i = 0;
    /* calloc, not malloc: bins below the threshold are never written and
     * must read as 0 rather than as uninitialised memory. */
    float *carray = (float *) calloc(nbins + 1, sizeof (float));

    if (carray == NULL)
        return NULL;

    for (i = 0; i < nbins; i++) {
        if (cov1[i] >= min_per_bin_cov || cov2[i] >= min_per_bin_cov) {
            carray[i] = cov1[i] - cov2[i];
        }
    }

    return carray;
}
178 | 
/*
 * Compute the per-bin ratio cov1 / cov2.
 *
 * A bin is computed when at least one of the two inputs reaches
 * min_per_bin_cov; all other bins are 0. The division is not guarded, so a
 * computed bin with cov2 == 0 yields inf (or NaN for 0 / 0).
 *
 * cov1, cov2:      coverage arrays of nbins entries; neither is modified
 *                  or freed.
 * nbins:           number of bins in both arrays.
 * min_per_bin_cov: minimum coverage at least one sample needs in a bin.
 *
 * Returns a newly allocated array (caller frees), or NULL when the
 * allocation fails.
 */
float *CoverageRatio(float *cov1, float *cov2, int nbins, float min_per_bin_cov) {
    int i = 0;
    /* calloc, not malloc: bins below the threshold are never written and
     * must read as 0 rather than as uninitialised memory. */
    float *carray = (float *) calloc(nbins + 1, sizeof (float));

    if (carray == NULL)
        return NULL;

    for (i = 0; i < nbins; i++) {
        if (cov1[i] >= min_per_bin_cov || cov2[i] >= min_per_bin_cov) {
            carray[i] = (cov1[i] / cov2[i]);
        }
    }

    return carray;
}
191 | 
/*
 * Compute a signed per-bin fold change between two coverage arrays.
 *
 * For a computed bin the result is cov1 / cov2 when cov1 > cov2 and
 * -(cov2 / cov1) otherwise (equal coverages therefore give -1). A bin is
 * computed when at least one of the two inputs reaches min_per_bin_cov; all
 * other bins are 0. The divisions are not guarded against a zero divisor.
 *
 * cov1, cov2:      coverage arrays of nbins entries; neither is modified
 *                  or freed.
 * nbins:           number of bins in both arrays.
 * min_per_bin_cov: minimum coverage at least one sample needs in a bin.
 *
 * Returns a newly allocated array (caller frees), or NULL when the
 * allocation fails.
 */
float *SignedCoverageRatio(float *cov1, float *cov2, int nbins, int min_per_bin_cov) {
    int i = 0;
    /* calloc, not malloc: bins below the threshold are never written and
     * must read as 0 rather than as uninitialised memory. */
    float *carray = (float *) calloc(nbins + 1, sizeof (float));

    if (carray == NULL)
        return NULL;

    for (i = 0; i < nbins; i++) {
        if (cov1[i] >= min_per_bin_cov || cov2[i] >= min_per_bin_cov) {
            if (cov1[i] > cov2[i]) {
                carray[i] = (cov1[i] / cov2[i]);
            }
            else {
                carray[i] = -(cov2[i] / cov1[i]);
            }
        }
    }

    return carray;
}
210 | 
211 | CHRCOV *CalculateChromosomeRatio(CHROMOSOMES *curr, CHRCOV *chead, int s1, int s2, int ratioType, int min_per_bin_cov) {
212 |     CHRCOV *ccurr = chead;
213 | 
214 |     if (ccurr == NULL) {
215 |         chead = CreateChromCovStruct(curr->name, curr->id, curr->numberOfBins);
216 |         ccurr = chead;
217 |     }
218 |     else {
219 |         while (ccurr->next != NULL) {
220 |             ccurr = ccurr->next;
221 |         }
222 | 
223 |         ccurr->next = CreateChromCovStruct(curr->name, curr->id, curr->numberOfBins);
224 |         ccurr = ccurr->next;
225 |     }
226 | 
227 |     if (ratioType == 1) {
228 |         ccurr->ratio = logTwoCoverageRatio(curr->coverages[s1], curr->coverages[s2], curr->numberOfBins, min_per_bin_cov);
229 |     }
230 |     else if (ratioType == 2) {
231 |         ccurr->ratio = SubtractCoverage(curr->coverages[s1], curr->coverages[s2], curr->numberOfBins, min_per_bin_cov);
232 |     }
233 |     else if (ratioType == 3) {
234 |         ccurr->ratio = CoverageRatio(curr->coverages[s1], curr->coverages[s2], curr->numberOfBins, min_per_bin_cov);
235 |     }
236 |     else if (ratioType == 4) {
237 |         ccurr->ratio = SignedCoverageRatio(curr->coverages[s1], curr->coverages[s2], curr->numberOfBins, min_per_bin_cov);
238 |     }
239 |     
240 |     return chead;
241 | }
242 | 
243 | void PrintBedgraph(RATIOS *ptr, int binSize) {
244 |     char outfile[250];
245 |     int i = 0;
246 |     CHRCOV *p = ptr->chrcovs;
247 |     FILE * fp;
248 | 
249 |     strcpy(outfile, basename(ptr->sample1));
250 |     strcat(outfile, "_vs_");
251 |     strcat(outfile, basename(ptr->sample2));
252 |     strcat(outfile, ".bedgraph");
253 | 
254 |     fp = fopen(outfile, "w+");
255 | 
256 |     while (p != NULL) {
257 |         for (i = 0; i < p->nbins - 1; i++)
258 |             fprintf(fp, "%s\t%d\t%d\t%.3f\n", p->name, i * binSize, (i + 1) * binSize, p->ratio[i]);
259 |         p = p->next;
260 |     }
261 | 
262 |     fclose(fp);
263 | }
264 | 
/*
 * Return the first tab-separated field of a line.
 *
 * input: line to split; modified in place by strtok(), which overwrites the
 *        first tab after the field with '\0'.
 *
 * Returns a pointer into input, or NULL when the line holds no field.
 */
char *returnChrName(char *input) {
    /* strtok() already yields NULL when there is no token. */
    return strtok(input, "\t");
}
274 | 
275 | void PrintBedgraphOrdered(RATIOS *ptr, int binSize, char *chromfile) {
276 |     FILE *handler = fopen(chromfile, "r");
277 |     char line[BUFSIZ];
278 |     char outfile[250];
279 |     char *chrname = NULL;
280 |     char *pos;
281 |     int i = 0;
282 |     CHRCOV *p = ptr->chrcovs;
283 |     FILE * fp;
284 | 
285 |     strcpy(outfile, basename(ptr->sample1));
286 |     strcat(outfile, "_vs_");
287 |     strcat(outfile, basename(ptr->sample2));
288 |     strcat(outfile, ".bedgraph");
289 | 
290 |     fp = fopen(outfile, "w+");
291 | 
292 |     while (fgets(line, sizeof (line), handler)) {
293 |         if ((pos = strchr(line, '\n')) != NULL)
294 |             *pos = '\0';
295 | 
296 |         chrname = returnChrName(line);
297 |         p = ptr->chrcovs;
298 | 
299 |         if (chrname != NULL) {
300 |             while (p != NULL) {
301 |                 if (strcmp(chrname, p->name) == 0) {
302 |                     for (i = 0; i < p->nbins - 1; i++)
303 |                         fprintf(fp, "%s\t%d\t%d\t%.3f\n", p->name, i * binSize, (i + 1) * binSize, p->ratio[i]);
304 |                 }
305 | 
306 |                 p = p->next;
307 |             }
308 |         }
309 |     }
310 | 
311 |     while (p != NULL) {
312 |         for (i = 0; i < p->nbins - 1; i++)
313 |             fprintf(fp, "%s\t%d\t%d\t%.3f\n", p->name, i * binSize, (i + 1) * binSize, p->ratio[i]);
314 |         p = p->next;
315 |     }
316 | 
317 |     fclose(fp);
318 |     fclose(handler);
319 | }
320 | 
321 | void PrintBigWigOrdered(RATIOS *ptr, int binSize, char *chromfile) {
322 |     FILE *handler = NULL;
323 |     char line[BUFSIZ];
324 |     char outfile[250];
325 |     char *pos, *chrname;
326 |     int i = 0;
327 |     char **chrnames = NULL;
328 |     uint32_t *chrlens = NULL;
329 |     uint32_t start = 0;
330 |     uint32_t end = 0;
331 |     int no_of_chrs = 0;
332 |     CHRCOV *p = ptr->chrcovs;
333 |     bigWigFile_t *fp;
334 | 
335 |     handler = fopen(chromfile, "r");
336 | 
337 |     while (fgets(line, sizeof (line), handler)) {
338 |         if ((pos = strchr(line, '\n')) != NULL)
339 |             *pos = '\0';
340 | 
341 |         chrname = returnChrName(line);
342 |         p = ptr->chrcovs;
343 | 
344 |         if (chrname != NULL) {
345 |             while (p != NULL) {
346 |                 if (strcmp(chrname, p->name) == 0) {
347 |                     no_of_chrs++;
348 |                 }
349 | 
350 |                 p = p->next;
351 |             }
352 |         }
353 |     }
354 | 
355 |     fclose(handler);
356 | 
357 |     chrnames = (char **) malloc(no_of_chrs * sizeof (char *));
358 |     chrlens = (uint32_t *) malloc(no_of_chrs * sizeof (uint32_t));
359 |     no_of_chrs = 0;
360 | 
361 |     handler = fopen(chromfile, "r");
362 | 
363 |     while (fgets(line, sizeof (line), handler)) {
364 |         if ((pos = strchr(line, '\n')) != NULL)
365 |             *pos = '\0';
366 | 
367 |         chrname = returnChrName(line);
368 |         p = ptr->chrcovs;
369 | 
370 |         if (chrname != NULL) {
371 |             while (p != NULL) {
372 |                 if (strcmp(chrname, p->name) == 0) {
373 |                     chrnames[no_of_chrs] = strdup(chrname);
374 |                     chrlens[no_of_chrs] = (uint32_t) (p->nbins * binSize);
375 |                     no_of_chrs++;
376 |                 }
377 | 
378 |                 p = p->next;
379 |             }
380 |         }
381 |     }
382 | 
383 |     fclose(handler);
384 | 
385 |     strcpy(outfile, basename(ptr->sample1));
386 |     strcat(outfile, "_vs_");
387 |     strcat(outfile, basename(ptr->sample2));
388 |     strcat(outfile, ".bw");
389 | 
390 |     fp = bwOpen(outfile, NULL, "w");
391 |     bwCreateHdr(fp, 10);
392 |     fp->cl = bwCreateChromList(chrnames, chrlens, no_of_chrs);
393 |     bwWriteHdr(fp);
394 | 
395 |     handler = fopen(chromfile, "r");
396 | 
397 |     while (fgets(line, sizeof (line), handler)) {
398 |         if ((pos = strchr(line, '\n')) != NULL)
399 |             *pos = '\0';
400 | 
401 |         chrname = returnChrName(line);
402 |         p = ptr->chrcovs;
403 | 
404 |         if (chrname != NULL) {
405 |             while (p != NULL) {
406 |                 if (strcmp(chrname, p->name) == 0) {
407 |                     for (i = 0; i < p->nbins - 1; i++) {
408 |                         start = (uint32_t) (i * binSize);
409 |                         end = (uint32_t) ((i + 1) * binSize);
410 |                         bwAddIntervals(fp, &p->name, &start, &end, &p->ratio[i], (uint32_t) 1);
411 |                     }
412 |                 }
413 | 
414 |                 p = p->next;
415 |             }
416 |         }
417 |     }
418 | 
419 |     fclose(handler);
420 |     bwClose(fp);
421 |     bwCleanup();
422 | 
423 |     for (i = 0; i < no_of_chrs; i++) {
424 |         if (chrnames[i]) {
425 |             free(chrnames[i]);
426 |         }
427 |     }
428 | 
429 |     if (chrnames)
430 |         free(chrnames);
431 | 
432 |     if (chrlens)
433 |         free(chrlens);
434 | 
435 | }
436 | 
437 | RATIOS *CalculateRatiosAll(RATIOS *head, CHROMOSOMES *chead, BAMFILES *bhead, int no_of_samples, int min_per_bin_cov, int smoothbin, int binSize, char *chromsizes) {
438 |     RATIOS *curr = head;
439 |     CHROMOSOMES *ccurr = chead;
440 |     BAMFILES *bcurr = bhead;
441 |     CHRCOV *ptr;
442 | 
443 |     if (no_of_samples < 2)
444 |         return NULL;
445 | 
446 |     bcurr = bhead->next;
447 | 
448 |     while (bcurr != NULL) {
449 |         printf("\nComparing samples:\n\t%s\n\t%s\n\n", bhead->name, bcurr->name);
450 |         ccurr = chead;
451 | 
452 |         if (curr == NULL)
453 |             curr = CreateRatioStruct(bhead->name, bcurr->name, bhead->id, bcurr->id);
454 | 
455 |         else {
456 |             curr->next = CreateRatioStruct(bhead->name, bcurr->name, bhead->id, bcurr->id);
457 |             curr = curr->next;
458 |         }
459 | 
460 |         while (ccurr != NULL) {
461 |             if (ccurr->blacklist == 0) {
462 |                 curr->chrcovs = CalculateChromosomeRatio(ccurr, curr->chrcovs, bhead->id, bcurr->id, 1, min_per_bin_cov);
463 |             }
464 | 
465 |             ccurr = ccurr->next;
466 |         }
467 | 
468 |         if (curr->chrcovs != NULL) {
469 |             ptr = curr->chrcovs;
470 | 
471 |             while (ptr != NULL) {
472 |                 ptr->ratio = smoothenBins(&ptr->ratio, smoothbin, ptr->nbins);
473 |                 ptr = ptr->next;
474 |             }
475 |         }
476 | 
477 |         if (chromsizes)
478 |             PrintBigWigOrdered(curr, binSize, chromsizes);
479 | 
480 |         else
481 |             PrintBedgraph(curr, binSize);
482 | 
483 |         bcurr = bcurr->next;
484 |     }
485 | 
486 |     return head;
487 | }


--------------------------------------------------------------------------------
/src/segmenter.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * To change this license header, choose License Headers in Project Properties.
  3 |  * To change this template file, choose Tools | Templates
  4 |  * and open the template in the editor.
  5 |  */
  6 | 
  7 | /* 
  8 |  * File:   segmenter.c
  9 |  * Author: pongorls
 10 |  * 
 11 |  * Created on November 29, 2018, 1:27 PM
 12 |  */
 13 | 
 14 | #include <stdio.h>
 15 | #include <stdlib.h>
 16 | #include <string.h>
 17 | #include <math.h>
 18 | #define __STDC_FORMAT_MACROS
 19 | #include <inttypes.h>
 20 | 
 21 | #include "Definitions.h"
 22 | #include "main.h"
 23 | #include "scale.h"
 24 | #include "CHROMstruct.h"
 25 | #include "segmenter.h"
 26 | 
 27 | void DestroySegments(SEGMENTS *head) {
 28 |     SEGMENTS *curr = head;
 29 |     
 30 |     while(head != NULL) {
 31 |         curr = head;
 32 |         head = head->next;
 33 |         
 34 |         free(curr);
 35 |     }
 36 | }
 37 | SEGMENTS *createSegment(void) {
 38 |     SEGMENTS *ptr = (SEGMENTS *) malloc(sizeof(SEGMENTS));
 39 |     ptr->next = NULL;
 40 |     ptr->prev = NULL;
 41 |     ptr->start = -1;
 42 |     ptr->end = -1;
 43 |   
 44 |     return ptr;
 45 | }
 46 | 
 47 | int compare_float (const void * a, const void * b) {
 48 |   float fa = *(const float*) a;
 49 |   float fb = *(const float*) b;
 50 |   return (fa > fb) - (fa < fb);
 51 | }
 52 | 
 53 | 
 54 | int64_t CalculateGenSize(CHROMOSOMES *head) {
 55 |     int64_t gensize = 0;
 56 |     CHROMOSOMES *curr = head;
 57 | 
 58 |     while(curr != NULL) {
 59 |         if(curr->blacklist == 0) {
 60 |             gensize += (int64_t)curr->numberOfBins;      
 61 |         }
 62 |         
 63 |         curr = curr->next;
 64 |     }
 65 |     return gensize;
 66 | }
 67 | 
 68 | int64_t CalculateNonZeroBins(CHROMOSOMES *head, int sampleid) {
 69 |     int64_t nzbins = 0;
 70 |     CHROMOSOMES *curr = head;
 71 |     int i;
 72 |     
 73 |     while(curr != NULL) {
 74 |         if(curr->blacklist == 0) {
 75 |             curr->nonzerobins = 0;
 76 |             
 77 |             for(i = 0; i < curr->numberOfBins; i++) {
 78 |                 if(curr->coverages[sampleid][i] != 0) {
 79 |                     curr->nonzerobins++;
 80 |                 }
 81 |             }     
 82 |         }
 83 |         
 84 |         curr = curr->next;
 85 |     }
 86 |     
 87 |     curr = head;
 88 |     
 89 |     while(curr != NULL) {
 90 |         if(curr->blacklist == 0) {
 91 |             nzbins += curr->nonzerobins;
 92 |         }
 93 |         
 94 |         curr = curr->next;
 95 |     }
 96 |     
 97 |     return nzbins;
 98 | }
 99 | 
100 | float *ConcatenateGenome(CHROMOSOMES *head, int64_t gensize, int sampleid) {
101 |     CHROMOSOMES *curr = head;
102 |     int64_t i = 0;
103 |     int64_t j = 0;
104 |     float *genbins = NULL;
105 |     
106 |     
107 |     if(gensize < 1)
108 |         return NULL;
109 |     
110 |     genbins = (float *) malloc(gensize * sizeof(float));
111 |     
112 |     if(genbins == NULL) {
113 |         printf("ERROR: could not allocate memory for genome (at quantile calculation)\n");
114 |         FreeAllocatedData();
115 |         exit(0); 
116 |     }
117 |         
118 |     while(curr != NULL) {
119 |         if(curr->blacklist == 0) {
120 |             
121 |             for(i = 0; i < curr->numberOfBins; i++) {
122 |                 if(curr->coverages[sampleid][i] != 0) {
123 |                    genbins[j] = curr->coverages[sampleid][i];
124 |                    j++; 
125 |                 }
126 |             }
127 |         }
128 |         
129 |         curr = curr->next;
130 |     }
131 |     
132 |     
133 |     
134 |     return genbins;
135 | }
136 | 
137 | void Segmenting(CHROMOSOMES *head, CMDINPUT *cmd, int sampleid, float upper, float median, float lower) {
138 |     SEGMENTS *segment = NULL;
139 |     SEGMENTS *segmenthead = NULL;
140 |     SEGMENTS *tmp = NULL;
141 |     SEGMENTS *rtmp = NULL;
142 |     CHROMOSOMES *curr = head;
143 |     int prevstate = -1;
144 |     int currstate = -1;
145 |     int start;
146 |     int end;
147 |     int i = 0;
148 |     FILE *fp_s = fopen("strong.bed", "w+");
149 |     FILE *fp_sm = fopen("med_strong.bed", "w+");
150 |     FILE *fp_wm = fopen("med_weak.bed", "w+");
151 |     FILE *fp_w = fopen("weak.bed", "w+");
152 |     int minsize = 200;
153 |     printf("Minsize: %d\n", minsize);
154 |     
155 |     while(curr != NULL) {
156 |         if(curr->blacklist == 0) {
157 |             currstate = -1;
158 |             prevstate = -1;
159 |             start = 0;
160 |             end = 0;
161 |             segment = NULL;
162 | 
163 |             for(i = 0; i < curr->numberOfBins; i++) {              
164 |                 if(curr->coverages[sampleid][i] == 0) {
165 |                     currstate = 0;
166 |                 }
167 |                 
168 |                 else if(curr->coverages[sampleid][i] > upper) {
169 |                    currstate = 4; 
170 |                 }
171 |                 
172 |                 else if(curr->coverages[sampleid][i] > median) {
173 |                     currstate = 3;
174 |                 }
175 |                 
176 |                 else if(curr->coverages[sampleid][i] > lower) {
177 |                     currstate = 2;
178 |                 }
179 |                 
180 |                 else {
181 |                    currstate = 1; 
182 |                 }
183 |                 
184 |                 if(prevstate == -1) {
185 |                     start = 0;
186 |                     prevstate = currstate;
187 |                 }
188 |                 
189 |                 if(currstate == prevstate) {
190 |                     end = i;
191 |                 }
192 | 
193 |                 else {
194 |                     if(segmenthead == NULL) {
195 |                         segmenthead = createSegment();
196 |                         segment = segmenthead;
197 |                     }
198 |                     
199 |                     else {
200 |                         segment->next = createSegment();
201 |                         segment->next->prev = segment;
202 |                         
203 |                         segment = segment->next;
204 |                     }
205 |                     
206 |                     segment->value = prevstate;
207 |                     segment->start = start;
208 |                     segment->end = end;
209 |                     
210 |                     prevstate = currstate;
211 |                     start = i-1;
212 |                     end = i;
213 |                 }
214 |             }
215 |             
216 |             segment = segmenthead;
217 |             
218 |             while(segment != NULL) {
219 |                 if(segment->end - segment->start >= minsize) {
220 |                     tmp = segment->next;
221 |                     
222 |                     while(tmp != NULL && tmp->end - tmp->start < minsize) {
223 |                         rtmp = tmp;
224 |                         tmp = tmp->next;
225 |                         free(rtmp);
226 |                     }
227 |                     
228 |                     if(tmp == NULL) {
229 |                         segment->end = curr->numberOfBins;
230 |                         segment->next = NULL;
231 |                     }
232 |                     
233 |                     else {
234 |                         if(tmp != segment->next) {
235 |                             segment->next = tmp;
236 |                             tmp->prev = segment;
237 |                         }
238 |                     }
239 |                 }
240 |                 
241 |                 segment = segment->next;
242 |                 
243 |             }
244 |             
245 |             segment = segmenthead;
246 |             
247 |             while(segment != NULL) {
248 |                 if(segment->next) {
249 |                     if(segment->end != segment->next->start) {
250 |                         if(segment->end - segment->start > segment->next->end - segment->next->start)
251 |                             segment->end = segment->next->start;
252 |                         
253 |                         else
254 |                             segment->next->start = segment->end;
255 |                     }
256 |                 }
257 |                 
258 |                 segment = segment->next;
259 |             }
260 |             
261 |             segment = segmenthead;
262 |             
263 |             while(segment != NULL) {
264 |                     if(segment->value == 4)
265 |                         fprintf(fp_s, "%s\t%d\t%d\n", curr->name, segment->start*cmd->binSize, segment->end*cmd->binSize);
266 | 
267 |                     if(segment->value == 3)
268 |                         fprintf(fp_sm, "%s\t%d\t%d\n", curr->name, segment->start*cmd->binSize, segment->end*cmd->binSize);
269 | 
270 |                     if(segment->value == 2)
271 |                         fprintf(fp_wm, "%s\t%d\t%d\n", curr->name, segment->start*cmd->binSize, segment->end*cmd->binSize);
272 | 
273 |                     if(segment->value == 1)
274 |                         fprintf(fp_w, "%s\t%d\t%d\n", curr->name, segment->start*cmd->binSize, segment->end*cmd->binSize);
275 |  
276 |                     segment = segment->next;
277 |             }
278 |                         
279 |             DestroySegments(segmenthead);
280 |             segment = NULL;
281 |             segmenthead = NULL;
282 |         }
283 |         
284 |         curr = curr->next;
285 |     }
286 |         
287 |     fclose(fp_s);
288 |     fclose(fp_sm);
289 |     fclose(fp_wm);
290 |     fclose(fp_w);
291 | }
292 | 
293 | void Quantiles(CHROMOSOMES *head, int sampleid, CMDINPUT *cmd) {
294 |     int64_t gensize = CalculateNonZeroBins(head, sampleid);
295 |     fprintf(stderr, "Genome size: %" PRIu64 "\n", gensize);
296 |  
297 |     float *genbin = ConcatenateGenome(head, gensize, sampleid);
298 |     
299 |     float upper = -1;
300 |     float median = -1;
301 |     float lower = -1;
302 |     
303 |     fprintf(stderr, "Sorting genome ( %d ) \n", sampleid);
304 |     
305 |     qsort(genbin, gensize, sizeof(float), compare_float);
306 |     
307 |     upper = genbin[(int)round(gensize*0.75)];
308 |     median = genbin[(int)round(gensize*0.5)];
309 |     lower = genbin[(int)round(gensize*0.25)];
310 |             
311 |     fprintf(stderr, "Quantiles:\n");
312 |     fprintf(stderr, "\tUpper: %f\n", upper);
313 |     fprintf(stderr, "\tMedian: %f\n", median);
314 |     fprintf(stderr, "\tLower: %f\n", lower);
315 | 
316 |     Segmenting(head, cmd, sampleid, upper, median, lower);
317 |     
318 |     if(genbin)
319 |         free(genbin);
320 | }


--------------------------------------------------------------------------------