├── .github
    └── workflows
    │   ├── go.yml
    │   └── release.yml
├── .gitignore
├── LICENSE
├── README.md
├── go.mod
├── go.sum
├── gopeaks.go
└── gopeaks_test.go


/.github/workflows/go.yml:
--------------------------------------------------------------------------------
 1 | name: Go
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ main ]
 6 |   pull_request:
 7 |     branches: [ main ]
 8 | 
 9 | jobs:
10 | 
11 |   build:
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |     - uses: actions/checkout@v2
15 | 
16 |     - name: Set up Go
17 |       uses: actions/setup-go@v2
18 |       with:
19 |         go-version: 1.15
20 | 
21 |     - name: Build
22 |       run: go build -v ./...
23 | 
24 |     - name: Test
25 |       run: go test -v ./...


--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
 1 | name: Release
 2 | 
 3 | on:
 4 |   push:
 5 |     tags:
 6 |       - '*'
 7 | 
 8 | jobs:
 9 |   release:
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |       - name: Checkout # ----------------------------------------------------------------
13 |         uses: actions/checkout@v1
14 |       - name: Set up Go 1.15 # ----------------------------------------------------------
15 |         uses: actions/setup-go@v1
16 |         with:
17 |           go-version: 1.15.11
18 |       - name: Build with xgo # ----------------------------------------------------------
19 |         uses: crazy-max/ghaction-xgo@v1
20 |         with:
21 |           xgo_version: latest
22 |           go_version: 1.15.11
23 |           dest: build
24 |           prefix: gopeaks
25 |           targets: windows/amd64,linux/amd64,darwin/amd64
26 |           v: true
27 |           x: false
28 |           ldflags: -s -w
29 |       - name: Create Release # ----------------------------------------------------------
30 |         uses: actions/create-release@v1.0.0
31 |         id: create_release
32 |         env:
33 |           GITHUB_TOKEN: ${{ secrets.GO_RELEASE_TOKEN }}
34 |         with:
35 |           tag_name: ${{ github.ref }}
36 |           release_name: Release ${{ github.ref }}
37 |           draft: false
38 |           prerelease: false
39 |       - name: Upload Release Assets # ---------------------------------------------------
40 |         uses: glentiki/xbin-release-action@v1.0.0
41 |         env:
42 |           GITHUB_TOKEN: ${{ secrets.GO_RELEASE_TOKEN }}
43 |         with:
44 |           upload_url: ${{ steps.create_release.outputs.upload_url }} # This pulls from the CREATE RELEASE step above, referencing its ID to get its outputs object, which includes an `upload_url`. See this blog post for more info: https://jasonet.co/posts/new-features-of-github-actions/#passing-data-to-future-steps
45 |           assets_path: ./build
46 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.bam
2 | *.bai
3 | gopeaks
4 | *.bed
5 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Jake VanCampen - Bioinformatics
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | [![Anaconda-Server Badge](https://anaconda.org/bioconda/gopeaks/badges/version.svg)](https://anaconda.org/bioconda/gopeaks)
  2 | [![Anaconda-Server Badge](https://anaconda.org/bioconda/gopeaks/badges/downloads.svg)](https://anaconda.org/bioconda/gopeaks)
  3 | [![Anaconda-Server Badge](https://anaconda.org/bioconda/gopeaks/badges/license.svg)](https://anaconda.org/bioconda/gopeaks)
  4 | [![Go](https://github.com/maxsonBraunLab/gopeaks/actions/workflows/go.yml/badge.svg?branch=main)](https://github.com/maxsonBraunLab/gopeaks/actions/workflows/go.yml)
  5 | ![Maintainer](https://img.shields.io/badge/maintainer-gartician-blue)
  6 | 
  7 | # GoPeaks
  8 | 
  9 | GoPeaks is a peak caller designed for CUT&TAG/CUT&RUN sequencing data. GoPeaks by default works best with narrow peaks such as H3K4me3 and transcription factors. However, broad epigenetic marks like H3K27Ac/H3K4me1 require different step, slide, and minwidth parameters. We encourage users to explore the parameters of GoPeaks to analyze their data.
 10 | 
 11 | # Configure
 12 | 
 13 | Download the latest release using conda: 
 14 | 
 15 | ```
 16 | conda install -c bioconda -c conda-forge gopeaks
 17 | ```
 18 | 
 19 | Or download the binary asset directly from GitHub:
 20 | 
 21 | ```
 22 | wget -O gopeaks https://github.com/maxsonBraunLab/gopeaks/releases/download/v1.0.0/gopeaks-linux-amd64
 23 | chmod +x gopeaks
 24 | ```
 25 | 
 26 | # Paper
 27 | 
 28 | GoPeaks was published in Genome Biology on July 4th, 2022. If you use our program in your studies, please cite our paper:
 29 | 
 30 | Yashar, W.M., Kong, G., VanCampen, J. et al. GoPeaks: histone modification peak calling for CUT&Tag. Genome Biol 23, 144 (2022). https://doi.org/10.1186/s13059-022-02707-w
 31 | 
 32 | # Example Usage
 33 | 
 34 | ```
 35 | usage: GoPeaks [-h|--help] [-b|--bam "<value>"] [-c|--control "<value>"]
 36 |                [-s|--chromsize "<value>"] [-m|--mdist <integer>] [-r|--minreads
 37 |                <integer>] [-p|--pval <float>] [-t|--step <integer>] [-l|--slide
 38 |                <integer>] [-w|--minwidth <integer>] [-o|--prefix "<value>"]
 39 |                [-v|--version] [--broad] [--verbose]
 40 | 
 41 |                GoPeaks is a peak caller designed for CUT&TAG/CUT&RUN sequencing
 42 |                data. GoPeaks by default works best with narrow peaks such as
 43 |                H3K4me3 and transcription factors. GoPeaks can be used with the
 44 |                "--broad" flag to call broad peaks like H3K27Ac/H3K4me1. We
 45 |                encourage users to explore the parameters of GoPeaks to analyze
 46 |                their data.
 47 | 
 48 | Arguments:
 49 | 
 50 |   -h  --help       Print help information
 51 |   -b  --bam        Input BAM file (must be paired-end reads)
 52 |   -c  --control    Input BAM file with control signal to be normalized (e.g.
 53 |                    IgG, Input)
 54 |   -s  --chromsize  Chromosome sizes for the genome if not found in the bam
 55 |                    header
 56 |   -m  --mdist      Merge peaks within <mdist> base pairs. Default: 1000
 57 |   -r  --minreads   Test genome bins with at least <minreads> read pairs..
 58 |                    Default: 15
 59 |   -p  --pval       Define significance threshold <pval> with multiple
 60 |                    hypothesis correction via Benjamini-Hochberg. Default: 0.05
 61 |   -t  --step       Bin size for coverage bins. Default: 100
 62 |   -l  --slide      Slide size for coverage bins. Default: 50
 63 |   -w  --minwidth   Minimum width (bp) of a peak. Default: 150
 64 |   -o  --prefix     Output prefix to write peaks and metrics file. Default:
 65 |                    sample
 66 |   -v  --version    Print the current GoPeaks version
 67 |       --broad      Run GoPeaks on broad marks (--step 5000 & --slide 1000)
 68 |       --verbose    Run GoPeaks in verbose mode.
 69 | 
 70 | ```
 71 | 
 72 | ## Call peaks on a bam file using an IgG control
 73 | 
 74 | ```
 75 | gopeaks -b <sample>.bam -c <control>.bam -o path/to/gopeaks/<sample>
 76 | ```
 77 | 
 78 | ## Output
 79 | 
 80 | Two output files are generated, each named with the output prefix ${prefix}, which is set to "sample" by default.
 81 | 
 82 |     - sample_peaks.bed
 83 |     - sample_gopeaks.json
 84 | 
 85 | ```
 86 | head sample_peaks.bed
 87 | chr1	9950	10550
 88 | chr1	21250	22650
 89 | chr1	96050	97050
 90 | ```
 91 | 
 92 | ```
 93 | cat sample_gopeaks.json
 94 | {
 95 |         "gopeaks_version": "1.0.0",
 96 |         "date": "2022-05-10 3:19:14 PM",
 97 |         "elapsed": "5m20.184082356s",
 98 |         "prefix": "K562_1_H3K4me3",
 99 |         "command": "gopeaks -b K562_1_H3K4me3.bam -c K562_1_IgG.bam",
100 |         "peak_counts": 10329
101 | }
102 | ```
103 | 
104 | ## Recommended parameters
105 | 
106 | | Sequencing Modality                      | Recommended Parameters       |
107 | | ---------------------------------------- | ---------------------------- |
108 | | CUT&TAG or CUT&RUN narrow peaks          | Default parameters           |
109 | | CUT&TAG or CUT&RUN transcription factors | Default parameters           |
110 | | CUT&TAG or CUT&RUN broad peaks           | `--broad` and `--mdist 3000` |
111 | | ATAC-Seq                                 | Default parameters           |
112 | 
113 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
 1 | module maxsonBraunLab/gopeaks
 2 | 
 3 | go 1.15
 4 | 
 5 | require (
 6 | 	github.com/akamensky/argparse v1.3.1
 7 | 	github.com/go-gota/gota v0.12.0
 8 | 	github.com/pbenner/gonetics v1.0.0
 9 | 	github.com/sirupsen/logrus v1.8.1
10 | 	gonum.org/v1/gonum v0.9.1
11 | )
12 | 


--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
 1 | dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
 2 | gioui.org v0.0.0-20210308172011-57750fc8a0a6/go.mod h1:RSH6KIUZ0p2xy5zHDxgAM4zumjgTw83q2ge/PI+yyw8=
 3 | github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
 4 | github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw=
 5 | github.com/akamensky/argparse v1.3.1 h1:kP6+OyvR0fuBH6UhbE6yh/nskrDEIQgEA1SUXDPjx4g=
 6 | github.com/akamensky/argparse v1.3.1/go.mod h1:S5kwC7IuDcEr5VeXtGPRVZ5o/FdhcMlQz4IZQuw64xA=
 7 | github.com/boombuler/barcode v1.0.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8=
 8 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 9 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
10 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
11 | github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k=
12 | github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k=
13 | github.com/go-fonts/dejavu v0.1.0/go.mod h1:4Wt4I4OU2Nq9asgDCteaAaWZOV24E+0/Pwo0gppep4g=
14 | github.com/go-fonts/latin-modern v0.2.0/go.mod h1:rQVLdDMK+mK1xscDwsqM5J8U2jrRa3T0ecnM9pNujks=
15 | github.com/go-fonts/liberation v0.1.1/go.mod h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2HYqyqAO9z7GY=
16 | github.com/go-fonts/stix v0.1.0/go.mod h1:w/c1f0ldAUlJmLBvlbkvVXLAD+tAMqobIIQpmnUIzUY=
17 | github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
18 | github.com/go-gota/gota v0.12.0 h1:T5BDg1hTf5fZ/CO+T/N0E+DDqUhvoKBl+UVckgcAAQg=
19 | github.com/go-gota/gota v0.12.0/go.mod h1:UT+NsWpZC/FhaOyWb9Hui0jXg0Iq8e/YugZHTbyW/34=
20 | github.com/go-latex/latex v0.0.0-20210118124228-b3d85cf34e07/go.mod h1:CO1AlKB2CSIqUrmQPqA0gdRIlnLEY0gK5JGjh37zN5U=
21 | github.com/go-sql-driver/mysql v1.5.0 h1:ozyZYNQW3x3HtqT1jira07DN2PArx2v7/mN66gGcHOs=
22 | github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
23 | github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
24 | github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
25 | github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
26 | github.com/pbenner/gonetics v1.0.0 h1:OV194PW7cdQJYbVQtSf1G8hZ0VGqCGKijC0M8KgkpIA=
27 | github.com/pbenner/gonetics v1.0.0/go.mod h1:pbCZfdjg2QuXuJJREZjjxsxT7sDF+GBx3EDic75GqE0=
28 | github.com/phpdave11/gofpdf v1.4.2/go.mod h1:zpO6xFn9yxo3YLyMvW8HcKWVdbNqgIfOOp2dXMnm1mY=
29 | github.com/phpdave11/gofpdi v1.0.12/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI=
30 | github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
31 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
32 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
33 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
34 | github.com/ruudk/golang-pdf417 v0.0.0-20181029194003-1af4ab5afa58/go.mod h1:6lfFZQK844Gfx8o5WFuvpxWRwnSoipWe/p622j1v06w=
35 | github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
36 | github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
37 | github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w=
38 | github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
39 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
40 | golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
41 | golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
42 | golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
43 | golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
44 | golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
45 | golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3 h1:n9HxLrNxWWtEb1cA950nuEEj3QnKbtsCJ6KjcgisNUs=
46 | golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3/go.mod h1:NOZ3BPKG0ec/BKJQgnvsSFpcKLM5xXVWnvZS97DWHgE=
47 | golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs=
48 | golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
49 | golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
50 | golang.org/x/image v0.0.0-20190910094157-69e4b8554b2a/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
51 | golang.org/x/image v0.0.0-20200119044424-58c23975cae1/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
52 | golang.org/x/image v0.0.0-20200430140353-33d19683fad8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
53 | golang.org/x/image v0.0.0-20200618115811-c13761719519/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
54 | golang.org/x/image v0.0.0-20201208152932-35266b937fa6/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
55 | golang.org/x/image v0.0.0-20210216034530-4410531fe030/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
56 | golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o=
57 | golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
58 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
59 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
60 | golang.org/x/net v0.0.0-20210423184538-5f58ad60dda6 h1:0PC75Fz/kyMGhL0e1QnypqK2kQMqKt9csD1GnMJR+Zk=
61 | golang.org/x/net v0.0.0-20210423184538-5f58ad60dda6/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk=
62 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
63 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
64 | golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
65 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
66 | golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
67 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
68 | golang.org/x/sys v0.0.0-20210304124612-50617c2ba197/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
69 | golang.org/x/sys v0.0.0-20210423082822-04245dca01da h1:b3NXsE2LusjYGGjL5bxEVZZORm/YEFFrWFjR8eFrw/c=
70 | golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
71 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
72 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
73 | golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
74 | golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
75 | golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
76 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
77 | golang.org/x/tools v0.0.0-20190206041539-40960b6deb8e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
78 | golang.org/x/tools v0.0.0-20190927191325-030b2cf1153e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
79 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
80 | gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo=
81 | gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0=
82 | gonum.org/v1/gonum v0.9.1 h1:HCWmqqNoELL0RAQeKBXWtkp04mGk8koafcB4He6+uhc=
83 | gonum.org/v1/gonum v0.9.1/go.mod h1:TZumC3NeyVQskjXqmyWt4S3bINhy7B4eYwW69EbyX+0=
84 | gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0 h1:OE9mWmgKkjJyEmDAAtGMPjXu+YNeGvK9VTSHY6+Qihc=
85 | gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw=
86 | gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc=
87 | gonum.org/v1/plot v0.9.0/go.mod h1:3Pcqqmp6RHvJI72kgb8fThyUnav364FOsdDo2aGW5lY=
88 | rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
89 | 


--------------------------------------------------------------------------------
/gopeaks.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"encoding/json"
  5 | 	"fmt"
  6 | 	"os"
  7 | 	"regexp"
  8 | 	"sync"
  9 | 	"time"
 10 | 	"math"
 11 | 	"strings"
 12 | 
 13 | 	"github.com/akamensky/argparse"
 14 | 	gn "github.com/pbenner/gonetics"
 15 | 	"github.com/sirupsen/logrus"
 16 | 	"gonum.org/v1/gonum/stat/distuv"
 17 | 	"github.com/go-gota/gota/dataframe"
 18 | 	"github.com/go-gota/gota/series"
 19 | )
 20 | 
// gopeaks_version is the GoPeaks release string. main prints it for the
// --version flag and records it in the metrics JSON.
const gopeaks_version = "1.0.0"
 22 | 
 23 | type Metrics struct {
 24 | 	Version string `json:"gopeaks_version"`
 25 | 	Date    string `json:"date"`
 26 | 	Elapsed string `json:"elapsed"`
 27 | 	Prefix  string `json:"prefix"`
 28 | 	Command string `json:"command"`
 29 | 	Peaks   int    `json:"peak_counts"`
 30 | }
 31 | 
 32 | func (m *Metrics) Log(op string) {
 33 | 	resp, err := json.MarshalIndent(m, "", "\t")
 34 | 	if err != nil {
 35 | 		fmt.Println(err)
 36 | 		os.Exit(1)
 37 | 	}
 38 | 
 39 | 	f, err := os.Create(op + "_gopeaks.json")
 40 | 	defer f.Close()
 41 | 	if err != nil {
 42 | 		fmt.Println(err)
 43 | 		os.Exit(1)
 44 | 	}
 45 | 
 46 | 	f.WriteString(string(resp))
 47 | 	f.WriteString("\n")
 48 | }
 49 | 
 50 | func main() {
 51 | 
 52 | 	// start time is what elapsed metric
 53 | 	// is calculated from
 54 | 	startTime := time.Now()
 55 | 
 56 | 	parser := argparse.NewParser("GoPeaks",`GoPeaks is a peak caller designed for CUT&TAG/CUT&RUN sequencing data. GoPeaks by default works best with narrow peaks such as H3K4me3 and transcription factors. GoPeaks can be used with the "--broad" flag to call broad peaks like H3K27Ac/H3K4me1. We encourage users to explore the parameters of GoPeaks to analyze their data.`)
 57 | 	bam := parser.String("b", "bam", &argparse.Options{Help: "Input BAM file (must be paired-end reads)"})
 58 | 	control := parser.String("c", "control", &argparse.Options{Help: "Input BAM file with control signal to be normalized (e.g. IgG, Input)"})
 59 | 	cs := parser.String("s", "chromsize", &argparse.Options{Help: "Chromosome sizes for the genome if not found in the bam header"})
 60 | 	within := parser.Int("m", "mdist", &argparse.Options{Help: "Merge peaks within <mdist> base pairs", Default: 1000})
 61 | 	minreads := parser.Int("r", "minreads", &argparse.Options{Help: "Test genome bins with at least <minreads> read pairs.", Default: 15})
 62 | 	pval := parser.Float("p", "pval", &argparse.Options{Help: "Define significance threshold <pval> with multiple hypothesis correction via Benjamini-Hochberg", Default: 0.05})
 63 | 	step := parser.Int("t", "step", &argparse.Options{Help: "Bin size for coverage bins", Default: 100})
 64 | 	slide := parser.Int("l", "slide", &argparse.Options{Help: "Slide size for coverage bins", Default: 50})
 65 | 	minwidth := parser.Int("w", "minwidth", &argparse.Options{Help: "Minimum width (bp) of a peak", Default: 150})
 66 | 	outprefix := parser.String("o", "prefix", &argparse.Options{Help: "Output prefix to write peaks and metrics file", Default: "sample"})
 67 | 	version := parser.Flag("v", "version", &argparse.Options{Help: "Print the current GoPeaks version"})
 68 | 	broad := parser.Flag("", "broad", &argparse.Options{Help: "Run GoPeaks on broad marks (--step 5000 & --slide 1000)"})
 69 | 	verbose := parser.Flag("", "verbose", &argparse.Options{Help: "Run GoPeaks in verbose mode."})
 70 | 	// note: "Required" interface clashes with --version flag.
 71 | 	err := parser.Parse(os.Args)
 72 | 
 73 | 	// parse flags --------------------------------------------------------------------------------
 74 | 
 75 | 	// check version
 76 | 	if *version == true {
 77 | 		fmt.Println("GoPeaks version:", gopeaks_version)
 78 | 		os.Exit(0)
 79 | 	}
 80 | 
 81 | 	// check argparse errors
 82 | 	if err != nil {
 83 | 		fmt.Print(parser.Usage(err))
 84 | 		os.Exit(1)
 85 | 	}
 86 | 
 87 | 	// require args
 88 | 	if *bam == "" {
 89 | 		fmt.Println(parser.Help(nil))
 90 | 		os.Exit(1)
 91 | 	}
 92 | 
 93 | 	// read bamfile to GRanges
 94 | 	r := gn.GRanges{}
 95 | 	if err := r.ImportBamPairedEnd(*bam, gn.BamReaderOptions{ReadName: false, ReadCigar: false, ReadSequence: false}); err != nil {
 96 | 		logrus.Errorf("Error %s", err.Error())
 97 | 		os.Exit(1)
 98 | 	}
 99 | 
100 | 	g := gn.Genome{}
101 | 	if *cs != "" {
102 | 		err := g.Import(*cs)
103 | 		if err != nil {
104 | 			logrus.Errorln("Failed to import chromsizes file")
105 | 			os.Exit(1)
106 | 		}
107 | 	}
108 | 
109 | 	if *cs == "" {
110 | 		if *verbose {
111 | 			fmt.Println("Reading chromsizes from bam header...")
112 | 		}
113 | 		g, err = gn.BamImportGenome(*bam)
114 | 		if err != nil {
115 | 			fmt.Println("Genome could not be determined from bam file")
116 | 			os.Exit(1)
117 | 		}
118 | 	}
119 | 
120 | 	if *broad == true {
121 | 		x := 5000
122 | 		step = &x
123 | 		y := 1000
124 | 		slide = &y
125 | 	}
126 | 
127 | 	// import data --------------------------------------------------------------------------------
128 | 
129 | 	gf := KnownChroms(&g)
130 | 	fr := r.FilterGenome(gf)
131 | 
132 | 	// calculate coverage
133 | 	binRanges := binGenome(g, *step, *slide)
134 | 	binCounts := countOverlaps(binRanges, fr)
135 | 	nreads := fr.Length()
136 | 
137 | 	// calculate control coverage and subtract signal
138 | 	if *control != "" {
139 | 		c := gn.GRanges{}
140 | 		if err := c.ImportBamPairedEnd(*control, gn.BamReaderOptions{ReadName: false, ReadCigar: false, ReadSequence: false}); err != nil {
141 | 			logrus.Errorf("Error %s", err.Error())
142 | 			os.Exit(1)
143 | 		}
144 | 
145 | 		cr := c.FilterGenome(gf)
146 | 		ctrlCounts := countOverlaps(binRanges, cr)
147 | 		binCounts = normalizeToControl(binCounts, ctrlCounts, fr.Length(), cr.Length())
148 | 	}
149 | 
150 | 	// callpeaks ----------------------------------------------------------------------------------
151 | 	peaks := callpeaks(binCounts, float64(nreads), *within, *minwidth, *minreads, *pval, *outprefix, *verbose)
152 | 
153 | 	outfile := *outprefix + "_peaks.bed"
154 | 	err = peaks.ExportBed3(outfile, false)
155 | 	if err != nil {
156 | 		logrus.Errorln(err)
157 | 	}
158 | 
159 | 	// write output metrics -----------------------------------------------------------------------
160 | 	metrics := &Metrics{
161 | 		Version: gopeaks_version,
162 | 		Date:    time.Now().Format("2006-01-02 3:4:5 PM"),
163 | 		Elapsed: time.Since(startTime).String(),
164 | 		Prefix:  *outprefix,
165 | 		Command:  strings.Join(os.Args, " "),
166 | 		Peaks:   peaks.Length(),
167 | 	}
168 | 
169 | 	// log metrics to file
170 | 	metrics.Log(*outprefix)
171 | }
172 | 
// scaleTreatToControl shrinks each treatment count by the fraction of
// normalized treatment signal that is explained by the control.
//
// counts holds the raw treatment counts per bin, s1 the normalized treatment
// signal, and s2 the normalized control signal. For each bin i:
//
//	frac      = min(s2[i]/s1[i], 1)   when s1[i] > 0, otherwise 1
//	result[i] = floor(counts[i] * (1 - frac))
//
// so a bin with no treatment signal, or with control >= treatment, becomes 0.
//
// The result has len(s1) entries. Only bins present in all three inputs are
// scaled; any remaining entries are left at 0 instead of indexing out of
// range.
func scaleTreatToControl(counts []float64, s1 []float64, s2 []float64) []float64 {
	scale := make([]float64, len(s1))

	// Number of bins shared by all three inputs.
	n := len(s1)
	if len(s2) < n {
		n = len(s2)
	}
	if len(counts) < n {
		n = len(counts)
	}

	for i := 0; i < n; i++ {
		frac := 1.0
		if s1[i] > 0 {
			frac = s2[i] / s1[i]
			if frac > 1 {
				frac = 1
			}
		}
		scale[i] = math.Floor(counts[i] * (1 - frac))
	}
	return scale
}
193 | 
// cpm converts raw counts to counts per million: each element of in is
// multiplied by 1e6/nreads. The result has the same length as in.
//
// nreads is not validated; a value of 0 yields +Inf/NaN entries as dictated
// by floating-point division.
func cpm(in []float64, nreads float64) []float64 {
	// The scale factor is loop-invariant, and the output size is known up
	// front, so the slice is allocated once rather than grown by append.
	perMillion := 1e6 / nreads
	out := make([]float64, len(in))
	for i, v := range in {
		out[i] = v * perMillion
	}
	return out
}
202 | 
203 | func normalizeToControl(treat gn.GRanges, ctrl gn.GRanges, treads, creads int) gn.GRanges {
204 | 	tcounts := treat.GetMeta("overlap_counts").([]float64)
205 | 	ccounts := ctrl.GetMeta("overlap_counts").([]float64)
206 | 
207 | 	// calculate the cpm for each track
208 | 	tcountsNorm := cpm(tcounts, float64(treads))
209 | 	ccountsNorm := cpm(ccounts, float64(creads))
210 | 
211 | 	// scale the treatment
212 | 	// scaled_counts = treat * 1-(control/treat)
213 | 	// NOTE: intervals of 0 signal includes actual 0 bins PLUS where IgG > treatment thx to scaleTreatToControl
214 | 	scale := scaleTreatToControl(tcounts, tcountsNorm, ccountsNorm)
215 | 	treat.AddMeta("overlap_counts", scale)
216 | 	return treat
217 | }
218 | 
// binomialParameters scans the per-bin counts once and returns, in order:
//
//   - the summed signal over all non-zero bins,
//   - the number of non-zero bins,
//   - the number of bins whose count is strictly greater than minreads
//     (the bins that will be tested).
//
// A bin may contribute to both the non-zero tally and the test tally.
func binomialParameters(counts []float64, minreads int) (float64, int, int) {
	var (
		signal   float64
		occupied int
		tested   int
	)
	threshold := float64(minreads)

	for _, c := range counts {
		if c != 0 {
			signal += c
			occupied++
		}
		if c > threshold {
			tested++
		}
	}

	return signal, occupied, tested
}
243 | 
244 | func callpeaks(coverage gn.GRanges, total float64, within, width, minreads int, pval float64, outprefix string, verbose bool) gn.GRanges {
245 | 
246 | 	// coverage = GRanges of overlap counts in a bin
247 | 	// total = total number of paired-end reads
248 | 
249 | 	ccts := coverage.GetMeta("overlap_counts").([]float64)
250 | 	nzSignals, nzBins, nTests := binomialParameters(ccts, minreads)
251 | 
252 | 	// calculate probability of read in non-zero bin
253 | 	p := (float64(nzSignals) / float64(nzBins)) / float64(total)
254 | 
255 | 	if verbose {
256 | 		fmt.Println("nTests:", nTests)
257 | 		fmt.Println("nzSignals:", nzSignals)
258 | 		fmt.Println("nzBins:", nzBins)
259 | 		fmt.Println("n:", total)
260 | 		fmt.Println("p:", p)
261 | 		fmt.Println("mu:", float64(nzBins) * float64(p))
262 | 		fmt.Println("var:", float64(nzBins) * float64(p) * (1-float64(p)))
263 | 	}
264 | 
265 | 	var keepSlice []int
266 | 	var bins []int
267 | 	var counts []float64
268 | 	var pvals []float64
269 | 
270 | 	nTests = 0
271 | 	for i := 0; i < len(ccts); i++ {
272 | 		cnt := ccts[i]
273 | 		if cnt > float64(minreads) {
274 | 			prob := BinomTest(cnt, total, p)
275 | 			nTests += 1
276 | 			bins = append(bins, i)
277 | 			counts = append(counts, cnt)
278 | 			pvals = append(pvals, prob)
279 | 		}
280 | 	}
281 | 
282 | 	// `pvals` is list of p-values per eligible bin. `pval` is threshold for significance.
283 | 	keepSlice = filterBinsbyFDR(bins, counts, pvals, pval, nTests, outprefix)
284 | 
285 | 	// merge overlapping and nearby peaks -----------------------------------------------
286 | 	binsKeep := coverage.Subset(keepSlice)
287 | 	binsKeepMerge := binsKeep.Merge()
288 | 	peaks := mergeWithin(binsKeepMerge, within)
289 | 	peaksFilt := filterPeakWidth(peaks, width)
290 | 	return peaksFilt
291 | }
292 | 
293 | func filterBinsbyFDR(Bins []int, Counts []float64, Pvals []float64, Threshold float64, Tests int, outprefix string) []int {
294 | 
295 | 	keepBins := []int{}
296 | 
297 | 	// assign rank to each uniq pval
298 | 	// init fdrDF with binID, counts, and pvals.
299 | 	fdrDF := dataframe.New(
300 | 		series.New(Bins, series.Int, "bin"),
301 | 		series.New(Counts, series.Float, "counts"),
302 | 		series.New(Pvals, series.Float, "pval"),
303 | 	)
304 | 	fdrDF = fdrDF.Arrange(dataframe.Sort("pval"))
305 | 	fdrDF = assignRanks(fdrDF)
306 | 
307 | 	// fmt.Println("assigned ranks")
308 | 	// fmt.Println(fdrDF)
309 | 
310 | 	// create new series: [padj, keep].
311 | 	// calculate padj for each pval
312 | 	fdr := series.New([]float64{}, series.Float, "padj")
313 | 	keep := series.New([]int{}, series.Int, "keep")
314 | 
315 | 	for i := 0; i < fdrDF.Nrow(); i++ {
316 | 
317 | 		p := fdrDF.Elem(i, 2).Float()
318 | 		r := fdrDF.Elem(i, 3).Float()
319 | 
320 | 		// ranks came from assignRanks
321 | 		// padj = (n_test * pval) / rank
322 | 		padj := float64(Tests) * float64(p) / float64(r)
323 | 		if padj >= 1 {
324 | 			padj = 1
325 | 		}
326 | 
327 | 		// collect money
328 | 		fdr.Append(padj)
329 | 		if padj < Threshold {
330 | 			keep.Append(1)
331 | 		} else {
332 | 			keep.Append(0)
333 | 		}
334 | 	}
335 | 
336 | 	// create padj and keep columns in the DF
337 | 	fdrDF = fdrDF.
338 | 		Mutate(series.New(fdr, series.Float, "padj")).
339 | 		Mutate(series.New(keep, series.Int, "keep"))
340 | 
341 | 	// filter and return significant peaks
342 | 	fdrDF = fdrDF.Filter(dataframe.F{
343 | 		Colname: "keep",
344 | 		Comparator: series.Eq,
345 | 		Comparando: 1},
346 | 	)
347 | 	for i := 0; i < fdrDF.Nrow(); i++ {
348 | 		sigSlice, _ := fdrDF.Elem(i, 0).Int()
349 | 		keepBins = append(keepBins, sigSlice)
350 | 	}
351 | 
352 | 	// fmt.Println(fdrDF)
353 | 	// fmt.Println(fdrDF.Drop([]int{0, 3, 5}).Describe()) // stat summary all columns except for BinID and keep.
354 | 
355 | 	return keepBins
356 | }
357 | 
358 | func assignRanks(fdrDF dataframe.DataFrame) dataframe.DataFrame {
359 | 
360 | 	// implement smart ranking scheme to account for same pvals
361 | 
362 | 	// assume the pval col is sorted numerically
363 | 	rank := 0
364 | 	rankSeries := series.New([]int{}, series.Int, "rankSeries")
365 | 	pvalMap := make(map[float64]int)
366 | 
367 | 	// create pval:rank map
368 | 	for i := 0; i < fdrDF.Nrow(); i++ {
369 | 		pval := fdrDF.Elem(i, 2).Float()
370 | 		_, ok := pvalMap[pval] // output = value, bool
371 | 		if !ok {
372 | 			rank += 1
373 | 			pvalMap[pval] = rank
374 | 		}
375 | 	}
376 | 
377 | 	// assign rank to rankSeries
378 | 	for i := 0; i < fdrDF.Nrow(); i++ {
379 | 		pval := fdrDF.Elem(i, 2).Float()
380 | 		rankSeries.Append(pvalMap[pval])
381 | 	}
382 | 
383 | 	// add rank column to DF
384 | 	fdrDF = fdrDF.Mutate(series.New(rankSeries, series.Int, "rank"))
385 | 
386 | 	return fdrDF
387 | 
388 | }
389 | 
390 | // filterPeakWidth returns a granges object with ranges having width
391 | // greater than the provided width
392 | func filterPeakWidth(peaks gn.GRanges, width int) gn.GRanges {
393 | 	var keepIdx []int
394 | 	for i := 0; i < len(peaks.Seqnames); i++ {
395 | 		if (peaks.Ranges[i].To - peaks.Ranges[i].From) > width {
396 | 			keepIdx = append(keepIdx, i)
397 | 		}
398 | 	}
399 | 	return peaks.Subset(keepIdx)
400 | }
401 | 
402 | // BinomTest returns the p-value testing the null hypothesis that the
403 | // probability of a positive Bernoulli trial of probability p is p
404 | func BinomTest(count float64, total float64, p float64) float64 {
405 | 	// dev notes: may need to use one-tailed binomial test. we're not interested in bins < expected.
406 | 	dist := distuv.Binomial{N: float64(total) - count, P: p}
407 | 	return dist.Prob(float64(count))
408 | }
409 | 
// MaxIntSlice returns the largest value in slice, converted to float64.
// An empty or nil slice yields 0.
func MaxIntSlice(slice []int) float64 {
	if len(slice) == 0 {
		return 0
	}
	// Seed with the first element rather than 0 so that a slice holding only
	// negative values reports its true maximum.
	largest := slice[0]
	for _, v := range slice[1:] {
		if v > largest {
			largest = v
		}
	}
	return float64(largest)
}
421 | 
422 | // merges ranges in obj that are "within" base pairs apart
423 | func mergeWithin(obj gn.GRanges, within int) gn.GRanges {
424 | 
425 | 	out := []gn.Range{}
426 | 	outSeqs := []string{}
427 | 
428 | 	in := obj.Ranges
429 | 	inSeqs := obj.Seqnames
430 | 
431 | 	for i := 0; i < len(in); i++ {
432 | 
433 | 		outLen := len(out)
434 | 		if i == 0 {
435 | 			out = append(out, in[i])
436 | 			outSeqs = append(outSeqs, inSeqs[i])
437 | 			continue
438 | 		}
439 | 
440 | 		if outSeqs[len(outSeqs)-1] == inSeqs[i] {
441 | 			if (out[outLen-1].To + within) >= in[i].From {
442 | 				out[outLen-1].To = in[i].To
443 | 			} else {
444 | 				// append
445 | 				out = append(out, in[i])
446 | 				outSeqs = append(outSeqs, inSeqs[i])
447 | 			}
448 | 		} else {
449 | 			out = append(out, in[i])
450 | 			outSeqs = append(outSeqs, inSeqs[i])
451 | 		}
452 | 	}
453 | 
454 | 	of := []int{}
455 | 	ot := []int{}
456 | 	os := []byte{}
457 | 	for _, r := range out {
458 | 		of = append(of, r.From)
459 | 		ot = append(ot, r.To)
460 | 		os = append(os, '*')
461 | 	}
462 | 	return gn.NewGRanges(outSeqs, of, ot, os)
463 | }
464 | 
465 | // countOverlaps counts the overlapping in r2 and reports them as
466 | // a new metadata column "overlap_counts" on r2
467 | func countOverlaps(r1 gn.GRanges, r2 gn.GRanges) gn.GRanges {
468 | 	s, _ := gn.FindOverlaps(r1, r2)
469 | 	idxMap := map[int]float64{}
470 | 	for i := 0; i < len(s); i++ {
471 | 		idxMap[s[i]] += 1
472 | 	}
473 | 	var olaps []float64
474 | 	for i := 0; i < r1.Length(); i++ {
475 | 		var cnt float64
476 | 		cnt, ok := idxMap[i]
477 | 		if !ok {
478 | 			cnt = 0.0
479 | 		}
480 | 		olaps = append(olaps, cnt)
481 | 	}
482 | 	r1.AddMeta("overlap_counts", olaps)
483 | 	return r1
484 | }
485 | 
486 | func binChrom(genome gn.Genome, chr string, step, slide int) gn.GRanges {
487 | 	var seqnames []string
488 | 	var ranges []gn.Range
489 | 	var strand []byte
490 | 	start := 0
491 | 	len, _ := genome.SeqLength(chr)
492 | 	count := 0
493 | 	for start <= len-step {
494 | 		end := start + step
495 | 		ranges = append(ranges, gn.Range{From: start, To: end})
496 | 		seqnames = append(seqnames, chr)
497 | 		start += slide
498 | 		count += 1
499 | 	}
500 | 
501 | 	strand = make([]byte, count)
502 | 	for i := 0; i < count; i++ {
503 | 		strand[i] = '*'
504 | 	}
505 | 
506 | 	ret := gn.GRanges{
507 | 		Seqnames: seqnames,
508 | 		Ranges:   ranges,
509 | 		Strand:   strand,
510 | 		Meta:     gn.Meta{},
511 | 	}
512 | 
513 | 	return ret
514 | }
515 | 
// BinnedRangesOrder stores the binned ranges of one chromosome together
// with its chromosome sort order.
type BinnedRangesOrder struct {
	// Order is the position of the chromosome in the genome's Seqnames.
	Order  int
	// Ranges holds the bins produced for that chromosome.
	Ranges gn.GRanges
}
522 | 
523 | // read results into output channel
524 | func binChromToChan(g gn.Genome, chr string, out chan BinnedRangesOrder, step, slide int) {
525 | 	var res BinnedRangesOrder
526 | 	for i, s := range g.Seqnames {
527 | 		if s == chr {
528 | 			res.Order = i
529 | 			res.Ranges = binChrom(g, chr, step, slide)
530 | 			out <- res
531 | 		}
532 | 	}
533 | }
534 | 
535 | func handleChromBins(input chan BinnedRangesOrder, output chan gn.GRanges, wg *sync.WaitGroup) {
536 | 
537 | 	// parse input into slice
538 | 	var gRes []BinnedRangesOrder
539 | 	for r := range input {
540 | 		gRes = append(gRes, r)
541 | 		wg.Done()
542 | 	}
543 | 
544 | 	var ret gn.GRanges
545 | 	// append to output preserving chr order
546 | 	for i := 0; i < len(gRes); i++ {
547 | 		for _, g := range gRes {
548 | 			if g.Order == i {
549 | 				ret = ret.Append(g.Ranges)
550 | 			}
551 | 		}
552 | 	}
553 | 	output <- ret
554 | }
555 | 
556 | // bin genome into overlapping ranges with step and slide
557 | // bin genome creates coverages for each chromosome in separate go routines
558 | func binGenome(genome gn.Genome, step int, slide int) gn.GRanges {
559 | 	input := make(chan BinnedRangesOrder)
560 | 	output := make(chan gn.GRanges)
561 | 	var wg sync.WaitGroup
562 | 	go handleChromBins(input, output, &wg)
563 | 	defer close(output)
564 | 	for _, chr := range genome.Seqnames {
565 | 		wg.Add(1)
566 | 		go binChromToChan(genome, chr, input, step, slide)
567 | 	}
568 | 
569 | 	wg.Wait()
570 | 	close(input)
571 | 	return <-output
572 | }
573 | 
// unknownChromRe matches chromosome names that should be discarded: any name
// containing "Un", "_", "EBV", "N" or "M". It is compiled once at package
// initialisation instead of on every loop iteration.
var unknownChromRe = regexp.MustCompile(`Un|_|EBV|N|M`)

// filterUnkownChroms returns the names in start that do not match the
// pattern of unwanted chromosome names, preserving their order. The result
// is nil when no name is kept.
func filterUnkownChroms(start []string) []string {
	var ret []string
	for _, s := range start {
		if !unknownChromRe.MatchString(s) {
			ret = append(ret, s)
		}
	}
	return ret
}
587 | 
588 | // returns a genome of filtered chromosomes
589 | func KnownChroms(genome *gn.Genome) gn.Genome {
590 | 
591 | 	// make map of known seqs
592 | 	knownMap := map[string]bool{}
593 | 	knownSeqs := filterUnkownChroms(genome.Seqnames)
594 | 	for _, s := range knownSeqs {
595 | 		knownMap[s] = true
596 | 	}
597 | 
598 | 	// return new genome with only known chroms
599 | 	seqnames := []string{}
600 | 	lengths := []int{}
601 | 	for i := 0; i < genome.Length(); i++ {
602 | 		if b, _ := knownMap[genome.Seqnames[i]]; b {
603 | 			seqnames = append(seqnames, genome.Seqnames[i])
604 | 			lengths = append(lengths, genome.Lengths[i])
605 | 		}
606 | 	}
607 | 	return gn.NewGenome(seqnames, lengths)
608 | }
609 | 


--------------------------------------------------------------------------------
/gopeaks_test.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"testing"
  6 | 
  7 | 	"github.com/matryer/is"
  8 | 	gn "github.com/pbenner/gonetics"
  9 | )
 10 | 
 11 | func TestFilterUnknownChroms(t *testing.T) {
 12 | 	is := is.New(t)
 13 | 	chroms := []string{"ChrUn", "chrUn_test", "chrUn_GL000218v1", "chrM", "chr1_blah", "EBV", "chr3", "chr1", "chr4"}
 14 | 	got := filterUnkownChroms(chroms)
 15 | 	want := []string{"chr3", "chr1", "chr4"}
 16 | 	is.Equal(got, want) // should equal
 17 | }
 18 | 
 19 | func TestBinGenome(t *testing.T) {
 20 | 	is := is.New(t)
 21 | 	seqnames := []string{"chr1", "chr2", "chr3", "chr4"}
 22 | 	lengths := []int{1000, 2000, 5000, 300}
 23 | 	g := gn.NewGenome(seqnames, lengths)
 24 | 	bR := binGenome(g, 100, 50)
 25 | 
 26 | 	is.Equal(bR.Seqnames[0], "chr1")                  // first should be chr1
 27 | 	is.Equal(bR.Seqnames[len(bR.Seqnames)-1], "chr4") // last should be chr4
 28 | 
 29 | 	// test that only the four chroms
 30 | 	set := make(map[string]bool)
 31 | 	for _, k := range bR.Seqnames {
 32 | 		set[k] = true
 33 | 	}
 34 | 	is.Equal(len(set), 4) // should be only four chroms
 35 | }
 36 | 
 37 | func TestCountOverlaps(t *testing.T) {
 38 | 	is := is.New(t)
 39 | 	r1 := gn.NewGRanges(
 40 | 		[]string{"chr1", "chr1", "chr1", "chr1"},
 41 | 		[]int{1, 80, 90, 9},
 42 | 		[]int{20, 100, 110, 40},
 43 | 		[]byte{'*', '*', '*', '*'})
 44 | 
 45 | 	r2 := gn.NewGRanges(
 46 | 		[]string{"chr1", "chr1", "chr1"},
 47 | 		[]int{4, 8, 90},
 48 | 		[]int{8, 10, 95},
 49 | 		[]byte{'*', '*', '*'})
 50 | 	rCts := countOverlaps(r1, r2)
 51 | 	dat := rCts.MetaData[0].([]float64)
 52 | 	is.Equal(len(dat), 4)
 53 | }
 54 | 
 55 | func TestNormalizeTocontrolCounts(t *testing.T) {
 56 | 	r1 := gn.NewGRanges(
 57 | 		[]string{"chr1", "chr1", "chr1", "chr1"},
 58 | 		[]int{1, 30, 50, 70},
 59 | 		[]int{20, 40, 60, 80},
 60 | 		[]byte{'*', '*', '*', '*'})
 61 | 
 62 | 	r2 := gn.NewGRanges(
 63 | 		[]string{"chr1", "chr1", "chr1", "chr1"},
 64 | 		[]int{1, 30, 50, 70},
 65 | 		[]int{20, 40, 60, 80},
 66 | 		[]byte{'*', '*', '*', '*'})
 67 | 
 68 | 	r1Cov := []float64{3.3, 4.5, 6.5, 1.1}
 69 | 	r2Cov := []float64{2.3, 5.5, 2.5, 1.5}
 70 | 	r1.AddMeta("overlap_counts", r1Cov)
 71 | 	r2.AddMeta("overlap_counts", r2Cov)
 72 | 
 73 | 	norm := normalizeToControl(r1, r2, r1.Length(), r2.Length())
 74 | 	fmt.Println(norm)
 75 | }
 76 | 
 77 | func TestMaxIntSlice(t *testing.T) {
 78 | 	is := is.New(t)
 79 | 	tslice := []int{1, 2, 3, 4, 5, 5, 7, 8, 0, 10, 300}
 80 | 	want := float64(300)
 81 | 	got := MaxIntSlice(tslice)
 82 | 	is.Equal(got, want) // should equal 300
 83 | }
 84 | 
 85 | func TestMergeWithin(t *testing.T) {
 86 | 	is := is.New(t)
 87 | 	r1 := gn.NewGRanges(
 88 | 		[]string{"chr1", "chr1", "chr1", "chr1", "chr1", "chr1", "chr2", "chr2", "chr2"},
 89 | 		[]int{1, 80, 90, 9, 400, 550, 3, 30, 80},
 90 | 		[]int{20, 100, 110, 40, 500, 600, 10, 40, 90},
 91 | 		[]byte{'*', '*', '*', '*', '*', '*', '*', '*', '*'})
 92 | 	want := gn.NewGRanges(
 93 | 		[]string{"chr1", "chr1", "chr2"},
 94 | 		[]int{1, 40, 3},
 95 | 		[]int{400, 600, 90},
 96 | 		[]byte{'*', '*', '*'})
 97 | 	got := mergeWithin(r1, 150)
 98 | 	is.True(got.Ranges[0].From == want.Ranges[0].From)
 99 | 	is.True(got.Ranges[2].From == want.Ranges[2].From)
100 | 	is.True(got.Seqnames[2] == want.Seqnames[2])
101 | }
102 | 
103 | func TestBinomTest(t *testing.T) {
104 | 	fl := BinomTest(4.5, 50, 0.5)
105 | 	fmt.Println(fl)
106 | }
107 | 


--------------------------------------------------------------------------------