├── .github └── workflows │ ├── go.yml │ └── release.yml ├── .gitignore ├── LICENSE ├── README.md ├── go.mod ├── go.sum ├── gopeaks.go └── gopeaks_test.go /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | jobs: 10 | 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | 16 | - name: Set up Go 17 | uses: actions/setup-go@v2 18 | with: 19 | go-version: 1.15 20 | 21 | - name: Build 22 | run: go build -v ./... 23 | 24 | - name: Test 25 | run: go test -v ./... -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | push: 5 | tags: 6 | - '*' 7 | 8 | jobs: 9 | release: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout # ---------------------------------------------------------------- 13 | uses: actions/checkout@v1 14 | - name: Set up Go 1.15 # ---------------------------------------------------------- 15 | uses: actions/setup-go@v1 16 | with: 17 | go-version: 1.15.11 18 | - name: Build with xgo # ---------------------------------------------------------- 19 | uses: crazy-max/ghaction-xgo@v1 20 | with: 21 | xgo_version: latest 22 | go_version: 1.15.11 23 | dest: build 24 | prefix: gopeaks 25 | targets: windows/amd64,linux/amd64,darwin/amd64 26 | v: true 27 | x: false 28 | ldflags: -s -w 29 | - name: Create Release # ---------------------------------------------------------- 30 | uses: actions/create-release@v1.0.0 31 | id: create_release 32 | env: 33 | GITHUB_TOKEN: ${{ secrets.GO_RELEASE_TOKEN }} 34 | with: 35 | tag_name: ${{ github.ref }} 36 | release_name: Release ${{ github.ref }} 37 | draft: false 38 | prerelease: false 39 | - name: Upload Release Assets # 
--------------------------------------------------- 40 | uses: glentiki/xbin-release-action@v1.0.0 41 | env: 42 | GITHUB_TOKEN: ${{ secrets.GO_RELEASE_TOKEN }} 43 | with: 44 | upload_url: ${{ steps.create_release.outputs.upload_url }} # This pulls from the CREATE RELEASE step above, referencing its ID to get its outputs object, which includes an `upload_url`. See this blog post for more info: https://jasonet.co/posts/new-features-of-github-actions/#passing-data-to-future-steps 45 | assets_path: ./build 46 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.bam 2 | *.bai 3 | gopeaks 4 | *.bed 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Jake VanCampen - Bioinformatics 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Anaconda-Server Badge](https://anaconda.org/bioconda/gopeaks/badges/version.svg)](https://anaconda.org/bioconda/gopeaks) 2 | [![Anaconda-Server Badge](https://anaconda.org/bioconda/gopeaks/badges/downloads.svg)](https://anaconda.org/bioconda/gopeaks) 3 | [![Anaconda-Server Badge](https://anaconda.org/bioconda/gopeaks/badges/license.svg)](https://anaconda.org/bioconda/gopeaks) 4 | [![Go](https://github.com/maxsonBraunLab/gopeaks/actions/workflows/go.yml/badge.svg?branch=main)](https://github.com/maxsonBraunLab/gopeaks/actions/workflows/go.yml) 5 | ![Maintainer](https://img.shields.io/badge/maintainer-gartician-blue) 6 | 7 | # GoPeaks 8 | 9 | GoPeaks is a peak caller designed for CUT&TAG/CUT&RUN sequencing data. GoPeaks by default works best with narrow peaks such as H3K4me3 and transcription factors. However, broad epigenetic marks like H3K27Ac/H3K4me1 require different step, slide, and minwidth parameters. We encourage users to explore the parameters of GoPeaks to analyze their data. 10 | 11 | # Configure 12 | 13 | Download the latest release using conda: 14 | 15 | ``` 16 | conda install -c bioconda -c conda-forge gopeaks 17 | ``` 18 | 19 | Or download the binary asset directly from GitHub: 20 | 21 | ``` 22 | wget -O gopeaks https://github.com/maxsonBraunLab/gopeaks/releases/download/v1.0.0/gopeaks-linux-amd64 23 | chmod +x gopeaks 24 | ``` 25 | 26 | # Paper 27 | 28 | GoPeaks was published in Genome Biology on July 4th, 2022. 
If you use our program in your studies, please cite our paper: 29 | 30 | Yashar, W.M., Kong, G., VanCampen, J. et al. GoPeaks: histone modification peak calling for CUT&Tag. Genome Biol 23, 144 (2022). https://doi.org/10.1186/s13059-022-02707-w 31 | 32 | # Example Usage 33 | 34 | ``` 35 | usage: GoPeaks [-h|--help] [-b|--bam ""] [-c|--control ""] 36 | [-s|--chromsize ""] [-m|--mdist ] [-r|--minreads 37 | ] [-p|--pval ] [-t|--step ] [-l|--slide 38 | ] [-w|--minwidth ] [-o|--prefix ""] 39 | [-v|--version] [--broad] [--verbose] 40 | 41 | GoPeaks is a peak caller designed for CUT&TAG/CUT&RUN sequencing 42 | data. GoPeaks by default works best with narrow peaks such as 43 | H3K4me3 and transcription factors. GoPeaks can be used with the 44 | "--broad" flag to call broad peaks like H3K27Ac/H3K4me1. We 45 | encourage users to explore the parameters of GoPeaks to analyze 46 | their data. 47 | 48 | Arguments: 49 | 50 | -h --help Print help information 51 | -b --bam Input BAM file (must be paired-end reads) 52 | -c --control Input BAM file with control signal to be normalized (e.g. 53 | IgG, Input) 54 | -s --chromsize Chromosome sizes for the genome if not found in the bam 55 | header 56 | -m --mdist Merge peaks within base pairs. Default: 1000 57 | -r --minreads Test genome bins with at least read pairs.. 58 | Default: 15 59 | -p --pval Define significance threshold with multiple 60 | hypothesis correction via Benjamini-Hochberg. Default: 0.05 61 | -t --step Bin size for coverage bins. Default: 100 62 | -l --slide Slide size for coverage bins. Default: 50 63 | -w --minwidth Minimum width (bp) of a peak. Default: 150 64 | -o --prefix Output prefix to write peaks and metrics file. Default: 65 | sample 66 | -v --version Print the current GoPeaks version 67 | --broad Run GoPeaks on broad marks (--step 5000 & --slide 1000) 68 | --verbose Run GoPeaks in verbose mode. 
69 | 70 | ``` 71 | 72 | ## Call peaks on a bam file using an IgG control 73 | 74 | ``` 75 | gopeaks -b .bam -c .bam -o path/to/gopeaks/ 76 | ``` 77 | 78 | ## Output 79 | 80 | Two output files are generated each with the output prefix ${prefix}, set to "sample" by default. 81 | 82 | - sample_peaks.bed 83 | - sample_gopeaks.json 84 | 85 | ``` 86 | head sample_peaks.bed 87 | chr1 9950 10550 88 | chr1 21250 22650 89 | chr1 96050 97050 90 | ``` 91 | 92 | ``` 93 | cat sample_gopeaks.json 94 | { 95 | "gopeaks_version": "1.0.0", 96 | "date": "2022-05-10 3:19:14 PM", 97 | "elapsed": "5m20.184082356s", 98 | "prefix": "K562_1_H3K4me3", 99 | "command": "gopeaks -b K562_1_H3K4me3.bam -c K562_1_IgG.bam", 100 | "peak_counts": 10329 101 | } 102 | ``` 103 | 104 | ## Recommended parameters 105 | 106 | | Sequencing Modality | Recommended Parameters | 107 | | ---------------------------------------- | ---------------------------- | 108 | | CUT&TAG or CUT&RUN narrow peaks | Default parameters | 109 | | CUT&TAG or CUT&RUN transcription factors | Default parameters | 110 | | CUT&TAG or CUT&RUN broad peaks | `--broad` and `--mdist 3000` | 111 | | ATAC-Seq | Default parameters | 112 | 113 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module maxsonBraunLab/gopeaks 2 | 3 | go 1.15 4 | 5 | require ( 6 | github.com/akamensky/argparse v1.3.1 7 | github.com/go-gota/gota v0.12.0 8 | github.com/pbenner/gonetics v1.0.0 9 | github.com/sirupsen/logrus v1.8.1 10 | gonum.org/v1/gonum v0.9.1 11 | ) 12 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= 2 | gioui.org v0.0.0-20210308172011-57750fc8a0a6/go.mod 
h1:RSH6KIUZ0p2xy5zHDxgAM4zumjgTw83q2ge/PI+yyw8= 3 | github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= 4 | github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw= 5 | github.com/akamensky/argparse v1.3.1 h1:kP6+OyvR0fuBH6UhbE6yh/nskrDEIQgEA1SUXDPjx4g= 6 | github.com/akamensky/argparse v1.3.1/go.mod h1:S5kwC7IuDcEr5VeXtGPRVZ5o/FdhcMlQz4IZQuw64xA= 7 | github.com/boombuler/barcode v1.0.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8= 8 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 9 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 10 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 11 | github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= 12 | github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= 13 | github.com/go-fonts/dejavu v0.1.0/go.mod h1:4Wt4I4OU2Nq9asgDCteaAaWZOV24E+0/Pwo0gppep4g= 14 | github.com/go-fonts/latin-modern v0.2.0/go.mod h1:rQVLdDMK+mK1xscDwsqM5J8U2jrRa3T0ecnM9pNujks= 15 | github.com/go-fonts/liberation v0.1.1/go.mod h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2HYqyqAO9z7GY= 16 | github.com/go-fonts/stix v0.1.0/go.mod h1:w/c1f0ldAUlJmLBvlbkvVXLAD+tAMqobIIQpmnUIzUY= 17 | github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= 18 | github.com/go-gota/gota v0.12.0 h1:T5BDg1hTf5fZ/CO+T/N0E+DDqUhvoKBl+UVckgcAAQg= 19 | github.com/go-gota/gota v0.12.0/go.mod h1:UT+NsWpZC/FhaOyWb9Hui0jXg0Iq8e/YugZHTbyW/34= 20 | github.com/go-latex/latex v0.0.0-20210118124228-b3d85cf34e07/go.mod h1:CO1AlKB2CSIqUrmQPqA0gdRIlnLEY0gK5JGjh37zN5U= 21 | github.com/go-sql-driver/mysql v1.5.0 h1:ozyZYNQW3x3HtqT1jira07DN2PArx2v7/mN66gGcHOs= 22 | github.com/go-sql-driver/mysql v1.5.0/go.mod 
h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= 23 | github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= 24 | github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= 25 | github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= 26 | github.com/pbenner/gonetics v1.0.0 h1:OV194PW7cdQJYbVQtSf1G8hZ0VGqCGKijC0M8KgkpIA= 27 | github.com/pbenner/gonetics v1.0.0/go.mod h1:pbCZfdjg2QuXuJJREZjjxsxT7sDF+GBx3EDic75GqE0= 28 | github.com/phpdave11/gofpdf v1.4.2/go.mod h1:zpO6xFn9yxo3YLyMvW8HcKWVdbNqgIfOOp2dXMnm1mY= 29 | github.com/phpdave11/gofpdi v1.0.12/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI= 30 | github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 31 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 32 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 33 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 34 | github.com/ruudk/golang-pdf417 v0.0.0-20181029194003-1af4ab5afa58/go.mod h1:6lfFZQK844Gfx8o5WFuvpxWRwnSoipWe/p622j1v06w= 35 | github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE= 36 | github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= 37 | github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w= 38 | github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= 39 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 40 | golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 41 | golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 42 | golang.org/x/exp 
v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 43 | golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 44 | golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 45 | golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3 h1:n9HxLrNxWWtEb1cA950nuEEj3QnKbtsCJ6KjcgisNUs= 46 | golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3/go.mod h1:NOZ3BPKG0ec/BKJQgnvsSFpcKLM5xXVWnvZS97DWHgE= 47 | golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= 48 | golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= 49 | golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= 50 | golang.org/x/image v0.0.0-20190910094157-69e4b8554b2a/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= 51 | golang.org/x/image v0.0.0-20200119044424-58c23975cae1/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= 52 | golang.org/x/image v0.0.0-20200430140353-33d19683fad8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= 53 | golang.org/x/image v0.0.0-20200618115811-c13761719519/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= 54 | golang.org/x/image v0.0.0-20201208152932-35266b937fa6/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= 55 | golang.org/x/image v0.0.0-20210216034530-4410531fe030/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= 56 | golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= 57 | golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= 58 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 59 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 60 | 
golang.org/x/net v0.0.0-20210423184538-5f58ad60dda6 h1:0PC75Fz/kyMGhL0e1QnypqK2kQMqKt9csD1GnMJR+Zk= 61 | golang.org/x/net v0.0.0-20210423184538-5f58ad60dda6/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= 62 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 63 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 64 | golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 65 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 66 | golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 67 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 68 | golang.org/x/sys v0.0.0-20210304124612-50617c2ba197/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 69 | golang.org/x/sys v0.0.0-20210423082822-04245dca01da h1:b3NXsE2LusjYGGjL5bxEVZZORm/YEFFrWFjR8eFrw/c= 70 | golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 71 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 72 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 73 | golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 74 | golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 75 | golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 76 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 77 | golang.org/x/tools v0.0.0-20190206041539-40960b6deb8e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 78 | golang.org/x/tools v0.0.0-20190927191325-030b2cf1153e/go.mod 
// Metrics is the run summary that is serialized to "<prefix>_gopeaks.json".
type Metrics struct {
	Version string `json:"gopeaks_version"` // GoPeaks version string
	Date    string `json:"date"`            // formatted wall-clock time of the run
	Elapsed string `json:"elapsed"`         // run duration rendered as a string
	Prefix  string `json:"prefix"`          // output prefix used for this run
	Command string `json:"command"`         // command line that was executed
	Peaks   int    `json:"peak_counts"`     // number of peaks written
}

// Log writes m as tab-indented JSON, followed by a single newline, to the file
// op + "_gopeaks.json". The file is created, or truncated if it already exists.
//
// Any failure (marshalling, creating, writing or closing the file) is printed
// to standard output and terminates the process with exit status 1.
func (m *Metrics) Log(op string) {
	resp, err := json.MarshalIndent(m, "", "\t")
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}

	f, err := os.Create(op + "_gopeaks.json")
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}

	// The file is closed explicitly rather than with defer: os.Exit does not
	// run deferred calls, and the Close error matters because it can be the
	// only report of a failed flush to disk.
	_, writeErr := f.Write(append(resp, '\n'))
	closeErr := f.Close()
	if writeErr == nil {
		writeErr = closeErr
	}
	if writeErr != nil {
		fmt.Println(writeErr)
		os.Exit(1)
	}
}
metrics file", Default: "sample"}) 67 | version := parser.Flag("v", "version", &argparse.Options{Help: "Print the current GoPeaks version"}) 68 | broad := parser.Flag("", "broad", &argparse.Options{Help: "Run GoPeaks on broad marks (--step 5000 & --slide 1000)"}) 69 | verbose := parser.Flag("", "verbose", &argparse.Options{Help: "Run GoPeaks in verbose mode."}) 70 | // note: "Required" interface clashes with --version flag. 71 | err := parser.Parse(os.Args) 72 | 73 | // parse flags -------------------------------------------------------------------------------- 74 | 75 | // check version 76 | if *version == true { 77 | fmt.Println("GoPeaks version:", gopeaks_version) 78 | os.Exit(0) 79 | } 80 | 81 | // check argparse errors 82 | if err != nil { 83 | fmt.Print(parser.Usage(err)) 84 | os.Exit(1) 85 | } 86 | 87 | // require args 88 | if *bam == "" { 89 | fmt.Println(parser.Help(nil)) 90 | os.Exit(1) 91 | } 92 | 93 | // read bamfile to GRanges 94 | r := gn.GRanges{} 95 | if err := r.ImportBamPairedEnd(*bam, gn.BamReaderOptions{ReadName: false, ReadCigar: false, ReadSequence: false}); err != nil { 96 | logrus.Errorf("Error %s", err.Error()) 97 | os.Exit(1) 98 | } 99 | 100 | g := gn.Genome{} 101 | if *cs != "" { 102 | err := g.Import(*cs) 103 | if err != nil { 104 | logrus.Errorln("Failed to import chromsizes file") 105 | os.Exit(1) 106 | } 107 | } 108 | 109 | if *cs == "" { 110 | if *verbose { 111 | fmt.Println("Reading chromsizes from bam header...") 112 | } 113 | g, err = gn.BamImportGenome(*bam) 114 | if err != nil { 115 | fmt.Println("Genome could not be determined from bam file") 116 | os.Exit(1) 117 | } 118 | } 119 | 120 | if *broad == true { 121 | x := 5000 122 | step = &x 123 | y := 1000 124 | slide = &y 125 | } 126 | 127 | // import data -------------------------------------------------------------------------------- 128 | 129 | gf := KnownChroms(&g) 130 | fr := r.FilterGenome(gf) 131 | 132 | // calculate coverage 133 | binRanges := binGenome(g, *step, *slide) 
// scaleTreatToControl shrinks each entry of counts by the ratio s2[i]/s1[i]:
//
//	result[i] = floor(counts[i] * (1 - min(s2[i]/s1[i], 1)))
//
// When s1[i] is not positive the ratio is taken to be 1, so the result is 0.
// A ratio above 1 is clamped to 1, which also yields 0.
//
// The returned slice always has len(s1) entries. Positions for which s2 or
// counts has no corresponding element are left at 0 instead of causing an
// index-out-of-range panic.
func scaleTreatToControl(counts []float64, s1 []float64, s2 []float64) []float64 {
	scale := make([]float64, len(s1))

	// Only positions present in all three inputs can be scaled.
	n := len(s1)
	if len(s2) < n {
		n = len(s2)
	}
	if len(counts) < n {
		n = len(counts)
	}

	for i := 0; i < n; i++ {
		frac := 1.0
		if s1[i] > 0 {
			frac = s2[i] / s1[i]
			if frac > 1 {
				frac = 1
			}
		}
		scale[i] = math.Floor(counts[i] * (1 - frac))
	}
	return scale
}
// binomialParameters summarizes a vector of per-bin counts in a single pass.
//
// It returns, in order:
//   - the sum of all counts that are not zero,
//   - the number of bins whose count is not zero,
//   - the number of bins whose count is strictly greater than minreads.
//
// A bin may contribute to both the non-zero tally and the test tally.
func binomialParameters(counts []float64, minreads int) (float64, int, int) {
	var (
		signal   float64
		occupied int
		tested   int
	)

	threshold := float64(minreads)
	for _, c := range counts {
		if c != 0 {
			signal += c
			occupied++
		}
		if c > threshold {
			tested++
		}
	}

	return signal, occupied, tested
}
// filterBinsbyFDR returns the bin ids whose adjusted p-value is below Threshold.
//
// Bins, Counts and Pvals are parallel slices (one entry per tested bin). They
// are loaded into a data frame with columns "bin", "counts" and "pval", the
// frame is arranged by "pval", and assignRanks appends a "rank" column. For
// every row the adjusted value is
//
//	padj = Tests * pval / rank   (capped at 1)
//
// and the row is kept when padj < Threshold.
//
// Counts is only carried along in the frame and outprefix is not used by this
// function. The result is never nil: it is an empty slice when no bin passes.
// NOTE(review): the order of the returned ids follows the row order of the
// filtered frame, presumably ascending p-value; confirm against gota's
// Arrange/Filter semantics.
func filterBinsbyFDR(Bins []int, Counts []float64, Pvals []float64, Threshold float64, Tests int, outprefix string) []int {

	keepBins := []int{}

	// Build the working frame. Column positions matter below:
	// 0 = "bin", 1 = "counts", 2 = "pval".
	fdrDF := dataframe.New(
		series.New(Bins, series.Int, "bin"),
		series.New(Counts, series.Float, "counts"),
		series.New(Pvals, series.Float, "pval"),
	)
	// assignRanks expects the rows to be ordered by p-value already.
	fdrDF = fdrDF.Arrange(dataframe.Sort("pval"))
	fdrDF = assignRanks(fdrDF)

	// fmt.Println("assigned ranks")
	// fmt.Println(fdrDF)

	// Two new columns are accumulated row by row: the adjusted p-value and a
	// 0/1 marker telling whether the row passes the threshold.
	fdr := series.New([]float64{}, series.Float, "padj")
	keep := series.New([]int{}, series.Int, "keep")

	for i := 0; i < fdrDF.Nrow(); i++ {

		// Column 2 is "pval"; column 3 is read as the rank produced by
		// assignRanks (it is the first column added after the initial three).
		p := fdrDF.Elem(i, 2).Float()
		r := fdrDF.Elem(i, 3).Float()

		// padj = (n_test * pval) / rank, never reported above 1.
		padj := float64(Tests) * float64(p) / float64(r)
		if padj >= 1 {
			padj = 1
		}

		// Record the adjusted value and the keep marker for this row.
		fdr.Append(padj)
		if padj < Threshold {
			keep.Append(1)
		} else {
			keep.Append(0)
		}
	}

	// Attach the "padj" and "keep" columns to the frame.
	fdrDF = fdrDF.
		Mutate(series.New(fdr, series.Float, "padj")).
		Mutate(series.New(keep, series.Int, "keep"))

	// Keep only the rows marked as significant.
	fdrDF = fdrDF.Filter(dataframe.F{
		Colname:    "keep",
		Comparator: series.Eq,
		Comparando: 1},
	)
	for i := 0; i < fdrDF.Nrow(); i++ {
		// Column 0 is "bin". The conversion error is discarded here.
		sigSlice, _ := fdrDF.Elem(i, 0).Int()
		keepBins = append(keepBins, sigSlice)
	}

	// fmt.Println(fdrDF)
	// fmt.Println(fdrDF.Drop([]int{0, 3, 5}).Describe()) // stat summary all columns except for BinID and keep.

	return keepBins
}

// assignRanks appends a "rank" column to fdrDF and returns the new frame.
//
// Ranks are assigned per distinct p-value (column 2): walking the rows from
// top to bottom, the first p-value seen gets rank 1 and every previously
// unseen value gets the next integer, so rows with equal p-values share one
// rank. The ranks therefore follow p-value order only if the caller has
// sorted the frame by p-value beforehand.
func assignRanks(fdrDF dataframe.DataFrame) dataframe.DataFrame {

	// Ties are handled by ranking distinct p-values, not rows.

	// The pval column is assumed to be sorted numerically.
	rank := 0
	rankSeries := series.New([]int{}, series.Int, "rankSeries")
	pvalMap := make(map[float64]int)

	// First pass: map every distinct p-value to its rank.
	for i := 0; i < fdrDF.Nrow(); i++ {
		pval := fdrDF.Elem(i, 2).Float()
		_, ok := pvalMap[pval] // ok reports whether this p-value was seen before
		if !ok {
			rank += 1
			pvalMap[pval] = rank
		}
	}

	// Second pass: look up the rank of each row's p-value.
	for i := 0; i < fdrDF.Nrow(); i++ {
		pval := fdrDF.Elem(i, 2).Float()
		rankSeries.Append(pvalMap[pval])
	}

	// Add the ranks to the frame as column "rank".
	fdrDF = fdrDF.Mutate(series.New(rankSeries, series.Int, "rank"))

	return fdrDF

}

// filterPeakWidth returns the subset of peaks whose width (To - From) is
// strictly greater than width. Ranges whose width equals width are dropped.
func filterPeakWidth(peaks gn.GRanges, width int) gn.GRanges {
	var keepIdx []int
	for i := 0; i < len(peaks.Seqnames); i++ {
		if (peaks.Ranges[i].To - peaks.Ranges[i].From) > width {
			keepIdx = append(keepIdx, i)
		}
	}
	return peaks.Subset(keepIdx)
}
probability p is p 404 | func BinomTest(count float64, total float64, p float64) float64 { 405 | // dev notes: may need to use one-tailed binomial test. we're not interested in bins < expected. 406 | dist := distuv.Binomial{N: float64(total) - count, P: p} 407 | return dist.Prob(float64(count)) 408 | } 409 | 410 | // MaxIntSlice returns the Max of an []Int 411 | // cast as a float64 412 | func MaxIntSlice(slice []int) float64 { 413 | max := 0 414 | for _, i := range slice { 415 | if max < i { 416 | max = i 417 | } 418 | } 419 | return float64(max) 420 | } 421 | 422 | // merges ranges in obj that are "within" base pairs apart 423 | func mergeWithin(obj gn.GRanges, within int) gn.GRanges { 424 | 425 | out := []gn.Range{} 426 | outSeqs := []string{} 427 | 428 | in := obj.Ranges 429 | inSeqs := obj.Seqnames 430 | 431 | for i := 0; i < len(in); i++ { 432 | 433 | outLen := len(out) 434 | if i == 0 { 435 | out = append(out, in[i]) 436 | outSeqs = append(outSeqs, inSeqs[i]) 437 | continue 438 | } 439 | 440 | if outSeqs[len(outSeqs)-1] == inSeqs[i] { 441 | if (out[outLen-1].To + within) >= in[i].From { 442 | out[outLen-1].To = in[i].To 443 | } else { 444 | // append 445 | out = append(out, in[i]) 446 | outSeqs = append(outSeqs, inSeqs[i]) 447 | } 448 | } else { 449 | out = append(out, in[i]) 450 | outSeqs = append(outSeqs, inSeqs[i]) 451 | } 452 | } 453 | 454 | of := []int{} 455 | ot := []int{} 456 | os := []byte{} 457 | for _, r := range out { 458 | of = append(of, r.From) 459 | ot = append(ot, r.To) 460 | os = append(os, '*') 461 | } 462 | return gn.NewGRanges(outSeqs, of, ot, os) 463 | } 464 | 465 | // countOverlaps counts the overlapping in r2 and reports them as 466 | // a new metadata column "overlap_counts" on r2 467 | func countOverlaps(r1 gn.GRanges, r2 gn.GRanges) gn.GRanges { 468 | s, _ := gn.FindOverlaps(r1, r2) 469 | idxMap := map[int]float64{} 470 | for i := 0; i < len(s); i++ { 471 | idxMap[s[i]] += 1 472 | } 473 | var olaps []float64 474 | for i := 0; i < 
r1.Length(); i++ { 475 | var cnt float64 476 | cnt, ok := idxMap[i] 477 | if !ok { 478 | cnt = 0.0 479 | } 480 | olaps = append(olaps, cnt) 481 | } 482 | r1.AddMeta("overlap_counts", olaps) 483 | return r1 484 | } 485 | 486 | func binChrom(genome gn.Genome, chr string, step, slide int) gn.GRanges { 487 | var seqnames []string 488 | var ranges []gn.Range 489 | var strand []byte 490 | start := 0 491 | len, _ := genome.SeqLength(chr) 492 | count := 0 493 | for start <= len-step { 494 | end := start + step 495 | ranges = append(ranges, gn.Range{From: start, To: end}) 496 | seqnames = append(seqnames, chr) 497 | start += slide 498 | count += 1 499 | } 500 | 501 | strand = make([]byte, count) 502 | for i := 0; i < count; i++ { 503 | strand[i] = '*' 504 | } 505 | 506 | ret := gn.GRanges{ 507 | Seqnames: seqnames, 508 | Ranges: ranges, 509 | Strand: strand, 510 | Meta: gn.Meta{}, 511 | } 512 | 513 | return ret 514 | } 515 | 516 | // bin Result stores the chromosome bin result 517 | // and it's chromosome sort order 518 | type BinnedRangesOrder struct { 519 | Order int 520 | Ranges gn.GRanges 521 | } 522 | 523 | // read results into output channel 524 | func binChromToChan(g gn.Genome, chr string, out chan BinnedRangesOrder, step, slide int) { 525 | var res BinnedRangesOrder 526 | for i, s := range g.Seqnames { 527 | if s == chr { 528 | res.Order = i 529 | res.Ranges = binChrom(g, chr, step, slide) 530 | out <- res 531 | } 532 | } 533 | } 534 | 535 | func handleChromBins(input chan BinnedRangesOrder, output chan gn.GRanges, wg *sync.WaitGroup) { 536 | 537 | // parse input into slice 538 | var gRes []BinnedRangesOrder 539 | for r := range input { 540 | gRes = append(gRes, r) 541 | wg.Done() 542 | } 543 | 544 | var ret gn.GRanges 545 | // append to output preserving chr order 546 | for i := 0; i < len(gRes); i++ { 547 | for _, g := range gRes { 548 | if g.Order == i { 549 | ret = ret.Append(g.Ranges) 550 | } 551 | } 552 | } 553 | output <- ret 554 | } 555 | 556 | // bin 
genome into overlapping ranges with step and slide 557 | // bin genome creates coverages for each chromosome in separate go routines 558 | func binGenome(genome gn.Genome, step int, slide int) gn.GRanges { 559 | input := make(chan BinnedRangesOrder) 560 | output := make(chan gn.GRanges) 561 | var wg sync.WaitGroup 562 | go handleChromBins(input, output, &wg) 563 | defer close(output) 564 | for _, chr := range genome.Seqnames { 565 | wg.Add(1) 566 | go binChromToChan(genome, chr, input, step, slide) 567 | } 568 | 569 | wg.Wait() 570 | close(input) 571 | return <-output 572 | } 573 | 574 | // filters unknown chromosome names from a strings slice 575 | // using a regex of unwanted string matches 576 | func filterUnkownChroms(start []string) []string { 577 | var ret []string 578 | filt := `Un|_|EBV|N|M` 579 | for _, s := range start { 580 | r := regexp.MustCompile(filt) 581 | if !r.MatchString(s) { 582 | ret = append(ret, s) 583 | } 584 | } 585 | return ret 586 | } 587 | 588 | // returns a genome of filtered chromosomes 589 | func KnownChroms(genome *gn.Genome) gn.Genome { 590 | 591 | // make map of known seqs 592 | knownMap := map[string]bool{} 593 | knownSeqs := filterUnkownChroms(genome.Seqnames) 594 | for _, s := range knownSeqs { 595 | knownMap[s] = true 596 | } 597 | 598 | // return new genome with only known chroms 599 | seqnames := []string{} 600 | lengths := []int{} 601 | for i := 0; i < genome.Length(); i++ { 602 | if b, _ := knownMap[genome.Seqnames[i]]; b { 603 | seqnames = append(seqnames, genome.Seqnames[i]) 604 | lengths = append(lengths, genome.Lengths[i]) 605 | } 606 | } 607 | return gn.NewGenome(seqnames, lengths) 608 | } 609 | -------------------------------------------------------------------------------- /gopeaks_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/matryer/is" 8 | gn "github.com/pbenner/gonetics" 9 | ) 10 | 11 | func 
TestFilterUnknownChroms(t *testing.T) { 12 | is := is.New(t) 13 | chroms := []string{"ChrUn", "chrUn_test", "chrUn_GL000218v1", "chrM", "chr1_blah", "EBV", "chr3", "chr1", "chr4"} 14 | got := filterUnkownChroms(chroms) 15 | want := []string{"chr3", "chr1", "chr4"} 16 | is.Equal(got, want) // should equal 17 | } 18 | 19 | func TestBinGenome(t *testing.T) { 20 | is := is.New(t) 21 | seqnames := []string{"chr1", "chr2", "chr3", "chr4"} 22 | lengths := []int{1000, 2000, 5000, 300} 23 | g := gn.NewGenome(seqnames, lengths) 24 | bR := binGenome(g, 100, 50) 25 | 26 | is.Equal(bR.Seqnames[0], "chr1") // first should be chr1 27 | is.Equal(bR.Seqnames[len(bR.Seqnames)-1], "chr4") // last should be chr4 28 | 29 | // test that only the four chroms 30 | set := make(map[string]bool) 31 | for _, k := range bR.Seqnames { 32 | set[k] = true 33 | } 34 | is.Equal(len(set), 4) // should be only four chroms 35 | } 36 | 37 | func TestCountOverlaps(t *testing.T) { 38 | is := is.New(t) 39 | r1 := gn.NewGRanges( 40 | []string{"chr1", "chr1", "chr1", "chr1"}, 41 | []int{1, 80, 90, 9}, 42 | []int{20, 100, 110, 40}, 43 | []byte{'*', '*', '*', '*'}) 44 | 45 | r2 := gn.NewGRanges( 46 | []string{"chr1", "chr1", "chr1"}, 47 | []int{4, 8, 90}, 48 | []int{8, 10, 95}, 49 | []byte{'*', '*', '*'}) 50 | rCts := countOverlaps(r1, r2) 51 | dat := rCts.MetaData[0].([]float64) 52 | is.Equal(len(dat), 4) 53 | } 54 | 55 | func TestNormalizeTocontrolCounts(t *testing.T) { 56 | r1 := gn.NewGRanges( 57 | []string{"chr1", "chr1", "chr1", "chr1"}, 58 | []int{1, 30, 50, 70}, 59 | []int{20, 40, 60, 80}, 60 | []byte{'*', '*', '*', '*'}) 61 | 62 | r2 := gn.NewGRanges( 63 | []string{"chr1", "chr1", "chr1", "chr1"}, 64 | []int{1, 30, 50, 70}, 65 | []int{20, 40, 60, 80}, 66 | []byte{'*', '*', '*', '*'}) 67 | 68 | r1Cov := []float64{3.3, 4.5, 6.5, 1.1} 69 | r2Cov := []float64{2.3, 5.5, 2.5, 1.5} 70 | r1.AddMeta("overlap_counts", r1Cov) 71 | r2.AddMeta("overlap_counts", r2Cov) 72 | 73 | norm := normalizeToControl(r1, r2, 
r1.Length(), r2.Length()) 74 | fmt.Println(norm) 75 | } 76 | 77 | func TestMaxIntSlice(t *testing.T) { 78 | is := is.New(t) 79 | tslice := []int{1, 2, 3, 4, 5, 5, 7, 8, 0, 10, 300} 80 | want := float64(300) 81 | got := MaxIntSlice(tslice) 82 | is.Equal(got, want) // should equal 300 83 | } 84 | 85 | func TestMergeWithin(t *testing.T) { 86 | is := is.New(t) 87 | r1 := gn.NewGRanges( 88 | []string{"chr1", "chr1", "chr1", "chr1", "chr1", "chr1", "chr2", "chr2", "chr2"}, 89 | []int{1, 80, 90, 9, 400, 550, 3, 30, 80}, 90 | []int{20, 100, 110, 40, 500, 600, 10, 40, 90}, 91 | []byte{'*', '*', '*', '*', '*', '*', '*', '*', '*'}) 92 | want := gn.NewGRanges( 93 | []string{"chr1", "chr1", "chr2"}, 94 | []int{1, 40, 3}, 95 | []int{400, 600, 90}, 96 | []byte{'*', '*', '*'}) 97 | got := mergeWithin(r1, 150) 98 | is.True(got.Ranges[0].From == want.Ranges[0].From) 99 | is.True(got.Ranges[2].From == want.Ranges[2].From) 100 | is.True(got.Seqnames[2] == want.Seqnames[2]) 101 | } 102 | 103 | func TestBinomTest(t *testing.T) { 104 | fl := BinomTest(4.5, 50, 0.5) 105 | fmt.Println(fl) 106 | } 107 | --------------------------------------------------------------------------------