├── .coveragerc
├── .flake8
├── .github
│   └── workflows
│       ├── pypi-publish.yml
│       ├── pytest.yml
│       └── python-publish-test.yml
├── .gitignore
├── .readthedocs.yml
├── .zenodo.json
├── CHANGELOG.md
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── Makefile
├── README.md
├── cooltools
│   ├── __init__.py
│   ├── __main__.py
│   ├── api
│   │   ├── __init__.py
│   │   ├── coverage.py
│   │   ├── directionality.py
│   │   ├── dotfinder.py
│   │   ├── eigdecomp.py
│   │   ├── expected.py
│   │   ├── insulation.py
│   │   ├── rearrange.py
│   │   ├── saddle.py
│   │   ├── sample.py
│   │   ├── snipping.py
│   │   └── virtual4c.py
│   ├── cli
│   │   ├── __init__.py
│   │   ├── coverage.py
│   │   ├── dots.py
│   │   ├── eigs_cis.py
│   │   ├── eigs_trans.py
│   │   ├── expected_cis.py
│   │   ├── expected_trans.py
│   │   ├── genome.py
│   │   ├── insulation.py
│   │   ├── logbin_expected.py
│   │   ├── pileup.py
│   │   ├── rearrange.py
│   │   ├── saddle.py
│   │   ├── sample.py
│   │   ├── util.py
│   │   └── virtual4c.py
│   ├── lib
│   │   ├── __init__.py
│   │   ├── _numutils.pyx
│   │   ├── _query.py
│   │   ├── checks.py
│   │   ├── common.py
│   │   ├── io.py
│   │   ├── numutils.py
│   │   ├── peaks.py
│   │   ├── plotting.py
│   │   ├── runlength.py
│   │   └── schemas.py
│   └── sandbox
│       ├── __init__.py
│       ├── balance.py
│       ├── contrast.py
│       ├── cool2cworld.py
│       ├── cooler_filters
│       │   ├── Example_usage.ipynb
│       │   ├── pixel_filter_util.py
│       │   └── test_data_util.cool
│       ├── cross_score.py
│       ├── expected_smoothing.py
│       ├── expected_smoothing_example.ipynb
│       ├── fastsavetxt.pyx
│       ├── obs_over_exp_cooler.py
│       ├── observed_over_expected_example.ipynb
│       ├── pairs_scaling_functions.py
│       └── rearrange_cooler_example.ipynb
├── datasets
│   └── external_test_files.tsv
├── docs
│   ├── Makefile
│   ├── cli.rst
│   ├── conf.py
│   ├── cooltools.lib.rst
│   ├── cooltools.rst
│   ├── figs
│   │   └── cooltools-logo-futura.png
│   ├── index.rst
│   ├── make.bat
│   ├── notebooks_old
│   │   ├── 01_scaling-curves.ipynb
│   │   ├── 02_expected.ipynb
│   │   ├── 03_eigendecomposition.ipynb
│   │   ├── 04_saddle-plots.ipynb
│   │   ├── 05_insulation-score.ipynb
│   │   ├── 06_snipping-pileups.ipynb
│   │   ├── 07_pileups2.ipynb
│   │   ├── 08_dot-calling-internals.ipynb
│   │   └── data
│   │       └── encode_motifs.hg38.ctcf_known1.liftover.bed.gz
│   ├── releases.md
│   └── requirements.txt
├── pyproject.toml
├── pytest.ini
├── requirements-dev.txt
├── requirements.txt
├── setup.py
└── tests
    ├── data
    │   ├── CN.mm9.10000kb.cool
    │   ├── CN.mm9.1000kb.cool
    │   ├── CN.mm9.toy_expected.chromnamed.tsv
    │   ├── CN.mm9.toy_expected.tsv
    │   ├── CN.mm9.toy_features.bed
    │   ├── CN.mm9.toy_regions.bed
    │   ├── dotfinder_mock_inputs.npz
    │   ├── dotfinder_mock_res.csv.gz
    │   ├── make_test_compartments.py
    │   ├── mm9.chrom.sizes.reduced
    │   ├── mm9.named_nonoverlap_regions.bed
    │   ├── sin_eigs_mat.bg2.gz
    │   ├── sin_eigs_mat.cool
    │   ├── sin_eigs_track.tsv
    │   ├── test.10.bins
    │   └── test.chrom.sizes
    ├── test_call-dots.py
    ├── test_checks.py
    ├── test_compartments_saddle.py
    ├── test_coverage.py
    ├── test_dotfinder_chunking.py
    ├── test_dotfinder_stats.py
    ├── test_expected.py
    ├── test_insulation.py
    ├── test_io.py
    ├── test_lazy_toeplitz.py
    ├── test_lib_common.py
    ├── test_rearrange_cooler.py
    ├── test_sample.py
    ├── test_snipping.py
    └── test_virtual4c.py
/.coveragerc:
--------------------------------------------------------------------------------
[run]
source =
    cooltools/

omit =
    cooltools/__main__.py

[report]
exclude_lines =
    pragma: no cover
    return NotImplemented
    raise NotImplementedError
--------------------------------------------------------------------------------
/.flake8:
--------------------------------------------------------------------------------
[flake8]
exclude =
    __init__.py
    __main__.py

max-line-length = 80
ignore =
    # whitespace before ':'
    E203
    # too many leading '#' for block comment
    E266
    # line too long
    E501
    # line break before binary operator
    W503
select =
    # mccabe complexity
    C
    # pycodestyle errors
    E
    # pyflakes
    F
    # pycodestyle warnings
    W
    # bugbear
    B
    # line exceeds max-line-length + 10%
    B950
--------------------------------------------------------------------------------
/.github/workflows/pypi-publish.yml:
--------------------------------------------------------------------------------
name: Build and upload Python Package to PyPI

on:
  workflow_call:
  workflow_dispatch:
  release:
    types: [released]

jobs:
  Publish:
    runs-on: ubuntu-latest
    permissions:
      id-token: write

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.x"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install build

      - name: Build
        run: python -m build --sdist

      - name: Publish distribution 📦 to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
--------------------------------------------------------------------------------
/.github/workflows/pytest.yml:
--------------------------------------------------------------------------------
# This workflow will install Python dependencies, run tests, and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Pytest

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  build:

    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: [ '3.9', '3.10', '3.11', '3.12' ]

    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v1
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          pip install --upgrade pip wheel setuptools
          pip install numpy cython
          pip install -r requirements-dev.txt
          pip install -e .
      - name: Lint with flake8
        run: |
          pip install flake8
          # stop the build if there are Python syntax errors or undefined names
          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
      - name: Test with pytest
        run: |
          pip install pytest
          pytest
--------------------------------------------------------------------------------
/.github/workflows/python-publish-test.yml:
--------------------------------------------------------------------------------
# This workflow will upload a Python Package using Twine when a release is created
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries

name: Publish Python Package to Test PyPI

on:
  release:
    types: [prereleased]

jobs:
  deploy:

    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.x'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install cython numpy setuptools wheel twine
      - name: Build and publish
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
        run: |
          python setup.py sdist
          twine upload --repository-url https://test.pypi.org/legacy/ dist/*
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Vim's cache
*.un~

# Byte-compiled / optimized / DLL files
__pycache__
cooltools/__pycache__
*.py[cod]
*$py.class

# C extensions
*.so
*.c

# Distribution / packaging
.Python
env/
.venv/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
# lib/ - not ignored here, since we use it as cooltools/lib
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
.pytest_cache

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/
docs/notebooks

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# virtualenv
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

tmp/

# Downloaded data
datasets/*
!datasets/external_test_files.tsv
tmp.npz
.gitignore
tmp.hdf5
cooltools/sandbox/test.mcool

.vscode/
.idea/
/.readthedocs.yml:
--------------------------------------------------------------------------------
# .readthedocs.yml
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Required
version: 2

build:
  os: ubuntu-22.04
  tools:
    python: "3.10"

# Build documentation in the docs/ directory with Sphinx
sphinx:
  configuration: docs/conf.py

# Build documentation with MkDocs
#mkdocs:
#  configuration: mkdocs.yml

# Optionally build your docs in additional formats such as PDF and ePub
formats: all

# Optionally set the version of Python and requirements required to build your docs
python:
  install:
    - requirements: docs/requirements.txt
--------------------------------------------------------------------------------
/.zenodo.json:
--------------------------------------------------------------------------------
{
    "description": "CoolTools: the tools for your .cools",
    "license": "MIT",
    "upload_type": "software",
    "access_right": "open",
    "creators": [
        {
            "name": "Sergey Venev",
            "affiliation": "University of Massachusetts Medical School"
        },
        {
            "name": "Nezar Abdennur",
            "affiliation": "MIT"
        },
        {
            "name": "Anton Goloborodko",
            "affiliation": "IMBA"
        },
        {
            "name": "Ilya Flyamer",
            "affiliation": "FMI"
        },
        {
            "name": "Geoffrey Fudenberg",
            "affiliation": "University of Southern California"
        },
        {
            "name": "Johannes Nuebler",
            "affiliation": "MIT"
        },
        {
            "name": "Aleksandra Galitsyna",
            "affiliation": "Skolkovo Institute of Science and Technology"
        },
        {
            "name": "Betul Akgol",
            "affiliation": "University of Massachusetts Medical School"
        },
        {
            "name": "Sameer Abraham",
            "affiliation": "MIT"
        },
        {
            "name": "Peter Kerpedjiev",
            "affiliation": "Harvard Medical School"
        },
        {
            "name": "Maksim Imakaev",
            "affiliation": "MIT"
        }
    ],
    "keywords": [
        "genomics",
        "bioinformatics",
        "Hi-C",
        "data",
        "analysis",
        "cooler"
    ]
}
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
# Contributing

## General guidelines

If you haven't contributed to open-source before, we recommend you read [this excellent guide by GitHub on how to contribute to open source](https://opensource.guide/how-to-contribute). The guide is long, so you can skim the parts you're already familiar with.

If you're not already familiar with it, we follow the [fork and pull model](https://help.github.com/articles/about-collaborative-development-models) on GitHub. Also, check out this recommended [git workflow](https://www.asmeurer.com/git-workflow/).

As a rough guide for cooltools:
- contributors should preferably work on their forks and submit pull requests to the main branch
- core maintainers can work on feature branches in the main fork and then submit pull requests to the main branch
- core maintainers can push directly to the main branch if it's urgently needed


## Contributing Code

This project has a number of requirements for all code contributed.

* We follow the [PEP-8 style](https://www.python.org/dev/peps/pep-0008/) convention.
* We use [flake8](http://flake8.pycqa.org/en/latest/) to automatically lint the code and maintain code style. You can use a code formatter like [black](https://github.com/psf/black) or [autopep8](https://github.com/hhatto/autopep8) to help keep the linter happy.
* We use [Numpy-style docstrings](https://numpydoc.readthedocs.io/en/latest/format.html).
* User-facing API changes or new features should have documentation added.

Ideally, provide full test coverage for new code submitted in PRs.


## Setting up Your Development Environment

For setting up an isolated virtual environment for development, we recommend using [conda](https://docs.conda.io/en/latest/miniconda.html). After forking and cloning the repository, install in "editable" (i.e. development) mode using the `-e` option:

```sh
$ git clone https://github.com/open2c/cooltools.git
$ cd cooltools
$ pip install -e .
```

Editable mode installs the package by creating a "link" to your working (repo) directory.


## Unit Tests

It is best if all new functionality and/or bug fixes have unit tests added with each use-case.

We use [pytest](https://docs.pytest.org/en/latest) as our unit testing framework, with the `pytest-cov` extension to check code coverage and `pytest-flake8` to check code style. You don't need to configure these extensions yourself: running the tests automatically checks code style and functionality and prints a coverage report, although the run does not fail on low coverage.

Once you've configured your environment, you can just `cd` to the root of your repository and run

```sh
$ pytest
```

Unit tests are automatically run via GitHub Actions for pull requests.
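
For orientation, a hypothetical test in the style used under `tests/` might look like this (the scenario and values are illustrative, not an existing test):

```python
import numpy as np

from cooltools.api.sample import sample_pixels_exact


def test_sample_pixels_exact_preserves_total():
    # Exact sampling should return exactly the requested number of contacts.
    pixels = {"count": np.array([3, 1, 5, 2])}
    sampled = sample_pixels_exact(pixels, count=6)
    assert int(np.sum(sampled["count"])) == 6
```
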

## Coverage

The `pytest` script automatically reports coverage, both on the terminal (listing missing line numbers) and as annotated HTML in `htmlcov/index.html`.


## Documentation

If a feature is stable and relatively finalized, it is time to add it to the documentation. If you are adding any private/public functions, it is best to add docstrings, to aid in reviewing code and also for the API reference.

We use [Numpy style docstrings](https://numpydoc.readthedocs.io/en/latest/format.html) and [Sphinx](http://www.sphinx-doc.org/en/stable) to document this library. Sphinx, in turn, uses [reStructuredText](http://www.sphinx-doc.org/en/stable/rest.html) as its markup language.

We use the [Sphinx Autosummary extension](http://www.sphinx-doc.org/en/stable/ext/autosummary.html) to generate API references. You may want to look at `docs/cooltools.rst` to see how these files look and where to add new functions, classes or modules.

We also use the [nbsphinx extension](https://nbsphinx.readthedocs.io/en/0.5.0/) to render tutorial pages from Jupyter notebooks.

To build the documentation:

```sh
$ make docs
```

After this, you can find an HTML version of the documentation in `docs/_build/html/index.html`.

Documentation from `master` and tagged releases is automatically built and hosted thanks to [readthedocs](https://readthedocs.org/).


## Acknowledgement

If you've contributed significantly and would like your authorship to be included in subsequent uploads to [Zenodo](https://zenodo.org), please make a separate PR to add your name and affiliation to the `.zenodo.json` file.

---

This document was modified from the [guidelines from the sparse project](https://github.com/pydata/sparse/blob/master/docs/contributing.rst).
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2017 Cooltools developers

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
include CHANGELOG.md
include README.md
include LICENSE
include requirements.txt
include requirements-dev.txt
include environment.yml

include cooltools/lib/_numutils.pyx
graft tests
graft docs
prune docs/_build
prune docs/_static
prune docs/_templates

global-exclude __pycache__/*
global-exclude *.so
global-exclude *.pyd
global-exclude *.pyc
global-exclude .git*
global-exclude .deps/*
global-exclude .DS_Store
22 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
.PHONY: build install test docs clean clean-pyc clean-dist build-dist publish-test publish


build:
	python setup.py build_ext --inplace

install:
	pip install -e .

test:
	pytest

docs:
	cd docs && make html


clean-pyc:
	find . -name '*.pyc' -exec rm --force {} +
	find . -name '*.pyo' -exec rm --force {} +
	find . -name '*~' -exec rm --force {} +

clean-dist:
	rm -rf build/
	rm -rf dist/

clean: clean-pyc clean-dist


build-dist: clean-dist
	python setup.py sdist
	# python setup.py bdist_wheel

publish-test: build-dist
	twine upload --repository-url https://test.pypi.org/legacy/ dist/*

publish: build-dist
	twine upload dist/*
38 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# cooltools: enabling high-resolution Hi-C analysis in Python

[Pytest](https://github.com/open2c/cooltools/actions/workflows/pytest.yml)
[Documentation](https://cooltools.readthedocs.io/en/latest/?badge=latest)
[PyPI](https://pypi.org/project/cooltools)
[Bioconda](https://bioconda.github.io/recipes/cooltools/README.html)
[DOI](https://zenodo.org/badge/latestdoi/82413481)

> tools for your .cools

Chromosome conformation capture technologies reveal the incredible complexity of genome folding. A growing number of labs and multiple consortia, including the 4D Nucleome, the International Nucleome Consortium, and ENCODE, are generating higher-resolution datasets to probe genome architecture across cell states, types, and organisms. Larger datasets increase the challenges at each step of computational analysis, from storage, to memory, to researchers' time. The recently-introduced [***cooler***](https://github.com/open2c/cooler/tree/master/cooler) format readily handles storage of high-resolution datasets via a sparse data model.

***cooltools*** leverages this format to enable flexible and reproducible analysis of high-resolution data. ***cooltools*** provides a suite of computational tools with a paired python API and command line access, which facilitates workflows either on high-performance computing clusters or via custom analysis notebooks. As part of the [***Open2C*** ecosystem](https://open2c.github.io/), ***cooltools*** also provides detailed introductions to key concepts in Hi-C-data analysis with interactive notebook documentation. For more information, see the [preprint](https://doi.org/10.1101/2022.10.31.514564): https://doi.org/10.1101/2022.10.31.514564.
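
To give a sense of the paired API, here is a minimal sketch (the cooler URI below is a placeholder; both calls assume a balanced cooler at 100 kb resolution):

```python
import cooler
import cooltools

# Open a cooler at 100 kb resolution (hypothetical local file).
clr = cooler.Cooler("example.mcool::resolutions/100000")

# Average contact frequency as a function of genomic separation.
cvd = cooltools.expected_cis(clr)

# Diamond insulation scores computed with a 100 kb sliding window.
ins = cooltools.insulation(clr, window_bp=[100_000])
```
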
## Requirements

The following are required before installing cooltools:

* Python 3.7+
* `numpy`
* `cython`

## Installation

```sh
pip install cooltools
```

or install the latest version directly from github:

```sh
$ pip install https://github.com/open2c/cooltools/archive/refs/heads/master.zip
```

See the [requirements.txt](https://github.com/open2c/cooltools/blob/master/requirements.txt) file for information on compatible dependencies, especially for [cooler](https://github.com/open2c/cooler/tree/master/cooler) and [bioframe](https://github.com/open2c/bioframe).


## Documentation and Tutorials

Documentation can be found here: https://cooltools.readthedocs.io/en/latest/.

Cooltools offers a number of tutorials using the [Open2c code ecosystem](https://github.com/open2c/). For users who are new to Hi-C analysis, we recommend going through the example notebooks in the following order:

- [Visualization](https://cooltools.readthedocs.io/en/latest/notebooks/viz.html): how to load and visualize Hi-C data stored in coolers.
- [Contacts vs Distance](https://cooltools.readthedocs.io/en/latest/notebooks/contacts_vs_distance.html): how to calculate contact frequency as a function of genomic distance, the most prominent feature in Hi-C maps.
- [Compartments and Saddles](https://cooltools.readthedocs.io/en/latest/notebooks/compartments_and_saddles.html): how to extract eigenvectors and create saddleplots reflecting A/B compartments.
- [Insulation and Boundaries](https://cooltools.readthedocs.io/en/latest/notebooks/insulation_and_boundaries.html): how to extract insulation profiles and call boundaries using insulation profile minima.
- [Pileups and Average Patterns](https://cooltools.readthedocs.io/en/latest/notebooks/pileup_CTCF.html): how to create average maps around genomic features like CTCF.

For users interested in running analyses from the command line:
- [Command line interface](https://cooltools.readthedocs.io/en/latest/notebooks/command_line_interface.html): how to use the cooltools CLI.

Note that these notebooks currently focus on mammalian interphase Hi-C analysis, but are readily extendable to other organisms and cellular contexts. To clone notebooks for interactive analysis, visit https://github.com/open2c/open2c_examples. Docs for cooltools are built directly from these notebooks.

## Contributing
Cooltools welcomes contributions. The guiding principles for tools are that they should be (i) as simple as possible, (ii) as interpretable as possible, and (iii) free of visualization. The following applies for contributing new functionality to cooltools.

New functionality should:
- clearly define the problem
- discuss alternative solutions
- provide a separate example (as a gist, notebook, etc.) explaining its use cases on multiple datasets
- be compatible with the latest versions of cooler and cooltools (e.g., it should run on any cooler generated by the latest version of cooler)

New functionality should either:
- generalize or extend an existing tool without impairing the user experience, and be submitted as a PR to the relevant tool
- or extract a distinct feature of genome organization, and be submitted as a pull request to the sandbox

Vignettes that use existing tools in new ways should be submitted as pull requests to open2c_vignettes as distinct jupyter notebooks, rather than to the cooltools sandbox. The bar for contributions to open2c_vignettes is intentionally low. We recommend that each vignette record the package versions it was tested with and raise an error when run with other versions (see the sketch below). If it makes sense, the example data available for download via cooltools can be used as an easy way to try out the analysis. Otherwise, the source of the data should be specified so that others can obtain it.

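For example, a hypothetical version guard at the top of a vignette might look like this (illustrative only, not a cooltools requirement):

```python
from packaging.version import Version

import cooltools

# Refuse to run against versions other than the one the vignette was tested with.
if Version(cooltools.__version__) != Version("0.7.1"):
    raise RuntimeError("This vignette was tested with cooltools 0.7.1 only.")
```
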
Practical aspects of contributing can be found in the guide [here](https://github.com/open2c/cooltools/blob/master/CONTRIBUTING.md).

## Citing `cooltools`

Open2C*, Nezar Abdennur*, Sameer Abraham, Geoffrey Fudenberg*, Ilya M. Flyamer*, Aleksandra A. Galitsyna*, Anton Goloborodko*, Maxim Imakaev, Betul A. Oksuz, and Sergey V. Venev*. “Cooltools: Enabling High-Resolution Hi-C Analysis in Python.” bioRxiv, November 1, 2022. https://doi.org/10.1101/2022.10.31.514564.
--------------------------------------------------------------------------------
/cooltools/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Cool tools
~~~~~~~~~~

The tools for your .cool's.

:author: Cooltools developers
:license: MIT

"""
import logging

__version__ = "0.7.1"

from . import lib

from .lib import (
    numutils,
    download_data,
    print_available_datasets,
    get_data_dir,
    download_file,
    get_md5sum,
)

from .api.expected import expected_cis, expected_trans
from .api.coverage import coverage
from .api.eigdecomp import eigs_cis, eigs_trans
from .api.saddle import digitize, saddle
from .api.sample import sample
from .api.snipping import pileup
from .api.directionality import directionality
from .api.insulation import insulation
from .api.dotfinder import dots
from .api.virtual4c import virtual4c
--------------------------------------------------------------------------------
/cooltools/__main__.py:
--------------------------------------------------------------------------------
from .cli import cli

if __name__ == "__main__":
    cli()
--------------------------------------------------------------------------------
/cooltools/api/__init__.py:
--------------------------------------------------------------------------------
import importlib
import pathlib

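# Collect the names of all modules in this directory (every *.py file except
# __init__.py) and import each of them, so that all cooltools.api submodules
# are loaded when the package is imported.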
__all__ = [
    f.stem
    for f in pathlib.Path(__file__).parent.glob("*.py")
    if f.is_file() and not f.name == "__init__.py"
]

for _ in __all__:
    importlib.import_module("." + _, "cooltools.api")

del pathlib
del importlib
--------------------------------------------------------------------------------
/cooltools/api/directionality.py:
--------------------------------------------------------------------------------
import warnings
import numpy as np
import pandas as pd


def _dirscore(
    pixels, bins, window=10, ignore_diags=2, balanced=True, signed_chi2=False
):
    lo_bin_id = bins.index.min()
    hi_bin_id = bins.index.max() + 1
    N = hi_bin_id - lo_bin_id

    bad_bin_mask = (
        bins["weight"].isnull().values if balanced else np.zeros(N, dtype=bool)
    )

    diag_pixels = pixels[pixels["bin2_id"] - pixels["bin1_id"] <= (window - 1) * 2]
    if balanced:
        diag_pixels = diag_pixels[~diag_pixels["balanced"].isnull()]

    i = diag_pixels["bin1_id"].values - lo_bin_id
    j = diag_pixels["bin2_id"].values - lo_bin_id
    val = diag_pixels["balanced"].values if balanced else diag_pixels["count"].values

    sum_pixels_left = np.zeros(N)
    n_pixels_left = np.zeros(N)
    for i_shift in range(0, window):
        if i_shift < ignore_diags:
            continue

        mask = (i + i_shift == j) & (i + i_shift < N) & (j >= 0)
        sum_pixels_left += np.bincount(i[mask] + i_shift, val[mask], minlength=N)

        loc_bad_bin_mask = np.zeros(N, dtype=bool)
        if i_shift == 0:
            loc_bad_bin_mask |= bad_bin_mask
        else:
            loc_bad_bin_mask[i_shift:] |= bad_bin_mask[:-i_shift]
            loc_bad_bin_mask |= bad_bin_mask
        n_pixels_left[i_shift:] += 1 - loc_bad_bin_mask[i_shift:]

    sum_pixels_right = np.zeros(N)
    n_pixels_right = np.zeros(N)
    for j_shift in range(0, window):
        if j_shift < ignore_diags:
            continue

        mask = (i == j - j_shift) & (i < N) & (j - j_shift >= 0)

        sum_pixels_right += np.bincount(i[mask], val[mask], minlength=N)

        loc_bad_bin_mask = np.zeros(N, dtype=bool)
        loc_bad_bin_mask |= bad_bin_mask
        if j_shift == 0:
            loc_bad_bin_mask |= bad_bin_mask
        else:
            loc_bad_bin_mask[:-j_shift] |= bad_bin_mask[j_shift:]

        n_pixels_right[: (-j_shift if j_shift else None)] += (
            1 - loc_bad_bin_mask[: (-j_shift if j_shift else None)]
        )

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        a = sum_pixels_left
        b = sum_pixels_right
        if signed_chi2:
            e = (a + b) / 2.0
            score = np.sign(b - a) * ((a - e) ** 2 + (b - e) ** 2) / e
        else:
            score = (b - a) / (a + b)

    return score


def _dirscore_dense(A, window=10, signed_chi2=False):
    N = A.shape[0]
    di = np.zeros(N)
    for i in range(0, N):
        lo = max(0, i - window)
        hi = min((i + window) + 1, N)
        b, a = np.nansum(A[i, i:hi]), np.nansum(A[i, lo : i + 1])
        if signed_chi2:
            e = (a + b) / 2.0
            if e:
                di[i] = np.sign(b - a) * ((a - e) ** 2 + (b - e) ** 2) / e
        else:
            di[i] = (b - a) / (a + b)
    mask = np.nansum(A, axis=0) == 0
    di[mask] = np.nan
    return di


def directionality(
    clr,
    window_bp=100000,
    balance="weight",
    min_dist_bad_bin=2,
    ignore_diags=None,
    chromosomes=None,
):
102 | """Calculate the diamond insulation scores and call insulating boundaries.
103 |
104 | Parameters
105 | ----------
106 | clr : cooler.Cooler
107 | A cooler with balanced Hi-C data.
108 | window_bp : int
109 | The size of the sliding diamond window used to calculate the insulation
110 | score.
111 | min_dist_bad_bin : int
112 | The minimal allowed distance to a bad bin. Do not calculate insulation
113 | scores for bins having a bad bin closer than this distance.
114 | ignore_diags : int
115 | The number of diagonals to ignore. If None, equals the number of
116 | diagonals ignored during IC balancing.
117 |
118 | Returns
119 | -------
120 | ins_table : pandas.DataFrame
121 | A table containing the insulation scores of the genomic bins and
122 | the insulating boundary strengths.
123 | """
    if chromosomes is None:
        chromosomes = clr.chromnames

    bin_size = clr.info["bin-size"]
    ignore_diags = (
        ignore_diags
        if ignore_diags is not None
        else clr._load_attrs(clr.root.rstrip("/") + "/bins/weight")["ignore_diags"]
    )
    window_bins = window_bp // bin_size

    if window_bp % bin_size != 0:
        raise Exception(
            "The window size ({}) has to be a multiple of the bin size {}".format(
                window_bp, bin_size
            )
        )

    dir_chrom_tables = []
    for chrom in chromosomes:
        chrom_bins = clr.bins().fetch(chrom)
        chrom_pixels = clr.matrix(as_pixels=True, balance=balance).fetch(chrom)

        # mask neighbors of bad bins
        is_bad_bin = np.isnan(chrom_bins["weight"].values)
        bad_bin_neighbor = np.zeros_like(is_bad_bin)
        for i in range(0, min_dist_bad_bin):
            if i == 0:
                bad_bin_neighbor = bad_bin_neighbor | is_bad_bin
            else:
                bad_bin_neighbor = bad_bin_neighbor | np.r_[[True] * i, is_bad_bin[:-i]]
                bad_bin_neighbor = bad_bin_neighbor | np.r_[is_bad_bin[i:], [True] * i]

        dir_chrom = chrom_bins[["chrom", "start", "end"]].copy()
        dir_chrom["bad_bin_masked"] = bad_bin_neighbor

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", RuntimeWarning)
            dir_track = _dirscore(
                chrom_pixels, chrom_bins, window=window_bins, ignore_diags=ignore_diags
            )
            dir_track[bad_bin_neighbor] = np.nan
            dir_track[~np.isfinite(dir_track)] = np.nan
            dir_chrom["directionality_ratio_{}".format(window_bp)] = dir_track

            dir_track = _dirscore(
                chrom_pixels,
                chrom_bins,
                window=window_bins,
                ignore_diags=ignore_diags,
                signed_chi2=True,
            )
            dir_track[bad_bin_neighbor] = np.nan
            dir_track[~np.isfinite(dir_track)] = np.nan
            dir_chrom["directionality_index_{}".format(window_bp)] = dir_track

        dir_chrom_tables.append(dir_chrom)

    dir_table = pd.concat(dir_chrom_tables)
    return dir_table
--------------------------------------------------------------------------------
/cooltools/api/sample.py:
--------------------------------------------------------------------------------
import numpy as np
import pandas as pd

import cooler
import cooler.parallel
from .coverage import coverage
from ..lib.common import pool_decorator


def sample_pixels_approx(pixels, frac):
    pixels["count"] = np.random.binomial(pixels["count"], frac)
    mask = pixels["count"] > 0

    if issubclass(type(pixels), pd.DataFrame):
        pixels = pixels[mask]
    elif issubclass(type(pixels), dict):
        pixels = {k: arr[mask] for k, arr in pixels.items()}
    return pixels


def sample_pixels_exact(pixels, count):
    cumcount = np.cumsum(np.asarray(pixels["count"]))
    total = cumcount[-1]
    n_pixels = cumcount.shape[0]

    # sample a given number of distinct contacts
    random_contacts = np.random.choice(total, size=count, replace=False)

    # find where those contacts live in the cumcount array
    loc = np.searchsorted(cumcount, random_contacts, side="right")

    # re-bin those locations to get new counts
    new_counts = np.bincount(loc, minlength=n_pixels)
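    # Worked example (illustrative numbers, not from the original source):
    # counts [3, 1, 5] give cumcount [3, 4, 9] and total = 9; sampling the
    # contact ids {0, 3, 5, 8} maps via searchsorted to pixel indices
    # [0, 1, 2, 2], and bincount turns these into new counts [1, 1, 2].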
    pixels["count"] = new_counts
    mask = pixels["count"] > 0
    if issubclass(type(pixels), pd.DataFrame):
        pixels = pixels[mask]
    elif issubclass(type(pixels), dict):
        pixels = {k: arr[mask] for k, arr in pixels.items()}
    return pixels


def _extract_pixel_chunk(chunk):
    return chunk["pixels"]


@pool_decorator
def sample(
    clr,
    out_clr_path,
    count=None,
    cis_count=None,
    frac=None,
    exact=False,
    chunksize=int(1e7),
    nproc=1,
    map_functor=map,
):
    """
    Pick a random subset of contacts from a Hi-C map.

    Parameters
    ----------
    clr : cooler.Cooler or str
        A Cooler or a path/URI to a Cooler with input data.

    out_clr_path : str
        A path/URI to the output.

    count : int
        The target number of contacts in the sample.
        Mutually exclusive with `cis_count` and `frac`.

    cis_count : int
        The target number of cis contacts in the sample.
        Mutually exclusive with `count` and `frac`.

    frac : float
        The target sample size as a fraction of contacts in the original
        dataset. Mutually exclusive with `count` and `cis_count`.

    exact : bool
        If True, the resulting sample size will exactly match the target value.
        Exact sampling will load the whole pixel table into memory!
        If False, binomial sampling will be used instead and the sample size
        will be randomly distributed around the target value.

    chunksize : int
        The number of pixels loaded and processed per step of computation.

    nproc : int, optional
        How many processes to use for calculation. Ignored if map_functor is passed.

    map_functor : callable, optional
        Map function to dispatch the matrix chunks to workers.
        If left unspecified, pool_decorator applies the following defaults:
        if nproc>1, this defaults to multiprocess.Pool;
        if nproc=1, this defaults to the builtin map.

    """
    if issubclass(type(clr), str):
        clr = cooler.Cooler(clr)

    if frac is not None and count is None and cis_count is None:
        pass
    elif frac is None and count is not None and cis_count is None:
        frac = count / clr.info["sum"]
    elif frac is None and count is None and cis_count is not None:
        # note division by two, since coverage() counts each side separately
        cis_total = clr.info.get("cis", np.sum(coverage(clr)[0] // 2, dtype=int))
        frac = cis_count / cis_total
    else:
        raise ValueError(
            "Please specify exactly one argument among `count`, `cis_count`"
            " and `frac`"
        )

    if frac > 1.0:
        raise ValueError(
            "The number of contacts in a sample cannot exceed "
            "that in the original dataset."
        )

    if exact:
        count = np.round(frac * clr.info["sum"]).astype(int)
        pixels = sample_pixels_exact(clr.pixels()[:], count)
        cooler.create_cooler(out_clr_path, clr.bins()[:], pixels, ordered=True)

    else:
        pipeline = (
            cooler.parallel.split(
                clr, include_bins=False, map=map_functor, chunksize=chunksize
            )
            .pipe(_extract_pixel_chunk)
            .pipe(sample_pixels_approx, frac=frac)
        )

        cooler.create_cooler(
            out_clr_path,
            clr.bins()[:][["chrom", "start", "end"]],
            iter(pipeline),
            ordered=True,
        )
--------------------------------------------------------------------------------
/cooltools/api/virtual4c.py:
--------------------------------------------------------------------------------
import logging

logging.basicConfig(level=logging.INFO)

from functools import partial

import numpy as np
import pandas as pd
import bioframe

from ..lib.checks import is_cooler_balanced
from ..lib.common import pool_decorator


def _extract_profile(chrom, clr, clr_weight_name, viewpoint):
    to_return = []
    if clr_weight_name:
        colname = "balanced"
    else:
        colname = "count"
    pxls1 = clr.matrix(balance=clr_weight_name, as_pixels=True, join=True).fetch(
        chrom, viewpoint
    )
    pxls1[["chrom2"]] = viewpoint[0]
    pxls1[["start2"]] = viewpoint[1]
    pxls1[["end2"]] = viewpoint[2]

    pxls1 = (
        pxls1.groupby(["chrom1", "start1", "end1"], observed=True)[colname]
        .mean()
        .reset_index()
    )
    pxls1.columns = ["chrom", "start", "end", colname]
    if pxls1.shape[0] > 0:
        to_return.append(pxls1)

    pxls2 = clr.matrix(balance=clr_weight_name, as_pixels=True, join=True).fetch(
        viewpoint, chrom
    )
    pxls2[["chrom1"]] = viewpoint[0]
    pxls2[["start1"]] = viewpoint[1]
    pxls2[["end1"]] = viewpoint[2]
    pxls2 = (
        pxls2.groupby(["chrom2", "start2", "end2"], observed=True)[colname]
        .mean()
        .reset_index()
    )
    pxls2.columns = ["chrom", "start", "end", colname]
    if pxls2.shape[0] > 0:
        to_return.append(pxls2)
    if len(to_return) == 0:
        return pd.DataFrame(columns=["chrom", "start", "end", colname])
    else:
        return pd.concat(to_return, ignore_index=True)


@pool_decorator
def virtual4c(
    clr,
    viewpoint,
    clr_weight_name="weight",
    nproc=1,
    map_functor=map,
):
    """Generate a genome-wide contact profile for a given viewpoint.

    Extract all contacts of a given viewpoint from a cooler file.

    Parameters
    ----------
    clr : cooler.Cooler
        A cooler with balanced Hi-C data.
    viewpoint : tuple or str
        Coordinates of the viewpoint.
    clr_weight_name : str
        Name of the column in the bin table with weights.
    nproc : int, optional
        How many processes to use for calculation. Ignored if map_functor is passed.
    map_functor : callable, optional
        Map function to dispatch the matrix chunks to workers.
        If left unspecified, pool_decorator applies the following defaults:
        if nproc>1, this defaults to multiprocess.Pool;
        if nproc=1, this defaults to the builtin map.

    Returns
    -------
    v4C_table : pandas.DataFrame
        A table containing the interaction frequency of the viewpoint with the
        rest of the genome.

    Note
    ----
    Note: this is a new (experimental) function; the interface or output might
    change in a future version.
    """
    if clr_weight_name not in [None, False]:
        # check if cooler is balanced
        try:
            _ = is_cooler_balanced(clr, clr_weight_name, raise_errors=True)
        except Exception as e:
            raise ValueError(
                f"provided cooler is not balanced or {clr_weight_name} is missing"
            ) from e
        colname = "balanced"
    else:
        colname = "count"
    viewpoint = bioframe.core.stringops.parse_region(viewpoint)

    f = partial(
        _extract_profile, clr=clr, clr_weight_name=clr_weight_name, viewpoint=viewpoint
    )

    counts = list(map_functor(f, clr.chromnames))

    # Concatenate all chromosome dfs into one
    v4c = pd.concat(counts, ignore_index=True)
    if v4c.shape[0] == 0:
        logging.warning(f"No contacts found for viewpoint {viewpoint}")
        v4c = clr.bins()[:][["chrom", "start", "end"]]
        v4c[colname] = np.nan
    else:
        v4c["chrom"] = v4c["chrom"].astype("category")
        v4c["start"] = v4c["start"].astype(int)
        v4c["end"] = v4c["end"].astype(int)
        # sort according to the clr.chromsizes order
        v4c = bioframe.sort_bedframe(
            v4c,
            view_df=bioframe.make_viewframe(clr.chromsizes),
        )
        v4c.loc[
            (v4c["chrom"] == viewpoint[0])
            & (v4c["start"] >= viewpoint[1])
            & (v4c["end"] <= viewpoint[2]),
            colname,
        ] = np.nan  # Set within-viewpoint bins to nan
        v4c = (
            pd.merge(
                clr.bins()[:][["chrom", "start", "end"]],
                v4c,
                on=["chrom", "start", "end"],
                how="left",
            )
            .drop_duplicates()
            .reset_index(drop=True)
        )  # Ensure we return all bins even if empty
    return v4c
--------------------------------------------------------------------------------
/cooltools/cli/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from __future__ import division, print_function
import click
import sys
from .. import __version__

# Monkey patch
click.core._verify_python3_env = lambda: None


CONTEXT_SETTINGS = {
    "help_option_names": ["-h", "--help"],
}


@click.version_option(__version__, "-V", "--version")
@click.group(context_settings=CONTEXT_SETTINGS)
@click.option("-v", "--verbose", help="Verbose logging", is_flag=True, default=False)
@click.option(
    "-d", "--debug", help="Post mortem debugging", is_flag=True, default=False
)
def cli(verbose, debug):
    """
    Type -h or --help after any subcommand for more information.

    """
    if verbose:
        pass
        # logger.setLevel(logging.DEBUG)

    if debug:
        import traceback

        try:
            import ipdb as pdb
        except ImportError:
            import pdb

        def _excepthook(exc_type, value, tb):
            traceback.print_exception(exc_type, value, tb)
            print()
            pdb.pm()

        sys.excepthook = _excepthook


from . import (
    expected_cis,
    expected_trans,
    insulation,
    pileup,
    eigs_cis,
    eigs_trans,
    saddle,
    dots,
    genome,
    sample,
    coverage,
    virtual4c,
    rearrange,
)
--------------------------------------------------------------------------------
/cooltools/cli/coverage.py:
--------------------------------------------------------------------------------
1 | import click
2 | import cooler
3 |
4 | from . import cli
5 | from .. import api
6 |
7 | import bioframe
8 |
9 |
10 |
11 | @cli.command()
12 | @click.argument(
13 | "cool_path", metavar="COOL_PATH", type=str, nargs=1,
14 | )
15 | @click.option(
16 | "--output",
17 | "-o",
18 | help="Specify output file name to store the coverage in a tsv format.",
19 | type=str,
20 | required=False,
21 | )
22 | @click.option(
23 | "--ignore-diags",
24 | help="The number of diagonals to ignore. By default, equals"
25 | " the number of diagonals ignored during IC balancing.",
26 | type=int,
27 | default=None,
28 | show_default=True,
29 | )
30 | @click.option(
31 | "--store",
32 | help="Append columns with coverage (cov_cis_raw, cov_tot_raw), or"
33 | " (cov_cis_clr_weight_name, cov_tot_clr_weight_name) if calculating"
34 | " balanced coverage, to the cooler bin table. If clr_weight_name=None,"
35 | " also stores total cis counts in the cooler info",
36 | is_flag=True,
37 | )
38 | @click.option(
39 | "--chunksize",
40 | help="Split the contact matrix pixel records into equally sized chunks to"
41 | " save memory and/or parallelize. Default is 10^7",
42 | type=int,
43 | default=1e7,
44 | show_default=True,
45 | )
46 | @click.option(
47 | "--bigwig",
48 | help="Also save output as bigWig files for cis and total coverage"
49 | " with the names