├── .github └── workflows │ ├── before_install.sh │ ├── build-doc.yml │ ├── test.yml │ └── test_script.sh ├── .gitignore ├── CITATION ├── CONTRIBUTORS ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.rst ├── appveyor.yml ├── doc ├── .gitignore ├── Makefile ├── _static │ ├── css │ │ ├── bootstrap.css │ │ ├── bootstrap.min.css │ │ └── jhepc.css │ ├── js │ │ ├── bootstrap.js │ │ ├── bootstrap.min.js │ │ ├── jquery.js │ │ ├── jquery.maphilight.js │ │ └── jquery.maphilight.min.js │ └── logos │ │ ├── button_blue.png │ │ ├── button_blue.svg │ │ ├── button_green.png │ │ ├── button_green.svg │ │ ├── button_orange.png │ │ ├── button_orange.svg │ │ ├── logo_1.png │ │ ├── logo_1.svg │ │ └── logos.svg ├── _templates │ └── function.rst ├── conf.py ├── documentation.rst ├── index.rst ├── install.rst ├── make.bat ├── modules │ ├── .gitignore │ ├── classes.rst │ ├── datasets.rst │ ├── filter.rst │ ├── normalization.rst │ └── utils.rst ├── papers │ ├── figures │ │ └── counts_pfalc.png │ ├── paper.bib │ └── paper.md ├── sphinxext │ ├── LICENSE.txt │ ├── MANIFEST.in │ ├── README.txt │ ├── gen_rst.py │ └── numpy_ext │ │ ├── __init__.py │ │ ├── docscrape.py │ │ ├── docscrape_sphinx.py │ │ └── numpydoc.py ├── themes │ └── iced │ │ ├── layout.html │ │ ├── logos │ │ └── theme.conf ├── tutorial │ ├── basic │ │ └── tutorial.rst │ └── index.rst └── whats_new.rst ├── environment-dev.yml ├── environment.yml ├── examples ├── HiC-pro │ ├── launch_tests.sh │ ├── load_counts.py │ └── subset.matrix ├── README.txt ├── normalization │ ├── README.txt │ ├── plot_caic_normalization.py │ ├── plot_filtering_strategies.py │ ├── plot_ice_normalization.py │ └── plot_loic_normalization.py └── utils │ ├── README.txt │ ├── plot_extract_sample_map.py │ └── plot_intra_inter_contact_maps.py ├── iced ├── __init__.py ├── _filter_.pyx ├── datasets │ ├── __init__.py │ ├── base.py │ ├── data │ │ ├── duan2009 │ │ │ ├── duan.SC.10000.raw_sub.bed │ │ │ └── duan.SC.10000.raw_sub.matrix │ │ └── servant2018 │ │ │ ├── simulation_3.bed │ │ │ └── simulation_3.mat │ ├── setup.py │ └── tests │ │ └── test_base.py ├── filter.py ├── io │ ├── __init__.py │ ├── _io_pandas.py │ ├── setup.py │ └── tests │ │ └── test_io.py ├── normalization │ ├── __init__.py │ ├── _ca_utils.py │ ├── _normalization_.pyx │ └── tests │ │ ├── test_ca_utils.py │ │ └── test_normalization.py ├── random │ ├── __init__.py │ └── tests │ │ └── test_init.py ├── scripts │ └── ice.py ├── setup.py ├── tests │ └── test_filter.py └── utils │ ├── __init__.py │ ├── _genome.py │ ├── _validation.py │ ├── tests │ ├── test_genome.py │ └── test_validation.py │ └── validation.py ├── pyproject.toml ├── requirements.txt ├── requirements ├── default.txt ├── docs.txt └── tests.txt └── setup.py /.github/workflows/before_install.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -ex 3 | 4 | export PIP_DEFAULT_TIMEOUT=60 5 | 6 | if [[ $MINIMUM_REQUIREMENTS == 1 ]]; then 7 | for filename in requirements/*.txt; do 8 | sed -i 's/>=/==/g' $filename 9 | done 10 | fi 11 | 12 | python -m pip install --upgrade pip wheel setuptools 13 | python -m pip install $PIP_FLAGS -r requirements/default.txt 14 | python -m pip install $PIP_FLAGS -r requirements/tests.txt 15 | 16 | set +ex 17 | -------------------------------------------------------------------------------- /.github/workflows/build-doc.yml: -------------------------------------------------------------------------------- 1 | name: Build documentation 2 | on: 3 | push: 4 | branches: 5 | - master 6 | 
- main 7 | 8 | jobs: 9 | build: 10 | name: Build docs 11 | runs-on: ubuntu-latest 12 | 13 | timeout-minutes: 10 14 | 15 | strategy: 16 | fail-fast: false 17 | 18 | steps: 19 | - name: Checkout 🛎️ 20 | uses: actions/checkout@v2 21 | with: 22 | persist-credentials: false 23 | 24 | - name: Setup Python 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: 3.9 28 | 29 | - name: Installation of the package 30 | shell: bash -l {0} 31 | run: | 32 | set -ex 33 | python -m pip install --upgrade pip 34 | python -m pip install -r requirements/default.txt 35 | python -m pip install -r requirements/docs.txt 36 | make install 37 | set +ex 38 | 39 | - name: Build the documentation 40 | shell: bash -l {0} 41 | run: | 42 | pushd doc 43 | make html # SPHINXOPTS="-W" 44 | touch _build/html/.nojekyll 45 | 46 | - name: Deploy documentation 47 | uses: JamesIves/github-pages-deploy-action@4.1.4 48 | with: 49 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 50 | BRANCH: gh-pages 51 | FOLDER: doc/_build/html 52 | CLEAN: false 53 | 54 | - name: Store docs as artifact 55 | uses: actions/upload-artifact@v1 56 | with: 57 | name: docs 58 | path: doc/_build/html 59 | 60 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Run tests 2 | on: [push, pull_request] 3 | 4 | jobs: 5 | tests: 6 | name: linux-cp${{ matrix.python-version }}-${{ matrix.OPTIONS_NAME }} 7 | runs-on: ubuntu-latest 8 | 9 | timeout-minutes: 10 10 | 11 | strategy: 12 | fail-fast: false 13 | matrix: 14 | python-version: ["3.9", "3.10", "3.11", "3.12"] 15 | PIP_FLAGS: [""] 16 | MINIMUM_REQUIREMENTS: [0] 17 | include: 18 | - platform_id: manylinux_x86_64 19 | python-version: 3.9 20 | MINIMUM_REQUIREMENTS: 1 21 | OPTIONS_NAME: "min" 22 | 23 | steps: 24 | - name: Checkout 🛎️ 25 | uses: actions/checkout@v2 26 | with: 27 | persist-credentials: false 28 | 29 | - name: Setup Python 30 | uses: actions/setup-python@v2 31 | with: 32 | python-version: ${{ matrix.python-version}} 33 | 34 | - name: Installation of the package 35 | shell: bash -l {0} 36 | run: | 37 | set -ex 38 | source .github/workflows/before_install.sh 39 | set -ex 40 | make install 41 | 42 | - name: Run the tests 43 | shell: bash -l {0} 44 | run: | 45 | source .github/workflows/test_script.sh 46 | # Now, run the HiC-pro example with the corresponding tests. 47 | pushd examples/HiC-pro 48 | bash launch_tests.sh 49 | popd 50 | 51 | - name: Build the documentation 52 | shell: bash -l {0} 53 | run: | 54 | pip install -r requirements/docs.txt 55 | pushd doc 56 | # FIXME let's give up on warnings for now 57 | make html # SPHINXOPTS="-W" 58 | touch _build/html/.nojekyll 59 | -------------------------------------------------------------------------------- /.github/workflows/test_script.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Fail on non-zero exit and echo the commands 3 | set -ev 4 | 5 | python -m pip list 6 | 7 | (cd .. 
&& pytest --doctest-modules --cov=iced --pyargs iced) 8 | flake8 --exit-zero iced examples 9 | 10 | (cd examples/HiC-pro && source launch_tests.sh) 11 | set +ev 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.swo 3 | *.swp 4 | __pycache__ 5 | dist 6 | build 7 | *.so 8 | MANIFEST 9 | .coverage 10 | -------------------------------------------------------------------------------- /CITATION: -------------------------------------------------------------------------------- 1 | To reference iced in publication, please cite the following: 2 | 3 | @Article{servant:hicpro, 4 | Author="Servant, N. and Varoquaux, N. and Lajoie, B. R. and Viara, E. 5 | and Chen, C. J. and Vert, J. P. and Heard, E. and Dekker, J. and 6 | Barillot, E. ", 7 | Title="{{H}i{C}-{P}ro: an optimized and flexible pipeline for {H}i-{C} data processing}", 8 | Journal="Genome Biol.", 9 | Year="2015", 10 | Volume="16", 11 | Pages="259" 12 | } 13 | -------------------------------------------------------------------------------- /CONTRIBUTORS: -------------------------------------------------------------------------------- 1 | Matthias Blum 2 | Nicolas Servant 3 | Nelle Varoquaux 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | New BSD License 2 | 3 | Copyright (c) 2014-2019 The iced developers. 4 | All rights reserved. 5 | 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | a. Redistributions of source code must retain the above copyright notice, 11 | this list of conditions and the following disclaimer. 12 | b. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | c. Neither the name of the Iced Developers nor the names of 16 | its contributors may be used to endorse or promote products 17 | derived from this software without specific prior written 18 | permission. 19 | 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 | ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR 25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 31 | DAMAGE. 
32 | 33 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include iced *.c *.h *.py 2 | recursive-include iced *.matrix *.bed *.mat 3 | include pyproject.toml 4 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PYTHON ?= python 2 | PIP ?= pip 3 | CYTHON ?= cython 4 | PYTEST ?= pytest 5 | CTAGS ?= ctags 6 | 7 | all: clean inplace test 8 | 9 | inplace: cython 10 | $(PYTHON) setup.py build_ext -i 11 | 12 | install: cython 13 | python -m pip install . 14 | 15 | test: test-code 16 | 17 | test-code: inplace 18 | $(PYTEST) --showlocals -v iced --durations=20 19 | 20 | test-coverage: 21 | rm -rf coverage .coverage 22 | $(PYTEST) iced --showlocals -v --cov=iced 23 | 24 | clean-ctags: 25 | rm -f tags 26 | 27 | clean: clean-ctags 28 | $(PYTHON) setup.py clean 29 | rm -rf dist 30 | rm -rf build 31 | 32 | trailing-spaces: 33 | find iced -name "*.py" -exec perl -pi -e 's/[ \t]*$$//' {} \; 34 | 35 | cython: 36 | find iced -name "*.pyx" -exec $(CYTHON) {} \; 37 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. -*- mode: rst -*- 2 | 3 | |Travis|_ |Coveralls|_ 4 | 5 | .. |Travis| image:: https://api.travis-ci.org/hiclib/iced.png?branch=master 6 | .. _Travis: https://travis-ci.org/hiclib/iced 7 | 8 | .. |Coveralls| image:: 9 | https://coveralls.io/repos/github/hiclib/iced/badge.svg?branch=master 10 | .. _Coveralls: https://coveralls.io/r/hiclib/iced?branch=master 11 | 12 | 13 | iced 14 | ==== 15 | 16 | The Python module iced implements the ICE normalization of Hi-C data. 17 | 18 | 19 | Depends on: 20 | 21 | python >= 2.7 22 | numpy >= 1.16 23 | scipy >= 0.19 24 | scikit-learn 25 | pandas 26 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | # AppVeyor.com is a Continuous Integration service to build and run tests under 2 | # Windows 3 | # https://ci.appveyor.com/project/iced-ci/iced 4 | 5 | environment: 6 | global: 7 | # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the 8 | # /E:ON and /V:ON options are not enabled in the batch script interpreter 9 | # See: http://stackoverflow.com/a/13751649/163740 10 | CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\build_tools\\appveyor\\run_with_env.cmd" 11 | WHEELHOUSE_UPLOADER_USERNAME: iced-appveyor 12 | WHEELHOUSE_UPLOADER_SECRET: 13 | secure: BQm8KfEj6v2Y+dQxb2syQvTFxDnHXvaNktkLcYSq7jfbTOO6eH9n09tfQzFUVcWZ 14 | 15 | # Make sure we don't download large datasets when running the test on 16 | # continuous integration platform 17 | iced_SKIP_NETWORK_TESTS: 1 18 | 19 | matrix: 20 | - PYTHON: "C:\\Python27" 21 | PYTHON_VERSION: "2.7.8" 22 | PYTHON_ARCH: "32" 23 | 24 | - PYTHON: "C:\\Python27-x64" 25 | PYTHON_VERSION: "2.7.8" 26 | PYTHON_ARCH: "64" 27 | 28 | - PYTHON: "C:\\Python35" 29 | PYTHON_VERSION: "3.5.0" 30 | PYTHON_ARCH: "32" 31 | 32 | - PYTHON: "C:\\Python35-x64" 33 | PYTHON_VERSION: "3.5.0" 34 | PYTHON_ARCH: "64" 35 | 36 | 37 | 38 | install: 39 | # Install Python (from the official .msi of http://python.org) and pip when 40 | # not already installed.
41 | - "powershell ./build_tools/appveyor/install.ps1" 42 | - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%" 43 | - "python -m pip install -U pip" 44 | 45 | # Check that we have the expected version and architecture for Python 46 | - "python --version" 47 | - "python -c \"import struct; print(struct.calcsize('P') * 8)\"" 48 | - "pip --version" 49 | 50 | # Install the build and runtime dependencies of the project. 51 | - "%CMD_IN_ENV% pip install --timeout=60 --trusted-host 28daf2247a33ed269873-7b1aad3fab3cc330e1fd9d109892382a.r6.cf2.rackcdn.com -r build_tools/appveyor/requirements.txt" 52 | - "%CMD_IN_ENV% python setup.py bdist_wheel bdist_wininst -b doc/logos/scikit-learn-logo.bmp" 53 | - ps: "ls dist" 54 | 55 | # Install the generated wheel package to test it 56 | - "pip install --pre --no-index --find-links dist/ scikit-learn" 57 | 58 | # Not a .NET project, we build scikit-learn in the install step instead 59 | build: false 60 | 61 | test_script: 62 | # Change to a non-source folder to make sure we run the tests on the 63 | # installed library. 64 | - "mkdir empty_folder" 65 | - "cd empty_folder" 66 | 67 | - "python -c \"import nose; nose.main()\" --with-timer --timer-top-n 20 -s -v iced" 68 | 69 | # Move back to the project folder 70 | - "cd .." 71 | 72 | artifacts: 73 | # Archive the generated wheel package in the ci.appveyor.com build report. 74 | - path: dist\* 75 | 76 | on_success: 77 | # Upload the generated wheel package to Rackspace 78 | # On Windows, Apache Libcloud cannot find a standard CA cert bundle so we 79 | # disable the ssl checks. 80 | - "python -m wheelhouse_uploader upload --no-ssl-check --local-folder=dist iced-windows-wheels" 81 | 82 | notifications: 83 | - provider: Webhook 84 | url: https://webhooks.gitter.im/e/0dc8e57cd38105aeb1b4 85 | on_build_success: false 86 | on_build_failure: True 87 | 88 | cache: 89 | # Use the appveyor cache to avoid re-downloading large archives such 90 | # the MKL numpy and scipy wheels mirrored on a rackspace cloud 91 | # container, speed up the appveyor jobs and reduce bandwidth 92 | # usage on our rackspace account. 93 | - '%APPDATA%\pip\Cache' 94 | -------------------------------------------------------------------------------- /doc/.gitignore: -------------------------------------------------------------------------------- 1 | _build/* 2 | auto_examples/* 3 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | all: html 25 | 26 | help: 27 | @echo "Please use \`make <target>' where <target> is one of" 28 | @echo " html to make standalone HTML files" 29 | @echo " dirhtml to make HTML files named index.html in directories" 30 | @echo " singlehtml to make a single large HTML file" 31 | @echo " pickle to make pickle files" 32 | @echo " json to make JSON files" 33 | @echo " htmlhelp to make HTML files and a HTML help project" 34 | @echo " qthelp to make HTML files and a qthelp project" 35 | @echo " devhelp to make HTML files and a Devhelp project" 36 | @echo " epub to make an epub" 37 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 38 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 39 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 40 | @echo " text to make text files" 41 | @echo " man to make manual pages" 42 | @echo " texinfo to make Texinfo files" 43 | @echo " info to make Texinfo files and run them through makeinfo" 44 | @echo " gettext to make PO message catalogs" 45 | @echo " changes to make an overview of all changed/added/deprecated items" 46 | @echo " xml to make Docutils-native XML files" 47 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 48 | @echo " linkcheck to check all external links for integrity" 49 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 50 | 51 | clean: 52 | rm -rf $(BUILDDIR)/* 53 | 54 | html: 55 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html/dev 56 | @echo 57 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html/dev." 58 | 59 | html-release: 60 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 61 | @echo 62 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 63 | 64 | 65 | dirhtml: 66 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 67 | @echo 68 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 69 | 70 | singlehtml: 71 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 72 | @echo 73 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 74 | 75 | pickle: 76 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 77 | @echo 78 | @echo "Build finished; now you can process the pickle files." 79 | 80 | json: 81 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 82 | @echo 83 | @echo "Build finished; now you can process the JSON files." 84 | 85 | htmlhelp: 86 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 87 | @echo 88 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 89 | ".hhp project file in $(BUILDDIR)/htmlhelp." 90 | 91 | qthelp: 92 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 93 | @echo 94 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 95 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 96 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/iced.qhcp" 97 | @echo "To view the help file:" 98 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/iced.qhc" 99 | 100 | devhelp: 101 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 102 | @echo 103 | @echo "Build finished."
104 | @echo "To view the help file:" 105 | @echo "# mkdir -p $$HOME/.local/share/devhelp/iced" 106 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/iced" 107 | @echo "# devhelp" 108 | 109 | epub: 110 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 111 | @echo 112 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 113 | 114 | latex: 115 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 116 | @echo 117 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 118 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 119 | "(use \`make latexpdf' here to do that automatically)." 120 | 121 | latexpdf: 122 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 123 | @echo "Running LaTeX files through pdflatex..." 124 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 125 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 126 | 127 | latexpdfja: 128 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 129 | @echo "Running LaTeX files through platex and dvipdfmx..." 130 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 131 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 132 | 133 | text: 134 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 135 | @echo 136 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 137 | 138 | man: 139 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 140 | @echo 141 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 142 | 143 | texinfo: 144 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 145 | @echo 146 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 147 | @echo "Run \`make' in that directory to run these through makeinfo" \ 148 | "(use \`make info' here to do that automatically)." 149 | 150 | info: 151 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 152 | @echo "Running Texinfo files through makeinfo..." 153 | make -C $(BUILDDIR)/texinfo info 154 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 155 | 156 | gettext: 157 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 158 | @echo 159 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 160 | 161 | changes: 162 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 163 | @echo 164 | @echo "The overview file is in $(BUILDDIR)/changes." 165 | 166 | linkcheck: 167 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 168 | @echo 169 | @echo "Link check complete; look for any errors in the above output " \ 170 | "or in $(BUILDDIR)/linkcheck/output.txt." 171 | 172 | doctest: 173 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 174 | @echo "Testing of doctests in the sources finished, look at the " \ 175 | "results in $(BUILDDIR)/doctest/output.txt." 176 | 177 | xml: 178 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 179 | @echo 180 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 181 | 182 | pseudoxml: 183 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 184 | @echo 185 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 
186 | 187 | deploy: 188 | rsync -avz _build/html/ nvaroquaux@ssh.cbio.ensmp.fr:public_html/iced/dev/ 189 | 190 | deploy-release: 191 | rsync -avz _build/html/ nvaroquaux@ssh.cbio.ensmp.fr:public_html/iced/ 192 | 193 | -------------------------------------------------------------------------------- /doc/_static/css/jhepc.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: sans-serif; 3 | font-size: 16px; 4 | line-height: 1.5em; 5 | } 6 | 7 | p { 8 | } 9 | 10 | a { 11 | color: #08519C; 12 | } 13 | 14 | a:hover{ 15 | color: #FF3D00; 16 | text-decoration: none; 17 | } 18 | 19 | div.navbar { 20 | position: fixed; 21 | width: 20%; 22 | top: 0; 23 | text-align: right; 24 | background-color: rgb(240, 240, 240); 25 | border-right: 1px solid; 26 | border-right-color: rgb(189, 189, 189); 27 | height: 100%; 28 | text-transform: uppercase; 29 | font-weight: bold; 30 | z-index: 50; 31 | } 32 | 33 | .navbar-nav>li>a, .navbar-nav>.active>a, .navbar-nav>.active>a:hover, 34 | .navbar-nav>.active>a.active { 35 | background: transparent; 36 | text-transform: uppercase; 37 | font-weight: bold; 38 | height: 65px; 39 | vertical-align: middle; 40 | padding: 20px 30px; 41 | } 42 | 43 | .navbar-nav>li>a>img { 44 | visibility: hidden; 45 | padding-left: 8px; 46 | padding-bottom: 3px; 47 | } 48 | 49 | .navbar-nav>li>a:hover>img{ 50 | visibility: visible; 51 | } 52 | 53 | div.navbar .navbar-brand { 54 | max-width: 400px; 55 | vertical-align: middle; 56 | padding: 20px; 57 | } 58 | 59 | div.navbar .navbar-brand { 60 | padding-top: 7px; 61 | } 62 | 63 | div.navbar ul { 64 | padding-top: 15em; 65 | } 66 | 67 | body { 68 | background-color: rgb(255, 255, 255); 69 | } 70 | 71 | div.container{ 72 | min-height: 75%; 73 | margin: 0; 74 | border-bottom: 1px solid rgb(189, 189, 189); 75 | } 76 | 77 | div.container, footer { 78 | max-width: 1450px; 79 | margin-left: 20%; 80 | padding: 5em 15em 5em 4em; 81 | } 82 | 83 | div.container div#gallery { 84 | margin-left: -8em; 85 | margin-right: -2em; 86 | } 87 | 88 | div.container div#gallery h1 { 89 | margin-left: 3em; 90 | margin-right: 2em; 91 | } 92 | 93 | #carousel-container { 94 | width: 100%; 95 | margin: 0; 96 | padding: 0; 97 | } 98 | 99 | h1 a.headerlink, h2 a.headerlink, h3 a.headerlink { 100 | display: none; 101 | font-weight: normal; 102 | } 103 | 104 | h1 { 105 | padding-top: 50px; 106 | padding-bottom: 30px; 107 | font-family: sans-serif; 108 | text-transform: uppercase; 109 | font-size: 1.7em; 110 | letter-spacing: 0.01em; 111 | } 112 | 113 | h2 { 114 | font-family: sans-serif; 115 | font-size: 1.3em; 116 | text-transform: uppercase; 117 | letter-spacing: 0.01em; 118 | 119 | } 120 | 121 | .carousel-inner { 122 | border-bottom: 1px solid rgb(189, 189, 189); 123 | } 124 | 125 | .carousel-inner .item { 126 | margin: 0; 127 | width: 100%; 128 | top: 65px; 129 | height: 500px; 130 | } 131 | 132 | .carousel-inner .item .carousel-text { 133 | height: 500px; 134 | position: absolute; 135 | top: 65px; 136 | right: 0px; 137 | width: 35%; 138 | padding-left: 0em; 139 | display: inline-block; 140 | text-align: right; 141 | padding-right: 18em; 142 | } 143 | 144 | .carousel-inner .item .carousel-text h2 { 145 | font-size: 60px; 146 | } 147 | 148 | .carousel-inner .item .carousel-text h3 { 149 | font-size: 35px; 150 | } 151 | 152 | 153 | .carousel-inner .item .carousel-image { 154 | height: 500px; 155 | width: 60%; 156 | padding: 0 0px; 157 | display: inline-block; 158 | text-align: right; 159 | } 160 | 161 | 
.carousel-inner .item .carousel-image img { 162 | padding: 50px 0px; 163 | max-width: 80%; 164 | } 165 | 166 | .black { 167 | background-color: black; 168 | color: rgb(245, 245, 245); 169 | } 170 | 171 | .white { 172 | background-color: white; 173 | color: #333333; 174 | } 175 | 176 | .dark-grey { 177 | background-color: #222; 178 | color: rgb(245, 245, 245); 179 | } 180 | 181 | .section img { 182 | -webkit-border-radius: 10px; /* Saf3-4, iOS 1-3.2, Android <1.6 */ 183 | -moz-border-radius: 10px; /* FF1-3.6 */ 184 | border-radius: 10px; /* Opera 10.5, IE9, Saf5, Chrome, FF4, iOS 4, Android 2.1+ */ 185 | border: 2px solid #fff; 186 | max-width: 75%; 187 | max-height: 60%; 188 | } 189 | 190 | .highlight { 191 | background-color: transparent; 192 | } 193 | 194 | pre { 195 | width: 70%; 196 | font-family: monospace,serif; 197 | background-color: white; 198 | padding: 20px; 199 | } 200 | 201 | div.admonition { 202 | margin-bottom: 10px; 203 | margin-top: 10px; 204 | padding: 7px; 205 | border-radius: 4px; 206 | -moz-border-radius: 4px; 207 | } 208 | 209 | div.note { 210 | background-color: #EEE; 211 | border: 1px solid #CCC; 212 | } 213 | 214 | pre { 215 | padding: 10px; 216 | background-color: #F8F8F8; 217 | color: #222; 218 | line-height: 1.2em; 219 | border: 1px solid #DDD; 220 | margin: 1.5em 0 1.5em 0; 221 | } 222 | 223 | p.admonition-title { 224 | margin: 0px 10px 5px 0px; 225 | font-weight: bold; 226 | display: inline; 227 | } 228 | 229 | .first { 230 | margin-top: 0 !important; 231 | } 232 | 233 | .highlight a { 234 | text-decoration: underline; 235 | } 236 | -------------------------------------------------------------------------------- /doc/_static/js/jquery.maphilight.min.js: -------------------------------------------------------------------------------- 1 | (function(G){var B,J,C,K,N,M,I,E,H,A,L;J=!!document.createElement("canvas").getContext;B=(function(){var P=document.createElement("div");P.innerHTML='';var O=P.firstChild;O.style.behavior="url(#default#VML)";return O?typeof O.adj=="object":true})();if(!(J||B)){G.fn.maphilight=function(){return this};return }if(J){E=function(O){return Math.max(0,Math.min(parseInt(O,16),255))};H=function(O,P){return"rgba("+E(O.substr(0,2))+","+E(O.substr(2,2))+","+E(O.substr(4,2))+","+P+")"};C=function(O){var P=G('').get(0);P.getContext("2d").clearRect(0,0,P.width,P.height);return P};var F=function(Q,O,R,P,S){P=P||0;S=S||0;Q.beginPath();if(O=="rect"){Q.rect(R[0]+P,R[1]+S,R[2]-R[0],R[3]-R[1])}else{if(O=="poly"){Q.moveTo(R[0]+P,R[1]+S);for(i=2;i').get(0)};K=function(P,S,T,W,O){var U,V,Q,R;U='';V=(W.stroke?'strokeweight="'+W.strokeWidth+'" stroked="t" strokecolor="#'+W.strokeColor+'"':'stroked="f"');Q='';if(S=="rect"){R=G('')}else{if(S=="poly"){R=G('')}else{if(S=="circ"){R=G('')}}}R.get(0).innerHTML=U+Q;G(P).append(R)};N=function(O){G(O).find("[name=highlighted]").remove()}}M=function(P){var O,Q=P.getAttribute("coords").split(",");for(O=0;O0)){return }if(W.hasClass("maphilighted")){var R=W.parent();W.insertBefore(R);R.remove();G(S).unbind(".maphilight").find("area[coords]").unbind(".maphilight")}T=G("
").css({display:"block",background:'url("'+this.src+'")',position:"absolute",padding:0,width:"2100px",height:this.height});if(a.wrapClass){if(a.wrapClass===true){T.addClass(G(this).attr("class"))}else{T.addClass(a.wrapClass)}}W.before(T).css("opacity",0).css(I).remove();if(B){W.css("filter","Alpha(opacity=0)")}T.append(W);V=C(this);G(V).css(I);V.height=this.height;V.width=this.width;Z=function(f){var c,d;d=L(this,a);if(!d.neverOn&&!d.alwaysOn){c=M(this);K(V,c[0],c[1],d,"highlighted");if(d.groupBy){var b;if(/^[a-zA-Z][\-a-zA-Z]+$/.test(d.groupBy)){b=S.find("area["+d.groupBy+'="'+G(this).attr(d.groupBy)+'"]')}else{b=S.find(d.groupBy)}var g=this;b.each(function(){if(this!=g){var h=L(this,a);if(!h.neverOn&&!h.alwaysOn){var e=M(this);K(V,e[0],e[1],h,"highlighted")}}})}if(!J){G(V).append("")}}};G(S).bind("alwaysOn.maphilight",function(){if(X){N(X)}if(!J){G(V).empty()}G(S).find("area[coords]").each(function(){var b,c;c=L(this,a);if(c.alwaysOn){if(!X&&J){X=C(W[0]);G(X).css(I);X.width=W[0].width;X.height=W[0].height;W.before(X)}c.fade=c.alwaysOnFade;b=M(this);if(J){K(X,b[0],b[1],c,"")}else{K(V,b[0],b[1],c,"")}}})});G(S).trigger("alwaysOn.maphilight").find("area[coords]").bind("mouseover.maphilight",Z).bind("mouseout.maphilight",function(b){N(V)});W.before(V);W.addClass("maphilighted")})};G.fn.maphilight.defaults={fill:true,fillColor:"000000",fillOpacity:0.2,stroke:true,strokeColor:"ff0000",strokeOpacity:1,strokeWidth:1,fade:true,alwaysOn:false,neverOn:false,groupBy:false,wrapClass:true,shadow:false,shadowX:0,shadowY:0,shadowRadius:6,shadowColor:"000000",shadowOpacity:0.8,shadowPosition:"outside",shadowFrom:false}})(jQuery); -------------------------------------------------------------------------------- /doc/_static/logos/button_blue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiclib/iced/1a6a7e50ffdcc8904a754033aab32a2f80eb11e8/doc/_static/logos/button_blue.png -------------------------------------------------------------------------------- /doc/_static/logos/button_blue.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 21 | 23 | 30 | 34 | 35 | 36 | 58 | 60 | 61 | 63 | image/svg+xml 64 | 66 | 67 | 68 | 69 | 70 | 75 | 87 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /doc/_static/logos/button_green.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiclib/iced/1a6a7e50ffdcc8904a754033aab32a2f80eb11e8/doc/_static/logos/button_green.png -------------------------------------------------------------------------------- /doc/_static/logos/button_green.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 21 | 23 | 30 | 34 | 35 | 36 | 58 | 60 | 61 | 63 | image/svg+xml 64 | 66 | 67 | 68 | 69 | 70 | 75 | 87 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /doc/_static/logos/button_orange.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiclib/iced/1a6a7e50ffdcc8904a754033aab32a2f80eb11e8/doc/_static/logos/button_orange.png -------------------------------------------------------------------------------- /doc/_static/logos/button_orange.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 21 | 23 | 30 | 34 | 35 | 36 | 58 | 60 | 61 | 63 | image/svg+xml 
64 | 66 | 67 | 68 | 69 | 70 | 75 | 87 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /doc/_static/logos/logo_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiclib/iced/1a6a7e50ffdcc8904a754033aab32a2f80eb11e8/doc/_static/logos/logo_1.png -------------------------------------------------------------------------------- /doc/_static/logos/logo_1.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 21 | 23 | 30 | 38 | 42 | 43 | 51 | 55 | 56 | 64 | 68 | 69 | 77 | 81 | 82 | 83 | 105 | 107 | 108 | 110 | image/svg+xml 111 | 113 | 114 | 115 | 116 | 117 | 122 | PASTIS 135 | Poisson-based Algorithm forSTable Inference of DNA Structure 150 | 153 | 156 | 168 | 178 | 179 | 182 | 194 | 204 | 205 | 208 | 220 | 230 | 231 | 234 | 246 | 256 | 257 | 258 | 259 | 260 | -------------------------------------------------------------------------------- /doc/_templates/function.rst: -------------------------------------------------------------------------------- 1 | {{ fullname }} 2 | {{ underline }} 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autofunction:: {{ objname }} 7 | 8 | 9 | -------------------------------------------------------------------------------- /doc/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Iced documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Mar 31 17:17:03 2014. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | import sphinx_gallery 18 | 19 | # If extensions (or modules to document with autodoc) are in another directory, 20 | # add these directories to sys.path here. If the directory is relative to the 21 | # documentation root, use os.path.abspath to make it absolute, like shown here. 22 | sys.path.insert(0, os.path.abspath('sphinxext')) 23 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.autosummary', 24 | 'numpy_ext.numpydoc', "sphinx.ext.imgmath", 25 | "sphinx_gallery.gen_gallery"] 26 | 27 | autosummary_generate = True 28 | autodoc_default_flags = ['members', 'inherited-members'] 29 | 30 | 31 | # -- General configuration ------------------------------------------------ 32 | 33 | # If your documentation needs a minimal Sphinx version, state it here. 34 | needs_sphinx = '1.0' 35 | 36 | # Add any paths that contain templates here, relative to this directory. 37 | templates_path = ['_templates'] 38 | 39 | # The suffix of source filenames. 40 | source_suffix = '.rst' 41 | 42 | # The encoding of source files. 43 | # source_encoding = 'utf-8-sig' 44 | 45 | # The master toctree document. 46 | master_doc = 'index' 47 | 48 | # General information about the project. 49 | project = u'Iced' 50 | copyright = u'2015-2017, iced contributors' 51 | 52 | # The version info for the project you're documenting, acts as replacement for 53 | # |version| and |release|, also used in various other places throughout the 54 | # built documents. 55 | # 56 | # The short X.Y version. 57 | version = '0.5' 58 | # The full version, including alpha/beta/rc tags. 
59 | release = '0.5.0-git' 60 | 61 | # The language for content autogenerated by Sphinx. Refer to documentation 62 | # for a list of supported languages. 63 | # language = None 64 | 65 | # There are two options for replacing |today|: either, you set today to some 66 | # non-false value, then it is used: 67 | # today = '' 68 | # Else, today_fmt is used as the format for a strftime call. 69 | # today_fmt = '%B %d, %Y' 70 | 71 | # List of patterns, relative to source directory, that match files and 72 | # directories to ignore when looking for source files. 73 | exclude_patterns = ['_build'] 74 | 75 | # The reST default role (used for this markup: `text`) to use for all 76 | # documents. 77 | # default_role = None 78 | 79 | # If true, '()' will be appended to :func: etc. cross-reference text. 80 | add_function_parentheses = False 81 | 82 | # If true, the current module name will be prepended to all description 83 | # unit titles (such as .. function::). 84 | # add_module_names = True 85 | 86 | # If true, sectionauthor and moduleauthor directives will be shown in the 87 | # output. They are ignored by default. 88 | # show_authors = False 89 | 90 | # The name of the Pygments (syntax highlighting) style to use. 91 | pygments_style = 'sphinx' 92 | 93 | # A list of ignored prefixes for module index sorting. 94 | # modindex_common_prefix = [] 95 | 96 | # If true, keep warnings as "system message" paragraphs in the built documents. 97 | # keep_warnings = False 98 | 99 | 100 | # -- Options for HTML output ---------------------------------------------- 101 | 102 | # The theme to use for HTML and HTML Help pages. See the documentation for 103 | # a list of builtin themes. 104 | html_theme = 'iced' 105 | 106 | # Theme options are theme-specific and customize the look and feel of a theme 107 | # further. For a list of options available for each theme, see the 108 | # documentation. 109 | # html_theme_options = {} 110 | 111 | # Add any paths that contain custom themes here, relative to this directory. 112 | html_theme_path = ["themes"] 113 | 114 | # The name for this set of Sphinx documents. If None, it defaults to 115 | # "<project> v<release> documentation". 116 | # html_title = None 117 | 118 | # A shorter title for the navigation bar. Default is the same as html_title. 119 | # html_short_title = None 120 | 121 | # The name of an image file (relative to this directory) to place at the top 122 | # of the sidebar. 123 | # html_logo = None 124 | 125 | # The name of an image file (within the static path) to use as favicon of the 126 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 127 | # pixels large. 128 | # html_favicon = None 129 | 130 | # Add any paths that contain custom static files (such as style sheets) here, 131 | # relative to this directory. They are copied after the builtin static files, 132 | # so a file named "default.css" will overwrite the builtin "default.css". 133 | html_static_path = ['_static'] 134 | 135 | # Add any extra paths that contain custom files (such as robots.txt or 136 | # .htaccess) here, relative to this directory. These files are copied 137 | # directly to the root of the documentation. 138 | # html_extra_path = [] 139 | 140 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 141 | # using the given strftime format. 142 | # html_last_updated_fmt = '%b %d, %Y' 143 | 144 | # If true, SmartyPants will be used to convert quotes and dashes to 145 | # typographically correct entities.
146 | # html_use_smartypants = True 147 | 148 | # Custom sidebar templates, maps document names to template names. 149 | # html_sidebars = {} 150 | 151 | # Additional templates that should be rendered to pages, maps page names to 152 | # template names. 153 | # html_additional_pages = {} 154 | 155 | # If false, no module index is generated. 156 | # html_domain_indices = True 157 | 158 | # If false, no index is generated. 159 | # html_use_index = True 160 | 161 | # If true, the index is split into individual pages for each letter. 162 | # html_split_index = False 163 | 164 | # If true, links to the reST sources are added to the pages. 165 | # html_show_sourcelink = True 166 | 167 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 168 | # html_show_sphinx = True 169 | 170 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 171 | # html_show_copyright = True 172 | 173 | # If true, an OpenSearch description file will be output, and all pages will 174 | # contain a <link> tag referring to it. The value of this option must be the 175 | # base URL from which the finished HTML is served. 176 | # html_use_opensearch = '' 177 | 178 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 179 | # html_file_suffix = None 180 | 181 | # Output file base name for HTML help builder. 182 | htmlhelp_basename = 'Iceddoc' 183 | 184 | 185 | # -- Options for LaTeX output --------------------------------------------- 186 | 187 | latex_elements = { 188 | # The paper size ('letterpaper' or 'a4paper'). 189 | # 'papersize': 'letterpaper', 190 | 191 | # The font size ('10pt', '11pt' or '12pt'). 192 | # 'pointsize': '10pt', 193 | 194 | # Additional stuff for the LaTeX preamble. 195 | # 'preamble': '', 196 | } 197 | 198 | # Grouping the document tree into LaTeX files. List of tuples 199 | # (source start file, target name, title, 200 | # author, documentclass [howto, manual, or own class]). 201 | latex_documents = [ 202 | ('index', 'iced.tex', u'Iced Documentation', 203 | u'Nelle Varoquaux', 'manual'), 204 | ] 205 | 206 | # The name of an image file (relative to this directory) to place at the top of 207 | # the title page. 208 | # latex_logo = None 209 | 210 | # For "manual" documents, if this is true, then toplevel headings are parts, 211 | # not chapters. 212 | # latex_use_parts = False 213 | 214 | # If true, show page references after internal links. 215 | # latex_show_pagerefs = False 216 | 217 | # If true, show URL addresses after external links. 218 | # latex_show_urls = False 219 | 220 | # Documents to append as an appendix to all manuals. 221 | # latex_appendices = [] 222 | 223 | # If false, no module index is generated. 224 | # latex_domain_indices = True 225 | 226 | 227 | # -- Options for manual page output --------------------------------------- 228 | 229 | # One entry per manual page. List of tuples 230 | # (source start file, name, description, authors, manual section). 231 | man_pages = [ 232 | ('index', 'iced', u'Iced Documentation', 233 | [u'Nelle Varoquaux'], 1) 234 | ] 235 | 236 | # If true, show URL addresses after external links. 237 | # man_show_urls = False 238 | 239 | 240 | # -- Options for Texinfo output ------------------------------------------- 241 | 242 | # Grouping the document tree into Texinfo files.
List of tuples 243 | # (source start file, target name, title, author, 244 | # dir menu entry, description, category) 245 | texinfo_documents = [ 246 | ('index', 'iced', u'Iced Documentation', 247 | u'Nelle Varoquaux', 'iced', 'Fast and memory efficient normalization of Hi-C contact maps.', 248 | 'Miscellaneous'), 249 | ] 250 | 251 | # Documents to append as an appendix to all manuals. 252 | # texinfo_appendices = [] 253 | 254 | # If false, no module index is generated. 255 | # texinfo_domain_indices = True 256 | 257 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 258 | # texinfo_show_urls = 'footnote' 259 | 260 | # If true, do not generate a @detailmenu in the "Top" node's menu. 261 | # texinfo_no_detailmenu = False 262 | -------------------------------------------------------------------------------- /doc/documentation.rst: -------------------------------------------------------------------------------- 1 | ========================= 2 | Documentation 3 | ========================= 4 | 5 | .. toctree:: 6 | :maxdepth: 2 7 | 8 | modules/classes 9 | -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | .. iced documentation master file, created by 2 | sphinx-quickstart on Wed Jan 14 12:05:19 2015. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Iced 7 | ================================ 8 | 9 | 10 | Recent technological advances allow the measurement, in a single Hi-C 11 | experiment, of the frequencies of physical contacts among pairs of genomic 12 | loci at a genome-wide scale. 13 | 14 | **Iced** provides a fast and memory-efficient implementation of the ICE 15 | normalization strategy. It is included in the HiC-Pro pipeline, which processes 16 | data from raw fastq files to normalized contact maps. ``iced`` grew beyond 17 | being just a normalization package, and contains a number of utility functions 18 | that may be useful if you are analyzing and processing Hi-C data. 19 | 20 | 21 | .. toctree:: 22 | :maxdepth: 2 23 | 24 | 25 | References 26 | ========== 27 | 28 | If you use ``iced`` as part of HiC-Pro, please cite: 29 | 30 | `HiC-Pro: an optimized and flexible pipeline for Hi-C data processing 31 | `_ 32 | N. Servant, N. Varoquaux, B.R. Lajoie, E. Viara, C.J. Chen, J.-P. Vert, E. 33 | Heard, J. Dekker, E. Barillot, Genome Biology 2015 34 | 35 | Otherwise, please cite: 36 | 37 | `iced: fast and memory efficient normalization of contact maps 38 | `_, N. Varoquaux, N. 39 | Servant, JOSS, 2019 40 | 41 | 42 | Contacts 43 | ======== 44 | 45 | If you have any questions or suggestions, please email nelle dot varoquaux at 46 | ensmp dot fr, or open a ticket on `Github 47 | <https://github.com/hiclib/iced>`_ 48 | 49 | 50 | 51 | Indices and tables 52 | ================== 53 | 54 | * :ref:`genindex` 55 | * :ref:`modindex` 56 | * :ref:`search` 57 | 58 | -------------------------------------------------------------------------------- /doc/install.rst: -------------------------------------------------------------------------------- 1 | ================================================================================ 2 | Installation 3 | ================================================================================ 4 | 5 | This package uses setuptools, the standard way of installing 6 | Python modules.
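A quick way to check the package once it is installed is to import it and load the bundled yeast dataset. This is a minimal sketch; ``load_sample_yeast`` is the loader listed in the API reference of this documentation, and the printed shapes are only indicative::

    import iced
    from iced import datasets

    # Load the bundled S. cerevisiae contact count matrix
    # and the corresponding chromosome lengths.
    counts, lengths = datasets.load_sample_yeast()
    print(counts.shape, lengths)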
7 | 8 | The dependencies are: 9 | 10 | - python (>= 3.6) 11 | - setuptools 12 | - numpy (>= 1.16) 13 | - scipy (>= 0.19) 14 | - pandas 15 | 16 | 17 | All of these dependencies can be installed at once using `Anaconda 18 | `_ 19 | 20 | The easiest way to install iced is using ``pip``:: 21 | 22 | pip install -U iced 23 | 24 | You can also download the code from `Github 25 | <https://github.com/hiclib/iced>`_ and install it in your home 26 | directory with:: 27 | 28 | python setup.py install --user 29 | 30 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^<target^>` where ^<target^> is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | goto end 41 | ) 42 | 43 | if "%1" == "clean" ( 44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 45 | del /q /s %BUILDDIR%\* 46 | goto end 47 | ) 48 | 49 | 50 | %SPHINXBUILD% 2> nul 51 | if errorlevel 9009 ( 52 | echo. 53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 54 | echo.installed, then set the SPHINXBUILD environment variable to point 55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 56 | echo.may add the Sphinx directory to PATH. 57 | echo. 58 | echo.If you don't have Sphinx installed, grab it from 59 | echo.http://sphinx-doc.org/ 60 | exit /b 1 61 | ) 62 | 63 | if "%1" == "html" ( 64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 68 | goto end 69 | ) 70 | 71 | if "%1" == "dirhtml" ( 72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 76 | goto end 77 | ) 78 | 79 | if "%1" == "singlehtml" ( 80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
84 | goto end 85 | ) 86 | 87 | if "%1" == "pickle" ( 88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can process the pickle files. 92 | goto end 93 | ) 94 | 95 | if "%1" == "json" ( 96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 97 | if errorlevel 1 exit /b 1 98 | echo. 99 | echo.Build finished; now you can process the JSON files. 100 | goto end 101 | ) 102 | 103 | if "%1" == "htmlhelp" ( 104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 105 | if errorlevel 1 exit /b 1 106 | echo. 107 | echo.Build finished; now you can run HTML Help Workshop with the ^ 108 | .hhp project file in %BUILDDIR%/htmlhelp. 109 | goto end 110 | ) 111 | 112 | if "%1" == "qthelp" ( 113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 117 | .qhcp project file in %BUILDDIR%/qthelp, like this: 118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\iced.qhcp 119 | echo.To view the help file: 120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\iced.qhc 121 | goto end 122 | ) 123 | 124 | if "%1" == "devhelp" ( 125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished. 129 | goto end 130 | ) 131 | 132 | if "%1" == "epub" ( 133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 137 | goto end 138 | ) 139 | 140 | if "%1" == "latex" ( 141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 145 | goto end 146 | ) 147 | 148 | if "%1" == "latexpdf" ( 149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 150 | cd %BUILDDIR%/latex 151 | make all-pdf 152 | cd %BUILDDIR%/.. 153 | echo. 154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 155 | goto end 156 | ) 157 | 158 | if "%1" == "latexpdfja" ( 159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 160 | cd %BUILDDIR%/latex 161 | make all-pdf-ja 162 | cd %BUILDDIR%/.. 163 | echo. 164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 165 | goto end 166 | ) 167 | 168 | if "%1" == "text" ( 169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 170 | if errorlevel 1 exit /b 1 171 | echo. 172 | echo.Build finished. The text files are in %BUILDDIR%/text. 173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes.
205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo. 212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /doc/modules/.gitignore: -------------------------------------------------------------------------------- 1 | generated 2 | -------------------------------------------------------------------------------- /doc/modules/classes.rst: -------------------------------------------------------------------------------- 1 | ========== 2 | Reference 3 | ========== 4 | 5 | This is the class and function reference of iced. 6 | 7 | .. _base_ref: 8 | 9 | :mod:`iced.normalization`: Normalization 10 | =============================================== 11 | 12 | .. automodule:: iced.normalization 13 | :no-members: 14 | :no-inherited-members: 15 | 16 | 17 | Functions 18 | --------- 19 | .. currentmodule:: iced 20 | 21 | .. autosummary:: 22 | :toctree: generated/ 23 | :template: function.rst 24 | 25 | normalization.ICE_normalization 26 | normalization.SCN_normalization 27 | normalization.estimate_block_biases 28 | 29 | .. _normalization_ref: 30 | 31 | :mod:`iced.filter`: Filter 32 | =============================================== 33 | 34 | .. automodule:: iced.filter 35 | :no-members: 36 | :no-inherited-members: 37 | 38 | 39 | Functions 40 | --------- 41 | 42 | .. currentmodule:: iced 43 | 44 | .. autosummary:: 45 | :toctree: generated/ 46 | :template: function.rst 47 | 48 | filter.filter_low_counts 49 | filter.filter_high_counts 50 | 51 | .. _filter_ref: 52 | 53 | :mod:`iced.datasets`: Datasets 54 | =============================================== 55 | 56 | .. automodule:: iced.datasets 57 | :no-members: 58 | :no-inherited-members: 59 | 60 | Functions 61 | --------- 62 | .. currentmodule:: iced 63 | 64 | .. autosummary:: 65 | :toctree: generated/ 66 | :template: function.rst 67 | 68 | datasets.load_sample_yeast 69 | 70 | 71 | .. _datasets_ref: 72 | 73 | 74 | :mod:`iced.utils`: Utils 75 | =============================================== 76 | 77 | .. automodule:: iced.utils 78 | :no-members: 79 | :no-inherited-members: 80 | 81 | 82 | Functions 83 | --------- 84 | .. currentmodule:: iced 85 | 86 | .. autosummary:: 87 | :toctree: generated/ 88 | :template: function.rst 89 | 90 | 91 | utils.get_intra_mask 92 | utils.get_inter_mask 93 | utils.extract_sub_contact_map 94 | utils.downsample_resolution 95 | 96 | .. _utils_ref:
-------------------------------------------------------------------------------- /doc/modules/datasets.rst: -------------------------------------------------------------------------------- 1 | .. _datasets: 2 | 3 | ============== 4 | Datasets 5 | ============== 6 | 7 | The :mod:`datasets` submodule contains utilities to download and load datasets 8 | in numpy format. 9 | 10 | .. currentmodule:: iced.datasets 11 | -------------------------------------------------------------------------------- /doc/modules/filter.rst: -------------------------------------------------------------------------------- 1 | .. _filter: 2 | 3 | ============= 4 | Filtering 5 | ============= 6 | 7 | The :mod:`filter` submodule contains utilities for filtering the contact count 8 | matrix prior to normalizing. 9 | 10 | 11 | .. currentmodule:: iced.filter 12 | -------------------------------------------------------------------------------- /doc/modules/normalization.rst: -------------------------------------------------------------------------------- 1 | .. _normalization: 2 | 3 | ============== 4 | Normalization 5 | ============== 6 | 7 | The :mod:`normalization` submodule contains normalization methods based on 8 | a matrix scaling approach. 9 | 10 | .. currentmodule:: iced.normalization 11 | 12 | .. image:: ../auto_examples/normalization/images/sphx_glr_plot_ice_normalization_001.png 13 | :target: ../auto_examples/normalization/plot_ice_normalization.html 14 | :align: right 15 | :scale: 100% 16 | -------------------------------------------------------------------------------- /doc/modules/utils.rst: -------------------------------------------------------------------------------- 1 | .. _utils: 2 | 3 | ============== 4 | Utilities 5 | ============== 6 | 7 | The :mod:`utils` submodule contains utility functions. 8 | 9 | .. currentmodule:: iced.utils 10 | -------------------------------------------------------------------------------- /doc/papers/figures/counts_pfalc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiclib/iced/1a6a7e50ffdcc8904a754033aab32a2f80eb11e8/doc/papers/figures/counts_pfalc.png -------------------------------------------------------------------------------- /doc/papers/paper.bib: -------------------------------------------------------------------------------- 1 | @Article{cournac:normalization, 2 | Title = {{Normalization of a chromosomal contact map}}, 3 | Author = {A. Cournac and H. Marie-Nelly and M. Marbouty 4 | and R. Koszul and J. Mozziconacci}, 5 | Journal = {BMC Genomics}, 6 | Year = {2012}, 7 | Pages = {436}, 8 | Volume = {13}, 9 | doi="10.1186/1471-2164-13-436" 10 | } 11 | 12 | 13 | @Article{imakaev:iterative, 14 | Title = {Iterative correction of {Hi-C} data reveals 15 | hallmarks of chromosome organization}, 16 | Author = {Imakaev, M. and Fudenberg, G. and McCord, R. P. 17 | and Naumova, N. and Goloborodko, A. and Lajoie, B. R. and Dekker, J. and 18 | Mirny, L. A. }, 19 | Journal = {Nature Methods}, 20 | Year = {2012}, 21 | Pages = {999--1003}, 22 | Volume = {9}, 23 | doi = "10.1038/nmeth.2148" 24 | } 25 | 26 | @Article{li:hi-corrector, 27 | Author="Li, W. and Gong, K. and Li, Q. and Alber, F. and Zhou, X. J.
", 28 | Title="{{H}i-{C}orrector: a fast, scalable and memory-efficient package for 29 | normalizing large-scale {H}i-{C} data}", 30 | Journal="Bioinformatics", 31 | Year="2015", 32 | Volume="31", 33 | Number="6", 34 | Pages="960--962", 35 | Month="Mar", 36 | doi="10.1093/bioinformatics/btu747" 37 | } 38 | 39 | @Article{varoquaux:iced_osf, 40 | Author="Varoquaux, N, and Servant, N", 41 | Title="iced: fast and memory efficient normalization of contact maps", 42 | doi="10.5281/zenodo.2622857" 43 | } 44 | 45 | @article{servant:hicpro, 46 | Author="Servant, N. and Varoquaux, N. and Lajoie, B. R. and Viara, E. 47 | and Chen, C. J. and Vert, J. P. and Heard, E. and Dekker, J. and Barillot, 48 | E. ", 49 | Title="{{H}i{C}-{P}ro: an optimized and flexible pipeline for {H}i-{C} data 50 | processing}", 51 | Journal="Genome Biol.", 52 | Year="2015", 53 | Volume="16", 54 | doi="10.1186/s13059-015-0831-x" 55 | } 56 | -------------------------------------------------------------------------------- /doc/papers/paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'iced: fast and memory efficient normalization of contact maps' 3 | tags: 4 | - Hi-C contact count 5 | - Normalization 6 | authors: 7 | - name: Nelle Varoquaux 8 | orcid: 0000-0002-8748-6546 9 | affiliation: "1" 10 | - name: Nicolas Servant 11 | orcid: 0000-0003-1678-7410 12 | affiliation: "3, 4, 5" 13 | affiliations: 14 | - name: University of California, Berkeley 15 | index: 1 16 | - name: Institut Curie 17 | index: 3 18 | - name: INSERM U900 19 | index: 4 20 | - name: Mines ParisTech 21 | index: 5 22 | 23 | date: 2019, February, 7th 24 | bibliography: paper.bib 25 | 26 | --- 27 | 28 | # Summary 29 | 30 | The three-dimensional structure of the genome is thought to play an important 31 | role in many biological processes, such as gene regulation and replication. 32 | Recent technological advances allow the measurement, in a single 33 | experiment, of the frequencies of physical contacts among pairs of genomic 34 | loci at a genome-wide scale. The Hi-C protocol results in a noisy and indirect 35 | measurement of the 3D structure of the genome, yielding a symmetric matrix 36 | where each row and each column correspond to a genomic window, and each entry 37 | to the number of times those windows have been seen interacting with one 38 | another. As with any genomics experiments, the resulting matrix contains unwanted 39 | variations depending on the GC-content, mappability, and the details of the 40 | protocol used. Before any downstream analysis, this matrix needs to be 41 | appropriately normalized. 42 | 43 | ![](figures/counts_pfalc.png) 44 | 45 | [**Iced**](https://github.com/hiclib/iced) implements fast and memory 46 | efficient normalization methods, such the ICE normalization strategy or the 47 | SCN algorithm. It is included in the HiC-pro pipeline, that processes data 48 | from raw fastq files to normalized contact maps [@servant:hicpro]. iced 49 | eventually grew bigger than just being a normalization packages, and contains 50 | a number of utilities functions that may be useful if you are analyzing and 51 | processing Hi-C data. 52 | 53 | Moving from sequencing reads to a normalized contact map is a challenging 54 | task. Hi-C usually requires several millions to billions of paired-end 55 | sequencing reads, depending on genome size and on the desired resolution. 
56 | Managing these data thus requires optimized bioinformatic workflows able to 57 | extract the contact frequencies in reasonable computational time and with 58 | reasonable resource and storage requirements. The final step of such a pipeline 59 | is typically a normalization step, essential to ensure accurate analysis and 60 | proper interpretation of the results. 61 | 62 | We propose here fast implementations of the iterative correction method 63 | [@imakaev:iterative] and SCN [@cournac:normalization] in Python. iced 64 | emphasizes ease-of-use, performance, maintainability, and memory-efficiency. 65 | This implementation leverages a memory-efficient data format for Hi-C maps, and 66 | outperforms Hi-Corrector [@li:hi-corrector], a parallelized C++ implementation 67 | of the same algorithm, in both speed and memory usage. 68 | 69 | # References 70 | -------------------------------------------------------------------------------- /doc/sphinxext/LICENSE.txt: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------------- 2 | The files 3 | - numpydoc.py 4 | - autosummary.py 5 | - autosummary_generate.py 6 | - docscrape.py 7 | - docscrape_sphinx.py 8 | - phantom_import.py 9 | have the following license: 10 | 11 | Copyright (C) 2008 Stefan van der Walt , Pauli Virtanen 12 | 13 | Redistribution and use in source and binary forms, with or without 14 | modification, are permitted provided that the following conditions are 15 | met: 16 | 17 | 1. Redistributions of source code must retain the above copyright 18 | notice, this list of conditions and the following disclaimer. 19 | 2. Redistributions in binary form must reproduce the above copyright 20 | notice, this list of conditions and the following disclaimer in 21 | the documentation and/or other materials provided with the 22 | distribution. 23 | 24 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 25 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 26 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 27 | DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, 28 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 | IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 | POSSIBILITY OF SUCH DAMAGE. 35 | 36 | ------------------------------------------------------------------------------- 37 | The files 38 | - compiler_unparse.py 39 | - comment_eater.py 40 | - traitsdoc.py 41 | have the following license: 42 | 43 | This software is OSI Certified Open Source Software. 44 | OSI Certified is a certification mark of the Open Source Initiative. 45 | 46 | Copyright (c) 2006, Enthought, Inc. 47 | All rights reserved. 48 | 49 | Redistribution and use in source and binary forms, with or without 50 | modification, are permitted provided that the following conditions are met: 51 | 52 | * Redistributions of source code must retain the above copyright notice, this 53 | list of conditions and the following disclaimer. 
54 | * Redistributions in binary form must reproduce the above copyright notice, 55 | this list of conditions and the following disclaimer in the documentation 56 | and/or other materials provided with the distribution. 57 | * Neither the name of Enthought, Inc. nor the names of its contributors may 58 | be used to endorse or promote products derived from this software without 59 | specific prior written permission. 60 | 61 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 62 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 63 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 64 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 65 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 66 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 67 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 68 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 69 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 70 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 71 | 72 | 73 | ------------------------------------------------------------------------------- 74 | The files 75 | - only_directives.py 76 | - plot_directive.py 77 | originate from Matplotlib (http://matplotlib.sf.net/) which has 78 | the following license: 79 | 80 | Copyright (c) 2002-2008 John D. Hunter; All Rights Reserved. 81 | 82 | 1. This LICENSE AGREEMENT is between John D. Hunter (“JDH”), and the Individual or Organization (“Licensee”) accessing and otherwise using matplotlib software in source or binary form and its associated documentation. 83 | 84 | 2. Subject to the terms and conditions of this License Agreement, JDH hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use matplotlib 0.98.3 alone or in any derivative version, provided, however, that JDH’s License Agreement and JDH’s notice of copyright, i.e., “Copyright (c) 2002-2008 John D. Hunter; All Rights Reserved” are retained in matplotlib 0.98.3 alone or in any derivative version prepared by Licensee. 85 | 86 | 3. In the event Licensee prepares a derivative work that is based on or incorporates matplotlib 0.98.3 or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to matplotlib 0.98.3. 87 | 88 | 4. JDH is making matplotlib 0.98.3 available to Licensee on an “AS IS” basis. JDH MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, JDH MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF MATPLOTLIB 0.98.3 WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 89 | 90 | 5. JDH SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF MATPLOTLIB 0.98.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING MATPLOTLIB 0.98.3, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 91 | 92 | 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 93 | 94 | 7. 
Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between JDH and Licensee. This License Agreement does not grant permission to use JDH trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. 95 | 96 | 8. By copying, installing or otherwise using matplotlib 0.98.3, Licensee agrees to be bound by the terms and conditions of this License Agreement. 97 | 98 | -------------------------------------------------------------------------------- /doc/sphinxext/MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include tests *.py 2 | include *.txt 3 | -------------------------------------------------------------------------------- /doc/sphinxext/README.txt: -------------------------------------------------------------------------------- 1 | ===================================== 2 | numpydoc -- Numpy's Sphinx extensions 3 | ===================================== 4 | 5 | Numpy's documentation uses several custom extensions to Sphinx. These 6 | are shipped in this ``numpydoc`` package, in case you want to make use 7 | of them in third-party projects. 8 | 9 | The following extensions are available: 10 | 11 | - ``numpydoc``: support for the Numpy docstring format in Sphinx, which adds 12 | the code description directives ``np-function``, ``np-cfunction``, etc. 13 | that support the Numpy docstring syntax. 14 | 15 | - ``numpydoc.traitsdoc``: For gathering documentation about Traits attributes. 16 | 17 | - ``numpydoc.plot_directives``: Adaptation of Matplotlib's ``plot::`` 18 | directive. Note that this implementation may still undergo severe 19 | changes or eventually be deprecated. 20 | 21 | - ``numpydoc.only_directives``: (DEPRECATED) 22 | 23 | - ``numpydoc.autosummary``: (DEPRECATED) An ``autosummary::`` directive. 24 | Available in Sphinx 0.6.2 and (to-be) 1.0 as ``sphinx.ext.autosummary``; 25 | the Sphinx 1.0 version is recommended over the one included in 26 | Numpydoc. 27 | 28 | 29 | numpydoc 30 | ======== 31 | 32 | Numpydoc inserts a hook into Sphinx's autodoc that converts docstrings 33 | following the Numpy/Scipy format to a form palatable to Sphinx. 34 | 35 | Options 36 | ------- 37 | 38 | The following options can be set in conf.py: 39 | 40 | - numpydoc_use_plots: bool 41 | 42 | Whether to produce ``plot::`` directives for Examples sections that 43 | contain ``import matplotlib``. 44 | 45 | - numpydoc_show_class_members: bool 46 | 47 | Whether to show all members of a class in the Methods and Attributes 48 | sections automatically. 49 | 50 | - numpydoc_edit_link: bool (DEPRECATED -- edit your HTML template instead) 51 | 52 | Whether to insert an edit link after docstrings.
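For instance, a minimal ``conf.py`` sketch showing how these options might be
set (the ``sys.path`` manipulation and the extension location are assumptions;
adjust them to wherever ``numpy_ext`` lives in your project)::

    import os
    import sys

    # Make the vendored extension importable (hypothetical location)
    sys.path.insert(0, os.path.abspath('sphinxext'))

    extensions = ['numpy_ext.numpydoc']

    # Options described above
    numpydoc_use_plots = True
    numpydoc_show_class_members = False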
53 | -------------------------------------------------------------------------------- /doc/sphinxext/numpy_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hiclib/iced/1a6a7e50ffdcc8904a754033aab32a2f80eb11e8/doc/sphinxext/numpy_ext/__init__.py -------------------------------------------------------------------------------- /doc/sphinxext/numpy_ext/docscrape_sphinx.py: -------------------------------------------------------------------------------- 1 | import re 2 | import inspect 3 | import textwrap 4 | import pydoc 5 | from .docscrape import NumpyDocString 6 | from .docscrape import FunctionDoc 7 | from .docscrape import ClassDoc 8 | 9 | 10 | class SphinxDocString(NumpyDocString): 11 | def __init__(self, docstring, config=None): 12 | config = {} if config is None else config 13 | self.use_plots = config.get('use_plots', False) 14 | NumpyDocString.__init__(self, docstring, config=config) 15 | 16 | # string conversion routines 17 | def _str_header(self, name, symbol='`'): 18 | return ['.. rubric:: ' + name, ''] 19 | 20 | def _str_field_list(self, name): 21 | return [':' + name + ':'] 22 | 23 | def _str_indent(self, doc, indent=4): 24 | out = [] 25 | for line in doc: 26 | out += [' ' * indent + line] 27 | return out 28 | 29 | def _str_signature(self): 30 | return ['']  # always returns here, leaving the signature handling below unreachable 31 | if self['Signature']: 32 | return ['``%s``' % self['Signature']] + [''] 33 | else: 34 | return [''] 35 | 36 | def _str_summary(self): 37 | return self['Summary'] + [''] 38 | 39 | def _str_extended_summary(self): 40 | return self['Extended Summary'] + [''] 41 | 42 | def _str_param_list(self, name): 43 | out = [] 44 | if self[name]: 45 | out += self._str_field_list(name) 46 | out += [''] 47 | for param, param_type, desc in self[name]: 48 | out += self._str_indent(['**%s** : %s' % (param.strip(), 49 | param_type)]) 50 | out += [''] 51 | out += self._str_indent(desc, 8) 52 | out += [''] 53 | return out 54 | 55 | @property 56 | def _obj(self): 57 | if hasattr(self, '_cls'): 58 | return self._cls 59 | elif hasattr(self, '_f'): 60 | return self._f 61 | return None 62 | 63 | def _str_member_list(self, name): 64 | """ 65 | Generate a member listing, autosummary:: table where possible, 66 | and a table where not. 67 | 68 | """ 69 | out = [] 70 | if self[name]: 71 | out += ['.. rubric:: %s' % name, ''] 72 | prefix = getattr(self, '_name', '') 73 | 74 | if prefix: 75 | prefix = '~%s.' % prefix 76 | 77 | autosum = [] 78 | others = [] 79 | for param, param_type, desc in self[name]: 80 | param = param.strip() 81 | if not self._obj or hasattr(self._obj, param): 82 | autosum += [" %s%s" % (prefix, param)] 83 | else: 84 | others.append((param, param_type, desc)) 85 | 86 | if autosum: 87 | # GAEL: Toctree commented out below because it creates 88 | # hundreds of sphinx warnings 89 | # out += ['.. autosummary::', ' :toctree:', ''] 90 | out += ['.. 
autosummary::', ''] 91 | out += autosum 92 | 93 | if others: 94 | maxlen_0 = max([len(x[0]) for x in others]) 95 | maxlen_1 = max([len(x[1]) for x in others]) 96 | hdr = "=" * maxlen_0 + " " + "=" * maxlen_1 + " " + "=" * 10 97 | fmt = '%%%ds %%%ds ' % (maxlen_0, maxlen_1) 98 | n_indent = maxlen_0 + maxlen_1 + 4 99 | out += [hdr] 100 | for param, param_type, desc in others: 101 | out += [fmt % (param.strip(), param_type)] 102 | out += self._str_indent(desc, n_indent) 103 | out += [hdr] 104 | out += [''] 105 | return out 106 | 107 | def _str_section(self, name): 108 | out = [] 109 | if self[name]: 110 | out += self._str_header(name) 111 | out += [''] 112 | content = textwrap.dedent("\n".join(self[name])).split("\n") 113 | out += content 114 | out += [''] 115 | return out 116 | 117 | def _str_see_also(self, func_role): 118 | out = [] 119 | if self['See Also']: 120 | see_also = super(SphinxDocString, self)._str_see_also(func_role) 121 | out = ['.. seealso::', ''] 122 | out += self._str_indent(see_also[2:]) 123 | return out 124 | 125 | def _str_warnings(self): 126 | out = [] 127 | if self['Warnings']: 128 | out = ['.. warning::', ''] 129 | out += self._str_indent(self['Warnings']) 130 | return out 131 | 132 | def _str_index(self): 133 | idx = self['index'] 134 | out = [] 135 | if len(idx) == 0: 136 | return out 137 | 138 | out += ['.. index:: %s' % idx.get('default', '')] 139 | for section, references in idx.items(): 140 | if section == 'default': 141 | continue 142 | elif section == 'refguide': 143 | out += [' single: %s' % (', '.join(references))] 144 | else: 145 | out += [' %s: %s' % (section, ','.join(references))] 146 | return out 147 | 148 | def _str_references(self): 149 | out = [] 150 | if self['References']: 151 | out += self._str_header('References') 152 | if isinstance(self['References'], str): 153 | self['References'] = [self['References']] 154 | out.extend(self['References']) 155 | out += [''] 156 | # Latex collects all references to a separate bibliography, 157 | # so we need to insert links to it 158 | import sphinx # local import to avoid test dependency 159 | if sphinx.__version__ >= "0.6": 160 | out += ['.. only:: latex', ''] 161 | else: 162 | out += ['.. latexonly::', ''] 163 | items = [] 164 | for line in self['References']: 165 | m = re.match(r'.. \[([a-z0-9._-]+)\]', line, re.I) 166 | if m: 167 | items.append(m.group(1)) 168 | out += [' ' + ", ".join(["[%s]_" % item for item in items]), ''] 169 | return out 170 | 171 | def _str_examples(self): 172 | examples_str = "\n".join(self['Examples']) 173 | 174 | if (self.use_plots and 'import matplotlib' in examples_str 175 | and 'plot::' not in examples_str): 176 | out = [] 177 | out += self._str_header('Examples') 178 | out += ['.. 
plot::', ''] 179 | out += self._str_indent(self['Examples']) 180 | out += [''] 181 | return out 182 | else: 183 | return self._str_section('Examples') 184 | 185 | def __str__(self, indent=0, func_role="obj"): 186 | out = [] 187 | out += self._str_signature() 188 | out += self._str_index() + [''] 189 | out += self._str_summary() 190 | out += self._str_extended_summary() 191 | for param_list in ('Parameters', 'Returns', 'Raises', 'Attributes'): 192 | out += self._str_param_list(param_list) 193 | out += self._str_warnings() 194 | out += self._str_see_also(func_role) 195 | out += self._str_section('Notes') 196 | out += self._str_references() 197 | out += self._str_examples() 198 | for param_list in ('Methods',): 199 | out += self._str_member_list(param_list) 200 | out = self._str_indent(out, indent) 201 | return '\n'.join(out) 202 | 203 | 204 | class SphinxFunctionDoc(SphinxDocString, FunctionDoc): 205 | def __init__(self, obj, doc=None, config={}): 206 | self.use_plots = config.get('use_plots', False) 207 | FunctionDoc.__init__(self, obj, doc=doc, config=config) 208 | 209 | 210 | class SphinxClassDoc(SphinxDocString, ClassDoc): 211 | def __init__(self, obj, doc=None, func_doc=None, config={}): 212 | self.use_plots = config.get('use_plots', False) 213 | ClassDoc.__init__(self, obj, doc=doc, func_doc=None, config=config) 214 | 215 | 216 | class SphinxObjDoc(SphinxDocString): 217 | def __init__(self, obj, doc=None, config=None): 218 | self._f = obj 219 | SphinxDocString.__init__(self, doc, config=config) 220 | 221 | 222 | def get_doc_object(obj, what=None, doc=None, config={}): 223 | if what is None: 224 | if inspect.isclass(obj): 225 | what = 'class' 226 | elif inspect.ismodule(obj): 227 | what = 'module' 228 | elif callable(obj): 229 | what = 'function' 230 | else: 231 | what = 'object' 232 | if what == 'class': 233 | return SphinxClassDoc(obj, func_doc=SphinxFunctionDoc, doc=doc, 234 | config=config) 235 | elif what in ('function', 'method'): 236 | return SphinxFunctionDoc(obj, doc=doc, config=config) 237 | else: 238 | if doc is None: 239 | doc = pydoc.getdoc(obj) 240 | return SphinxObjDoc(obj, doc, config=config) 241 | -------------------------------------------------------------------------------- /doc/sphinxext/numpy_ext/numpydoc.py: -------------------------------------------------------------------------------- 1 | """ 2 | ======== 3 | numpydoc 4 | ======== 5 | 6 | Sphinx extension that handles docstrings in the Numpy standard format. [1] 7 | 8 | It will: 9 | 10 | - Convert Parameters etc. sections to field lists. 11 | - Convert See Also section to a See also entry. 12 | - Renumber references. 13 | - Extract the signature from the docstring, if it can't be determined 14 | otherwise. 15 | 16 | .. 
[1] http://projects.scipy.org/numpy/wiki/CodingStyleGuidelines#docstring-standard 17 | 18 | """ 19 | 20 | from __future__ import unicode_literals 21 | 22 | import sys # Only needed to check Python version 23 | import os 24 | import re 25 | import pydoc 26 | from .docscrape_sphinx import get_doc_object 27 | from .docscrape_sphinx import SphinxDocString 28 | import inspect 29 | 30 | 31 | def mangle_docstrings(app, what, name, obj, options, lines, 32 | reference_offset=[0]):  # mutable default: acts as a persistent counter across calls 33 | 34 | cfg = dict(use_plots=app.config.numpydoc_use_plots, 35 | show_class_members=app.config.numpydoc_show_class_members) 36 | 37 | if what == 'module': 38 | # Strip top title 39 | title_re = re.compile(r'^\s*[#*=]{4,}\n[a-z0-9 -]+\n[#*=]{4,}\s*', 40 | re.I | re.S) 41 | lines[:] = title_re.sub('', "\n".join(lines)).split("\n") 42 | else: 43 | doc = get_doc_object(obj, what, "\n".join(lines), config=cfg) 44 | if sys.version_info[0] < 3: 45 | lines[:] = unicode(doc).splitlines() 46 | else: 47 | lines[:] = str(doc).splitlines() 48 | 49 | if app.config.numpydoc_edit_link and hasattr(obj, '__name__') and \ 50 | obj.__name__: 51 | if hasattr(obj, '__module__'): 52 | v = dict(full_name="%s.%s" % (obj.__module__, obj.__name__)) 53 | else: 54 | v = dict(full_name=obj.__name__) 55 | lines += [u'', u'.. htmlonly::', ''] 56 | lines += [u' %s' % x for x in 57 | (app.config.numpydoc_edit_link % v).split("\n")] 58 | 59 | # replace reference numbers so that there are no duplicates 60 | references = [] 61 | for line in lines: 62 | line = line.strip() 63 | m = re.match(r'^.. \[([a-z0-9_.-]+)\]', line, re.I) 64 | if m: 65 | references.append(m.group(1)) 66 | 67 | # start renaming from the longest string, to avoid overwriting parts 68 | references.sort(key=lambda x: -len(x)) 69 | if references: 70 | for i, line in enumerate(lines): 71 | for r in references: 72 | if re.match(r'^\d+$', r): 73 | new_r = "R%d" % (reference_offset[0] + int(r)) 74 | else: 75 | new_r = u"%s%d" % (r, reference_offset[0]) 76 | lines[i] = lines[i].replace(u'[%s]_' % r, 77 | u'[%s]_' % new_r) 78 | lines[i] = lines[i].replace(u'.. [%s]' % r, 79 | u'.. 
[%s]' % new_r) 80 | 81 | reference_offset[0] += len(references) 82 | 83 | 84 | def mangle_signature(app, what, name, obj, 85 | options, sig, retann): 86 | # Do not try to inspect classes that don't define `__init__` 87 | if (inspect.isclass(obj) and 88 | (not hasattr(obj, '__init__') or 89 | 'initializes x; see ' in pydoc.getdoc(obj.__init__))): 90 | return '', '' 91 | 92 | if not (callable(obj) or hasattr(obj, '__argspec_is_invalid_')): 93 | return 94 | if not hasattr(obj, '__doc__'): 95 | return 96 | 97 | doc = SphinxDocString(pydoc.getdoc(obj)) 98 | if doc['Signature']: 99 | sig = re.sub("^[^(]*", "", doc['Signature']) 100 | return sig, '' 101 | 102 | 103 | def setup(app, get_doc_object_=get_doc_object): 104 | global get_doc_object 105 | get_doc_object = get_doc_object_ 106 | 107 | if sys.version_info[0] < 3: 108 | app.connect(b'autodoc-process-docstring', mangle_docstrings) 109 | app.connect(b'autodoc-process-signature', mangle_signature) 110 | else: 111 | app.connect('autodoc-process-docstring', mangle_docstrings) 112 | app.connect('autodoc-process-signature', mangle_signature) 113 | app.add_config_value('numpydoc_edit_link', None, False) 114 | app.add_config_value('numpydoc_use_plots', None, False) 115 | app.add_config_value('numpydoc_show_class_members', True, True) 116 | 117 | # Extra mangling domains 118 | app.add_domain(NumpyPythonDomain) 119 | app.add_domain(NumpyCDomain) 120 | 121 | #----------------------------------------------------------------------------- 122 | # Docstring-mangling domains 123 | #----------------------------------------------------------------------------- 124 | 125 | try: 126 | import sphinx # lazy to avoid test dependency 127 | except ImportError: 128 | CDomain = PythonDomain = object 129 | else: 130 | from sphinx.domains.c import CDomain 131 | from sphinx.domains.python import PythonDomain 132 | 133 | 134 | class ManglingDomainBase(object): 135 | directive_mangling_map = {} 136 | 137 | def __init__(self, *a, **kw): 138 | super(ManglingDomainBase, self).__init__(*a, **kw) 139 | self.wrap_mangling_directives() 140 | 141 | def wrap_mangling_directives(self): 142 | for name, objtype in self.directive_mangling_map.items(): 143 | self.directives[name] = wrap_mangling_directive( 144 | self.directives[name], objtype) 145 | 146 | 147 | class NumpyPythonDomain(ManglingDomainBase, PythonDomain): 148 | name = 'np' 149 | directive_mangling_map = { 150 | 'function': 'function', 151 | 'class': 'class', 152 | 'exception': 'class', 153 | 'method': 'function', 154 | 'classmethod': 'function', 155 | 'staticmethod': 'function', 156 | 'attribute': 'attribute', 157 | } 158 | 159 | 160 | class NumpyCDomain(ManglingDomainBase, CDomain): 161 | name = 'np-c' 162 | directive_mangling_map = { 163 | 'function': 'function', 164 | 'member': 'attribute', 165 | 'macro': 'function', 166 | 'type': 'class', 167 | 'var': 'object', 168 | } 169 | 170 | 171 | def wrap_mangling_directive(base_directive, objtype): 172 | class directive(base_directive): 173 | def run(self): 174 | env = self.state.document.settings.env 175 | 176 | name = None 177 | if self.arguments: 178 | m = re.match(r'^(.*\s+)?(.*?)(\(.*)?', self.arguments[0]) 179 | name = m.group(2).strip() 180 | 181 | if not name: 182 | name = self.arguments[0] 183 | 184 | lines = list(self.content) 185 | mangle_docstrings(env.app, objtype, name, None, None, lines) 186 | # local import to avoid testing dependency 187 | from docutils.statemachine import ViewList 188 | self.content = ViewList(lines, self.content.parent) 189 | 190 | return 
base_directive.run(self) 191 | 192 | return directive 193 | -------------------------------------------------------------------------------- /doc/themes/iced/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "basic/layout.html" %} 2 | 3 | {% block htmltitle %} 4 | {{ super() }} 5 | 6 | 7 | 10 | 12 | 14 | 15 | {% endblock %} 16 | 17 | {% set title = "Iced: fast and memory efficient ICE normalization" %} 18 | 19 | {%- block doctype -%} 20 | 21 | {%- endblock %} 22 | 23 | {%- block extrahead %} 24 | 25 | 26 | 28 | 29 | 32 | 33 | 34 | {% endblock %} 35 | 36 | {# Silence the sidebar's, relbar's #} 37 | {% block header %} 38 | {%- block navbar -%} 39 | 52 | {%- endblock -%} 53 | {% endblock %} 54 | {% block sidebar1 %}{% endblock %} 55 | {% block sidebar2 %}{% endblock %} 56 | {% block relbar1 %}{% endblock %} 57 | {% block relbar2 %}{% endblock %} 58 | {% block sidebarsourcelink %}{% endblock %} 59 | 60 | {%- block content %} 61 |
62 | {% block body %} {% endblock %} 63 | 64 | 65 | {%- endblock %} 66 | 67 | 68 | {%- block footer %} 69 | 70 | 71 | Back to top 72 | {% if theme_source_link_position == "footer" %} 73 | 74 | {% include "sourcelink.html" %} 75 | {% endif %} 76 | 77 | 78 | {%- if show_copyright %} 79 | {%- if hasdoc('copyright') %} 80 | {% trans path=pathto('copyright'), copyright=copyright|e %}© Copyright {{ copyright }}.{% endtrans %} 81 | {%- else %} 82 | {% trans copyright=copyright|e %}© Copyright {{ copyright }}.{% endtrans %} 83 | {%- endif %} 84 | {%- endif %} 85 | {%- if last_updated %} 86 | {% trans last_updated=last_updated|e %}Last updated on {{ last_updated }}.{% endtrans %} 87 | {%- endif %} 88 | {%- if show_sphinx %} 89 | {% trans sphinx_version=sphinx_version|e %}Created using Sphinx {{ sphinx_version }}.{% endtrans %} 90 | {%- endif %} 91 | 92 | 
93 | {%- endblock %} 94 | -------------------------------------------------------------------------------- /doc/themes/iced/logos: -------------------------------------------------------------------------------- 1 | ../../_static/logos/ -------------------------------------------------------------------------------- /doc/themes/iced/theme.conf: -------------------------------------------------------------------------------- 1 | [theme] 2 | inherit = basic 3 | pygments_style = tango 4 | -------------------------------------------------------------------------------- /doc/tutorial/basic/tutorial.rst: -------------------------------------------------------------------------------- 1 | .. _introduction: 2 | 3 | An introduction to contact count normalization with iced 4 | ========================================================= 5 | -------------------------------------------------------------------------------- /doc/tutorial/index.rst: -------------------------------------------------------------------------------- 1 | .. _tutorial_menu: 2 | 3 | 4 | ================ 5 | Iced tutorial 6 | ================ 7 | 8 | .. contents:: 9 | 10 | .. note:: **Doctest Mode** 11 | 12 | The code examples in this tutorial are written in a 13 | *python-console* format. If you wish to easily execute these examples 14 | in **IPython**, use:: 15 | 16 | %doctest_mode 17 | 18 | in the IPython-console. You can then simply copy and paste the examples 19 | directly into IPython without having to worry about removing the **>>>** 20 | manually. 21 | 22 | 23 | What is iced? 24 | ============= 25 | 26 | ``iced`` is a Python package that contains normalization techniques for Hi-C data. 27 | It is included in the HiC-pro pipeline, which processes data from raw fastq 28 | files to normalized contact maps. Eventually, ``iced`` grew beyond being just a 29 | normalization package, and it contains a number of utility functions that may 30 | be useful if you are analyzing and processing Hi-C data. 31 | 32 | If you use ``iced``, please cite: 33 | 34 | HiC-Pro: An optimized and flexible pipeline for Hi-C data processing. Servant N., 35 | Varoquaux N., Lajoie BR., Viara E., Chen CJ., Vert JP., Dekker J., Heard E., 36 | Barillot E. Genome Biology 2015, 16:259 doi:10.1186/s13059-015-0831-x 37 | http://www.genomebiology.com/2015/16/1/259 38 | 39 | Working with Hi-C data in Python 40 | ================================ 41 | 42 | Hi-C data boils down to a matrix of contact counts. Each row and column 43 | corresponds to a genomic window, and each entry to the number of times these 44 | genomic windows have been observed to interact with one another. Python 45 | happens to be an excellent language to manipulate matrices, and ``iced`` 46 | leverages a number of scientific packages that provide nice and easy-to-use 47 | matrix operations. 48 | 49 | .. note:: 50 | 51 | If you are not familiar with numpy and Python, we strongly encourage you to 52 | follow the short tutorial of the `scipy lecture notes 53 | `_ 54 | 55 | .. _loading_example_dataset: 56 | 57 | Loading an example dataset 58 | ========================== 59 | 60 | ``iced`` comes with a sample data set that allows you to play a bit with the 61 | package. The sample data set included corresponds to the first 5 chromosomes 62 | of the budding yeast *S. cerevisiae*. In the following, we start a Python or 63 | IPython interpreter from our shell and load this data set. 
Our notational 64 | convention is that ``$`` denotes the shell prompt while ``>>>`` denotes the 65 | Python interpreter prompt:: 66 | 67 | $ python 68 | >>> from iced import datasets 69 | >>> counts, lengths = datasets.load_sample_yeast() 70 | 71 | A data set in ``iced`` is composed of an N by N numpy.ndarray ``counts`` and a 72 | vector of ``lengths`` that contains the number of bins per chromosome. For 73 | our sample data, the vector ``lengths`` is an ndarray of length 5, indicating 74 | that we have 5 chromosomes:: 75 | 76 | >>> print(len(lengths)) 77 | 5 78 | 79 | The contact map ``counts`` should be square and symmetric. The shape should 80 | also match the lengths vector:: 81 | 82 | >>> print(counts.shape) 83 | (350, 350) 84 | >>> print(lengths.sum()) 85 | 350 86 | 87 | The ``counts`` matrix here is of size 350 by 350. 88 | 89 | You've successfully loaded your first Hi-C data set! 90 | The corresponding image is the following. 91 | 92 | .. image:: /auto_examples/datasets/images/sphx_glr_plot_yeast_sample_001.png 93 | :target: ../../auto_examples/datasets/plot_yeast_sample.html 94 | :align: center 95 | :scale: 50 96 | 97 | 98 | Normalizing a data set with ICE 99 | =============================== 100 | 101 | Now that we have some data loaded, let's proceed to normalizing it. There are 102 | two normalization algorithms implemented in `iced`: ICE and SCN. ICE is the 103 | most widely used normalization technique for Hi-C data, so this is the one we 104 | will showcase. 105 | 106 | ICE is based on a matrix balancing algorithm. The underlying assumptions are 107 | that the contact map suffers from biases that can be decomposed as a product 108 | of regional biases: :math:`C_{ij} = \beta_i \beta_j N_{ij}`, where 109 | :math:`C_{ij}` is the raw contact count between loci :math:`i` and :math:`j`, 110 | :math:`N_{ij}` the normalized contact count, and :math:`\beta` the bias 111 | vector. 112 | 113 | Normalizing the data is as simple as follows:: 114 | 115 | >>> from iced import normalization 116 | >>> normed = normalization.ICE_normalization(counts) 117 | 118 | But the estimation of the bias vector can be severely problematic in low 119 | coverage regions. In fact, if the matrix is too sparse, the algorithm may not 120 | converge at all! To avoid this, Imakaev et al. recommend filtering out a 121 | certain percentage of the rows and columns that interact the least. This has to 122 | be performed prior to applying the normalization algorithm:: 123 | 124 | >>> from iced import filter 125 | >>> counts = filter.filter_low_counts(counts, percentage=0.04) 126 | >>> normed = normalization.ICE_normalization(counts) 127 | 128 | 129 | .. image:: /auto_examples/normalization/images/sphx_glr_plot_ice_normalization_001.png 130 | :target: ../../auto_examples/normalization/plot_ice_normalization.html 131 | :align: center 132 | :scale: 75 133 | 134 | 135 | How about cancer data sets? LOIC and CAIC 136 | ========================================= 137 | 138 | The last section discussed normalizing Hi-C data using ICE. This method is not 139 | suited to normalizing cancer data sets: several of the assumptions made do not 140 | hold in the presence of copy number variation. 141 | 142 | ``iced`` provides two methods for normalizing data sets with copy number 143 | variations, illustrated in the sketch below: 144 | 145 | - ``LOIC``, to preserve enrichment in interactions due to copy number 146 | variations 147 | - ``CAIC``, to remove copy number variation effects from the contact count matrix. 
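In practice, both methods rely on :func:`iced.normalization.ICE_normalization`
and :func:`iced.normalization.estimate_block_biases`. A minimal sketch,
mirroring the cancer normalization examples shipped in the gallery (it uses the
simulated sample data set bundled with ``iced``)::

    >>> from iced import datasets
    >>> from iced import normalization
    >>> counts, lengths, cnv = datasets.load_sample_cancer()
    >>> # LOIC: ICE with a copy-number profile, preserving the CNV signal
    >>> loic_normed = normalization.ICE_normalization(counts, counts_profile=cnv)
    >>> # CAIC: additionally estimate and divide out the block biases due to CNV
    >>> block_biases = normalization.estimate_block_biases(counts, lengths, cnv)
    >>> caic_normed = loic_normed / block_biases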
148 | 149 | Two complete examples are available in the gallery, showcasing how to use 150 | ``LOIC`` and ``CAIC`` with ``iced``. 151 | 152 | -------------------------------------------------------------------------------- /doc/whats_new.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | Release history 3 | =============== 4 | 5 | 6 | Version 0.4 7 | =========== 8 | 9 | **In Development** 10 | 11 | 12 | New features 13 | ------------ 14 | 15 | - Added a new filtering option `remove_all_zeros_loci`, which removes 16 | non-interacting rows and columns from the count matrix *prior* to applying 17 | the filtering strategy. 18 | By Nicolas Servant and Nelle Varoquaux 19 | 20 | - `ice` now has a new option `--output-bias`. If provided, the bias vector 21 | will be saved in a file in addition to the normalized contact counts. 22 | By Nicolas Servant and Nelle Varoquaux 23 | -------------------------------------------------------------------------------- /environment-dev.yml: -------------------------------------------------------------------------------- 1 | name: iced 2 | channels: 3 | - default 4 | dependencies: 5 | - python>3 6 | - cython 7 | - numpy>1.16 8 | - pandas 9 | - scikit-learn 10 | - scipy>0.19 11 | # requirements/tests.txt 12 | - codecov 13 | - flake8 14 | - pytest>=5.2 15 | - pytest-cov>=2.7 16 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: iced 2 | channels: 3 | - default 4 | dependencies: 5 | - python>3 6 | - cython 7 | - numpy>1.16 8 | - pandas 9 | - scikit-learn 10 | - scipy>0.19 11 | -------------------------------------------------------------------------------- /examples/HiC-pro/launch_tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Basic normalization 3 | ice subset.matrix 4 | 5 | # More HiC-pro like with the options 6 | ice --results_filename /tmp/iced_matrix.matrix --filter_low_counts_perc 0.02 \ 7 | --filter_high_counts_perc 0.02 --max_iter 1000 --eps 0.1 \ 8 | --remove-all-zeros-loci --output-bias 1 --verbose 1 subset.matrix 9 | 10 | python load_counts.py 11 | -------------------------------------------------------------------------------- /examples/HiC-pro/load_counts.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from iced import io 3 | import numpy as np 4 | 5 | print("Checking the normalized matrix can be re-loaded") 6 | 7 | counts = io.load_counts("/tmp/iced_matrix.matrix") 8 | 9 | # Load with np.loadtxt and check that the shape makes sense 10 | print("Checking the shape of the written matrix makes sense") 11 | t = np.loadtxt("/tmp/iced_matrix.matrix") 12 | if t.shape[1] != 3: 13 | raise ValueError("The shape of the written matrix doesn't make sense") 14 | 15 | # Checking that the indexing seems fine (i.e., is one-based) 16 | if t[0, 0] < 1 or t[0, 1] < 1: 17 | raise ValueError("The output should be 1-based, not 0-based") 18 | 19 | # Checking that there are no 0 in the written file 20 | if np.any(counts.data == 0): 21 | raise ValueError("The output contains 0") 22 | -------------------------------------------------------------------------------- /examples/README.txt: -------------------------------------------------------------------------------- 1 | .. 
_general_examples: 2 | 3 | General examples 4 | ---------------- 5 | 6 | General-purpose and introductory examples for `iced`. 7 | -------------------------------------------------------------------------------- /examples/normalization/README.txt: -------------------------------------------------------------------------------- 1 | .. _normalization_examples: 2 | 3 | Normalization 4 | ------------- 5 | 6 | Examples concerning the :mod:`iced.normalization` module. 7 | -------------------------------------------------------------------------------- /examples/normalization/plot_caic_normalization.py: -------------------------------------------------------------------------------- 1 | """ 2 | =================================================== 3 | Normalizing a cancer contact count matrix with CAIC 4 | =================================================== 5 | 6 | CAIC is a normalization method to remove copy-number biases present in a 7 | matrix. This example showcases how to perform such a normalization on 8 | simulated data with `iced`. 9 | 10 | """ 11 | 12 | ############################################################################### 13 | # Loading the data and normalizing 14 | # -------------------------------- 15 | # 16 | # The normalization is done in three steps: 17 | # 18 | # 1. Normalize the data using LOIC, to remove GC, mappability, and other 19 | # biases 20 | # 2. Estimate the block biases due to copy number. 21 | # 3. Remove the block biases from the LOIC-normalized contact counts 22 | 23 | from iced import datasets 24 | from iced import normalization 25 | import matplotlib.pyplot as plt 26 | from matplotlib import colors 27 | 28 | 29 | counts, lengths, cnv = datasets.load_sample_cancer() 30 | 31 | loic_normed = normalization.ICE_normalization(counts, counts_profile=cnv) 32 | block_biases = normalization.estimate_block_biases(counts, lengths, cnv) 33 | caic_normed = loic_normed / block_biases 34 | 35 | ############################################################################### 36 | # Visualizing the results using Matplotlib 37 | # ---------------------------------------- 38 | # 39 | # The following code visualizes the raw original data, the estimated block 40 | # biases, and the normalized matrix using the CAIC method. 
41 | chromosomes = ["I", "II", "III", "IV", "V", "VI"] 42 | 43 | fig, axes = plt.subplots(ncols=3, figsize=(14, 3)) 44 | 45 | axes[0].imshow(counts, cmap="RdBu_r", norm=colors.SymLogNorm(1), 46 | extent=(0, len(counts), 0, len(counts))) 47 | 48 | [axes[0].axhline(i, linewidth=1, color="#000000") for i in lengths.cumsum()] 49 | [axes[0].axvline(i, linewidth=1, color="#000000") for i in lengths.cumsum()] 50 | axes[0].set_title("Raw contact counts", fontweight="bold") 51 | 52 | m = axes[1].imshow(block_biases, cmap="RdBu_r", norm=colors.SymLogNorm(1), 53 | extent=(0, len(counts), 0, len(counts))) 54 | [axes[1].axhline(i, linewidth=1, color="#000000") for i in lengths.cumsum()] 55 | [axes[1].axvline(i, linewidth=1, color="#000000") for i in lengths.cumsum()] 56 | axes[1].set_title("Estimated block biases", fontweight="bold") 57 | 58 | m = axes[2].imshow(caic_normed, 59 | cmap="RdBu_r", norm=colors.SymLogNorm(1), 60 | extent=(0, len(counts), 0, len(counts))) 61 | [axes[2].axhline(i, linewidth=1, color="#000000") for i in lengths.cumsum()] 62 | [axes[2].axvline(i, linewidth=1, color="#000000") for i in lengths.cumsum()] 63 | cb = fig.colorbar(m) 64 | axes[2].set_title("Normalized contact counts with CAIC", fontweight="bold") 65 | -------------------------------------------------------------------------------- /examples/normalization/plot_filtering_strategies.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================== 3 | Different filtering strategies 4 | ============================== 5 | 6 | `iced` provides different filtering strategies. In short: 7 | 8 | - filtering rows and columns that are the most sparse. 9 | - filtering of the smallest x% rows and columns in terms of interactions 10 | - filtering of the smallest x% **interacting** rows and columns 11 | 12 | """ 13 | import matplotlib.pyplot as plt 14 | from matplotlib import colors 15 | 16 | from iced import datasets 17 | from iced import filter 18 | 19 | 20 | # Loading a sample dataset 21 | counts, lengths = datasets.load_sample_yeast() 22 | 23 | 24 | fig, axes = plt.subplots(ncols=3, figsize=(12, 4)) 25 | counts_1 = filter.filter_low_counts(counts, lengths=lengths, percentage=0.04) 26 | counts_2 = filter.filter_low_counts(counts, lengths=lengths, percentage=0.04, 27 | sparsity=False) 28 | counts_3 = filter.filter_low_counts(counts, lengths=lengths, percentage=0.04, 29 | sparsity=False, remove_all_zeros_loci=True) 30 | 31 | 32 | # Plotting the results using matplotlib 33 | chromosomes = ["I", "II", "III", "IV", "V", "VI"] 34 | 35 | 36 | for ax, c in zip(axes, [counts_1, counts_2, counts_3]): 37 | ax.imshow(c, cmap="Blues", norm=colors.SymLogNorm(1), 38 | extent=(0, len(counts), 0, len(counts))) 39 | 40 | [ax.axhline(i, linewidth=1, color="#000000") for i in lengths.cumsum()] 41 | [ax.axvline(i, linewidth=1, color="#000000") for i in lengths.cumsum()] 42 | 43 | axes[0].set_title("Filtering 4% sparsest loci") 44 | axes[1].set_title("Filtering 4% smallest interacting loci") 45 | axes[2].set_title("Filtering 4% smallest interacting loci\n + all " 46 | "non-interacting loci") 47 | -------------------------------------------------------------------------------- /examples/normalization/plot_ice_normalization.py: -------------------------------------------------------------------------------- 1 | """ 2 | ================================== 3 | Normalizing a contact count matrix 4 | ================================== 5 | 6 | This example showcases some basic filtering and 
normalization. 7 | 8 | """ 9 | import matplotlib.pyplot as plt 10 | from matplotlib import colors 11 | 12 | from iced import datasets 13 | from iced import filter 14 | from iced import normalization 15 | 16 | 17 | # Loading a sample dataset 18 | counts, lengths = datasets.load_sample_yeast() 19 | 20 | # Filtering and normalizing contact count data 21 | normed = filter.filter_low_counts(counts, lengths=lengths, percentage=0.04) 22 | normed = normalization.ICE_normalization(normed) 23 | 24 | # Plotting the results using matplotlib 25 | chromosomes = ["I", "II", "III", "IV", "V", "VI"] 26 | 27 | fig, axes = plt.subplots(ncols=2, figsize=(12, 4)) 28 | 29 | axes[0].imshow(counts, cmap="RdBu_r", norm=colors.SymLogNorm(1), 30 | extent=(0, len(counts), 0, len(counts))) 31 | 32 | [axes[0].axhline(i, linewidth=1, color="#000000") for i in lengths.cumsum()] 33 | [axes[0].axvline(i, linewidth=1, color="#000000") for i in lengths.cumsum()] 34 | axes[0].set_title("Raw contact counts", fontweight="bold") 35 | 36 | m = axes[1].imshow(normed, cmap="RdBu_r", norm=colors.SymLogNorm(1), 37 | extent=(0, len(counts), 0, len(counts))) 38 | [axes[1].axhline(i, linewidth=1, color="#000000") for i in lengths.cumsum()] 39 | [axes[1].axvline(i, linewidth=1, color="#000000") for i in lengths.cumsum()] 40 | cb = fig.colorbar(m) 41 | axes[1].set_title("Normalized contact counts", fontweight="bold") 42 | -------------------------------------------------------------------------------- /examples/normalization/plot_loic_normalization.py: -------------------------------------------------------------------------------- 1 | """ 2 | =================================================== 3 | Normalizing a cancer contact count matrix with LOIC 4 | =================================================== 5 | 6 | """ 7 | 8 | from iced import datasets 9 | from iced import normalization 10 | import matplotlib.pyplot as plt 11 | from matplotlib import colors 12 | 13 | # Loading a sample dataset 14 | counts, lengths, cnv = datasets.load_sample_cancer() 15 | 16 | normed = normalization.ICE_normalization(counts, counts_profile=cnv) 17 | 18 | # Plotting the results using matplotlib 19 | chromosomes = ["I", "II", "III", "IV", "V", "VI"] 20 | 21 | fig, axes = plt.subplots(ncols=2, figsize=(12, 4)) 22 | 23 | axes[0].imshow(counts, cmap="RdBu_r", norm=colors.SymLogNorm(1), 24 | extent=(0, len(counts), 0, len(counts))) 25 | 26 | [axes[0].axhline(i, linewidth=1, color="#000000") for i in lengths.cumsum()] 27 | [axes[0].axvline(i, linewidth=1, color="#000000") for i in lengths.cumsum()] 28 | axes[0].set_title("Raw contact counts", fontweight="bold") 29 | 30 | m = axes[1].imshow(normed, cmap="RdBu_r", norm=colors.SymLogNorm(1), 31 | extent=(0, len(counts), 0, len(counts))) 32 | [axes[1].axhline(i, linewidth=1, color="#000000") for i in lengths.cumsum()] 33 | [axes[1].axvline(i, linewidth=1, color="#000000") for i in lengths.cumsum()] 34 | cb = fig.colorbar(m) 35 | axes[1].set_title("Normalized contact counts with LOIC", fontweight="bold") 36 | -------------------------------------------------------------------------------- /examples/utils/README.txt: -------------------------------------------------------------------------------- 1 | .. _utils_examples: 2 | 3 | Utils 4 | ------------- 5 | 6 | Examples concerning the :mod:`iced.utils` module. 
7 | -------------------------------------------------------------------------------- /examples/utils/plot_extract_sample_map.py: -------------------------------------------------------------------------------- 1 | """ 2 | ================================= 3 | Extracting parts of a contact map 4 | ================================= 5 | 6 | This example shows how to extract the contact counts associated with some 7 | chromosomes of a contact map. Here, we extract chromosomes 1, 4 and 5 of the 8 | budding yeast contact map. 9 | """ 10 | import matplotlib.pyplot as plt 11 | from matplotlib import colors 12 | 13 | from iced import datasets 14 | from iced.utils import extract_sub_contact_map 15 | 16 | 17 | # Loading a sample dataset 18 | counts, lengths = datasets.load_sample_yeast() 19 | sub_counts, sub_lengths = extract_sub_contact_map(counts, lengths, [0, 3, 4]) 20 | 21 | fig, ax = plt.subplots() 22 | m = ax.matshow(sub_counts, cmap="Blues", norm=colors.SymLogNorm(1), 23 | extent=(0, len(sub_counts), 0, len(sub_counts))) 24 | [ax.axhline(i, linewidth=1, color="#000000") for i in sub_lengths.cumsum()] 25 | [ax.axvline(i, linewidth=1, color="#000000") for i in sub_lengths.cumsum()] 26 | cb = fig.colorbar(m) 27 | ax.set_title("Chromosomes I, IV and V of yeast") 28 | -------------------------------------------------------------------------------- /examples/utils/plot_intra_inter_contact_maps.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================================ 3 | Plotting intra- and inter-chromosomal maps 4 | ============================================ 5 | 6 | This example shows how to use masks to plot only the inter- or the 7 | intra-chromosomal contact map. 8 | 9 | """ 10 | import numpy as np 11 | import matplotlib.pyplot as plt 12 | from matplotlib import colors 13 | 14 | from iced import datasets 15 | from iced.utils import get_intra_mask 16 | from iced.utils import get_inter_mask 17 | 18 | 19 | # Loading a sample dataset 20 | counts, lengths = datasets.load_sample_yeast() 21 | intra_mask = get_intra_mask(lengths) 22 | inter_mask = get_inter_mask(lengths) 23 | 24 | fig, axes = plt.subplots(ncols=2, figsize=(12, 6)) 25 | inter_counts = counts.copy() 26 | inter_counts[intra_mask] = np.nan 27 | intra_counts = counts.copy() 28 | intra_counts[inter_mask] = np.nan 29 | 30 | m = axes[0].matshow(intra_counts, cmap="Blues", norm=colors.SymLogNorm(1), 31 | extent=(0, len(counts), 0, len(counts))) 32 | m = axes[1].matshow(inter_counts, cmap="Blues", norm=colors.SymLogNorm(1), 33 | extent=(0, len(counts), 0, len(counts))) 34 | 35 | axes[0].set_title("Intra-chromosomal maps") 36 | axes[1].set_title("Inter-chromosomal maps") 37 | 38 | [axes[0].axhline(i, linewidth=1, color="#000000") for i in lengths.cumsum()] 39 | [axes[0].axvline(i, linewidth=1, color="#000000") for i in lengths.cumsum()] 40 | [axes[1].axhline(i, linewidth=1, color="#000000") for i in lengths.cumsum()] 41 | [axes[1].axvline(i, linewidth=1, color="#000000") for i in lengths.cumsum()] 42 | -------------------------------------------------------------------------------- /iced/__init__.py: -------------------------------------------------------------------------------- 1 | from . import normalization 2 | from . import filter 3 | from . import io 4 | from . import datasets 5 | from . 
import random 6 | 7 | __version__ = "0.6.0a0.dev0" 8 | -------------------------------------------------------------------------------- /iced/_filter_.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | cimport cython 3 | cimport numpy as cnp 4 | 5 | ctypedef cnp.float64_t DOUBLE 6 | ctypedef cnp.int8_t BOOL 7 | 8 | 9 | @cython.boundscheck(False) 10 | @cython.wraparound(False) 11 | @cython.cdivision(True) 12 | def _filter_csr(X, cnp.ndarray[BOOL, ndim=1, cast=True] bias): 13 | 14 | cdef: 15 | cnp.ndarray[DOUBLE, ndim=1] X_data = X.data 16 | cnp.ndarray[int, ndim=1] X_indices = X.indices 17 | cnp.ndarray[int, ndim=1] X_indptr = X.indptr 18 | unsigned int m = X.shape[0] 19 | unsigned int i, j, row 20 | 21 | # X is in CSR format: X_indices holds column indices, while j tracks the 22 | # current row by advancing through X_indptr. An entry is zeroed out when 23 | # either its row or its column is flagged in `bias`. 24 | j = 0 25 | for i, row in enumerate(X_indices): 26 | while i >= X_indptr[j + 1]: 27 | j += 1 28 | if bias[row] or bias[j]: 29 | X_data[i] = 0 30 | return X 31 | -------------------------------------------------------------------------------- /iced/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import clear_data_home, get_data_home 2 | from .base import load_sample_yeast, load_sample_cancer 3 | -------------------------------------------------------------------------------- /iced/datasets/base.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os import environ, makedirs 3 | from os.path import join, expanduser, exists 4 | from os.path import dirname 5 | import shutil 6 | 7 | import pandas as pd 8 | from .. import io 9 | 10 | # authors: Nelle Varoquaux 11 | 12 | # This module is greatly inspired from sklearn.datasets 13 | 14 | 15 | def get_data_home(data_home=None): 16 | """Return the path of the iced data dir. 17 | 18 | This folder is used by some large dataset loaders to avoid 19 | downloading the data several times. 20 | 21 | By default the data dir is set to a folder named 'hiclib_data' 22 | in the user home folder. 23 | 24 | Alternatively, it can be set by the 'HICLIB_DATA' environment 25 | variable or programmatically by giving an explicit folder path. The 26 | '~' symbol is expanded to the user home folder. 27 | 28 | If the folder does not already exist, it is automatically created. 29 | """ 30 | if data_home is None: 31 | data_home = environ.get('HICLIB_DATA', 32 | join('~', 'hiclib_data')) 33 | data_home = expanduser(data_home) 34 | if not exists(data_home): 35 | makedirs(data_home) 36 | return data_home 37 | 38 | 39 | def clear_data_home(data_home=None): 40 | """Delete all the content of the data home cache.""" 41 | data_home = get_data_home(data_home) 42 | shutil.rmtree(data_home) 43 | 44 | 45 | def load_sample_yeast(): 46 | """ 47 | Load and return a sample of the S. cerevisiae contact count matrix from 48 | Duan et al., Nature, 2009 49 | 50 | Returns 51 | ------- 52 | counts, lengths: 53 | tuple of two elements, the first a contact count matrix, the 54 | second an ndarray containing the lengths of the chromosomes. 
55 | """ 56 | module_path = dirname(__file__) 57 | lengths = io.load_lengths( 58 | os.path.join(module_path, "data/duan2009/duan.SC.10000.raw_sub.bed")) 59 | counts = io.load_counts( 60 | os.path.join(module_path, 61 | "data/duan2009/duan.SC.10000.raw_sub.matrix"), 62 | lengths=lengths) 63 | counts = counts.toarray() 64 | counts = counts.T + counts 65 | return counts, lengths 66 | 67 | 68 | def load_sample_cancer(): 69 | """ 70 | Load and return a sample of a simulated cancer Hi-C data from Servant et 71 | al, 2018 72 | 73 | Returns 74 | ------- 75 | counts, lengths: 76 | tuple of two elements, the first a contact count matrix, the 77 | second an ndarray containing the lengths of the chromosomes. 78 | """ 79 | module_path = dirname(__file__) 80 | lengths_filename = os.path.join( 81 | module_path, "data/servant2018/simulation_3.bed") 82 | lengths = io.load_lengths( 83 | lengths_filename) 84 | counts = io.load_counts( 85 | os.path.join(module_path, 86 | "data/servant2018/simulation_3.mat"), 87 | lengths=lengths) 88 | counts = counts.toarray() 89 | counts = counts.T + counts 90 | 91 | # Load CNVs 92 | cnv = pd.read_csv(lengths_filename, usecols=(3, ), comment="#", sep="\t", 93 | header=None) 94 | cnv = cnv.values.flatten() 95 | return counts, lengths, cnv 96 | -------------------------------------------------------------------------------- /iced/datasets/data/duan2009/duan.SC.10000.raw_sub.bed: -------------------------------------------------------------------------------- 1 | chr01 1 10000 0 2 | chr01 10001 20000 1 3 | chr01 20001 30000 2 4 | chr01 30001 40000 3 5 | chr01 40001 50000 4 6 | chr01 50001 60000 5 7 | chr01 60001 70000 6 8 | chr01 70001 80000 7 9 | chr01 80001 90000 8 10 | chr01 90001 100000 9 11 | chr01 100001 110000 10 12 | chr01 110001 120000 11 13 | chr01 120001 130000 12 14 | chr01 130001 140000 13 15 | chr01 140001 150000 14 16 | chr01 150001 160000 15 17 | chr01 160001 170000 16 18 | chr01 170001 180000 17 19 | chr01 180001 190000 18 20 | chr01 190001 200000 19 21 | chr01 200001 210000 20 22 | chr01 210001 220000 21 23 | chr01 220001 230000 22 24 | chr01 230001 240000 23 25 | chr02 1 10000 24 26 | chr02 10001 20000 25 27 | chr02 20001 30000 26 28 | chr02 30001 40000 27 29 | chr02 40001 50000 28 30 | chr02 50001 60000 29 31 | chr02 60001 70000 30 32 | chr02 70001 80000 31 33 | chr02 80001 90000 32 34 | chr02 90001 100000 33 35 | chr02 100001 110000 34 36 | chr02 110001 120000 35 37 | chr02 120001 130000 36 38 | chr02 130001 140000 37 39 | chr02 140001 150000 38 40 | chr02 150001 160000 39 41 | chr02 160001 170000 40 42 | chr02 170001 180000 41 43 | chr02 180001 190000 42 44 | chr02 190001 200000 43 45 | chr02 200001 210000 44 46 | chr02 210001 220000 45 47 | chr02 220001 230000 46 48 | chr02 230001 240000 47 49 | chr02 240001 250000 48 50 | chr02 250001 260000 49 51 | chr02 260001 270000 50 52 | chr02 270001 280000 51 53 | chr02 280001 290000 52 54 | chr02 290001 300000 53 55 | chr02 300001 310000 54 56 | chr02 310001 320000 55 57 | chr02 320001 330000 56 58 | chr02 330001 340000 57 59 | chr02 340001 350000 58 60 | chr02 350001 360000 59 61 | chr02 360001 370000 60 62 | chr02 370001 380000 61 63 | chr02 380001 390000 62 64 | chr02 390001 400000 63 65 | chr02 400001 410000 64 66 | chr02 410001 420000 65 67 | chr02 420001 430000 66 68 | chr02 430001 440000 67 69 | chr02 440001 450000 68 70 | chr02 450001 460000 69 71 | chr02 460001 470000 70 72 | chr02 470001 480000 71 73 | chr02 480001 490000 72 74 | chr02 490001 500000 73 75 | chr02 500001 510000 74 76 | chr02 510001 
520000 75 77 | chr02 520001 530000 76 78 | chr02 530001 540000 77 79 | chr02 540001 550000 78 80 | chr02 550001 560000 79 81 | chr02 560001 570000 80 82 | chr02 570001 580000 81 83 | chr02 580001 590000 82 84 | chr02 590001 600000 83 85 | chr02 600001 610000 84 86 | chr02 610001 620000 85 87 | chr02 620001 630000 86 88 | chr02 630001 640000 87 89 | chr02 640001 650000 88 90 | chr02 650001 660000 89 91 | chr02 660001 670000 90 92 | chr02 670001 680000 91 93 | chr02 680001 690000 92 94 | chr02 690001 700000 93 95 | chr02 700001 710000 94 96 | chr02 710001 720000 95 97 | chr02 720001 730000 96 98 | chr02 730001 740000 97 99 | chr02 740001 750000 98 100 | chr02 750001 760000 99 101 | chr02 760001 770000 100 102 | chr02 770001 780000 101 103 | chr02 780001 790000 102 104 | chr02 790001 800000 103 105 | chr02 800001 810000 104 106 | chr02 810001 820000 105 107 | chr03 1 10000 106 108 | chr03 10001 20000 107 109 | chr03 20001 30000 108 110 | chr03 30001 40000 109 111 | chr03 40001 50000 110 112 | chr03 50001 60000 111 113 | chr03 60001 70000 112 114 | chr03 70001 80000 113 115 | chr03 80001 90000 114 116 | chr03 90001 100000 115 117 | chr03 100001 110000 116 118 | chr03 110001 120000 117 119 | chr03 120001 130000 118 120 | chr03 130001 140000 119 121 | chr03 140001 150000 120 122 | chr03 150001 160000 121 123 | chr03 160001 170000 122 124 | chr03 170001 180000 123 125 | chr03 180001 190000 124 126 | chr03 190001 200000 125 127 | chr03 200001 210000 126 128 | chr03 210001 220000 127 129 | chr03 220001 230000 128 130 | chr03 230001 240000 129 131 | chr03 240001 250000 130 132 | chr03 250001 260000 131 133 | chr03 260001 270000 132 134 | chr03 270001 280000 133 135 | chr03 280001 290000 134 136 | chr03 290001 300000 135 137 | chr03 300001 310000 136 138 | chr03 310001 320000 137 139 | chr04 1 10000 138 140 | chr04 10001 20000 139 141 | chr04 20001 30000 140 142 | chr04 30001 40000 141 143 | chr04 40001 50000 142 144 | chr04 50001 60000 143 145 | chr04 60001 70000 144 146 | chr04 70001 80000 145 147 | chr04 80001 90000 146 148 | chr04 90001 100000 147 149 | chr04 100001 110000 148 150 | chr04 110001 120000 149 151 | chr04 120001 130000 150 152 | chr04 130001 140000 151 153 | chr04 140001 150000 152 154 | chr04 150001 160000 153 155 | chr04 160001 170000 154 156 | chr04 170001 180000 155 157 | chr04 180001 190000 156 158 | chr04 190001 200000 157 159 | chr04 200001 210000 158 160 | chr04 210001 220000 159 161 | chr04 220001 230000 160 162 | chr04 230001 240000 161 163 | chr04 240001 250000 162 164 | chr04 250001 260000 163 165 | chr04 260001 270000 164 166 | chr04 270001 280000 165 167 | chr04 280001 290000 166 168 | chr04 290001 300000 167 169 | chr04 300001 310000 168 170 | chr04 310001 320000 169 171 | chr04 320001 330000 170 172 | chr04 330001 340000 171 173 | chr04 340001 350000 172 174 | chr04 350001 360000 173 175 | chr04 360001 370000 174 176 | chr04 370001 380000 175 177 | chr04 380001 390000 176 178 | chr04 390001 400000 177 179 | chr04 400001 410000 178 180 | chr04 410001 420000 179 181 | chr04 420001 430000 180 182 | chr04 430001 440000 181 183 | chr04 440001 450000 182 184 | chr04 450001 460000 183 185 | chr04 460001 470000 184 186 | chr04 470001 480000 185 187 | chr04 480001 490000 186 188 | chr04 490001 500000 187 189 | chr04 500001 510000 188 190 | chr04 510001 520000 189 191 | chr04 520001 530000 190 192 | chr04 530001 540000 191 193 | chr04 540001 550000 192 194 | chr04 550001 560000 193 195 | chr04 560001 570000 194 196 | chr04 570001 580000 195 197 | chr04 580001 590000 196 198 | 
chr04 590001 600000 197 199 | chr04 600001 610000 198 200 | chr04 610001 620000 199 201 | chr04 620001 630000 200 202 | chr04 630001 640000 201 203 | chr04 640001 650000 202 204 | chr04 650001 660000 203 205 | chr04 660001 670000 204 206 | chr04 670001 680000 205 207 | chr04 680001 690000 206 208 | chr04 690001 700000 207 209 | chr04 700001 710000 208 210 | chr04 710001 720000 209 211 | chr04 720001 730000 210 212 | chr04 730001 740000 211 213 | chr04 740001 750000 212 214 | chr04 750001 760000 213 215 | chr04 760001 770000 214 216 | chr04 770001 780000 215 217 | chr04 780001 790000 216 218 | chr04 790001 800000 217 219 | chr04 800001 810000 218 220 | chr04 810001 820000 219 221 | chr04 820001 830000 220 222 | chr04 830001 840000 221 223 | chr04 840001 850000 222 224 | chr04 850001 860000 223 225 | chr04 860001 870000 224 226 | chr04 870001 880000 225 227 | chr04 880001 890000 226 228 | chr04 890001 900000 227 229 | chr04 900001 910000 228 230 | chr04 910001 920000 229 231 | chr04 920001 930000 230 232 | chr04 930001 940000 231 233 | chr04 940001 950000 232 234 | chr04 950001 960000 233 235 | chr04 960001 970000 234 236 | chr04 970001 980000 235 237 | chr04 980001 990000 236 238 | chr04 990001 1000000 237 239 | chr04 1000001 1010000 238 240 | chr04 1010001 1020000 239 241 | chr04 1020001 1030000 240 242 | chr04 1030001 1040000 241 243 | chr04 1040001 1050000 242 244 | chr04 1050001 1060000 243 245 | chr04 1060001 1070000 244 246 | chr04 1070001 1080000 245 247 | chr04 1080001 1090000 246 248 | chr04 1090001 1100000 247 249 | chr04 1100001 1110000 248 250 | chr04 1110001 1120000 249 251 | chr04 1120001 1130000 250 252 | chr04 1130001 1140000 251 253 | chr04 1140001 1150000 252 254 | chr04 1150001 1160000 253 255 | chr04 1160001 1170000 254 256 | chr04 1170001 1180000 255 257 | chr04 1180001 1190000 256 258 | chr04 1190001 1200000 257 259 | chr04 1200001 1210000 258 260 | chr04 1210001 1220000 259 261 | chr04 1220001 1230000 260 262 | chr04 1230001 1240000 261 263 | chr04 1240001 1250000 262 264 | chr04 1250001 1260000 263 265 | chr04 1260001 1270000 264 266 | chr04 1270001 1280000 265 267 | chr04 1280001 1290000 266 268 | chr04 1290001 1300000 267 269 | chr04 1300001 1310000 268 270 | chr04 1310001 1320000 269 271 | chr04 1320001 1330000 270 272 | chr04 1330001 1340000 271 273 | chr04 1340001 1350000 272 274 | chr04 1350001 1360000 273 275 | chr04 1360001 1370000 274 276 | chr04 1370001 1380000 275 277 | chr04 1380001 1390000 276 278 | chr04 1390001 1400000 277 279 | chr04 1400001 1410000 278 280 | chr04 1410001 1420000 279 281 | chr04 1420001 1430000 280 282 | chr04 1430001 1440000 281 283 | chr04 1440001 1450000 282 284 | chr04 1450001 1460000 283 285 | chr04 1460001 1470000 284 286 | chr04 1470001 1480000 285 287 | chr04 1480001 1490000 286 288 | chr04 1490001 1500000 287 289 | chr04 1500001 1510000 288 290 | chr04 1510001 1520000 289 291 | chr04 1520001 1530000 290 292 | chr04 1530001 1540000 291 293 | chr05 1 10000 292 294 | chr05 10001 20000 293 295 | chr05 20001 30000 294 296 | chr05 30001 40000 295 297 | chr05 40001 50000 296 298 | chr05 50001 60000 297 299 | chr05 60001 70000 298 300 | chr05 70001 80000 299 301 | chr05 80001 90000 300 302 | chr05 90001 100000 301 303 | chr05 100001 110000 302 304 | chr05 110001 120000 303 305 | chr05 120001 130000 304 306 | chr05 130001 140000 305 307 | chr05 140001 150000 306 308 | chr05 150001 160000 307 309 | chr05 160001 170000 308 310 | chr05 170001 180000 309 311 | chr05 180001 190000 310 312 | chr05 190001 200000 311 313 | chr05 200001 
210000 312 314 | chr05 210001 220000 313 315 | chr05 220001 230000 314 316 | chr05 230001 240000 315 317 | chr05 240001 250000 316 318 | chr05 250001 260000 317 319 | chr05 260001 270000 318 320 | chr05 270001 280000 319 321 | chr05 280001 290000 320 322 | chr05 290001 300000 321 323 | chr05 300001 310000 322 324 | chr05 310001 320000 323 325 | chr05 320001 330000 324 326 | chr05 330001 340000 325 327 | chr05 340001 350000 326 328 | chr05 350001 360000 327 329 | chr05 360001 370000 328 330 | chr05 370001 380000 329 331 | chr05 380001 390000 330 332 | chr05 390001 400000 331 333 | chr05 400001 410000 332 334 | chr05 410001 420000 333 335 | chr05 420001 430000 334 336 | chr05 430001 440000 335 337 | chr05 440001 450000 336 338 | chr05 450001 460000 337 339 | chr05 460001 470000 338 340 | chr05 470001 480000 339 341 | chr05 480001 490000 340 342 | chr05 490001 500000 341 343 | chr05 500001 510000 342 344 | chr05 510001 520000 343 345 | chr05 520001 530000 344 346 | chr05 530001 540000 345 347 | chr05 540001 550000 346 348 | chr05 550001 560000 347 349 | chr05 560001 570000 348 350 | chr05 570001 580000 349 351 | -------------------------------------------------------------------------------- /iced/datasets/setup.py: -------------------------------------------------------------------------------- 1 | 2 | def configuration(parent_package='', top_path=None): 3 | from numpy.distutils.misc_util import Configuration 4 | config = Configuration('datasets', parent_package, top_path) 5 | config.add_data_dir('data') 6 | 7 | return config 8 | 9 | 10 | if __name__ == '__main__': 11 | from numpy.distutils.core import setup 12 | setup(**configuration(top_path='').todict()) 13 | -------------------------------------------------------------------------------- /iced/datasets/tests/test_base.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | from iced.datasets import clear_data_home, get_data_home 4 | from iced.datasets import load_sample_yeast, load_sample_cancer 5 | 6 | DATA_HOME = tempfile.mkdtemp(prefix="hiclib_data_home_test_") 7 | 8 | 9 | def test_data_home(): 10 | # get_data_home will point to a pre-existing folder 11 | data_home = get_data_home(data_home=DATA_HOME) 12 | assert data_home == DATA_HOME 13 | assert os.path.exists(data_home) 14 | 15 | # clear_data_home will delete both the content and the folder itself 16 | clear_data_home(data_home=data_home) 17 | assert not os.path.exists(data_home) 18 | 19 | # if the folder is missing it will be created again 20 | data_home = get_data_home(data_home=DATA_HOME) 21 | assert os.path.exists(data_home) 22 | 23 | 24 | def test_data_sub_yeast(): 25 | counts, lengths = load_sample_yeast() 26 | assert counts.shape == (lengths.sum(), lengths.sum()) 27 | 28 | 29 | def test_sample_cancer(): 30 | counts, lengths, cnv = load_sample_cancer() 31 | assert counts.shape == (lengths.sum(), lengths.sum()) 32 | assert len(cnv) == lengths.sum() 33 | --------------------------------------------------------------------------------
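A small usage sketch for the loaders exercised by the tests above; the assertions only restate what the loaders' code guarantees (square symmetric matrices over sum(lengths) bins, plus a per-bin copy-number vector for the cancer sample).

    import numpy as np
    from iced import datasets

    counts, lengths = datasets.load_sample_yeast()
    # One row/column per 10 kb bin, and a symmetric dense matrix
    assert counts.shape == (lengths.sum(), lengths.sum())
    assert np.allclose(counts, counts.T)

    counts, lengths, cnv = datasets.load_sample_cancer()
    # The simulated cancer sample also ships a copy-number value per bin
    assert len(cnv) == lengths.sum()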
/iced/filter.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy import sparse 3 | from ._filter_ import _filter_csr 4 | from . import utils 5 | 6 | 7 | def filter_low_counts(X, lengths=None, percentage=0.02, copy=True, 8 | sparsity=True, remove_all_zeros_loci=False, 9 | verbose=False): 10 | """ 11 | Filter rows and columns with low counts 12 | 13 | Parameters 14 | ---------- 15 | X : ndarray (n, n) 16 | Count matrix (hollow, symmetric) 17 | 18 | lengths : ndarray (m, ), optional, default: None 19 | Lengths of the chromosomes 20 | 21 | percentage : float, optional, default: 0.02 22 | percentage of rows and columns to discard 23 | 24 | remove_all_zeros_loci : bool, optional, default: False 25 | if set to True, the filtering will first remove all the 26 | non-interacting loci, and then apply the chosen filtering strategy. 27 | 28 | copy : boolean, optional, default: True 29 | If set to true, copies the count matrix 30 | 31 | sparsity : boolean, optional, default: True 32 | Whether to use the sparsity of the region or the total number of 33 | counts of the region to filter. 34 | 35 | Returns 36 | ------- 37 | X : ndarray (n, n) 38 | The filtered array 39 | """ 40 | if sparse.issparse(X): 41 | if not sparse.isspmatrix_csr(X): 42 | X = X.tocsr() 43 | X.sort_indices() 44 | else: 45 | X[np.isnan(X)] = 0 46 | 47 | if sparsity: 48 | if lengths is not None: 49 | weights = [] 50 | [weights.append(i) for i in lengths for j in range(i)] 51 | weights = np.array(weights) 52 | mask = utils.get_inter_mask(lengths) 53 | else: 54 | weights = np.ones(X.shape[0]) 55 | mask = np.zeros(X.shape, dtype=bool) 56 | 57 | return _filter_low_sparse(X, weights, mask, percentage=percentage, 58 | remove_all_zeros_loci=remove_all_zeros_loci, 59 | verbose=verbose) 60 | else: 61 | return _filter_low_sum(X, percentage=percentage, 62 | remove_all_zeros_loci=remove_all_zeros_loci, 63 | verbose=verbose) 64 | 65 | 66 | def filter_high_counts(X, lengths=None, percentage=0.02, copy=True): 67 | """ 68 | Filter rows and columns with high counts 69 | 70 | Parameters 71 | ---------- 72 | X : ndarray (n, n) 73 | Count matrix (hollow, symmetric) 74 | 75 | lengths : ndarray (m, ), optional, default: None 76 | Lengths of the chromosomes 77 | 78 | percentage : float, optional, default: 0.02 79 | percentage of rows and columns to discard 80 | 81 | copy : boolean, optional, default: True 82 | If set to true, copies the count matrix 83 | 84 | Returns 85 | ------- 86 | X : ndarray (n, n) 87 | The filtered array 88 | 89 | Notes 90 | ----- 91 | New in 0.6 92 | """ 93 | if sparse.issparse(X): 94 | if not sparse.isspmatrix_csr(X): 95 | X = X.tocsr() 96 | X.sort_indices() 97 | else: 98 | X[np.isnan(X)] = 0 99 | 100 | return _filter_high_sum(X, percentage=percentage) 101 | 102 | 103 | def _filter_low_sparse(X, weights, mask, percentage=0.02, 104 | remove_all_zeros_loci=False, verbose=False): 105 | # This does not work on sparse data yet. For now, raise a 106 | # NotImplementedError. 107 | 108 | if sparse.issparse(X): 109 | raise NotImplementedError 110 | if remove_all_zeros_loci: 111 | raise NotImplementedError 112 | 113 | counts = X.copy() 114 | counts[mask] = 1 115 | X_sum = (counts == 0).sum(axis=0).astype(float) / weights 116 | X_sum.sort() 117 | X_sum = np.array(X_sum).flatten()
118 | x = X_sum[int(X.shape[0] * (1. - percentage))] 119 | X_sum = (counts == 0).sum(axis=0).astype(float) / weights 120 | 121 | if sparse.issparse(X): 122 | _filter_csr(X, (X_sum > x)) 123 | else: 124 | X[X_sum > x, :] = np.nan 125 | X[:, X_sum > x] = np.nan 126 | 127 | return X 128 | 129 | 130 | def _filter_high_sum(X, percentage=0.02, verbose=False): 131 | X_sum = (np.array(X.sum(axis=0)).flatten() + 132 | np.array(X.sum(axis=1)).flatten()) 133 | X_sum.sort() 134 | m = X.shape[0] 135 | x = X_sum[int(m * (1-percentage))] 136 | 137 | if verbose: 138 | print("Filter %s bins ..." % sum(X_sum > x)) 139 | 140 | X_sum = (np.array(X.sum(axis=0)).flatten() + 141 | np.array(X.sum(axis=1)).flatten()) 142 | 143 | if sparse.issparse(X): 144 | _filter_csr(X, (X_sum > x)) 145 | else: 146 | X[X_sum > x, :] = np.nan 147 | X[:, X_sum > x] = np.nan 148 | 149 | return X 150 | 151 | 152 | def _filter_low_sum(X, percentage=0.02, remove_all_zeros_loci=False, 153 | verbose=False): 154 | X_sum = (np.array(X.sum(axis=0)).flatten() + 155 | np.array(X.sum(axis=1)).flatten()) 156 | X_sum.sort() 157 | m = X.shape[0] 158 | 159 | if not remove_all_zeros_loci: 160 | x = X_sum[int(m * percentage)] 161 | else: 162 | num_noninteracting_loci = sum(X_sum == 0) 163 | x = X_sum[ 164 | int(len(X_sum[X_sum > 0]) * percentage) + num_noninteracting_loci] 165 | 166 | X_sum = (np.array(X.sum(axis=0)).flatten() + 167 | np.array(X.sum(axis=1)).flatten()) 168 | if verbose: 169 | print("Filter %s out of %s bins ..." % (sum(X_sum < x), m)) 170 | 171 | if sparse.issparse(X): 172 | _filter_csr(X, (X_sum < x)) 173 | else: 174 | X[X_sum < x, :] = np.nan 175 | X[:, X_sum < x] = np.nan 176 | 177 | return X 178 | --------------------------------------------------------------------------------
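A sketch of how the two public filters above compose on a dense matrix; the percentages are arbitrary illustrations. Filtered rows and columns are set to NaN rather than dropped, so the matrix shape is preserved.

    import numpy as np
    from iced import datasets
    from iced.filter import filter_low_counts, filter_high_counts

    counts, lengths = datasets.load_sample_yeast()

    # First discard the 4% most sparse bins, then the 1% most covered ones.
    # Note: filter_high_counts zeroes the NaNs left by the previous step
    # before computing its threshold, as its code above shows.
    filtered = filter_low_counts(counts, lengths=lengths, percentage=0.04)
    filtered = filter_high_counts(filtered, percentage=0.01)
    print("Bins masked by the high-count filter:",
          int(np.isnan(filtered).all(axis=0).sum()))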
/iced/io/__init__.py: -------------------------------------------------------------------------------- 1 | from ._io_pandas import load_counts, load_lengths 2 | from ._io_pandas import write_counts 3 | from numpy import loadtxt, savetxt 4 | 5 | 6 | def write_lengths(filename, lengths, resolution=1): 7 | """ 8 | Write lengths as bed file 9 | """ 10 | chromosomes = ["Chr%02d" % (i + 1) for i in range(len(lengths))] 11 | j = 0 12 | with open(filename, "w") as bed_file: 13 | for chrid, l in enumerate(lengths): 14 | for i in range(l): 15 | bed_file.write( 16 | "%s\t%d\t%d\t%d\n" % (chromosomes[chrid], 17 | i * resolution + 1, 18 | (i + 1) * resolution, 19 | j)) 20 | j += 1 21 | -------------------------------------------------------------------------------- /iced/io/_io_pandas.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | import numpy as np 3 | from scipy import sparse 4 | import pandas as pd 5 | 6 | 7 | def load_counts(filename, lengths=None, base=None): 8 | """ 9 | Fast loading of a raw interaction counts file 10 | 11 | Parameters 12 | ---------- 13 | filename : str 14 | path to the file to load. The file should be of the following format: 15 | i, j, counts 16 | 17 | lengths : ndarray 18 | length of each chromosome 19 | 20 | base : [None, 0, 1], optional, default: None 21 | Is the file 0 or 1 based? If None, attempts to guess. 22 | 23 | Returns 24 | ------- 25 | counts : COO sparse matrix of the interaction counts 26 | """ 27 | n = None 28 | if lengths is not None: 29 | n = lengths.sum() 30 | shape = (n, n) 31 | else: 32 | shape = None 33 | # This is the interaction counts file 34 | dataframe = pd.read_csv(filename, sep="\t", comment="#", header=None) 35 | row, col, data = dataframe.values.T 36 | 37 | # If there are NAs remove them 38 | mask = np.isnan(data) 39 | if np.any(mask): 40 | warnings.warn( 41 | "NAs detected in %s. " 42 | "Removing NAs and replacing with 0." % filename) 43 | row = row[np.invert(mask)] 44 | col = col[np.invert(mask)] 45 | data = data[np.invert(mask)] 46 | 47 | # XXX We need to deal with the fact that we should not duplicate entries 48 | # for the diagonal. 49 | # XXX what if n doesn't exist? 50 | if base is not None: 51 | if base not in [0, 1]: 52 | raise ValueError("indices should start either at 0 or 1") 53 | col -= base 54 | row -= base 55 | else: 56 | warnings.warn( 57 | "Attempting to guess whether counts are 0 or 1 based") 58 | 59 | if (col.min() >= 1 and row.min() >= 1) and \ 60 | ((n is None) or (col.max() == n)): 61 | # This is a hack to deal with the fact that sometimes, the files 62 | # are indexed at 1 and not 0 63 | 64 | col -= 1 65 | row -= 1 66 | 67 | if shape is None: 68 | n = max(col.max(), row.max()) + 1 69 | shape = (int(n), int(n)) 70 | 71 | data = data.astype(float) 72 | counts = sparse.coo_matrix((data, (row, col)), shape=shape) 73 | return counts 74 | 75 | 76 | def load_lengths(filename, return_base=False): 77 | """ 78 | Fast loading of the bed files 79 | 80 | Parameters 81 | ---------- 82 | filename : str, 83 | path to the file to load. The file should be a bed file 84 | 85 | return_base : bool, optional, default: False 86 | whether to also return whether the file is 0 or 1-based 87 | 88 | Returns 89 | ------- 90 | lengths : ndarray of the length of each chromosome 91 | """ 92 | data = pd.read_csv(filename, sep="\t", comment="#", header=None) 93 | data = data.values 94 | _, idx, lengths = np.unique(data[:, 0], return_counts=True, 95 | return_index=True) 96 | if return_base: 97 | return lengths[idx.argsort()], data[0, 3] 98 | else: 99 | return lengths[idx.argsort()] 100 | 101 | 102 | def write_counts(filename, counts, base=None): 103 | """ 104 | Write counts 105 | 106 | Parameters 107 | ---------- 108 | filename : str 109 | 110 | counts : array-like 111 | 112 | base : int, optional, default: None 113 | """ 114 | if not sparse.isspmatrix_coo(counts): 115 | if sparse.issparse(counts): 116 | counts = counts.tocoo() 117 | else: 118 | counts = sparse.coo_matrix(counts) 119 | 120 | counts.eliminate_zeros() 121 | if base is not None: 122 | counts.row += base 123 | counts.col += base 124 | # XXX this is slow and memory intensive 125 | data = np.concatenate([counts.row[:, np.newaxis], 126 | counts.col[:, np.newaxis], 127 | counts.data[:, np.newaxis]], axis=1) 128 | np.savetxt(filename, data, fmt="%d\t%d\t%f") 129 | 130 | 131 | def write_lengths(filename, lengths, resolution=1): 132 | """ 133 | Write lengths as bed file 134 | """ 135 | chromosomes = ["Chr%02d" % (i + 1) for i in range(len(lengths))] 136 | j = 0 137 | with open(filename, "w") as bed_file: 138 | for chrid, l in enumerate(lengths): 139 | for i in range(l): 140 | bed_file.write( 141 | "%s\t%d\t%d\t%d\n" % (chromosomes[chrid], 142 | i * resolution + 1, 143 | (i + 1) * resolution, 144 | j)) 145 | j += 1 146 | --------------------------------------------------------------------------------
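A round-trip sketch for the readers and writers above, using the tab-separated "i j count" format they expect; the temporary file name is arbitrary. Values are drawn strictly positive so that no entry is dropped by eliminate_zeros on write.

    import os
    import tempfile

    import numpy as np
    from scipy import sparse

    from iced.io import load_counts, write_counts

    random_state = np.random.RandomState(42)
    X = sparse.coo_matrix(np.triu(random_state.randint(1, 10, (5, 5))))

    with tempfile.TemporaryDirectory() as tmp:
        filename = os.path.join(tmp, "counts.matrix")
        write_counts(filename, X)
        # The file was written 0-based, so reload it as such
        loaded = load_counts(filename, base=0)
        assert np.allclose(X.toarray(), loaded.toarray())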
/iced/io/setup.py: -------------------------------------------------------------------------------- 1 | # License: BSD Style. 2 | import os 3 | from os.path import join 4 | 5 | import numpy 6 | 7 | 8 | def configuration(parent_package='', top_path=None): 9 | from numpy.distutils.misc_util import Configuration 10 | 11 | libraries = [] 12 | if os.name == 'posix': 13 | libraries.append('m') 14 | 15 | config = Configuration('io', parent_package, top_path) 16 | 17 | return config 18 | 19 | if __name__ == '__main__': 20 | from numpy.distutils.core import setup 21 | setup(**configuration(top_path='').todict()) 22 | -------------------------------------------------------------------------------- /iced/io/tests/test_io.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import dirname 3 | from iced import io 4 | 5 | 6 | def test_load_counts(): 7 | module_path = dirname(__file__) 8 | counts_filename = os.path.join( 9 | module_path, 10 | "../../datasets/data/duan2009/duan.SC.10000.raw_sub.matrix") 11 | 12 | counts = io.load_counts(counts_filename) 13 | assert counts is not None 14 | -------------------------------------------------------------------------------- /iced/normalization/__init__.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy import sparse 3 | from ._normalization_ import _update_normalization_csr 4 | from ..utils import is_symetric_or_tri, is_tri 5 | from ._ca_utils import estimate_block_biases 6 | 7 | 8 | def ICE_normalization(X, SS=None, max_iter=3000, eps=1e-4, copy=True, 9 | norm='l1', verbose=0, output_bias=False, 10 | total_counts=None, counts_profile=None): 11 | """ 12 | ICE normalization 13 | 14 | The Imakaev normalization of Hi-C data consists of iteratively estimating 15 | the bias such that all the rows and columns (i.e., loci) have equal 16 | visibility. 17 | 18 | Parameters 19 | ---------- 20 | X : ndarray or sparse array (n, n) 21 | raw interaction frequency matrix 22 | 23 | max_iter : integer, optional, default: 3000 24 | Maximum number of iterations 25 | 26 | eps : float, optional, default: 1e-4 27 | the relative increment in the results before declaring convergence. 28 | 29 | copy : boolean, optional, default: True 30 | If copy is True, the original data is not modified. 31 | 32 | norm : string, optional, default: l1 33 | If set to "l1", will compute the ICE algorithm of the paper. Else, the 34 | algorithm is adapted to use the l2 norm, as suggested in the SCN 35 | paper. 36 | 37 | output_bias : boolean, optional, default: False 38 | whether to output the bias vector. 39 | 40 | total_counts : float, optional, default: None 41 | the total number of contact counts that the normalized matrix should 42 | contain. If set to None, the normalized contact count matrix will be 43 | such that the total number of contact counts equals the initial number 44 | of interactions. 45 | 46 | Returns 47 | ------- 48 | X, (bias) : ndarray (n, n) 49 | Normalized IF matrix, and the bias if output_bias is True 50 | 51 | Example 52 | -------
53 | .. plot:: examples/normalization/plot_ice_normalization.py 54 | """ 55 | if copy: 56 | X = X.copy() 57 | 58 | if sparse.issparse(X): 59 | if not sparse.isspmatrix_coo(X): 60 | X = sparse.coo_matrix(X, dtype=float) 61 | else: 62 | X[np.isnan(X)] = 0 63 | X = X.astype('float') 64 | 65 | m = X.shape[0] 66 | is_symetric_or_tri(X) 67 | old_bias = None 68 | bias = np.ones((m, 1)) 69 | _is_tri = is_tri(X) 70 | if verbose and _is_tri: 71 | print("Matrix is upper triangular") 72 | 73 | if counts_profile is not None: 74 | rows_to_remove = counts_profile == 0 75 | if sparse.issparse(X): 76 | rows_to_remove = np.where(rows_to_remove)[0] 77 | X.data[np.isin(X.row, rows_to_remove)] = 0 78 | X.data[np.isin(X.col, rows_to_remove)] = 0 79 | X.eliminate_zeros() 80 | else: 81 | X[rows_to_remove] = 0 82 | X[:, rows_to_remove] = 0 83 | 84 | if total_counts is None: 85 | total_counts = X.sum() 86 | for it in np.arange(max_iter): 87 | if norm == 'l1': 88 | # This should only be done if the matrix is upper or 89 | # lower triangular 90 | if _is_tri: 91 | sum_ds = X.sum(axis=0) + X.sum(axis=1).T - X.diagonal() 92 | else: 93 | sum_ds = X.sum(axis=0) 94 | elif norm == 'l2': 95 | if _is_tri: 96 | sum_ds = ((X**2).sum(axis=0) + 97 | (X**2).sum(axis=1).T - 98 | (X**2).diagonal()) 99 | else: 100 | sum_ds = (X**2).sum(axis=0) 101 | 102 | if SS is not None: 103 | raise NotImplementedError 104 | dbias = sum_ds.reshape((m, 1)) 105 | if counts_profile is not None: 106 | dbias /= counts_profile[:, np.newaxis] 107 | dbias[counts_profile == 0] = 0 108 | # To avoid numerical instabilities 109 | dbias /= dbias[dbias != 0].mean() 110 | 111 | dbias[dbias == 0] = 1 112 | bias *= dbias 113 | 114 | if sparse.issparse(X): 115 | X.data /= dbias.A[X.row, 0] 116 | X.data /= dbias.A[X.col, 0] 117 | else: 118 | X /= dbias 119 | X /= dbias.T 120 | 121 | bias *= np.sqrt(X.sum() / total_counts) 122 | X *= total_counts / X.sum() 123 | 124 | if old_bias is not None and np.abs(old_bias - bias).sum() < eps: 125 | if verbose > 1: 126 | print("break at iteration %d" % (it,)) 127 | break 128 | 129 | if verbose > 1 and old_bias is not None: 130 | print('ICE at iteration %d %s' % 131 | (it, np.abs(old_bias - bias).sum())) 132 | old_bias = bias.copy() 133 | # Now that we are finished with the bias estimation, set all biases 134 | # corresponding to filtered rows to np.nan 135 | if sparse.issparse(X): 136 | to_rm = (np.array(X.sum(axis=0)).flatten() + 137 | np.array(X.sum(axis=1)).flatten()) == 0 138 | else: 139 | to_rm = (X.sum(axis=0) + X.sum(axis=1)) == 0 140 | bias[to_rm] = np.nan 141 | if output_bias: 142 | return X, bias 143 | else: 144 | return X 145 | 146 | 147 | def SCN_normalization(X, max_iter=300, eps=1e-6, copy=True): 148 | """ 149 | Sequential Component Normalization 150 | 151 | Parameters 152 | ---------- 153 | X : ndarray (n, n) 154 | raw interaction frequency matrix 155 | 156 | max_iter : integer, optional, default: 300 157 | Maximum number of iterations 158 | 159 | eps : float, optional, default: 1e-6 160 | the relative increment in the results before declaring convergence. 161 | 162 | copy : boolean, optional, default: True 163 | If copy is True, the original data is not modified.
164 | 165 | Returns 166 | ------- 167 | X : ndarray, 168 | Normalized IF 169 | """ 170 | # X needs to be square, otherwise this will fail 171 | 172 | m, n = X.shape 173 | if m != n: 174 | raise ValueError("X should be a square matrix") 175 | 176 | if copy: 177 | X = X.copy() 178 | X = X.astype(float) 179 | 180 | for it in np.arange(max_iter): 181 | sum_X = np.sqrt((X ** 2).sum(0)) 182 | sum_X[sum_X == 0] = 1 183 | X /= sum_X 184 | X = X.T 185 | sum_X = np.sqrt((X ** 2).sum(0)) 186 | sum_X[sum_X == 0] = 1 187 | X /= sum_X 188 | X = X.T 189 | 190 | if np.abs(X - X.T).sum() < eps: 191 | print("break at iteration %d" % (it,)) 192 | break 193 | 194 | return X 195 | -------------------------------------------------------------------------------- /iced/normalization/_normalization_.pyx: -------------------------------------------------------------------------------- 1 | #import numpy as np 2 | cimport cython 3 | cimport numpy as np 4 | 5 | ctypedef np.float64_t DOUBLE 6 | ctypedef np.int32_t INT 7 | 8 | 9 | @cython.boundscheck(False) 10 | @cython.wraparound(False) 11 | @cython.cdivision(True) 12 | def _update_normalization_csr(X, np.ndarray[DOUBLE, ndim=1] bias): 13 | 14 | cdef: 15 | np.ndarray[DOUBLE, ndim=1] X_data = X.data 16 | np.ndarray[int, ndim=1] X_indices = X.indices 17 | np.ndarray[int, ndim=1] X_indptr = X.indptr 18 | unsigned int m = X.shape[0] 19 | unsigned int i, j, row 20 | 21 | j = 0 22 | for i, row in enumerate(X_indices): 23 | while i >= X_indptr[j + 1]: 24 | j += 1 25 | X_data[i] /= bias[row] * bias[j] 26 | return X 27 | -------------------------------------------------------------------------------- /iced/normalization/tests/test_ca_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.testing import assert_array_almost_equal, assert_array_equal 3 | from scipy import sparse 4 | from iced.normalization import _ca_utils as utils 5 | 6 | 7 | def test_identify_missing_distances(): 8 | random_state = np.random.RandomState(seed=42) 9 | counts = random_state.randint(0, 100, (100, 100)) 10 | missing_loci = random_state.rand(100) > 0.95 11 | lengths = np.array([75, 25]) 12 | counts[missing_loci] = 0 13 | counts[:, missing_loci] = 0 14 | counts = np.triu(counts, k=1) 15 | counts = counts + counts.T 16 | 17 | gdis_dense = utils.get_genomic_distances( 18 | lengths) 19 | 20 | gdis_dense, num_dense = np.unique( 21 | gdis_dense[missing_loci], 22 | return_counts=True) 23 | num_dense[1] = 0 24 | gdis, num = utils.identify_missing_distances( 25 | sparse.coo_matrix(np.triu(counts, 1)), lengths) 26 | # assert_array_equal(num[:len(num_dense)], num_dense) 27 | 28 | 29 | def test_get_genomic_distances(): 30 | random_state = np.random.RandomState(seed=42) 31 | lengths = np.array([25, 75]) 32 | counts = random_state.randint(0, 100, (100, 100)) 33 | missing_loci = random_state.rand(100) > 0.95 34 | counts[missing_loci] = 0 35 | counts[:, missing_loci] = 0 36 | counts = np.triu(counts, k=1).astype(float) 37 | counts = counts + counts.T 38 | 39 | counts_sparse = sparse.coo_matrix(np.triu(counts), shape=counts.shape) 40 | 41 | gdis_dense = utils.get_genomic_distances(lengths, counts) 42 | gdis_sparse = utils.get_genomic_distances(lengths, counts_sparse) 43 | 44 | gdis_sparse = sparse.coo_matrix( 45 | (gdis_sparse, (counts_sparse.row, counts_sparse.col)), 46 | shape=counts.shape).toarray() 47 | gdis_dense[gdis_sparse == 0] = 0 48 | assert_array_equal(gdis_dense, gdis_sparse) 49 | 50 | 51 | def test_get_mapping(): 52 | random_state = np.random.RandomState(seed=42) 53 
| counts = random_state.randint(0, 100, (100, 100)) 54 | missing_loci = random_state.rand(100) > 0.9 55 | counts[missing_loci] = 0 56 | counts[:, missing_loci] = 0 57 | counts = np.triu(counts, k=1).astype(float) 58 | counts = counts + counts.T 59 | 60 | counts_sparse = sparse.coo_matrix(np.triu(counts), shape=counts.shape) 61 | 62 | bs = np.ones(counts.shape) 63 | lengths = np.array([50, 25, 25]) 64 | counts[missing_loci] = np.nan 65 | counts[:, missing_loci] = np.nan 66 | 67 | counts_dense = counts 68 | 69 | mapping_dense = utils.get_mapping( 70 | counts_dense, lengths, bs, 71 | smoothed=False) 72 | mapping_sparse = utils.get_mapping( 73 | counts_sparse, lengths, np.ones(counts_sparse.data.shape), 74 | smoothed=False) 75 | assert_array_equal(mapping_dense, mapping_sparse) 76 | 77 | 78 | def test_expected_dense_sparse(): 79 | random_state = np.random.RandomState(seed=42) 80 | counts = random_state.randint(0, 100, (100, 100)) 81 | missing_loci = random_state.rand(100) > 0.9 82 | counts[missing_loci] = 0 83 | counts[:, missing_loci] = 0 84 | counts = np.triu(counts, k=1).astype(float) 85 | counts = counts + counts.T 86 | 87 | counts_sparse = sparse.coo_matrix(np.triu(counts), shape=counts.shape) 88 | 89 | bs = np.ones(counts.shape) 90 | lengths = np.array([75, 25]) 91 | counts[missing_loci] = np.nan 92 | counts[:, missing_loci] = np.nan 93 | 94 | expected_dense = utils.get_expected(counts, lengths, bs) 95 | expected_sparse = utils.get_expected(counts_sparse, lengths, 96 | np.ones(counts_sparse.data.shape)) 97 | expected_sparse = sparse.coo_matrix( 98 | (expected_sparse, (counts_sparse.row, counts_sparse.col)), 99 | shape=counts.shape).toarray() 100 | expected_dense[expected_sparse == 0] = 0 101 | assert_array_almost_equal(expected_sparse, 102 | np.triu(expected_dense)) 103 | 104 | 105 | def test_estimate_bias_dense_sparse(): 106 | random_state = np.random.RandomState(seed=42) 107 | counts = random_state.randint(0, 100, (100, 100)) 108 | missing_loci = random_state.rand(100) > 0.95 109 | counts[missing_loci] = 0 110 | counts[:, missing_loci] = 0 111 | 112 | counts = np.triu(counts, k=1).astype(float) 113 | counts = counts + counts.T 114 | # Add some sparsity 115 | counts_sparse = sparse.coo_matrix(np.triu(counts), shape=counts.shape) 116 | counts[missing_loci] = np.nan 117 | counts[:, missing_loci] = np.nan 118 | 119 | bs_dense = np.ones(counts.shape) 120 | bs_sparse = np.ones(counts_sparse.data.shape) 121 | 122 | lengths = np.array([75, 25]) 123 | cnv = 2 * np.ones(lengths.sum()) 124 | cnv[:random_state.randint(0, 100)] += 2 125 | cnv[random_state.randint(0, 100):random_state.randint(0, 100)] -= 1 126 | 127 | mapping = utils.get_mapping(counts_sparse, lengths, bs_sparse) 128 | expected_dense = utils.get_expected(counts, lengths, bs_dense, 129 | mapping=mapping) 130 | expected_sparse = utils.get_expected(counts_sparse, lengths, bs_sparse, 131 | mapping=mapping) 132 | 133 | bias_dense = utils.estimate_bias(counts, cnv, expected_dense, lengths) 134 | bias_sparse = utils.estimate_bias( 135 | counts_sparse, cnv, expected_sparse, lengths, mapping) 136 | 137 | bias_sparse = sparse.coo_matrix( 138 | (bias_sparse, (counts_sparse.row, counts_sparse.col)), 139 | shape=counts.shape).toarray() 140 | bias_dense[bias_sparse == 0] = 0 141 | assert_array_almost_equal(bias_dense, bias_sparse) 142 | 143 | 144 | def test_num_each_dis(): 145 | random_state = np.random.RandomState(seed=42) 146 | lengths = random_state.randint(0, 50, 5) 147 | lengths.sort() 148 | b = random_state.randint(0, lengths.sum()/2) 149 | 
e = random_state.randint(b, lengths.sum()) 150 | 151 | # Generate rows and cols 152 | rows = np.arange(b, e) 153 | b = random_state.randint(0, lengths.sum()/2) 154 | e = random_state.randint(b, lengths.sum()) 155 | cols = np.arange(b, e) 156 | 157 | gdis, num = utils._num_each_gdis(rows, cols, lengths) 158 | 159 | gdistances = utils.get_genomic_distances(lengths) 160 | gdistances = np.triu(gdistances) 161 | gdis_dense, num_dense = np.unique( 162 | gdistances[rows][:, cols], return_counts=True) 163 | m = np.array([i in gdis for i in gdis_dense]) 164 | assert_array_equal(num, num_dense[m]) 165 | -------------------------------------------------------------------------------- /iced/normalization/tests/test_normalization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy import sparse 3 | from numpy.testing import assert_array_almost_equal 4 | import pytest 5 | 6 | from iced.normalization import ICE_normalization 7 | from iced.normalization import SCN_normalization 8 | 9 | 10 | def test_ICE_normalization(): 11 | n = 100 12 | random_state = np.random.RandomState(seed=42) 13 | X = random_state.randint(0, 100, size=(n, n)) 14 | X = X + X.T 15 | normed_X = ICE_normalization(X, eps=1e-10, max_iter=1000000) 16 | normed = normed_X.sum(axis=1) 17 | assert_array_almost_equal(normed / normed.mean(), np.ones((len(X), )), 18 | decimal=0) 19 | 20 | normed_X, bias = ICE_normalization(X, eps=1e-10, max_iter=100000, 21 | output_bias=True) 22 | assert_array_almost_equal(normed_X, X / (bias.T * bias), 6) 23 | 24 | 25 | def test_ICE_normalization_cancer(): 26 | n = 100 27 | random_state = np.random.RandomState(seed=42) 28 | X = random_state.randint(0, 100, size=(n, n)) 29 | X = X + X.T 30 | profile = np.ones(n) 31 | profile[:10] = 0 32 | profile[50:] = 2 33 | normed_X, bias = ICE_normalization(X, eps=1e-10, counts_profile=profile, 34 | output_bias=True) 35 | assert not np.all(np.isnan(normed_X)) 36 | 37 | normed_X[np.isnan(normed_X)] = 0 38 | mask = np.isnan(bias).flatten() 39 | bias[np.isnan(bias)] = 1 40 | normed_from_bias_X = X / (bias.T * bias) 41 | normed_from_bias_X[mask] = 0 42 | normed_from_bias_X[:, mask] = 0 43 | assert_array_almost_equal(normed_X, normed_from_bias_X, 6) 44 | inferred_profile = normed_X.sum(axis=0) 45 | inferred_profile /= inferred_profile.max() 46 | assert_array_almost_equal(inferred_profile, profile / profile.max()) 47 | 48 | # Do the same for sparse matrices 49 | normed_X = ICE_normalization( 50 | sparse.coo_matrix(X), 51 | eps=1e-10, counts_profile=profile) 52 | 53 | 54 | def test_sparse_ICE_normalization(): 55 | n = 100 56 | random_state = np.random.RandomState(seed=42) 57 | X = random_state.randint(0, 100, size=(n, n)) 58 | 59 | thres = (random_state.rand(n, n) > 0.5).astype(bool) 60 | 61 | X[thres] = 0 62 | X = X + X.T 63 | sparse_X = sparse.csr_matrix(X) 64 | true_normed_X = ICE_normalization(X, eps=1e-10, max_iter=10) 65 | normed_X = ICE_normalization(sparse_X, eps=1e-10, max_iter=10) 66 | assert_array_almost_equal(X, sparse_X.todense()) 67 | assert_array_almost_equal(true_normed_X, np.array(normed_X.todense())) 68 | 69 | 70 | def test_sparse_ICE_normalization_triu(): 71 | n = 100 72 | random_state = np.random.RandomState(seed=42) 73 | X = random_state.randint(0, 100, size=(n, n)) 74 | 75 | thres = (random_state.rand(n, n) > 0.5).astype(bool) 76 | X[thres] = 0 77 | X = X + X.T 78 | sparse_X = sparse.triu(X) 79 | true_normed_X, true_biases = ICE_normalization(
80 | X, eps=1e-10, max_iter=10, output_bias=True) 81 | true_normed_X = np.triu(true_normed_X) 82 | 83 | normed_X_sparse, biases_sparse = ICE_normalization( 84 | sparse_X, eps=1e-10, max_iter=100, 85 | output_bias=True) 86 | normed_X_dense, biases_dense = ICE_normalization( 87 | np.triu(X), eps=1e-10, max_iter=100, 88 | output_bias=True) 89 | 90 | # The sparse and dense versions are going to be equal up to a constant 91 | # factor 92 | assert_array_almost_equal(normed_X_dense, 93 | np.array(normed_X_sparse.toarray())) 94 | 95 | normed_X_sparse *= true_normed_X.mean() / normed_X_sparse.mean() 96 | normed_X_dense *= true_normed_X.mean() / normed_X_dense.mean() 97 | 98 | assert_array_almost_equal(true_normed_X, 99 | np.array(normed_X_sparse.todense())) 100 | assert_array_almost_equal(true_normed_X, normed_X_dense) 101 | 102 | total_counts = 5000 103 | normed_X = ICE_normalization(sparse_X, eps=1e-10, 104 | total_counts=total_counts) 105 | assert pytest.approx(normed_X.sum()) == total_counts 106 | 107 | 108 | def test_SCN_normalization(): 109 | n = 100 110 | random_state = np.random.RandomState(seed=42) 111 | X = random_state.randint(0, 100, size=(n, n)) 112 | 113 | normed_X = SCN_normalization(X) 114 | assert_array_almost_equal(np.sqrt((normed_X ** 2).sum(axis=1)), 115 | np.ones((len(X), ))) 116 | -------------------------------------------------------------------------------- /iced/random/__init__.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy import sparse 3 | 4 | 5 | def downsample_contact_map(counts, nreads=None, proportion=None, 6 | random_state=None): 7 | """ 8 | Downsample the contact count matrix 9 | 10 | Parameters 11 | ---------- 12 | counts : ndarray or sparse matrix 13 | The raw contact count matrix. 14 | 15 | nreads : integer, optional, default: None 16 | The number of reads of the resulting matrix. By default, will 17 | downsample to 80% of the original matrix. 18 | `nreads` and `proportion` cannot both be provided. 19 | 20 | proportion : float [0, 1], optional, default: None 21 | The proportion of reads of the resulting downsampled matrix. By 22 | default, will downsample to 80% of the matrix. 23 | `nreads` and `proportion` cannot both be provided. 24 | 25 | random_state : random_state object, optional 26 | 27 | Returns 28 | ------- 29 | c : downsampled contact count matrix as a COO matrix. 30 | """ 31 | if not np.issubdtype(counts.dtype, np.integer): 32 | if np.abs(counts - np.round(counts)).sum() != 0: 33 | raise ValueError("Count matrix should be integers") 34 | counts = counts.astype("int") 35 | if not sparse.issparse(counts): 36 | # Convert into COO sparse matrix 37 | counts = sparse.coo_matrix(np.triu(counts)) 38 | elif not sparse.isspmatrix_coo(counts): 39 | # Also convert into COO sparse matrix 40 | counts = sparse.coo_matrix(counts) 41 | 42 | if nreads is None and proportion is None: 43 | nreads = int(np.round(counts.sum() * 0.8)) 44 | 45 | if nreads is not None and proportion is not None: 46 | raise ValueError( 47 | "The user provided both the number of reads and " 48 | "proportion. Provide one or the other") 49 | 50 | if proportion is not None and ((proportion < 0) or (proportion > 1)): 51 | raise ValueError( 52 | "`proportion` should be between 0 and 1. %f was provided" %
53 | proportion) 54 | 55 | if nreads is None: 56 | nreads = int(np.round(proportion * counts.sum())) 57 | 58 | if random_state is None: 59 | random_state = np.random.RandomState() 60 | elif isinstance(random_state, int): 61 | random_state = np.random.RandomState(random_state) 62 | 63 | 64 | # Create a matrix of indices where each entry corresponds to an 65 | # interacting pair of loci, and where interacting pairs appear the number 66 | # of times they interact 67 | ind = np.repeat(np.arange(len(counts.data)), counts.data, axis=0) 68 | 69 | # Shuffle the indices and select nreads interactions 70 | sub_ind = random_state.choice(ind, size=nreads, replace=False) 71 | 72 | # Recreate the interaction counts matrix. 73 | c = sparse.coo_matrix( 74 | (np.ones(len(sub_ind)), (counts.row[sub_ind], 75 | counts.col[sub_ind])), 76 | shape=counts.shape, dtype=float) 77 | return c 78 | 79 | 80 | def bootstrap_contact_map(counts, random_state=None): 81 | """ 82 | Bootstrap the contact count matrix 83 | 84 | Parameters 85 | ---------- 86 | counts : ndarray or sparse matrix 87 | The raw contact count matrix. 88 | 89 | random_state : random_state object, optional 90 | 91 | Returns 92 | ------- 93 | c : bootstrapped contact count matrix as a COO matrix. 94 | """ 95 | if not np.issubdtype(counts.dtype, np.integer): 96 | if np.abs(counts - np.round(counts)).sum() != 0: 97 | raise ValueError("Count matrix should be integers") 98 | counts = counts.astype("int") 99 | if not sparse.issparse(counts): 100 | # Convert into COO sparse matrix 101 | counts = sparse.coo_matrix(np.triu(counts)) 102 | elif not sparse.isspmatrix_coo(counts): 103 | # Also convert into COO sparse matrix 104 | counts = sparse.coo_matrix(counts) 105 | 106 | if random_state is None: 107 | random_state = np.random.RandomState() 108 | elif isinstance(random_state, int): 109 | random_state = np.random.RandomState(random_state) 110 | 111 | 112 | # Create a matrix of indices where each entry corresponds to an 113 | # interacting pair of loci, and where interacting pairs appear the number 114 | # of times they interact 115 | ind = np.repeat(np.arange(len(counts.data)), counts.data, axis=0) 116 | 117 | nreads = counts.sum() 118 | # Shuffle the indices and select nreads interactions, with replacement 119 | sub_ind = random_state.choice(ind, size=nreads, replace=True) 120 | 121 | # Recreate the interaction counts matrix. 122 | c = sparse.coo_matrix( 123 | (np.ones(len(sub_ind)), (counts.row[sub_ind], 124 | counts.col[sub_ind])), 125 | shape=counts.shape, dtype=float) 126 | return c 127 | 128 | 129 | def permute_contact_map(counts, random_state=None, circular=False): 130 | """ 131 | Randomize matrix preserving the distribution of elements per diagonal 132 | 133 | Parameters 134 | ---------- 135 | counts : ndarray (n, n) 136 | The contact count matrix to shuffle 137 | 138 | circular : boolean, optional, default: False 139 | Whether the chromosome is circular.
140 | 141 | Returns 142 | ------- 143 | Randomized matrix that preserves the contact law P(s) 144 | """ 145 | if random_state is None: 146 | random_state = np.random.RandomState() 147 | elif isinstance(random_state, int): 148 | random_state = np.random.RandomState(random_state) 149 | 150 | if circular: 151 | return _permute_contact_map_circular( 152 | counts, 153 | random_state=random_state) 154 | else: 155 | randomized_counts = np.zeros(counts.shape) 156 | 157 | N = len(counts) 158 | 159 | for s in range(0, N): 160 | indices = random_state.permutation(N - s) 161 | random_elements = np.diag(counts, k=s)[indices] 162 | np.fill_diagonal(randomized_counts.T[s:], random_elements) 163 | return randomized_counts 164 | 165 | 166 | def _permute_contact_map_circular(counts, random_state=None): 167 | N = len(counts) 168 | randomized_counts = np.zeros(counts.shape) 169 | 170 | # Draw random samples directly from the data 171 | indices = np.arange(N) 172 | random_state.shuffle(indices) 173 | np.fill_diagonal(randomized_counts, np.diag(counts)[indices]) 174 | 175 | for s in range(1, int(np.ceil(N / 2))): 176 | # Start by upper diagonals 177 | sub_diag = np.concatenate( 178 | [np.diag(counts, k=s), 179 | np.diag(counts, k=s-N)]) 180 | random_state.shuffle(indices) 181 | random_elements = sub_diag[indices] 182 | np.fill_diagonal( 183 | randomized_counts.T[s:], 184 | random_elements[:N-s]) 185 | np.fill_diagonal( 186 | randomized_counts[N-s:], 187 | random_elements[N-s:N]) 188 | 189 | # And now lower diagonals 190 | sub_diag = np.concatenate( 191 | [np.diag(counts, k=-s), 192 | np.diag(counts, k=N-s)]) 193 | random_state.shuffle(indices) 194 | random_elements = sub_diag[indices] 195 | np.fill_diagonal( 196 | randomized_counts[s:], 197 | random_elements[:N-s]) 198 | np.fill_diagonal( 199 | randomized_counts.T[N-s:], 200 | random_elements[N-s:N]) 201 | 202 | return randomized_counts 203 | --------------------------------------------------------------------------------
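A sketch of the two resampling helpers defined above on the bundled yeast map. Only the upper triangle is sampled, so sums are compared against np.triu; the 0.5 proportion is an arbitrary illustration.

    import numpy as np
    from iced import datasets
    from iced.random import bootstrap_contact_map, downsample_contact_map

    counts, lengths = datasets.load_sample_yeast()

    # Keep roughly half of the reads
    sub = downsample_contact_map(counts, proportion=0.5, random_state=0)
    print(int(sub.sum()), "reads kept out of", int(np.triu(counts).sum()))

    # Resample the same number of reads, with replacement
    boot = bootstrap_contact_map(counts, random_state=0)
    assert boot.sum() == np.triu(counts).sum()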
/iced/random/tests/test_init.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy import sparse 3 | import pytest 4 | 5 | from iced.random import downsample_contact_map 6 | from iced.random import bootstrap_contact_map 7 | from iced import datasets 8 | 9 | 10 | def test_downsample_contact_map(): 11 | counts, lengths = datasets.load_sample_yeast() 12 | nreads = int(np.round(0.8 * np.triu(counts).sum())) 13 | downsampled_counts = downsample_contact_map(counts, 14 | random_state=42) 15 | assert nreads == downsampled_counts.sum() 16 | 17 | downsampled_counts = downsample_contact_map(counts, nreads=nreads, 18 | random_state=42) 19 | assert nreads == downsampled_counts.sum() 20 | 21 | downsampled_counts = downsample_contact_map(counts, proportion=0.8, 22 | random_state=42) 23 | assert nreads == downsampled_counts.sum() 24 | 25 | with pytest.raises(ValueError): 26 | downsample_contact_map(counts*.3, nreads=nreads, 27 | random_state=42) 28 | 29 | with pytest.raises(ValueError): 30 | downsample_contact_map(counts, nreads=nreads, 31 | proportion=0.5, 32 | random_state=42) 33 | 34 | # Test that it works with COO matrices 35 | counts = sparse.coo_matrix(np.triu(counts)) 36 | downsampled_counts = downsample_contact_map(counts, nreads=nreads, 37 | random_state=42) 38 | assert nreads == downsampled_counts.sum() 39 | 40 | # Test that it works with CSR matrices 41 | counts = sparse.csr_matrix(counts) 42 | downsampled_counts = downsample_contact_map(counts, nreads=nreads, 43 | random_state=42) 44 | assert nreads == downsampled_counts.sum() 45 | 46 | with pytest.raises(ValueError): 47 | downsample_contact_map(counts, proportion=-0.1) 48 | 49 | with pytest.raises(ValueError): 50 | downsample_contact_map(counts, proportion=1.5) 51 | 52 | downsample_contact_map(counts) 53 | 54 | 55 | def test_bootstrap_contact_map(): 56 | counts, lengths = datasets.load_sample_yeast() 57 | bootstrap_contact_map(counts, 58 | random_state=42) 59 | with pytest.raises(ValueError): 60 | bootstrap_contact_map(counts*.3, 61 | random_state=42) 62 | 63 | # Test that it works with COO matrices 64 | counts = sparse.coo_matrix(np.triu(counts)) 65 | bootstrap_contact_map(counts, 66 | random_state=42) 67 | 68 | # Test that it works with CSR matrices 69 | counts = sparse.csr_matrix(counts) 70 | bootstrap_contact_map(counts, 71 | random_state=42) 72 | 73 | bootstrap_contact_map(counts) 74 | -------------------------------------------------------------------------------- /iced/scripts/ice.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | from __future__ import print_function 3 | import sys 4 | import argparse 5 | import numpy as np 6 | from scipy import sparse 7 | 8 | import iced 9 | from iced.io import load_counts, write_counts 10 | 11 | def main(): 12 | parser = argparse.ArgumentParser("ICE normalization") 13 | parser.add_argument('filename', 14 | metavar='File to load', 15 | type=str, 16 | help='Path to file of contact counts to load') 17 | parser.add_argument("--results_filename", 18 | "-r", 19 | type=str, 20 | default=None, 21 | help="results_filename") 22 | parser.add_argument("--filtering_perc", "-f", 23 | type=float, 24 | default=None, 25 | help="Deprecated; use --filter_low_counts_perc") 26 | parser.add_argument("--filter_low_counts_perc", 27 | type=float, 28 | default=0.02, 29 | help="Percentage of lowest-count reads to filter out") 30 | parser.add_argument("--filter_high_counts_perc", 31 | type=float, 32 | default=0, 33 | help="Percentage of highest-count reads to filter out") 34 | parser.add_argument("--remove-all-zeros-loci", default=False, 35 | action="store_true", 36 | help="If provided, all non-interacting loci will be " 37 | "removed prior to the filtering strategy chosen.") 38 | parser.add_argument("--max_iter", "-m", default=100, type=int, 39 | help="Maximum number of iterations") 40 | parser.add_argument("--eps", "-e", default=0.1, type=float, 41 | help="Precision") 42 | parser.add_argument("--dense", "-d", default=False, action="store_true") 43 | parser.add_argument("--output-bias", "-b", default=False, help="Output the bias vector") 44 | parser.add_argument("--verbose", "-v", default=False, type=bool) 45 | parser.add_argument("--base", default=None, type=int, 46 | help="Indicates whether the matrix file is 0 or 1-based") 47 | 48 | 49 | args = parser.parse_args() 50 | filename = args.filename 51 | 52 | # Deprecating filtering_perc option 53 | filter_low_counts = None 54 | if "--filtering_perc" in sys.argv: 55 | DeprecationWarning( 56 | "Option '--filtering_perc' is deprecated. Please use " 57 | "'--filter_low_counts_perc' instead.'") 58 | # And print it again because deprecation warnings are not displayed for 59 | # recent versions of python 60 | print("--filtering_perc is deprecated. Please use filter_low_counts_perc")
61 | print("instead. This option will be removed in ice 0.3") 62 | filter_low_counts = args.filtering_perc 63 | if "--filter_low_counts_perc" in sys.argv and "--filtering_perc" in sys.argv: 64 | raise ValueError("These two options are incompatible") 65 | if filter_low_counts is not None: 66 | filter_low_counts_perc = filter_low_counts 67 | elif args.filter_low_counts_perc is not None: 68 | filter_low_counts_perc = args.filter_low_counts_perc 69 | 70 | if args.base is None: 71 | base = 1 72 | print("Assuming the file is 1-based. If this is not the desired option, " 73 | "set option --base to 0") 74 | else: 75 | base = args.base 76 | 77 | if args.verbose: 78 | print("Using iced version %s" % iced.__version__) 79 | print("Loading files...") 80 | 81 | # Loads file as i, j, counts 82 | counts = load_counts(filename, base=base) 83 | 84 | 85 | if args.dense: 86 | counts = np.array(counts.todense()) 87 | else: 88 | counts = sparse.csr_matrix(counts) 89 | 90 | if args.verbose: 91 | print("Normalizing...") 92 | 93 | if filter_low_counts_perc != 0: 94 | counts = iced.filter.filter_low_counts(counts, 95 | percentage=filter_low_counts_perc, 96 | remove_all_zeros_loci=args.remove_all_zeros_loci, 97 | copy=False, sparsity=False, verbose=args.verbose) 98 | if args.filter_high_counts_perc != 0: 99 | counts = iced.filter.filter_high_counts( 100 | counts, 101 | percentage=args.filter_high_counts_perc, 102 | copy=False) 103 | 104 | counts, bias = iced.normalization.ICE_normalization( 105 | counts, max_iter=args.max_iter, copy=False, 106 | verbose=args.verbose, eps=args.eps, output_bias=True) 107 | 108 | if args.results_filename is None: 109 | results_filename = ".".join( 110 | filename.split(".")[:-1]) + "_normalized." + filename.split(".")[-1] 111 | else: 112 | results_filename = args.results_filename 113 | 114 | counts = sparse.coo_matrix(counts) 115 | 116 | if args.verbose: 117 | print("Writing results...") 118 | 119 | write_counts( 120 | results_filename, 121 | counts, base=base) 122 | 123 | 124 | if args.output_bias: 125 | np.savetxt(results_filename + ".biases", bias) 126 | 127 | 128 | if __name__ == "__main__": 129 | main() 130 | --------------------------------------------------------------------------------
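The script above is meant to be run from the command line; below is a programmatic sketch of the same pipeline, with default-like parameter values and an input file name borrowed from examples/HiC-pro (any 1-based "i j count" file works).

    from scipy import sparse

    import iced
    from iced.io import load_counts, write_counts

    counts = load_counts("subset.matrix", base=1)
    counts = sparse.csr_matrix(counts)

    counts = iced.filter.filter_low_counts(counts, percentage=0.02,
                                           copy=False, sparsity=False)
    counts, bias = iced.normalization.ICE_normalization(
        counts, max_iter=100, eps=0.1, copy=False, output_bias=True)

    write_counts("subset_normalized.matrix", sparse.coo_matrix(counts), base=1)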
2 | import os 3 | from os.path import join 4 | 5 | import numpy 6 | 7 | 8 | def configuration(parent_package='', top_path=None): 9 | from numpy.distutils.misc_util import Configuration 10 | 11 | libraries = [] 12 | if os.name == 'posix': 13 | libraries.append('m') 14 | 15 | config = Configuration('iced', parent_package, top_path) 16 | config.add_subpackage('utils') 17 | config.add_subpackage("datasets") 18 | config.add_subpackage("io") 19 | config.add_subpackage("normalization") 20 | config.add_subpackage("random") 21 | 22 | config.add_extension( 23 | 'normalization/_normalization_', 24 | libraries=libraries, 25 | sources=['normalization/_normalization_.c'], 26 | include_dirs=[join('..', 'src', 'cblas'), 27 | numpy.get_include()]) 28 | config.add_extension( 29 | '_filter_', 30 | libraries=libraries, 31 | sources=['_filter_.c'], 32 | include_dirs=[join('..', 'src', 'cblas'), 33 | numpy.get_include()]) 34 | 35 | return config 36 | 37 | 38 | if __name__ == '__main__': 39 | from numpy.distutils.core import setup 40 | setup(**configuration(top_path='').todict()) 41 | -------------------------------------------------------------------------------- /iced/tests/test_filter.py: -------------------------------------------------------------------------------- 1 | from iced.filter import filter_low_counts, filter_high_counts 2 | from iced.datasets import load_sample_yeast 3 | import numpy as np 4 | from numpy.testing import assert_array_equal 5 | from scipy import sparse 6 | 7 | 8 | def test_filter_low_counts(): 9 | X = np.ones((100, 100)) 10 | X[0, :] = 0 11 | X[:, 0] = 0 12 | 13 | X_filtered_true = X.copy() 14 | X_filtered_true[X == 0] = np.nan 15 | X_filtered = filter_low_counts(X) 16 | assert_array_equal(X_filtered, X_filtered_true) 17 | 18 | lengths = np.array([40, 60]) 19 | X_filtered = filter_low_counts(X, lengths=lengths) 20 | assert_array_equal(X_filtered, X_filtered_true) 21 | 22 | X_filtered = filter_low_counts(X, sparsity=False) 23 | assert_array_equal(X_filtered, X_filtered_true) 24 | 25 | 26 | def test_filter_low_counts_with_zeros(): 27 | X = 10 * np.ones((100, 100)) 28 | X[0, :] = 0 29 | X[:, 0] = 0 30 | X[1, :] = 1 31 | X[:, 1] = 1 32 | 33 | X_filtered_true = X.copy() 34 | X_filtered_true[X != 10] = np.nan 35 | X_filtered = filter_low_counts(X, remove_all_zeros_loci=True, 36 | sparsity=False) 37 | assert_array_equal(X_filtered, X_filtered_true) 38 | 39 | 40 | def test_sparse_filter_low_counts(): 41 | X = 10 * np.ones((100, 100)) 42 | X[0, :] = 1 43 | X[:, 0] = 1 44 | X_filtered_dense = X.copy() 45 | X_filtered_dense[0] = 0 46 | X_filtered_dense[:, 0] = 0 47 | return 48 | # this is not implemented yet 49 | X_filtered_sparse_csr = filter_low_counts(sparse.csr_matrix(X), 50 | sparsity=False) 51 | X_filtered_sparse_coo = filter_low_counts(sparse.coo_matrix(X)) 52 | 53 | assert_array_equal(X_filtered_dense, 54 | np.array(X_filtered_sparse_csr.todense())) 55 | assert_array_equal(X_filtered_dense, 56 | np.array(X_filtered_sparse_coo.todense())) 57 | 58 | 59 | def test_sparse_filter_low_counts_real_data(): 60 | counts, lengths = load_sample_yeast() 61 | counts_sparse = sparse.csr_matrix(counts) 62 | counts_dense = filter_low_counts(counts, sparsity=False, percentage=0.1) 63 | counts_sparse = filter_low_counts(counts_sparse, sparsity=False, 64 | percentage=0.1) 65 | counts_dense[np.isnan(counts_dense)] = 0 66 | assert_array_equal(counts_dense, counts_sparse.toarray()) 67 | 68 | triu_counts_sparse = sparse.csr_matrix(np.triu(counts)) 69 | triu_counts_sparse = 
--------------------------------------------------------------------------------
/iced/tests/test_filter.py:
--------------------------------------------------------------------------------
1 | from iced.filter import filter_low_counts, filter_high_counts
2 | from iced.datasets import load_sample_yeast
3 | import numpy as np
4 | from numpy.testing import assert_array_equal
5 | from scipy import sparse
6 | 
7 | 
8 | def test_filter_low_counts():
9 |     X = np.ones((100, 100))
10 |     X[0, :] = 0
11 |     X[:, 0] = 0
12 | 
13 |     X_filtered_true = X.copy()
14 |     X_filtered_true[X == 0] = np.nan
15 |     X_filtered = filter_low_counts(X)
16 |     assert_array_equal(X_filtered, X_filtered_true)
17 | 
18 |     lengths = np.array([40, 60])
19 |     X_filtered = filter_low_counts(X, lengths=lengths)
20 |     assert_array_equal(X_filtered, X_filtered_true)
21 | 
22 |     X_filtered = filter_low_counts(X, sparsity=False)
23 |     assert_array_equal(X_filtered, X_filtered_true)
24 | 
25 | 
26 | def test_filter_low_counts_with_zeros():
27 |     X = 10 * np.ones((100, 100))
28 |     X[0, :] = 0
29 |     X[:, 0] = 0
30 |     X[1, :] = 1
31 |     X[:, 1] = 1
32 | 
33 |     X_filtered_true = X.copy()
34 |     X_filtered_true[X != 10] = np.nan
35 |     X_filtered = filter_low_counts(X, remove_all_zeros_loci=True,
36 |                                    sparsity=False)
37 |     assert_array_equal(X_filtered, X_filtered_true)
38 | 
39 | 
40 | def test_sparse_filter_low_counts():
41 |     X = 10 * np.ones((100, 100))
42 |     X[0, :] = 1
43 |     X[:, 0] = 1
44 |     X_filtered_dense = X.copy()
45 |     X_filtered_dense[0] = 0
46 |     X_filtered_dense[:, 0] = 0
47 |     # The sparse case is not implemented yet; skip the checks below.
48 |     return
49 |     X_filtered_sparse_csr = filter_low_counts(sparse.csr_matrix(X),
50 |                                               sparsity=False)
51 |     X_filtered_sparse_coo = filter_low_counts(sparse.coo_matrix(X))
52 | 
53 |     assert_array_equal(X_filtered_dense,
54 |                        np.array(X_filtered_sparse_csr.todense()))
55 |     assert_array_equal(X_filtered_dense,
56 |                        np.array(X_filtered_sparse_coo.todense()))
57 | 
58 | 
59 | def test_sparse_filter_low_counts_real_data():
60 |     counts, lengths = load_sample_yeast()
61 |     counts_sparse = sparse.csr_matrix(counts)
62 |     counts_dense = filter_low_counts(counts, sparsity=False, percentage=0.1)
63 |     counts_sparse = filter_low_counts(counts_sparse, sparsity=False,
64 |                                       percentage=0.1)
65 |     counts_dense[np.isnan(counts_dense)] = 0
66 |     assert_array_equal(counts_dense, counts_sparse.toarray())
67 | 
68 |     triu_counts_sparse = sparse.csr_matrix(np.triu(counts))
69 |     triu_counts_sparse = filter_low_counts(triu_counts_sparse, sparsity=False,
70 |                                            percentage=0.1)
71 |     assert_array_equal(np.triu(counts_dense), triu_counts_sparse.toarray())
72 | 
73 | 
74 | def test_filter_high_counts():
75 |     X = np.ones((100, 100))
76 |     X *= np.arange(len(X))
77 |     X_filtered_true = X.copy()
78 |     X_filtered_true[-1] = np.nan
79 |     X_filtered_true[:, -1] = np.nan
80 |     X_filtered = filter_high_counts(X)
81 |     assert_array_equal(X_filtered, X_filtered_true)
82 | 
83 | 
84 | def test_filter_high_counts_sparse():
85 |     X = np.ones((100, 100))
86 |     X *= np.arange(len(X))
87 |     X_filtered_dense = filter_high_counts(X)
88 | 
89 |     X_filtered_sparse_csr = filter_high_counts(sparse.csr_matrix(X))
90 |     X_filtered_sparse_coo = filter_high_counts(sparse.coo_matrix(X))
91 | 
92 |     assert_array_equal(X_filtered_dense,
93 |                        np.array(X_filtered_sparse_csr.todense()))
94 |     assert_array_equal(X_filtered_dense,
95 |                        np.array(X_filtered_sparse_coo.todense()))
96 | 
--------------------------------------------------------------------------------
/iced/utils/__init__.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | from ._genome import *
3 | from .validation import is_symetric_or_tri, is_tri
4 | 
5 | class deprecated(object):
6 |     """Decorator to mark a function or class as deprecated.
7 | 
8 |     Issue a warning when the function is called/the class is instantiated and
9 |     adds a warning to the docstring.
10 | 
11 |     The optional extra argument will be appended to the deprecation message
12 |     and the docstring. Note: to use this with the default value for extra, put
13 |     in an empty set of parentheses:
14 | 
15 |     >>> from iced.utils import deprecated
16 |     >>> deprecated()  # doctest: +ELLIPSIS
17 |     <iced.utils.deprecated object at ...>
18 | 
19 |     >>> @deprecated()
20 |     ... def some_function(): pass
21 |     """
22 | 
23 |     # Adapted from http://wiki.python.org/moin/PythonDecoratorLibrary,
24 |     # but with many changes.
25 | 
26 |     def __init__(self, extra=''):
27 |         """
28 |         Parameters
29 |         ----------
30 |         extra : string
31 |             to be added to the deprecation messages
32 | 
33 |         """
34 |         self.extra = extra
35 | 
36 |     def __call__(self, obj):
37 |         if isinstance(obj, type):
38 |             return self._decorate_class(obj)
39 |         else:
40 |             return self._decorate_fun(obj)
41 | 
42 |     def _decorate_class(self, cls):
43 |         msg = "Class %s is deprecated" % cls.__name__
44 |         if self.extra:
45 |             msg += "; %s" % self.extra
46 | 
47 |         # FIXME: we should probably reset __new__ for full generality
48 |         init = cls.__init__
49 | 
50 |         def wrapped(*args, **kwargs):
51 |             warnings.warn(msg, category=DeprecationWarning)
52 |             return init(*args, **kwargs)
53 |         cls.__init__ = wrapped
54 | 
55 |         wrapped.__name__ = '__init__'
56 |         wrapped.__doc__ = self._update_doc(init.__doc__)
57 |         wrapped.deprecated_original = init
58 | 
59 |         return cls
60 | 
61 |     def _decorate_fun(self, fun):
62 |         """Decorate function fun"""
63 | 
64 |         msg = "Function %s is deprecated" % fun.__name__
65 |         if self.extra:
66 |             msg += "; %s" % self.extra
67 | 
68 |         def wrapped(*args, **kwargs):
69 |             warnings.warn(msg, category=DeprecationWarning)
70 |             return fun(*args, **kwargs)
71 | 
72 |         wrapped.__name__ = fun.__name__
73 |         wrapped.__dict__ = fun.__dict__
74 |         wrapped.__doc__ = self._update_doc(fun.__doc__)
75 | 
76 |         return wrapped
77 | 
78 |     def _update_doc(self, olddoc):
79 |         newdoc = "DEPRECATED"
80 |         if self.extra:
81 |             newdoc = "%s: %s" % (newdoc, self.extra)
82 |         if olddoc:
83 |             newdoc = "%s\n\n%s" % (newdoc, olddoc)
84 |         return newdoc
85 | 
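86 | # A minimal class-decoration sketch (illustration only; "OldLoader" and
87 | # "NewLoader" are hypothetical names):
88 | #
89 | #     @deprecated("use NewLoader instead")
90 | #     class OldLoader(object):
91 | #         def __init__(self):
92 | #             pass
93 | #
94 | # Instantiating OldLoader() then emits a DeprecationWarning with the
95 | # message "Class OldLoader is deprecated; use NewLoader instead".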
--------------------------------------------------------------------------------
/iced/utils/_genome.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy import sparse
3 | from .validation import is_symetric_or_tri
4 | 
5 | 
6 | def get_intra_mask(lengths, counts=None):
7 |     """
8 |     Returns a mask for intrachromosomal interactions
9 | 
10 |     Parameters
11 |     ----------
12 |     lengths : ndarray, (n, )
13 |         lengths of the chromosomes
14 | 
15 |     counts : ndarray or sparse matrix (n, n), optional, default: None
16 |         if provided, and if sparse, the mask is only returned for the
17 |         non-zero elements of the sparse matrix.
18 | 
19 |     Returns
20 |     -------
21 |     mask : ndarray (n, n) or (m, ), dtype: bool
22 |         boolean mask, True for intrachromosomal interactions and False
23 |         for interchromosomal ones. If the counts matrix is not provided,
24 |         or is provided in dense format, an n-by-n matrix is returned.
25 |         Else, an m-vector is returned, where m is the number of data
26 |         points in the sparse matrix.
27 | 
28 |     See Also
29 |     --------
30 |     get_inter_mask
31 |     """
32 |     if counts is not None and sparse.issparse(counts):
33 |         return _get_intra_mask_sparse(lengths, counts)
34 |     else:
35 |         return _get_intra_mask_dense(lengths)
36 | 
37 | 
38 | def _get_intra_mask_sparse(lengths, counts):
39 |     if not sparse.isspmatrix_coo(counts):
40 |         counts = counts.tocoo()
41 |     chr_id = np.array([i for i, l in enumerate(lengths) for _ in range(l)])
42 |     mask = np.ones(counts.col.shape, dtype=bool)
43 |     mask[chr_id[counts.col] != chr_id[counts.row]] = False
44 |     return mask
45 | 
46 | 
47 | def _get_intra_mask_dense(lengths):
48 |     """
49 |     Returns a mask for intrachromosomal interactions
50 | 
51 |     Parameters
52 |     ----------
53 |     lengths : ndarray, (n, )
54 |         lengths of the chromosomes
55 | 
56 |     Returns
57 |     -------
58 |     mask : ndarray (n, n)
59 |         boolean mask
60 |     """
61 |     # Build a block diagonal mask, with one block of ones per chromosome
62 |     mask = np.zeros((lengths.sum(), lengths.sum()))
63 |     begin = 0
64 |     for end in lengths.cumsum():
65 |         mask[begin:end, begin:end] = 1
66 |         begin = end
67 |     return mask.astype(bool)
68 | 
69 | 
70 | def get_inter_mask(lengths):
71 |     """
72 |     Returns a mask for interchromosomal interactions
73 | 
74 |     Parameters
75 |     ----------
76 |     lengths : ndarray, (n, )
77 |         lengths of the chromosomes
78 | 
79 |     Returns
80 |     -------
81 |     mask : ndarray of dtype boolean
82 |         boolean mask
83 |     """
84 |     intra_mask = get_intra_mask(lengths)
85 |     return np.invert(intra_mask)
86 | 
87 | 
88 | def get_genomic_distances(lengths, counts=None):
89 |     """
90 |     Returns a matrix of the genomic distances
91 | 
92 |     Inter chromosomal interactions are set to -1
93 | 
94 |     Parameters
95 |     ----------
96 |     lengths : ndarray (L, )
97 |         lengths of the chromosomes
98 | 
99 |     counts : ndarray or sparse matrix (n, n), optional, default: None
100 |         if provided, and if sparse, will only return genomic distances for
101 |         non-zero elements of the sparse matrix.
102 | 
103 |     Returns
104 |     -------
105 |     dis : ndarray (n, n) or (m, ), dtype: int
106 |         returns the genomic distance matrix, with -1 for inter chromosomal
107 |         interactions. If the counts matrix is not provided, or is provided
108 |         in dense format, an n-by-n matrix is returned. Else, an m-vector is
109 |         returned, where m is the number of data points in the sparse matrix
110 |     """
111 |     if counts is not None and sparse.issparse(counts):
112 |         if not sparse.isspmatrix_coo(counts):
113 |             counts = counts.tocoo()
114 |         return _get_genomic_distances_sparse(lengths, counts)
115 |     else:
116 |         return _get_genomic_distances_dense(lengths)
117 | 
118 | 
119 | def _get_genomic_distances_sparse(lengths, counts):
120 |     """Return the genomic distances for the non-zero entries of a coo
121 |     matrix, with -1 for interchromosomal entries."""
122 |     chr_id = np.array([i for i, l in enumerate(lengths) for _ in range(l)])
123 |     gdis = np.abs(counts.col - counts.row)
124 |     gdis[chr_id[counts.col] != chr_id[counts.row]] = -1
125 |     return gdis
126 | 
127 | 
128 | def _get_genomic_distances_dense(lengths):
129 |     """
130 |     Returns a matrix of the genomic distances
131 | 
132 |     Inter chromosomal interactions are set to -1
133 | 
134 |     Parameters
135 |     ----------
136 |     lengths : ndarray (n, )
137 |         lengths of the chromosomes
138 | 
139 |     Returns
140 |     -------
141 |     dis : ndarray (n, n), dtype: int
142 |         returns the genomic distance matrix, with -1 for inter chromosomal
143 |         interactions
144 |     """
145 |     inter_mask = get_inter_mask(lengths)
146 |     n = lengths.sum()
147 | 
148 |     dis = np.concatenate([np.concatenate([np.arange(i, 0, -1),
149 |                                           np.arange(n - i)])
150 |                           for i in range(n)])
151 |     dis = dis.reshape((n, n))
152 |     dis[inter_mask] = -1
153 | 
154 |     return dis.astype(int)
155 | 
156 | 
157 | def extract_sub_contact_map(counts, lengths, chromosomes):
158 |     """
159 |     Extract the contact map associated with a given list of chromosomes
160 | 
161 |     Parameters
162 |     ----------
163 |     counts : ndarray (n, n)
164 | 
165 |     lengths : ndarray (L, )
166 | 
167 |     chromosomes : list of ids
168 | 
169 |     Returns
170 |     -------
171 |     sub_counts, sub_lengths : (ndarray, ndarray)
172 | 
173 |     Examples
174 |     --------
175 | 
176 |     >>> from iced import datasets
177 |     >>> from iced.utils import extract_sub_contact_map
178 |     >>> counts, lengths = datasets.load_sample_yeast()
179 |     >>> scounts, slengths = extract_sub_contact_map(counts, lengths, [0, 2])
180 |     >>> print(len(counts), len(scounts))
181 |     ... # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
182 |     350 56
183 |     """
184 |     chromosomes = np.array(chromosomes)
185 |     if chromosomes.max() >= len(lengths):
186 |         raise ValueError(
187 |             "The chromosomes provided are not compatible with the "
188 |             "lengths array. Possible values are"
189 |             " %s" % " ".join("%s" % i for i in np.arange(len(lengths))))
190 |     if lengths.sum() != counts.shape[0]:
191 |         raise ValueError(
192 |             "The lengths provided are incompatible with the counts matrix "
193 |             "shape. The total length is %d while the contact count matrix "
194 |             "is %d" % (lengths.sum(), counts.shape[0]))
195 | 
196 |     is_symetric_or_tri(counts)
197 |     chromosomes.sort()
198 | 
199 |     new_lengths = lengths[chromosomes]
200 |     new_counts = np.zeros((new_lengths.sum(), new_lengths.sum()))
201 |     begin1, end1 = 0, 0
202 |     for i, l1 in enumerate(lengths):
203 |         end1 += l1
204 |         if i not in chromosomes:
205 |             begin1 = end1
206 |             continue
207 |         # Find the position of this chromosome in the new matrix
208 |         new_num_chrom = (chromosomes == i).argmax()
209 |         if new_num_chrom == 0:
210 |             new_begin1 = 0
211 |         else:
212 |             new_begin1 = new_lengths.cumsum()[new_num_chrom - 1]
213 |         new_end1 = new_lengths.cumsum()[new_num_chrom]
214 | 
215 |         begin2, end2 = 0, 0
216 |         for j, l2 in enumerate(lengths):
217 |             end2 += l2
218 |             if j not in chromosomes:
219 |                 begin2 = end2
220 |                 continue
221 |             # Find the position of this chromosome in the new matrix
222 |             new_num_chrom = (chromosomes == j).argmax()
223 |             if new_num_chrom == 0:
224 |                 new_begin2 = 0
225 |             else:
226 |                 new_begin2 = new_lengths.cumsum()[new_num_chrom - 1]
227 |             new_end2 = new_lengths.cumsum()[new_num_chrom]
228 |             new_counts[new_begin1:new_end1,
229 |                        new_begin2:new_end2] = counts[begin1:end1, begin2:end2]
230 |             begin2 = end2
231 | 
232 |         begin1 = end1
233 | 
234 |     return new_counts, new_lengths
235 | 
236 | 
237 | def undersample_per_chr(X, lengths):
238 |     """
239 |     Undersample matrix to chromosomes
240 | 
241 |     Undersample the matrix non-uniformly, one value per pair of chromosomes.
242 | 
243 |     Parameters
244 |     ----------
245 |     X : ndarray (n, n)
246 |         The matrix to undersample
247 | 
248 |     lengths : ndarray (L, )
249 |         Lengths of the chromosomes
250 | 
251 |     Returns
252 |     -------
253 |     undersampled_X : ndarray (L, L)
254 |         `X` undersampled per chromosome
255 |     """
256 |     lengths_cum = lengths.cumsum()
257 |     chr1_begin = 0
258 |     undersampled_X = np.zeros((len(lengths), len(lengths)))
259 |     for i, chr1_end in enumerate(lengths_cum):
260 |         chr2_begin = 0
261 |         for j, chr2_end in enumerate(lengths_cum):
262 |             surface = X[chr1_begin:chr1_end, chr2_begin:chr2_end]
263 |             undersampled_X[i, j] = surface[np.invert(np.isnan(surface))].mean()
264 |             chr2_begin = chr2_end
265 |         chr1_begin = chr1_end
266 | 
267 |     return undersampled_X
268 | 
269 | 
270 | def downsample_resolution(counts, lengths, factor=2, normalize=False):
271 |     """
272 |     Downsamples the resolution of a matrix
273 | 
274 |     Parameters
275 |     ----------
276 |     counts : ndarray (N, N)
277 |         contact counts matrix to downsample
278 | 
279 |     lengths : ndarray (L, )
280 |         chromosomes lengths
281 | 
282 |     factor : int, optional, default: 2
283 |         downsample the resolution of the counts matrix by `factor`
284 | 
285 |     Returns
286 |     -------
287 |     target_counts, target_lengths : ndarray
288 |     """
289 |     if factor == 1:
290 |         return counts, lengths
291 |     # FIXME there is probably a better way to do this
292 |     target_lengths = np.ceil(lengths.astype(float) / factor).astype(int)
293 |     target_counts = np.zeros((target_lengths.sum(),
294 |                               target_lengths.sum()))
295 |     begin_i, end_i = 0, 0
296 |     target_begin_i, target_end_i = 0, 0
297 |     for i, length_i in enumerate(lengths):
298 |         end_i += length_i
299 |         target_end_i += target_lengths[i]
300 |         begin_j, end_j = 0, 0
301 |         target_begin_j, target_end_j = 0, 0
302 |         for j, length_j in enumerate(lengths):
303 |             end_j += length_j
304 |             target_end_j += target_lengths[j]
305 | 
306 |             sub_counts = counts[begin_i:end_i, begin_j:end_j]
307 |             sub_target_counts = target_counts[target_begin_i:target_end_i,
308 |                                               target_begin_j:target_end_j]
309 |             d = np.zeros(sub_target_counts.shape)
310 |             for i_start in range(factor):
311 |                 for j_start in range(factor):
312 |                     s = sub_counts[i_start::factor, j_start::factor]
313 |                     d[:s.shape[0], :s.shape[1]] += np.invert(np.isnan(s))
314 |                     s[np.isnan(s)] = 0
315 |                     sub_target_counts[:s.shape[0], :s.shape[1]] += s
316 |             if normalize:
317 |                 sub_target_counts /= d
318 | 
319 |             begin_j = end_j
320 |             target_begin_j = target_end_j
321 |         begin_i = end_i
322 |         target_begin_i = target_end_i
323 |     return target_counts, target_lengths
324 | 
325 | 
326 | def _change_lengths_resolution(lengths, resolution=10000, copy=True):
327 |     if copy:
328 |         lengths = lengths.copy()
329 |     lengths = lengths.astype(float)
330 |     return np.ceil(lengths / resolution).astype(int)
331 | 
332 | 
333 | def get_chromosome_counts(counts, lengths, chromosome):
334 |     """Return the intrachromosomal contact counts of a single chromosome
335 |     """
336 |     if chromosome > len(lengths) - 1:
337 |         raise ValueError("Chromosome %d doesn't exist. Possible values are "
338 |                          "from 0 to %d" % (chromosome, len(lengths) - 1))
339 | 
340 |     if len(counts) != lengths.sum():
341 |         raise ValueError("The total lengths and the counts matrix shape "
342 |                          "should be the same. They are respectively %d and %d"
343 |                          % (lengths.sum(), len(counts)))
344 | 
345 |     lengths_cum = np.concatenate([[0], lengths.cumsum()])
346 |     return counts[lengths_cum[chromosome]:lengths_cum[chromosome + 1],
347 |                   lengths_cum[chromosome]:lengths_cum[chromosome + 1]]
348 | 
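349 | # A quick doctest-style sketch (illustration only), mirroring the
350 | # behaviour documented above:
351 | #
352 | #     >>> import numpy as np
353 | #     >>> get_intra_mask(np.array([2, 2])).astype(int)
354 | #     array([[1, 1, 0, 0],
355 | #            [1, 1, 0, 0],
356 | #            [0, 0, 1, 1],
357 | #            [0, 0, 1, 1]])
358 | #     >>> get_genomic_distances(np.array([2, 2]))
359 | #     array([[ 0,  1, -1, -1],
360 | #            [ 1,  0, -1, -1],
361 | #            [-1, -1,  0,  1],
362 | #            [-1, -1,  1,  0]])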
--------------------------------------------------------------------------------
/iced/utils/_validation.py:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/iced/utils/tests/test_genome.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from numpy.testing import assert_array_equal
3 | from scipy import sparse
4 | 
5 | from iced.utils._genome import get_intra_mask
6 | from iced.utils._genome import get_inter_mask
7 | from iced.utils._genome import get_genomic_distances
8 | from iced.utils._genome import _change_lengths_resolution
9 | from iced.utils._genome import undersample_per_chr
10 | from iced.utils._genome import extract_sub_contact_map
11 | from iced.utils._genome import downsample_resolution
12 | 
13 | 
14 | def test_get_intra_mask():
15 |     lengths = np.array([5, 5])
16 |     mask = get_intra_mask(lengths)
17 |     true_mask = np.zeros((10, 10))
18 |     true_mask[:5, :5] = 1
19 |     true_mask[5:, 5:] = 1
20 |     true_mask = true_mask.astype(bool)
21 |     assert_array_equal(mask, true_mask)
22 | 
23 |     # Now test sparse matrix
24 |     random_state = np.random.RandomState(seed=42)
25 | 
26 |     m = 15
27 |     rows = random_state.randint(0, 10, size=(m,))
28 |     cols = random_state.randint(0, 10, size=(m,))
29 |     counts = np.zeros((10, 10))
30 |     counts[rows, cols] += 1
31 |     counts = sparse.coo_matrix(np.triu(counts))
32 |     rows = counts.row
33 |     cols = counts.col
34 |     sparse_mask = get_intra_mask(lengths, counts=counts)
35 |     sparse_true_mask = true_mask[rows, cols]
36 |     assert_array_equal(sparse_mask, sparse_true_mask)
37 | 
38 |     # Providing a matrix that isn't coo
39 |     counts = counts.tocsr()
40 |     sparse_mask = get_intra_mask(lengths, counts=counts)
41 |     sparse_true_mask = true_mask[rows, cols]
42 |     assert_array_equal(sparse_mask, sparse_true_mask)
43 | 
44 | 
45 | def test_change_lengths_resolution():
46 |     lengths = np.array([5, 5])
47 |     new_lengths = _change_lengths_resolution(lengths, resolution=1)
48 |     assert_array_equal(lengths, new_lengths)
49 | 
50 | 
51 | def test_get_inter_mask():
52 |     lengths = np.array([5, 5])
53 |     mask = get_inter_mask(lengths)
54 |     true_mask = np.zeros((10, 10))
55 |     true_mask[:5, :5] = 1
56 |     true_mask[5:, 5:] = 1
57 |     assert_array_equal(mask, np.invert(true_mask.astype(bool)))
58 | 
59 | 
60 | def test_downsample_resolution():
61 |     random_state = np.random.RandomState(seed=42)
62 | 
63 |     lengths = np.array([10, 10])
64 |     counts = np.triu(random_state.randint(
65 |         0, 100, (lengths.sum(), lengths.sum())))
66 |     counts = counts + counts.T
67 |     downsampled_counts, downsampled_lengths = downsample_resolution(
68 |         counts, lengths)
69 |     assert downsampled_lengths.sum() == lengths.sum() / 2
70 | 
71 |     lengths = np.array([10, 11])
72 |     counts = np.triu(random_state.randint(
73 |         0, 100, (lengths.sum(), lengths.sum())))
74 |     counts = counts + counts.T
75 |     downsampled_counts, downsampled_lengths = downsample_resolution(
76 |         counts, lengths)
77 |     assert downsampled_lengths.sum() == 11
78 | 
79 | 
80 | def test_undersample_per_chr():
81 |     X = np.array([[1, 1, 0, 0],
82 |                   [1, 1, 0, 0],
83 |                   [0, 0, 0.5, 0.5],
84 |                   [0, 0, 0.5, 0.5]])
85 |     lengths = np.array([2, 2])
86 |     undersampled_X = undersample_per_chr(X, lengths)
87 |     undersampled_X_true = np.array([[1, 0],
88 |                                     [0, 0.5]])
89 |     assert_array_equal(undersampled_X_true, undersampled_X)
90 | 
91 | 
92 | def test_return_sample():
93 |     lengths = np.array([50, 75])
94 |     n = lengths.sum()
95 |     X = np.random.randint(0, 50, (n, n))
96 |     X = np.triu(X)
97 |     sub_X, _ = extract_sub_contact_map(X, lengths, [0])
98 |     assert_array_equal(X[:lengths[0], :lengths[0]],
99 |                        sub_X)
100 | 
101 | 
102 | def test_get_genomic_distances():
103 |     lengths = np.array([5, 5])
104 |     dense_gdis = get_genomic_distances(lengths)
105 | 
106 |     # Check the dense result against a hand-constructed distance matrix
107 |     true_gdis = np.abs(np.arange(10)[:, None] - np.arange(10))
108 |     true_gdis[:5, 5:] = -1
109 |     true_gdis[5:, :5] = -1
110 |     assert_array_equal(dense_gdis, true_gdis)
111 | 
112 |     # Now test sparse matrix
113 |     random_state = np.random.RandomState(seed=42)
114 | 
115 |     m = 15
116 |     rows = random_state.randint(0, 10, size=(m,))
117 |     cols = random_state.randint(0, 10, size=(m,))
118 |     counts = np.zeros((10, 10))
119 |     counts[rows, cols] += 1
120 |     counts = sparse.coo_matrix(np.triu(counts))
121 |     rows = counts.row
122 |     cols = counts.col
123 |     sparse_gdis = get_genomic_distances(lengths, counts=counts)
124 |     sparse_true_gdis = true_gdis[rows, cols]
125 |     assert_array_equal(sparse_gdis, sparse_true_gdis)
126 | 
127 |     # Providing a matrix that isn't coo
128 |     counts = counts.tocsr()
129 |     sparse_gdis = get_genomic_distances(lengths, counts=counts)
130 |     sparse_true_gdis = true_gdis[rows, cols]
131 |     assert_array_equal(sparse_gdis, sparse_true_gdis)
132 | 
--------------------------------------------------------------------------------
/iced/utils/tests/test_validation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy import sparse
3 | import pytest
4 | from iced.utils import validation
5 | 
6 | 
7 | def test_is_symetric_or_tri():
8 |     n = 100
9 |     m = 50
10 |     random_state = np.random.RandomState(seed=42)
11 |     X = random_state.randn(n, m)
12 |     with pytest.raises(ValueError):
13 |         validation.is_symetric_or_tri(X)
14 |     X = random_state.randn(n, n)
15 |     with pytest.raises(ValueError):
16 |         validation.is_symetric_or_tri(X)
17 |     X = X + X.T
18 |     validation.is_symetric_or_tri(X)
19 |     X = np.triu(X)
20 |     validation.is_symetric_or_tri(X)
21 | 
22 | 
23 | def test_is_symetric_or_tri_sparse():
24 |     n = 100
25 |     m = 50
26 |     random_state = np.random.RandomState(seed=42)
27 |     X = sparse.csr_matrix(random_state.randn(n, m))
28 |     with pytest.raises(ValueError):
29 |         validation.is_symetric_or_tri(X)
30 | 
31 |     X = sparse.csr_matrix(random_state.randn(n, n))
32 |     with pytest.raises(ValueError):
33 |         validation.is_symetric_or_tri(X)
34 |     X = random_state.randn(n, n)
35 |     X = X + X.T
36 |     X = sparse.csr_matrix(X)
37 |     validation.is_symetric_or_tri(X)
38 |     X[np.tri(n, dtype=bool)] = 0
39 |     validation.is_symetric_or_tri(X)
40 | 
41 | 
42 | def test_is_tri():
43 |     n = 100
44 |     random_state = np.random.RandomState(seed=42)
45 |     X = random_state.randn(n, n)
46 |     assert validation.is_tri(np.triu(X))
47 |     assert validation.is_tri(np.tril(X))
48 | 
--------------------------------------------------------------------------------
/iced/utils/validation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy import sparse
3 | 
4 | 
5 | def is_symetric_or_tri(X, eps=1e-7):
6 |     m, n = X.shape
7 |     if m != n:
8 |         raise ValueError("The matrix should be of shape (n, n)")
9 | 
10 |     if is_tri(X):
11 |         return True
12 |     if np.abs(X - X.T).sum() > eps:
13 |         raise ValueError("The matrix should be symmetric")
14 |     return True
15 | 
16 | 
17 | def is_tri(X):
18 |     diag = X.diagonal().sum()
19 |     if sparse.issparse(X):
20 |         if not (sparse.tril(X).sum() - diag) or \
21 |            not (sparse.triu(X).sum() - diag):
22 |             return True
23 |         return False
24 |     if not np.triu(X, 1).sum() or not np.tril(X, -1).sum():
25 |         return True
26 |     return False
27 | 
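28 | # A usage sketch (illustration only): is_tri accepts purely upper- or
29 | # lower-triangular matrices, while is_symetric_or_tri raises a
30 | # ValueError for matrices that are neither symmetric nor triangular.
31 | #
32 | #     >>> import numpy as np
33 | #     >>> is_tri(np.triu(np.ones((3, 3))))
34 | #     True
35 | #     >>> is_symetric_or_tri(np.arange(9).reshape(3, 3))
36 | #     Traceback (most recent call last):
37 | #         ...
38 | #     ValueError: The matrix should be symmetric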
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=77", "wheel", "numpy", "scipy", "cython"]
3 | build-backend = "setuptools.build_meta"
4 | 
5 | [project]
6 | name = "iced"
"iced" 7 | requires-python = ">= 3.9" 8 | version = "0.6.0a0.dev0" 9 | dependencies = [ 10 | "numpy>=1.16.0", 11 | "scipy>=0.19.0", 12 | ] 13 | readme = "README.rst" 14 | description = "ICE normalization" 15 | keywords = ["iced", "hi-c", "chromatin"] 16 | authors = [ 17 | {name = "Nelle Varoquaux", email = "nelle.varoquaux@gmail.com"} 18 | ] 19 | maintainers = [ 20 | {name = "Nelle Varoquaux", email = "nelle.varoquaux@gmail.com"} 21 | ] 22 | classifiers=[ 23 | 'Development Status :: 3 - Alpha', 24 | 'Intended Audience :: Science/Research', 25 | 'Intended Audience :: Developers', 26 | 'Programming Language :: Python', 27 | 'Topic :: Utilities', 28 | 'Topic :: Software Development', 29 | 'Topic :: Scientific/Engineering', 30 | 'Operating System :: Microsoft :: Windows', 31 | 'Operating System :: POSIX', 32 | 'Operating System :: Unix', 33 | 'Operating System :: MacOS'] 34 | 35 | [project.scripts] 36 | ice = "iced.scripts.ice:main" 37 | 38 | [project.urls] 39 | Homepage = "https://hiclib.github.io/iced/" 40 | Documentation = "https://hiclib.github.io/iced/documentation.html" 41 | Repository = "https://github.com/hiclib/iced.git" 42 | Issues = "https://github.com/hiclib/iced/issues" 43 | Download = "https://github.com/hiclib/iced/releases" 44 | 45 | [tool.setuptools] 46 | include-package-data = true 47 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -r requirements/default.txt 2 | -r requirements/tests.txt 3 | -------------------------------------------------------------------------------- /requirements/default.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.16 2 | scipy>=0.19 3 | cython 4 | scikit-learn 5 | pandas 6 | -------------------------------------------------------------------------------- /requirements/docs.txt: -------------------------------------------------------------------------------- 1 | sphinx>=1.8 2 | sphinx-gallery>=0.7.0,!=0.8.0 3 | numpydoc>=1.0 4 | matplotlib>=3.3 5 | -------------------------------------------------------------------------------- /requirements/tests.txt: -------------------------------------------------------------------------------- 1 | pytest>=5.2.0 2 | pytest-cov>=2.7.0 3 | flake8 4 | codecov 5 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import Extension, setup 2 | import numpy as np 3 | 4 | 5 | setup( 6 | ext_modules=[ 7 | Extension(name="iced._filter_", 8 | sources=["iced/_filter_.pyx"], 9 | include_dirs=[np.get_include()]), 10 | Extension(name="iced.normalization._normalization_", 11 | sources=["iced/normalization/_normalization_.pyx"], 12 | include_dirs=[np.get_include()] 13 | )], 14 | ) 15 | --------------------------------------------------------------------------------