├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── README.rst ├── data ├── 10x_pooled_400.mat ├── BranchedSynDat.mat ├── GSE60361_dat.mat ├── SCDE_k2_sup.mat ├── SCDE_test.mat └── SynMouseESprog_1000.mat ├── deploy_pypi.sh ├── docs ├── Makefile ├── _build │ ├── doctrees │ │ ├── best_practices.doctree │ │ ├── environment.pickle │ │ ├── index.doctree │ │ ├── modules.doctree │ │ ├── nmf_wrapper.doctree │ │ ├── preprocessing.doctree │ │ ├── readme_link.doctree │ │ ├── run_se.doctree │ │ ├── state_estimation.doctree │ │ ├── things_we_tried.doctree │ │ ├── uncurl.doctree │ │ ├── uncurl.robust.doctree │ │ ├── uncurl.scalable.doctree │ │ ├── uncurl_2.doctree │ │ ├── uncurl_pub.doctree │ │ └── unsupported_methods.doctree │ └── html │ │ ├── .buildinfo │ │ ├── .nojekyll │ │ ├── _modules │ │ ├── index.html │ │ └── uncurl │ │ │ ├── clustering.html │ │ │ ├── dim_reduce.html │ │ │ ├── dimensionality_reduction.html │ │ │ ├── evaluation.html │ │ │ ├── experiment_runner.html │ │ │ ├── lineage.html │ │ │ ├── nb_cluster.html │ │ │ ├── nb_clustering.html │ │ │ ├── nb_state_estimation.html │ │ │ ├── nmf_wrapper.html │ │ │ ├── pois_ll.html │ │ │ ├── poisson_cluster.html │ │ │ ├── preprocessing.html │ │ │ ├── qual2quant.html │ │ │ ├── robust │ │ │ └── state_estimation.html │ │ │ ├── run_se.html │ │ │ ├── scalable │ │ │ └── state_estimation.html │ │ │ ├── simulation.html │ │ │ ├── spatial_inference.html │ │ │ └── state_estimation.html │ │ ├── _sources │ │ ├── best_practices.rst.txt │ │ ├── index.rst.txt │ │ ├── modules.rst.txt │ │ ├── nmf_wrapper.rst.txt │ │ ├── preprocessing.rst.txt │ │ ├── readme_link.rst.txt │ │ ├── run_se.rst.txt │ │ ├── state_estimation.rst.txt │ │ ├── things_we_tried.rst.txt │ │ ├── uncurl.robust.rst.txt │ │ ├── uncurl.rst.txt │ │ ├── uncurl.scalable.rst.txt │ │ ├── uncurl_2.rst.txt │ │ ├── uncurl_pub.rst.txt │ │ └── unsupported_methods.rst.txt │ │ ├── _static │ │ ├── ajax-loader.gif │ │ ├── alabaster.css │ │ ├── basic.css │ │ ├── classic.css │ │ ├── custom.css │ │ ├── doctools.js │ │ ├── jquery-3.1.0.js │ │ ├── jquery.js │ │ ├── pygments.css │ │ ├── searchtools.js │ │ ├── sidebar.js │ │ ├── underscore-1.3.1.js │ │ ├── underscore.js │ │ └── websupport.js │ │ ├── best_practices.html │ │ ├── genindex.html │ │ ├── index.html │ │ ├── modules.html │ │ ├── nmf_wrapper.html │ │ ├── objects.inv │ │ ├── preprocessing.html │ │ ├── py-modindex.html │ │ ├── readme_link.html │ │ ├── run_se.html │ │ ├── search.html │ │ ├── searchindex.js │ │ ├── state_estimation.html │ │ ├── things_we_tried.html │ │ ├── uncurl.html │ │ ├── uncurl.robust.html │ │ ├── uncurl.scalable.html │ │ ├── uncurl_2.html │ │ ├── uncurl_pub.html │ │ └── unsupported_methods.html ├── conf.py ├── index.rst ├── readme_link.rst ├── things_we_tried.rst ├── uncurl.rst ├── uncurl_pub.rst └── unsupported_methods.rst ├── examples ├── example.py ├── lineage_example.py ├── means_weights.npy ├── synthetic_example.py └── zeisel_subset_example.py ├── notebooks ├── Imputation.ipynb └── Tutorial.ipynb ├── optional_requirements.txt ├── push-docs.sh ├── requirements.txt ├── setup.py ├── tests ├── test_cluster.py ├── test_cluster_sparse.py ├── test_dim_reduce.py ├── test_experiment_runner.py ├── test_fit_dist.py ├── test_gap_score.py ├── test_lineage.py ├── test_nb.py ├── test_nb_state_estimation.py ├── test_nmf.py ├── test_poisson.py ├── test_preprocessing.py ├── test_qual2quant.py ├── test_real_data.py ├── test_state_estimation.py ├── test_state_estimation_sparse.py └── test_zip_state_estimation.py └── uncurl ├── __init__.py ├── clustering.py ├── 
dimensionality_reduction.py ├── ensemble.py ├── evaluation.py ├── experiment_runner.py ├── fit_dist_data.py ├── gap_score.py ├── lightlda_utils.py ├── lineage.py ├── nb_clustering.py ├── nb_state_estimation.py ├── nmf_wrapper.py ├── nolips.pyx ├── nolips_parallel.pyx ├── plda_utils.py ├── pois_ll.py ├── preprocessing.py ├── qual2quant.py ├── run_se.py ├── sampling.py ├── simulation.py ├── sparse_utils.pyx ├── state_estimation.py ├── vis.py ├── zip_clustering.py ├── zip_state_estimation.py └── zip_utils.py /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | sudo: enabled 3 | python: 4 | - 2.7 5 | - 3.4 6 | install: 7 | - pip install -r requirements.txt 8 | - pip install pytest 9 | - pip install flaky 10 | - pip install . 11 | script: pytest 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Yue Zhang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include tests/*.py 2 | include uncurl/*.pyx -------------------------------------------------------------------------------- /data/10x_pooled_400.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/data/10x_pooled_400.mat -------------------------------------------------------------------------------- /data/BranchedSynDat.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/data/BranchedSynDat.mat -------------------------------------------------------------------------------- /data/GSE60361_dat.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/data/GSE60361_dat.mat -------------------------------------------------------------------------------- /data/SCDE_k2_sup.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/data/SCDE_k2_sup.mat -------------------------------------------------------------------------------- /data/SCDE_test.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/data/SCDE_test.mat -------------------------------------------------------------------------------- /data/SynMouseESprog_1000.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/data/SynMouseESprog_1000.mat -------------------------------------------------------------------------------- /deploy_pypi.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # before running this: 4 | # 1. increase the version number in setup.py, git commit 5 | # 2. git tag v<version> 6 | # 3. git push --tags 7 | 8 | # delete existing dists 9 | rm dist/*.tar.gz 10 | rm dist/*.whl 11 | 12 | # create a source distribution 13 | python3 setup.py sdist 14 | 15 | # create wheels - note: these can't be uploaded to PyPI 16 | python3 setup.py bdist_wheel 17 | 18 | # upload 19 | twine upload dist/uncurl_seq-*.tar.gz --verbose 20 | 21 | # TODO: how to upload built wheels? This requires the 'manylinux1' platform tag? 22 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXAUTO = sphinx-apidoc 8 | SPHINXPROJ = UNCURL 9 | SOURCEDIR = . 10 | PYTHONDIR = ../uncurl 11 | BUILDDIR = _build 12 | 13 | # Put it first so that "make" without argument is like "make help".
14 | help: 15 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 16 | 17 | .PHONY: help Makefile 18 | 19 | # Catch-all target: route all unknown targets to Sphinx using the new 20 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 21 | %: Makefile 22 | @$(SPHINXAUTO) -o ./ "$(PYTHONDIR)" 23 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 24 | -------------------------------------------------------------------------------- /docs/_build/doctrees/best_practices.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/docs/_build/doctrees/best_practices.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/environment.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/docs/_build/doctrees/environment.pickle -------------------------------------------------------------------------------- /docs/_build/doctrees/index.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/docs/_build/doctrees/index.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/modules.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/docs/_build/doctrees/modules.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/nmf_wrapper.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/docs/_build/doctrees/nmf_wrapper.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/preprocessing.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/docs/_build/doctrees/preprocessing.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/readme_link.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/docs/_build/doctrees/readme_link.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/run_se.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/docs/_build/doctrees/run_se.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/state_estimation.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/docs/_build/doctrees/state_estimation.doctree 
-------------------------------------------------------------------------------- /docs/_build/doctrees/things_we_tried.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/docs/_build/doctrees/things_we_tried.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/uncurl.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/docs/_build/doctrees/uncurl.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/uncurl.robust.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/docs/_build/doctrees/uncurl.robust.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/uncurl.scalable.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/docs/_build/doctrees/uncurl.scalable.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/uncurl_2.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/docs/_build/doctrees/uncurl_2.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/uncurl_pub.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/docs/_build/doctrees/uncurl_pub.doctree -------------------------------------------------------------------------------- /docs/_build/doctrees/unsupported_methods.doctree: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/docs/_build/doctrees/unsupported_methods.doctree -------------------------------------------------------------------------------- /docs/_build/html/.buildinfo: -------------------------------------------------------------------------------- 1 | # Sphinx build info version 1 2 | # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. 3 | config: 7a41e62564e52ad3fce21832d1242a34 4 | tags: 645f666f9bcd5a90fca523b33c5a78b7 5 | -------------------------------------------------------------------------------- /docs/_build/html/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/docs/_build/html/.nojekyll -------------------------------------------------------------------------------- /docs/_build/html/_modules/index.html: -------------------------------------------------------------------------------- [Sphinx-generated page "Overview: module code — UNCURL 0.2.3 documentation": the index of rendered module source listings; navigation markup omitted]
-------------------------------------------------------------------------------- /docs/_build/html/_modules/uncurl/spatial_inference.html: -------------------------------------------------------------------------------- [Sphinx-generated page "uncurl.spatial_inference — UNCURL 0.2.3 documentation": the rendered source listing for uncurl/spatial_inference.py, which contains only the header comment "# Spatial inference using Poisson clustering" and a stub "def spatial(data):" with an empty docstring; navigation markup omitted]
-------------------------------------------------------------------------------- /docs/_build/html/_sources/best_practices.rst.txt: --------------------------------------------------------------------------------

UNCURL Best Practices
=====================


Distribution Selection
======================

In general, the best distribution to use for a given dataset is the one for which the most genes have the lowest error: see (cite figure here).

State Estimation
================

Gene subset selection
---------------------

.. code-block:: python

    import uncurl

    # data is a genes x cells count matrix;
    # select a high-variance gene subset before running state estimation
    genes = uncurl.max_variance_genes(data)
    data_subset = data[genes, :]


Initialization
--------------

The default initialization is based on truncated SVD followed by K-means; the alternatives are described under "Uncurl initialization options" in "things we tried".

Semi-supervision, using bulk or qualitative data
------------------------------------------------

Semi-supervision is done using the ``qualNorm`` function, which converts bulk or qualitative prior expression data into starting means for state estimation.
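A minimal sketch of this workflow (here ``qual_matrix``, a genes x cell-types binary matrix, is a placeholder for your own prior data, and the ``init_means`` keyword is an assumption — check the ``state_estimation`` module documentation for the exact argument name):

.. code-block:: python

    import numpy as np
    import uncurl

    data = np.loadtxt('counts.txt')  # genes x cells

    # convert qualitative prior information into starting means
    starting_means = uncurl.qualNorm(data, qual_matrix)

    # run state estimation initialized from those means
    M, W, ll = uncurl.poisson_estimate_state(data, qual_matrix.shape[1],
                                             init_means=starting_means)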
Clustering
==========

Visualization
=============

-------------------------------------------------------------------------------- /docs/_build/html/_sources/index.rst.txt: --------------------------------------------------------------------------------

.. UNCURL documentation master file, created by
   sphinx-quickstart on Mon Mar 27 13:42:21 2017.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

Welcome to UNCURL's documentation!
==================================


.. toctree::
   :maxdepth: 2
   :caption: Contents:

   readme_link
   unsupported_methods
   things_we_tried
   uncurl



Indices and tables
==================

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`

-------------------------------------------------------------------------------- /docs/_build/html/_sources/modules.rst.txt: --------------------------------------------------------------------------------

uncurl
======

.. toctree::
   :maxdepth: 4

   uncurl

-------------------------------------------------------------------------------- /docs/_build/html/_sources/nmf_wrapper.rst.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/docs/_build/html/_sources/nmf_wrapper.rst.txt -------------------------------------------------------------------------------- /docs/_build/html/_sources/preprocessing.rst.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/docs/_build/html/_sources/preprocessing.rst.txt -------------------------------------------------------------------------------- /docs/_build/html/_sources/readme_link.rst.txt: --------------------------------------------------------------------------------

.. include:: ../README.rst

-------------------------------------------------------------------------------- /docs/_build/html/_sources/things_we_tried.rst.txt: --------------------------------------------------------------------------------

Non-default parameters: things we tried and their results
==========================================================

There are a number of uncurl parameters and run configurations that we experimented with. Here are some of the results.


Cell normalization
------------------

This option involves normalizing the cells by their read counts. First, we calculate the total read count of each cell and divide all counts for each cell by its total read count. Then, we find the median total read count over all cells and multiply the entire matrix by that value. This method has been used previously for scRNA-seq datasets [see paper for reference].

Clustering performance after cell normalization was substantially better on count-valued datasets, and either unchanged or marginally worse on RPKM-normalized data and other data that has already been normalized in some other way. So we would suggest using this option for unnormalized count-valued datasets. The downside is that it might lose some information (if certain cell types are correlated with larger read counts), but we are not sure whether that happens in practice.

[TODO: include graphs]

To use this option, run ``data_normalized = uncurl.preprocessing.cell_normalize(data)``, and run uncurl on ``data_normalized``.
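The computation itself is small; here is a dense-matrix sketch of what ``cell_normalize`` does (``uncurl.preprocessing.cell_normalize`` is the supported entry point, and it also handles sparse matrices):

.. code-block:: python

    import numpy as np

    def cell_normalize_dense(data):
        # data: genes x cells count matrix; assumes every cell has at least one count
        counts_per_cell = data.sum(0)        # total read count of each cell
        normalized = data / counts_per_cell  # divide each cell's counts by its total
        # rescale the whole matrix by the median total read count
        return normalized * np.median(counts_per_cell)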
Constrained W
-------------

When this option is activated, the ``W`` matrix is normalized so that its columns sum to 1 after each round of alternating minimization. Without this option, ``W`` is only constrained to be nonnegative during the optimization process, and is normalized after the end of the optimization.

In clustering experiments, this option had mixed results: it performed marginally better on some datasets and marginally worse on others. On the 10X datasets, constrained W performed slightly better when combined with cell normalization, and worse without cell normalization.

[TODO: include graphs]

To use this option, add the argument ``constrain_w=True`` to ``run_state_estimation`` or ``poisson_estimate_state``. This does not work for the NMF-based methods.


Uncurl initialization options
-----------------------------

We provide a variety of initialization options for uncurl. Most initialization methods first perform a clustering, then initialize M based on the cluster means and W based on the cluster assignments. The default initialization is based on truncated SVD followed by K-means. We also provide initializations based on Poisson clustering, and on Poisson k-means++ with randomized W.

In clustering experiments, truncated SVD initialization usually performed the best, but there were some datasets on which Poisson clustering initialization performed better. For example, on randomly downsampled data, Poisson clustering initialization seems to perform better.

To use different initializations, use the argument ``initialization=<method>``, where ``<method>`` can be one of ``tsvd`` (truncated SVD + K-means), ``cluster`` (Poisson clustering), ``kmpp`` (Poisson k-means++), or ``km`` (k-means on the full data).


Alternative to QualNorm: mean-normalized initialization
-------------------------------------------------------

Given prior gene expression data, there are a variety of methods for initializing uncurl. ``QualNorm`` is one way of doing this initialization. Another way, when we have real-valued prior data, is to normalize the prior data so that each cell type sums to 1, and then multiply that by the mean per-cell read count of the actual data; a sketch is shown below.

This performed better than QualNorm on sparse datasets such as the 10X datasets.
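A sketch of the mean-normalized initialization (``prior``, a genes x cell-types matrix of real-valued prior expression, is a placeholder for your own data, and the helper name is ours, not part of the package):

.. code-block:: python

    import numpy as np

    def mean_normalized_init(data, prior):
        # scale each cell type (column) of the prior so that it sums to 1
        prior_normalized = prior / prior.sum(0)
        # rescale by the mean per-cell read count of the actual data
        return prior_normalized * data.sum(0).mean()

The result can then be used as the initial means for state estimation in place of the QualNorm output.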
Optimization methods
--------------------

The default optimization method for Poisson state estimation is NoLips [see paper for reference].

Before settling on NoLips as the default, we also tried a variety of different optimization methods. The first was L-BFGS, as implemented in scipy. We also tried gradient descent, stochastic gradient descent, and a custom method based on alternating iteratively reweighted least squares on a Poisson regression model. These methods are not included in the uncurl package because they had poor performance characteristics compared to NoLips. We settled on NoLips because it was easy to port to sparse matrices and was easily parallelizable. L-BFGS tends to converge in fewer iterations, but the per-iteration time for NoLips is much lower: it has closed-form updates that don't require gradient or objective value calculations, and the updates take advantage of data sparsity.

To use different optimization methods, use the argument ``method=<method>``, where ``<method>`` can be either ``NoLips`` (the default) or ``L-BFGS-B``.

-------------------------------------------------------------------------------- /docs/_build/html/_sources/uncurl.robust.rst.txt: --------------------------------------------------------------------------------

uncurl.robust package
=====================

Submodules
----------

uncurl.robust.state_estimation module
-------------------------------------

.. automodule:: uncurl.robust.state_estimation
    :members:
    :undoc-members:
    :show-inheritance:


Module contents
---------------

.. automodule:: uncurl.robust
    :members:
    :undoc-members:
    :show-inheritance:

-------------------------------------------------------------------------------- /docs/_build/html/_sources/uncurl.rst.txt: --------------------------------------------------------------------------------

uncurl package
==============

Submodules
----------

uncurl.preprocessing module
---------------------------

.. automodule:: uncurl.preprocessing
    :members:
    :undoc-members:
    :show-inheritance:

uncurl.run_se module
--------------------

.. automodule:: uncurl.run_se
    :members:
    :undoc-members:
    :show-inheritance:

uncurl.state_estimation module
------------------------------

.. automodule:: uncurl.state_estimation
    :members:
    :undoc-members:
    :show-inheritance:

uncurl.nmf_wrapper module
-------------------------

.. automodule:: uncurl.nmf_wrapper
    :members:
    :undoc-members:
    :show-inheritance:

uncurl.qual2quant module
------------------------

.. automodule:: uncurl.qual2quant
    :members:
    :undoc-members:
    :show-inheritance:

uncurl.clustering module
------------------------

.. automodule:: uncurl.clustering
    :members:
    :undoc-members:
    :show-inheritance:

uncurl.dimensionality_reduction module
--------------------------------------

.. automodule:: uncurl.dimensionality_reduction
    :members:
    :undoc-members:
    :show-inheritance:

uncurl.evaluation module
------------------------

.. automodule:: uncurl.evaluation
    :members:
    :undoc-members:
    :show-inheritance:

uncurl.experiment_runner module
-------------------------------

.. automodule:: uncurl.experiment_runner
    :members:
    :undoc-members:
    :show-inheritance:

uncurl.lineage module
---------------------

.. automodule:: uncurl.lineage
    :members:
    :undoc-members:
    :show-inheritance:

uncurl.nb_cluster module
------------------------

.. automodule:: uncurl.nb_cluster
    :members:
    :undoc-members:
    :show-inheritance:

uncurl.nb_state_estimation module
---------------------------------

.. automodule:: uncurl.nb_state_estimation
    :members:
    :undoc-members:
    :show-inheritance:

uncurl.pois_ll module
---------------------

.. automodule:: uncurl.pois_ll
    :members:
    :undoc-members:
    :show-inheritance:

uncurl.simulation module
------------------------

.. automodule:: uncurl.simulation
    :members:
    :undoc-members:
    :show-inheritance:


Module contents
---------------

.. automodule:: uncurl
    :members:
    :undoc-members:
    :show-inheritance:

-------------------------------------------------------------------------------- /docs/_build/html/_sources/uncurl.scalable.rst.txt: --------------------------------------------------------------------------------

uncurl.scalable package
=======================

Submodules
----------

uncurl.scalable.state_estimation module
---------------------------------------

.. automodule:: uncurl.scalable.state_estimation
    :members:
    :undoc-members:
    :show-inheritance:


Module contents
---------------

.. automodule:: uncurl.scalable
    :members:
    :undoc-members:
    :show-inheritance:
-------------------------------------------------------------------------------- /docs/_build/html/_sources/uncurl_2.rst.txt: --------------------------------------------------------------------------------

UNCURL public functions
=======================

.. automodule:: uncurl
    :members: poisson_cluster, nb_cluster, qual2quant, poisson_estimate_state, lineage

-------------------------------------------------------------------------------- /docs/_build/html/_sources/uncurl_pub.rst.txt: --------------------------------------------------------------------------------

UNCURL public functions
=======================

uncurl.max_variance_genes
-------------------------

.. autofunction:: uncurl.max_variance_genes

uncurl.qualNorm
---------------

.. autofunction:: uncurl.qualNorm

uncurl.poisson_cluster
----------------------

.. autofunction:: uncurl.poisson_cluster

uncurl.nb_cluster
-----------------

.. autofunction:: uncurl.nb_cluster

uncurl.poisson_estimate_state
-----------------------------

.. autofunction:: uncurl.poisson_estimate_state

uncurl.nb_estimate_state
------------------------

.. autofunction:: uncurl.nb_estimate_state

uncurl.mds
----------

.. autofunction:: uncurl.mds

uncurl.lineage
--------------

.. autofunction:: uncurl.lineage

uncurl.pseudotime
-----------------

.. autofunction:: uncurl.pseudotime

-------------------------------------------------------------------------------- /docs/_build/html/_sources/unsupported_methods.rst.txt: --------------------------------------------------------------------------------

Details on unsupported methods
==============================

There are a number of unsupported or experimental methods that are part of the UNCURL package. We provide information on them here for the sake of completeness, but cannot vouch for their correctness.

Alternative state estimation methods
------------------------------------

We provide implementations of the convex mixture model for the negative binomial (NB) and zero-inflated Poisson (ZIP) distributions. In our experiments they did not work as well as the Poisson model on most datasets, and they are substantially less efficient.

We also provide methods based on LDA (latent Dirichlet allocation), using the LightLDA implementation. The outputs of these methods can be interpreted as state estimation with a binomial sampling distribution. See ``lightlda_utils.py``. In practice, they had worse performance than Poisson state estimation in accuracy, runtime, and memory usage, especially on larger datasets.

Alternative clustering methods
------------------------------

As with state estimation, we provide NB and ZIP versions of k-means. The same efficiency considerations apply; an illustrative call is sketched below.
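As an illustration, the NB version follows the same call pattern as ``poisson_cluster`` (the exact return values here are an assumption — check ``nb_clustering.py`` for the authoritative signature):

.. code-block:: python

    import numpy as np
    from uncurl import nb_cluster

    data = np.loadtxt('counts.txt')  # genes x cells count matrix

    # cluster the cells into 4 groups under a negative binomial model;
    # returns per-cell cluster assignments and the fitted NB parameters
    assignments, P, R = nb_cluster(data, 4)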
Dimensionality reduction
------------------------

The ``mds`` function performs dimensionality reduction using MDS. This works by running MDS on M to convert it into a projection matrix, and then using that matrix to project W onto 2d space. This is much faster than tSNE or even PCA, at the cost of some fidelity, but it might work as a first pass.

Example:

.. code-block:: python

    import numpy as np
    from uncurl import mds, poisson_estimate_state

    data = np.loadtxt('counts.txt')

    # dimensionality reduction using MDS on state estimation means
    M, W, ll = poisson_estimate_state(data, 4)
    # proj is a 2d projection of the data.
    proj = mds(M, W, 2)


Lineage estimation
------------------

The ``lineage`` function performs lineage estimation from the output of ``poisson_estimate_state``. It fits the data to a different 5th degree polynomial for each cell type.

The ``pseudotime`` function calculates the pseudotime for each cell given the output of ``lineage`` and a starting cell.

Example (including visualization):

.. code-block:: python

    import numpy as np
    import matplotlib.pyplot as plt

    from uncurl import poisson_estimate_state, mds, lineage, pseudotime

    data = np.loadtxt('counts.txt')

    # pretend that there are three natural clusters in the dataset.
    M, W, ll = poisson_estimate_state(data, 3)

    curve_params, smoothed_points, edges, cell_assignments = lineage(M, W)

    # assume the "root" is cell 0
    ptime = pseudotime(0, edges, smoothed_points)

    # visualizing the lineage
    proj = mds(M, W, 2)

    plt.scatter(proj[0,:], proj[1,:], s=10, c=cell_assignments, edgecolors='none', alpha=0.7)
    plt.scatter(smoothed_points[0,:], smoothed_points[1,:], s=30, c=cell_assignments, edgecolors='none', alpha=0.7)
    # connect the lines
    for edge in edges:
        plt.plot((smoothed_points[0, edge[0]], smoothed_points[0, edge[1]]),
                 (smoothed_points[1, edge[0]], smoothed_points[1, edge[1]]), 'black', linewidth=2)
    plt.xlabel('dim 1')
    plt.ylabel('dim 2')


Ensemble Methods
----------------

Consensus clustering, consensus clustering-based initialization for uncurl, etc. This requires the `Cluster_Ensembles package `_.


Visualization
-------------

see ``vis.py``

-------------------------------------------------------------------------------- /docs/_build/html/_static/ajax-loader.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/docs/_build/html/_static/ajax-loader.gif -------------------------------------------------------------------------------- /docs/_build/html/_static/classic.css: -------------------------------------------------------------------------------- 1 | /* 2 | * classic.css_t 3 | * ~~~~~~~~~~~~~ 4 | * 5 | * Sphinx stylesheet -- classic theme. 6 | * 7 | * :copyright: Copyright 2007-2016 by the Sphinx team, see AUTHORS. 8 | * :license: BSD, see LICENSE for details.
9 | * 10 | */ 11 | 12 | @import url("basic.css"); 13 | 14 | /* -- page layout ----------------------------------------------------------- */ 15 | 16 | body { 17 | font-family: sans-serif; 18 | font-size: 100%; 19 | background-color: #11303d; 20 | color: #000; 21 | margin: 0; 22 | padding: 0; 23 | } 24 | 25 | div.document { 26 | background-color: #1c4e63; 27 | } 28 | 29 | div.documentwrapper { 30 | float: left; 31 | width: 100%; 32 | } 33 | 34 | div.bodywrapper { 35 | margin: 0 0 0 230px; 36 | } 37 | 38 | div.body { 39 | background-color: #ffffff; 40 | color: #000000; 41 | padding: 0 20px 30px 20px; 42 | } 43 | 44 | div.footer { 45 | color: #ffffff; 46 | width: 100%; 47 | padding: 9px 0 9px 0; 48 | text-align: center; 49 | font-size: 75%; 50 | } 51 | 52 | div.footer a { 53 | color: #ffffff; 54 | text-decoration: underline; 55 | } 56 | 57 | div.related { 58 | background-color: #133f52; 59 | line-height: 30px; 60 | color: #ffffff; 61 | } 62 | 63 | div.related a { 64 | color: #ffffff; 65 | } 66 | 67 | div.sphinxsidebar { 68 | } 69 | 70 | div.sphinxsidebar h3 { 71 | font-family: 'Trebuchet MS', sans-serif; 72 | color: #ffffff; 73 | font-size: 1.4em; 74 | font-weight: normal; 75 | margin: 0; 76 | padding: 0; 77 | } 78 | 79 | div.sphinxsidebar h3 a { 80 | color: #ffffff; 81 | } 82 | 83 | div.sphinxsidebar h4 { 84 | font-family: 'Trebuchet MS', sans-serif; 85 | color: #ffffff; 86 | font-size: 1.3em; 87 | font-weight: normal; 88 | margin: 5px 0 0 0; 89 | padding: 0; 90 | } 91 | 92 | div.sphinxsidebar p { 93 | color: #ffffff; 94 | } 95 | 96 | div.sphinxsidebar p.topless { 97 | margin: 5px 10px 10px 10px; 98 | } 99 | 100 | div.sphinxsidebar ul { 101 | margin: 10px; 102 | padding: 0; 103 | color: #ffffff; 104 | } 105 | 106 | div.sphinxsidebar a { 107 | color: #98dbcc; 108 | } 109 | 110 | div.sphinxsidebar input { 111 | border: 1px solid #98dbcc; 112 | font-family: sans-serif; 113 | font-size: 1em; 114 | } 115 | 116 | 117 | 118 | /* -- hyperlink styles ------------------------------------------------------ */ 119 | 120 | a { 121 | color: #355f7c; 122 | text-decoration: none; 123 | } 124 | 125 | a:visited { 126 | color: #355f7c; 127 | text-decoration: none; 128 | } 129 | 130 | a:hover { 131 | text-decoration: underline; 132 | } 133 | 134 | 135 | 136 | /* -- body styles ----------------------------------------------------------- */ 137 | 138 | div.body h1, 139 | div.body h2, 140 | div.body h3, 141 | div.body h4, 142 | div.body h5, 143 | div.body h6 { 144 | font-family: 'Trebuchet MS', sans-serif; 145 | background-color: #f2f2f2; 146 | font-weight: normal; 147 | color: #20435c; 148 | border-bottom: 1px solid #ccc; 149 | margin: 20px -20px 10px -20px; 150 | padding: 3px 0 3px 10px; 151 | } 152 | 153 | div.body h1 { margin-top: 0; font-size: 200%; } 154 | div.body h2 { font-size: 160%; } 155 | div.body h3 { font-size: 140%; } 156 | div.body h4 { font-size: 120%; } 157 | div.body h5 { font-size: 110%; } 158 | div.body h6 { font-size: 100%; } 159 | 160 | a.headerlink { 161 | color: #c60f0f; 162 | font-size: 0.8em; 163 | padding: 0 4px 0 4px; 164 | text-decoration: none; 165 | } 166 | 167 | a.headerlink:hover { 168 | background-color: #c60f0f; 169 | color: white; 170 | } 171 | 172 | div.body p, div.body dd, div.body li, div.body blockquote { 173 | text-align: justify; 174 | line-height: 130%; 175 | } 176 | 177 | div.admonition p.admonition-title + p { 178 | display: inline; 179 | } 180 | 181 | div.admonition p { 182 | margin-bottom: 5px; 183 | } 184 | 185 | div.admonition pre { 186 | margin-bottom: 5px; 187 | 
} 188 | 189 | div.admonition ul, div.admonition ol { 190 | margin-bottom: 5px; 191 | } 192 | 193 | div.note { 194 | background-color: #eee; 195 | border: 1px solid #ccc; 196 | } 197 | 198 | div.seealso { 199 | background-color: #ffc; 200 | border: 1px solid #ff6; 201 | } 202 | 203 | div.topic { 204 | background-color: #eee; 205 | } 206 | 207 | div.warning { 208 | background-color: #ffe4e4; 209 | border: 1px solid #f66; 210 | } 211 | 212 | p.admonition-title { 213 | display: inline; 214 | } 215 | 216 | p.admonition-title:after { 217 | content: ":"; 218 | } 219 | 220 | pre { 221 | padding: 5px; 222 | background-color: #eeffcc; 223 | color: #333333; 224 | line-height: 120%; 225 | border: 1px solid #ac9; 226 | border-left: none; 227 | border-right: none; 228 | } 229 | 230 | code { 231 | background-color: #ecf0f3; 232 | padding: 0 1px 0 1px; 233 | font-size: 0.95em; 234 | } 235 | 236 | th { 237 | background-color: #ede; 238 | } 239 | 240 | .warning code { 241 | background: #efc2c2; 242 | } 243 | 244 | .note code { 245 | background: #d6d6d6; 246 | } 247 | 248 | .viewcode-back { 249 | font-family: sans-serif; 250 | } 251 | 252 | div.viewcode-block:target { 253 | background-color: #f4debf; 254 | border-top: 1px solid #ac9; 255 | border-bottom: 1px solid #ac9; 256 | } 257 | 258 | div.code-block-caption { 259 | color: #efefef; 260 | background-color: #1c4e63; 261 | } -------------------------------------------------------------------------------- /docs/_build/html/_static/custom.css: -------------------------------------------------------------------------------- 1 | /* This file intentionally left blank. */ 2 | -------------------------------------------------------------------------------- /docs/_build/html/_static/pygments.css: -------------------------------------------------------------------------------- 1 | .highlight .hll { background-color: #ffffcc } 2 | .highlight { background: #eeffcc; } 3 | .highlight .c { color: #408090; font-style: italic } /* Comment */ 4 | .highlight .err { border: 1px solid #FF0000 } /* Error */ 5 | .highlight .k { color: #007020; font-weight: bold } /* Keyword */ 6 | .highlight .o { color: #666666 } /* Operator */ 7 | .highlight .ch { color: #408090; font-style: italic } /* Comment.Hashbang */ 8 | .highlight .cm { color: #408090; font-style: italic } /* Comment.Multiline */ 9 | .highlight .cp { color: #007020 } /* Comment.Preproc */ 10 | .highlight .cpf { color: #408090; font-style: italic } /* Comment.PreprocFile */ 11 | .highlight .c1 { color: #408090; font-style: italic } /* Comment.Single */ 12 | .highlight .cs { color: #408090; background-color: #fff0f0 } /* Comment.Special */ 13 | .highlight .gd { color: #A00000 } /* Generic.Deleted */ 14 | .highlight .ge { font-style: italic } /* Generic.Emph */ 15 | .highlight .gr { color: #FF0000 } /* Generic.Error */ 16 | .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ 17 | .highlight .gi { color: #00A000 } /* Generic.Inserted */ 18 | .highlight .go { color: #333333 } /* Generic.Output */ 19 | .highlight .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */ 20 | .highlight .gs { font-weight: bold } /* Generic.Strong */ 21 | .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ 22 | .highlight .gt { color: #0044DD } /* Generic.Traceback */ 23 | .highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */ 24 | .highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */ 25 | .highlight .kn { color: #007020; font-weight: bold } /* 
Keyword.Namespace */ 26 | .highlight .kp { color: #007020 } /* Keyword.Pseudo */ 27 | .highlight .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */ 28 | .highlight .kt { color: #902000 } /* Keyword.Type */ 29 | .highlight .m { color: #208050 } /* Literal.Number */ 30 | .highlight .s { color: #4070a0 } /* Literal.String */ 31 | .highlight .na { color: #4070a0 } /* Name.Attribute */ 32 | .highlight .nb { color: #007020 } /* Name.Builtin */ 33 | .highlight .nc { color: #0e84b5; font-weight: bold } /* Name.Class */ 34 | .highlight .no { color: #60add5 } /* Name.Constant */ 35 | .highlight .nd { color: #555555; font-weight: bold } /* Name.Decorator */ 36 | .highlight .ni { color: #d55537; font-weight: bold } /* Name.Entity */ 37 | .highlight .ne { color: #007020 } /* Name.Exception */ 38 | .highlight .nf { color: #06287e } /* Name.Function */ 39 | .highlight .nl { color: #002070; font-weight: bold } /* Name.Label */ 40 | .highlight .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */ 41 | .highlight .nt { color: #062873; font-weight: bold } /* Name.Tag */ 42 | .highlight .nv { color: #bb60d5 } /* Name.Variable */ 43 | .highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */ 44 | .highlight .w { color: #bbbbbb } /* Text.Whitespace */ 45 | .highlight .mb { color: #208050 } /* Literal.Number.Bin */ 46 | .highlight .mf { color: #208050 } /* Literal.Number.Float */ 47 | .highlight .mh { color: #208050 } /* Literal.Number.Hex */ 48 | .highlight .mi { color: #208050 } /* Literal.Number.Integer */ 49 | .highlight .mo { color: #208050 } /* Literal.Number.Oct */ 50 | .highlight .sa { color: #4070a0 } /* Literal.String.Affix */ 51 | .highlight .sb { color: #4070a0 } /* Literal.String.Backtick */ 52 | .highlight .sc { color: #4070a0 } /* Literal.String.Char */ 53 | .highlight .dl { color: #4070a0 } /* Literal.String.Delimiter */ 54 | .highlight .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */ 55 | .highlight .s2 { color: #4070a0 } /* Literal.String.Double */ 56 | .highlight .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */ 57 | .highlight .sh { color: #4070a0 } /* Literal.String.Heredoc */ 58 | .highlight .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */ 59 | .highlight .sx { color: #c65d09 } /* Literal.String.Other */ 60 | .highlight .sr { color: #235388 } /* Literal.String.Regex */ 61 | .highlight .s1 { color: #4070a0 } /* Literal.String.Single */ 62 | .highlight .ss { color: #517918 } /* Literal.String.Symbol */ 63 | .highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */ 64 | .highlight .fm { color: #06287e } /* Name.Function.Magic */ 65 | .highlight .vc { color: #bb60d5 } /* Name.Variable.Class */ 66 | .highlight .vg { color: #bb60d5 } /* Name.Variable.Global */ 67 | .highlight .vi { color: #bb60d5 } /* Name.Variable.Instance */ 68 | .highlight .vm { color: #bb60d5 } /* Name.Variable.Magic */ 69 | .highlight .il { color: #208050 } /* Literal.Number.Integer.Long */ -------------------------------------------------------------------------------- /docs/_build/html/_static/sidebar.js: -------------------------------------------------------------------------------- 1 | /* 2 | * sidebar.js 3 | * ~~~~~~~~~~ 4 | * 5 | * This script makes the Sphinx sidebar collapsible. 6 | * 7 | * .sphinxsidebar contains .sphinxsidebarwrapper. This script adds 8 | * in .sphixsidebar, after .sphinxsidebarwrapper, the #sidebarbutton 9 | * used to collapse and expand the sidebar. 
10 | * 11 | * When the sidebar is collapsed the .sphinxsidebarwrapper is hidden 12 | * and the width of the sidebar and the margin-left of the document 13 | * are decreased. When the sidebar is expanded the opposite happens. 14 | * This script saves a per-browser/per-session cookie used to 15 | * remember the position of the sidebar among the pages. 16 | * Once the browser is closed the cookie is deleted and the position 17 | * reset to the default (expanded). 18 | * 19 | * :copyright: Copyright 2007-2016 by the Sphinx team, see AUTHORS. 20 | * :license: BSD, see LICENSE for details. 21 | * 22 | */ 23 | 24 | $(function() { 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | // global elements used by the functions. 34 | // the 'sidebarbutton' element is defined as global after its 35 | // creation, in the add_sidebar_button function 36 | var bodywrapper = $('.bodywrapper'); 37 | var sidebar = $('.sphinxsidebar'); 38 | var sidebarwrapper = $('.sphinxsidebarwrapper'); 39 | 40 | // for some reason, the document has no sidebar; do not run into errors 41 | if (!sidebar.length) return; 42 | 43 | // original margin-left of the bodywrapper and width of the sidebar 44 | // with the sidebar expanded 45 | var bw_margin_expanded = bodywrapper.css('margin-left'); 46 | var ssb_width_expanded = sidebar.width(); 47 | 48 | // margin-left of the bodywrapper and width of the sidebar 49 | // with the sidebar collapsed 50 | var bw_margin_collapsed = '.8em'; 51 | var ssb_width_collapsed = '.8em'; 52 | 53 | // colors used by the current theme 54 | var dark_color = $('.related').css('background-color'); 55 | var light_color = $('.document').css('background-color'); 56 | 57 | function sidebar_is_collapsed() { 58 | return sidebarwrapper.is(':not(:visible)'); 59 | } 60 | 61 | function toggle_sidebar() { 62 | if (sidebar_is_collapsed()) 63 | expand_sidebar(); 64 | else 65 | collapse_sidebar(); 66 | } 67 | 68 | function collapse_sidebar() { 69 | sidebarwrapper.hide(); 70 | sidebar.css('width', ssb_width_collapsed); 71 | bodywrapper.css('margin-left', bw_margin_collapsed); 72 | sidebarbutton.css({ 73 | 'margin-left': '0', 74 | 'height': bodywrapper.height() 75 | }); 76 | sidebarbutton.find('span').text('»'); 77 | sidebarbutton.attr('title', _('Expand sidebar')); 78 | document.cookie = 'sidebar=collapsed'; 79 | } 80 | 81 | function expand_sidebar() { 82 | bodywrapper.css('margin-left', bw_margin_expanded); 83 | sidebar.css('width', ssb_width_expanded); 84 | sidebarwrapper.show(); 85 | sidebarbutton.css({ 86 | 'margin-left': ssb_width_expanded-12, 87 | 'height': bodywrapper.height() 88 | }); 89 | sidebarbutton.find('span').text('«'); 90 | sidebarbutton.attr('title', _('Collapse sidebar')); 91 | document.cookie = 'sidebar=expanded'; 92 | } 93 | 94 | function add_sidebar_button() { 95 | sidebarwrapper.css({ 96 | 'float': 'left', 97 | 'margin-right': '0', 98 | 'width': ssb_width_expanded - 28 99 | }); 100 | // create the button 101 | sidebar.append( 102 | '
<div id="sidebarbutton"><span>&laquo;</span></div>
' 103 | ); 104 | var sidebarbutton = $('#sidebarbutton'); 105 | light_color = sidebarbutton.css('background-color'); 106 | // find the height of the viewport to center the '<<' in the page 107 | var viewport_height; 108 | if (window.innerHeight) 109 | viewport_height = window.innerHeight; 110 | else 111 | viewport_height = $(window).height(); 112 | sidebarbutton.find('span').css({ 113 | 'display': 'block', 114 | 'margin-top': (viewport_height - sidebar.position().top - 20) / 2 115 | }); 116 | 117 | sidebarbutton.click(toggle_sidebar); 118 | sidebarbutton.attr('title', _('Collapse sidebar')); 119 | sidebarbutton.css({ 120 | 'color': '#FFFFFF', 121 | 'border-left': '1px solid ' + dark_color, 122 | 'font-size': '1.2em', 123 | 'cursor': 'pointer', 124 | 'height': bodywrapper.height(), 125 | 'padding-top': '1px', 126 | 'margin-left': ssb_width_expanded - 12 127 | }); 128 | 129 | sidebarbutton.hover( 130 | function () { 131 | $(this).css('background-color', dark_color); 132 | }, 133 | function () { 134 | $(this).css('background-color', light_color); 135 | } 136 | ); 137 | } 138 | 139 | function set_position_from_cookie() { 140 | if (!document.cookie) 141 | return; 142 | var items = document.cookie.split(';'); 143 | for(var k=0; k<items.length; k++) { 144 | var key_val = items[k].split('='); 145 | var key = key_val[0].trim(); 146 | if (key == 'sidebar') { 147 | var value = key_val[1]; 148 | if ((value == 'collapsed') && (!sidebar_is_collapsed())) 149 | collapse_sidebar(); 150 | else if ((value == 'expanded') && (sidebar_is_collapsed())) 151 | expand_sidebar(); 152 | } 153 | } 154 | } 155 | 156 | add_sidebar_button(); 157 | var sidebarbutton = $('#sidebarbutton'); 158 | set_position_from_cookie(); 159 | }); -------------------------------------------------------------------------------- /docs/_build/html/best_practices.html: --------------------------------------------------------------------------------
[Sphinx-rendered page "UNCURL Best Practices — UNCURL 0.2.3 documentation": rendered duplicate of _sources/best_practices.rst.txt above; navigation markup omitted]
-------------------------------------------------------------------------------- /docs/_build/html/modules.html: -------------------------------------------------------------------------------- [Sphinx-rendered page "uncurl — UNCURL 0.2.3 documentation": rendered duplicate of _sources/modules.rst.txt above; navigation markup omitted]
-------------------------------------------------------------------------------- /docs/_build/html/nmf_wrapper.html: -------------------------------------------------------------------------------- [Sphinx-rendered page "<no title> — UNCURL 0.2.3 documentation"; navigation markup omitted]
-------------------------------------------------------------------------------- /docs/_build/html/objects.inv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/docs/_build/html/objects.inv
-------------------------------------------------------------------------------- /docs/_build/html/preprocessing.html: -------------------------------------------------------------------------------- [Sphinx-rendered page "<no title> — UNCURL 0.2.3 documentation"; navigation markup omitted]
-------------------------------------------------------------------------------- /docs/_build/html/run_se.html: -------------------------------------------------------------------------------- [Sphinx-rendered page "<no title> — UNCURL 0.2.3 documentation"; navigation markup omitted]
-------------------------------------------------------------------------------- /docs/_build/html/search.html: -------------------------------------------------------------------------------- [Sphinx-rendered page "Search — UNCURL 0.2.3 documentation": the standard Sphinx search form and "Please activate JavaScript" notice; navigation markup omitted]
-------------------------------------------------------------------------------- /docs/_build/html/state_estimation.html: -------------------------------------------------------------------------------- [Sphinx-rendered page "<no title> — UNCURL 0.2.3 documentation"; navigation markup omitted]
-------------------------------------------------------------------------------- /docs/_build/html/uncurl.robust.html: -------------------------------------------------------------------------------- [Sphinx-rendered page "uncurl.robust package — UNCURL 0.2.3 documentation": rendered duplicate of _sources/uncurl.robust.rst.txt above; navigation markup omitted]
-------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # UNCURL documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Mar 27 13:42:21 2017. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 18 | # 19 | import os 20 | import sys 21 | sys.path.insert(0, os.path.abspath('../uncurl/')) 22 | sys.path.insert(0, os.path.abspath('../')) 23 | 24 | 25 | # -- General configuration ------------------------------------------------ 26 | 27 | # If your documentation needs a minimal Sphinx version, state it here. 28 | # 29 | # needs_sphinx = '1.0' 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 34 | extensions = ['sphinx.ext.autodoc', 35 | 'sphinx.ext.coverage', 36 | 'sphinx.ext.mathjax', 37 | 'sphinx.ext.viewcode', 38 | 'sphinx.ext.githubpages', 39 | 'sphinx.ext.napoleon'] 40 | 41 | # Add any paths that contain templates here, relative to this directory. 42 | templates_path = ['_templates'] 43 | 44 | # The suffix(es) of source filenames. 45 | # You can specify multiple suffix as a list of string: 46 | # 47 | # source_suffix = ['.rst', '.md'] 48 | source_suffix = '.rst' 49 | 50 | # The master toctree document. 51 | master_doc = 'index' 52 | 53 | # General information about the project. 54 | project = u'UNCURL' 55 | copyright = u'2017, Sumit Mukherjee, Yue Zhang' 56 | author = u'Sumit Mukherjee, Yue Zhang' 57 | 58 | # The version info for the project you're documenting, acts as replacement for 59 | # |version| and |release|, also used in various other places throughout the 60 | # built documents. 61 | # 62 | # The short X.Y version. 63 | version = u'0.2.3' 64 | # The full version, including alpha/beta/rc tags. 65 | release = u'0.2.3' 66 | 67 | # The language for content autogenerated by Sphinx. Refer to documentation 68 | # for a list of supported languages. 69 | # 70 | # This is also used if you do content translation via gettext catalogs. 71 | # Usually you set "language" from the command line for these cases. 72 | language = None 73 | 74 | # List of patterns, relative to source directory, that match files and 75 | # directories to ignore when looking for source files. 76 | # This patterns also effect to html_static_path and html_extra_path 77 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 78 | 79 | # The name of the Pygments (syntax highlighting) style to use. 80 | pygments_style = 'sphinx' 81 | 82 | # If true, `todo` and `todoList` produce output, else they produce nothing. 83 | todo_include_todos = False 84 | 85 | 86 | # -- Options for HTML output ---------------------------------------------- 87 | 88 | # The theme to use for HTML and HTML Help pages. See the documentation for 89 | # a list of builtin themes.
90 | # 91 | html_theme = 'classic' 92 | 93 | # Theme options are theme-specific and customize the look and feel of a theme 94 | # further. For a list of options available for each theme, see the 95 | # documentation. 96 | # 97 | # html_theme_options = {} 98 | 99 | # Add any paths that contain custom static files (such as style sheets) here, 100 | # relative to this directory. They are copied after the builtin static files, 101 | # so a file named "default.css" will overwrite the builtin "default.css". 102 | html_static_path = ['_static'] 103 | 104 | 105 | # -- Options for HTMLHelp output ------------------------------------------ 106 | 107 | # Output file base name for HTML help builder. 108 | htmlhelp_basename = 'UNCURLdoc' 109 | 110 | 111 | # -- Options for LaTeX output --------------------------------------------- 112 | 113 | latex_elements = { 114 | # The paper size ('letterpaper' or 'a4paper'). 115 | # 116 | # 'papersize': 'letterpaper', 117 | 118 | # The font size ('10pt', '11pt' or '12pt'). 119 | # 120 | # 'pointsize': '10pt', 121 | 122 | # Additional stuff for the LaTeX preamble. 123 | # 124 | # 'preamble': '', 125 | 126 | # Latex figure (float) alignment 127 | # 128 | # 'figure_align': 'htbp', 129 | } 130 | 131 | # Grouping the document tree into LaTeX files. List of tuples 132 | # (source start file, target name, title, 133 | # author, documentclass [howto, manual, or own class]). 134 | latex_documents = [ 135 | (master_doc, 'UNCURL.tex', u'UNCURL Documentation', 136 | u'Sumit Mukherjee, Yue Zhang', 'manual'), 137 | ] 138 | 139 | 140 | # -- Options for manual page output --------------------------------------- 141 | 142 | # One entry per manual page. List of tuples 143 | # (source start file, name, description, authors, manual section). 144 | man_pages = [ 145 | (master_doc, 'uncurl', u'UNCURL Documentation', 146 | [author], 1) 147 | ] 148 | 149 | 150 | # -- Options for Texinfo output ------------------------------------------- 151 | 152 | # Grouping the document tree into Texinfo files. List of tuples 153 | # (source start file, target name, title, author, 154 | # dir menu entry, description, category) 155 | texinfo_documents = [ 156 | (master_doc, 'UNCURL', u'UNCURL Documentation', 157 | author, 'UNCURL', 'One line description of project.', 158 | 'Miscellaneous'), 159 | ] 160 | 161 | 162 | 163 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. UNCURL documentation master file, created by 2 | sphinx-quickstart on Mon Mar 27 13:42:21 2017. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to UNCURL's documentation! 7 | ================================== 8 | 9 | 10 | .. toctree:: 11 | :maxdepth: 2 12 | :caption: Contents: 13 | 14 | readme_link 15 | unsupported_methods 16 | things_we_tried 17 | uncurl 18 | 19 | 20 | 21 | Indices and tables 22 | ================== 23 | 24 | * :ref:`genindex` 25 | * :ref:`modindex` 26 | * :ref:`search` 27 | -------------------------------------------------------------------------------- /docs/readme_link.rst: -------------------------------------------------------------------------------- 1 | .. 
include:: ../README.rst 2 | -------------------------------------------------------------------------------- /docs/things_we_tried.rst: -------------------------------------------------------------------------------- 1 | Non-default parameters: things we tried and their results 2 | ========================================================= 3 | 4 | There are a number of uncurl options (not parameters exactly; more like run configurations) that we experimented with. Here are some results. 5 | 6 | 7 | Cell normalization 8 | ------------------ 9 | 10 | This option involves normalizing the cells by their read counts. First, we calculate the total read count of each cell, and divide all counts for each cell by its total read count. Then, we find the median total read count over all cells, and multiply the entire matrix by that value. This method has been used previously for scRNA-seq datasets [see paper for reference]. 11 | 12 | Clustering performance after cell normalization was substantially better on count-valued datasets, and it either had no effect or was marginally worse on RPKM-normalized data and other data that has already been normalized in some other way. So we would suggest using this option for unnormalized count-valued datasets. The downside is that it might lose some information (if certain cell types were correlated with larger read counts), but we are not sure whether that happens in practice. 13 | 14 | [TODO: include graphs] 15 | 16 | To use this option, run ``data_normalized = uncurl.preprocessing.cell_normalize(data)``, and run uncurl on ``data_normalized``. A sketch of the underlying computation is given further down this page. 17 | 18 | 19 | Constrained W 20 | ------------- 21 | 22 | When this option is activated, the ``W`` matrix is normalized so that its columns sum to 1 after each round of alternating minimization. Without this option, ``W`` is only constrained to be nonnegative during the optimization process, and normalized after the end of the optimization. 23 | 24 | In clustering experiments, this option had mixed results. It performed marginally better on some datasets and marginally worse on others. On the 10X datasets, constrained W performed slightly better when combined with cell normalization, and worse without cell normalization. 25 | 26 | [TODO: include graphs] 27 | 28 | To use this option, add the argument ``constrain_w=True`` to ``run_state_estimation`` or ``poisson_estimate_state``. This does not work for the NMF-based methods. 29 | 30 | 31 | Uncurl initialization options 32 | ----------------------------- 33 | 34 | We provide a variety of initialization options for uncurl. Most initialization methods first perform a clustering, initialize M based on the cluster means, and W based on the cluster assignments. The default initialization is based on truncated SVD followed by K-means. We also provide initializations based on Poisson clustering, and Poisson k-means++ with randomized W. 35 | 36 | In clustering experiments, truncated SVD initialization usually performed the best, but there were some datasets on which Poisson clustering initialization performed better. For example, on randomly downsampled data, Poisson clustering initialization seems to perform better. 37 | 38 | To use different initializations, use the argument ``initialization=<method>``, where ``<method>`` can be one of ``tsvd`` (truncated SVD + K-means), ``cluster`` (Poisson clustering), ``kmpp`` (Poisson k-means++), or ``km`` (k-means on the full data).
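For reference, here is a minimal sketch of the cell-normalization computation described above, written for a dense genes-by-cells matrix. This is an illustration only, not the implementation; the actual ``uncurl.preprocessing.cell_normalize`` also handles sparse matrices.

.. code-block:: python

    import numpy as np

    def cell_normalize_dense(data):
        # data: genes x cells count matrix
        counts = data.sum(0)                   # total read count of each cell
        normalized = data / counts             # divide each cell's counts by its total
        return normalized * np.median(counts)  # rescale by the median total read count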
39 | 40 | 41 | Alternative to QualNorm: mean-normalized initialization 42 | ------------------------------------------------------- 43 | 44 | Given prior gene expression data, there are a variety of methods for initializing uncurl. ``QualNorm`` is one way of doing this initialization. Alternatively, when we have real-valued prior data, we can normalize the prior data so that each cell type sums to 1, and then multiply it by the mean per-cell read count of the actual data. 45 | 46 | This performed better than QualNorm on sparse datasets such as the 10X datasets. 47 | 48 | 49 | Optimization methods 50 | -------------------- 51 | 52 | The default optimization method for Poisson state estimation is NoLips [see paper for reference]. 53 | 54 | Before settling on NoLips as a default, we also tried a variety of different optimization methods. The first was L-BFGS, as implemented in scipy. We also tried gradient descent, stochastic gradient descent, and a custom method based on alternating iteratively reweighted least squares on a Poisson regression model. These methods are not included in the uncurl package because they had poor performance characteristics compared to NoLips. We settled on NoLips because it was easy to port to sparse matrices and was easily parallelizable. L-BFGS tends to converge in fewer iterations, but the per-iteration time for NoLips is much lower: it has closed-form updates that don't require gradient or objective value calculations, and the updates take advantage of data sparsity. 55 | 56 | To use different optimization methods, use the argument ``method=<method>``, where ``<method>`` can be either ``NoLips`` (default) or ``L-BFGS-B``. 57 | -------------------------------------------------------------------------------- /docs/uncurl.rst: -------------------------------------------------------------------------------- 1 | uncurl package 2 | ============== 3 | 4 | Submodules 5 | ---------- 6 | 7 | uncurl.preprocessing module 8 | --------------------------- 9 | 10 | .. automodule:: uncurl.preprocessing 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | uncurl.run_se module 16 | ------------------------------ 17 | 18 | .. automodule:: uncurl.run_se 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | uncurl.state_estimation module 24 | ------------------------------ 25 | 26 | .. automodule:: uncurl.state_estimation 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | uncurl.nmf_wrapper module 32 | ------------------------------ 33 | 34 | .. automodule:: uncurl.nmf_wrapper 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | uncurl.qual2quant module 40 | ------------------------ 41 | 42 | .. automodule:: uncurl.qual2quant 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | 47 | uncurl.clustering module 48 | ------------------------ 49 | 50 | .. automodule:: uncurl.clustering 51 | :members: 52 | :undoc-members: 53 | :show-inheritance: 54 | 55 | uncurl.dimensionality_reduction module 56 | -------------------------------------- 57 | 58 | .. automodule:: uncurl.dimensionality_reduction 59 | :members: 60 | :undoc-members: 61 | :show-inheritance: 62 | 63 | uncurl.evaluation module 64 | ------------------------ 65 | 66 | .. automodule:: uncurl.evaluation 67 | :members: 68 | :undoc-members: 69 | :show-inheritance: 70 | 71 | uncurl.experiment_runner module 72 | ------------------------------- 73 | 74 | ..
automodule:: uncurl.experiment_runner 75 | :members: 76 | :undoc-members: 77 | :show-inheritance: 78 | 79 | uncurl.lineage module 80 | --------------------- 81 | 82 | .. automodule:: uncurl.lineage 83 | :members: 84 | :undoc-members: 85 | :show-inheritance: 86 | 87 | uncurl.nb_cluster module 88 | ------------------------ 89 | 90 | .. automodule:: uncurl.nb_cluster 91 | :members: 92 | :undoc-members: 93 | :show-inheritance: 94 | 95 | uncurl.nb_state_estimation module 96 | --------------------------------- 97 | 98 | .. automodule:: uncurl.nb_state_estimation 99 | :members: 100 | :undoc-members: 101 | :show-inheritance: 102 | 103 | uncurl.pois_ll module 104 | --------------------- 105 | 106 | .. automodule:: uncurl.pois_ll 107 | :members: 108 | :undoc-members: 109 | :show-inheritance: 110 | 111 | uncurl.simulation module 112 | ------------------------ 113 | 114 | .. automodule:: uncurl.simulation 115 | :members: 116 | :undoc-members: 117 | :show-inheritance: 118 | 119 | 120 | Module contents 121 | --------------- 122 | 123 | .. automodule:: uncurl 124 | :members: 125 | :undoc-members: 126 | :show-inheritance: 127 | -------------------------------------------------------------------------------- /docs/uncurl_pub.rst: -------------------------------------------------------------------------------- 1 | UNCURL public functions 2 | ======================= 3 | 4 | uncurl.max_variance_genes 5 | ------------------------- 6 | 7 | .. autofunction:: uncurl.max_variance_genes 8 | 9 | uncurl.qualNorm 10 | ----------------- 11 | 12 | .. autofunction:: uncurl.qualNorm 13 | 14 | uncurl.poisson_cluster 15 | ---------------------- 16 | 17 | .. autofunction:: uncurl.poisson_cluster 18 | 19 | uncurl.nb_cluster 20 | ----------------- 21 | 22 | .. autofunction:: uncurl.nb_cluster 23 | 24 | uncurl.poisson_estimate_state 25 | ----------------------------- 26 | 27 | .. autofunction:: uncurl.poisson_estimate_state 28 | 29 | uncurl.nb_estimate_state 30 | ----------------------------- 31 | 32 | .. autofunction:: uncurl.nb_estimate_state 33 | 34 | uncurl.mds 35 | ----------------- 36 | 37 | .. autofunction:: uncurl.mds 38 | 39 | uncurl.lineage 40 | -------------- 41 | 42 | .. autofunction:: uncurl.lineage 43 | 44 | uncurl.pseudotime 45 | ----------------- 46 | 47 | .. autofunction:: uncurl.pseudotime 48 | -------------------------------------------------------------------------------- /docs/unsupported_methods.rst: -------------------------------------------------------------------------------- 1 | Details on unsupported methods 2 | ============================== 3 | 4 | There are a number of unsupported or experimental methods that are part of the UNCURL package. We provide information on them here for the sake of completeness, but cannot vouch for their correctness. 5 | 6 | Alternative state estimation methods 7 | ------------------------------------ 8 | 9 | We provide implementations of the convex mixture model for the negative binomial (NB) and zero-inflated Poisson (ZIP) distributions. In our experiments they did not work as well as the Poisson model on most datasets, and they are substantially less efficient. A usage sketch is given below. 10 | 11 | We also provide methods based on LDA (latent Dirichlet allocation), using the LightLDA implementation. The outputs of these methods can be interpreted as state estimation with a binomial sampling distribution. See ``lightlda_utils.py``. In practice, they had worse performance than Poisson state estimation in accuracy, runtime, and memory usage, especially on larger datasets.
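As a usage sketch for the NB convex mixture model: the call below mirrors the signature exercised in ``tests/test_nb_state_estimation.py`` (the optional ``init_means`` and ``R`` arguments used there are omitted here). Since these methods are unsupported, treat the interface as subject to change.

.. code-block:: python

    import numpy as np
    from uncurl.nb_state_estimation import nb_estimate_state

    data = np.loadtxt('counts.txt')  # genes x cells

    # returns means M (genes x k), weights W (k x cells),
    # NB dispersion parameters R, and the final log-likelihood
    M, W, R, ll = nb_estimate_state(data, 2)
    labels = W.argmax(0)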
12 | 13 | Alternative clustering methods 14 | ------------------------------ 15 | 16 | As with state estimation, we provide NB and ZIP versions of k-means. The same efficiency considerations apply; a brief usage sketch is given further down this page. 17 | 18 | Dimensionality reduction 19 | ------------------------ 20 | 21 | The ``mds`` function performs dimensionality reduction using MDS. This works by running MDS on M to convert it into a projection matrix, and then using that matrix to project W onto 2d space. This is much faster than tSNE or even PCA, at the cost of some fidelity, but it might work as a first pass. 22 | 23 | Example: 24 | 25 | .. code-block:: python 26 | 27 | import numpy as np 28 | from uncurl import mds, poisson_estimate_state 29 | 30 | data = np.loadtxt('counts.txt') 31 | 32 | # dimensionality reduction using MDS on state estimation means 33 | M, W, ll = poisson_estimate_state(data, 4) 34 | # proj is a 2d projection of the data. 35 | proj = mds(M, W, 2) 36 | 37 | 38 | Lineage estimation 39 | ------------------ 40 | 41 | The ``lineage`` function performs lineage estimation from the output of ``poisson_estimate_state``. It fits the data to a different 5th degree polynomial for each cell type. 42 | 43 | The ``pseudotime`` function calculates the pseudotime for each cell given the output of ``lineage`` and a starting cell. 44 | 45 | Example (including visualization): 46 | 47 | .. code-block:: python 48 | 49 | import numpy as np 50 | import matplotlib.pyplot as plt 51 | 52 | from uncurl import poisson_estimate_state, mds, lineage, pseudotime 53 | 54 | data = np.loadtxt('counts.txt') 55 | 56 | # pretend that there are three natural clusters in the dataset. 57 | M, W, ll = poisson_estimate_state(data, 3) 58 | 59 | curve_params, smoothed_points, edges, cell_assignments = lineage(M, W) 60 | 61 | # assume the "root" is cell 0 62 | ptime = pseudotime(0, edges, smoothed_points) 63 | 64 | # visualizing the lineage 65 | proj = mds(M, W, 2) 66 | 67 | plt.scatter(proj[0,:], proj[1,:], s=10, c=cell_assignments, edgecolors='none', alpha=0.7) 68 | plt.scatter(smoothed_points[0,:], smoothed_points[1,:], s=30, c=cell_assignments, edgecolors='none', alpha=0.7) 69 | # connect the lines 70 | for edge in edges: 71 | plt.plot((smoothed_points[0, edge[0]], smoothed_points[0, edge[1]]), 72 | (smoothed_points[1, edge[0]], smoothed_points[1, edge[1]]), 'black', linewidth=2) 73 | plt.xlabel('dim 1') 74 | plt.ylabel('dim 2') 75 | 76 | 77 | Ensemble Methods 78 | ---------------- 79 | 80 | Consensus clustering, consensus clustering-based initialization for uncurl, etc. This requires the ``Cluster_Ensembles`` package (listed in ``optional_requirements.txt``).
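For the NB and ZIP k-means variants mentioned under *Alternative clustering methods* above, usage follows the same pattern as ``poisson_cluster``; the calls below match those in ``examples/example.py``:

.. code-block:: python

    import numpy as np
    import uncurl

    data = np.loadtxt('counts.txt')  # genes x cells

    # NB clustering: returns assignments and the fitted NB parameters P, R
    assignments_nb, P, R = uncurl.nb_cluster(data, 2)
    # ZIP clustering: returns assignments, means, and zero-inflation parameters
    assignments_zip, M, L = uncurl.zip_cluster(data, 2)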
81 | 82 | 83 | Visualization 84 | ------------- 85 | 86 | See ``vis.py``. 87 | -------------------------------------------------------------------------------- /examples/example.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from scipy.io import loadmat 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | 6 | import uncurl 7 | from uncurl.evaluation import purity 8 | if __name__ == '__main__': 9 | dat = loadmat('data/SCDE_test.mat') 10 | data = dat['dat'].toarray() 11 | centers, assignments = uncurl.kmeans_pp(data, 2) 12 | lls = uncurl.poisson_ll(data, centers) 13 | # Poisson clustering 14 | assignments_poisson, centers = uncurl.poisson_cluster(data, 2, init=centers) 15 | # NB clustering 16 | assignments_nb, P, R = uncurl.nb_cluster(data, 2) 17 | # ZIP clustering 18 | assignments_zip, M, L = uncurl.zip_cluster(data, 2) 19 | true_labs = dat['Lab'][0] 20 | print('poisson purity:', purity(assignments_poisson, true_labs)) 21 | print('NB purity:', purity(assignments_nb, true_labs)) 22 | print('ZIP purity:', purity(assignments_zip, true_labs)) 23 | # State estimation 24 | means, weights, ll = uncurl.poisson_estimate_state(data, 2, disp=False) 25 | w_classes = weights.argmax(0) 26 | print('W argmax purity:', purity(w_classes, true_labs)) 27 | # dimensionality reduction 28 | X = uncurl.dim_reduce(means, weights, 2) 29 | proj = np.dot(X.T, weights) 30 | # plotting dimensionality reduction 31 | plt.cla() 32 | # weight plot 33 | plt.title('Dimensionality reduction plot - assigned weight labels') 34 | plt.scatter(proj[0,:], proj[1,:], s=100, cmap='seismic', c=weights[0,:]) 35 | plt.xlabel('dim 1') 36 | plt.ylabel('dim 2') 37 | plt.savefig('dat.png') 38 | plt.cla() 39 | # Poisson cluster plot 40 | plt.title('Dimensionality reduction plot - Poisson clustering labels') 41 | plt.scatter(proj[0,:], proj[1,:], s=100, cmap='seismic', c=assignments_poisson) 42 | plt.xlabel('dim 1') 43 | plt.ylabel('dim 2') 44 | plt.savefig('poisson_cluster_dat.png') 45 | plt.cla() 46 | # NB cluster plot 47 | plt.title('Dimensionality reduction plot - NB clustering labels') 48 | plt.scatter(proj[0,:], proj[1,:], s=100, cmap='seismic', c=assignments_nb) 49 | plt.xlabel('dim 1') 50 | plt.ylabel('dim 2') 51 | plt.savefig('nb_cluster_dat.png') 52 | plt.cla() 53 | # ZIP cluster plot 54 | plt.title('Dimensionality reduction plot - ZIP clustering labels') 55 | plt.scatter(proj[0,:], proj[1,:], s=100, cmap='seismic', c=assignments_zip) 56 | plt.xlabel('dim 1') 57 | plt.ylabel('dim 2') 58 | plt.savefig('zip_cluster_dat.png') 59 | plt.cla() 60 | # true label plot 61 | plt.title('Dimensionality reduction plot - true labels') 62 | plt.scatter(proj[0,:], proj[1,:], cmap='bwr', s=100, alpha=0.7, c=dat['Lab']) 63 | plt.xlabel('dim 1') 64 | plt.ylabel('dim 2') 65 | plt.savefig('labels.png') 66 | -------------------------------------------------------------------------------- /examples/lineage_example.py: -------------------------------------------------------------------------------- 1 | from scipy.io import loadmat 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | import uncurl 6 | from uncurl.lineage import fourier_series 7 | 8 | if __name__ == '__main__': 9 | dat = loadmat('data/BranchedSynDat.mat') 10 | data = dat['Dat'].astype(float) 11 | # Poisson clustering 12 | assignments, centers = uncurl.poisson_cluster(data, 3) 13 | # State estimation 14 | means, weights, ll = uncurl.run_state_estimation(data, 3) 15 | #means, weights = np.load('means_weights.npy') 16 | #
dimensionality reduction 17 | X = uncurl.dim_reduce(means, weights, 2) 18 | proj = np.dot(X.T, weights) 19 | cluster_curves, cluster_fitted_vals, cluster_edges, cluster_assignments = uncurl.run_lineage(means, weights, curve_function='poly') 20 | # dimensionality reduction with true data 21 | true_weights = dat['X'] 22 | true_means = dat['M'] 23 | X = uncurl.dim_reduce(true_means, true_weights, 2) 24 | proj_true = np.dot(X.T, true_weights) 25 | true_curves, true_fitted, true_edges, true_assignments = uncurl.run_lineage(true_means, true_weights) 26 | # plotting dimensionality reduction, fitted curves 27 | plt.clf() 28 | plt.cla() 29 | plt.title('Dimensionality reduction plot') 30 | plt.scatter(proj[0,:], proj[1,:], s=30, c=weights.argmax(0), edgecolors='none', alpha=0.7) 31 | plt.scatter(cluster_fitted_vals[0,:], cluster_fitted_vals[1,:], s=30, c=weights.argmax(0), edgecolors='none', alpha=0.7) 32 | # connect the lines 33 | for edge in cluster_edges: 34 | plt.plot((cluster_fitted_vals[0, edge[0]], cluster_fitted_vals[0, edge[1]]), 35 | (cluster_fitted_vals[1, edge[0]], cluster_fitted_vals[1, edge[1]]), 'black', linewidth=2) 36 | plt.xlabel('dim 1') 37 | plt.ylabel('dim 2') 38 | plt.savefig('branching_dim_reduce_fitted_poly.png') 39 | plt.cla() 40 | # true label plot 41 | """ 42 | plt.title('Dimensionality reduction plot - true labels') 43 | plt.scatter(proj[0,:], proj[1,:], cmap='bwr', s=100, alpha=0.7, c=dat['Lab']) 44 | plt.xlabel('dim 1') 45 | plt.ylabel('dim 2') 46 | plt.savefig('labels.png') 47 | """ 48 | -------------------------------------------------------------------------------- /examples/means_weights.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yjzhang/uncurl_python/0113ba6ca874549ac1d760ef961dd6e82ebfcc67/examples/means_weights.npy -------------------------------------------------------------------------------- /examples/synthetic_example.py: -------------------------------------------------------------------------------- 1 | from scipy.io import loadmat 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | import uncurl 6 | 7 | if __name__ == '__main__': 8 | dat = loadmat('data/SynMouseESprog_1000.mat') 9 | data = dat['Dat'].toarray() 10 | centers, assignments = uncurl.kmeans_pp(data, 2) 11 | lls = uncurl.poisson_ll(data, centers) 12 | # Poisson clustering 13 | assignments, centers = uncurl.poisson_cluster(data, 3) 14 | # State estimation 15 | means, weights, ll = uncurl.poisson_estimate_state(data, 3, max_iters=5) 16 | # dimensionality reduction 17 | X = uncurl.dim_reduce(means, weights, 2) 18 | proj = np.dot(X.T, weights) 19 | # plotting dimensionality reduction 20 | plt.cla() 21 | # weight plot 22 | plt.title('Dimensionality reduction plot - assigned weight labels') 23 | plt.scatter(proj[0,:], proj[1,:], s=100, c=weights.argmax(0)) 24 | plt.xlabel('dim 1') 25 | plt.ylabel('dim 2') 26 | plt.savefig('synthetic_dim_reduce.png') 27 | plt.cla() 28 | # true label plot 29 | """ 30 | plt.title('Dimensionality reduction plot - true labels') 31 | plt.scatter(proj[0,:], proj[1,:], cmap='bwr', s=100, alpha=0.7, c=dat['Lab']) 32 | plt.xlabel('dim 1') 33 | plt.ylabel('dim 2') 34 | plt.savefig('labels.png') 35 | """ 36 | -------------------------------------------------------------------------------- /examples/zeisel_subset_example.py: -------------------------------------------------------------------------------- 1 | from scipy.io import loadmat 2 | import numpy as np 3 | import matplotlib.pyplot as 
plt 4 | from scipy import sparse 5 | from sklearn.manifold import TSNE 6 | 7 | import uncurl 8 | from uncurl.sparse_utils import symmetric_kld 9 | from uncurl.vis import visualize_dim_red 10 | 11 | # note: this whole script should finish in under a few minutes. 12 | 13 | if __name__ == '__main__': 14 | 15 | # 1. load data - 753 cells, 19971 genes 16 | dat = loadmat('data/GSE60361_dat.mat') 17 | data = dat['Dat'] 18 | true_labels = dat['ActLabs'].flatten() 19 | data_csc = sparse.csc_matrix(data) 20 | 21 | # 2. gene selection 22 | genes = uncurl.max_variance_genes(data_csc, nbins=5, frac=0.2) 23 | data_subset = data_csc[genes,:] 24 | 25 | # 3. state estimation 26 | k = 7 # number of clusters to use 27 | M, W, ll = uncurl.poisson_estimate_state(data_subset, k) 28 | argmax_labels = W.argmax(0) 29 | 30 | # 4. visualization 31 | 32 | # mds visualization 33 | mds_proj = uncurl.mds(M, W, 2) 34 | visualize_dim_red(mds_proj, true_labels, 'GSE60361_mds_true_labels.png', title='MDS', figsize=(12,7), alpha=0.5) 35 | 36 | # tsne visualization 37 | tsne = TSNE(2, metric=symmetric_kld) 38 | tsne_w = tsne.fit_transform(W.T) 39 | # plot using true labels 40 | visualize_dim_red(tsne_w.T, true_labels, 'GSE60361_tsne_true_labels.png', title='TSNE(W)', figsize=(12,7), alpha=0.5) 41 | # plot using assigned labels 42 | visualize_dim_red(tsne_w.T, argmax_labels, 'GSE60361_tsne_argmax_labels.png', title='TSNE(W)', figsize=(12,7), alpha=0.5) 43 | -------------------------------------------------------------------------------- /optional_requirements.txt: -------------------------------------------------------------------------------- 1 | Cluster-Ensembles 2 | matplotlib 3 | SIMLR 4 | -------------------------------------------------------------------------------- /push-docs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # run this from the master branch to build/push documentation 4 | 5 | cd docs 6 | make html 7 | git add _build 8 | git commit 9 | 10 | cd .. 
11 | 12 | git subtree push --prefix docs/_build/html origin gh-pages 13 | 14 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cython>=0.27 2 | numpy>=1.12 3 | scipy>=0.19 4 | scikit-learn>=0.19 5 | matplotlib 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | from distutils.extension import Extension 3 | from Cython.Build import cythonize 4 | import numpy 5 | 6 | #directive_defaults['linetrace'] = True 7 | #directive_defaults['binding'] = True 8 | 9 | extensions = [ 10 | Extension('uncurl.nolips', ['uncurl/nolips.pyx'], 11 | extra_compile_args=['-O3', '-ffast-math']), 12 | Extension('uncurl.sparse_utils', ['uncurl/sparse_utils.pyx'], 13 | extra_compile_args=['-O3', '-ffast-math']) 14 | ] 15 | 16 | parallel_extensions = [ 17 | Extension('uncurl.nolips_parallel', ['uncurl/nolips_parallel.pyx'], 18 | extra_compile_args=['-O3', '-ffast-math', '-fopenmp'], 19 | extra_link_args=['-fopenmp']) 20 | ] 21 | 22 | long_description = '' 23 | with open('README.rst') as f: 24 | long_description = f.read() 25 | 26 | setup(name='uncurl_seq', 27 | version='0.2.16', 28 | description='Tool for pre-processing single-cell RNASeq data', 29 | long_description=long_description, 30 | long_description_content_type='text/plain', 31 | url='https://github.com/yjzhang/uncurl_python', 32 | author='Yue Zhang', 33 | author_email='yjzhang@cs.washington.edu', 34 | license='MIT', 35 | include_dirs=[numpy.get_include()], 36 | ext_modules = cythonize(extensions + parallel_extensions), 37 | packages=find_packages("."), 38 | install_requires=[ 39 | 'numpy', 40 | 'scipy', 41 | 'cython', 42 | 'scikit-learn', 43 | ], 44 | test_suite='nose.collector', 45 | tests_require=['nose', 'flaky'], 46 | classifiers=[ 47 | 'Development Status :: 3 - Alpha', 48 | 'Topic :: Scientific/Engineering :: Bio-Informatics', 49 | 'License :: OSI Approved :: MIT License', 50 | 'Programming Language :: Python :: 2.7', 51 | 'Programming Language :: Python :: 3.5', 52 | ], 53 | zip_safe=False) 54 | -------------------------------------------------------------------------------- /tests/test_cluster.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import unittest 4 | from unittest import TestCase 5 | from flaky import flaky 6 | 7 | import numpy as np 8 | from scipy.io import loadmat 9 | 10 | import uncurl 11 | from uncurl.simulation import generate_poisson_data, generate_zip_data 12 | from uncurl.evaluation import purity 13 | from uncurl.zip_clustering import zip_fit_params_mle 14 | 15 | @flaky(max_runs=3) 16 | class ClusterTest(TestCase): 17 | 18 | def setUp(self): 19 | self.dat = loadmat('data/SCDE_k2_sup.mat') 20 | 21 | def test_kmeans_pp(self): 22 | data = self.dat['Dat'] 23 | genes, cells = data.shape 24 | centers, assignments = uncurl.kmeans_pp(data, 3) 25 | self.assertEqual(centers.shape[0], genes) 26 | self.assertEqual(centers.shape[1], 3) 27 | # the center assignments are nondeterministic so... 
28 | self.assertFalse(np.equal(centers[:,0], centers[:,1]).all()) 29 | self.assertFalse(np.equal(centers[:,1], centers[:,2]).all()) 30 | 31 | def test_cluster(self): 32 | data = self.dat['Dat'] 33 | assignments, centers = uncurl.poisson_cluster(data, 3) 34 | self.assertEqual(assignments.shape[0], data.shape[1]) 35 | self.assertEqual(centers.shape[0], data.shape[0]) 36 | # just checking that the values are valid 37 | self.assertFalse(np.isnan(centers).any()) 38 | 39 | def test_simulation(self): 40 | """ 41 | Basically this is to test that the Poisson EM can correctly separate 42 | clusters in simulated data. 43 | """ 44 | centers = np.array([[1,10,20], [1, 11, 1], [50, 1, 100]]) 45 | centers = centers.astype(float) 46 | data, labs = generate_poisson_data(centers, 500) 47 | data = data.astype(float) 48 | assignments, c_centers = uncurl.poisson_cluster(data, 3) 49 | distances = np.zeros((3,3)) 50 | for i in range(3): 51 | for j in range(3): 52 | distances[i,j] = uncurl.poisson_dist(centers[:,i], c_centers[:,j]) 53 | self.assertTrue(purity(assignments, labs) > 0.8) 54 | 55 | @flaky(max_runs=3) 56 | @unittest.skip('zip methods are unsupported') 57 | def test_zip_simulation(self): 58 | """ 59 | ZIP clustering on poisson-simulated data 60 | """ 61 | centers = np.array([[0.1,10,20], [0.1, 11, 0.1], [50, 0.1, 100]]) 62 | centers = centers.astype(float) 63 | data, labs = generate_poisson_data(centers, 500) 64 | data = data.astype(float) 65 | assignments, c_centers, c_zeros = uncurl.zip_cluster(data, 3) 66 | self.assertTrue(purity(assignments, labs) > 0.8) 67 | 68 | @flaky(max_runs=3) 69 | @unittest.skip('zip methods are unsupported') 70 | def test_zip_fit(self): 71 | """ 72 | Tests the algorithm for fitting a ZIP distribution. 73 | """ 74 | for i in range(10): 75 | centers = np.random.randint(10, 1000, (3,1)) 76 | M = np.random.random((3,1)) 77 | data, labs = generate_zip_data(centers, M, 300) 78 | L_, M_ = zip_fit_params_mle(data) 79 | self.assertFalse(np.isnan(L_).any()) 80 | self.assertFalse(np.isnan(M_).any()) 81 | self.assertFalse(np.isnan(L_).any()) 82 | self.assertFalse(np.isnan(M_).any()) 83 | self.assertTrue(np.mean(np.abs(M.flatten() - M_)) < 0.2) 84 | self.assertTrue(np.mean(np.abs(centers.flatten() - L_)) < 10) 85 | 86 | @flaky(max_runs=3) 87 | @unittest.skip('zip methods are unsupported') 88 | def test_zip_simulation_2(self): 89 | """ 90 | ZIP clustering on ZIP-simulated data 91 | """ 92 | centers = np.random.randint(10, 1000, (3,3)) 93 | L = np.random.random((3,3)) 94 | print(centers) 95 | print(L) 96 | centers = centers.astype(float) 97 | data, labs = generate_zip_data(centers, L, 1000) 98 | data = data.astype(float) 99 | print(data) 100 | assignments, c_centers, c_zeros = uncurl.zip_cluster(data, 3) 101 | distances = np.zeros((3,3)) 102 | for i in range(3): 103 | for j in range(3): 104 | distances[i,j] = uncurl.poisson_dist(centers[:,i], c_centers[:,j]) 105 | print(c_centers) 106 | print(c_zeros) 107 | print(purity(assignments, labs)) 108 | self.assertTrue(purity(assignments, labs) > 0.6) 109 | #self.assertFalse(correspond[0]==correspond[1]) 110 | #self.assertFalse(correspond[1]==correspond[2]) 111 | -------------------------------------------------------------------------------- /tests/test_cluster_sparse.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from unittest import TestCase 4 | from flaky import flaky 5 | 6 | import numpy as np 7 | from scipy.io import loadmat 8 | from scipy import sparse 
9 | 10 | import uncurl 11 | from uncurl.simulation import generate_poisson_data 12 | from uncurl.evaluation import purity 13 | 14 | @flaky 15 | class SparseClusterTest(TestCase): 16 | 17 | def setUp(self): 18 | dat = loadmat('data/SCDE_k2_sup.mat') 19 | self.data = sparse.csc_matrix(dat['Dat']) 20 | self.labs = dat['Lab'].flatten() 21 | 22 | def test_kmeans_pp(self): 23 | data = self.data 24 | genes, cells = data.shape 25 | centers, assignments = uncurl.kmeans_pp(data, 3) 26 | self.assertEqual(centers.shape[0], genes) 27 | self.assertEqual(centers.shape[1], 3) 28 | # the center assignments are nondeterministic so... 29 | self.assertFalse(np.equal(centers[:,0], centers[:,1]).all()) 30 | self.assertFalse(np.equal(centers[:,1], centers[:,2]).all()) 31 | 32 | def test_cluster(self): 33 | data = self.data 34 | assignments, centers = uncurl.poisson_cluster(data, 2) 35 | self.assertEqual(assignments.shape[0], data.shape[1]) 36 | self.assertEqual(centers.shape[0], data.shape[0]) 37 | # just checking that the values are valid 38 | self.assertFalse(np.isnan(centers).any()) 39 | self.assertTrue(purity(assignments, self.labs) > 0.8) 40 | 41 | def test_simulation(self): 42 | """ 43 | Basically this is to test that the Poisson EM can correctly separate 44 | clusters in simulated data. 45 | """ 46 | centers = np.array([[1,10,20], [1, 11, 1], [50, 1, 100]]) 47 | centers = centers.astype(float) 48 | data, labs = generate_poisson_data(centers, 500) 49 | data = data.astype(float) 50 | data = sparse.csc_matrix(data) 51 | assignments, c_centers = uncurl.poisson_cluster(data, 3) 52 | distances = np.zeros((3,3)) 53 | for i in range(3): 54 | for j in range(3): 55 | distances[i,j] = uncurl.poisson_dist(centers[:,i], c_centers[:,j]) 56 | print(assignments) 57 | print(labs) 58 | print(purity(assignments, labs)) 59 | self.assertTrue(purity(assignments, labs) > 0.65) 60 | 61 | -------------------------------------------------------------------------------- /tests/test_dim_reduce.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | import numpy as np 4 | 5 | from uncurl import simulation, dim_reduce, dim_reduce_data, mds 6 | 7 | class DimReduceTest(TestCase): 8 | 9 | def setUp(self): 10 | pass 11 | 12 | def test_dim_reduce(self): 13 | """ 14 | Test dimensionality reduction using sample data 15 | """ 16 | sim_means = np.array([[20.,30.,1.], 17 | [10.,3.,8.], 18 | [90.,50.,20.], 19 | [10.,4.,30.]]) 20 | sim_assignments = np.array([[0.1,0.2,0.3,0.4,0.5,0.1,0.8], 21 | [0.5,0.3,0.2,0.4,0.2,0.2,0.1], 22 | [0.4,0.5,0.5,0.2,0.3,0.7,0.1]]) 23 | sim_data = simulation.generate_state_data(sim_means, sim_assignments) 24 | sim_data = sim_data + 1e-8 25 | X = dim_reduce(sim_means, sim_assignments, 2) 26 | self.assertEqual(X.shape, (3, 2)) 27 | X2 = dim_reduce_data(sim_data, 2) 28 | self.assertEqual(X2.shape, (sim_data.shape[1], 2)) 29 | projections = np.dot(X.transpose(), sim_assignments) 30 | mds_proj = mds(sim_means, sim_assignments, 2) 31 | self.assertTrue(np.abs(mds_proj - projections).sum() < 1e-6) 32 | # assert something about the distances??? 33 | # 1-NN based error? 
34 | -------------------------------------------------------------------------------- /tests/test_experiment_runner.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from unittest import TestCase 4 | 5 | import numpy as np 6 | from scipy import sparse 7 | from scipy.io import loadmat 8 | 9 | import uncurl 10 | 11 | class ExperimentRunnerTest(TestCase): 12 | # TODO: test dataset 13 | 14 | def setUp(self): 15 | dat = loadmat('data/SCDE_test.mat') 16 | self.data = dat['dat'].toarray()[0:500, :] 17 | self.data = sparse.csc_matrix(self.data) 18 | self.labs = dat['Lab'][0] 19 | 20 | def test_run(self): 21 | se = uncurl.experiment_runner.PoissonSE(clusters=2) 22 | results, ll = se.run(self.data) 23 | self.assertTrue(len(results)==1) 24 | self.assertTrue(results[0].shape[0]==2) 25 | 26 | def test_runExperiment(self): 27 | se = uncurl.experiment_runner.PoissonSE(clusters=2, max_iters=10, inner_max_iters=50) 28 | argmax = uncurl.experiment_runner.Argmax(n_classes=2) 29 | km = uncurl.experiment_runner.KM(n_classes=2) 30 | methods = [(se, [argmax, km])] 31 | results, names, other = uncurl.experiment_runner.run_experiment(methods, self.data, 2, self.labs, n_runs=2) 32 | self.assertEqual(len(results), 2) 33 | self.assertTrue('clusterings' in other) 34 | self.assertTrue('timing' in other) 35 | self.assertTrue('preprocessing' in other) 36 | print(results) 37 | self.assertTrue(results[0][0]>0.95) 38 | 39 | def test_runExperiment_2(self): 40 | se = uncurl.experiment_runner.PoissonSE(clusters=2, max_iters=10, inner_max_iters=50) 41 | pre = uncurl.experiment_runner.Preprocess() 42 | argmax = uncurl.experiment_runner.Argmax(n_classes=2) 43 | km = uncurl.experiment_runner.KM(n_classes=2) 44 | pca_km = uncurl.experiment_runner.PcaKm(k=8, n_classes=2) 45 | methods = [(se, [argmax, km]), (pre, [km, pca_km])] 46 | results, names, other = uncurl.experiment_runner.run_experiment(methods, self.data, 2, self.labs, n_runs=2) 47 | self.assertEqual(len(results), 2) 48 | self.assertTrue('clusterings' in other) 49 | self.assertTrue('timing' in other) 50 | self.assertTrue('preprocessing' in other) 51 | self.assertTrue(results[0][0]>0.95) 52 | -------------------------------------------------------------------------------- /tests/test_fit_dist.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest import TestCase 3 | from flaky import flaky 4 | 5 | import numpy as np 6 | 7 | from scipy.io import loadmat 8 | from scipy import sparse 9 | 10 | import uncurl 11 | from uncurl.simulation import generate_poisson_data 12 | from uncurl import fit_dist_data 13 | 14 | @flaky(max_runs=4) 15 | class FitDistTest(TestCase): 16 | 17 | def setUp(self): 18 | pass 19 | 20 | def testPoissonData(self): 21 | """ 22 | Test with generated unimodal Poisson dataset. 23 | """ 24 | centers = np.array([[1], [10], [50]]) 25 | centers = centers.astype(float) 26 | data, labs = generate_poisson_data(centers, 500) 27 | fit_errors = fit_dist_data.DistFitDataset(data) 28 | self.assertTrue((fit_errors['poiss'] < fit_errors['norm']).all()) 29 | self.assertTrue((fit_errors['poiss'] < fit_errors['lognorm']).all()) 30 | 31 | def testNormalData(self): 32 | """ 33 | Test with generated unimodal Normal dataset. 
34 | """ 35 | centers = np.array([[100], [20], [50]]) 36 | variances = np.array([[1.0], [1.0], [5.0]]) 37 | centers = centers.astype(float) 38 | data = np.random.normal(centers, variances, size=(3,500)) 39 | fit_errors = fit_dist_data.DistFitDataset(data) 40 | self.assertTrue((fit_errors['poiss'] > fit_errors['norm']).all()) 41 | self.assertTrue((fit_errors['norm'] < fit_errors['lognorm']).all()) 42 | 43 | @unittest.skip('still working on this') 44 | def testLogNormalData(self): 45 | """ 46 | Test with generated unimodal Log-Normal dataset. 47 | """ 48 | centers = np.array([[-1.0], [0.0], [-2]]) 49 | variances = np.array([[2.0], [1.2], [1.5]]) 50 | centers = centers.astype(float) 51 | data = np.random.lognormal(centers, variances, size=(3,500)) 52 | print(data.round()) 53 | print(data.round().max(1)) 54 | fit_errors = fit_dist_data.DistFitDataset(data) 55 | print(fit_errors) 56 | self.assertTrue((fit_errors['poiss'] > fit_errors['lognorm']).all()) 57 | self.assertTrue((fit_errors['norm'] > fit_errors['lognorm']).all()) 58 | 59 | -------------------------------------------------------------------------------- /tests/test_gap_score.py: -------------------------------------------------------------------------------- 1 | """ 2 | Using gap score to determine optimal cluster number 3 | """ 4 | 5 | import unittest 6 | from unittest import TestCase 7 | from flaky import flaky 8 | 9 | import numpy as np 10 | import scipy 11 | 12 | from uncurl import gap_score 13 | 14 | class GapScoreTest(TestCase): 15 | 16 | def setUp(self): 17 | pass 18 | 19 | def test_gap_score(self): 20 | data_mat = scipy.io.loadmat('data/10x_pooled_400.mat') 21 | data = data_mat['data'] 22 | data_tsvd = gap_score.preproc_data(data, gene_subset=True) 23 | max_k, gap_vals, sk_vals = gap_score.run_gap_k_selection(data_tsvd, 24 | k_min=1, k_max=50, skip=5, B=5) 25 | # just test that the score is in a very broad range 26 | self.assertTrue(max_k > 3) 27 | self.assertTrue(max_k < 20) 28 | 29 | def test_gap_score_2(self): 30 | data_mat = scipy.io.loadmat('data/GSE60361_dat.mat') 31 | data = data_mat['Dat'] 32 | data_tsvd = gap_score.preproc_data(data, gene_subset=True) 33 | max_k, gap_vals, sk_vals = gap_score.run_gap_k_selection(data_tsvd, 34 | k_min=1, k_max=50, skip=5, B=5) 35 | self.assertTrue(max_k > 3) 36 | self.assertTrue(max_k < 30) 37 | 38 | @flaky(max_runs=3) 39 | def test_gap_score_3(self): 40 | data_mat = scipy.io.loadmat('data/SCDE_test.mat') 41 | data = data_mat['dat'] 42 | data_tsvd = gap_score.preproc_data(data, gene_subset=True) 43 | max_k, gap_vals, sk_vals = gap_score.run_gap_k_selection(data_tsvd, 44 | k_min=1, k_max=50, skip=5, B=5) 45 | self.assertTrue(max_k < 10) 46 | 47 | 48 | 49 | if __name__ == '__main__': 50 | unittest.main() 51 | 52 | 53 | -------------------------------------------------------------------------------- /tests/test_lineage.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | from flaky import flaky 3 | 4 | import numpy as np 5 | 6 | from uncurl import simulation, run_lineage, pseudotime 7 | 8 | @flaky 9 | class LineageTest(TestCase): 10 | 11 | def setUp(self): 12 | pass 13 | 14 | def test_lineage(self): 15 | """ 16 | Testing lineage using randomly generated lineage data 17 | """ 18 | M, W = simulation.generate_poisson_lineage(3, 100, 50) 19 | sim_data = simulation.generate_state_data(M, W) 20 | sim_data = sim_data + 1e-8 21 | m2 = M + np.random.random(M.shape) - 0.5 22 | curves, fitted_vals, edges, assignments = 
run_lineage(m2, W) 23 | # TODO: assert something about the distances??? 24 | print(len(edges)) 25 | adjacent_count = 0 26 | for e in edges: 27 | if np.abs(e[0]-e[1]) <= 1: 28 | adjacent_count += 1 29 | self.assertTrue(adjacent_count>150) 30 | 31 | def test_pseudotime(self): 32 | """ 33 | Test pseudotime calculations 34 | """ 35 | M, W = simulation.generate_poisson_lineage(3, 100, 50) 36 | sim_data = simulation.generate_state_data(M, W) 37 | sim_data = sim_data + 1e-8 38 | m2 = M + np.random.random(M.shape) - 0.5 39 | curves, fitted_vals, edges, assignments = run_lineage(m2, W) 40 | ptime = pseudotime(0, edges, fitted_vals) 41 | # assert that the cells are generally increasing in ptime 42 | # test each cluster 43 | old_p = 0 44 | for i in range(100): 45 | p = ptime[i] 46 | self.assertTrue(p >= old_p) 47 | old_p = p 48 | old_p = 0 49 | for i in range(100, 200): 50 | p = ptime[i] 51 | self.assertTrue(p >= old_p) 52 | self.assertTrue(p > 0) 53 | old_p = p 54 | old_p = 0 55 | for i in range(200, 300): 56 | p = ptime[i] 57 | self.assertTrue(p >= old_p) 58 | self.assertTrue(p > 0) 59 | old_p = p 60 | -------------------------------------------------------------------------------- /tests/test_nb.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import unittest 4 | from unittest import TestCase 5 | from flaky import flaky 6 | 7 | import numpy as np 8 | 9 | from uncurl import nb_cluster, simulation 10 | from uncurl.nb_clustering import nb_ll, nb_fit 11 | from uncurl.evaluation import purity 12 | 13 | 14 | @flaky 15 | @unittest.skip('nb methods currently not supported') 16 | class NBTest(TestCase): 17 | 18 | def setUp(self): 19 | self.p1 = np.array([1.,2.,3.]) 20 | self.p2 = np.array([2.,2.,3.]) 21 | 22 | def test_negative_binomial(self): 23 | """ 24 | Test NB log-likelihood, nb_cluster 25 | """ 26 | P = np.array([[0.5,0.4,0.8], 27 | [0.5,0.3,0.7], 28 | [0.5,0.3,0.9]]) 29 | R = np.array([[1.,8.,10.], 30 | [2.,8.,24], 31 | [3.,6.,30.]]) 32 | data, labels = simulation.generate_nb_data(P, R, 100) 33 | data = data.astype(float) 34 | #data += 1e-8 35 | ll = nb_ll(data, P, R) 36 | self.assertEqual(ll.shape, (100,3)) 37 | self.assertFalse(np.isnan(ll).any()) 38 | self.assertFalse(np.isinf(ll).any()) 39 | # test derivative 40 | # test nb cluster 41 | # how to test the results... they're often not good... 42 | a,p,r = nb_cluster(data,3) 43 | self.assertEqual(p.shape, P.shape) 44 | self.assertEqual(r.shape, R.shape) 45 | p_nans = np.isnan(p) 46 | r_nans = np.isnan(r) 47 | self.assertFalse(p_nans.any()) 48 | self.assertFalse(r_nans.any()) 49 | # assert that all the points aren't being put into 50 | # the same cluster. 
51 | self.assertTrue(purity(labels, a) > 0.8) 52 | self.assertFalse((a==a[0]).all()) 53 | 54 | 55 | def test_nb_fit(self): 56 | """ 57 | Tests fitting an NB distribution 58 | """ 59 | P = np.array([[0.5], 60 | [0.3], 61 | [0.4]]) 62 | R = np.array([[1.], 63 | [8.], 64 | [2.]]) 65 | data, _ = simulation.generate_nb_data(P, R, 500) 66 | p, r = nb_fit(data) 67 | p_nans = np.isnan(p) 68 | r_nans = np.isnan(r) 69 | self.assertFalse(p_nans.any()) 70 | self.assertFalse(r_nans.any()) 71 | self.assertFalse(np.isinf(p).any()) 72 | self.assertFalse(np.isinf(r).any()) 73 | self.assertTrue(np.sum(np.abs(p - P.flatten())**2)/3 < 0.5) 74 | print(r) 75 | print(np.sqrt(np.sum(np.abs(r - R.flatten())**2))/3) 76 | self.assertTrue(np.sqrt(np.sum(np.abs(r - R.flatten())**2))/3 < 3) 77 | 78 | def test_nb_fit_random(self): 79 | """ 80 | Tests fitting an NB distribution with random parameters 81 | """ 82 | for i in range(5): 83 | P = np.random.random((3,1))*0.9+0.1 84 | R = np.random.randint(1, 100, (3,1)) 85 | data, _ = simulation.generate_nb_data(P, R, 500) 86 | try: 87 | p, r = nb_fit(data) 88 | except ValueError: 89 | continue 90 | p_nans = np.isnan(p) 91 | r_nans = np.isnan(r) 92 | print(P) 93 | print(R) 94 | print(p) 95 | print(r) 96 | print(np.sqrt(np.sum(np.abs(r - R.flatten())**2))/3) 97 | self.assertTrue(np.sqrt(np.sum(np.abs(r - R.flatten())**2))/3 < 35) 98 | self.assertFalse(p_nans.any()) 99 | self.assertFalse(r_nans.any()) 100 | self.assertFalse(np.isinf(p).any()) 101 | self.assertFalse(np.isinf(r).any()) 102 | self.assertTrue(np.sum(np.abs(p - P.flatten())**2)/3 < 0.5) 103 | 104 | -------------------------------------------------------------------------------- /tests/test_nb_state_estimation.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import unittest 4 | from unittest import TestCase 5 | 6 | import numpy as np 7 | 8 | from uncurl import nb_state_estimation, simulation 9 | from uncurl.evaluation import purity 10 | 11 | @unittest.skip('nb methods currently not supported') 12 | class StateEstimationTest(TestCase): 13 | 14 | def setUp(self): 15 | pass 16 | 17 | def test_random_1(self): 18 | """ 19 | Test NB state estimation with random parameters 20 | """ 21 | M, W, R = simulation.generate_nb_states(2, 200, 20) 22 | data = simulation.generate_nb_state_data(M, W, R) 23 | M_noised = M + 0.1*(np.random.random(M.shape)-0.5) 24 | M_, W_, R_, ll = nb_state_estimation.nb_estimate_state(data, 2, init_means=M_noised, R = R, disp=False) 25 | c1 = W.argmax(0) 26 | c2 = W_.argmax(0) 27 | p = purity(c2, c1) 28 | print(p) 29 | print(data) 30 | print(M) 31 | print(M_) 32 | self.assertTrue(p > 0.7) 33 | -------------------------------------------------------------------------------- /tests/test_nmf.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from unittest import TestCase 4 | 5 | import numpy as np 6 | from scipy import sparse 7 | from scipy.io import loadmat 8 | 9 | import uncurl 10 | 11 | class NMFTest(TestCase): 12 | 13 | def setUp(self): 14 | dat = loadmat('data/SCDE_test.mat') 15 | self.data = dat['dat'].toarray()[0:500, :] 16 | self.data_sparse = sparse.csc_matrix(self.data) 17 | self.labs = dat['Lab'][0] 18 | 19 | def test_run_lognorm_nmf(self): 20 | w, h, cost = uncurl.nmf_wrapper.log_norm_nmf(self.data, 2) 21 | labs = h.argmax(0) 22 | self.assertTrue(uncurl.evaluation.purity(labs, self.labs) > 0.85) 23 | 24 | def 
test_run_norm_nmf(self): 25 | w, h, cost = uncurl.nmf_wrapper.norm_nmf(self.data, 2) 26 | labs = h.argmax(0) 27 | self.assertTrue(uncurl.evaluation.purity(labs, self.labs) > 0.8) 28 | 29 | def test_run_se(self): 30 | w, h, cost = uncurl.run_state_estimation(self.data, 2, dist='log-norm') 31 | labs = h.argmax(0) 32 | self.assertTrue(uncurl.evaluation.purity(labs, self.labs) > 0.85) 33 | w1, h1, cost = uncurl.run_state_estimation(self.data, 2, dist='gaussian') 34 | labs = h1.argmax(0) 35 | self.assertTrue(uncurl.evaluation.purity(labs, self.labs) > 0.8) 36 | -------------------------------------------------------------------------------- /tests/test_poisson.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from unittest import TestCase 3 | from flaky import flaky 4 | 5 | import numpy as np 6 | 7 | from scipy import sparse 8 | 9 | import uncurl 10 | from uncurl import pois_ll 11 | from uncurl.simulation import generate_poisson_data 12 | 13 | class PoissonTest(TestCase): 14 | 15 | def setUp(self): 16 | self.p1 = np.array([1.,2.,3.]) 17 | self.p2 = np.array([2.,2.,3.]) 18 | 19 | def test_poisson_dist(self): 20 | self.assertEqual(uncurl.poisson_dist(self.p1, self.p1), 0.0) 21 | self.assertEqual(uncurl.poisson_dist(self.p2, self.p2), 0.0) 22 | self.assertTrue(uncurl.poisson_dist(self.p1, self.p2) > 0.0) 23 | self.assertTrue( 24 | np.abs(uncurl.sparse_utils.poisson_dist(self.p1, self.p2) - 25 | uncurl.poisson_dist(self.p1, self.p2)) < 1e-4) 26 | 27 | def test_sparse_poisson_dist(self): 28 | sp1 = sparse.csc_matrix(self.p1) 29 | sp2 = sparse.csc_matrix(self.p2) 30 | self.assertTrue( 31 | np.abs(uncurl.sparse_utils.poisson_dist(self.p1, self.p2) - 32 | uncurl.poisson_dist(self.p1, self.p2)) < 1e-4) 33 | 34 | 35 | def test_poisson_ll(self): 36 | """ 37 | Test Poisson log-likelihood 38 | """ 39 | centers = np.array([[1,10,20], [1, 11, 1], [50, 1, 100]]) 40 | centers = centers.astype(float) 41 | data, labs = generate_poisson_data(centers, 500) 42 | data = data.astype(float) 43 | starting_centers = centers 44 | poisson_ll = pois_ll.poisson_ll(data, starting_centers) 45 | p_isnan = np.isnan(poisson_ll) 46 | # just test that it's not nan 47 | self.assertFalse(p_isnan.any()) 48 | 49 | def test_sparse_poisson_ll(self): 50 | """ 51 | Test Poisson log-likelihood 52 | """ 53 | centers = np.array([[0.1,10,20], [5, 15, 1], [50, 1, 0.1]]) 54 | centers = centers.astype(float) 55 | data, labs = generate_poisson_data(centers, 500) 56 | data = data.astype(float) 57 | data = sparse.csc_matrix(data) 58 | starting_centers = centers 59 | poisson_ll = pois_ll.poisson_ll(data, starting_centers) 60 | p_isnan = np.isnan(poisson_ll) 61 | self.assertFalse(p_isnan.any()) 62 | labels = poisson_ll.argmax(1) 63 | self.assertTrue((labels==labs).sum() >= 450) 64 | 65 | 66 | -------------------------------------------------------------------------------- /tests/test_preprocessing.py: -------------------------------------------------------------------------------- 1 | from unittest import TestCase 2 | 3 | import numpy as np 4 | from scipy.io import loadmat 5 | from scipy import sparse 6 | 7 | import uncurl 8 | from uncurl.preprocessing import sparse_mean_var, cell_normalize 9 | from uncurl.simulation import generate_poisson_data 10 | from uncurl.evaluation import purity 11 | 12 | class PreprocessingTest(TestCase): 13 | 14 | def setUp(self): 15 | dat = loadmat('data/SCDE_k2_sup.mat') 16 | self.data_sparse = sparse.csc_matrix(dat['Dat']) 17 | self.data_dense = dat['Dat'] 18 | 
self.labs = dat['Lab'].flatten() 19 | 20 | def testSparseVar(self): 21 | """ 22 | Test sparse variance 23 | """ 24 | dense_var = np.var(self.data_dense, 1) 25 | mean, sp_var = sparse_mean_var(self.data_sparse) 26 | se = np.sqrt(np.sum((sp_var - dense_var)**2)) 27 | print(se) 28 | self.assertTrue(se < 1e-5) 29 | 30 | def testMaxVarGenes(self): 31 | """ 32 | test max variance genes for dense and sparse matrices 33 | """ 34 | n_genes =self.data_sparse.shape[0] 35 | genes1 = uncurl.max_variance_genes(self.data_dense, nbins=1, frac=0.5) 36 | genes2 = uncurl.max_variance_genes(self.data_sparse, nbins=1, frac=0.5) 37 | self.assertEqual(set(genes1), set(genes2)) 38 | self.assertEqual(len(genes1), int(0.5*n_genes)) 39 | genes1 = uncurl.max_variance_genes(self.data_dense, nbins=5, frac=0.2) 40 | genes2 = uncurl.max_variance_genes(self.data_sparse, nbins=5, frac=0.2) 41 | self.assertEqual(set(genes1), set(genes2)) 42 | self.assertEqual(len(genes1), 5*int((n_genes/5)*0.2)) 43 | 44 | def testCellNormalize(self): 45 | sparse_cell_norm = cell_normalize(self.data_sparse) 46 | dense_cell_norm = cell_normalize(self.data_dense) 47 | diff = dense_cell_norm - sparse_cell_norm.toarray() 48 | diff = np.sqrt(np.sum(diff**2)) 49 | self.assertTrue(diff < 1e-6) 50 | -------------------------------------------------------------------------------- /tests/test_qual2quant.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from unittest import TestCase 4 | 5 | import numpy as np 6 | from scipy import sparse 7 | from scipy.io import loadmat 8 | 9 | import uncurl 10 | 11 | class Qual2QuantTest(TestCase): 12 | # TODO: test dataset 13 | 14 | def setUp(self): 15 | dat = loadmat('data/SCDE_test.mat') 16 | self.data = dat['dat'].toarray()[0:500, :] 17 | self.qualData = dat['M'].toarray()[0:500, :] 18 | 19 | 20 | def test_qual2quant(self): 21 | # simulated test data? 22 | # no... use M as a starting matrix 23 | # qual_matrix = np.zeros((self.data.shape[0], 2)) 24 | starting_points = uncurl.qualNorm(self.data, self.qualData) 25 | self.assertTrue(starting_points.shape==(500, 2)) 26 | self.assertFalse(np.isnan(starting_points).any()) 27 | print((starting_points[:,0] == starting_points[:,1]).sum()) 28 | self.assertTrue((starting_points[:,0] == starting_points[:,1]).sum() < 10) 29 | 30 | 31 | def test_qual2quant_sparse(self): 32 | # simulated test data? 33 | # no... use M as a starting matrix 34 | # qual_matrix = np.zeros((self.data.shape[0], 2)) 35 | data_sparse = sparse.csc_matrix(self.data) 36 | starting_points = uncurl.qualNorm(data_sparse, self.qualData) 37 | self.assertTrue(starting_points.shape==(500, 2)) 38 | self.assertFalse(np.isnan(starting_points).any()) 39 | print((starting_points[:,0] == starting_points[:,1]).sum()) 40 | self.assertTrue((starting_points[:,0] == starting_points[:,1]).sum() < 10) 41 | 42 | 43 | def test_qual2quant_missing_data(self): 44 | # simulated test data? 45 | # no... 
use M as a starting matrix 46 | # qual_matrix = np.zeros((self.data.shape[0], 2)) 47 | qualData_m = self.qualData.copy() 48 | for i in range(300): 49 | qualData_m[i,:] = -1 50 | starting_points = uncurl.qualNorm(self.data, qualData_m) 51 | self.assertTrue(starting_points.shape==(500, 2)) 52 | self.assertFalse(np.isnan(starting_points).any()) 53 | print((starting_points[:,0] == starting_points[:,1]).sum()) 54 | self.assertTrue((starting_points[:,0] == starting_points[:,1]).sum() < 10) 55 | 56 | 57 | -------------------------------------------------------------------------------- /tests/test_real_data.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import unittest 4 | from unittest import TestCase 5 | 6 | import numpy as np 7 | from scipy import sparse 8 | from scipy.io import loadmat 9 | 10 | import uncurl 11 | 12 | class RealDataTest(TestCase): 13 | """ 14 | tests results on actual datasets: 10x_pooled, Zeisel 7-cluster subset, 15 | maybe add others? 16 | """ 17 | 18 | def setUp(self): 19 | dat = loadmat('data/10x_pooled_400.mat') 20 | self.data = sparse.csc_matrix(dat['data']) 21 | self.labs = dat['labels'].flatten() 22 | dat_z = loadmat('data/GSE60361_dat.mat') 23 | self.data_z = sparse.csc_matrix(dat_z['Dat']) 24 | self.labs_z = dat_z['ActLabs'].flatten() 25 | 26 | def test_10xSE(self): 27 | # gene selection 28 | genes = uncurl.max_variance_genes(self.data) 29 | data_subset = self.data[genes,:] 30 | # smaller # of iterations than default so it finishes faster... 31 | se = uncurl.experiment_runner.PoissonSE(clusters=8, max_iters=10, 32 | inner_max_iters=80) 33 | argmax = uncurl.experiment_runner.Argmax(n_classes=8) 34 | km = uncurl.experiment_runner.KM(n_classes=8) 35 | methods = [(se, [argmax, km])] 36 | results, names, other = uncurl.experiment_runner.run_experiment( 37 | methods, data_subset, 8, self.labs, n_runs=1, 38 | use_purity=False, use_nmi=True) 39 | print(results) 40 | # NMI should be > 0.75 on 10x_pure_pooled 41 | # (accounting for lower than default iter count) 42 | self.assertTrue(results[0][0]>0.75) 43 | self.assertTrue(results[0][1]>0.75) 44 | 45 | def test_Zeisel(self): 46 | # gene selection 47 | genes = uncurl.max_variance_genes(self.data_z) 48 | data_subset = self.data_z[genes,:] 49 | # smaller # of iterations than default so it finishes faster... 50 | se = uncurl.experiment_runner.PoissonSE(clusters=7, max_iters=10, 51 | inner_max_iters=80) 52 | argmax = uncurl.experiment_runner.Argmax(n_classes=7) 53 | km = uncurl.experiment_runner.KM(n_classes=7) 54 | methods = [(se, [argmax, km])] 55 | results, names, other = uncurl.experiment_runner.run_experiment( 56 | methods, data_subset, 7, self.labs_z, n_runs=1, 57 | use_purity=False, use_nmi=True) 58 | print(results) 59 | # NMI should be > 0.75 on Zeisel subset as well 60 | self.assertTrue(results[0][0]>0.75) 61 | self.assertTrue(results[0][1]>0.75) 62 | 63 | def test_10x_auto_cluster(self): 64 | """ 65 | Test using automatic cluster size determination 66 | """ 67 | from sklearn.metrics.cluster import normalized_mutual_info_score as nmi 68 | # gene selection 69 | genes = uncurl.max_variance_genes(self.data) 70 | data_subset = self.data[genes,:] 71 | # smaller # of iterations than default so it finishes faster... 
 72 |         M, W, ll = uncurl.run_state_estimation(data_subset, clusters=0,
 73 |                 max_iters=10, inner_max_iters=80)
 74 |         labels = W.argmax(0)
 75 |         # NMI should be > 0.6 on 10x_pure_pooled
 76 |         # (accounting for lower than default iter count)
 77 |         self.assertTrue(nmi(self.labs, labels)>0.6)
 78 |         # test RMSE
 79 |         test_data = np.dot(M, W)
 80 |         error = data_subset.toarray() - test_data
 81 |         error = np.sqrt(np.mean(error**2))
 82 |         print('data subset RMSE:', error)
 83 |         self.assertTrue(error < 2.0)
 84 | 
 85 |     def test_10x_update_m(self):
 86 |         """
 87 |         Test after updating M
 88 |         """
 89 |         from uncurl.state_estimation import update_m
 90 |         genes = uncurl.max_variance_genes(self.data)
 91 |         data_subset = self.data[genes,:]
 92 |         # smaller # of iterations than default so it finishes faster...
 93 |         M, W, ll = uncurl.run_state_estimation(data_subset, clusters=0,
 94 |                 max_iters=10, inner_max_iters=50)
 95 |         new_M = update_m(self.data, M, W, genes)
 96 |         self.assertEqual(new_M.shape, (self.data.shape[0], W.shape[0]))
 97 |         self.assertFalse(np.isnan(new_M).any())
 98 |         # test RMSE
 99 |         test_data = np.dot(new_M, W)
100 |         error = self.data.toarray() - test_data
101 |         error = np.sqrt(np.mean(error**2))
102 |         print('M update RMSE:', error)
103 |         self.assertTrue(error < 2.0)
104 | 
105 | if __name__ == '__main__':
106 |     unittest.main()
107 | 
--------------------------------------------------------------------------------
/tests/test_state_estimation.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | 
  3 | import itertools
  4 | 
  5 | from unittest import TestCase
  6 | from flaky import flaky
  7 | 
  8 | import numpy as np
  9 | from scipy.io import loadmat
 10 | 
 11 | from uncurl import state_estimation, simulation, run_state_estimation
 12 | 
 13 | class StateEstimationTest(TestCase):
 14 | 
 15 |     def setUp(self):
 16 |         pass
 17 | 
 18 |     @flaky
 19 |     def test_state_estimation(self):
 20 |         """
 21 |         Generate sample data from a small set to see that the state
 22 |         estimation is accurate.
 23 | 
 24 |         7 cells, 4 genes, 2 clusters
 25 |         """
 26 |         sim_means = np.array([[20.,30.],
 27 |                               [10.,3.],
 28 |                               [90.,50.],
 29 |                               [10.,4.]])
 30 |         sim_assignments = np.array([[0.1,0.2,0.3,0.4,0.5,0.8,0.9],
 31 |                                     [0.9,0.8,0.7,0.6,0.5,0.2,0.1]])
 32 |         sim_data = simulation.generate_state_data(sim_means, sim_assignments)
 33 |         sim_data = sim_data + 1e-8
 34 |         print(sim_data)
 35 |         # add noise to the mean
 36 |         sim_means_noised = sim_means + 5*(np.random.random(sim_means.shape)-0.5)
 37 |         m, w, ll = state_estimation.poisson_estimate_state(sim_data, 2, init_means=sim_means_noised, max_iters=10, disp=False)
 38 |         print(m)
 39 |         print(w)
 40 |         self.assertTrue(np.max(w.sum(0) - 1.0)<0.01)
 41 |         # mean error in M is less than 10
 42 |         self.assertTrue(np.mean(np.abs(sim_means-m))<10.0 or
 43 |                 np.mean(np.abs(sim_means-m[:,[1,0]]))<10.0)
 44 |         # mean error in W is less than 0.3 (arbitrary boundary)
 45 |         self.assertTrue(np.mean(np.abs(sim_assignments-w))<0.3 or
 46 |                 np.mean(np.abs(sim_assignments-w[[1,0],:]))<0.3)
 47 | 
 48 |     def test_state_estimation_2(self):
 49 |         """
 50 |         Generate sample data from a slightly larger set to see that the state
 51 |         estimation is accurate.
 52 | 
 53 |         11 cells, 5 genes, 3 clusters
 54 | 
 55 |         This might fail due to inherent randomness...
56 | """ 57 | sim_means = np.array([[20.,30.,4.], 58 | [10.,3.,9.], 59 | [90.,50.,10.], 60 | [10.,4.,30.], 61 | [35.,10.,2.]]) 62 | sim_assignments = np.array([[0.1,0.2,0.3,0.4,0.1,0.7,0.6,0.9,0.5,0.2,0.1], 63 | [0.6,0.7,0.3,0.4,0.1,0.2,0.1,0.1,0.0,0.3,0.8], 64 | [0.3,0.1,0.4,0.2,0.8,0.1,0.3,0.0,0.5,0.5,0.1]]) 65 | sim_data = simulation.generate_state_data(sim_means, sim_assignments) 66 | sim_data = sim_data + 1e-8 67 | print(sim_data) 68 | # add noise to the mean 69 | sim_means_noised = sim_means + 5*(np.random.random(sim_means.shape)-0.5) 70 | m, w, ll = state_estimation.poisson_estimate_state(sim_data, 3, init_means=sim_means_noised, max_iters=10, disp=False) 71 | print(m) 72 | print(w) 73 | print(w.sum(0)) 74 | self.assertTrue(np.max(w.sum(0) - 1.0)<0.01) 75 | # mean error in M is less than 10 76 | means_good = False 77 | weights_good = False 78 | # test every permutation of clusters 79 | for p in itertools.permutations([0,1,2]): 80 | means_good = means_good or (np.mean(np.abs(sim_means-m[:,p]))<10.0) 81 | weights_good = weights_good or (np.mean(np.abs(sim_assignments-w[p,:]))<0.2) 82 | self.assertTrue(means_good) 83 | self.assertTrue(weights_good) 84 | 85 | def test_random_means(self): 86 | """ 87 | Test state estimation with random means and weights. 88 | 89 | 200 cells, 20 genes, 2 clusters 90 | """ 91 | sim_m, sim_w = simulation.generate_poisson_states(2, 200, 20) 92 | sim_data = simulation.generate_state_data(sim_m, sim_w) 93 | sim_means_noised = sim_m + 5*(np.random.random(sim_m.shape)-0.5) 94 | m, w, ll = state_estimation.poisson_estimate_state(sim_data, 2, init_means=sim_means_noised, max_iters=10, disp=False, method='L-BFGS-B') 95 | self.assertTrue(np.max(w.sum(0) - 1.0)<0.001) 96 | means_good = False 97 | weights_good = False 98 | for p in itertools.permutations([0,1]): 99 | means_good = means_good or (np.mean(np.abs(sim_m-m[:,p]))<20.0) 100 | weights_good = weights_good or (np.mean(np.abs(sim_w-w[p,:]))<0.3) 101 | self.assertTrue(means_good) 102 | self.assertTrue(weights_good) 103 | 104 | def test_random_means_2(self): 105 | """ 106 | Test state estimation with random means and weights. 
107 | 
108 |         20 cells, 200 genes, 2 clusters
109 |         """
110 |         sim_m, sim_w = simulation.generate_poisson_states(2, 20, 200)
111 |         sim_data = simulation.generate_state_data(sim_m, sim_w)
112 |         sim_means_noised = sim_m + 5*(np.random.random(sim_m.shape)-0.5)
113 |         m, w, ll = state_estimation.poisson_estimate_state(sim_data, 2, init_means=sim_means_noised, max_iters=10, disp=False)
114 |         means_good = False
115 |         weights_good = False
116 |         for p in itertools.permutations([0,1]):
117 |             means_good = means_good or (np.mean(np.abs(sim_m-m[:,p]))<20.0)
118 |             weights_good = weights_good or (np.mean(np.abs(sim_w-w[p,:]))<0.2)
119 |         self.assertTrue(means_good)
120 |         self.assertTrue(weights_good)
121 | 
122 |     def test_run_se(self):
123 |         """
124 |         test the run_state_estimation function
125 |         """
126 |         sim_m, sim_w = simulation.generate_poisson_states(2, 200, 20)
127 |         sim_data = simulation.generate_state_data(sim_m, sim_w)
128 |         m, w, ll = run_state_estimation(sim_data, 2, dist='Poiss', max_iters=10, disp=False)
129 |         means_good = False
130 |         weights_good = False
131 |         for p in itertools.permutations([0,1]):
132 |             means_good = means_good or (np.mean(np.abs(sim_m-m[:,p]))<20.0)
133 |             weights_good = weights_good or (np.mean(np.abs(sim_w-w[p,:]))<0.3)
134 |         self.assertTrue(means_good)
135 |         self.assertTrue(weights_good)
136 | 
--------------------------------------------------------------------------------
/tests/test_zip_state_estimation.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | 
  3 | import unittest
  4 | from unittest import TestCase
  5 | from flaky import flaky
  6 | 
  7 | import numpy as np
  8 | from scipy.io import loadmat
  9 | 
 10 | from uncurl import zip_state_estimation, simulation
 11 | 
 12 | @flaky
 13 | @unittest.skip('zip methods currently not supported')
 14 | class ZIPStateEstimationTest(TestCase):
 15 | 
 16 |     def setUp(self):
 17 |         pass
 18 | 
 19 |     def test_state_estimation(self):
 20 |         """
 21 |         Generate sample data from a small set to see that the state
 22 |         estimation is accurate.
 23 | 
 24 |         7 cells, 4 genes, 2 clusters
 25 |         """
 26 |         sim_means = np.array([[20.,30.],
 27 |                               [10.,3.],
 28 |                               [90.,50.],
 29 |                               [10.,4.]])
 30 |         sim_assignments = np.array([[0.1,0.2,0.3,0.4,0.5,0.8,0.9],
 31 |                                     [0.9,0.8,0.7,0.6,0.5,0.2,0.1]])
 32 |         sim_data = simulation.generate_zip_state_data(sim_means, sim_assignments, 0.3)
 33 |         sim_data = sim_data + 1e-8
 34 |         print(sim_data)
 35 |         # add noise to the mean
 36 |         sim_means_noised = sim_means + 5*(np.random.random(sim_means.shape)-0.5)
 37 |         m, w, ll = zip_state_estimation.zip_estimate_state(sim_data, 2, init_means=sim_means_noised, max_iters=10, disp=False)
 38 |         print(m)
 39 |         print(w)
 40 |         print(w.sum(0))
 41 |         self.assertTrue(np.max(w.sum(0) - 1.0)<0.01)
 42 |         # mean error in M is less than 10
 43 |         self.assertTrue(np.mean(np.abs(sim_means-m))<10.0)
 44 |         # mean error in W is less than 0.3 (arbitrary boundary)
 45 |         self.assertTrue(np.mean(np.abs(sim_assignments-w))<0.3)
 46 | 
 47 |     def test_state_estimation_2(self):
 48 |         """
 49 |         Generate sample data from a slightly larger set to see that the state
 50 |         estimation is accurate.
 51 | 
 52 |         11 cells, 5 genes, 3 clusters
 53 | 
 54 |         This might fail due to inherent randomness...
55 | """ 56 | sim_means = np.array([[20.,30.,4.], 57 | [10.,3.,9.], 58 | [90.,50.,10.], 59 | [10.,4.,30.], 60 | [35.,10.,2.]]) 61 | sim_assignments = np.array([[0.1,0.2,0.3,0.4,0.1,0.7,0.6,0.9,0.5,0.2,0.1], 62 | [0.6,0.7,0.3,0.4,0.1,0.2,0.1,0.1,0.0,0.3,0.8], 63 | [0.3,0.1,0.4,0.2,0.8,0.1,0.3,0.0,0.5,0.5,0.1]]) 64 | sim_data = simulation.generate_zip_state_data(sim_means, sim_assignments, 0.3) 65 | sim_data = sim_data + 1e-8 66 | print(sim_data) 67 | # add noise to the mean 68 | sim_means_noised = sim_means + 5*(np.random.random(sim_means.shape)-0.5) 69 | m, w, ll = zip_state_estimation.zip_estimate_state(sim_data, 3, init_means=sim_means_noised, max_iters=10, disp=False) 70 | print(m) 71 | print(w) 72 | print(w.sum(0)) 73 | self.assertTrue(np.max(w.sum(0) - 1.0)<0.01) 74 | # mean error in M is less than 10 75 | self.assertTrue(np.mean(np.abs(sim_means-m))<10.0) 76 | # mean error in W is less than 0.4 (arbitrary boundary) 77 | self.assertTrue(np.mean(np.abs(sim_assignments-w))<0.4) 78 | 79 | def test_random_means(self): 80 | """ 81 | Test state estimation with random means and weights. 82 | 83 | 200 cells, 20 genes, 2 clusters 84 | """ 85 | sim_m, sim_w = simulation.generate_poisson_states(2, 200, 20) 86 | z = np.random.random()/2 87 | sim_data = simulation.generate_zip_state_data(sim_m, sim_w, z) 88 | sim_means_noised = sim_m + 5*(np.random.random(sim_m.shape)-0.5) 89 | m, w, ll = zip_state_estimation.zip_estimate_state(sim_data, 2, init_means=sim_means_noised, max_iters=10, disp=False) 90 | self.assertTrue(np.max(w.sum(0) - 1.0)<0.001) 91 | self.assertTrue(np.mean(np.abs(sim_m-m))<50.0) 92 | self.assertTrue(np.mean(np.abs(sim_w-w))<0.4) 93 | 94 | def test_random_means_2(self): 95 | """ 96 | Test state estimation with random means and weights. 97 | 98 | 20 cells, 200 genes, 2 clusters 99 | """ 100 | sim_m, sim_w = simulation.generate_poisson_states(2, 20, 200) 101 | sim_data = simulation.generate_state_data(sim_m, sim_w) 102 | sim_means_noised = sim_m + 5*(np.random.random(sim_m.shape)-0.5) 103 | m, w, ll = zip_state_estimation.zip_estimate_state(sim_data, 2, init_means=sim_means_noised, max_iters=10, disp=False) 104 | self.assertTrue(np.max(w.sum(0) - 1.0)<0.001) 105 | self.assertTrue(np.mean(np.abs(sim_m-m))<60.0) 106 | self.assertTrue(np.mean(np.abs(sim_w-w))<0.5) 107 | -------------------------------------------------------------------------------- /uncurl/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .clustering import poisson_cluster, kmeans_pp 3 | from .zip_clustering import zip_cluster 4 | from .pois_ll import poisson_ll, poisson_dist 5 | from .qual2quant import qualNorm 6 | from .state_estimation import poisson_estimate_state, update_m 7 | from .run_se import run_state_estimation 8 | from .nb_state_estimation import nb_estimate_state 9 | from .zip_state_estimation import zip_estimate_state 10 | from .dimensionality_reduction import mds, dim_reduce, dim_reduce_data 11 | from .lineage import run_lineage, pseudotime 12 | from .nb_clustering import nb_cluster 13 | 14 | from .preprocessing import max_variance_genes 15 | 16 | from . import sampling 17 | 18 | from .nmf_wrapper import log_norm_nmf 19 | 20 | try: 21 | from . 
import experiment_runner
22 | except ImportError:
23 |     print('unable to import experiment_runner')
24 | 
--------------------------------------------------------------------------------
/uncurl/clustering.py:
--------------------------------------------------------------------------------
 1 | # poisson clustering
 2 | 
 3 | import numpy as np
 4 | from scipy import sparse
 5 | 
 6 | from .pois_ll import poisson_ll, poisson_dist
 7 | 
 8 | eps = 1e-10
 9 | 
10 | def kmeans_pp(data, k, centers=None):
11 |     """
12 |     Generates kmeans++ initial centers.
13 | 
14 |     Args:
15 |         data (array): A 2d array- genes x cells
16 |         k (int): Number of clusters
17 |         centers (array, optional): if provided, these are one or more known cluster centers. 2d array of genes x number of centers (<=k).
18 | 
19 |     Returns:
20 |         centers - a genes x k array of cluster means.
21 |         assignments - a cells x 1 array of cluster assignments
22 |     """
23 |     # TODO: what if there is missing data for a given gene?
24 |     # missing data could be if all the entries are -1.
25 |     genes, cells = data.shape
26 |     if sparse.issparse(data) and not sparse.isspmatrix_csc(data):
27 |         data = sparse.csc_matrix(data)
28 |     num_known_centers = 0
29 |     if centers is None:
30 |         centers = np.zeros((genes, k))
31 |     else:
32 |         num_known_centers = centers.shape[1]
33 |         centers = np.concatenate((centers, np.zeros((genes, k-num_known_centers))), 1)
34 |     distances = np.zeros((cells, k))
35 |     distances[:] = np.inf
36 |     if num_known_centers == 0:
37 |         init = np.random.randint(0, cells)
38 |         if sparse.issparse(data):
39 |             centers[:,0] = data[:, init].toarray().flatten()
40 |         else:
41 |             centers[:,0] = data[:, init]
42 |         num_known_centers += 1
43 |     available_cells = list(range(cells))
44 |     for c in range(num_known_centers, k):
45 |         c2 = c-1
46 |         # use different formulation for distance... if sparse, use lls
47 |         # if not sparse, use poisson_dist
48 |         if sparse.issparse(data):
49 |             lls = poisson_ll(data, centers[:,c2:c2+1]).flatten()
50 |             distances[:,c2] = 1 + lls.max() - lls
51 |             distances[:,c2] /= distances[:,c2].max()
52 |         else:
53 |             for cell in range(cells):
54 |                 distances[cell, c2] = poisson_dist(data[:,cell], centers[:,c2])
55 |         # choose a new data point as center... probability proportional
56 |         # to distance^2
57 |         min_distances = np.min(distances, 1)
58 |         min_distances = min_distances**2
59 |         min_distances = min_distances[available_cells]
60 |         # should be sampling without replacement
61 |         min_dist = np.random.choice(available_cells,
62 |                 p=min_distances/min_distances.sum())
63 |         available_cells.remove(min_dist)
64 |         if sparse.issparse(data):
65 |             centers[:,c] = data[:, min_dist].toarray().flatten()
66 |         else:
67 |             centers[:,c] = data[:, min_dist]
68 |     lls = poisson_ll(data, centers)
69 |     new_assignments = np.argmax(lls, 1)
70 |     centers[centers==0.0] = eps
71 |     return centers, new_assignments
72 | 
73 | def poisson_cluster(data, k, init=None, max_iters=100):
74 |     """
75 |     Performs Poisson hard EM on the given data.
76 | 
77 |     Args:
78 |         data (array): A 2d array- genes x cells. Can be dense or sparse; for best performance, sparse matrices should be in CSC format.
79 |         k (int): Number of clusters
80 |         init (array, optional): Initial centers - genes x k array. Default: None, use kmeans++
81 |         max_iters (int, optional): Maximum number of iterations. Default: 100
82 | 
83 |     Returns:
84 |         a tuple of two arrays: a cells x 1 vector of cluster assignments,
85 |         and a genes x k array of cluster means.
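
    Example (a minimal sketch; `data` here is a hypothetical toy count
    matrix, not one of the bundled datasets):

        import numpy as np
        from uncurl import poisson_cluster

        data = np.random.poisson(5, size=(100, 50))  # 100 genes, 50 cells
        assignments, centers = poisson_cluster(data, 3)
        # assignments has length 50; centers has shape (100, 3)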
 86 |     """
 87 |     # TODO: be able to use a combination of fixed and unknown starting points
 88 |     # e.g., have init values only for certain genes, have a row of all
 89 |     # zeros indicating that kmeans++ should be used for that row.
 90 |     genes, cells = data.shape
 91 |     #print 'starting: ', centers
 92 |     if sparse.issparse(data) and not sparse.isspmatrix_csc(data):
 93 |         data = sparse.csc_matrix(data)
 94 |     init, assignments = kmeans_pp(data, k, centers=init)
 95 |     centers = np.copy(init)
 96 |     assignments = np.zeros(cells)
 97 |     for it in range(max_iters):
 98 |         lls = poisson_ll(data, centers)
 99 |         #cluster_dists = np.zeros((cells, k))
100 |         new_assignments = np.argmax(lls, 1)
101 |         if np.equal(assignments, new_assignments).all():
102 |             #print 'ending: ', centers
103 |             return new_assignments, centers
104 |         for c in range(k):
105 |             if sparse.issparse(data):
106 |                 if data[:,new_assignments==c].shape[1]==0:
107 |                     # empty cluster - re-initialize its center
108 |                     new_c, _ = kmeans_pp(data, k, centers[:,:c])
109 |                     centers[:,c] = new_c[:,c]
110 |                 else:
111 |                     centers[:,c] = np.asarray(data[:,new_assignments==c].mean(1)).flatten()
112 |             else:
113 |                 if data[:,new_assignments==c].shape[1]==0:
114 |                     new_c, _ = kmeans_pp(data, k, centers[:,:c])
115 |                     centers[:,c] = new_c[:,c]
116 |                 else:
117 |                     centers[:,c] = np.mean(data[:,new_assignments==c], 1)
118 |         assignments = new_assignments
119 |     return assignments, centers
120 | 
--------------------------------------------------------------------------------
/uncurl/dimensionality_reduction.py:
--------------------------------------------------------------------------------
 1 | # dimensionality reduction
 2 | 
 3 | import numpy as np
 4 | from .pois_ll import poisson_dist
 5 | 
 6 | eps=1e-8
 7 | max_or_zero = np.vectorize(lambda x: max(0.0,x))
 8 | 
 9 | def diffusion_mds(means, weights, d, diffusion_rounds=10):
10 |     """
11 |     Dimensionality reduction using MDS, while running diffusion on W.
12 | 
13 |     Args:
14 |         means (array): genes x clusters
15 |         weights (array): clusters x cells
16 |         d (int): desired dimensionality
17 | 
18 |     Returns:
19 |         W_reduced (array): array of shape (d, cells)
20 |     """
21 |     for i in range(diffusion_rounds):
22 |         weights = weights*weights
23 |         weights = weights/weights.sum(0)
24 |     X = dim_reduce(means, weights, d)
25 |     if X.shape[0]==2:
26 |         return X.dot(weights)
27 |     else:
28 |         return X.T.dot(weights)
29 | 
30 | 
31 | def mds(means, weights, d):
32 |     """
33 |     Dimensionality reduction using MDS.
34 | 
35 |     Args:
36 |         means (array): genes x clusters
37 |         weights (array): clusters x cells
38 |         d (int): desired dimensionality
39 | 
40 |     Returns:
41 |         W_reduced (array): array of shape (d, cells)
42 |     """
43 |     X = dim_reduce(means, weights, d)
44 |     if X.shape[0]==2:
45 |         return X.dot(weights)
46 |     else:
47 |         return X.T.dot(weights)
48 | 
49 | def dim_reduce(means, weights, d):
50 |     """
51 |     Dimensionality reduction using Poisson distances and MDS.
52 | 
53 |     Args:
54 |         means (array): genes x clusters
55 |         weights (array): clusters x cells
56 |         d (int): desired dimensionality
57 | 
58 |     Returns:
59 |         X, a clusters x d matrix representing the reduced dimensions
60 |         of the cluster centers.
61 |     """
62 |     return dim_reduce_data(means, d)
63 | 
64 | def dim_reduce_data(data, d):
65 |     """
66 |     Does an MDS on the data directly, not on the means.
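    Note that this builds the full cells x cells Poisson distance matrix,
    so it is quadratic in the number of cells.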
67 | 
68 |     Args:
69 |         data (array): genes x cells
70 |         d (int): desired dimensionality
71 | 
72 |     Returns:
73 |         X, a cells x d matrix
74 |     """
75 |     genes, cells = data.shape
76 |     distances = np.zeros((cells, cells))
77 |     for i in range(cells):
78 |         for j in range(cells):
79 |             distances[i,j] = poisson_dist(data[:,i], data[:,j])
80 |     # do MDS on the distance matrix (procedure from Wikipedia)
81 |     proximity = distances**2
82 |     J = np.eye(cells) - 1./cells
83 |     B = -0.5*np.dot(J, np.dot(proximity, J))
84 |     # B should be symmetric, so we can use eigh
85 |     e_val, e_vec = np.linalg.eigh(B)
86 |     # Note: lam should be ordered to be the largest eigenvalues
87 |     lam = np.diag(e_val[-d:][::-1])
88 |     #lam = max_or_zero(lam)
89 |     E = e_vec[:,-d:][:,::-1]
90 |     X = np.dot(E, lam**0.5)
91 |     return X
92 | 
--------------------------------------------------------------------------------
/uncurl/evaluation.py:
--------------------------------------------------------------------------------
 1 | from collections import Counter
 2 | 
 3 | import numpy as np
 4 | from sklearn.neighbors import BallTree
 5 | 
 6 | def purity(labels, true_labels):
 7 |     """
 8 |     Calculates the purity score for the given labels.
 9 | 
10 |     Args:
11 |         labels (array): 1D array of integers
12 |         true_labels (array): 1D array of integers - true labels
13 | 
14 |     Returns:
15 |         purity score - a float between 0 and 1. Closer to 1 is better.
16 |     """
17 |     purity = 0.0
18 |     for i in set(labels):
19 |         indices = (labels==i)
20 |         true_clusters = true_labels[indices]
21 |         if len(true_clusters)==0:
22 |             continue
23 |         counts = Counter(true_clusters)
24 |         lab, count = counts.most_common()[0]
25 |         purity += count
26 |     return float(purity)/len(labels)
27 | 
28 | def nne(dim_red, true_labels):
29 |     """
30 |     Calculates the nearest neighbor accuracy (basically leave-one-out cross
31 |     validation with a 1NN classifier).
32 | 
33 |     Args:
34 |         dim_red (array): dimensions (k, cells)
35 |         true_labels (array): 1d array of integers
36 | 
37 |     Returns:
38 |         Nearest neighbor accuracy - fraction of points for which the
39 |         1NN classifier returns the correct value.
40 |     """
41 |     # use sklearn's BallTree
42 |     bt = BallTree(dim_red.T)
43 |     correct = 0
44 |     for i, l in enumerate(true_labels):
45 |         dist, ind = bt.query([dim_red[:,i]], k=2)
46 |         closest_cell = ind[0, 1]
47 |         if true_labels[closest_cell] == l:
48 |             correct += 1
49 |     return float(correct)/len(true_labels)
50 | 
51 | def mdl(ll, k, data):
52 |     """
53 |     Returns the minimum description length score of the model given its
54 |     log-likelihood and k, the number of cell types.
55 | 
56 |     A lower cost is better...
57 |     """
58 | 
59 |     """
60 |     N - no. of genes
61 |     n - no. of cells
62 |     k - no. of cell types
63 |     R - sum(Dataset) i.e. total no. of reads
64 | 
65 |     function TotCost = TotBits(N,m,p,R,C)
66 |     # C is the cost from the cost function
67 |     TotCost = C + (N*m + m*p)*(log(R/(N*p)));
68 |     """
69 |     N, m = data.shape
70 |     cost = ll + (N*m + m*k)*(np.log(data.sum()/(N*k)))
71 |     return cost
72 | 
--------------------------------------------------------------------------------
/uncurl/fit_dist_data.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from __future__ import division
 4 | import numpy as np
 5 | from scipy.stats import poisson
 6 | from scipy.stats import norm
 7 | import math as math
 8 | 
 9 | 
10 | 
11 | def GetDistFitError(Dat):
12 |     #Assumes data to be in the form of a numpy matrix
13 |     # TODO: make this work for sparse inputs?
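    # Approach: build an empirical frequency histogram over the integer
    # values 0..max(Dat), generate matching Poisson / Normal / Log-Normal
    # densities from moment estimates, and report the L2 distance between
    # the normalized histogram and each candidate density.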
14 | # TODO: fixed number of bins, rather than use the maximum value? 15 | # use np.histogram 16 | Dat = np.round(Dat).astype(int) 17 | Dat2 = np.log(1 + Dat) 18 | BinDat = np.zeros(max(Dat)+1) 19 | Poiss = np.zeros(max(Dat)+1) 20 | Norm = np.zeros(max(Dat)+1) 21 | LogNorm = np.zeros(max(Dat)+1) 22 | 23 | m = np.mean(Dat) 24 | std = np.std(Dat, ddof=1) 25 | m_l = np.mean(Dat2) 26 | std_l = np.std(Dat2, ddof=1) 27 | 28 | #Create a bin of frequencies and generate frequencies based on distr 29 | for i in range(0,len(BinDat)): 30 | # this is EXTREMELY INEFFICIENT!!!!! 31 | # n^2 since Dat==i requires iterating through the whole array 32 | BinDat[i] = sum(Dat==i) 33 | Poiss[i] = poisson.pmf(i,m) 34 | Norm[i] = norm.pdf((i-m+1)/std) 35 | LogNorm[i] = norm.pdf((i-m_l)/std_l) 36 | BinDat = BinDat/sum(BinDat) + 0.0 37 | Poiss = Poiss/sum(Poiss) + 0.0 38 | Norm = Norm/sum(Norm) + 0.0 39 | LogNorm = LogNorm/sum(LogNorm) + 0.0 40 | #Get error for each distribution 41 | PoissErr = np.linalg.norm(BinDat - Poiss) 42 | NormErr = np.linalg.norm(BinDat - Norm) 43 | LogNormErr = np.linalg.norm(BinDat - LogNorm) 44 | d = {} 45 | d['poiss'] = PoissErr 46 | d['norm'] = NormErr 47 | d['lognorm'] = LogNormErr 48 | return d 49 | 50 | def NormPDF(x,mu,std): 51 | pi = math.pi 52 | temp = np.exp(-((x-mu)**2)/(2*std**2))/np.sqrt(2*pi*std**2) 53 | return temp 54 | 55 | def DistFitDataset(Dat): 56 | """ 57 | Given a data matrix, this returns the per-gene fit error for the 58 | Poisson, Normal, and Log-Normal distributions. 59 | 60 | Args: 61 | Dat (array): numpy array with shape (genes, cells) 62 | 63 | Returns: 64 | d (dict): 'poiss', 'norm', 'lognorm' give the fit error for each distribution. 65 | """ 66 | #Assumes data to be in the form of a numpy matrix 67 | (r,c) = Dat.shape 68 | Poiss = np.zeros(r) 69 | Norm = np.zeros(r) 70 | LogNorm = np.zeros(r) 71 | for i in range(r): 72 | temp = GetDistFitError(Dat[i]) 73 | Poiss[i] = temp['poiss'] 74 | Norm[i] = temp['norm'] 75 | LogNorm[i] = temp['lognorm'] 76 | d = {} 77 | d['poiss'] = Poiss 78 | d['norm'] = Norm 79 | d['lognorm'] = LogNorm 80 | return d 81 | 82 | 83 | #Dat = np.array([[0,0,0,1,1,2,2,3,4],[0,0,0,1,1,1,3,5,7]]) 84 | #Dat = np.array([2,3,4,5]) 85 | #print GetDistFitError(Dat) 86 | #n = 100 87 | #Dat = np.random.poisson(lam = [[2]*n,[.5]*n], size = (2,n)) 88 | #d = DistFitDataset(Dat) 89 | -------------------------------------------------------------------------------- /uncurl/gap_score.py: -------------------------------------------------------------------------------- 1 | """ 2 | Using gap score to determine optimal cluster number 3 | """ 4 | import numpy as np 5 | from sklearn.cluster import KMeans 6 | 7 | def preproc_data(data, gene_subset=False, **kwargs): 8 | """ 9 | basic data preprocessing before running gap score 10 | 11 | Assumes that data is a matrix of shape (genes, cells). 12 | 13 | Returns a matrix of shape (cells, 8), using the first 8 SVD 14 | components. Why 8? It's an arbitrary selection... 
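
    A minimal usage sketch (`counts` is assumed to be a genes x cells
    array or scipy.sparse matrix):

        data_tsvd = preproc_data(counts, gene_subset=True)
        # data_tsvd has shape (cells, k), k <= 8, ready for KMeans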
15 | """ 16 | import uncurl 17 | from uncurl.preprocessing import log1p, cell_normalize 18 | from sklearn.decomposition import TruncatedSVD 19 | data_subset = data 20 | if gene_subset: 21 | gene_subset = uncurl.max_variance_genes(data) 22 | data_subset = data[gene_subset, :] 23 | tsvd = TruncatedSVD(min(8, data_subset.shape[0] - 1)) 24 | data_tsvd = tsvd.fit_transform(log1p(cell_normalize(data_subset)).T) 25 | return data_tsvd 26 | 27 | def calculate_bounding_box(data): 28 | """ 29 | Returns a 2 x m array indicating the min and max along each 30 | dimension. 31 | """ 32 | mins = data.min(0) 33 | maxes = data.max(0) 34 | return mins, maxes 35 | 36 | def calculate_gap(data, clustering, km, B=50, **kwargs): 37 | """ 38 | See: https://datasciencelab.wordpress.com/2013/12/27/finding-the-k-in-k-means-clustering/ 39 | 40 | https://web.stanford.edu/~hastie/Papers/gap.pdf 41 | 42 | Returns two results: the gap score, and s_k. 43 | """ 44 | k = len(set(clustering)) 45 | Wk = km.inertia_ 46 | mins, maxes = calculate_bounding_box(data) 47 | Wk_est = [] 48 | for i in range(B): 49 | data_sample = (maxes-mins)*np.random.random(data.shape) + mins 50 | km_b = KMeans(k) 51 | km_b.fit_predict(data_sample) 52 | Wk_est.append(km_b.inertia_) 53 | Wk_est = np.log(np.array(Wk_est)) 54 | Wk_mean = np.mean(Wk_est) 55 | Wk_std = np.std(Wk_est) 56 | gap = Wk_mean - np.log(Wk) 57 | sk = np.sqrt(1 + 1.0/B)*Wk_std 58 | return gap, sk 59 | 60 | 61 | def run_gap_k_selection(data, k_min=1, k_max=50, B=5, 62 | skip=5, **kwargs): 63 | """ 64 | Runs gap score for all k from k_min to k_max. 65 | """ 66 | if k_min == k_max: 67 | return k_min 68 | gap_vals = [] 69 | sk_vals = [] 70 | k_range = list(range(k_min, k_max, skip)) 71 | min_k = 0 72 | min_i = 0 73 | for i, k in enumerate(k_range): 74 | km = KMeans(k) 75 | clusters = km.fit_predict(data) 76 | gap, sk = calculate_gap(data, clusters, km, B=B) 77 | if len(gap_vals) > 1: 78 | if gap_vals[-1] >= gap - (skip+1)*sk: 79 | min_i = i 80 | min_k = k_range[i-1] 81 | break 82 | #return k_range[-1], gap_vals, sk_vals 83 | gap_vals.append(gap) 84 | sk_vals.append(sk) 85 | if min_k == 0: 86 | min_k = k_max 87 | if skip == 1: 88 | return min_k, gap_vals, sk_vals 89 | gap_vals = [] 90 | sk_vals = [] 91 | for k in range(min_k - skip, min_k + skip): 92 | km = KMeans(k) 93 | clusters = km.fit_predict(data) 94 | gap, sk = calculate_gap(data, clusters, km, B=B) 95 | if len(gap_vals) > 1: 96 | if gap_vals[-1] >= gap - sk: 97 | min_k = k-1 98 | return min_k, gap_vals, sk_vals 99 | gap_vals.append(gap) 100 | sk_vals.append(sk) 101 | return k, gap_vals, sk_vals 102 | 103 | -------------------------------------------------------------------------------- /uncurl/nb_state_estimation.py: -------------------------------------------------------------------------------- 1 | # state estimation with NB convex mixture model 2 | 3 | from .clustering import kmeans_pp 4 | from .nb_clustering import nb_fit, find_nb_genes 5 | from .state_estimation import initialize_from_assignments 6 | 7 | import numpy as np 8 | from scipy.optimize import minimize 9 | 10 | eps=1e-8 11 | 12 | def _create_w_objective(m, X, R): 13 | """ 14 | Creates an objective function and its derivative for W, given M and X (data) 15 | 16 | Args: 17 | m (array): genes x clusters 18 | X (array): genes x cells 19 | R (array): 1 x genes 20 | """ 21 | genes, clusters = m.shape 22 | cells = X.shape[1] 23 | R1 = R.reshape((genes, 1)).dot(np.ones((1, cells))) 24 | def objective(w): 25 | # convert w into a matrix first... 
because it's a vector for 26 | # optimization purposes 27 | w = w.reshape((m.shape[1], X.shape[1])) 28 | d = m.dot(w)+eps 29 | return np.sum((X + R1)*np.log(d + R1) - X*np.log(d))/genes 30 | def deriv(w): 31 | # derivative of objective wrt all elements of w 32 | # for w_{ij}, the derivative is... m_j1+...+m_jn sum over genes minus 33 | # x_ij 34 | w2 = w.reshape((m.shape[1], X.shape[1])) 35 | d = m.dot(w2)+eps 36 | temp = X/d 37 | temp2 = (X+R1)/(d+R1) 38 | m1 = m.T.dot(temp2) 39 | m2 = m.T.dot(temp) 40 | deriv = m1 - m2 41 | return deriv.flatten()/genes 42 | return objective, deriv 43 | 44 | def _create_m_objective(w, X, R): 45 | """ 46 | Creates an objective function and its derivative for M, given W and X 47 | 48 | Args: 49 | w (array): clusters x cells 50 | X (array): genes x cells 51 | R (array): 1 x genes 52 | """ 53 | clusters, cells = w.shape 54 | genes = X.shape[0] 55 | R1 = R.reshape((genes, 1)).dot(np.ones((1, cells))) 56 | def objective(m): 57 | m = m.reshape((X.shape[0], w.shape[0])) 58 | d = m.dot(w)+eps 59 | return np.sum((X+R1)*np.log(d + R1) - X*np.log(d))/genes 60 | def deriv(m): 61 | m2 = m.reshape((X.shape[0], w.shape[0])) 62 | d = m2.dot(w)+eps 63 | temp = X/d 64 | temp2 = (X+R1)/(d+R1) 65 | w1 = w.dot(temp2.T) 66 | w2 = w.dot(temp.T) 67 | deriv = w1.T - w2.T 68 | return deriv.flatten()/genes 69 | return objective, deriv 70 | 71 | def nb_estimate_state(data, clusters, R=None, init_means=None, init_weights=None, max_iters=10, tol=1e-4, disp=True, inner_max_iters=400, normalize=True): 72 | """ 73 | Uses a Negative Binomial Mixture model to estimate cell states and 74 | cell state mixing weights. 75 | 76 | If some of the genes do not fit a negative binomial distribution 77 | (mean > var), then the genes are discarded from the analysis. 78 | 79 | Args: 80 | data (array): genes x cells 81 | clusters (int): number of mixture components 82 | R (array, optional): vector of length genes containing the dispersion estimates for each gene. Default: use nb_fit 83 | init_means (array, optional): initial centers - genes x clusters. Default: kmeans++ initializations 84 | init_weights (array, optional): initial weights - clusters x cells. Default: random(0,1) 85 | max_iters (int, optional): maximum number of iterations. Default: 10 86 | tol (float, optional): if both M and W change by less than tol (in RMSE), then the iteration is stopped. Default: 1e-4 87 | disp (bool, optional): whether or not to display optimization parameters. Default: True 88 | inner_max_iters (int, optional): Number of iterations to run in the scipy minimizer for M and W. Default: 400 89 | normalize (bool, optional): True if the resulting W should sum to 1 for each cell. Default: True. 90 | 91 | Returns: 92 | M (array): genes x clusters - state centers 93 | W (array): clusters x cells - state mixing components for each cell 94 | R (array): 1 x genes - NB dispersion parameter for each gene 95 | ll (float): Log-likelihood of final iteration 96 | """ 97 | # TODO: deal with non-NB data... just ignore it? or do something else? 98 | data_subset = data.copy() 99 | genes, cells = data_subset.shape 100 | # 1. 
use nb_fit to get initial Rs
101 |     if R is None:
102 |         nb_indices = find_nb_genes(data)
103 |         data_subset = data[nb_indices, :]
104 |         if init_means is not None and len(init_means) > sum(nb_indices):
105 |             init_means = init_means[nb_indices, :]
106 |         genes, cells = data_subset.shape
107 |         R = np.zeros(genes)
108 |         P, R = nb_fit(data_subset)
109 |     if init_means is None:
110 |         means, assignments = kmeans_pp(data_subset, clusters)
111 |     else:
112 |         means = init_means.copy()
113 |     clusters = means.shape[1]
114 |     w_init = np.random.random(cells*clusters)
115 |     if init_weights is not None:
116 |         if len(init_weights.shape)==1:
117 |             init_weights = initialize_from_assignments(init_weights, clusters)
118 |         w_init = init_weights.reshape(cells*clusters)
119 |     m_init = means.reshape(genes*clusters)
120 |     ll = np.inf
121 |     # repeat steps 1 and 2 until convergence:
122 |     for i in range(max_iters):
123 |         if disp:
124 |             print('iter: {0}'.format(i))
125 |         w_bounds = [(0, 1.0) for x in w_init]
126 |         m_bounds = [(0, None) for x in m_init]
127 |         # step 1: given M, estimate W
128 |         w_objective, w_deriv = _create_w_objective(means, data_subset, R)
129 |         w_res = minimize(w_objective, w_init, method='L-BFGS-B', jac=w_deriv, bounds=w_bounds, options={'disp':disp, 'maxiter':inner_max_iters})
130 |         w_diff = np.sqrt(np.sum((w_res.x-w_init)**2))/w_init.size
131 |         w_new = w_res.x.reshape((clusters, cells))
132 |         w_init = w_res.x
133 |         # step 2: given W, update M
134 |         m_objective, m_deriv = _create_m_objective(w_new, data_subset, R)
135 |         # method could be 'L-BFGS-B' or 'SLSQP'... SLSQP gives a memory error...
136 |         # or use TNC...
137 |         m_res = minimize(m_objective, m_init, method='L-BFGS-B', jac=m_deriv, bounds=m_bounds, options={'disp':disp, 'maxiter':inner_max_iters})
138 |         m_diff = np.sqrt(np.sum((m_res.x-m_init)**2))/m_init.size
139 |         m_new = m_res.x.reshape((genes, clusters))
140 |         m_init = m_res.x
141 |         ll = m_res.fun
142 |         means = m_new
143 |         if w_diff < tol and m_diff < tol:
144 |             break
145 |     if normalize:
146 |         w_new = w_new/w_new.sum(0)
147 |     return m_new, w_new, R, ll
148 | 
--------------------------------------------------------------------------------
/uncurl/nolips_parallel.pyx:
--------------------------------------------------------------------------------
 1 | # parallel sparse implementation of nolips Poisson optimization
 2 | 
 3 | #import cython
 4 | cimport cython
 5 | 
 6 | from cython.parallel import prange
 7 | 
 8 | from scipy import sparse
 9 | 
10 | import numpy as np
11 | cimport numpy as np
12 | #DTYPE = np.double
13 | #ctypedef np.double_t DTYPE_t
14 | 
15 | # TODO: use fused types
16 | ctypedef fused int2:
17 |     short
18 |     int
19 |     long
20 |     long long
21 | 
22 | ctypedef fused DTYPE_t:
23 |     float
24 |     double
25 | 
26 | cdef double eps = 1e-10
27 | 
28 | @cython.boundscheck(False)
29 | @cython.wraparound(False)
30 | @cython.nonecheck(False)
31 | cdef inline void _update(int2 i, DTYPE_t[:] data_, int2[:] indices, int2[:] indptr, DTYPE_t[:,:] cij, double[:] R_view, double[:,:] M_view, double[:,:] W_view, double[:,:] Wnew_view, double lam, double eps, int2 k, double regularization) nogil:
32 |     # NoLips in-place update for a single cell/column of w.
33 |     # all these updates can run in parallel.
34 | cdef int2 start_ind = indptr[i] 35 | cdef int2 end_ind = indptr[i+1] 36 | cdef int2 g, k2, j, ind 37 | cdef double mw, divisor 38 | for ind in range(start_ind, end_ind): 39 | g = indices[ind] 40 | mw = eps 41 | for k2 in range(k): 42 | mw += M_view[g,k2]*W_view[k2,i] 43 | mw = data_[ind]/mw 44 | for j in range(k): 45 | cij[i,j] += M_view[g,j]*mw 46 | for j in range(k): 47 | # divisor has to be >= 0 48 | divisor = 1+lam*W_view[j,i]*(regularization + R_view[j]-cij[i,j]) 49 | if divisor > 0: 50 | Wnew_view[j,i] = W_view[j,i]/divisor 51 | else: 52 | Wnew_view[j,i] = 0.0 53 | 54 | @cython.boundscheck(False) 55 | @cython.wraparound(False) 56 | @cython.nonecheck(False) 57 | def sparse_nolips_update_w(np.ndarray[DTYPE_t, ndim=1] X_data, 58 | np.ndarray[int2, ndim=1] X_indices, 59 | np.ndarray[int2, ndim=1] X_indptr, 60 | int2 cells, 61 | int2 genes, 62 | np.ndarray[DTYPE_t, ndim=2] M, 63 | np.ndarray[DTYPE_t, ndim=2] W, 64 | np.ndarray[DTYPE_t, ndim=1] lams, 65 | np.ndarray[DTYPE_t, ndim=1] m_sum, int2 n_threads=4, disp=False, 66 | double regularization=0.0): 67 | """ 68 | Parallel nolips... 69 | 70 | Args: 71 | X (csc sparse array): data with shape genes x cells 72 | M (array): genes x k 73 | W (array): k x cells 74 | lams (array): 1/(2*X.sum(0)) - sum each column of X - has length cells 75 | m_sum (array): M.sum(0) 76 | n_threads (int2): number of threads 77 | disp (bool): currently unused 78 | regularization (double): regularization factor for L1 regularization 79 | 80 | Returns: 81 | Updated copy of W 82 | """ 83 | cdef int2 k = W.shape[0] 84 | cdef double[:,:] M_view = M 85 | #cdef np.ndarray[DTYPE_t, ndim=1] R = M.sum(0) 86 | cdef double[:] mw_view 87 | cdef double[:] R_view = m_sum 88 | cdef np.ndarray[DTYPE_t, ndim=1] z = np.zeros(k) 89 | cdef double lam, mw, xig 90 | #cdef np.ndarray[DTYPE_t, ndim=1] lams = 1/(2*Xsum) 91 | cdef double[:] lams_view = lams 92 | cdef double[:,:] Wnew_view = np.empty((k, cells), dtype=np.double) 93 | cdef double[:,:] W_view = W 94 | cdef Py_ssize_t i 95 | #X_csc = sparse.csc_matrix(X) 96 | # when there are more than 2^31 elements, will be long. 97 | # so this function won't work - have to deal with this in the calling 98 | # function. 99 | #cdef int2[:] indices, indptr 100 | cdef int2[:] indices = X_indices 101 | cdef int2[:] indptr = X_indptr 102 | cdef DTYPE_t[:] data_ = X_data 103 | cdef DTYPE_t[:,:] cij = np.zeros((cells, k)) 104 | # schedules: guided, 105 | for i in prange(cells, schedule="guided", nogil=True, num_threads=n_threads): 106 | _update(i, data_, indices, indptr, cij, R_view, M_view, W_view, Wnew_view, lams_view[i], eps, k, regularization) 107 | return np.asarray(Wnew_view) 108 | 109 | -------------------------------------------------------------------------------- /uncurl/plda_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | import os 5 | import subprocess 6 | 7 | from scipy import sparse 8 | 9 | from uncurl.sparse_utils import sparse_create_plda_file 10 | 11 | PLDA_FOLDER = "/home/yjzhang/plda" 12 | eps=1e-10 13 | 14 | # Contains methods to process input and output files for PLDA. 
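# A typical round trip (sketch; file names are illustrative):
#   create_plda_file(matrix, 'plda/data.txt')   # write counts in PLDA format
#   M, W = plda_estimate_state(matrix, k)       # train + infer via mpi_lda
#   # or read results back directly:
#   M = parse_model_file('model.txt')
#   W = parse_result_file('result.txt')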
15 | 
16 | # Generates an input file for PLDA from the matrix
17 | # Assumes "matrix" is a Numpy array containing only integers, with dimensions (genes x cells)
18 | def create_plda_file(matrix, filename):
19 |     if sparse.issparse(matrix):
20 |         sparse_create_plda_file(matrix, filename)
21 |         return
22 |     f = open(filename, "w")
23 |     (r,c) = matrix.shape
24 |     strings = []
25 |     # PLDA input format requires one line per document (cell). Each line contains a sparse
26 |     # representation of the counts of the words (genes) present. Example:
27 |     # G1 12 G2 4 G5 6
28 |     # (G1 appears 12 times, G2 appears 4 times, G5 appears 6 times)
29 |     for i in range(c):
30 |         for j in range(r):
31 |             strings.append("G" + str(j) + " " + str(int(matrix[j,i])) + " ")
32 |         strings.append("\n")
33 |     f.write("".join(strings))
34 | 
35 | 
36 | # Parses the "model file" outputted by PLDA into a
37 | # (word x topic) (or gene x archetype) matrix.
38 | def parse_model_file(model_file, word_topic=None, included_genes=None):
39 |     f = open(model_file, "r")
40 |     lines = f.readlines()
41 |     num_words = len(lines) # There's 1 line for each word
42 |     num_topics = len(lines[1].split()) - 1
43 | 
44 |     if word_topic is None:
45 |         word_topic = np.zeros((num_words, num_topics))
46 |     if included_genes is None:
47 |         included_genes = np.arange(num_words)
48 |     for line in lines:
49 |         tokens = line.split()
50 |         gene_number = int(tokens[0][1:])
51 |         original_row_number = included_genes[gene_number]
52 |         word_topic[original_row_number, :] = np.array(list(map(float, tokens[1:])))
53 |     return word_topic
54 | 
55 | 
56 | # Parses the "inference result" matrix outputted by PLDA into a
57 | # (topic x document) (or archetype x cell) matrix.
58 | def parse_result_file(result_file):
59 |     document_topic = np.loadtxt(result_file, dtype="float")
60 |     return document_topic.T
61 | 
62 | 
63 | 
64 | # Given a PLDA input file (each line is a "document", with each word followed by
65 | # its count), return a corresponding data matrix.
66 | def parse_plda_input(input_file, num_columns):
67 |     f = open(input_file, "r")
68 |     lines = f.readlines()
69 |     num_lines = len(lines)
70 |     matrix = np.zeros((num_lines, num_columns))
71 |     row = 0
72 | 
73 |     for line in lines:
74 |         tokens = line.split()
75 |         i = 1
76 |         while i < len(tokens):
77 |             gene_number = int(tokens[i-1][1:])
78 |             matrix[row, gene_number] = int(tokens[i])
79 |             i += 2
80 |         row += 1
81 |     return matrix
82 | 
83 | 
84 | # Given a PLDA input file, runs PLDA to find the M/W matrices.
85 | # Note: please call "create_plda_file()" beforehand to create a PLDA input
86 | # file from your matrix.
87 | def plda_estimate_state(data, k, threads=4, num_iterations=150, plda_folder=None): 88 | if plda_folder is None: 89 | plda_folder = PLDA_FOLDER 90 | data_mean = np.array(data.mean(0)).flatten() 91 | try: 92 | os.mkdir('plda') 93 | except: 94 | pass 95 | filename = os.path.join(os.getcwd(), 'plda', 'data.txt') 96 | create_plda_file(data, filename) 97 | print("Training PLDA") 98 | train_args = ("mpiexec", "-n", str(threads), os.path.join(plda_folder, "mpi_lda"), 99 | "--num_topics", str(k), "--alpha", "0.1", 100 | "--beta", "0.01", "--training_data_file", filename, 101 | "--model_file", "model.txt", "--burn_in_iterations", "100", "--total_iterations", str(num_iterations)) 102 | subprocess.call(train_args) #, stdout=subprocess.PIPE) 103 | 104 | print("TRAINED") 105 | 106 | inference_args = (os.path.join(plda_folder, "infer"), "--alpha", "0.1", "--beta", 107 | "0.01", "--inference_data_file", filename, "--inference_result_file", 108 | "result.txt", "--model_file", "model.txt", "--total_iterations", 109 | "50", "--burn_in_iterations", "20") 110 | subprocess.call(inference_args) #, stdout=subprocess.PIPE) 111 | 112 | M = parse_model_file("model.txt") 113 | W = parse_result_file("result.txt") 114 | M *= (data_mean / np.mean(M)) 115 | W = W/W.sum(axis=0, keepdims=1) 116 | return M, W 117 | 118 | -------------------------------------------------------------------------------- /uncurl/pois_ll.py: -------------------------------------------------------------------------------- 1 | # Poisson log-likelihood 2 | 3 | import numpy as np 4 | from scipy import sparse 5 | from scipy.special import xlogy, gammaln 6 | 7 | from uncurl.sparse_utils import sparse_poisson_ll_csc 8 | 9 | eps = 1e-10 10 | 11 | def sparse_poisson_ll(data, means): 12 | data = sparse.csc_matrix(data) 13 | return sparse_poisson_ll_csc( 14 | data.data, 15 | data.indices, 16 | data.indptr, 17 | data.shape[0], 18 | data.shape[1], 19 | means, 20 | eps) 21 | 22 | def poisson_ll(data, means): 23 | """ 24 | Calculates the Poisson log-likelihood. 25 | 26 | Args: 27 | data (array): 2d numpy array of genes x cells 28 | means (array): 2d numpy array of genes x k 29 | 30 | Returns: 31 | cells x k array of log-likelihood for each cell/cluster pair 32 | """ 33 | if sparse.issparse(data): 34 | return sparse_poisson_ll(data, means) 35 | genes, cells = data.shape 36 | clusters = means.shape[1] 37 | ll = np.zeros((cells, clusters)) 38 | for i in range(clusters): 39 | means_i = np.tile(means[:,i], (cells, 1)) 40 | means_i = means_i.transpose() + eps 41 | #ll[:,i] = np.sum(xlogy(data, means_i) - gammaln(data+1) - means_i, 0) 42 | ll[:,i] = np.sum(xlogy(data, means_i) - means_i, 0) 43 | return ll 44 | 45 | def poisson_ll_2(p1, p2): 46 | """ 47 | Calculates Poisson LL(p1|p2). 48 | """ 49 | p1_1 = p1 + eps 50 | p2_1 = p2 + eps 51 | return np.sum(-p2_1 + p1_1*np.log(p2_1)) 52 | 53 | def poisson_dist(p1, p2): 54 | """ 55 | Calculates the Poisson distance between two vectors. 56 | 57 | p1 can be a sparse matrix, while p2 has to be a dense matrix. 58 | """ 59 | # ugh... 60 | p1_ = p1 + eps 61 | p2_ = p2 + eps 62 | return np.dot(p1_-p2_, np.log(p1_/p2_)) 63 | 64 | -------------------------------------------------------------------------------- /uncurl/preprocessing.py: -------------------------------------------------------------------------------- 1 | """ 2 | Misc functions... 
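Preprocessing utilities: per-gene mean/variance for sparse matrices,
max-variance gene selection, per-cell count normalization, and log1p.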
 3 | """
 4 | 
 5 | import numpy as np
 6 | from scipy import sparse
 7 | 
 8 | from uncurl.sparse_utils import sparse_cell_normalize, sparse_means_var_csc
 9 | 
10 | def sparse_mean_var(data):
11 |     """
12 |     Calculates the variance for each row of a sparse matrix,
13 |     using the relationship Var = E[x^2] - E[x]^2.
14 | 
15 |     Returns:
16 |         pair of matrices mean, variance.
17 |     """
18 |     data = sparse.csc_matrix(data)
19 |     return sparse_means_var_csc(data.data,
20 |             data.indices,
21 |             data.indptr,
22 |             data.shape[1],
23 |             data.shape[0])
24 | 
25 | def max_variance_genes(data, nbins=5, frac=0.2):
26 |     """
27 |     This function identifies the genes that have the max variance
28 |     across a number of bins sorted by mean.
29 | 
30 |     Args:
31 |         data (array): genes x cells
32 |         nbins (int): number of bins to sort genes by mean expression level. Default: 5.
33 |         frac (float): fraction of genes to return per bin - between 0 and 1. Default: 0.2
34 | 
35 |     Returns:
36 |         list of gene indices (list of ints)
37 |     """
38 |     # TODO: profile, make more efficient for large matrices
39 |     # 8000 cells: 0.325 seconds
40 |     # top time: sparse.csc_tocsr, csc_matvec, astype, copy, mul_scalar
41 |     # 73233 cells: 5.347 seconds, 4.762 s in sparse_var
42 |     # csc_tocsr: 1.736 s
43 |     # copy: 1.028 s
44 |     # astype: 0.999 s
45 |     # there is almost certainly something superlinear in this method
46 |     # maybe it's to_csr?
47 |     indices = []
48 |     if sparse.issparse(data):
49 |         means, var = sparse_mean_var(data)
50 |     else:
51 |         means = data.mean(1)
52 |         var = data.var(1)
53 |     mean_indices = means.argsort()
54 |     n_elements = int(data.shape[0]/nbins)
55 |     frac_elements = int(n_elements*frac)
56 |     for i in range(nbins):
57 |         bin_i = mean_indices[i*n_elements : (i+1)*n_elements]
58 |         if i==nbins-1:
59 |             bin_i = mean_indices[i*n_elements :]
60 |         var_i = var[bin_i]
61 |         var_sorted = var_i.argsort()
62 |         top_var_indices = var_sorted[len(bin_i) - frac_elements:]
63 |         ind = bin_i[top_var_indices]
64 |         # filter out genes with zero variance
65 |         ind = [index for index in ind if var[index]>0]
66 |         indices.extend(ind)
67 |     return indices
68 | 
69 | def cell_normalize(data, multiply_means=True):
70 |     """
71 |     Returns the data where the expression is normalized so that the total
72 |     count per cell is equal.
73 | 
74 |     If multiply_means is true, then the data will be multiplied to have the median UMI count for all cells.
75 |     """
76 |     if sparse.issparse(data):
77 |         data = sparse.csc_matrix(data.astype(float))
78 |         # normalize in-place
79 |         sparse_cell_normalize(data.data,
80 |                 data.indices,
81 |                 data.indptr,
82 |                 data.shape[1],
83 |                 data.shape[0],
84 |                 multiply_means)
85 |         return data
86 |     data_norm = data.astype(float)
87 |     total_umis = []
88 |     for i in range(data.shape[1]):
89 |         di = data_norm[:,i]
90 |         total_umis.append(di.sum())
91 |         di /= total_umis[i]
92 |     if multiply_means:
93 |         med = np.median(total_umis)
94 |         data_norm *= med
95 |     return data_norm
96 | 
97 | def log1p(data):
98 |     """
99 |     Returns ln(data+1), whether the original data is dense or sparse.
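
    For example, log1p(cell_normalize(data)) is the transform used before
    TruncatedSVD in gap_score.preproc_data.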
100 | """ 101 | if sparse.issparse(data): 102 | return data.log1p() 103 | else: 104 | return np.log1p(data) 105 | -------------------------------------------------------------------------------- /uncurl/run_se.py: -------------------------------------------------------------------------------- 1 | # state estimation with poisson convex mixture model 2 | 3 | from .state_estimation import poisson_estimate_state 4 | from .nb_state_estimation import nb_estimate_state 5 | from .zip_state_estimation import zip_estimate_state 6 | from .nmf_wrapper import log_norm_nmf, norm_nmf 7 | 8 | import numpy as np 9 | from scipy import sparse 10 | 11 | def run_state_estimation(data, clusters, dist='Poiss', reps=1, **kwargs): 12 | """ 13 | Runs state estimation for multiple initializations, returning the result with the highest log-likelihood. All the arguments are passed to the underlying state estimation functions (poisson_estimate_state, nb_estimate_state, zip_estimate_state). 14 | 15 | Args: 16 | data (array): genes x cells 17 | clusters (int): number of mixture components. If this is set to 0, this is automatically estimated using gap score. 18 | dist (str, optional): Distribution used in state estimation. Options: 'Poiss', 'NB', 'ZIP', 'LogNorm', 'Gaussian'. Default: 'Poiss' 19 | reps (int, optional): number of times to run the state estimation, taking the result with the highest log-likelihood. 20 | **kwargs: arguments to pass to the underlying state estimation function. 21 | 22 | Returns: 23 | M (array): genes x clusters - state means 24 | W (array): clusters x cells - state mixing components for each cell 25 | ll (float): final log-likelihood 26 | """ 27 | clusters = int(clusters) 28 | func = poisson_estimate_state 29 | dist = dist.lower() 30 | if dist=='poiss' or dist=='poisson': 31 | pass 32 | elif dist=='nb': 33 | func = nb_estimate_state 34 | elif dist=='zip': 35 | func = zip_estimate_state 36 | elif dist=='lognorm' or dist=='log-normal' or dist=='lognormal': 37 | func = log_norm_nmf 38 | elif dist=='gaussian' or dist=='norm' or dist=='normal': 39 | func = norm_nmf 40 | elif dist=='none': 41 | func = run_baseline 42 | else: 43 | print('dist should be one of Poiss, NB, ZIP, LogNorm, or Gaussian. 
Using Poiss.')
44 |     # TODO: estimate number of clusters
45 |     if clusters == 0:
46 |         from .gap_score import run_gap_k_selection, preproc_data
47 |         data_tsvd = preproc_data(data, gene_subset=False)
48 |         max_k, gap_vals, sk_vals = run_gap_k_selection(data_tsvd,
49 |                 k_min=1, k_max=50, skip=5, B=6)
50 |         clusters = min(max_k, data.shape[0] - 1, data.shape[1] - 1)
51 |     best_ll = np.inf
52 |     best_M = None
53 |     best_W = None
54 |     for i in range(reps):
55 |         results = func(data, clusters, **kwargs)
56 |         M = results[0]
57 |         W = results[1]
58 |         if dist=='nb':  # dist was lower-cased above; nb returns (M, W, R, ll)
59 |             ll = results[3]
60 |         else:
61 |             ll = results[2]
62 |         if ll < best_ll:
63 |             best_ll = ll
64 |             best_M = M
65 |             best_W = W
66 |     return best_M, best_W, best_ll
67 | 
68 | 
69 | def run_baseline(data, clusters, **kwargs):
70 |     """
71 |     Run "baseline" tSVD + k-means
72 |     """
73 |     from .state_estimation import initialize_means_weights
74 |     m, w = initialize_means_weights(data, clusters, initialization='tsvd',
75 |             max_assign_weight=0.95, use_log_norm=False)
76 |     return m, w, 0
77 | 
--------------------------------------------------------------------------------
/uncurl/sampling.py:
--------------------------------------------------------------------------------
 1 | # downsampling count datasets (for comparisons)
 2 | 
 3 | import numpy as np
 4 | 
 5 | from scipy import sparse
 6 | 
 7 | def downsample(data, percent):
 8 |     """
 9 |     downsample the data by removing a given percentage of the reads.
10 | 
11 |     Args:
12 |         data: genes x cells array or sparse matrix
13 |         percent: float between 0 and 1
14 |     """
15 |     n_genes = data.shape[0]
16 |     n_cells = data.shape[1]
17 |     new_data = data.copy()
18 |     total_count = float(data.sum())
19 |     to_remove = int(total_count*percent)  # multinomial requires an integer count
20 |     # sum of read counts per cell
21 |     cell_sums = data.sum(0).astype(float)
22 |     # probability of selecting genes per cell
23 |     cell_gene_probs = data/cell_sums
24 |     # probability of selecting cells
25 |     cell_probs = np.array(cell_sums/total_count).flatten()
26 |     cells_selected = np.random.multinomial(to_remove, pvals=cell_probs)
27 |     for i, num_selected in enumerate(cells_selected):
28 |         cell_gene = np.array(cell_gene_probs[:,i]).flatten()
29 |         genes_selected = np.random.multinomial(num_selected, pvals=cell_gene)
30 |         if sparse.issparse(data):
31 |             genes_selected = sparse.csc_matrix(genes_selected).T
32 |         new_data[:,i] -= genes_selected
33 |     new_data[new_data < 0] = 0
34 |     return new_data
35 | 
--------------------------------------------------------------------------------
/uncurl/vis.py:
--------------------------------------------------------------------------------
 1 | # basic functions for visualization of clustering, state estimation, lineage
 2 | 
 3 | import matplotlib.pyplot as plt
 4 | from sklearn.decomposition import PCA
 5 | 
 6 | def visualize_poisson_w(w, labels, filename, method='pca', figsize=(18,10), title='', **scatter_options):
 7 |     """
 8 |     Saves a scatter plot of a visualization of W, the result from Poisson SE.
 9 |     """
10 |     if method == 'pca':
11 |         pca = PCA(2)
12 |         r_dim_red = pca.fit_transform(w.T).T
13 |     elif method == 'tsne':
14 |         return  # tsne is not implemented yet; fall-through would hit an undefined r_dim_red
15 |     else:
16 |         print("Method is not available. use 'pca' (default) or 'tsne'.")
17 |         return
18 |     visualize_dim_red(r_dim_red, labels, filename, figsize, title, **scatter_options)
19 | 
20 | def visualize_dim_red(r, labels, filename=None, figsize=(18,10), title='', legend=True, label_map=None, label_scale=False, label_color_map=None, **scatter_options):
21 |     """
22 |     Saves a scatter plot of a (2,n) matrix r, where each column is a cell.
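    In this package, r typically comes from mds() or diffusion_mds() with d=2.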
23 | 
24 |     Args:
25 |         r (array): (2,n) matrix
26 |         labels (array): (n,) array of ints/strings or floats. Can be None.
27 |         filename (string): string to save the output graph. If None, then this just displays the plot.
28 |         figsize (tuple): Default: (18, 10)
29 |         title (string): graph title
30 |         legend (bool): Default: True
31 |         label_map (dict): map of labels to label names. Default: None
32 |         label_scale (bool): True if labels should be treated as floats. Default: False
33 |         label_color_map (array): (n,) array or list of colors for each label.
34 |     """
35 |     fig = plt.figure(figsize=figsize)
36 |     plt.cla()
37 |     if not label_scale:
38 |         for i in set(labels):
39 |             label = i
40 |             if label_map is not None:
41 |                 label = label_map[i]
42 |             if label_color_map is not None:
43 |                 c = label_color_map[i]
44 |                 plt.scatter(r[0, labels==i], r[1, labels==i], label=label, c=c, **scatter_options)
45 |             else:
46 |                 plt.scatter(r[0, labels==i], r[1, labels==i], label=label, **scatter_options)
47 |     else:
48 |         if labels is None:
49 |             plt.scatter(r[0,:], r[1,:], **scatter_options)
50 |         else:
51 |             plt.scatter(r[0,:], r[1,:], c=labels/labels.max(), **scatter_options)
52 |     plt.title(title)
53 |     if legend:
54 |         plt.legend()
55 |     if filename is not None:
56 |         plt.savefig(filename, dpi=100)
57 |         plt.close()
58 |     return fig
59 | 
--------------------------------------------------------------------------------
/uncurl/zip_clustering.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from scipy import sparse
 3 | from scipy.optimize import minimize
 4 | 
 5 | from .clustering import kmeans_pp
 6 | from .zip_utils import zip_ll, zip_ll_row
 7 | 
 8 | eps = 1e-8
 9 | 
10 | 
11 | def zip_fit_params(data):
12 |     """
13 |     Returns the ZIP parameters that best fit a given data set.
14 | 
15 |     Args:
16 |         data (array): 2d array of genes x cells belonging to a given cluster
17 | 
18 |     Returns:
19 |         L (array): 1d array of means
20 |         M (array): 1d array of zero-inflation parameter
21 |     """
22 |     genes, cells = data.shape
23 |     m = data.mean(1)
24 |     v = data.var(1)
25 |     M = (v-m)/(m**2+v-m)
26 |     #M = v/(v+m**2)
27 |     #M[np.isnan(M)] = 0.0
28 |     M = np.array([min(1.0, max(0.0, x)) for x in M])
29 |     L = m + v/m - 1.0
30 |     #L = (v + m**2)/m
31 |     L[np.isnan(L)] = 0.0
32 |     L = np.array([max(0.0, x) for x in L])
33 |     return L, M
34 | 
35 | def zip_fit_params_mle(data):
36 |     genes, cells = data.shape
37 |     L, M = zip_fit_params(data)
38 |     for i in range(genes):
39 |         result = minimize(zip_ll_row, [L[i], M[i]], args=(data[i,:],),
40 |                 bounds=[(eps, None),(0,1)])
41 |         params = result.x
42 |         L[i] = params[0]
43 |         M[i] = params[1]
44 |     return L, M
45 | 
46 | def zip_cluster(data, k, init=None, max_iters=100):
47 |     """
48 |     Performs hard EM clustering using the zero-inflated Poisson distribution.
49 | 
50 |     Args:
51 |         data (array): A 2d array- genes x cells
52 |         k (int): Number of clusters
53 |         init (array, optional): Initial centers - genes x k array. Default: None, use kmeans++
54 |         max_iters (int, optional): Maximum number of iterations.
Default: 100 55 | 56 | Returns: 57 | assignments (array): integer assignments of cells to clusters (length cells) 58 | L (array): Poisson parameter (genes x k) 59 | M (array): zero-inflation parameter (genes x k) 60 | """ 61 | genes, cells = data.shape 62 | init, new_assignments = kmeans_pp(data+eps, k, centers=init) 63 | centers = np.copy(init) 64 | M = np.zeros(centers.shape) 65 | assignments = new_assignments 66 | for c in range(k): 67 | centers[:,c], M[:,c] = zip_fit_params_mle(data[:, assignments==c]) 68 | for it in range(max_iters): 69 | lls = zip_ll(data, centers, M) 70 | new_assignments = np.argmax(lls, 1) 71 | if np.equal(assignments, new_assignments).all(): 72 | return assignments, centers, M 73 | for c in range(k): 74 | centers[:,c], M[:,c] = zip_fit_params_mle(data[:, assignments==c]) 75 | assignments = new_assignments 76 | return assignments, centers, M 77 | 78 | -------------------------------------------------------------------------------- /uncurl/zip_state_estimation.py: -------------------------------------------------------------------------------- 1 | # state estimation with Zero-Inflated Poisson model 2 | # TODO 3 | 4 | from .clustering import kmeans_pp 5 | from .zip_clustering import zip_fit_params_mle 6 | from .state_estimation import initialize_from_assignments 7 | 8 | import numpy as np 9 | from scipy.optimize import minimize 10 | 11 | eps=1e-8 12 | 13 | def _create_w_objective(m, X, Z=None): 14 | """ 15 | Creates an objective function and its derivative for W, given M and X (data) 16 | 17 | Args: 18 | m (array): genes x clusters 19 | X (array): genes x cells 20 | Z (array): zero-inflation parameters - genes x 1 21 | """ 22 | genes, clusters = m.shape 23 | cells = X.shape[1] 24 | nonzeros = (X!=0) 25 | def objective(w): 26 | # convert w into a matrix first... because it's a vector for 27 | # optimization purposes 28 | w = w.reshape((m.shape[1], X.shape[1])) 29 | d = m.dot(w)+eps 30 | # derivative of objective wrt all elements of w 31 | # for w_{ij}, the derivative is... m_j1+...+m_jn sum over genes minus 32 | # x_ij 33 | temp = X/d 34 | m_sum = m.T.dot(nonzeros) 35 | m2 = m.T.dot(temp) 36 | deriv = m_sum - m2 37 | return np.sum(nonzeros*(d - X*np.log(d)))/genes, deriv.flatten()/genes 38 | return objective 39 | 40 | def _create_m_objective(w, X, Z=None): 41 | """ 42 | Creates an objective function and its derivative for M, given W and X 43 | 44 | Args: 45 | w (array): clusters x cells 46 | X (array): genes x cells 47 | Z (array): zero-inflation parameters - genes x 1 48 | """ 49 | clusters, cells = w.shape 50 | genes = X.shape[0] 51 | nonzeros = (X!=0) 52 | def objective(m): 53 | m = m.reshape((X.shape[0], w.shape[0])) 54 | d = m.dot(w)+eps 55 | temp = nonzeros*(X/d) 56 | w_sum = w.dot(nonzeros.T) 57 | w2 = w.dot(temp.T) 58 | deriv = w_sum.T - w2.T 59 | return np.sum(nonzeros*(d - X*np.log(d)))/genes, deriv.flatten()/genes 60 | return objective 61 | 62 | 63 | 64 | def zip_estimate_state(data, clusters, init_means=None, init_weights=None, max_iters=10, tol=1e-4, disp=True, inner_max_iters=400, normalize=True): 65 | """ 66 | Uses a Zero-inflated Poisson Mixture model to estimate cell states and 67 | cell state mixing weights. 68 | 69 | Args: 70 | data (array): genes x cells 71 | clusters (int): number of mixture components 72 | init_means (array, optional): initial centers - genes x clusters. Default: kmeans++ initializations 73 | init_weights (array, optional): initial weights - clusters x cells. 
--------------------------------------------------------------------------------
/uncurl/zip_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy.stats import poisson
3 | from scipy.special import xlogy
4 | 
5 | eps = 1e-10
6 | 
7 | 
8 | def zip_ll(data, means, M):
9 |     """
10 |     Calculates the zero-inflated Poisson log-likelihood.
11 | 
12 |     Args:
13 |         data (array): genes x cells
14 |         means (array): genes x k
15 |         M (array): genes x k - the zero-inflation parameters
16 | 
17 |     Returns:
18 |         cells x k array of log-likelihoods, one per cell/cluster pair.
19 |     """
20 |     genes, cells = data.shape
21 |     clusters = means.shape[1]
22 |     ll = np.zeros((cells, clusters))
23 |     d0 = (data==0)
24 |     d1 = (data>0)
25 |     for i in range(clusters):
26 |         means_i = np.tile(means[:,i], (cells, 1))
27 |         means_i = means_i.transpose()
28 |         M_i = np.tile(M[:,i], (cells, 1))
29 |         M_i = M_i.transpose()
30 |         # P(x=0) = pi + (1-pi)*exp(-lambda)
31 |         ll_0 = np.log(M_i + (1 - M_i)*np.exp(-means_i))
32 |         # when pi is 0 the expression above can underflow to log(0);
33 |         # the exact value in that case is -lambda
34 |         ll_0 = np.where(M_i==0, -means_i, ll_0)
35 |         # P(x>0) = (1-pi)*Poisson(x; lambda), not including constant factors
36 |         ll_1 = np.log(1 - M_i) + xlogy(data, means_i) - means_i
37 |         ll_0 = np.where(d0, ll_0, 0.0)
38 |         ll_1 = np.where(d1, ll_1, 0.0)
39 |         ll[:,i] = np.sum(ll_0 + ll_1, 0)
40 |     return ll
41 | 
42 | def zip_ll_row(params, data_row):
43 |     """
44 |     Returns the negative log-likelihood of a single gene's counts under a
45 |     ZIP model with the given parameters.
46 | 
47 |     Args:
48 |         params (list): [lambda, zero-inflation]
49 |         data_row (array): 1d array of counts
50 | 
51 |     Returns:
52 |         negative log-likelihood
53 |     """
54 |     l = params[0]
55 |     pi = params[1]
56 |     d0 = (data_row==0)
57 |     # ZIP pmf: pi at zero plus (1-pi) times the Poisson pmf; eps guards log(0)
58 |     likelihood = d0*pi + (1-pi)*poisson.pmf(data_row, l)
59 |     return -np.log(likelihood+eps).sum()
60 | 
--------------------------------------------------------------------------------
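For reference, the per-entry log-likelihood that zip_ll accumulates (with the constant log(x!) term dropped, as the code notes) can be written as

\log p(x \mid \lambda, \pi) =
\begin{cases}
\log\bigl(\pi + (1 - \pi)\,e^{-\lambda}\bigr), & x = 0,\\
\log(1 - \pi) + x \log \lambda - \lambda, & x > 0.
\end{cases}

zip_ll_row evaluates the same density through scipy's poisson.pmf (so the log(x!) constant is included there) and returns the negated sum, which is the form scipy.optimize.minimize expects in zip_fit_params_mle.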
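To make those conventions concrete, the following sketch (illustrative only; it assumes uncurl is importable) checks zip_ll against a direct per-entry computation with scipy. Since zip_ll drops the log(x!) constant, and that constant is identical across clusters for a given cell, the two computations should differ by exactly that per-cell constant:

import numpy as np
from scipy.special import gammaln
from scipy.stats import poisson

from uncurl.zip_utils import zip_ll

np.random.seed(0)
genes, cells, k = 5, 4, 2
data = np.random.poisson(3, size=(genes, cells))
means = np.random.uniform(1, 5, size=(genes, k))
M = np.random.uniform(0.1, 0.4, size=(genes, k))

ll = zip_ll(data, means, M)

# direct computation: exact ZIP log-pmf summed over genes
direct = np.zeros((cells, k))
for c in range(cells):
    for j in range(k):
        p = M[:, j]*(data[:, c] == 0) + (1 - M[:, j])*poisson.pmf(data[:, c], means[:, j])
        direct[c, j] = np.log(p).sum()

# zip_ll omits sum_g log(x_gc!), which does not depend on the cluster
const = gammaln(data + 1).sum(0)  # per-cell constant
assert np.allclose(ll, direct + const[:, None])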