├── doc ├── source │ ├── _static │ │ └── dummy │ ├── sort.rst │ ├── util.rst │ ├── balances.rst │ ├── plot.rst │ ├── cluster.rst │ ├── regression.rst │ └── index.rst ├── Makefile └── README.md ├── gneiss ├── cluster │ ├── tests │ │ ├── __init__.py │ │ ├── data │ │ │ ├── test_metadata.txt │ │ │ ├── tree.qza │ │ │ ├── polytomy.qza │ │ │ ├── weighted.biom.qza │ │ │ ├── test_gradient.biom.qza │ │ │ └── test_composition.biom.qza │ │ └── test_pba.py │ ├── __init__.py │ └── _pba.py ├── regression │ ├── tests │ │ ├── __init__.py │ │ ├── data │ │ │ ├── not-regression.pickle │ │ │ ├── lme.pickle │ │ │ ├── ols.pickle │ │ │ ├── lme_tree.qza │ │ │ ├── ols_tree.qza │ │ │ ├── test_tree.qza │ │ │ ├── lme_balances.qza │ │ │ ├── ols_balances.qza │ │ │ ├── test_lme_composition.qza │ │ │ ├── test_ols_composition.qza │ │ │ ├── lovo.csv │ │ │ ├── lovo2.csv │ │ │ ├── exp_ols_results2.txt │ │ │ ├── exp_lme_results2.txt │ │ │ ├── kfold2.csv │ │ │ ├── exp_lme_results.txt │ │ │ ├── exp_ols_results.txt │ │ │ ├── kfold.csv │ │ │ ├── loo.csv │ │ │ ├── loo2.csv │ │ │ ├── coefficients.csv │ │ │ └── pvalues.csv │ │ ├── test_mixedlm.py │ │ ├── test_model.py │ │ └── test_ols.py │ ├── __init__.py │ └── _model.py ├── __init__.py ├── composition │ ├── __init__.py │ ├── tests │ │ ├── test_variance.py │ │ └── test_composition.py │ ├── _variance.py │ └── _composition.py ├── plot │ ├── __init__.py │ ├── tests │ │ ├── data │ │ │ └── example.nwk │ │ ├── test_radial.py │ │ ├── test_regression_plot.py │ │ ├── test_dendrogram.py │ │ ├── test_heatmap.py │ │ └── test_decompose.py │ └── _radial.py ├── _model.py ├── tests │ ├── test_model.py │ ├── data │ │ └── large_tree.nwk │ ├── test_balances.py │ └── test_sort.py ├── sort.py └── balances.py ├── ci ├── conda_requirements.txt ├── pip_requirements.txt └── environment.yml ├── ipynb ├── images │ ├── Slide1.jpg │ ├── Slide2.jpg │ ├── Slide3.jpg │ └── Slide4.jpg ├── 88soils │ └── 238_otu_table.biom └── cfstudy │ ├── cfstudy_taxonomy.qza │ ├── cfstudy_common.biom.qza │ └── cfstudy-qiime2-tutorial.ipynb ├── MANIFEST.in ├── .coveragerc ├── Makefile ├── .gitignore ├── .travis.yml ├── .github └── workflows │ └── master.yml ├── COPYING.txt ├── README.md ├── setup.py └── CHANGELOG.md /doc/source/_static/dummy: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gneiss/cluster/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gneiss/regression/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /doc/source/sort.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: gneiss.sort -------------------------------------------------------------------------------- /doc/source/util.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: gneiss.util 2 | -------------------------------------------------------------------------------- /doc/source/balances.rst: -------------------------------------------------------------------------------- 1 | .. 
automodule:: gneiss.balances 2 | -------------------------------------------------------------------------------- /doc/source/plot.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: gneiss.plot 2 | 3 | 4 | -------------------------------------------------------------------------------- /ci/conda_requirements.txt: -------------------------------------------------------------------------------- 1 | pip 2 | biom-format 3 | bokeh=1.1.0 4 | -------------------------------------------------------------------------------- /doc/source/cluster.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: gneiss.cluster 2 | 3 | 4 | -------------------------------------------------------------------------------- /gneiss/regression/tests/data/not-regression.pickle: -------------------------------------------------------------------------------- 1 | asdfasdfasdff 2 | -------------------------------------------------------------------------------- /doc/source/regression.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: gneiss.regression 2 | 3 | 4 | -------------------------------------------------------------------------------- /ci/pip_requirements.txt: -------------------------------------------------------------------------------- 1 | coveralls 2 | sphinx 3 | pycodestyle 4 | flake8 5 | -------------------------------------------------------------------------------- /ipynb/images/Slide1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/ipynb/images/Slide1.jpg -------------------------------------------------------------------------------- /ipynb/images/Slide2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/ipynb/images/Slide2.jpg -------------------------------------------------------------------------------- /ipynb/images/Slide3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/ipynb/images/Slide3.jpg -------------------------------------------------------------------------------- /ipynb/images/Slide4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/ipynb/images/Slide4.jpg -------------------------------------------------------------------------------- /gneiss/cluster/tests/data/test_metadata.txt: -------------------------------------------------------------------------------- 1 | x y 2 | s1 1 a 3 | s2 2 a 4 | s3 3 a 5 | s4 4 a 6 | s5 5 a 7 | -------------------------------------------------------------------------------- /gneiss/cluster/tests/data/tree.qza: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/cluster/tests/data/tree.qza -------------------------------------------------------------------------------- /ipynb/88soils/238_otu_table.biom: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/ipynb/88soils/238_otu_table.biom -------------------------------------------------------------------------------- /ipynb/cfstudy/cfstudy_taxonomy.qza: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/ipynb/cfstudy/cfstudy_taxonomy.qza -------------------------------------------------------------------------------- /gneiss/cluster/tests/data/polytomy.qza: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/cluster/tests/data/polytomy.qza -------------------------------------------------------------------------------- /gneiss/regression/tests/data/lme.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/regression/tests/data/lme.pickle -------------------------------------------------------------------------------- /gneiss/regression/tests/data/ols.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/regression/tests/data/ols.pickle -------------------------------------------------------------------------------- /ipynb/cfstudy/cfstudy_common.biom.qza: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/ipynb/cfstudy/cfstudy_common.biom.qza -------------------------------------------------------------------------------- /gneiss/regression/tests/data/lme_tree.qza: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/regression/tests/data/lme_tree.qza -------------------------------------------------------------------------------- /gneiss/regression/tests/data/ols_tree.qza: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/regression/tests/data/ols_tree.qza -------------------------------------------------------------------------------- /gneiss/cluster/tests/data/weighted.biom.qza: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/cluster/tests/data/weighted.biom.qza -------------------------------------------------------------------------------- /gneiss/regression/tests/data/test_tree.qza: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/regression/tests/data/test_tree.qza -------------------------------------------------------------------------------- /gneiss/regression/tests/data/lme_balances.qza: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/regression/tests/data/lme_balances.qza -------------------------------------------------------------------------------- /gneiss/regression/tests/data/ols_balances.qza: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/regression/tests/data/ols_balances.qza -------------------------------------------------------------------------------- /gneiss/cluster/tests/data/test_gradient.biom.qza: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/cluster/tests/data/test_gradient.biom.qza 
-------------------------------------------------------------------------------- /gneiss/cluster/tests/data/test_composition.biom.qza: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/cluster/tests/data/test_composition.biom.qza -------------------------------------------------------------------------------- /gneiss/regression/tests/data/test_lme_composition.qza: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/regression/tests/data/test_lme_composition.qza -------------------------------------------------------------------------------- /gneiss/regression/tests/data/test_ols_composition.qza: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/regression/tests/data/test_ols_composition.qza -------------------------------------------------------------------------------- /gneiss/regression/tests/data/lovo.csv: -------------------------------------------------------------------------------- 1 | ,mse,Rsquared,R2diff 2 | Intercept,4310.47487689949,0.21981627865598752,0.0 3 | x1,4310.4748768994905,0.21981627865598752,0.0 4 | x2,4310.4748768994905,0.21981627865598752,0.0 5 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include .coveragerc 2 | include CHANGELOG.md 3 | include COPYING.txt 4 | include Makefile 5 | include README.md 6 | 7 | graft gneiss 8 | 9 | global-exclude *.pyc 10 | global-exclude *.pyo 11 | global-exclude .git 12 | -------------------------------------------------------------------------------- /gneiss/regression/tests/data/lovo2.csv: -------------------------------------------------------------------------------- 1 | ,mse,Rsquared,R2diff 2 | Intercept,4309.602746314058,0.9949205109438772,5.546379782983557e-05 3 | x1,4305.631922549035,0.994925191132991,5.078360871602072e-05 4 | x2,6910.497401174743,0.9918549810764683,0.0031209936652387693 5 | -------------------------------------------------------------------------------- /gneiss/regression/tests/data/exp_ols_results2.txt: -------------------------------------------------------------------------------- 1 | Simplicial Least Squares Results 2 | ============================ 3 | No. Observations 5.0000 4 | Model: OLS 5 | Rsquared: 0.4405 6 | ---------------------------- 7 | c Intercept real 8 | ---------------------------- 9 | Y1 slope 1.60E+00 6.00E-01 10 | Y1 pvalue 1.28E-01 8.05E-02 11 | ============================ 12 | -------------------------------------------------------------------------------- /gneiss/__init__.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 3 | # 4 | # Distributed under the terms of the Modified BSD License. 5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 
7 | # ---------------------------------------------------------------------------- 8 | 9 | __version__ = "0.4.6" 10 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | # this file is based on the examples provided on scikit-learn's .coveragerc 2 | 3 | [run] 4 | omit = 5 | */tests* 6 | */__init__.py 7 | */gneiss/layouts.py 8 | source = gneiss 9 | branch = True 10 | include = */gneiss/* 11 | 12 | [report] 13 | exclude_lines = 14 | pragma: no cover 15 | raise NotImplementedError 16 | if __name__ == .__main__.: 17 | omit = 18 | */tests* 19 | */__init__.py 20 | */gneiss/layouts.py -------------------------------------------------------------------------------- /gneiss/regression/tests/data/exp_lme_results2.txt: -------------------------------------------------------------------------------- 1 | Simplicial Mixed Linear Model Results 2 | ==================================================== 3 | No. Observations 1600.0000 Model: Simplicial MixedLM 4 | ----------------------------------------------------- 5 | Intercept groups RE x1 x2 6 | ----------------------------------------------------- 7 | Y1 slope 4.21E+00 9.36E-02 1.02E+00 9.25E-01 8 | Y1 pvalue 4.83E-236 4.42E-05 3.97E-35 3.57E-30 9 | ==================================================== 10 | -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | .. gneiss documentation master file, created by 2 | sphinx-quickstart on Sat Nov 26 16:35:10 2016. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to gneiss's documentation! 7 | ================================== 8 | 9 | Contents: 10 | 11 | .. toctree:: 12 | :maxdepth: 1 13 | 14 | balances 15 | regression 16 | cluster 17 | plot 18 | sort 19 | util 20 | 21 | 22 | 23 | Indices and tables 24 | ================== 25 | 26 | * :ref:`genindex` 27 | * :ref:`modindex` 28 | * :ref:`search` 29 | 30 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .DEFAULT_GOAL := help 2 | 3 | ifeq ($(WITH_COVERAGE), TRUE) 4 | TEST_COMMAND = COVERAGE_FILE=.coverage coverage run --rcfile .coveragerc setup.py nosetests --with-doctest 5 | else 6 | TEST_COMMAND = nosetests --with-doctest 7 | endif 8 | 9 | help: 10 | @echo 'Use "make test" to run all the unit tests and docstring tests.' 11 | @echo 'Use "make pep8" to validate PEP8 compliance.' 12 | @echo 'Use "make html" to create html documentation with sphinx' 13 | @echo 'Use "make all" to run all the targets listed above.' 
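# Example: set the coverage toggle defined above (the same switch .travis.yml
# uses) to run the suite under coverage with the settings in .coveragerc;
# a plain `make test` runs nosetests without coverage:
#   WITH_COVERAGE=TRUE make test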
14 | test: 15 | $(TEST_COMMAND) 16 | pep8: 17 | flake8 gneiss setup.py --ignore E303,E731,E722 18 | 19 | all: pep8 test 20 | -------------------------------------------------------------------------------- /gneiss/regression/tests/data/kfold2.csv: -------------------------------------------------------------------------------- 1 | ,model_mse,Rsquared,pred_mse 2 | fold_0,3584.5769364882744,0.9916720125376085,663.109474872684 3 | fold_1,3610.186079514632,0.9928445881359773,428.0519957464969 4 | fold_2,3768.107015449521,0.9930668932082859,197.3834417792511 5 | fold_3,3680.381544872904,0.9936245114219088,279.11469601532144 6 | fold_4,3021.72858660822,0.9948952737882077,960.8363758112072 7 | fold_5,3533.664042171059,0.9939605493886041,445.10100399311614 8 | fold_6,3390.1041044617687,0.993817702859089,581.6366207655954 9 | fold_7,3409.878710170101,0.9930604339509851,588.5085391323519 10 | fold_8,3369.763753775731,0.9915125403479148,709.6491871580773 11 | -------------------------------------------------------------------------------- /gneiss/regression/tests/data/exp_lme_results.txt: -------------------------------------------------------------------------------- 1 | Simplicial Mixed Linear Model Results 2 | ==================================================== 3 | No. Observations 1600.0000 Model: Simplicial MixedLM 4 | ----------------------------------------------------- 5 | Intercept groups RE x1 x2 6 | ----------------------------------------------------- 7 | Y1 slope 4.21E+00 9.36E-02 1.02E+00 9.25E-01 8 | Y1 pvalue 4.83E-236 4.42E-05 3.97E-35 3.57E-30 9 | Y2 slope 2.12E-01 9.36E-02 1.02E+00 9.25E-01 10 | Y2 pvalue 9.94E-02 4.42E-05 3.97E-35 3.57E-30 11 | ==================================================== 12 | -------------------------------------------------------------------------------- /gneiss/regression/tests/data/exp_ols_results.txt: -------------------------------------------------------------------------------- 1 | Simplicial Least Squares Results 2 | ================================== 3 | No. 
Observations 5.0000 4 | Model: OLS 5 | Rsquared: 0.4405 6 | ---------------------------------- 7 | mse Rsquared R2diff 8 | ---------------------------------- 9 | Intercept 2.1409 0.8916 -0.4511 10 | real 2.1000 0.0000 0.4405 11 | ---------------------------------- 12 | model_mse Rsquared pred_mse 13 | ---------------------------------- 14 | fold_0 0.0000 1.0000 12.5000 15 | fold_1 0.0000 1.0000 14.5000 16 | ================================== 17 | -------------------------------------------------------------------------------- /gneiss/regression/tests/data/kfold.csv: -------------------------------------------------------------------------------- 1 | ,model_mse,Rsquared,pred_mse 2 | fold_0,3649.961032819785,0.13217034860627497,483.0402110305879 3 | fold_1,3691.4006801996566,0.2095347670376564,403.8071396736002 4 | fold_2,3830.37833521066,0.15015827888995004,206.67020202108148 5 | fold_3,3729.7569631036204,0.1728096904238252,302.4686128376491 6 | fold_4,3135.1878975392115,0.21496560314322954,897.5468948642697 7 | fold_5,3587.243280769308,0.20003652741163214,453.8238419838613 8 | fold_6,3445.455291051452,0.17658737964869753,599.173109675699 9 | fold_7,3471.7837469746355,0.16790161609540577,604.6536872291142 10 | fold_8,3485.6635258096953,0.12600459767177707,552.1337252734353 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Temporary files 2 | *~ 3 | \#*# 4 | 5 | *.py[cod] 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Packages 11 | *.egg 12 | *.egg-info 13 | dist 14 | build 15 | eggs 16 | parts 17 | bin 18 | var 19 | sdist 20 | develop-eggs 21 | .installed.cfg 22 | lib 23 | lib64 24 | __pycache__ 25 | 26 | # Installer logs 27 | pip-log.txt 28 | 29 | # Unit test / coverage reports 30 | .coverage 31 | .tox 32 | nosetests.xml 33 | 34 | # Translations 35 | *.mo 36 | 37 | # Mr Developer 38 | .mr.developer.cfg 39 | .project 40 | .pydevproject 41 | 42 | # vi 43 | .*.swp 44 | 45 | # Sphinx builds 46 | doc/source/generated 47 | 48 | # OSX files 49 | .DS_Store 50 | 51 | # IPythnon checkpoints 52 | .ipynb_checkpoints -------------------------------------------------------------------------------- /gneiss/composition/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Composition functions (:mod:`gneiss.composition`) 3 | =============================================== 4 | 5 | .. currentmodule:: gneiss.composition 6 | 7 | This module contains compositional functions 8 | 9 | Functions 10 | --------- 11 | 12 | .. autosummary:: 13 | :toctree: generated/ 14 | 15 | variation_matrix 16 | 17 | """ 18 | # ---------------------------------------------------------------------------- 19 | # Copyright (c) 2016--, gneiss development team. 20 | # 21 | # Distributed under the terms of the Modified BSD License. 22 | # 23 | # The full license is in the file COPYING.txt, distributed with this software. 24 | # ---------------------------------------------------------------------------- 25 | from ._composition import ilr_transform 26 | from ._variance import variation_matrix 27 | 28 | 29 | __all__ = ["ilr_transform", "variation_matrix"] 30 | -------------------------------------------------------------------------------- /gneiss/plot/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Plotting functions (:mod:`gneiss.plot`) 3 | =============================================== 4 | 5 | .. 
currentmodule:: gneiss.plot 6 | 7 | This module contains plotting functionality 8 | 9 | Functions 10 | --------- 11 | 12 | .. autosummary:: 13 | :toctree: generated/ 14 | 15 | heatmap 16 | radialplot 17 | balance_boxplot 18 | balance_barplots 19 | """ 20 | # ---------------------------------------------------------------------------- 21 | # Copyright (c) 2016--, gneiss development team. 22 | # 23 | # Distributed under the terms of the Modified BSD License. 24 | # 25 | # The full license is in the file COPYING.txt, distributed with this software. 26 | # ---------------------------------------------------------------------------- 27 | 28 | from ._heatmap import heatmap 29 | from ._radial import radialplot 30 | from ._decompose import balance_boxplot, balance_barplots, proportion_plot 31 | 32 | 33 | __all__ = ["heatmap", "radialplot", "balance_boxplot", 34 | "balance_barplots", "proportion_plot"] 35 | -------------------------------------------------------------------------------- /gneiss/regression/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Regression functions (:mod:`gneiss.regression`) 3 | =============================================== 4 | 5 | .. currentmodule:: gneiss.regression 6 | 7 | This module contains functions that can convert proportions 8 | to balances for regression analysis 9 | 10 | Functions 11 | --------- 12 | 13 | .. autosummary:: 14 | :toctree: generated/ 15 | 16 | ols 17 | mixedlm 18 | 19 | Classes 20 | ------- 21 | .. autosummary:: 22 | :toctree: generated/ 23 | 24 | OLSModel 25 | LMEModel 26 | 27 | """ 28 | # ---------------------------------------------------------------------------- 29 | # Copyright (c) 2016--, gneiss development team. 30 | # 31 | # Distributed under the terms of the Modified BSD License. 32 | # 33 | # The full license is in the file COPYING.txt, distributed with this software. 34 | # ---------------------------------------------------------------------------- 35 | from ._ols import ols, OLSModel 36 | from ._mixedlm import mixedlm, LMEModel 37 | 38 | 39 | __all__ = ["ols", "OLSModel", "mixedlm", "LMEModel"] 40 | -------------------------------------------------------------------------------- /gneiss/cluster/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Clustering functions (:mod:`gneiss.cluster`) 3 | ============================================ 4 | 5 | .. currentmodule:: gneiss.cluster 6 | 7 | This module contains functions to build hierarchical clusterings. 8 | 9 | 10 | Functions 11 | --------- 12 | 13 | .. autosummary:: 14 | :toctree: generated/ 15 | 16 | correlation_linkage 17 | gradient_linkage 18 | rank_linkage 19 | random_linkage 20 | 21 | """ 22 | # ---------------------------------------------------------------------------- 23 | # Copyright (c) 2016--, gneiss development team. 24 | # 25 | # Distributed under the terms of the Modified BSD License. 26 | # 27 | # The full license is in the file COPYING.txt, distributed with this software. 
28 | # ---------------------------------------------------------------------------- 29 | from ._pba import (correlation_linkage, gradient_linkage, 30 | rank_linkage, random_linkage) 31 | 32 | 33 | __all__ = ['correlation_linkage', 'gradient_linkage', 34 | 'rank_linkage', 'random_linkage'] 35 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Travis yml file inspired by scikit-bio 2 | # Check on http://lint.travis-ci.org/ after modifying it! 3 | sudo: false 4 | language: python 5 | env: 6 | - PYVERSION=3.6 USE_CYTHON=TRUE MAKE_DOC=TRUE 7 | before_install: 8 | - export MPLBACKEND='Agg' 9 | - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh 10 | - chmod +x miniconda.sh 11 | - ./miniconda.sh -b 12 | - export PATH=/home/travis/miniconda3/bin:$PATH 13 | # Update conda itself 14 | - conda update --yes conda 15 | # Useful for debugging any issues with conda 16 | - conda info -a 17 | install: 18 | - conda create --yes -n test_env python=$PYVERSION 19 | - conda install --yes -n test_env --file ci/conda_requirements.txt -c biocore 20 | - conda install --yes -n test_env cython 21 | - source activate test_env 22 | - pip install -r ci/pip_requirements.txt 23 | - pip install -e . 24 | script: 25 | - WITH_COVERAGE=TRUE make all 26 | - if [ ${MAKE_DOC} ]; then make -C doc clean html; fi 27 | after_success: 28 | - coveralls 29 | notifications: 30 | webhooks: 31 | on_success: change 32 | on_failure: always 33 | -------------------------------------------------------------------------------- /.github/workflows/master.yml: -------------------------------------------------------------------------------- 1 | # much of this is taken from the Empress main.yml file 2 | name: gneiss CI 3 | 4 | on: 5 | pull_request: 6 | branches: 7 | - master 8 | push: 9 | branches: 10 | - master 11 | 12 | jobs: 13 | build: 14 | runs-on: ubuntu-latest 15 | 16 | strategy: 17 | matrix: 18 | python-version: ["3.8", "3.9"] 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | with: 23 | persist-credentials: false 24 | fetch-depth: 0 25 | 26 | - uses: conda-incubator/setup-miniconda@v2 27 | with: 28 | activate-environment: gneiss 29 | python-version: ${{ matrix.python-version }} 30 | 31 | - name: Test conda installation 32 | shell: bash -l {0} 33 | run: conda info 34 | 35 | - name: Install conda packages 36 | shell: bash -l {0} 37 | run: conda install -c conda-forge statsmodels scikit-bio biom-format matplotlib flake8 38 | 39 | - name: Install gneiss 40 | shell: bash -l {0} 41 | run: pip install -e .[dev] 42 | 43 | - name: Run tests 44 | shell: bash -l {0} 45 | run: make all 46 | -------------------------------------------------------------------------------- /gneiss/plot/tests/data/example.nwk: -------------------------------------------------------------------------------- 1 | 
((((y15:0.200853,(y31:0.42924,(y47:0.914445,y48:0.837693)y32:0.266535)y16:0.327741)y7:0.743731,((y33:0.0653228,y34:0.0404005)y17:0.0864676,((y49:0.26801,(y67:0.340285,(y85:0.135346,y86:0.55117)y68:0.291215)y50:0.332233)y35:0.370523,((y69:0.38023,y70:0.991233)y51:0.647156,(y71:0.615186,y72:0.781904)y52:0.168594)y36:0.732766)y18:0.663758)y8:0.404288)y3:0.591153,((y19:0.565967,y20:0.952246)y9:0.539617,(y21:0.459132,y22:0.269279)y10:0.86029)y4:0.102227)y1:0.569204,(((y23:0.38369,y24:0.856949)y11:0.939149,(y25:0.518678,(y37:0.569999,(y53:0.414425,(y73:0.458147,y74:0.027975)y54:0.00158475)y38:0.199839)y26:0.561358)y12:0.381204)y5:0.472245,(((y39:0.861009,(y55:0.0324591,(y75:0.01456,y76:0.755587)y56:0.94357)y40:0.798439)y27:0.527629,((y57:0.344423,y58:0.0695154)y41:0.230867,(y59:0.656657,(y77:0.473771,y78:0.0236346)y60:0.648203)y42:0.218781)y28:0.763701)y13:0.432767,((y43:0.258421,(y61:0.81704,y62:0.0208181)y44:0.253458)y29:0.41618,((y63:0.445669,(y79:0.223196,(y87:0.659824,y88:0.426299)y80:0.648506)y64:0.506309)y45:0.12089,((y81:0.875534,(y89:0.743842,y90:0.416172)y82:0.306387)y65:0.507717,((y91:0.590584,y92:0.21759)y83:0.846197,(y93:0.377969,(y95:0.591409,(y97:0.0172002,y98:0.612128)y96:0.492351)y94:0.346931)y84:0.505284)y66:0.910185)y46:0.332695)y30:0.91627)y14:0.76228)y6:0.379615)y2:0.802265)y0; 2 | -------------------------------------------------------------------------------- /COPYING.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016--, gneiss development team. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, this 11 | list of conditions and the following disclaimer in the documentation and/or 12 | other materials provided with the distribution. 13 | 14 | * Neither the names scikit-bio, skbio, or biocore nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 25 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /gneiss/composition/tests/test_variance.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 3 | # 4 | # Distributed under the terms of the Modified BSD License. 
5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 7 | # ---------------------------------------------------------------------------- 8 | import numpy as np 9 | import pandas as pd 10 | import unittest 11 | from skbio import DistanceMatrix 12 | from skbio.util import get_data_path 13 | from gneiss.composition._variance import variation_matrix 14 | 15 | 16 | class TestVariationMatrix(unittest.TestCase): 17 | def setUp(self): 18 | pass 19 | 20 | def test_varmat1(self): 21 | X = pd.DataFrame({'x': np.arange(1, 10), 22 | 'y': np.arange(2, 11)}) 23 | res = variation_matrix(X) 24 | exp = DistanceMatrix([[0, 0.032013010420979787 / 2], 25 | [0.032013010420979787 / 2, 0]], ids=['x', 'y']) 26 | self.assertEqual(str(res), str(exp)) 27 | 28 | def test_varmat_larg(self): 29 | np.random.seed(123) 30 | D = 50 31 | N = 100 32 | mean = np.ones(D) * 10 33 | cov = np.eye(D) 34 | n__ = np.random.multivariate_normal(mean, cov, size=N) 35 | X = pd.DataFrame(np.abs(n__), columns=np.arange(D).astype(np.str)) 36 | res = variation_matrix(X) 37 | 38 | exp = DistanceMatrix.read(get_data_path('exp_varmat.txt')) 39 | self.assertEqual(str(res), str(exp)) 40 | 41 | 42 | if __name__ == '__main__': 43 | unittest.main() 44 | -------------------------------------------------------------------------------- /gneiss/composition/tests/test_composition.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 3 | # 4 | # Distributed under the terms of the Modified BSD License. 5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 7 | # ---------------------------------------------------------------------------- 8 | import unittest 9 | import numpy as np 10 | import pandas as pd 11 | from gneiss.composition._composition import ilr_transform 12 | from gneiss.cluster import gradient_linkage 13 | import pandas.util.testing as pdt 14 | 15 | 16 | class TestILRTransform(unittest.TestCase): 17 | 18 | def test_ilr(self): 19 | np.random.seed(0) 20 | table = pd.DataFrame([[1, 1, 2, 2], 21 | [1, 2, 2, 1], 22 | [2, 2, 1, 1]], 23 | index=[1, 2, 3], 24 | columns=['a', 'b', 'c', 'd']) 25 | table = table.reindex(columns=np.random.permutation(table.columns)) 26 | ph = pd.Series([1, 2, 3], index=table.index) 27 | tree = gradient_linkage(table, ph) 28 | res_balances = ilr_transform(table, tree) 29 | exp_balances = pd.DataFrame( 30 | [[0.693147, -5.551115e-17, 2.775558e-17], 31 | [0.000000, -4.901291e-01, -4.901291e-01], 32 | [-0.693147, 5.551115e-17, -2.775558e-17]], 33 | columns=['y0', 'y1', 'y2'], 34 | index=[1, 2, 3]) 35 | pdt.assert_frame_equal(res_balances, exp_balances) 36 | 37 | 38 | if __name__ == '__main__': 39 | unittest.main() 40 | -------------------------------------------------------------------------------- /gneiss/_model.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 3 | # 4 | # Distributed under the terms of the Modified BSD License. 5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 
7 | # ---------------------------------------------------------------------------- 8 | import abc 9 | import numpy as np 10 | 11 | 12 | class Model(metaclass=abc.ABCMeta): 13 | 14 | def __init__(self, Y, Xs): 15 | """ 16 | Abstract container for balance models. 17 | 18 | Parameters 19 | ---------- 20 | Y : pd.DataFrame 21 | Response matrix. This is the matrix being predicted. 22 | Also known as the dependent variable in univariate analysis. 23 | Xs : iterable of pd.DataFrame 24 | Design matrices. Also known as the independent variables 25 | in univariate analysis. Note that this allows multiple 26 | design matrices to be supplied to enable multiple data block 27 | analysis. 28 | """ 29 | self.response_matrix = Y 30 | self.design_matrices = Xs 31 | 32 | @abc.abstractmethod 33 | def fit(self, **kwargs): 34 | pass 35 | 36 | @abc.abstractmethod 37 | def summary(self): 38 | """ Print summary results """ 39 | pass 40 | 41 | def percent_explained(self): 42 | """ Proportion of total variance explained by each principal balance.""" 43 | # Using sum of squares error calculation (df=1) 44 | # instead of population variance (df=0). 45 | axis_vars = np.var(self.response_matrix, ddof=1, axis=0) 46 | return axis_vars / axis_vars.sum() 47 | -------------------------------------------------------------------------------- /gneiss/composition/_variance.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 3 | # 4 | # Distributed under the terms of the Modified BSD License. 5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 7 | # ---------------------------------------------------------------------------- 8 | import numpy as np 9 | from skbio import DistanceMatrix 10 | from skbio.stats.composition import closure 11 | 12 | 13 | def variation_matrix(X): 14 | r""" Calculate Aitchison variation matrix. 15 | 16 | This calculates the Aitchison variation matrix. Given a compositional 17 | matrix :math:`X`, and columns :math:`i` and :math:`j`, the :math:`ij` entry 18 | in the variation matrix of :math:`X` is given by 19 | 20 | .. math:: 21 | V_{ij} = \frac{1}{2} \operatorname{var}\left(\ln \frac{x_i}{x_j}\right) 22 | 23 | Parameters 24 | ---------- 25 | X : pd.DataFrame 26 | Contingency table with n rows corresponding to samples 27 | and p columns corresponding to features. 28 | 29 | Returns 30 | ------- 31 | skbio.DistanceMatrix 32 | Variation matrix of size p x p (one entry per pair of features). 33 | 34 | References 35 | ---------- 36 | .. [1] V. Pawlowsky-Glahn, J. J. Egozcue, R. Tolosana-Delgado (2015), 37 | Modeling and Analysis of Compositional Data, Wiley, Chichester, UK 38 | 39 | .. [2] J. J. Egozcue, V. Pawlowsky-Glahn (2004), Groups of Parts and 40 | Their Balances in Compositional Data Analysis, Mathematical Geology 41 | """ 42 | v = np.zeros((X.shape[1], X.shape[1])) 43 | x = closure(X) 44 | for i in range(X.shape[1]): 45 | for j in range(i): 46 | v[i, j] = np.var(np.log(x[:, i]) - np.log(x[:, j])) 47 | # Make the matrix symmetric, since V(ln(x/y)) = V(ln(y/x)). 48 | # Also divide by 2 to ensure unit norm for balances. 
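    # Illustrative check on a hypothetical 2-sample, 2-feature input (not
    # executed): for X = [[1, 2], [2, 8]], ln(x_0/x_1) equals
    # [ln(1/2), ln(1/4)] across the two samples (closure leaves the ratios
    # unchanged), so var(ln(x_0/x_1)) = (ln 2)^2 / 4 and the returned
    # V[0, 1] is (ln 2)^2 / 8 once halved.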
49 | # See Eqn 4 in [2] 50 | return DistanceMatrix((v + v.T) / 2, ids=X.columns) 51 | -------------------------------------------------------------------------------- /gneiss/composition/_composition.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 3 | # 4 | # Distributed under the terms of the Modified BSD License. 5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 7 | # ---------------------------------------------------------------------------- 8 | import pandas as pd 9 | import skbio 10 | from skbio.stats.composition import ilr 11 | from gneiss.balances import balance_basis 12 | from gneiss.util import match_tips 13 | 14 | 15 | def ilr_transform(table: pd.DataFrame, tree: skbio.TreeNode) -> pd.DataFrame: 16 | """Perform the isometric log-ratio (ilr) transformation on a feature table. 17 | 18 | This creates a new table with balances (groups of features) that 19 | distinguish samples. Zeros must first be removed from the table 20 | (e.g. by adding a pseudocount). This docstring follows the numpydoc format: 21 | https://numpydoc.readthedocs.io/en/latest/ 22 | 23 | Parameters 24 | ---------- 25 | table : pd.DataFrame 26 | Dataframe of the feature table where rows correspond to samples 27 | and columns are features. The values within the table must be 28 | positive and nonzero. 29 | tree : skbio.TreeNode 30 | A tree relating all of the features to balances or 31 | log-contrasts (i.e. a hierarchy). This tree must be bifurcating 32 | (i.e. every internal node has exactly 2 children). The internal nodes of the tree 33 | will be renamed. 34 | 35 | Returns 36 | ------- 37 | balances : pd.DataFrame 38 | Balances calculated from the feature table. Each balance represents 39 | the log-ratio between the two groups of features below the corresponding internal node. 
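
    Notes
    -----
    A minimal usage sketch; the table and tree below are hypothetical,
    with four features related by a bifurcating hierarchy whose named
    internal nodes (``y0``, ``y1``, ``y2``) label the resulting balances::

        import pandas as pd
        from skbio import TreeNode
        from gneiss.composition import ilr_transform

        # two samples, four strictly positive features
        table = pd.DataFrame([[1, 2, 3, 4],
                              [4, 3, 2, 1]],
                             index=['s1', 's2'],
                             columns=['a', 'b', 'c', 'd'])
        tree = TreeNode.read(['((a,b)y1,(c,d)y2)y0;'])
        balances = ilr_transform(table, tree)
        # -> DataFrame indexed by ['s1', 's2'] with columns ['y0', 'y1', 'y2']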
40 | """ 41 | _table, _tree = match_tips(table, tree) 42 | basis, _ = balance_basis(_tree) 43 | balances = ilr(_table.values, basis) 44 | in_nodes = [n.name for n in _tree.levelorder() if not n.is_tip()] 45 | return pd.DataFrame(balances, 46 | columns=in_nodes, 47 | index=table.index) 48 | -------------------------------------------------------------------------------- /gneiss/regression/tests/data/loo.csv: -------------------------------------------------------------------------------- 1 | ,model_mse,pred_mse 2 | 0,4260.496054701282,54.18622304652599 3 | 1,4249.729540651938,65.5315803644453 4 | 2,4308.381781538387,2.2473041294562393 5 | 3,4171.3649026828525,148.68431600602923 6 | 4,4229.0766673909075,86.62702948774607 7 | 5,4116.071957191982,206.04849363338943 8 | 6,4291.356272273773,20.185811261941183 9 | 7,4247.848387235389,65.88182699812373 10 | 8,4280.951200463445,30.952001700594646 11 | 9,4308.791284413965,1.7593744234763966 12 | 10,4296.811286205901,14.235755262856841 13 | 11,4266.038114539728,46.16820874785575 14 | 12,4294.513406076312,16.54048715628344 15 | 13,4198.80916007727,115.44075421081786 16 | 14,4299.933519918486,10.873972613900444 17 | 15,4308.962558527042,1.5569522496643602 18 | 16,4146.048139786893,168.97881959977258 19 | 17,4271.59957874892,39.888553605911454 20 | 18,4283.214605760981,27.93225479528788 21 | 19,4240.267113131621,71.85358170168348 22 | 20,3865.3123050881723,455.1507540244841 23 | 21,4298.985490333344,11.737955025509407 24 | 22,4212.7032680853445,99.82809146668683 25 | 23,4195.950083899159,116.88780700999078 26 | 24,4072.6177711303317,242.71727889272387 27 | 25,4159.708341918885,153.8471722349595 28 | 26,4270.2705100334715,41.033911977299155 29 | 27,4287.618745208718,23.3368769592242 30 | 28,4189.413285610791,123.68071225408006 31 | 29,4201.68539487846,111.23040863550231 32 | 30,4225.484906018272,86.9823149007921 33 | 31,4271.267305724941,40.17406365429842 34 | 32,4271.840582007024,39.64126877885141 35 | 33,3977.404975585566,342.2907962328493 36 | 34,4223.808909462919,89.22378741677774 37 | 35,4162.58769041999,152.55352974220145 38 | 36,4162.168549226462,153.32005926284594 39 | 37,4194.701126330643,119.97354476493928 40 | 38,4211.112602919985,103.23385330388982 41 | 39,4283.057863114158,28.56510466677602 42 | 40,4251.739438646766,61.37924034589412 43 | 41,4211.337569965258,103.93346843059663 44 | 42,4213.677952181935,101.82844803208135 45 | 43,4004.6251255020197,322.9223825214111 46 | 44,4309.593365853613,0.9343173620906449 47 | 45,4307.59891842044,3.0607029500315304 48 | 46,4265.732051685689,47.82228161965757 49 | 47,4136.159353565893,187.15821681904137 50 | 48,4238.3420772679465,77.81628431034508 51 | 49,4302.189190917612,8.983205465045119 52 | -------------------------------------------------------------------------------- /gneiss/regression/tests/data/loo2.csv: -------------------------------------------------------------------------------- 1 | ,model_mse,pred_mse 2 | 0,4210.254252226571,62.7244814276346 3 | 1,4211.637544078904,59.28731330907396 4 | 2,4261.16964292544,1.5632108577163226 5 | 3,4120.227706392463,158.4176551220517 6 | 4,4194.81383810487,74.12893770610398 7 | 5,4062.59080988408,215.8389185989188 8 | 6,4249.074649016179,14.3801085059596 9 | 7,4201.031230679247,65.0913132126485 10 | 8,4230.052330598066,34.14975753720326 11 | 9,4260.674247388393,1.956655442219737 12 | 10,4248.808610693286,14.312123274205462 13 | 11,4217.133108158621,47.20095190398427 14 | 12,4247.62638198413,15.485331316173468 15 | 13,4158.5575171719565,107.87520269124951 16 
| 14,4248.736427270089,14.326680157044192 17 | 15,4262.180211177333,0.37917285947764945 18 | 16,4102.386908392766,166.41818121291553 19 | 17,4219.178090822697,45.11498285380986 20 | 18,4242.171042180773,21.222511951309993 21 | 19,4195.86950494634,69.54045197741316 22 | 20,3817.4178283650426,464.80939115822355 23 | 21,4243.375657832784,20.038443946057576 24 | 22,4139.460083408845,128.7706636543021 25 | 23,4167.699582287961,99.28472513339455 26 | 24,3993.4915035994995,281.73012807025697 27 | 25,4129.178914240647,139.64996281723694 28 | 26,4232.6815844335015,31.26152893262742 29 | 27,4248.3445753384085,14.856826916275455 30 | 28,4130.719920325524,137.79891120579325 31 | 29,4139.821025182945,128.15082264058097 32 | 30,4188.892773390983,76.81691339553873 33 | 31,4210.650307206859,54.0553002064334 34 | 32,4224.565303799641,39.51067743285984 35 | 33,3950.9211117494106,323.80385337396444 36 | 34,4165.780677853521,100.45174573187475 37 | 35,4101.517281439408,167.06442738984504 38 | 36,4104.628998644643,163.81966431619293 39 | 37,4145.145229741029,121.85610388044178 40 | 38,4165.6365980300525,100.72568307909934 41 | 39,4235.22250796671,28.467179798193584 42 | 40,4201.176969556616,64.170451061669 43 | 41,4163.859850749393,103.71697665477915 44 | 42,4158.274885579024,110.33395650500965 45 | 43,3970.6676233926455,311.57982208008514 46 | 44,4259.023597799898,3.801644051465528 47 | 45,4261.765412215231,0.8537315788551669 48 | 46,4196.769182277087,73.21731835996582 49 | 47,4119.239750993762,162.82391090276036 50 | 48,4153.57187529307,126.91052991408179 51 | 49,4249.5788963444775,15.553691413241404 52 | -------------------------------------------------------------------------------- /gneiss/tests/test_model.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 3 | # 4 | # Distributed under the terms of the Modified BSD License. 5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 
7 | # ---------------------------------------------------------------------------- 8 | import pandas as pd 9 | import statsmodels.formula.api as smf 10 | from skbio import TreeNode 11 | from gneiss._model import Model 12 | import unittest 13 | import os 14 | import pandas.util.testing as pdt 15 | 16 | 17 | # create some mock classes for testing 18 | class submock_ok(Model): 19 | def __init__(self, *args, **kwargs): 20 | super().__init__(*args, **kwargs) 21 | 22 | def summary(self): 23 | print("OK!") 24 | 25 | def fit(self, **kwargs): 26 | pass 27 | 28 | 29 | class submock_bad(Model): 30 | def __init__(self, **kwargs): 31 | super(Model, self, **kwargs) 32 | 33 | 34 | class TestModel(unittest.TestCase): 35 | def setUp(self): 36 | self.pickle_fname = "test.pickle" 37 | self.data = pd.DataFrame([[1, 1, 1], 38 | [3, 2, 3], 39 | [4, 3, 2], 40 | [5, 4, 4], 41 | [2, 5, 3], 42 | [3, 6, 5], 43 | [4, 7, 4]], 44 | index=['s1', 's2', 's3', 's4', 45 | 's5', 's6', 's7'], 46 | columns=['Y1', 'Y2', 'X']) 47 | 48 | self.model1 = smf.ols(formula="Y1 ~ X", data=self.data) 49 | self.model2 = smf.ols(formula="Y2 ~ X", data=self.data) 50 | 51 | self.basis = pd.DataFrame([[0.80442968, 0.19557032]], 52 | index=['a'], 53 | columns=['x', 'y']) 54 | self.tree = TreeNode.read(['(x, y)a;']) 55 | self.balances = pd.DataFrame({'a': [-1, 0, 1]}) 56 | self.metadata = pd.DataFrame( 57 | [[1], [3], [2]], 58 | columns=['X']) 59 | 60 | def tearDown(self): 61 | if os.path.exists(self.pickle_fname): 62 | os.remove(self.pickle_fname) 63 | 64 | def test_init(self): 65 | res = submock_ok(Y=self.balances, Xs=self.metadata) 66 | 67 | # check balances 68 | pdt.assert_frame_equal(self.balances, res.response_matrix) 69 | 70 | 71 | if __name__ == '__main__': 72 | unittest.main() 73 | -------------------------------------------------------------------------------- /gneiss/tests/data/large_tree.nwk: -------------------------------------------------------------------------------- 1 | 
(((((((1122517:0.06882,((252012:0.00548,588042:0.03795)45:0.00078,1121144:0.00648)34:0.0202)22:0.02416,(3330572:0.10987,279138:0.06878)23:0.01632)13:0.05558,(((75371:0.0859,214611:0.07975)35:0.00449,(143135:0.09553,356045:0.1125)36:0.00595)24:0.01277,160908:0.07333)14:0.105)7:0.07707,(((112795:0.09046,357011:0.13893)25:0.07603,4447334:0.25156)15:0.00593,((732929:0.14242,831289:0.16867)26:0.00658,(((1638797:0.02978,(((792450:0.04478,4681:0.02314)80:0.00634,((83531:0.02222,213177:0.01197)93:0.00769,(215692:0.01593,319907:0.05481)94:0.01631)81:0.00808)69:0.00336,208293:0.01894)57:0.01132)46:0.05063,((1806981:0.01642,(523224:0.03288,((148783:0.01871,146397:0.02416)95:0.00014,(148890:0.01623,146676:0.01235)96:0.0011)82:0.00547)70:0.01097)58:0.00721,(222209:0.0811,216805:0.02552)59:0.05218)47:0.01742)37:0.02315,((1137157:0.00854,1139779:0.01691)48:0.00141,(4362556:0.02248,4416927:0.00937)49:0.01111)38:0.07136)27:0.07544)16:0.01742)8:0.0766)4:0.02393,136959:0.21412)2:0.01293,((((1094976:0.27174,(2601820:0.12574,1124701:0.03443)28:0.21253)17:0.00958,(171768:0.31195,(211848:0.24113,845780:0.16002)29:0.0206)18:0.02439)9:0.25276,(3431064:0.06261,4423681:0.00907)10:0.39256)5:0.04968,((3749019:0.23613,(((1876538:0.09481,((((4468200:0.02519,((1146003:0.01465,1142972:0.00893)104:0.01089,1122202:0.01897)97:0.00342)83:0.01099,(216549:0.00841,(607006:0.02324,663880:0.02481)98:0.01213)84:0.01983)71:0.00654,(((4440638:0.00637,((697997:0.01101,4346060:0.02106)109:0.00484,804187:0.01941)105:0.00823)99:0.0033,1108390:0.02694)85:0.01253,3639039:0.0138)72:0.01591)60:0.03741,4440611:0.02193)50:0.045)39:0.05434,572134:0.13989)30:0.05905,((((((((850823:0.01423,1123984:0.01446)106:0.00333,(2578357:0.00999,(1024089:0.0153,(256536:0.00389,746927:0.0085)111:0.02256)110:0.01297)107:0.00745)100:0.00982,242467:0.00942)86:0.00283,224043:0.00904)73:0.06305,(209803:0.05809,(1147699:0.00958,78839:0.01539)87:0.01255)74:0.02145)61:0.05437,(203969:0.01049,113212:0.01718)62:0.07463)51:0.09163,((((766178:0.00642,156065:0.02144)88:0.00252,3616127:0.01843)75:0.01515,205391:0.08328)63:0.01478,(843189:0.05067,2867534:0.04027)64:0.05349)52:0.12384)40:0.02845,(((512006:0.01707,(1130478:0.01127,((742260:0.0579,4440396:0.03928)101:0.01357,2285453:0.01427)89:0.00474)76:0.05994)65:0.01083,(156611:0.04171,151283:0.03279)66:0.01435)53:0.01822,(((((4409771:0.0131,4450823:0.01556)102:0.01771,4367783:0.0177)90:0.0423,(1127423:0.01089,((1104509:0.00653,4424782:0.0305)108:0.00209,154494:0.0309)103:0.02042)91:0.01431)77:0.00614,(211129:0.08219,(4478794:0.01191,1129210:0.01509)92:0.04495)78:0.00774)67:0.01808,((154519:0.0024,4341561:0.04065)79:0.01119,4351648:0.01519)68:0.03589)54:0.01789)41:0.03913)31:0.03029)19:0.12182)11:0.01264,((((255018:0.01859,4466061:0.01239)42:0.0169,825937:0.0207)32:0.02137,1123837:0.05322)20:0.2196,((((364805:0.00924,4470139:0.00877)55:0.0024,154567:0.05202)43:0.0347,(223583:0.14375,(834883:0.00959,592291:0.00967)56:0.09018)44:0.02203)33:0.04164,238800:0.0929)21:0.01988)12:0.18772)6:0.01563)3:0.02085)1:0.0212,4322321:0.2845)0:0.25729; 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # gneiss 2 | 3 | [![Build Status](https://travis-ci.org/biocore/gneiss.png?branch=master)](https://travis-ci.org/biocore/gneiss) 4 | [![Coverage Status](https://coveralls.io/repos/biocore/gneiss/badge.svg)](https://coveralls.io/r/biocore/gneiss) 5 | 
[![Gitter](https://badges.gitter.im/biocore/gneiss.svg)](https://gitter.im/biocore/gneiss?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) 6 | 7 | Canonically pronounced *nice* 8 | 9 | 10 | gneiss is a compositional data analysis and visualization toolbox designed for analyzing high-dimensional proportions. See [here](https://biocore.github.io/gneiss/) for API documentation. 11 | 12 | Note that gneiss is not compatible with Python 2; it requires Python 3.4 or later. 13 | gneiss is currently in alpha. We are actively developing it, and __backward-incompatible interface changes may arise__. 14 | 15 | # Installation 16 | 17 | To install this package, it is recommended to use conda. First make sure that the appropriate channels are configured. 18 | 19 | ``` 20 | conda config --add channels https://conda.anaconda.org/bioconda 21 | conda config --add channels https://conda.anaconda.org/biocore 22 | conda config --add channels https://conda.anaconda.org/qiime2 23 | conda config --add channels https://conda.anaconda.org/qiime2/label/r2017.6 24 | ``` 25 | 26 | Then gneiss can be installed in a conda environment as follows: 27 | ``` 28 | conda create -n gneiss_env gneiss 29 | ``` 30 | To install the most up-to-date version of gneiss, run the following command: 31 | 32 | ``` 33 | pip install git+https://github.com/biocore/gneiss.git 34 | ``` 35 | 36 | # Tutorials 37 | 38 | * [What are balances](https://github.com/biocore/gneiss/blob/master/ipynb/balance_trees.ipynb) 39 | 40 | # QIIME 2 tutorials 41 | 42 | * [Linear regression on balances in the 88 soils](https://biocore.github.io/gneiss/docs/v0.4.0/tutorials/qiime2/88soils-qiime2-tutorial.html) 43 | * [Linear mixed effects models on balances in a CF study](https://biocore.github.io/gneiss/docs/v0.4.0/tutorials/qiime2/cfstudy-qiime2-tutorial.html) 44 | * [Linear regression on balances in the Chronic Fatigue Syndrome study](https://biocore.github.io/gneiss/docs/v0.4.0/tutorials/qiime2/cfs-qiime2-tutorial.html) 45 | 46 | # Python tutorials 47 | 48 | * [Linear regression on balances in the 88 soils](https://biocore.github.io/gneiss/docs/v0.4.0/tutorials/python/88soils-python-tutorial.html) 49 | * [Linear mixed effects models on balances in a CF study](https://biocore.github.io/gneiss/docs/v0.4.0/tutorials/python/cfstudy-python-tutorial.html) 50 | * [Linear regression on balances in the Chronic Fatigue Syndrome study](https://biocore.github.io/gneiss/docs/v0.4.0/tutorials/python/cfs-python-tutorial.html) 51 | 52 | 53 | If you use this software package in your own publications, please cite it as: 54 | ``` 55 | Morton JT, Sanders J, Quinn RA, McDonald D, Gonzalez A, Vázquez-Baeza Y, 56 | Navas-Molina JA, Song SJ, Metcalf JL, Hyde ER, Lladser M, Dorrestein PC, 57 | Knight R. 2017. Balance trees reveal microbial niche differentiation. 58 | mSystems 2:e00162-16. https://doi.org/10.1128/mSystems.00162-16. 59 | ``` 60 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # ---------------------------------------------------------------------------- 4 | # Copyright (c) 2016--, gneiss development team. 5 | # 6 | # Distributed under the terms of the Modified BSD License. 7 | # 8 | # The full license is in the file COPYING.txt, distributed with this software. 
9 | # ---------------------------------------------------------------------------- 10 | 11 | import re 12 | import ast 13 | import os 14 | 15 | from setuptools import find_packages, setup 16 | from setuptools.command.build_ext import build_ext as _build_ext 17 | 18 | 19 | class build_ext(_build_ext): 20 | def finalize_options(self): 21 | _build_ext.finalize_options(self) 22 | # Prevent numpy from thinking it is still in its setup process: 23 | __builtins__.__NUMPY_SETUP__ = False 24 | import numpy 25 | self.include_dirs.append(numpy.get_include()) 26 | 27 | 28 | # Dealing with Cython 29 | USE_CYTHON = os.environ.get('USE_CYTHON', False) 30 | ext = '.pyx' if USE_CYTHON else '.c' 31 | 32 | extensions = [ 33 | ] 34 | 35 | if USE_CYTHON: 36 | from Cython.Build import cythonize 37 | extensions = cythonize(extensions) 38 | 39 | classes = """ 40 | Development Status :: 4 - Beta 41 | License :: OSI Approved :: BSD License 42 | Topic :: Software Development :: Libraries 43 | Topic :: Scientific/Engineering 44 | Topic :: Scientific/Engineering :: Bio-Informatics 45 | Programming Language :: Python :: 3 46 | Programming Language :: Python :: 3 :: Only 47 | Operating System :: Unix 48 | Operating System :: POSIX 49 | Operating System :: MacOS :: MacOS X 50 | """ 51 | classifiers = [s.strip() for s in classes.split('\n') if s] 52 | 53 | description = ('Compositional data analysis tools and visualizations') 54 | 55 | with open('README.md') as f: 56 | long_description = f.read() 57 | 58 | 59 | # version parsing from __init__ pulled from Flask's setup.py 60 | # https://github.com/mitsuhiko/flask/blob/master/setup.py 61 | _version_re = re.compile(r'__version__\s+=\s+(.*)') 62 | 63 | with open('gneiss/__init__.py', 'rb') as f: 64 | hit = _version_re.search(f.read().decode('utf-8')).group(1) 65 | version = str(ast.literal_eval(hit)) 66 | 67 | setup(name='gneiss', 68 | version=version, 69 | license='BSD', 70 | description=description, 71 | long_description=long_description, 72 | long_description_content_type='text/markdown', 73 | author="gneiss development team", 74 | author_email="jamietmorton@gmail.com", 75 | maintainer="gneiss development team", 76 | maintainer_email="jamietmorton@gmail.com", 77 | packages=find_packages(), 78 | setup_requires=['numpy >= 1.15.3'], 79 | ext_modules=extensions, 80 | cmdclass={'build_ext': build_ext}, 81 | install_requires=[ 82 | 'IPython >= 3.2.0', 83 | 'matplotlib >= 1.4.3', 84 | 'numpy >= 1.15.3', 85 | 'pandas >= 0.18.0', 86 | 'scipy >= 0.15.1', 87 | 'nose >= 1.3.7', 88 | 'scikit-bio >= 0.5.5', 89 | 'statsmodels>=0.8.0', 90 | 'biom-format', 91 | 'seaborn', 92 | 'bokeh==1.1.0' 93 | ], 94 | classifiers=classifiers, 95 | package_data={}) 96 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # gneiss changelog 2 | 3 | ## Version 0.4.5 4 | * Pandas dependencies have been updated [#282](https://github.com/biocore/gneiss/pull/282) 5 | 6 | ## Version 0.4.4 7 | * Numpy and scikit-bio dependencies have been updated [#278](https://github.com/biocore/gneiss/pull/278) 8 | 9 | ## Version 0.4.3 10 | * Enabling direct download of fdr corrected pvalues 11 | * Adding in sparse version of the ilr transform utilizing COO-formated sparse matrices [#250](https://github.com/biocore/gneiss/pull/250) 12 | * Adding in sparse utilities for matching biom tables [#253](https://github.com/biocore/gneiss/pull/253) 13 | 14 | 15 | ## Version 0.4.2 16 | * Added 
`proportion_plot` to plot the mean proportions within a single balance [#234](https://github.com/biocore/gneiss/pull/234) 17 | 18 | ## Version 0.4.1 19 | * Added colorbar for heatmap 20 | * Decoupled qiime2 from gneiss. All qiime2 commands have now been ported to [q2-gneiss](https://github.com/qiime2/q2-gneiss) 21 | 22 | ## Version 0.4.0 23 | * Accelerated the ordinary least squares regression 24 | * Improved summary statistics and cross validation in ordinary least squares regression 25 | * Improved summary visualizations for OLS and MixedLM 26 | 27 | ## Version 0.3.2 28 | * Added `balance_boxplot` and `balance_barplot` to make interpreting balance partitions easier. 29 | * Added `balance_summary` to summarize a given balance using the q2 CLI. 30 | * Added `assign_ids` command to allow ids to be added manually. 31 | 32 | ## Version 0.3.0 33 | * Added q2 support for linear regression and linear mixed effects models [#98](https://github.com/biocore/gneiss/pull/98) 34 | * Added q2 support for hierarchical clustering [#116](https://github.com/biocore/gneiss/pull/116) 35 | * Added interactive heatmaps with highlights using matplotlib [#114](https://github.com/biocore/gneiss/pull/114) 36 | * Added tree visualizations for unrooted trees with bokeh [#112](https://github.com/biocore/gneiss/pull/112) 37 | * Added support for cross validation in ordinary least squares [#101](https://github.com/biocore/gneiss/pull/101) 38 | 39 | ## Version 0.2.1 40 | * Added heatmap dendrogram plotting functionality [#87](https://github.com/biocore/gneiss/issues/87) 41 | * Added a principal balance analysis heuristic using proportionality and Ward's clustering algorithm [#83](https://github.com/biocore/gneiss/issues/83) 42 | 43 | ## Version 0.2.0 44 | 45 | ### Features 46 | * Added filehandle support for write and read IO in the RegressionResults object [#77](https://github.com/biocore/gneiss/issues/77) 47 | 48 | 49 | ## Version 0.1.3 50 | 51 | ### Features 52 | * Added write and read IO for the RegressionResults object [#72](https://github.com/biocore/gneiss/issues/72) 53 | 54 | ## Version 0.1.2 55 | 56 | ### Features 57 | * Added `ladderize` and `gradient_sort` [#29](https://github.com/biocore/gneiss/issues/29) 58 | 59 | ### Bug fixes 60 | 61 | 62 | ## Version 0.0.2 63 | 64 | ### Features 65 | * Added statsmodels inference [#22](https://github.com/biocore/gneiss/pull/22) 66 | * Added support for ordinary least squares regression [#33](https://github.com/biocore/gneiss/pull/33) 67 | * Added support for linear mixed effects models [#38](https://github.com/biocore/gneiss/pull/38) 68 | * Added RegressionResults object to summarize statistics from statistical analyses 69 | * Added a niche sorting algorithm, `gneiss.sort.niche_sort`, that can generate a band table given a gradient [#16](https://github.com/biocore/gneiss/pull/16) 70 | * Added utility functions for handling feature tables, metadata, and trees. [#12](https://github.com/biocore/gneiss/pull/12) 71 | * Added GPL license. 72 | 73 | ### Bug fixes 74 | -------------------------------------------------------------------------------- /gneiss/cluster/tests/test_pba.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 3 | # 4 | # Distributed under the terms of the Modified BSD License. 5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 
7 | # ---------------------------------------------------------------------------- 8 | import numpy as np 9 | import pandas as pd 10 | import unittest 11 | from gneiss.cluster._pba import (correlation_linkage, gradient_linkage, 12 | rank_linkage, random_linkage) 13 | from skbio import TreeNode 14 | 15 | 16 | class TestPBA(unittest.TestCase): 17 | def setUp(self): 18 | pass 19 | 20 | def test_correlation_linkage_1(self): 21 | table = pd.DataFrame( 22 | [[1, 1, 0, 0, 0], 23 | [0, 1, 1, 0, 0], 24 | [0, 0, 1, 1, 0], 25 | [0, 0, 0, 1, 1]], 26 | columns=['s1', 's2', 's3', 's4', 's5'], 27 | index=['o1', 'o2', 'o3', 'o4']).T 28 | exp_str = ('((o1:0.574990173931,o2:0.574990173931)y1:0.773481312844,' 29 | '(o3:0.574990173931,o4:0.574990173931)y2:0.773481312844)' 30 | 'y0;\n') 31 | exp_tree = TreeNode.read([exp_str]) 32 | res_tree = correlation_linkage(table + 0.1) 33 | # only check for tree topology since checking for floating point 34 | # numbers on the branches is still tricky. 35 | self.assertEqual(exp_tree.ascii_art(), res_tree.ascii_art()) 36 | 37 | def test_correlation_linkage_2(self): 38 | t = pd.DataFrame([[1, 1, 2, 3, 1, 4], 39 | [2, 2, 0.1, 4, 1, .1], 40 | [3, 3.1, 2, 3, 2, 2], 41 | [4.1, 4, 0.2, 1, 1, 2.5]], 42 | index=['S1', 'S2', 'S3', 'S4'], 43 | columns=['F1', 'F2', 'F3', 'F4', 'F5', 'F6']) 44 | exp_str = ('((F4:0.228723591874,(F5:0.074748541601,' 45 | '(F1:0.00010428164962,F2:0.00010428164962)' 46 | 'y4:0.0746442599513)y3:0.153975050273)' 47 | 'y1:0.70266138894,(F3:0.266841737789,F6:0.266841737789)' 48 | 'y2:0.664543243026)y0;\n') 49 | exp_tree = TreeNode.read([exp_str]) 50 | res_tree = correlation_linkage(t) 51 | self.assertEqual(exp_tree.ascii_art(), res_tree.ascii_art()) 52 | 53 | 54 | class TestUPGMA(unittest.TestCase): 55 | def setUp(self): 56 | pass 57 | 58 | def test_gradient_linkage(self): 59 | table = pd.DataFrame( 60 | [[1, 1, 0, 0, 0], 61 | [0, 1, 1, 0, 0], 62 | [0, 0, 1, 1, 0], 63 | [0, 0, 0, 1, 1]], 64 | columns=['s1', 's2', 's3', 's4', 's5'], 65 | index=['o1', 'o2', 'o3', 'o4']).T 66 | gradient = pd.Series( 67 | [1, 2, 3, 4, 5], 68 | index=['s1', 's2', 's3', 's4', 's5']) 69 | res_tree = gradient_linkage(table, gradient) 70 | exp_str = '((o1:0.5,o2:0.5)y1:0.5,(o3:0.5,o4:0.5)y2:0.5)y0;\n' 71 | self.assertEqual(exp_str, str(res_tree)) 72 | 73 | 74 | class TestRandomLinkage(unittest.TestCase): 75 | 76 | def test_random_tree(self): 77 | np.random.seed(0) 78 | t = random_linkage(10) 79 | exp_str = ( 80 | '((7:0.0359448798595,8:0.0359448798595)y1:0.15902486847,' 81 | '((9:0.0235897432375,(4:0.00696620596189,6:0.00696620596189)' 82 | 'y5:0.0166235372756)y3:0.0747173561014,(1:0.0648004111784,' 83 | '((0:0.00196516046521,3:0.00196516046521)y7:0.0367750400883,' 84 | '(2:0.0215653684975,5:0.0215653684975)y8:0.017174832056)' 85 | 'y6:0.0260602106249)y4:0.0335066881605)y2:0.0966626489905)y0;\n') 86 | exp_tree = TreeNode.read([exp_str]) 87 | self.assertEqual(t.ascii_art(), exp_tree.ascii_art()) 88 | 89 | 90 | class TestRankLinkage(unittest.TestCase): 91 | 92 | def test_rank_linkage(self): 93 | ranks = pd.Series([1, 2, 4, 5], 94 | index=['o1', 'o2', 'o3', 'o4']) 95 | t = rank_linkage(ranks) 96 | exp = '((o1:0.5,o2:0.5)y1:1.0,(o3:0.5,o4:0.5)y2:1.0)y0;\n' 97 | self.assertEqual(str(t), exp) 98 | 99 | 100 | if __name__ == '__main__': 101 | unittest.main() 102 | -------------------------------------------------------------------------------- /gneiss/plot/tests/test_radial.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import 
pandas as pd 3 | import numpy as np 4 | from scipy.cluster.hierarchy import ward 5 | 6 | from skbio import TreeNode, DistanceMatrix 7 | from gneiss.plot._radial import radialplot 8 | from gneiss.plot._dendrogram import UnrootedDendrogram 9 | import numpy.testing as npt 10 | 11 | 12 | class TestRadial(unittest.TestCase): 13 | def setUp(self): 14 | 15 | self.coords = pd.DataFrame( 16 | [['487.5', '347.769', 'NaN', 'NaN', 'True'], 17 | ['12.5', '483.28', 'NaN', 'NaN', 'True'], 18 | ['324.897', '16.7199', 'NaN', 'NaN', 'True'], 19 | ['338.261', '271.728', '0', '2', 'False'], 20 | ['193.169', '365.952', '1', 'y3', 'False']], 21 | columns=['x', 'y', 'child0', 'child1', 'is_tip'], 22 | index=['0', '1', '2', 'y3', 'y4']) 23 | 24 | @unittest.skip('Visualizations are deprecated') 25 | def test_basic_plot(self): 26 | self.maxDiff = None 27 | exp_edges = {'dest_node': ['0', '1', '2', 'y3'], 28 | 'edge_color': ['#00FF00', '#00FF00', 29 | '#00FF00', '#FF0000'], 30 | 'edge_width': [2, 2, 2, 2], 31 | 'src_node': ['y3', 'y4', 'y3', 'y4'], 32 | 'x0': [338.2612593838583, 33 | 193.1688862557773, 34 | 338.2612593838583, 35 | 193.1688862557773], 36 | 'x1': [487.5, 12.499999999999972, 37 | 324.89684138234867, 338.2612593838583], 38 | 'y0': [271.7282256126416, 39 | 365.95231443706376, 40 | 271.7282256126416, 41 | 365.95231443706376], 42 | 'y1': [347.7691620070637, 43 | 483.2800610261029, 44 | 16.719938973897143, 45 | 271.7282256126416]} 46 | 47 | exp_nodes = {'child0': [np.nan, np.nan, np.nan, '0', '1'], 48 | 'child1': [np.nan, np.nan, np.nan, '2', 'y3'], 49 | 'color': ['#1C9099', '#1C9099', '#1C9099', 50 | '#FF999F', '#FF999F'], 51 | 'hover_var': [None, None, None, None, None], 52 | 'is_tip': [True, True, True, False, False], 53 | 'node_size': [10, 10, 10, 10, 10], 54 | 'x': [12.499999999999972, 55 | 487.5, 56 | 324.89684138234867, 57 | 338.26125938385832, 58 | 193.16888625577729], 59 | 'y': [483.28006102610289, 60 | 347.7691620070637, 61 | 16.719938973897143, 62 | 271.72822561264161, 63 | 365.95231443706376]} 64 | np.random.seed(0) 65 | num_otus = 3 # otus 66 | x = np.random.rand(num_otus) 67 | dm = DistanceMatrix.from_iterable(x, lambda x, y: np.abs(x - y)) 68 | lm = ward(dm.condensed_form()) 69 | t = TreeNode.from_linkage_matrix(lm, np.arange(len(x)).astype(np.str)) 70 | t = UnrootedDendrogram.from_tree(t) 71 | # incorporate colors in tree 72 | for i, n in enumerate(t.postorder(include_self=True)): 73 | if not n.is_tip(): 74 | n.name = "y%d" % i 75 | n.color = '#FF999F' 76 | n.edge_color = '#FF0000' 77 | n.node_size = 10 78 | else: 79 | n.color = '#1C9099' 80 | n.edge_color = '#00FF00' 81 | n.node_size = 10 82 | n.length = np.random.rand() * 3 83 | n.edge_width = 2 84 | p = radialplot(t, node_color='color', edge_color='edge_color', 85 | node_size='node_size', edge_width='edge_width') 86 | 87 | for e in exp_edges.keys(): 88 | if isinstance(exp_edges[e], float): 89 | npt.assert_allclose( 90 | p.renderers[0].data_source.data[e], 91 | np.array(exp_edges[e]) 92 | ) 93 | else: 94 | self.assertListEqual( 95 | list(p.renderers[0].data_source.data[e]), 96 | exp_edges[e]) 97 | 98 | for e in exp_nodes.keys(): 99 | self.assertListEqual( 100 | list(p.renderers[1].data_source.data[e]), 101 | exp_nodes[e]) 102 | 103 | self.assertTrue(isinstance(t, TreeNode)) 104 | 105 | 106 | if __name__ == "__main__": 107 | unittest.main() 108 | -------------------------------------------------------------------------------- /ci/environment.yml: -------------------------------------------------------------------------------- 1 | name: 
gneiss 2 | channels: !!python/tuple 3 | - https://conda.anaconda.org/bioconda 4 | - https://conda.anaconda.org/biocore 5 | - https://conda.anaconda.org/qiime2 6 | - defaults 7 | dependencies: 8 | - anaconda::matplotlib=1.5.1=np111py35_0 9 | - anaconda::numexpr=2.6.1=np111py35_1 10 | - anaconda::numpy=1.11.2=py35_0 11 | - anaconda::pandas=0.19.1=np111py35_0 12 | - anaconda::pytables=3.2.2=np111py35_4 13 | - anaconda::scikit-bio=0.5.1=np111py35_0 14 | - anaconda::scipy=0.18.1=np111py35_0 15 | - bioconda::click=6.6=py35_0 16 | - bokeh=0.12.4=py35_0 17 | - bz2file=0.98=py35_0 18 | - cachecontrol=0.11.6=py35_0 19 | - contextlib2=0.4.0=py35_0 20 | - cycler=0.10.0=py35_0 21 | - decorator=4.0.10=py35_0 22 | - entrypoints=0.2.2=py35_0 23 | - flake8=2.5.1=py35_0 24 | - freetype=2.5.5=1 25 | - future=0.15.2=py35_0 26 | - h5py=2.6.0=np111py35_1 27 | - hdf5=1.8.16=0 28 | - ipykernel=4.0.3=py35_0 29 | - ipython=3.2.3=py35_0 30 | - ipython_genutils=0.1.0=py35_0 31 | - ipywidgets=4.1.1=py35_0 32 | - jinja2=2.8=py35_1 33 | - jsonschema=2.5.1=py35_0 34 | - jupyter=1.0.0=py35_3 35 | - jupyter_client=4.3.0=py35_0 36 | - jupyter_console=5.0.0=py35_0 37 | - jupyter_core=4.1.0=py35_0 38 | - libpng=1.6.22=0 39 | - lockfile=0.12.2=py35_0 40 | - markupsafe=0.23=py35_2 41 | - mccabe=0.3.1=py35_0 42 | - mistune=0.7.2=py35_1 43 | - mkl=11.3.3=0 44 | - natsort=4.0.3=py35_0 45 | - nbconvert=4.2.0=py35_0 46 | - nbformat=4.0.1=py35_0 47 | - nose=1.3.7=py35_1 48 | - notebook=4.2.1=py35_0 49 | - openssl=1.0.2h=1 50 | - patsy=0.4.1=py35_0 51 | - pep8=1.7.0=py35_0 52 | - pip=8.1.2=py35_0 53 | - prompt_toolkit=1.0.9=py35_0 54 | - ptyprocess=0.5.1=py35_0 55 | - pyflakes=1.2.3=py35_0 56 | - pygments=2.1.3=py35_0 57 | - pyparsing=2.1.4=py35_0 58 | - pyqt=4.11.4=py35_3 59 | - python=3.5.2=0 60 | - python-dateutil=2.5.3=py35_0 61 | - python.app=1.2=py35_4 62 | - pytz=2016.4=py35_0 63 | - pyyaml=3.12=py35_0 64 | - pyzmq=15.2.0=py35_1 65 | - qiime2::arrow=0.8.0=py35_0 66 | - qiime2::binaryornot=0.3.0=0_ge797740 67 | - qiime2::biom-format=2.1.5=py35_3 68 | - qiime2::cookiecutter=1.4.0=py35_0 69 | - qiime2::ijson=2.3=py35_0 70 | - qiime2::ipymd=0.1.2=py35_0 71 | - qiime2::jinja2-time=0.2.0=py35_0 72 | - qiime2::poyo=0.4.0=py35_0 73 | - qiime2::python-frontmatter=0.2.1=py35_0 74 | - qiime2::tzlocal=1.3=py35_0 75 | - qiime2::whichcraft=0.4.0=py35_0 76 | - qt=4.8.7=3 77 | - qtconsole=4.0.1=py35_0 78 | - readline=6.2=2 79 | - requests=2.10.0=py35_0 80 | - setuptools=23.0.0=py35_0 81 | - sip=4.16.9=py35_0 82 | - six=1.10.0=py35_0 83 | - sqlite=3.13.0=0 84 | - statsmodels=0.8.0=np111py35_0 85 | - terminado=0.6=py35_0 86 | - tk=8.5.18=0 87 | - tornado=4.3=py35_1 88 | - traitlets=4.2.1=py35_0 89 | - wcwidth=0.1.7=py35_0 90 | - wheel=0.29.0=py35_0 91 | - xz=5.2.2=0 92 | - yaml=0.1.6=0 93 | - zlib=1.2.8=3 94 | - pip: 95 | - alabaster==0.7.9 96 | - appdirs==1.4.0 97 | - appnope==0.1.0 98 | - args==0.1.0 99 | - babel==2.3.4 100 | - backports.shutil-get-terminal-size==1.0.0 101 | - canvas==0.0.1 102 | - chest==0.2.3 103 | - clint==0.5.1 104 | - cloudpickle==0.2.1 105 | - colorama==0.3.7 106 | - coverage==4.1 107 | - coveralls==1.1 108 | - cvxopt==1.1.8 109 | - cython==0.23.5 110 | - dask==0.11.0 111 | - datashader==0.4.0 112 | - datashape==0.5.2 113 | - docopt==0.6.2 114 | - docutils==0.12 115 | - emperor==1.0.0b5 116 | - ete3==3.0.0b35 117 | - gneiss (/Users/mortonjt/Dropbox/UCSD/research/gneiss)==0.3.1 118 | - gnureadline==6.3.3 119 | - heapdict==1.0.0 120 | - igraph==0.1.11 121 | - imagesize==0.7.1 122 | - ipyparallel==5.2.0 123 | - 
ipython-genutils==0.1.0 124 | - jgraph==0.2.1 125 | - joblib==0.10.2 126 | - jupyter-client==4.3.0 127 | - jupyter-console==5.0.0 128 | - jupyter-core==4.1.0 129 | - llvmlite==0.14.0 130 | - locket==0.2.0 131 | - matplotlib-venn==0.11.4 132 | - msgpack-python==0.4.8 133 | - multipledispatch==0.4.8 134 | - nbdime==0.1.0 135 | - networkx==1.11 136 | - numba==0.29.0 137 | - odo==0.4.2 138 | - packaging==16.8 139 | - partd==0.3.6 140 | - path.py==8.1.2 141 | - pexpect==4.2.1 142 | - pickleshare==0.7.4 143 | - pillow==3.3.1 144 | - pkginfo==1.4.1 145 | - ply==3.9 146 | - prompt-toolkit==1.0.13 147 | - pulp==1.6.5 148 | - pyemd==0.3.0 149 | - pyomo==5.1.1 150 | - pysam==0.9.0 151 | - python-igraph==0.7.1.post6 152 | - python-ternary==1.0 153 | - pyutilib==5.4.1 154 | - q2-composition==2017.3.0.dev0 155 | - q2-feature-table==2017.3.0.dev0 156 | - q2-taxa==2017.3.0.dev0 157 | - q2-types==2017.3.0.dev0 158 | - q2cli (/Users/mortonjt/Dropbox/UCSD/research/q2/q2cli)==2017.3.0.dev0 159 | - q2templates==2017.3.0.dev0 160 | - qcli==0.1.1 161 | - qiime2 (/Users/mortonjt/Dropbox/UCSD/research/q2/qiime2)==2017.3.0.dev0 162 | - requests-toolbelt==0.7.0 163 | - rpy2==2.7.8 164 | - scikit-learn==0.17.1 165 | - seaborn==0.7.1 166 | - simplegeneric==0.8.1 167 | - snowballstemmer==1.2.1 168 | - sourcetracker==2.0.1.dev0 169 | - sphinx==1.4.9 170 | - tables==3.2.2 171 | - tabview==1.4.2 172 | - toolz==0.8.0 173 | - twine==1.8.1 174 | - xarray==0.8.2 175 | - xlrd==0.9.4 176 | 177 | -------------------------------------------------------------------------------- /gneiss/plot/_radial.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 3 | # 4 | # Distributed under the terms of the Modified BSD License. 5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 7 | # ---------------------------------------------------------------------------- 8 | import pandas as pd 9 | from gneiss.plot._dendrogram import UnrootedDendrogram 10 | import warnings 11 | try: 12 | from bokeh.models.glyphs import Circle, Segment 13 | from bokeh.models import ColumnDataSource, DataRange1d, Plot 14 | from bokeh.models import (HoverTool, BoxZoomTool, ResetTool, 15 | WheelZoomTool, SaveTool, PanTool) 16 | except: 17 | warnings.warn("Bokeh isn't installed - " 18 | "the interactive visualizations won't work.") 19 | 20 | 21 | def radialplot(tree, node_color='node_color', node_size='node_size', 22 | node_alpha='node_alpha', edge_color='edge_color', 23 | edge_alpha='edge_alpha', edge_width='edge_width', 24 | hover_var='hover_var', figsize=(500, 500), **kwargs): 25 | """ Plots unrooted radial tree. 26 | 27 | Parameters 28 | ---------- 29 | tree : instance of skbio.TreeNode 30 | Input tree for plotting. 31 | node_color : str 32 | Name of variable in `tree` to color nodes. 33 | node_size : str 34 | Name of variable in `tree` that specifies the radius of nodes. 35 | node_alpha : str 36 | Name of variable in `tree` to specify node transparency. 37 | edge_color : str 38 | Name of variable in `tree` to color edges. 39 | edge_alpha : str 40 | Name of variable in `tree` to specify edge transparency. 41 | edge_width : str 42 | Name of variable in `tree` to specify edge width. 43 | hover_var : str 44 | Name of variable in `tree` to display in the hover menu. 45 | figsize : tuple, int 46 | Size of resulting figure. 
default: (500, 500) 47 | **kwargs: dict 48 | Plotting options to pass into bokeh.models.Plot 49 | 50 | Returns 51 | ------- 52 | bokeh.models.Plot 53 | Interactive plotting instance. 54 | 55 | 56 | Notes 57 | ----- 58 | This assumes that the tree is strictly bifurcating. 59 | 60 | See also 61 | -------- 62 | bifurcate 63 | """ 64 | warnings.warn("This visualization are deprecated.", DeprecationWarning) 65 | # This entire function was motivated by 66 | # http://chuckpr.github.io/blog/trees2.html 67 | t = UnrootedDendrogram.from_tree(tree.copy()) 68 | 69 | nodes = t.coords(figsize[0], figsize[1]) 70 | 71 | # fill in all of the node attributes 72 | def _retreive(tree, x, default): 73 | return pd.Series({n.name: getattr(n, x, default) 74 | for n in tree.levelorder()}) 75 | 76 | # default node color to light grey 77 | nodes[node_color] = _retreive(t, node_color, default='#D3D3D3') 78 | nodes[node_size] = _retreive(t, node_size, default=1) 79 | nodes[node_alpha] = _retreive(t, node_alpha, default=1) 80 | nodes[hover_var] = _retreive(t, hover_var, default=None) 81 | 82 | edges = nodes[['child0', 'child1']] 83 | edges = edges.dropna(subset=['child0', 'child1']) 84 | edges = edges.unstack() 85 | edges = pd.DataFrame({'src_node': edges.index.get_level_values(1), 86 | 'dest_node': edges.values}) 87 | edges['x0'] = [nodes.loc[n].x for n in edges.src_node] 88 | edges['x1'] = [nodes.loc[n].x for n in edges.dest_node] 89 | edges['y0'] = [nodes.loc[n].y for n in edges.src_node] 90 | edges['y1'] = [nodes.loc[n].y for n in edges.dest_node] 91 | ns = [n.name for n in t.levelorder(include_self=True)] 92 | attrs = pd.DataFrame(index=ns) 93 | 94 | # default edge color to black 95 | attrs[edge_color] = _retreive(t, edge_color, default='#000000') 96 | attrs[edge_width] = _retreive(t, edge_width, default=1) 97 | attrs[edge_alpha] = _retreive(t, edge_alpha, default=1) 98 | 99 | edges = pd.merge(edges, attrs, left_on='dest_node', 100 | right_index=True, how='outer') 101 | edges = edges.dropna(subset=['src_node']) 102 | 103 | node_glyph = Circle(x="x", y="y", 104 | radius=node_size, 105 | fill_color=node_color, 106 | fill_alpha=node_alpha) 107 | 108 | edge_glyph = Segment(x0="x0", y0="y0", 109 | x1="x1", y1="y1", 110 | line_color=edge_color, 111 | line_alpha=edge_alpha, 112 | line_width=edge_width) 113 | 114 | def df2ds(df): 115 | return ColumnDataSource(ColumnDataSource.from_df(df)) 116 | 117 | ydr = DataRange1d(range_padding=0.05) 118 | xdr = DataRange1d(range_padding=0.05) 119 | 120 | plot = Plot(x_range=xdr, y_range=ydr, **kwargs) 121 | plot.add_glyph(df2ds(edges), edge_glyph) 122 | ns = plot.add_glyph(df2ds(nodes), node_glyph) 123 | 124 | tooltip = [ 125 | ("Feature ID", "@index") 126 | ] 127 | if hover_var is not None: 128 | tooltip += [(hover_var, "@" + hover_var)] 129 | 130 | hover = HoverTool(renderers=[ns], tooltips=tooltip) 131 | plot.add_tools(hover, BoxZoomTool(), ResetTool(), 132 | WheelZoomTool(), SaveTool(), PanTool()) 133 | 134 | return plot 135 | -------------------------------------------------------------------------------- /gneiss/regression/tests/data/coefficients.csv: -------------------------------------------------------------------------------- 1 | ,Intercept,ph 2 | y0,-2.417821963898191,0.3566904413073933 3 | y1,-0.9052110511210674,0.049641573168426295 4 | y2,-0.9357918880737763,0.10783154573751495 5 | y3,-2.3133000514177455,0.3606910551421914 6 | y4,0.33748362746687366,0.025202276296260796 7 | y5,-1.7196040211996224,0.24445492171611835 8 | y6,0.7183771668620241,-0.09853010667696244 9 | 
y7,-1.2448131020654418,0.18461233905393876 10 | y8,0.0043281368780772956,0.017766196752291036 11 | y9,-8.888635895449163,1.4338978894784278 12 | y10,-0.1380162869555483,-0.027033261659450195 13 | y11,2.0081696710019457,-0.33669643064356075 14 | y12,-0.4458001655134542,0.052614929491326755 15 | y13,-2.5388789829593894,0.36269640192567715 16 | y14,-1.7009227515869187,0.2486995712072929 17 | y15,2.2342942376459476,-0.28137737848819855 18 | y16,0.9952936138338209,-0.1358283261966583 19 | y17,1.8985792038741323,-0.309574156534415 20 | y18,-0.5185397408352778,0.045702588530660976 21 | y19,0.29038527282402016,-0.031194965277184943 22 | y20,0.7927326302466351,-0.10810810411171143 23 | y21,-0.19862086569906898,0.09197269958700291 24 | y22,1.11695795500751,-0.15393879768283344 25 | y23,-1.9375935274988665,0.316771155039698 26 | y24,-1.5197127403457722,0.2466330689419788 27 | y25,-7.222697297003709,1.090728825447765 28 | y26,11.135550530286885,-1.842919462263025 29 | y27,-0.5969641090728304,0.11717486570967786 30 | y28,-0.5083320185752758,0.07391030315795062 31 | y29,-0.7113172797977424,0.12208714562398323 32 | y30,-0.4043710947066187,0.038640993551844874 33 | y31,2.731012073137618,-0.46159032232526753 34 | y32,3.965568487845451,-0.5957211835451143 35 | y33,-1.3843125783110992,0.2063797362105431 36 | y34,0.11186906658990858,0.09974027808792005 37 | y35,0.7458762279144321,-0.15991699299592574 38 | y36,0.5462379729323912,-0.12509925899594823 39 | y37,0.7715428127698168,-0.08374870435548773 40 | y38,-0.006490297265430415,0.006489381824971823 41 | y39,-1.1505876784891131,0.2419237573784568 42 | y40,1.4927158016341542,-0.22453611806483276 43 | y41,0.2362826246016099,-0.02382474060748356 44 | y42,-0.8199793918824666,0.14617589105266396 45 | y43,-1.2479231999016884,0.25900931458640947 46 | y44,1.6489193595737197,-0.2656311740355035 47 | y45,0.37513055994129435,-0.06575775407372958 48 | y46,-0.23235985768487274,0.04345932796883874 49 | y47,0.232413460376192,-0.04363490376686736 50 | y48,-0.9701856408728218,0.17462167769163456 51 | y49,0.08141413719517979,0.004396636449273949 52 | y50,1.8409060639108943,-0.29310915670058585 53 | y51,0.11107094311846646,-0.023971435949612815 54 | y52,-0.3288508216536449,0.04478598174234949 55 | y53,-1.950201188792175,0.27019388715883313 56 | y54,-0.21754527398648923,0.02567564793019846 57 | y55,0.5070344389950753,-0.08435530699541947 58 | y56,0.5469829898615253,-0.015692985885081844 59 | y57,-1.0651920370632453,0.10357010418282334 60 | y58,-0.3618051141258941,0.04083277567819005 61 | y59,0.7359516747239916,-0.1191674300733181 62 | y60,-0.27027205230119355,0.027219092575649903 63 | y61,0.11332657234338792,-0.037628480171609864 64 | y62,-0.054802386332120014,0.015438312126530043 65 | y63,0.17834351919623104,-0.0353004982881047 66 | y64,0.5021027013118613,-0.08149261323028857 67 | y65,-0.7317177817250978,0.11092985149299428 68 | y66,0.47637412333038126,-0.0035924721497009432 69 | y67,0.020606271402806065,0.009997371330490225 70 | y68,1.9221471293322256,-0.26752500098575666 71 | y69,1.17970241212005,-0.1641827048201078 72 | y70,-0.8184772546295412,0.12022121428082484 73 | y71,-0.12330798367923247,0.028963428511499006 74 | y72,-0.7043520903538832,0.12111737639867173 75 | y73,0.16468385036158828,-0.01491809942028498 76 | y74,-0.05458864419854579,0.008102836974669044 77 | y75,-1.3794040734816935,0.18755889639070997 78 | y76,1.2988084300034206,-0.17970824507379088 79 | y77,0.23804873279931538,-0.04967872251402735 80 | y78,1.3443871234443194,-0.1511514994081249 81 | 
y79,1.791956991985029,-0.31027551804655645 82 | y80,-0.2260027577677782,0.0352420930295071 83 | y81,-0.7315837744871326,0.09397099811752183 84 | y82,0.22342083868015558,-0.036252296752822454 85 | y83,0.9890597158215642,-0.12754719940053685 86 | y84,0.020923823752194582,0.006047398987785311 87 | y85,1.2659322933931254,-0.1790956481945273 88 | y86,0.5490840386384254,-0.09191494829380803 89 | y87,-0.854051259233914,0.11497150927490624 90 | y88,1.5442076690205047,-0.22477604150408292 91 | y89,0.05180071552460162,-0.04441923140965983 92 | y90,2.1702012325252618,-0.32509549154637923 93 | y91,-1.1438217763831038,0.18457225499582508 94 | y92,0.20182664476633647,-0.03617016003385909 95 | y93,0.32283178714269156,-0.07556082692785951 96 | y94,0.6624037908148538,-0.11691640268465277 97 | y95,0.12781936469114472,-0.05442398123766153 98 | y96,0.3225935214956588,-0.049489243690185744 99 | y97,0.05504548243442081,-0.0019830812889765153 100 | y98,-0.31634347553783554,0.03592855377461579 101 | y99,-0.20450768537043104,0.01647646243984905 102 | y100,0.5706627524394878,-0.07096218388437267 103 | y101,0.4481857221755603,-0.05229362634021477 104 | y102,1.4610714039897088,-0.1974608317058108 105 | y103,-0.5831225769898846,0.12847535822056447 106 | y104,-0.3778252764706474,0.05540222040774105 107 | y105,-0.10391591807003929,0.05028748957940854 108 | y106,-0.6458814418806339,0.0919046542929978 109 | y107,1.726022770127027,-0.23392347023971047 110 | y108,0.40707386320519157,-0.05445419955171094 111 | y109,1.7269767479591678,-0.22161951275880676 112 | y110,-0.3485150472085344,0.04398702248446438 113 | y111,-0.6674467573758245,0.22982383932854994 114 | y112,-1.5226066949789847,0.21540215886086916 115 | y113,-0.4682652952856936,0.06948694013844546 116 | y114,-0.5963751787826362,0.07423163157790996 117 | y115,0.18941501262895516,-0.003290492356183168 118 | y116,0.3319245569899489,-0.07497860531797448 119 | y117,-0.44905384789598235,0.059247252407698896 120 | -------------------------------------------------------------------------------- /gneiss/regression/tests/data/pvalues.csv: -------------------------------------------------------------------------------- 1 | ,Intercept,ph 2 | y0,2.0231382176562713e-07,1.5986797744717098e-06 3 | y1,0.06011885055018011,0.5200315964601321 4 | y2,0.016362183704805352,0.08459797560939894 5 | y3,2.711345677586785e-05,4.908287303929518e-05 6 | y4,0.37635229111205415,0.6823421422061464 7 | y5,0.0005396934480471485,0.0021766278626422775 8 | y6,0.07985080247891974,0.13639964193115275 9 | y7,5.369709631311585e-05,0.0001910744159765913 10 | y8,0.988225064070116,0.7080350094829028 11 | y9,4.409135649041548e-30,5.177944970965206e-30 12 | y10,0.6303272886698309,0.5600337884848634 13 | y11,3.128836166342658e-07,1.30963118803634e-07 14 | y12,0.1066787667914187,0.23735473121815692 15 | y13,1.295050642130945e-08,3.0887411253115263e-07 16 | y14,2.2367190437659e-05,0.00010972539901087592 17 | y15,1.1370780526586244e-11,3.000983940825702e-08 18 | y16,0.006185201230885478,0.02009040359692411 19 | y17,7.695994405895602e-05,6.775560403179732e-05 20 | y18,0.12150931064477158,0.39638796434776435 21 | y19,0.47269850766776533,0.6330303174570436 22 | y20,0.010308804348404794,0.02962236033751162 23 | y21,0.6217481615522058,0.15975695806591558 24 | y22,0.0025029093920176213,0.009470244972664433 25 | y23,5.771271567361451e-09,4.189942632303552e-09 26 | y24,1.6429321667207632e-05,1.54081456332786e-05 27 | y25,7.958076802929733e-18,2.2899968563086935e-16 28 | y26,1.9455705722308582e-18,5.777506242608944e-19 29 | 
y27,0.012384954189432178,0.002601398943639638 30 | y28,0.07759917873075849,0.11193438405921681 31 | y29,0.004749709484931084,0.0028015471036144045 32 | y30,0.18729661964979205,0.43434670921461993 33 | y31,8.055677022011245e-11,1.696061444790691e-11 34 | y32,3.302116870758148e-13,5.795117625440092e-12 35 | y33,1.3939285026652989e-05,5.424895386889316e-05 36 | y34,0.8709721145588748,0.3715329255621058 37 | y35,0.00021920050748216926,1.8379424591619849e-06 38 | y36,0.16360444034078986,0.04973415032091874 39 | y37,0.01355347015225425,0.09429934604574804 40 | y38,0.9731249027798142,0.8350011803300309 41 | y39,0.00031226279875793295,4.677092902076776e-06 42 | y40,0.0015954833779699596,0.003203703162063911 43 | y41,0.18614915413516903,0.40828378488135897 44 | y42,0.004251445104840961,0.001709061310481806 45 | y43,1.703359098557291e-05,8.552286564044924e-08 46 | y44,1.2426600622226377e-05,1.328985290260878e-05 47 | y45,0.17551299912142332,0.14230620069990005 48 | y46,0.35489045213483716,0.28496905252631144 49 | y47,0.16602735174894406,0.10845519956226542 50 | y48,0.006427527916242203,0.002539622003724941 51 | y49,0.75368004235738,0.9165150791122991 52 | y50,0.005771991467566583,0.006530235615020178 53 | y51,0.6017802273926969,0.48637481410913097 54 | y52,0.26759490722527424,0.3499562720950433 55 | y53,8.924597332512463e-06,0.00011325234507231189 56 | y54,0.31949290114412354,0.466871596076887 57 | y55,0.09013458423408123,0.08136538477870153 58 | y56,0.0816663196354404,0.7553993031914661 59 | y57,0.004433194199059207,0.08250281174037756 60 | y58,0.2290113551466194,0.40014959567649544 61 | y59,0.018282435042915796,0.01812677919937224 62 | y60,0.32371878164778645,0.5381075053380269 63 | y61,0.6958683825610459,0.42280243487177394 64 | y62,0.8206811909928219,0.6930044395685215 65 | y63,0.3361523253936626,0.2396769106368928 66 | y64,0.07411406118822376,0.0730594671582051 67 | y65,0.0008487512443398987,0.0016913290798711417 68 | y66,0.08156930966743642,0.9346993696415715 69 | y67,0.9536637593962555,0.8616063078659033 70 | y68,5.764697472006051e-07,1.1980943688674127e-05 71 | y69,3.469880515887769e-08,1.251969770259889e-06 72 | y70,0.0050466333460564726,0.01056032847691737 73 | y71,0.5824003403194246,0.42486627824141254 74 | y72,0.020027189607415154,0.013568182783744916 75 | y73,0.5313886104027061,0.725662026927302 76 | y74,0.8883364466894954,0.8974424613269687 77 | y75,0.0004461288566022144,0.0028619022760101014 78 | y76,3.957183581647853e-06,6.0947195212273386e-05 79 | y77,0.4332278891006891,0.3123717778815628 80 | y78,0.0016576279361801888,0.026496104629617434 81 | y79,0.0007138976393654403,0.00030933688733006414 82 | y80,0.28891981081632123,0.30633969607425043 83 | y81,0.04627436056711397,0.11190946808988014 84 | y82,0.4988366152669681,0.4973509989614011 85 | y83,5.783715711833836e-08,8.475043164083101e-06 86 | y84,0.9336421911918483,0.8816971497803062 87 | y85,0.0004047193067072164,0.0018101551247001204 88 | y86,0.13752630011858602,0.12436393090433022 89 | y87,0.0007949699924884687,0.004807041210853434 90 | y88,1.681321342801181e-06,1.2957462590935792e-05 91 | y89,0.8441422899161682,0.2985832666575177 92 | y90,4.5606670103847854e-08,3.0324380533746814e-07 93 | y91,0.000679688358488179,0.0006962171212845777 94 | y92,0.5530261279307382,0.5109620405114144 95 | y93,0.07684438987080362,0.011188109318831293 96 | y94,0.012222421854624525,0.0064113611717861 97 | y95,0.6863133156452363,0.2888980046697222 98 | y96,0.3378152841800086,0.3630338526064949 99 | y97,0.8089947540821971,0.957043446828022 100 | 
y98,0.1803712958932434,0.3454962628351098 101 | y99,0.4710487985349683,0.7192012832165596 102 | y100,0.2037978320656817,0.3275210105710409 103 | y101,0.07838268739124357,0.2021477449146662 104 | y102,6.275550928468192e-10,1.0081774647797712e-07 105 | y103,0.03602584131371042,0.004694931406947464 106 | y104,0.2205716136177221,0.26625549453712816 107 | y105,0.648991144530094,0.17514504906365966 108 | y106,0.03191994202863099,0.05829444014664721 109 | y107,9.159580575936758e-07,2.7343819297797546e-05 110 | y108,0.20615265960503892,0.29493372156085573 111 | y109,3.9957320068973616e-08,7.122719359440899e-06 112 | y110,0.1601883874809344,0.2719299929048468 113 | y111,0.057124882232972594,9.02276720234949e-05 114 | y112,8.83353387262592e-05,0.0005319623291459005 115 | y113,0.07855307166119041,0.10596204097226382 116 | y114,0.09814960073925237,0.2015466315199174 117 | y115,0.46208377440008475,0.9369296099174003 118 | y116,0.4032484957728807,0.24383860569835114 119 | y117,0.05493605762180559,0.11604625913417174 120 | -------------------------------------------------------------------------------- /gneiss/regression/_model.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 3 | # 4 | # Distributed under the terms of the Modified BSD License. 5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 7 | # ---------------------------------------------------------------------------- 8 | import abc 9 | import pandas as pd 10 | from skbio.stats.composition import ilr_inv 11 | from gneiss._model import Model 12 | from gneiss.balances import balance_basis 13 | 14 | 15 | class RegressionModel(Model): 16 | def __init__(self, *args, **kwargs): 17 | """ 18 | Summary object for storing regression results. 19 | 20 | A `RegressionResults` object stores information about the 21 | individual balances used in the regression, the coefficients, 22 | residuals. This object can be used to perform predictions. 23 | In addition, summary statistics such as the coefficient 24 | of determination for the overall fit can be calculated. 25 | 26 | Parameters 27 | ---------- 28 | submodels : list of statsmodels objects 29 | List of statsmodels result objects. 30 | balances : pd.DataFrame 31 | A table of balances where samples are rows and 32 | balances are columns. These balances were calculated 33 | using `tree`. 34 | """ 35 | self._beta = None 36 | self._resid = None 37 | self._fitted = False 38 | super().__init__(*args, **kwargs) 39 | # there is only one design matrix for regression 40 | self.design_matrix = self.design_matrices 41 | 42 | def coefficients(self, tree=None): 43 | """ Returns coefficients from fit. 44 | 45 | Parameters 46 | ---------- 47 | tree : skbio.TreeNode, optional 48 | The tree used to perform the ilr transformation. If this 49 | is specified, then the prediction will be represented as 50 | proportions. Otherwise, if this is not specified, the prediction 51 | will be represented as balances. (default: None). 52 | 53 | Returns 54 | ------- 55 | pd.DataFrame 56 | A table of coefficients where rows are covariates, 57 | and the columns are balances. If `tree` is specified, then 58 | the columns are proportions. 59 | """ 60 | if not self._fitted: 61 | ValueError(('Model not fitted - coefficients not calculated.' 
62 | 'See `fit()`')) 63 | coef = self._beta 64 | if tree is not None: 65 | basis, _ = balance_basis(tree) 66 | c = ilr_inv(coef.values, basis=basis) 67 | ids = [n.name for n in tree.tips()] 68 | return pd.DataFrame(c, columns=ids, index=coef.index) 69 | else: 70 | return coef 71 | 72 | def residuals(self, tree=None): 73 | """ Returns calculated residuals from fit. 74 | 75 | Parameters 76 | ---------- 77 | X : pd.DataFrame, optional 78 | Input table of covariates. If not specified, then the 79 | fitted values calculated from training the model will be 80 | returned. 81 | tree : skbio.TreeNode, optional 82 | The tree used to perform the ilr transformation. If this 83 | is specified, then the prediction will be represented 84 | as proportions. Otherwise, if this is not specified, 85 | the prediction will be represented as balances. (default: None). 86 | 87 | Returns 88 | ------- 89 | pd.DataFrame 90 | A table of residuals where rows are covariates, 91 | and the columns are balances. If `tree` is specified, then 92 | the columns are proportions. 93 | 94 | References 95 | ---------- 96 | .. [1] Aitchison, J. "A concise guide to compositional data analysis, 97 | CDA work." Girona 24 (2003): 73-81. 98 | """ 99 | if not self._fitted: 100 | ValueError(('Model not fitted - coefficients not calculated.' 101 | 'See `fit()`')) 102 | resid = self._resid 103 | if tree is not None: 104 | basis, _ = balance_basis(tree) 105 | proj_resid = ilr_inv(resid.values, basis=basis) 106 | ids = [n.name for n in tree.tips()] 107 | return pd.DataFrame(proj_resid, 108 | columns=ids, 109 | index=resid.index) 110 | else: 111 | return resid 112 | 113 | @abc.abstractmethod 114 | def predict(self, X=None, tree=None, **kwargs): 115 | """ Performs a prediction based on model. 116 | 117 | Parameters 118 | ---------- 119 | X : pd.DataFrame, optional 120 | Input table of covariates, where columns are covariates, and 121 | rows are samples. If not specified, then the fitted values 122 | calculated from training the model will be returned. 123 | tree : skbio.TreeNode, optional 124 | The tree used to perform the ilr transformation. If this 125 | is specified, then the prediction will be represented 126 | as proportions. Otherwise, if this is not specified, 127 | the prediction will be represented as balances. (default: None). 128 | **kwargs : dict 129 | Other arguments to be passed into the model prediction. 130 | 131 | Returns 132 | ------- 133 | pd.DataFrame 134 | A table of predicted values where rows are covariates, 135 | and the columns are balances. If `tree` is specified, then 136 | the columns are proportions. 137 | 138 | """ 139 | if not self._fitted: 140 | ValueError(('Model not fitted - coefficients not calculated.' 141 | 'See `fit()`')) 142 | if X is None: 143 | X = self.design_matrices 144 | 145 | prediction = X.dot(self._beta) 146 | if tree is not None: 147 | basis, _ = balance_basis(tree) 148 | proj_prediction = ilr_inv(prediction.values, basis=basis) 149 | ids = [n.name for n in tree.tips()] 150 | return pd.DataFrame(proj_prediction, 151 | columns=ids, 152 | index=prediction.index) 153 | else: 154 | return prediction 155 | -------------------------------------------------------------------------------- /gneiss/regression/tests/test_mixedlm.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 
3 | # 4 | # Distributed under the terms of the Modified BSD License. 5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 7 | # ---------------------------------------------------------------------------- 8 | import os 9 | import shutil 10 | import numpy as np 11 | import pandas as pd 12 | import pandas.util.testing as pdt 13 | import unittest 14 | from skbio import TreeNode 15 | from gneiss.regression import mixedlm 16 | 17 | 18 | class TestMixedLM(unittest.TestCase): 19 | 20 | def setUp(self): 21 | np.random.seed(6241) 22 | n = 1600 23 | exog = np.random.normal(size=(n, 2)) 24 | groups = np.kron(np.arange(n // 16), np.ones(16)) 25 | 26 | # Build up the random error vector 27 | errors = 0 28 | 29 | # The random effects 30 | exog_re = np.random.normal(size=(n, 2)) 31 | slopes = np.random.normal(size=(n // 16, 2)) 32 | slopes = np.kron(slopes, np.ones((16, 1))) * exog_re 33 | errors += slopes.sum(1) 34 | 35 | # First variance component 36 | errors += np.kron(2 * np.random.normal(size=n // 4), np.ones(4)) 37 | 38 | # Second variance component 39 | errors += np.kron(2 * np.random.normal(size=n // 2), np.ones(2)) 40 | 41 | # iid errors 42 | errors += np.random.normal(size=n) 43 | 44 | endog = exog.sum(1) + errors 45 | 46 | df = pd.DataFrame(index=range(n)) 47 | df["y1"] = endog 48 | df["y2"] = endog + 2 * 2 49 | df["groups"] = groups 50 | df["x1"] = exog[:, 0] 51 | df["x2"] = exog[:, 1] 52 | 53 | self.tree = TreeNode.read(['(c, (b,a)y2)y1;']) 54 | self.table = df[["y1", "y2"]] 55 | self.metadata = df[['x1', 'x2', 'groups']] 56 | 57 | # for testing the plugins 58 | self.results = "results" 59 | if not os.path.exists(self.results): 60 | os.mkdir(self.results) 61 | 62 | def tearDown(self): 63 | shutil.rmtree(self.results) 64 | 65 | 66 | class TestMixedLMFunctions(TestMixedLM): 67 | 68 | def test_mixedlm_balances(self): 69 | 70 | res = mixedlm("x1 + x2", self.table, self.metadata, 71 | groups="groups") 72 | res.fit() 73 | exp_pvalues = pd.DataFrame( 74 | [[0.0994110906314, 4.4193804e-05, 3.972325e-35, 3.568599e-30], 75 | [4.82688604e-236, 4.4193804e-05, 3.972325e-35, 3.568599e-30]], 76 | index=['y1', 'y2'], 77 | columns=['Intercept', 'Group Var', 'x1', 'x2']).T 78 | 79 | res_pvals = res.pvalues.sort_index(axis=0).sort_index(axis=1) 80 | exp_pvals = exp_pvalues.sort_index(axis=0).sort_index(axis=1) 81 | 82 | pdt.assert_frame_equal(res_pvals, exp_pvals, 83 | check_less_precise=True) 84 | 85 | exp_coefficients = pd.DataFrame( 86 | [[0.211451, 0.0935786, 1.022008, 0.924873], 87 | [4.211451, 0.0935786, 1.022008, 0.924873]], 88 | columns=['Intercept', 'Group Var', 'x1', 'x2'], 89 | index=['y1', 'y2']).sort_index().T 90 | res_coef = res.coefficients().sort_index(axis=0).sort_index(axis=1) 91 | exp_coef = exp_coefficients.sort_index(axis=0).sort_index(axis=1) 92 | 93 | pdt.assert_frame_equal(res_coef, exp_coef, 94 | check_less_precise=True) 95 | 96 | def test_mixedlm_balances_vcf(self): 97 | np.random.seed(6241) 98 | n = 1600 99 | exog = np.random.normal(size=(n, 2)) 100 | groups = np.kron(np.arange(n // 16), np.ones(16)) 101 | 102 | # Build up the random error vector 103 | errors = 0 104 | 105 | # The random effects 106 | exog_re = np.random.normal(size=(n, 2)) 107 | slopes = np.random.normal(size=(n // 16, 2)) 108 | slopes = np.kron(slopes, np.ones((16, 1))) * exog_re 109 | errors += slopes.sum(1) 110 | 111 | # First variance component 112 | subgroups1 = np.kron(np.arange(n // 4), np.ones(4)) 113 | errors += np.kron(2 * np.random.normal(size=n // 4), np.ones(4)) 114 | 
115 | # Second variance component 116 | subgroups2 = np.kron(np.arange(n // 2), np.ones(2)) 117 | errors += np.kron(2 * np.random.normal(size=n // 2), np.ones(2)) 118 | 119 | # iid errors 120 | errors += np.random.normal(size=n) 121 | 122 | endog = exog.sum(1) + errors 123 | 124 | df = pd.DataFrame(index=range(n)) 125 | df["y1"] = endog 126 | df["y2"] = endog + 2 * 2 127 | df["groups"] = groups 128 | df["x1"] = exog[:, 0] 129 | df["x2"] = exog[:, 1] 130 | df["z1"] = exog_re[:, 0] 131 | df["z2"] = exog_re[:, 1] 132 | df["v1"] = subgroups1 133 | df["v2"] = subgroups2 134 | 135 | table = df[["y1", "y2"]] 136 | metadata = df[['x1', 'x2', 'z1', 'z2', 'v1', 'v2', 'groups']] 137 | 138 | res = mixedlm("x1 + x2", table, metadata, groups="groups", 139 | re_formula="0+z1+z2") 140 | res.fit() 141 | 142 | exp_pvalues = pd.DataFrame([ 143 | [0.038015, 3.858750e-39, 2.245068e-33, 144 | 2.552217e-05, 0.923418, 6.645741e-34], 145 | [0.000000, 3.858750e-39, 2.245068e-33, 146 | 2.552217e-05, 0.923418, 6.645741e-34]], 147 | columns=['Intercept', 'x1', 'x2', 'z1 Var', 148 | 'z1 x z2 Cov', 'z2 Var'], 149 | index=['y1', 'y2']).T 150 | 151 | exp_coefficients = pd.DataFrame( 152 | [[0.163141, 1.030013, 0.935514, 0.115082, -0.001962, 0.14792], 153 | [4.163141, 1.030013, 0.935514, 0.115082, -0.001962, 0.14792]], 154 | columns=['Intercept', 'x1', 'x2', 'z1 Var', 155 | 'z1 x z2 Cov', 'z2 Var'], 156 | index=['y1', 'y2']).T 157 | 158 | pdt.assert_frame_equal(res.pvalues.sort_index(axis=0), 159 | exp_pvalues.sort_index(axis=0), 160 | check_less_precise=True) 161 | 162 | pdt.assert_frame_equal(res.coefficients().sort_index(axis=0), 163 | exp_coefficients.sort_index(axis=0), 164 | check_less_precise=True) 165 | 166 | def test_percent_explained(self): 167 | model = mixedlm("x1 + x2", self.table, self.metadata, 168 | groups="groups") 169 | 170 | model.fit() 171 | res = model.percent_explained() 172 | exp = pd.Series([0.5, 0.5], index=['y1', 'y2']) 173 | pdt.assert_series_equal(res, exp, check_less_precise=True) 174 | 175 | 176 | if __name__ == '__main__': 177 | unittest.main() 178 | -------------------------------------------------------------------------------- /gneiss/tests/test_balances.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 3 | # 4 | # Distributed under the terms of the Modified BSD License. 5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 
7 | # ---------------------------------------------------------------------------- 8 | 9 | from __future__ import absolute_import, division, print_function 10 | import unittest 11 | import numpy as np 12 | import numpy.testing as npt 13 | from gneiss.balances import (balance_basis, _count_matrix, 14 | _balance_basis, sparse_balance_basis) 15 | from skbio import TreeNode 16 | from skbio.util import get_data_path 17 | from scipy.sparse import coo_matrix 18 | 19 | 20 | def assert_coo_allclose(res, exp, rtol=1e-7, atol=1e-7): 21 | res_data = np.vstack((res.row, res.col, res.data)).T 22 | exp_data = np.vstack((exp.row, exp.col, exp.data)).T 23 | 24 | # sort by row and col 25 | res_data = res_data[res_data[:, 1].argsort()] 26 | res_data = res_data[res_data[:, 0].argsort()] 27 | exp_data = exp_data[exp_data[:, 1].argsort()] 28 | exp_data = exp_data[exp_data[:, 0].argsort()] 29 | npt.assert_allclose(res_data, exp_data, rtol=rtol, atol=atol) 30 | 31 | 32 | class TestSparseBalances(unittest.TestCase): 33 | 34 | def test_sparse_balance_basis_base_case(self): 35 | tree = u"(a,b);" 36 | t = TreeNode.read([tree]) 37 | 38 | exp_basis = coo_matrix( 39 | np.array([[-np.sqrt(1. / 2), 40 | np.sqrt(1. / 2)]])) 41 | exp_keys = [t.name] 42 | res_basis, res_keys = sparse_balance_basis(t) 43 | 44 | assert_coo_allclose(exp_basis, res_basis) 45 | self.assertListEqual(exp_keys, res_keys) 46 | 47 | def test_sparse_balance_basis_invalid(self): 48 | with self.assertRaises(ValueError): 49 | tree = u"(a,b,c);" 50 | t = TreeNode.read([tree]) 51 | sparse_balance_basis(t) 52 | 53 | def test_sparse_balance_basis_unbalanced(self): 54 | tree = u"((a,b)c, d);" 55 | t = TreeNode.read([tree]) 56 | exp_basis = coo_matrix(np.array( 57 | [[-np.sqrt(1. / 6), -np.sqrt(1. / 6), np.sqrt(2. / 3)], 58 | [-np.sqrt(1. / 2), np.sqrt(1. / 2), 0]] 59 | )) 60 | exp_keys = [t.name, t[0].name] 61 | res_basis, res_keys = sparse_balance_basis(t) 62 | 63 | assert_coo_allclose(exp_basis, res_basis) 64 | self.assertListEqual(exp_keys, res_keys) 65 | 66 | def test_sparse_balance_basis_unbalanced2(self): 67 | tree = u"(d, (a,b)c);" 68 | 69 | t = TreeNode.read([tree]) 70 | 71 | exp_basis = coo_matrix(np.array( 72 | [ 73 | [-np.sqrt(2. / 3), np.sqrt(1. / 6), np.sqrt(1. / 6)], 74 | [0, -np.sqrt(1. / 2), np.sqrt(1. 
/ 2)] 75 | ] 76 | )) 77 | 78 | exp_keys = [t.name, t[1].name] 79 | res_basis, res_keys = sparse_balance_basis(t) 80 | assert_coo_allclose(exp_basis, res_basis, atol=1e-7, rtol=1e-7) 81 | self.assertListEqual(exp_keys, res_keys) 82 | 83 | 84 | class TestBalances(unittest.TestCase): 85 | 86 | def test_count_matrix_base_case(self): 87 | tree = u"(a,b);" 88 | t = TreeNode.read([tree]) 89 | res, _ = _count_matrix(t) 90 | exp = {'k': 0, 'l': 1, 'r': 1, 't': 0, 'tips': 2} 91 | self.assertEqual(res[t], exp) 92 | 93 | exp = {'k': 0, 'l': 0, 'r': 0, 't': 0, 'tips': 1} 94 | self.assertEqual(res[t[0]], exp) 95 | 96 | exp = {'k': 0, 'l': 0, 'r': 0, 't': 0, 'tips': 1} 97 | self.assertEqual(res[t[1]], exp) 98 | 99 | def test_count_matrix_unbalanced(self): 100 | tree = u"((a,b)c, d);" 101 | t = TreeNode.read([tree]) 102 | res, _ = _count_matrix(t) 103 | 104 | exp = {'k': 0, 'l': 2, 'r': 1, 't': 0, 'tips': 3} 105 | self.assertEqual(res[t], exp) 106 | exp = {'k': 1, 'l': 1, 'r': 1, 't': 0, 'tips': 2} 107 | self.assertEqual(res[t[0]], exp) 108 | 109 | exp = {'k': 0, 'l': 0, 'r': 0, 't': 0, 'tips': 1} 110 | self.assertEqual(res[t[1]], exp) 111 | self.assertEqual(res[t[0][0]], exp) 112 | self.assertEqual(res[t[0][1]], exp) 113 | 114 | def test_count_matrix_singleton_error(self): 115 | with self.assertRaises(ValueError): 116 | tree = u"(((a,b)c, d)root);" 117 | t = TreeNode.read([tree]) 118 | _count_matrix(t) 119 | 120 | def test_count_matrix_trifurcating_error(self): 121 | with self.assertRaises(ValueError): 122 | tree = u"((a,b,e)c, d);" 123 | t = TreeNode.read([tree]) 124 | _count_matrix(t) 125 | 126 | def test__balance_basis_base_case(self): 127 | tree = u"(a,b);" 128 | t = TreeNode.read([tree]) 129 | 130 | exp_basis = np.array([[-np.sqrt(1. / 2), np.sqrt(1. / 2)]]) 131 | exp_keys = [t.name] 132 | res_basis, res_keys = _balance_basis(t) 133 | 134 | npt.assert_allclose(exp_basis, res_basis) 135 | self.assertListEqual(exp_keys, res_keys) 136 | 137 | def test__balance_basis_unbalanced(self): 138 | tree = u"((a,b)c, d);" 139 | t = TreeNode.read([tree]) 140 | 141 | exp_basis = np.array( 142 | [[-np.sqrt(1. / 6), -np.sqrt(1. / 6), np.sqrt(2. / 3)], 143 | [-np.sqrt(1. / 2), np.sqrt(1. 
/ 2), 0]] 144 | ) 145 | exp_keys = [t.name, t[0].name] 146 | res_basis, res_keys = _balance_basis(t) 147 | 148 | npt.assert_allclose(exp_basis, res_basis) 149 | self.assertListEqual(exp_keys, res_keys) 150 | 151 | def test_balance_basis_base_case(self): 152 | tree = u"(a,b);" 153 | t = TreeNode.read([tree]) 154 | exp_keys = [t.name] 155 | exp_basis = np.array([0.19557032, 0.80442968]) 156 | res_basis, res_keys = balance_basis(t) 157 | 158 | npt.assert_allclose(exp_basis, res_basis) 159 | self.assertListEqual(exp_keys, res_keys) 160 | 161 | def test_balance_basis_unbalanced(self): 162 | tree = u"((a,b)c, d);" 163 | t = TreeNode.read([tree]) 164 | exp_keys = [t.name, t[0].name] 165 | exp_basis = np.array([[0.18507216, 0.18507216, 0.62985567], 166 | [0.14002925, 0.57597535, 0.28399541]]) 167 | 168 | res_basis, res_keys = balance_basis(t) 169 | 170 | npt.assert_allclose(exp_basis, res_basis) 171 | self.assertListEqual(exp_keys, list(res_keys)) 172 | 173 | def test_balance_basis_large1(self): 174 | fname = get_data_path('large_tree.nwk', 175 | subfolder='data') 176 | t = TreeNode.read(fname) 177 | # note that the basis is in reverse level order 178 | exp_basis = np.loadtxt( 179 | get_data_path('large_tree_basis.txt', 180 | subfolder='data')) 181 | res_basis, res_keys = balance_basis(t) 182 | npt.assert_allclose(exp_basis[:, ::-1], res_basis) 183 | 184 | 185 | if __name__ == "__main__": 186 | unittest.main() 187 | -------------------------------------------------------------------------------- /gneiss/plot/tests/test_regression_plot.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 3 | # 4 | # Distributed under the terms of the Modified BSD License. 5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 
7 | # ---------------------------------------------------------------------------- 8 | import unittest 9 | import os 10 | import shutil 11 | 12 | import numpy as np 13 | import pandas as pd 14 | import numpy.testing as npt 15 | 16 | from skbio import TreeNode 17 | from skbio.util import get_data_path 18 | 19 | from gneiss.plot._regression_plot import ols_summary, lme_summary 20 | from gneiss.regression import ols, mixedlm 21 | 22 | 23 | class TestOLS_Summary(unittest.TestCase): 24 | 25 | def setUp(self): 26 | A = np.array # aliasing for the sake of pep8 27 | self.table = pd.DataFrame({ 28 | 's1': A([1., 1.]), 29 | 's2': A([1., 2.]), 30 | 's3': A([1., 3.]), 31 | 's4': A([1., 4.]), 32 | 's5': A([1., 5.])}, 33 | index=['Y2', 'Y1']).T 34 | self.tree = TreeNode.read(['(c, (b,a)Y2)Y1;']) 35 | self.metadata = pd.DataFrame({ 36 | 'lame': [1, 1, 1, 1, 1], 37 | 'real': [1, 2, 3, 4, 5] 38 | }, index=['s1', 's2', 's3', 's4', 's5']) 39 | 40 | np.random.seed(0) 41 | n = 15 42 | a = np.array([1, 4.2, 5.3, -2.2, 8]) 43 | x1 = np.linspace(.01, 0.1, n) 44 | x2 = np.logspace(0, 0.01, n) 45 | x3 = np.exp(np.linspace(0, 0.01, n)) 46 | x4 = x1 ** 2 47 | self.x = pd.DataFrame({'x1': x1, 'x2': x2, 'x3': x3, 'x4': x4}) 48 | n__ = np.random.normal(size=n) 49 | y = a[0] + a[1] * x1 + a[2] * x2 + a[3] * x3 + a[4] * x4 + n__ 50 | sy = np.vstack((-y / 10, -y)).T 51 | self.y = pd.DataFrame(sy, columns=['y0', 'y1']) 52 | self.t2 = TreeNode.read([r"((a,b)y1,c)y0;"]) 53 | 54 | self.results = "results" 55 | os.mkdir(self.results) 56 | 57 | def tearDown(self): 58 | shutil.rmtree(self.results) 59 | 60 | @unittest.skip('Visualizations are deprecated') 61 | def test_visualization(self): 62 | res = ols(formula="x1 + x2 + x3 + x4", 63 | table=self.y, metadata=self.x) 64 | res.fit() 65 | 66 | ols_summary(self.results, res, tree=self.t2) 67 | fp = os.path.join(self.results, 'pvalues.csv') 68 | self.assertTrue(os.path.exists(fp)) 69 | fp = os.path.join(self.results, 'coefficients.csv') 70 | self.assertTrue(os.path.exists(fp)) 71 | fp = os.path.join(self.results, 'predicted.csv') 72 | self.assertTrue(os.path.exists(fp)) 73 | fp = os.path.join(self.results, 'residuals.csv') 74 | self.assertTrue(os.path.exists(fp)) 75 | 76 | index_fp = os.path.join(self.results, 'index.html') 77 | self.assertTrue(os.path.exists(index_fp)) 78 | 79 | with open(index_fp, 'r') as fh: 80 | html = fh.read() 81 | self.assertIn('
<h1>Simplicial Linear Regression Summary</h1>
', 82 | html) 83 | self.assertIn('Coefficients\n', html) 84 | self.assertIn('Predicted Balances\n', html) 85 | self.assertIn('Residuals\n', html) 86 | 87 | 88 | class TestLME_Summary(unittest.TestCase): 89 | 90 | def setUp(self): 91 | np.random.seed(6241) 92 | n = 1600 93 | exog = np.random.normal(size=(n, 2)) 94 | groups = np.kron(np.arange(n // 16), np.ones(16)) 95 | 96 | # Build up the random error vector 97 | errors = 0 98 | 99 | # The random effects 100 | exog_re = np.random.normal(size=(n, 2)) 101 | slopes = np.random.normal(size=(n // 16, 2)) 102 | slopes = np.kron(slopes, np.ones((16, 1))) * exog_re 103 | errors += slopes.sum(1) 104 | 105 | # First variance component 106 | errors += np.kron(2 * np.random.normal(size=n // 4), np.ones(4)) 107 | 108 | # Second variance component 109 | errors += np.kron(2 * np.random.normal(size=n // 2), np.ones(2)) 110 | 111 | # iid errors 112 | errors += np.random.normal(size=n) 113 | 114 | endog = exog.sum(1) + errors 115 | 116 | df = pd.DataFrame(index=range(n)) 117 | df["Y1"] = endog + 2 * 2 118 | df["Y2"] = endog 119 | df["groups"] = groups 120 | df["x1"] = exog[:, 0] 121 | df["x2"] = exog[:, 1] 122 | 123 | self.tree = TreeNode.read(['(c, (b,a)Y2)Y1;']) 124 | self.table = df[["Y1", "Y2"]] 125 | self.metadata = df[['x1', 'x2', 'groups']] 126 | 127 | self.results = "results" 128 | if not os.path.exists(self.results): 129 | os.mkdir(self.results) 130 | 131 | def tearDown(self): 132 | shutil.rmtree(self.results) 133 | 134 | @unittest.skip('Visualizations are deprecated') 135 | def test_visualization(self): 136 | model = mixedlm("x1 + x2", self.table, self.metadata, 137 | groups="groups") 138 | model.fit() 139 | lme_summary(self.results, model, self.tree) 140 | pvals = pd.read_csv(os.path.join(self.results, 'pvalues.csv'), 141 | index_col=0) 142 | coefs = pd.read_csv(os.path.join(self.results, 'coefficients.csv'), 143 | index_col=0) 144 | pred = pd.read_csv(os.path.join(self.results, 'predicted.csv'), 145 | index_col=0) 146 | resid = pd.read_csv(os.path.join(self.results, 'residuals.csv'), 147 | index_col=0) 148 | 149 | exp_pvals = pd.DataFrame({ 150 | 'Intercept': {'Y1': 4.8268860492262526e-236, 151 | 'Y2': 0.099411090631406948}, 152 | 'Group Var': {'Y1': 4.4193804668281966e-05, 153 | 'Y2': 4.4193804668280984e-05}, 154 | 'x1': {'Y1': 3.9704936434633392e-35, 155 | 'Y2': 3.9704936434628853e-35}, 156 | 'x2': {'Y1': 3.56912071867573e-30, 157 | 'Y2': 3.56912071867573e-30}}).sort_index(axis=1) 158 | pvals = pvals.sort_index(axis=0).sort_index(axis=1) 159 | exp_pvals = exp_pvals.sort_index(axis=0).sort_index(axis=1) 160 | 161 | npt.assert_allclose(pvals, exp_pvals, rtol=1e-5) 162 | 163 | exp_coefs = pd.DataFrame({ 164 | 'Intercept': {'Y1': 4.2115280233151946, 165 | 'Y2': 0.211528023315187}, 166 | 'Group Var': {'Y1': 0.093578639287859755, 167 | 'Y2': 0.093578639287860019}, 168 | 'x1': {'Y1': 1.0220072967452645, 169 | 'Y2': 1.0220072967452651}, 170 | 'x2': {'Y1': 0.92487193877761575, 171 | 'Y2': 0.92487193877761564}} 172 | ).sort_index(axis=1) 173 | 174 | npt.assert_allclose(coefs.sort_index(axis=0), 175 | exp_coefs.sort_index(axis=0), 176 | rtol=1e-2, atol=1e-2) 177 | 178 | exp_resid = pd.read_csv(get_data_path('exp_resid.csv'), index_col=0) 179 | npt.assert_allclose(resid, exp_resid.T, rtol=1e-2, atol=1e-2) 180 | 181 | exp_pred = pd.read_csv(get_data_path('exp_pred.csv'), index_col=0) 182 | npt.assert_allclose(pred, exp_pred.T, rtol=1e-2, atol=1e-2) 183 | 184 | 185 | if __name__ == "__main__": 186 | unittest.main() 187 | 
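The tests above exercise the regression workflow end to end: construct a model with `ols` (or `mixedlm`), call `fit()`, then inspect `coefficients()`, `residuals()`, and the summary tables. The snippet below is a minimal sketch of that workflow on made-up toy data, assembled only from calls that already appear in the tests and docstrings in this repository; it assumes gneiss, numpy, and pandas are installed, and the toy balances, covariate, and variable names are illustrative rather than part of the source tree.

```
# Minimal sketch of the gneiss regression workflow, mirroring the calls
# exercised in the tests above (model construction -> fit -> inspection).
# The toy balances, covariate, and variable names here are illustrative only.
import numpy as np
import pandas as pd

from gneiss.regression import ols

np.random.seed(0)
n = 15
x1 = np.linspace(0.01, 0.1, n)                  # a single covariate
y = 1 + 4.2 * x1 + np.random.normal(size=n)     # toy response

# balance table: samples as rows, one column per internal node of an ilr tree
balances = pd.DataFrame({'y0': -y / 10, 'y1': -y})
metadata = pd.DataFrame({'x1': x1})

model = ols(formula="x1", table=balances, metadata=metadata)
model.fit()

print(model.coefficients())   # covariate-by-balance coefficient table
print(model.residuals())      # residuals, still in balance coordinates
```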
-------------------------------------------------------------------------------- /gneiss/plot/tests/test_dendrogram.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 3 | # 4 | # Distributed under the terms of the Modified BSD License. 5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 7 | # ---------------------------------------------------------------------------- 8 | import unittest 9 | import numpy as np 10 | import pandas as pd 11 | from skbio import DistanceMatrix, TreeNode 12 | from gneiss.plot._dendrogram import (Dendrogram, UnrootedDendrogram, 13 | SquareDendrogram) 14 | from scipy.cluster.hierarchy import ward 15 | import pandas.util.testing as pdt 16 | 17 | 18 | class mock(Dendrogram): 19 | # mock dendrogram class to make sure that inheritance 20 | # is working as expected. 21 | def rescale(self, width, height): 22 | pass 23 | 24 | 25 | class TestDendrogram(unittest.TestCase): 26 | 27 | def test_cache_ntips(self): 28 | dm = DistanceMatrix.from_iterable([0, 1, 2, 3], 29 | lambda x, y: np.abs(x - y)) 30 | lm = ward(dm.condensed_form()) 31 | ids = np.arange(4).astype(np.str) 32 | t = mock.from_linkage_matrix(lm, ids) 33 | 34 | t._cache_ntips() 35 | 36 | self.assertEqual(t.leafcount, 4) 37 | self.assertEqual(t.children[0].leafcount, 2) 38 | self.assertEqual(t.children[1].leafcount, 2) 39 | self.assertEqual(t.children[0].children[0].leafcount, 1) 40 | self.assertEqual(t.children[0].children[1].leafcount, 1) 41 | self.assertEqual(t.children[1].children[0].leafcount, 1) 42 | self.assertEqual(t.children[1].children[1].leafcount, 1) 43 | 44 | 45 | class TestUnrootedDendrogram(unittest.TestCase): 46 | 47 | def setUp(self): 48 | np.random.seed(0) 49 | x = np.random.rand(10) 50 | dm = DistanceMatrix.from_iterable(x, lambda x, y: np.abs(x - y)) 51 | lm = ward(dm.condensed_form()) 52 | ids = np.arange(len(x)).astype(np.str) 53 | self.tree = TreeNode.from_linkage_matrix(lm, ids) 54 | 55 | # initialize tree with branch length and named internal nodes 56 | for i, n in enumerate(self.tree.postorder(include_self=True)): 57 | n.length = 1 58 | if not n.is_tip(): 59 | n.name = "y%d" % i 60 | 61 | def test_from_tree(self): 62 | t = UnrootedDendrogram.from_tree(self.tree) 63 | self.assertEqual(t.__class__, UnrootedDendrogram) 64 | 65 | def test_coords(self): 66 | t = UnrootedDendrogram.from_tree(self.tree) 67 | 68 | exp = pd.DataFrame({'0': [404.097, 396.979, np.nan, np.nan, True], 69 | '1': [464.724, 174.338, np.nan, np.nan, True], 70 | '2': [487.5, 43.2804, np.nan, np.nan, True], 71 | '3': [446.172, 359.095, np.nan, np.nan, True], 72 | '4': [32.4704, 456.72, np.nan, np.nan, True], 73 | '5': [438.468, 14.9717, np.nan, np.nan, True], 74 | '6': [81.5024, 485.028, np.nan, np.nan, True], 75 | '7': [54.5748, 34.9421, np.nan, np.nan, True], 76 | '8': [12.5, 72.8265, np.nan, np.nan, True], 77 | '9': [55.2464, 325.662, np.nan, np.nan, True], 78 | 'y10': [366.837, 313.291, '0', '3', False], 79 | 'y14': [419.421, 104.579, '2', '5', False], 80 | 'y15': [373.617, 183.914, '1', 'y14', False], 81 | 'y16': [305.539, 245.212, 'y10', 'y15', False], 82 | 'y17': [214.432, 254.788, 'y7', 'y16', False], 83 | 'y18': [153.134, 186.709, 'y2', 'y17', False], 84 | 'y2': [91.8354, 118.631, '7', '8', False], 85 | 'y6': [100.549, 395.421, '4', '6', False], 86 | 'y7': [146.353, 316.086, '9', 'y6', False]}, 87 | index=['x', 
'y', 'child0', 'child1', 'is_tip']).T 88 | 89 | res = t.coords(500, 500) 90 | exp = exp.loc[res.index] 91 | pdt.assert_frame_equal(exp, res) 92 | 93 | def test_rescale(self): 94 | t = UnrootedDendrogram.from_tree(self.tree) 95 | self.assertAlmostEqual(t.rescale(500, 500), 91.608680314971238, 96 | places=5) 97 | 98 | def test_update_coordinates(self): 99 | t = UnrootedDendrogram.from_tree(self.tree) 100 | exp = pd.DataFrame([(-0.59847214410395644, -1.6334372886412185), 101 | (-0.99749498660405445, -0.76155647142658189), 102 | (1.0504174348855488, 0.34902579063315775), 103 | (2.8507394969018511, 0.88932809650129752), 104 | (3.3688089449017027, 0.082482736278627664), 105 | (0.81247946938427551, -3.4080712447257464), 106 | (-0.13677590240930079, -3.5433843164696093), 107 | (-1.6101831260150372, -1.1190611577178871), 108 | (-1.6176088321192579, 0.76057470265451865), 109 | (-0.69694851846105044, 1.0284925540912822)]) 110 | 111 | res = pd.DataFrame(t.update_coordinates(1, 0, 0, 2, 1)) 112 | pdt.assert_frame_equal(res, exp, check_less_precise=True) 113 | 114 | 115 | class TestSquareDendrogram(unittest.TestCase): 116 | 117 | def setUp(self): 118 | np.random.seed(0) 119 | self.table = pd.DataFrame(np.random.random((5, 5))) 120 | num_otus = 5 # otus 121 | x = np.random.rand(num_otus) 122 | dm = DistanceMatrix.from_iterable(x, lambda x, y: np.abs(x - y)) 123 | lm = ward(dm.condensed_form()) 124 | t = TreeNode.from_linkage_matrix(lm, np.arange(len(x)).astype(np.str)) 125 | self.tree = SquareDendrogram.from_tree(t) 126 | 127 | for i, n in enumerate(t.postorder()): 128 | if not n.is_tip(): 129 | n.name = "y%d" % i 130 | n.length = np.random.rand() * 3 131 | 132 | def test_from_tree(self): 133 | t = SquareDendrogram.from_tree(self.tree) 134 | self.assertEqual(t.__class__, SquareDendrogram) 135 | 136 | def test_coords(self): 137 | # just test to make sure that the coordinates are calculated properly. 138 | t = SquareDendrogram.from_tree(self.tree) 139 | 140 | exp = pd.DataFrame({'0': [20, 2.5, np.nan, np.nan, True], 141 | '1': [20, 3.5, np.nan, np.nan, True], 142 | '2': [20, 4.5, np.nan, np.nan, True], 143 | '3': [20, 1.5, np.nan, np.nan, True], 144 | '4': [20, 0.5, np.nan, np.nan, True], 145 | 'y5': [14.25, 1, '3', '4', False], 146 | 'y6': [9.5, 1.75, '0', 'y5', False], 147 | 'y7': [4.75, 2.625, '1', 'y6', False], 148 | 'y8': [0, 3.5625, '2', 'y7', False]}, 149 | index=['x', 'y', 'child0', 'child1', 'is_tip']).T 150 | 151 | res = t.coords(width=20, height=self.table.shape[0]) 152 | exp = exp.loc[res.index] 153 | pdt.assert_frame_equal(exp, res) 154 | 155 | def test_rescale(self): 156 | t = SquareDendrogram.from_tree(self.tree) 157 | res = t.rescale(10, 10) 158 | self.assertEqual(res, 2.5) 159 | 160 | 161 | if __name__ == "__main__": 162 | unittest.main() 163 | -------------------------------------------------------------------------------- /gneiss/regression/tests/test_model.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 3 | # 4 | # Distributed under the terms of the Modified BSD License. 5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 
7 | # ---------------------------------------------------------------------------- 8 | import pandas as pd 9 | from skbio import TreeNode 10 | import numpy as np 11 | from skbio.stats.composition import ilr_inv, clr_inv 12 | import statsmodels.formula.api as smf 13 | import pandas.util.testing as pdt 14 | from gneiss.regression._model import RegressionModel 15 | from gneiss.balances import balance_basis 16 | import unittest 17 | import os 18 | 19 | 20 | # create some mock classes for testing 21 | class submock(RegressionModel): 22 | def __init__(self, *args, **kwargs): 23 | super().__init__(*args, **kwargs) 24 | self.results = [] 25 | 26 | def summary(self): 27 | print("OK!") 28 | 29 | def predict(self, **kwargs): 30 | pass 31 | 32 | def fit(self, **kwargs): 33 | """ Fit the model """ 34 | for s in self.submodels: 35 | # assumes that the underlying submodels have implemented `fit`. 36 | m = s.fit(**kwargs) 37 | 38 | self.results.append(m) 39 | 40 | coef = pd.DataFrame() 41 | for r in self.results: 42 | c = r.params 43 | c.name = r.model.endog_names 44 | coef = coef.append(c) 45 | self._beta = coef.T 46 | 47 | resid = pd.DataFrame() 48 | for r in self.results: 49 | err = r.resid 50 | err.name = r.model.endog_names 51 | resid = resid.append(err) 52 | self._resid = resid.T 53 | 54 | pvals = pd.DataFrame() 55 | for r in self.results: 56 | p = r.pvalues 57 | p.name = r.model.endog_names 58 | pvals = pvals.append(p) 59 | 60 | self.pvalues = pvals 61 | 62 | self._fitted = True 63 | 64 | 65 | class TestRegressionModel(unittest.TestCase): 66 | def setUp(self): 67 | self.pickle_fname = "test.pickle" 68 | self.data = pd.DataFrame([[1, 1, 1], 69 | [3, 2, 3], 70 | [4, 3, 2], 71 | [5, 4, 4], 72 | [2, 5, 3], 73 | [3, 6, 5], 74 | [4, 7, 4]], 75 | index=['s1', 's2', 's3', 's4', 76 | 's5', 's6', 's7'], 77 | columns=['Y1', 'Y2', 'X']) 78 | self.model1 = smf.ols(formula="Y1 ~ X", data=self.data) 79 | self.model2 = smf.ols(formula="Y2 ~ X", data=self.data) 80 | self.tree = TreeNode.read(['((a,b)Y1, c)Y2;']) 81 | self.basis = pd.DataFrame(clr_inv(balance_basis(self.tree)[0]), 82 | columns=['a', 'b', 'c'], 83 | index=['Y1', 'Y2']) 84 | self.balances = pd.DataFrame(self.data[['Y1', 'Y2']], 85 | index=self.data.index, 86 | columns=['Y1', 'Y2']) 87 | 88 | def tearDown(self): 89 | if os.path.exists(self.pickle_fname): 90 | os.remove(self.pickle_fname) 91 | 92 | def test_regression_results_pvalues(self): 93 | # checks to see if pvalues are calculated correctly. 
94 | 95 | submodels = [self.model1, self.model2] 96 | res = submock(Y=self.balances, Xs=None) 97 | submock.submodels = submodels 98 | res.fit() 99 | exp = pd.DataFrame({'Intercept': [0.307081, 0.972395], 100 | 'X': [0.211391, 0.029677]}, 101 | index=['Y1', 'Y2']) 102 | pdt.assert_frame_equal(res.pvalues, exp, 103 | check_exact=False, 104 | check_less_precise=True) 105 | 106 | def test_regression_results_coefficient(self): 107 | exp_coef = pd.DataFrame({'Intercept': [1.447368, -0.052632], 108 | 'X': [0.539474, 1.289474]}, 109 | index=['Y1', 'Y2']).T 110 | submodels = [self.model1, self.model2] 111 | res = submock(Y=self.balances, Xs=None) 112 | submock.submodels = submodels 113 | res.fit() 114 | res_coef = res.coefficients() 115 | pdt.assert_frame_equal(res_coef, exp_coef, 116 | check_exact=False, 117 | check_less_precise=True) 118 | 119 | def test_regression_results_coefficient_projection(self): 120 | tree = TreeNode.read([r'(c, (a, b)Y2)Y1;']) 121 | exp_coef = pd.DataFrame( 122 | np.array([[0.47802399, 0.44373548, 0.07824052], 123 | [0.11793186, 0.73047731, 0.15159083]]).T, 124 | columns=['Intercept', 'X'], 125 | index=['a', 'b', 'c']) 126 | 127 | submodels = [self.model1, self.model2] 128 | res = submock(Y=self.balances, Xs=None) 129 | submock.submodels = submodels 130 | res.fit() 131 | res_coef = res.coefficients(tree).T 132 | res_coef = res_coef.sort_index() 133 | 134 | pdt.assert_frame_equal(res_coef, exp_coef, 135 | check_exact=False, 136 | check_less_precise=True) 137 | 138 | def test_regression_results_residuals_projection(self): 139 | tree = TreeNode.read([r'(c, (a, b)Y2)Y1;']) 140 | basis, _ = balance_basis(tree) 141 | exp_resid = pd.DataFrame({'s1': [-0.986842, -0.236842], 142 | 's2': [-0.065789, -1.815789], 143 | 's3': [1.473684, 0.473684], 144 | 's4': [1.394737, -1.105263], 145 | 's5': [-1.065789, 1.184211], 146 | 's6': [-1.144737, -0.394737], 147 | 's7': [0.394737, 1.894737]}, 148 | index=['Y1', 'Y2']).T 149 | exp_resid = pd.DataFrame(ilr_inv(exp_resid, basis), 150 | index=['s1', 's2', 's3', 's4', 151 | 's5', 's6', 's7'], 152 | columns=['c', 'a', 'b']) 153 | 154 | submodels = [self.model1, self.model2] 155 | res = submock(Y=self.balances, Xs=None) 156 | submock.submodels = submodels 157 | res.fit() 158 | res_resid = res.residuals(tree).sort_index() 159 | pdt.assert_frame_equal(res_resid, exp_resid, 160 | check_exact=False, 161 | check_less_precise=True) 162 | 163 | def test_regression_results_residuals(self): 164 | exp_resid = pd.DataFrame({'s1': [-0.986842, -0.236842], 165 | 's2': [-0.065789, -1.815789], 166 | 's3': [1.473684, 0.473684], 167 | 's4': [1.394737, -1.105263], 168 | 's5': [-1.065789, 1.184211], 169 | 's6': [-1.144737, -0.394737], 170 | 's7': [0.394737, 1.894737]}, 171 | index=['Y1', 'Y2']).T 172 | submodels = [self.model1, self.model2] 173 | res = submock(Y=self.balances, Xs=None) 174 | submock.submodels = submodels 175 | res.fit() 176 | 177 | pdt.assert_frame_equal(res.residuals(), exp_resid, 178 | check_exact=False, 179 | check_less_precise=True) 180 | 181 | 182 | if __name__ == "__main__": 183 | unittest.main() 184 | -------------------------------------------------------------------------------- /gneiss/cluster/_pba.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 3 | # 4 | # Distributed under the terms of the Modified BSD License. 
5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 7 | # ---------------------------------------------------------------------------- 8 | import numpy as np 9 | import pandas as pd 10 | from gneiss.sort import mean_niche_estimator 11 | from gneiss.util import match, rename_internal_nodes 12 | from gneiss.composition._variance import variation_matrix 13 | 14 | from skbio import TreeNode, DistanceMatrix 15 | from scipy.cluster.hierarchy import linkage 16 | 17 | 18 | def correlation_linkage(X, method='ward'): 19 | r""" 20 | Hierarchical Clustering based on proportionality. 21 | 22 | The hierarchy is built based on the proportionality between 23 | any two pairs of features. Specifically, the proportionality between 24 | two features :math:`x` and :math:`y` is measured by 25 | 26 | .. math:: 27 | p(x, y) = var (\ln \frac{x}{y}) 28 | 29 | If :math:`p(x, y)` is very small, then :math:`x` and :math:`y` 30 | are said to be highly proportional. A hierarchical clustering is 31 | then performed using this proportionality as the distance metric. 32 | 33 | This can be useful for constructing principal balances [1]_. 34 | 35 | Parameters 36 | ---------- 37 | X : pd.DataFrame 38 | Contingency table where the samples are rows and the features 39 | are columns. 40 | method : str 41 | Clustering method. (default='ward') 42 | 43 | Returns 44 | ------- 45 | skbio.TreeNode 46 | Tree for constructing principal balances. 47 | 48 | References 49 | ---------- 50 | 51 | .. [1] Pawlowsky-Glahn V, Egozcue JJ, and Tolosana-Delgado R. 52 | Principal Balances (2011). 53 | 54 | Examples 55 | -------- 56 | >>> import pandas as pd 57 | >>> from gneiss.cluster import correlation_linkage 58 | >>> table = pd.DataFrame([[1, 1, 0, 0, 0], 59 | ... [0, 1, 1, 0, 0], 60 | ... [0, 0, 1, 1, 0], 61 | ... [0, 0, 0, 1, 1]], 62 | ... columns=['s1', 's2', 's3', 's4', 's5'], 63 | ... index=['o1', 'o2', 'o3', 'o4']).T 64 | >>> tree = correlation_linkage(table+0.1) 65 | >>> print(tree.ascii_art()) 66 | /-o1 67 | /y1------| 68 | | \-o2 69 | -y0------| 70 | | /-o3 71 | \y2------| 72 | \-o4 73 | """ 74 | dm = variation_matrix(X) 75 | lm = linkage(dm.condensed_form(), method=method) 76 | t = TreeNode.from_linkage_matrix(lm, X.columns) 77 | t = rename_internal_nodes(t) 78 | return t 79 | 80 | 81 | def rank_linkage(r, method='average'): 82 | r""" Hierarchical Clustering on feature ranks. 83 | 84 | The hierarchy is built based on the rank values of the features given 85 | an input vector `r` of ranks. The distance between two features :math:`x` 86 | and :math:`y` can be defined as 87 | 88 | .. math:: 89 | d(x, y) = (r(x) - r(y))^2 90 | 91 | Where :math:`r(x)` is the rank of feature :math:`x`. Hierarchical clustering is 92 | then performed using :math:`d(x, y)` as the distance metric. 93 | 94 | This can be useful for constructing principal balances. 95 | 96 | Parameters 97 | ---------- 98 | r : pd.Series 99 | Continuous vector representing some ordering of the features. 100 | method : str 101 | Clustering method. (default='average') 102 | 103 | Returns 104 | ------- 105 | skbio.TreeNode 106 | Tree for constructing principal balances. 107 | 108 | Examples 109 | -------- 110 | >>> import pandas as pd 111 | >>> from gneiss.cluster import rank_linkage 112 | >>> ranks = pd.Series([1, 2, 4, 5], 113 | ... 
index=['o1', 'o2', 'o3', 'o4']) 114 | >>> tree = rank_linkage(ranks) 115 | >>> print(tree.ascii_art()) 116 | /-o1 117 | /y1------| 118 | | \-o2 119 | -y0------| 120 | | /-o3 121 | \y2------| 122 | \-o4 123 | """ 124 | dm = DistanceMatrix.from_iterable(r, lambda a, b: np.abs(b-a)) 125 | lm = linkage(dm.condensed_form(), method) 126 | t = TreeNode.from_linkage_matrix(lm, r.index) 127 | t = rename_internal_nodes(t) 128 | return t 129 | 130 | 131 | def gradient_linkage(X, y, method='average'): 132 | r""" 133 | Hierarchical Clustering on known gradient. 134 | 135 | The hierarchy is built based on the values of the samples 136 | located along a gradient. Given a feature :math:`x`, the mean gradient 137 | values that :math:`x` was observed in is calculated by 138 | 139 | .. math:: 140 | f(g , x) = 141 | \sum\limits_{i=1}^N g_i \frac{x_i}{\sum\limits_{j=1}^N x_j} 142 | 143 | Where :math:`N` is the number of samples, :math:`x_i` is the proportion of 144 | feature :math:`x` in sample :math:`i`, :math:`g_i` is the gradient value 145 | at sample `i`. 146 | 147 | The distance between two features :math:`x` and :math:`y` can be defined as 148 | 149 | .. math:: 150 | d(x, y) = (f(g, x) - f(g, y))^2 151 | 152 | If :math:`d(x, y)` is very small, then :math:`x` and :math:`y` 153 | are expected to live in very similar positions across the gradient. 154 | A hierarchical clustering is then performed using :math:`d(x, y)` as 155 | the distance metric. 156 | 157 | This can be useful for constructing principal balances. 158 | 159 | Parameters 160 | ---------- 161 | X : pd.DataFrame 162 | Contingency table where the samples are rows and the features 163 | are columns. 164 | y : pd.Series 165 | Continuous vector representing some ordering of the samples in X. 166 | method : str 167 | Clustering method. (default='average') 168 | 169 | Returns 170 | ------- 171 | skbio.TreeNode 172 | Tree for constructing principal balances. 173 | 174 | See Also 175 | -------- 176 | mean_niche_estimator 177 | 178 | Examples 179 | -------- 180 | >>> import pandas as pd 181 | >>> from gneiss.cluster import gradient_linkage 182 | >>> table = pd.DataFrame([[1, 1, 0, 0, 0], 183 | ... [0, 1, 1, 0, 0], 184 | ... [0, 0, 1, 1, 0], 185 | ... [0, 0, 0, 1, 1]], 186 | ... columns=['s1', 's2', 's3', 's4', 's5'], 187 | ... index=['o1', 'o2', 'o3', 'o4']).T 188 | >>> gradient = pd.Series([1, 2, 3, 4, 5], 189 | ... index=['s1', 's2', 's3', 's4', 's5']) 190 | >>> tree = gradient_linkage(table, gradient) 191 | >>> print(tree.ascii_art()) 192 | /-o1 193 | /y1------| 194 | | \-o2 195 | -y0------| 196 | | /-o3 197 | \y2------| 198 | \-o4 199 | """ 200 | _X, _y = match(X, y) 201 | mean_X = mean_niche_estimator(_X, gradient=_y) 202 | t = rank_linkage(mean_X) 203 | return t 204 | 205 | 206 | def random_linkage(n): 207 | """ Generates a tree with random topology. 208 | 209 | Parameters 210 | ---------- 211 | n : int 212 | Number of nodes in the tree 213 | 214 | Returns 215 | ------- 216 | skbio.TreeNode 217 | Random tree for constructing principal balances. 218 | 219 | Examples 220 | -------- 221 | >>> from gneiss.cluster import random_linkage 222 | >>> tree = random_linkage(10) 223 | 224 | Notes 225 | ----- 226 | The nodes will be labeled from 0 to n. 
227 | """ 228 | index = np.arange(n).astype(np.str) 229 | x = pd.Series(np.random.rand(n), index=index) 230 | t = rank_linkage(x) 231 | return t 232 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 16 | 17 | .PHONY: help 18 | help: 19 | @echo "Please use \`make <target>' where <target> is one of" 20 | @echo " html to make standalone HTML files" 21 | @echo " dirhtml to make HTML files named index.html in directories" 22 | @echo " singlehtml to make a single large HTML file" 23 | @echo " pickle to make pickle files" 24 | @echo " json to make JSON files" 25 | @echo " htmlhelp to make HTML files and a HTML help project" 26 | @echo " qthelp to make HTML files and a qthelp project" 27 | @echo " applehelp to make an Apple Help Book" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " epub3 to make an epub3" 31 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 32 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 33 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 34 | @echo " text to make text files" 35 | @echo " man to make manual pages" 36 | @echo " texinfo to make Texinfo files" 37 | @echo " info to make Texinfo files and run them through makeinfo" 38 | @echo " gettext to make PO message catalogs" 39 | @echo " changes to make an overview of all changed/added/deprecated items" 40 | @echo " xml to make Docutils-native XML files" 41 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 42 | @echo " linkcheck to check all external links for integrity" 43 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 44 | @echo " coverage to run coverage check of the documentation (if enabled)" 45 | @echo " dummy to check syntax errors of document sources" 46 | 47 | .PHONY: clean 48 | clean: 49 | rm -rf $(BUILDDIR)/* 50 | 51 | .PHONY: html 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | .PHONY: dirhtml 58 | dirhtml: 59 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 60 | @echo 61 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 62 | 63 | .PHONY: singlehtml 64 | singlehtml: 65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 66 | @echo 67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 68 | 69 | .PHONY: pickle 70 | pickle: 71 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 72 | @echo 73 | @echo "Build finished; now you can process the pickle files." 74 | 75 | .PHONY: json 76 | json: 77 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 78 | @echo 79 | @echo "Build finished; now you can process the JSON files."
80 | 81 | .PHONY: htmlhelp 82 | htmlhelp: 83 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 84 | @echo 85 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 86 | ".hhp project file in $(BUILDDIR)/htmlhelp." 87 | 88 | .PHONY: qthelp 89 | qthelp: 90 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 91 | @echo 92 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 93 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 94 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/gneiss.qhcp" 95 | @echo "To view the help file:" 96 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/gneiss.qhc" 97 | 98 | .PHONY: applehelp 99 | applehelp: 100 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 101 | @echo 102 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 103 | @echo "N.B. You won't be able to view it unless you put it in" \ 104 | "~/Library/Documentation/Help or install it in your application" \ 105 | "bundle." 106 | 107 | .PHONY: devhelp 108 | devhelp: 109 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 110 | @echo 111 | @echo "Build finished." 112 | @echo "To view the help file:" 113 | @echo "# mkdir -p $$HOME/.local/share/devhelp/gneiss" 114 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/gneiss" 115 | @echo "# devhelp" 116 | 117 | .PHONY: epub 118 | epub: 119 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 120 | @echo 121 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 122 | 123 | .PHONY: epub3 124 | epub3: 125 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 126 | @echo 127 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." 128 | 129 | .PHONY: latex 130 | latex: 131 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 132 | @echo 133 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 134 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 135 | "(use \`make latexpdf' here to do that automatically)." 136 | 137 | .PHONY: latexpdf 138 | latexpdf: 139 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 140 | @echo "Running LaTeX files through pdflatex..." 141 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 142 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 143 | 144 | .PHONY: latexpdfja 145 | latexpdfja: 146 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 147 | @echo "Running LaTeX files through platex and dvipdfmx..." 148 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 149 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 150 | 151 | .PHONY: text 152 | text: 153 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 154 | @echo 155 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 156 | 157 | .PHONY: man 158 | man: 159 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 160 | @echo 161 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 162 | 163 | .PHONY: texinfo 164 | texinfo: 165 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 166 | @echo 167 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 168 | @echo "Run \`make' in that directory to run these through makeinfo" \ 169 | "(use \`make info' here to do that automatically)." 170 | 171 | .PHONY: info 172 | info: 173 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 174 | @echo "Running Texinfo files through makeinfo..." 
175 | make -C $(BUILDDIR)/texinfo info 176 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 177 | 178 | .PHONY: gettext 179 | gettext: 180 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 181 | @echo 182 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 183 | 184 | .PHONY: changes 185 | changes: 186 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 187 | @echo 188 | @echo "The overview file is in $(BUILDDIR)/changes." 189 | 190 | .PHONY: linkcheck 191 | linkcheck: 192 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 193 | @echo 194 | @echo "Link check complete; look for any errors in the above output " \ 195 | "or in $(BUILDDIR)/linkcheck/output.txt." 196 | 197 | .PHONY: doctest 198 | doctest: 199 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 200 | @echo "Testing of doctests in the sources finished, look at the " \ 201 | "results in $(BUILDDIR)/doctest/output.txt." 202 | 203 | .PHONY: coverage 204 | coverage: 205 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 206 | @echo "Testing of coverage in the sources finished, look at the " \ 207 | "results in $(BUILDDIR)/coverage/python.txt." 208 | 209 | .PHONY: xml 210 | xml: 211 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 212 | @echo 213 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 214 | 215 | .PHONY: pseudoxml 216 | pseudoxml: 217 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 218 | @echo 219 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 220 | 221 | .PHONY: dummy 222 | dummy: 223 | $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy 224 | @echo 225 | @echo "Build finished. Dummy builder generates no files." 226 | -------------------------------------------------------------------------------- /doc/README.md: -------------------------------------------------------------------------------- 1 | gneiss documentation 2 | ======================== 3 | 4 | This guide contains instructions for building the gneiss documentation, as 5 | well as guidelines for contributing to the documentation. 6 | 7 | Building the documentation 8 | -------------------------- 9 | 10 | To build the documentation, you'll need a gneiss development environment 11 | set up. See [CONTRIBUTING.md](../CONTRIBUTING.md) for instructions. 12 | 13 | **Important:** The documentation will be built for whatever version of 14 | gneiss is *currently installed* on your system (i.e., the version imported 15 | by ```import gneiss```). This may not match the code located in this repository. 16 | You will need to either install this version of gneiss somewhere (e.g., in 17 | a virtualenv) or point your ```PYTHONPATH``` environment variable to this code, 18 | *before* building the documentation. 19 | 20 | To build the documentation, assuming you are at the top-level gneiss 21 | directory: 22 | 23 | make -C doc clean html 24 | 25 | The built HTML documentation will be at ```doc/build/html/index.html```. 26 | 27 | Contributing to the documentation 28 | --------------------------------- 29 | 30 | If you would like to contribute to the documentation, whether by adding 31 | something entirely new or by modifying existing documentation, please first 32 | review our [gneiss contribution guide](../CONTRIBUTING.md). 33 | 34 | Before submitting your changes, ensure that the documentation builds without 35 | errors or warnings. 
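If gneiss is not installed but you want to build the docs against this checkout, one way to follow the ```PYTHONPATH``` note above (shown purely as an illustration; adjust to your own setup) is to set it for the build command, again from the top-level gneiss directory:

    PYTHONPATH=$(pwd) make -C doc clean html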
36 | 37 | ### Documentation guidelines 38 | 39 | Most of gneiss's API documentation is automatically generated from 40 | [docstrings](http://legacy.python.org/dev/peps/pep-0257/#what-is-a-docstring). 41 | The advantage to this approach is that users can access the documentation in an 42 | interactive Python session or from our website as HTML. Other output formats 43 | are also possible, such as PDF. 44 | 45 | gneiss docstrings follow the [numpydoc conventions](https://github.com/numpy/numpy/blob/master/doc/HOWTO_DOCUMENT.rst.txt). 46 | This ensures that the docstrings are easily readable both from the interpreter 47 | and HTML, PDF, etc. Please read the numpydoc guidelines before continuing. 48 | 49 | ### Documenting a module in gneiss 50 | 51 | In addition to following the numpydoc conventions for docstrings, we have a few 52 | more conventions that will ensure your documentation is correctly built and 53 | linked within our website, and that it maintains consistency with the rest of 54 | the gneiss docs. 55 | 56 | The easiest way to get started with documenting your code is to look at the 57 | docstrings in existing gneiss modules. An example of a module to start with 58 | is ```gneiss.balances```. Go ahead and look 59 | through those now. We've structured our docs in a similar way to 60 | [SciPy's documentation](http://docs.scipy.org/doc/scipy/reference/), so that 61 | may be another good place to look for examples. 62 | 63 | We'll take a top-down approach by discussing how to document a new module that 64 | you'd like to add to gneiss (let's call it ```gneiss/example.py```). 65 | 66 | #### Module docstring 67 | 68 | The first thing you'll need to add is a docstring for the module. The docstring 69 | must start at the first line of the file. It should start with a title for the 70 | module: 71 | 72 | """ 73 | Documentation examples (:mod:`gneiss.example`) 74 | ============================================= 75 | 76 | It is important to include the ```:mod:``` Sphinx directive in the title, as 77 | this title will be included in the table of contents. Also make sure that the 78 | title underline is the same length as the title. 79 | 80 | We also need to include another Sphinx directive below this: 81 | 82 | .. currentmodule:: gneiss.example 83 | 84 | This directive tells Sphinx that other classes, functions, etc. that we will 85 | reference are located in the ```gneiss.example``` module. 86 | 87 | Next, include a more detailed description of the module. For example: 88 | 89 | This module consists of several example classes and functions to illustrate 90 | the gneiss documentation system. 91 | 92 | Following that, list any classes, functions, and exceptions that you'd like 93 | documentation generated for. Note that you do *not* need to include every 94 | single class, function, or exception that is defined in the module. Also, you 95 | do not need to list class methods, as those will be automatically included in 96 | the generated class documentation. Only include objects that should be exposed 97 | as part of the public API. 98 | 99 | For example: 100 | 101 | Classes 102 | ------- 103 | 104 | .. autosummary:: 105 | :toctree: generated/ 106 | 107 | ExampleClass1 108 | ExampleClass2 109 | 110 | Functions 111 | --------- 112 | 113 | .. autosummary:: 114 | :toctree: generated/ 115 | 116 | example_function1 117 | example_function2 118 | 119 | Exceptions 120 | ---------- 121 | 122 | .. 
autosummary:: 123 | :toctree: generated/ 124 | 125 | ExampleError 126 | 127 | The ```autosummary``` directives are important as they generate RST files in 128 | the ```generated/``` directory for each object. A single-line summary and link 129 | to each object is inserted into the page for you. 130 | 131 | After listing public module members, we encourage a usage example section 132 | showing how to use some of the module's functionality. Examples should be 133 | written in [doctest](http://docs.python.org/3/library/doctest.html) format so 134 | that they can be automatically tested (e.g., using ```make test```). 135 | 136 | Examples 137 | -------- 138 | 139 | Run the ``example_function1`` function: 140 | 141 | >>> from gneiss.example import example_function1 142 | >>> example_function1("hello", "world") 143 | hello world! 144 | 145 | You can also embed the plots that an example generates into the built 146 | documentation with the ```.. plot::``` directive. For example: 147 | 148 | .. plot:: 149 | 150 | >>> import pandas as pd 151 | >>> df = pd.DataFrame({'col1': [1, 2, 3, 4], 'col2': [10, 11, 12, 13]}) 152 | >>> fig = df.boxplot() 153 | 154 | This will include the plot, a link to the source code used to generate the 155 | plot, and links to different image formats (e.g., PNG and PDF) so that users 156 | can easily download the plot. 157 | 158 | You're now ready to document the members of your module. 159 | 160 | #### Documenting module members 161 | 162 | When documenting the members of a module (e.g., classes, methods, attributes, 163 | functions, and exceptions), follow the numpydoc conventions. In addition to 164 | these conventions, there are a few things to keep in mind: 165 | 166 | - When documenting a class, only public methods and attributes are included in 167 | the built documentation. If a method or attribute starts with an 168 | underscore, it is assumed to be private. 169 | 170 | - When documenting a class, include the ```Parameters``` section in the class 171 | docstring, instead of in the ```__init__``` docstring. While numpydoc 172 | technically supports either form, ```__init__``` is not included in the list 173 | of methods by default and thus should have its documentation included in the 174 | class docstring. 175 | 176 | #### Including the module in the docs 177 | 178 | Until now, we've only been editing docstrings, which are attached to Python 179 | code. The final step is to hook up this new module's docstrings to the 180 | documentation build system: 181 | 182 | 1. Make sure you're within the ```gneiss/doc``` directory. 183 | 2. Create a new file with the same name as your module under the ```source``` 184 | directory. Do not include ```gneiss``` as part of the name, and use 185 | ```.rst``` as the suffix. For example, ```source/example.rst```. 186 | 3. Add the following line to ```source/example.rst``` to have your module's 187 | docstring pulled into the document: 188 | 189 | ``` 190 | .. automodule:: gneiss.example 191 | ``` 192 | 193 | 4. Add the following line to ```source/index.rst``` to add the new page to the 194 | top-level table of contents: 195 | 196 | ``` 197 | example 198 | ``` 199 | 200 | That's it! You can now try building the documentation, which should include the 201 | documentation for your new module! 202 | 203 | ### Documenting a subpackage in gneiss 204 | 205 | The process of documenting a subpackage is very similar to documenting a module 206 | in gneiss. 
The only difference is that the module docstring goes in the 207 | subpackage's ```__init__.py```. 208 | 209 | ### Troubleshooting 210 | 211 | If things aren't working correctly, try running ```make clean``` and then 212 | rebuild the docs. If things still aren't working, try building the docs 213 | *without* your changes, and see if there are any Sphinx errors or warnings. 214 | Make note of these, and then see what new errors or warnings are generated when 215 | you add your changes again. 216 | 217 | ### Acknowledgements 218 | This documentation guide lines are adapted from scikit-bio's guide line. -------------------------------------------------------------------------------- /gneiss/plot/tests/test_heatmap.py: -------------------------------------------------------------------------------- 1 | from gneiss.plot import heatmap 2 | from gneiss.plot._heatmap import _sort_table 3 | 4 | import pandas as pd 5 | import pandas.util.testing as pdt 6 | from skbio import TreeNode, DistanceMatrix 7 | from scipy.cluster.hierarchy import ward 8 | from gneiss.plot._dendrogram import SquareDendrogram 9 | from gneiss.util import block_diagonal 10 | from gneiss.cluster import rank_linkage 11 | import numpy as np 12 | import numpy.testing.utils as npt 13 | import unittest 14 | 15 | 16 | class HeatmapTest(unittest.TestCase): 17 | def setUp(self): 18 | np.random.seed(0) 19 | self.table = pd.DataFrame(np.random.random((5, 5)), 20 | index=['0', '1', '2', '3', '4'], 21 | columns=['0', '1', '2', '3', '4']) 22 | 23 | num_otus = 5 # otus 24 | x = np.random.rand(num_otus) 25 | dm = DistanceMatrix.from_iterable(x, lambda x, y: np.abs(x - y)) 26 | lm = ward(dm.condensed_form()) 27 | t = TreeNode.from_linkage_matrix(lm, np.arange(len(x)).astype(np.str)) 28 | self.t = SquareDendrogram.from_tree(t) 29 | self.md = pd.Series(['a', 'a', 'a', 'b', 'b'], 30 | index=['0', '1', '2', '3', '4']) 31 | for i, n in enumerate(t.postorder()): 32 | if not n.is_tip(): 33 | n.name = "y%d" % i 34 | n.length = np.random.rand() * 3 35 | 36 | self.highlights = pd.DataFrame({'y8': ['#FF0000', '#00FF00'], 37 | 'y6': ['#0000FF', '#F0000F']}).T 38 | 39 | def test_sort_table(self): 40 | table = pd.DataFrame( 41 | [[1, 1, 0, 0, 0], 42 | [0, 1, 1, 0, 0], 43 | [0, 0, 1, 1, 0], 44 | [0, 0, 0, 1, 1]], 45 | columns=['s1', 's2', 's3', 's4', 's5'], 46 | index=['o1', 'o2', 'o3', 'o4']) 47 | mdvar = pd.Series(['a', 'b', 'a', 'b', 'a'], 48 | index=['s1', 's2', 's3', 's4', 's5']) 49 | res_table, res_mdvar = _sort_table(table, mdvar) 50 | pdt.assert_index_equal(pd.Index(['s1', 's3', 's5', 's2', 's4']), 51 | res_mdvar.index) 52 | pdt.assert_index_equal(pd.Index(['s1', 's3', 's5', 's2', 's4']), 53 | res_table.columns) 54 | 55 | @unittest.skip('Visualizations are deprecated') 56 | def test_basic(self): 57 | fig = heatmap(self.table, self.t, self.md, 58 | figsize=(5, self.table.shape[0])) 59 | 60 | # Test to see if the lineages of the tree are ok 61 | lines = list(fig.get_axes()[0].get_lines()) 62 | 63 | exp_coords = np.array([[14.25, 0.5], 64 | [14.25, 1.], 65 | [14.25, 1.], 66 | [20., 1.], 67 | [9.5, 1.25], 68 | [9.5, 2.], 69 | [9.5, 2.], 70 | [20., 2.], 71 | [4.75, 2.125], 72 | [4.75, 3.], 73 | [4.75, 3.], 74 | [20., 3.], 75 | [0., 3.0625], 76 | [0., 4.], 77 | [0., 4.], 78 | [20., 4.], 79 | [14.25, 0.5], 80 | [14.25, 0.], 81 | [14.25, 0.], 82 | [20., 0.], 83 | [9.5, 1.25], 84 | [9.5, 0.5], 85 | [9.5, 0.5], 86 | [14.25, 0.5], 87 | [4.75, 2.125], 88 | [4.75, 1.25], 89 | [4.75, 1.25], 90 | [9.5, 1.25], 91 | [0., 3.0625], 92 | [0., 2.125], 93 | [0., 2.125], 94 
| [4.75, 2.125]]) 95 | 96 | res = np.vstack([i._xy for i in lines]) 97 | 98 | npt.assert_allclose(exp_coords, res) 99 | 100 | # Make sure that the metadata labels are set properly 101 | res = str(fig.get_axes()[1].get_xticklabels(minor=True)[0]) 102 | self.assertEqual(res, "Text(0, 0, 'a')") 103 | 104 | res = str(fig.get_axes()[1].get_xticklabels(minor=True)[1]) 105 | self.assertEqual(res, "Text(0, 0, 'b')") 106 | 107 | res = str(fig.get_axes()[1].get_xlabel()) 108 | self.assertEqual(res, "") 109 | 110 | def test_basic_line_width(self): 111 | fig = heatmap(self.table, self.t, self.md, 112 | figsize=(5, self.table.shape[0]), linewidth=1) 113 | 114 | # Test to see if the lineages of the tree are ok 115 | lines = list(fig.get_axes()[1].get_lines()) 116 | widths = [L.get_lw() for L in lines] 117 | np.allclose(widths, [1.0] * len(widths)) 118 | 119 | @unittest.skip('Visualizations are deprecated') 120 | def test_highlights(self): 121 | 122 | table = pd.DataFrame(block_diagonal(ncols=5, nrows=5, nblocks=2), 123 | index=['0', '1', '2', '3', '4'], 124 | columns=['0', '1', '2', '3', '4']) 125 | t = rank_linkage(pd.Series([1, 2, 3, 4, 5], 126 | index=['0', '1', '2', '3', '4'])) 127 | t = SquareDendrogram.from_tree(t) 128 | md = pd.Series(['a', 'a', 'a', 'b', 'b'], 129 | index=['0', '1', '2', '3', '4']) 130 | for i, n in enumerate(t.postorder()): 131 | if not n.is_tip(): 132 | n.name = "y%d" % i 133 | n.length = np.random.rand() * 3 134 | 135 | highlights = pd.DataFrame({'y8': ['#FF0000', '#00FF00'], 136 | 'y7': ['#0000FF', '#F0000F']}).T 137 | 138 | fig = heatmap(table, t, md, highlights) 139 | 140 | # Test to see if the lineages of the tree are ok 141 | lines = list(fig.get_axes()[0].get_lines()) 142 | 143 | pts = self.t.coords(width=20, height=self.table.shape[0]) 144 | pts['y'] = pts['y'] - 0.5 # account for offset 145 | pts['x'] = pts['x'].astype(np.float) 146 | pts['y'] = pts['y'].astype(np.float) 147 | 148 | exp_coords = np.array([[6.33333333, 3.5], 149 | [6.33333333, 4.], 150 | [6.33333333, 4.], 151 | [20., 4.], 152 | [12.66666667, 0.5], 153 | [12.66666667, 1.], 154 | [12.66666667, 1.], 155 | [20., 1.], 156 | [6.33333333, 1.25], 157 | [6.33333333, 2.], 158 | [6.33333333, 2.], 159 | [20., 2.], 160 | [0., 2.375], 161 | [0., 3.5], 162 | [0., 3.5], 163 | [6.33333333, 3.5], 164 | [6.33333333, 3.5], 165 | [6.33333333, 3.], 166 | [6.33333333, 3.], 167 | [20., 3.], 168 | [12.66666667, 0.5], 169 | [12.66666667, 0.], 170 | [12.66666667, 0.], 171 | [20., 0.], 172 | [6.33333333, 1.25], 173 | [6.33333333, 0.5], 174 | [6.33333333, 0.5], 175 | [12.66666667, 0.5], 176 | [0., 2.375], 177 | [0., 1.25], 178 | [0., 1.25], 179 | [6.33333333, 1.25]]) 180 | 181 | res = np.vstack([i._xy for i in lines]) 182 | 183 | npt.assert_allclose(exp_coords, res) 184 | 185 | # Make sure that the metadata labels are set properly 186 | res = str(fig.get_axes()[2].get_xticklabels(minor=True)[0]) 187 | self.assertEqual(res, "Text(0, 0, 'a')") 188 | 189 | res = str(fig.get_axes()[2].get_xticklabels(minor=True)[1]) 190 | self.assertEqual(res, "Text(0, 0, 'b')") 191 | 192 | print([str(i) for i in fig.get_axes()[1].get_xticklabels()]) 193 | # Make sure that the highlight labels are set properly 194 | res = str(fig.get_axes()[1].get_xticklabels()[0]) 195 | self.assertEqual(res, "Text(0, 0, 'y8')") 196 | 197 | res = str(fig.get_axes()[1].get_xticklabels()[1]) 198 | self.assertEqual(res, "Text(0, 0, 'y7')") 199 | 200 | # Test to see if the highlights are ok 201 | res = fig.get_axes()[2].get_position()._points 202 | exp = np.array([[0.24, 
0.1], 203 | [0.808, 0.9]]) 204 | npt.assert_allclose(res, exp) 205 | 206 | 207 | if __name__ == "__main__": 208 | unittest.main() 209 | -------------------------------------------------------------------------------- /gneiss/sort.py: -------------------------------------------------------------------------------- 1 | """ 2 | Sort functions (:mod:`gneiss.sort`) 3 | =================================== 4 | 5 | .. currentmodule:: gneiss.sort 6 | 7 | This module contains sorting functions that sort contingency tables 8 | in addition to trees. 9 | 10 | Functions 11 | --------- 12 | 13 | .. autosummary:: 14 | :toctree: generated/ 15 | 16 | mean_niche_estimator 17 | niche_sort 18 | ladderize 19 | gradient_sort 20 | """ 21 | # ---------------------------------------------------------------------------- 22 | # Copyright (c) 2016--, gneiss development team. 23 | # 24 | # Distributed under the terms of the Modified BSD License. 25 | # 26 | # The full license is in the file COPYING.txt, distributed with this software. 27 | # ---------------------------------------------------------------------------- 28 | import numpy as np 29 | import pandas as pd 30 | from functools import partial 31 | from gneiss.util import match 32 | 33 | 34 | def mean_niche_estimator(abundances, gradient): 35 | r""" Estimates the mean niche along a gradient of an organism. 36 | 37 | Calculates the mean niche of an organism along a gradient. 38 | This is done by calculating the mean gradient values that 39 | an organism is observed in. 40 | 41 | Specifically, this module calculates the following 42 | 43 | .. math:: 44 | f(g , x) = 45 | \sum\limits_{i=1}^N g_i \frac{x_i}{\sum\limits_{j=1}^N x_j} 46 | 47 | 48 | Where :math:`N` is the number of samples, :math:`x_i` is the proportion of 49 | species :math:`x` in sample :math:`i`, :math:`g_i` is the gradient value 50 | at sample `i`. 51 | 52 | Parameters 53 | ---------- 54 | abundances : pd.DataFrame or pd.Series, np.float 55 | Vector of fraction abundances of an organism over a list of samples. 56 | gradient : pd.Series, np.float 57 | Vector of numerical gradient values. 58 | 59 | Returns 60 | ------- 61 | pd.Series or np.float : 62 | The mean gradient that the feature is observed in. 63 | If `abundances` is a `pd.DataFrame` containing the mean gradient 64 | values for each feature. Otherwise a float is returned. 65 | 66 | Raises 67 | ------ 68 | ValueError: 69 | If the length of `abundances` is not the same length as `gradient`. 70 | ValueError: 71 | If the length of `gradient` contains nans. 72 | """ 73 | len_abundances = len(abundances) 74 | len_gradient = len(gradient) 75 | if len_abundances != len_gradient: 76 | raise ValueError("Length of `abundances` (%d) doesn't match the length" 77 | " of the `gradient` (%d)" % (len_abundances, 78 | len_gradient)) 79 | if np.any(pd.isnull(gradient)): 80 | raise ValueError("`gradient` cannot have any nans.") 81 | 82 | # normalizes the proportions of the organism across all of the 83 | # samples to add to 1. 84 | v = abundances / abundances.sum() 85 | m = np.dot(gradient, v) 86 | if isinstance(abundances, pd.DataFrame): 87 | m = pd.Series(m, index=abundances.columns) 88 | return m 89 | 90 | 91 | def niche_sort(table, gradient, niche_estimator=mean_niche_estimator): 92 | """ Sort the table according to estimated niches. 93 | 94 | Sorts the table by samples along the gradient 95 | and otus by their estimated niche along the gradient. 
96 | 97 | Parameters 98 | ---------- 99 | table : pd.DataFrame 100 | Contingency table where samples are rows and features (i.e. OTUs) 101 | are columns. 102 | gradient : pd.Series 103 | Vector of numerical gradient values. 104 | niche_estimator : function, optional 105 | A function that takes in two pandas series and returns an ordered 106 | object. The ability for the object to be ordered is critical, since 107 | this will allow the table to be sorted according to this ordering. 108 | By default, `mean_niche_estimator` will be used. 109 | 110 | Returns 111 | ------- 112 | pd.DataFrame : 113 | Sorted table according to the gradient of the samples, and the niches 114 | of the organisms along that gradient. 115 | 116 | Raises 117 | ------ 118 | ValueError : 119 | Raised if `niche_estimator` is not a function. 120 | """ 121 | if not callable(niche_estimator): 122 | raise ValueError("`niche_estimator` is not a function.") 123 | 124 | table, gradient = match(table, gradient) 125 | niche_estimator = partial(niche_estimator, gradient=gradient) 126 | 127 | # normalizes feature abundances to sum to 1, for each sample. 128 | # (i.e. scales values in each row to sum to 1). 129 | normtable = table.apply(lambda x: x / x.sum(), axis=1) 130 | 131 | # calculates estimated niche for each feature 132 | est_niche = normtable.apply(niche_estimator, axis=0) 133 | gradient = gradient.sort_values() 134 | est_niche = est_niche.sort_values() 135 | 136 | table = table.reindex(index=gradient.index, 137 | columns=est_niche.index) 138 | return table 139 | 140 | 141 | def _cache_ntips(tree): 142 | for n in tree.postorder(include_self=True): 143 | if n.is_tip(): 144 | n._n_tips = 1 145 | else: 146 | n._n_tips = sum(c._n_tips for c in n.children) 147 | return tree 148 | 149 | 150 | def ladderize(tree, ascending=True): 151 | r""" 152 | Sorts tree according to the size of the subtrees. 153 | 154 | Parameters 155 | ---------- 156 | tree : skbio.TreeNode 157 | Input tree where leafs correspond to features. 158 | 159 | Returns 160 | ------- 161 | skbio.TreeNode 162 | A tree whose tips are sorted according to subtree size. 163 | 164 | Examples 165 | -------- 166 | >>> from skbio import TreeNode 167 | >>> tree = TreeNode.read([u'((a,b)c, ((g,h)e,f)d)r;']) 168 | >>> print(tree.ascii_art()) 169 | /-a 170 | /c-------| 171 | | \-b 172 | -r-------| 173 | | /-g 174 | | /e-------| 175 | \d-------| \-h 176 | | 177 | \-f 178 | >>> sorted_tree = ladderize(tree) 179 | >>> print(sorted_tree.ascii_art()) 180 | /-a 181 | /c-------| 182 | | \-b 183 | -r-------| 184 | | /-f 185 | \d-------| 186 | | /-g 187 | \e-------| 188 | \-h 189 | """ 190 | sorted_tree = tree.copy() 191 | sorted_tree = _cache_ntips(tree) 192 | 193 | for n in sorted_tree.postorder(include_self=True): 194 | sizes = [k._n_tips for k in n.children] 195 | idx = np.argsort(sizes) 196 | if not ascending: 197 | idx = idx[::-1] 198 | n.children = [n.children[i] for i in idx] 199 | return sorted_tree 200 | 201 | 202 | def gradient_sort(tree, gradient, ascending=True): 203 | r""" 204 | Sorts tree according to ordering in gradient. 205 | 206 | Parameters 207 | ---------- 208 | tree : skbio.TreeNode 209 | Input tree where leafs correspond to features 210 | contained in the index in `gradient`. 211 | gradient : pd.Series, numeric 212 | Gradient where the index correspond to feature names. 213 | The index in the gradient must be consistent with 214 | names of the tips in the `tree`. 
215 | 216 | Returns 217 | ------- 218 | skbio.TreeNode 219 | A tree whose tips are sorted along the gradient. 220 | 221 | Examples 222 | -------- 223 | >>> from skbio import TreeNode 224 | >>> import pandas as pd 225 | >>> tree = TreeNode.read([u'((a,b)c, ((g,h)e,f)d)r;']) 226 | >>> x = pd.Series({'f':3, 'g':1, 'h':2, 'a':4, 'b':5}) 227 | >>> print(tree.ascii_art()) 228 | /-a 229 | /c-------| 230 | | \-b 231 | -r-------| 232 | | /-g 233 | | /e-------| 234 | \d-------| \-h 235 | | 236 | \-f 237 | >>> res = gradient_sort(tree, x) 238 | >>> print(res.ascii_art()) 239 | /-g 240 | /e-------| 241 | /d-------| \-h 242 | | | 243 | -r-------| \-f 244 | | 245 | | /-a 246 | \c-------| 247 | \-b 248 | """ 249 | sorted_tree = tree.copy() 250 | if not np.issubdtype(gradient, np.number): 251 | raise ValueError('`gradient` needs to be numeric, not %s' % 252 | gradient.dtype) 253 | 254 | # Note that this operation is not optimal 255 | # See https://github.com/biocore/gneiss/issues/58 256 | for n in sorted_tree.postorder(include_self=True): 257 | means = [gradient.loc[list(k.subset())].mean() for k in n.children] 258 | idx = np.argsort(means) 259 | if not ascending: 260 | idx = idx[::-1] 261 | n.children = [n.children[i] for i in idx] 262 | return sorted_tree 263 | -------------------------------------------------------------------------------- /gneiss/balances.py: -------------------------------------------------------------------------------- 1 | """ 2 | Balances (:mod:`gneiss.balances`) 3 | 4 | ================================= 5 | 6 | .. currentmodule:: gneiss.balances 7 | 8 | This module contains modules for calculating balances and creating ETE 9 | objects to visualize these balances on a tree. 10 | 11 | Functions 12 | --------- 13 | 14 | .. autosummary:: 15 | :toctree: generated/ 16 | 17 | balance_basis 18 | 19 | """ 20 | # ---------------------------------------------------------------------------- 21 | # Copyright (c) 2016--, gneiss development team. 22 | # 23 | # Distributed under the terms of the Modified BSD License. 24 | # 25 | # The full license is in the file COPYING.txt, distributed with this software. 26 | # ---------------------------------------------------------------------------- 27 | 28 | 29 | from __future__ import division 30 | import numpy as np 31 | from skbio.stats.composition import clr_inv 32 | from collections import OrderedDict 33 | from gneiss.util import NUMERATOR, DENOMINATOR 34 | from scipy.sparse import coo_matrix 35 | 36 | 37 | def _balance_basis(tree_node): 38 | """ Helper method for calculating balance basis 39 | """ 40 | counts, n_tips = _count_matrix(tree_node) 41 | counts = OrderedDict([(x, counts[x]) 42 | for x in counts.keys() if not x.is_tip()]) 43 | nds = counts.keys() 44 | r = np.array([counts[n]['r'] for n in nds]) 45 | s = np.array([counts[n]['l'] for n in nds]) 46 | k = np.array([counts[n]['k'] for n in nds]) 47 | t = np.array([counts[n]['t'] for n in nds]) 48 | 49 | a = np.sqrt(s / (r * (r + s))) 50 | b = -1 * np.sqrt(r / (s * (r + s))) 51 | 52 | basis = np.zeros((n_tips - 1, n_tips)) 53 | for i in range(len(nds)): 54 | basis[i, :] = np.array( 55 | [0] * k[i] + [a[i]] * r[i] + [b[i]] * s[i] + [0] * t[i]) 56 | # Make sure that the basis is in level order 57 | basis = basis[:, ::-1] 58 | nds = [n.name for n in nds] 59 | return basis, nds 60 | 61 | 62 | def balance_basis(tree_node): 63 | """ 64 | Determines the basis based on bifurcating tree. 65 | 66 | This is commonly referred to as sequential binary partition [1]_. 
67 | Given a binary tree relating a list of features, this module can 68 | be used to calculate an orthonormal basis, which is used to 69 | calculate the ilr transform. 70 | 71 | Parameters 72 | ---------- 73 | treenode : skbio.TreeNode 74 | Input bifurcating tree. Must be strictly bifurcating 75 | (i.e. every internal node needs to have exactly 2 children). 76 | 77 | Returns 78 | ------- 79 | basis : np.array 80 | Returns a set of orthonormal bases in the Aitchison simplex 81 | corresponding to the tree. The order of the 82 | basis is index by the level order of the internal nodes. 83 | nodes : list, skbio.TreeNode 84 | List of tree nodes indicating the ordering in the basis. 85 | 86 | Raises 87 | ------ 88 | ValueError 89 | The tree doesn't contain two branches. 90 | 91 | Examples 92 | -------- 93 | >>> from gneiss.balances import balance_basis 94 | >>> from skbio import TreeNode 95 | >>> tree = u"((b,c)a, d)root;" 96 | >>> t = TreeNode.read([tree]) 97 | >>> basis, nodes = balance_basis(t) 98 | >>> basis 99 | array([[0.18507216, 0.18507216, 0.62985567], 100 | [0.14002925, 0.57597535, 0.28399541]]) 101 | 102 | Notes 103 | ----- 104 | The tree must be strictly bifurcating, meaning that 105 | every internal node has exactly 2 children. 106 | 107 | See Also 108 | -------- 109 | skbio.stats.composition.ilr 110 | 111 | References 112 | ---------- 113 | .. [1] J.J. Egozcue and V. Pawlowsky-Glahn "Exploring Compositional Data 114 | with the CoDa-Dendrogram" (2011) 115 | 116 | """ 117 | basis, nodes = _balance_basis(tree_node) 118 | basis = clr_inv(basis) 119 | return basis, nodes 120 | 121 | 122 | def _count_matrix(treenode): 123 | n_tips = 0 124 | nodes = list(treenode.levelorder(include_self=True)) 125 | # fill in the Ordered dictionary. Note that the 126 | # elements of this Ordered dictionary are 127 | # dictionaries. 128 | counts = OrderedDict() 129 | columns = ['k', 'r', 'l', 't', 'tips'] 130 | for n in nodes: 131 | if n not in counts: 132 | counts[n] = {} 133 | for c in columns: 134 | counts[n][c] = 0 135 | 136 | # fill in r and l. This is done in reverse level order. 137 | for n in nodes[::-1]: 138 | if n.is_tip(): 139 | counts[n]['tips'] = 1 140 | n_tips += 1 141 | elif len(n.children) == 2: 142 | lchild = n.children[0] 143 | rchild = n.children[1] 144 | counts[n]['r'] = counts[rchild]['tips'] 145 | counts[n]['l'] = counts[lchild]['tips'] 146 | counts[n]['tips'] = counts[n]['r'] + counts[n]['l'] 147 | else: 148 | raise ValueError("Not a strictly bifurcating tree!") 149 | 150 | # fill in k and t 151 | for n in nodes: 152 | if n.parent is None: 153 | counts[n]['k'] = 0 154 | counts[n]['t'] = 0 155 | continue 156 | elif n.is_tip(): 157 | continue 158 | # left or right child 159 | # left = 0, right = 1 160 | child_idx = 'l' if n.parent.children[0] != n else 'r' 161 | if child_idx == 'l': 162 | counts[n]['t'] = counts[n.parent]['t'] + counts[n.parent]['l'] 163 | counts[n]['k'] = counts[n.parent]['k'] 164 | else: 165 | counts[n]['k'] = counts[n.parent]['k'] + counts[n.parent]['r'] 166 | counts[n]['t'] = counts[n.parent]['t'] 167 | return counts, n_tips 168 | 169 | 170 | def sparse_balance_basis(tree): 171 | """ Calculates sparse representation of an ilr basis from a tree. 172 | 173 | This computes an orthonormal basis specified from a bifurcating tree. 174 | 175 | Parameters 176 | ---------- 177 | tree : skbio.TreeNode 178 | Input bifurcating tree. Must be strictly bifurcating 179 | (i.e. every internal node needs to have exactly 2 children). 180 | This is used to specify the ilr basis. 
181 | 182 | Returns 183 | ------- 184 | scipy.sparse.coo_matrix 185 | The ilr basis required to perform the ilr_inv transform. 186 | This is also known as the sequential binary partition. 187 | Note that this matrix is represented in clr coordinates. 188 | nodes : list, str 189 | List of tree nodes indicating the ordering in the basis. 190 | 191 | Raises 192 | ------ 193 | ValueError 194 | The tree doesn't contain two branches. 195 | 196 | """ 197 | # this is inspired by @wasade in 198 | # https://github.com/biocore/gneiss/pull/8 199 | t = tree.copy() 200 | D = len(list(tree.tips())) 201 | # calculate number of tips under each node 202 | for n in t.postorder(include_self=True): 203 | if n.is_tip(): 204 | n._tip_count = 1 205 | else: 206 | if len(n.children) == 2: 207 | left, right = n.children[NUMERATOR], n.children[DENOMINATOR], 208 | else: 209 | raise ValueError("Not a strictly bifurcating tree.") 210 | n._tip_count = left._tip_count + right._tip_count 211 | 212 | # calculate k, r, s, t coordinate for each node 213 | left, right = t.children[NUMERATOR], t.children[DENOMINATOR], 214 | t._k, t._r, t._s, t._t = 0, left._tip_count, right._tip_count, 0 215 | for n in t.preorder(include_self=False): 216 | if n.is_tip(): 217 | n._k, n._r, n._s, n._t = 0, 0, 0, 0 218 | 219 | elif n == n.parent.children[NUMERATOR]: 220 | n._k = n.parent._k 221 | n._r = n.children[NUMERATOR]._tip_count 222 | n._s = n.children[DENOMINATOR]._tip_count 223 | n._t = n.parent._s + n.parent._t 224 | elif n == n.parent.children[DENOMINATOR]: 225 | n._k = n.parent._r + n.parent._k 226 | n._r = n.children[NUMERATOR]._tip_count 227 | n._s = n.children[DENOMINATOR]._tip_count 228 | n._t = n.parent._t 229 | else: 230 | raise ValueError("Tree topology is not correct.") 231 | 232 | # navigate through tree to build the basis in a sparse matrix form 233 | value = [] 234 | row, col = [], [] 235 | nodes = [] 236 | i = 0 237 | 238 | for n in t.levelorder(include_self=True): 239 | 240 | if n.is_tip(): 241 | continue 242 | 243 | for j in range(n._k, n._k + n._r): 244 | row.append(i) 245 | # consider tips in reverse order. May want to rethink 246 | # this orientation in the future. 247 | col.append(D - 1 - j) 248 | A = np.sqrt(n._s / (n._r * (n._s + n._r))) 249 | 250 | value.append(A) 251 | 252 | for j in range(n._k + n._r, n._k + n._r + n._s): 253 | row.append(i) 254 | col.append(D - 1 - j) 255 | B = -np.sqrt(n._r / (n._s * (n._s + n._r))) 256 | 257 | value.append(B) 258 | i += 1 259 | nodes.append(n.name) 260 | 261 | basis = coo_matrix((value, (row, col)), shape=(D - 1, D)) 262 | 263 | return basis, nodes 264 | -------------------------------------------------------------------------------- /gneiss/tests/test_sort.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 3 | # 4 | # Distributed under the terms of the Modified BSD License. 5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 
7 | # ---------------------------------------------------------------------------- 8 | import numpy as np 9 | import pandas as pd 10 | import unittest 11 | from gneiss.sort import (niche_sort, mean_niche_estimator, 12 | ladderize, gradient_sort) 13 | import pandas.util.testing as pdt 14 | from skbio import TreeNode 15 | 16 | 17 | class TestSort(unittest.TestCase): 18 | def setUp(self): 19 | pass 20 | 21 | def test_mean_niche_estimator1(self): 22 | gradient = pd.Series( 23 | [1, 2, 3, 4, 5], 24 | index=['s1', 's2', 's3', 's4', 's5']) 25 | values = pd.Series( 26 | [1, 1, 0, 0, 0], 27 | index=['s1', 's2', 's3', 's4', 's5']) 28 | m = mean_niche_estimator(values, gradient) 29 | self.assertEqual(m, 1.5) 30 | 31 | def test_mean_niche_estimator2(self): 32 | gradient = pd.Series( 33 | [1, 2, 3, 4, 5], 34 | index=['s1', 's2', 's3', 's4', 's5']) 35 | values = pd.Series( 36 | [1, 3, 0, 0, 0], 37 | index=['s1', 's2', 's3', 's4', 's5']) 38 | m = mean_niche_estimator(values, gradient) 39 | self.assertEqual(m, 1.75) 40 | 41 | def test_mean_niche_estimator_frame(self): 42 | gradient = pd.Series( 43 | [1, 2, 3, 4, 5], 44 | index=['s1', 's2', 's3', 's4', 's5']) 45 | values = pd.DataFrame( 46 | np.array([[1, 3, 0, 0, 0], 47 | [1, 3, 0, 0, 0]]).T, 48 | index=['s1', 's2', 's3', 's4', 's5'], 49 | columns=['o1', 'o2']) 50 | m = mean_niche_estimator(values, gradient) 51 | exp = pd.Series([1.75, 1.75], index=['o1', 'o2']) 52 | pdt.assert_series_equal(m, exp) 53 | 54 | def test_mean_niche_estimator_bad_length(self): 55 | gradient = pd.Series( 56 | [1, 2, 3, 4, 5], 57 | index=['s1', 's2', 's3', 's4', 's5']) 58 | values = pd.Series( 59 | [1, 3, 0, 0, 0, 0], 60 | index=['s1', 's2', 's3', 's4', 's5', 's6']) 61 | 62 | with self.assertRaises(ValueError): 63 | mean_niche_estimator(values, gradient) 64 | 65 | def test_mean_niche_estimator_missing(self): 66 | gradient = pd.Series( 67 | [1, 2, 3, 4, np.nan], 68 | index=['s1', 's2', 's3', 's4', 's5']) 69 | values = pd.Series( 70 | [1, 3, 0, 0, 0], 71 | index=['s1', 's2', 's3', 's4', 's5']) 72 | 73 | with self.assertRaises(ValueError): 74 | mean_niche_estimator(values, gradient) 75 | 76 | def test_basic_niche_sort(self): 77 | table = pd.DataFrame( 78 | [[1, 1, 0, 0, 0], 79 | [0, 1, 1, 0, 0], 80 | [0, 0, 1, 1, 0], 81 | [0, 0, 0, 1, 1]], 82 | columns=['s1', 's2', 's3', 's4', 's5'], 83 | index=['o1', 'o2', 'o3', 'o4']).T 84 | gradient = pd.Series( 85 | [1, 2, 3, 4, 5], 86 | index=['s1', 's2', 's3', 's4', 's5']) 87 | res_table = niche_sort(table, gradient) 88 | pdt.assert_frame_equal(table, res_table) 89 | 90 | def test_basic_niche_sort_error(self): 91 | table = pd.DataFrame( 92 | [[1, 1, 0, 0, 0], 93 | [0, 1, 1, 0, 0], 94 | [0, 0, 1, 1, 0], 95 | [0, 0, 0, 1, 1]], 96 | columns=['s1', 's2', 's3', 's4', 's5'], 97 | index=['o1', 'o2', 'o3', 'o4']).T 98 | gradient = pd.Series( 99 | [1, 2, 3, 4, 5], 100 | index=['s1', 's2', 's3', 's4', 's5']) 101 | with self.assertRaises(ValueError): 102 | niche_sort(table, gradient, niche_estimator='rawr') 103 | 104 | def test_basic_niche_sort_scrambled(self): 105 | # Swap samples s1 and s2 and features o1 and o2 to see if this can 106 | # obtain the original table structure. 
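        # (Illustrative note added to this dump, not in the original test: the
        # estimator tests above are consistent with mean_niche_estimator
        # computing an abundance-weighted mean of the gradient, e.g. for
        # values [1, 3, 0, 0, 0] over gradient [1, 2, 3, 4, 5]:
        #     (1*1 + 3*2) / (1 + 3) = 7 / 4 = 1.75
        # niche_sort is then expected to order samples by the gradient and
        # features by this estimate, which is why the scrambled table below
        # should come back in the original o1..o4 / s1..s5 order.)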
107 | table = pd.DataFrame( 108 | [[1, 0, 1, 0, 0], 109 | [1, 1, 0, 0, 0], 110 | [0, 0, 1, 1, 0], 111 | [0, 0, 0, 1, 1]], 112 | columns=['s2', 's1', 's3', 's4', 's5'], 113 | index=['o2', 'o1', 'o3', 'o4']).T 114 | 115 | gradient = pd.Series( 116 | [2, 1, 3, 4, 5], 117 | index=['s2', 's1', 's3', 's4', 's5']) 118 | 119 | exp_table = pd.DataFrame( 120 | [[1, 1, 0, 0, 0], 121 | [0, 1, 1, 0, 0], 122 | [0, 0, 1, 1, 0], 123 | [0, 0, 0, 1, 1]], 124 | columns=['s1', 's2', 's3', 's4', 's5'], 125 | index=['o1', 'o2', 'o3', 'o4']).T 126 | 127 | res_table = niche_sort(table, gradient) 128 | 129 | pdt.assert_frame_equal(exp_table, res_table) 130 | 131 | def test_basic_niche_sort_lambda(self): 132 | table = pd.DataFrame( 133 | [[1, 1, 0, 0, 0], 134 | [0, 0, 1, 1, 0], 135 | [0, 1, 1, 0, 0], 136 | [0, 0, 0, 1, 1]], 137 | columns=['s1', 's2', 's3', 's4', 's5'], 138 | index=['o1', 'o3', 'o2', 'o4']).T 139 | gradient = pd.Series( 140 | [1, 2, 3, 4, 5], 141 | index=['s1', 's2', 's3', 's4', 's5']) 142 | 143 | exp_table = pd.DataFrame( 144 | [[1, 1, 0, 0, 0], 145 | [0, 1, 1, 0, 0], 146 | [0, 0, 1, 1, 0], 147 | [0, 0, 0, 1, 1]], 148 | columns=['s1', 's2', 's3', 's4', 's5'], 149 | index=['o1', 'o2', 'o3', 'o4']).T 150 | 151 | def _dumb_estimator(v, gradient): 152 | v[v > 0] = 1 153 | values = v / v.sum() 154 | return np.dot(gradient, values) 155 | 156 | res_table = niche_sort(table, gradient, 157 | niche_estimator=_dumb_estimator) 158 | pdt.assert_frame_equal(exp_table, res_table) 159 | 160 | def test_basic_niche_sort_immutable(self): 161 | # Swap samples s1 and s2 and features o1 and o2 to see if this can 162 | # obtain the original table structure. 163 | table = pd.DataFrame( 164 | [[1, 0, 1, 0, 0], 165 | [1, 1, 0, 0, 0], 166 | [0, 0, 1, 1, 0], 167 | [0, 0, 0, 1, 1]], 168 | columns=['s2', 's1', 's3', 's4', 's5'], 169 | index=['o2', 'o1', 'o3', 'o4']).T 170 | 171 | gradient = pd.Series( 172 | [2, 1, 3, 4, 5], 173 | index=['s2', 's1', 's3', 's4', 's5']) 174 | 175 | exp_table = pd.DataFrame( 176 | [[1, 0, 1, 0, 0], 177 | [1, 1, 0, 0, 0], 178 | [0, 0, 1, 1, 0], 179 | [0, 0, 0, 1, 1]], 180 | columns=['s2', 's1', 's3', 's4', 's5'], 181 | index=['o2', 'o1', 'o3', 'o4']).T 182 | 183 | exp_gradient = pd.Series( 184 | [2, 1, 3, 4, 5], 185 | index=['s2', 's1', 's3', 's4', 's5']) 186 | 187 | niche_sort(table, gradient) 188 | pdt.assert_frame_equal(exp_table, table) 189 | pdt.assert_series_equal(exp_gradient, gradient) 190 | 191 | def test_ladderize1(self): 192 | # Makes sure that 1 subtree is ordered 193 | tree = TreeNode.read([u'((a,b)c, ((g,h)e,f)d)r;']) 194 | exp = '((a,b)c,(f,(g,h)e)d)r;\n' 195 | res = str(ladderize(tree)) 196 | self.assertEqual(exp, res) 197 | 198 | def test_ladderize2(self): 199 | # Makes sure that 2 subtrees are ordered 200 | tree = TreeNode.read([u'(((n,m)a,b)c, ((g,(i,j)h)e,f)d)r;']) 201 | exp = '((b,(n,m)a)c,(f,(g,(i,j)h)e)d)r;\n' 202 | res = str(ladderize(tree)) 203 | self.assertEqual(exp, res) 204 | 205 | def test_ladderize_descending(self): 206 | # Makes sure that 2 subtrees are ordered 207 | tree = TreeNode.read([u'(((n,m)a,b)c, ((g,(i,j)h)e,f)d)r;']) 208 | exp = '((((j,i)h,g)e,f)d,((m,n)a,b)c)r;\n' 209 | res = str(ladderize(tree, ascending=False)) 210 | self.assertEqual(exp, res) 211 | 212 | def test_gradient_sort(self): 213 | # Makes sure that the tree is sorted according 214 | # a pre-set ordering 215 | tree = TreeNode.read([u'((a,b)c, ((g,h)e,f)d)r;']) 216 | exp = '(((g,h)e,f)d,(a,b)c)r;\n' 217 | x = pd.Series({'f': 3, 'g': 1, 'h': 2, 'a': 4, 'b': 5}) 218 | res = str(gradient_sort(tree, 
x)) 219 | self.assertEqual(exp, res) 220 | 221 | def test_gradient_sort_descending(self): 222 | # Makes sure that the tree is sorted according 223 | # a pre-set ordering in descending order 224 | tree = TreeNode.read([u'((a,b)c, ((g,h)e,f)d)r;']) 225 | exp = '((b,a)c,(f,(h,g)e)d)r;\n' 226 | x = pd.Series({'f': 3, 'g': 1, 'h': 2, 'a': 4, 'b': 5}) 227 | res = str(gradient_sort(tree, x, ascending=False)) 228 | self.assertEqual(exp, res) 229 | 230 | def test_gradient_sort_error(self): 231 | # Makes sure that the tree is sorted according 232 | # a pre-set ordering 233 | tree = TreeNode.read([u'((a,b)c, ((g,h)e,f)d)r;']) 234 | x = pd.Series({'f': 'x', 'g': 'y', 'h': 'z', 'a': 'u', 'b': 'dz'}) 235 | with self.assertRaises(ValueError): 236 | gradient_sort(tree, x) 237 | 238 | 239 | if __name__ == '__main__': 240 | unittest.main() 241 | -------------------------------------------------------------------------------- /ipynb/cfstudy/cfstudy-qiime2-tutorial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "In this study, there were 18 patients with cystic fibrosis. The hypothesis was that there were two main microbial communities at play in the CF lung. One of these communities thrives at low pH, and the other community thrives at high pH. To test this, sputum samples were divided among 8 tubes, and each of the tubes was perturbed with a different pH. Here we will calculate balances, and test how these balances change with respect to pH, using linear mixed effects models." 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "First, we'll want to filter out low abundance OTUs. This will not only remove potential confounders, but could also alleviate the issue with zeros. " 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": {}, 21 | "outputs": [ 22 | { 23 | "name": "stdout", 24 | "output_type": "stream", 25 | "text": [ 26 | "/Users/mortonjt/miniconda3/envs/qiime2-2018.2/lib/python3.5/site-packages/h5py/__init__.py:34: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", 27 | " from ._conv import register_converters as _register_converters\n", 28 | "\u001b[32mSaved FeatureTable[Frequency] to: cfstudy_common_filt500.biom.qza\u001b[0m\n" 29 | ] 30 | } 31 | ], 32 | "source": [ 33 | "!qiime feature-table filter-features \\\n", 34 | " --i-table cfstudy_common.biom.qza \\\n", 35 | " --o-filtered-table cfstudy_common_filt500.biom.qza \\\n", 36 | " --p-min-frequency 500" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "Again, we will create the tree using pH. Note that we'll also want to reorder the OTU table for the balance calculations." 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 2, 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "name": "stdout", 53 | "output_type": "stream", 54 | "text": [ 55 | "/Users/mortonjt/miniconda3/envs/qiime2-2018.2/lib/python3.5/site-packages/h5py/__init__.py:34: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. 
In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", 56 | " from ._conv import register_converters as _register_converters\n", 57 | "\u001b[32mSaved Hierarchy to: ph_tree.nwk.qza\u001b[0m\n" 58 | ] 59 | } 60 | ], 61 | "source": [ 62 | "!qiime gneiss gradient-clustering \\\n", 63 | " --i-table cfstudy_common_filt500.biom.qza \\\n", 64 | " --m-gradient-file cfstudy_modified_metadata.txt \\\n", 65 | " --m-gradient-column ph \\\n", 66 | " --o-clustering ph_tree.nwk.qza \\\n", 67 | " --p-weighted --verbose" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "Before running the linear mixed effects models using mixed we'll want to replace zeros with a pseudocount to approximate the uncertainity probability." 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 3, 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "name": "stdout", 84 | "output_type": "stream", 85 | "text": [ 86 | "/Users/mortonjt/miniconda3/envs/qiime2-2018.2/lib/python3.5/site-packages/h5py/__init__.py:34: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", 87 | " from ._conv import register_converters as _register_converters\n", 88 | "\u001b[32mSaved FeatureTable[Composition] to: cf_composition.qza\u001b[0m\n" 89 | ] 90 | } 91 | ], 92 | "source": [ 93 | "!qiime composition add-pseudocount \\\n", 94 | " --i-table cfstudy_common_filt500.biom.qza \\\n", 95 | " --p-pseudocount 1 \\\n", 96 | " --o-composition-table cf_composition.qza" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 4, 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "name": "stdout", 106 | "output_type": "stream", 107 | "text": [ 108 | "/Users/mortonjt/miniconda3/envs/qiime2-2018.2/lib/python3.5/site-packages/h5py/__init__.py:34: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", 109 | " from ._conv import register_converters as _register_converters\n", 110 | "\u001b[32mSaved FeatureTable[Balance] to: cf_balances.qza\u001b[0m\n" 111 | ] 112 | } 113 | ], 114 | "source": [ 115 | "!qiime gneiss ilr-transform \\\n", 116 | " --i-table cf_composition.qza \\\n", 117 | " --i-tree ph_tree.nwk.qza \\\n", 118 | " --o-balances cf_balances.qza" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "Now we can run the linear mixed effects models. pH is the only covariate being tested for and each of the patients are being accounted for by passing host_subject_id into groups. This is because the microbial differences between the patients are much larger than the pH effects, so we need to correct for this change, by treating each patient separately. This is why the linear mixed effects strategy is chosen." 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 5, 131 | "metadata": {}, 132 | "outputs": [ 133 | { 134 | "name": "stdout", 135 | "output_type": "stream", 136 | "text": [ 137 | "/Users/mortonjt/miniconda3/envs/qiime2-2018.2/lib/python3.5/site-packages/h5py/__init__.py:34: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. 
In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", 138 | " from ._conv import register_converters as _register_converters\n", 139 | "\u001b[32mSaved Visualization to: cf_linear_mixed_effects_model.qzv\u001b[0m\n" 140 | ] 141 | } 142 | ], 143 | "source": [ 144 | "!qiime gneiss lme-regression \\\n", 145 | " --p-formula \"ph\" \\\n", 146 | " --i-table cf_balances.qza \\\n", 147 | " --i-tree ph_tree.nwk.qza \\\n", 148 | " --m-metadata-file cfstudy_modified_metadata.txt \\\n", 149 | " --p-groups host_subject_id \\\n", 150 | " --o-visualization cf_linear_mixed_effects_model" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "These summary results can be visualized in qiime2 visualization framework. Checkout [view.qiime2.org](https://view.qiime2.org)\n", 158 | "\n", 159 | "Let's further summarize the results of the linear mixed effects model. We'll plot the how one of the top balances change with respect to the pH." 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 10, 165 | "metadata": {}, 166 | "outputs": [ 167 | { 168 | "name": "stdout", 169 | "output_type": "stream", 170 | "text": [ 171 | "/Users/mortonjt/miniconda3/envs/qiime2-2018.2/lib/python3.5/site-packages/h5py/__init__.py:34: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", 172 | " from ._conv import register_converters as _register_converters\n", 173 | "\u001b[32mSaved Visualization to: y2_taxa_summary.qzv\u001b[0m\n" 174 | ] 175 | } 176 | ], 177 | "source": [ 178 | "!qiime gneiss balance-taxonomy \\\n", 179 | " --i-table cf_composition.qza \\\n", 180 | " --i-tree ph_tree.nwk.qza \\\n", 181 | " --i-taxonomy cfstudy_taxonomy.qza \\\n", 182 | " --p-taxa-level 4 \\\n", 183 | " --p-balance-name 'y2' \\\n", 184 | " --m-metadata-file 'cfstudy_modified_metadata.txt' \\\n", 185 | " --m-metadata-column 'ph' \\\n", 186 | " --o-visualization y2_taxa_summary.qzv" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": { 192 | "collapsed": true 193 | }, 194 | "source": [ 195 | "Similar to the 88soils example, there is a very obvious transition from low pH organisms to high pH organism as the pH increases. However, given that every patient has different microbes, so it is difficult to test for individual microbes abundances across patients. However, every patient has microbes that behave the same with respect to pH. Balances is a very powerful tool for addressing this, as it can allow for entire subcommunities to be tested, rather than just individual OTUs." 
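(Aside added for illustration, not part of the original notebook: conceptually, the lme-regression step above fits one mixed-effects model per balance, with pH as the fixed effect and a per-patient random intercept supplied through the host_subject_id grouping. A minimal per-balance sketch with statsmodels, assuming hypothetical `balances` and `metadata` DataFrames indexed by sample:

    import pandas as pd
    from statsmodels.formula.api import mixedlm

    df = pd.concat((balances[['y2']],
                    metadata[['ph', 'host_subject_id']]), axis=1)
    # pH as the fixed effect; one random intercept per patient, mirroring
    # the --p-groups host_subject_id option used above
    fit = mixedlm('y2 ~ ph', data=df, groups=df['host_subject_id']).fit()
    print(fit.summary())

The grouping is what absorbs the large patient-to-patient differences, so the pH coefficient reflects within-patient changes in the balance.)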
196 | ] 197 | } 198 | ], 199 | "metadata": { 200 | "kernelspec": { 201 | "display_name": "Python 3", 202 | "language": "python", 203 | "name": "python3" 204 | }, 205 | "language_info": { 206 | "codemirror_mode": { 207 | "name": "ipython", 208 | "version": 3 209 | }, 210 | "file_extension": ".py", 211 | "mimetype": "text/x-python", 212 | "name": "python", 213 | "nbconvert_exporter": "python", 214 | "pygments_lexer": "ipython3", 215 | "version": "3.5.5" 216 | } 217 | }, 218 | "nbformat": 4, 219 | "nbformat_minor": 1 220 | } 221 | -------------------------------------------------------------------------------- /gneiss/plot/tests/test_decompose.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 3 | # 4 | # Distributed under the terms of the Modified BSD License. 5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 7 | # ---------------------------------------------------------------------------- 8 | import unittest 9 | from gneiss.plot import balance_boxplot, balance_barplots, proportion_plot 10 | import numpy as np 11 | import pandas as pd 12 | import numpy.testing as npt 13 | import matplotlib.pyplot as plt 14 | from skbio import TreeNode 15 | 16 | 17 | class TestBoxplot(unittest.TestCase): 18 | def setUp(self): 19 | self.df = pd.DataFrame({ 20 | 'y': [-2, -2.2, -1.8, -1.5, -1, 1, 1.5, 2, 2.2, 1.8], 21 | 'group': ['a', 'a', 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'b'], 22 | 'hue': ['0', '1', '0', '1', '0', '1', '0', '1', '0', '1']} 23 | ) 24 | self.tree = TreeNode.read(['((c, d)z, (b,a)x)y;']) 25 | self.feature_df = pd.DataFrame( 26 | { 27 | 'type': ['tomato', 'carrots', 'apple', 'bacon'], 28 | 'food': ['vegatable', 'vegatable', 'fruit', 'meat'], 29 | 'seed': ['yes', 'no', 'yes', 'no'] 30 | }, 31 | index=["a", "b", "c", "d"] 32 | ) 33 | 34 | @unittest.skip('Visualizations are deprecated') 35 | def test_basic_boxplot(self): 36 | a = balance_boxplot('y', y='group', data=self.df) 37 | res = np.vstack([i._xy for i in a.get_lines()]) 38 | exp = np.array([[-2., 0.], 39 | [-2.2, 0.], 40 | [-1.5, 0.], 41 | [-1., 0.], 42 | [-2.2, -0.2], 43 | [-2.2, 0.2], 44 | [-1., -0.2], 45 | [-1., 0.2], 46 | [-1.8, -0.4], 47 | [-1.8, 0.4], 48 | [1.5, 1.], 49 | [1., 1.], 50 | [2., 1.], 51 | [2.2, 1.], 52 | [1., 0.8], 53 | [1., 1.2], 54 | [2.2, 0.8], 55 | [2.2, 1.2], 56 | [1.8, 0.6], 57 | [1.8, 1.4]]) 58 | npt.assert_allclose(res, exp) 59 | 60 | @unittest.skip('Visualizations are deprecated') 61 | def test_basic_hue_boxplot(self): 62 | a = balance_boxplot('y', y='group', hue='hue', data=self.df) 63 | res = np.vstack([i._xy for i in a.get_lines()]) 64 | exp = np.array([[-1.9, -0.2], 65 | [-2., -0.2], 66 | [-1.4, -0.2], 67 | [-1., -0.2], 68 | [-2., -0.298], 69 | [-2., -0.102], 70 | [-1., -0.298], 71 | [-1., -0.102], 72 | [-1.8, -0.396], 73 | [-1.8, -0.004], 74 | [-2.025, 0.2], 75 | [-2.2, 0.2], 76 | [-1.675, 0.2], 77 | [-1.5, 0.2], 78 | [-2.2, 0.102], 79 | [-2.2, 0.298], 80 | [-1.5, 0.102], 81 | [-1.5, 0.298], 82 | [-1.85, 0.004], 83 | [-1.85, 0.396], 84 | [1.675, 0.8], 85 | [1.5, 0.8], 86 | [2.025, 0.8], 87 | [2.2, 0.8], 88 | [1.5, 0.702], 89 | [1.5, 0.898], 90 | [2.2, 0.702], 91 | [2.2, 0.898], 92 | [1.85, 0.604], 93 | [1.85, 0.996], 94 | [1.4, 1.2], 95 | [1., 1.2], 96 | [1.9, 1.2], 97 | [2., 1.2], 98 | [1., 1.102], 99 | [1., 1.298], 100 | [2., 1.102], 101 | [2., 1.298], 102 | [1.8, 1.004], 103 | [1.8, 1.396]]) 104 | 
npt.assert_allclose(res, exp) 105 | 106 | @unittest.skip('Visualizations are deprecated') 107 | def test_basic_barplot(self): 108 | ax_denom, ax_num = balance_barplots(self.tree, 'y', header='food', 109 | feature_metadata=self.feature_df) 110 | 111 | 112 | class TestProportionPlot(unittest.TestCase): 113 | def setUp(self): 114 | self.table = pd.DataFrame({ 115 | 'A': [1, 1.2, 1.1, 2.1, 2.2, 2], 116 | 'B': [9.9, 10, 10.1, 2, 2.4, 2.1], 117 | 'C': [5, 3, 1, 2, 2, 3], 118 | 'D': [5, 5, 5, 5, 5, 5], 119 | }, index=['S1', 'S2', 'S3', 'S4', 'S5', 'S6']) 120 | 121 | self.feature_metadata = pd.DataFrame({ 122 | 'A': ['k__foo', 'p__bar', 'c__', 'o__', 'f__', 'g__', 's__'], 123 | 'B': ['k__foo', 'p__bar', 'c__', 'o__', 'f__', 'g__', 's__'], 124 | 'C': ['k__poo', 'p__tar', 'c__', 'o__', 'f__', 'g__', 's__'], 125 | 'D': ['k__poo', 'p__far', 'c__', 'o__', 'f__', 'g__', 's__'] 126 | }, index=['kingdom', 'phylum', 'class', 'order', 127 | 'family', 'genus', 'species']).T 128 | 129 | self.metadata = pd.DataFrame({ 130 | 'groups': ['X', 'X', 'X', 'Y', 'Y', 'Y'], 131 | 'dry': [1, 2, 3, 4, 5, 6] 132 | }, index=['S1', 'S2', 'S3', 'S4', 'S5', 'S6']) 133 | 134 | @unittest.skip('Visualizations are deprecated') 135 | def test_proportion_plot(self): 136 | np.random.seed(0) 137 | num_features = ['A', 'B'] 138 | denom_features = ['C', 'D'] 139 | ax1, ax2 = proportion_plot(self.table, self.metadata, 140 | 'groups', 'X', 'Y', 141 | num_features, denom_features, 142 | self.feature_metadata, 143 | label_col='phylum') 144 | res = np.vstack([L.get_xydata() for L in ax1.get_lines()]) 145 | exp = np.array([0., 0., 1., 1., 2., 2., 3., 3.]) 146 | 147 | npt.assert_allclose(res[:, 1], exp, verbose=True) 148 | 149 | res = np.vstack([L.get_xydata() for L in ax2.get_lines()]) 150 | exp = np.array([0., 0., 1., 1., 2., 2., 3., 3.]) 151 | 152 | npt.assert_allclose(res[:, 1], exp, verbose=True) 153 | 154 | res = [L._text for L in ax2.get_yticklabels()] 155 | exp = ['p__bar', 'p__bar', 'p__tar', 'p__far'] 156 | self.assertListEqual(res, exp) 157 | 158 | @unittest.skip('Visualizations are deprecated') 159 | def test_proportion_plot_order(self): 160 | self.maxDiff = None 161 | np.random.seed(0) 162 | # tests for different ordering 163 | num_features = ['A', 'B'] 164 | denom_features = ['D', 'C'] 165 | ax1, ax2 = proportion_plot(self.table, self.metadata, 166 | 'groups', 'X', 'Y', 167 | num_features, denom_features, 168 | self.feature_metadata, 169 | label_col='phylum') 170 | res = np.vstack([L.get_xydata() for L in ax1.get_lines()]) 171 | exp = np.array([0., 0., 1., 1., 2., 2., 3., 3.]) 172 | 173 | npt.assert_allclose(res[:, 1], exp, atol=1e-2, rtol=1e-2, verbose=True) 174 | 175 | res = np.vstack([L.get_xydata() for L in ax2.get_lines()]) 176 | exp = np.array([0., 0., 1., 1., 2., 2., 3., 3.]) 177 | 178 | npt.assert_allclose(res[:, 1], exp, atol=1e-2, rtol=1e-2, verbose=True) 179 | 180 | res = [L._text for L in ax2.get_yticklabels()] 181 | exp = ['p__bar', 'p__bar', 'p__far', 'p__tar'] 182 | self.assertListEqual(res, exp) 183 | 184 | @unittest.skip('Visualizations are deprecated') 185 | def test_proportion_plot_order_figure(self): 186 | self.maxDiff = None 187 | np.random.seed(0) 188 | # tests for different ordering 189 | fig, axes = plt.subplots(1, 2) 190 | 191 | num_features = ['A', 'B'] 192 | denom_features = ['D', 'C'] 193 | ax1, ax2 = proportion_plot(self.table, self.metadata, 194 | 'groups', 'X', 'Y', 195 | num_features, denom_features, 196 | self.feature_metadata, 197 | label_col='phylum', axes=axes) 198 | res = 
np.vstack([L.get_xydata() for L in ax1.get_lines()]) 199 | exp = np.array([0., 0., 1., 1., 2., 2., 3., 3.]) 200 | 201 | npt.assert_allclose(res[:, 1], exp, atol=1e-2, rtol=1e-2, verbose=True) 202 | 203 | res = np.vstack([L.get_xydata() for L in ax2.get_lines()]) 204 | exp = np.array([0., 0., 1., 1., 2., 2., 3., 3.]) 205 | 206 | npt.assert_allclose(res[:, 1], exp, atol=1e-2, rtol=1e-2, verbose=True) 207 | 208 | res = [L._text for L in ax2.get_yticklabels()] 209 | exp = ['p__bar', 'p__bar', 'p__far', 'p__tar'] 210 | self.assertListEqual(res, exp) 211 | 212 | def test_proportion_plot_original_labels(self): 213 | # tests for different ordering 214 | fig, axes = plt.subplots(1, 2) 215 | 216 | num_features = ['A', 'B'] 217 | denom_features = ['D', 'C'] 218 | ax1, ax2 = proportion_plot(self.table, self.metadata, 219 | 'groups', 'X', 'Y', 220 | num_features, denom_features, 221 | axes=axes) 222 | 223 | res = [L._text for L in ax2.get_yticklabels()] 224 | exp = ['A', 'B', 'D', 'C'] 225 | self.assertListEqual(res, exp) 226 | 227 | 228 | if __name__ == '__main__': 229 | unittest.main() 230 | -------------------------------------------------------------------------------- /gneiss/regression/tests/test_ols.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 3 | # 4 | # Distributed under the terms of the Modified BSD License. 5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 7 | # ---------------------------------------------------------------------------- 8 | import os 9 | import shutil 10 | import unittest 11 | import numpy as np 12 | import pandas as pd 13 | import pandas.util.testing as pdt 14 | from skbio.stats.composition import ilr_inv 15 | from skbio import TreeNode 16 | from skbio.util import get_data_path 17 | from gneiss.regression import ols 18 | from gneiss.balances import balance_basis 19 | from statsmodels.regression.linear_model import OLS 20 | 21 | 22 | class TestOLS(unittest.TestCase): 23 | """ Tests OLS regression with refactored matrix multiplication. """ 24 | def setUp(self): 25 | np.random.seed(0) 26 | b01, b11, b21 = 1, 2, -3 27 | b02, b12, b22 = 2, -1, 4 28 | n = 50 29 | x1 = np.linspace(0, 10, n) 30 | x2 = np.linspace(10, 15, n) 31 | e = np.random.normal(size=n) * 10 32 | y1 = b01 + b11 * x1 + b21 * x2 + e 33 | e = np.random.normal(size=n) * 10 34 | y2 = b02 + b12 * x1 + b22 * x2 + e 35 | Y = pd.DataFrame(np.vstack((y1, y2)).T, 36 | columns=['y1', 'y2']) 37 | 38 | B = pd.DataFrame([[b01, b11, b21], 39 | [b02, b12, b22]]) 40 | 41 | X = pd.DataFrame( 42 | np.vstack((np.ones(n), x1, x2)).T, 43 | columns=['Intercept', 'x1', 'x2']) 44 | 45 | self.Y = Y 46 | self.B = B 47 | self.X = X 48 | self.r1_ = OLS(endog=y1, exog=X).fit() 49 | self.r2_ = OLS(endog=y2, exog=X).fit() 50 | self.tree = TreeNode.read(['(c, (b,a)y2)y1;']) 51 | 52 | self.results = "results" 53 | if not os.path.exists(self.results): 54 | os.mkdir(self.results) 55 | 56 | def tearDown(self): 57 | shutil.rmtree(self.results) 58 | 59 | def test_ols_immutable(self): 60 | # test to see if values in table get filtered out. 
61 | # and that the original table doesn't change 62 | table = self.Y 63 | x = pd.DataFrame(self.X.values, columns=self.X.columns, 64 | index=range(100, 100 + len(self.X.index))) 65 | metadata = pd.concat((self.X, x)) 66 | 67 | exp_metadata = metadata.copy() 68 | ols('x1 + x2', self.Y, self.X) 69 | self.assertEqual(str(table), str(self.Y)) 70 | self.assertEqual(str(metadata), str(exp_metadata)) 71 | 72 | def test_ols_missing_metadata(self): 73 | # test to see if values in table get filtered out. 74 | # and that the original table doesn't change 75 | table = self.Y 76 | y = pd.DataFrame(self.Y.values, columns=self.Y.columns, 77 | index=range(100, 100 + len(self.Y.index))) 78 | 79 | table = pd.concat((self.Y, y)) 80 | ids = np.arange(100, 100 + len(self.X.index)) 81 | x = pd.DataFrame([[np.nan] * len(self.X.columns)] * len(ids), 82 | columns=self.X.columns, index=ids) 83 | 84 | metadata = pd.concat((self.X, x)) 85 | model = ols('x1 + x2', table, metadata) 86 | model.fit() 87 | 88 | # test prediction 89 | exp = pd.DataFrame({'y1': self.r1_.predict(), 90 | 'y2': self.r2_.predict()}, 91 | index=self.Y.index) 92 | res = model.predict() 93 | 94 | pdt.assert_frame_equal(res, exp) 95 | 96 | def test_ols_test(self): 97 | 98 | model = ols('x1 + x2', self.Y, self.X) 99 | model.fit() 100 | 101 | # test pvalues 102 | exp = pd.DataFrame({'y1': self.r1_.pvalues, 103 | 'y2': self.r2_.pvalues}) 104 | pdt.assert_frame_equal(model.pvalues, exp) 105 | 106 | # test coefficients 107 | exp = pd.DataFrame({'y1': self.r1_.params, 108 | 'y2': self.r2_.params}) 109 | res = model.coefficients() 110 | pdt.assert_frame_equal(res, exp) 111 | 112 | # test residuals 113 | exp = pd.DataFrame({'y1': self.r1_.resid, 114 | 'y2': self.r2_.resid}, 115 | index=self.Y.index) 116 | res = model.residuals() 117 | pdt.assert_frame_equal(res, exp) 118 | 119 | # test prediction 120 | exp = pd.DataFrame({'y1': self.r1_.predict(), 121 | 'y2': self.r2_.predict()}, 122 | index=self.Y.index) 123 | res = model.predict() 124 | pdt.assert_frame_equal(res, exp) 125 | 126 | # make a small prediction 127 | fx = pd.DataFrame( 128 | [[1, 1, 1], 129 | [1, 1, 2]], 130 | columns=['Intercept', 'x1', 'x2'], 131 | index=['f1', 'f2']) 132 | 133 | rp1 = self.r1_.predict([[1, 1, 1], 134 | [1, 1, 2]]) 135 | rp2 = self.r2_.predict([[1, 1, 1], 136 | [1, 1, 2]]) 137 | exp = pd.DataFrame({'y1': rp1, 138 | 'y2': rp2}, 139 | index=['f1', 'f2']) 140 | 141 | res = model.predict(X=fx) 142 | pdt.assert_frame_equal(res, exp) 143 | 144 | # test r2 145 | self.assertAlmostEqual(model.r2, 0.21981627865598752) 146 | 147 | def test_ols_ilr_inv_test(self): 148 | 149 | model = ols('x1 + x2', self.Y, self.X) 150 | model.fit() 151 | basis, _ = balance_basis(self.tree) 152 | # test pvalues 153 | exp = pd.DataFrame({'y1': self.r1_.pvalues, 154 | 'y2': self.r2_.pvalues}) 155 | pdt.assert_frame_equal(model.pvalues, exp) 156 | 157 | # test coefficients 158 | exp = pd.DataFrame({'y1': self.r1_.params, 159 | 'y2': self.r2_.params}) 160 | 161 | exp = pd.DataFrame(ilr_inv(exp, basis), 162 | columns=['c', 'b', 'a'], 163 | index=self.X.columns) 164 | 165 | res = model.coefficients(tree=self.tree) 166 | pdt.assert_frame_equal(res, exp) 167 | 168 | # test residuals 169 | exp = pd.DataFrame({'y1': self.r1_.resid, 170 | 'y2': self.r2_.resid}, 171 | index=self.Y.index) 172 | exp = pd.DataFrame(ilr_inv(exp, basis), 173 | index=self.Y.index, 174 | columns=['c', 'b', 'a']) 175 | res = model.residuals(tree=self.tree) 176 | pdt.assert_frame_equal(res, exp) 177 | 178 | # test prediction 179 | exp = 
pd.DataFrame({'y1': self.r1_.predict(), 180 | 'y2': self.r2_.predict()}, 181 | index=self.Y.index) 182 | exp = pd.DataFrame(ilr_inv(exp, basis), 183 | index=self.Y.index, 184 | columns=['c', 'b', 'a']) 185 | res = model.predict(tree=self.tree) 186 | pdt.assert_frame_equal(res, exp) 187 | 188 | def test_tvalues(self): 189 | model = ols('x1 + x2', self.Y, self.X) 190 | model.fit() 191 | 192 | exp = pd.DataFrame({'y1': self.r1_.tvalues, 193 | 'y2': self.r2_.tvalues}) 194 | pdt.assert_frame_equal(model.tvalues, exp) 195 | 196 | def test_mse(self): 197 | model = ols('x1 + x2', self.Y, self.X) 198 | model.fit() 199 | 200 | exp = pd.Series({'y1': self.r1_.mse_resid, 201 | 'y2': self.r2_.mse_resid}) 202 | pdt.assert_series_equal(model.mse, exp) 203 | 204 | def test_ess(self): 205 | model = ols('x1 + x2', self.Y, self.X) 206 | model.fit() 207 | 208 | exp = pd.Series({'y1': self.r1_.ess, 209 | 'y2': self.r2_.ess}) 210 | pdt.assert_series_equal(model.ess, exp) 211 | 212 | def test_loo(self): 213 | model = ols('x1 + x2', self.Y, self.X) 214 | model.fit() 215 | res = model.loo() 216 | exp = pd.read_csv(get_data_path('loo.csv'), index_col=0) 217 | pdt.assert_frame_equal(res, exp) 218 | 219 | def test_kfold(self): 220 | model = ols('x1 + x2', self.Y, self.X) 221 | model.fit() 222 | res = model.kfold(9) 223 | exp = pd.read_csv(get_data_path('kfold.csv'), index_col=0) 224 | pdt.assert_frame_equal(res, exp) 225 | 226 | def test_lovo(self): 227 | model = ols('x1 + x2', self.Y, self.X) 228 | model.fit() 229 | res = model.lovo() 230 | exp = pd.read_csv(get_data_path('lovo.csv'), index_col=0) 231 | pdt.assert_frame_equal(res, exp) 232 | 233 | 234 | class TestOLSCV(unittest.TestCase): 235 | """ Tests OLS regression with refactored matrix multiplication. """ 236 | def setUp(self): 237 | np.random.seed(0) 238 | b01, b11, b21 = 1, 2, -3 239 | b02, b12, b22 = 2, -1, 4 240 | n = 50 241 | x1 = np.linspace(0, 10, n) 242 | x2 = np.linspace(10, 15, n)**2 243 | e = np.random.normal(size=n) * 10 244 | y1 = b01 + b11 * x1 + b21 * x2 + e 245 | e = np.random.normal(size=n) * 10 246 | y2 = b02 + b12 * x1 + b22 * x2 + e 247 | Y = pd.DataFrame(np.vstack((y1, y2)).T, 248 | columns=['y1', 'y2']) 249 | 250 | B = pd.DataFrame([[b01, b11, b21], 251 | [b02, b12, b22]]) 252 | 253 | X = pd.DataFrame( 254 | np.vstack((np.ones(n), x1, x2)).T, 255 | columns=['Intercept', 'x1', 'x2']) 256 | 257 | self.Y = Y 258 | self.B = B 259 | self.X = X 260 | self.r1_ = OLS(endog=y1, exog=X).fit() 261 | self.r2_ = OLS(endog=y2, exog=X).fit() 262 | self.tree = TreeNode.read(['(c, (b,a)y2)y1;']) 263 | 264 | def test_loo(self): 265 | model = ols('x1 + x2', self.Y, self.X) 266 | model.fit() 267 | res = model.loo() 268 | exp = pd.read_csv(get_data_path('loo2.csv'), index_col=0) 269 | pdt.assert_frame_equal(res, exp) 270 | 271 | def test_kfold(self): 272 | model = ols('x1 + x2', self.Y, self.X) 273 | model.fit() 274 | res = model.kfold(9) 275 | exp = pd.read_csv(get_data_path('kfold2.csv'), index_col=0) 276 | pdt.assert_frame_equal(res, exp) 277 | 278 | def test_lovo(self): 279 | model = ols('x1 + x2', self.Y, self.X) 280 | model.fit() 281 | res = model.lovo() 282 | exp = pd.read_csv(get_data_path('lovo2.csv'), index_col=0) 283 | pdt.assert_frame_equal(res, exp) 284 | 285 | 286 | if __name__ == "__main__": 287 | unittest.main() 288 | --------------------------------------------------------------------------------
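(Aside added for illustration, not part of the original test suite: the checks in test_ols_ilr_inv_test above condense to a short recipe -- fit the regression on balances, then request the coefficients with the tree argument, which the test verifies equals ilr_inv of the balance-space coefficients under the tree's balance_basis. A hypothetical end-to-end run, reusing the same setup values as the tests:

    import numpy as np
    import pandas as pd
    from skbio import TreeNode
    from gneiss.regression import ols

    np.random.seed(0)
    n = 50
    X = pd.DataFrame({'Intercept': np.ones(n),
                      'x1': np.linspace(0, 10, n),
                      'x2': np.linspace(10, 15, n)})
    e1 = np.random.normal(size=n) * 10
    e2 = np.random.normal(size=n) * 10
    Y = pd.DataFrame({'y1': 1 + 2 * X.x1 - 3 * X.x2 + e1,
                      'y2': 2 - 1 * X.x1 + 4 * X.x2 + e2})
    tree = TreeNode.read(['(c, (b,a)y2)y1;'])

    model = ols('x1 + x2', Y, X)
    model.fit()
    print(model.coefficients())           # per-balance coefficients (y1, y2)
    print(model.coefficients(tree=tree))  # same coefficients over the tips c, b, a
    print(model.r2)

The same tree argument works for residuals() and predict(), as the tests above exercise, so results can be inspected either in balance space or back on the original features.)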