├── doc ├── source │ ├── _static │ │ └── dummy │ ├── sort.rst │ ├── util.rst │ ├── balances.rst │ ├── plot.rst │ ├── cluster.rst │ ├── regression.rst │ └── index.rst ├── Makefile └── README.md ├── gneiss ├── cluster │ ├── tests │ │ ├── __init__.py │ │ ├── data │ │ │ ├── test_metadata.txt │ │ │ ├── tree.qza │ │ │ ├── polytomy.qza │ │ │ ├── weighted.biom.qza │ │ │ ├── test_gradient.biom.qza │ │ │ └── test_composition.biom.qza │ │ └── test_pba.py │ ├── __init__.py │ └── _pba.py ├── regression │ ├── tests │ │ ├── __init__.py │ │ ├── data │ │ │ ├── not-regression.pickle │ │ │ ├── lme.pickle │ │ │ ├── ols.pickle │ │ │ ├── lme_tree.qza │ │ │ ├── ols_tree.qza │ │ │ ├── test_tree.qza │ │ │ ├── lme_balances.qza │ │ │ ├── ols_balances.qza │ │ │ ├── test_lme_composition.qza │ │ │ ├── test_ols_composition.qza │ │ │ ├── lovo.csv │ │ │ ├── lovo2.csv │ │ │ ├── exp_ols_results2.txt │ │ │ ├── exp_lme_results2.txt │ │ │ ├── kfold2.csv │ │ │ ├── exp_lme_results.txt │ │ │ ├── exp_ols_results.txt │ │ │ ├── kfold.csv │ │ │ ├── loo.csv │ │ │ ├── loo2.csv │ │ │ ├── coefficients.csv │ │ │ └── pvalues.csv │ │ ├── test_mixedlm.py │ │ ├── test_model.py │ │ └── test_ols.py │ ├── __init__.py │ └── _model.py ├── __init__.py ├── composition │ ├── __init__.py │ ├── tests │ │ ├── test_variance.py │ │ └── test_composition.py │ ├── _variance.py │ └── _composition.py ├── plot │ ├── __init__.py │ ├── tests │ │ ├── data │ │ │ └── example.nwk │ │ ├── test_radial.py │ │ ├── test_regression_plot.py │ │ ├── test_dendrogram.py │ │ ├── test_heatmap.py │ │ └── test_decompose.py │ └── _radial.py ├── _model.py ├── tests │ ├── test_model.py │ ├── data │ │ └── large_tree.nwk │ ├── test_balances.py │ └── test_sort.py ├── sort.py └── balances.py ├── ci ├── conda_requirements.txt ├── pip_requirements.txt └── environment.yml ├── ipynb ├── images │ ├── Slide1.jpg │ ├── Slide2.jpg │ ├── Slide3.jpg │ └── Slide4.jpg ├── 88soils │ └── 238_otu_table.biom └── cfstudy │ ├── cfstudy_taxonomy.qza │ ├── cfstudy_common.biom.qza │ 
└── cfstudy-qiime2-tutorial.ipynb ├── MANIFEST.in ├── .coveragerc ├── Makefile ├── .gitignore ├── .travis.yml ├── .github └── workflows │ └── master.yml ├── COPYING.txt ├── README.md ├── setup.py └── CHANGELOG.md /doc/source/_static/dummy: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gneiss/cluster/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gneiss/regression/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /doc/source/sort.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: gneiss.sort -------------------------------------------------------------------------------- /doc/source/util.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: gneiss.util 2 | -------------------------------------------------------------------------------- /doc/source/balances.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: gneiss.balances 2 | -------------------------------------------------------------------------------- /doc/source/plot.rst: -------------------------------------------------------------------------------- 1 | .. 
automodule:: gneiss.plot 2 | 3 | 4 | -------------------------------------------------------------------------------- /ci/conda_requirements.txt: -------------------------------------------------------------------------------- 1 | pip 2 | biom-format 3 | bokeh=1.1.0 4 | -------------------------------------------------------------------------------- /doc/source/cluster.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: gneiss.cluster 2 | 3 | 4 | -------------------------------------------------------------------------------- /gneiss/regression/tests/data/not-regression.pickle: -------------------------------------------------------------------------------- 1 | asdfasdfasdff 2 | -------------------------------------------------------------------------------- /doc/source/regression.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: gneiss.regression 2 | 3 | 4 | -------------------------------------------------------------------------------- /ci/pip_requirements.txt: -------------------------------------------------------------------------------- 1 | coveralls 2 | sphinx 3 | pycodestyle 4 | flake8 5 | -------------------------------------------------------------------------------- /ipynb/images/Slide1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/ipynb/images/Slide1.jpg -------------------------------------------------------------------------------- /ipynb/images/Slide2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/ipynb/images/Slide2.jpg -------------------------------------------------------------------------------- /ipynb/images/Slide3.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/biocore/gneiss/HEAD/ipynb/images/Slide3.jpg -------------------------------------------------------------------------------- /ipynb/images/Slide4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/ipynb/images/Slide4.jpg -------------------------------------------------------------------------------- /gneiss/cluster/tests/data/test_metadata.txt: -------------------------------------------------------------------------------- 1 | x y 2 | s1 1 a 3 | s2 2 a 4 | s3 3 a 5 | s4 4 a 6 | s5 5 a 7 | -------------------------------------------------------------------------------- /gneiss/cluster/tests/data/tree.qza: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/cluster/tests/data/tree.qza -------------------------------------------------------------------------------- /ipynb/88soils/238_otu_table.biom: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/ipynb/88soils/238_otu_table.biom -------------------------------------------------------------------------------- /ipynb/cfstudy/cfstudy_taxonomy.qza: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/ipynb/cfstudy/cfstudy_taxonomy.qza -------------------------------------------------------------------------------- /gneiss/cluster/tests/data/polytomy.qza: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/cluster/tests/data/polytomy.qza -------------------------------------------------------------------------------- /gneiss/regression/tests/data/lme.pickle: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/regression/tests/data/lme.pickle -------------------------------------------------------------------------------- /gneiss/regression/tests/data/ols.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/regression/tests/data/ols.pickle -------------------------------------------------------------------------------- /ipynb/cfstudy/cfstudy_common.biom.qza: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/ipynb/cfstudy/cfstudy_common.biom.qza -------------------------------------------------------------------------------- /gneiss/regression/tests/data/lme_tree.qza: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/regression/tests/data/lme_tree.qza -------------------------------------------------------------------------------- /gneiss/regression/tests/data/ols_tree.qza: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/regression/tests/data/ols_tree.qza -------------------------------------------------------------------------------- /gneiss/cluster/tests/data/weighted.biom.qza: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/cluster/tests/data/weighted.biom.qza -------------------------------------------------------------------------------- /gneiss/regression/tests/data/test_tree.qza: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/regression/tests/data/test_tree.qza -------------------------------------------------------------------------------- 
/gneiss/regression/tests/data/lme_balances.qza: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/regression/tests/data/lme_balances.qza -------------------------------------------------------------------------------- /gneiss/regression/tests/data/ols_balances.qza: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/regression/tests/data/ols_balances.qza -------------------------------------------------------------------------------- /gneiss/cluster/tests/data/test_gradient.biom.qza: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/cluster/tests/data/test_gradient.biom.qza -------------------------------------------------------------------------------- /gneiss/cluster/tests/data/test_composition.biom.qza: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/cluster/tests/data/test_composition.biom.qza -------------------------------------------------------------------------------- /gneiss/regression/tests/data/test_lme_composition.qza: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/regression/tests/data/test_lme_composition.qza -------------------------------------------------------------------------------- /gneiss/regression/tests/data/test_ols_composition.qza: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/biocore/gneiss/HEAD/gneiss/regression/tests/data/test_ols_composition.qza -------------------------------------------------------------------------------- /gneiss/regression/tests/data/lovo.csv: 
-------------------------------------------------------------------------------- 1 | ,mse,Rsquared,R2diff 2 | Intercept,4310.47487689949,0.21981627865598752,0.0 3 | x1,4310.4748768994905,0.21981627865598752,0.0 4 | x2,4310.4748768994905,0.21981627865598752,0.0 5 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include .coveragerc 2 | include CHANGELOG.md 3 | include COPYING.txt 4 | include Makefile 5 | include README.md 6 | 7 | graft gneiss 8 | 9 | global-exclude *.pyc 10 | global-exclude *.pyo 11 | global-exclude .git 12 | -------------------------------------------------------------------------------- /gneiss/regression/tests/data/lovo2.csv: -------------------------------------------------------------------------------- 1 | ,mse,Rsquared,R2diff 2 | Intercept,4309.602746314058,0.9949205109438772,5.546379782983557e-05 3 | x1,4305.631922549035,0.994925191132991,5.078360871602072e-05 4 | x2,6910.497401174743,0.9918549810764683,0.0031209936652387693 5 | -------------------------------------------------------------------------------- /gneiss/regression/tests/data/exp_ols_results2.txt: -------------------------------------------------------------------------------- 1 | Simplicial Least Squares Results 2 | ============================ 3 | No. Observations 5.0000 4 | Model: OLS 5 | Rsquared: 0.4405 6 | ---------------------------- 7 | c Intercept real 8 | ---------------------------- 9 | Y1 slope 1.60E+00 6.00E-01 10 | Y1 pvalue 1.28E-01 8.05E-02 11 | ============================ 12 | -------------------------------------------------------------------------------- /gneiss/__init__.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 
3 | # 4 | # Distributed under the terms of the Modified BSD License. 5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 7 | # ---------------------------------------------------------------------------- 8 | 9 | __version__ = "0.4.6" 10 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | # this file is based on the examples provided on scikit-learn's .coveragerc 2 | 3 | [run] 4 | omit = 5 | */tests* 6 | */__init__.py 7 | */gneiss/layouts.py 8 | source = gneiss 9 | branch = True 10 | include = */gneiss/* 11 | 12 | [report] 13 | exclude_lines = 14 | pragma: no cover 15 | raise NotImplementedError 16 | if __name__ == .__main__.: 17 | omit = 18 | */tests* 19 | */__init__.py 20 | */gneiss/layouts.py -------------------------------------------------------------------------------- /gneiss/regression/tests/data/exp_lme_results2.txt: -------------------------------------------------------------------------------- 1 | Simplicial Mixed Linear Model Results 2 | ==================================================== 3 | No. Observations 1600.0000 Model: Simplicial MixedLM 4 | ----------------------------------------------------- 5 | Intercept groups RE x1 x2 6 | ----------------------------------------------------- 7 | Y1 slope 4.21E+00 9.36E-02 1.02E+00 9.25E-01 8 | Y1 pvalue 4.83E-236 4.42E-05 3.97E-35 3.57E-30 9 | ==================================================== 10 | -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | .. gneiss documentation master file, created by 2 | sphinx-quickstart on Sat Nov 26 16:35:10 2016. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to gneiss's documentation! 
7 | ================================== 8 | 9 | Contents: 10 | 11 | .. toctree:: 12 | :maxdepth: 1 13 | 14 | balances 15 | regression 16 | cluster 17 | plot 18 | sort 19 | util 20 | 21 | 22 | 23 | Indices and tables 24 | ================== 25 | 26 | * :ref:`genindex` 27 | * :ref:`modindex` 28 | * :ref:`search` 29 | 30 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .DEFAULT_GOAL := help 2 | 3 | ifeq ($(WITH_COVERAGE), TRUE) 4 | TEST_COMMAND = COVERAGE_FILE=.coverage coverage run --rcfile .coveragerc setup.py nosetests --with-doctest 5 | else 6 | TEST_COMMAND = nosetests --with-doctest 7 | endif 8 | 9 | help: 10 | @echo 'Use "make test" to run all the unit tests and docstring tests.' 11 | @echo 'Use "make pep8" to validate PEP8 compliance.' 12 | @echo 'Use "make html" to create html documentation with sphinx' 13 | @echo 'Use "make all" to run all the targets listed above.' 
14 | test: 15 | $(TEST_COMMAND) 16 | pep8: 17 | flake8 gneiss setup.py --ignore E303,E731,E722 18 | 19 | all: pep8 test 20 | -------------------------------------------------------------------------------- /gneiss/regression/tests/data/kfold2.csv: -------------------------------------------------------------------------------- 1 | ,model_mse,Rsquared,pred_mse 2 | fold_0,3584.5769364882744,0.9916720125376085,663.109474872684 3 | fold_1,3610.186079514632,0.9928445881359773,428.0519957464969 4 | fold_2,3768.107015449521,0.9930668932082859,197.3834417792511 5 | fold_3,3680.381544872904,0.9936245114219088,279.11469601532144 6 | fold_4,3021.72858660822,0.9948952737882077,960.8363758112072 7 | fold_5,3533.664042171059,0.9939605493886041,445.10100399311614 8 | fold_6,3390.1041044617687,0.993817702859089,581.6366207655954 9 | fold_7,3409.878710170101,0.9930604339509851,588.5085391323519 10 | fold_8,3369.763753775731,0.9915125403479148,709.6491871580773 11 | -------------------------------------------------------------------------------- /gneiss/regression/tests/data/exp_lme_results.txt: -------------------------------------------------------------------------------- 1 | Simplicial Mixed Linear Model Results 2 | ==================================================== 3 | No. 
Observations 1600.0000 Model: Simplicial MixedLM 4 | ----------------------------------------------------- 5 | Intercept groups RE x1 x2 6 | ----------------------------------------------------- 7 | Y1 slope 4.21E+00 9.36E-02 1.02E+00 9.25E-01 8 | Y1 pvalue 4.83E-236 4.42E-05 3.97E-35 3.57E-30 9 | Y2 slope 2.12E-01 9.36E-02 1.02E+00 9.25E-01 10 | Y2 pvalue 9.94E-02 4.42E-05 3.97E-35 3.57E-30 11 | ==================================================== 12 | -------------------------------------------------------------------------------- /gneiss/regression/tests/data/exp_ols_results.txt: -------------------------------------------------------------------------------- 1 | Simplicial Least Squares Results 2 | ================================== 3 | No. Observations 5.0000 4 | Model: OLS 5 | Rsquared: 0.4405 6 | ---------------------------------- 7 | mse Rsquared R2diff 8 | ---------------------------------- 9 | Intercept 2.1409 0.8916 -0.4511 10 | real 2.1000 0.0000 0.4405 11 | ---------------------------------- 12 | model_mse Rsquared pred_mse 13 | ---------------------------------- 14 | fold_0 0.0000 1.0000 12.5000 15 | fold_1 0.0000 1.0000 14.5000 16 | ================================== 17 | -------------------------------------------------------------------------------- /gneiss/regression/tests/data/kfold.csv: -------------------------------------------------------------------------------- 1 | ,model_mse,Rsquared,pred_mse 2 | fold_0,3649.961032819785,0.13217034860627497,483.0402110305879 3 | fold_1,3691.4006801996566,0.2095347670376564,403.8071396736002 4 | fold_2,3830.37833521066,0.15015827888995004,206.67020202108148 5 | fold_3,3729.7569631036204,0.1728096904238252,302.4686128376491 6 | fold_4,3135.1878975392115,0.21496560314322954,897.5468948642697 7 | fold_5,3587.243280769308,0.20003652741163214,453.8238419838613 8 | fold_6,3445.455291051452,0.17658737964869753,599.173109675699 9 | fold_7,3471.7837469746355,0.16790161609540577,604.6536872291142 10 | 
fold_8,3485.6635258096953,0.12600459767177707,552.1337252734353 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Temporary files 2 | *~ 3 | \#*# 4 | 5 | *.py[cod] 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Packages 11 | *.egg 12 | *.egg-info 13 | dist 14 | build 15 | eggs 16 | parts 17 | bin 18 | var 19 | sdist 20 | develop-eggs 21 | .installed.cfg 22 | lib 23 | lib64 24 | __pycache__ 25 | 26 | # Installer logs 27 | pip-log.txt 28 | 29 | # Unit test / coverage reports 30 | .coverage 31 | .tox 32 | nosetests.xml 33 | 34 | # Translations 35 | *.mo 36 | 37 | # Mr Developer 38 | .mr.developer.cfg 39 | .project 40 | .pydevproject 41 | 42 | # vi 43 | .*.swp 44 | 45 | # Sphinx builds 46 | doc/source/generated 47 | 48 | # OSX files 49 | .DS_Store 50 | 51 | # IPython checkpoints 52 | .ipynb_checkpoints -------------------------------------------------------------------------------- /gneiss/composition/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Composition functions (:mod:`gneiss.composition`) 3 | ================================================= 4 | 5 | .. currentmodule:: gneiss.composition 6 | 7 | This module contains compositional functions 8 | 9 | Functions 10 | --------- 11 | 12 | .. autosummary:: 13 | :toctree: generated/ 14 | 15 | variation_matrix 16 | 17 | """ 18 | # ---------------------------------------------------------------------------- 19 | # Copyright (c) 2016--, gneiss development team. 20 | # 21 | # Distributed under the terms of the Modified BSD License. 22 | # 23 | # The full license is in the file COPYING.txt, distributed with this software. 
24 | # ---------------------------------------------------------------------------- 25 | from ._composition import ilr_transform 26 | from ._variance import variation_matrix 27 | 28 | 29 | __all__ = ["ilr_transform", "variation_matrix"] 30 | -------------------------------------------------------------------------------- /gneiss/plot/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Plotting functions (:mod:`gneiss.plot`) 3 | =============================================== 4 | 5 | .. currentmodule:: gneiss.plot 6 | 7 | This module contains plotting functionality 8 | 9 | Functions 10 | --------- 11 | 12 | .. autosummary:: 13 | :toctree: generated/ 14 | 15 | heatmap 16 | radialplot 17 | balance_boxplot 18 | balance_barplots 19 | """ 20 | # ---------------------------------------------------------------------------- 21 | # Copyright (c) 2016--, gneiss development team. 22 | # 23 | # Distributed under the terms of the Modified BSD License. 24 | # 25 | # The full license is in the file COPYING.txt, distributed with this software. 26 | # ---------------------------------------------------------------------------- 27 | 28 | from ._heatmap import heatmap 29 | from ._radial import radialplot 30 | from ._decompose import balance_boxplot, balance_barplots, proportion_plot 31 | 32 | 33 | __all__ = ["heatmap", "radialplot", "balance_boxplot", 34 | "balance_barplots", "proportion_plot"] 35 | -------------------------------------------------------------------------------- /gneiss/regression/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Regression functions (:mod:`gneiss.regression`) 3 | =============================================== 4 | 5 | .. currentmodule:: gneiss.regression 6 | 7 | This module contains functions that can convert proportions 8 | to balances for regression analysis 9 | 10 | Functions 11 | --------- 12 | 13 | .. 
autosummary:: 14 | :toctree: generated/ 15 | 16 | ols 17 | mixedlm 18 | 19 | Classes 20 | ------- 21 | .. autosummary:: 22 | :toctree: generated/ 23 | 24 | OLSModel 25 | LMEModel 26 | 27 | """ 28 | # ---------------------------------------------------------------------------- 29 | # Copyright (c) 2016--, gneiss development team. 30 | # 31 | # Distributed under the terms of the Modified BSD License. 32 | # 33 | # The full license is in the file COPYING.txt, distributed with this software. 34 | # ---------------------------------------------------------------------------- 35 | from ._ols import ols, OLSModel 36 | from ._mixedlm import mixedlm, LMEModel 37 | 38 | 39 | __all__ = ["ols", "OLSModel", "mixedlm", "LMEModel"] 40 | -------------------------------------------------------------------------------- /gneiss/cluster/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Clustering functions (:mod:`gneiss.cluster`) 3 | ============================================ 4 | 5 | .. currentmodule:: gneiss.cluster 6 | 7 | This module contains functions to build hierarchical clusterings. 8 | 9 | 10 | Functions 11 | --------- 12 | 13 | .. autosummary:: 14 | :toctree: generated/ 15 | 16 | correlation_linkage 17 | gradient_linkage 18 | rank_linkage 19 | random_linkage 20 | 21 | """ 22 | # ---------------------------------------------------------------------------- 23 | # Copyright (c) 2016--, gneiss development team. 24 | # 25 | # Distributed under the terms of the Modified BSD License. 26 | # 27 | # The full license is in the file COPYING.txt, distributed with this software. 
28 | # ---------------------------------------------------------------------------- 29 | from ._pba import (correlation_linkage, gradient_linkage, 30 | rank_linkage, random_linkage) 31 | 32 | 33 | __all__ = ['correlation_linkage', 'gradient_linkage', 34 | 'rank_linkage', 'random_linkage'] 35 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Travis yml file inspired by scikit-bio 2 | # Check on http://lint.travis-ci.org/ after modifying it! 3 | sudo: false 4 | language: python 5 | env: 6 | - PYVERSION=3.6 USE_CYTHON=TRUE MAKE_DOC=TRUE 7 | before_install: 8 | - export MPLBACKEND='Agg' 9 | - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh 10 | - chmod +x miniconda.sh 11 | - ./miniconda.sh -b 12 | - export PATH=/home/travis/miniconda3/bin:$PATH 13 | # Update conda itself 14 | - conda update --yes conda 15 | # Useful for debugging any issues with conda 16 | - conda info -a 17 | install: 18 | - conda create --yes -n test_env python=$PYVERSION 19 | - conda install --yes -n test_env --file ci/conda_requirements.txt -c biocore 20 | - conda install --yes -n test_env cython 21 | - source activate test_env 22 | - pip install -r ci/pip_requirements.txt 23 | - pip install -e . 
24 | script: 25 | - WITH_COVERAGE=TRUE make all 26 | - if [ ${MAKE_DOC} ]; then make -C doc clean html; fi 27 | after_success: 28 | - coveralls 29 | notifications: 30 | webhooks: 31 | on_success: change 32 | on_failure: always 33 | -------------------------------------------------------------------------------- /.github/workflows/master.yml: -------------------------------------------------------------------------------- 1 | # much of this is taken from the Empress main.yml file 2 | name: gneiss CI 3 | 4 | on: 5 | pull_request: 6 | branches: 7 | - master 8 | push: 9 | branches: 10 | - master 11 | 12 | jobs: 13 | build: 14 | runs-on: ubuntu-latest 15 | 16 | strategy: 17 | matrix: 18 | python-version: ["3.8", "3.9"] 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | with: 23 | persist-credentials: false 24 | fetch-depth: 0 25 | 26 | - uses: conda-incubator/setup-miniconda@v2 27 | with: 28 | activate-environment: gneiss 29 | python-version: ${{ matrix.python-version }} 30 | 31 | - name: Test conda installation 32 | shell: bash -l {0} 33 | run: conda info 34 | 35 | - name: Install conda packages 36 | shell: bash -l {0} 37 | run: conda install -c conda-forge statsmodels scikit-bio biom-format matplotlib flake8 38 | 39 | - name: Install gneiss 40 | shell: bash -l {0} 41 | run: pip install -e .[dev] 42 | 43 | - name: Run tests 44 | shell: bash -l {0} 45 | run: make all 46 | -------------------------------------------------------------------------------- /gneiss/plot/tests/data/example.nwk: -------------------------------------------------------------------------------- 1 | 
((((y15:0.200853,(y31:0.42924,(y47:0.914445,y48:0.837693)y32:0.266535)y16:0.327741)y7:0.743731,((y33:0.0653228,y34:0.0404005)y17:0.0864676,((y49:0.26801,(y67:0.340285,(y85:0.135346,y86:0.55117)y68:0.291215)y50:0.332233)y35:0.370523,((y69:0.38023,y70:0.991233)y51:0.647156,(y71:0.615186,y72:0.781904)y52:0.168594)y36:0.732766)y18:0.663758)y8:0.404288)y3:0.591153,((y19:0.565967,y20:0.952246)y9:0.539617,(y21:0.459132,y22:0.269279)y10:0.86029)y4:0.102227)y1:0.569204,(((y23:0.38369,y24:0.856949)y11:0.939149,(y25:0.518678,(y37:0.569999,(y53:0.414425,(y73:0.458147,y74:0.027975)y54:0.00158475)y38:0.199839)y26:0.561358)y12:0.381204)y5:0.472245,(((y39:0.861009,(y55:0.0324591,(y75:0.01456,y76:0.755587)y56:0.94357)y40:0.798439)y27:0.527629,((y57:0.344423,y58:0.0695154)y41:0.230867,(y59:0.656657,(y77:0.473771,y78:0.0236346)y60:0.648203)y42:0.218781)y28:0.763701)y13:0.432767,((y43:0.258421,(y61:0.81704,y62:0.0208181)y44:0.253458)y29:0.41618,((y63:0.445669,(y79:0.223196,(y87:0.659824,y88:0.426299)y80:0.648506)y64:0.506309)y45:0.12089,((y81:0.875534,(y89:0.743842,y90:0.416172)y82:0.306387)y65:0.507717,((y91:0.590584,y92:0.21759)y83:0.846197,(y93:0.377969,(y95:0.591409,(y97:0.0172002,y98:0.612128)y96:0.492351)y94:0.346931)y84:0.505284)y66:0.910185)y46:0.332695)y30:0.91627)y14:0.76228)y6:0.379615)y2:0.802265)y0; 2 | -------------------------------------------------------------------------------- /COPYING.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016--, gneiss development team. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 
9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, this 11 | list of conditions and the following disclaimer in the documentation and/or 12 | other materials provided with the distribution. 13 | 14 | * Neither the names scikit-bio, skbio, or biocore nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 25 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /gneiss/composition/tests/test_variance.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 3 | # 4 | # Distributed under the terms of the Modified BSD License. 5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 
7 | # ---------------------------------------------------------------------------- 8 | import numpy as np 9 | import pandas as pd 10 | import unittest 11 | from skbio import DistanceMatrix 12 | from skbio.util import get_data_path 13 | from gneiss.composition._variance import variation_matrix 14 | 15 | 16 | class TestVariationMatrix(unittest.TestCase): 17 | def setUp(self): 18 | pass 19 | 20 | def test_varmat1(self): 21 | X = pd.DataFrame({'x': np.arange(1, 10), 22 | 'y': np.arange(2, 11)}) 23 | res = variation_matrix(X) 24 | exp = DistanceMatrix([[0, 0.032013010420979787 / 2], 25 | [0.032013010420979787 / 2, 0]], ids=['x', 'y']) 26 | self.assertEqual(str(res), str(exp)) 27 | 28 | def test_varmat_larg(self): 29 | np.random.seed(123) 30 | D = 50 31 | N = 100 32 | mean = np.ones(D) * 10 33 | cov = np.eye(D) 34 | n__ = np.random.multivariate_normal(mean, cov, size=N) 35 | X = pd.DataFrame(np.abs(n__), columns=np.arange(D).astype(np.str)) 36 | res = variation_matrix(X) 37 | 38 | exp = DistanceMatrix.read(get_data_path('exp_varmat.txt')) 39 | self.assertEqual(str(res), str(exp)) 40 | 41 | 42 | if __name__ == '__main__': 43 | unittest.main() 44 | -------------------------------------------------------------------------------- /gneiss/composition/tests/test_composition.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 3 | # 4 | # Distributed under the terms of the Modified BSD License. 5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 
7 | # ---------------------------------------------------------------------------- 8 | import unittest 9 | import numpy as np 10 | import pandas as pd 11 | from gneiss.composition._composition import ilr_transform 12 | from gneiss.cluster import gradient_linkage 13 | import pandas.util.testing as pdt 14 | 15 | 16 | class TestILRTransform(unittest.TestCase): 17 | 18 | def test_ilr(self): 19 | np.random.seed(0) 20 | table = pd.DataFrame([[1, 1, 2, 2], 21 | [1, 2, 2, 1], 22 | [2, 2, 1, 1]], 23 | index=[1, 2, 3], 24 | columns=['a', 'b', 'c', 'd']) 25 | table = table.reindex(columns=np.random.permutation(table.columns)) 26 | ph = pd.Series([1, 2, 3], index=table.index) 27 | tree = gradient_linkage(table, ph) 28 | res_balances = ilr_transform(table, tree) 29 | exp_balances = pd.DataFrame( 30 | [[0.693147, -5.551115e-17, 2.775558e-17], 31 | [0.000000, -4.901291e-01, -4.901291e-01], 32 | [-0.693147, 5.551115e-17, -2.775558e-17]], 33 | columns=['y0', 'y1', 'y2'], 34 | index=[1, 2, 3]) 35 | pdt.assert_frame_equal(res_balances, exp_balances) 36 | 37 | 38 | if __name__ == '__main__': 39 | unittest.main() 40 | -------------------------------------------------------------------------------- /gneiss/_model.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 3 | # 4 | # Distributed under the terms of the Modified BSD License. 5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 7 | # ---------------------------------------------------------------------------- 8 | import abc 9 | import numpy as np 10 | 11 | 12 | class Model(metaclass=abc.ABCMeta): 13 | 14 | def __init__(self, Y, Xs): 15 | """ 16 | Abstract container for balance models. 17 | 18 | Parameters 19 | ---------- 20 | Y : pd.DataFrame 21 | Response matrix. This is the matrix being predicted. 
22 | Also known as the dependent variable in univariate analysis. 23 | Xs : iterable of pd.DataFrame 24 | Design matrices. Also known as the independent variable 25 | in univariate analysis. Note that this allows for multiple 26 | design matrices to be inputted to enable multiple data block 27 | analysis. 28 | """ 29 | self.response_matrix = Y 30 | self.design_matrices = Xs 31 | 32 | @abc.abstractmethod 33 | def fit(self, **kwargs): 34 | pass 35 | 36 | @abc.abstractmethod 37 | def summary(self): 38 | """ Print summary results """ 39 | pass 40 | 41 | def percent_explained(self): 42 | """ Proportion explained by each principal balance.""" 43 | # Using sum of squares error calculation (df=1) 44 | # instead of population variance (df=0). 45 | axis_vars = np.var(self.response_matrix, ddof=1, axis=0) 46 | return axis_vars / axis_vars.sum() 47 | -------------------------------------------------------------------------------- /gneiss/composition/_variance.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 3 | # 4 | # Distributed under the terms of the Modified BSD License. 5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 7 | # ---------------------------------------------------------------------------- 8 | import numpy as np 9 | from skbio import DistanceMatrix 10 | from skbio.stats.composition import closure 11 | 12 | 13 | def variation_matrix(X): 14 | r""" Calculate Aitchison variation matrix. 15 | 16 | This calculates the Aitchison variation matrix. Given a compositional 17 | matrix :math:`X`, and columns :math:`i` and :math:`j`, the :math:`ij` entry 18 | in the variation matrix of :math:`X` is given by 19 | 20 | .. 
math: 21 | V_{ij} = \frac{1}{2} var(\ln \frac{x_i}{x_j}) 22 | 23 | Parameters 24 | ---------- 25 | X : pd.DataFrame 26 | Contingency table where there are n rows corresponding to samples 27 | and p features corresponding to columns. 28 | 29 | Returns 30 | ------- 31 | skbio.DistanceMatrix 32 | Total variation matrix of size n x n. 33 | 34 | References 35 | ---------- 36 | .. [1] V. Pawlowsky-Glahn, J. J. Egozcue, R. Tolosana-Delgado (2015), 37 | Modeling and Analysis of Compositional Data, Wiley, Chichester, UK 38 | 39 | .. [2] J. J. Egozcue, V. Pawlowsky-Glahn (2004), Groups of Parts and 40 | Their Balances in Compositional Data Analysis, Mathematical Geology 41 | """ 42 | v = np.zeros((X.shape[1], X.shape[1])) 43 | x = closure(X) 44 | for i in range(X.shape[1]): 45 | for j in range(i): 46 | v[i, j] = np.var(np.log(x[:, i]) - np.log(x[:, j])) 47 | # Making matrix symmetry since V(ln (x/y) ) = V(ln (y/x) ) 48 | # Also dividing by 2, to ensure unit norm for balances. 49 | # See Eqn 4 in [2] 50 | return DistanceMatrix((v + v.T) / 2, ids=X.columns) 51 | -------------------------------------------------------------------------------- /gneiss/composition/_composition.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 3 | # 4 | # Distributed under the terms of the Modified BSD License. 5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 7 | # ---------------------------------------------------------------------------- 8 | import pandas as pd 9 | import skbio 10 | from skbio.stats.composition import ilr 11 | from gneiss.balances import balance_basis 12 | from gneiss.util import match_tips 13 | 14 | 15 | def ilr_transform(table: pd.DataFrame, tree: skbio.TreeNode) -> pd.DataFrame: 16 | """Performs isometric logratio (ilr) transformation on feature-table. 
17 | 18 | This creates a new table with balances (groups of features) that 19 | distinguish samples. Zeros must first be removed from the table 20 | (e.g. add-pseudocount). For source documentation check out: 21 | https://numpydoc.readthedocs.io/en/latest/ 22 | 23 | Parameters 24 | ----------- 25 | table : pd.DataFrame 26 | Dataframe of the feature table where rows correspond to samples 27 | and columns are features. The values within the table must be 28 | positive and nonzero. 29 | tree : skbio.TreeNode 30 | A tree relating all of the features to balances or 31 | log-contrasts (hierarchy). This tree must be bifurcating 32 | (i.e. has exactly 2 nodes). The internal nodes of the tree 33 | will be renamed. 34 | 35 | Returns 36 | -------- 37 | balances : pd.DataFrame 38 | Balances calculated from the feature table. Balance represents 39 | the log ratio of subchildren values below the specified internal node. 40 | """ 41 | _table, _tree = match_tips(table, tree) 42 | basis, _ = balance_basis(_tree) 43 | balances = ilr(_table.values, basis) 44 | in_nodes = [n.name for n in _tree.levelorder() if not n.is_tip()] 45 | return pd.DataFrame(balances, 46 | columns=in_nodes, 47 | index=table.index) 48 | -------------------------------------------------------------------------------- /gneiss/regression/tests/data/loo.csv: -------------------------------------------------------------------------------- 1 | ,model_mse,pred_mse 2 | 0,4260.496054701282,54.18622304652599 3 | 1,4249.729540651938,65.5315803644453 4 | 2,4308.381781538387,2.2473041294562393 5 | 3,4171.3649026828525,148.68431600602923 6 | 4,4229.0766673909075,86.62702948774607 7 | 5,4116.071957191982,206.04849363338943 8 | 6,4291.356272273773,20.185811261941183 9 | 7,4247.848387235389,65.88182699812373 10 | 8,4280.951200463445,30.952001700594646 11 | 9,4308.791284413965,1.7593744234763966 12 | 10,4296.811286205901,14.235755262856841 13 | 11,4266.038114539728,46.16820874785575 14 | 12,4294.513406076312,16.54048715628344 
15 | 13,4198.80916007727,115.44075421081786 16 | 14,4299.933519918486,10.873972613900444 17 | 15,4308.962558527042,1.5569522496643602 18 | 16,4146.048139786893,168.97881959977258 19 | 17,4271.59957874892,39.888553605911454 20 | 18,4283.214605760981,27.93225479528788 21 | 19,4240.267113131621,71.85358170168348 22 | 20,3865.3123050881723,455.1507540244841 23 | 21,4298.985490333344,11.737955025509407 24 | 22,4212.7032680853445,99.82809146668683 25 | 23,4195.950083899159,116.88780700999078 26 | 24,4072.6177711303317,242.71727889272387 27 | 25,4159.708341918885,153.8471722349595 28 | 26,4270.2705100334715,41.033911977299155 29 | 27,4287.618745208718,23.3368769592242 30 | 28,4189.413285610791,123.68071225408006 31 | 29,4201.68539487846,111.23040863550231 32 | 30,4225.484906018272,86.9823149007921 33 | 31,4271.267305724941,40.17406365429842 34 | 32,4271.840582007024,39.64126877885141 35 | 33,3977.404975585566,342.2907962328493 36 | 34,4223.808909462919,89.22378741677774 37 | 35,4162.58769041999,152.55352974220145 38 | 36,4162.168549226462,153.32005926284594 39 | 37,4194.701126330643,119.97354476493928 40 | 38,4211.112602919985,103.23385330388982 41 | 39,4283.057863114158,28.56510466677602 42 | 40,4251.739438646766,61.37924034589412 43 | 41,4211.337569965258,103.93346843059663 44 | 42,4213.677952181935,101.82844803208135 45 | 43,4004.6251255020197,322.9223825214111 46 | 44,4309.593365853613,0.9343173620906449 47 | 45,4307.59891842044,3.0607029500315304 48 | 46,4265.732051685689,47.82228161965757 49 | 47,4136.159353565893,187.15821681904137 50 | 48,4238.3420772679465,77.81628431034508 51 | 49,4302.189190917612,8.983205465045119 52 | -------------------------------------------------------------------------------- /gneiss/regression/tests/data/loo2.csv: -------------------------------------------------------------------------------- 1 | ,model_mse,pred_mse 2 | 0,4210.254252226571,62.7244814276346 3 | 1,4211.637544078904,59.28731330907396 4 | 
2,4261.16964292544,1.5632108577163226 5 | 3,4120.227706392463,158.4176551220517 6 | 4,4194.81383810487,74.12893770610398 7 | 5,4062.59080988408,215.8389185989188 8 | 6,4249.074649016179,14.3801085059596 9 | 7,4201.031230679247,65.0913132126485 10 | 8,4230.052330598066,34.14975753720326 11 | 9,4260.674247388393,1.956655442219737 12 | 10,4248.808610693286,14.312123274205462 13 | 11,4217.133108158621,47.20095190398427 14 | 12,4247.62638198413,15.485331316173468 15 | 13,4158.5575171719565,107.87520269124951 16 | 14,4248.736427270089,14.326680157044192 17 | 15,4262.180211177333,0.37917285947764945 18 | 16,4102.386908392766,166.41818121291553 19 | 17,4219.178090822697,45.11498285380986 20 | 18,4242.171042180773,21.222511951309993 21 | 19,4195.86950494634,69.54045197741316 22 | 20,3817.4178283650426,464.80939115822355 23 | 21,4243.375657832784,20.038443946057576 24 | 22,4139.460083408845,128.7706636543021 25 | 23,4167.699582287961,99.28472513339455 26 | 24,3993.4915035994995,281.73012807025697 27 | 25,4129.178914240647,139.64996281723694 28 | 26,4232.6815844335015,31.26152893262742 29 | 27,4248.3445753384085,14.856826916275455 30 | 28,4130.719920325524,137.79891120579325 31 | 29,4139.821025182945,128.15082264058097 32 | 30,4188.892773390983,76.81691339553873 33 | 31,4210.650307206859,54.0553002064334 34 | 32,4224.565303799641,39.51067743285984 35 | 33,3950.9211117494106,323.80385337396444 36 | 34,4165.780677853521,100.45174573187475 37 | 35,4101.517281439408,167.06442738984504 38 | 36,4104.628998644643,163.81966431619293 39 | 37,4145.145229741029,121.85610388044178 40 | 38,4165.6365980300525,100.72568307909934 41 | 39,4235.22250796671,28.467179798193584 42 | 40,4201.176969556616,64.170451061669 43 | 41,4163.859850749393,103.71697665477915 44 | 42,4158.274885579024,110.33395650500965 45 | 43,3970.6676233926455,311.57982208008514 46 | 44,4259.023597799898,3.801644051465528 47 | 45,4261.765412215231,0.8537315788551669 48 | 46,4196.769182277087,73.21731835996582 49 | 
47,4119.239750993762,162.82391090276036 50 | 48,4153.57187529307,126.91052991408179 51 | 49,4249.5788963444775,15.553691413241404 52 | -------------------------------------------------------------------------------- /gneiss/tests/test_model.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 3 | # 4 | # Distributed under the terms of the Modified BSD License. 5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 7 | # ---------------------------------------------------------------------------- 8 | import pandas as pd 9 | import statsmodels.formula.api as smf 10 | from skbio import TreeNode 11 | from gneiss._model import Model 12 | import unittest 13 | import os 14 | import pandas.util.testing as pdt 15 | 16 | 17 | # create some mock classes for testing 18 | class submock_ok(Model): 19 | def __init__(self, *args, **kwargs): 20 | super().__init__(*args, **kwargs) 21 | 22 | def summary(self): 23 | print("OK!") 24 | 25 | def fit(self, **kwargs): 26 | pass 27 | 28 | 29 | class submock_bad(Model): 30 | def __init__(self, **kwargs): 31 | super(Model, self, **kwargs) 32 | 33 | 34 | class TestModel(unittest.TestCase): 35 | def setUp(self): 36 | self.pickle_fname = "test.pickle" 37 | self.data = pd.DataFrame([[1, 1, 1], 38 | [3, 2, 3], 39 | [4, 3, 2], 40 | [5, 4, 4], 41 | [2, 5, 3], 42 | [3, 6, 5], 43 | [4, 7, 4]], 44 | index=['s1', 's2', 's3', 's4', 45 | 's5', 's6', 's7'], 46 | columns=['Y1', 'Y2', 'X']) 47 | 48 | self.model1 = smf.ols(formula="Y1 ~ X", data=self.data) 49 | self.model2 = smf.ols(formula="Y2 ~ X", data=self.data) 50 | 51 | self.basis = pd.DataFrame([[0.80442968, 0.19557032]], 52 | index=['a'], 53 | columns=['x', 'y']) 54 | self.tree = TreeNode.read(['(x, y)a;']) 55 | self.balances = pd.DataFrame({'a': [-1, 0, 1]}) 56 | self.metadata = pd.DataFrame( 57 | 
[[1], [3], [2]], 58 | columns=['X']) 59 | 60 | def tearDown(self): 61 | if os.path.exists(self.pickle_fname): 62 | os.remove(self.pickle_fname) 63 | 64 | def test_init(self): 65 | res = submock_ok(Y=self.balances, Xs=self.metadata) 66 | 67 | # check balances 68 | pdt.assert_frame_equal(self.balances, res.response_matrix) 69 | 70 | 71 | if __name__ == '__main__': 72 | unittest.main() 73 | -------------------------------------------------------------------------------- /gneiss/tests/data/large_tree.nwk: -------------------------------------------------------------------------------- 1 | (((((((1122517:0.06882,((252012:0.00548,588042:0.03795)45:0.00078,1121144:0.00648)34:0.0202)22:0.02416,(3330572:0.10987,279138:0.06878)23:0.01632)13:0.05558,(((75371:0.0859,214611:0.07975)35:0.00449,(143135:0.09553,356045:0.1125)36:0.00595)24:0.01277,160908:0.07333)14:0.105)7:0.07707,(((112795:0.09046,357011:0.13893)25:0.07603,4447334:0.25156)15:0.00593,((732929:0.14242,831289:0.16867)26:0.00658,(((1638797:0.02978,(((792450:0.04478,4681:0.02314)80:0.00634,((83531:0.02222,213177:0.01197)93:0.00769,(215692:0.01593,319907:0.05481)94:0.01631)81:0.00808)69:0.00336,208293:0.01894)57:0.01132)46:0.05063,((1806981:0.01642,(523224:0.03288,((148783:0.01871,146397:0.02416)95:0.00014,(148890:0.01623,146676:0.01235)96:0.0011)82:0.00547)70:0.01097)58:0.00721,(222209:0.0811,216805:0.02552)59:0.05218)47:0.01742)37:0.02315,((1137157:0.00854,1139779:0.01691)48:0.00141,(4362556:0.02248,4416927:0.00937)49:0.01111)38:0.07136)27:0.07544)16:0.01742)8:0.0766)4:0.02393,136959:0.21412)2:0.01293,((((1094976:0.27174,(2601820:0.12574,1124701:0.03443)28:0.21253)17:0.00958,(171768:0.31195,(211848:0.24113,845780:0.16002)29:0.0206)18:0.02439)9:0.25276,(3431064:0.06261,4423681:0.00907)10:0.39256)5:0.04968,((3749019:0.23613,(((1876538:0.09481,((((4468200:0.02519,((1146003:0.01465,1142972:0.00893)104:0.01089,1122202:0.01897)97:0.00342)83:0.01099,(216549:0.00841,(607006:0.02324,663880:0.02481)98:0.01213)84:0.01983)71:0.006
54,(((4440638:0.00637,((697997:0.01101,4346060:0.02106)109:0.00484,804187:0.01941)105:0.00823)99:0.0033,1108390:0.02694)85:0.01253,3639039:0.0138)72:0.01591)60:0.03741,4440611:0.02193)50:0.045)39:0.05434,572134:0.13989)30:0.05905,((((((((850823:0.01423,1123984:0.01446)106:0.00333,(2578357:0.00999,(1024089:0.0153,(256536:0.00389,746927:0.0085)111:0.02256)110:0.01297)107:0.00745)100:0.00982,242467:0.00942)86:0.00283,224043:0.00904)73:0.06305,(209803:0.05809,(1147699:0.00958,78839:0.01539)87:0.01255)74:0.02145)61:0.05437,(203969:0.01049,113212:0.01718)62:0.07463)51:0.09163,((((766178:0.00642,156065:0.02144)88:0.00252,3616127:0.01843)75:0.01515,205391:0.08328)63:0.01478,(843189:0.05067,2867534:0.04027)64:0.05349)52:0.12384)40:0.02845,(((512006:0.01707,(1130478:0.01127,((742260:0.0579,4440396:0.03928)101:0.01357,2285453:0.01427)89:0.00474)76:0.05994)65:0.01083,(156611:0.04171,151283:0.03279)66:0.01435)53:0.01822,(((((4409771:0.0131,4450823:0.01556)102:0.01771,4367783:0.0177)90:0.0423,(1127423:0.01089,((1104509:0.00653,4424782:0.0305)108:0.00209,154494:0.0309)103:0.02042)91:0.01431)77:0.00614,(211129:0.08219,(4478794:0.01191,1129210:0.01509)92:0.04495)78:0.00774)67:0.01808,((154519:0.0024,4341561:0.04065)79:0.01119,4351648:0.01519)68:0.03589)54:0.01789)41:0.03913)31:0.03029)19:0.12182)11:0.01264,((((255018:0.01859,4466061:0.01239)42:0.0169,825937:0.0207)32:0.02137,1123837:0.05322)20:0.2196,((((364805:0.00924,4470139:0.00877)55:0.0024,154567:0.05202)43:0.0347,(223583:0.14375,(834883:0.00959,592291:0.00967)56:0.09018)44:0.02203)33:0.04164,238800:0.0929)21:0.01988)12:0.18772)6:0.01563)3:0.02085)1:0.0212,4322321:0.2845)0:0.25729; 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # gneiss 2 | 3 | [](https://travis-ci.org/biocore/gneiss) 4 | [](https://coveralls.io/r/biocore/gneiss) 5 | 
[](https://gitter.im/biocore/gneiss?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) 6 | 7 | Canonically pronounced *nice* 8 | 9 | 10 | gneiss is a compositional data analysis and visualization toolbox designed for analyzing high dimensional proportions. See [here](https://biocore.github.io/gneiss/) for API documentation. 11 | 12 | Note that gneiss is not compatible with Python 2, and is compatible with Python 3.4 or later. 13 | gneiss is currently in alpha. We are actively developing it, and __backward-incompatible interface changes may arise__. 14 | 15 | # Installation 16 | 17 | To install this package, it is recommended to use conda. First make sure that the appropriate channels are configured. 18 | 19 | ``` 20 | conda config --add channels https://conda.anaconda.org/bioconda 21 | conda config --add channels https://conda.anaconda.org/biocore 22 | conda config --add channels https://conda.anaconda.org/qiime2 23 | conda config --add channels https://conda.anaconda.org/qiime2/label/r2017.6 24 | ``` 25 | 26 | Then gneiss can be installed in a conda environment as follows 27 | ``` 28 | conda create -n gneiss_env gneiss 29 | ``` 30 | To install the most up to date version of gneiss, run the following command 31 | 32 | ``` 33 | pip install git+https://github.com/biocore/gneiss.git 34 | ``` 35 | 36 | # Tutorials 37 | 38 | * [What are balances](https://github.com/biocore/gneiss/blob/master/ipynb/balance_trees.ipynb) 39 | 40 | # Qiime2 tutorials 41 | 42 | * [Linear regression on balances in the 88 soils](https://biocore.github.io/gneiss/docs/v0.4.0/tutorials/qiime2/88soils-qiime2-tutorial.html) 43 | * [Linear mixed effects models on balances in a CF study](https://biocore.github.io/gneiss/docs/v0.4.0/tutorials/qiime2/cfstudy-qiime2-tutorial.html) 44 | * [Linear regression on balances in the Chronic Fatigue Syndrome](https://biocore.github.io/gneiss/docs/v0.4.0/tutorials/qiime2/cfs-qiime2-tutorial.html) 45 | 46 | # Python tutorials 47 | 48 | * [Linear regression on 
balances in the 88 soils](https://biocore.github.io/gneiss/docs/v0.4.0/tutorials/python/88soils-python-tutorial.html) 49 | * [Linear mixed effects models on balances in a CF study](https://biocore.github.io/gneiss/docs/v0.4.0/tutorials/python/cfstudy-python-tutorial.html) 50 | * [Linear regression on balances in the Chronic Fatigue Syndrome](https://biocore.github.io/gneiss/docs/v0.4.0/tutorials/python/cfs-python-tutorial.html) 51 | 52 | 53 | If you use this software package in your own publications, please cite it as 54 | ``` 55 | Morton JT, Sanders J, Quinn RA, McDonald D, Gonzalez A, Vázquez-Baeza Y, 56 | Navas-Molina JA, Song SJ, Metcalf JL, Hyde ER, Lladser M, Dorrestein PC, 57 | Knight R. 2017. Balance trees reveal microbial niche differentiation. 58 | mSystems 2:e00162-16. https://doi.org/10.1128/mSystems.00162-16. 59 | ``` 60 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # ---------------------------------------------------------------------------- 4 | # Copyright (c) 2016--, gneiss development team. 5 | # 6 | # Distributed under the terms of the Modified BSD License. 7 | # 8 | # The full license is in the file COPYING.txt, distributed with this software. 
9 | # ---------------------------------------------------------------------------- 10 | 11 | import re 12 | import ast 13 | import os 14 | 15 | from setuptools import find_packages, setup 16 | from setuptools.command.build_ext import build_ext as _build_ext 17 | 18 | 19 | class build_ext(_build_ext): 20 | def finalize_options(self): 21 | _build_ext.finalize_options(self) 22 | # Prevent numpy from thinking it is still in its setup process: 23 | __builtins__.__NUMPY_SETUP__ = False 24 | import numpy 25 | self.include_dirs.append(numpy.get_include()) 26 | 27 | 28 | # Dealing with Cython 29 | USE_CYTHON = os.environ.get('USE_CYTHON', False) 30 | ext = '.pyx' if USE_CYTHON else '.c' 31 | 32 | extensions = [ 33 | ] 34 | 35 | if USE_CYTHON: 36 | from Cython.Build import cythonize 37 | extensions = cythonize(extensions) 38 | 39 | classes = """ 40 | Development Status :: 4 - Beta 41 | License :: OSI Approved :: BSD License 42 | Topic :: Software Development :: Libraries 43 | Topic :: Scientific/Engineering 44 | Topic :: Scientific/Engineering :: Bio-Informatics 45 | Programming Language :: Python :: 3 46 | Programming Language :: Python :: 3 :: Only 47 | Operating System :: Unix 48 | Operating System :: POSIX 49 | Operating System :: MacOS :: MacOS X 50 | """ 51 | classifiers = [s.strip() for s in classes.split('\n') if s] 52 | 53 | description = ('Compositional data analysis tools and visualizations') 54 | 55 | with open('README.md') as f: 56 | long_description = f.read() 57 | 58 | 59 | # version parsing from __init__ pulled from Flask's setup.py 60 | # https://github.com/mitsuhiko/flask/blob/master/setup.py 61 | _version_re = re.compile(r'__version__\s+=\s+(.*)') 62 | 63 | with open('gneiss/__init__.py', 'rb') as f: 64 | hit = _version_re.search(f.read().decode('utf-8')).group(1) 65 | version = str(ast.literal_eval(hit)) 66 | 67 | setup(name='gneiss', 68 | version=version, 69 | license='BSD', 70 | description=description, 71 | long_description=long_description, 72 | 
long_description_content_type='text/markdown', 73 | author="gneiss development team", 74 | author_email="jamietmorton@gmail.com", 75 | maintainer="gneiss development team", 76 | maintainer_email="jamietmorton@gmail.com", 77 | packages=find_packages(), 78 | setup_requires=['numpy >= 1.15.3'], 79 | ext_modules=extensions, 80 | cmdclass={'build_ext': build_ext}, 81 | install_requires=[ 82 | 'IPython >= 3.2.0', 83 | 'matplotlib >= 1.4.3', 84 | 'numpy >= 1.15.3', 85 | 'pandas >= 0.18.0', 86 | 'scipy >= 0.15.1', 87 | 'nose >= 1.3.7', 88 | 'scikit-bio >= 0.5.5', 89 | 'statsmodels>=0.8.0', 90 | 'biom-format', 91 | 'seaborn', 92 | 'bokeh==1.1.0' 93 | ], 94 | classifiers=classifiers, 95 | package_data={}) 96 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # gneiss changelog 2 | 3 | ## Version 0.4.5 4 | * Pandas dependencies have been updated [#282](https://github.com/biocore/gneiss/pull/282) 5 | 6 | ## Version 0.4.4 7 | * Numpy and scikit-bio dependencies have been updated [#278](https://github.com/biocore/gneiss/pull/278) 8 | 9 | ## Version 0.4.3 10 | * Enabling direct download of fdr corrected pvalues 11 | * Adding in sparse version of the ilr transform utilizing COO-formated sparse matrices [#250](https://github.com/biocore/gneiss/pull/250) 12 | * Adding in sparse utilities for matching biom tables [#253](https://github.com/biocore/gneiss/pull/253) 13 | 14 | 15 | ## Version 0.4.2 16 | * Added `proportion_plot` to plot the mean proportions within a single balance [#234](https://github.com/biocore/gneiss/pull/234) 17 | 18 | ## Version 0.4.1 19 | * Added colorbar for heatmap 20 | * Decoupled qiime2 from gneiss. 
All qiime2 commands have now been ported to [q2-gneiss](https://github.com/qiime2/q2-gneiss) 21 | 22 | ## Version 0.4.0 23 | * Accelerated the ordinary least squares regression 24 | * Improved summary statistics and cross validation in ordinary least squares regression 25 | * Improved summary visualizations for OLS and MixedLM 26 | 27 | ## Version 0.3.2 28 | * Added `balance_boxplot` and `balance_barplot` to make interpretation of balance partitions easier. 29 | * Added `balance_summary` to summarize a given balance using the q2 cli. 30 | * Added `assign_ids` command to allow for ids to be added manually. 31 | 32 | ## Version 0.3.0 33 | * Added q2 support for linear regression and linear mixed effects models [#98](https://github.com/biocore/gneiss/pull/98) 34 | * Added q2 support for hierarchical clustering [#116](https://github.com/biocore/gneiss/pull/116) 35 | * Added interactive heatmaps with highlights with matplotlib [#114](https://github.com/biocore/gneiss/pull/114) 36 | * Added tree visualizations for unrooted trees with bokeh [#112](https://github.com/biocore/gneiss/pull/112) 37 | * Added support for cross validation for ordinary least squares [#101](https://github.com/biocore/gneiss/pull/101) 38 | 39 | ## Version 0.2.1 40 | * Added heatmap dendrogram plotting functionality [#87](https://github.com/biocore/gneiss/issues/87) 41 | * Added principal balance analysis heuristic using proportionality and Ward's clustering algorithm [#83](https://github.com/biocore/gneiss/issues/83) 42 | 43 | ## Version 0.2.0 44 | 45 | ### Features 46 | * Added filehandle support for write and read io in RegressionResults object [#77](https://github.com/biocore/gneiss/issues/77) 47 | 48 | 49 | ## Version 0.1.3 50 | 51 | ### Features 52 | * Added write and read io for RegressionResults object [#72](https://github.com/biocore/gneiss/issues/72) 53 | 54 | ## Version 0.1.2 55 | 56 | ### Features 57 | * Added `ladderize` and `gradient_sort` [#29](https://github.com/biocore/gneiss/issues/29) 58 | 
59 | ### Bug fixes 60 | 61 | 62 | ## Version 0.0.2 63 | 64 | ### Features 65 | * Added statsmodels inference [#22](https://github.com/biocore/gneiss/pull/22) 66 | * Added support for ordinary least squares regression [#33](https://github.com/biocore/gneiss/pull/33) 67 | * Added support for linear mixed effects models [#38](https://github.com/biocore/gneiss/pull/38) 68 | * Added RegressionResults object to summarize statistics from statistical analyses 69 | * Adding in a niche sorting algorithm `gneiss.sort.niche_sort` that can generate a band table given a gradient [#16](https://github.com/biocore/gneiss/pull/16) 70 | * Adding in utility functions for handling feature tables, metadata, and trees. [#12](https://github.com/biocore/gneiss/pull/12) 71 | * Adding GPL license. 72 | 73 | ### Bug fixes 74 | -------------------------------------------------------------------------------- /gneiss/cluster/tests/test_pba.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 3 | # 4 | # Distributed under the terms of the Modified BSD License. 5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 
7 | # ---------------------------------------------------------------------------- 8 | import numpy as np 9 | import pandas as pd 10 | import unittest 11 | from gneiss.cluster._pba import (correlation_linkage, gradient_linkage, 12 | rank_linkage, random_linkage) 13 | from skbio import TreeNode 14 | 15 | 16 | class TestPBA(unittest.TestCase): 17 | def setUp(self): 18 | pass 19 | 20 | def test_correlation_linkage_1(self): 21 | table = pd.DataFrame( 22 | [[1, 1, 0, 0, 0], 23 | [0, 1, 1, 0, 0], 24 | [0, 0, 1, 1, 0], 25 | [0, 0, 0, 1, 1]], 26 | columns=['s1', 's2', 's3', 's4', 's5'], 27 | index=['o1', 'o2', 'o3', 'o4']).T 28 | exp_str = ('((o1:0.574990173931,o2:0.574990173931)y1:0.773481312844,' 29 | '(o3:0.574990173931,o4:0.574990173931)y2:0.773481312844)' 30 | 'y0;\n') 31 | exp_tree = TreeNode.read([exp_str]) 32 | res_tree = correlation_linkage(table + 0.1) 33 | # only check for tree topology since checking for floating point 34 | # numbers on the branches is still tricky. 35 | self.assertEqual(exp_tree.ascii_art(), res_tree.ascii_art()) 36 | 37 | def test_correlation_linkage_2(self): 38 | t = pd.DataFrame([[1, 1, 2, 3, 1, 4], 39 | [2, 2, 0.1, 4, 1, .1], 40 | [3, 3.1, 2, 3, 2, 2], 41 | [4.1, 4, 0.2, 1, 1, 2.5]], 42 | index=['S1', 'S2', 'S3', 'S4'], 43 | columns=['F1', 'F2', 'F3', 'F4', 'F5', 'F6']) 44 | exp_str = ('((F4:0.228723591874,(F5:0.074748541601,' 45 | '(F1:0.00010428164962,F2:0.00010428164962)' 46 | 'y4:0.0746442599513)y3:0.153975050273)' 47 | 'y1:0.70266138894,(F3:0.266841737789,F6:0.266841737789)' 48 | 'y2:0.664543243026)y0;\n') 49 | exp_tree = TreeNode.read([exp_str]) 50 | res_tree = correlation_linkage(t) 51 | self.assertEqual(exp_tree.ascii_art(), res_tree.ascii_art()) 52 | 53 | 54 | class TestUPGMA(unittest.TestCase): 55 | def setUp(self): 56 | pass 57 | 58 | def test_gradient_linkage(self): 59 | table = pd.DataFrame( 60 | [[1, 1, 0, 0, 0], 61 | [0, 1, 1, 0, 0], 62 | [0, 0, 1, 1, 0], 63 | [0, 0, 0, 1, 1]], 64 | columns=['s1', 's2', 's3', 's4', 's5'], 65 
class TestRadial(unittest.TestCase):
    """Checks the data sources built by ``radialplot`` for a 3-tip tree."""

    def setUp(self):

        self.coords = pd.DataFrame(
            [['487.5', '347.769', 'NaN', 'NaN', 'True'],
             ['12.5', '483.28', 'NaN', 'NaN', 'True'],
             ['324.897', '16.7199', 'NaN', 'NaN', 'True'],
             ['338.261', '271.728', '0', '2', 'False'],
             ['193.169', '365.952', '1', 'y3', 'False']],
            columns=['x', 'y', 'child0', 'child1', 'is_tip'],
            index=['0', '1', '2', 'y3', 'y4'])

    def _assert_columns(self, data, expected):
        """Compare every expected column against a renderer's data dict.

        Columns whose expected values are all floats are compared with a
        tolerance; every other column (strings, ints, bools, None, or a
        mix containing NaN placeholders) is compared by list equality.
        """
        for key, values in expected.items():
            # The check must look at the elements: the expected value is
            # always a list, so testing the list itself against `float`
            # would never select the tolerant comparison.
            if all(isinstance(v, float) for v in values):
                npt.assert_allclose(data[key], np.array(values))
            else:
                self.assertListEqual(list(data[key]), values)

    @unittest.skip('Visualizations are deprecated')
    def test_basic_plot(self):
        self.maxDiff = None
        exp_edges = {'dest_node': ['0', '1', '2', 'y3'],
                     'edge_color': ['#00FF00', '#00FF00',
                                    '#00FF00', '#FF0000'],
                     'edge_width': [2, 2, 2, 2],
                     'src_node': ['y3', 'y4', 'y3', 'y4'],
                     'x0': [338.2612593838583,
                            193.1688862557773,
                            338.2612593838583,
                            193.1688862557773],
                     'x1': [487.5, 12.499999999999972,
                            324.89684138234867, 338.2612593838583],
                     'y0': [271.7282256126416,
                            365.95231443706376,
                            271.7282256126416,
                            365.95231443706376],
                     'y1': [347.7691620070637,
                            483.2800610261029,
                            16.719938973897143,
                            271.7282256126416]}

        exp_nodes = {'child0': [np.nan, np.nan, np.nan, '0', '1'],
                     'child1': [np.nan, np.nan, np.nan, '2', 'y3'],
                     'color': ['#1C9099', '#1C9099', '#1C9099',
                               '#FF999F', '#FF999F'],
                     'hover_var': [None, None, None, None, None],
                     'is_tip': [True, True, True, False, False],
                     'node_size': [10, 10, 10, 10, 10],
                     'x': [12.499999999999972,
                           487.5,
                           324.89684138234867,
                           338.26125938385832,
                           193.16888625577729],
                     'y': [483.28006102610289,
                           347.7691620070637,
                           16.719938973897143,
                           271.72822561264161,
                           365.95231443706376]}
        np.random.seed(0)
        num_otus = 3  # otus
        x = np.random.rand(num_otus)
        dm = DistanceMatrix.from_iterable(x, lambda x, y: np.abs(x - y))
        lm = ward(dm.condensed_form())
        # `np.str` was only an alias of the builtin and has been removed
        # from recent NumPy releases; the builtin works everywhere.
        t = TreeNode.from_linkage_matrix(lm, np.arange(len(x)).astype(str))
        t = UnrootedDendrogram.from_tree(t)
        # incorporate colors in tree
        for i, n in enumerate(t.postorder(include_self=True)):
            if not n.is_tip():
                n.name = "y%d" % i
                n.color = '#FF999F'
                n.edge_color = '#FF0000'
                n.node_size = 10
            else:
                n.color = '#1C9099'
                n.edge_color = '#00FF00'
                n.node_size = 10
            n.length = np.random.rand() * 3
            n.edge_width = 2
        p = radialplot(t, node_color='color', edge_color='edge_color',
                       node_size='node_size', edge_width='edge_width')

        # renderer 0 carries the edges, renderer 1 the nodes
        self._assert_columns(p.renderers[0].data_source.data, exp_edges)
        self._assert_columns(p.renderers[1].data_source.data, exp_nodes)

        self.assertTrue(isinstance(t, TreeNode))
29 | - ipython=3.2.3=py35_0 30 | - ipython_genutils=0.1.0=py35_0 31 | - ipywidgets=4.1.1=py35_0 32 | - jinja2=2.8=py35_1 33 | - jsonschema=2.5.1=py35_0 34 | - jupyter=1.0.0=py35_3 35 | - jupyter_client=4.3.0=py35_0 36 | - jupyter_console=5.0.0=py35_0 37 | - jupyter_core=4.1.0=py35_0 38 | - libpng=1.6.22=0 39 | - lockfile=0.12.2=py35_0 40 | - markupsafe=0.23=py35_2 41 | - mccabe=0.3.1=py35_0 42 | - mistune=0.7.2=py35_1 43 | - mkl=11.3.3=0 44 | - natsort=4.0.3=py35_0 45 | - nbconvert=4.2.0=py35_0 46 | - nbformat=4.0.1=py35_0 47 | - nose=1.3.7=py35_1 48 | - notebook=4.2.1=py35_0 49 | - openssl=1.0.2h=1 50 | - patsy=0.4.1=py35_0 51 | - pep8=1.7.0=py35_0 52 | - pip=8.1.2=py35_0 53 | - prompt_toolkit=1.0.9=py35_0 54 | - ptyprocess=0.5.1=py35_0 55 | - pyflakes=1.2.3=py35_0 56 | - pygments=2.1.3=py35_0 57 | - pyparsing=2.1.4=py35_0 58 | - pyqt=4.11.4=py35_3 59 | - python=3.5.2=0 60 | - python-dateutil=2.5.3=py35_0 61 | - python.app=1.2=py35_4 62 | - pytz=2016.4=py35_0 63 | - pyyaml=3.12=py35_0 64 | - pyzmq=15.2.0=py35_1 65 | - qiime2::arrow=0.8.0=py35_0 66 | - qiime2::binaryornot=0.3.0=0_ge797740 67 | - qiime2::biom-format=2.1.5=py35_3 68 | - qiime2::cookiecutter=1.4.0=py35_0 69 | - qiime2::ijson=2.3=py35_0 70 | - qiime2::ipymd=0.1.2=py35_0 71 | - qiime2::jinja2-time=0.2.0=py35_0 72 | - qiime2::poyo=0.4.0=py35_0 73 | - qiime2::python-frontmatter=0.2.1=py35_0 74 | - qiime2::tzlocal=1.3=py35_0 75 | - qiime2::whichcraft=0.4.0=py35_0 76 | - qt=4.8.7=3 77 | - qtconsole=4.0.1=py35_0 78 | - readline=6.2=2 79 | - requests=2.10.0=py35_0 80 | - setuptools=23.0.0=py35_0 81 | - sip=4.16.9=py35_0 82 | - six=1.10.0=py35_0 83 | - sqlite=3.13.0=0 84 | - statsmodels=0.8.0=np111py35_0 85 | - terminado=0.6=py35_0 86 | - tk=8.5.18=0 87 | - tornado=4.3=py35_1 88 | - traitlets=4.2.1=py35_0 89 | - wcwidth=0.1.7=py35_0 90 | - wheel=0.29.0=py35_0 91 | - xz=5.2.2=0 92 | - yaml=0.1.6=0 93 | - zlib=1.2.8=3 94 | - pip: 95 | - alabaster==0.7.9 96 | - appdirs==1.4.0 97 | - appnope==0.1.0 98 | - 
args==0.1.0 99 | - babel==2.3.4 100 | - backports.shutil-get-terminal-size==1.0.0 101 | - canvas==0.0.1 102 | - chest==0.2.3 103 | - clint==0.5.1 104 | - cloudpickle==0.2.1 105 | - colorama==0.3.7 106 | - coverage==4.1 107 | - coveralls==1.1 108 | - cvxopt==1.1.8 109 | - cython==0.23.5 110 | - dask==0.11.0 111 | - datashader==0.4.0 112 | - datashape==0.5.2 113 | - docopt==0.6.2 114 | - docutils==0.12 115 | - emperor==1.0.0b5 116 | - ete3==3.0.0b35 117 | - gneiss (/Users/mortonjt/Dropbox/UCSD/research/gneiss)==0.3.1 118 | - gnureadline==6.3.3 119 | - heapdict==1.0.0 120 | - igraph==0.1.11 121 | - imagesize==0.7.1 122 | - ipyparallel==5.2.0 123 | - ipython-genutils==0.1.0 124 | - jgraph==0.2.1 125 | - joblib==0.10.2 126 | - jupyter-client==4.3.0 127 | - jupyter-console==5.0.0 128 | - jupyter-core==4.1.0 129 | - llvmlite==0.14.0 130 | - locket==0.2.0 131 | - matplotlib-venn==0.11.4 132 | - msgpack-python==0.4.8 133 | - multipledispatch==0.4.8 134 | - nbdime==0.1.0 135 | - networkx==1.11 136 | - numba==0.29.0 137 | - odo==0.4.2 138 | - packaging==16.8 139 | - partd==0.3.6 140 | - path.py==8.1.2 141 | - pexpect==4.2.1 142 | - pickleshare==0.7.4 143 | - pillow==3.3.1 144 | - pkginfo==1.4.1 145 | - ply==3.9 146 | - prompt-toolkit==1.0.13 147 | - pulp==1.6.5 148 | - pyemd==0.3.0 149 | - pyomo==5.1.1 150 | - pysam==0.9.0 151 | - python-igraph==0.7.1.post6 152 | - python-ternary==1.0 153 | - pyutilib==5.4.1 154 | - q2-composition==2017.3.0.dev0 155 | - q2-feature-table==2017.3.0.dev0 156 | - q2-taxa==2017.3.0.dev0 157 | - q2-types==2017.3.0.dev0 158 | - q2cli (/Users/mortonjt/Dropbox/UCSD/research/q2/q2cli)==2017.3.0.dev0 159 | - q2templates==2017.3.0.dev0 160 | - qcli==0.1.1 161 | - qiime2 (/Users/mortonjt/Dropbox/UCSD/research/q2/qiime2)==2017.3.0.dev0 162 | - requests-toolbelt==0.7.0 163 | - rpy2==2.7.8 164 | - scikit-learn==0.17.1 165 | - seaborn==0.7.1 166 | - simplegeneric==0.8.1 167 | - snowballstemmer==1.2.1 168 | - sourcetracker==2.0.1.dev0 169 | - sphinx==1.4.9 170 
try:
    from bokeh.models.glyphs import Circle, Segment
    from bokeh.models import ColumnDataSource, DataRange1d, Plot
    from bokeh.models import (HoverTool, BoxZoomTool, ResetTool,
                              WheelZoomTool, SaveTool, PanTool)
except ImportError:
    # Bokeh is optional: only a failed import is tolerated here, so that
    # unrelated errors (and KeyboardInterrupt) are not silently swallowed.
    warnings.warn("Bokeh isn't installed - "
                  "the interactive visualizations won't work.")


def radialplot(tree, node_color='node_color', node_size='node_size',
               node_alpha='node_alpha', edge_color='edge_color',
               edge_alpha='edge_alpha', edge_width='edge_width',
               hover_var='hover_var', figsize=(500, 500), **kwargs):
    """ Plots unrooted radial tree.

    Parameters
    ----------
    tree : instance of skbio.TreeNode
       Input tree for plotting.
    node_color : str
       Name of variable in `tree` to color nodes.
    node_size : str
       Name of variable in `tree` that specifies the radius of nodes.
    node_alpha : str
       Name of variable in `tree` to specify node transparency.
    edge_color : str
       Name of variable in `tree` to color edges.
    edge_alpha : str
       Name of variable in `tree` to specify edge transparency.
    edge_width : str
       Name of variable in `tree` to specify edge width.
    hover_var : str or None
       Name of variable in `tree` to display in the hover menu.
       If None, no extra hover entry is added.
    figsize : tuple, int
       Size of resulting figure.  default: (500, 500)
    **kwargs: dict
       Plotting options to pass into bokeh.models.Plot

    Returns
    -------
    bokeh.models.Plot
       Interactive plotting instance.


    Notes
    -----
    This assumes that the tree is strictly bifurcating.

    See also
    --------
    bifurcate
    """
    # stacklevel=2 attributes the warning to the caller of radialplot.
    warnings.warn("This visualization are deprecated.", DeprecationWarning,
                  stacklevel=2)
    # This entire function was motivated by
    # http://chuckpr.github.io/blog/trees2.html
    t = UnrootedDendrogram.from_tree(tree.copy())

    nodes = t.coords(figsize[0], figsize[1])

    # fill in all of the node attributes
    def _retrieve(tree, x, default):
        return pd.Series({n.name: getattr(n, x, default)
                          for n in tree.levelorder()})

    # default node color to light grey
    nodes[node_color] = _retrieve(t, node_color, default='#D3D3D3')
    nodes[node_size] = _retrieve(t, node_size, default=1)
    nodes[node_alpha] = _retrieve(t, node_alpha, default=1)
    if hover_var is not None:
        # getattr() rejects a None attribute name, so only look the
        # variable up when one was actually requested.
        nodes[hover_var] = _retrieve(t, hover_var, default=None)

    # Every non-null child0/child1 entry becomes one edge from the row's
    # node (source) to the child named in the cell (destination).
    edges = nodes[['child0', 'child1']]
    edges = edges.dropna(subset=['child0', 'child1'])
    edges = edges.unstack()
    edges = pd.DataFrame({'src_node': edges.index.get_level_values(1),
                          'dest_node': edges.values})
    edges['x0'] = [nodes.loc[n].x for n in edges.src_node]
    edges['x1'] = [nodes.loc[n].x for n in edges.dest_node]
    edges['y0'] = [nodes.loc[n].y for n in edges.src_node]
    edges['y1'] = [nodes.loc[n].y for n in edges.dest_node]
    node_names = [n.name for n in t.levelorder(include_self=True)]
    attrs = pd.DataFrame(index=node_names)

    # default edge color to black
    attrs[edge_color] = _retrieve(t, edge_color, default='#000000')
    attrs[edge_width] = _retrieve(t, edge_width, default=1)
    attrs[edge_alpha] = _retrieve(t, edge_alpha, default=1)

    # Edge attributes are looked up by destination node; attribute rows
    # that match no edge are dropped again via the missing src_node.
    edges = pd.merge(edges, attrs, left_on='dest_node',
                     right_index=True, how='outer')
    edges = edges.dropna(subset=['src_node'])

    node_glyph = Circle(x="x", y="y",
                        radius=node_size,
                        fill_color=node_color,
                        fill_alpha=node_alpha)

    edge_glyph = Segment(x0="x0", y0="y0",
                         x1="x1", y1="y1",
                         line_color=edge_color,
                         line_alpha=edge_alpha,
                         line_width=edge_width)

    def df2ds(df):
        return ColumnDataSource(ColumnDataSource.from_df(df))

    ydr = DataRange1d(range_padding=0.05)
    xdr = DataRange1d(range_padding=0.05)

    plot = Plot(x_range=xdr, y_range=ydr, **kwargs)
    # edges are added first, then nodes
    plot.add_glyph(df2ds(edges), edge_glyph)
    node_renderer = plot.add_glyph(df2ds(nodes), node_glyph)

    tooltip = [
        ("Feature ID", "@index")
    ]
    if hover_var is not None:
        tooltip += [(hover_var, "@" + hover_var)]

    hover = HoverTool(renderers=[node_renderer], tooltips=tooltip)
    plot.add_tools(hover, BoxZoomTool(), ResetTool(),
                   WheelZoomTool(), SaveTool(), PanTool())

    return plot
y11,2.0081696710019457,-0.33669643064356075 14 | y12,-0.4458001655134542,0.052614929491326755 15 | y13,-2.5388789829593894,0.36269640192567715 16 | y14,-1.7009227515869187,0.2486995712072929 17 | y15,2.2342942376459476,-0.28137737848819855 18 | y16,0.9952936138338209,-0.1358283261966583 19 | y17,1.8985792038741323,-0.309574156534415 20 | y18,-0.5185397408352778,0.045702588530660976 21 | y19,0.29038527282402016,-0.031194965277184943 22 | y20,0.7927326302466351,-0.10810810411171143 23 | y21,-0.19862086569906898,0.09197269958700291 24 | y22,1.11695795500751,-0.15393879768283344 25 | y23,-1.9375935274988665,0.316771155039698 26 | y24,-1.5197127403457722,0.2466330689419788 27 | y25,-7.222697297003709,1.090728825447765 28 | y26,11.135550530286885,-1.842919462263025 29 | y27,-0.5969641090728304,0.11717486570967786 30 | y28,-0.5083320185752758,0.07391030315795062 31 | y29,-0.7113172797977424,0.12208714562398323 32 | y30,-0.4043710947066187,0.038640993551844874 33 | y31,2.731012073137618,-0.46159032232526753 34 | y32,3.965568487845451,-0.5957211835451143 35 | y33,-1.3843125783110992,0.2063797362105431 36 | y34,0.11186906658990858,0.09974027808792005 37 | y35,0.7458762279144321,-0.15991699299592574 38 | y36,0.5462379729323912,-0.12509925899594823 39 | y37,0.7715428127698168,-0.08374870435548773 40 | y38,-0.006490297265430415,0.006489381824971823 41 | y39,-1.1505876784891131,0.2419237573784568 42 | y40,1.4927158016341542,-0.22453611806483276 43 | y41,0.2362826246016099,-0.02382474060748356 44 | y42,-0.8199793918824666,0.14617589105266396 45 | y43,-1.2479231999016884,0.25900931458640947 46 | y44,1.6489193595737197,-0.2656311740355035 47 | y45,0.37513055994129435,-0.06575775407372958 48 | y46,-0.23235985768487274,0.04345932796883874 49 | y47,0.232413460376192,-0.04363490376686736 50 | y48,-0.9701856408728218,0.17462167769163456 51 | y49,0.08141413719517979,0.004396636449273949 52 | y50,1.8409060639108943,-0.29310915670058585 53 | y51,0.11107094311846646,-0.023971435949612815 54 
| y52,-0.3288508216536449,0.04478598174234949 55 | y53,-1.950201188792175,0.27019388715883313 56 | y54,-0.21754527398648923,0.02567564793019846 57 | y55,0.5070344389950753,-0.08435530699541947 58 | y56,0.5469829898615253,-0.015692985885081844 59 | y57,-1.0651920370632453,0.10357010418282334 60 | y58,-0.3618051141258941,0.04083277567819005 61 | y59,0.7359516747239916,-0.1191674300733181 62 | y60,-0.27027205230119355,0.027219092575649903 63 | y61,0.11332657234338792,-0.037628480171609864 64 | y62,-0.054802386332120014,0.015438312126530043 65 | y63,0.17834351919623104,-0.0353004982881047 66 | y64,0.5021027013118613,-0.08149261323028857 67 | y65,-0.7317177817250978,0.11092985149299428 68 | y66,0.47637412333038126,-0.0035924721497009432 69 | y67,0.020606271402806065,0.009997371330490225 70 | y68,1.9221471293322256,-0.26752500098575666 71 | y69,1.17970241212005,-0.1641827048201078 72 | y70,-0.8184772546295412,0.12022121428082484 73 | y71,-0.12330798367923247,0.028963428511499006 74 | y72,-0.7043520903538832,0.12111737639867173 75 | y73,0.16468385036158828,-0.01491809942028498 76 | y74,-0.05458864419854579,0.008102836974669044 77 | y75,-1.3794040734816935,0.18755889639070997 78 | y76,1.2988084300034206,-0.17970824507379088 79 | y77,0.23804873279931538,-0.04967872251402735 80 | y78,1.3443871234443194,-0.1511514994081249 81 | y79,1.791956991985029,-0.31027551804655645 82 | y80,-0.2260027577677782,0.0352420930295071 83 | y81,-0.7315837744871326,0.09397099811752183 84 | y82,0.22342083868015558,-0.036252296752822454 85 | y83,0.9890597158215642,-0.12754719940053685 86 | y84,0.020923823752194582,0.006047398987785311 87 | y85,1.2659322933931254,-0.1790956481945273 88 | y86,0.5490840386384254,-0.09191494829380803 89 | y87,-0.854051259233914,0.11497150927490624 90 | y88,1.5442076690205047,-0.22477604150408292 91 | y89,0.05180071552460162,-0.04441923140965983 92 | y90,2.1702012325252618,-0.32509549154637923 93 | y91,-1.1438217763831038,0.18457225499582508 94 | 
y92,0.20182664476633647,-0.03617016003385909 95 | y93,0.32283178714269156,-0.07556082692785951 96 | y94,0.6624037908148538,-0.11691640268465277 97 | y95,0.12781936469114472,-0.05442398123766153 98 | y96,0.3225935214956588,-0.049489243690185744 99 | y97,0.05504548243442081,-0.0019830812889765153 100 | y98,-0.31634347553783554,0.03592855377461579 101 | y99,-0.20450768537043104,0.01647646243984905 102 | y100,0.5706627524394878,-0.07096218388437267 103 | y101,0.4481857221755603,-0.05229362634021477 104 | y102,1.4610714039897088,-0.1974608317058108 105 | y103,-0.5831225769898846,0.12847535822056447 106 | y104,-0.3778252764706474,0.05540222040774105 107 | y105,-0.10391591807003929,0.05028748957940854 108 | y106,-0.6458814418806339,0.0919046542929978 109 | y107,1.726022770127027,-0.23392347023971047 110 | y108,0.40707386320519157,-0.05445419955171094 111 | y109,1.7269767479591678,-0.22161951275880676 112 | y110,-0.3485150472085344,0.04398702248446438 113 | y111,-0.6674467573758245,0.22982383932854994 114 | y112,-1.5226066949789847,0.21540215886086916 115 | y113,-0.4682652952856936,0.06948694013844546 116 | y114,-0.5963751787826362,0.07423163157790996 117 | y115,0.18941501262895516,-0.003290492356183168 118 | y116,0.3319245569899489,-0.07497860531797448 119 | y117,-0.44905384789598235,0.059247252407698896 120 | -------------------------------------------------------------------------------- /gneiss/regression/tests/data/pvalues.csv: -------------------------------------------------------------------------------- 1 | ,Intercept,ph 2 | y0,2.0231382176562713e-07,1.5986797744717098e-06 3 | y1,0.06011885055018011,0.5200315964601321 4 | y2,0.016362183704805352,0.08459797560939894 5 | y3,2.711345677586785e-05,4.908287303929518e-05 6 | y4,0.37635229111205415,0.6823421422061464 7 | y5,0.0005396934480471485,0.0021766278626422775 8 | y6,0.07985080247891974,0.13639964193115275 9 | y7,5.369709631311585e-05,0.0001910744159765913 10 | y8,0.988225064070116,0.7080350094829028 11 | 
y9,4.409135649041548e-30,5.177944970965206e-30 12 | y10,0.6303272886698309,0.5600337884848634 13 | y11,3.128836166342658e-07,1.30963118803634e-07 14 | y12,0.1066787667914187,0.23735473121815692 15 | y13,1.295050642130945e-08,3.0887411253115263e-07 16 | y14,2.2367190437659e-05,0.00010972539901087592 17 | y15,1.1370780526586244e-11,3.000983940825702e-08 18 | y16,0.006185201230885478,0.02009040359692411 19 | y17,7.695994405895602e-05,6.775560403179732e-05 20 | y18,0.12150931064477158,0.39638796434776435 21 | y19,0.47269850766776533,0.6330303174570436 22 | y20,0.010308804348404794,0.02962236033751162 23 | y21,0.6217481615522058,0.15975695806591558 24 | y22,0.0025029093920176213,0.009470244972664433 25 | y23,5.771271567361451e-09,4.189942632303552e-09 26 | y24,1.6429321667207632e-05,1.54081456332786e-05 27 | y25,7.958076802929733e-18,2.2899968563086935e-16 28 | y26,1.9455705722308582e-18,5.777506242608944e-19 29 | y27,0.012384954189432178,0.002601398943639638 30 | y28,0.07759917873075849,0.11193438405921681 31 | y29,0.004749709484931084,0.0028015471036144045 32 | y30,0.18729661964979205,0.43434670921461993 33 | y31,8.055677022011245e-11,1.696061444790691e-11 34 | y32,3.302116870758148e-13,5.795117625440092e-12 35 | y33,1.3939285026652989e-05,5.424895386889316e-05 36 | y34,0.8709721145588748,0.3715329255621058 37 | y35,0.00021920050748216926,1.8379424591619849e-06 38 | y36,0.16360444034078986,0.04973415032091874 39 | y37,0.01355347015225425,0.09429934604574804 40 | y38,0.9731249027798142,0.8350011803300309 41 | y39,0.00031226279875793295,4.677092902076776e-06 42 | y40,0.0015954833779699596,0.003203703162063911 43 | y41,0.18614915413516903,0.40828378488135897 44 | y42,0.004251445104840961,0.001709061310481806 45 | y43,1.703359098557291e-05,8.552286564044924e-08 46 | y44,1.2426600622226377e-05,1.328985290260878e-05 47 | y45,0.17551299912142332,0.14230620069990005 48 | y46,0.35489045213483716,0.28496905252631144 49 | y47,0.16602735174894406,0.10845519956226542 50 | 
y48,0.006427527916242203,0.002539622003724941 51 | y49,0.75368004235738,0.9165150791122991 52 | y50,0.005771991467566583,0.006530235615020178 53 | y51,0.6017802273926969,0.48637481410913097 54 | y52,0.26759490722527424,0.3499562720950433 55 | y53,8.924597332512463e-06,0.00011325234507231189 56 | y54,0.31949290114412354,0.466871596076887 57 | y55,0.09013458423408123,0.08136538477870153 58 | y56,0.0816663196354404,0.7553993031914661 59 | y57,0.004433194199059207,0.08250281174037756 60 | y58,0.2290113551466194,0.40014959567649544 61 | y59,0.018282435042915796,0.01812677919937224 62 | y60,0.32371878164778645,0.5381075053380269 63 | y61,0.6958683825610459,0.42280243487177394 64 | y62,0.8206811909928219,0.6930044395685215 65 | y63,0.3361523253936626,0.2396769106368928 66 | y64,0.07411406118822376,0.0730594671582051 67 | y65,0.0008487512443398987,0.0016913290798711417 68 | y66,0.08156930966743642,0.9346993696415715 69 | y67,0.9536637593962555,0.8616063078659033 70 | y68,5.764697472006051e-07,1.1980943688674127e-05 71 | y69,3.469880515887769e-08,1.251969770259889e-06 72 | y70,0.0050466333460564726,0.01056032847691737 73 | y71,0.5824003403194246,0.42486627824141254 74 | y72,0.020027189607415154,0.013568182783744916 75 | y73,0.5313886104027061,0.725662026927302 76 | y74,0.8883364466894954,0.8974424613269687 77 | y75,0.0004461288566022144,0.0028619022760101014 78 | y76,3.957183581647853e-06,6.0947195212273386e-05 79 | y77,0.4332278891006891,0.3123717778815628 80 | y78,0.0016576279361801888,0.026496104629617434 81 | y79,0.0007138976393654403,0.00030933688733006414 82 | y80,0.28891981081632123,0.30633969607425043 83 | y81,0.04627436056711397,0.11190946808988014 84 | y82,0.4988366152669681,0.4973509989614011 85 | y83,5.783715711833836e-08,8.475043164083101e-06 86 | y84,0.9336421911918483,0.8816971497803062 87 | y85,0.0004047193067072164,0.0018101551247001204 88 | y86,0.13752630011858602,0.12436393090433022 89 | y87,0.0007949699924884687,0.004807041210853434 90 | 
y88,1.681321342801181e-06,1.2957462590935792e-05 91 | y89,0.8441422899161682,0.2985832666575177 92 | y90,4.5606670103847854e-08,3.0324380533746814e-07 93 | y91,0.000679688358488179,0.0006962171212845777 94 | y92,0.5530261279307382,0.5109620405114144 95 | y93,0.07684438987080362,0.011188109318831293 96 | y94,0.012222421854624525,0.0064113611717861 97 | y95,0.6863133156452363,0.2888980046697222 98 | y96,0.3378152841800086,0.3630338526064949 99 | y97,0.8089947540821971,0.957043446828022 100 | y98,0.1803712958932434,0.3454962628351098 101 | y99,0.4710487985349683,0.7192012832165596 102 | y100,0.2037978320656817,0.3275210105710409 103 | y101,0.07838268739124357,0.2021477449146662 104 | y102,6.275550928468192e-10,1.0081774647797712e-07 105 | y103,0.03602584131371042,0.004694931406947464 106 | y104,0.2205716136177221,0.26625549453712816 107 | y105,0.648991144530094,0.17514504906365966 108 | y106,0.03191994202863099,0.05829444014664721 109 | y107,9.159580575936758e-07,2.7343819297797546e-05 110 | y108,0.20615265960503892,0.29493372156085573 111 | y109,3.9957320068973616e-08,7.122719359440899e-06 112 | y110,0.1601883874809344,0.2719299929048468 113 | y111,0.057124882232972594,9.02276720234949e-05 114 | y112,8.83353387262592e-05,0.0005319623291459005 115 | y113,0.07855307166119041,0.10596204097226382 116 | y114,0.09814960073925237,0.2015466315199174 117 | y115,0.46208377440008475,0.9369296099174003 118 | y116,0.4032484957728807,0.24383860569835114 119 | y117,0.05493605762180559,0.11604625913417174 120 | -------------------------------------------------------------------------------- /gneiss/regression/_model.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 3 | # 4 | # Distributed under the terms of the Modified BSD License. 
class RegressionModel(Model):
    def __init__(self, *args, **kwargs):
        """
        Summary object for storing regression results.

        A `RegressionModel` object stores information about the
        individual balances used in the regression, the coefficients,
        residuals. This object can be used to perform predictions.
        In addition, summary statistics such as the coefficient
        of determination for the overall fit can be calculated.

        Parameters
        ----------
        submodels : list of statsmodels objects
            List of statsmodels result objects.
        balances : pd.DataFrame
            A table of balances where samples are rows and
            balances are columns.  These balances were calculated
            using `tree`.

        Notes
        -----
        All arguments are forwarded unchanged to the parent `Model`
        constructor.
        """
        self._beta = None
        self._resid = None
        self._fitted = False
        super().__init__(*args, **kwargs)
        # there is only one design matrix for regression
        self.design_matrix = self.design_matrices

    def coefficients(self, tree=None):
        """ Returns coefficients from fit.

        Parameters
        ----------
        tree : skbio.TreeNode, optional
            The tree used to perform the ilr transformation.  If this
            is specified, then the coefficients will be represented as
            proportions. Otherwise, if this is not specified, the
            coefficients will be represented as balances. (default: None).

        Returns
        -------
        pd.DataFrame
            A table of coefficients where rows are covariates,
            and the columns are balances. If `tree` is specified, then
            the columns are proportions, named after the tips of `tree`.

        Raises
        ------
        ValueError
            If the model has not been fitted yet.
        """
        if not self._fitted:
            raise ValueError('Model not fitted - coefficients not '
                             'calculated. See `fit()`')
        coef = self._beta
        if tree is not None:
            # project the balances back onto proportions
            basis, _ = balance_basis(tree)
            c = ilr_inv(coef.values, basis=basis)
            ids = [n.name for n in tree.tips()]
            return pd.DataFrame(c, columns=ids, index=coef.index)
        else:
            return coef

    def residuals(self, tree=None):
        """ Returns calculated residuals from fit.

        Parameters
        ----------
        tree : skbio.TreeNode, optional
            The tree used to perform the ilr transformation.  If this
            is specified, then the residuals will be represented
            as proportions. Otherwise, if this is not specified,
            the residuals will be represented as balances. (default: None).

        Returns
        -------
        pd.DataFrame
            A table of residuals whose columns are balances.
            If `tree` is specified, then the columns are proportions,
            named after the tips of `tree`.

        Raises
        ------
        ValueError
            If the model has not been fitted yet.

        References
        ----------
        .. [1] Aitchison, J. "A concise guide to compositional data analysis,
           CDA work." Girona 24 (2003): 73-81.
        """
        if not self._fitted:
            raise ValueError('Model not fitted - residuals not '
                             'calculated. See `fit()`')
        resid = self._resid
        if tree is not None:
            # project the balances back onto proportions
            basis, _ = balance_basis(tree)
            proj_resid = ilr_inv(resid.values, basis=basis)
            ids = [n.name for n in tree.tips()]
            return pd.DataFrame(proj_resid,
                                columns=ids,
                                index=resid.index)
        else:
            return resid

    @abc.abstractmethod
    def predict(self, X=None, tree=None, **kwargs):
        """ Performs a prediction based on model.

        Parameters
        ----------
        X : pd.DataFrame, optional
            Input table of covariates, where columns are covariates, and
            rows are samples.  If not specified, then the fitted values
            calculated from training the model will be returned.
        tree : skbio.TreeNode, optional
            The tree used to perform the ilr transformation.  If this
            is specified, then the prediction will be represented
            as proportions. Otherwise, if this is not specified,
            the prediction will be represented as balances. (default: None).
        **kwargs : dict
            Other arguments to be passed into the model prediction.

        Returns
        -------
        pd.DataFrame
            A table of predicted values, computed as `X.dot(beta)`, so
            rows follow the index of `X` and the columns are balances.
            If `tree` is specified, then the columns are proportions,
            named after the tips of `tree`.

        Raises
        ------
        ValueError
            If the model has not been fitted yet.
        """
        if not self._fitted:
            raise ValueError('Model not fitted - coefficients not '
                             'calculated. See `fit()`')
        if X is None:
            # fall back to the design matrix the model was built with
            X = self.design_matrices

        prediction = X.dot(self._beta)
        if tree is not None:
            # project the balances back onto proportions
            basis, _ = balance_basis(tree)
            proj_prediction = ilr_inv(prediction.values, basis=basis)
            ids = [n.name for n in tree.tips()]
            return pd.DataFrame(proj_prediction,
                                columns=ids,
                                index=prediction.index)
        else:
            return prediction
7 | # ---------------------------------------------------------------------------- 8 | import os 9 | import shutil 10 | import numpy as np 11 | import pandas as pd 12 | import pandas.util.testing as pdt 13 | import unittest 14 | from skbio import TreeNode 15 | from gneiss.regression import mixedlm 16 | 17 | 18 | class TestMixedLM(unittest.TestCase): 19 | 20 | def setUp(self): 21 | np.random.seed(6241) 22 | n = 1600 23 | exog = np.random.normal(size=(n, 2)) 24 | groups = np.kron(np.arange(n // 16), np.ones(16)) 25 | 26 | # Build up the random error vector 27 | errors = 0 28 | 29 | # The random effects 30 | exog_re = np.random.normal(size=(n, 2)) 31 | slopes = np.random.normal(size=(n // 16, 2)) 32 | slopes = np.kron(slopes, np.ones((16, 1))) * exog_re 33 | errors += slopes.sum(1) 34 | 35 | # First variance component 36 | errors += np.kron(2 * np.random.normal(size=n // 4), np.ones(4)) 37 | 38 | # Second variance component 39 | errors += np.kron(2 * np.random.normal(size=n // 2), np.ones(2)) 40 | 41 | # iid errors 42 | errors += np.random.normal(size=n) 43 | 44 | endog = exog.sum(1) + errors 45 | 46 | df = pd.DataFrame(index=range(n)) 47 | df["y1"] = endog 48 | df["y2"] = endog + 2 * 2 49 | df["groups"] = groups 50 | df["x1"] = exog[:, 0] 51 | df["x2"] = exog[:, 1] 52 | 53 | self.tree = TreeNode.read(['(c, (b,a)y2)y1;']) 54 | self.table = df[["y1", "y2"]] 55 | self.metadata = df[['x1', 'x2', 'groups']] 56 | 57 | # for testing the plugins 58 | self.results = "results" 59 | if not os.path.exists(self.results): 60 | os.mkdir(self.results) 61 | 62 | def tearDown(self): 63 | shutil.rmtree(self.results) 64 | 65 | 66 | class TestMixedLMFunctions(TestMixedLM): 67 | 68 | def test_mixedlm_balances(self): 69 | 70 | res = mixedlm("x1 + x2", self.table, self.metadata, 71 | groups="groups") 72 | res.fit() 73 | exp_pvalues = pd.DataFrame( 74 | [[0.0994110906314, 4.4193804e-05, 3.972325e-35, 3.568599e-30], 75 | [4.82688604e-236, 4.4193804e-05, 3.972325e-35, 3.568599e-30]], 76 | 
class TestMixedLMFunctions(TestMixedLM):
    """Regression tests pinning ``mixedlm`` results on simulated data."""

    @staticmethod
    def _sort_axes(frame):
        """Return `frame` with both its rows and its columns sorted."""
        return frame.sort_index(axis=0).sort_index(axis=1)

    def test_mixedlm_balances(self):
        """Random-intercept model: check p-values and coefficients."""
        terms = ['Intercept', 'Group Var', 'x1', 'x2']

        model = mixedlm("x1 + x2", self.table, self.metadata,
                        groups="groups")
        model.fit()

        # expected tables: model terms as rows, responses as columns
        expected_pvalues = pd.DataFrame(
            {'y1': [0.0994110906314, 4.4193804e-05,
                    3.972325e-35, 3.568599e-30],
             'y2': [4.82688604e-236, 4.4193804e-05,
                    3.972325e-35, 3.568599e-30]},
            index=terms)
        pdt.assert_frame_equal(self._sort_axes(model.pvalues),
                               self._sort_axes(expected_pvalues),
                               check_less_precise=True)

        expected_coefficients = pd.DataFrame(
            {'y1': [0.211451, 0.0935786, 1.022008, 0.924873],
             'y2': [4.211451, 0.0935786, 1.022008, 0.924873]},
            index=terms)
        pdt.assert_frame_equal(self._sort_axes(model.coefficients()),
                               self._sort_axes(expected_coefficients),
                               check_less_precise=True)

    def test_mixedlm_balances_vcf(self):
        """Model with a random-effects formula (``re_formula``)."""
        # NOTE: the order of the np.random calls below determines the
        # simulated values; do not reorder them.
        np.random.seed(6241)
        n_samples = 1600
        group_size = 16
        n_groups = n_samples // group_size

        fixed_design = np.random.normal(size=(n_samples, 2))
        group_labels = np.kron(np.arange(n_groups), np.ones(group_size))

        # random slopes: one pair of slopes per group
        random_design = np.random.normal(size=(n_samples, 2))
        group_slopes = np.random.normal(size=(n_groups, 2))
        expanded_slopes = np.kron(group_slopes, np.ones((group_size, 1)))
        slope_effects = (expanded_slopes * random_design).sum(1)

        # first variance component (blocks of 4 samples)
        quad_labels = np.kron(np.arange(n_samples // 4), np.ones(4))
        quad_effects = np.kron(2 * np.random.normal(size=n_samples // 4),
                               np.ones(4))

        # second variance component (blocks of 2 samples)
        pair_labels = np.kron(np.arange(n_samples // 2), np.ones(2))
        pair_effects = np.kron(2 * np.random.normal(size=n_samples // 2),
                               np.ones(2))

        # independent per-sample noise
        iid_noise = np.random.normal(size=n_samples)

        noise = slope_effects + quad_effects + pair_effects + iid_noise
        response = fixed_design.sum(1) + noise

        frame = pd.DataFrame(
            {"y1": response,
             "y2": response + 4,
             "groups": group_labels,
             "x1": fixed_design[:, 0],
             "x2": fixed_design[:, 1],
             "z1": random_design[:, 0],
             "z2": random_design[:, 1],
             "v1": quad_labels,
             "v2": pair_labels},
            index=range(n_samples))

        table = frame[["y1", "y2"]]
        metadata = frame[['x1', 'x2', 'z1', 'z2', 'v1', 'v2', 'groups']]

        model = mixedlm("x1 + x2", table, metadata, groups="groups",
                        re_formula="0+z1+z2")
        model.fit()

        terms = ['Intercept', 'x1', 'x2', 'z1 Var',
                 'z1 x z2 Cov', 'z2 Var']

        expected_pvalues = pd.DataFrame(
            {'y1': [0.038015, 3.858750e-39, 2.245068e-33,
                    2.552217e-05, 0.923418, 6.645741e-34],
             'y2': [0.000000, 3.858750e-39, 2.245068e-33,
                    2.552217e-05, 0.923418, 6.645741e-34]},
            index=terms)

        expected_coefficients = pd.DataFrame(
            {'y1': [0.163141, 1.030013, 0.935514,
                    0.115082, -0.001962, 0.14792],
             'y2': [4.163141, 1.030013, 0.935514,
                    0.115082, -0.001962, 0.14792]},
            index=terms)

        # only the rows are sorted here; column order is compared as-is
        pdt.assert_frame_equal(model.pvalues.sort_index(axis=0),
                               expected_pvalues.sort_index(axis=0),
                               check_less_precise=True)

        pdt.assert_frame_equal(model.coefficients().sort_index(axis=0),
                               expected_coefficients.sort_index(axis=0),
                               check_less_precise=True)

    def test_percent_explained(self):
        """``percent_explained`` is expected to be 0.5 for each response."""
        fitted = mixedlm("x1 + x2", self.table, self.metadata,
                         groups="groups")
        fitted.fit()

        observed = fitted.percent_explained()
        expected = pd.Series([0.5] * 2, index=['y1', 'y2'])
        pdt.assert_series_equal(observed, expected, check_less_precise=True)
def assert_coo_allclose(res, exp, rtol=1e-7, atol=1e-7):
    """Assert that two COO sparse matrices hold the same entries.

    Each matrix is flattened into ``(row, col, value)`` triplets, the
    triplets are put into a canonical (row, then column) order, and the
    two triplet tables are compared with `numpy.testing.assert_allclose`.
    The comparison is therefore independent of the order in which the
    entries happen to be stored.

    Parameters
    ----------
    res : scipy.sparse.coo_matrix
        Observed matrix (anything exposing ``row``, ``col`` and ``data``).
    exp : scipy.sparse.coo_matrix
        Expected matrix.
    rtol : float, optional
        Relative tolerance passed to `assert_allclose`.
    atol : float, optional
        Absolute tolerance passed to `assert_allclose`.

    Raises
    ------
    AssertionError
        If the number of stored entries differs, or any coordinate or
        value differs beyond the given tolerances.

    Notes
    -----
    Only the stored entries are compared; the declared shapes of the two
    matrices are not.
    """
    def _canonical_triplets(mat):
        triplets = np.vstack((mat.row, mat.col, mat.data)).T
        # lexsort treats the LAST key as the primary one: order by row,
        # break ties by column.  A single lexsort replaces two chained
        # argsort passes, which only work if the second pass is stable
        # (the default argsort kind is not).
        order = np.lexsort((mat.col, mat.row))
        return triplets[order]

    npt.assert_allclose(_canonical_triplets(res), _canonical_triplets(exp),
                        rtol=rtol, atol=atol)
class TestSparseBalances(unittest.TestCase):
    """Tests for ``sparse_balance_basis`` on small hand-built trees."""

    def test_sparse_balance_basis_base_case(self):
        """Two-tip tree: a single balance between the two tips."""
        root = TreeNode.read([u"(a,b);"])

        half = np.sqrt(1. / 2)
        expected_basis = coo_matrix(np.array([[-half, half]]))
        expected_keys = [root.name]

        observed_basis, observed_keys = sparse_balance_basis(root)

        assert_coo_allclose(expected_basis, observed_basis)
        self.assertListEqual(expected_keys, observed_keys)

    def test_sparse_balance_basis_invalid(self):
        """A trifurcating root is expected to raise ``ValueError``."""
        with self.assertRaises(ValueError):
            root = TreeNode.read([u"(a,b,c);"])
            sparse_balance_basis(root)

    def test_sparse_balance_basis_unbalanced(self):
        """Three tips, with the internal node as the first child."""
        root = TreeNode.read([u"((a,b)c, d);"])

        sixth = np.sqrt(1. / 6)
        half = np.sqrt(1. / 2)
        two_thirds = np.sqrt(2. / 3)
        expected_basis = coo_matrix(np.array([
            [-sixth, -sixth, two_thirds],
            [-half, half, 0],
        ]))
        expected_keys = [root.name, root[0].name]

        observed_basis, observed_keys = sparse_balance_basis(root)

        assert_coo_allclose(expected_basis, observed_basis)
        self.assertListEqual(expected_keys, observed_keys)

    def test_sparse_balance_basis_unbalanced2(self):
        """Three tips, with the internal node as the second child."""
        root = TreeNode.read([u"(d, (a,b)c);"])

        sixth = np.sqrt(1. / 6)
        half = np.sqrt(1. / 2)
        two_thirds = np.sqrt(2. / 3)
        expected_basis = coo_matrix(np.array([
            [-two_thirds, sixth, sixth],
            [0, -half, half],
        ]))
        expected_keys = [root.name, root[1].name]

        observed_basis, observed_keys = sparse_balance_basis(root)

        assert_coo_allclose(expected_basis, observed_basis,
                            atol=1e-7, rtol=1e-7)
        self.assertListEqual(expected_keys, observed_keys)
u"((a,b,e)c, d);" 123 | t = TreeNode.read([tree]) 124 | _count_matrix(t) 125 | 126 | def test__balance_basis_base_case(self): 127 | tree = u"(a,b);" 128 | t = TreeNode.read([tree]) 129 | 130 | exp_basis = np.array([[-np.sqrt(1. / 2), np.sqrt(1. / 2)]]) 131 | exp_keys = [t.name] 132 | res_basis, res_keys = _balance_basis(t) 133 | 134 | npt.assert_allclose(exp_basis, res_basis) 135 | self.assertListEqual(exp_keys, res_keys) 136 | 137 | def test__balance_basis_unbalanced(self): 138 | tree = u"((a,b)c, d);" 139 | t = TreeNode.read([tree]) 140 | 141 | exp_basis = np.array( 142 | [[-np.sqrt(1. / 6), -np.sqrt(1. / 6), np.sqrt(2. / 3)], 143 | [-np.sqrt(1. / 2), np.sqrt(1. / 2), 0]] 144 | ) 145 | exp_keys = [t.name, t[0].name] 146 | res_basis, res_keys = _balance_basis(t) 147 | 148 | npt.assert_allclose(exp_basis, res_basis) 149 | self.assertListEqual(exp_keys, res_keys) 150 | 151 | def test_balance_basis_base_case(self): 152 | tree = u"(a,b);" 153 | t = TreeNode.read([tree]) 154 | exp_keys = [t.name] 155 | exp_basis = np.array([0.19557032, 0.80442968]) 156 | res_basis, res_keys = balance_basis(t) 157 | 158 | npt.assert_allclose(exp_basis, res_basis) 159 | self.assertListEqual(exp_keys, res_keys) 160 | 161 | def test_balance_basis_unbalanced(self): 162 | tree = u"((a,b)c, d);" 163 | t = TreeNode.read([tree]) 164 | exp_keys = [t.name, t[0].name] 165 | exp_basis = np.array([[0.18507216, 0.18507216, 0.62985567], 166 | [0.14002925, 0.57597535, 0.28399541]]) 167 | 168 | res_basis, res_keys = balance_basis(t) 169 | 170 | npt.assert_allclose(exp_basis, res_basis) 171 | self.assertListEqual(exp_keys, list(res_keys)) 172 | 173 | def test_balance_basis_large1(self): 174 | fname = get_data_path('large_tree.nwk', 175 | subfolder='data') 176 | t = TreeNode.read(fname) 177 | # note that the basis is in reverse level order 178 | exp_basis = np.loadtxt( 179 | get_data_path('large_tree_basis.txt', 180 | subfolder='data')) 181 | res_basis, res_keys = balance_basis(t) 182 | 
npt.assert_allclose(exp_basis[:, ::-1], res_basis) 183 | 184 | 185 | if __name__ == "__main__": 186 | unittest.main() 187 | -------------------------------------------------------------------------------- /gneiss/plot/tests/test_regression_plot.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # Copyright (c) 2016--, gneiss development team. 3 | # 4 | # Distributed under the terms of the Modified BSD License. 5 | # 6 | # The full license is in the file COPYING.txt, distributed with this software. 7 | # ---------------------------------------------------------------------------- 8 | import unittest 9 | import os 10 | import shutil 11 | 12 | import numpy as np 13 | import pandas as pd 14 | import numpy.testing as npt 15 | 16 | from skbio import TreeNode 17 | from skbio.util import get_data_path 18 | 19 | from gneiss.plot._regression_plot import ols_summary, lme_summary 20 | from gneiss.regression import ols, mixedlm 21 | 22 | 23 | class TestOLS_Summary(unittest.TestCase): 24 | 25 | def setUp(self): 26 | A = np.array # aliasing for the sake of pep8 27 | self.table = pd.DataFrame({ 28 | 's1': A([1., 1.]), 29 | 's2': A([1., 2.]), 30 | 's3': A([1., 3.]), 31 | 's4': A([1., 4.]), 32 | 's5': A([1., 5.])}, 33 | index=['Y2', 'Y1']).T 34 | self.tree = TreeNode.read(['(c, (b,a)Y2)Y1;']) 35 | self.metadata = pd.DataFrame({ 36 | 'lame': [1, 1, 1, 1, 1], 37 | 'real': [1, 2, 3, 4, 5] 38 | }, index=['s1', 's2', 's3', 's4', 's5']) 39 | 40 | np.random.seed(0) 41 | n = 15 42 | a = np.array([1, 4.2, 5.3, -2.2, 8]) 43 | x1 = np.linspace(.01, 0.1, n) 44 | x2 = np.logspace(0, 0.01, n) 45 | x3 = np.exp(np.linspace(0, 0.01, n)) 46 | x4 = x1 ** 2 47 | self.x = pd.DataFrame({'x1': x1, 'x2': x2, 'x3': x3, 'x4': x4}) 48 | n__ = np.random.normal(size=n) 49 | y = a[0] + a[1] * x1 + a[2] * x2 + a[3] * x3 + a[4] * x4 + n__ 50 | sy = np.vstack((-y / 10, -y)).T 51 | self.y = 
pd.DataFrame(sy, columns=['y0', 'y1']) 52 | self.t2 = TreeNode.read([r"((a,b)y1,c)y0;"]) 53 | 54 | self.results = "results" 55 | os.mkdir(self.results) 56 | 57 | def tearDown(self): 58 | shutil.rmtree(self.results) 59 | 60 | @unittest.skip('Visualizations are deprecated') 61 | def test_visualization(self): 62 | res = ols(formula="x1 + x2 + x3 + x4", 63 | table=self.y, metadata=self.x) 64 | res.fit() 65 | 66 | ols_summary(self.results, res, tree=self.t2) 67 | fp = os.path.join(self.results, 'pvalues.csv') 68 | self.assertTrue(os.path.exists(fp)) 69 | fp = os.path.join(self.results, 'coefficients.csv') 70 | self.assertTrue(os.path.exists(fp)) 71 | fp = os.path.join(self.results, 'predicted.csv') 72 | self.assertTrue(os.path.exists(fp)) 73 | fp = os.path.join(self.results, 'residuals.csv') 74 | self.assertTrue(os.path.exists(fp)) 75 | 76 | index_fp = os.path.join(self.results, 'index.html') 77 | self.assertTrue(os.path.exists(index_fp)) 78 | 79 | with open(index_fp, 'r') as fh: 80 | html = fh.read() 81 | self.assertIn('