├── .flake8
├── .github
    └── workflows
    │   └── tests.yml
├── .gitignore
├── .readthedocs.yaml
├── .travis.yml
├── LICENSE.txt
├── MANIFEST.in
├── Makefile
├── README.md
├── bin
    ├── generate_build.sh
    └── test_mpi.sh
├── codecov.yaml
├── compat
    └── win32
    │   └── stdint.h
├── docs
    ├── Makefile
    └── source
    │   ├── api.rst
    │   ├── art
    │       ├── pyuoi.pdf
    │       └── pyuoi.png
    │   ├── conf.py
    │   ├── contributing.rst
    │   ├── index.rst
    │   ├── installation.rst
    │   ├── introduction.rst
    │   ├── mpi.rst
    │   └── pyuoi
    │       ├── datasets
    │           └── datasets.rst
    │       ├── decomposition
    │           └── decomposition.rst
    │       ├── linear_model
    │           └── linear_model.rst
    │       ├── mpi_utils.rst
    │       └── utils.rst
├── examples
    ├── README.rst
    ├── plot_swimmer.py
    └── plot_uoi_lasso.py
├── liblbfgs
    ├── COPYING
    ├── README
    ├── arithmetic_ansi.h
    ├── arithmetic_sse_double.h
    ├── arithmetic_sse_float.h
    ├── lbfgs.c
    └── lbfgs.h
├── paper
    ├── paper.bib
    └── paper.md
├── pyproject.toml
├── pytest.ini
├── requirements-dev.txt
├── requirements.txt
├── setup.py
├── src
    └── pyuoi
    │   ├── __init__.py
    │   ├── data
    │       └── Swimmer.h5
    │   ├── datasets
    │       └── __init__.py
    │   ├── decomposition
    │       ├── CUR.py
    │       ├── NMF.py
    │       ├── __init__.py
    │       ├── base.py
    │       └── utils.py
    │   ├── lbfgs
    │       ├── LICENSE
    │       ├── __init__.py
    │       └── _lowlevel.pyx
    │   ├── linear_model
    │       ├── __init__.py
    │       ├── base.py
    │       ├── elasticnet.py
    │       ├── lasso.py
    │       ├── logistic.py
    │       ├── poisson.py
    │       ├── scikit-learn_license
    │       └── utils.py
    │   ├── mpi_utils.py
    │   └── utils.py
└── tests
    ├── test_cur.py
    ├── test_elasticnet.py
    ├── test_lbfgs.py
    ├── test_mpi
        ├── __init__.py
        ├── test_mpi_uoi_linear_model.py
        └── test_mpi_utils.py
    ├── test_nmf.py
    ├── test_poisson.py
    ├── test_scores.py
    ├── test_uoi_l1logistic.py
    ├── test_uoi_lasso.py
    └── test_utils.py


/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | ignore = E401,W503,W504
3 | max-line-length = 100
4 | 


--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
 1 | name: pyuoi_tests
 2 | on:
 3 |   push:
 4 |     branches:
 5 |       - main
 6 |   pull_request:
 7 |     branches:
 8 |       - main
 9 | jobs:
10 |   run-tests:
11 |     runs-on: ${{ matrix.os  }}
12 |     strategy:
13 |       fail-fast: false
14 |       matrix:
15 |         python-version: ["3.7", "3.8", "3.9"]
16 |         os: [ubuntu-latest, macOS-latest]
17 |     steps:
18 |       - name: Test pyuoi
19 |         uses: actions/checkout@v3
20 |       - name: Set up Python ${{ matrix.python-version }}
21 |         uses: actions/setup-python@v3
22 |         with:
23 |           python-version: ${{ matrix.python-version }}
24 |       - name: Install dependencies
25 |         run: |
26 | 
27 |           python -m pip install --upgrade pip
28 |           python -m pip install -r requirements-dev.txt
29 |           if [ "${{matrix.os}}" = "ubuntu-latest" ]; then
30 |             sudo apt-get update
31 |             sudo apt-get install -y openmpi-bin libopenmpi-dev gcc
32 |             python -m pip install mpi4py
33 |           fi
34 |           python -m pip install codecov pytest-cov pycasso
35 |           python -m pip install -e .
36 |       - name: Lint with flake8
37 |         run: |
38 |           python -m flake8 src/pyuoi tests examples
39 |       - name: Test with pytest
40 |         run: |
41 |           python -m pytest -sv --cov=./ tests
42 |       - name: Build docs
43 |         run: |
44 |           sphinx-build -b html docs/source docs/build
45 |       - name: Codecov
46 |         run: |
47 |           python -m codecov
48 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # pdfs
  2 | *.pdf
  3 | 
  4 | # docs
  5 | docs/
  6 | 
  7 | # pip wheel metadata
  8 | pip-wheel-metadata/
  9 | 
 10 | # Pycharm settings files
 11 | .idea
 12 | 
 13 | # Byte-compiled / optimized / DLL files
 14 | __pycache__/
 15 | *.py[cod]
 16 | *$py.class
 17 | 
 18 | # C extensions
 19 | *.so
 20 | 
 21 | # Distribution / packaging
 22 | .Python
 23 | env/
 24 | build/
 25 | develop-eggs/
 26 | dist/
 27 | downloads/
 28 | eggs/
 29 | .eggs/
 30 | lib/
 31 | lib64/
 32 | parts/
 33 | sdist/
 34 | var/
 35 | *.egg-info/
 36 | .installed.cfg
 37 | *.egg
 38 | 
 39 | # PyInstaller
 40 | #  Usually these files are written by a python script from a template
 41 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 42 | *.manifest
 43 | *.spec
 44 | 
 45 | # Installer logs
 46 | pip-log.txt
 47 | pip-delete-this-directory.txt
 48 | 
 49 | # Unit test / coverage reports
 50 | htmlcov/
 51 | .tox/
 52 | .coverage
 53 | .coverage.*
 54 | .cache
 55 | nosetests.xml
 56 | coverage.xml
 57 | *,cover
 58 | .hypothesis/
 59 | .pytest_cache
 60 | 
 61 | # Translations
 62 | *.mo
 63 | *.pot
 64 | 
 65 | # Django stuff:
 66 | *.log
 67 | local_settings.py
 68 | 
 69 | # Flask stuff:
 70 | instance/
 71 | .webassets-cache
 72 | 
 73 | # Scrapy stuff:
 74 | .scrapy
 75 | 
 76 | # Sphinx documentation
 77 | docs/_build/
 78 | 
 79 | # PyBuilder
 80 | target/
 81 | 
 82 | # IPython Notebook
 83 | .ipynb_checkpoints
 84 | 
 85 | # pyenv
 86 | .python-version
 87 | 
 88 | # celery beat schedule file
 89 | celerybeat-schedule
 90 | 
 91 | # dotenv
 92 | .env
 93 | 
 94 | # virtualenv
 95 | venv/
 96 | ENV/
 97 | 
 98 | # Spyder project settings
 99 | .spyderproject
100 | 
101 | # Rope project settings
102 | .ropeproject
103 | 
104 | # Macs
105 | .DS_Store
106 | 
107 | # VS Code
108 | .vscode
109 | 
110 | # lbfgs solver stuff
111 | pyuoi/lbfgs/_lowlevel.c
112 | 


--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # .readthedocs.yml
 2 | # Read the Docs configuration file
 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 4 | 
 5 | # Required
 6 | version: 2
 7 | 
 8 | # Optionally set the version of Python and requirements required to build your docs
 9 | python:
10 |   install:
11 |     - requirements: requirements-dev.txt
12 |     - method: pip
13 |       path: .
14 |       extra_requirements:
15 |            - dev
16 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | os: linux
 3 | python:
 4 |   - 3.6
 5 |   - 3.7
 6 |   - 3.8
 7 | matrix:
 8 |   include:
 9 |     - os: osx
10 |       language: generic
11 |       env: PYTHON=3.7.4
12 |       addons:
13 |         homebrew:
14 |           packages:
15 |             - openmpi
16 |             - pyenv-virtualenv
17 | before_install:
18 |     - |
19 |       if [ "$TRAVIS_OS_NAME" = "osx" ]; then
20 |         pyenv install $PYTHON
21 |         export PYENV_VERSION=$PYTHON
22 |         export PATH="/Users/travis/.pyenv/shims:${PATH}"
23 |         pyenv virtualenv venv
24 |         source /Users/travis/.pyenv/versions/3.7.4/envs/venv/bin/activate
25 |       fi
26 |     - |
27 |       if [ "$TRAVIS_OS_NAME" = "linux" ]; then
28 |         sudo apt-get update
29 |         sudo apt-get install -y openmpi-bin libopenmpi-dev gcc
30 |       fi
31 | install:
32 |     - pip install -r requirements-dev.txt
33 |     - pip install codecov
34 |     - pip install mpi4py
35 |     - pip install pycasso
36 |     - python setup.py build
37 |     - python setup.py develop
38 |     - pip install pytest-cov
39 | script:
40 |     - flake8 pyuoi tests examples
41 |     - pytest --cov=./ tests
42 |     - sphinx-build -W -b html docs/source docs/build
43 | after_success:
44 |     - codecov
45 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | PyUoI Copyright (c) 2019, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy).  All rights reserved.
 2 | 
 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
 4 | 
 5 | (1) Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
 6 | 
 7 | (2) Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
 8 | 
 9 | (3) Neither the name of the University of California, Lawrence Berkeley National Laboratory, U.S. Dept. of Energy nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
10 | 
11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
12 | 
13 | You are under no obligation whatsoever to provide any bug fixes, patches, or upgrades to the features, functionality or performance of the source code ("Enhancements") to anyone; however, if you choose to make your Enhancements available either publicly, or directly to Lawrence Berkeley National Laboratory, without imposing a separate written license agreement for such Enhancements, then you hereby grant the following license: a  non-exclusive, royalty-free perpetual license to install, use, modify, prepare derivative works, incorporate into other computer software, distribute, and sublicense such enhancements or derivative works thereof, in binary and source code form.
14 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | include pyproject.toml
 2 | include requirements.txt
 3 | include requirements-dev.txt
 4 | include LICENSE.txt
 5 | include liblbfgs/*.h
 6 | include liblbfgs/*.c
 7 | include pyuoi/lbfgs/*.pyx
 8 | exclude pyuoi/lbfgs/*.c
 9 | recursive-include test* *.py
10 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | PYTHON = python
 2 | FLAKE = flake8
 3 | COVERAGE = coverage
 4 | 
 5 | help:
 6 | 	@echo "Please use \`make <target>' where <target> is one of"
 7 | 	@echo "  init           to install required packages"
 8 | 	@echo "  build          to build the python package(s)"
 9 | 	@echo "  install        to build and install the python package(s)"
10 | 	@echo "  develop        to build and install the python package(s) for development"
11 | 	@echo "  test           to run all integration and unit tests"
12 | 	@echo "  htmldoc        to make the HTML documentation and open it with the default browser"
13 | 	@echo "  coverage       to run tests, build coverage HTML report and open it with the default browser"
14 | 	@echo ""
15 | 	@echo "Advanced targets"
16 | 	@echo "  apidoc         to generate API docs *.rst files from sources"
17 | 	@echo "  coverage-only  to run tests and build coverage report"
18 | 	@echo "  coverage-open  to open coverage HTML report in the default browser"
19 | 	@echo "  htmlclean      to remove all generated documentation"
20 | 	@echo "  htmldoc-only   to make the HTML documentation"
21 | 	@echo "  htmldoc-open   to open the HTML documentation with the default browser"
22 | 	@echo "  pdfdoc         to make the LaTeX sources and build the PDF of the documentation"
23 | 
24 | init:
25 | 	pip install -r requirements.txt -r requirements-dev.txt -r requirements-doc.txt
26 | 
27 | build:
28 | 	$(PYTHON) setup.py build
29 | 
30 | install: build
31 | 	$(PYTHON) setup.py install
32 | 
33 | develop: build
34 | 	$(PYTHON) setup.py develop
35 | 
36 | test:
37 | 	pip install -r requirements-dev.txt
38 | 	tox
39 | 
40 | flake:
41 | 	$(FLAKE) pyuoi/
42 | 	$(FLAKE) tests/
43 | 	$(FLAKE) --ignore E402,W504 docs/gallery
44 | 
45 | checkpdb:
46 | 	find {pyuoi,tests} -name "*.py" -exec grep -Hn pdb {} \;
47 | 
48 | #devtest:
49 | #$(PYTHON) -W ignore:::pynwb.form.build.map: test.py -fpi
50 | 
51 | testclean:
52 | 	rm *.npy *.nwb *.yaml
53 | 
54 | apidoc:
55 | 	pip install -r requirements-doc.txt
56 | 	cd docs && $(MAKE) apidoc
57 | 
58 | htmldoc-only: apidoc
59 | 	cd docs && $(MAKE) html
60 | 
61 | htmlclean:
62 | 	cd docs && $(MAKE) clean
63 | 
64 | htmldoc-open:
65 | 	@echo ""
66 | 	@echo "To view the HTML documentation open: docs/_build/html/index.html"
67 | 	open docs/_build/html/index.html || xdg-open docs/_build/html/index.html
68 | 
69 | htmldoc: htmldoc-only htmldoc-open
70 | 
71 | pdfdoc:
72 | 	cd docs && $(MAKE) latexpdf
73 | 	@echo ""
74 | 	@echo "To view the PDF documentation open: docs/_build/latex/PyNWB.pdf"
75 | 
76 | coverage-only:
77 | 	tox -e localcoverage
78 | 
79 | coverage-open:
80 | 	@echo "To view coverage data open: ./tests/coverage/htmlcov/index.html"
81 | 	open ./tests/coverage/htmlcov/index.html || xdg-open ./tests/coverage/htmlcov/index.html
82 | 
83 | coverage: coverage-only coverage-open
84 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <img src="docs/source/art/pyuoi.png" alt="PyUoI logo" width="200px">
  2 | 
  3 | [![Actions Status](https://github.com/BouchardLab/pyuoi/workflows/pyuoi_tests/badge.svg)](https://github.com/BouchardLab/pyuoi/actions)
  4 | [![codecov](https://codecov.io/gh/BouchardLab/pyuoi/branch/main/graph/badge.svg?token=DxEQxVEam8)](https://codecov.io/gh/BouchardLab/pyuoi)
  5 | [![Documentation Status](https://readthedocs.org/projects/pyuoi/badge/?version=latest)](https://pyuoi.readthedocs.io/en/latest/?badge=latest)
  6 | ![PyPI](https://img.shields.io/pypi/v/pyuoi)
  7 | [![Anaconda-Server Badge](https://anaconda.org/conda-forge/pyuoi/badges/installer/conda.svg)](https://conda.anaconda.org/conda-forge)
  8 | [![DOI](https://joss.theoj.org/papers/10.21105/joss.01799/status.svg)](https://doi.org/10.21105/joss.01799)
  9 | 
 10 | 
 11 | PyUoI contains implementations of Union of Intersections framework for a variety
 12 | of penalized generalized linear models as well as dimensionality reductions
 13 | techniques such as column subset selection and non-negative matrix
 14 | factorization. In general, UoI is a statistical machine learning framework that
 15 | leverages two concepts in model inference:
 16 | 
 17 | 1. Separating the selection and estimation problems to simultaneously achieve
 18 |    sparse models with low-bias and low-variance parameter estimates.
 19 | 2. Stability to perturbations in both selection and estimation.
 20 | 
 21 | 
 22 | PyUoI is designed to function similarly to ``scikit-learn``, as it often builds
 23 | upon ``scikit-learn``'s implementations of the aforementioned algorithms.
 24 | 
 25 | Further details on the UoI framework can be found in the NeurIPS paper (<a href="https://papers.nips.cc/paper/6708-union-of-intersections-uoi-for-interpretable-data-driven-discovery-and-prediction">Bouchard et al., 2017</a>).
 26 | 
 27 | # Installation
 28 | 
 29 | PyUoI is available for Python 3 on PyPI:
 30 | 
 31 | ```
 32 | pip install pyuoi
 33 | ```
 34 | 
 35 | and through conda-forge:
 36 | 
 37 | ```
 38 | conda install pyuoi -c conda-forge
 39 | ```
 40 | 
 41 | # Requirements
 42 | 
 43 | ## Runtime
 44 | 
 45 | PyUoI requires
 46 | 
 47 | * numpy>=1.14
 48 | * h5py>=2.8
 49 | * scikit-learn>=0.24
 50 | 
 51 | and optionally
 52 | 
 53 | * pycasso
 54 | * mpi4py
 55 | 
 56 | to run.
 57 | 
 58 | ## Develop
 59 | 
 60 | To develop PyUoI you will additionally need
 61 | 
 62 | * cython
 63 | 
 64 | to build from source and
 65 | 
 66 | * pytest
 67 | * flake8
 68 | 
 69 | to run the tests and check formatting.
 70 | 
 71 | PyUoI has been built and tested on Python 3.9.18 with
 72 | 
 73 | * numpy==1.26.1
 74 | * h5py==3.10.0
 75 | * scikit-learn==1.3.1
 76 | * cython==3.0.4
 77 | * pytest==7.4.2
 78 | * flake8==6.1.0
 79 | 
 80 | # Features
 81 | 
 82 | PyUoI is split up into two modules, with the following UoI algorithms:
 83 | 
 84 | * `linear_model` (generalized linear models)
 85 |     * Lasso penalized linear regression UoI<sub>Lasso</sub>.
 86 |     * Elastic-net penalized linear regression (UoI<sub>ElasticNet</sub>).
 87 |     * Logistic regression (Bernoulli and multinomial) (UoI<sub>Logistic</sub>).
 88 |     * Poisson regression (UoI<sub>Poisson</sub>).
 89 | * `decomposition` (dimensionality reduction)
 90 |     * Column subset selection (UoI<sub>CSS</sub>).
 91 |     * Non-negative matrix factorization (UoI<sub>NMF</sub>).
 92 | 
 93 | Similar to `scikit-learn`, each UoI algorithm has its own Python class.
 94 | 
 95 | # Documentation
 96 | 
 97 | Please see our <a href="https://pyuoi.readthedocs.io/en/latest/">ReadTheDocs</a> page for an introduction to Union of Intersections, usage of PyUoI, and the API.
 98 | 
 99 | # Copyright
100 | 
101 | PyUoI Copyright (c) 2019, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy).  All rights reserved.
102 | 
103 | If you have questions about your rights to use or distribute this software, please contact Berkeley Lab's Innovation & Partnerships Office at  IPO@lbl.gov referring to " PyUoI" (LBNL Ref 2019-157)."
104 | 
105 | NOTICE.  This software was developed under funding from the U.S. Department of Energy.  As such, the U.S. Government has been granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software to reproduce, prepare derivative works, and perform publicly and display publicly.  The U.S. Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so.
106 | 


--------------------------------------------------------------------------------
/bin/generate_build.sh:
--------------------------------------------------------------------------------
 1 | eval "$(conda shell.bash hook)"
 2 | mkdir dist
 3 | for py in 3.6 3.7 3.8; do
 4 |   git clone https://github.com/BouchardLab/pyuoi.git
 5 |   cd pyuoi
 6 |   conda create -y -n temp_build_env python=$py
 7 |   conda activate temp_build_env
 8 |   conda install -y numpy cython
 9 |   pip install setuptools wheel
10 |   python setup.py sdist bdist_wheel
11 |   conda deactivate
12 |   conda remove -y -n temp_build_env --all
13 |   mv dist/* ../dist/.
14 |   cd ..
15 |   rm -rf pyuoi
16 | done
17 | 


--------------------------------------------------------------------------------
/bin/test_mpi.sh:
--------------------------------------------------------------------------------
1 | mpiexec -n 4 pytest -sv $1
2 | mpiexec -n 49 pytest -sv $1
3 | 


--------------------------------------------------------------------------------
/codecov.yaml:
--------------------------------------------------------------------------------
1 | ignore:
2 |   - "setup.py"
3 |   - "tests"
4 | 


--------------------------------------------------------------------------------
/compat/win32/stdint.h:
--------------------------------------------------------------------------------
  1 | // ISO C9x  compliant stdint.h for Microsoft Visual Studio
  2 | // Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
  3 | // 
  4 | //  Copyright (c) 2006-2008 Alexander Chemeris
  5 | // 
  6 | // Redistribution and use in source and binary forms, with or without
  7 | // modification, are permitted provided that the following conditions are met:
  8 | // 
  9 | //   1. Redistributions of source code must retain the above copyright notice,
 10 | //      this list of conditions and the following disclaimer.
 11 | // 
 12 | //   2. Redistributions in binary form must reproduce the above copyright
 13 | //      notice, this list of conditions and the following disclaimer in the
 14 | //      documentation and/or other materials provided with the distribution.
 15 | // 
 16 | //   3. The name of the author may be used to endorse or promote products
 17 | //      derived from this software without specific prior written permission.
 18 | // 
 19 | // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
 20 | // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 21 | // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
 22 | // EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 24 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 25 | // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
 26 | // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 27 | // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 28 | // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 29 | // 
 30 | ///////////////////////////////////////////////////////////////////////////////
 31 | 
 32 | #ifndef _MSC_VER // [
 33 | #error "Use this header only with Microsoft Visual C++ compilers!"
 34 | #endif // _MSC_VER ]
 35 | 
 36 | #ifndef _MSC_STDINT_H_ // [
 37 | #define _MSC_STDINT_H_
 38 | 
 39 | #if _MSC_VER > 1000
 40 | #pragma once
 41 | #endif
 42 | 
 43 | #include <limits.h>
 44 | 
 45 | // For Visual Studio 6 in C++ mode and for many Visual Studio versions when
 46 | // compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}'
 47 | // or compiler give many errors like this:
 48 | //   error C2733: second C linkage of overloaded function 'wmemchr' not allowed
 49 | #ifdef __cplusplus
 50 | extern "C" {
 51 | #endif
 52 | #  include <wchar.h>
 53 | #ifdef __cplusplus
 54 | }
 55 | #endif
 56 | 
 57 | // Define _W64 macros to mark types changing their size, like intptr_t.
 58 | #ifndef _W64
 59 | #  if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
 60 | #     define _W64 __w64
 61 | #  else
 62 | #     define _W64
 63 | #  endif
 64 | #endif
 65 | 
 66 | 
 67 | // 7.18.1 Integer types
 68 | 
 69 | // 7.18.1.1 Exact-width integer types
 70 | 
 71 | // Visual Studio 6 and Embedded Visual C++ 4 doesn't
 72 | // realize that, e.g. char has the same size as __int8
 73 | // so we give up on __intX for them.
 74 | #if (_MSC_VER < 1300)
 75 |    typedef signed char       int8_t;
 76 |    typedef signed short      int16_t;
 77 |    typedef signed int        int32_t;
 78 |    typedef unsigned char     uint8_t;
 79 |    typedef unsigned short    uint16_t;
 80 |    typedef unsigned int      uint32_t;
 81 | #else
 82 |    typedef signed __int8     int8_t;
 83 |    typedef signed __int16    int16_t;
 84 |    typedef signed __int32    int32_t;
 85 |    typedef unsigned __int8   uint8_t;
 86 |    typedef unsigned __int16  uint16_t;
 87 |    typedef unsigned __int32  uint32_t;
 88 | #endif
 89 | typedef signed __int64       int64_t;
 90 | typedef unsigned __int64     uint64_t;
 91 | 
 92 | 
 93 | // 7.18.1.2 Minimum-width integer types
 94 | typedef int8_t    int_least8_t;
 95 | typedef int16_t   int_least16_t;
 96 | typedef int32_t   int_least32_t;
 97 | typedef int64_t   int_least64_t;
 98 | typedef uint8_t   uint_least8_t;
 99 | typedef uint16_t  uint_least16_t;
100 | typedef uint32_t  uint_least32_t;
101 | typedef uint64_t  uint_least64_t;
102 | 
103 | // 7.18.1.3 Fastest minimum-width integer types
104 | typedef int8_t    int_fast8_t;
105 | typedef int16_t   int_fast16_t;
106 | typedef int32_t   int_fast32_t;
107 | typedef int64_t   int_fast64_t;
108 | typedef uint8_t   uint_fast8_t;
109 | typedef uint16_t  uint_fast16_t;
110 | typedef uint32_t  uint_fast32_t;
111 | typedef uint64_t  uint_fast64_t;
112 | 
113 | // 7.18.1.4 Integer types capable of holding object pointers
114 | #ifdef _WIN64 // [
115 |    typedef signed __int64    intptr_t;
116 |    typedef unsigned __int64  uintptr_t;
117 | #else // _WIN64 ][
118 |    typedef _W64 signed int   intptr_t;
119 |    typedef _W64 unsigned int uintptr_t;
120 | #endif // _WIN64 ]
121 | 
122 | // 7.18.1.5 Greatest-width integer types
123 | typedef int64_t   intmax_t;
124 | typedef uint64_t  uintmax_t;
125 | 
126 | 
127 | // 7.18.2 Limits of specified-width integer types
128 | 
129 | #if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [   See footnote 220 at page 257 and footnote 221 at page 259
130 | 
131 | // 7.18.2.1 Limits of exact-width integer types
132 | #define INT8_MIN     ((int8_t)_I8_MIN)
133 | #define INT8_MAX     _I8_MAX
134 | #define INT16_MIN    ((int16_t)_I16_MIN)
135 | #define INT16_MAX    _I16_MAX
136 | #define INT32_MIN    ((int32_t)_I32_MIN)
137 | #define INT32_MAX    _I32_MAX
138 | #define INT64_MIN    ((int64_t)_I64_MIN)
139 | #define INT64_MAX    _I64_MAX
140 | #define UINT8_MAX    _UI8_MAX
141 | #define UINT16_MAX   _UI16_MAX
142 | #define UINT32_MAX   _UI32_MAX
143 | #define UINT64_MAX   _UI64_MAX
144 | 
145 | // 7.18.2.2 Limits of minimum-width integer types
146 | #define INT_LEAST8_MIN    INT8_MIN
147 | #define INT_LEAST8_MAX    INT8_MAX
148 | #define INT_LEAST16_MIN   INT16_MIN
149 | #define INT_LEAST16_MAX   INT16_MAX
150 | #define INT_LEAST32_MIN   INT32_MIN
151 | #define INT_LEAST32_MAX   INT32_MAX
152 | #define INT_LEAST64_MIN   INT64_MIN
153 | #define INT_LEAST64_MAX   INT64_MAX
154 | #define UINT_LEAST8_MAX   UINT8_MAX
155 | #define UINT_LEAST16_MAX  UINT16_MAX
156 | #define UINT_LEAST32_MAX  UINT32_MAX
157 | #define UINT_LEAST64_MAX  UINT64_MAX
158 | 
159 | // 7.18.2.3 Limits of fastest minimum-width integer types
160 | #define INT_FAST8_MIN    INT8_MIN
161 | #define INT_FAST8_MAX    INT8_MAX
162 | #define INT_FAST16_MIN   INT16_MIN
163 | #define INT_FAST16_MAX   INT16_MAX
164 | #define INT_FAST32_MIN   INT32_MIN
165 | #define INT_FAST32_MAX   INT32_MAX
166 | #define INT_FAST64_MIN   INT64_MIN
167 | #define INT_FAST64_MAX   INT64_MAX
168 | #define UINT_FAST8_MAX   UINT8_MAX
169 | #define UINT_FAST16_MAX  UINT16_MAX
170 | #define UINT_FAST32_MAX  UINT32_MAX
171 | #define UINT_FAST64_MAX  UINT64_MAX
172 | 
173 | // 7.18.2.4 Limits of integer types capable of holding object pointers
174 | #ifdef _WIN64 // [
175 | #  define INTPTR_MIN   INT64_MIN
176 | #  define INTPTR_MAX   INT64_MAX
177 | #  define UINTPTR_MAX  UINT64_MAX
178 | #else // _WIN64 ][
179 | #  define INTPTR_MIN   INT32_MIN
180 | #  define INTPTR_MAX   INT32_MAX
181 | #  define UINTPTR_MAX  UINT32_MAX
182 | #endif // _WIN64 ]
183 | 
184 | // 7.18.2.5 Limits of greatest-width integer types
185 | #define INTMAX_MIN   INT64_MIN
186 | #define INTMAX_MAX   INT64_MAX
187 | #define UINTMAX_MAX  UINT64_MAX
188 | 
189 | // 7.18.3 Limits of other integer types
190 | 
191 | #ifdef _WIN64 // [
192 | #  define PTRDIFF_MIN  _I64_MIN
193 | #  define PTRDIFF_MAX  _I64_MAX
194 | #else  // _WIN64 ][
195 | #  define PTRDIFF_MIN  _I32_MIN
196 | #  define PTRDIFF_MAX  _I32_MAX
197 | #endif  // _WIN64 ]
198 | 
199 | #define SIG_ATOMIC_MIN  INT_MIN
200 | #define SIG_ATOMIC_MAX  INT_MAX
201 | 
202 | #ifndef SIZE_MAX // [
203 | #  ifdef _WIN64 // [
204 | #     define SIZE_MAX  _UI64_MAX
205 | #  else // _WIN64 ][
206 | #     define SIZE_MAX  _UI32_MAX
207 | #  endif // _WIN64 ]
208 | #endif // SIZE_MAX ]
209 | 
210 | // WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
211 | #ifndef WCHAR_MIN // [
212 | #  define WCHAR_MIN  0
213 | #endif  // WCHAR_MIN ]
214 | #ifndef WCHAR_MAX // [
215 | #  define WCHAR_MAX  _UI16_MAX
216 | #endif  // WCHAR_MAX ]
217 | 
218 | #define WINT_MIN  0
219 | #define WINT_MAX  _UI16_MAX
220 | 
221 | #endif // __STDC_LIMIT_MACROS ]
222 | 
223 | 
224 | // 7.18.4 Limits of other integer types
225 | 
226 | #if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [   See footnote 224 at page 260
227 | 
228 | // 7.18.4.1 Macros for minimum-width integer constants
229 | 
230 | #define INT8_C(val)  val##i8
231 | #define INT16_C(val) val##i16
232 | #define INT32_C(val) val##i32
233 | #define INT64_C(val) val##i64
234 | 
235 | #define UINT8_C(val)  val##ui8
236 | #define UINT16_C(val) val##ui16
237 | #define UINT32_C(val) val##ui32
238 | #define UINT64_C(val) val##ui64
239 | 
240 | // 7.18.4.2 Macros for greatest-width integer constants
241 | #define INTMAX_C   INT64_C
242 | #define UINTMAX_C  UINT64_C
243 | 
244 | #endif // __STDC_CONSTANT_MACROS ]
245 | 
246 | 
247 | #endif // _MSC_STDINT_H_ ]
248 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | SOURCEDIR     = source
 8 | BUILDDIR      = build
 9 | 
10 | # Put it first so that "make" without argument is like "make help".
11 | help:
12 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
13 | 
14 | .PHONY: help Makefile
15 | 
16 | # Catch-all target: route all unknown targets to Sphinx using the new
17 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
18 | %: Makefile
19 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


--------------------------------------------------------------------------------
/docs/source/api.rst:
--------------------------------------------------------------------------------
 1 | .. PyUoI
 2 | 
 3 | ===
 4 | API
 5 | ===
 6 | 
 7 | .. toctree::
 8 |     :maxdepth: 2
 9 | 
10 |     pyuoi/linear_model/linear_model
11 |     pyuoi/decomposition/decomposition
12 |     pyuoi/datasets/datasets
13 |     pyuoi/utils
14 |     pyuoi/mpi_utils
15 | 


--------------------------------------------------------------------------------
/docs/source/art/pyuoi.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BouchardLab/pyuoi/25e47655a07895f206c2e3ee3b259421c144a05d/docs/source/art/pyuoi.pdf


--------------------------------------------------------------------------------
/docs/source/art/pyuoi.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BouchardLab/pyuoi/25e47655a07895f206c2e3ee3b259421c144a05d/docs/source/art/pyuoi.png


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # Configuration file for the Sphinx documentation builder.
  4 | #
  5 | # This file does only contain a selection of the most common options. For a
  6 | # full list see the documentation:
  7 | # http://www.sphinx-doc.org/en/master/config
  8 | 
  9 | # -- Path setup --------------------------------------------------------------
 10 | 
 11 | # If extensions (or modules to document with autodoc) are in another directory,
 12 | # add these directories to sys.path here. If the directory is relative to the
 13 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 14 | #
 15 | import os
 16 | import sys
 17 | import sphinx_rtd_theme
 18 | 
 19 | # Get the project root dir, which is the parent parent dir of this
 20 | project_root = os.path.dirname(os.getcwd())
 21 | 
 22 | # Insert the project root dir as the first element in the PYTHONPATH.
 23 | # This lets us ensure that the source package is imported, and that its
 24 | # version is used.
 25 | sys.path.insert(0, project_root)
 26 | 
 27 | 
 28 | # -- Project information -----------------------------------------------------
 29 | 
 30 | project = 'PyUoI'
 31 | copyright = 'The Regents of the University of California, through Lawrence Berkeley National Laboratory'
 32 | author = 'Contributors'
 33 | 
 34 | # The short X.Y version
 35 | version = ''
 36 | # The full version, including alpha/beta/rc tags
 37 | release = 'alpha'
 38 | 
 39 | 
 40 | # -- General configuration ---------------------------------------------------
 41 | 
 42 | # If your documentation needs a minimal Sphinx version, state it here.
 43 | #
 44 | # needs_sphinx = '1.0'
 45 | 
 46 | # Add any Sphinx extension module names here, as strings. They can be
 47 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 48 | # ones.
 49 | extensions = [
 50 |     'sphinx.ext.autodoc',
 51 |     'sphinx.ext.doctest',
 52 |     'sphinx.ext.intersphinx',
 53 |     'sphinx.ext.todo',
 54 |     'sphinx.ext.coverage',
 55 |     'sphinx.ext.mathjax',
 56 |     'sphinx.ext.ifconfig',
 57 |     'sphinx.ext.viewcode',
 58 |     'sphinx.ext.githubpages',
 59 |     'sphinx.ext.napoleon',
 60 |     'sphinx.ext.mathjax',
 61 |     'sphinx_rtd_theme',
 62 |     'sphinx_gallery.gen_gallery'
 63 | ]
 64 | 
 65 | sphinx_gallery_conf = {
 66 |     # path to your examples scripts
 67 |     'examples_dirs': ['../../examples'],
 68 |     # path where to save gallery generated examples
 69 |     'gallery_dirs': ['auto_examples'],
 70 |     #'subsection_order': ExplicitOrder(['../gallery/general', '../gallery/domain']),
 71 |     'backreferences_dir': 'gen_modules/backreferences',
 72 |     'min_reported_time': 5
 73 | }
 74 | 
 75 | # Add any paths that contain templates here, relative to this directory.
 76 | templates_path = ['_templates']
 77 | 
 78 | # The suffix(es) of source filenames.
 79 | # You can specify multiple suffix as a list of string:
 80 | #
 81 | # source_suffix = ['.rst', '.md']
 82 | source_suffix = '.rst'
 83 | 
 84 | # The master toctree document.
 85 | master_doc = 'index'
 86 | 
 87 | # The language for content autogenerated by Sphinx. Refer to documentation
 88 | # for a list of supported languages.
 89 | #
 90 | # This is also used if you do content translation via gettext catalogs.
 91 | # Usually you set "language" from the command line for these cases.
 92 | language = 'en'
 93 | 
 94 | # List of patterns, relative to source directory, that match files and
 95 | # directories to ignore when looking for source files.
 96 | # This pattern also affects html_static_path and html_extra_path.
 97 | exclude_patterns = []
 98 | 
 99 | # The name of the Pygments (syntax highlighting) style to use.
100 | pygments_style = None
101 | 
102 | 
103 | # -- Options for HTML output -------------------------------------------------
104 | 
105 | # The theme to use for HTML and HTML Help pages.  See the documentation for
106 | # a list of builtin themes.
107 | #
108 | html_theme = "sphinx_rtd_theme"
109 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
110 | 
111 | # Theme options are theme-specific and customize the look and feel of a theme
112 | # further.  For a list of options available for each theme, see the
113 | # documentation.
114 | #
115 | # html_theme_options = {}
116 | 
117 | # Add any paths that contain custom static files (such as style sheets) here,
118 | # relative to this directory. They are copied after the builtin static files,
119 | # so a file named "default.css" will overwrite the builtin "default.css".
120 | # html_static_path = ['_static']
121 | 
122 | # Custom sidebar templates, must be a dictionary that maps document names
123 | # to template names.
124 | #
125 | # The default sidebars (for documents that don't match any pattern) are
126 | # defined by theme itself.  Builtin themes are using these templates by
127 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
128 | # 'searchbox.html']``.
129 | #
130 | # html_sidebars = {}
131 | 
132 | html_logo = 'art/pyuoi.png'
133 | html_theme_options = {'logo_only': True}
134 | 
135 | 
136 | # -- Options for HTMLHelp output ---------------------------------------------
137 | 
138 | # Output file base name for HTML help builder.
139 | htmlhelp_basename = 'PyUoIdoc'
140 | 
141 | 
142 | # -- Options for LaTeX output ------------------------------------------------
143 | 
144 | latex_elements = {
145 |     # The paper size ('letterpaper' or 'a4paper').
146 |     #
147 |     # 'papersize': 'letterpaper',
148 | 
149 |     # The font size ('10pt', '11pt' or '12pt').
150 |     #
151 |     # 'pointsize': '10pt',
152 | 
153 |     # Additional stuff for the LaTeX preamble.
154 |     #
155 |     # 'preamble': '',
156 | 
157 |     # Latex figure (float) alignment
158 |     #
159 |     # 'figure_align': 'htbp',
160 | }
161 | 
162 | # Grouping the document tree into LaTeX files. List of tuples
163 | # (source start file, target name, title,
164 | #  author, documentclass [howto, manual, or own class]).
165 | latex_documents = [
166 |     (master_doc, 'PyUoI.tex', 'PyUoI Documentation',
167 |      'BouchardLab', 'manual'),
168 | ]
169 | 
170 | 
171 | # -- Options for manual page output ------------------------------------------
172 | 
173 | # One entry per manual page. List of tuples
174 | # (source start file, name, description, authors, manual section).
175 | man_pages = [
176 |     (master_doc, 'pyuoi', 'PyUoI Documentation',
177 |      [author], 1)
178 | ]
179 | 
180 | 
181 | # -- Options for Texinfo output ----------------------------------------------
182 | 
183 | # Grouping the document tree into Texinfo files. List of tuples
184 | # (source start file, target name, title, author,
185 | #  dir menu entry, description, category)
186 | texinfo_documents = [
187 |     (master_doc, 'PyUoI', 'PyUoI Documentation',
188 |      author, 'PyUoI', 'One line description of project.',
189 |      'Miscellaneous'),
190 | ]
191 | 
192 | 
193 | # -- Options for Epub output -------------------------------------------------
194 | 
195 | # Bibliographic Dublin Core info.
196 | epub_title = project
197 | 
198 | # The unique identifier of the text. This can be a ISBN number
199 | # or the project homepage.
200 | #
201 | # epub_identifier = ''
202 | 
203 | # A unique identification for the text.
204 | #
205 | # epub_uid = ''
206 | 
207 | # A list of files that should not be packed into the epub file.
208 | epub_exclude_files = ['search.html']
209 | 
210 | 
211 | # -- Extension configuration -------------------------------------------------
212 | 
213 | # -- Options for intersphinx extension ---------------------------------------
214 | 
215 | # Example configuration for intersphinx: refer to the Python standard library.
216 | intersphinx_mapping = {"python": ("https://docs.python.org/", None),
217 |                        "sklearn": ("https://scikit-learn.org/stable/", None)}
218 | 
219 | # -- Options for todo extension ----------------------------------------------
220 | 
221 | # If true, `todo` and `todoList` produce output, else they produce nothing.
222 | todo_include_todos = True
223 | 


--------------------------------------------------------------------------------
/docs/source/contributing.rst:
--------------------------------------------------------------------------------
 1 | .. PyUoI
 2 | 
 3 | ==========================
 4 | How to contribute to PyUoI
 5 | ==========================
 6 | 
 7 | Code of Conduct
 8 | ---------------
 9 | 
10 | Contributing Patches and Changes
11 | --------------------------------
12 | 
13 | First, check whether the feature or change has already been contributed. If not, from your local copy directory, use the following commands.
14 | 
15 | If you have not already, you will need to clone the repo:
16 | 
17 | .. code-block:: bash
18 | 
19 |     $ git clone https://github.com/BouchardLab/PyUoI.git
20 | 
21 | 1) First create a new branch to work on
22 | 
23 | .. code-block:: bash
24 | 
25 |     $ git checkout -b <new_branch>
26 | 
27 | 2) Make your changes.
28 | 
29 | 3) We will automatically run tests to ensure that your contributions didn't break anything and that they follow our style guide. You can speed up the testing cycle by running these tests locally on your own computer running ``pytest -sv tests``, ``flake8 pyuoi``, and ``flake8 tests``.
30 | 
31 | 4) Push your feature branch to origin
32 | 
33 | .. code-block:: bash
34 | 
35 |     $ git push origin <new_branch>
36 | 
37 | 5) Once you have tested and finalized your changes, create a pull request (PR):
38 | 
39 |     * Ensure the PR description clearly describes the issue and changes.
40 |     * Close the relevant issue number if applicable. Writing "Closes #29" in the PR description will automatically close issue #29 when the PR is merged.
41 |     * If your changes fix a bug or add a feature, write a test so that it will not break in the future.
42 |     * Before submitting, please ensure that the tests pass and that the code follows the standard coding style.
43 | 
44 | Styleguides
45 | -----------
46 | 
47 | Documentation Styleguide
48 | ^^^^^^^^^^^^^^^^^^^^^^^^
49 | 
50 | All documentations is written in reStructuredText (RST) using Sphinx.
51 | 
52 | Format Specification Styleguide
53 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
54 | 
55 | Python Code Styleguide
56 | ^^^^^^^^^^^^^^^^^^^^^^
57 | 
58 | Python coding style is checked via ``flake8`` for automatic checking of PEP8 style during pull requests.
59 | 
60 | License and Copyright
61 | ---------------------
62 | 
63 | PyUol Copyright (c) 2019, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy).  All rights reserved.
64 | 
65 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
66 | 
67 | (1) Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
68 | 
69 | (2) Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
70 | 
71 | (3) Neither the name of the University of California, Lawrence Berkeley National Laboratory, U.S. Dept. of Energy nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
72 | 
73 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
74 | 
75 | You are under no obligation whatsoever to provide any bug fixes, patches, or upgrades to the features, functionality or performance of the source code ("Enhancements") to anyone; however, if you choose to make your Enhancements available either publicly, or directly to Lawrence Berkeley National Laboratory, without imposing a separate written license agreement for such Enhancements, then you hereby grant the following license: a  non-exclusive, royalty-free perpetual license to install, use, modify, prepare derivative works, incorporate into other computer software, distribute, and sublicense such enhancements or derivative works thereof, in binary and source code form.
76 | 


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | .. PyUoI
 2 | 
 3 | =====================================================
 4 | PyUoI: The Union of Intersections Framework in Python
 5 | =====================================================
 6 | 
 7 | PyUoI contains implementations of Union of Intersections framework for a variety
 8 | of penalized generalized linear models as well as dimensionality reduction
 9 | techniques such as column subset selection and non-negative matrix
10 | factorization. In general, UoI is a statistical machine learning framework that
11 | leverages two concepts in model inference:
12 | 
13 | #. Separating the selection and estimation problems to simultaneously achieve
14 |    sparse models with low-bias and low-variance parameter estimates.
15 | #. Stability to perturbations in both selection and estimation.
16 | 
17 | 
18 | PyUoI is designed to function similarly to scikit-learn, as it often builds
19 | upon scikit-learn's implementations of the aforementioned algorithms.
20 | 
21 | Further details on the UoI framework can be found in [Bouchard2017]_ and
22 | [Ubaru2017]_.
23 | 
24 | .. toctree::
25 |    :maxdepth: 2
26 |    :caption: Contents:
27 | 
28 |    introduction
29 |    installation
30 |    auto_examples/index
31 |    contributing
32 |    mpi
33 |    api
34 | 
35 | .. rubric:: References
36 | 
37 | .. [Bouchard2017] Bouchard, K., Bujan, A., Roosta-Khorasani, F., Ubaru, S.,
38 |     Prabhat, M., Snijders, A., ... & Bhattacharya, S. (2017). Union of
39 |     intersections (UoI) for interpretable data driven discovery and
40 |     prediction. In Advances in Neural Information Processing
41 |     Systems (pp. 1078-1086).
42 | .. [Ubaru2017] Ubaru, S., Wu, K., & Bouchard, K. E. (2017, December). UoI-NMF
43 |     cluster: a robust nonnegative matrix factorization algorithm for improved
44 |     parts-based decomposition and reconstruction of noisy data. In 2017 16th
45 |     IEEE International Conference on Machine Learning and Applications (ICMLA)
46 |     (pp. 241-248). IEEE.
47 | 
48 | 
49 | Indices and tables
50 | ------------------
51 | 
52 | * :ref:`genindex`
53 | * :ref:`modindex`
54 | * :ref:`search`
55 | 


--------------------------------------------------------------------------------
/docs/source/installation.rst:
--------------------------------------------------------------------------------
 1 | .. PyUoI
 2 | 
 3 | ============
 4 | Installation
 5 | ============
 6 | 
 7 | PyUoI is available for Python 3 on PyPI:
 8 | 
 9 | .. code-block:: bash
10 | 
11 |     $ pip install pyuoi
12 | 
13 | and through conda-forge:
14 | 
15 | .. code-block:: bash
16 | 
17 |     $ conda install pyuoi -c conda-forge
18 | 
19 | ``pip`` and ``conda`` will install the required dependencies.
20 | 
21 | Requirements
22 | ------------
23 | 
24 | Runtime
25 | ^^^^^^^
26 | 
27 | PyUoI requires
28 | 
29 |   * numpy>=1.14
30 |   * h5py>=2.8
31 |   * scikit-learn>=0.24
32 | 
33 | and optionally
34 | 
35 |   * pycasso
36 |   * mpi4py
37 | 
38 | to run.
39 | 
40 | Develop
41 | ^^^^^^^
42 | 
43 | To develop PyUoI you will additionally need
44 | 
45 |   * cython
46 | 
47 | to build from source and
48 | 
49 |   * pytest
50 |   * flake8
51 | 
52 | to run the tests and check formatting.
53 | 
54 | PyUoI has been built and tested on Python 3.9.18 with
55 | 
56 |   * numpy==1.26.1
57 |   * h5py==3.10.0
58 |   * scikit-learn==1.3.1
59 |   * cython==3.0.4
60 |   * pytest==7.4.2
61 |   * flake8==6.1.0
62 | 
63 | Docs
64 | ^^^^
65 | 
66 | To build the docs you will additionally need
67 | 
68 |   * sphinx
69 |   * sphinx_rtd_theme
70 | 
71 | Install from source
72 | -------------------
73 | 
74 | The latest development version of the code can be installed from https://github.com/BouchardLab/PyUoI
75 | 
76 | .. code-block:: bash
77 | 
78 |     # use ssh
79 |     $ git clone git@github.com:BouchardLab/pyuoi.git
80 |     # or use https
81 |     $ git clone https://github.com/BouchardLab/pyuoi.git
82 |     $ cd pyuoi
83 |     $ pip install -e .[dev]
84 | 


--------------------------------------------------------------------------------
/docs/source/mpi.rst:
--------------------------------------------------------------------------------
 1 | .. PyUoI
 2 | 
 3 | ===
 4 | MPI
 5 | ===
 6 | 
 7 | MPI (Message Passing Interface) is a parallel computing interface that can be
 8 | used through the ``mpi4py`` library in Python. Currently, the models in the
 9 | ``linear_model`` module can take advantage of MPI parallelism during model
10 | fitting. We assume some familiarity with using ``mpi4py`` here.
11 | 
12 | During the UoI feature selection step, many models are fit across bootstraps and
13 | regularization parameters. These can all potentially be done in parallel using
14 | MPI. Similarly, during UoI estimation, many models are fit across bootstraps
15 | and supports. These can also be done in parallel.
16 | 
17 | Using MPI parallelism requires ``mpi4py`` to be installed. In your code, the
18 | two extra things you will need to do to use MPI parallelism is 1) to make sure
19 | the dataset is on all ranks and 2) pass an MPI communicator into the model.
20 | 
21 | Broadcasting the dataset to all ranks
22 | -------------------------------------
23 | 
24 | PyUoI provides helper functions to share data across MPI ranks. The two
25 | strategies we support are 1) load the data from a HDF5 file and 2) the user
26 | can load the data on a single rank by hand and broadcast the data.
27 | 
28 | Loading data from an HDF5 file
29 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
30 | 
31 | .. code:: python
32 | 
33 |     from pyuoi.mpi_utils import load_data_MPI
34 | 
35 |     # file with keys 'X' and 'y'
36 |     h5_file = 'my_file.h5'
37 | 
38 |     X, y = load_data_MPI(h5_file)
39 | 
40 | Loading data by hand
41 | ^^^^^^^^^^^^^^^^^^^^
42 | 
43 | .. code:: python
44 | 
45 |     from mpi4py import MPI
46 |     import numpy as np
47 |     from pyuoi.mpi_utils import Bcast_from_root
48 | 
49 | 
50 |     comm = MPI.COMM_WORLD
51 |     rank = comm.rank
52 | 
53 |     X = None
54 |     y = None
55 |     if rank == 0:
56 |         # file with keys 'X' and 'y'
57 |         data = np.load('my_file.npz')
58 |         X = data['X']
59 |         y = data['y']
60 | 
61 |     X = Bcast_from_root(X, comm)
62 |     y = Bcast_from_root(y, comm)
63 | 
64 | Fitting with MPI parallelism
65 | -------------------------------------
66 | 
67 | Fitting models with MPI parallelism is similar to fitting models with no
68 | parallelism.
69 | 
70 | .. code:: python
71 | 
72 |     from mpi4py import MPI
73 |     from pyuoi.mpi_utils import load_data_MPI
74 |     from pyuoi.linear_model import UoI_Lasso
75 | 
76 |     comm = MPI.COMM_WORLD
77 |     rank = comm.rank
78 | 
79 |     # file with keys 'X' and 'y'
80 |     h5_file = 'my_file.h5'
81 | 
82 |     X, y = load_data_MPI(h5_file)
83 | 
84 |     model = UoI_Lasso(comm=comm)
85 |     model.fit(X, y)
86 | 
87 |     # model will now have fit parameters across all ranks
88 | 


--------------------------------------------------------------------------------
/docs/source/pyuoi/datasets/datasets.rst:
--------------------------------------------------------------------------------
 1 | ==============
 2 | pyuoi.datasets
 3 | ==============
 4 | 
 5 | Dataset utility functions for the ``pyuoi`` package.
 6 | 
 7 | Testing Utilities
 8 | -----------------
 9 | 
10 | .. automodule:: pyuoi.datasets
11 |     :noindex:
12 |     :members: make_linear_regression, make_classification,
13 |               make_poisson_regression
14 | 


--------------------------------------------------------------------------------
/docs/source/pyuoi/decomposition/decomposition.rst:
--------------------------------------------------------------------------------
 1 | #############
 2 | decomposition
 3 | #############
 4 | 
 5 | Abstract Base Class
 6 | -------------------
 7 | 
 8 | Decomposition classes are built through an ``AbstractDecompositionModel``, which
 9 | extends ``scikit-learn``'s ``BaseEstimator`` class to include methods that are
10 | relevant for decomposition methods.
11 | 
12 | .. automodule:: pyuoi.decomposition.base
13 |     :members: AbstractDecompositionModel
14 | 
15 | CUR Decomposition
16 | -----------------
17 | 
18 | The ``pyuoi`` package includes a class to perform ordinary CUR decomposition in
19 | addition to a class that performs UoI\ :sub:`CUR`.
20 | 
21 | .. automodule:: pyuoi.decomposition.CUR
22 |     :members: CUR, UoI_CUR
23 | 
24 | Non-negative Matrix Factorization
25 | ---------------------------------
26 | 
27 | UoI\ :sub:`NMF` can be customized with various NMF, clustering,
28 | non-negative least squares, and consensus algorithms. A base class accepts
29 | general objects or functions to perform the desired NMF, clustering,
30 | regression, and consensus grouping (provided that they have the correct
31 | structure). A derived class which uses
32 | 
33 | * ``scikit-learn``'s NMF object
34 | 
35 | * DBSCAN for clustering
36 | 
37 | * ``scipy``'s non-negative least squares function
38 | 
39 | * the median function for consensus grouping
40 | 
41 | is also provided. This derived class accepts keyword arguments that correspond
42 | to the keyword arguments of the above algorithms, so that the user does not
43 | have to provide instantiated objects.
44 | 
45 | .. automodule:: pyuoi.decomposition.NMF
46 |     :members: UoI_NMF_Base, UoI_NMF
47 | 


--------------------------------------------------------------------------------
/docs/source/pyuoi/linear_model/linear_model.rst:
--------------------------------------------------------------------------------
 1 | ############
 2 | linear_model
 3 | ############
 4 | All linear models operate through the basic structure provided by the base
 5 | class. The base class performs the necessary bootstrapping, fitting procedures,
 6 | intersection step, and model averaging. The derived classes simply provide
 7 | objects to the base class that perform the actual fits (e.g., UoI\ :sub:`Lasso`
 8 | provides ``Lasso`` and ``LinearRegression`` objects to the base class).
 9 | 
10 | Base Classes
11 | ------------
12 | 
13 | The base class for all linear models is ``AbstractUoILinearModel``.
14 | Intermediate derived classes, ``AbstractUoILinearRegressor`` (for lasso and
15 | elastic net), and ``AbstractUoIGeneralizedLinearRegressor`` (for logistic and
16 | Poisson regression) are also provided.
17 | 
18 | .. automodule:: pyuoi.linear_model.base
19 |     :members: AbstractUoILinearModel, AbstractUoILinearRegressor,
20 |               AbstractUoIGeneralizedLinearRegressor
21 | 
22 | Lasso
23 | -----
24 | The ``UoI_Lasso`` object provides the base class with a ``Lasso`` object for
25 | the selection module and a ``LinearRegression`` object for the estimation
26 | module. Additionally, the ``pycasso`` solver is provided as the ``PycLasso``
27 | class.
28 | 
29 | .. automodule:: pyuoi.linear_model.lasso
30 |    :members: UoI_Lasso, PycLasso
31 | 
32 | Elastic Net
33 | -----------
34 | The ``UoI_ElasticNet`` object provides the base class with an ``ElasticNet``
35 | object for the selection module and a ``LinearRegression`` object for the
36 | estimation module.
37 | 
38 | .. automodule:: pyuoi.linear_model.elasticnet
39 |     :members: UoI_ElasticNet
40 | 
41 | Logistic Regression
42 | -------------------
43 | The ``UoI_L1Logistic`` module uses a custom logistic regression solver for both
44 | the selection and estimation modules. This solver uses a modified orthant-wise
45 | limited memory quasi-Newton algorithm. For estimation, no regularization is
46 | performed.
47 | 
48 | .. automodule:: pyuoi.linear_model.logistic
49 |     :members: UoI_L1Logistic
50 | 
51 | Poisson Regression
52 | ------------------
53 | The ``poisson`` module provides a Poisson regression solver that uses either
54 | coordinate descent or a modified orthant-wise limited memory quasi-Newton
55 | solver. ``UoI_Poisson`` uses ``Poisson`` objects for both selection and
56 | estimation; however, the estimation module uses no regularization penalties.
57 | 
58 | .. automodule:: pyuoi.linear_model.poisson
59 |     :members: UoI_Poisson, Poisson
60 | 


--------------------------------------------------------------------------------
/docs/source/pyuoi/mpi_utils.rst:
--------------------------------------------------------------------------------
1 | ===============
2 | pyuoi.mpi_utils
3 | ===============
4 | 
5 | .. automodule:: pyuoi.mpi_utils
6 |    :members:
7 | 


--------------------------------------------------------------------------------
/docs/source/pyuoi/utils.rst:
--------------------------------------------------------------------------------
 1 | ===========
 2 | pyuoi.utils
 3 | ===========
 4 | 
 5 | Utility functions for the ``pyuoi`` package.
 6 | 
 7 | Scoring Utilities
 8 | -----------------
 9 | 
10 | .. automodule:: pyuoi.utils
11 |    :members: AIC, BIC, AICc, log_likelihood_glm
12 | 
13 | Other Utilities
14 | ---------------
15 | 
16 | .. automodule:: pyuoi.utils
17 |     :noindex:
18 |     :members: softmax, sigmoid, check_logger
19 | 


--------------------------------------------------------------------------------
/examples/README.rst:
--------------------------------------------------------------------------------
1 | .. _tutorials:
2 | 
3 | 
4 | Tutorials
5 | =========
6 | 


--------------------------------------------------------------------------------
/examples/plot_swimmer.py:
--------------------------------------------------------------------------------
  1 | """
  2 | .. _swimmer:
  3 | 
  4 | UoI-NMF for robust parts-based decomposition of noisy data
  5 | ==========================================================
  6 | 
  7 | This example will demonstrate parts-based decomposition with
  8 | UoI-NMF on the swimmer dataset.
  9 | The swimmer dataset is the canonical example of separable data.
 10 | 
 11 | """
 12 | 
 13 | ###############################################################################
 14 | # Swimmer dataset
 15 | # ---------------
 16 | 
 17 | 
 18 | import matplotlib
 19 | import matplotlib.pyplot as plt
 20 | import numpy as np
 21 | 
 22 | from sklearn.preprocessing import minmax_scale
 23 | from sklearn.manifold import TSNE
 24 | 
 25 | from pyuoi.decomposition import UoI_NMF
 26 | from pyuoi.datasets import load_swimmer
 27 | 
 28 | 
 29 | matplotlib.rcParams['figure.figsize'] = [4, 4]
 30 | np.random.seed(10)
 31 | 
 32 | swimmers = load_swimmer()
 33 | swimmers = minmax_scale(swimmers, axis=1)
 34 | 
 35 | 
 36 | ###############################################################################
 37 | # Original Swimmer samples
 38 | # ------------------------
 39 | 
 40 | fig, ax = plt.subplots(4, 4, subplot_kw={'xticks': [], 'yticks': []})
 41 | indices = np.random.randint(16, size=16) + np.arange(0, 256, 16)
 42 | ax = ax.flatten()
 43 | for i in range(len(indices)):
 44 |     ax[i].imshow(swimmers[indices[i]].reshape(32, 32).T,
 45 |                  aspect='auto', cmap='gray')
 46 | 
 47 | 
 48 | ###############################################################################
 49 | # Swimmer samples corrupted with Absolute Gaussian noise
 50 | # ------------------------------------------------------
 51 | #
 52 | # Corrupt the images with with absolute Gaussian noise with ``std = 0.25``.
 53 | 
 54 | 
 55 | reps = 1
 56 | n_swim = swimmers.shape[0]
 57 | corrupted = np.zeros((n_swim * reps, swimmers.shape[1]))
 58 | for r in range(reps):
 59 |     noise = np.abs(np.random.normal(scale=0.25, size=swimmers.shape))
 60 |     corrupted[r * n_swim:(r + 1) * n_swim] = swimmers + noise
 61 | 
 62 | fig, ax = plt.subplots(4, 4, subplot_kw={'xticks': [], 'yticks': []})
 63 | ax = ax.flatten()
 64 | for i in range(len(indices)):
 65 |     ax[i].imshow(corrupted[indices[i]].reshape(32, 32).T,
 66 |                  aspect='auto', cmap='gray')
 67 | 
 68 | ###############################################################################
 69 | # Run UoI NMF on corrupted Swimmer data
 70 | # -------------------------------------
 71 | #
 72 | # Twenty bootstraps should be enough.
 73 | # ``min_pts`` should be half of the number of bootstraps.
 74 | 
 75 | nboot = 20
 76 | min_pts = max(nboot // 2, 1)
 77 | ranks = [16]
 78 | 
 79 | shape = corrupted.shape
 80 | 
 81 | uoi_nmf = UoI_NMF(n_boots=nboot, ranks=ranks, db_min_samples=min_pts,
 82 |                   nmf_max_iter=800)
 83 | 
 84 | transformed = uoi_nmf.fit_transform(corrupted)
 85 | recovered = transformed @ uoi_nmf.components_
 86 | 
 87 | ###############################################################################
 88 | # NMF Swimmer bases
 89 | # -----------------
 90 | 
 91 | order = np.argsort(np.sum(uoi_nmf.components_, axis=1))
 92 | 
 93 | fig, ax = plt.subplots(4, 4, subplot_kw={'xticks': [], 'yticks': []})
 94 | ax = ax.flatten()
 95 | for i in range(uoi_nmf.components_.shape[0]):
 96 |     ax[i].imshow(uoi_nmf.components_[order[i]].reshape(32, 32).T,
 97 |                  aspect='auto', cmap='gray')
 98 | 
 99 | 
100 | ###############################################################################
101 | # Recovered Swimmers
102 | # ------------------
103 | 
104 | 
105 | fig, ax = plt.subplots(4, 4, subplot_kw={'xticks': [], 'yticks': []})
106 | ax = ax.flatten()
107 | for i in range(len(indices)):
108 |     ax[i].imshow(recovered[indices[i]].reshape(32, 32).T,
109 |                  aspect='auto', cmap='gray')
110 | 
111 | 
112 | ###############################################################################
113 | # Plot them all together so we can see how well we recovered
114 | # the original swimmer data.
115 | 
116 | 
117 | fig, ax = plt.subplots(3, 16, figsize=(27, 5),
118 |                        subplot_kw={'xticks': [], 'yticks': []})
119 | indices = np.random.randint(16, size=16) + np.arange(0, 256, 16)
120 | ax = ax.flatten()
121 | 
122 | # plot Original
123 | ax[0].set_ylabel('Original', rotation=0, fontsize=25, labelpad=40)
124 | ax[0].yaxis.set_label_coords(-1.0, 0.5)
125 | for i in range(len(indices)):
126 |     ax[i].imshow(swimmers[indices[i]].reshape(32, 32).T,
127 |                  aspect='auto', cmap='gray')
128 | 
129 | # plot Corrupted
130 | ax[16].set_ylabel('Corrupted', rotation=0, fontsize=25, labelpad=40)
131 | ax[16].yaxis.set_label_coords(-1.1, 0.5)
132 | for i in range(len(indices)):
133 |     ax[16 + i].imshow(corrupted[indices[i]].reshape(32, 32).T,
134 |                       aspect='auto', cmap='gray')
135 | 
136 | # plot Recovered
137 | ax[32].set_ylabel('Recovered', rotation=0, fontsize=25, labelpad=40)
138 | ax[32].yaxis.set_label_coords(-1.1, 0.5)
139 | for i in range(len(indices)):
140 |     ax[32 + i].imshow(recovered[indices[i]].reshape(32, 32).T,
141 |                       aspect='auto', cmap='gray')
142 | 
143 | ###############################################################################
144 | # To see what DBSCAN is doing, let's look at the bases samples.
145 | 
146 | plt.figure()
147 | embedding = TSNE(n_components=2).fit_transform(uoi_nmf.bases_samples_)
148 | sc = plt.scatter(embedding[:, 0], embedding[:, 1],
149 |                  c=uoi_nmf.bases_samples_labels_, s=80, cmap="nipy_spectral")
150 | sc.set_facecolor('none')
151 | plt.show()
152 | 


--------------------------------------------------------------------------------
/examples/plot_uoi_lasso.py:
--------------------------------------------------------------------------------
  1 | """
  2 | .. _uoi_lasso:
  3 | 
  4 | UoI-Lasso for sparse, minimal bias, regression
  5 | ==============================================
  6 | 
  7 | This example with demonstrate the ability of UoI-Lasso to recover sparse
  8 | models with minimal bias.
  9 | 
 10 | """
 11 | 
 12 | ###############################################################################
 13 | # Load synthetic data
 14 | # -------------------
 15 | #
 16 | # The synthetic data will have 40 features, 10 of which are informative and
 17 | # 1 response variable.
 18 | 
 19 | 
 20 | import matplotlib
 21 | import matplotlib.pyplot as plt
 22 | import numpy as np
 23 | 
 24 | from sklearn.linear_model import LinearRegression, LassoCV
 25 | 
 26 | from pyuoi.linear_model import UoI_Lasso
 27 | from pyuoi.datasets import make_linear_regression
 28 | 
 29 | 
 30 | matplotlib.rcParams['figure.figsize'] = [4, 4]
 31 | np.random.seed(0)
 32 | 
 33 | X, y, beta, intercept = make_linear_regression(n_features=40, n_informative=10,
 34 |                                                X_loc=0., beta_low=-1.,
 35 |                                                beta_high=1.)
 36 | 
 37 | 
 38 | ###############################################################################
 39 | # Visualize data
 40 | # --------------
 41 | #
 42 | # Some features are informative and others are not.
 43 | 
 44 | 
 45 | fig, axes = plt.subplots(2, 2)
 46 | for ii, ax in enumerate(axes.ravel()):
 47 |     ax.scatter(X[:, ii], y.ravel(), marker='.')
 48 |     ax.set_xlabel('Feature {}'.format(ii))
 49 | for ax in axes[:, 0]:
 50 |     ax.set_ylabel('Response')
 51 | fig.tight_layout()
 52 | 
 53 | 
 54 | ###############################################################################
 55 | # Fit a UoI-Lasso model
 56 | # ---------------------
 57 | #
 58 | # UoI-Lasso can fit low bias model parameters with feature selectivity. We can
 59 | # evaluate the predictions of the model, compare the fit :math:`\beta`, and look
 60 | # at the fraction of false positive and false negatives.
 61 | 
 62 | 
 63 | uoi_lasso = UoI_Lasso()
 64 | uoi_lasso.fit(X, y)
 65 | yhat = uoi_lasso.predict(X)
 66 | 
 67 | fig, axes = plt.subplots(1, 3, figsize=(7.5, 2.5))
 68 | ax = axes[0]
 69 | ax.scatter(y, yhat, marker='.')
 70 | ax.set_xlabel('True response')
 71 | ax.set_ylabel('Predicted response')
 72 | 
 73 | ax = axes[1]
 74 | val = max(abs(beta).max(), abs(uoi_lasso.coef_).max()) * 1.1
 75 | ax.scatter(beta.ravel(), uoi_lasso.coef_.ravel(), marker='.')
 76 | ax.set_xlabel(r'True $\beta_i$')
 77 | ax.set_ylabel(r'Fit $\beta_i$')
 78 | ax.set_xlim(-val, val)
 79 | ax.set_ylim(-val, val)
 80 | ax.plot([-val, val], [-val, val], c='k', lw=1.)
 81 | 
 82 | ax = axes[2]
 83 | fp = np.logical_and(uoi_lasso.coef_ != 0, beta == 0).mean()
 84 | fn = np.logical_and(uoi_lasso.coef_ == 0, beta != 0).mean()
 85 | ax.bar([0, 1], [fp, fn], align='center')
 86 | ax.set_xticks([0, 1])
 87 | ax.set_xticklabels(['False\npositive', 'False\nnegative'])
 88 | ax.set_ylabel('Fraction')
 89 | ax.set_ylim(0, 1)
 90 | fig.tight_layout()
 91 | 
 92 | ###############################################################################
 93 | # Ordinary Least Squares
 94 | # ----------------------
 95 | #
 96 | # OLS will have low bias fits, but will not generally have feature selectivity
 97 | # resulting in many false positives.
 98 | 
 99 | lr = LinearRegression()
100 | lr.fit(X, y)
101 | yhat = lr.predict(X)
102 | 
103 | fig, axes = plt.subplots(1, 3, figsize=(7.5, 2.5))
104 | ax = axes[0]
105 | ax.scatter(y, yhat, marker='.')
106 | ax.set_xlabel('True response')
107 | ax.set_ylabel('Predicted response')
108 | 
109 | ax = axes[1]
110 | val = max(abs(beta).max(), abs(lr.coef_).max()) * 1.1
111 | ax.scatter(beta.ravel(), lr.coef_.ravel(), marker='.')
112 | ax.set_xlabel(r'True $\beta_i$')
113 | ax.set_ylabel(r'Fit $\beta_i$')
114 | ax.set_xlim(-val, val)
115 | ax.set_ylim(-val, val)
116 | ax.plot([-val, val], [-val, val], c='k', lw=1.)
117 | 
118 | ax = axes[2]
119 | fp = np.logical_and(lr.coef_ != 0, beta == 0).mean()
120 | fn = np.logical_and(lr.coef_ == 0, beta != 0).mean()
121 | ax.bar([0, 1], [fp, fn], align='center')
122 | ax.set_xticks([0, 1])
123 | ax.set_xticklabels(['False\npositive', 'False\nnegative'])
124 | ax.set_ylabel('Fraction')
125 | ax.set_ylim(0, 1)
126 | fig.tight_layout()
127 | 
128 | ###############################################################################
129 | # Cross-validated Lasso
130 | # ---------------------
131 | #
132 | # Lasso can fit models with feature selectivity, but will have biased estimates
133 | # of the parameters and will typically have more false positives and false
134 | # negatives than UoI-Lasso.
135 | 
136 | lr = LassoCV(cv=5)
137 | lr.fit(X, y)
138 | yhat = lr.predict(X)
139 | 
140 | fig, axes = plt.subplots(1, 3, figsize=(7.5, 2.5))
141 | ax = axes[0]
142 | ax.scatter(y, yhat, marker='.')
143 | ax.set_xlabel('True response')
144 | ax.set_ylabel('Predicted response')
145 | 
146 | ax = axes[1]
147 | val = max(abs(beta).max(), abs(lr.coef_).max()) * 1.1
148 | ax.scatter(beta.ravel(), lr.coef_.ravel(), marker='.')
149 | ax.set_xlabel(r'True $\beta_i$')
150 | ax.set_ylabel(r'Fit $\beta_i$')
151 | ax.set_xlim(-val, val)
152 | ax.set_ylim(-val, val)
153 | ax.plot([-val, val], [-val, val], c='k', lw=1.)
154 | 
155 | ax = axes[2]
156 | fp = np.logical_and(lr.coef_ != 0, beta == 0).mean()
157 | fn = np.logical_and(lr.coef_ == 0, beta != 0).mean()
158 | ax.bar([0, 1], [fp, fn], align='center')
159 | ax.set_xticks([0, 1])
160 | ax.set_xticklabels(['False\npositive', 'False\nnegative'])
161 | ax.set_ylabel('Fraction')
162 | ax.set_ylim(0, 1)
163 | fig.tight_layout()
164 | plt.show()
165 | 


--------------------------------------------------------------------------------
/liblbfgs/COPYING:
--------------------------------------------------------------------------------
 1 | The MIT License
 2 | 
 3 | Copyright (c) 1990 Jorge Nocedal
 4 | Copyright (c) 2007-2010 Naoaki Okazaki
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a
 7 | copy of this software and associated documentation files (the "Software"),
 8 | to deal in the Software without restriction, including without limitation
 9 | the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 | and/or sell copies of the Software, and to permit persons to whom the
11 | Software is furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in
14 | all copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 | THE SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/liblbfgs/README:
--------------------------------------------------------------------------------
 1 | 
 2 |            libLBFGS: C library of limited-memory BFGS (L-BFGS)
 3 | 
 4 |                                        Copyright (c) 1990, Jorge Nocedal
 5 |                                  Copyright (c) 2007-2010, Naoaki Okazaki
 6 | 
 7 | =========================================================================
 8 | 1. Introduction
 9 | =========================================================================
10 | libLBFGS is a C port of the implementation of Limited-memory
11 | Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) method written by Jorge Nocedal.
12 | The original FORTRAN source code is available at:
13 | http://www.ece.northwestern.edu/~nocedal/lbfgs.html
14 | 
15 | The L-BFGS method solves the unconstrainted minimization problem:
16 |     minimize F(x), x = (x1, x2, ..., xN),
17 | only if the objective function F(x) and its gradient G(x) are computable.
18 | 
19 | Refer to the libLBFGS web site for more information.
20 | http://www.chokkan.org/software/liblbfgs/
21 | 
22 | 
23 | 
24 | =========================================================================
25 | 2. How to build
26 | =========================================================================
27 | [Microsoft Visual Studio 2008]
28 | Open the solution file "lbfgs.sln" and build it.
29 | 
30 | [GCC]
31 | $ ./configure
32 | $ make
33 | $ make install  # To install libLBFGS library and header.
34 | 
35 | 
36 | 
37 | =========================================================================
38 | 3. Note on SSE/SSE2 optimization
39 | =========================================================================
40 | This library has SSE/SSE2 optimization routines for vector arithmetic
41 | operations on Intel/AMD processors. The SSE2 routine is for 64 bit double
42 | values, and the SSE routine is for 32 bit float values. Since the default
43 | parameters in libLBFGS are tuned for double precision values, it may need
44 | to modify these parameters to use the SSE optimization routines.
45 | 
46 | To use the SSE2 optimization routine, specify --enable-sse2 option to the
47 | configure script.
48 | 
49 | $ ./configure --enable-sse2
50 | 
51 | To build libLBFGS with SSE2 optimization enabled on Microsoft Visual
52 | Studio 2005, define USE_SSE and __SSE2__ symbols.
53 | 
54 | Make sure to run libLBFGS on processors where SSE2 instrunctions are
55 | available. The library does not check the existence of SSE2 instructions.
56 | 
57 | To package maintainers,
58 | 
59 | Please do not enable SSE/SSE2 optimization routine. The library built
60 | with SSE/SSE2 optimization will crash without any notice when necessary
61 | SSE/SSE2 instructions are unavailable on CPUs.
62 | 
63 | 
64 | 
65 | =========================================================================
66 | 4. License
67 | =========================================================================
68 | libLBFGS is distributed under the term of the MIT license.
69 | Please refer to COPYING file in the distribution.
70 | 
71 | $Id$
72 | 


--------------------------------------------------------------------------------
/liblbfgs/arithmetic_ansi.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *      ANSI C implementation of vector operations.
  3 |  *
  4 |  * Copyright (c) 2007-2010 Naoaki Okazaki
  5 |  * All rights reserved.
  6 |  *
  7 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  8 |  * of this software and associated documentation files (the "Software"), to deal
  9 |  * in the Software without restriction, including without limitation the rights
 10 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 |  * copies of the Software, and to permit persons to whom the Software is
 12 |  * furnished to do so, subject to the following conditions:
 13 |  *
 14 |  * The above copyright notice and this permission notice shall be included in
 15 |  * all copies or substantial portions of the Software.
 16 |  *
 17 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 18 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 19 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 20 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 21 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 22 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 23 |  * THE SOFTWARE.
 24 |  */
 25 | 
 26 | /* $Id$ */
 27 | 
 28 | #include <stdlib.h>
 29 | #include <memory.h>
 30 | 
 31 | #if     LBFGS_FLOAT == 32 && LBFGS_IEEE_FLOAT
 32 | #define fsigndiff(x, y) (((*(uint32_t*)(x)) ^ (*(uint32_t*)(y))) & 0x80000000U)
 33 | #else
 34 | #define fsigndiff(x, y) (*(x) * (*(y) / fabs(*(y))) < 0.)
 35 | #endif/*LBFGS_IEEE_FLOAT*/
 36 | 
 37 | inline static void* vecalloc(size_t size)
 38 | {
 39 |     void *memblock = malloc(size);
 40 |     if (memblock) {
 41 |         memset(memblock, 0, size);
 42 |     }
 43 |     return memblock;
 44 | }
 45 | 
 46 | inline static void vecfree(void *memblock)
 47 | {
 48 |     free(memblock);
 49 | }
 50 | 
 51 | inline static void vecset(lbfgsfloatval_t *x, const lbfgsfloatval_t c, const int n)
 52 | {
 53 |     int i;
 54 |     
 55 |     for (i = 0;i < n;++i) {
 56 |         x[i] = c;
 57 |     }
 58 | }
 59 | 
 60 | inline static void veccpy(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const int n)
 61 | {
 62 |     int i;
 63 | 
 64 |     for (i = 0;i < n;++i) {
 65 |         y[i] = x[i];
 66 |     }
 67 | }
 68 | 
 69 | inline static void vecncpy(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const int n)
 70 | {
 71 |     int i;
 72 | 
 73 |     for (i = 0;i < n;++i) {
 74 |         y[i] = -x[i];
 75 |     }
 76 | }
 77 | 
 78 | inline static void vecadd(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const lbfgsfloatval_t c, const int n)
 79 | {
 80 |     int i;
 81 | 
 82 |     for (i = 0;i < n;++i) {
 83 |         y[i] += c * x[i];
 84 |     }
 85 | }
 86 | 
 87 | inline static void vecdiff(lbfgsfloatval_t *z, const lbfgsfloatval_t *x, const lbfgsfloatval_t *y, const int n)
 88 | {
 89 |     int i;
 90 | 
 91 |     for (i = 0;i < n;++i) {
 92 |         z[i] = x[i] - y[i];
 93 |     }
 94 | }
 95 | 
 96 | inline static void vecscale(lbfgsfloatval_t *y, const lbfgsfloatval_t c, const int n)
 97 | {
 98 |     int i;
 99 | 
100 |     for (i = 0;i < n;++i) {
101 |         y[i] *= c;
102 |     }
103 | }
104 | 
105 | inline static void vecmul(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const int n)
106 | {
107 |     int i;
108 | 
109 |     for (i = 0;i < n;++i) {
110 |         y[i] *= x[i];
111 |     }
112 | }
113 | 
114 | inline static void vecdot(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const lbfgsfloatval_t *y, const int n)
115 | {
116 |     int i;
117 |     *s = 0.;
118 |     for (i = 0;i < n;++i) {
119 |         *s += x[i] * y[i];
120 |     }
121 | }
122 | 
123 | inline static void vec2norm(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const int n)
124 | {
125 |     vecdot(s, x, x, n);
126 |     *s = (lbfgsfloatval_t)sqrt(*s);
127 | }
128 | 
129 | inline static void vec2norminv(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const int n)
130 | {
131 |     vec2norm(s, x, n);
132 |     *s = (lbfgsfloatval_t)(1.0 / *s);
133 | }
134 | 


--------------------------------------------------------------------------------
/liblbfgs/arithmetic_sse_double.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *      SSE2 implementation of vector oprations (64bit double).
  3 |  *
  4 |  * Copyright (c) 2007-2010 Naoaki Okazaki
  5 |  * All rights reserved.
  6 |  *
  7 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  8 |  * of this software and associated documentation files (the "Software"), to deal
  9 |  * in the Software without restriction, including without limitation the rights
 10 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 |  * copies of the Software, and to permit persons to whom the Software is
 12 |  * furnished to do so, subject to the following conditions:
 13 |  *
 14 |  * The above copyright notice and this permission notice shall be included in
 15 |  * all copies or substantial portions of the Software.
 16 |  *
 17 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 18 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 19 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 20 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 21 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 22 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 23 |  * THE SOFTWARE.
 24 |  */
 25 | 
 26 | /* $Id$ */
 27 | 
 28 | #include <stdlib.h>
 29 | #ifndef __APPLE__
 30 | #include <malloc.h>
 31 | #endif
 32 | #include <memory.h>
 33 | 
 34 | #if     1400 <= _MSC_VER
 35 | #include <intrin.h>
 36 | #endif/*1400 <= _MSC_VER*/
 37 | 
 38 | #if     HAVE_EMMINTRIN_H
 39 | #include <emmintrin.h>
 40 | #endif/*HAVE_EMMINTRIN_H*/
 41 | 
 42 | inline static void* vecalloc(size_t size)
 43 | {
 44 | #if     defined(_MSC_VER)
 45 |     void *memblock = _aligned_malloc(size, 16);
 46 | #elif   defined(__APPLE__)  /* OS X always aligns on 16-byte boundaries */
 47 |     void *memblock = malloc(size);
 48 | #else
 49 |     void *memblock = NULL, *p = NULL;
 50 |     if (posix_memalign(&p, 16, size) == 0) {
 51 |         memblock = p;
 52 |     }
 53 | #endif
 54 |     if (memblock != NULL) {
 55 |         memset(memblock, 0, size);
 56 |     }
 57 |     return memblock;
 58 | }
 59 | 
 60 | inline static void vecfree(void *memblock)
 61 | {
 62 | #ifdef	_MSC_VER
 63 |     _aligned_free(memblock);
 64 | #else
 65 |     free(memblock);
 66 | #endif
 67 | }
 68 | 
 69 | #define fsigndiff(x, y) \
 70 |     ((_mm_movemask_pd(_mm_set_pd(*(x), *(y))) + 1) & 0x002)
 71 | 
 72 | #define vecset(x, c, n) \
 73 | { \
 74 |     int i; \
 75 |     __m128d XMM0 = _mm_set1_pd(c); \
 76 |     for (i = 0;i < (n);i += 8) { \
 77 |         _mm_store_pd((x)+i  , XMM0); \
 78 |         _mm_store_pd((x)+i+2, XMM0); \
 79 |         _mm_store_pd((x)+i+4, XMM0); \
 80 |         _mm_store_pd((x)+i+6, XMM0); \
 81 |     } \
 82 | }
 83 | 
 84 | #define veccpy(y, x, n) \
 85 | { \
 86 |     int i; \
 87 |     for (i = 0;i < (n);i += 8) { \
 88 |         __m128d XMM0 = _mm_load_pd((x)+i  ); \
 89 |         __m128d XMM1 = _mm_load_pd((x)+i+2); \
 90 |         __m128d XMM2 = _mm_load_pd((x)+i+4); \
 91 |         __m128d XMM3 = _mm_load_pd((x)+i+6); \
 92 |         _mm_store_pd((y)+i  , XMM0); \
 93 |         _mm_store_pd((y)+i+2, XMM1); \
 94 |         _mm_store_pd((y)+i+4, XMM2); \
 95 |         _mm_store_pd((y)+i+6, XMM3); \
 96 |     } \
 97 | }
 98 | 
 99 | #define vecncpy(y, x, n) \
100 | { \
101 |     int i; \
102 |     for (i = 0;i < (n);i += 8) { \
103 |         __m128d XMM0 = _mm_setzero_pd(); \
104 |         __m128d XMM1 = _mm_setzero_pd(); \
105 |         __m128d XMM2 = _mm_setzero_pd(); \
106 |         __m128d XMM3 = _mm_setzero_pd(); \
107 |         __m128d XMM4 = _mm_load_pd((x)+i  ); \
108 |         __m128d XMM5 = _mm_load_pd((x)+i+2); \
109 |         __m128d XMM6 = _mm_load_pd((x)+i+4); \
110 |         __m128d XMM7 = _mm_load_pd((x)+i+6); \
111 |         XMM0 = _mm_sub_pd(XMM0, XMM4); \
112 |         XMM1 = _mm_sub_pd(XMM1, XMM5); \
113 |         XMM2 = _mm_sub_pd(XMM2, XMM6); \
114 |         XMM3 = _mm_sub_pd(XMM3, XMM7); \
115 |         _mm_store_pd((y)+i  , XMM0); \
116 |         _mm_store_pd((y)+i+2, XMM1); \
117 |         _mm_store_pd((y)+i+4, XMM2); \
118 |         _mm_store_pd((y)+i+6, XMM3); \
119 |     } \
120 | }
121 | 
122 | #define vecadd(y, x, c, n) \
123 | { \
124 |     int i; \
125 |     __m128d XMM7 = _mm_set1_pd(c); \
126 |     for (i = 0;i < (n);i += 4) { \
127 |         __m128d XMM0 = _mm_load_pd((x)+i  ); \
128 |         __m128d XMM1 = _mm_load_pd((x)+i+2); \
129 |         __m128d XMM2 = _mm_load_pd((y)+i  ); \
130 |         __m128d XMM3 = _mm_load_pd((y)+i+2); \
131 |         XMM0 = _mm_mul_pd(XMM0, XMM7); \
132 |         XMM1 = _mm_mul_pd(XMM1, XMM7); \
133 |         XMM2 = _mm_add_pd(XMM2, XMM0); \
134 |         XMM3 = _mm_add_pd(XMM3, XMM1); \
135 |         _mm_store_pd((y)+i  , XMM2); \
136 |         _mm_store_pd((y)+i+2, XMM3); \
137 |     } \
138 | }
139 | 
140 | #define vecdiff(z, x, y, n) \
141 | { \
142 |     int i; \
143 |     for (i = 0;i < (n);i += 8) { \
144 |         __m128d XMM0 = _mm_load_pd((x)+i  ); \
145 |         __m128d XMM1 = _mm_load_pd((x)+i+2); \
146 |         __m128d XMM2 = _mm_load_pd((x)+i+4); \
147 |         __m128d XMM3 = _mm_load_pd((x)+i+6); \
148 |         __m128d XMM4 = _mm_load_pd((y)+i  ); \
149 |         __m128d XMM5 = _mm_load_pd((y)+i+2); \
150 |         __m128d XMM6 = _mm_load_pd((y)+i+4); \
151 |         __m128d XMM7 = _mm_load_pd((y)+i+6); \
152 |         XMM0 = _mm_sub_pd(XMM0, XMM4); \
153 |         XMM1 = _mm_sub_pd(XMM1, XMM5); \
154 |         XMM2 = _mm_sub_pd(XMM2, XMM6); \
155 |         XMM3 = _mm_sub_pd(XMM3, XMM7); \
156 |         _mm_store_pd((z)+i  , XMM0); \
157 |         _mm_store_pd((z)+i+2, XMM1); \
158 |         _mm_store_pd((z)+i+4, XMM2); \
159 |         _mm_store_pd((z)+i+6, XMM3); \
160 |     } \
161 | }
162 | 
163 | #define vecscale(y, c, n) \
164 | { \
165 |     int i; \
166 |     __m128d XMM7 = _mm_set1_pd(c); \
167 |     for (i = 0;i < (n);i += 4) { \
168 |         __m128d XMM0 = _mm_load_pd((y)+i  ); \
169 |         __m128d XMM1 = _mm_load_pd((y)+i+2); \
170 |         XMM0 = _mm_mul_pd(XMM0, XMM7); \
171 |         XMM1 = _mm_mul_pd(XMM1, XMM7); \
172 |         _mm_store_pd((y)+i  , XMM0); \
173 |         _mm_store_pd((y)+i+2, XMM1); \
174 |     } \
175 | }
176 | 
177 | #define vecmul(y, x, n) \
178 | { \
179 |     int i; \
180 |     for (i = 0;i < (n);i += 8) { \
181 |         __m128d XMM0 = _mm_load_pd((x)+i  ); \
182 |         __m128d XMM1 = _mm_load_pd((x)+i+2); \
183 |         __m128d XMM2 = _mm_load_pd((x)+i+4); \
184 |         __m128d XMM3 = _mm_load_pd((x)+i+6); \
185 |         __m128d XMM4 = _mm_load_pd((y)+i  ); \
186 |         __m128d XMM5 = _mm_load_pd((y)+i+2); \
187 |         __m128d XMM6 = _mm_load_pd((y)+i+4); \
188 |         __m128d XMM7 = _mm_load_pd((y)+i+6); \
189 |         XMM4 = _mm_mul_pd(XMM4, XMM0); \
190 |         XMM5 = _mm_mul_pd(XMM5, XMM1); \
191 |         XMM6 = _mm_mul_pd(XMM6, XMM2); \
192 |         XMM7 = _mm_mul_pd(XMM7, XMM3); \
193 |         _mm_store_pd((y)+i  , XMM4); \
194 |         _mm_store_pd((y)+i+2, XMM5); \
195 |         _mm_store_pd((y)+i+4, XMM6); \
196 |         _mm_store_pd((y)+i+6, XMM7); \
197 |     } \
198 | }
199 | 
200 | 
201 | 
202 | #if     3 <= __SSE__ || defined(__SSE3__)
203 | /*
204 |     Horizontal add with haddps SSE3 instruction. The work register (rw)
205 |     is unused.
206 |  */
207 | #define __horizontal_sum(r, rw) \
208 |     r = _mm_hadd_ps(r, r); \
209 |     r = _mm_hadd_ps(r, r);
210 | 
211 | #else
212 | /*
213 |     Horizontal add with SSE instruction. The work register (rw) is used.
214 |  */
215 | #define __horizontal_sum(r, rw) \
216 |     rw = r; \
217 |     r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(1, 0, 3, 2)); \
218 |     r = _mm_add_ps(r, rw); \
219 |     rw = r; \
220 |     r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(2, 3, 0, 1)); \
221 |     r = _mm_add_ps(r, rw);
222 | 
223 | #endif
224 | 
225 | #define vecdot(s, x, y, n) \
226 | { \
227 |     int i; \
228 |     __m128d XMM0 = _mm_setzero_pd(); \
229 |     __m128d XMM1 = _mm_setzero_pd(); \
230 |     __m128d XMM2, XMM3, XMM4, XMM5; \
231 |     for (i = 0;i < (n);i += 4) { \
232 |         XMM2 = _mm_load_pd((x)+i  ); \
233 |         XMM3 = _mm_load_pd((x)+i+2); \
234 |         XMM4 = _mm_load_pd((y)+i  ); \
235 |         XMM5 = _mm_load_pd((y)+i+2); \
236 |         XMM2 = _mm_mul_pd(XMM2, XMM4); \
237 |         XMM3 = _mm_mul_pd(XMM3, XMM5); \
238 |         XMM0 = _mm_add_pd(XMM0, XMM2); \
239 |         XMM1 = _mm_add_pd(XMM1, XMM3); \
240 |     } \
241 |     XMM0 = _mm_add_pd(XMM0, XMM1); \
242 |     XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \
243 |     XMM0 = _mm_add_pd(XMM0, XMM1); \
244 |     _mm_store_sd((s), XMM0); \
245 | }
246 | 
247 | #define vec2norm(s, x, n) \
248 | { \
249 |     int i; \
250 |     __m128d XMM0 = _mm_setzero_pd(); \
251 |     __m128d XMM1 = _mm_setzero_pd(); \
252 |     __m128d XMM2, XMM3, XMM4, XMM5; \
253 |     for (i = 0;i < (n);i += 4) { \
254 |         XMM2 = _mm_load_pd((x)+i  ); \
255 |         XMM3 = _mm_load_pd((x)+i+2); \
256 |         XMM4 = XMM2; \
257 |         XMM5 = XMM3; \
258 |         XMM2 = _mm_mul_pd(XMM2, XMM4); \
259 |         XMM3 = _mm_mul_pd(XMM3, XMM5); \
260 |         XMM0 = _mm_add_pd(XMM0, XMM2); \
261 |         XMM1 = _mm_add_pd(XMM1, XMM3); \
262 |     } \
263 |     XMM0 = _mm_add_pd(XMM0, XMM1); \
264 |     XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \
265 |     XMM0 = _mm_add_pd(XMM0, XMM1); \
266 |     XMM0 = _mm_sqrt_pd(XMM0); \
267 |     _mm_store_sd((s), XMM0); \
268 | }
269 | 
270 | 
271 | #define vec2norminv(s, x, n) \
272 | { \
273 |     int i; \
274 |     __m128d XMM0 = _mm_setzero_pd(); \
275 |     __m128d XMM1 = _mm_setzero_pd(); \
276 |     __m128d XMM2, XMM3, XMM4, XMM5; \
277 |     for (i = 0;i < (n);i += 4) { \
278 |         XMM2 = _mm_load_pd((x)+i  ); \
279 |         XMM3 = _mm_load_pd((x)+i+2); \
280 |         XMM4 = XMM2; \
281 |         XMM5 = XMM3; \
282 |         XMM2 = _mm_mul_pd(XMM2, XMM4); \
283 |         XMM3 = _mm_mul_pd(XMM3, XMM5); \
284 |         XMM0 = _mm_add_pd(XMM0, XMM2); \
285 |         XMM1 = _mm_add_pd(XMM1, XMM3); \
286 |     } \
287 |     XMM2 = _mm_set1_pd(1.0); \
288 |     XMM0 = _mm_add_pd(XMM0, XMM1); \
289 |     XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \
290 |     XMM0 = _mm_add_pd(XMM0, XMM1); \
291 |     XMM0 = _mm_sqrt_pd(XMM0); \
292 |     XMM2 = _mm_div_pd(XMM2, XMM0); \
293 |     _mm_store_sd((s), XMM2); \
294 | }
295 | 


--------------------------------------------------------------------------------
/liblbfgs/arithmetic_sse_float.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *      SSE/SSE3 implementation of vector oprations (32bit float).
  3 |  *
  4 |  * Copyright (c) 2007-2010 Naoaki Okazaki
  5 |  * All rights reserved.
  6 |  *
  7 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  8 |  * of this software and associated documentation files (the "Software"), to deal
  9 |  * in the Software without restriction, including without limitation the rights
 10 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 11 |  * copies of the Software, and to permit persons to whom the Software is
 12 |  * furnished to do so, subject to the following conditions:
 13 |  *
 14 |  * The above copyright notice and this permission notice shall be included in
 15 |  * all copies or substantial portions of the Software.
 16 |  *
 17 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 18 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 19 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 20 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 21 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 22 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 23 |  * THE SOFTWARE.
 24 |  */
 25 | 
 26 | /* $Id$ */
 27 | 
 28 | #include <stdlib.h>
 29 | #ifndef __APPLE__
 30 | #include <malloc.h>
 31 | #endif
 32 | #include <memory.h>
 33 | 
 34 | #if     1400 <= _MSC_VER
 35 | #include <intrin.h>
 36 | #endif/*_MSC_VER*/
 37 | 
 38 | #if     HAVE_XMMINTRIN_H
 39 | #include <xmmintrin.h>
 40 | #endif/*HAVE_XMMINTRIN_H*/
 41 | 
 42 | #if     LBFGS_FLOAT == 32 && LBFGS_IEEE_FLOAT
 43 | #define fsigndiff(x, y) (((*(uint32_t*)(x)) ^ (*(uint32_t*)(y))) & 0x80000000U)
 44 | #else
 45 | #define fsigndiff(x, y) (*(x) * (*(y) / fabs(*(y))) < 0.)
 46 | #endif/*LBFGS_IEEE_FLOAT*/
 47 | 
 48 | inline static void* vecalloc(size_t size)
 49 | {
 50 | #if     defined(_MSC_VER)
 51 |     void *memblock = _aligned_malloc(size, 16);
 52 | #elif   defined(__APPLE__)  /* OS X always aligns on 16-byte boundaries */
 53 |     void *memblock = malloc(size);
 54 | #else
 55 |     void *memblock = NULL, *p = NULL;
 56 |     if (posix_memalign(&p, 16, size) == 0) {
 57 |         memblock = p;
 58 |     }
 59 | #endif
 60 |     if (memblock != NULL) {
 61 |         memset(memblock, 0, size);
 62 |     }
 63 |     return memblock;
 64 | }
 65 | 
 66 | inline static void vecfree(void *memblock)
 67 | {
 68 | #ifdef	_MSC_VER
 69 |     _aligned_free(memblock);
 70 | #else
 71 |     free(memblock);
 72 | #endif
 73 | }
 74 | 
 75 | #define vecset(x, c, n) \
 76 | { \
 77 |     int i; \
 78 |     __m128 XMM0 = _mm_set_ps1(c); \
 79 |     for (i = 0;i < (n);i += 16) { \
 80 |         _mm_store_ps((x)+i   , XMM0); \
 81 |         _mm_store_ps((x)+i+ 4, XMM0); \
 82 |         _mm_store_ps((x)+i+ 8, XMM0); \
 83 |         _mm_store_ps((x)+i+12, XMM0); \
 84 |     } \
 85 | }
 86 | 
 87 | #define veccpy(y, x, n) \
 88 | { \
 89 |     int i; \
 90 |     for (i = 0;i < (n);i += 16) { \
 91 |         __m128 XMM0 = _mm_load_ps((x)+i   ); \
 92 |         __m128 XMM1 = _mm_load_ps((x)+i+ 4); \
 93 |         __m128 XMM2 = _mm_load_ps((x)+i+ 8); \
 94 |         __m128 XMM3 = _mm_load_ps((x)+i+12); \
 95 |         _mm_store_ps((y)+i   , XMM0); \
 96 |         _mm_store_ps((y)+i+ 4, XMM1); \
 97 |         _mm_store_ps((y)+i+ 8, XMM2); \
 98 |         _mm_store_ps((y)+i+12, XMM3); \
 99 |     } \
100 | }
101 | 
102 | #define vecncpy(y, x, n) \
103 | { \
104 |     int i; \
105 |     const uint32_t mask = 0x80000000; \
106 |     __m128 XMM4 = _mm_load_ps1((float*)&mask); \
107 |     for (i = 0;i < (n);i += 16) { \
108 |         __m128 XMM0 = _mm_load_ps((x)+i   ); \
109 |         __m128 XMM1 = _mm_load_ps((x)+i+ 4); \
110 |         __m128 XMM2 = _mm_load_ps((x)+i+ 8); \
111 |         __m128 XMM3 = _mm_load_ps((x)+i+12); \
112 |         XMM0 = _mm_xor_ps(XMM0, XMM4); \
113 |         XMM1 = _mm_xor_ps(XMM1, XMM4); \
114 |         XMM2 = _mm_xor_ps(XMM2, XMM4); \
115 |         XMM3 = _mm_xor_ps(XMM3, XMM4); \
116 |         _mm_store_ps((y)+i   , XMM0); \
117 |         _mm_store_ps((y)+i+ 4, XMM1); \
118 |         _mm_store_ps((y)+i+ 8, XMM2); \
119 |         _mm_store_ps((y)+i+12, XMM3); \
120 |     } \
121 | }
122 | 
123 | #define vecadd(y, x, c, n) \
124 | { \
125 |     int i; \
126 |     __m128 XMM7 = _mm_set_ps1(c); \
127 |     for (i = 0;i < (n);i += 8) { \
128 |         __m128 XMM0 = _mm_load_ps((x)+i  ); \
129 |         __m128 XMM1 = _mm_load_ps((x)+i+4); \
130 |         __m128 XMM2 = _mm_load_ps((y)+i  ); \
131 |         __m128 XMM3 = _mm_load_ps((y)+i+4); \
132 |         XMM0 = _mm_mul_ps(XMM0, XMM7); \
133 |         XMM1 = _mm_mul_ps(XMM1, XMM7); \
134 |         XMM2 = _mm_add_ps(XMM2, XMM0); \
135 |         XMM3 = _mm_add_ps(XMM3, XMM1); \
136 |         _mm_store_ps((y)+i  , XMM2); \
137 |         _mm_store_ps((y)+i+4, XMM3); \
138 |     } \
139 | }
140 | 
141 | #define vecdiff(z, x, y, n) \
142 | { \
143 |     int i; \
144 |     for (i = 0;i < (n);i += 16) { \
145 |         __m128 XMM0 = _mm_load_ps((x)+i   ); \
146 |         __m128 XMM1 = _mm_load_ps((x)+i+ 4); \
147 |         __m128 XMM2 = _mm_load_ps((x)+i+ 8); \
148 |         __m128 XMM3 = _mm_load_ps((x)+i+12); \
149 |         __m128 XMM4 = _mm_load_ps((y)+i   ); \
150 |         __m128 XMM5 = _mm_load_ps((y)+i+ 4); \
151 |         __m128 XMM6 = _mm_load_ps((y)+i+ 8); \
152 |         __m128 XMM7 = _mm_load_ps((y)+i+12); \
153 |         XMM0 = _mm_sub_ps(XMM0, XMM4); \
154 |         XMM1 = _mm_sub_ps(XMM1, XMM5); \
155 |         XMM2 = _mm_sub_ps(XMM2, XMM6); \
156 |         XMM3 = _mm_sub_ps(XMM3, XMM7); \
157 |         _mm_store_ps((z)+i   , XMM0); \
158 |         _mm_store_ps((z)+i+ 4, XMM1); \
159 |         _mm_store_ps((z)+i+ 8, XMM2); \
160 |         _mm_store_ps((z)+i+12, XMM3); \
161 |     } \
162 | }
163 | 
164 | #define vecscale(y, c, n) \
165 | { \
166 |     int i; \
167 |     __m128 XMM7 = _mm_set_ps1(c); \
168 |     for (i = 0;i < (n);i += 8) { \
169 |         __m128 XMM0 = _mm_load_ps((y)+i  ); \
170 |         __m128 XMM1 = _mm_load_ps((y)+i+4); \
171 |         XMM0 = _mm_mul_ps(XMM0, XMM7); \
172 |         XMM1 = _mm_mul_ps(XMM1, XMM7); \
173 |         _mm_store_ps((y)+i  , XMM0); \
174 |         _mm_store_ps((y)+i+4, XMM1); \
175 |     } \
176 | }
177 | 
178 | #define vecmul(y, x, n) \
179 | { \
180 |     int i; \
181 |     for (i = 0;i < (n);i += 16) { \
182 |         __m128 XMM0 = _mm_load_ps((x)+i   ); \
183 |         __m128 XMM1 = _mm_load_ps((x)+i+ 4); \
184 |         __m128 XMM2 = _mm_load_ps((x)+i+ 8); \
185 |         __m128 XMM3 = _mm_load_ps((x)+i+12); \
186 |         __m128 XMM4 = _mm_load_ps((y)+i   ); \
187 |         __m128 XMM5 = _mm_load_ps((y)+i+ 4); \
188 |         __m128 XMM6 = _mm_load_ps((y)+i+ 8); \
189 |         __m128 XMM7 = _mm_load_ps((y)+i+12); \
190 |         XMM4 = _mm_mul_ps(XMM4, XMM0); \
191 |         XMM5 = _mm_mul_ps(XMM5, XMM1); \
192 |         XMM6 = _mm_mul_ps(XMM6, XMM2); \
193 |         XMM7 = _mm_mul_ps(XMM7, XMM3); \
194 |         _mm_store_ps((y)+i   , XMM4); \
195 |         _mm_store_ps((y)+i+ 4, XMM5); \
196 |         _mm_store_ps((y)+i+ 8, XMM6); \
197 |         _mm_store_ps((y)+i+12, XMM7); \
198 |     } \
199 | }
200 | 
201 | 
202 | 
203 | #if     3 <= __SSE__ || defined(__SSE3__)
204 | /*
205 |     Horizontal add with haddps SSE3 instruction. The work register (rw)
206 |     is unused.
207 |  */
208 | #define __horizontal_sum(r, rw) \
209 |     r = _mm_hadd_ps(r, r); \
210 |     r = _mm_hadd_ps(r, r);
211 | 
212 | #else
213 | /*
214 |     Horizontal add with SSE instruction. The work register (rw) is used.
215 |  */
216 | #define __horizontal_sum(r, rw) \
217 |     rw = r; \
218 |     r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(1, 0, 3, 2)); \
219 |     r = _mm_add_ps(r, rw); \
220 |     rw = r; \
221 |     r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(2, 3, 0, 1)); \
222 |     r = _mm_add_ps(r, rw);
223 | 
224 | #endif
225 | 
226 | #define vecdot(s, x, y, n) \
227 | { \
228 |     int i; \
229 |     __m128 XMM0 = _mm_setzero_ps(); \
230 |     __m128 XMM1 = _mm_setzero_ps(); \
231 |     __m128 XMM2, XMM3, XMM4, XMM5; \
232 |     for (i = 0;i < (n);i += 8) { \
233 |         XMM2 = _mm_load_ps((x)+i  ); \
234 |         XMM3 = _mm_load_ps((x)+i+4); \
235 |         XMM4 = _mm_load_ps((y)+i  ); \
236 |         XMM5 = _mm_load_ps((y)+i+4); \
237 |         XMM2 = _mm_mul_ps(XMM2, XMM4); \
238 |         XMM3 = _mm_mul_ps(XMM3, XMM5); \
239 |         XMM0 = _mm_add_ps(XMM0, XMM2); \
240 |         XMM1 = _mm_add_ps(XMM1, XMM3); \
241 |     } \
242 |     XMM0 = _mm_add_ps(XMM0, XMM1); \
243 |     __horizontal_sum(XMM0, XMM1); \
244 |     _mm_store_ss((s), XMM0); \
245 | }
246 | 
247 | #define vec2norm(s, x, n) \
248 | { \
249 |     int i; \
250 |     __m128 XMM0 = _mm_setzero_ps(); \
251 |     __m128 XMM1 = _mm_setzero_ps(); \
252 |     __m128 XMM2, XMM3; \
253 |     for (i = 0;i < (n);i += 8) { \
254 |         XMM2 = _mm_load_ps((x)+i  ); \
255 |         XMM3 = _mm_load_ps((x)+i+4); \
256 |         XMM2 = _mm_mul_ps(XMM2, XMM2); \
257 |         XMM3 = _mm_mul_ps(XMM3, XMM3); \
258 |         XMM0 = _mm_add_ps(XMM0, XMM2); \
259 |         XMM1 = _mm_add_ps(XMM1, XMM3); \
260 |     } \
261 |     XMM0 = _mm_add_ps(XMM0, XMM1); \
262 |     __horizontal_sum(XMM0, XMM1); \
263 |     XMM2 = XMM0; \
264 |     XMM1 = _mm_rsqrt_ss(XMM0); \
265 |     XMM3 = XMM1; \
266 |     XMM1 = _mm_mul_ss(XMM1, XMM1); \
267 |     XMM1 = _mm_mul_ss(XMM1, XMM3); \
268 |     XMM1 = _mm_mul_ss(XMM1, XMM0); \
269 |     XMM1 = _mm_mul_ss(XMM1, _mm_set_ss(-0.5f)); \
270 |     XMM3 = _mm_mul_ss(XMM3, _mm_set_ss(1.5f)); \
271 |     XMM3 = _mm_add_ss(XMM3, XMM1); \
272 |     XMM3 = _mm_mul_ss(XMM3, XMM2); \
273 |     _mm_store_ss((s), XMM3); \
274 | }
275 | 
276 | #define vec2norminv(s, x, n) \
277 | { \
278 |     int i; \
279 |     __m128 XMM0 = _mm_setzero_ps(); \
280 |     __m128 XMM1 = _mm_setzero_ps(); \
281 |     __m128 XMM2, XMM3; \
282 |     for (i = 0;i < (n);i += 16) { \
283 |         XMM2 = _mm_load_ps((x)+i  ); \
284 |         XMM3 = _mm_load_ps((x)+i+4); \
285 |         XMM2 = _mm_mul_ps(XMM2, XMM2); \
286 |         XMM3 = _mm_mul_ps(XMM3, XMM3); \
287 |         XMM0 = _mm_add_ps(XMM0, XMM2); \
288 |         XMM1 = _mm_add_ps(XMM1, XMM3); \
289 |     } \
290 |     XMM0 = _mm_add_ps(XMM0, XMM1); \
291 |     __horizontal_sum(XMM0, XMM1); \
292 |     XMM2 = XMM0; \
293 |     XMM1 = _mm_rsqrt_ss(XMM0); \
294 |     XMM3 = XMM1; \
295 |     XMM1 = _mm_mul_ss(XMM1, XMM1); \
296 |     XMM1 = _mm_mul_ss(XMM1, XMM3); \
297 |     XMM1 = _mm_mul_ss(XMM1, XMM0); \
298 |     XMM1 = _mm_mul_ss(XMM1, _mm_set_ss(-0.5f)); \
299 |     XMM3 = _mm_mul_ss(XMM3, _mm_set_ss(1.5f)); \
300 |     XMM3 = _mm_add_ss(XMM3, XMM1); \
301 |     _mm_store_ss((s), XMM3); \
302 | }
303 | 


--------------------------------------------------------------------------------
/paper/paper.bib:
--------------------------------------------------------------------------------
  1 | @incollection{bouchard2017,
  2 |     title = {Union of {I}ntersections ({UoI}) for Interpretable Data Driven Discovery and Prediction},
  3 |     author = {Bouchard, Kristofer and Bujan, Alejandro and Roosta-Khorasani, Farbod and Ubaru, Shashanka and Prabhat, Mr. and Snijders, Antoine and Mao, Jian-Hua and Chang, Edward and Mahoney, Michael W and Bhattacharya, Sharmodeep},
  4 |     booktitle = {{Advances in Neural Information Processing Systems 30}},
  5 |     pages = {1078--1086},
  6 |     year = {2017},
  7 | }
  8 | 
  9 | @INPROCEEDINGS{ubaru2017,
 10 |     author = {S. {Ubaru} and K. {Wu} and K. E. {Bouchard}},
 11 |     booktitle = {{2017 16th IEEE International Conference on Machine Learning and Applications (ICMLA)}},
 12 |     title = {{UoI-NMF} Cluster: A Robust Nonnegative Matrix Factorization Algorithm for Improved Parts-Based Decomposition and Reconstruction of Noisy Data},
 13 |     year = {2017},
 14 |     volume = {},
 15 |     number = {},
 16 |     pages = {241-248},
 17 |     doi = "10.1109/ICMLA.2017.0-152"
 18 | }
 19 | 
 20 | @ARTICLE{tibshirani1994,
 21 |     author = {Robert Tibshirani},
 22 |     title = {Regression Shrinkage and Selection Via the Lasso},
 23 |     journal = {Journal of the Royal Statistical Society, Series B},
 24 |     year = {1994},
 25 |     volume = {58},
 26 |     pages = {267--288}
 27 | }
 28 | 
 29 | @article{bickel2006,
 30 |     title={Regularization in statistics},
 31 |     author={Peter J. Bickel and Bo Li and Alexandre B. Tsybakov and Sara A. van de Geer and Bin Yu and Te{\'o}filo Vald{\'e}s and Carlos Rivero and Jianqing Fan and Aad van der Vaart},
 32 |     journal={Test},
 33 |     year={2006},
 34 |     volume={15},
 35 |     pages={271-344},
 36 |     doi="10.1007/BF02607055"
 37 | }
 38 | 
 39 | @inproceedings{sklearn_api,
 40 |   author    = {Lars Buitinck and Gilles Louppe and Mathieu Blondel and
 41 |                Fabian Pedregosa and Andreas Mueller and Olivier Grisel and
 42 |                Vlad Niculae and Peter Prettenhofer and Alexandre Gramfort
 43 |                and Jaques Grobler and Robert Layton and Jake VanderPlas and
 44 |                Arnaud Joly and Brian Holt and Ga{\"{e}}l Varoquaux},
 45 |   title     = {{API} design for machine learning software: experiences from the scikit-learn
 46 |                project},
 47 |   booktitle = {{ECML PKDD Workshop: Languages for Data Mining and Machine Learning}},
 48 |   year      = {2013},
 49 |   pages = {108--122},
 50 | }
 51 | 
 52 | @inproceedings{gong2015,
 53 |     author = {Gong, Pinghua and Ye, Jieping},
 54 |     title = {A Modified Orthant-Wise Limited Memory Quasi-Newton Method with Convergence Analysis},
 55 |     booktitle = {{Proceedings of the 32nd International Conference on International Conference on Machine Learning - Volume 37}},
 56 |     year = {2015},
 57 |     pages = {276--284},
 58 |     numpages = {9},
 59 | }
 60 | 
 61 | @misc{ge2019,
 62 |     author = {Ge, Jason},
 63 |     title = {PICASSO: PathwIse CalibrAted Sparse Shooting algOrithm},
 64 |     year = {2019},
 65 |     publisher = {GitHub},
 66 |     journal = {GitHub repository},
 67 |     howpublished = {\url{https://github.com/jasonge27/picasso}},
 68 | }
 69 | 
 70 | @article {murdoch2019,
 71 |   author = {Murdoch, W. James and Singh, Chandan and Kumbier, Karl and Abbasi-Asl, Reza and Yu, Bin},
 72 |   title = {Definitions, methods, and applications in interpretable machine learning},
 73 |   volume = {116},
 74 |   number = {44},
 75 |   pages = {22071--22080},
 76 |   year = {2019},
 77 |   doi = {10.1073/pnas.1900654116},
 78 |   publisher = {National Academy of Sciences},
 79 |   journal = {Proceedings of the National Academy of Sciences}
 80 | }
 81 | 
 82 | @article{dalcin2005,
 83 |   title={{MPI} for {P}ython},
 84 |   author={Dalc{\'\i}n, Lisandro and Paz, Rodrigo and Storti, Mario},
 85 |   journal={Journal of Parallel and Distributed Computing},
 86 |   volume={65},
 87 |   number={9},
 88 |   pages={1108--1115},
 89 |   year={2005},
 90 |   publisher={Elsevier}
 91 | }
 92 | 
 93 | @article{scikit-learn,
 94 |  title={Scikit-learn: Machine Learning in {P}ython},
 95 |  author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
 96 |          and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
 97 |          and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
 98 |          Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
 99 |  journal={Journal of Machine Learning Research},
100 |  volume={12},
101 |  pages={2825--2830},
102 |  year={2011}
103 | }
104 | 
105 | @incollection{akaike1998,
106 |   title={Information theory and an extension of the maximum likelihood principle},
107 |   author={Akaike, Hirotogu},
108 |   booktitle={{Selected papers of Hirotugu Akaike}},
109 |   pages={199--213},
110 |   year={1998},
111 |   publisher={Springer},
112 |   doi="10.1007/978-1-4612-1694-0_15"
113 | }
114 | 
115 | @article{schwarz1978,
116 |   title={Estimating the dimension of a model},
117 |   author={Schwarz},
118 |   journal={The Annals of Statistics},
119 |   volume={6},
120 |   number={2},
121 |   pages={461--464},
122 |   year={1978},
123 |   publisher={Institute of Mathematical Statistics},
124 |   doi="10.1214/aos/1176344136"
125 | }
126 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | # Minimum requirements for the build system to execute.
3 | requires = ["setuptools>=42", "wheel", "numpy", "cython"]
4 | build-backend = "setuptools.build_meta"
5 | 


--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | norecursedirs = mpi
3 | markers =
4 |     fast: mark a test as a fast test e.g. unit test
5 |     slow: mark a test as a slow test e.g. end-to-end test
6 | 


--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | -r requirements.txt
2 | matplotlib
3 | pytest
4 | flake8
5 | cython
6 | sphinx-gallery
7 | sphinx-rtd-theme
8 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.14
2 | h5py>=2.8
3 | scikit-learn>=0.24
4 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
  1 | from setuptools import setup, find_packages, Extension
  2 | from setuptools.command.build_ext import build_ext
  3 | from distutils.ccompiler import get_default_compiler
  4 | # To use a consistent encoding
  5 | from codecs import open
  6 | from os import path
  7 | 
  8 | import numpy as np
  9 | 
 10 | 
 11 | here = path.abspath(path.dirname(__file__))
 12 | 
 13 | with open(path.join(here, 'README.md'), encoding='utf-8') as f:
 14 |     long_description = f.read()
 15 | with open(path.join(here, 'requirements.txt'), encoding='utf-8') as f:
 16 |     requirements = f.read().splitlines()
 17 | with open(path.join(here, 'requirements-dev.txt'), encoding='utf-8') as f:
 18 |     dev_requirements = f.read().splitlines()
 19 |     dev_requirements = dev_requirements[1:] # Throw away the first line which is not a package.
 20 | 
 21 | # Prepare lbfgs
 22 | from Cython.Build import cythonize
 23 | 
 24 | class custom_build_ext(build_ext):
 25 |     def finalize_options(self):
 26 |         build_ext.finalize_options(self)
 27 |         if self.compiler is None:
 28 |             compiler = get_default_compiler()
 29 |         else:
 30 |             compiler = self.compiler
 31 | 
 32 |         if compiler == 'msvc':
 33 |             include_dirs.append('compat/win32')
 34 | 
 35 | include_dirs = ['liblbfgs', np.get_include()]
 36 | 
 37 | ext_modules = cythonize(
 38 |     [Extension('pyuoi.lbfgs._lowlevel',
 39 |                ['src/pyuoi/lbfgs/_lowlevel.pyx', 'liblbfgs/lbfgs.c'],
 40 |                include_dirs=include_dirs)])
 41 | 
 42 | 
 43 | setup(
 44 |     name='pyuoi',
 45 | 
 46 |     # Versions should comply with PEP440.  For a discussion on single-sourcing
 47 |     # the version across setup.py and the project code, see
 48 |     # https://packaging.python.org/en/latest/single_source_version.html
 49 |     version='1.1.1',
 50 | 
 51 |     description='The Union of Intersections framework in Python.',
 52 |     long_description=long_description,
 53 |     long_description_content_type="text/markdown",
 54 | 
 55 | 
 56 |     # Author details
 57 |     author='',
 58 |     author_email='',
 59 | 
 60 |     # See https://pypi.python.org/pypi?%3Aaction=list_classifiers
 61 |     classifiers=[
 62 |         # How mature is this project? Common values are
 63 |         #   3 - Alpha
 64 |         #   4 - Beta
 65 |         #   5 - Production/Stable
 66 |         'Development Status :: 4 - Beta',
 67 | 
 68 |         # Indicate who your project is intended for
 69 |         'Intended Audience :: Science/Research',
 70 |         'Topic :: Scientific/Engineering',
 71 | 
 72 |         # Pick your license as you wish (should match "license" above)
 73 |         'License :: OSI Approved :: BSD License',
 74 | 
 75 |         # Specify the Python versions you support here. In particular, ensure
 76 |         # that you indicate whether you support Python 2, Python 3 or both.
 77 |         'Programming Language :: Python :: 3',
 78 |         'Programming Language :: Python :: 3.7',
 79 |         'Programming Language :: Python :: 3.8',
 80 |         'Programming Language :: Python :: 3.9',
 81 |     ],
 82 | 
 83 |     # What does your project relate to?
 84 |     keywords='UoI',
 85 | 
 86 |     # You can just specify the packages manually here if your project is
 87 |     # simple. Or you can use find_packages().
 88 |     package_dir={'': 'src'},
 89 |     packages=find_packages() +
 90 |         find_packages(where="src"),
 91 |     package_data={'pyuoi': ['data/*.h5']},
 92 | 
 93 |     # Alternatively, if you want to distribute just a my_module.py, uncomment
 94 |     # this:
 95 |     #   py_modules=["my_module"],
 96 | 
 97 |     # List run-time dependencies here.  These will be installed by pip when
 98 |     # your project is installed. For an analysis of "install_requires" vs pip's
 99 |     # requirements files see:
100 |     # https://packaging.python.org/en/latest/requirements.html
101 |     install_requires=requirements,
102 | 
103 |     # List additional groups of dependencies here (e.g. development
104 |     # dependencies). You can install these using the following syntax,
105 |     # for example:
106 |     # $ pip install -e .[dev,test]
107 |     extras_require={
108 |         'perf': ['mpi4py', 'pycasso'],
109 |         'dev': dev_requirements
110 |     },
111 | 
112 |     url='https://github.com/BouchardLab/pyuoi',
113 |     ext_modules=ext_modules,
114 |     cmdclass={'build_ext': custom_build_ext}
115 | 
116 | 
117 |     # To provide executable scripts, use entry points in preference to the
118 |     # "scripts" keyword. Entry points provide cross-platform support and allow
119 |     # pip to create the appropriate form of executable for the target platform.
120 |     # entry_points={
121 |     #    'console_scripts': [
122 |     #        'sample=sample:main',
123 |     #    ],
124 |     # },
125 | )
126 | 


--------------------------------------------------------------------------------
/src/pyuoi/__init__.py:
--------------------------------------------------------------------------------
 1 | from .linear_model import UoI_Lasso
 2 | from .linear_model import UoI_ElasticNet
 3 | from .linear_model import UoI_L1Logistic
 4 | from .decomposition import UoI_NMF
 5 | from .decomposition import UoI_CUR
 6 | 
 7 | 
 8 | __all__ = ["UoI_Lasso",
 9 |            "UoI_L1Logistic",
10 |            "UoI_ElasticNet",
11 |            "UoI_NMF",
12 |            "UoI_CUR"]
13 | 
14 | name = "pyuoi"
15 | 


--------------------------------------------------------------------------------
/src/pyuoi/data/Swimmer.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BouchardLab/pyuoi/25e47655a07895f206c2e3ee3b259421c144a05d/src/pyuoi/data/Swimmer.h5


--------------------------------------------------------------------------------
/src/pyuoi/datasets/__init__.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from sklearn.utils import check_random_state
  3 | 
  4 | from ..utils import softmax, sigmoid
  5 | 
  6 | 
  7 | def load_swimmer(flatten=True):
  8 |     from pkg_resources import resource_filename
  9 |     import h5py
 10 |     with h5py.File(resource_filename('pyuoi', 'data/Swimmer.h5'), 'r+') as f:
 11 |         swimmers = f['Y'][:].astype(float)
 12 |     if flatten:
 13 |         swimmers = swimmers.T.reshape(256, 1024)
 14 |     return swimmers
 15 | 
 16 | 
 17 | def make_linear_regression(n_samples=100, n_features=5, n_informative=2,
 18 |                            X_loc=3., X_scale=1., snr=5.,
 19 |                            beta=None, beta_low=1., beta_high=3.,
 20 |                            include_intercept=False, random_state=None):
 21 |     """Make a Linear regression dataset.
 22 | 
 23 |     Parameters
 24 |     ----------
 25 |     n_samples : int
 26 |         The number of samples to make.
 27 |     n_features : int
 28 |         The number of feature to use.
 29 |     n_informative : int
 30 |         The number of feature with non-zero weights.
 31 |     X_loc : float
 32 |         The mean of the features in the design matrix.
 33 |     X_scale : float
 34 |         The standard deviation of the features in the design matrix.
 35 |     snr : float
 36 |         The signal-to-noise ratio, which informs the variance of the noise
 37 |         term.
 38 |     beta : np.ndarray or None
 39 |         The beta values to use. If None, beta values will be drawn from a
 40 |         uniform distribution.
 41 |     beta_low : float
 42 |         The lower bound for the beta values.
 43 |     beta_high : float
 44 |         The upper bound for the beta values.
 45 |     include_intercept : bool
 46 |         If true, includes an intercept in the model, if False, the intercept is
 47 |         set to 0.
 48 |     random_state : int, np.random.RandomState instance, or None
 49 |         Random number seed or state.
 50 | 
 51 |     Returns
 52 |     -------
 53 |     X : ndarray, shape (n_samples, n_features)
 54 |         The design matrix.
 55 |     y : ndarray, shape (n_samples,)
 56 |         The response vector.
 57 |     beta : ndarray, shape (n_features,)
 58 |         The feature coefficients.
 59 |     intercept : float
 60 |         The intercept. If include_intercept is False, then intercept is zero.
 61 |     """
 62 |     rng = check_random_state(random_state)
 63 | 
 64 |     # create design matrix
 65 |     X = rng.normal(loc=X_loc,
 66 |                    scale=X_scale,
 67 |                    size=(n_samples, n_features))
 68 | 
 69 |     # create coefficients
 70 |     if beta is None:
 71 |         # draw beta values from gamma distribution
 72 |         beta = rng.uniform(low=beta_low,
 73 |                            high=beta_high,
 74 |                            size=n_features)
 75 | 
 76 |         # choose sparsity mask
 77 |         zero_idx = np.zeros(n_features)
 78 |         zero_idx[:n_informative] = 1
 79 |         rng.shuffle(zero_idx)
 80 |         # randomly assign beta elements to zero
 81 |         beta = beta * zero_idx
 82 | 
 83 |     # create intercept
 84 |     if include_intercept:
 85 |         intercept = rng.uniform(low=beta_low, high=beta_high)
 86 |     else:
 87 |         intercept = 0
 88 | 
 89 |     # draw response variable
 90 |     eta = intercept + np.dot(X, beta)
 91 |     signal_var = np.var(eta)
 92 |     noise_var = signal_var / snr
 93 |     noise = rng.normal(loc=0, scale=np.sqrt(noise_var), size=eta.shape)
 94 |     y = eta + noise
 95 | 
 96 |     return X, y, beta, intercept
 97 | 
 98 | 
 99 | def make_classification(n_samples=100, n_features=20, n_informative=2,
100 |                         n_classes=2, shared_support=False, random_state=None,
101 |                         w_scale=1., include_intercept=False):
102 |     """Make a linear classification dataset.
103 | 
104 |     Parameters
105 |     ----------
106 |     n_samples : int
107 |         The number of samples to make.
108 |     n_features : int
109 |         The number of feature to use.
110 |     n_informative : int
111 |         The number of feature with non-zero weights.
112 |     n_classes : int
113 |         The number of classes.
114 |     shared_support : bool
115 |         If True, all classes will share the same random support. If False, they
116 |         will each have randomly chooses support.
117 |     random_state : int or np.random.RandomState instance
118 |         Random number seed or state.
119 |     w_scale : float
120 |         The model parameter matrix, w, will be drawn from a normal distribution
121 |         with std=w_scale.
122 |     include_intercept : bool
123 |         If true, includes an intercept in the model, if False, the intercept is
124 |         set to 0.
125 |     """
126 |     if isinstance(random_state, int):
127 |         rng = np.random.RandomState(random_state)
128 |     else:
129 |         if random_state is None:
130 |             rng = np.random.RandomState()
131 |         else:
132 |             rng = random_state
133 |     n_not_informative = n_features - n_informative
134 | 
135 |     X = rng.randn(n_samples, n_features)
136 |     X -= X.mean(axis=-1, keepdims=True)
137 |     X /= X.std(axis=-1, keepdims=True)
138 | 
139 |     if n_classes > 2:
140 |         w = rng.randn(n_features, n_classes)
141 |         if include_intercept:
142 |             intercept = rng.randn(1, n_classes)
143 |             intercept -= intercept.max()
144 |         else:
145 |             intercept = np.zeros((1, n_classes))
146 |         if n_not_informative > 0:
147 |             if shared_support:
148 |                 idxs = rng.permutation(n_features)[:n_not_informative]
149 |                 w[idxs] = 0.
150 |             else:
151 |                 for ii in range(n_classes):
152 |                     idxs = rng.permutation(n_features)[:n_not_informative]
153 |                     w[idxs, ii * np.ones_like(idxs, dtype=int)] = 0.
154 |     else:
155 |         w = rng.randn(n_features, 1)
156 |         if include_intercept:
157 |             intercept = rng.randn(1, 1)
158 |         else:
159 |             intercept = np.zeros((1, 1))
160 |         if n_not_informative > 0:
161 |             idxs = rng.permutation(n_features)[:n_not_informative]
162 |             w[idxs] = 0.
163 |     w *= w_scale
164 | 
165 |     log_p = X.dot(w)
166 |     if include_intercept:
167 |         log_p += intercept
168 |     if n_classes > 2:
169 |         p = softmax(log_p)
170 |         y = np.array([rng.multinomial(1, pi) for pi in p])
171 |         y = y.argmax(axis=-1)
172 |     else:
173 |         p = sigmoid(np.squeeze(log_p))
174 |         y = np.array([rng.binomial(1, pi) for pi in p])
175 | 
176 |     return X, y, w.T, intercept
177 | 
178 | 
179 | def make_poisson_regression(n_samples=100, n_features=5, n_informative=2,
180 |                             X_loc=0., X_scale=1. / 8,
181 |                             beta=None, beta_shape=1., beta_scale=3.,
182 |                             include_intercept=False, random_state=None):
183 |     """Make a Poisson regression dataset.
184 | 
185 |     Parameters
186 |     ----------
187 |     n_samples : int
188 |         The number of samples to make.
189 |     n_features : int
190 |         The number of feature to use.
191 |     n_informative : int
192 |         The number of feature with non-zero weights.
193 |     X_loc : float
194 |         The mean of the features in the design matrix.
195 |     X_scale : float
196 |         The standard deviation of the features in the design matrix.
197 |     beta : np.ndarray or None
198 |         The beta values to use. If None, beta values will be drawn from a gamma
199 |         distribution.
200 |     beta_shape : float
201 |         The shape parameter for the beta values.
202 |     beta_scale : float
203 |         The scale parameter for the beta values.
204 |     include_intercept : bool
205 |         If true, includes an intercept in the model, if False, the intercept is
206 |         set to 0.
207 |     random_state : int, np.random.RandomState instance, or None
208 |         Random number seed or state.
209 | 
210 |     Returns
211 |     -------
212 |     X : ndarray, shape (n_samples, n_features)
213 |         The design matrix.
214 |     y : ndarray, shape (n_samples,)
215 |         The response vector.
216 |     beta : ndarray, shape (n_features,)
217 |         The feature coefficients.
218 |     intercept : float
219 |         The intercept. If include_intercept is False, then intercept is zero.
220 |     """
221 |     rng = check_random_state(random_state)
222 | 
223 |     # create design matrix
224 |     X = rng.normal(loc=X_loc,
225 |                    scale=X_scale,
226 |                    size=(n_samples, n_features))
227 | 
228 |     # create coefficients
229 |     if beta is None:
230 |         # draw beta values from gamma distribution
231 |         beta = rng.gamma(shape=beta_shape,
232 |                          scale=beta_scale,
233 |                          size=n_features)
234 |         # choose sparsity mask
235 |         zero_idx = np.zeros(n_features)
236 |         zero_idx[:n_informative] = 1
237 |         rng.shuffle(zero_idx)
238 |         # randomly assign beta elements to zero
239 |         beta = beta * zero_idx
240 | 
241 |     # create intercept
242 |     if include_intercept:
243 |         intercept = rng.gamma(shape=beta_shape, scale=beta_scale)
244 |     else:
245 |         intercept = 0
246 | 
247 |     # draw response variable
248 |     eta = intercept + np.dot(X, beta)
249 |     y = rng.poisson(np.exp(eta))
250 | 
251 |     return X, y, beta, intercept
252 | 


--------------------------------------------------------------------------------
/src/pyuoi/decomposition/__init__.py:
--------------------------------------------------------------------------------
 1 | """Union of Intersection models with matrix decomposition."""
 2 | from .CUR import UoI_CUR
 3 | from .CUR import CUR
 4 | from .NMF import UoI_NMF
 5 | from .NMF import UoI_NMF_Base
 6 | 
 7 | __all__ = ["UoI_CUR",
 8 |            "CUR",
 9 |            "UoI_NMF",
10 |            "UoI_NMF_Base"]
11 | 


--------------------------------------------------------------------------------
/src/pyuoi/decomposition/base.py:
--------------------------------------------------------------------------------
 1 | import abc as _abc
 2 | 
 3 | from sklearn.linear_model._base import BaseEstimator
 4 | 
 5 | 
 6 | class AbstractDecompositionModel(BaseEstimator, metaclass=_abc.ABCMeta):
 7 |     @_abc.abstractmethod
 8 |     def fit(X):
 9 |         """Placeholder for fit. Subclasses should implement this method.
10 |         Fit the model with X.
11 | 
12 |         Parameters
13 |         ----------
14 |         X : array-like, shape (n_samples, n_features)
15 |             Training data.
16 | 
17 |         Returns
18 |         -------
19 |         self : object
20 |             Returns the instance itself.
21 |         """
22 |         pass
23 | 
24 |     @_abc.abstractmethod
25 |     def transform(self, X):
26 |         """Apply dimensionality reduction to X.
27 | 
28 |         Parameters
29 |         ----------
30 |         X : array-like, shape (n_samples, n_features)
31 |             Data matrix to be transformed.
32 | 
33 |         Returns
34 |         -------
35 |         X_new : array-like, shape (n_samples, n_components)
36 |             The transformed data matrix.
37 |         """
38 |         pass
39 | 
40 |     @_abc.abstractmethod
41 |     def fit_transform(self, X):
42 |         """Transform the data X according to the fitted decomposition.
43 | 
44 |         Parameters
45 |         ----------
46 |         X : array-like, shape (n_samples, n_features)
47 |             Data matrix to be decomposed.
48 | 
49 |         Returns
50 |         -------
51 |         X_new : array-like, shape (n_samples, n_components)
52 |             Transformed data.
53 |         """
54 |         pass
55 | 


--------------------------------------------------------------------------------
/src/pyuoi/decomposition/utils.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | 
  4 | def column_select(V, c, leverage_sort=False, random_state=None):
  5 |     """Chooses column indices from a matrix given its SVD.
  6 | 
  7 |     Parameters
  8 |     ----------
  9 |     V : ndarray, shape (n_features, rank)
 10 |         The set of singular vectors.
 11 | 
 12 |     c : float
 13 |         The expected number of columns to select.
 14 | 
 15 |     leverage_sort : bool
 16 |         If True, resorts the column indices in increasing order of leverage
 17 |         score. If False, the column indices are sorted normally.
 18 | 
 19 |     Returns
 20 |     -------
 21 |     column_indices : ndarray of ints
 22 |         An array of indices denoting which columns were selected. If
 23 |         leverage_sort was true, this array is arranged by increasing leverage
 24 |         score.
 25 |     """
 26 |     # random state
 27 |     rng = np.random.RandomState(seed=None)
 28 | 
 29 |     # extract number of samples and rank
 30 |     n_features, k = V.shape
 31 | 
 32 |     # calculate normalized leverage score
 33 |     pi = np.sum(V**2, axis=1) / k
 34 | 
 35 |     # iterate through columns
 36 |     column_flags = np.zeros(n_features, dtype=bool)
 37 |     for column in range(n_features):
 38 |         # Mahoney (2009), eqn 3
 39 |         p = min(1, c * pi[column])
 40 |         # selected column randomly
 41 |         column_flags[column] = p > rng.rand()
 42 | 
 43 |     column_indices = np.argwhere(column_flags).ravel()
 44 | 
 45 |     # if desired, sort by increasing leverage score
 46 |     if leverage_sort:
 47 |         pi_subset = pi[column_indices]
 48 |         column_indices = column_indices[np.argsort(pi_subset)]
 49 | 
 50 |     return column_indices
 51 | 
 52 | 
 53 | def stability_selection_to_threshold(stability_selection, n_boots):
 54 |     """Converts user inputted stability selection to an array of
 55 |     thresholds. These thresholds correspond to the number of bootstraps
 56 |     that a feature must appear in to guarantee placement in the selection
 57 |     profile.
 58 | 
 59 |     Parameters
 60 |     ----------
 61 |     stability_selection : int, float, or array-like
 62 |         If int, treated as the number of bootstraps that a feature must
 63 |         appear in to guarantee placement in selection profile. If float,
 64 |         must be between 0 and 1, and is instead the proportion of
 65 |         bootstraps.
 66 | 
 67 |     n_boots: int
 68 |         The number of bootstraps that will be used for selection
 69 |     """
 70 | 
 71 |     # float, indicating proportion of bootstraps
 72 |     if isinstance(stability_selection, float):
 73 |         selection_threshold = int(stability_selection * n_boots)
 74 | 
 75 |     # int, indicating number of bootstraps
 76 |     elif isinstance(stability_selection, int):
 77 |         selection_threshold = stability_selection
 78 | 
 79 |     else:
 80 |         raise ValueError("Stability selection must be a valid float or int.")
 81 | 
 82 |     # ensure that ensuing list of selection thresholds satisfies
 83 |     # the correct bounds
 84 |     if not (
 85 |         selection_threshold <= n_boots and selection_threshold >= 1
 86 |     ):
 87 |         raise ValueError("Stability selection thresholds must be within "
 88 |                          "the correct bounds.")
 89 | 
 90 |     return selection_threshold
 91 | 
 92 | 
 93 | def dissimilarity(H1, H2):
 94 |     """Calculates the dissimilarity between two sets of NMF bases.
 95 | 
 96 |     Parameters
 97 |     ----------
 98 |     H1 : ndarray, shape (n_components, n_features)
 99 |         First set of bases.
100 | 
101 |     H2 : ndarray, shape (n_components, n_features)
102 |         Second set of bases.
103 | 
104 |     Returns
105 |     -------
106 |     diss : float
107 |         Dissimilarity between the two sets of bases.
108 |     """
109 |     k = H1.shape[0]
110 |     H1 = H1 / np.linalg.norm(H1, axis=1, keepdims=True)
111 |     H2 = H2 / np.linalg.norm(H2, axis=1, keepdims=True)
112 |     C = np.dot(H1, H2.T)
113 |     diss = 1 - ((np.max(C, axis=0).sum() + np.max(C, axis=1).sum()) / (2. * k))
114 |     return diss
115 | 


--------------------------------------------------------------------------------
/src/pyuoi/lbfgs/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License
 2 | 
 3 | Copyright 2011 University of Amsterdam
 4 | Copyright 2011-2012 Lars Buitinck
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a
 7 | copy of this software and associated documentation files (the "Software"),
 8 | to deal in the Software without restriction, including without limitation
 9 | the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 | and/or sell copies of the Software, and to permit persons to whom the
11 | Software is furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in
14 | all copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 | THE SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/src/pyuoi/lbfgs/__init__.py:
--------------------------------------------------------------------------------
  1 | """
  2 | LBFGS and OWL-QN optimization algorithms
  3 | 
  4 | Python wrapper around liblbfgs.
  5 | """
  6 | 
  7 | import warnings
  8 | from ._lowlevel import LBFGS, AllZeroLBFGSError  # noqa: F401
  9 | 
 10 | 
 11 | def fmin_lbfgs(f, x0, progress=None, args=(), orthantwise_c=0,
 12 |                orthantwise_start=0, orthantwise_end=-1, m=10,
 13 |                epsilon=1e-5, past=0, delta=0., max_iterations=0,
 14 |                line_search="default", max_linesearch=20, min_step=1e-20,
 15 |                max_step=1e+20, ftol=1e-4, wolfe=0.9, gtol=0.9, xtol=1e-30):
 16 |     """Minimize a function using LBFGS or OWL-QN
 17 | 
 18 |      Parameters
 19 |     ----------
 20 |     f : callable(x, g, *args)
 21 |         Computes function to minimize and its gradient.
 22 |         Called with the current position x (a numpy.ndarray), a gradient
 23 |         vector g (a numpy.ndarray) to be filled in and *args.
 24 |         Must return the value at x and set the gradient vector g.
 25 | 
 26 |     x0 : array-like
 27 |         Initial values. A copy of this array is made prior to optimization.
 28 | 
 29 |     progress : callable(x, g, fx, xnorm, gnorm, step, k, num_eval, *args),
 30 |                optional
 31 |         If not None, called at each iteration after the call to f with the
 32 |         current values of x, g and f(x), the L2 norms of x and g, the line
 33 |         search step, the iteration number, the number of evaluations at
 34 |         this iteration and args (see below).
 35 |         If the return value from this callable is not 0 and not None,
 36 |         optimization is stopped and LBFGSError is raised.
 37 | 
 38 |     args : sequence
 39 |         Arbitrary list of arguments, passed on to f and progress as *args.
 40 | 
 41 |     orthantwise_c: float, optional (default=0)
 42 |         Coefficient for the L1 norm of variables.
 43 |         This parameter should be set to zero for standard minimization
 44 |         problems. Setting this parameter to a positive value activates
 45 |         Orthant-Wise Limited-memory Quasi-Newton (OWL-QN) method, which
 46 |         minimizes the objective function F(x) combined with the L1 norm |x|
 47 |         of the variables, {F(x) + C |x|}. This parameter is the coefficient
 48 |         for the |x|, i.e., C. As the L1 norm |x| is not differentiable at
 49 |         zero, the library modifies function and gradient evaluations from
 50 |         a client program suitably; a client program thus have only to return
 51 |         the function value F(x) and gradients G(x) as usual. The default value
 52 |         is zero.
 53 | 
 54 |         If orthantwise_c is set, then line_search cannot be the default
 55 |         and must be one of 'armijo', 'wolfe', or 'strongwolfe'.
 56 | 
 57 |     orthantwise_start: int, optional (default=0)
 58 |         Start index for computing L1 norm of the variables.
 59 |         This parameter is valid only for OWL-QN method
 60 |         (i.e., orthantwise_c != 0). This parameter b (0 <= b < N)
 61 |         specifies the index number from which the library computes the
 62 |         L1 norm of the variables x,
 63 |             |x| := |x_{b}| + |x_{b+1}| + ... + |x_{N}| .
 64 |         In other words, variables x_1, ..., x_{b-1} are not used for
 65 |         computing the L1 norm. Setting b (0 < b < N), one can protect
 66 |         variables, x_1, ..., x_{b-1} (e.g., a bias term of logistic
 67 |         regression) from being regularized. The default value is zero.
 68 | 
 69 |     orthantwise_end: int, optional (default=-1)
 70 |         End index for computing L1 norm of the variables.
 71 |         This parameter is valid only for OWL-QN method
 72 |         (i.e., orthantwise_c != 0). This parameter e (0 < e <= N)
 73 |         specifies the index number at which the library stops computing the
 74 |         L1 norm of the variables x,
 75 | 
 76 |     m: int, optional, default=6
 77 |         The number of corrections to approximate the inverse hessian matrix.
 78 |         The L-BFGS routine stores the computation results of previous `m`
 79 |         iterations to approximate the inverse hessian matrix of the current
 80 |         iteration. This parameter controls the size of the limited memories
 81 |         (corrections). The default value is 6. Values less than 3 are
 82 |         not recommended. Large values will result in excessive computing time.
 83 | 
 84 |     epsilon: float, optional (default=1e-5)
 85 |         Epsilon for convergence test.
 86 |         This parameter determines the accuracy with which the solution is to
 87 |         be found. A minimization terminates when
 88 |             ||g|| < \ref epsilon * max(1, ||x||),
 89 |         where ||.|| denotes the Euclidean (L2) norm. The default value is
 90 |         1e-5.
 91 | 
 92 |     past: int, optional (default=0)
 93 |         Distance for delta-based convergence test.
 94 |         This parameter determines the distance, in iterations, to compute
 95 |         the rate of decrease of the objective function. If the value of this
 96 |         parameter is zero, the library does not perform the delta-based
 97 |         convergence test. The default value is 0.
 98 | 
 99 |     delta: float, optional (default=0.)
100 |         Delta for convergence test.
101 |         This parameter determines the minimum rate of decrease of the
102 |         objective function. The library stops iterations when the
103 |         following condition is met:
104 |             (f' - f) / f < delta,
105 |         where f' is the objective value of `past` iterations ago, and f is
106 |         the objective value of the current iteration.
107 |         The default value is 0.
108 | 
109 |     max_iterations: int, optional (default=0)
110 |         The maximum number of iterations. Setting this parameter to zero
111 |         continues an optimization process until a convergence or error. The
112 |         default value is 0.
113 | 
114 |     line_search: str, optional (default="default")
115 |         The line search algorithm.
116 |         This parameter specifies a line search algorithm to be used by the
117 |         L-BFGS routine. Possible values are:
118 | 
119 |         - 'default': same as 'morethuente'
120 |         - 'morethuente': Method proposed by More and Thuente
121 |         - 'armijo': backtracking with Armijo's conditions
122 |         - 'wolfe': backtracking with Wolfe's conditions
123 |         - 'strongwolfe': backtracking with strong Wolfe's conditions
124 | 
125 |     max_linesearch: int, optional (default=20)
126 |         The maximum number of trials for the line search.
127 |         This parameter controls the number of function and gradients evaluations
128 |         per iteration for the line search routine. The default value is 20.
129 | 
130 |     min_step: float, optional (default=1e-20)
131 |         The minimum step of the line search routine.
132 |         The default value is 1e-20. This value need not be modified unless
133 |         the exponents are too large for the machine being used, or unless the
134 |         problem is extremely badly scaled (in which case the exponents should
135 |         be increased).
136 | 
137 |     max_step: float, optional (default=1e20)
138 |         The maximum step of the line search.
139 |         The default value is 1e+20. This value need not be modified unless
140 |         the exponents are too large for the machine being used, or unless the
141 |         problem is extremely badly scaled (in which case the exponents should
142 |         be increased).
143 | 
144 |     ftol: float, optional (default=1e-4)
145 |         A parameter to control the accuracy of the line search routine.
146 |         The default value is 1e-4. This parameter should be greater
147 |         than zero and smaller than 0.5.
148 | 
149 |     wolfe: float, optional (default=0.9)
150 |         A coefficient for the Wolfe condition. This parameter is valid only
151 |         when the backtracking line-search algorithm is used with the Wolfe
152 |         condition (`line_search='wolfe'` or `line_search='strongwolfe'`),
153 |         The default value is 0.9. This parameter should be greater
154 |         the `ftol` parameter and smaller than 1.0.
155 | 
156 |     gtol: float, optional (default=0.9)
157 |         A parameter to control the accuracy of the line search routine.
158 |         The default value is 0.9. If the function and gradient
159 |         evaluations are inexpensive with respect to the cost of the
160 |         iteration (which is sometimes the case when solving very large
161 |         problems) it may be advantageous to set this parameter to a small
162 |         value. A typical small value is 0.1. This parameter should be
163 |         greater than the ftol parameter (1e-4) and smaller than
164 |         1.0.
165 | 
166 | 
167 |     xtol: float, optional (default=1e-30)
168 |         The machine precision for floating-point values.
169 |         This parameter must be a positive value set by a client program to
170 |         estimate the machine precision. The line search routine will terminate
171 |         with the status code (::LBFGSERR_ROUNDING_ERROR) if the relative width
172 |         of the interval of uncertainty is less than this parameter.
173 | 
174 | 
175 |     """
176 | 
177 |     # Input validation to make sure defaults with OWL-QN are adapted correctly
178 |     assert orthantwise_c >= 0, "Orthantwise_c cannot be negative"
179 | 
180 |     if orthantwise_c > 0 and line_search not in ['wolfe', 'default']:
181 |         line_search = 'wolfe'
182 |         warnings.warn("When using OWL-QN, 'wolfe' is the only valid "
183 |                       + "line_search. line_search has been set to 'wolfe'.")
184 |     elif orthantwise_c > 0 and line_search == 'default':
185 |         line_search = 'wolfe'
186 | 
187 |     opt = LBFGS()
188 |     opt.orthantwise_c = orthantwise_c
189 |     opt.orthantwise_start = orthantwise_start
190 |     opt.orthantwise_end = orthantwise_end
191 |     opt.m = m
192 |     opt.epsilon = epsilon
193 |     opt.past = past
194 |     opt.delta = delta
195 |     opt.max_iterations = max_iterations
196 |     opt.linesearch = line_search
197 |     opt.max_linesearch = max_linesearch
198 |     opt.min_step = min_step
199 |     opt.max_step = max_step
200 |     opt.ftol = ftol
201 |     opt.wolfe = wolfe
202 |     opt.gtol = gtol
203 |     opt.xtol = xtol
204 | 
205 |     return opt.minimize(f, x0, progress=progress, args=args)
206 | 


--------------------------------------------------------------------------------
/src/pyuoi/linear_model/__init__.py:
--------------------------------------------------------------------------------
 1 | """Union of Intersection models with linear selection and estimation.
 2 | 
 3 | Provides both abstract base classes for creating user-defined UoI models
 4 | and several concrete implementations.
 5 | """
 6 | from .base import (AbstractUoILinearModel, AbstractUoILinearRegressor,
 7 |                    AbstractUoIGeneralizedLinearRegressor)
 8 | from .lasso import UoI_Lasso
 9 | from .elasticnet import UoI_ElasticNet
10 | from .logistic import MaskedCoefLogisticRegression, UoI_L1Logistic
11 | from .poisson import Poisson, UoI_Poisson
12 | 
13 | __all__ = ["AbstractUoILinearModel",
14 |            "AbstractUoILinearRegressor",
15 |            "AbstractUoIGeneralizedLinearRegressor",
16 |            "MaskedCoefLogisticRegression",
17 |            "UoI_L1Logistic",
18 |            "UoI_Lasso",
19 |            "UoI_ElasticNet",
20 |            "Poisson",
21 |            "UoI_Poisson"]
22 | 


--------------------------------------------------------------------------------
/src/pyuoi/linear_model/elasticnet.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | from .base import AbstractUoILinearRegressor
  4 | 
  5 | from sklearn.linear_model import LinearRegression
  6 | from sklearn.linear_model._coordinate_descent import _alpha_grid
  7 | from sklearn.linear_model import ElasticNet
  8 | 
  9 | 
 10 | class UoI_ElasticNet(AbstractUoILinearRegressor, LinearRegression):
 11 |     r"""UoI\ :sub:`ElasticNet` solver.
 12 | 
 13 |     Parameters
 14 |     ----------
 15 |     n_boots_sel : int
 16 |         The number of data bootstraps to use in the selection module.
 17 |         Increasing this number will make selection more strict.
 18 |     n_boots_est : int
 19 |         The number of data bootstraps to use in the estimation module.
 20 |         Increasing this number will relax selection and decrease variance.
 21 |     selection_frac : float
 22 |         The fraction of the dataset to use for training in each resampled
 23 |         bootstrap, during the selection module. Small values of this parameter
 24 |         imply larger "perturbations" to the dataset.
 25 |     estimation_frac : float
 26 |         The fraction of the dataset to use for training in each resampled
 27 |         bootstrap, during the estimation module. The remaining data is used
 28 |         to obtain validation scores. Small values of this parameters imply
 29 |         larger "perturbations" to the dataset. IGNORED - Leaving this here
 30 |         to double check later
 31 |     n_lambdas : int
 32 |         The number of regularization values to use for selection.
 33 |     alphas : list or ndarray
 34 |         The parameter that trades off L1 versus L2 regularization for a given
 35 |         lambda.
 36 |     stability_selection : int, float, or array-like
 37 |         If int, treated as the number of bootstraps that a feature must
 38 |         appear in to guarantee placement in selection profile. If float,
 39 |         must be between 0 and 1, and is instead the proportion of
 40 |         bootstraps. If array-like, must consist of either ints or floats
 41 |         between 0 and 1. In this case, each entry in the array-like object
 42 |         will act as a separate threshold for placement in the selection
 43 |         profile.
 44 |     estimation_score : string, "r2" | "AIC" | "AICc" | "BIC"
 45 |         Objective used to choose the best estimates per bootstrap.
 46 |     estimation_target : string, "train" | "test"
 47 |         Decide whether to assess the estimation_score on the train
 48 |         or test data across each bootstrap. By default, a sensible
 49 |         choice is made based on the chosen estimation_score.
 50 |     warm_start : bool
 51 |         When set to ``True``, reuse the solution of the previous call to fit as
 52 |         initialization, otherwise, just erase the previous solution
 53 |     eps : float
 54 |         Length of the lasso path. ``eps=1e-3`` means that
 55 |         ``alpha_min / alpha_max = 1e-3``.
 56 |     copy_X : bool
 57 |         If ``True``, X will be copied; else, it may be overwritten.
 58 |     fit_intercept : bool
 59 |         Whether to calculate the intercept for this model. If set
 60 |         to False, no intercept will be used in calculations
 61 |         (e.g. data is expected to be already centered).
 62 |     standardize : bool
 63 |         If True, the regressors X will be standardized before regression by
 64 |         subtracting the mean and dividing by their standard deviations.
 65 |     max_iter : int
 66 |         Maximum number of iterations for iterative fitting methods.
 67 |     tol : float
 68 |         Stopping criteria for solver.
 69 |     random_state : int, RandomState instance, or None
 70 |         The seed of the pseudo random number generator that selects a random
 71 |         feature to update.  If int, random_state is the seed used by the random
 72 |         number generator; If RandomState instance, random_state is the random
 73 |         number generator; If None, the random number generator is the
 74 |         RandomState instance used by `np.random`.
 75 |     comm : MPI communicator
 76 |         If passed, the selection and estimation steps are parallelized.
 77 |     logger : Logger
 78 |         The logger to use for messages when ``verbose=True`` in ``fit``.
 79 |         If *None* is passed, a logger that writes to ``sys.stdout`` will be
 80 |         used.
 81 | 
 82 |     Attributes
 83 |     ----------
 84 |     coef_ : array, shape (n_features,) or (n_targets, n_features)
 85 |         Estimated coefficients for the linear regression problem.
 86 |     intercept_ : float
 87 |         Independent term in the linear model.
 88 |     supports_ : ndarray, shape (n_supports, n_features)
 89 |         Boolean array indicating whether a given regressor (column) is selected
 90 |         for estimation for a given regularization parameter value (row).
 91 |     """
 92 |     def __init__(self, n_boots_sel=24, n_boots_est=24, selection_frac=0.9,
 93 |                  estimation_frac=0.9, n_lambdas=48,
 94 |                  alphas=np.array([0.5]), stability_selection=1.,
 95 |                  estimation_score='r2', estimation_target=None,
 96 |                  warm_start=True, eps=1e-3, copy_X=True,
 97 |                  fit_intercept=True, standardize=True,
 98 |                  max_iter=1000, tol=1e-4, random_state=None,
 99 |                  comm=None, logger=None):
100 |         super(UoI_ElasticNet, self).__init__(
101 |             n_boots_sel=n_boots_sel,
102 |             n_boots_est=n_boots_est,
103 |             selection_frac=selection_frac,
104 |             estimation_frac=estimation_frac,
105 |             stability_selection=stability_selection,
106 |             estimation_score=estimation_score,
107 |             estimation_target=estimation_target,
108 |             copy_X=copy_X,
109 |             fit_intercept=fit_intercept,
110 |             standardize=standardize,
111 |             random_state=random_state,
112 |             comm=comm,
113 |             max_iter=max_iter,
114 |             tol=tol,
115 |             logger=logger)
116 |         self.n_lambdas = n_lambdas
117 |         self.alphas = alphas
118 |         self.n_alphas = len(alphas)
119 |         self.warm_start = warm_start
120 |         self.eps = eps
121 |         self.lambdas = None
122 |         self._selection_lm = ElasticNet(
123 |             fit_intercept=fit_intercept,
124 |             max_iter=max_iter,
125 |             tol=tol,
126 |             copy_X=copy_X,
127 |             warm_start=warm_start,
128 |             random_state=random_state)
129 |         self._estimation_lm = LinearRegression(fit_intercept=fit_intercept)
130 | 
131 |     def get_reg_params(self, X, y):
132 |         r"""Calculates the regularization parameters (alpha and lambda) to be
133 |         used for the provided data.
134 | 
135 |         Note that the Elastic Net penalty is given by
136 | 
137 |         .. math::
138 |            \frac{1}{2\ \text{n_samples}} ||y - Xb||^2_2
139 |            + \lambda (\alpha |b|_1 + 0.5 (1 - \alpha) |b|^2_2)
140 | 
141 |         where lambda and alpha are regularization parameters.
142 | 
143 |         ``scikit-learn`` does not use these names. Instead, ``scitkit-learn``
144 |         denotes alpha by 'l1_ratio' and lambda by 'alpha'.
145 | 
146 |         Parameters
147 |         ----------
148 |         X : array-like, shape (n_samples, n_features)
149 |             The design matrix.
150 | 
151 |         y : array-like, shape (n_samples)
152 |             The response vector.
153 | 
154 |         Returns
155 |         -------
156 |         reg_params : a list of dictionaries
157 |             A list containing dictionaries with the value of each
158 |             (lambda, alpha) describing the type of regularization to impose.
159 |             The keys adhere to scikit-learn's terminology (lambda->alpha,
160 |             alpha->l1_ratio). This allows easy passing into the ElasticNet
161 |             object.
162 |         """
163 |         if self.lambdas is None:
164 |             self.lambdas = np.zeros((self.n_alphas, self.n_lambdas))
165 |             # a set of lambdas are generated for each alpha value (l1_ratio in
166 |             # sci-kit learn parlance)
167 |             for alpha_idx, alpha in enumerate(self.alphas):
168 |                 self.lambdas[alpha_idx, :] = _alpha_grid(
169 |                     X=X, y=y,
170 |                     l1_ratio=alpha,
171 |                     fit_intercept=self.fit_intercept,
172 |                     eps=self.eps,
173 |                     n_alphas=self.n_lambdas)
174 | 
175 |         # place the regularization parameters into a list of dictionaries
176 |         reg_params = list()
177 |         for alpha_idx, alpha in enumerate(self.alphas):
178 |             for lamb_idx, lamb in enumerate(self.lambdas[alpha_idx]):
179 |                 # reset the regularization parameter
180 |                 reg_params.append(dict(alpha=lamb, l1_ratio=alpha))
181 | 
182 |         return reg_params
183 | 


--------------------------------------------------------------------------------
/src/pyuoi/linear_model/lasso.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | from sklearn.exceptions import NotFittedError
  4 | from sklearn.linear_model import Lasso, LinearRegression
  5 | from sklearn.linear_model._coordinate_descent import _alpha_grid
  6 | try:
  7 |     import pycasso
  8 | except ImportError:
  9 |     pycasso = None
 10 | 
 11 | from .base import AbstractUoILinearRegressor
 12 | 
 13 | 
 14 | class PycLasso():
 15 |     """Lasso using the pycasso solver. Solves for an entire regularization path
 16 |     at once.
 17 | 
 18 |     Parameters
 19 |     ----------
 20 |     alphas : nd-array
 21 |         The regularization path. Defaults to None for compatibility with UoI,
 22 |         but needs to be set prior to fitting.
 23 |     fit_intercept : bool
 24 |         Whether to calculate the intercept for this model. If set to ``False``,
 25 |         no intercept will be used in calculations.
 26 |     max_iter : int
 27 |         Maximum number of iterations for pycasso solver.
 28 |     tol : float
 29 |         Stopping criteria for solver.
 30 | 
 31 |     Attributes
 32 |     ----------
 33 |     coef_ : ndarray, shape (n_features,) or (n_targets, n_features)
 34 |         Estimated coefficients for the linear regression problem.
 35 |     intercept_ : float
 36 |         Independent term in the linear model.
 37 |     """
 38 |     def __init__(self, alphas=None, fit_intercept=True, max_iter=1000,
 39 |                  tol=1e-4):
 40 |         if fit_intercept is False:
 41 |             string = ('There is currently a bug in picasso that prevents ' +
 42 |                       'its use with `fit_intercept=False.`' +
 43 |                       'See https://github.com/jasonge27/picasso/' +
 44 |                       'issues/25 for resolution.')
 45 |             raise ValueError(string)
 46 |         self.max_iter = max_iter
 47 |         self.tol = tol
 48 |         self.fit_intercept = fit_intercept
 49 |         self.alphas = alphas
 50 | 
 51 |         # Flag to prevent us from predicting before fitting
 52 |         self.isfitted = False
 53 |         self.tol = tol
 54 | 
 55 |     def set_params(self, **kwargs):
 56 |         """Sets the parameters of this estimator."""
 57 |         _valid_params = ['alphas', 'fit_intercept', 'max_iter']
 58 | 
 59 |         for key, value in kwargs.items():
 60 |             if key in _valid_params:
 61 |                 setattr(self, key, value)
 62 |             else:
 63 |                 raise ValueError('Invalid parameter %s' % key)
 64 | 
 65 |     def predict(self, X):
 66 |         """Predicts responses given a design matrix.
 67 | 
 68 |         Parameters
 69 |         ----------
 70 |         X : ndarray, (n_samples, n_features)
 71 |             The design matrix.
 72 | 
 73 |         Returns
 74 |         -------
 75 |         y : ndarray, shape (n_samples,)
 76 |             Predicted response vector.
 77 |         """
 78 |         if self.isfitted:
 79 |             return np.matmul(X, self.coef_.T) + self.intercept_
 80 |         else:
 81 |             raise NotFittedError('Estimator is not fit.')
 82 | 
 83 |     def fit(self, X, y):
 84 |         """Fit data according to the pycasso object.
 85 | 
 86 |         Parameters
 87 |         ----------
 88 |         X : ndarray, (n_samples, n_features)
 89 |             The design matrix.
 90 |         y : ndarray, shape (n_samples,)
 91 |             Response vector. Will be cast to X's dtype if necessary.
 92 |             Currently, this implementation does not handle multiple response
 93 |             variables.
 94 |         """
 95 |         if self.alphas is None:
 96 |             raise Exception('Set alphas before fitting.')
 97 |         if self.fit_intercept is False:
 98 |             string = ('There is currently a bug in picasso that prevents ' +
 99 |                       'its use with `fit_intercept=False.`' +
100 |                       'See https://github.com/jasonge27/picasso/' +
101 |                       'issues/25 for resolution.')
102 |             raise ValueError(string)
103 | 
104 |         self.solver = pycasso.Solver(X, y, family='gaussian',
105 |                                      useintercept=self.fit_intercept,
106 |                                      lambdas=self.alphas,
107 |                                      penalty='l1',
108 |                                      max_ite=self.max_iter,
109 |                                      prec=self.tol)
110 |         self.solver.train()
111 |         # Coefs across the entire solution path
112 |         self.coef_ = self.solver.result['beta']
113 |         self.intercept_ = self.solver.result['intercept']
114 |         self.isfitted = True
115 |         return self
116 | 
117 | 
118 | class UoI_Lasso(AbstractUoILinearRegressor, LinearRegression):
119 |     r"""UoI\ :sub:`Lasso` solver.
120 | 
121 |     Parameters
122 |     ----------
123 |     n_boots_sel : int
124 |         The number of data bootstraps/resamples to use in the selection module.
125 |         Increasing this number will make selection more strict.
126 |     n_boots_est : int
127 |         The number of data bootstraps/resamples to use in the estimation
128 |         module. Increasing this number will relax selection and decrease
129 |         variance.
130 |     n_lambdas : int
131 |         The number of regularization values to use for selection.
132 |     selection_frac : float
133 |         The fraction of the dataset to use for training in each resampled
134 |         bootstrap, during the selection module. Small values of this parameter
135 |         imply larger "perturbations" to the dataset.
136 |     estimation_frac : float
137 |         The fraction of the dataset to use for training in each resampled
138 |         bootstrap, during the estimation module. The remaining data is used
139 |         to obtain validation scores. Small values of this parameters imply
140 |         larger "perturbations" to the dataset.
141 |     stability_selection : int, float, or array-like
142 |         If int, treated as the number of bootstraps that a feature must
143 |         appear in order to guarantee placement in selection profile. If float,
144 |         must be between 0 and 1, and is instead the proportion of
145 |         bootstraps. If array-like, must consist of either ints or floats
146 |         between 0 and 1. In this case, each entry in the array-like object
147 |         will act as a separate threshold for placement in the selection
148 |         profile.
149 |     estimation_score : string, "r2" | "AIC" | "AICc" | "BIC"
150 |         Objective used to choose the best estimates per bootstrap.
151 |     estimation_target : string, "train" | "test"
152 |         Decide whether to assess the estimation_score on the train
153 |         or test data across each bootstrap. By deafult, a sensible
154 |         choice is made based on the chosen estimation_score
155 |     warm_start : bool
156 |         When set to ``True``, reuse the solution of the previous call to fit as
157 |         initialization, otherwise, just erase the previous solution
158 |     eps : float
159 |         Length of the lasso path. ``eps=1e-3`` means that
160 |         ``lambda_min / lambda_max = 1e-3``
161 |     copy_X : bool
162 |         If ``True``, X will be copied; else, it may be overwritten.
163 |     fit_intercept : bool
164 |         Whether to calculate the intercept for this model. If set
165 |         to False, no intercept will be used in calculations
166 |         (e.g. data is expected to be already centered).
167 |     standardize : bool
168 |         If True, the regressors X will be standardized before regression by
169 |         subtracting the mean and dividing by their standard deviations. This
170 |         parameter is equivalent to ``normalize`` in ``scikit-learn`` models.
171 |     max_iter : int
172 |         Maximum number of iterations for iterative fitting methods.
173 |     tol : float
174 |         Stopping criteria for solver.
175 |     random_state : int, RandomState instance, or None
176 |         The seed of the pseudo random number generator that selects a random
177 |         feature to update.  If int, random_state is the seed used by the random
178 |         number generator; If RandomState instance, random_state is the random
179 |         number generator; If None, the random number generator is the
180 |         RandomState instance used by ``np.random``.
181 |     comm : MPI communicator
182 |         If passed, the selection and estimation steps are parallelized.
183 |     logger : Logger
184 |         The logger to use for messages when ``verbose=True`` in ``fit``.
185 |         If *None* is passed, a logger that writes to ``sys.stdout`` will be
186 |         used.
187 |     solver : string, 'cd' | 'pyc'
188 |         If cd, will use the ``scikit-learn`` lasso implementation (via
189 |         coordinate descent). If pyc, will use pyclasso, built off of the
190 |         pycasso path-wise solver.
191 | 
192 | 
193 |     Attributes
194 |     ----------
195 |     coef_ : nd-array, shape (n_features,) or (n_targets, n_features)
196 |         Estimated coefficients for the linear regression problem.
197 |     intercept_ : float
198 |         Independent term in the linear model.
199 |     supports_ : array, shape
200 |         boolean array indicating whether a given regressor (column) is selected
201 |         for estimation for a given regularization parameter value (row).
202 |     """
203 |     def __init__(self, n_boots_sel=24, n_boots_est=24, selection_frac=0.9,
204 |                  estimation_frac=0.9, n_lambdas=48, stability_selection=1.,
205 |                  estimation_score='r2', estimation_target=None, eps=1e-3,
206 |                  warm_start=True, copy_X=True, fit_intercept=True,
207 |                  standardize=True, max_iter=1000, tol=1e-4, random_state=None,
208 |                  comm=None, logger=None, solver='cd'):
209 |         super(UoI_Lasso, self).__init__(
210 |             n_boots_sel=n_boots_sel,
211 |             n_boots_est=n_boots_est,
212 |             selection_frac=selection_frac,
213 |             estimation_frac=estimation_frac,
214 |             estimation_target=estimation_target,
215 |             stability_selection=stability_selection,
216 |             copy_X=copy_X,
217 |             fit_intercept=fit_intercept,
218 |             standardize=standardize,
219 |             random_state=random_state,
220 |             comm=comm,
221 |             estimation_score=estimation_score,
222 |             max_iter=max_iter,
223 |             tol=tol,
224 |             logger=logger)
225 |         self.n_lambdas = n_lambdas
226 |         self.eps = eps
227 |         self.solver = solver
228 |         self.tol = tol
229 | 
230 |         if solver == 'cd':
231 |             self._selection_lm = Lasso(
232 |                 max_iter=max_iter,
233 |                 tol=tol,
234 |                 warm_start=warm_start,
235 |                 random_state=random_state,
236 |                 fit_intercept=fit_intercept)
237 |         elif solver == 'pyc':
238 |             if pycasso is None:
239 |                 raise ImportError('pycasso is not installed.')
240 |             self._selection_lm = PycLasso(
241 |                 fit_intercept=fit_intercept,
242 |                 max_iter=max_iter,
243 |                 tol=tol)
244 | 
245 |         self._estimation_lm = LinearRegression(fit_intercept=fit_intercept)
246 | 
247 |     def get_reg_params(self, X, y):
248 |         alphas = _alpha_grid(
249 |             X=X, y=y,
250 |             l1_ratio=1.0,
251 |             fit_intercept=self.fit_intercept,
252 |             eps=self.eps,
253 |             n_alphas=self.n_lambdas)
254 | 
255 |         return [{'alpha': a} for a in alphas]
256 | 
257 |     def uoi_selection_sweep(self, X, y, reg_param_values):
258 |         """Overwrite base class selection sweep to accommodate pycasso
259 |         path-wise solution"""
260 | 
261 |         if self.solver == 'pyc':
262 |             alphas = np.array([reg_param['alpha']
263 |                                for reg_param in reg_param_values])
264 |             self._selection_lm.set_params(alphas=alphas)
265 |             self._selection_lm.fit(X, y)
266 | 
267 |             return self._selection_lm.coef_
268 |         else:
269 |             return super(UoI_Lasso, self).uoi_selection_sweep(X, y,
270 |                                                               reg_param_values)
271 | 


--------------------------------------------------------------------------------
/src/pyuoi/linear_model/scikit-learn_license:
--------------------------------------------------------------------------------
 1 | Portions of logistic.py including
 2 | MaskedCoefLogisticRegression,
 3 | _logistic_regression_path,
 4 | _intercept_dot,
 5 | _logistic_loss_and_grad, and
 6 | _multinomial_loss_grad
 7 | are based on code from scikit-learn. The scikit-learn license is below.
 8 | 
 9 | ---------------
10 | New BSD License
11 | 
12 | Copyright (c) 2007–2019 The scikit-learn developers.
13 | All rights reserved.
14 | 
15 | 
16 | Redistribution and use in source and binary forms, with or without
17 | modification, are permitted provided that the following conditions are met:
18 | 
19 |   a. Redistributions of source code must retain the above copyright notice,
20 |      this list of conditions and the following disclaimer.
21 |   b. Redistributions in binary form must reproduce the above copyright
22 |      notice, this list of conditions and the following disclaimer in the
23 |      documentation and/or other materials provided with the distribution.
24 |   c. Neither the name of the Scikit-learn Developers  nor the names of
25 |      its contributors may be used to endorse or promote products
26 |      derived from this software without specific prior written
27 |      permission.
28 | 
29 | 
30 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
31 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 | ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
34 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
36 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
37 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
40 | DAMAGE.
41 | 


--------------------------------------------------------------------------------
/src/pyuoi/linear_model/utils.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | 
  4 | def stability_selection_to_threshold(stability_selection, n_boots):
  5 |     """Converts user inputted stability selection to an array of
  6 |     thresholds. These thresholds correspond to the number of bootstraps
  7 |     that a feature must appear in to guarantee placement in the selection
  8 |     profile.
  9 | 
 10 |     Parameters
 11 |     ----------
 12 |     stability_selection : int, float, or array-like
 13 |         If int, treated as the number of bootstraps that a feature must
 14 |         appear in to guarantee placement in selection profile. If float,
 15 |         must be between 0 and 1, and is instead the proportion of
 16 |         bootstraps. If array-like, must consist of either ints or floats
 17 |         between 0 and 1. In this case, each entry in the array-like object
 18 |         will act as a separate threshold for placement in the selection
 19 |         profile.
 20 | 
 21 |     n_boots: int
 22 |         The number of bootstraps that will be used for selection
 23 |     """
 24 | 
 25 |     # single float, indicating proportion of bootstraps
 26 |     if isinstance(stability_selection, float):
 27 |         selection_thresholds = np.array([int(
 28 |             stability_selection * n_boots
 29 |         )])
 30 | 
 31 |     # single int, indicating number of bootstraps
 32 |     elif isinstance(stability_selection, int):
 33 |         selection_thresholds = np.array([int(
 34 |             stability_selection
 35 |         )])
 36 | 
 37 |     # list, to be converted into numpy array
 38 |     elif isinstance(stability_selection, list):
 39 |         # list of floats
 40 |         if all(isinstance(idx, float) for idx in stability_selection):
 41 |             selection_thresholds = \
 42 |                 n_boots * np.array(stability_selection)
 43 | 
 44 |         # list of ints
 45 |         elif all(isinstance(idx, int) for idx in stability_selection):
 46 |             selection_thresholds = np.array(stability_selection)
 47 | 
 48 |         else:
 49 |             raise ValueError("Stability selection list must consist of "
 50 |                              "floats or ints.")
 51 | 
 52 |     # numpy array
 53 |     elif isinstance(stability_selection, np.ndarray):
 54 |         # np array of floats
 55 |         if np.issubdtype(stability_selection.dtype.type, np.floating):
 56 |             selection_thresholds = n_boots * stability_selection
 57 | 
 58 |         # np array of ints
 59 |         elif np.issubdtype(stability_selection.dtype.type, np.integer):
 60 |             selection_thresholds = stability_selection
 61 | 
 62 |         else:
 63 |             raise ValueError("Stability selection array must consist of "
 64 |                              "floats or ints.")
 65 | 
 66 |     else:
 67 |         raise ValueError("Stability selection must be a valid float, int "
 68 |                          "or array.")
 69 | 
 70 |     # ensure that ensuing list of selection thresholds satisfies
 71 |     # the correct bounds
 72 |     selection_thresholds = selection_thresholds.astype('int')
 73 |     if not (
 74 |         np.all(selection_thresholds <= n_boots) and
 75 |         np.all(selection_thresholds >= 1)
 76 |     ):
 77 |         raise ValueError("Stability selection thresholds must be within "
 78 |                          "the correct bounds.")
 79 | 
 80 |     return selection_thresholds
 81 | 
 82 | 
 83 | def intersection(coefs, selection_thresholds=None):
 84 |     """Performs the intersection operation on selection coefficients
 85 |     using stability selection criteria.
 86 | 
 87 |     The coefficients must be provided in the shape
 88 |         bootstraps x lambdas x features.
 89 |     The intersection operation finds, for each lambda, the features that
 90 |     exist in all bootstraps (hard intersection) or in some subset of them
 91 |     (the exact subset is provided by selection_thresholds).
 92 | 
 93 |     This parameter selection_thresholds provides the number of bootstraps
 94 |     that a feature must exist in to pass the intersection. Importantly,
 95 |     this function can take intersections with multiple selection_thresholds
 96 |     (thus, selection_thresholds is array-like).
 97 | 
 98 |     This function then outputs an array of supports, each as a binary mask.
 99 |     Only unique supports are provided, so duplicates are tossed out.
100 | 
101 |     Parameters
102 |     ----------
103 |     coefs : np.ndarray, shape (# bootstraps, # lambdas, # features)
104 |         The coefficients obtained from the selection sweep, corresponding to
105 |         each bootstrap and choice of L1 regularization strength.
106 | 
107 |     selection_thresholds: array-like, int
108 |         The selection thresholds to perform intersection across. By default,
109 |         use *coefs.shape[0]*.
110 | 
111 |     Returns
112 |     -------
113 |     supports : np.ndarray, shape (# supports, # features), bool
114 |         A list of supports (each as a binary mask with size n_features)
115 |         obtained by performing the intersection across the coefficients. Each
116 |         support is unique.
117 |     """
118 | 
119 |     if selection_thresholds is None:
120 |         selection_thresholds = np.array([coefs.shape[0]])
121 | 
122 |     n_selection_thresholds = len(selection_thresholds)
123 |     n_reg_params = coefs.shape[1]
124 |     n_features = coefs.shape[2]
125 |     supports = np.zeros(
126 |         (n_selection_thresholds, n_reg_params, n_features),
127 |         dtype=bool
128 |     )
129 | 
130 |     # iterate over each stability selection threshold
131 |     for thresh_idx, threshold in enumerate(selection_thresholds):
132 |         # calculate the support given the specific selection threshold
133 |         supports[thresh_idx, ...] = \
134 |             np.count_nonzero(coefs, axis=0) >= threshold
135 | 
136 |     # unravel the dimension corresponding to selection thresholds
137 | 
138 |     supports = np.squeeze(np.reshape(
139 |         supports,
140 |         (n_selection_thresholds * n_reg_params, n_features)
141 |     ))
142 | 
143 |     supports = np.unique(supports, axis=0)
144 | 
145 |     return supports
146 | 


--------------------------------------------------------------------------------
/src/pyuoi/mpi_utils.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Helper functions for loading data and managing arrays across ranks with MPI.
  3 | """
  4 | import h5py
  5 | import numpy as np
  6 | 
  7 | try:
  8 |     from mpi4py import MPI
  9 |     _np2mpi = {np.dtype(np.float32): MPI.FLOAT,
 10 |                np.dtype(np.float64): MPI.DOUBLE,
 11 |                np.dtype(int): MPI.LONG,
 12 |                np.dtype(np.intc): MPI.INT}
 13 | except ImportError:
 14 |     pass
 15 | 
 16 | 
 17 | def check_valid_ndarray(X):
 18 |     """Checks whether X is a ndarray and returns a contiguous version.
 19 | 
 20 |     Parameters
 21 |     ----------
 22 |     X : ndarray, `None`, or other
 23 |         Variable to check
 24 | 
 25 |     Returns
 26 |     -------
 27 |     X : ndarray or `None`
 28 |         If X is an ndarray, returns a contiguous potential copy. If X is `None`
 29 |         returns `None`. If X is anything else, raises a `ValueError`
 30 |     """
 31 |     if X is None:
 32 |         return X
 33 |     if not isinstance(X, np.ndarray):
 34 |         raise ValueError('Must be a numpy ndarray.')
 35 |     return np.ascontiguousarray(X)
 36 | 
 37 | 
 38 | def load_data_MPI(h5_name, X_key='X', y_key='y', comm=None, root=0):
 39 |     """Load data from an HDF5 file and broadcast it across MPI ranks.
 40 | 
 41 |     This is a helper function. It is also possible to load the data
 42 |     without this function.
 43 | 
 44 |     Parameters
 45 |     ----------
 46 |     h5_name : str
 47 |         Path to h5 file.
 48 |     X_key : str
 49 |         Key for the features dataset. (default: 'X')
 50 |     y_key : str
 51 |         Key for the targets dataset. (default: 'y')
 52 |     comm : MPI.COMM_WORLD
 53 |         MPI communicator.
 54 |     root : int
 55 |         This rank will load the data from file.
 56 | 
 57 |     Returns
 58 |     -------
 59 |     X : ndarray
 60 |         Features on all MPI ranks.
 61 |     y : ndarray
 62 |         Targets on all MPI ranks.
 63 |     """
 64 | 
 65 |     if comm is None:
 66 |         comm = MPI.COMM_WORLD
 67 |     rank = comm.rank
 68 |     Xshape = None
 69 |     Xdtype = None
 70 |     yshape = None
 71 |     ydtype = None
 72 |     if rank == root:
 73 |         with h5py.File(h5_name, 'r') as f:
 74 |             X = f[X_key][()]
 75 |             Xshape = X.shape
 76 |             Xdtype = X.dtype
 77 |             y = f[y_key][()]
 78 |             yshape = y.shape
 79 |             ydtype = y.dtype
 80 |     Xshape = comm.bcast(Xshape, root=root)
 81 |     Xdtype = comm.bcast(Xdtype, root=root)
 82 |     yshape = comm.bcast(yshape, root=root)
 83 |     ydtype = comm.bcast(ydtype, root=root)
 84 |     if rank != root:
 85 |         X = np.empty(Xshape, dtype=Xdtype)
 86 |         y = np.empty(yshape, dtype=ydtype)
 87 |     comm.Bcast([X, _np2mpi[np.dtype(X.dtype)]], root=root)
 88 |     comm.Bcast([y, _np2mpi[np.dtype(y.dtype)]], root=root)
 89 |     return X, y
 90 | 
 91 | 
 92 | def Bcast_from_root(send, comm=None, root=0):
 93 |     """Broadcast an array from root to all MPI ranks.
 94 | 
 95 |     Parameters
 96 |     ----------
 97 |     send : ndarray or None
 98 |         Array to send from root to all ranks. send in other ranks
 99 |         has no effect.
100 |     comm : MPI.COMM_WORLD
101 |         MPI communicator.
102 |     root : int
103 |         This rank contains the array to send.
104 | 
105 |     Returns
106 |     -------
107 |     send : ndarray
108 |         Each rank will have a copy of the array from root.
109 |     """
110 | 
111 |     send = check_valid_ndarray(send)
112 |     if comm is None:
113 |         comm = MPI.COMM_WORLD
114 |     rank = comm.rank
115 |     if rank == 0:
116 |         dtype = send.dtype
117 |         shape = send.shape
118 |     else:
119 |         dtype = None
120 |         shape = None
121 |     shape = comm.bcast(shape, root=root)
122 |     dtype = comm.bcast(dtype, root=root)
123 |     if rank != 0:
124 |         send = np.empty(shape, dtype=dtype)
125 |     comm.Bcast([send, _np2mpi[np.dtype(dtype)]], root=root)
126 |     return send
127 | 
128 | 
129 | def Gatherv_rows(send, comm=None, root=0):
130 |     """Concatenate arrays along the first axis using Gatherv on root.
131 | 
132 |     Parameters
133 |     ----------
134 |     send : ndarray
135 |         The arrays to concatenate. All dimensions must be equal except for the
136 |         first.
137 |     comm : MPI.COMM_WORLD
138 |         MPI communicator.
139 |     root : int
140 |         This rank will contain the Gatherv'ed array.
141 | 
142 |     Returns
143 |     -------
144 |     rec : ndarray or None
145 |         Gatherv'ed array on root or None on other ranks.
146 |     """
147 | 
148 |     send = check_valid_ndarray(send)
149 |     if comm is None:
150 |         comm = MPI.COMM_WORLD
151 |     rank = comm.rank
152 |     dtype = send.dtype
153 |     shape = send.shape
154 |     tot = np.zeros(1, dtype=int)
155 | 
156 |     # Gather the sizes of the first dimension on root
157 |     rank_sizes = comm.gather(shape[0], root=root)
158 |     comm.Reduce(np.array(shape[0], dtype=int),
159 |                 [tot, _np2mpi[tot.dtype]], op=MPI.SUM, root=root)
160 |     if rank == root:
161 |         rec_shape = (tot[0],) + shape[1:]
162 |         rec = np.empty(rec_shape, dtype=dtype)
163 |         sizes = [size * np.prod(rec_shape[1:]) for size in rank_sizes]
164 |         disps = np.insert(np.cumsum(sizes), 0, 0)[:-1]
165 |     else:
166 |         rec = None
167 |         sizes = None
168 |         disps = None
169 | 
170 |     comm.Gatherv(send, [rec, sizes, disps, _np2mpi[dtype]], root=0)
171 |     return rec
172 | 


--------------------------------------------------------------------------------
/src/pyuoi/utils.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import sys
  3 | import logging
  4 | 
  5 | 
  6 | def softmax(y, axis=-1):
  7 |     """Calculates the softmax distribution.
  8 | 
  9 |     Parameters
 10 |     ----------
 11 |     y : ndarray
 12 |         Log-probabilities.
 13 |     """
 14 | 
 15 |     yp = y - y.max(axis=axis, keepdims=True)
 16 |     epy = np.exp(yp)
 17 |     return epy / np.sum(epy, axis=axis, keepdims=True)
 18 | 
 19 | 
 20 | def sigmoid(x):
 21 |     """Calculates the bernoulli distribution.
 22 | 
 23 |     Parameters
 24 |     ----------
 25 |     x : ndarray
 26 |         Log-probabilities.
 27 |     """
 28 |     return np.exp(-np.logaddexp(0, -x))
 29 | 
 30 | 
 31 | def log_likelihood_glm(model, y_true, y_pred):
 32 |     """Calculates the log-likelihood of a generalized linear model given the
 33 |     true response variables and the "predicted" response variables. The
 34 |     "predicted" response variable varies by the specific generalized linear
 35 |     model under consideration.
 36 | 
 37 |     Parameters
 38 |     ----------
 39 |     model : string
 40 |         The generalized linear model to calculate the log-likelihood for.
 41 |     y_true : nd-array, shape (n_samples,)
 42 |         Array of true response values.
 43 |     y_pred : nd-array, shape (n_samples,)
 44 |         Array of predicted response values (conditional mean).
 45 | 
 46 |     Returns
 47 |     -------
 48 |     ll : float
 49 |         The log-likelihood.
 50 |     """
 51 |     if model == 'normal':
 52 |         # this log-likelihood is calculated under the assumption that the
 53 |         # variance is the value that maximizes the log-likelihood
 54 |         rss = (y_true - y_pred)**2
 55 |         n_samples = y_true.size
 56 |         ll = -n_samples / 2 * (1 + np.log(np.mean(rss)))
 57 |     elif model == 'poisson':
 58 |         if not np.any(y_pred):
 59 |             if np.any(y_true):
 60 |                 ll = -np.inf
 61 |             else:
 62 |                 ll = 0.
 63 |         else:
 64 |             ll = np.mean(y_true * np.log(y_pred) - y_pred)
 65 |     else:
 66 |         raise ValueError('Model is not available.')
 67 |     return ll
 68 | 
 69 | 
 70 | def BIC(ll, n_features, n_samples):
 71 |     """Calculates the Bayesian Information Criterion.
 72 | 
 73 |     Parameters
 74 |     ----------
 75 |     ll : float
 76 |         The log-likelihood of the model.
 77 |     n_features : int
 78 |         The number of features used in the model.
 79 |     n_samples : int
 80 |         The number of samples in the dataset being tested.
 81 | 
 82 |     Returns
 83 |     -------
 84 |     BIC : float
 85 |         Bayesian Information Criterion
 86 |     """
 87 |     BIC = n_features * np.log(n_samples) - 2 * ll
 88 |     return BIC
 89 | 
 90 | 
 91 | def AIC(ll, n_features):
 92 |     """Calculates the Akaike Information Criterion.
 93 | 
 94 |     Parameters
 95 |     ----------
 96 |     ll : float
 97 |         The log-likelihood of the model.
 98 |     n_features : int
 99 |         The number of features used in the model.
100 |     n_samples : int
101 |         The number of samples in the dataset being tested.
102 | 
103 |     Returns
104 |     -------
105 |     AIC : float
106 |         Akaike Information Criterion
107 |     """
108 | 
109 |     AIC = 2 * n_features - 2 * ll
110 |     return AIC
111 | 
112 | 
113 | def AICc(ll, n_features, n_samples):
114 |     """Calculate the corrected Akaike Information Criterion. This criterion is
115 |     useful in cases when the number of samples is small.
116 | 
117 |     If the number of features is equal to the number of samples plus one, then
118 |     the AIC is returned (the AICc is undefined in this case).
119 | 
120 |     Parameters
121 |     ----------
122 |     ll : float
123 |         The log-likelihood of the model.
124 |     n_features : int
125 |         The number of features used in the model.
126 |     n_samples : int
127 |         The number of samples in the dataset being tested.
128 | 
129 |     Returns
130 |     -------
131 |     AIC : float
132 |         Akaike Information Criterion
133 |     """
134 |     AICc = AIC(ll, n_features)
135 |     if n_samples > (n_features + 1):
136 |         AICc += 2 * (n_features**2 + n_features) / (n_samples - n_features - 1)
137 |     return AICc
138 | 
139 | 
140 | def check_logger(logger, name='uoi', comm=None):
141 |     ret = logger
142 |     if ret is None:
143 |         if comm is not None and comm.Get_size() > 1:
144 |             r, s = comm.Get_rank(), comm.Get_size()
145 |             name += " " + str(r).rjust(int(np.log10(s)) + 1)
146 | 
147 |         ret = logging.getLogger(name=name)
148 |         handler = logging.StreamHandler(sys.stdout)
149 | 
150 |         fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
151 | 
152 |         handler.setFormatter(logging.Formatter(fmt))
153 |         ret.addHandler(handler)
154 |     return ret
155 | 


--------------------------------------------------------------------------------
/tests/test_cur.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | from numpy.testing import assert_equal
  4 | from numpy.testing import assert_array_equal
  5 | from numpy.testing import assert_raises
  6 | from pyuoi.decomposition import CUR, UoI_CUR
  7 | from pyuoi.decomposition.utils import (column_select,
  8 |                                        stability_selection_to_threshold)
  9 | 
 10 | X = np.array([
 11 |     [0, 0, 0, 4, 2],
 12 |     [0, 4, 2, 1, 0],
 13 |     [3, 0, 0, 2, 1],
 14 |     [2, 2, 0, 1, 0],
 15 |     [1, 2, 4, 1, 3],
 16 |     [1, 4, 0, 0, 4],
 17 |     [3, 3, 4, 0, 0],
 18 |     [3, 2, 3, 0, 4],
 19 |     [0, 1, 2, 1, 4],
 20 |     [1, 4, 0, 2, 4]])
 21 | 
 22 | 
 23 | def test_column_select_all():
 24 |     """Test that column select function selects all columns when provided the
 25 |     entire SVD and a suitable value of c."""
 26 |     _, n_features = X.shape
 27 |     _, _, V = np.linalg.svd(X)
 28 |     column_indices = column_select(V.T, c=5)
 29 | 
 30 |     assert_array_equal(column_indices, np.arange(n_features))
 31 | 
 32 | 
 33 | def test_column_select():
 34 |     """Test that the column select function selects the column with the highest
 35 |     leverage score most often."""
 36 |     n_samples, n_features = X.shape
 37 |     rank = 3
 38 |     n_reps = 1000
 39 | 
 40 |     _, _, V = np.linalg.svd(X)
 41 |     V_subset = V[:rank].T
 42 |     column_flags = np.zeros((n_reps, n_features))
 43 | 
 44 |     for rep in range(n_reps):
 45 |         column_indices = column_select(V_subset, c=1)
 46 |         column_flags[rep, column_indices] = 1
 47 | 
 48 |     counts = np.sum(column_flags, axis=0)
 49 | 
 50 |     assert_equal(np.argmax(counts), np.argmax(np.sum(V_subset**2, axis=1)))
 51 | 
 52 | 
 53 | def test_stability_selection_to_threshold_int():
 54 |     """Tests whether stability_selection_to_threshold correctly outputs the
 55 |     correct threshold when provided a single integer."""
 56 | 
 57 |     n_boots_sel = 48
 58 |     # stability selection is a single integer
 59 |     test_int = 36
 60 |     selection_thresholds = stability_selection_to_threshold(test_int,
 61 |                                                             n_boots_sel)
 62 | 
 63 |     assert_array_equal(selection_thresholds, test_int)
 64 | 
 65 | 
 66 | def test_stability_selection_to_threshold_float():
 67 |     """Tests whether stability_selection_to_threshold correctly outputs the
 68 |     correct threshold when provided a single float."""
 69 | 
 70 |     n_boots_sel = 48
 71 |     # stability selection is a single float
 72 |     test_float = 0.5
 73 |     selection_thresholds = stability_selection_to_threshold(test_float,
 74 |                                                             n_boots_sel)
 75 | 
 76 |     assert_array_equal(selection_thresholds, np.array([24]))
 77 | 
 78 | 
 79 | def test_stability_selection_to_threshold_exceeds_n_bootstraps():
 80 |     """Tests whether stability_selection_to_threshold correctly outputs an
 81 |     error when provided an input that results in bootstraps exceeding
 82 |     n_boots_sel."""
 83 | 
 84 |     n_boots_sel = 48
 85 |     # stability selection is a list of floats
 86 |     test_float = 1.1
 87 |     test_int = 50
 88 | 
 89 |     assert_raises(
 90 |         ValueError,
 91 |         stability_selection_to_threshold,
 92 |         test_int,
 93 |         n_boots_sel)
 94 | 
 95 |     assert_raises(
 96 |         ValueError,
 97 |         stability_selection_to_threshold,
 98 |         test_float,
 99 |         n_boots_sel)
100 | 
101 | 
102 | def test_stability_selection_to_threshold_input_value_error():
103 |     """Tests whether stability_selection_to_threshold properly raises an error
104 |     when it receives objects without ints or floats."""
105 |     n_boots_sel = 48
106 |     stability_selection_list = [0, 1, 'a']
107 |     stability_selection_np_array = np.array([0, 1, 'a'])
108 |     stability_selection_dict = {0: 'a', 1: 'b'}
109 | 
110 |     assert_raises(
111 |         ValueError,
112 |         stability_selection_to_threshold,
113 |         stability_selection_list,
114 |         n_boots_sel)
115 | 
116 |     assert_raises(
117 |         ValueError,
118 |         stability_selection_to_threshold,
119 |         stability_selection_np_array,
120 |         n_boots_sel)
121 | 
122 |     assert_raises(
123 |         ValueError,
124 |         stability_selection_to_threshold,
125 |         stability_selection_dict,
126 |         n_boots_sel)
127 | 
128 | 
129 | def test_CUR():
130 |     """Tests that the CUR fitter extracts columns correctly."""
131 |     _, n_features = X.shape
132 |     max_k = 3
133 | 
134 |     cur = CUR(max_k=max_k)
135 | 
136 |     cur.fit(X, c=3)
137 |     column_indices = cur.column_indices_
138 |     columns = cur.components_
139 | 
140 |     assert np.setdiff1d(column_indices, np.arange(n_features)).size == 0
141 |     assert_array_equal(X[:, column_indices], columns)
142 | 
143 | 
144 | def test_CUR_fit():
145 |     """Tests that the CUR fitter extracts the correct columns."""
146 |     n_features = 5
147 |     n_samples = 30
148 |     max_k = 3
149 | 
150 |     # matrix has only one non-zero entry
151 |     X = np.zeros((n_samples, n_features))
152 |     X[0, 0] = 1
153 |     true_columns = np.array([0, 2, 3])
154 | 
155 |     # fit CUR decomposition
156 |     cur = CUR(max_k=max_k)
157 |     X_new = cur.fit_transform(X)
158 | 
159 |     assert_array_equal(cur.column_indices_, true_columns)
160 |     assert_array_equal(X_new, X[:, true_columns])
161 | 
162 | 
163 | def test_UoI_CUR_check_ks_and_cs():
164 |     """Tests the check_ks_and_cs function in UoI_CUR."""
165 |     n_boots = 5
166 |     max_k = 10
167 |     boots_frac = 0.9
168 | 
169 |     uoi_cur = UoI_CUR(n_boots=n_boots,
170 |                       max_k=max_k,
171 |                       boots_frac=boots_frac)
172 | 
173 |     # check ks
174 |     ks, cs = uoi_cur.check_ks_and_cs(ks=1)
175 |     assert_array_equal(ks, np.array([1]))
176 |     assert_array_equal(cs, ks + 20)
177 | 
178 |     ks, cs = uoi_cur.check_ks_and_cs(ks=[1, 2, 3])
179 |     assert_array_equal(ks, np.array([1, 2, 3]))
180 |     assert_array_equal(cs, ks + 20)
181 | 
182 |     ks, cs = uoi_cur.check_ks_and_cs(ks=None)
183 |     assert_array_equal(ks, 1 + np.arange(max_k))
184 |     assert_array_equal(cs, ks + 20)
185 | 
186 |     # check cs
187 |     ks, cs = uoi_cur.check_ks_and_cs(ks=[1, 2], cs=[3, 4])
188 |     assert_array_equal(cs, np.array([3, 4]))
189 | 
190 |     ks, cs = uoi_cur.check_ks_and_cs(ks=[1, 2, 3], cs=1)
191 |     assert_array_equal(cs, np.array([1, 1, 1]))
192 | 
193 |     ks, cs = uoi_cur.check_ks_and_cs(ks=[1, 2], cs=2.4)
194 |     assert_array_equal(cs, np.array([2.4, 2.4]))
195 | 
196 |     # value errors for ks
197 |     assert_raises(ValueError, uoi_cur.check_ks_and_cs, -1)
198 |     assert_raises(ValueError, uoi_cur.check_ks_and_cs, [11])
199 |     assert_raises(ValueError, uoi_cur.check_ks_and_cs, [0.1, -1, 2, 12])
200 |     assert_raises(ValueError, uoi_cur.check_ks_and_cs, 2.0)
201 |     assert_raises(ValueError, uoi_cur.check_ks_and_cs, uoi_cur)
202 | 
203 |     # value errors for cs
204 |     assert_raises(ValueError, uoi_cur.check_ks_and_cs, None, -1)
205 |     assert_raises(ValueError, uoi_cur.check_ks_and_cs, None, [-11])
206 |     assert_raises(ValueError, uoi_cur.check_ks_and_cs, None, np.array([-12]))
207 |     assert_raises(ValueError, uoi_cur.check_ks_and_cs, 1, [2, 3])
208 | 
209 | 
210 | def test_UoI_CUR_basic():
211 |     """Test UoI CUR with no bootstrapping."""
212 |     n_samples, n_features = X.shape
213 |     max_k = 3
214 |     n_boots = 1
215 |     boots_frac = 1
216 | 
217 |     _, _, V = np.linalg.svd(X)
218 |     V_subset = V[:max_k].T
219 | 
220 |     uoi_cur = UoI_CUR(n_boots=n_boots,
221 |                       max_k=max_k,
222 |                       boots_frac=boots_frac)
223 |     uoi_cur.fit(X, cs=3)
224 | 
225 |     max_col = np.argmax(np.sum(V_subset**2, axis=1))
226 | 
227 |     assert (max_col in uoi_cur.column_indices_)
228 | 
229 | 
230 | def test_UoI_CUR_fit():
231 |     """Tests that the CUR fitter extracts the correct columns."""
232 |     n_features = 5
233 |     n_samples = 30
234 |     max_k = 3
235 |     n_boots = 10
236 |     boots_frac = 0.95
237 | 
238 |     # matrix has only one non-zero entry
239 |     X = np.zeros((n_samples, n_features))
240 |     X[0, 0] = 1
241 |     true_columns = np.array([0, 2, 3])
242 | 
243 |     # fit CUR decomposition
244 |     uoi_cur = UoI_CUR(n_boots=n_boots,
245 |                       max_k=max_k,
246 |                       boots_frac=boots_frac,
247 |                       random_state=2332)
248 |     X_new = uoi_cur.fit_transform(X)
249 | 
250 |     assert_array_equal(uoi_cur.column_indices_, true_columns)
251 |     assert_array_equal(uoi_cur.components_, X[:, true_columns])
252 |     assert_array_equal(X_new, X[:, true_columns])
253 | 
254 | 
255 | def test_UoI_CUR_vs_CUR():
256 |     """Tests that the CUR fitter extracts columns correctly."""
257 |     _, n_features = X.shape
258 |     max_k = 3
259 |     n_boots = 10
260 |     boots_frac = 0.90
261 | 
262 |     cur = CUR(max_k=max_k,
263 |               random_state=2332)
264 |     cur.fit(X, c=3)
265 | 
266 |     uoi_cur = UoI_CUR(n_boots=n_boots,
267 |                       max_k=max_k,
268 |                       boots_frac=boots_frac,
269 |                       random_state=2332)
270 |     uoi_cur.fit(X, cs=3, ks=3)
271 | 
272 |     assert uoi_cur.column_indices_.size <= cur.column_indices_.size
273 | 


--------------------------------------------------------------------------------
/tests/test_elasticnet.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from numpy.testing import (assert_array_equal, assert_array_almost_equal_nulp,
  3 |                            assert_equal, assert_allclose)
  4 | 
  5 | from sklearn.datasets import make_regression
  6 | from sklearn.linear_model import ElasticNet
  7 | from sklearn.metrics import r2_score
  8 | 
  9 | from pyuoi import UoI_ElasticNet
 10 | from pyuoi.datasets import make_linear_regression
 11 | 
 12 | 
 13 | def test_variable_selection():
 14 |     """Test basic functionality of UoI_ElasticNet and that it
 15 |     finds right model"""
 16 | 
 17 |     X, y, w = make_regression(coef=True, random_state=1)
 18 |     enet = UoI_ElasticNet(alphas=[1., .9])
 19 |     enet.fit(X, y)
 20 |     true_coef = np.nonzero(w)[0]
 21 |     fit_coef = np.nonzero(enet.coef_)[0]
 22 |     assert_array_equal(true_coef, fit_coef)
 23 |     assert_array_almost_equal_nulp(true_coef, fit_coef)
 24 | 
 25 | 
 26 | def test_estimation_score_usage():
 27 |     """Test the ability to change the estimation score in UoI ElasticNet."""
 28 | 
 29 |     methods = ('r2', 'AIC', 'AICc', 'BIC')
 30 |     X, y = make_regression(n_features=10, n_informative=3,
 31 |                            random_state=10)
 32 |     scores = []
 33 |     for method in methods:
 34 |         enet = UoI_ElasticNet(estimation_score=method)
 35 |         assert_equal(enet.estimation_score, method)
 36 |         enet.fit(X, y)
 37 |         y_hat = enet.predict(X)
 38 |         assert_equal(r2_score(y, y_hat), enet.score(X, y))
 39 |         score = np.max(enet.scores_)
 40 |         scores.append(score)
 41 |     assert_equal(len(np.unique(scores)), len(methods))
 42 | 
 43 | 
 44 | def test_set_random_state():
 45 |     """Tests whether random states are handled correctly."""
 46 |     X, y = make_regression(n_features=5, n_informative=3,
 47 |                            random_state=16, noise=.5)
 48 |     # same state
 49 |     l1log_0 = UoI_ElasticNet(random_state=13)
 50 |     l1log_1 = UoI_ElasticNet(random_state=13)
 51 |     l1log_0.fit(X, y)
 52 |     l1log_1.fit(X, y)
 53 |     assert_array_equal(l1log_0.coef_, l1log_1.coef_)
 54 | 
 55 |     # different state
 56 |     l1log_1 = UoI_ElasticNet(random_state=14)
 57 |     l1log_1.fit(X, y)
 58 |     assert not np.array_equal(l1log_0.coef_, l1log_1.coef_)
 59 | 
 60 |     # different state, not set
 61 |     l1log_0 = UoI_ElasticNet()
 62 |     l1log_1 = UoI_ElasticNet()
 63 |     l1log_0.fit(X, y)
 64 |     l1log_1.fit(X, y)
 65 |     assert not np.array_equal(l1log_0.coef_, l1log_1.coef_)
 66 | 
 67 | 
 68 | def test_uoi_enet_toy():
 69 |     """Test UoI ElasticNet on a toy example."""
 70 | 
 71 |     X = np.array([
 72 |         [-1, 2],
 73 |         [4, 1],
 74 |         [1, 3],
 75 |         [4, 3],
 76 |         [8, 11]], dtype=float)
 77 |     beta = np.array([1, 4], dtype=float)
 78 |     y = np.dot(X, beta)
 79 |     X = np.tile(X, (3, 1))
 80 |     y = np.tile(y, 3)
 81 | 
 82 |     # choose selection_frac to be slightly smaller to ensure that we get
 83 |     # good test sets
 84 |     enet = UoI_ElasticNet(
 85 |         fit_intercept=False,
 86 |         selection_frac=0.75,
 87 |         estimation_frac=0.75,
 88 |     )
 89 |     enet.fit(X, y)
 90 | 
 91 |     assert_allclose(enet.coef_, beta)
 92 | 
 93 | 
 94 | def test_get_reg_params():
 95 |     """Tests whether get_reg_params works correctly for UoI ElasticNet."""
 96 | 
 97 |     X = np.array([
 98 |         [-1, 2],
 99 |         [0, 1],
100 |         [1, 3],
101 |         [4, 3]], dtype=float)
102 |     y = np.array([7, 4, 13, 16], dtype=float)
103 | 
104 |     # calculate regularization parameters manually
105 |     l1_ratio = .5
106 |     alpha_max = np.max(np.dot(X.T, y) / 4) / l1_ratio
107 |     alphas = [{'alpha': alpha_max, 'l1_ratio': .5},
108 |               {'alpha': alpha_max / 10., 'l1_ratio': .5}]
109 | 
110 |     # calculate regularization parameters with UoI_ElasticNet object
111 |     enet = UoI_ElasticNet(
112 |         n_lambdas=2,
113 |         fit_intercept=False,
114 |         eps=0.1)
115 |     reg_params = enet.get_reg_params(X, y)
116 | 
117 |     # check each regularization parameter and key
118 |     for estimate, true in zip(reg_params, alphas):
119 |         assert len(estimate) == len(true)
120 |         for key, value in estimate.items():
121 |             assert_allclose(true[key], value)
122 | 
123 | 
124 | def test_intercept_and_coefs_no_selection():
125 |     """Test that UoI Lasso properly calculates the intercept with and without
126 |     standardization."""
127 |     # create line model
128 |     X, y, beta, intercept = make_linear_regression(
129 |         n_samples=500,
130 |         n_features=2,
131 |         n_informative=2,
132 |         snr=10.,
133 |         include_intercept=True,
134 |         random_state=2332)
135 | 
136 |     # without standardization
137 |     enet = UoI_ElasticNet(
138 |         standardize=False,
139 |         fit_intercept=True
140 |     )
141 |     enet.fit(X, y)
142 |     assert_allclose(enet.intercept_, intercept, rtol=0.25)
143 |     assert_allclose(enet.coef_, beta, rtol=0.25)
144 | 
145 |     # with standardization
146 |     enet = UoI_ElasticNet(
147 |         standardize=True,
148 |         fit_intercept=True
149 |     )
150 |     enet.fit(X, y)
151 |     assert_allclose(enet.intercept_, intercept, rtol=0.25)
152 |     assert_allclose(enet.coef_, beta, rtol=0.25)
153 | 
154 | 
155 | def test_enet_selection_sweep():
156 |     """Tests uoi_selection_sweep for UoI_ElasticNet."""
157 | 
158 |     # toy data
159 |     X = np.array([
160 |         [-1, 2, 3],
161 |         [4, 1, -7],
162 |         [1, 3, 1],
163 |         [4, 3, 12],
164 |         [8, 11, 2]], dtype=float)
165 |     beta = np.array([1, 4, 2], dtype=float)
166 |     y = np.dot(X, beta)
167 | 
168 |     # toy regularization
169 |     reg_param_values = [{'alpha': 1.0}, {'alpha': 2.0}]
170 |     enet = UoI_ElasticNet(fit_intercept=True, warm_start=False)
171 |     enet1 = ElasticNet(alpha=1.0, fit_intercept=True, max_iter=enet.max_iter)
172 |     enet2 = ElasticNet(alpha=2.0, fit_intercept=True, max_iter=enet.max_iter)
173 |     enet.output_dim = 1
174 | 
175 |     coefs = enet.uoi_selection_sweep(X, y, reg_param_values)
176 |     enet1.fit(X, y)
177 |     enet2.fit(X, y)
178 | 
179 |     assert np.allclose(coefs[0], enet1.coef_)
180 |     assert np.allclose(coefs[1], enet2.coef_)
181 | 
182 | 
183 | def test_fit_intercept():
184 |     """Tests whether `include_intercept` in passed through to the linear models.
185 |     """
186 |     enet = UoI_ElasticNet(fit_intercept=True)
187 |     assert enet._selection_lm.fit_intercept
188 |     assert enet._estimation_lm.fit_intercept
189 | 
190 |     enet = UoI_ElasticNet(fit_intercept=False)
191 |     assert not enet._selection_lm.fit_intercept
192 |     assert not enet._estimation_lm.fit_intercept
193 | 


--------------------------------------------------------------------------------
/tests/test_lbfgs.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | from numpy.testing import assert_array_equal, assert_array_almost_equal
  3 | 
  4 | from pyuoi.lbfgs import LBFGS, fmin_lbfgs
  5 | import numpy as np
  6 | 
  7 | 
  8 | def test_fmin_lbfgs():
  9 |     def f(x, g, *args):
 10 |         g[0] = 2 * x
 11 |         return x ** 2
 12 | 
 13 |     xmin = fmin_lbfgs(f, 100., line_search='armijo')
 14 |     assert_array_equal(xmin, [0])
 15 | 
 16 |     xmin = fmin_lbfgs(f, 100., line_search='strongwolfe')
 17 |     assert_array_equal(xmin, [0])
 18 | 
 19 | 
 20 | class TestOWLQN:
 21 | 
 22 |     def test_owl_qn_end(self):
 23 |         def f(x, g, *args):
 24 |             g[:] = 2. * (x - 1.)
 25 |             return np.sum((x - 1.) ** 2)
 26 | 
 27 |         xmin = fmin_lbfgs(f, np.zeros(10), orthantwise_c=1.,
 28 |                           orthantwise_end=5)
 29 |         assert_array_equal(xmin[5:], 1.)
 30 |         assert np.all(xmin[:5] < 1.)
 31 | 
 32 |     def test_owl_qn(self):
 33 |         def f(x, g, *args):
 34 |             g[0] = 2 * x
 35 |             return x ** 2
 36 | 
 37 |         xmin = fmin_lbfgs(f, 100., orthantwise_c=1, line_search='wolfe')
 38 |         assert_array_equal(xmin, [0])
 39 | 
 40 |     def test_owl_line_search_warning_explicit(self):
 41 |         def f(x, g, *args):
 42 |             g[0] = 2 * x
 43 |             return x ** 2
 44 | 
 45 |         with pytest.warns(UserWarning, match="OWL-QN"):
 46 |             fmin_lbfgs(f, 100., orthantwise_c=1, line_search='morethuente')
 47 |         with pytest.warns(UserWarning, match="OWL-QN"):
 48 |             fmin_lbfgs(f, 100., orthantwise_c=1, line_search='armijo')
 49 |         with pytest.warns(UserWarning, match="OWL-QN"):
 50 |             fmin_lbfgs(f, 100., orthantwise_c=1, line_search='strongwolfe')
 51 | 
 52 |     @pytest.mark.xfail(strict=True)
 53 |     def test_owl_wolfe_no_warning(self):
 54 |         """ This test is an attempt to show that wolfe throws no warnings.
 55 |         """
 56 | 
 57 |         def f(x, g, *args):
 58 |             g[0] = 2 * x
 59 |             return x ** 2
 60 | 
 61 |         with pytest.warns(UserWarning, match="OWL-QN"):
 62 |             fmin_lbfgs(f, 100., orthantwise_c=1, line_search='wolfe')
 63 | 
 64 | 
 65 | def test_2d():
 66 |     def f(x, g, f_calls):
 67 |         assert x.shape == (2, 2)
 68 |         assert g.shape == x.shape
 69 |         g[:] = 2 * x
 70 |         f_calls[0] += 1
 71 |         return (x ** 2).sum()
 72 | 
 73 |     def progress(x, g, fx, xnorm, gnorm, step, k, ls, *args):
 74 |         assert x.shape == (2, 2)
 75 |         assert g.shape == x.shape
 76 | 
 77 |         assert np.sqrt((x ** 2).sum()) == xnorm
 78 |         assert np.sqrt((g ** 2).sum()) == gnorm
 79 | 
 80 |         p_calls[0] += 1
 81 |         return 0
 82 | 
 83 |     f_calls = [0]
 84 |     p_calls = [0]
 85 | 
 86 |     xmin = fmin_lbfgs(f, [[10., 100.], [44., 55.]], progress, args=[f_calls])
 87 |     assert f_calls[0] > 0
 88 |     assert p_calls[0] > 0
 89 |     assert_array_almost_equal(xmin, [[0, 0], [0, 0]])
 90 | 
 91 | 
 92 | def test_class_interface():
 93 |     def f(x, g, *args):
 94 |         g[:] = 4 * x
 95 |         return x ** 4 + 1
 96 | 
 97 |     opt = LBFGS()
 98 |     opt.max_iterations = 3
 99 | 
100 |     assert_array_equal(opt.minimize(f, 1e6), [0])
101 | 
102 |     opt.max_iterations = 1
103 |     with pytest.warns(UserWarning):
104 |         opt.minimize(f, 1e7)
105 | 
106 | 
107 | def test_input_validation():
108 |     with pytest.raises(TypeError):
109 |         fmin_lbfgs([], 1e4)
110 |     with pytest.raises(TypeError):
111 |         fmin_lbfgs(lambda x: x, 1e4, "ham")
112 |     with pytest.raises(TypeError):
113 |         fmin_lbfgs(lambda x: x, "spam")
114 | 


--------------------------------------------------------------------------------
/tests/test_mpi/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BouchardLab/pyuoi/25e47655a07895f206c2e3ee3b259421c144a05d/tests/test_mpi/__init__.py


--------------------------------------------------------------------------------
/tests/test_mpi/test_mpi_uoi_linear_model.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import numpy as np
 3 | from numpy.testing import assert_array_equal, assert_array_almost_equal_nulp
 4 | from sklearn.datasets import make_regression
 5 | try:
 6 |     from mpi4py import MPI
 7 | except ImportError:
 8 |     MPI = None
 9 | 
10 | from pyuoi.datasets import make_classification, make_poisson_regression
11 | from pyuoi.linear_model import (UoI_Lasso,
12 |                                 UoI_L1Logistic,
13 |                                 UoI_ElasticNet,
14 |                                 UoI_Poisson)
15 | 
16 | 
17 | @pytest.mark.skipif(MPI is None, reason='MPI not installed.')
18 | def test_variable_selection_lasso():
19 |     """Test basic functionality of UoI_Lasso and that it finds right model"""
20 |     X, y, w = make_regression(coef=True, random_state=1)
21 |     lasso = UoI_Lasso(comm=MPI.COMM_WORLD)
22 |     lasso.fit(X, y)
23 |     true_coef = np.nonzero(w)[0]
24 |     fit_coef = np.nonzero(lasso.coef_)[0]
25 |     assert_array_equal(true_coef, fit_coef)
26 |     assert_array_almost_equal_nulp(true_coef, fit_coef)
27 | 
28 | 
29 | @pytest.mark.skipif(MPI is None, reason='MPI not installed.')
30 | def test_variable_selection_enet():
31 |     """Test basic functionality of UoI_Lasso and that it finds right model"""
32 |     X, y, w = make_regression(coef=True, random_state=1)
33 |     enet = UoI_ElasticNet(comm=MPI.COMM_WORLD)
34 |     enet.fit(X, y)
35 |     true_coef = np.nonzero(w)[0]
36 |     fit_coef = np.nonzero(enet.coef_)[0]
37 |     assert_array_equal(true_coef, fit_coef)
38 |     assert_array_almost_equal_nulp(true_coef, fit_coef)
39 | 
40 | 
41 | @pytest.mark.skipif(MPI is None, reason='MPI not installed.')
42 | def test_l1logistic_binary():
43 |     """Test that binary L1 Logistic runs in the UoI framework."""
44 |     n_inf = 10
45 |     X, y, w, b = make_classification(n_samples=200,
46 |                                      random_state=6,
47 |                                      n_informative=n_inf,
48 |                                      n_features=20,
49 |                                      w_scale=4.,
50 |                                      include_intercept=True)
51 | 
52 |     l1log = UoI_L1Logistic(random_state=10, comm=MPI.COMM_WORLD).fit(X, y)
53 |     assert (np.sign(abs(w)) == np.sign(abs(l1log.coef_))).mean() >= .7
54 | 
55 | 
56 | @pytest.mark.skipif(MPI is None, reason='MPI not installed.')
57 | def test_l1logistic_multiclass():
58 |     """Test that multiclass L1 Logistic runs in the UoI framework when all
59 |        classes share a support."""
60 |     n_features = 20
61 |     n_inf = 10
62 |     X, y, w, b = make_classification(n_samples=200,
63 |                                      random_state=10,
64 |                                      n_classes=5,
65 |                                      n_informative=n_inf,
66 |                                      n_features=n_features,
67 |                                      shared_support=True,
68 |                                      w_scale=4.)
69 |     l1log = UoI_L1Logistic(comm=MPI.COMM_WORLD).fit(X, y)
70 |     assert (np.sign(abs(w)) == np.sign(abs(l1log.coef_))).mean() >= .8
71 | 
72 | 
73 | @pytest.mark.skipif(MPI is None, reason='MPI not installed.')
74 | def test_poisson():
75 |     """Test basic functionality of UoI_Lasso and that it finds right model"""
76 |     n_features = 20
77 |     n_inf = 10
78 |     X, y, w, b = make_poisson_regression(n_samples=200,
79 |                                          n_features=n_features,
80 |                                          n_informative=n_inf,
81 |                                          random_state=10)
82 |     poisson = UoI_Poisson(comm=MPI.COMM_WORLD)
83 |     poisson.fit(X, y)
84 |     assert (np.sign(abs(w)) == np.sign(abs(poisson.coef_))).mean() >= .6
85 | 


--------------------------------------------------------------------------------
/tests/test_mpi/test_mpi_utils.py:
--------------------------------------------------------------------------------
  1 | import h5py, pytest
  2 | import numpy as np
  3 | 
  4 | from numpy.testing import assert_array_equal
  5 | try:
  6 |     from mpi4py import MPI
  7 | except ImportError:
  8 |     MPI = None
  9 | 
 10 | from pyuoi.mpi_utils import (Bcast_from_root, Gatherv_rows,
 11 |                              load_data_MPI)
 12 | 
 13 | 
 14 | @pytest.mark.skipif(MPI is None, reason='MPI not installed.')
 15 | def test_load_data_MPI(tmpdir):
 16 |     """Tests loading data from an HDF5 file into all ranks.
 17 |     """
 18 |     comm = MPI.COMM_WORLD
 19 |     rank = comm.rank
 20 |     root = 0
 21 |     X = np.random.randn(5, 10)
 22 |     y = np.random.randint(5, size=5)
 23 | 
 24 |     fname = tmpdir.join('temp.h5')
 25 |     if rank == root:
 26 |         with h5py.File(str(fname), 'w') as f:
 27 |             f.create_dataset('X', data=X)
 28 |             f.create_dataset('Xp', data=X)
 29 |             f.create_dataset('y', data=y)
 30 |             f.create_dataset('yp', data=y)
 31 | 
 32 |     # Default keys
 33 |     X_load, y_load = load_data_MPI(fname)
 34 |     if rank == root:
 35 |         assert_array_equal(X, X_load)
 36 |         assert_array_equal(y, y_load)
 37 | 
 38 |     # Set keys
 39 |     X_load, y_load = load_data_MPI(fname,
 40 |                                    X_key='Xp',
 41 |                                    y_key='yp')
 42 |     if rank == root:
 43 |         assert_array_equal(X, X_load)
 44 |         assert_array_equal(y, y_load)
 45 | 
 46 | 
 47 | @pytest.mark.skipif(MPI is None, reason='MPI not installed.')
 48 | def test_Bcast_from_root():
 49 |     """Test the Bcast_from_root function for broadcasting
 50 |     an array from root to all ranks.
 51 |     """
 52 |     comm = MPI.COMM_WORLD
 53 |     root = 0
 54 | 
 55 |     dims = [2, 3, 5]
 56 | 
 57 |     for dtype in [int, float]:
 58 |         for ndim in range(1, 4):
 59 |             my_dim = dims[:ndim]
 60 |             X = None
 61 |             if comm.rank == root:
 62 |                 X = np.arange(np.prod(my_dim), dtype=dtype)
 63 |                 X = X.reshape(my_dim)
 64 |             X = Bcast_from_root(X, comm, root)
 65 |             Xp = np.arange(np.prod(my_dim), dtype=dtype)
 66 |             Xp = Xp.reshape(my_dim)
 67 |             assert_array_equal(X, Xp)
 68 |             assert X.dtype == dtype
 69 |             assert X.ndim == len(my_dim)
 70 | 
 71 | 
 72 | @pytest.mark.skipif(MPI is None, reason='MPI not installed.')
 73 | def test_Gatherv_rows():
 74 |     """Test the Gatherv_rows function for Gathering and
 75 |     concatenating ndarrys along their first axes to root.
 76 |     """
 77 |     comm = MPI.COMM_WORLD
 78 |     root = 0
 79 |     rank = comm.rank
 80 |     size = comm.size
 81 | 
 82 |     for dtype in [int, float]:
 83 |         # Multiple rows per rank
 84 |         X = np.arange(151 * 3, dtype=dtype).reshape(151, 3)
 85 |         my_rows = np.array_split(X, size)[rank]
 86 |         Xp = Gatherv_rows(my_rows, comm, root)
 87 |         if rank == root:
 88 |             assert_array_equal(X, Xp)
 89 |             assert Xp.dtype == dtype
 90 | 
 91 |         # Fewer rows than ranks
 92 |         X = np.arange(2 * 3, dtype=dtype).reshape(2, 3)
 93 |         my_rows = np.array_split(X, size)[rank]
 94 |         Xp = Gatherv_rows(my_rows, comm, root)
 95 |         if rank == root:
 96 |             assert_array_equal(X, Xp)
 97 |             assert Xp.dtype == dtype
 98 | 
 99 |         # Multiple rows per rank, 3d
100 |         X = np.arange(151 * 2 * 3, dtype=dtype).reshape(151, 2, 3)
101 |         my_rows = np.array_split(X, size)[rank]
102 |         Xp = Gatherv_rows(my_rows, comm, root)
103 |         if rank == root:
104 |             assert_array_equal(X, Xp)
105 |             assert Xp.dtype == dtype
106 | 
107 |         # Fewer rows than ranks, 3d
108 |         X = np.arange(2 * 3 * 5, dtype=dtype).reshape(2, 3, 5)
109 |         my_rows = np.array_split(X, size)[rank]
110 |         Xp = Gatherv_rows(my_rows, comm, root)
111 |         if rank == root:
112 |             assert_array_equal(X, Xp)
113 |             assert Xp.dtype == dtype
114 | 
115 | 
116 | @pytest.mark.skipif(MPI is None, reason='MPI not installed.')
117 | def test_Gatherv_random_rows():
118 |     """Test Gatherv_rows for gathering ndarrays with random
119 |     shapes along their first axis
120 |     """
121 | 
122 |     comm = MPI.COMM_WORLD
123 |     root = 0
124 |     rank = comm.rank
125 | 
126 |     data = np.random.normal(size=(np.random.randint(1, 10), 1000))
127 |     sizes = comm.gather(data.shape[0], root=root)
128 |     data = Gatherv_rows(data, comm, root)
129 | 
130 |     if rank == root:
131 |         assert data.shape[0] == np.sum(sizes)
132 | 


--------------------------------------------------------------------------------
/tests/test_nmf.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | import numpy as np
  3 | 
  4 | from numpy.testing import assert_array_equal, assert_raises
  5 | from pyuoi.decomposition import UoI_NMF, UoI_NMF_Base
  6 | from pyuoi.decomposition.utils import dissimilarity
  7 | from sklearn.cluster import DBSCAN
  8 | from sklearn.decomposition import NMF
  9 | 
 10 | 
 11 | @pytest.fixture
 12 | def nmf_setup():
 13 |     W = np.random.randint(0, high=2, size=(500, 5))
 14 |     H = np.random.randint(0, high=2, size=(5, 2))
 15 |     X = np.dot(W, H)
 16 |     noise = np.random.normal(loc=0, scale=0.5, size=X.shape)**2
 17 |     X = X + noise
 18 |     return X
 19 | 
 20 | 
 21 | def test_dissimilarity():
 22 |     """Test the dissimilarity function."""
 23 |     k = 5
 24 |     n_features = 20
 25 | 
 26 |     # same bases should be a dissimilarity of zero
 27 |     H1 = np.random.randint(low=0, high=3, size=(k, n_features))
 28 |     H2 = np.copy(H1)
 29 |     assert np.allclose(dissimilarity(H1, H2), 0.)
 30 | 
 31 | 
 32 | @pytest.mark.fast
 33 | def test_UoI_NMF_Base_initialization():
 34 |     """Tests the initialization of UoI NMF Base Class."""
 35 |     n_boots = 30
 36 |     ranks = 10
 37 |     uoi = UoI_NMF(n_boots=n_boots, ranks=ranks,
 38 |                   random_state=np.random.RandomState(2332))
 39 |     assert_array_equal(uoi.ranks, np.arange(2, ranks + 1))
 40 |     assert uoi.nmf.solver == 'mu'
 41 |     assert uoi.nmf.beta_loss == 'kullback-leibler'
 42 |     assert uoi.cluster.min_samples == max(n_boots // 2, 1)
 43 | 
 44 | 
 45 | @pytest.mark.fast
 46 | def test_UoI_NMF_initialization():
 47 |     """Tests the initialization of UoI NMF."""
 48 |     n_boots = 30
 49 |     ranks = 10
 50 |     uoi = UoI_NMF(n_boots=n_boots, ranks=ranks)
 51 |     assert_array_equal(uoi.ranks, np.arange(2, ranks + 1))
 52 |     assert uoi.nmf.solver == 'mu'
 53 |     assert uoi.nmf.beta_loss == 'kullback-leibler'
 54 |     assert uoi.cluster.min_samples == max(n_boots // 2, 1)
 55 |     assert uoi.cons_meth == np.mean
 56 | 
 57 | 
 58 | @pytest.mark.fast
 59 | def test_UoI_NMF_initialization_value_error():
 60 |     """Tests that ValueErrors are correctly raised in the NMF initialization."""
 61 |     assert_raises(ValueError, UoI_NMF_Base, **{'ranks': 2.5})
 62 |     assert_raises(ValueError, UoI_NMF_Base, **{'nmf': NMF})
 63 |     assert_raises(ValueError, UoI_NMF_Base, **{'cluster': DBSCAN})
 64 |     assert_raises(ValueError, UoI_NMF_Base, **{'nnreg': 2})
 65 | 
 66 | 
 67 | @pytest.mark.fast
 68 | def test_UoI_NMF_fit(nmf_setup):
 69 |     """Tests that the fitting procedure of UoI NMF runs without error."""
 70 |     X = nmf_setup
 71 | 
 72 |     n_boots = 1
 73 |     ranks = 5
 74 |     uoi = UoI_NMF(n_boots=n_boots,
 75 |                   ranks=[ranks],
 76 |                   nmf_max_iter=1000,
 77 |                   random_state=2332,
 78 |                   use_dissimilarity=False)
 79 |     uoi.fit(X)
 80 |     assert hasattr(uoi, 'components_')
 81 | 
 82 | 
 83 | @pytest.mark.fast
 84 | def test_UoI_NMF_fit_no_dissimilarity(nmf_setup):
 85 |     """Tests that the fitting procedure of UoI NMF runs without error, when
 86 |     the algorithm does not use dissimilarity to choose a rank."""
 87 |     X = nmf_setup
 88 | 
 89 |     n_boots = 1
 90 |     ranks = 5
 91 |     uoi = UoI_NMF(n_boots=n_boots,
 92 |                   ranks=[ranks],
 93 |                   nmf_max_iter=1000,
 94 |                   random_state=2332,
 95 |                   use_dissimilarity=False)
 96 |     uoi.fit(X)
 97 |     assert hasattr(uoi, 'components_')
 98 | 
 99 | 
100 | @pytest.mark.fast
101 | def test_UoI_NMF_transform(nmf_setup):
102 |     """Tests that the transform procedure of UoI NMF runs without error."""
103 |     X = nmf_setup
104 | 
105 |     n_boots = 1
106 |     ranks = 5
107 |     uoi = UoI_NMF(n_boots=n_boots,
108 |                   ranks=[ranks],
109 |                   nmf_max_iter=1000,
110 |                   random_state=2332,
111 |                   use_dissimilarity=False)
112 |     X_tfm = uoi.fit_transform(X)
113 |     assert hasattr(uoi, 'components_')
114 |     assert X_tfm is not None
115 | 
116 | 
117 | @pytest.mark.fast
118 | def test_UoI_NMF_transform_value_error(nmf_setup):
119 |     """Tests that the transform procedure of UoI NMF correctly raises a
120 |     ValueError."""
121 |     X = nmf_setup
122 |     n_boots = 1
123 |     ranks = 5
124 |     uoi = UoI_NMF(n_boots=n_boots,
125 |                   ranks=[ranks],
126 |                   nmf_max_iter=1000,
127 |                   random_state=2332,
128 |                   use_dissimilarity=False)
129 |     uoi.fit(X)
130 | 
131 |     # transform
132 |     Y = np.random.normal(size=(X.shape[0], 2 * X.shape[1]))**2
133 |     assert_raises(ValueError, uoi.transform, Y)
134 |     # inverse transform
135 |     W = np.random.normal(size=(X.shape[0], 2 * uoi.components_.shape[0]))**2
136 |     assert_raises(ValueError, uoi.inverse_transform, W)
137 | 
138 | 
139 | @pytest.mark.fast
140 | def test_UoI_NMF_reconstruction_error(nmf_setup):
141 |     """Tests that a reconstruction error is calculated when data is
142 |     transformed."""
143 |     X = nmf_setup
144 |     n_boots = 1
145 |     ranks = 5
146 |     uoi = UoI_NMF(n_boots=n_boots,
147 |                   ranks=[ranks],
148 |                   nmf_max_iter=1000,
149 |                   random_state=2332,
150 |                   use_dissimilarity=False)
151 |     uoi.fit(X)
152 |     X_tfm = uoi.transform(X, reconstruction_err=True)
153 |     assert hasattr(uoi, 'components_')
154 |     assert hasattr(uoi, 'reconstruction_err_')
155 |     assert uoi.reconstruction_err_ is not None
156 |     assert X_tfm is not None
157 | 
158 | 
159 | @pytest.mark.slow
160 | def test_UoI_NMF_correct_number_of_components():
161 |     """Tests that, using the dissimilarity metric, UoI NMF extracts the correct
162 |     number of bases."""
163 |     k = 2
164 |     n_samples = 1000
165 |     n_features = 30
166 | 
167 |     # create data matrix
168 |     W = np.random.randint(low=0, high=3, size=(n_samples, k))
169 |     H = np.random.randint(low=0, high=3, size=(k, n_features))
170 |     noise = np.random.normal(loc=0, scale=0.5, size=(n_samples, n_features))**2
171 |     A = np.dot(W, H) + noise
172 | 
173 |     # fit uoi nmf
174 |     uoi = UoI_NMF(n_boots=10,
175 |                   ranks=[2, 4, 8],
176 |                   nmf_max_iter=5000,
177 |                   use_dissimilarity=True)
178 |     uoi.fit(A)
179 | 
180 |     assert uoi.components_.shape[0] == k
181 | 
182 | 
183 | @pytest.mark.fast
184 | def test_UoI_NMF_dissim_boots_argcheck(nmf_setup):
185 |     """Test that UoI_NMF raises ValueError when trying to use
186 |     dissimilarity with a single bootstrap."""
187 |     n_boots = 1
188 |     ranks = 5
189 |     assert_raises(ValueError, UoI_NMF,
190 |                   n_boots=n_boots,
191 |                   ranks=[ranks],
192 |                   nmf_max_iter=1000,
193 |                   random_state=2332)
194 | 


--------------------------------------------------------------------------------
/tests/test_scores.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from numpy.testing import assert_almost_equal
  3 | from numpy.testing import assert_equal
  4 | from numpy.testing import assert_raises
  5 | 
  6 | from sklearn.datasets import make_regression, make_classification
  7 | from sklearn.model_selection import train_test_split
  8 | from sklearn.linear_model import LinearRegression, LogisticRegression
  9 | from sklearn.metrics import r2_score, accuracy_score, log_loss
 10 | 
 11 | from pyuoi.utils import log_likelihood_glm
 12 | from pyuoi.utils import (AIC, BIC, AICc)
 13 | 
 14 | from pyuoi.linear_model import (UoI_Lasso, UoI_L1Logistic, UoI_Poisson,
 15 |                                 UoI_ElasticNet)
 16 | 
 17 | 
 18 | def test_ll():
 19 |     """Tests that the log-likelihood for generalized linear models is correctly
 20 |     calculated."""
 21 | 
 22 |     # identity
 23 |     y_true = np.array([1, 2, 3])
 24 |     y_pred = np.array([np.e + 1, np.e + 2, np.e + 3])
 25 |     ll = log_likelihood_glm('normal', y_true, y_pred)
 26 |     assert_almost_equal(ll, -4.5)
 27 | 
 28 |     # poisson
 29 |     y_true = np.array([1 / np.log(2.), 1 / np.log(3.), 1 / np.log(4.)])
 30 |     y_pred = np.array([2., 3., 4.])
 31 |     ll = log_likelihood_glm('poisson', y_true, y_pred)
 32 |     assert_almost_equal(ll, -2)
 33 | 
 34 |     # poisson with all zeros
 35 |     y_true = np.zeros(3)
 36 |     y_pred = np.zeros(3)
 37 |     ll = log_likelihood_glm('poisson', y_true, y_pred)
 38 |     assert_equal(ll, 0.)
 39 | 
 40 |     # poisson with all zeros, but predicted is not all zeros
 41 |     y_pred = np.zeros(3)
 42 |     y_true = np.array([0., 0., 1.])
 43 |     ll = log_likelihood_glm('poisson', y_true, y_pred)
 44 |     assert_equal(ll, -np.inf)
 45 | 
 46 | 
 47 | def test_ll_error():
 48 |     """Tests that the log-likelihood function correctly raises an error when an
 49 |     incorrect string is passed as a parameter."""
 50 | 
 51 |     y_true = np.array([1., 2., 3.])
 52 |     y_pred = np.array([3., 4., 5.])
 53 | 
 54 |     assert_raises(ValueError,
 55 |                   log_likelihood_glm,
 56 |                   'error',
 57 |                   y_true,
 58 |                   y_pred)
 59 | 
 60 | 
 61 | def test_information_criteria():
 62 |     """Tests the information criteria (AIC, AICc, BIC) functions."""
 63 |     ll = -1.
 64 |     n_features = 5
 65 |     n_samples = 1000
 66 | 
 67 |     aic = AIC(ll, n_features)
 68 |     assert_equal(aic, 12.)
 69 | 
 70 |     aicc = AICc(ll, n_features, n_samples)
 71 |     assert_equal(aicc, 12. + 30. / 497.)
 72 | 
 73 |     # additional test: AICc should equal AIC if the number of samples is one
 74 |     # greater than the number of features
 75 |     aicc = AICc(ll, n_features, n_features + 1)
 76 |     assert_equal(aicc, aic)
 77 | 
 78 |     bic = BIC(ll, n_features, n_samples)
 79 |     assert_equal(bic, 5 * np.log(1000) + 2)
 80 | 
 81 | 
 82 | def test_LinearRegressor_scoring_defaults():
 83 |     """Tests that the correct default train/test data are being used
 84 |     for scoring estimates in UoIAbstractLinearRegressor. Further
 85 |     tests that the scoring itself is being done correctly."""
 86 |     seed = 5
 87 | 
 88 |     X, y = make_regression(n_samples=100, n_features=10, n_informative=10,
 89 |                            random_state=seed)
 90 | 
 91 |     train_idxs, test_idxs = train_test_split(np.arange(X.shape[0]),
 92 |                                              test_size=0.1,
 93 |                                              random_state=seed)
 94 |     X_train = X[train_idxs]
 95 |     y_train = y[train_idxs]
 96 | 
 97 |     X_test = X[test_idxs]
 98 |     y_test = y[test_idxs]
 99 | 
100 |     fitter = LinearRegression().fit(X_train, y_train)
101 |     support = np.ones(X.shape[1]).astype(bool)
102 |     # r2 - must use test data
103 |     uoi = UoI_Lasso(estimation_score='r2')
104 |     assert uoi._estimation_target == 1
105 | 
106 |     score = uoi._score_predictions('r2', fitter, X, y, support,
107 |                                    (train_idxs, test_idxs))
108 |     assert_equal(r2_score(y_test, fitter.predict(X_test)), score)
109 | 
110 |     ll = log_likelihood_glm('normal', y_train,
111 |                             fitter.predict(X_train[:, support]))
112 |     # BIC - must use train data
113 |     uoi = UoI_Lasso(estimation_score='BIC')
114 |     assert uoi._estimation_target == 0
115 |     score = -1 * uoi._score_predictions('BIC', fitter, X, y, support,
116 |                                         (train_idxs, test_idxs))
117 |     assert_equal(BIC(ll, *X_train.T.shape), score)
118 | 
119 |     # AIC - must use train data
120 |     uoi = UoI_Lasso(estimation_score='AIC')
121 |     assert uoi._estimation_target == 0
122 | 
123 |     score = -1 * uoi._score_predictions('AIC', fitter, X, y, support,
124 |                                         (train_idxs, test_idxs))
125 |     assert_equal(AIC(ll, X_train.shape[1]), score)
126 | 
127 |     # AICc - must use train data
128 |     uoi = UoI_Lasso(estimation_score='AICc')
129 |     assert uoi._estimation_target == 0
130 | 
131 |     score = -1 * uoi._score_predictions('AICc', fitter, X, y, support,
132 |                                         (train_idxs, test_idxs))
133 |     assert_equal(AICc(ll, *X_train.T.shape), score)
134 | 
135 | 
136 | def test_GeneralizedLinearRegressor_scoring_defaults():
137 |     """Tests that the correct default train/test data are being used
138 |     for scoring estimates in UoIAbstractGeneralizedLinearRegressor. Further
139 |     tests that the scoring itself is being done correctly."""
140 |     seed = 5
141 | 
142 |     X, y = make_classification(n_samples=100, n_features=3, n_informative=3,
143 |                                n_redundant=0, n_repeated=0, n_classes=3,
144 |                                n_clusters_per_class=2, random_state=seed)
145 | 
146 |     train_idxs, test_idxs = train_test_split(np.arange(X.shape[0]),
147 |                                              test_size=0.1,
148 |                                              random_state=seed)
149 | 
150 |     X_train = X[train_idxs]
151 |     y_train = y[train_idxs]
152 | 
153 |     X_test = X[test_idxs]
154 |     y_test = y[test_idxs]
155 | 
156 |     fitter = LogisticRegression().fit(X_train, y_train)
157 |     support = np.ones(X.shape[1]).astype(bool)
158 | 
159 |     # acc - must use test data
160 |     uoi = UoI_L1Logistic(estimation_score='acc')
161 |     assert uoi._estimation_target == 1
162 |     uoi.classes_ = np.unique(y)
163 |     score = uoi._score_predictions('acc', fitter, X, y, support,
164 |                                    (train_idxs, test_idxs))
165 |     assert_equal(accuracy_score(y_test, fitter.predict(X_test)), score)
166 | 
167 |     # log - must use test data. Note the sign difference
168 |     uoi = UoI_L1Logistic(estimation_score='log')
169 |     assert uoi._estimation_target == 1
170 |     uoi.classes_ = np.unique(y)
171 |     score = uoi._score_predictions('log', fitter, X, y, support,
172 |                                    (train_idxs, test_idxs))
173 | 
174 |     y_pred_test = fitter.predict_proba(X_test[:, support])
175 |     assert_equal(log_loss(y_test, y_pred_test, labels=np.unique(y)),
176 |                  -1 * score)
177 | 
178 |     ll = -log_loss(y_train, fitter.predict_proba(X_train[:, support]),
179 |                    labels=np.unique(y))
180 |     total_ll = ll * X_train.shape[0]
181 |     # BIC - must use train data
182 |     uoi = UoI_L1Logistic(estimation_score='BIC')
183 |     assert uoi._estimation_target == 0
184 |     uoi.classes_ = np.unique(y)
185 |     score = -1 * uoi._score_predictions('BIC', fitter, X, y, support,
186 |                                         (train_idxs, test_idxs))
187 |     assert_equal(BIC(total_ll, *X_train.T.shape), score)
188 | 
189 |     # AIC
190 |     uoi = UoI_L1Logistic(estimation_score='AIC')
191 |     assert uoi._estimation_target == 0
192 |     uoi.classes_ = np.unique(y)
193 |     score = -1 * uoi._score_predictions('AIC', fitter, X, y, support,
194 |                                         (train_idxs, test_idxs))
195 |     assert_equal(AIC(total_ll, X_train.shape[1]), score)
196 | 
197 |     # AICc
198 |     uoi = UoI_L1Logistic(estimation_score='AICc')
199 |     assert uoi._estimation_target == 0
200 |     uoi.classes_ = np.unique(y)
201 |     score = -1 * uoi._score_predictions('AICc', fitter, X, y, support,
202 |                                         (train_idxs, test_idxs))
203 |     assert_equal(AICc(total_ll, *X_train.T.shape), score)
204 | 
205 | 
206 | def test_estimation_target():
207 |     """Verify the ability for the user to set the estimation taget variable"""
208 | 
209 |     # Assess r2 on train data
210 |     uoi = UoI_Lasso(estimation_score='r2', estimation_target='train')
211 | 
212 |     # train gets converted to the index 0
213 |     assert uoi._estimation_target == 0
214 | 
215 |     # Assess BIC on test data
216 |     uoi = UoI_Lasso(estimation_score='BIC', estimation_target='test')
217 | 
218 |     # Assess r2 on train data
219 |     uoi = UoI_ElasticNet(estimation_score='r2', estimation_target='train')
220 | 
221 |     # train gets converted to the index 0
222 |     assert uoi._estimation_target == 0
223 | 
224 |     # Assess BIC on test data
225 |     uoi = UoI_ElasticNet(estimation_score='BIC', estimation_target='test')
226 | 
227 |     assert uoi._estimation_target == 1
228 | 
229 |     uoi = UoI_L1Logistic(estimation_score='acc', estimation_target='train')
230 | 
231 |     assert uoi._estimation_target == 0
232 | 
233 |     uoi = UoI_L1Logistic(estimation_score='BIC', estimation_target='test')
234 | 
235 |     assert uoi._estimation_target == 1
236 | 
237 |     uoi = UoI_Poisson(estimation_score='acc', estimation_target='train')
238 | 
239 |     assert uoi._estimation_target == 0
240 | 
241 |     uoi = UoI_Poisson(estimation_score='BIC', estimation_target='test')
242 | 
243 |     assert uoi._estimation_target == 1
244 | 


--------------------------------------------------------------------------------
/tests/test_utils.py:
--------------------------------------------------------------------------------
  1 | import pytest
  2 | 
  3 | import numpy as np
  4 | from numpy.testing import assert_array_equal
  5 | from numpy.testing import assert_raises
  6 | 
  7 | from pyuoi.linear_model.utils import stability_selection_to_threshold
  8 | from pyuoi.linear_model.utils import intersection
  9 | 
 10 | from pyuoi.utils import check_logger
 11 | 
 12 | import logging
 13 | try:
 14 |     from mpi4py import MPI
 15 | except ImportError:
 16 |     MPI = None
 17 | 
 18 | 
 19 | def test_stability_selection_to_threshold_int():
 20 |     """Tests whether stability_selection_to_threshold correctly outputs the
 21 |     correct threshold when provided a single integer."""
 22 | 
 23 |     n_boots_sel = 48
 24 |     # stability selection is a single integer
 25 |     test_int = 36
 26 |     selection_thresholds = stability_selection_to_threshold(
 27 |         test_int, n_boots_sel)
 28 | 
 29 |     assert_array_equal(selection_thresholds, np.array([36]))
 30 | 
 31 | 
 32 | def test_stability_selection_to_threshold_float():
 33 |     """Tests whether stability_selection_to_threshold correctly outputs the
 34 |     correct threshold when provided a single float."""
 35 | 
 36 |     n_boots_sel = 48
 37 |     # stability selection is a single float
 38 |     test_float = 0.5
 39 |     selection_thresholds = stability_selection_to_threshold(
 40 |         test_float, n_boots_sel)
 41 | 
 42 |     assert_array_equal(selection_thresholds, np.array([24]))
 43 | 
 44 | 
 45 | def test_stability_selection_to_threshold_ints():
 46 |     """Tests whether stability_selection_to_threshold correctly outputs the
 47 |     correct threshold when provided a list of ints."""
 48 | 
 49 |     n_boots_sel = 48
 50 |     # stability selection is a list of ints
 51 |     test_ints = [24, 28, 33, 38, 43, 48]
 52 |     selection_thresholds = stability_selection_to_threshold(
 53 |         test_ints, n_boots_sel)
 54 | 
 55 |     assert_array_equal(
 56 |         selection_thresholds,
 57 |         np.array([24, 28, 33, 38, 43, 48]))
 58 | 
 59 | 
 60 | def test_stability_selection_to_threshold_floats():
 61 |     """Tests whether stability_selection_to_threshold correctly outputs the
 62 |     correct threshold when provided a list of floats."""
 63 |     n_boots_sel = 48
 64 |     # stability selection is a list of floats
 65 |     test_floats = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
 66 |     selection_thresholds = stability_selection_to_threshold(
 67 |         test_floats, n_boots_sel)
 68 | 
 69 |     assert_array_equal(
 70 |         selection_thresholds,
 71 |         np.array([24, 28, 33, 38, 43, 48]))
 72 | 
 73 | 
 74 | def test_stability_selection_to_threshold_ints_np():
 75 |     """Tests whether stability_selection_to_threshold correctly outputs the
 76 |     correct threshold when provided a numpy array of ints."""
 77 | 
 78 |     n_boots_sel = 48
 79 |     # stability selection is a list of ints
 80 |     test_ints_np = np.array([24, 28, 33, 38, 43, 48])
 81 |     selection_thresholds = stability_selection_to_threshold(
 82 |         test_ints_np, n_boots_sel)
 83 | 
 84 |     assert_array_equal(
 85 |         selection_thresholds,
 86 |         np.array([24, 28, 33, 38, 43, 48]))
 87 | 
 88 | 
 89 | def test_stability_selection_to_threshold_floats_np():
 90 |     """Tests whether stability_selection_to_threshold correctly outputs the
 91 |     correct threshold when provided a numpy array of ints."""
 92 | 
 93 |     n_boots_sel = 48
 94 |     # stability selection is a list of floats
 95 |     test_floats_np = np.array([0.5, 0.6, 0.7, 0.8, 0.9, 1.0])
 96 |     selection_thresholds = stability_selection_to_threshold(
 97 |         test_floats_np, n_boots_sel)
 98 | 
 99 |     assert_array_equal(
100 |         selection_thresholds,
101 |         np.array([24, 28, 33, 38, 43, 48]))
102 | 
103 | 
104 | def test_stability_selection_to_threshold_exceeds_n_bootstraps():
105 |     """Tests whether stability_selection_to_threshold correctly outputs an
106 |     error when provided an input that results in bootstraps exceeding
107 |     n_boots_sel."""
108 | 
109 |     n_boots_sel = 48
110 |     # stability selection is a list of floats
111 |     test_floats = np.array([0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1])
112 |     test_ints = np.array([24, 28, 33, 38, 43, 48, 52])
113 | 
114 |     assert_raises(
115 |         ValueError,
116 |         stability_selection_to_threshold,
117 |         test_ints,
118 |         n_boots_sel)
119 | 
120 |     assert_raises(
121 |         ValueError,
122 |         stability_selection_to_threshold,
123 |         test_floats,
124 |         n_boots_sel)
125 | 
126 | 
127 | def test_stability_selection_to_threshold_one_bootstrap():
128 |     """Tests whether stability_selection_to_threshold correctly handles the
129 |     edge case where one bootstrap is requested."""
130 | 
131 |     n_boots_sel = 1
132 |     # stability selection can only be one value
133 |     threshold = 1
134 | 
135 |     selection_thresholds = stability_selection_to_threshold(
136 |         n_boots_sel,
137 |         threshold)
138 | 
139 |     assert_array_equal(
140 |         selection_thresholds,
141 |         np.array([1]))
142 | 
143 | 
144 | def test_stability_selection_to_threshold_input_value_error():
145 |     """Tests whether stability_selection_to_threshold properly raises an error
146 |     when it receives objects without ints or floats."""
147 |     n_boots_sel = 48
148 |     stability_selection_list = [0, 1, 'a']
149 |     stability_selection_np_array = np.array([0, 1, 'a'])
150 |     stability_selection_dict = {0: 'a', 1: 'b'}
151 | 
152 |     assert_raises(
153 |         ValueError,
154 |         stability_selection_to_threshold,
155 |         stability_selection_list,
156 |         n_boots_sel)
157 | 
158 |     assert_raises(
159 |         ValueError,
160 |         stability_selection_to_threshold,
161 |         stability_selection_np_array,
162 |         n_boots_sel)
163 | 
164 |     assert_raises(
165 |         ValueError,
166 |         stability_selection_to_threshold,
167 |         stability_selection_dict,
168 |         n_boots_sel)
169 | 
170 | 
171 | def test_stability_selection_reject_negative_numbers():
172 |     """Tests whether stability_selection_to_threshold correctly rejects
173 |     negative thresholds."""
174 | 
175 |     n_boots_sel = 48
176 | 
177 |     # stability selection is a list of floats
178 |     test_negative = -1 * np.array([24, 28, 33, 38, 43, 48, 52])
179 | 
180 |     assert_raises(
181 |         ValueError,
182 |         stability_selection_to_threshold,
183 |         test_negative,
184 |         n_boots_sel)
185 | 
186 | 
187 | def test_intersection():
188 |     """Tests whether intersection correctly performs a hard intersection."""
189 | 
190 |     coefs = np.array([
191 |         [[2, 1, -1, 0, 4],
192 |          [4, 0, 2, -1, 5],
193 |          [1, 2, 3, 4, 5]],
194 |         [[2, 0, 0, 0, 0],
195 |          [3, 1, 1, 0, 3],
196 |          [6, 7, 8, 9, 10]],
197 |         [[2, 0, 0, 0, 0],
198 |          [2, -1, 3, 0, 2],
199 |          [2, 4, 6, 8, 9]]])
200 | 
201 |     true_intersection = np.array([
202 |         [True, False, False, False, False],
203 |         [True, False, True, False, True],
204 |         [True, True, True, True, True]])
205 | 
206 |     selection_thresholds = np.array([3])
207 |     estimated_intersection = intersection(
208 |         coefs=coefs,
209 |         selection_thresholds=selection_thresholds)
210 | 
211 |     # we sort the supports since they might not be in the same order
212 |     assert_array_equal(
213 |         np.sort(true_intersection, axis=0),
214 |         np.sort(estimated_intersection, axis=0))
215 | 
216 | 
217 | def test_intersection_with_stability_selection_one_threshold():
218 |     """Tests whether intersection correctly performs a soft intersection."""
219 | 
220 |     coefs = np.array([
221 |         [[2, 1, -1, 0, 4],
222 |          [4, 0, 2, -1, 5],
223 |          [1, 2, 3, 4, 5]],
224 |         [[2, 0, 0, 0, 0],
225 |          [3, 1, 1, 0, 3],
226 |          [6, 7, 8, 9, 10]],
227 |         [[2, 0, 0, 0, 0],
228 |          [2, -1, 3, 0, 2],
229 |          [2, 4, 6, 8, 9]]])
230 | 
231 |     true_intersection = np.array([
232 |         [True, False, False, False, False],
233 |         [True, True, True, False, True],
234 |         [True, True, True, True, True]])
235 | 
236 |     selection_thresholds = np.array([2])
237 |     estimated_intersection = intersection(
238 |         coefs=coefs,
239 |         selection_thresholds=selection_thresholds)
240 | 
241 |     # we sort the supports since they might not be in the same order
242 |     assert_array_equal(
243 |         np.sort(true_intersection, axis=0),
244 |         np.sort(estimated_intersection, axis=0))
245 | 
246 | 
247 | def test_intersection_with_stability_selection_multiple_thresholds():
248 |     """Tests whether intersection correctly performs an intersection with
249 |     multiple thresholds. This test also covers the case when there are
250 |     duplicates."""
251 | 
252 |     coefs = np.array([
253 |         [[2, 1, -1, 0, 4],
254 |          [4, 0, 2, -1, 5],
255 |          [1, 2, 3, 4, 5]],
256 |         [[2, 0, 0, 0, 0],
257 |          [3, 1, 1, 0, 3],
258 |          [6, 7, 8, 9, 10]],
259 |         [[2, 0, 0, 0, 0],
260 |          [2, -1, 3, 0, 2],
261 |          [2, 4, 6, 8, 9]]])
262 | 
263 |     true_intersection = np.array([
264 |         [True, False, False, False, False],
265 |         [True, True, True, False, True],
266 |         [True, True, True, True, True],
267 |         [True, False, True, False, True]])
268 | 
269 |     selection_thresholds = np.array([2, 3])
270 |     estimated_intersection = intersection(
271 |         coefs=coefs,
272 |         selection_thresholds=selection_thresholds)
273 | 
274 |     # we sort the supports since they might not be in the same order
275 |     assert_array_equal(
276 |         np.sort(true_intersection, axis=0),
277 |         np.sort(estimated_intersection, axis=0))
278 | 
279 | 
280 | def test_intersection_no_thresholds():
281 |     """Tests that the intersection method correctly calculates the intersection
282 |     using the number of bootstraps as the default selection threshold."""
283 | 
284 |     coefs = np.array([
285 |         [[2, 1, -1, 0, 4],
286 |          [4, 0, 2, -1, 5],
287 |          [1, 2, 3, 4, 5]],
288 |         [[2, 0, 0, 0, 0],
289 |          [3, 1, 1, 0, 3],
290 |          [6, 7, 8, 9, 10]],
291 |         [[2, 0, 0, 0, 0],
292 |          [2, -1, 3, 0, 2],
293 |          [2, 4, 6, 8, 9]]])
294 | 
295 |     true_intersection = np.array([
296 |         [True, False, False, False, False],
297 |         [True, True, True, True, True],
298 |         [True, False, True, False, True]])
299 | 
300 |     estimated_intersection = intersection(
301 |         coefs=coefs,
302 |         selection_thresholds=None)
303 | 
304 |     # we sort the supports since they might not be in the same order
305 |     assert_array_equal(
306 |         np.sort(true_intersection, axis=0),
307 |         np.sort(estimated_intersection, axis=0))
308 | 
309 | 
310 | @pytest.mark.fast
311 | def test_check_logger():
312 |     """Test that check_logger builds logger correctly"""
313 |     ret = check_logger(None, name="test_check_logger")
314 |     assert ret is not None
315 |     assert ret.name == 'test_check_logger'
316 | 
317 | 
318 | @pytest.mark.fast
319 | @pytest.mark.skipif(MPI is None, reason='MPI not installed.')
320 | def test_check_logger_mpi():
321 |     """Test that passing in a MPI communicatorj object works with
322 |     check_logger"""
323 |     comm = MPI.COMM_WORLD
324 |     ret = check_logger(None, comm=comm)
325 |     assert ret is not None
326 | 
327 | 
328 | @pytest.mark.fast
329 | def test_check_logger_exists():
330 |     """Test that logger returns the argued logger when it gets passed in"""
331 |     logger = logging.getLogger()
332 |     ret = check_logger(logger)
333 |     assert ret is logger
334 | 


--------------------------------------------------------------------------------