├── .flake8 ├── .github └── workflows │ └── tests.yml ├── .gitignore ├── .readthedocs.yaml ├── .travis.yml ├── LICENSE.txt ├── MANIFEST.in ├── Makefile ├── README.md ├── bin ├── generate_build.sh └── test_mpi.sh ├── codecov.yaml ├── compat └── win32 │ └── stdint.h ├── docs ├── Makefile └── source │ ├── api.rst │ ├── art │ ├── pyuoi.pdf │ └── pyuoi.png │ ├── conf.py │ ├── contributing.rst │ ├── index.rst │ ├── installation.rst │ ├── introduction.rst │ ├── mpi.rst │ └── pyuoi │ ├── datasets │ └── datasets.rst │ ├── decomposition │ └── decomposition.rst │ ├── linear_model │ └── linear_model.rst │ ├── mpi_utils.rst │ └── utils.rst ├── examples ├── README.rst ├── plot_swimmer.py └── plot_uoi_lasso.py ├── liblbfgs ├── COPYING ├── README ├── arithmetic_ansi.h ├── arithmetic_sse_double.h ├── arithmetic_sse_float.h ├── lbfgs.c └── lbfgs.h ├── paper ├── paper.bib └── paper.md ├── pyproject.toml ├── pytest.ini ├── requirements-dev.txt ├── requirements.txt ├── setup.py ├── src └── pyuoi │ ├── __init__.py │ ├── data │ └── Swimmer.h5 │ ├── datasets │ └── __init__.py │ ├── decomposition │ ├── CUR.py │ ├── NMF.py │ ├── __init__.py │ ├── base.py │ └── utils.py │ ├── lbfgs │ ├── LICENSE │ ├── __init__.py │ └── _lowlevel.pyx │ ├── linear_model │ ├── __init__.py │ ├── base.py │ ├── elasticnet.py │ ├── lasso.py │ ├── logistic.py │ ├── poisson.py │ ├── scikit-learn_license │ └── utils.py │ ├── mpi_utils.py │ └── utils.py └── tests ├── test_cur.py ├── test_elasticnet.py ├── test_lbfgs.py ├── test_mpi ├── __init__.py ├── test_mpi_uoi_linear_model.py └── test_mpi_utils.py ├── test_nmf.py ├── test_poisson.py ├── test_scores.py ├── test_uoi_l1logistic.py ├── test_uoi_lasso.py └── test_utils.py /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E401,W503,W504 3 | max-line-length = 100 4 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: pyuoi_tests 2 | on: 3 | push: 4 | branches: 5 | - main 6 | pull_request: 7 | branches: 8 | - main 9 | jobs: 10 | run-tests: 11 | runs-on: ${{ matrix.os }} 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | python-version: ["3.7", "3.8", "3.9"] 16 | os: [ubuntu-latest, macOS-latest] 17 | steps: 18 | - name: Test pyuoi 19 | uses: actions/checkout@v3 20 | - name: Set up Python ${{ matrix.python-version }} 21 | uses: actions/setup-python@v3 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | - name: Install dependencies 25 | run: | 26 | 27 | python -m pip install --upgrade pip 28 | python -m pip install -r requirements-dev.txt 29 | if [ "${{matrix.os}}" = "ubuntu-latest" ]; then 30 | sudo apt-get update 31 | sudo apt-get install -y openmpi-bin libopenmpi-dev gcc 32 | python -m pip install mpi4py 33 | fi 34 | python -m pip install codecov pytest-cov pycasso 35 | python -m pip install -e . 
36 | - name: Lint with flake8 37 | run: | 38 | python -m flake8 src/pyuoi tests examples 39 | - name: Test with pytest 40 | run: | 41 | python -m pytest -sv --cov=./ tests 42 | - name: Build docs 43 | run: | 44 | sphinx-build -b html docs/source docs/build 45 | - name: Codecov 46 | run: | 47 | python -m codecov 48 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # pdfs 2 | *.pdf 3 | 4 | # docs 5 | docs/ 6 | 7 | # pip wheel metadata 8 | pip-wheel-metadata/ 9 | 10 | # Pycharm settings files 11 | .idea 12 | 13 | # Byte-compiled / optimized / DLL files 14 | __pycache__/ 15 | *.py[cod] 16 | *$py.class 17 | 18 | # C extensions 19 | *.so 20 | 21 | # Distribution / packaging 22 | .Python 23 | env/ 24 | build/ 25 | develop-eggs/ 26 | dist/ 27 | downloads/ 28 | eggs/ 29 | .eggs/ 30 | lib/ 31 | lib64/ 32 | parts/ 33 | sdist/ 34 | var/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *,cover 58 | .hypothesis/ 59 | .pytest_cache 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | local_settings.py 68 | 69 | # Flask stuff: 70 | instance/ 71 | .webassets-cache 72 | 73 | # Scrapy stuff: 74 | .scrapy 75 | 76 | # Sphinx documentation 77 | docs/_build/ 78 | 79 | # PyBuilder 80 | target/ 81 | 82 | # IPython Notebook 83 | .ipynb_checkpoints 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # celery beat schedule file 89 | celerybeat-schedule 90 | 91 | # dotenv 92 | .env 93 | 94 | # virtualenv 95 | venv/ 96 | ENV/ 97 | 98 | # Spyder project settings 99 | .spyderproject 100 | 101 | # Rope project settings 102 | .ropeproject 103 | 104 | # Macs 105 | .DS_Store 106 | 107 | # VS Code 108 | .vscode 109 | 110 | # lbfgs solver stuff 111 | pyuoi/lbfgs/_lowlevel.c 112 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Optionally set the version of Python and requirements required to build your docs 9 | python: 10 | install: 11 | - requirements: requirements-dev.txt 12 | - method: pip 13 | path: . 
14 | extra_requirements: 15 | - dev 16 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | os: linux 3 | python: 4 | - 3.6 5 | - 3.7 6 | - 3.8 7 | matrix: 8 | include: 9 | - os: osx 10 | language: generic 11 | env: PYTHON=3.7.4 12 | addons: 13 | homebrew: 14 | packages: 15 | - openmpi 16 | - pyenv-virtualenv 17 | before_install: 18 | - | 19 | if [ "$TRAVIS_OS_NAME" = "osx" ]; then 20 | pyenv install $PYTHON 21 | export PYENV_VERSION=$PYTHON 22 | export PATH="/Users/travis/.pyenv/shims:${PATH}" 23 | pyenv virtualenv venv 24 | source /Users/travis/.pyenv/versions/3.7.4/envs/venv/bin/activate 25 | fi 26 | - | 27 | if [ "$TRAVIS_OS_NAME" = "linux" ]; then 28 | sudo apt-get update 29 | sudo apt-get install -y openmpi-bin libopenmpi-dev gcc 30 | fi 31 | install: 32 | - pip install -r requirements-dev.txt 33 | - pip install codecov 34 | - pip install mpi4py 35 | - pip install pycasso 36 | - python setup.py build 37 | - python setup.py develop 38 | - pip install pytest-cov 39 | script: 40 | - flake8 pyuoi tests examples 41 | - pytest --cov=./ tests 42 | - sphinx-build -W -b html docs/source docs/build 43 | after_success: 44 | - codecov 45 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | PyUoI Copyright (c) 2019, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | (1) Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | 7 | (2) Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | 9 | (3) Neither the name of the University of California, Lawrence Berkeley National Laboratory, U.S. Dept. of Energy nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
12 | 13 | You are under no obligation whatsoever to provide any bug fixes, patches, or upgrades to the features, functionality or performance of the source code ("Enhancements") to anyone; however, if you choose to make your Enhancements available either publicly, or directly to Lawrence Berkeley National Laboratory, without imposing a separate written license agreement for such Enhancements, then you hereby grant the following license: a non-exclusive, royalty-free perpetual license to install, use, modify, prepare derivative works, incorporate into other computer software, distribute, and sublicense such enhancements or derivative works thereof, in binary and source code form. 14 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include pyproject.toml 2 | include requirements.txt 3 | include requirements-dev.txt 4 | include LICENSE.txt 5 | include liblbfgs/*.h 6 | include liblbfgs/*.c 7 | include pyuoi/lbfgs/*.pyx 8 | exclude pyuoi/lbfgs/*.c 9 | recursive-include test* *.py 10 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PYTHON = python 2 | FLAKE = flake8 3 | COVERAGE = coverage 4 | 5 | help: 6 | @echo "Please use \`make <target>' where <target> is one of" 7 | @echo " init to install required packages" 8 | @echo " build to build the python package(s)" 9 | @echo " install to build and install the python package(s)" 10 | @echo " develop to build and install the python package(s) for development" 11 | @echo " test to run all integration and unit tests" 12 | @echo " htmldoc to make the HTML documentation and open it with the default browser" 13 | @echo " coverage to run tests, build coverage HTML report and open it with the default browser" 14 | @echo "" 15 | @echo "Advanced targets" 16 | @echo " apidoc to generate API docs *.rst files from sources" 17 | @echo " coverage-only to run tests and build coverage report" 18 | @echo " coverage-open to open coverage HTML report in the default browser" 19 | @echo " htmlclean to remove all generated documentation" 20 | @echo " htmldoc-only to make the HTML documentation" 21 | @echo " htmldoc-open to open the HTML documentation with the default browser" 22 | @echo " pdfdoc to make the LaTeX sources and build the PDF of the documentation" 23 | 24 | init: 25 | pip install -r requirements.txt -r requirements-dev.txt -r requirements-doc.txt 26 | 27 | build: 28 | $(PYTHON) setup.py build 29 | 30 | install: build 31 | $(PYTHON) setup.py install 32 | 33 | develop: build 34 | $(PYTHON) setup.py develop 35 | 36 | test: 37 | pip install -r requirements-dev.txt 38 | tox 39 | 40 | flake: 41 | $(FLAKE) pyuoi/ 42 | $(FLAKE) tests/ 43 | $(FLAKE) --ignore E402,W504 docs/gallery 44 | 45 | checkpdb: 46 | find {pyuoi,tests} -name "*.py" -exec grep -Hn pdb {} \; 47 | 48 | #devtest: 49 | #$(PYTHON) -W ignore:::pynwb.form.build.map: test.py -fpi 50 | 51 | testclean: 52 | rm *.npy *.nwb *.yaml 53 | 54 | apidoc: 55 | pip install -r requirements-doc.txt 56 | cd docs && $(MAKE) apidoc 57 | 58 | htmldoc-only: apidoc 59 | cd docs && $(MAKE) html 60 | 61 | htmlclean: 62 | cd docs && $(MAKE) clean 63 | 64 | htmldoc-open: 65 | @echo "" 66 | @echo "To view the HTML documentation open: docs/_build/html/index.html" 67 | open docs/_build/html/index.html || xdg-open docs/_build/html/index.html 68 | 69 | htmldoc: htmldoc-only htmldoc-open 70 |
71 | pdfdoc: 72 | cd docs && $(MAKE) latexpdf 73 | @echo "" 74 | @echo "To view the PDF documentation open: docs/_build/latex/PyUoI.pdf" 75 | 76 | coverage-only: 77 | tox -e localcoverage 78 | 79 | coverage-open: 80 | @echo "To view coverage data open: ./tests/coverage/htmlcov/index.html" 81 | open ./tests/coverage/htmlcov/index.html || xdg-open ./tests/coverage/htmlcov/index.html 82 | 83 | coverage: coverage-only coverage-open 84 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | PyUoI logo 2 | 3 | [![Actions Status](https://github.com/BouchardLab/pyuoi/workflows/pyuoi_tests/badge.svg)](https://github.com/BouchardLab/pyuoi/actions) 4 | [![codecov](https://codecov.io/gh/BouchardLab/pyuoi/branch/main/graph/badge.svg?token=DxEQxVEam8)](https://codecov.io/gh/BouchardLab/pyuoi) 5 | [![Documentation Status](https://readthedocs.org/projects/pyuoi/badge/?version=latest)](https://pyuoi.readthedocs.io/en/latest/?badge=latest) 6 | ![PyPI](https://img.shields.io/pypi/v/pyuoi) 7 | [![Anaconda-Server Badge](https://anaconda.org/conda-forge/pyuoi/badges/installer/conda.svg)](https://conda.anaconda.org/conda-forge) 8 | [![DOI](https://joss.theoj.org/papers/10.21105/joss.01799/status.svg)](https://doi.org/10.21105/joss.01799) 9 | 10 | 11 | PyUoI contains implementations of the Union of Intersections (UoI) framework for a variety 12 | of penalized generalized linear models as well as dimensionality reduction 13 | techniques such as column subset selection and non-negative matrix 14 | factorization. In general, UoI is a statistical machine learning framework that 15 | leverages two concepts in model inference: 16 | 17 | 1. Separating the selection and estimation problems to simultaneously achieve 18 | sparse models with low-bias and low-variance parameter estimates. 19 | 2. Stability to perturbations in both selection and estimation. 20 | 21 | 22 | PyUoI is designed to function similarly to ``scikit-learn``, as it often builds 23 | upon ``scikit-learn``'s implementations of the aforementioned algorithms. 24 | 25 | Further details on the UoI framework can be found in the NeurIPS paper (Bouchard et al., 2017). 26 | 27 | # Installation 28 | 29 | PyUoI is available for Python 3 on PyPI: 30 | 31 | ``` 32 | pip install pyuoi 33 | ``` 34 | 35 | and through conda-forge: 36 | 37 | ``` 38 | conda install pyuoi -c conda-forge 39 | ``` 40 | 41 | # Requirements 42 | 43 | ## Runtime 44 | 45 | PyUoI requires 46 | 47 | * numpy>=1.14 48 | * h5py>=2.8 49 | * scikit-learn>=0.24 50 | 51 | and optionally 52 | 53 | * pycasso 54 | * mpi4py 55 | 56 | to run. 57 | 58 | ## Develop 59 | 60 | To develop PyUoI you will additionally need 61 | 62 | * cython 63 | 64 | to build from source and 65 | 66 | * pytest 67 | * flake8 68 | 69 | to run the tests and check formatting. 70 | 71 | PyUoI has been built and tested on Python 3.9.18 with 72 | 73 | * numpy==1.26.1 74 | * h5py==3.10.0 75 | * scikit-learn==1.3.1 76 | * cython==3.0.4 77 | * pytest==7.4.2 78 | * flake8==6.1.0 79 | 80 | # Features 81 | 82 | PyUoI is split up into two modules, with the following UoI algorithms: 83 | 84 | * `linear_model` (generalized linear models) 85 | * Lasso penalized linear regression (UoILasso). 86 | * Elastic-net penalized linear regression (UoIElasticNet). 87 | * Logistic regression (Bernoulli and multinomial) (UoILogistic). 88 | * Poisson regression (UoIPoisson).
89 | * `decomposition` (dimensionality reduction) 90 | * Column subset selection (UoICSS). 91 | * Non-negative matrix factorization (UoINMF). 92 | 93 | Similar to `scikit-learn`, each UoI algorithm has its own Python class. 94 | 95 | # Documentation 96 | 97 | Please see our [ReadTheDocs](https://pyuoi.readthedocs.io/en/latest/) page for an introduction to Union of Intersections, usage of PyUoI, and the API. 98 | 99 | # Copyright 100 | 101 | PyUoI Copyright (c) 2019, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. 102 | 103 | If you have questions about your rights to use or distribute this software, please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov referring to "PyUoI" (LBNL Ref 2019-157). 104 | 105 | NOTICE. This software was developed under funding from the U.S. Department of Energy. As such, the U.S. Government has been granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software to reproduce, prepare derivative works, and perform publicly and display publicly. The U.S. Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. 106 | -------------------------------------------------------------------------------- /bin/generate_build.sh: -------------------------------------------------------------------------------- 1 | eval "$(conda shell.bash hook)" 2 | mkdir dist 3 | for py in 3.6 3.7 3.8; do 4 | git clone https://github.com/BouchardLab/pyuoi.git 5 | cd pyuoi 6 | conda create -y -n temp_build_env python=$py 7 | conda activate temp_build_env 8 | conda install -y numpy cython 9 | pip install setuptools wheel 10 | python setup.py sdist bdist_wheel 11 | conda deactivate 12 | conda remove -y -n temp_build_env --all 13 | mv dist/* ../dist/. 14 | cd .. 15 | rm -rf pyuoi 16 | done 17 | -------------------------------------------------------------------------------- /bin/test_mpi.sh: -------------------------------------------------------------------------------- 1 | mpiexec -n 4 pytest -sv $1 2 | mpiexec -n 49 pytest -sv $1 3 | -------------------------------------------------------------------------------- /codecov.yaml: -------------------------------------------------------------------------------- 1 | ignore: 2 | - "setup.py" 3 | - "tests" 4 | -------------------------------------------------------------------------------- /compat/win32/stdint.h: -------------------------------------------------------------------------------- 1 | // ISO C9x compliant stdint.h for Microsoft Visual Studio 2 | // Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 3 | // 4 | // Copyright (c) 2006-2008 Alexander Chemeris 5 | // 6 | // Redistribution and use in source and binary forms, with or without 7 | // modification, are permitted provided that the following conditions are met: 8 | // 9 | // 1. Redistributions of source code must retain the above copyright notice, 10 | // this list of conditions and the following disclaimer. 11 | // 12 | // 2. Redistributions in binary form must reproduce the above copyright 13 | // notice, this list of conditions and the following disclaimer in the 14 | // documentation and/or other materials provided with the distribution. 15 | //
16 | // 3. The name of the author may be used to endorse or promote products 17 | // derived from this software without specific prior written permission. 18 | // 19 | // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED 20 | // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 21 | // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO 22 | // EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 24 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 25 | // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 26 | // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 27 | // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 28 | // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | // 30 | /////////////////////////////////////////////////////////////////////////////// 31 | 32 | #ifndef _MSC_VER // [ 33 | #error "Use this header only with Microsoft Visual C++ compilers!" 34 | #endif // _MSC_VER ] 35 | 36 | #ifndef _MSC_STDINT_H_ // [ 37 | #define _MSC_STDINT_H_ 38 | 39 | #if _MSC_VER > 1000 40 | #pragma once 41 | #endif 42 | 43 | #include <limits.h> 44 | 45 | // For Visual Studio 6 in C++ mode and for many Visual Studio versions when 46 | // compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}' 47 | // or compiler give many errors like this: 48 | // error C2733: second C linkage of overloaded function 'wmemchr' not allowed 49 | #ifdef __cplusplus 50 | extern "C" { 51 | #endif 52 | # include <wchar.h> 53 | #ifdef __cplusplus 54 | } 55 | #endif 56 | 57 | // Define _W64 macros to mark types changing their size, like intptr_t. 58 | #ifndef _W64 59 | # if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300 60 | # define _W64 __w64 61 | # else 62 | # define _W64 63 | # endif 64 | #endif 65 | 66 | 67 | // 7.18.1 Integer types 68 | 69 | // 7.18.1.1 Exact-width integer types 70 | 71 | // Visual Studio 6 and Embedded Visual C++ 4 doesn't 72 | // realize that, e.g. char has the same size as __int8 73 | // so we give up on __intX for them.
74 | #if (_MSC_VER < 1300) 75 | typedef signed char int8_t; 76 | typedef signed short int16_t; 77 | typedef signed int int32_t; 78 | typedef unsigned char uint8_t; 79 | typedef unsigned short uint16_t; 80 | typedef unsigned int uint32_t; 81 | #else 82 | typedef signed __int8 int8_t; 83 | typedef signed __int16 int16_t; 84 | typedef signed __int32 int32_t; 85 | typedef unsigned __int8 uint8_t; 86 | typedef unsigned __int16 uint16_t; 87 | typedef unsigned __int32 uint32_t; 88 | #endif 89 | typedef signed __int64 int64_t; 90 | typedef unsigned __int64 uint64_t; 91 | 92 | 93 | // 7.18.1.2 Minimum-width integer types 94 | typedef int8_t int_least8_t; 95 | typedef int16_t int_least16_t; 96 | typedef int32_t int_least32_t; 97 | typedef int64_t int_least64_t; 98 | typedef uint8_t uint_least8_t; 99 | typedef uint16_t uint_least16_t; 100 | typedef uint32_t uint_least32_t; 101 | typedef uint64_t uint_least64_t; 102 | 103 | // 7.18.1.3 Fastest minimum-width integer types 104 | typedef int8_t int_fast8_t; 105 | typedef int16_t int_fast16_t; 106 | typedef int32_t int_fast32_t; 107 | typedef int64_t int_fast64_t; 108 | typedef uint8_t uint_fast8_t; 109 | typedef uint16_t uint_fast16_t; 110 | typedef uint32_t uint_fast32_t; 111 | typedef uint64_t uint_fast64_t; 112 | 113 | // 7.18.1.4 Integer types capable of holding object pointers 114 | #ifdef _WIN64 // [ 115 | typedef signed __int64 intptr_t; 116 | typedef unsigned __int64 uintptr_t; 117 | #else // _WIN64 ][ 118 | typedef _W64 signed int intptr_t; 119 | typedef _W64 unsigned int uintptr_t; 120 | #endif // _WIN64 ] 121 | 122 | // 7.18.1.5 Greatest-width integer types 123 | typedef int64_t intmax_t; 124 | typedef uint64_t uintmax_t; 125 | 126 | 127 | // 7.18.2 Limits of specified-width integer types 128 | 129 | #if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 130 | 131 | // 7.18.2.1 Limits of exact-width integer types 132 | #define INT8_MIN ((int8_t)_I8_MIN) 133 | #define INT8_MAX _I8_MAX 134 | #define INT16_MIN ((int16_t)_I16_MIN) 135 | #define INT16_MAX _I16_MAX 136 | #define INT32_MIN ((int32_t)_I32_MIN) 137 | #define INT32_MAX _I32_MAX 138 | #define INT64_MIN ((int64_t)_I64_MIN) 139 | #define INT64_MAX _I64_MAX 140 | #define UINT8_MAX _UI8_MAX 141 | #define UINT16_MAX _UI16_MAX 142 | #define UINT32_MAX _UI32_MAX 143 | #define UINT64_MAX _UI64_MAX 144 | 145 | // 7.18.2.2 Limits of minimum-width integer types 146 | #define INT_LEAST8_MIN INT8_MIN 147 | #define INT_LEAST8_MAX INT8_MAX 148 | #define INT_LEAST16_MIN INT16_MIN 149 | #define INT_LEAST16_MAX INT16_MAX 150 | #define INT_LEAST32_MIN INT32_MIN 151 | #define INT_LEAST32_MAX INT32_MAX 152 | #define INT_LEAST64_MIN INT64_MIN 153 | #define INT_LEAST64_MAX INT64_MAX 154 | #define UINT_LEAST8_MAX UINT8_MAX 155 | #define UINT_LEAST16_MAX UINT16_MAX 156 | #define UINT_LEAST32_MAX UINT32_MAX 157 | #define UINT_LEAST64_MAX UINT64_MAX 158 | 159 | // 7.18.2.3 Limits of fastest minimum-width integer types 160 | #define INT_FAST8_MIN INT8_MIN 161 | #define INT_FAST8_MAX INT8_MAX 162 | #define INT_FAST16_MIN INT16_MIN 163 | #define INT_FAST16_MAX INT16_MAX 164 | #define INT_FAST32_MIN INT32_MIN 165 | #define INT_FAST32_MAX INT32_MAX 166 | #define INT_FAST64_MIN INT64_MIN 167 | #define INT_FAST64_MAX INT64_MAX 168 | #define UINT_FAST8_MAX UINT8_MAX 169 | #define UINT_FAST16_MAX UINT16_MAX 170 | #define UINT_FAST32_MAX UINT32_MAX 171 | #define UINT_FAST64_MAX UINT64_MAX 172 | 173 | // 7.18.2.4 Limits of integer types capable of holding 
object pointers 174 | #ifdef _WIN64 // [ 175 | # define INTPTR_MIN INT64_MIN 176 | # define INTPTR_MAX INT64_MAX 177 | # define UINTPTR_MAX UINT64_MAX 178 | #else // _WIN64 ][ 179 | # define INTPTR_MIN INT32_MIN 180 | # define INTPTR_MAX INT32_MAX 181 | # define UINTPTR_MAX UINT32_MAX 182 | #endif // _WIN64 ] 183 | 184 | // 7.18.2.5 Limits of greatest-width integer types 185 | #define INTMAX_MIN INT64_MIN 186 | #define INTMAX_MAX INT64_MAX 187 | #define UINTMAX_MAX UINT64_MAX 188 | 189 | // 7.18.3 Limits of other integer types 190 | 191 | #ifdef _WIN64 // [ 192 | # define PTRDIFF_MIN _I64_MIN 193 | # define PTRDIFF_MAX _I64_MAX 194 | #else // _WIN64 ][ 195 | # define PTRDIFF_MIN _I32_MIN 196 | # define PTRDIFF_MAX _I32_MAX 197 | #endif // _WIN64 ] 198 | 199 | #define SIG_ATOMIC_MIN INT_MIN 200 | #define SIG_ATOMIC_MAX INT_MAX 201 | 202 | #ifndef SIZE_MAX // [ 203 | # ifdef _WIN64 // [ 204 | # define SIZE_MAX _UI64_MAX 205 | # else // _WIN64 ][ 206 | # define SIZE_MAX _UI32_MAX 207 | # endif // _WIN64 ] 208 | #endif // SIZE_MAX ] 209 | 210 | // WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h> 211 | #ifndef WCHAR_MIN // [ 212 | # define WCHAR_MIN 0 213 | #endif // WCHAR_MIN ] 214 | #ifndef WCHAR_MAX // [ 215 | # define WCHAR_MAX _UI16_MAX 216 | #endif // WCHAR_MAX ] 217 | 218 | #define WINT_MIN 0 219 | #define WINT_MAX _UI16_MAX 220 | 221 | #endif // __STDC_LIMIT_MACROS ] 222 | 223 | 224 | // 7.18.4 Limits of other integer types 225 | 226 | #if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 227 | 228 | // 7.18.4.1 Macros for minimum-width integer constants 229 | 230 | #define INT8_C(val) val##i8 231 | #define INT16_C(val) val##i16 232 | #define INT32_C(val) val##i32 233 | #define INT64_C(val) val##i64 234 | 235 | #define UINT8_C(val) val##ui8 236 | #define UINT16_C(val) val##ui16 237 | #define UINT32_C(val) val##ui32 238 | #define UINT64_C(val) val##ui64 239 | 240 | // 7.18.4.2 Macros for greatest-width integer constants 241 | #define INTMAX_C INT64_C 242 | #define UINTMAX_C UINT64_C 243 | 244 | #endif // __STDC_CONSTANT_MACROS ] 245 | 246 | 247 | #endif // _MSC_STDINT_H_ ] 248 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = source 8 | BUILDDIR = build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/source/api.rst: -------------------------------------------------------------------------------- 1 | .. PyUoI 2 | 3 | === 4 | API 5 | === 6 | 7 | ..
toctree:: 8 | :maxdepth: 2 9 | 10 | pyuoi/linear_model/linear_model 11 | pyuoi/decomposition/decomposition 12 | pyuoi/datasets/datasets 13 | pyuoi/utils 14 | pyuoi/mpi_utils 15 | -------------------------------------------------------------------------------- /docs/source/art/pyuoi.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BouchardLab/pyuoi/25e47655a07895f206c2e3ee3b259421c144a05d/docs/source/art/pyuoi.pdf -------------------------------------------------------------------------------- /docs/source/art/pyuoi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BouchardLab/pyuoi/25e47655a07895f206c2e3ee3b259421c144a05d/docs/source/art/pyuoi.png -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | # 15 | import os 16 | import sys 17 | import sphinx_rtd_theme 18 | 19 | # Get the project root dir, which is the parent parent dir of this 20 | project_root = os.path.dirname(os.getcwd()) 21 | 22 | # Insert the project root dir as the first element in the PYTHONPATH. 23 | # This lets us ensure that the source package is imported, and that its 24 | # version is used. 25 | sys.path.insert(0, project_root) 26 | 27 | 28 | # -- Project information ----------------------------------------------------- 29 | 30 | project = 'PyUoI' 31 | copyright = 'The Regents of the University of California, through Lawrence Berkeley National Laboratory' 32 | author = 'Contributors' 33 | 34 | # The short X.Y version 35 | version = '' 36 | # The full version, including alpha/beta/rc tags 37 | release = 'alpha' 38 | 39 | 40 | # -- General configuration --------------------------------------------------- 41 | 42 | # If your documentation needs a minimal Sphinx version, state it here. 43 | # 44 | # needs_sphinx = '1.0' 45 | 46 | # Add any Sphinx extension module names here, as strings. They can be 47 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 48 | # ones. 
49 | extensions = [ 50 | 'sphinx.ext.autodoc', 51 | 'sphinx.ext.doctest', 52 | 'sphinx.ext.intersphinx', 53 | 'sphinx.ext.todo', 54 | 'sphinx.ext.coverage', 55 | 'sphinx.ext.mathjax', 56 | 'sphinx.ext.ifconfig', 57 | 'sphinx.ext.viewcode', 58 | 'sphinx.ext.githubpages', 59 | 'sphinx.ext.napoleon', 60 | 'sphinx.ext.mathjax', 61 | 'sphinx_rtd_theme', 62 | 'sphinx_gallery.gen_gallery' 63 | ] 64 | 65 | sphinx_gallery_conf = { 66 | # path to your examples scripts 67 | 'examples_dirs': ['../../examples'], 68 | # path where to save gallery generated examples 69 | 'gallery_dirs': ['auto_examples'], 70 | #'subsection_order': ExplicitOrder(['../gallery/general', '../gallery/domain']), 71 | 'backreferences_dir': 'gen_modules/backreferences', 72 | 'min_reported_time': 5 73 | } 74 | 75 | # Add any paths that contain templates here, relative to this directory. 76 | templates_path = ['_templates'] 77 | 78 | # The suffix(es) of source filenames. 79 | # You can specify multiple suffix as a list of string: 80 | # 81 | # source_suffix = ['.rst', '.md'] 82 | source_suffix = '.rst' 83 | 84 | # The master toctree document. 85 | master_doc = 'index' 86 | 87 | # The language for content autogenerated by Sphinx. Refer to documentation 88 | # for a list of supported languages. 89 | # 90 | # This is also used if you do content translation via gettext catalogs. 91 | # Usually you set "language" from the command line for these cases. 92 | language = 'en' 93 | 94 | # List of patterns, relative to source directory, that match files and 95 | # directories to ignore when looking for source files. 96 | # This pattern also affects html_static_path and html_extra_path. 97 | exclude_patterns = [] 98 | 99 | # The name of the Pygments (syntax highlighting) style to use. 100 | pygments_style = None 101 | 102 | 103 | # -- Options for HTML output ------------------------------------------------- 104 | 105 | # The theme to use for HTML and HTML Help pages. See the documentation for 106 | # a list of builtin themes. 107 | # 108 | html_theme = "sphinx_rtd_theme" 109 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 110 | 111 | # Theme options are theme-specific and customize the look and feel of a theme 112 | # further. For a list of options available for each theme, see the 113 | # documentation. 114 | # 115 | # html_theme_options = {} 116 | 117 | # Add any paths that contain custom static files (such as style sheets) here, 118 | # relative to this directory. They are copied after the builtin static files, 119 | # so a file named "default.css" will overwrite the builtin "default.css". 120 | # html_static_path = ['_static'] 121 | 122 | # Custom sidebar templates, must be a dictionary that maps document names 123 | # to template names. 124 | # 125 | # The default sidebars (for documents that don't match any pattern) are 126 | # defined by theme itself. Builtin themes are using these templates by 127 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 128 | # 'searchbox.html']``. 129 | # 130 | # html_sidebars = {} 131 | 132 | html_logo = 'art/pyuoi.png' 133 | html_theme_options = {'logo_only': True} 134 | 135 | 136 | # -- Options for HTMLHelp output --------------------------------------------- 137 | 138 | # Output file base name for HTML help builder. 139 | htmlhelp_basename = 'PyUoIdoc' 140 | 141 | 142 | # -- Options for LaTeX output ------------------------------------------------ 143 | 144 | latex_elements = { 145 | # The paper size ('letterpaper' or 'a4paper'). 
146 | # 147 | # 'papersize': 'letterpaper', 148 | 149 | # The font size ('10pt', '11pt' or '12pt'). 150 | # 151 | # 'pointsize': '10pt', 152 | 153 | # Additional stuff for the LaTeX preamble. 154 | # 155 | # 'preamble': '', 156 | 157 | # Latex figure (float) alignment 158 | # 159 | # 'figure_align': 'htbp', 160 | } 161 | 162 | # Grouping the document tree into LaTeX files. List of tuples 163 | # (source start file, target name, title, 164 | # author, documentclass [howto, manual, or own class]). 165 | latex_documents = [ 166 | (master_doc, 'PyUoI.tex', 'PyUoI Documentation', 167 | 'BouchardLab', 'manual'), 168 | ] 169 | 170 | 171 | # -- Options for manual page output ------------------------------------------ 172 | 173 | # One entry per manual page. List of tuples 174 | # (source start file, name, description, authors, manual section). 175 | man_pages = [ 176 | (master_doc, 'pyuoi', 'PyUoI Documentation', 177 | [author], 1) 178 | ] 179 | 180 | 181 | # -- Options for Texinfo output ---------------------------------------------- 182 | 183 | # Grouping the document tree into Texinfo files. List of tuples 184 | # (source start file, target name, title, author, 185 | # dir menu entry, description, category) 186 | texinfo_documents = [ 187 | (master_doc, 'PyUoI', 'PyUoI Documentation', 188 | author, 'PyUoI', 'One line description of project.', 189 | 'Miscellaneous'), 190 | ] 191 | 192 | 193 | # -- Options for Epub output ------------------------------------------------- 194 | 195 | # Bibliographic Dublin Core info. 196 | epub_title = project 197 | 198 | # The unique identifier of the text. This can be a ISBN number 199 | # or the project homepage. 200 | # 201 | # epub_identifier = '' 202 | 203 | # A unique identification for the text. 204 | # 205 | # epub_uid = '' 206 | 207 | # A list of files that should not be packed into the epub file. 208 | epub_exclude_files = ['search.html'] 209 | 210 | 211 | # -- Extension configuration ------------------------------------------------- 212 | 213 | # -- Options for intersphinx extension --------------------------------------- 214 | 215 | # Example configuration for intersphinx: refer to the Python standard library. 216 | intersphinx_mapping = {"python": ("https://docs.python.org/", None), 217 | "sklearn": ("https://scikit-learn.org/stable/", None)} 218 | 219 | # -- Options for todo extension ---------------------------------------------- 220 | 221 | # If true, `todo` and `todoList` produce output, else they produce nothing. 222 | todo_include_todos = True 223 | -------------------------------------------------------------------------------- /docs/source/contributing.rst: -------------------------------------------------------------------------------- 1 | .. PyUoI 2 | 3 | ========================== 4 | How to contribute to PyUoI 5 | ========================== 6 | 7 | Code of Conduct 8 | --------------- 9 | 10 | Contributing Patches and Changes 11 | -------------------------------- 12 | 13 | First, check whether the feature or change has already been contributed. If not, from your local copy directory, use the following commands. 14 | 15 | If you have not already, you will need to clone the repo: 16 | 17 | .. code-block:: bash 18 | 19 | $ git clone https://github.com/BouchardLab/PyUoI.git 20 | 21 | 1) First create a new branch to work on 22 | 23 | .. code-block:: bash 24 | 25 | $ git checkout -b <new_branch> 26 | 27 | 2) Make your changes. 28 | 29 | 3) We will automatically run tests to ensure that your contributions didn't break anything and that they follow our style guide. You can speed up the testing cycle by running these tests locally on your own computer with ``pytest -sv tests``, ``flake8 pyuoi``, and ``flake8 tests``. 30 | 31 | 4) Push your feature branch to origin 32 | 33 | .. code-block:: bash 34 | 35 | $ git push origin <new_branch> 36 | 37 | 5) Once you have tested and finalized your changes, create a pull request (PR): 38 | 39 | * Ensure the PR description clearly describes the issue and changes. 40 | * Reference the relevant issue number if applicable. Writing "Closes #29" in the PR description will automatically close issue #29 when the PR is merged. 41 | * If your changes fix a bug or add a feature, write a test so that it will not break in the future. 42 | * Before submitting, please ensure that the tests pass and that the code follows the standard coding style. 43 | 44 | Styleguides 45 | ----------- 46 | 47 | Documentation Styleguide 48 | ^^^^^^^^^^^^^^^^^^^^^^^^ 49 | 50 | All documentation is written in reStructuredText (RST) using Sphinx. 51 | 52 | Format Specification Styleguide 53 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 54 | 55 | Python Code Styleguide 56 | ^^^^^^^^^^^^^^^^^^^^^^ 57 | 58 | Python coding style is checked automatically via ``flake8`` for PEP8 compliance during pull requests. 59 | 60 | License and Copyright 61 | --------------------- 62 | 63 | PyUoI Copyright (c) 2019, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. 64 | 65 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 66 | 67 | (1) Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 68 | 69 | (2) Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 70 | 71 | (3) Neither the name of the University of California, Lawrence Berkeley National Laboratory, U.S. Dept. of Energy nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 72 | 73 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
74 | 75 | You are under no obligation whatsoever to provide any bug fixes, patches, or upgrades to the features, functionality or performance of the source code ("Enhancements") to anyone; however, if you choose to make your Enhancements available either publicly, or directly to Lawrence Berkeley National Laboratory, without imposing a separate written license agreement for such Enhancements, then you hereby grant the following license: a non-exclusive, royalty-free perpetual license to install, use, modify, prepare derivative works, incorporate into other computer software, distribute, and sublicense such enhancements or derivative works thereof, in binary and source code form. 76 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. PyUoI 2 | 3 | ===================================================== 4 | PyUoI: The Union of Intersections Framework in Python 5 | ===================================================== 6 | 7 | PyUoI contains implementations of the Union of Intersections (UoI) framework for a variety 8 | of penalized generalized linear models as well as dimensionality reduction 9 | techniques such as column subset selection and non-negative matrix 10 | factorization. In general, UoI is a statistical machine learning framework that 11 | leverages two concepts in model inference: 12 | 13 | #. Separating the selection and estimation problems to simultaneously achieve 14 | sparse models with low-bias and low-variance parameter estimates. 15 | #. Stability to perturbations in both selection and estimation. 16 | 17 | 18 | PyUoI is designed to function similarly to scikit-learn, as it often builds 19 | upon scikit-learn's implementations of the aforementioned algorithms. 20 | 21 | Further details on the UoI framework can be found in [Bouchard2017]_ and 22 | [Ubaru2017]_. 23 | 24 | .. toctree:: 25 | :maxdepth: 2 26 | :caption: Contents: 27 | 28 | introduction 29 | installation 30 | auto_examples/index 31 | contributing 32 | mpi 33 | api 34 | 35 | .. rubric:: References 36 | 37 | .. [Bouchard2017] Bouchard, K., Bujan, A., Roosta-Khorasani, F., Ubaru, S., 38 | Prabhat, M., Snijders, A., ... & Bhattacharya, S. (2017). Union of 39 | intersections (UoI) for interpretable data driven discovery and 40 | prediction. In Advances in Neural Information Processing 41 | Systems (pp. 1078-1086). 42 | .. [Ubaru2017] Ubaru, S., Wu, K., & Bouchard, K. E. (2017, December). UoI-NMF 43 | cluster: a robust nonnegative matrix factorization algorithm for improved 44 | parts-based decomposition and reconstruction of noisy data. In 2017 16th 45 | IEEE International Conference on Machine Learning and Applications (ICMLA) 46 | (pp. 241-248). IEEE. 47 | 48 | 49 | Indices and tables 50 | ------------------ 51 | 52 | * :ref:`genindex` 53 | * :ref:`modindex` 54 | * :ref:`search` 55 | -------------------------------------------------------------------------------- /docs/source/installation.rst: -------------------------------------------------------------------------------- 1 | .. PyUoI 2 | 3 | ============ 4 | Installation 5 | ============ 6 | 7 | PyUoI is available for Python 3 on PyPI: 8 | 9 | .. code-block:: bash 10 | 11 | $ pip install pyuoi 12 | 13 | and through conda-forge: 14 | 15 | .. code-block:: bash 16 | 17 | $ conda install pyuoi -c conda-forge 18 | 19 | ``pip`` and ``conda`` will install the required dependencies.
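A quick sanity check that the install worked (just an illustration; it assumes your Python 3 environment is on the path) is to import the package:

.. code-block:: bash

    $ python -c "import pyuoi"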
20 | 21 | Requirements 22 | ------------ 23 | 24 | Runtime 25 | ^^^^^^^ 26 | 27 | PyUoI requires 28 | 29 | * numpy>=1.14 30 | * h5py>=2.8 31 | * scikit-learn>=0.24 32 | 33 | and optionally 34 | 35 | * pycasso 36 | * mpi4py 37 | 38 | to run. 39 | 40 | Develop 41 | ^^^^^^^ 42 | 43 | To develop PyUoI you will additionally need 44 | 45 | * cython 46 | 47 | to build from source and 48 | 49 | * pytest 50 | * flake8 51 | 52 | to run the tests and check formatting. 53 | 54 | PyUoI has been built and tested on Python 3.9.18 with 55 | 56 | * numpy==1.26.1 57 | * h5py==3.10.0 58 | * scikit-learn==1.3.1 59 | * cython==3.0.4 60 | * pytest==7.4.2 61 | * flake8==6.1.0 62 | 63 | Docs 64 | ^^^^ 65 | 66 | To build the docs you will additionally need 67 | 68 | * sphinx 69 | * sphinx_rtd_theme 70 | 71 | Install from source 72 | ------------------- 73 | 74 | The latest development version of the code can be installed from https://github.com/BouchardLab/PyUoI 75 | 76 | .. code-block:: bash 77 | 78 | # use ssh 79 | $ git clone git@github.com:BouchardLab/pyuoi.git 80 | # or use https 81 | $ git clone https://github.com/BouchardLab/pyuoi.git 82 | $ cd pyuoi 83 | $ pip install -e .[dev] 84 | -------------------------------------------------------------------------------- /docs/source/mpi.rst: -------------------------------------------------------------------------------- 1 | .. PyUoI 2 | 3 | === 4 | MPI 5 | === 6 | 7 | MPI (Message Passing Interface) is a parallel computing interface that can be 8 | used through the ``mpi4py`` library in Python. Currently, the models in the 9 | ``linear_model`` module can take advantage of MPI parallelism during model 10 | fitting. We assume some familiarity with using ``mpi4py`` here. 11 | 12 | During the UoI feature selection step, many models are fit across bootstraps and 13 | regularization parameters. These can all potentially be done in parallel using 14 | MPI. Similarly, during UoI estimation, many models are fit across bootstraps 15 | and supports. These can also be done in parallel. 16 | 17 | Using MPI parallelism requires ``mpi4py`` to be installed. In your code, the 18 | two extra things you will need to do to use MPI parallelism are 1) to make sure 19 | the dataset is on all ranks and 2) to pass an MPI communicator into the model. 20 | 21 | Broadcasting the dataset to all ranks 22 | ------------------------------------- 23 | 24 | PyUoI provides helper functions to share data across MPI ranks. The two 25 | strategies we support are 1) loading the data from an HDF5 file and 2) loading 26 | the data on a single rank by hand and broadcasting it to all ranks. 27 | 28 | Loading data from an HDF5 file 29 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 30 | 31 | .. code:: python 32 | 33 | from pyuoi.mpi_utils import load_data_MPI 34 | 35 | # file with keys 'X' and 'y' 36 | h5_file = 'my_file.h5' 37 | 38 | X, y = load_data_MPI(h5_file) 39 | 40 | Loading data by hand 41 | ^^^^^^^^^^^^^^^^^^^^ 42 | 43 | .. code:: python 44 | 45 | from mpi4py import MPI 46 | import numpy as np 47 | from pyuoi.mpi_utils import Bcast_from_root 48 | 49 | 50 | comm = MPI.COMM_WORLD 51 | rank = comm.rank 52 | 53 | X = None 54 | y = None 55 | if rank == 0: 56 | # file with keys 'X' and 'y' 57 | data = np.load('my_file.npz') 58 | X = data['X'] 59 | y = data['y'] 60 | 61 | X = Bcast_from_root(X, comm) 62 | y = Bcast_from_root(y, comm) 63 | 64 | Fitting with MPI parallelism 65 | ---------------------------- 66 | 67 | Fitting models with MPI parallelism is similar to fitting models with no 68 | parallelism. 69 | 70 | ..
code:: python 71 | 72 | from mpi4py import MPI 73 | from pyuoi.mpi_utils import load_data_MPI 74 | from pyuoi.linear_model import UoI_Lasso 75 | 76 | comm = MPI.COMM_WORLD 77 | rank = comm.rank 78 | 79 | # file with keys 'X' and 'y' 80 | h5_file = 'my_file.h5' 81 | 82 | X, y = load_data_MPI(h5_file) 83 | 84 | model = UoI_Lasso(comm=comm) 85 | model.fit(X, y) 86 | 87 | # model will now have fit parameters across all ranks 88 | -------------------------------------------------------------------------------- /docs/source/pyuoi/datasets/datasets.rst: -------------------------------------------------------------------------------- 1 | ============== 2 | pyuoi.datasets 3 | ============== 4 | 5 | Dataset utility functions for the ``pyuoi`` package. 6 | 7 | Testing Utilities 8 | ----------------- 9 | 10 | .. automodule:: pyuoi.datasets 11 | :noindex: 12 | :members: make_linear_regression, make_classification, 13 | make_poisson_regression 14 | -------------------------------------------------------------------------------- /docs/source/pyuoi/decomposition/decomposition.rst: -------------------------------------------------------------------------------- 1 | ############# 2 | decomposition 3 | ############# 4 | 5 | Abstract Base Class 6 | ------------------- 7 | 8 | Decomposition classes are built through an ``AbstractDecompositionModel``, which 9 | extends ``scikit-learn``'s ``BaseEstimator`` class to include methods that are 10 | relevant for decomposition methods. 11 | 12 | .. automodule:: pyuoi.decomposition.base 13 | :members: AbstractDecompositionModel 14 | 15 | CUR Decomposition 16 | ----------------- 17 | 18 | The ``pyuoi`` package includes a class to perform ordinary CUR decomposition in 19 | addition to a class that performs UoI\ :sub:`CUR`. 20 | 21 | .. automodule:: pyuoi.decomposition.CUR 22 | :members: CUR, UoI_CUR 23 | 24 | Non-negative Matrix Factorization 25 | --------------------------------- 26 | 27 | UoI\ :sub:`NMF` can be customized with various NMF, clustering, 28 | non-negative least squares, and consensus algorithms. A base class accepts 29 | general objects or functions to perform the desired NMF, clustering, 30 | regression, and consensus grouping (provided that they have the correct 31 | structure). A derived class which uses 32 | 33 | * ``scikit-learn``'s NMF object 34 | 35 | * DBSCAN for clustering 36 | 37 | * ``scipy``'s non-negative least squares function 38 | 39 | * the median function for consensus grouping 40 | 41 | is also provided. This derived class accepts keyword arguments that correspond 42 | to the keyword arguments of the above algorithms, so that the user does not 43 | have to provide instantiated objects. 44 | 45 | .. automodule:: pyuoi.decomposition.NMF 46 | :members: UoI_NMF_Base, UoI_NMF 47 | -------------------------------------------------------------------------------- /docs/source/pyuoi/linear_model/linear_model.rst: -------------------------------------------------------------------------------- 1 | ############ 2 | linear_model 3 | ############ 4 | All linear models operate through the basic structure provided by the base 5 | class. The base class performs the necessary bootstrapping, fitting procedures, 6 | intersection step, and model averaging. The derived classes simply provide 7 | objects to the base class that perform the actual fits (e.g., UoI\ :sub:`Lasso` 8 | provides ``Lasso`` and ``LinearRegression`` objects to the base class). 
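As a quick sketch of that shared workflow (hyperparameters are left at their defaults here; see the class documentation below for the actual signatures), every model follows the familiar ``scikit-learn`` fit/predict pattern. The ``make_linear_regression`` helper is the same one used in this package's examples:

.. code:: python

    from pyuoi.linear_model import UoI_Lasso
    from pyuoi.datasets import make_linear_regression

    # toy problem: 40 features, 10 of them informative
    X, y, beta, intercept = make_linear_regression(n_features=40,
                                                   n_informative=10)

    model = UoI_Lasso()
    model.fit(X, y)            # bootstraps, selection, and estimation happen here
    y_hat = model.predict(X)   # predictions from the averaged model
    sparse_coef = model.coef_  # UoI-selected, model-averaged coefficients

The same pattern applies to ``UoI_ElasticNet``, ``UoI_L1Logistic``, and ``UoI_Poisson``.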
9 | 10 | Base Classes 11 | ------------ 12 | 13 | The base class for all linear models is ``AbstractUoILinearModel``. 14 | Intermediate derived classes, ``AbstractUoILinearRegressor`` (for lasso and 15 | elastic net), and ``AbstractUoIGeneralizedLinearRegressor`` (for logistic and 16 | Poisson regression) are also provided. 17 | 18 | .. automodule:: pyuoi.linear_model.base 19 | :members: AbstractUoILinearModel, AbstractUoILinearRegressor, 20 | AbstractUoIGeneralizedLinearRegressor 21 | 22 | Lasso 23 | ----- 24 | The ``UoI_Lasso`` object provides the base class with a ``Lasso`` object for 25 | the selection module and a ``LinearRegression`` object for the estimation 26 | module. Additionally, the ``pycasso`` solver is provided as the ``PycLasso`` 27 | class. 28 | 29 | .. automodule:: pyuoi.linear_model.lasso 30 | :members: UoI_Lasso, PycLasso 31 | 32 | Elastic Net 33 | ----------- 34 | The ``UoI_ElasticNet`` object provides the base class with an ``ElasticNet`` 35 | object for the selection module and a ``LinearRegression`` object for the 36 | estimation module. 37 | 38 | .. automodule:: pyuoi.linear_model.elasticnet 39 | :members: UoI_ElasticNet 40 | 41 | Logistic Regression 42 | ------------------- 43 | The ``UoI_L1Logistic`` module uses a custom logistic regression solver for both 44 | the selection and estimation modules. This solver uses a modified orthant-wise 45 | limited memory quasi-Newton algorithm. For estimation, no regularization is 46 | performed. 47 | 48 | .. automodule:: pyuoi.linear_model.logistic 49 | :members: UoI_L1Logistic 50 | 51 | Poisson Regression 52 | ------------------ 53 | The ``poisson`` module provides a Poisson regression solver that uses either 54 | coordinate descent or a modified orthant-wise limited memory quasi-Newton 55 | solver. ``UoI_Poisson`` uses ``Poisson`` objects for both selection and 56 | estimation; however, the estimation module uses no regularization penalties. 57 | 58 | .. automodule:: pyuoi.linear_model.poisson 59 | :members: UoI_Poisson, Poisson 60 | -------------------------------------------------------------------------------- /docs/source/pyuoi/mpi_utils.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | pyuoi.mpi_utils 3 | =============== 4 | 5 | .. automodule:: pyuoi.mpi_utils 6 | :members: 7 | -------------------------------------------------------------------------------- /docs/source/pyuoi/utils.rst: -------------------------------------------------------------------------------- 1 | =========== 2 | pyuoi.utils 3 | =========== 4 | 5 | Utility functions for the ``pyuoi`` package. 6 | 7 | Scoring Utilities 8 | ----------------- 9 | 10 | .. automodule:: pyuoi.utils 11 | :members: AIC, BIC, AICc, log_likelihood_glm 12 | 13 | Other Utilities 14 | --------------- 15 | 16 | .. automodule:: pyuoi.utils 17 | :noindex: 18 | :members: softmax, sigmoid, check_logger 19 | -------------------------------------------------------------------------------- /examples/README.rst: -------------------------------------------------------------------------------- 1 | .. _tutorials: 2 | 3 | 4 | Tutorials 5 | ========= 6 | -------------------------------------------------------------------------------- /examples/plot_swimmer.py: -------------------------------------------------------------------------------- 1 | """ 2 | .. 
_swimmer: 3 | 4 | UoI-NMF for robust parts-based decomposition of noisy data 5 | ========================================================== 6 | 7 | This example will demonstrate parts-based decomposition with 8 | UoI-NMF on the swimmer dataset. 9 | The swimmer dataset is the canonical example of separable data. 10 | 11 | """ 12 | 13 | ############################################################################### 14 | # Swimmer dataset 15 | # --------------- 16 | 17 | 18 | import matplotlib 19 | import matplotlib.pyplot as plt 20 | import numpy as np 21 | 22 | from sklearn.preprocessing import minmax_scale 23 | from sklearn.manifold import TSNE 24 | 25 | from pyuoi.decomposition import UoI_NMF 26 | from pyuoi.datasets import load_swimmer 27 | 28 | 29 | matplotlib.rcParams['figure.figsize'] = [4, 4] 30 | np.random.seed(10) 31 | 32 | swimmers = load_swimmer() 33 | swimmers = minmax_scale(swimmers, axis=1) 34 | 35 | 36 | ############################################################################### 37 | # Original Swimmer samples 38 | # ------------------------ 39 | 40 | fig, ax = plt.subplots(4, 4, subplot_kw={'xticks': [], 'yticks': []}) 41 | indices = np.random.randint(16, size=16) + np.arange(0, 256, 16) 42 | ax = ax.flatten() 43 | for i in range(len(indices)): 44 | ax[i].imshow(swimmers[indices[i]].reshape(32, 32).T, 45 | aspect='auto', cmap='gray') 46 | 47 | 48 | ############################################################################### 49 | # Swimmer samples corrupted with absolute Gaussian noise 50 | # ------------------------------------------------------ 51 | # 52 | # Corrupt the images with absolute Gaussian noise with ``std = 0.25``. 53 | 54 | 55 | reps = 1 56 | n_swim = swimmers.shape[0] 57 | corrupted = np.zeros((n_swim * reps, swimmers.shape[1])) 58 | for r in range(reps): 59 | noise = np.abs(np.random.normal(scale=0.25, size=swimmers.shape)) 60 | corrupted[r * n_swim:(r + 1) * n_swim] = swimmers + noise 61 | 62 | fig, ax = plt.subplots(4, 4, subplot_kw={'xticks': [], 'yticks': []}) 63 | ax = ax.flatten() 64 | for i in range(len(indices)): 65 | ax[i].imshow(corrupted[indices[i]].reshape(32, 32).T, 66 | aspect='auto', cmap='gray') 67 | 68 | ############################################################################### 69 | # Run UoI NMF on corrupted Swimmer data 70 | # ------------------------------------- 71 | # 72 | # Twenty bootstraps should be enough. 73 | # ``min_pts`` should be half of the number of bootstraps.
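# Under the hood (per the ``decomposition`` module docs), ``UoI_NMF`` fits an
# NMF to each bootstrap, clusters the resulting bases with DBSCAN, and takes a
# median consensus within each cluster; ``db_min_samples`` corresponds to
# DBSCAN's ``min_samples`` keyword, so with this setting a basis must recur in
# at least half of the bootstrap fits to form a cluster.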


nboot = 20
min_pts = max(nboot // 2, 1)
ranks = [16]

shape = corrupted.shape

uoi_nmf = UoI_NMF(n_boots=nboot, ranks=ranks, db_min_samples=min_pts,
                  nmf_max_iter=800)

transformed = uoi_nmf.fit_transform(corrupted)
recovered = transformed @ uoi_nmf.components_

###############################################################################
# NMF Swimmer bases
# -----------------

order = np.argsort(np.sum(uoi_nmf.components_, axis=1))

fig, ax = plt.subplots(4, 4, subplot_kw={'xticks': [], 'yticks': []})
ax = ax.flatten()
for i in range(uoi_nmf.components_.shape[0]):
    ax[i].imshow(uoi_nmf.components_[order[i]].reshape(32, 32).T,
                 aspect='auto', cmap='gray')


###############################################################################
# Recovered Swimmers
# ------------------


fig, ax = plt.subplots(4, 4, subplot_kw={'xticks': [], 'yticks': []})
ax = ax.flatten()
for i in range(len(indices)):
    ax[i].imshow(recovered[indices[i]].reshape(32, 32).T,
                 aspect='auto', cmap='gray')


###############################################################################
# Plot them all together so we can see how well we recovered
# the original swimmer data.


fig, ax = plt.subplots(3, 16, figsize=(27, 5),
                       subplot_kw={'xticks': [], 'yticks': []})
indices = np.random.randint(16, size=16) + np.arange(0, 256, 16)
ax = ax.flatten()

# plot Original
ax[0].set_ylabel('Original', rotation=0, fontsize=25, labelpad=40)
ax[0].yaxis.set_label_coords(-1.0, 0.5)
for i in range(len(indices)):
    ax[i].imshow(swimmers[indices[i]].reshape(32, 32).T,
                 aspect='auto', cmap='gray')

# plot Corrupted
ax[16].set_ylabel('Corrupted', rotation=0, fontsize=25, labelpad=40)
ax[16].yaxis.set_label_coords(-1.1, 0.5)
for i in range(len(indices)):
    ax[16 + i].imshow(corrupted[indices[i]].reshape(32, 32).T,
                      aspect='auto', cmap='gray')

# plot Recovered
ax[32].set_ylabel('Recovered', rotation=0, fontsize=25, labelpad=40)
ax[32].yaxis.set_label_coords(-1.1, 0.5)
for i in range(len(indices)):
    ax[32 + i].imshow(recovered[indices[i]].reshape(32, 32).T,
                      aspect='auto', cmap='gray')

###############################################################################
# To see what DBSCAN is doing, let's look at the bases samples.

plt.figure()
embedding = TSNE(n_components=2).fit_transform(uoi_nmf.bases_samples_)
sc = plt.scatter(embedding[:, 0], embedding[:, 1],
                 c=uoi_nmf.bases_samples_labels_, s=80, cmap="nipy_spectral")
sc.set_facecolor('none')
plt.show()
-------------------------------------------------------------------------------- /examples/plot_uoi_lasso.py: --------------------------------------------------------------------------------
"""
.. _uoi_lasso:

UoI-Lasso for sparse, minimal-bias regression
==============================================

This example will demonstrate the ability of UoI-Lasso to recover sparse
models with minimal bias.

"""

###############################################################################
# Load synthetic data
# -------------------
#
# The synthetic data will have 40 features, 10 of which are informative, and
# a single response variable.


import matplotlib
import matplotlib.pyplot as plt
import numpy as np

from sklearn.linear_model import LinearRegression, LassoCV

from pyuoi.linear_model import UoI_Lasso
from pyuoi.datasets import make_linear_regression


matplotlib.rcParams['figure.figsize'] = [4, 4]
np.random.seed(0)

X, y, beta, intercept = make_linear_regression(n_features=40, n_informative=10,
                                               X_loc=0., beta_low=-1.,
                                               beta_high=1.)


###############################################################################
# Visualize data
# --------------
#
# Some features are informative and others are not.


fig, axes = plt.subplots(2, 2)
for ii, ax in enumerate(axes.ravel()):
    ax.scatter(X[:, ii], y.ravel(), marker='.')
    ax.set_xlabel('Feature {}'.format(ii))
for ax in axes[:, 0]:
    ax.set_ylabel('Response')
fig.tight_layout()


###############################################################################
# Fit a UoI-Lasso model
# ---------------------
#
# UoI-Lasso can fit low-bias model parameters with feature selectivity. We can
# evaluate the predictions of the model, compare the fit :math:`\beta`, and
# look at the fractions of false positives and false negatives.


uoi_lasso = UoI_Lasso()
uoi_lasso.fit(X, y)
yhat = uoi_lasso.predict(X)

fig, axes = plt.subplots(1, 3, figsize=(7.5, 2.5))
ax = axes[0]
ax.scatter(y, yhat, marker='.')
ax.set_xlabel('True response')
ax.set_ylabel('Predicted response')

ax = axes[1]
val = max(abs(beta).max(), abs(uoi_lasso.coef_).max()) * 1.1
ax.scatter(beta.ravel(), uoi_lasso.coef_.ravel(), marker='.')
ax.set_xlabel(r'True $\beta_i$')
ax.set_ylabel(r'Fit $\beta_i$')
ax.set_xlim(-val, val)
ax.set_ylim(-val, val)
ax.plot([-val, val], [-val, val], c='k', lw=1.)

ax = axes[2]
fp = np.logical_and(uoi_lasso.coef_ != 0, beta == 0).mean()
fn = np.logical_and(uoi_lasso.coef_ == 0, beta != 0).mean()
ax.bar([0, 1], [fp, fn], align='center')
ax.set_xticks([0, 1])
ax.set_xticklabels(['False\npositive', 'False\nnegative'])
ax.set_ylabel('Fraction')
ax.set_ylim(0, 1)
fig.tight_layout()

###############################################################################
# Ordinary Least Squares
# ----------------------
#
# OLS will have low-bias fits, but will not generally have feature
# selectivity, resulting in many false positives.

lr = LinearRegression()
lr.fit(X, y)
yhat = lr.predict(X)

fig, axes = plt.subplots(1, 3, figsize=(7.5, 2.5))
ax = axes[0]
ax.scatter(y, yhat, marker='.')
ax.set_xlabel('True response')
ax.set_ylabel('Predicted response')

ax = axes[1]
val = max(abs(beta).max(), abs(lr.coef_).max()) * 1.1
ax.scatter(beta.ravel(), lr.coef_.ravel(), marker='.')
ax.set_xlabel(r'True $\beta_i$')
ax.set_ylabel(r'Fit $\beta_i$')
ax.set_xlim(-val, val)
ax.set_ylim(-val, val)
ax.plot([-val, val], [-val, val], c='k', lw=1.)
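
# (The diagonal marks unbiased estimates; the bars built next count false
# positives and false negatives against the true support.)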
117 | 118 | ax = axes[2] 119 | fp = np.logical_and(lr.coef_ != 0, beta == 0).mean() 120 | fn = np.logical_and(lr.coef_ == 0, beta != 0).mean() 121 | ax.bar([0, 1], [fp, fn], align='center') 122 | ax.set_xticks([0, 1]) 123 | ax.set_xticklabels(['False\npositive', 'False\nnegative']) 124 | ax.set_ylabel('Fraction') 125 | ax.set_ylim(0, 1) 126 | fig.tight_layout() 127 | 128 | ############################################################################### 129 | # Cross-validated Lasso 130 | # --------------------- 131 | # 132 | # Lasso can fit models with feature selectivity, but will have biased estimates 133 | # of the parameters and will typically have more false positives and false 134 | # negatives than UoI-Lasso. 135 | 136 | lr = LassoCV(cv=5) 137 | lr.fit(X, y) 138 | yhat = lr.predict(X) 139 | 140 | fig, axes = plt.subplots(1, 3, figsize=(7.5, 2.5)) 141 | ax = axes[0] 142 | ax.scatter(y, yhat, marker='.') 143 | ax.set_xlabel('True response') 144 | ax.set_ylabel('Predicted response') 145 | 146 | ax = axes[1] 147 | val = max(abs(beta).max(), abs(lr.coef_).max()) * 1.1 148 | ax.scatter(beta.ravel(), lr.coef_.ravel(), marker='.') 149 | ax.set_xlabel(r'True $\beta_i$') 150 | ax.set_ylabel(r'Fit $\beta_i$') 151 | ax.set_xlim(-val, val) 152 | ax.set_ylim(-val, val) 153 | ax.plot([-val, val], [-val, val], c='k', lw=1.) 154 | 155 | ax = axes[2] 156 | fp = np.logical_and(lr.coef_ != 0, beta == 0).mean() 157 | fn = np.logical_and(lr.coef_ == 0, beta != 0).mean() 158 | ax.bar([0, 1], [fp, fn], align='center') 159 | ax.set_xticks([0, 1]) 160 | ax.set_xticklabels(['False\npositive', 'False\nnegative']) 161 | ax.set_ylabel('Fraction') 162 | ax.set_ylim(0, 1) 163 | fig.tight_layout() 164 | plt.show() 165 | -------------------------------------------------------------------------------- /liblbfgs/COPYING: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 1990 Jorge Nocedal 4 | Copyright (c) 2007-2010 Naoaki Okazaki 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a 7 | copy of this software and associated documentation files (the "Software"), 8 | to deal in the Software without restriction, including without limitation 9 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 | and/or sell copies of the Software, and to permit persons to whom the 11 | Software is furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /liblbfgs/README: -------------------------------------------------------------------------------- 1 | 2 | libLBFGS: C library of limited-memory BFGS (L-BFGS) 3 | 4 | Copyright (c) 1990, Jorge Nocedal 5 | Copyright (c) 2007-2010, Naoaki Okazaki 6 | 7 | ========================================================================= 8 | 1. 
Introduction
=========================================================================
libLBFGS is a C port of the implementation of the Limited-memory
Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) method written by Jorge Nocedal.
The original FORTRAN source code is available at:
http://www.ece.northwestern.edu/~nocedal/lbfgs.html

The L-BFGS method solves the unconstrained minimization problem:
    minimize F(x), x = (x1, x2, ..., xN),
provided that the objective function F(x) and its gradient G(x) are
computable.

Refer to the libLBFGS web site for more information.
http://www.chokkan.org/software/liblbfgs/



=========================================================================
2. How to build
=========================================================================
[Microsoft Visual Studio 2008]
Open the solution file "lbfgs.sln" and build it.

[GCC]
$ ./configure
$ make
$ make install    # To install the libLBFGS library and header.



=========================================================================
3. Note on SSE/SSE2 optimization
=========================================================================
This library has SSE/SSE2 optimization routines for vector arithmetic
operations on Intel/AMD processors. The SSE2 routine is for 64 bit double
values, and the SSE routine is for 32 bit float values. Since the default
parameters in libLBFGS are tuned for double precision values, you may need
to modify these parameters to use the SSE optimization routines.

To use the SSE2 optimization routine, specify the --enable-sse2 option to
the configure script.

$ ./configure --enable-sse2

To build libLBFGS with SSE2 optimization enabled on Microsoft Visual
Studio 2005, define the USE_SSE and __SSE2__ symbols.

Make sure to run libLBFGS on processors where SSE2 instructions are
available. The library does not check for the existence of SSE2
instructions.

To package maintainers,

Please do not enable the SSE/SSE2 optimization routines. The library built
with SSE/SSE2 optimization will crash without any notice when the necessary
SSE/SSE2 instructions are unavailable on the CPU.



=========================================================================
4. License
=========================================================================
libLBFGS is distributed under the terms of the MIT license.
Please refer to the COPYING file in the distribution.

$Id$
-------------------------------------------------------------------------------- /liblbfgs/arithmetic_ansi.h: --------------------------------------------------------------------------------
/*
 * ANSI C implementation of vector operations.
 *
 * Copyright (c) 2007-2010 Naoaki Okazaki
 * All rights reserved.
6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 24 | */ 25 | 26 | /* $Id$ */ 27 | 28 | #include 29 | #include 30 | 31 | #if LBFGS_FLOAT == 32 && LBFGS_IEEE_FLOAT 32 | #define fsigndiff(x, y) (((*(uint32_t*)(x)) ^ (*(uint32_t*)(y))) & 0x80000000U) 33 | #else 34 | #define fsigndiff(x, y) (*(x) * (*(y) / fabs(*(y))) < 0.) 35 | #endif/*LBFGS_IEEE_FLOAT*/ 36 | 37 | inline static void* vecalloc(size_t size) 38 | { 39 | void *memblock = malloc(size); 40 | if (memblock) { 41 | memset(memblock, 0, size); 42 | } 43 | return memblock; 44 | } 45 | 46 | inline static void vecfree(void *memblock) 47 | { 48 | free(memblock); 49 | } 50 | 51 | inline static void vecset(lbfgsfloatval_t *x, const lbfgsfloatval_t c, const int n) 52 | { 53 | int i; 54 | 55 | for (i = 0;i < n;++i) { 56 | x[i] = c; 57 | } 58 | } 59 | 60 | inline static void veccpy(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const int n) 61 | { 62 | int i; 63 | 64 | for (i = 0;i < n;++i) { 65 | y[i] = x[i]; 66 | } 67 | } 68 | 69 | inline static void vecncpy(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const int n) 70 | { 71 | int i; 72 | 73 | for (i = 0;i < n;++i) { 74 | y[i] = -x[i]; 75 | } 76 | } 77 | 78 | inline static void vecadd(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const lbfgsfloatval_t c, const int n) 79 | { 80 | int i; 81 | 82 | for (i = 0;i < n;++i) { 83 | y[i] += c * x[i]; 84 | } 85 | } 86 | 87 | inline static void vecdiff(lbfgsfloatval_t *z, const lbfgsfloatval_t *x, const lbfgsfloatval_t *y, const int n) 88 | { 89 | int i; 90 | 91 | for (i = 0;i < n;++i) { 92 | z[i] = x[i] - y[i]; 93 | } 94 | } 95 | 96 | inline static void vecscale(lbfgsfloatval_t *y, const lbfgsfloatval_t c, const int n) 97 | { 98 | int i; 99 | 100 | for (i = 0;i < n;++i) { 101 | y[i] *= c; 102 | } 103 | } 104 | 105 | inline static void vecmul(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const int n) 106 | { 107 | int i; 108 | 109 | for (i = 0;i < n;++i) { 110 | y[i] *= x[i]; 111 | } 112 | } 113 | 114 | inline static void vecdot(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const lbfgsfloatval_t *y, const int n) 115 | { 116 | int i; 117 | *s = 0.; 118 | for (i = 0;i < n;++i) { 119 | *s += x[i] * y[i]; 120 | } 121 | } 122 | 123 | inline static void vec2norm(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const int n) 124 | { 125 | vecdot(s, x, x, n); 126 | *s = (lbfgsfloatval_t)sqrt(*s); 127 | } 128 | 129 | inline static void vec2norminv(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, 
const int n) 130 | { 131 | vec2norm(s, x, n); 132 | *s = (lbfgsfloatval_t)(1.0 / *s); 133 | } 134 | -------------------------------------------------------------------------------- /liblbfgs/arithmetic_sse_double.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SSE2 implementation of vector oprations (64bit double). 3 | * 4 | * Copyright (c) 2007-2010 Naoaki Okazaki 5 | * All rights reserved. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 24 | */ 25 | 26 | /* $Id$ */ 27 | 28 | #include 29 | #ifndef __APPLE__ 30 | #include 31 | #endif 32 | #include 33 | 34 | #if 1400 <= _MSC_VER 35 | #include 36 | #endif/*1400 <= _MSC_VER*/ 37 | 38 | #if HAVE_EMMINTRIN_H 39 | #include 40 | #endif/*HAVE_EMMINTRIN_H*/ 41 | 42 | inline static void* vecalloc(size_t size) 43 | { 44 | #if defined(_MSC_VER) 45 | void *memblock = _aligned_malloc(size, 16); 46 | #elif defined(__APPLE__) /* OS X always aligns on 16-byte boundaries */ 47 | void *memblock = malloc(size); 48 | #else 49 | void *memblock = NULL, *p = NULL; 50 | if (posix_memalign(&p, 16, size) == 0) { 51 | memblock = p; 52 | } 53 | #endif 54 | if (memblock != NULL) { 55 | memset(memblock, 0, size); 56 | } 57 | return memblock; 58 | } 59 | 60 | inline static void vecfree(void *memblock) 61 | { 62 | #ifdef _MSC_VER 63 | _aligned_free(memblock); 64 | #else 65 | free(memblock); 66 | #endif 67 | } 68 | 69 | #define fsigndiff(x, y) \ 70 | ((_mm_movemask_pd(_mm_set_pd(*(x), *(y))) + 1) & 0x002) 71 | 72 | #define vecset(x, c, n) \ 73 | { \ 74 | int i; \ 75 | __m128d XMM0 = _mm_set1_pd(c); \ 76 | for (i = 0;i < (n);i += 8) { \ 77 | _mm_store_pd((x)+i , XMM0); \ 78 | _mm_store_pd((x)+i+2, XMM0); \ 79 | _mm_store_pd((x)+i+4, XMM0); \ 80 | _mm_store_pd((x)+i+6, XMM0); \ 81 | } \ 82 | } 83 | 84 | #define veccpy(y, x, n) \ 85 | { \ 86 | int i; \ 87 | for (i = 0;i < (n);i += 8) { \ 88 | __m128d XMM0 = _mm_load_pd((x)+i ); \ 89 | __m128d XMM1 = _mm_load_pd((x)+i+2); \ 90 | __m128d XMM2 = _mm_load_pd((x)+i+4); \ 91 | __m128d XMM3 = _mm_load_pd((x)+i+6); \ 92 | _mm_store_pd((y)+i , XMM0); \ 93 | _mm_store_pd((y)+i+2, XMM1); \ 94 | _mm_store_pd((y)+i+4, XMM2); \ 95 | _mm_store_pd((y)+i+6, XMM3); \ 96 | } \ 97 | } 98 | 99 | #define vecncpy(y, x, n) \ 100 | { \ 101 | int i; \ 102 | for (i = 0;i < (n);i += 8) { \ 103 | __m128d XMM0 = _mm_setzero_pd(); \ 104 | __m128d XMM1 = _mm_setzero_pd(); \ 105 | __m128d XMM2 = _mm_setzero_pd(); \ 106 | __m128d XMM3 = 
_mm_setzero_pd(); \ 107 | __m128d XMM4 = _mm_load_pd((x)+i ); \ 108 | __m128d XMM5 = _mm_load_pd((x)+i+2); \ 109 | __m128d XMM6 = _mm_load_pd((x)+i+4); \ 110 | __m128d XMM7 = _mm_load_pd((x)+i+6); \ 111 | XMM0 = _mm_sub_pd(XMM0, XMM4); \ 112 | XMM1 = _mm_sub_pd(XMM1, XMM5); \ 113 | XMM2 = _mm_sub_pd(XMM2, XMM6); \ 114 | XMM3 = _mm_sub_pd(XMM3, XMM7); \ 115 | _mm_store_pd((y)+i , XMM0); \ 116 | _mm_store_pd((y)+i+2, XMM1); \ 117 | _mm_store_pd((y)+i+4, XMM2); \ 118 | _mm_store_pd((y)+i+6, XMM3); \ 119 | } \ 120 | } 121 | 122 | #define vecadd(y, x, c, n) \ 123 | { \ 124 | int i; \ 125 | __m128d XMM7 = _mm_set1_pd(c); \ 126 | for (i = 0;i < (n);i += 4) { \ 127 | __m128d XMM0 = _mm_load_pd((x)+i ); \ 128 | __m128d XMM1 = _mm_load_pd((x)+i+2); \ 129 | __m128d XMM2 = _mm_load_pd((y)+i ); \ 130 | __m128d XMM3 = _mm_load_pd((y)+i+2); \ 131 | XMM0 = _mm_mul_pd(XMM0, XMM7); \ 132 | XMM1 = _mm_mul_pd(XMM1, XMM7); \ 133 | XMM2 = _mm_add_pd(XMM2, XMM0); \ 134 | XMM3 = _mm_add_pd(XMM3, XMM1); \ 135 | _mm_store_pd((y)+i , XMM2); \ 136 | _mm_store_pd((y)+i+2, XMM3); \ 137 | } \ 138 | } 139 | 140 | #define vecdiff(z, x, y, n) \ 141 | { \ 142 | int i; \ 143 | for (i = 0;i < (n);i += 8) { \ 144 | __m128d XMM0 = _mm_load_pd((x)+i ); \ 145 | __m128d XMM1 = _mm_load_pd((x)+i+2); \ 146 | __m128d XMM2 = _mm_load_pd((x)+i+4); \ 147 | __m128d XMM3 = _mm_load_pd((x)+i+6); \ 148 | __m128d XMM4 = _mm_load_pd((y)+i ); \ 149 | __m128d XMM5 = _mm_load_pd((y)+i+2); \ 150 | __m128d XMM6 = _mm_load_pd((y)+i+4); \ 151 | __m128d XMM7 = _mm_load_pd((y)+i+6); \ 152 | XMM0 = _mm_sub_pd(XMM0, XMM4); \ 153 | XMM1 = _mm_sub_pd(XMM1, XMM5); \ 154 | XMM2 = _mm_sub_pd(XMM2, XMM6); \ 155 | XMM3 = _mm_sub_pd(XMM3, XMM7); \ 156 | _mm_store_pd((z)+i , XMM0); \ 157 | _mm_store_pd((z)+i+2, XMM1); \ 158 | _mm_store_pd((z)+i+4, XMM2); \ 159 | _mm_store_pd((z)+i+6, XMM3); \ 160 | } \ 161 | } 162 | 163 | #define vecscale(y, c, n) \ 164 | { \ 165 | int i; \ 166 | __m128d XMM7 = _mm_set1_pd(c); \ 167 | for (i = 0;i < (n);i += 4) { \ 168 | __m128d XMM0 = _mm_load_pd((y)+i ); \ 169 | __m128d XMM1 = _mm_load_pd((y)+i+2); \ 170 | XMM0 = _mm_mul_pd(XMM0, XMM7); \ 171 | XMM1 = _mm_mul_pd(XMM1, XMM7); \ 172 | _mm_store_pd((y)+i , XMM0); \ 173 | _mm_store_pd((y)+i+2, XMM1); \ 174 | } \ 175 | } 176 | 177 | #define vecmul(y, x, n) \ 178 | { \ 179 | int i; \ 180 | for (i = 0;i < (n);i += 8) { \ 181 | __m128d XMM0 = _mm_load_pd((x)+i ); \ 182 | __m128d XMM1 = _mm_load_pd((x)+i+2); \ 183 | __m128d XMM2 = _mm_load_pd((x)+i+4); \ 184 | __m128d XMM3 = _mm_load_pd((x)+i+6); \ 185 | __m128d XMM4 = _mm_load_pd((y)+i ); \ 186 | __m128d XMM5 = _mm_load_pd((y)+i+2); \ 187 | __m128d XMM6 = _mm_load_pd((y)+i+4); \ 188 | __m128d XMM7 = _mm_load_pd((y)+i+6); \ 189 | XMM4 = _mm_mul_pd(XMM4, XMM0); \ 190 | XMM5 = _mm_mul_pd(XMM5, XMM1); \ 191 | XMM6 = _mm_mul_pd(XMM6, XMM2); \ 192 | XMM7 = _mm_mul_pd(XMM7, XMM3); \ 193 | _mm_store_pd((y)+i , XMM4); \ 194 | _mm_store_pd((y)+i+2, XMM5); \ 195 | _mm_store_pd((y)+i+4, XMM6); \ 196 | _mm_store_pd((y)+i+6, XMM7); \ 197 | } \ 198 | } 199 | 200 | 201 | 202 | #if 3 <= __SSE__ || defined(__SSE3__) 203 | /* 204 | Horizontal add with haddps SSE3 instruction. The work register (rw) 205 | is unused. 206 | */ 207 | #define __horizontal_sum(r, rw) \ 208 | r = _mm_hadd_ps(r, r); \ 209 | r = _mm_hadd_ps(r, r); 210 | 211 | #else 212 | /* 213 | Horizontal add with SSE instruction. The work register (rw) is used. 
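    (The two shuffle/add rounds swap lane pairs so that, after both
    additions, every lane of r holds the sum of all four original lanes.)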
214 | */ 215 | #define __horizontal_sum(r, rw) \ 216 | rw = r; \ 217 | r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(1, 0, 3, 2)); \ 218 | r = _mm_add_ps(r, rw); \ 219 | rw = r; \ 220 | r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(2, 3, 0, 1)); \ 221 | r = _mm_add_ps(r, rw); 222 | 223 | #endif 224 | 225 | #define vecdot(s, x, y, n) \ 226 | { \ 227 | int i; \ 228 | __m128d XMM0 = _mm_setzero_pd(); \ 229 | __m128d XMM1 = _mm_setzero_pd(); \ 230 | __m128d XMM2, XMM3, XMM4, XMM5; \ 231 | for (i = 0;i < (n);i += 4) { \ 232 | XMM2 = _mm_load_pd((x)+i ); \ 233 | XMM3 = _mm_load_pd((x)+i+2); \ 234 | XMM4 = _mm_load_pd((y)+i ); \ 235 | XMM5 = _mm_load_pd((y)+i+2); \ 236 | XMM2 = _mm_mul_pd(XMM2, XMM4); \ 237 | XMM3 = _mm_mul_pd(XMM3, XMM5); \ 238 | XMM0 = _mm_add_pd(XMM0, XMM2); \ 239 | XMM1 = _mm_add_pd(XMM1, XMM3); \ 240 | } \ 241 | XMM0 = _mm_add_pd(XMM0, XMM1); \ 242 | XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \ 243 | XMM0 = _mm_add_pd(XMM0, XMM1); \ 244 | _mm_store_sd((s), XMM0); \ 245 | } 246 | 247 | #define vec2norm(s, x, n) \ 248 | { \ 249 | int i; \ 250 | __m128d XMM0 = _mm_setzero_pd(); \ 251 | __m128d XMM1 = _mm_setzero_pd(); \ 252 | __m128d XMM2, XMM3, XMM4, XMM5; \ 253 | for (i = 0;i < (n);i += 4) { \ 254 | XMM2 = _mm_load_pd((x)+i ); \ 255 | XMM3 = _mm_load_pd((x)+i+2); \ 256 | XMM4 = XMM2; \ 257 | XMM5 = XMM3; \ 258 | XMM2 = _mm_mul_pd(XMM2, XMM4); \ 259 | XMM3 = _mm_mul_pd(XMM3, XMM5); \ 260 | XMM0 = _mm_add_pd(XMM0, XMM2); \ 261 | XMM1 = _mm_add_pd(XMM1, XMM3); \ 262 | } \ 263 | XMM0 = _mm_add_pd(XMM0, XMM1); \ 264 | XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \ 265 | XMM0 = _mm_add_pd(XMM0, XMM1); \ 266 | XMM0 = _mm_sqrt_pd(XMM0); \ 267 | _mm_store_sd((s), XMM0); \ 268 | } 269 | 270 | 271 | #define vec2norminv(s, x, n) \ 272 | { \ 273 | int i; \ 274 | __m128d XMM0 = _mm_setzero_pd(); \ 275 | __m128d XMM1 = _mm_setzero_pd(); \ 276 | __m128d XMM2, XMM3, XMM4, XMM5; \ 277 | for (i = 0;i < (n);i += 4) { \ 278 | XMM2 = _mm_load_pd((x)+i ); \ 279 | XMM3 = _mm_load_pd((x)+i+2); \ 280 | XMM4 = XMM2; \ 281 | XMM5 = XMM3; \ 282 | XMM2 = _mm_mul_pd(XMM2, XMM4); \ 283 | XMM3 = _mm_mul_pd(XMM3, XMM5); \ 284 | XMM0 = _mm_add_pd(XMM0, XMM2); \ 285 | XMM1 = _mm_add_pd(XMM1, XMM3); \ 286 | } \ 287 | XMM2 = _mm_set1_pd(1.0); \ 288 | XMM0 = _mm_add_pd(XMM0, XMM1); \ 289 | XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \ 290 | XMM0 = _mm_add_pd(XMM0, XMM1); \ 291 | XMM0 = _mm_sqrt_pd(XMM0); \ 292 | XMM2 = _mm_div_pd(XMM2, XMM0); \ 293 | _mm_store_sd((s), XMM2); \ 294 | } 295 | -------------------------------------------------------------------------------- /liblbfgs/arithmetic_sse_float.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SSE/SSE3 implementation of vector oprations (32bit float). 3 | * 4 | * Copyright (c) 2007-2010 Naoaki Okazaki 5 | * All rights reserved. 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to deal 9 | * in the Software without restriction, including without limitation the rights 10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | * copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in 15 | * all copies or substantial portions of the Software. 
16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | * THE SOFTWARE. 24 | */ 25 | 26 | /* $Id$ */ 27 | 28 | #include 29 | #ifndef __APPLE__ 30 | #include 31 | #endif 32 | #include 33 | 34 | #if 1400 <= _MSC_VER 35 | #include 36 | #endif/*_MSC_VER*/ 37 | 38 | #if HAVE_XMMINTRIN_H 39 | #include 40 | #endif/*HAVE_XMMINTRIN_H*/ 41 | 42 | #if LBFGS_FLOAT == 32 && LBFGS_IEEE_FLOAT 43 | #define fsigndiff(x, y) (((*(uint32_t*)(x)) ^ (*(uint32_t*)(y))) & 0x80000000U) 44 | #else 45 | #define fsigndiff(x, y) (*(x) * (*(y) / fabs(*(y))) < 0.) 46 | #endif/*LBFGS_IEEE_FLOAT*/ 47 | 48 | inline static void* vecalloc(size_t size) 49 | { 50 | #if defined(_MSC_VER) 51 | void *memblock = _aligned_malloc(size, 16); 52 | #elif defined(__APPLE__) /* OS X always aligns on 16-byte boundaries */ 53 | void *memblock = malloc(size); 54 | #else 55 | void *memblock = NULL, *p = NULL; 56 | if (posix_memalign(&p, 16, size) == 0) { 57 | memblock = p; 58 | } 59 | #endif 60 | if (memblock != NULL) { 61 | memset(memblock, 0, size); 62 | } 63 | return memblock; 64 | } 65 | 66 | inline static void vecfree(void *memblock) 67 | { 68 | #ifdef _MSC_VER 69 | _aligned_free(memblock); 70 | #else 71 | free(memblock); 72 | #endif 73 | } 74 | 75 | #define vecset(x, c, n) \ 76 | { \ 77 | int i; \ 78 | __m128 XMM0 = _mm_set_ps1(c); \ 79 | for (i = 0;i < (n);i += 16) { \ 80 | _mm_store_ps((x)+i , XMM0); \ 81 | _mm_store_ps((x)+i+ 4, XMM0); \ 82 | _mm_store_ps((x)+i+ 8, XMM0); \ 83 | _mm_store_ps((x)+i+12, XMM0); \ 84 | } \ 85 | } 86 | 87 | #define veccpy(y, x, n) \ 88 | { \ 89 | int i; \ 90 | for (i = 0;i < (n);i += 16) { \ 91 | __m128 XMM0 = _mm_load_ps((x)+i ); \ 92 | __m128 XMM1 = _mm_load_ps((x)+i+ 4); \ 93 | __m128 XMM2 = _mm_load_ps((x)+i+ 8); \ 94 | __m128 XMM3 = _mm_load_ps((x)+i+12); \ 95 | _mm_store_ps((y)+i , XMM0); \ 96 | _mm_store_ps((y)+i+ 4, XMM1); \ 97 | _mm_store_ps((y)+i+ 8, XMM2); \ 98 | _mm_store_ps((y)+i+12, XMM3); \ 99 | } \ 100 | } 101 | 102 | #define vecncpy(y, x, n) \ 103 | { \ 104 | int i; \ 105 | const uint32_t mask = 0x80000000; \ 106 | __m128 XMM4 = _mm_load_ps1((float*)&mask); \ 107 | for (i = 0;i < (n);i += 16) { \ 108 | __m128 XMM0 = _mm_load_ps((x)+i ); \ 109 | __m128 XMM1 = _mm_load_ps((x)+i+ 4); \ 110 | __m128 XMM2 = _mm_load_ps((x)+i+ 8); \ 111 | __m128 XMM3 = _mm_load_ps((x)+i+12); \ 112 | XMM0 = _mm_xor_ps(XMM0, XMM4); \ 113 | XMM1 = _mm_xor_ps(XMM1, XMM4); \ 114 | XMM2 = _mm_xor_ps(XMM2, XMM4); \ 115 | XMM3 = _mm_xor_ps(XMM3, XMM4); \ 116 | _mm_store_ps((y)+i , XMM0); \ 117 | _mm_store_ps((y)+i+ 4, XMM1); \ 118 | _mm_store_ps((y)+i+ 8, XMM2); \ 119 | _mm_store_ps((y)+i+12, XMM3); \ 120 | } \ 121 | } 122 | 123 | #define vecadd(y, x, c, n) \ 124 | { \ 125 | int i; \ 126 | __m128 XMM7 = _mm_set_ps1(c); \ 127 | for (i = 0;i < (n);i += 8) { \ 128 | __m128 XMM0 = _mm_load_ps((x)+i ); \ 129 | __m128 XMM1 = _mm_load_ps((x)+i+4); \ 130 | __m128 XMM2 = _mm_load_ps((y)+i ); \ 131 | __m128 XMM3 = _mm_load_ps((y)+i+4); \ 132 | XMM0 = _mm_mul_ps(XMM0, XMM7); \ 133 | XMM1 = _mm_mul_ps(XMM1, XMM7); \ 134 | XMM2 = _mm_add_ps(XMM2, XMM0); \ 135 | XMM3 = _mm_add_ps(XMM3, XMM1); \ 
136 | _mm_store_ps((y)+i , XMM2); \ 137 | _mm_store_ps((y)+i+4, XMM3); \ 138 | } \ 139 | } 140 | 141 | #define vecdiff(z, x, y, n) \ 142 | { \ 143 | int i; \ 144 | for (i = 0;i < (n);i += 16) { \ 145 | __m128 XMM0 = _mm_load_ps((x)+i ); \ 146 | __m128 XMM1 = _mm_load_ps((x)+i+ 4); \ 147 | __m128 XMM2 = _mm_load_ps((x)+i+ 8); \ 148 | __m128 XMM3 = _mm_load_ps((x)+i+12); \ 149 | __m128 XMM4 = _mm_load_ps((y)+i ); \ 150 | __m128 XMM5 = _mm_load_ps((y)+i+ 4); \ 151 | __m128 XMM6 = _mm_load_ps((y)+i+ 8); \ 152 | __m128 XMM7 = _mm_load_ps((y)+i+12); \ 153 | XMM0 = _mm_sub_ps(XMM0, XMM4); \ 154 | XMM1 = _mm_sub_ps(XMM1, XMM5); \ 155 | XMM2 = _mm_sub_ps(XMM2, XMM6); \ 156 | XMM3 = _mm_sub_ps(XMM3, XMM7); \ 157 | _mm_store_ps((z)+i , XMM0); \ 158 | _mm_store_ps((z)+i+ 4, XMM1); \ 159 | _mm_store_ps((z)+i+ 8, XMM2); \ 160 | _mm_store_ps((z)+i+12, XMM3); \ 161 | } \ 162 | } 163 | 164 | #define vecscale(y, c, n) \ 165 | { \ 166 | int i; \ 167 | __m128 XMM7 = _mm_set_ps1(c); \ 168 | for (i = 0;i < (n);i += 8) { \ 169 | __m128 XMM0 = _mm_load_ps((y)+i ); \ 170 | __m128 XMM1 = _mm_load_ps((y)+i+4); \ 171 | XMM0 = _mm_mul_ps(XMM0, XMM7); \ 172 | XMM1 = _mm_mul_ps(XMM1, XMM7); \ 173 | _mm_store_ps((y)+i , XMM0); \ 174 | _mm_store_ps((y)+i+4, XMM1); \ 175 | } \ 176 | } 177 | 178 | #define vecmul(y, x, n) \ 179 | { \ 180 | int i; \ 181 | for (i = 0;i < (n);i += 16) { \ 182 | __m128 XMM0 = _mm_load_ps((x)+i ); \ 183 | __m128 XMM1 = _mm_load_ps((x)+i+ 4); \ 184 | __m128 XMM2 = _mm_load_ps((x)+i+ 8); \ 185 | __m128 XMM3 = _mm_load_ps((x)+i+12); \ 186 | __m128 XMM4 = _mm_load_ps((y)+i ); \ 187 | __m128 XMM5 = _mm_load_ps((y)+i+ 4); \ 188 | __m128 XMM6 = _mm_load_ps((y)+i+ 8); \ 189 | __m128 XMM7 = _mm_load_ps((y)+i+12); \ 190 | XMM4 = _mm_mul_ps(XMM4, XMM0); \ 191 | XMM5 = _mm_mul_ps(XMM5, XMM1); \ 192 | XMM6 = _mm_mul_ps(XMM6, XMM2); \ 193 | XMM7 = _mm_mul_ps(XMM7, XMM3); \ 194 | _mm_store_ps((y)+i , XMM4); \ 195 | _mm_store_ps((y)+i+ 4, XMM5); \ 196 | _mm_store_ps((y)+i+ 8, XMM6); \ 197 | _mm_store_ps((y)+i+12, XMM7); \ 198 | } \ 199 | } 200 | 201 | 202 | 203 | #if 3 <= __SSE__ || defined(__SSE3__) 204 | /* 205 | Horizontal add with haddps SSE3 instruction. The work register (rw) 206 | is unused. 207 | */ 208 | #define __horizontal_sum(r, rw) \ 209 | r = _mm_hadd_ps(r, r); \ 210 | r = _mm_hadd_ps(r, r); 211 | 212 | #else 213 | /* 214 | Horizontal add with SSE instruction. The work register (rw) is used. 
215 | */ 216 | #define __horizontal_sum(r, rw) \ 217 | rw = r; \ 218 | r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(1, 0, 3, 2)); \ 219 | r = _mm_add_ps(r, rw); \ 220 | rw = r; \ 221 | r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(2, 3, 0, 1)); \ 222 | r = _mm_add_ps(r, rw); 223 | 224 | #endif 225 | 226 | #define vecdot(s, x, y, n) \ 227 | { \ 228 | int i; \ 229 | __m128 XMM0 = _mm_setzero_ps(); \ 230 | __m128 XMM1 = _mm_setzero_ps(); \ 231 | __m128 XMM2, XMM3, XMM4, XMM5; \ 232 | for (i = 0;i < (n);i += 8) { \ 233 | XMM2 = _mm_load_ps((x)+i ); \ 234 | XMM3 = _mm_load_ps((x)+i+4); \ 235 | XMM4 = _mm_load_ps((y)+i ); \ 236 | XMM5 = _mm_load_ps((y)+i+4); \ 237 | XMM2 = _mm_mul_ps(XMM2, XMM4); \ 238 | XMM3 = _mm_mul_ps(XMM3, XMM5); \ 239 | XMM0 = _mm_add_ps(XMM0, XMM2); \ 240 | XMM1 = _mm_add_ps(XMM1, XMM3); \ 241 | } \ 242 | XMM0 = _mm_add_ps(XMM0, XMM1); \ 243 | __horizontal_sum(XMM0, XMM1); \ 244 | _mm_store_ss((s), XMM0); \ 245 | } 246 | 247 | #define vec2norm(s, x, n) \ 248 | { \ 249 | int i; \ 250 | __m128 XMM0 = _mm_setzero_ps(); \ 251 | __m128 XMM1 = _mm_setzero_ps(); \ 252 | __m128 XMM2, XMM3; \ 253 | for (i = 0;i < (n);i += 8) { \ 254 | XMM2 = _mm_load_ps((x)+i ); \ 255 | XMM3 = _mm_load_ps((x)+i+4); \ 256 | XMM2 = _mm_mul_ps(XMM2, XMM2); \ 257 | XMM3 = _mm_mul_ps(XMM3, XMM3); \ 258 | XMM0 = _mm_add_ps(XMM0, XMM2); \ 259 | XMM1 = _mm_add_ps(XMM1, XMM3); \ 260 | } \ 261 | XMM0 = _mm_add_ps(XMM0, XMM1); \ 262 | __horizontal_sum(XMM0, XMM1); \ 263 | XMM2 = XMM0; \ 264 | XMM1 = _mm_rsqrt_ss(XMM0); \ 265 | XMM3 = XMM1; \ 266 | XMM1 = _mm_mul_ss(XMM1, XMM1); \ 267 | XMM1 = _mm_mul_ss(XMM1, XMM3); \ 268 | XMM1 = _mm_mul_ss(XMM1, XMM0); \ 269 | XMM1 = _mm_mul_ss(XMM1, _mm_set_ss(-0.5f)); \ 270 | XMM3 = _mm_mul_ss(XMM3, _mm_set_ss(1.5f)); \ 271 | XMM3 = _mm_add_ss(XMM3, XMM1); \ 272 | XMM3 = _mm_mul_ss(XMM3, XMM2); \ 273 | _mm_store_ss((s), XMM3); \ 274 | } 275 | 276 | #define vec2norminv(s, x, n) \ 277 | { \ 278 | int i; \ 279 | __m128 XMM0 = _mm_setzero_ps(); \ 280 | __m128 XMM1 = _mm_setzero_ps(); \ 281 | __m128 XMM2, XMM3; \ 282 | for (i = 0;i < (n);i += 16) { \ 283 | XMM2 = _mm_load_ps((x)+i ); \ 284 | XMM3 = _mm_load_ps((x)+i+4); \ 285 | XMM2 = _mm_mul_ps(XMM2, XMM2); \ 286 | XMM3 = _mm_mul_ps(XMM3, XMM3); \ 287 | XMM0 = _mm_add_ps(XMM0, XMM2); \ 288 | XMM1 = _mm_add_ps(XMM1, XMM3); \ 289 | } \ 290 | XMM0 = _mm_add_ps(XMM0, XMM1); \ 291 | __horizontal_sum(XMM0, XMM1); \ 292 | XMM2 = XMM0; \ 293 | XMM1 = _mm_rsqrt_ss(XMM0); \ 294 | XMM3 = XMM1; \ 295 | XMM1 = _mm_mul_ss(XMM1, XMM1); \ 296 | XMM1 = _mm_mul_ss(XMM1, XMM3); \ 297 | XMM1 = _mm_mul_ss(XMM1, XMM0); \ 298 | XMM1 = _mm_mul_ss(XMM1, _mm_set_ss(-0.5f)); \ 299 | XMM3 = _mm_mul_ss(XMM3, _mm_set_ss(1.5f)); \ 300 | XMM3 = _mm_add_ss(XMM3, XMM1); \ 301 | _mm_store_ss((s), XMM3); \ 302 | } 303 | -------------------------------------------------------------------------------- /paper/paper.bib: -------------------------------------------------------------------------------- 1 | @incollection{bouchard2017, 2 | title = {Union of {I}ntersections ({UoI}) for Interpretable Data Driven Discovery and Prediction}, 3 | author = {Bouchard, Kristofer and Bujan, Alejandro and Roosta-Khorasani, Farbod and Ubaru, Shashanka and Prabhat, Mr. and Snijders, Antoine and Mao, Jian-Hua and Chang, Edward and Mahoney, Michael W and Bhattacharya, Sharmodeep}, 4 | booktitle = {{Advances in Neural Information Processing Systems 30}}, 5 | pages = {1078--1086}, 6 | year = {2017}, 7 | } 8 | 9 | @INPROCEEDINGS{ubaru2017, 10 | author = {S. {Ubaru} and K. {Wu} and K. E. 
{Bouchard}}, 11 | booktitle = {{2017 16th IEEE International Conference on Machine Learning and Applications (ICMLA)}}, 12 | title = {{UoI-NMF} Cluster: A Robust Nonnegative Matrix Factorization Algorithm for Improved Parts-Based Decomposition and Reconstruction of Noisy Data}, 13 | year = {2017}, 14 | volume = {}, 15 | number = {}, 16 | pages = {241-248}, 17 | doi = "10.1109/ICMLA.2017.0-152" 18 | } 19 | 20 | @ARTICLE{tibshirani1994, 21 | author = {Robert Tibshirani}, 22 | title = {Regression Shrinkage and Selection Via the Lasso}, 23 | journal = {Journal of the Royal Statistical Society, Series B}, 24 | year = {1994}, 25 | volume = {58}, 26 | pages = {267--288} 27 | } 28 | 29 | @article{bickel2006, 30 | title={Regularization in statistics}, 31 | author={Peter J. Bickel and Bo Li and Alexandre B. Tsybakov and Sara A. van de Geer and Bin Yu and Te{\'o}filo Vald{\'e}s and Carlos Rivero and Jianqing Fan and Aad van der Vaart}, 32 | journal={Test}, 33 | year={2006}, 34 | volume={15}, 35 | pages={271-344}, 36 | doi="10.1007/BF02607055" 37 | } 38 | 39 | @inproceedings{sklearn_api, 40 | author = {Lars Buitinck and Gilles Louppe and Mathieu Blondel and 41 | Fabian Pedregosa and Andreas Mueller and Olivier Grisel and 42 | Vlad Niculae and Peter Prettenhofer and Alexandre Gramfort 43 | and Jaques Grobler and Robert Layton and Jake VanderPlas and 44 | Arnaud Joly and Brian Holt and Ga{\"{e}}l Varoquaux}, 45 | title = {{API} design for machine learning software: experiences from the scikit-learn 46 | project}, 47 | booktitle = {{ECML PKDD Workshop: Languages for Data Mining and Machine Learning}}, 48 | year = {2013}, 49 | pages = {108--122}, 50 | } 51 | 52 | @inproceedings{gong2015, 53 | author = {Gong, Pinghua and Ye, Jieping}, 54 | title = {A Modified Orthant-Wise Limited Memory Quasi-Newton Method with Convergence Analysis}, 55 | booktitle = {{Proceedings of the 32nd International Conference on International Conference on Machine Learning - Volume 37}}, 56 | year = {2015}, 57 | pages = {276--284}, 58 | numpages = {9}, 59 | } 60 | 61 | @misc{ge2019, 62 | author = {Ge, Jason}, 63 | title = {PICASSO: PathwIse CalibrAted Sparse Shooting algOrithm}, 64 | year = {2019}, 65 | publisher = {GitHub}, 66 | journal = {GitHub repository}, 67 | howpublished = {\url{https://github.com/jasonge27/picasso}}, 68 | } 69 | 70 | @article {murdoch2019, 71 | author = {Murdoch, W. James and Singh, Chandan and Kumbier, Karl and Abbasi-Asl, Reza and Yu, Bin}, 72 | title = {Definitions, methods, and applications in interpretable machine learning}, 73 | volume = {116}, 74 | number = {44}, 75 | pages = {22071--22080}, 76 | year = {2019}, 77 | doi = {10.1073/pnas.1900654116}, 78 | publisher = {National Academy of Sciences}, 79 | journal = {Proceedings of the National Academy of Sciences} 80 | } 81 | 82 | @article{dalcin2005, 83 | title={{MPI} for {P}ython}, 84 | author={Dalc{\'\i}n, Lisandro and Paz, Rodrigo and Storti, Mario}, 85 | journal={Journal of Parallel and Distributed Computing}, 86 | volume={65}, 87 | number={9}, 88 | pages={1108--1115}, 89 | year={2005}, 90 | publisher={Elsevier} 91 | } 92 | 93 | @article{scikit-learn, 94 | title={Scikit-learn: Machine Learning in {P}ython}, 95 | author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V. 96 | and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P. 97 | and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and 98 | Cournapeau, D. and Brucher, M. and Perrot, M. 
and Duchesnay, E.}, 99 | journal={Journal of Machine Learning Research}, 100 | volume={12}, 101 | pages={2825--2830}, 102 | year={2011} 103 | } 104 | 105 | @incollection{akaike1998, 106 | title={Information theory and an extension of the maximum likelihood principle}, 107 | author={Akaike, Hirotogu}, 108 | booktitle={{Selected papers of Hirotugu Akaike}}, 109 | pages={199--213}, 110 | year={1998}, 111 | publisher={Springer}, 112 | doi="10.1007/978-1-4612-1694-0_15" 113 | } 114 | 115 | @article{schwarz1978, 116 | title={Estimating the dimension of a model}, 117 | author={Schwarz}, 118 | journal={The Annals of Statistics}, 119 | volume={6}, 120 | number={2}, 121 | pages={461--464}, 122 | year={1978}, 123 | publisher={Institute of Mathematical Statistics}, 124 | doi="10.1214/aos/1176344136" 125 | } 126 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | # Minimum requirements for the build system to execute. 3 | requires = ["setuptools>=42", "wheel", "numpy", "cython"] 4 | build-backend = "setuptools.build_meta" 5 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | norecursedirs = mpi 3 | markers = 4 | fast: mark a test as a fast test e.g. unit test 5 | slow: mark a test as a slow test e.g. end-to-end test 6 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | matplotlib 3 | pytest 4 | flake8 5 | cython 6 | sphinx-gallery 7 | sphinx-rtd-theme 8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.14 2 | h5py>=2.8 3 | scikit-learn>=0.24 4 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages, Extension 2 | from setuptools.command.build_ext import build_ext 3 | from distutils.ccompiler import get_default_compiler 4 | # To use a consistent encoding 5 | from codecs import open 6 | from os import path 7 | 8 | import numpy as np 9 | 10 | 11 | here = path.abspath(path.dirname(__file__)) 12 | 13 | with open(path.join(here, 'README.md'), encoding='utf-8') as f: 14 | long_description = f.read() 15 | with open(path.join(here, 'requirements.txt'), encoding='utf-8') as f: 16 | requirements = f.read().splitlines() 17 | with open(path.join(here, 'requirements-dev.txt'), encoding='utf-8') as f: 18 | dev_requirements = f.read().splitlines() 19 | dev_requirements = dev_requirements[1:] # Throw away the first line which is not a package. 
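# (That first line is the "-r requirements.txt" include directive shown
# above, which pip understands but which cannot appear in setuptools
# install_requires/extras_require metadata.)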
20 | 21 | # Prepare lbfgs 22 | from Cython.Build import cythonize 23 | 24 | class custom_build_ext(build_ext): 25 | def finalize_options(self): 26 | build_ext.finalize_options(self) 27 | if self.compiler is None: 28 | compiler = get_default_compiler() 29 | else: 30 | compiler = self.compiler 31 | 32 | if compiler == 'msvc': 33 | include_dirs.append('compat/win32') 34 | 35 | include_dirs = ['liblbfgs', np.get_include()] 36 | 37 | ext_modules = cythonize( 38 | [Extension('pyuoi.lbfgs._lowlevel', 39 | ['src/pyuoi/lbfgs/_lowlevel.pyx', 'liblbfgs/lbfgs.c'], 40 | include_dirs=include_dirs)]) 41 | 42 | 43 | setup( 44 | name='pyuoi', 45 | 46 | # Versions should comply with PEP440. For a discussion on single-sourcing 47 | # the version across setup.py and the project code, see 48 | # https://packaging.python.org/en/latest/single_source_version.html 49 | version='1.1.1', 50 | 51 | description='The Union of Intersections framework in Python.', 52 | long_description=long_description, 53 | long_description_content_type="text/markdown", 54 | 55 | 56 | # Author details 57 | author='', 58 | author_email='', 59 | 60 | # See https://pypi.python.org/pypi?%3Aaction=list_classifiers 61 | classifiers=[ 62 | # How mature is this project? Common values are 63 | # 3 - Alpha 64 | # 4 - Beta 65 | # 5 - Production/Stable 66 | 'Development Status :: 4 - Beta', 67 | 68 | # Indicate who your project is intended for 69 | 'Intended Audience :: Science/Research', 70 | 'Topic :: Scientific/Engineering', 71 | 72 | # Pick your license as you wish (should match "license" above) 73 | 'License :: OSI Approved :: BSD License', 74 | 75 | # Specify the Python versions you support here. In particular, ensure 76 | # that you indicate whether you support Python 2, Python 3 or both. 77 | 'Programming Language :: Python :: 3', 78 | 'Programming Language :: Python :: 3.7', 79 | 'Programming Language :: Python :: 3.8', 80 | 'Programming Language :: Python :: 3.9', 81 | ], 82 | 83 | # What does your project relate to? 84 | keywords='UoI', 85 | 86 | # You can just specify the packages manually here if your project is 87 | # simple. Or you can use find_packages(). 88 | package_dir={'': 'src'}, 89 | packages=find_packages() + 90 | find_packages(where="src"), 91 | package_data={'pyuoi': ['data/*.h5']}, 92 | 93 | # Alternatively, if you want to distribute just a my_module.py, uncomment 94 | # this: 95 | # py_modules=["my_module"], 96 | 97 | # List run-time dependencies here. These will be installed by pip when 98 | # your project is installed. For an analysis of "install_requires" vs pip's 99 | # requirements files see: 100 | # https://packaging.python.org/en/latest/requirements.html 101 | install_requires=requirements, 102 | 103 | # List additional groups of dependencies here (e.g. development 104 | # dependencies). You can install these using the following syntax, 105 | # for example: 106 | # $ pip install -e .[dev,test] 107 | extras_require={ 108 | 'perf': ['mpi4py', 'pycasso'], 109 | 'dev': dev_requirements 110 | }, 111 | 112 | url='https://github.com/BouchardLab/pyuoi', 113 | ext_modules=ext_modules, 114 | cmdclass={'build_ext': custom_build_ext} 115 | 116 | 117 | # To provide executable scripts, use entry points in preference to the 118 | # "scripts" keyword. Entry points provide cross-platform support and allow 119 | # pip to create the appropriate form of executable for the target platform. 
    # entry_points={
    #     'console_scripts': [
    #         'sample=sample:main',
    #     ],
    # },
)
-------------------------------------------------------------------------------- /src/pyuoi/__init__.py: --------------------------------------------------------------------------------
from .linear_model import UoI_Lasso
from .linear_model import UoI_ElasticNet
from .linear_model import UoI_L1Logistic
from .decomposition import UoI_NMF
from .decomposition import UoI_CUR


__all__ = ["UoI_Lasso",
           "UoI_L1Logistic",
           "UoI_ElasticNet",
           "UoI_NMF",
           "UoI_CUR"]

name = "pyuoi"
-------------------------------------------------------------------------------- /src/pyuoi/data/Swimmer.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BouchardLab/pyuoi/25e47655a07895f206c2e3ee3b259421c144a05d/src/pyuoi/data/Swimmer.h5 -------------------------------------------------------------------------------- /src/pyuoi/datasets/__init__.py: --------------------------------------------------------------------------------
import numpy as np
from sklearn.utils import check_random_state

from ..utils import softmax, sigmoid


def load_swimmer(flatten=True):
    from pkg_resources import resource_filename
    import h5py
    # open read-only; the bundled dataset never needs to be modified
    with h5py.File(resource_filename('pyuoi', 'data/Swimmer.h5'), 'r') as f:
        swimmers = f['Y'][:].astype(float)
    if flatten:
        swimmers = swimmers.T.reshape(256, 1024)
    return swimmers


def make_linear_regression(n_samples=100, n_features=5, n_informative=2,
                           X_loc=3., X_scale=1., snr=5.,
                           beta=None, beta_low=1., beta_high=3.,
                           include_intercept=False, random_state=None):
    """Make a linear regression dataset.

    Parameters
    ----------
    n_samples : int
        The number of samples to make.
    n_features : int
        The number of features to use.
    n_informative : int
        The number of features with non-zero weights.
    X_loc : float
        The mean of the features in the design matrix.
    X_scale : float
        The standard deviation of the features in the design matrix.
    snr : float
        The signal-to-noise ratio, which sets the variance of the noise
        term.
    beta : np.ndarray or None
        The beta values to use. If None, beta values will be drawn from a
        uniform distribution.
    beta_low : float
        The lower bound for the beta values.
    beta_high : float
        The upper bound for the beta values.
    include_intercept : bool
        If True, includes an intercept in the model; if False, the intercept
        is set to 0.
    random_state : int, np.random.RandomState instance, or None
        Random number seed or state.

    Returns
    -------
    X : ndarray, shape (n_samples, n_features)
        The design matrix.
    y : ndarray, shape (n_samples,)
        The response vector.
    beta : ndarray, shape (n_features,)
        The feature coefficients.
    intercept : float
        The intercept. If include_intercept is False, then intercept is zero.
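
    Examples
    --------
    A minimal sketch (shapes follow the documented returns; exact values
    depend on the seed):

    >>> X, y, beta, intercept = make_linear_regression(n_samples=50,
    ...                                                random_state=0)
    >>> X.shape, y.shape, beta.shape
    ((50, 5), (50,), (5,))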
    """
    rng = check_random_state(random_state)

    # create design matrix
    X = rng.normal(loc=X_loc,
                   scale=X_scale,
                   size=(n_samples, n_features))

    # create coefficients
    if beta is None:
        # draw beta values from a uniform distribution
        beta = rng.uniform(low=beta_low,
                           high=beta_high,
                           size=n_features)

        # choose sparsity mask
        zero_idx = np.zeros(n_features)
        zero_idx[:n_informative] = 1
        rng.shuffle(zero_idx)
        # randomly assign beta elements to zero
        beta = beta * zero_idx

    # create intercept
    if include_intercept:
        intercept = rng.uniform(low=beta_low, high=beta_high)
    else:
        intercept = 0

    # draw response variable
    eta = intercept + np.dot(X, beta)
    signal_var = np.var(eta)
    noise_var = signal_var / snr
    noise = rng.normal(loc=0, scale=np.sqrt(noise_var), size=eta.shape)
    y = eta + noise

    return X, y, beta, intercept


def make_classification(n_samples=100, n_features=20, n_informative=2,
                        n_classes=2, shared_support=False, random_state=None,
                        w_scale=1., include_intercept=False):
    """Make a linear classification dataset.

    Parameters
    ----------
    n_samples : int
        The number of samples to make.
    n_features : int
        The number of features to use.
    n_informative : int
        The number of features with non-zero weights.
    n_classes : int
        The number of classes.
    shared_support : bool
        If True, all classes will share the same random support. If False,
        each class will have its own randomly chosen support.
    random_state : int, np.random.RandomState instance, or None
        Random number seed or state.
    w_scale : float
        The model parameter matrix, w, will be drawn from a normal
        distribution with std=w_scale.
    include_intercept : bool
        If True, includes an intercept in the model; if False, the intercept
        is set to 0.

    Returns
    -------
    X : ndarray, shape (n_samples, n_features)
        The design matrix.
    y : ndarray, shape (n_samples,)
        The class labels.
    w : ndarray, shape (n_classes, n_features) or (1, n_features)
        The weight matrix, transposed before being returned.
    intercept : ndarray, shape (1, n_classes) or (1, 1)
        The intercept(s). Zero when include_intercept is False.
    """
    rng = check_random_state(random_state)  # accepts int, RandomState, or None
    n_not_informative = n_features - n_informative

    X = rng.randn(n_samples, n_features)
    X -= X.mean(axis=-1, keepdims=True)
    X /= X.std(axis=-1, keepdims=True)

    if n_classes > 2:
        w = rng.randn(n_features, n_classes)
        if include_intercept:
            intercept = rng.randn(1, n_classes)
            intercept -= intercept.max()
        else:
            intercept = np.zeros((1, n_classes))
        if n_not_informative > 0:
            if shared_support:
                idxs = rng.permutation(n_features)[:n_not_informative]
                w[idxs] = 0.
            else:
                for ii in range(n_classes):
                    idxs = rng.permutation(n_features)[:n_not_informative]
                    w[idxs, ii * np.ones_like(idxs, dtype=int)] = 0.
    else:
        w = rng.randn(n_features, 1)
        if include_intercept:
            intercept = rng.randn(1, 1)
        else:
            intercept = np.zeros((1, 1))
        if n_not_informative > 0:
            idxs = rng.permutation(n_features)[:n_not_informative]
            w[idxs] = 0.
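    # scale the weights; entries zeroed above (the non-informative features)
    # stay zero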
    w *= w_scale

    log_p = X.dot(w)
    if include_intercept:
        log_p += intercept
    if n_classes > 2:
        p = softmax(log_p)
        y = np.array([rng.multinomial(1, pi) for pi in p])
        y = y.argmax(axis=-1)
    else:
        p = sigmoid(np.squeeze(log_p))
        y = np.array([rng.binomial(1, pi) for pi in p])

    return X, y, w.T, intercept


def make_poisson_regression(n_samples=100, n_features=5, n_informative=2,
                            X_loc=0., X_scale=1. / 8,
                            beta=None, beta_shape=1., beta_scale=3.,
                            include_intercept=False, random_state=None):
    """Make a Poisson regression dataset.

    Parameters
    ----------
    n_samples : int
        The number of samples to make.
    n_features : int
        The number of features to use.
    n_informative : int
        The number of features with non-zero weights.
    X_loc : float
        The mean of the features in the design matrix.
    X_scale : float
        The standard deviation of the features in the design matrix.
    beta : np.ndarray or None
        The beta values to use. If None, beta values will be drawn from a
        gamma distribution.
    beta_shape : float
        The shape parameter for the beta values.
    beta_scale : float
        The scale parameter for the beta values.
    include_intercept : bool
        If True, includes an intercept in the model; if False, the intercept
        is set to 0.
    random_state : int, np.random.RandomState instance, or None
        Random number seed or state.

    Returns
    -------
    X : ndarray, shape (n_samples, n_features)
        The design matrix.
    y : ndarray, shape (n_samples,)
        The response vector.
    beta : ndarray, shape (n_features,)
        The feature coefficients.
    intercept : float
        The intercept. If include_intercept is False, then intercept is zero.
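
    Examples
    --------
    A minimal sketch (counts are Poisson draws, so exact values depend on
    the seed):

    >>> X, y, beta, intercept = make_poisson_regression(n_samples=50,
    ...                                                 random_state=0)
    >>> X.shape, y.shape
    ((50, 5), (50,))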
    """
    rng = check_random_state(random_state)

    # create design matrix
    X = rng.normal(loc=X_loc,
                   scale=X_scale,
                   size=(n_samples, n_features))

    # create coefficients
    if beta is None:
        # draw beta values from a gamma distribution
        beta = rng.gamma(shape=beta_shape,
                         scale=beta_scale,
                         size=n_features)
        # choose sparsity mask
        zero_idx = np.zeros(n_features)
        zero_idx[:n_informative] = 1
        rng.shuffle(zero_idx)
        # randomly assign beta elements to zero
        beta = beta * zero_idx

    # create intercept
    if include_intercept:
        intercept = rng.gamma(shape=beta_shape, scale=beta_scale)
    else:
        intercept = 0

    # draw response variable
    eta = intercept + np.dot(X, beta)
    y = rng.poisson(np.exp(eta))

    return X, y, beta, intercept
-------------------------------------------------------------------------------- /src/pyuoi/decomposition/__init__.py: --------------------------------------------------------------------------------
"""Union of Intersection models with matrix decomposition."""
from .CUR import UoI_CUR
from .CUR import CUR
from .NMF import UoI_NMF
from .NMF import UoI_NMF_Base

__all__ = ["UoI_CUR",
           "CUR",
           "UoI_NMF",
           "UoI_NMF_Base"]
-------------------------------------------------------------------------------- /src/pyuoi/decomposition/base.py: --------------------------------------------------------------------------------
import abc as _abc

from sklearn.base import BaseEstimator


class AbstractDecompositionModel(BaseEstimator, metaclass=_abc.ABCMeta):
    @_abc.abstractmethod
    def fit(self, X):
        """Placeholder for fit. Subclasses should implement this method.
        Fit the model with X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        pass

    @_abc.abstractmethod
    def transform(self, X):
        """Apply dimensionality reduction to X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Data matrix to be transformed.

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
            The transformed data matrix.
        """
        pass

    @_abc.abstractmethod
    def fit_transform(self, X):
        """Transform the data X according to the fitted decomposition.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Data matrix to be decomposed.

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
            Transformed data.
        """
        pass
-------------------------------------------------------------------------------- /src/pyuoi/decomposition/utils.py: --------------------------------------------------------------------------------
import numpy as np


def column_select(V, c, leverage_sort=False, random_state=None):
    """Chooses column indices from a matrix given its SVD.

    Parameters
    ----------
    V : ndarray, shape (n_features, rank)
        The set of singular vectors.

    c : float
        The expected number of columns to select.

    leverage_sort : bool
        If True, resorts the column indices in increasing order of leverage
        score. If False, the column indices are returned in ascending index
        order.
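
    random_state : int, np.random.RandomState instance, or None
        Random number seed or state.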
18 | 
19 |     Returns
20 |     -------
21 |     column_indices : ndarray of ints
22 |         An array of indices denoting which columns were selected. If
23 |         leverage_sort was True, this array is arranged by increasing leverage
24 |         score.
25 |     """
26 |     # random state
27 |     rng = check_random_state(random_state)
28 | 
29 |     # extract number of samples and rank
30 |     n_features, k = V.shape
31 | 
32 |     # calculate normalized leverage score
33 |     pi = np.sum(V**2, axis=1) / k
34 | 
35 |     # iterate through columns
36 |     column_flags = np.zeros(n_features, dtype=bool)
37 |     for column in range(n_features):
38 |         # Mahoney (2009), eqn 3
39 |         p = min(1, c * pi[column])
40 |         # randomly decide whether to select this column
41 |         column_flags[column] = p > rng.rand()
42 | 
43 |     column_indices = np.argwhere(column_flags).ravel()
44 | 
45 |     # if desired, sort by increasing leverage score
46 |     if leverage_sort:
47 |         pi_subset = pi[column_indices]
48 |         column_indices = column_indices[np.argsort(pi_subset)]
49 | 
50 |     return column_indices
51 | 
52 | 
53 | def stability_selection_to_threshold(stability_selection, n_boots):
54 |     """Converts user-inputted stability selection to a single threshold. The
55 |     threshold corresponds to the number of bootstraps
56 |     that a feature must appear in to guarantee placement in the selection
57 |     profile.
58 | 
59 |     Parameters
60 |     ----------
61 |     stability_selection : int or float
62 |         If int, treated as the number of bootstraps that a feature must
63 |         appear in to guarantee placement in selection profile. If float,
64 |         must be between 0 and 1, and is instead the proportion of
65 |         bootstraps.
66 | 
67 |     n_boots : int
68 |         The number of bootstraps that will be used for selection
69 |     """
70 | 
71 |     # float, indicating proportion of bootstraps
72 |     if isinstance(stability_selection, float):
73 |         selection_threshold = int(stability_selection * n_boots)
74 | 
75 |     # int, indicating number of bootstraps
76 |     elif isinstance(stability_selection, int):
77 |         selection_threshold = stability_selection
78 | 
79 |     else:
80 |         raise ValueError("Stability selection must be a valid float or int.")
81 | 
82 |     # ensure that the selection threshold satisfies
83 |     # the correct bounds (between 1 and n_boots)
84 |     if not (
85 |         selection_threshold <= n_boots and selection_threshold >= 1
86 |     ):
87 |         raise ValueError("Stability selection thresholds must be within "
88 |                          "the correct bounds.")
89 | 
90 |     return selection_threshold
91 | 
92 | 
93 | def dissimilarity(H1, H2):
94 |     """Calculates the dissimilarity between two sets of NMF bases.
95 | 
96 |     Parameters
97 |     ----------
98 |     H1 : ndarray, shape (n_components, n_features)
99 |         First set of bases.
100 | 
101 |     H2 : ndarray, shape (n_components, n_features)
102 |         Second set of bases.
103 | 
104 |     Returns
105 |     -------
106 |     diss : float
107 |         Dissimilarity between the two sets of bases.
108 |     """
109 |     k = H1.shape[0]
110 |     H1 = H1 / np.linalg.norm(H1, axis=1, keepdims=True)
111 |     H2 = H2 / np.linalg.norm(H2, axis=1, keepdims=True)
112 |     C = np.dot(H1, H2.T)
113 |     diss = 1 - ((np.max(C, axis=0).sum() + np.max(C, axis=1).sum()) / (2.
* k)) 114 | return diss 115 | -------------------------------------------------------------------------------- /src/pyuoi/lbfgs/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright 2011 University of Amsterdam 4 | Copyright 2011-2012 Lars Buitinck 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a 7 | copy of this software and associated documentation files (the "Software"), 8 | to deal in the Software without restriction, including without limitation 9 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 | and/or sell copies of the Software, and to permit persons to whom the 11 | Software is furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /src/pyuoi/lbfgs/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | LBFGS and OWL-QN optimization algorithms 3 | 4 | Python wrapper around liblbfgs. 5 | """ 6 | 7 | import warnings 8 | from ._lowlevel import LBFGS, AllZeroLBFGSError # noqa: F401 9 | 10 | 11 | def fmin_lbfgs(f, x0, progress=None, args=(), orthantwise_c=0, 12 | orthantwise_start=0, orthantwise_end=-1, m=10, 13 | epsilon=1e-5, past=0, delta=0., max_iterations=0, 14 | line_search="default", max_linesearch=20, min_step=1e-20, 15 | max_step=1e+20, ftol=1e-4, wolfe=0.9, gtol=0.9, xtol=1e-30): 16 | """Minimize a function using LBFGS or OWL-QN 17 | 18 | Parameters 19 | ---------- 20 | f : callable(x, g, *args) 21 | Computes function to minimize and its gradient. 22 | Called with the current position x (a numpy.ndarray), a gradient 23 | vector g (a numpy.ndarray) to be filled in and *args. 24 | Must return the value at x and set the gradient vector g. 25 | 26 | x0 : array-like 27 | Initial values. A copy of this array is made prior to optimization. 28 | 29 | progress : callable(x, g, fx, xnorm, gnorm, step, k, num_eval, *args), 30 | optional 31 | If not None, called at each iteration after the call to f with the 32 | current values of x, g and f(x), the L2 norms of x and g, the line 33 | search step, the iteration number, the number of evaluations at 34 | this iteration and args (see below). 35 | If the return value from this callable is not 0 and not None, 36 | optimization is stopped and LBFGSError is raised. 37 | 38 | args : sequence 39 | Arbitrary list of arguments, passed on to f and progress as *args. 40 | 41 | orthantwise_c: float, optional (default=0) 42 | Coefficient for the L1 norm of variables. 43 | This parameter should be set to zero for standard minimization 44 | problems. 
Setting this parameter to a positive value activates
45 |         Orthant-Wise Limited-memory Quasi-Newton (OWL-QN) method, which
46 |         minimizes the objective function F(x) combined with the L1 norm |x|
47 |         of the variables, {F(x) + C |x|}. This parameter is the coefficient
48 |         for the |x|, i.e., C. As the L1 norm |x| is not differentiable at
49 |         zero, the library modifies function and gradient evaluations from
50 |         a client program suitably; a client program thus has only to return
51 |         the function value F(x) and gradients G(x) as usual. The default value
52 |         is zero.
53 | 
54 |         If orthantwise_c is set, 'wolfe' is the only supported line_search;
55 |         any other choice is overridden to 'wolfe' with a warning.
56 | 
57 |     orthantwise_start: int, optional (default=0)
58 |         Start index for computing L1 norm of the variables.
59 |         This parameter is valid only for OWL-QN method
60 |         (i.e., orthantwise_c != 0). This parameter b (0 <= b < N)
61 |         specifies the index number from which the library computes the
62 |         L1 norm of the variables x,
63 |         |x| := |x_{b}| + |x_{b+1}| + ... + |x_{N}| .
64 |         In other words, variables x_1, ..., x_{b-1} are not used for
65 |         computing the L1 norm. Setting b (0 < b < N), one can protect
66 |         variables, x_1, ..., x_{b-1} (e.g., a bias term of logistic
67 |         regression) from being regularized. The default value is zero.
68 | 
69 |     orthantwise_end: int, optional (default=-1)
70 |         End index for computing L1 norm of the variables.
71 |         This parameter is valid only for OWL-QN method
72 |         (i.e., orthantwise_c != 0). This parameter e (0 < e <= N)
73 |         specifies the index number at which the library stops computing the
74 |         L1 norm of the variables x.
75 | 
76 |     m: int, optional (default=10)
77 |         The number of corrections to approximate the inverse hessian matrix.
78 |         The L-BFGS routine stores the computation results of previous `m`
79 |         iterations to approximate the inverse hessian matrix of the current
80 |         iteration. This parameter controls the size of the limited memories
81 |         (corrections). The default value is 10. Values less than 3 are
82 |         not recommended. Large values will result in excessive computing time.
83 | 
84 |     epsilon: float, optional (default=1e-5)
85 |         Epsilon for convergence test.
86 |         This parameter determines the accuracy with which the solution is to
87 |         be found. A minimization terminates when
88 |         ||g|| < epsilon * max(1, ||x||),
89 |         where ||.|| denotes the Euclidean (L2) norm. The default value is
90 |         1e-5.
91 | 
92 |     past: int, optional (default=0)
93 |         Distance for delta-based convergence test.
94 |         This parameter determines the distance, in iterations, to compute
95 |         the rate of decrease of the objective function. If the value of this
96 |         parameter is zero, the library does not perform the delta-based
97 |         convergence test. The default value is 0.
98 | 
99 |     delta: float, optional (default=0.)
100 |         Delta for convergence test.
101 |         This parameter determines the minimum rate of decrease of the
102 |         objective function. The library stops iterations when the
103 |         following condition is met:
104 |         (f' - f) / f < delta,
105 |         where f' is the objective value of `past` iterations ago, and f is
106 |         the objective value of the current iteration.
107 |         The default value is 0.
108 | 
109 |     max_iterations: int, optional (default=0)
110 |         The maximum number of iterations. Setting this parameter to zero
111 |         continues an optimization process until a convergence or error. The
112 |         default value is 0.
113 | 
114 |     line_search: str, optional (default="default")
115 |         The line search algorithm.
116 |         This parameter specifies a line search algorithm to be used by the
117 |         L-BFGS routine. Possible values are:
118 | 
119 |         - 'default': same as 'morethuente'
120 |         - 'morethuente': Method proposed by More and Thuente
121 |         - 'armijo': backtracking with Armijo's conditions
122 |         - 'wolfe': backtracking with Wolfe's conditions
123 |         - 'strongwolfe': backtracking with strong Wolfe's conditions
124 | 
125 |     max_linesearch: int, optional (default=20)
126 |         The maximum number of trials for the line search.
127 |         This parameter controls the number of function and gradient evaluations
128 |         per iteration for the line search routine. The default value is 20.
129 | 
130 |     min_step: float, optional (default=1e-20)
131 |         The minimum step of the line search routine.
132 |         The default value is 1e-20. This value need not be modified unless
133 |         the exponents are too large for the machine being used, or unless the
134 |         problem is extremely badly scaled (in which case the exponents should
135 |         be increased).
136 | 
137 |     max_step: float, optional (default=1e20)
138 |         The maximum step of the line search.
139 |         The default value is 1e+20. This value need not be modified unless
140 |         the exponents are too large for the machine being used, or unless the
141 |         problem is extremely badly scaled (in which case the exponents should
142 |         be increased).
143 | 
144 |     ftol: float, optional (default=1e-4)
145 |         A parameter to control the accuracy of the line search routine.
146 |         The default value is 1e-4. This parameter should be greater
147 |         than zero and smaller than 0.5.
148 | 
149 |     wolfe: float, optional (default=0.9)
150 |         A coefficient for the Wolfe condition. This parameter is valid only
151 |         when the backtracking line-search algorithm is used with the Wolfe
152 |         condition (`line_search='wolfe'` or `line_search='strongwolfe'`).
153 |         The default value is 0.9. This parameter should be greater than
154 |         the `ftol` parameter and smaller than 1.0.
155 | 
156 |     gtol: float, optional (default=0.9)
157 |         A parameter to control the accuracy of the line search routine.
158 |         The default value is 0.9. If the function and gradient
159 |         evaluations are inexpensive with respect to the cost of the
160 |         iteration (which is sometimes the case when solving very large
161 |         problems) it may be advantageous to set this parameter to a small
162 |         value. A typical small value is 0.1. This parameter should be
163 |         greater than the ftol parameter (1e-4) and smaller than
164 |         1.0.
165 | 
166 | 
167 |     xtol: float, optional (default=1e-30)
168 |         The machine precision for floating-point values.
169 |         This parameter must be a positive value set by a client program to
170 |         estimate the machine precision. The line search routine will terminate
171 |         with the status code ``LBFGSERR_ROUNDING_ERROR`` if the relative width
172 |         of the interval of uncertainty is less than this parameter.
173 | 
174 | 
175 |     """
176 | 
177 |     # Input validation to make sure defaults with OWL-QN are adapted correctly
178 |     assert orthantwise_c >= 0, "Orthantwise_c cannot be negative"
179 | 
180 |     if orthantwise_c > 0 and line_search not in ['wolfe', 'default']:
181 |         line_search = 'wolfe'
182 |         warnings.warn("When using OWL-QN, 'wolfe' is the only valid "
183 |                       + "line_search. 
line_search has been set to 'wolfe'.")
184 |     elif orthantwise_c > 0 and line_search == 'default':
185 |         line_search = 'wolfe'
186 | 
187 |     opt = LBFGS()
188 |     opt.orthantwise_c = orthantwise_c
189 |     opt.orthantwise_start = orthantwise_start
190 |     opt.orthantwise_end = orthantwise_end
191 |     opt.m = m
192 |     opt.epsilon = epsilon
193 |     opt.past = past
194 |     opt.delta = delta
195 |     opt.max_iterations = max_iterations
196 |     opt.linesearch = line_search
197 |     opt.max_linesearch = max_linesearch
198 |     opt.min_step = min_step
199 |     opt.max_step = max_step
200 |     opt.ftol = ftol
201 |     opt.wolfe = wolfe
202 |     opt.gtol = gtol
203 |     opt.xtol = xtol
204 | 
205 |     return opt.minimize(f, x0, progress=progress, args=args)
206 | 
-------------------------------------------------------------------------------- /src/pyuoi/linear_model/__init__.py: --------------------------------------------------------------------------------
1 | """Union of Intersection models with linear selection and estimation.
2 | 
3 | Provides both abstract base classes for creating user-defined UoI models
4 | and several concrete implementations.
5 | """
6 | from .base import (AbstractUoILinearModel, AbstractUoILinearRegressor,
7 |                    AbstractUoIGeneralizedLinearRegressor)
8 | from .lasso import UoI_Lasso
9 | from .elasticnet import UoI_ElasticNet
10 | from .logistic import MaskedCoefLogisticRegression, UoI_L1Logistic
11 | from .poisson import Poisson, UoI_Poisson
12 | 
13 | __all__ = ["AbstractUoILinearModel",
14 |            "AbstractUoILinearRegressor",
15 |            "AbstractUoIGeneralizedLinearRegressor",
16 |            "MaskedCoefLogisticRegression",
17 |            "UoI_L1Logistic",
18 |            "UoI_Lasso",
19 |            "UoI_ElasticNet",
20 |            "Poisson",
21 |            "UoI_Poisson"]
22 | 
-------------------------------------------------------------------------------- /src/pyuoi/linear_model/elasticnet.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | from .base import AbstractUoILinearRegressor
4 | 
5 | from sklearn.linear_model import LinearRegression
6 | from sklearn.linear_model._coordinate_descent import _alpha_grid
7 | from sklearn.linear_model import ElasticNet
8 | 
9 | 
10 | class UoI_ElasticNet(AbstractUoILinearRegressor, LinearRegression):
11 |     r"""UoI\ :sub:`ElasticNet` solver.
12 | 
13 |     Parameters
14 |     ----------
15 |     n_boots_sel : int
16 |         The number of data bootstraps to use in the selection module.
17 |         Increasing this number will make selection more strict.
18 |     n_boots_est : int
19 |         The number of data bootstraps to use in the estimation module.
20 |         Increasing this number will relax selection and decrease variance.
21 |     selection_frac : float
22 |         The fraction of the dataset to use for training in each resampled
23 |         bootstrap, during the selection module. Small values of this parameter
24 |         imply larger "perturbations" to the dataset.
25 |     estimation_frac : float
26 |         The fraction of the dataset to use for training in each resampled
27 |         bootstrap, during the estimation module. The remaining data is used
28 |         to obtain validation scores. Small values of this parameter imply
29 |         larger "perturbations" to the dataset.
30 | 
31 |     n_lambdas : int
32 |         The number of regularization values to use for selection.
33 |     alphas : list or ndarray
34 |         The parameter that trades off L1 versus L2 regularization for a given
35 |         lambda.
36 |     stability_selection : int, float, or array-like
37 |         If int, treated as the number of bootstraps that a feature must
38 |         appear in to guarantee placement in selection profile. If float,
39 |         must be between 0 and 1, and is instead the proportion of
40 |         bootstraps. If array-like, must consist of either ints or floats
41 |         between 0 and 1. In this case, each entry in the array-like object
42 |         will act as a separate threshold for placement in the selection
43 |         profile.
44 |     estimation_score : string, "r2" | "AIC" | "AICc" | "BIC"
45 |         Objective used to choose the best estimates per bootstrap.
46 |     estimation_target : string, "train" | "test"
47 |         Decide whether to assess the estimation_score on the train
48 |         or test data across each bootstrap. By default, a sensible
49 |         choice is made based on the chosen estimation_score.
50 |     warm_start : bool
51 |         When set to ``True``, reuse the solution of the previous call to fit as
52 |         initialization; otherwise, just erase the previous solution.
53 |     eps : float
54 |         Length of the lasso path. ``eps=1e-3`` means that
55 |         ``alpha_min / alpha_max = 1e-3``.
56 |     copy_X : bool
57 |         If ``True``, X will be copied; else, it may be overwritten.
58 |     fit_intercept : bool
59 |         Whether to calculate the intercept for this model. If set
60 |         to False, no intercept will be used in calculations
61 |         (e.g. data is expected to be already centered).
62 |     standardize : bool
63 |         If True, the regressors X will be standardized before regression by
64 |         subtracting the mean and dividing by their standard deviations.
65 |     max_iter : int
66 |         Maximum number of iterations for iterative fitting methods.
67 |     tol : float
68 |         Stopping criteria for solver.
69 |     random_state : int, RandomState instance, or None
70 |         The seed of the pseudo random number generator that selects a random
71 |         feature to update. If int, random_state is the seed used by the random
72 |         number generator; If RandomState instance, random_state is the random
73 |         number generator; If None, the random number generator is the
74 |         RandomState instance used by `np.random`.
75 |     comm : MPI communicator
76 |         If passed, the selection and estimation steps are parallelized.
77 |     logger : Logger
78 |         The logger to use for messages when ``verbose=True`` in ``fit``.
79 |         If *None* is passed, a logger that writes to ``sys.stdout`` will be
80 |         used.
81 | 
82 |     Attributes
83 |     ----------
84 |     coef_ : array, shape (n_features,) or (n_targets, n_features)
85 |         Estimated coefficients for the linear regression problem.
86 |     intercept_ : float
87 |         Independent term in the linear model.
88 |     supports_ : ndarray, shape (n_supports, n_features)
89 |         Boolean array indicating whether a given regressor (column) is selected
90 |         for estimation for a given regularization parameter value (row).
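    Examples
    --------
    A minimal fitting sketch (synthetic data; the recovered support
    depends on the noise realization, so only the coefficient shape is
    checked here):

    >>> import numpy as np
    >>> from pyuoi.linear_model import UoI_ElasticNet
    >>> rng = np.random.RandomState(0)
    >>> X = rng.randn(200, 5)
    >>> y = X @ np.array([1., 4., 0., 0., 0.]) + 0.1 * rng.randn(200)
    >>> enet = UoI_ElasticNet(random_state=0).fit(X, y)
    >>> enet.coef_.shape
    (5,)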
91 | """ 92 | def __init__(self, n_boots_sel=24, n_boots_est=24, selection_frac=0.9, 93 | estimation_frac=0.9, n_lambdas=48, 94 | alphas=np.array([0.5]), stability_selection=1., 95 | estimation_score='r2', estimation_target=None, 96 | warm_start=True, eps=1e-3, copy_X=True, 97 | fit_intercept=True, standardize=True, 98 | max_iter=1000, tol=1e-4, random_state=None, 99 | comm=None, logger=None): 100 | super(UoI_ElasticNet, self).__init__( 101 | n_boots_sel=n_boots_sel, 102 | n_boots_est=n_boots_est, 103 | selection_frac=selection_frac, 104 | estimation_frac=estimation_frac, 105 | stability_selection=stability_selection, 106 | estimation_score=estimation_score, 107 | estimation_target=estimation_target, 108 | copy_X=copy_X, 109 | fit_intercept=fit_intercept, 110 | standardize=standardize, 111 | random_state=random_state, 112 | comm=comm, 113 | max_iter=max_iter, 114 | tol=tol, 115 | logger=logger) 116 | self.n_lambdas = n_lambdas 117 | self.alphas = alphas 118 | self.n_alphas = len(alphas) 119 | self.warm_start = warm_start 120 | self.eps = eps 121 | self.lambdas = None 122 | self._selection_lm = ElasticNet( 123 | fit_intercept=fit_intercept, 124 | max_iter=max_iter, 125 | tol=tol, 126 | copy_X=copy_X, 127 | warm_start=warm_start, 128 | random_state=random_state) 129 | self._estimation_lm = LinearRegression(fit_intercept=fit_intercept) 130 | 131 | def get_reg_params(self, X, y): 132 | r"""Calculates the regularization parameters (alpha and lambda) to be 133 | used for the provided data. 134 | 135 | Note that the Elastic Net penalty is given by 136 | 137 | .. math:: 138 | \frac{1}{2\ \text{n_samples}} ||y - Xb||^2_2 139 | + \lambda (\alpha |b|_1 + 0.5 (1 - \alpha) |b|^2_2) 140 | 141 | where lambda and alpha are regularization parameters. 142 | 143 | ``scikit-learn`` does not use these names. Instead, ``scitkit-learn`` 144 | denotes alpha by 'l1_ratio' and lambda by 'alpha'. 145 | 146 | Parameters 147 | ---------- 148 | X : array-like, shape (n_samples, n_features) 149 | The design matrix. 150 | 151 | y : array-like, shape (n_samples) 152 | The response vector. 153 | 154 | Returns 155 | ------- 156 | reg_params : a list of dictionaries 157 | A list containing dictionaries with the value of each 158 | (lambda, alpha) describing the type of regularization to impose. 159 | The keys adhere to scikit-learn's terminology (lambda->alpha, 160 | alpha->l1_ratio). This allows easy passing into the ElasticNet 161 | object. 
162 | """ 163 | if self.lambdas is None: 164 | self.lambdas = np.zeros((self.n_alphas, self.n_lambdas)) 165 | # a set of lambdas are generated for each alpha value (l1_ratio in 166 | # sci-kit learn parlance) 167 | for alpha_idx, alpha in enumerate(self.alphas): 168 | self.lambdas[alpha_idx, :] = _alpha_grid( 169 | X=X, y=y, 170 | l1_ratio=alpha, 171 | fit_intercept=self.fit_intercept, 172 | eps=self.eps, 173 | n_alphas=self.n_lambdas) 174 | 175 | # place the regularization parameters into a list of dictionaries 176 | reg_params = list() 177 | for alpha_idx, alpha in enumerate(self.alphas): 178 | for lamb_idx, lamb in enumerate(self.lambdas[alpha_idx]): 179 | # reset the regularization parameter 180 | reg_params.append(dict(alpha=lamb, l1_ratio=alpha)) 181 | 182 | return reg_params 183 | -------------------------------------------------------------------------------- /src/pyuoi/linear_model/lasso.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from sklearn.exceptions import NotFittedError 4 | from sklearn.linear_model import Lasso, LinearRegression 5 | from sklearn.linear_model._coordinate_descent import _alpha_grid 6 | try: 7 | import pycasso 8 | except ImportError: 9 | pycasso = None 10 | 11 | from .base import AbstractUoILinearRegressor 12 | 13 | 14 | class PycLasso(): 15 | """Lasso using the pycasso solver. Solves for an entire regularization path 16 | at once. 17 | 18 | Parameters 19 | ---------- 20 | alphas : nd-array 21 | The regularization path. Defaults to None for compatibility with UoI, 22 | but needs to be set prior to fitting. 23 | fit_intercept : bool 24 | Whether to calculate the intercept for this model. If set to ``False``, 25 | no intercept will be used in calculations. 26 | max_iter : int 27 | Maximum number of iterations for pycasso solver. 28 | tol : float 29 | Stopping criteria for solver. 30 | 31 | Attributes 32 | ---------- 33 | coef_ : ndarray, shape (n_features,) or (n_targets, n_features) 34 | Estimated coefficients for the linear regression problem. 35 | intercept_ : float 36 | Independent term in the linear model. 37 | """ 38 | def __init__(self, alphas=None, fit_intercept=True, max_iter=1000, 39 | tol=1e-4): 40 | if fit_intercept is False: 41 | string = ('There is currently a bug in picasso that prevents ' + 42 | 'its use with `fit_intercept=False.`' + 43 | 'See https://github.com/jasonge27/picasso/' + 44 | 'issues/25 for resolution.') 45 | raise ValueError(string) 46 | self.max_iter = max_iter 47 | self.tol = tol 48 | self.fit_intercept = fit_intercept 49 | self.alphas = alphas 50 | 51 | # Flag to prevent us from predicting before fitting 52 | self.isfitted = False 53 | self.tol = tol 54 | 55 | def set_params(self, **kwargs): 56 | """Sets the parameters of this estimator.""" 57 | _valid_params = ['alphas', 'fit_intercept', 'max_iter'] 58 | 59 | for key, value in kwargs.items(): 60 | if key in _valid_params: 61 | setattr(self, key, value) 62 | else: 63 | raise ValueError('Invalid parameter %s' % key) 64 | 65 | def predict(self, X): 66 | """Predicts responses given a design matrix. 67 | 68 | Parameters 69 | ---------- 70 | X : ndarray, (n_samples, n_features) 71 | The design matrix. 72 | 73 | Returns 74 | ------- 75 | y : ndarray, shape (n_samples,) 76 | Predicted response vector. 
77 | """ 78 | if self.isfitted: 79 | return np.matmul(X, self.coef_.T) + self.intercept_ 80 | else: 81 | raise NotFittedError('Estimator is not fit.') 82 | 83 | def fit(self, X, y): 84 | """Fit data according to the pycasso object. 85 | 86 | Parameters 87 | ---------- 88 | X : ndarray, (n_samples, n_features) 89 | The design matrix. 90 | y : ndarray, shape (n_samples,) 91 | Response vector. Will be cast to X's dtype if necessary. 92 | Currently, this implementation does not handle multiple response 93 | variables. 94 | """ 95 | if self.alphas is None: 96 | raise Exception('Set alphas before fitting.') 97 | if self.fit_intercept is False: 98 | string = ('There is currently a bug in picasso that prevents ' + 99 | 'its use with `fit_intercept=False.`' + 100 | 'See https://github.com/jasonge27/picasso/' + 101 | 'issues/25 for resolution.') 102 | raise ValueError(string) 103 | 104 | self.solver = pycasso.Solver(X, y, family='gaussian', 105 | useintercept=self.fit_intercept, 106 | lambdas=self.alphas, 107 | penalty='l1', 108 | max_ite=self.max_iter, 109 | prec=self.tol) 110 | self.solver.train() 111 | # Coefs across the entire solution path 112 | self.coef_ = self.solver.result['beta'] 113 | self.intercept_ = self.solver.result['intercept'] 114 | self.isfitted = True 115 | return self 116 | 117 | 118 | class UoI_Lasso(AbstractUoILinearRegressor, LinearRegression): 119 | r"""UoI\ :sub:`Lasso` solver. 120 | 121 | Parameters 122 | ---------- 123 | n_boots_sel : int 124 | The number of data bootstraps/resamples to use in the selection module. 125 | Increasing this number will make selection more strict. 126 | n_boots_est : int 127 | The number of data bootstraps/resamples to use in the estimation 128 | module. Increasing this number will relax selection and decrease 129 | variance. 130 | n_lambdas : int 131 | The number of regularization values to use for selection. 132 | selection_frac : float 133 | The fraction of the dataset to use for training in each resampled 134 | bootstrap, during the selection module. Small values of this parameter 135 | imply larger "perturbations" to the dataset. 136 | estimation_frac : float 137 | The fraction of the dataset to use for training in each resampled 138 | bootstrap, during the estimation module. The remaining data is used 139 | to obtain validation scores. Small values of this parameters imply 140 | larger "perturbations" to the dataset. 141 | stability_selection : int, float, or array-like 142 | If int, treated as the number of bootstraps that a feature must 143 | appear in order to guarantee placement in selection profile. If float, 144 | must be between 0 and 1, and is instead the proportion of 145 | bootstraps. If array-like, must consist of either ints or floats 146 | between 0 and 1. In this case, each entry in the array-like object 147 | will act as a separate threshold for placement in the selection 148 | profile. 149 | estimation_score : string, "r2" | "AIC" | "AICc" | "BIC" 150 | Objective used to choose the best estimates per bootstrap. 151 | estimation_target : string, "train" | "test" 152 | Decide whether to assess the estimation_score on the train 153 | or test data across each bootstrap. By deafult, a sensible 154 | choice is made based on the chosen estimation_score 155 | warm_start : bool 156 | When set to ``True``, reuse the solution of the previous call to fit as 157 | initialization, otherwise, just erase the previous solution 158 | eps : float 159 | Length of the lasso path. 
``eps=1e-3`` means that
160 |         ``lambda_min / lambda_max = 1e-3``.
161 |     copy_X : bool
162 |         If ``True``, X will be copied; else, it may be overwritten.
163 |     fit_intercept : bool
164 |         Whether to calculate the intercept for this model. If set
165 |         to False, no intercept will be used in calculations
166 |         (e.g. data is expected to be already centered).
167 |     standardize : bool
168 |         If True, the regressors X will be standardized before regression by
169 |         subtracting the mean and dividing by their standard deviations. This
170 |         parameter is equivalent to ``normalize`` in ``scikit-learn`` models.
171 |     max_iter : int
172 |         Maximum number of iterations for iterative fitting methods.
173 |     tol : float
174 |         Stopping criteria for solver.
175 |     random_state : int, RandomState instance, or None
176 |         The seed of the pseudo random number generator that selects a random
177 |         feature to update. If int, random_state is the seed used by the random
178 |         number generator; If RandomState instance, random_state is the random
179 |         number generator; If None, the random number generator is the
180 |         RandomState instance used by ``np.random``.
181 |     comm : MPI communicator
182 |         If passed, the selection and estimation steps are parallelized.
183 |     logger : Logger
184 |         The logger to use for messages when ``verbose=True`` in ``fit``.
185 |         If *None* is passed, a logger that writes to ``sys.stdout`` will be
186 |         used.
187 |     solver : string, 'cd' | 'pyc'
188 |         If 'cd', will use the ``scikit-learn`` lasso implementation (via
189 |         coordinate descent). If 'pyc', will use ``PycLasso``, built on the
190 |         pycasso path-wise solver.
191 | 
192 | 
193 |     Attributes
194 |     ----------
195 |     coef_ : ndarray, shape (n_features,) or (n_targets, n_features)
196 |         Estimated coefficients for the linear regression problem.
197 |     intercept_ : float
198 |         Independent term in the linear model.
199 |     supports_ : ndarray, shape (n_supports, n_features)
200 |         Boolean array indicating whether a given regressor (column) is selected
201 |         for estimation for a given regularization parameter value (row).
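    Examples
    --------
    A minimal fitting sketch (synthetic data; the recovered support
    depends on the noise realization, so only the coefficient shape is
    checked here):

    >>> import numpy as np
    >>> from pyuoi.linear_model import UoI_Lasso
    >>> rng = np.random.RandomState(0)
    >>> X = rng.randn(200, 5)
    >>> y = X @ np.array([1., 4., 0., 0., 0.]) + 0.1 * rng.randn(200)
    >>> lasso = UoI_Lasso(random_state=0).fit(X, y)
    >>> lasso.coef_.shape
    (5,)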
202 | """ 203 | def __init__(self, n_boots_sel=24, n_boots_est=24, selection_frac=0.9, 204 | estimation_frac=0.9, n_lambdas=48, stability_selection=1., 205 | estimation_score='r2', estimation_target=None, eps=1e-3, 206 | warm_start=True, copy_X=True, fit_intercept=True, 207 | standardize=True, max_iter=1000, tol=1e-4, random_state=None, 208 | comm=None, logger=None, solver='cd'): 209 | super(UoI_Lasso, self).__init__( 210 | n_boots_sel=n_boots_sel, 211 | n_boots_est=n_boots_est, 212 | selection_frac=selection_frac, 213 | estimation_frac=estimation_frac, 214 | estimation_target=estimation_target, 215 | stability_selection=stability_selection, 216 | copy_X=copy_X, 217 | fit_intercept=fit_intercept, 218 | standardize=standardize, 219 | random_state=random_state, 220 | comm=comm, 221 | estimation_score=estimation_score, 222 | max_iter=max_iter, 223 | tol=tol, 224 | logger=logger) 225 | self.n_lambdas = n_lambdas 226 | self.eps = eps 227 | self.solver = solver 228 | self.tol = tol 229 | 230 | if solver == 'cd': 231 | self._selection_lm = Lasso( 232 | max_iter=max_iter, 233 | tol=tol, 234 | warm_start=warm_start, 235 | random_state=random_state, 236 | fit_intercept=fit_intercept) 237 | elif solver == 'pyc': 238 | if pycasso is None: 239 | raise ImportError('pycasso is not installed.') 240 | self._selection_lm = PycLasso( 241 | fit_intercept=fit_intercept, 242 | max_iter=max_iter, 243 | tol=tol) 244 | 245 | self._estimation_lm = LinearRegression(fit_intercept=fit_intercept) 246 | 247 | def get_reg_params(self, X, y): 248 | alphas = _alpha_grid( 249 | X=X, y=y, 250 | l1_ratio=1.0, 251 | fit_intercept=self.fit_intercept, 252 | eps=self.eps, 253 | n_alphas=self.n_lambdas) 254 | 255 | return [{'alpha': a} for a in alphas] 256 | 257 | def uoi_selection_sweep(self, X, y, reg_param_values): 258 | """Overwrite base class selection sweep to accommodate pycasso 259 | path-wise solution""" 260 | 261 | if self.solver == 'pyc': 262 | alphas = np.array([reg_param['alpha'] 263 | for reg_param in reg_param_values]) 264 | self._selection_lm.set_params(alphas=alphas) 265 | self._selection_lm.fit(X, y) 266 | 267 | return self._selection_lm.coef_ 268 | else: 269 | return super(UoI_Lasso, self).uoi_selection_sweep(X, y, 270 | reg_param_values) 271 | -------------------------------------------------------------------------------- /src/pyuoi/linear_model/scikit-learn_license: -------------------------------------------------------------------------------- 1 | Portions of logistic.py including 2 | MaskedCoefLogisticRegression, 3 | _logistic_regression_path, 4 | _intercept_dot, 5 | _logistic_loss_and_grad, and 6 | _multinomial_loss_grad 7 | are based on code from scikit-learn. The scikit-learn license is below. 8 | 9 | --------------- 10 | New BSD License 11 | 12 | Copyright (c) 2007–2019 The scikit-learn developers. 13 | All rights reserved. 14 | 15 | 16 | Redistribution and use in source and binary forms, with or without 17 | modification, are permitted provided that the following conditions are met: 18 | 19 | a. Redistributions of source code must retain the above copyright notice, 20 | this list of conditions and the following disclaimer. 21 | b. Redistributions in binary form must reproduce the above copyright 22 | notice, this list of conditions and the following disclaimer in the 23 | documentation and/or other materials provided with the distribution. 24 | c. 
Neither the name of the Scikit-learn Developers nor the names of 25 | its contributors may be used to endorse or promote products 26 | derived from this software without specific prior written 27 | permission. 28 | 29 | 30 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 31 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 32 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 | ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR 34 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 35 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 36 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 37 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 38 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 39 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 40 | DAMAGE. 41 | -------------------------------------------------------------------------------- /src/pyuoi/linear_model/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def stability_selection_to_threshold(stability_selection, n_boots): 5 | """Converts user inputted stability selection to an array of 6 | thresholds. These thresholds correspond to the number of bootstraps 7 | that a feature must appear in to guarantee placement in the selection 8 | profile. 9 | 10 | Parameters 11 | ---------- 12 | stability_selection : int, float, or array-like 13 | If int, treated as the number of bootstraps that a feature must 14 | appear in to guarantee placement in selection profile. If float, 15 | must be between 0 and 1, and is instead the proportion of 16 | bootstraps. If array-like, must consist of either ints or floats 17 | between 0 and 1. In this case, each entry in the array-like object 18 | will act as a separate threshold for placement in the selection 19 | profile. 
20 | 21 | n_boots: int 22 | The number of bootstraps that will be used for selection 23 | """ 24 | 25 | # single float, indicating proportion of bootstraps 26 | if isinstance(stability_selection, float): 27 | selection_thresholds = np.array([int( 28 | stability_selection * n_boots 29 | )]) 30 | 31 | # single int, indicating number of bootstraps 32 | elif isinstance(stability_selection, int): 33 | selection_thresholds = np.array([int( 34 | stability_selection 35 | )]) 36 | 37 | # list, to be converted into numpy array 38 | elif isinstance(stability_selection, list): 39 | # list of floats 40 | if all(isinstance(idx, float) for idx in stability_selection): 41 | selection_thresholds = \ 42 | n_boots * np.array(stability_selection) 43 | 44 | # list of ints 45 | elif all(isinstance(idx, int) for idx in stability_selection): 46 | selection_thresholds = np.array(stability_selection) 47 | 48 | else: 49 | raise ValueError("Stability selection list must consist of " 50 | "floats or ints.") 51 | 52 | # numpy array 53 | elif isinstance(stability_selection, np.ndarray): 54 | # np array of floats 55 | if np.issubdtype(stability_selection.dtype.type, np.floating): 56 | selection_thresholds = n_boots * stability_selection 57 | 58 | # np array of ints 59 | elif np.issubdtype(stability_selection.dtype.type, np.integer): 60 | selection_thresholds = stability_selection 61 | 62 | else: 63 | raise ValueError("Stability selection array must consist of " 64 | "floats or ints.") 65 | 66 | else: 67 | raise ValueError("Stability selection must be a valid float, int " 68 | "or array.") 69 | 70 | # ensure that ensuing list of selection thresholds satisfies 71 | # the correct bounds 72 | selection_thresholds = selection_thresholds.astype('int') 73 | if not ( 74 | np.all(selection_thresholds <= n_boots) and 75 | np.all(selection_thresholds >= 1) 76 | ): 77 | raise ValueError("Stability selection thresholds must be within " 78 | "the correct bounds.") 79 | 80 | return selection_thresholds 81 | 82 | 83 | def intersection(coefs, selection_thresholds=None): 84 | """Performs the intersection operation on selection coefficients 85 | using stability selection criteria. 86 | 87 | The coefficients must be provided in the shape 88 | bootstraps x lambdas x features. 89 | The intersection operation finds, for each lambda, the features that 90 | exist in all bootstraps (hard intersection) or in some subset of them 91 | (the exact subset is provided by selection_thresholds). 92 | 93 | This parameter selection_thresholds provides the number of bootstraps 94 | that a feature must exist in to pass the intersection. Importantly, 95 | this function can take intersections with multiple selection_thresholds 96 | (thus, selection_thresholds is array-like). 97 | 98 | This function then outputs an array of supports, each as a binary mask. 99 | Only unique supports are provided, so duplicates are tossed out. 100 | 101 | Parameters 102 | ---------- 103 | coefs : np.ndarray, shape (# bootstraps, # lambdas, # features) 104 | The coefficients obtained from the selection sweep, corresponding to 105 | each bootstrap and choice of L1 regularization strength. 106 | 107 | selection_thresholds: array-like, int 108 | The selection thresholds to perform intersection across. By default, 109 | use *coefs.shape[0]*. 110 | 111 | Returns 112 | ------- 113 | supports : np.ndarray, shape (# supports, # features), bool 114 | A list of supports (each as a binary mask with size n_features) 115 | obtained by performing the intersection across the coefficients. 
Each 116 | support is unique. 117 | """ 118 | 119 | if selection_thresholds is None: 120 | selection_thresholds = np.array([coefs.shape[0]]) 121 | 122 | n_selection_thresholds = len(selection_thresholds) 123 | n_reg_params = coefs.shape[1] 124 | n_features = coefs.shape[2] 125 | supports = np.zeros( 126 | (n_selection_thresholds, n_reg_params, n_features), 127 | dtype=bool 128 | ) 129 | 130 | # iterate over each stability selection threshold 131 | for thresh_idx, threshold in enumerate(selection_thresholds): 132 | # calculate the support given the specific selection threshold 133 | supports[thresh_idx, ...] = \ 134 | np.count_nonzero(coefs, axis=0) >= threshold 135 | 136 | # unravel the dimension corresponding to selection thresholds 137 | 138 | supports = np.squeeze(np.reshape( 139 | supports, 140 | (n_selection_thresholds * n_reg_params, n_features) 141 | )) 142 | 143 | supports = np.unique(supports, axis=0) 144 | 145 | return supports 146 | -------------------------------------------------------------------------------- /src/pyuoi/mpi_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper functions for loading data and managing arrays across ranks with MPI. 3 | """ 4 | import h5py 5 | import numpy as np 6 | 7 | try: 8 | from mpi4py import MPI 9 | _np2mpi = {np.dtype(np.float32): MPI.FLOAT, 10 | np.dtype(np.float64): MPI.DOUBLE, 11 | np.dtype(int): MPI.LONG, 12 | np.dtype(np.intc): MPI.INT} 13 | except ImportError: 14 | pass 15 | 16 | 17 | def check_valid_ndarray(X): 18 | """Checks whether X is a ndarray and returns a contiguous version. 19 | 20 | Parameters 21 | ---------- 22 | X : ndarray, `None`, or other 23 | Variable to check 24 | 25 | Returns 26 | ------- 27 | X : ndarray or `None` 28 | If X is an ndarray, returns a contiguous potential copy. If X is `None` 29 | returns `None`. If X is anything else, raises a `ValueError` 30 | """ 31 | if X is None: 32 | return X 33 | if not isinstance(X, np.ndarray): 34 | raise ValueError('Must be a numpy ndarray.') 35 | return np.ascontiguousarray(X) 36 | 37 | 38 | def load_data_MPI(h5_name, X_key='X', y_key='y', comm=None, root=0): 39 | """Load data from an HDF5 file and broadcast it across MPI ranks. 40 | 41 | This is a helper function. It is also possible to load the data 42 | without this function. 43 | 44 | Parameters 45 | ---------- 46 | h5_name : str 47 | Path to h5 file. 48 | X_key : str 49 | Key for the features dataset. (default: 'X') 50 | y_key : str 51 | Key for the targets dataset. (default: 'y') 52 | comm : MPI.COMM_WORLD 53 | MPI communicator. 54 | root : int 55 | This rank will load the data from file. 56 | 57 | Returns 58 | ------- 59 | X : ndarray 60 | Features on all MPI ranks. 61 | y : ndarray 62 | Targets on all MPI ranks. 
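    Examples
    --------
    A sketch of typical use under ``mpiexec``; the file name here is
    illustrative::

        from mpi4py import MPI
        from pyuoi.mpi_utils import load_data_MPI

        X, y = load_data_MPI('data.h5', comm=MPI.COMM_WORLD)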
63 | """ 64 | 65 | if comm is None: 66 | comm = MPI.COMM_WORLD 67 | rank = comm.rank 68 | Xshape = None 69 | Xdtype = None 70 | yshape = None 71 | ydtype = None 72 | if rank == root: 73 | with h5py.File(h5_name, 'r') as f: 74 | X = f[X_key][()] 75 | Xshape = X.shape 76 | Xdtype = X.dtype 77 | y = f[y_key][()] 78 | yshape = y.shape 79 | ydtype = y.dtype 80 | Xshape = comm.bcast(Xshape, root=root) 81 | Xdtype = comm.bcast(Xdtype, root=root) 82 | yshape = comm.bcast(yshape, root=root) 83 | ydtype = comm.bcast(ydtype, root=root) 84 | if rank != root: 85 | X = np.empty(Xshape, dtype=Xdtype) 86 | y = np.empty(yshape, dtype=ydtype) 87 | comm.Bcast([X, _np2mpi[np.dtype(X.dtype)]], root=root) 88 | comm.Bcast([y, _np2mpi[np.dtype(y.dtype)]], root=root) 89 | return X, y 90 | 91 | 92 | def Bcast_from_root(send, comm=None, root=0): 93 | """Broadcast an array from root to all MPI ranks. 94 | 95 | Parameters 96 | ---------- 97 | send : ndarray or None 98 | Array to send from root to all ranks. send in other ranks 99 | has no effect. 100 | comm : MPI.COMM_WORLD 101 | MPI communicator. 102 | root : int 103 | This rank contains the array to send. 104 | 105 | Returns 106 | ------- 107 | send : ndarray 108 | Each rank will have a copy of the array from root. 109 | """ 110 | 111 | send = check_valid_ndarray(send) 112 | if comm is None: 113 | comm = MPI.COMM_WORLD 114 | rank = comm.rank 115 | if rank == 0: 116 | dtype = send.dtype 117 | shape = send.shape 118 | else: 119 | dtype = None 120 | shape = None 121 | shape = comm.bcast(shape, root=root) 122 | dtype = comm.bcast(dtype, root=root) 123 | if rank != 0: 124 | send = np.empty(shape, dtype=dtype) 125 | comm.Bcast([send, _np2mpi[np.dtype(dtype)]], root=root) 126 | return send 127 | 128 | 129 | def Gatherv_rows(send, comm=None, root=0): 130 | """Concatenate arrays along the first axis using Gatherv on root. 131 | 132 | Parameters 133 | ---------- 134 | send : ndarray 135 | The arrays to concatenate. All dimensions must be equal except for the 136 | first. 137 | comm : MPI.COMM_WORLD 138 | MPI communicator. 139 | root : int 140 | This rank will contain the Gatherv'ed array. 141 | 142 | Returns 143 | ------- 144 | rec : ndarray or None 145 | Gatherv'ed array on root or None on other ranks. 146 | """ 147 | 148 | send = check_valid_ndarray(send) 149 | if comm is None: 150 | comm = MPI.COMM_WORLD 151 | rank = comm.rank 152 | dtype = send.dtype 153 | shape = send.shape 154 | tot = np.zeros(1, dtype=int) 155 | 156 | # Gather the sizes of the first dimension on root 157 | rank_sizes = comm.gather(shape[0], root=root) 158 | comm.Reduce(np.array(shape[0], dtype=int), 159 | [tot, _np2mpi[tot.dtype]], op=MPI.SUM, root=root) 160 | if rank == root: 161 | rec_shape = (tot[0],) + shape[1:] 162 | rec = np.empty(rec_shape, dtype=dtype) 163 | sizes = [size * np.prod(rec_shape[1:]) for size in rank_sizes] 164 | disps = np.insert(np.cumsum(sizes), 0, 0)[:-1] 165 | else: 166 | rec = None 167 | sizes = None 168 | disps = None 169 | 170 | comm.Gatherv(send, [rec, sizes, disps, _np2mpi[dtype]], root=0) 171 | return rec 172 | -------------------------------------------------------------------------------- /src/pyuoi/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | import logging 4 | 5 | 6 | def softmax(y, axis=-1): 7 | """Calculates the softmax distribution. 8 | 9 | Parameters 10 | ---------- 11 | y : ndarray 12 | Log-probabilities. 
13 | """ 14 | 15 | yp = y - y.max(axis=axis, keepdims=True) 16 | epy = np.exp(yp) 17 | return epy / np.sum(epy, axis=axis, keepdims=True) 18 | 19 | 20 | def sigmoid(x): 21 | """Calculates the bernoulli distribution. 22 | 23 | Parameters 24 | ---------- 25 | x : ndarray 26 | Log-probabilities. 27 | """ 28 | return np.exp(-np.logaddexp(0, -x)) 29 | 30 | 31 | def log_likelihood_glm(model, y_true, y_pred): 32 | """Calculates the log-likelihood of a generalized linear model given the 33 | true response variables and the "predicted" response variables. The 34 | "predicted" response variable varies by the specific generalized linear 35 | model under consideration. 36 | 37 | Parameters 38 | ---------- 39 | model : string 40 | The generalized linear model to calculate the log-likelihood for. 41 | y_true : nd-array, shape (n_samples,) 42 | Array of true response values. 43 | y_pred : nd-array, shape (n_samples,) 44 | Array of predicted response values (conditional mean). 45 | 46 | Returns 47 | ------- 48 | ll : float 49 | The log-likelihood. 50 | """ 51 | if model == 'normal': 52 | # this log-likelihood is calculated under the assumption that the 53 | # variance is the value that maximizes the log-likelihood 54 | rss = (y_true - y_pred)**2 55 | n_samples = y_true.size 56 | ll = -n_samples / 2 * (1 + np.log(np.mean(rss))) 57 | elif model == 'poisson': 58 | if not np.any(y_pred): 59 | if np.any(y_true): 60 | ll = -np.inf 61 | else: 62 | ll = 0. 63 | else: 64 | ll = np.mean(y_true * np.log(y_pred) - y_pred) 65 | else: 66 | raise ValueError('Model is not available.') 67 | return ll 68 | 69 | 70 | def BIC(ll, n_features, n_samples): 71 | """Calculates the Bayesian Information Criterion. 72 | 73 | Parameters 74 | ---------- 75 | ll : float 76 | The log-likelihood of the model. 77 | n_features : int 78 | The number of features used in the model. 79 | n_samples : int 80 | The number of samples in the dataset being tested. 81 | 82 | Returns 83 | ------- 84 | BIC : float 85 | Bayesian Information Criterion 86 | """ 87 | BIC = n_features * np.log(n_samples) - 2 * ll 88 | return BIC 89 | 90 | 91 | def AIC(ll, n_features): 92 | """Calculates the Akaike Information Criterion. 93 | 94 | Parameters 95 | ---------- 96 | ll : float 97 | The log-likelihood of the model. 98 | n_features : int 99 | The number of features used in the model. 100 | n_samples : int 101 | The number of samples in the dataset being tested. 102 | 103 | Returns 104 | ------- 105 | AIC : float 106 | Akaike Information Criterion 107 | """ 108 | 109 | AIC = 2 * n_features - 2 * ll 110 | return AIC 111 | 112 | 113 | def AICc(ll, n_features, n_samples): 114 | """Calculate the corrected Akaike Information Criterion. This criterion is 115 | useful in cases when the number of samples is small. 116 | 117 | If the number of features is equal to the number of samples plus one, then 118 | the AIC is returned (the AICc is undefined in this case). 119 | 120 | Parameters 121 | ---------- 122 | ll : float 123 | The log-likelihood of the model. 124 | n_features : int 125 | The number of features used in the model. 126 | n_samples : int 127 | The number of samples in the dataset being tested. 
128 | 
129 |     Returns
130 |     -------
131 |     AICc : float
132 |         Corrected Akaike Information Criterion
133 |     """
134 |     AICc = AIC(ll, n_features)
135 |     if n_samples > (n_features + 1):
136 |         AICc += 2 * (n_features**2 + n_features) / (n_samples - n_features - 1)
137 |     return AICc
138 | 
139 | 
140 | def check_logger(logger, name='uoi', comm=None):
141 |     ret = logger
142 |     if ret is None:
143 |         if comm is not None and comm.Get_size() > 1:
144 |             r, s = comm.Get_rank(), comm.Get_size()
145 |             name += " " + str(r).rjust(int(np.log10(s)) + 1)
146 | 
147 |         ret = logging.getLogger(name=name)
148 |         handler = logging.StreamHandler(sys.stdout)
149 | 
150 |         fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
151 | 
152 |         handler.setFormatter(logging.Formatter(fmt))
153 |         ret.addHandler(handler)
154 |     return ret
155 | 
-------------------------------------------------------------------------------- /tests/test_cur.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | from numpy.testing import assert_equal
4 | from numpy.testing import assert_array_equal
5 | from numpy.testing import assert_raises
6 | from pyuoi.decomposition import CUR, UoI_CUR
7 | from pyuoi.decomposition.utils import (column_select,
8 |                                        stability_selection_to_threshold)
9 | 
10 | X = np.array([
11 |     [0, 0, 0, 4, 2],
12 |     [0, 4, 2, 1, 0],
13 |     [3, 0, 0, 2, 1],
14 |     [2, 2, 0, 1, 0],
15 |     [1, 2, 4, 1, 3],
16 |     [1, 4, 0, 0, 4],
17 |     [3, 3, 4, 0, 0],
18 |     [3, 2, 3, 0, 4],
19 |     [0, 1, 2, 1, 4],
20 |     [1, 4, 0, 2, 4]])
21 | 
22 | 
23 | def test_column_select_all():
24 |     """Test that column select function selects all columns when provided the
25 |     entire SVD and a suitable value of c."""
26 |     _, n_features = X.shape
27 |     _, _, V = np.linalg.svd(X)
28 |     column_indices = column_select(V.T, c=5)
29 | 
30 |     assert_array_equal(column_indices, np.arange(n_features))
31 | 
32 | 
33 | def test_column_select():
34 |     """Test that the column select function selects the column with the highest
35 |     leverage score most often."""
36 |     n_samples, n_features = X.shape
37 |     rank = 3
38 |     n_reps = 1000
39 | 
40 |     _, _, V = np.linalg.svd(X)
41 |     V_subset = V[:rank].T
42 |     column_flags = np.zeros((n_reps, n_features))
43 | 
44 |     for rep in range(n_reps):
45 |         column_indices = column_select(V_subset, c=1)
46 |         column_flags[rep, column_indices] = 1
47 | 
48 |     counts = np.sum(column_flags, axis=0)
49 | 
50 |     assert_equal(np.argmax(counts), np.argmax(np.sum(V_subset**2, axis=1)))
51 | 
52 | 
53 | def test_stability_selection_to_threshold_int():
54 |     """Tests whether stability_selection_to_threshold outputs the
55 |     correct threshold when provided a single integer."""
56 | 
57 |     n_boots_sel = 48
58 |     # stability selection is a single integer
59 |     test_int = 36
60 |     selection_thresholds = stability_selection_to_threshold(test_int,
61 |                                                             n_boots_sel)
62 | 
63 |     assert_array_equal(selection_thresholds, test_int)
64 | 
65 | 
66 | def test_stability_selection_to_threshold_float():
67 |     """Tests whether stability_selection_to_threshold outputs the
68 |     correct threshold when provided a single float."""
69 | 
70 |     n_boots_sel = 48
71 |     # stability selection is a single float
72 |     test_float = 0.5
73 |     selection_thresholds = stability_selection_to_threshold(test_float,
74 |                                                             n_boots_sel)
75 | 
76 |     assert_array_equal(selection_thresholds, np.array([24]))
77 | 
78 | 
79 | def test_stability_selection_to_threshold_exceeds_n_bootstraps():
80 |     """Tests whether stability_selection_to_threshold raises an
81 |     error when 
provided an input that results in bootstraps exceeding
82 |     n_boots_sel."""
83 | 
84 |     n_boots_sel = 48
85 |     # stability selection values that exceed the number of bootstraps
86 |     test_float = 1.1
87 |     test_int = 50
88 | 
89 |     assert_raises(
90 |         ValueError,
91 |         stability_selection_to_threshold,
92 |         test_int,
93 |         n_boots_sel)
94 | 
95 |     assert_raises(
96 |         ValueError,
97 |         stability_selection_to_threshold,
98 |         test_float,
99 |         n_boots_sel)
100 | 
101 | 
102 | def test_stability_selection_to_threshold_input_value_error():
103 |     """Tests whether stability_selection_to_threshold properly raises an error
104 |     when it receives objects that do not consist of ints or floats."""
105 |     n_boots_sel = 48
106 |     stability_selection_list = [0, 1, 'a']
107 |     stability_selection_np_array = np.array([0, 1, 'a'])
108 |     stability_selection_dict = {0: 'a', 1: 'b'}
109 | 
110 |     assert_raises(
111 |         ValueError,
112 |         stability_selection_to_threshold,
113 |         stability_selection_list,
114 |         n_boots_sel)
115 | 
116 |     assert_raises(
117 |         ValueError,
118 |         stability_selection_to_threshold,
119 |         stability_selection_np_array,
120 |         n_boots_sel)
121 | 
122 |     assert_raises(
123 |         ValueError,
124 |         stability_selection_to_threshold,
125 |         stability_selection_dict,
126 |         n_boots_sel)
127 | 
128 | 
129 | def test_CUR():
130 |     """Tests that the CUR fitter extracts columns correctly."""
131 |     _, n_features = X.shape
132 |     max_k = 3
133 | 
134 |     cur = CUR(max_k=max_k)
135 | 
136 |     cur.fit(X, c=3)
137 |     column_indices = cur.column_indices_
138 |     columns = cur.components_
139 | 
140 |     assert np.setdiff1d(column_indices, np.arange(n_features)).size == 0
141 |     assert_array_equal(X[:, column_indices], columns)
142 | 
143 | 
144 | def test_CUR_fit():
145 |     """Tests that the CUR fitter extracts the correct columns."""
146 |     n_features = 5
147 |     n_samples = 30
148 |     max_k = 3
149 | 
150 |     # matrix has only one non-zero entry
151 |     X = np.zeros((n_samples, n_features))
152 |     X[0, 0] = 1
153 |     true_columns = np.array([0, 2, 3])
154 | 
155 |     # fit CUR decomposition
156 |     cur = CUR(max_k=max_k)
157 |     X_new = cur.fit_transform(X)
158 | 
159 |     assert_array_equal(cur.column_indices_, true_columns)
160 |     assert_array_equal(X_new, X[:, true_columns])
161 | 
162 | 
163 | def test_UoI_CUR_check_ks_and_cs():
164 |     """Tests the check_ks_and_cs function in UoI_CUR."""
165 |     n_boots = 5
166 |     max_k = 10
167 |     boots_frac = 0.9
168 | 
169 |     uoi_cur = UoI_CUR(n_boots=n_boots,
170 |                       max_k=max_k,
171 |                       boots_frac=boots_frac)
172 | 
173 |     # check ks
174 |     ks, cs = uoi_cur.check_ks_and_cs(ks=1)
175 |     assert_array_equal(ks, np.array([1]))
176 |     assert_array_equal(cs, ks + 20)
177 | 
178 |     ks, cs = uoi_cur.check_ks_and_cs(ks=[1, 2, 3])
179 |     assert_array_equal(ks, np.array([1, 2, 3]))
180 |     assert_array_equal(cs, ks + 20)
181 | 
182 |     ks, cs = uoi_cur.check_ks_and_cs(ks=None)
183 |     assert_array_equal(ks, 1 + np.arange(max_k))
184 |     assert_array_equal(cs, ks + 20)
185 | 
186 |     # check cs
187 |     ks, cs = uoi_cur.check_ks_and_cs(ks=[1, 2], cs=[3, 4])
188 |     assert_array_equal(cs, np.array([3, 4]))
189 | 
190 |     ks, cs = uoi_cur.check_ks_and_cs(ks=[1, 2, 3], cs=1)
191 |     assert_array_equal(cs, np.array([1, 1, 1]))
192 | 
193 |     ks, cs = uoi_cur.check_ks_and_cs(ks=[1, 2], cs=2.4)
194 |     assert_array_equal(cs, np.array([2.4, 2.4]))
195 | 
196 |     # value errors for ks
197 |     assert_raises(ValueError, uoi_cur.check_ks_and_cs, -1)
198 |     assert_raises(ValueError, uoi_cur.check_ks_and_cs, [11])
199 |     assert_raises(ValueError, uoi_cur.check_ks_and_cs, [0.1, -1, 2, 12])
200 |     assert_raises(ValueError, uoi_cur.check_ks_and_cs, 2.0)
201 | 
assert_raises(ValueError, uoi_cur.check_ks_and_cs, uoi_cur)
202 | 
203 |     # value errors for cs
204 |     assert_raises(ValueError, uoi_cur.check_ks_and_cs, None, -1)
205 |     assert_raises(ValueError, uoi_cur.check_ks_and_cs, None, [-11])
206 |     assert_raises(ValueError, uoi_cur.check_ks_and_cs, None, np.array([-12]))
207 |     assert_raises(ValueError, uoi_cur.check_ks_and_cs, 1, [2, 3])
208 | 
209 | 
210 | def test_UoI_CUR_basic():
211 |     """Test UoI CUR with no bootstrapping."""
212 |     n_samples, n_features = X.shape
213 |     max_k = 3
214 |     n_boots = 1
215 |     boots_frac = 1
216 | 
217 |     _, _, V = np.linalg.svd(X)
218 |     V_subset = V[:max_k].T
219 | 
220 |     uoi_cur = UoI_CUR(n_boots=n_boots,
221 |                       max_k=max_k,
222 |                       boots_frac=boots_frac)
223 |     uoi_cur.fit(X, cs=3)
224 | 
225 |     max_col = np.argmax(np.sum(V_subset**2, axis=1))
226 | 
227 |     assert (max_col in uoi_cur.column_indices_)
228 | 
229 | 
230 | def test_UoI_CUR_fit():
231 |     """Tests that the UoI-CUR fitter extracts the correct columns."""
232 |     n_features = 5
233 |     n_samples = 30
234 |     max_k = 3
235 |     n_boots = 10
236 |     boots_frac = 0.95
237 | 
238 |     # matrix has only one non-zero entry
239 |     X = np.zeros((n_samples, n_features))
240 |     X[0, 0] = 1
241 |     true_columns = np.array([0, 2, 3])
242 | 
243 |     # fit CUR decomposition
244 |     uoi_cur = UoI_CUR(n_boots=n_boots,
245 |                       max_k=max_k,
246 |                       boots_frac=boots_frac,
247 |                       random_state=2332)
248 |     X_new = uoi_cur.fit_transform(X)
249 | 
250 |     assert_array_equal(uoi_cur.column_indices_, true_columns)
251 |     assert_array_equal(uoi_cur.components_, X[:, true_columns])
252 |     assert_array_equal(X_new, X[:, true_columns])
253 | 
254 | 
255 | def test_UoI_CUR_vs_CUR():
256 |     """Tests that UoI-CUR selects no more columns than CUR."""
257 |     _, n_features = X.shape
258 |     max_k = 3
259 |     n_boots = 10
260 |     boots_frac = 0.90
261 | 
262 |     cur = CUR(max_k=max_k,
263 |               random_state=2332)
264 |     cur.fit(X, c=3)
265 | 
266 |     uoi_cur = UoI_CUR(n_boots=n_boots,
267 |                       max_k=max_k,
268 |                       boots_frac=boots_frac,
269 |                       random_state=2332)
270 |     uoi_cur.fit(X, cs=3, ks=3)
271 | 
272 |     assert uoi_cur.column_indices_.size <= cur.column_indices_.size
273 | 
-------------------------------------------------------------------------------- /tests/test_elasticnet.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | from numpy.testing import (assert_array_equal, assert_array_almost_equal_nulp,
3 |                            assert_equal, assert_allclose)
4 | 
5 | from sklearn.datasets import make_regression
6 | from sklearn.linear_model import ElasticNet
7 | from sklearn.metrics import r2_score
8 | 
9 | from pyuoi import UoI_ElasticNet
10 | from pyuoi.datasets import make_linear_regression
11 | 
12 | 
13 | def test_variable_selection():
14 |     """Test basic functionality of UoI_ElasticNet and that it
15 |     finds the right model"""
16 | 
17 |     X, y, w = make_regression(coef=True, random_state=1)
18 |     enet = UoI_ElasticNet(alphas=[1., .9])
19 |     enet.fit(X, y)
20 |     true_coef = np.nonzero(w)[0]
21 |     fit_coef = np.nonzero(enet.coef_)[0]
22 |     assert_array_equal(true_coef, fit_coef)
23 |     assert_array_almost_equal_nulp(true_coef, fit_coef)
24 | 
25 | 
26 | def test_estimation_score_usage():
27 |     """Test the ability to change the estimation score in UoI ElasticNet."""
28 | 
29 |     methods = ('r2', 'AIC', 'AICc', 'BIC')
30 |     X, y = make_regression(n_features=10, n_informative=3,
31 |                            random_state=10)
32 |     scores = []
33 |     for method in methods:
34 |         enet = UoI_ElasticNet(estimation_score=method)
35 |         assert_equal(enet.estimation_score, method)
36 | 
enet.fit(X, y) 37 | y_hat = enet.predict(X) 38 | assert_equal(r2_score(y, y_hat), enet.score(X, y)) 39 | score = np.max(enet.scores_) 40 | scores.append(score) 41 | assert_equal(len(np.unique(scores)), len(methods)) 42 | 43 | 44 | def test_set_random_state(): 45 | """Tests whether random states are handled correctly.""" 46 | X, y = make_regression(n_features=5, n_informative=3, 47 | random_state=16, noise=.5) 48 | # same state 49 | enet_0 = UoI_ElasticNet(random_state=13) 50 | enet_1 = UoI_ElasticNet(random_state=13) 51 | enet_0.fit(X, y) 52 | enet_1.fit(X, y) 53 | assert_array_equal(enet_0.coef_, enet_1.coef_) 54 | 55 | # different state 56 | enet_1 = UoI_ElasticNet(random_state=14) 57 | enet_1.fit(X, y) 58 | assert not np.array_equal(enet_0.coef_, enet_1.coef_) 59 | 60 | # different state, not set 61 | enet_0 = UoI_ElasticNet() 62 | enet_1 = UoI_ElasticNet() 63 | enet_0.fit(X, y) 64 | enet_1.fit(X, y) 65 | assert not np.array_equal(enet_0.coef_, enet_1.coef_) 66 | 67 | 68 | def test_uoi_enet_toy(): 69 | """Test UoI ElasticNet on a toy example.""" 70 | 71 | X = np.array([ 72 | [-1, 2], 73 | [4, 1], 74 | [1, 3], 75 | [4, 3], 76 | [8, 11]], dtype=float) 77 | beta = np.array([1, 4], dtype=float) 78 | y = np.dot(X, beta) 79 | X = np.tile(X, (3, 1)) 80 | y = np.tile(y, 3) 81 | 82 | # choose selection_frac to be slightly smaller to ensure that we get 83 | # good test sets 84 | enet = UoI_ElasticNet( 85 | fit_intercept=False, 86 | selection_frac=0.75, 87 | estimation_frac=0.75, 88 | ) 89 | enet.fit(X, y) 90 | 91 | assert_allclose(enet.coef_, beta) 92 | 93 | 94 | def test_get_reg_params(): 95 | """Tests whether get_reg_params works correctly for UoI ElasticNet.""" 96 | 97 | X = np.array([ 98 | [-1, 2], 99 | [0, 1], 100 | [1, 3], 101 | [4, 3]], dtype=float) 102 | y = np.array([7, 4, 13, 16], dtype=float) 103 | 104 | # calculate regularization parameters manually 105 | l1_ratio = .5 106 | alpha_max = np.max(np.dot(X.T, y) / 4) / l1_ratio 107 | alphas = [{'alpha': alpha_max, 'l1_ratio': .5}, 108 | {'alpha': alpha_max / 10., 'l1_ratio': .5}] 109 | 110 | # calculate regularization parameters with UoI_ElasticNet object 111 | enet = UoI_ElasticNet( 112 | n_lambdas=2, 113 | fit_intercept=False, 114 | eps=0.1) 115 | reg_params = enet.get_reg_params(X, y) 116 | 117 | # check each regularization parameter and key 118 | for estimate, true in zip(reg_params, alphas): 119 | assert len(estimate) == len(true) 120 | for key, value in estimate.items(): 121 | assert_allclose(true[key], value) 122 | 123 | 124 | def test_intercept_and_coefs_no_selection(): 125 | """Test that UoI ElasticNet properly calculates the intercept with and 126 | without standardization.""" 127 | # create linear model 128 | X, y, beta, intercept = make_linear_regression( 129 | n_samples=500, 130 | n_features=2, 131 | n_informative=2, 132 | snr=10., 133 | include_intercept=True, 134 | random_state=2332) 135 | 136 | # without standardization 137 | enet = UoI_ElasticNet( 138 | standardize=False, 139 | fit_intercept=True 140 | ) 141 | enet.fit(X, y) 142 | assert_allclose(enet.intercept_, intercept, rtol=0.25) 143 | assert_allclose(enet.coef_, beta, rtol=0.25) 144 | 145 | # with standardization 146 | enet = UoI_ElasticNet( 147 | standardize=True, 148 | fit_intercept=True 149 | ) 150 | enet.fit(X, y) 151 | assert_allclose(enet.intercept_, intercept, rtol=0.25) 152 | assert_allclose(enet.coef_, beta, rtol=0.25) 153 | 154 | 155 | def test_enet_selection_sweep(): 156 | """Tests uoi_selection_sweep for UoI_ElasticNet.""" 157 | 158 | # toy data
159 | X = np.array([ 160 | [-1, 2, 3], 161 | [4, 1, -7], 162 | [1, 3, 1], 163 | [4, 3, 12], 164 | [8, 11, 2]], dtype=float) 165 | beta = np.array([1, 4, 2], dtype=float) 166 | y = np.dot(X, beta) 167 | 168 | # toy regularization 169 | reg_param_values = [{'alpha': 1.0}, {'alpha': 2.0}] 170 | enet = UoI_ElasticNet(fit_intercept=True, warm_start=False) 171 | enet1 = ElasticNet(alpha=1.0, fit_intercept=True, max_iter=enet.max_iter) 172 | enet2 = ElasticNet(alpha=2.0, fit_intercept=True, max_iter=enet.max_iter) 173 | enet.output_dim = 1 174 | 175 | coefs = enet.uoi_selection_sweep(X, y, reg_param_values) 176 | enet1.fit(X, y) 177 | enet2.fit(X, y) 178 | 179 | assert np.allclose(coefs[0], enet1.coef_) 180 | assert np.allclose(coefs[1], enet2.coef_) 181 | 182 | 183 | def test_fit_intercept(): 184 | """Tests whether `fit_intercept` is passed through to the linear models. 185 | """ 186 | enet = UoI_ElasticNet(fit_intercept=True) 187 | assert enet._selection_lm.fit_intercept 188 | assert enet._estimation_lm.fit_intercept 189 | 190 | enet = UoI_ElasticNet(fit_intercept=False) 191 | assert not enet._selection_lm.fit_intercept 192 | assert not enet._estimation_lm.fit_intercept 193 | -------------------------------------------------------------------------------- /tests/test_lbfgs.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from numpy.testing import assert_array_equal, assert_array_almost_equal 3 | 4 | from pyuoi.lbfgs import LBFGS, fmin_lbfgs 5 | import numpy as np 6 | 7 | 8 | def test_fmin_lbfgs(): 9 | def f(x, g, *args): 10 | g[0] = 2 * x 11 | return x ** 2 12 | 13 | xmin = fmin_lbfgs(f, 100., line_search='armijo') 14 | assert_array_equal(xmin, [0]) 15 | 16 | xmin = fmin_lbfgs(f, 100., line_search='strongwolfe') 17 | assert_array_equal(xmin, [0]) 18 | 19 | 20 | class TestOWLQN: 21 | 22 | def test_owl_qn_end(self): 23 | def f(x, g, *args): 24 | g[:] = 2. * (x - 1.) 25 | return np.sum((x - 1.) ** 2) 26 | 27 | xmin = fmin_lbfgs(f, np.zeros(10), orthantwise_c=1., 28 | orthantwise_end=5) 29 | assert_array_equal(xmin[5:], 1.) 30 | assert np.all(xmin[:5] < 1.) 31 | 32 | def test_owl_qn(self): 33 | def f(x, g, *args): 34 | g[0] = 2 * x 35 | return x ** 2 36 | 37 | xmin = fmin_lbfgs(f, 100., orthantwise_c=1, line_search='wolfe') 38 | assert_array_equal(xmin, [0]) 39 | 40 | def test_owl_line_search_warning_explicit(self): 41 | def f(x, g, *args): 42 | g[0] = 2 * x 43 | return x ** 2 44 | 45 | with pytest.warns(UserWarning, match="OWL-QN"): 46 | fmin_lbfgs(f, 100., orthantwise_c=1, line_search='morethuente') 47 | with pytest.warns(UserWarning, match="OWL-QN"): 48 | fmin_lbfgs(f, 100., orthantwise_c=1, line_search='armijo') 49 | with pytest.warns(UserWarning, match="OWL-QN"): 50 | fmin_lbfgs(f, 100., orthantwise_c=1, line_search='strongwolfe') 51 | 52 | @pytest.mark.xfail(strict=True) 53 | def test_owl_wolfe_no_warning(self): 54 | """ This test is an attempt to show that wolfe throws no warnings.
55 | """ 56 | 57 | def f(x, g, *args): 58 | g[0] = 2 * x 59 | return x ** 2 60 | 61 | with pytest.warns(UserWarning, match="OWL-QN"): 62 | fmin_lbfgs(f, 100., orthantwise_c=1, line_search='wolfe') 63 | 64 | 65 | def test_2d(): 66 | def f(x, g, f_calls): 67 | assert x.shape == (2, 2) 68 | assert g.shape == x.shape 69 | g[:] = 2 * x 70 | f_calls[0] += 1 71 | return (x ** 2).sum() 72 | 73 | def progress(x, g, fx, xnorm, gnorm, step, k, ls, *args): 74 | assert x.shape == (2, 2) 75 | assert g.shape == x.shape 76 | 77 | assert np.sqrt((x ** 2).sum()) == xnorm 78 | assert np.sqrt((g ** 2).sum()) == gnorm 79 | 80 | p_calls[0] += 1 81 | return 0 82 | 83 | f_calls = [0] 84 | p_calls = [0] 85 | 86 | xmin = fmin_lbfgs(f, [[10., 100.], [44., 55.]], progress, args=[f_calls]) 87 | assert f_calls[0] > 0 88 | assert p_calls[0] > 0 89 | assert_array_almost_equal(xmin, [[0, 0], [0, 0]]) 90 | 91 | 92 | def test_class_interface(): 93 | def f(x, g, *args): 94 | g[:] = 4 * x 95 | return x ** 4 + 1 96 | 97 | opt = LBFGS() 98 | opt.max_iterations = 3 99 | 100 | assert_array_equal(opt.minimize(f, 1e6), [0]) 101 | 102 | opt.max_iterations = 1 103 | with pytest.warns(UserWarning): 104 | opt.minimize(f, 1e7) 105 | 106 | 107 | def test_input_validation(): 108 | with pytest.raises(TypeError): 109 | fmin_lbfgs([], 1e4) 110 | with pytest.raises(TypeError): 111 | fmin_lbfgs(lambda x: x, 1e4, "ham") 112 | with pytest.raises(TypeError): 113 | fmin_lbfgs(lambda x: x, "spam") 114 | -------------------------------------------------------------------------------- /tests/test_mpi/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BouchardLab/pyuoi/25e47655a07895f206c2e3ee3b259421c144a05d/tests/test_mpi/__init__.py -------------------------------------------------------------------------------- /tests/test_mpi/test_mpi_uoi_linear_model.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from numpy.testing import assert_array_equal, assert_array_almost_equal_nulp 4 | from sklearn.datasets import make_regression 5 | try: 6 | from mpi4py import MPI 7 | except ImportError: 8 | MPI = None 9 | 10 | from pyuoi.datasets import make_classification, make_poisson_regression 11 | from pyuoi.linear_model import (UoI_Lasso, 12 | UoI_L1Logistic, 13 | UoI_ElasticNet, 14 | UoI_Poisson) 15 | 16 | 17 | @pytest.mark.skipif(MPI is None, reason='MPI not installed.') 18 | def test_variable_selection_lasso(): 19 | """Test basic functionality of UoI_Lasso and that it finds right model""" 20 | X, y, w = make_regression(coef=True, random_state=1) 21 | lasso = UoI_Lasso(comm=MPI.COMM_WORLD) 22 | lasso.fit(X, y) 23 | true_coef = np.nonzero(w)[0] 24 | fit_coef = np.nonzero(lasso.coef_)[0] 25 | assert_array_equal(true_coef, fit_coef) 26 | assert_array_almost_equal_nulp(true_coef, fit_coef) 27 | 28 | 29 | @pytest.mark.skipif(MPI is None, reason='MPI not installed.') 30 | def test_variable_selection_enet(): 31 | """Test basic functionality of UoI_Lasso and that it finds right model""" 32 | X, y, w = make_regression(coef=True, random_state=1) 33 | enet = UoI_ElasticNet(comm=MPI.COMM_WORLD) 34 | enet.fit(X, y) 35 | true_coef = np.nonzero(w)[0] 36 | fit_coef = np.nonzero(enet.coef_)[0] 37 | assert_array_equal(true_coef, fit_coef) 38 | assert_array_almost_equal_nulp(true_coef, fit_coef) 39 | 40 | 41 | @pytest.mark.skipif(MPI is None, reason='MPI not installed.') 42 | def test_l1logistic_binary(): 43 | """Test that 
binary L1 Logistic runs in the UoI framework.""" 44 | n_inf = 10 45 | X, y, w, b = make_classification(n_samples=200, 46 | random_state=6, 47 | n_informative=n_inf, 48 | n_features=20, 49 | w_scale=4., 50 | include_intercept=True) 51 | 52 | l1log = UoI_L1Logistic(random_state=10, comm=MPI.COMM_WORLD).fit(X, y) 53 | assert (np.sign(abs(w)) == np.sign(abs(l1log.coef_))).mean() >= .7 54 | 55 | 56 | @pytest.mark.skipif(MPI is None, reason='MPI not installed.') 57 | def test_l1logistic_multiclass(): 58 | """Test that multiclass L1 Logistic runs in the UoI framework when all 59 | classes share a support.""" 60 | n_features = 20 61 | n_inf = 10 62 | X, y, w, b = make_classification(n_samples=200, 63 | random_state=10, 64 | n_classes=5, 65 | n_informative=n_inf, 66 | n_features=n_features, 67 | shared_support=True, 68 | w_scale=4.) 69 | l1log = UoI_L1Logistic(comm=MPI.COMM_WORLD).fit(X, y) 70 | assert (np.sign(abs(w)) == np.sign(abs(l1log.coef_))).mean() >= .8 71 | 72 | 73 | @pytest.mark.skipif(MPI is None, reason='MPI not installed.') 74 | def test_poisson(): 75 | """Test basic functionality of UoI_Poisson and that it finds the right model""" 76 | n_features = 20 77 | n_inf = 10 78 | X, y, w, b = make_poisson_regression(n_samples=200, 79 | n_features=n_features, 80 | n_informative=n_inf, 81 | random_state=10) 82 | poisson = UoI_Poisson(comm=MPI.COMM_WORLD) 83 | poisson.fit(X, y) 84 | assert (np.sign(abs(w)) == np.sign(abs(poisson.coef_))).mean() >= .6 85 | -------------------------------------------------------------------------------- /tests/test_mpi/test_mpi_utils.py: -------------------------------------------------------------------------------- 1 | import h5py, pytest 2 | import numpy as np 3 | 4 | from numpy.testing import assert_array_equal 5 | try: 6 | from mpi4py import MPI 7 | except ImportError: 8 | MPI = None 9 | 10 | from pyuoi.mpi_utils import (Bcast_from_root, Gatherv_rows, 11 | load_data_MPI) 12 | 13 | 14 | @pytest.mark.skipif(MPI is None, reason='MPI not installed.') 15 | def test_load_data_MPI(tmpdir): 16 | """Tests loading data from an HDF5 file into all ranks. 17 | """ 18 | comm = MPI.COMM_WORLD 19 | rank = comm.rank 20 | root = 0 21 | X = np.random.randn(5, 10) 22 | y = np.random.randint(5, size=5) 23 | 24 | fname = tmpdir.join('temp.h5') 25 | if rank == root: 26 | with h5py.File(str(fname), 'w') as f: 27 | f.create_dataset('X', data=X) 28 | f.create_dataset('Xp', data=X) 29 | f.create_dataset('y', data=y) 30 | f.create_dataset('yp', data=y) 31 | 32 | # Default keys 33 | X_load, y_load = load_data_MPI(fname) 34 | if rank == root: 35 | assert_array_equal(X, X_load) 36 | assert_array_equal(y, y_load) 37 | 38 | # Set keys 39 | X_load, y_load = load_data_MPI(fname, 40 | X_key='Xp', 41 | y_key='yp') 42 | if rank == root: 43 | assert_array_equal(X, X_load) 44 | assert_array_equal(y, y_load) 45 | 46 | 47 | @pytest.mark.skipif(MPI is None, reason='MPI not installed.') 48 | def test_Bcast_from_root(): 49 | """Test the Bcast_from_root function for broadcasting 50 | an array from root to all ranks.
51 | """ 52 | comm = MPI.COMM_WORLD 53 | root = 0 54 | 55 | dims = [2, 3, 5] 56 | 57 | for dtype in [int, float]: 58 | for ndim in range(1, 4): 59 | my_dim = dims[:ndim] 60 | X = None 61 | if comm.rank == root: 62 | X = np.arange(np.prod(my_dim), dtype=dtype) 63 | X = X.reshape(my_dim) 64 | X = Bcast_from_root(X, comm, root) 65 | Xp = np.arange(np.prod(my_dim), dtype=dtype) 66 | Xp = Xp.reshape(my_dim) 67 | assert_array_equal(X, Xp) 68 | assert X.dtype == dtype 69 | assert X.ndim == len(my_dim) 70 | 71 | 72 | @pytest.mark.skipif(MPI is None, reason='MPI not installed.') 73 | def test_Gatherv_rows(): 74 | """Test the Gatherv_rows function for Gathering and 75 | concatenating ndarrys along their first axes to root. 76 | """ 77 | comm = MPI.COMM_WORLD 78 | root = 0 79 | rank = comm.rank 80 | size = comm.size 81 | 82 | for dtype in [int, float]: 83 | # Multiple rows per rank 84 | X = np.arange(151 * 3, dtype=dtype).reshape(151, 3) 85 | my_rows = np.array_split(X, size)[rank] 86 | Xp = Gatherv_rows(my_rows, comm, root) 87 | if rank == root: 88 | assert_array_equal(X, Xp) 89 | assert Xp.dtype == dtype 90 | 91 | # Fewer rows than ranks 92 | X = np.arange(2 * 3, dtype=dtype).reshape(2, 3) 93 | my_rows = np.array_split(X, size)[rank] 94 | Xp = Gatherv_rows(my_rows, comm, root) 95 | if rank == root: 96 | assert_array_equal(X, Xp) 97 | assert Xp.dtype == dtype 98 | 99 | # Multiple rows per rank, 3d 100 | X = np.arange(151 * 2 * 3, dtype=dtype).reshape(151, 2, 3) 101 | my_rows = np.array_split(X, size)[rank] 102 | Xp = Gatherv_rows(my_rows, comm, root) 103 | if rank == root: 104 | assert_array_equal(X, Xp) 105 | assert Xp.dtype == dtype 106 | 107 | # Fewer rows than ranks, 3d 108 | X = np.arange(2 * 3 * 5, dtype=dtype).reshape(2, 3, 5) 109 | my_rows = np.array_split(X, size)[rank] 110 | Xp = Gatherv_rows(my_rows, comm, root) 111 | if rank == root: 112 | assert_array_equal(X, Xp) 113 | assert Xp.dtype == dtype 114 | 115 | 116 | @pytest.mark.skipif(MPI is None, reason='MPI not installed.') 117 | def test_Gatherv_random_rows(): 118 | """Test Gatherv_rows for gathering ndarrays with random 119 | shapes along their first axis 120 | """ 121 | 122 | comm = MPI.COMM_WORLD 123 | root = 0 124 | rank = comm.rank 125 | 126 | data = np.random.normal(size=(np.random.randint(1, 10), 1000)) 127 | sizes = comm.gather(data.shape[0], root=root) 128 | data = Gatherv_rows(data, comm, root) 129 | 130 | if rank == root: 131 | assert data.shape[0] == np.sum(sizes) 132 | -------------------------------------------------------------------------------- /tests/test_nmf.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from numpy.testing import assert_array_equal, assert_raises 5 | from pyuoi.decomposition import UoI_NMF, UoI_NMF_Base 6 | from pyuoi.decomposition.utils import dissimilarity 7 | from sklearn.cluster import DBSCAN 8 | from sklearn.decomposition import NMF 9 | 10 | 11 | @pytest.fixture 12 | def nmf_setup(): 13 | W = np.random.randint(0, high=2, size=(500, 5)) 14 | H = np.random.randint(0, high=2, size=(5, 2)) 15 | X = np.dot(W, H) 16 | noise = np.random.normal(loc=0, scale=0.5, size=X.shape)**2 17 | X = X + noise 18 | return X 19 | 20 | 21 | def test_dissimilarity(): 22 | """Test the dissimilarity function.""" 23 | k = 5 24 | n_features = 20 25 | 26 | # same bases should be a dissimilarity of zero 27 | H1 = np.random.randint(low=0, high=3, size=(k, n_features)) 28 | H2 = np.copy(H1) 29 | assert np.allclose(dissimilarity(H1, H2), 0.) 
30 | 31 | 32 | @pytest.mark.fast 33 | def test_UoI_NMF_Base_initialization(): 34 | """Tests the default initialization inherited from the UoI NMF base class.""" 35 | n_boots = 30 36 | ranks = 10 37 | uoi = UoI_NMF(n_boots=n_boots, ranks=ranks, 38 | random_state=np.random.RandomState(2332)) 39 | assert_array_equal(uoi.ranks, np.arange(2, ranks + 1)) 40 | assert uoi.nmf.solver == 'mu' 41 | assert uoi.nmf.beta_loss == 'kullback-leibler' 42 | assert uoi.cluster.min_samples == max(n_boots // 2, 1) 43 | 44 | 45 | @pytest.mark.fast 46 | def test_UoI_NMF_initialization(): 47 | """Tests the initialization of UoI NMF.""" 48 | n_boots = 30 49 | ranks = 10 50 | uoi = UoI_NMF(n_boots=n_boots, ranks=ranks) 51 | assert_array_equal(uoi.ranks, np.arange(2, ranks + 1)) 52 | assert uoi.nmf.solver == 'mu' 53 | assert uoi.nmf.beta_loss == 'kullback-leibler' 54 | assert uoi.cluster.min_samples == max(n_boots // 2, 1) 55 | assert uoi.cons_meth == np.mean 56 | 57 | 58 | @pytest.mark.fast 59 | def test_UoI_NMF_initialization_value_error(): 60 | """Tests that ValueErrors are correctly raised in the NMF initialization.""" 61 | assert_raises(ValueError, UoI_NMF_Base, **{'ranks': 2.5}) 62 | assert_raises(ValueError, UoI_NMF_Base, **{'nmf': NMF}) 63 | assert_raises(ValueError, UoI_NMF_Base, **{'cluster': DBSCAN}) 64 | assert_raises(ValueError, UoI_NMF_Base, **{'nnreg': 2}) 65 | 66 | 67 | @pytest.mark.fast 68 | def test_UoI_NMF_fit(nmf_setup): 69 | """Tests that the fitting procedure of UoI NMF runs without error.""" 70 | X = nmf_setup 71 | 72 | n_boots = 1 73 | ranks = 5 74 | uoi = UoI_NMF(n_boots=n_boots, 75 | ranks=[ranks], 76 | nmf_max_iter=1000, 77 | random_state=2332, 78 | use_dissimilarity=False) 79 | uoi.fit(X) 80 | assert hasattr(uoi, 'components_') 81 | 82 | 83 | @pytest.mark.fast 84 | def test_UoI_NMF_fit_no_dissimilarity(nmf_setup): 85 | """Tests that the fitting procedure of UoI NMF runs without error, when 86 | the algorithm does not use dissimilarity to choose a rank.""" 87 | X = nmf_setup 88 | 89 | n_boots = 1 90 | ranks = 5 91 | uoi = UoI_NMF(n_boots=n_boots, 92 | ranks=[ranks], 93 | nmf_max_iter=1000, 94 | random_state=2332, 95 | use_dissimilarity=False) 96 | uoi.fit(X) 97 | assert hasattr(uoi, 'components_') 98 | 99 | 100 | @pytest.mark.fast 101 | def test_UoI_NMF_transform(nmf_setup): 102 | """Tests that the transform procedure of UoI NMF runs without error.""" 103 | X = nmf_setup 104 | 105 | n_boots = 1 106 | ranks = 5 107 | uoi = UoI_NMF(n_boots=n_boots, 108 | ranks=[ranks], 109 | nmf_max_iter=1000, 110 | random_state=2332, 111 | use_dissimilarity=False) 112 | X_tfm = uoi.fit_transform(X) 113 | assert hasattr(uoi, 'components_') 114 | assert X_tfm is not None 115 | 116 | 117 | @pytest.mark.fast 118 | def test_UoI_NMF_transform_value_error(nmf_setup): 119 | """Tests that the transform procedure of UoI NMF correctly raises a 120 | ValueError.""" 121 | X = nmf_setup 122 | n_boots = 1 123 | ranks = 5 124 | uoi = UoI_NMF(n_boots=n_boots, 125 | ranks=[ranks], 126 | nmf_max_iter=1000, 127 | random_state=2332, 128 | use_dissimilarity=False) 129 | uoi.fit(X) 130 | 131 | # transform 132 | Y = np.random.normal(size=(X.shape[0], 2 * X.shape[1]))**2 133 | assert_raises(ValueError, uoi.transform, Y) 134 | # inverse transform 135 | W = np.random.normal(size=(X.shape[0], 2 * uoi.components_.shape[0]))**2 136 | assert_raises(ValueError, uoi.inverse_transform, W) 137 | 138 | 139 | @pytest.mark.fast 140 | def test_UoI_NMF_reconstruction_error(nmf_setup): 141 | """Tests that a reconstruction error is calculated when data is 142 | 
transformed.""" 143 | X = nmf_setup 144 | n_boots = 1 145 | ranks = 5 146 | uoi = UoI_NMF(n_boots=n_boots, 147 | ranks=[ranks], 148 | nmf_max_iter=1000, 149 | random_state=2332, 150 | use_dissimilarity=False) 151 | uoi.fit(X) 152 | X_tfm = uoi.transform(X, reconstruction_err=True) 153 | assert hasattr(uoi, 'components_') 154 | assert hasattr(uoi, 'reconstruction_err_') 155 | assert uoi.reconstruction_err_ is not None 156 | assert X_tfm is not None 157 | 158 | 159 | @pytest.mark.slow 160 | def test_UoI_NMF_correct_number_of_components(): 161 | """Tests that, using the dissimilarity metric, UoI NMF extracts the correct 162 | number of bases.""" 163 | k = 2 164 | n_samples = 1000 165 | n_features = 30 166 | 167 | # create data matrix 168 | W = np.random.randint(low=0, high=3, size=(n_samples, k)) 169 | H = np.random.randint(low=0, high=3, size=(k, n_features)) 170 | noise = np.random.normal(loc=0, scale=0.5, size=(n_samples, n_features))**2 171 | A = np.dot(W, H) + noise 172 | 173 | # fit uoi nmf 174 | uoi = UoI_NMF(n_boots=10, 175 | ranks=[2, 4, 8], 176 | nmf_max_iter=5000, 177 | use_dissimilarity=True) 178 | uoi.fit(A) 179 | 180 | assert uoi.components_.shape[0] == k 181 | 182 | 183 | @pytest.mark.fast 184 | def test_UoI_NMF_dissim_boots_argcheck(nmf_setup): 185 | """Test that UoI_NMF raises ValueError when trying to use 186 | dissimilarity with a single bootstrap.""" 187 | n_boots = 1 188 | ranks = 5 189 | assert_raises(ValueError, UoI_NMF, 190 | n_boots=n_boots, 191 | ranks=[ranks], 192 | nmf_max_iter=1000, 193 | random_state=2332) 194 | -------------------------------------------------------------------------------- /tests/test_scores.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.testing import assert_almost_equal 3 | from numpy.testing import assert_equal 4 | from numpy.testing import assert_raises 5 | 6 | from sklearn.datasets import make_regression, make_classification 7 | from sklearn.model_selection import train_test_split 8 | from sklearn.linear_model import LinearRegression, LogisticRegression 9 | from sklearn.metrics import r2_score, accuracy_score, log_loss 10 | 11 | from pyuoi.utils import log_likelihood_glm 12 | from pyuoi.utils import (AIC, BIC, AICc) 13 | 14 | from pyuoi.linear_model import (UoI_Lasso, UoI_L1Logistic, UoI_Poisson, 15 | UoI_ElasticNet) 16 | 17 | 18 | def test_ll(): 19 | """Tests that the log-likelihood for generalized linear models is correctly 20 | calculated.""" 21 | 22 | # identity 23 | y_true = np.array([1, 2, 3]) 24 | y_pred = np.array([np.e + 1, np.e + 2, np.e + 3]) 25 | ll = log_likelihood_glm('normal', y_true, y_pred) 26 | assert_almost_equal(ll, -4.5) 27 | 28 | # poisson 29 | y_true = np.array([1 / np.log(2.), 1 / np.log(3.), 1 / np.log(4.)]) 30 | y_pred = np.array([2., 3., 4.]) 31 | ll = log_likelihood_glm('poisson', y_true, y_pred) 32 | assert_almost_equal(ll, -2) 33 | 34 | # poisson with all zeros 35 | y_true = np.zeros(3) 36 | y_pred = np.zeros(3) 37 | ll = log_likelihood_glm('poisson', y_true, y_pred) 38 | assert_equal(ll, 0.) 
39 | 40 | # poisson where the predictions are all zeros, but the true values are not 41 | y_pred = np.zeros(3) 42 | y_true = np.array([0., 0., 1.]) 43 | ll = log_likelihood_glm('poisson', y_true, y_pred) 44 | assert_equal(ll, -np.inf) 45 | 46 | 47 | def test_ll_error(): 48 | """Tests that the log-likelihood function correctly raises an error when an 49 | incorrect string is passed as a parameter.""" 50 | 51 | y_true = np.array([1., 2., 3.]) 52 | y_pred = np.array([3., 4., 5.]) 53 | 54 | assert_raises(ValueError, 55 | log_likelihood_glm, 56 | 'error', 57 | y_true, 58 | y_pred) 59 | 60 | 61 | def test_information_criteria(): 62 | """Tests the information criteria (AIC, AICc, BIC) functions.""" 63 | ll = -1. 64 | n_features = 5 65 | n_samples = 1000 66 | 67 | aic = AIC(ll, n_features) 68 | assert_equal(aic, 12.) 69 | 70 | aicc = AICc(ll, n_features, n_samples) 71 | assert_equal(aicc, 12. + 30. / 497.) 72 | 73 | # additional test: AICc should equal AIC if the number of samples is one 74 | # greater than the number of features 75 | aicc = AICc(ll, n_features, n_features + 1) 76 | assert_equal(aicc, aic) 77 | 78 | bic = BIC(ll, n_features, n_samples) 79 | assert_equal(bic, 5 * np.log(1000) + 2) 80 | 81 | 82 | def test_LinearRegressor_scoring_defaults(): 83 | """Tests that the correct default train/test data are being used 84 | for scoring estimates in UoIAbstractLinearRegressor. Further 85 | tests that the scoring itself is being done correctly.""" 86 | seed = 5 87 | 88 | X, y = make_regression(n_samples=100, n_features=10, n_informative=10, 89 | random_state=seed) 90 | 91 | train_idxs, test_idxs = train_test_split(np.arange(X.shape[0]), 92 | test_size=0.1, 93 | random_state=seed) 94 | X_train = X[train_idxs] 95 | y_train = y[train_idxs] 96 | 97 | X_test = X[test_idxs] 98 | y_test = y[test_idxs] 99 | 100 | fitter = LinearRegression().fit(X_train, y_train) 101 | support = np.ones(X.shape[1]).astype(bool) 102 | # r2 - must use test data 103 | uoi = UoI_Lasso(estimation_score='r2') 104 | assert uoi._estimation_target == 1 105 | 106 | score = uoi._score_predictions('r2', fitter, X, y, support, 107 | (train_idxs, test_idxs)) 108 | assert_equal(r2_score(y_test, fitter.predict(X_test)), score) 109 | 110 | ll = log_likelihood_glm('normal', y_train, 111 | fitter.predict(X_train[:, support])) 112 | # BIC - must use train data 113 | uoi = UoI_Lasso(estimation_score='BIC') 114 | assert uoi._estimation_target == 0 115 | score = -1 * uoi._score_predictions('BIC', fitter, X, y, support, 116 | (train_idxs, test_idxs)) 117 | assert_equal(BIC(ll, *X_train.T.shape), score) 118 | 119 | # AIC - must use train data 120 | uoi = UoI_Lasso(estimation_score='AIC') 121 | assert uoi._estimation_target == 0 122 | 123 | score = -1 * uoi._score_predictions('AIC', fitter, X, y, support, 124 | (train_idxs, test_idxs)) 125 | assert_equal(AIC(ll, X_train.shape[1]), score) 126 | 127 | # AICc - must use train data 128 | uoi = UoI_Lasso(estimation_score='AICc') 129 | assert uoi._estimation_target == 0 130 | 131 | score = -1 * uoi._score_predictions('AICc', fitter, X, y, support, 132 | (train_idxs, test_idxs)) 133 | assert_equal(AICc(ll, *X_train.T.shape), score) 134 | 135 | 136 | def test_GeneralizedLinearRegressor_scoring_defaults(): 137 | """Tests that the correct default train/test data are being used 138 | for scoring estimates in UoIAbstractGeneralizedLinearRegressor.
Further 139 | tests that the scoring itself is being done correctly.""" 140 | seed = 5 141 | 142 | X, y = make_classification(n_samples=100, n_features=3, n_informative=3, 143 | n_redundant=0, n_repeated=0, n_classes=3, 144 | n_clusters_per_class=2, random_state=seed) 145 | 146 | train_idxs, test_idxs = train_test_split(np.arange(X.shape[0]), 147 | test_size=0.1, 148 | random_state=seed) 149 | 150 | X_train = X[train_idxs] 151 | y_train = y[train_idxs] 152 | 153 | X_test = X[test_idxs] 154 | y_test = y[test_idxs] 155 | 156 | fitter = LogisticRegression().fit(X_train, y_train) 157 | support = np.ones(X.shape[1]).astype(bool) 158 | 159 | # acc - must use test data 160 | uoi = UoI_L1Logistic(estimation_score='acc') 161 | assert uoi._estimation_target == 1 162 | uoi.classes_ = np.unique(y) 163 | score = uoi._score_predictions('acc', fitter, X, y, support, 164 | (train_idxs, test_idxs)) 165 | assert_equal(accuracy_score(y_test, fitter.predict(X_test)), score) 166 | 167 | # log - must use test data. Note the sign difference 168 | uoi = UoI_L1Logistic(estimation_score='log') 169 | assert uoi._estimation_target == 1 170 | uoi.classes_ = np.unique(y) 171 | score = uoi._score_predictions('log', fitter, X, y, support, 172 | (train_idxs, test_idxs)) 173 | 174 | y_pred_test = fitter.predict_proba(X_test[:, support]) 175 | assert_equal(log_loss(y_test, y_pred_test, labels=np.unique(y)), 176 | -1 * score) 177 | 178 | ll = -log_loss(y_train, fitter.predict_proba(X_train[:, support]), 179 | labels=np.unique(y)) 180 | total_ll = ll * X_train.shape[0] 181 | # BIC - must use train data 182 | uoi = UoI_L1Logistic(estimation_score='BIC') 183 | assert uoi._estimation_target == 0 184 | uoi.classes_ = np.unique(y) 185 | score = -1 * uoi._score_predictions('BIC', fitter, X, y, support, 186 | (train_idxs, test_idxs)) 187 | assert_equal(BIC(total_ll, *X_train.T.shape), score) 188 | 189 | # AIC 190 | uoi = UoI_L1Logistic(estimation_score='AIC') 191 | assert uoi._estimation_target == 0 192 | uoi.classes_ = np.unique(y) 193 | score = -1 * uoi._score_predictions('AIC', fitter, X, y, support, 194 | (train_idxs, test_idxs)) 195 | assert_equal(AIC(total_ll, X_train.shape[1]), score) 196 | 197 | # AICc 198 | uoi = UoI_L1Logistic(estimation_score='AICc') 199 | assert uoi._estimation_target == 0 200 | uoi.classes_ = np.unique(y) 201 | score = -1 * uoi._score_predictions('AICc', fitter, X, y, support, 202 | (train_idxs, test_idxs)) 203 | assert_equal(AICc(total_ll, *X_train.T.shape), score) 204 | 205 | 206 | def test_estimation_target(): 207 | """Verify the ability for the user to set the estimation target variable""" 208 | 209 | # Assess r2 on train data 210 | uoi = UoI_Lasso(estimation_score='r2', estimation_target='train') 211 | 212 | # train gets converted to the index 0 213 | assert uoi._estimation_target == 0 214 | 215 | # Assess BIC on test data 216 | uoi = UoI_Lasso(estimation_score='BIC', estimation_target='test') 217 | assert uoi._estimation_target == 1 218 | # Assess r2 on train data 219 | uoi = UoI_ElasticNet(estimation_score='r2', estimation_target='train') 220 | 221 | # train gets converted to the index 0 222 | assert uoi._estimation_target == 0 223 | 224 | # Assess BIC on test data 225 | uoi = UoI_ElasticNet(estimation_score='BIC', estimation_target='test') 226 | 227 | assert uoi._estimation_target == 1 228 | 229 | uoi = UoI_L1Logistic(estimation_score='acc', estimation_target='train') 230 | 231 | assert uoi._estimation_target == 0 232 | 233 | uoi = UoI_L1Logistic(estimation_score='BIC', estimation_target='test') 234 | 235 | assert
uoi._estimation_target == 1 236 | 237 | uoi = UoI_Poisson(estimation_score='acc', estimation_target='train') 238 | 239 | assert uoi._estimation_target == 0 240 | 241 | uoi = UoI_Poisson(estimation_score='BIC', estimation_target='test') 242 | 243 | assert uoi._estimation_target == 1 244 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | from numpy.testing import assert_array_equal 5 | from numpy.testing import assert_raises 6 | 7 | from pyuoi.linear_model.utils import stability_selection_to_threshold 8 | from pyuoi.linear_model.utils import intersection 9 | 10 | from pyuoi.utils import check_logger 11 | 12 | import logging 13 | try: 14 | from mpi4py import MPI 15 | except ImportError: 16 | MPI = None 17 | 18 | 19 | def test_stability_selection_to_threshold_int(): 20 | """Tests whether stability_selection_to_threshold correctly outputs the 21 | correct threshold when provided a single integer.""" 22 | 23 | n_boots_sel = 48 24 | # stability selection is a single integer 25 | test_int = 36 26 | selection_thresholds = stability_selection_to_threshold( 27 | test_int, n_boots_sel) 28 | 29 | assert_array_equal(selection_thresholds, np.array([36])) 30 | 31 | 32 | def test_stability_selection_to_threshold_float(): 33 | """Tests whether stability_selection_to_threshold correctly outputs the 34 | correct threshold when provided a single float.""" 35 | 36 | n_boots_sel = 48 37 | # stability selection is a single float 38 | test_float = 0.5 39 | selection_thresholds = stability_selection_to_threshold( 40 | test_float, n_boots_sel) 41 | 42 | assert_array_equal(selection_thresholds, np.array([24])) 43 | 44 | 45 | def test_stability_selection_to_threshold_ints(): 46 | """Tests whether stability_selection_to_threshold correctly outputs the 47 | correct threshold when provided a list of ints.""" 48 | 49 | n_boots_sel = 48 50 | # stability selection is a list of ints 51 | test_ints = [24, 28, 33, 38, 43, 48] 52 | selection_thresholds = stability_selection_to_threshold( 53 | test_ints, n_boots_sel) 54 | 55 | assert_array_equal( 56 | selection_thresholds, 57 | np.array([24, 28, 33, 38, 43, 48])) 58 | 59 | 60 | def test_stability_selection_to_threshold_floats(): 61 | """Tests whether stability_selection_to_threshold correctly outputs the 62 | correct threshold when provided a list of floats.""" 63 | n_boots_sel = 48 64 | # stability selection is a list of floats 65 | test_floats = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0] 66 | selection_thresholds = stability_selection_to_threshold( 67 | test_floats, n_boots_sel) 68 | 69 | assert_array_equal( 70 | selection_thresholds, 71 | np.array([24, 28, 33, 38, 43, 48])) 72 | 73 | 74 | def test_stability_selection_to_threshold_ints_np(): 75 | """Tests whether stability_selection_to_threshold correctly outputs the 76 | correct threshold when provided a numpy array of ints.""" 77 | 78 | n_boots_sel = 48 79 | # stability selection is a numpy array of ints 80 | test_ints_np = np.array([24, 28, 33, 38, 43, 48]) 81 | selection_thresholds = stability_selection_to_threshold( 82 | test_ints_np, n_boots_sel) 83 | 84 | assert_array_equal( 85 | selection_thresholds, 86 | np.array([24, 28, 33, 38, 43, 48])) 87 | 88 | 89 | def test_stability_selection_to_threshold_floats_np(): 90 | """Tests whether stability_selection_to_threshold correctly outputs the 91 | correct threshold when provided a numpy array of floats.""" 92 | 93 | 
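# floats in (0, 1] appear to be interpreted as fractions of n_boots_sel, with the product truncated toward zero (e.g. 0.6 * 48 = 28.8 -> 28), judging from the expected thresholds below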
n_boots_sel = 48 94 | # stability selection is a numpy array of floats 95 | test_floats_np = np.array([0.5, 0.6, 0.7, 0.8, 0.9, 1.0]) 96 | selection_thresholds = stability_selection_to_threshold( 97 | test_floats_np, n_boots_sel) 98 | 99 | assert_array_equal( 100 | selection_thresholds, 101 | np.array([24, 28, 33, 38, 43, 48])) 102 | 103 | 104 | def test_stability_selection_to_threshold_exceeds_n_bootstraps(): 105 | """Tests whether stability_selection_to_threshold correctly outputs an 106 | error when provided an input that results in bootstraps exceeding 107 | n_boots_sel.""" 108 | 109 | n_boots_sel = 48 110 | # stability selection values that exceed n_boots_sel 111 | test_floats = np.array([0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1]) 112 | test_ints = np.array([24, 28, 33, 38, 43, 48, 52]) 113 | 114 | assert_raises( 115 | ValueError, 116 | stability_selection_to_threshold, 117 | test_ints, 118 | n_boots_sel) 119 | 120 | assert_raises( 121 | ValueError, 122 | stability_selection_to_threshold, 123 | test_floats, 124 | n_boots_sel) 125 | 126 | 127 | def test_stability_selection_to_threshold_one_bootstrap(): 128 | """Tests whether stability_selection_to_threshold correctly handles the 129 | edge case where one bootstrap is requested.""" 130 | 131 | n_boots_sel = 1 132 | # stability selection can only be one value 133 | threshold = 1 134 | 135 | selection_thresholds = stability_selection_to_threshold( 136 | threshold, 137 | n_boots_sel) 138 | 139 | assert_array_equal( 140 | selection_thresholds, 141 | np.array([1])) 142 | 143 | 144 | def test_stability_selection_to_threshold_input_value_error(): 145 | """Tests whether stability_selection_to_threshold properly raises an error 146 | when it receives objects without ints or floats.""" 147 | n_boots_sel = 48 148 | stability_selection_list = [0, 1, 'a'] 149 | stability_selection_np_array = np.array([0, 1, 'a']) 150 | stability_selection_dict = {0: 'a', 1: 'b'} 151 | 152 | assert_raises( 153 | ValueError, 154 | stability_selection_to_threshold, 155 | stability_selection_list, 156 | n_boots_sel) 157 | 158 | assert_raises( 159 | ValueError, 160 | stability_selection_to_threshold, 161 | stability_selection_np_array, 162 | n_boots_sel) 163 | 164 | assert_raises( 165 | ValueError, 166 | stability_selection_to_threshold, 167 | stability_selection_dict, 168 | n_boots_sel) 169 | 170 | 171 | def test_stability_selection_reject_negative_numbers(): 172 | """Tests whether stability_selection_to_threshold correctly rejects 173 | negative thresholds.""" 174 | 175 | n_boots_sel = 48 176 | 177 | # stability selection is an array of negative ints 178 | test_negative = -1 * np.array([24, 28, 33, 38, 43, 48, 52]) 179 | 180 | assert_raises( 181 | ValueError, 182 | stability_selection_to_threshold, 183 | test_negative, 184 | n_boots_sel) 185 | 186 | 187 | def test_intersection(): 188 | """Tests whether intersection correctly performs a hard intersection.""" 189 | 190 | coefs = np.array([ 191 | [[2, 1, -1, 0, 4], 192 | [4, 0, 2, -1, 5], 193 | [1, 2, 3, 4, 5]], 194 | [[2, 0, 0, 0, 0], 195 | [3, 1, 1, 0, 3], 196 | [6, 7, 8, 9, 10]], 197 | [[2, 0, 0, 0, 0], 198 | [2, -1, 3, 0, 2], 199 | [2, 4, 6, 8, 9]]]) 200 | 201 | true_intersection = np.array([ 202 | [True, False, False, False, False], 203 | [True, False, True, False, True], 204 | [True, True, True, True, True]]) 205 | 206 | selection_thresholds = np.array([3]) 207 | estimated_intersection = intersection( 208 | coefs=coefs, 209 | selection_thresholds=selection_thresholds) 210 | 211 | # we sort the supports since they might not be in the same order 212 | 
assert_array_equal( 213 | np.sort(true_intersection, axis=0), 214 | np.sort(estimated_intersection, axis=0)) 215 | 216 | 217 | def test_intersection_with_stability_selection_one_threshold(): 218 | """Tests whether intersection correctly performs a soft intersection.""" 219 | 220 | coefs = np.array([ 221 | [[2, 1, -1, 0, 4], 222 | [4, 0, 2, -1, 5], 223 | [1, 2, 3, 4, 5]], 224 | [[2, 0, 0, 0, 0], 225 | [3, 1, 1, 0, 3], 226 | [6, 7, 8, 9, 10]], 227 | [[2, 0, 0, 0, 0], 228 | [2, -1, 3, 0, 2], 229 | [2, 4, 6, 8, 9]]]) 230 | 231 | true_intersection = np.array([ 232 | [True, False, False, False, False], 233 | [True, True, True, False, True], 234 | [True, True, True, True, True]]) 235 | 236 | selection_thresholds = np.array([2]) 237 | estimated_intersection = intersection( 238 | coefs=coefs, 239 | selection_thresholds=selection_thresholds) 240 | 241 | # we sort the supports since they might not be in the same order 242 | assert_array_equal( 243 | np.sort(true_intersection, axis=0), 244 | np.sort(estimated_intersection, axis=0)) 245 | 246 | 247 | def test_intersection_with_stability_selection_multiple_thresholds(): 248 | """Tests whether intersection correctly performs an intersection with 249 | multiple thresholds. This test also covers the case when there are 250 | duplicates.""" 251 | 252 | coefs = np.array([ 253 | [[2, 1, -1, 0, 4], 254 | [4, 0, 2, -1, 5], 255 | [1, 2, 3, 4, 5]], 256 | [[2, 0, 0, 0, 0], 257 | [3, 1, 1, 0, 3], 258 | [6, 7, 8, 9, 10]], 259 | [[2, 0, 0, 0, 0], 260 | [2, -1, 3, 0, 2], 261 | [2, 4, 6, 8, 9]]]) 262 | 263 | true_intersection = np.array([ 264 | [True, False, False, False, False], 265 | [True, True, True, False, True], 266 | [True, True, True, True, True], 267 | [True, False, True, False, True]]) 268 | 269 | selection_thresholds = np.array([2, 3]) 270 | estimated_intersection = intersection( 271 | coefs=coefs, 272 | selection_thresholds=selection_thresholds) 273 | 274 | # we sort the supports since they might not be in the same order 275 | assert_array_equal( 276 | np.sort(true_intersection, axis=0), 277 | np.sort(estimated_intersection, axis=0)) 278 | 279 | 280 | def test_intersection_no_thresholds(): 281 | """Tests that the intersection method correctly calculates the intersection 282 | using the number of bootstraps as the default selection threshold.""" 283 | 284 | coefs = np.array([ 285 | [[2, 1, -1, 0, 4], 286 | [4, 0, 2, -1, 5], 287 | [1, 2, 3, 4, 5]], 288 | [[2, 0, 0, 0, 0], 289 | [3, 1, 1, 0, 3], 290 | [6, 7, 8, 9, 10]], 291 | [[2, 0, 0, 0, 0], 292 | [2, -1, 3, 0, 2], 293 | [2, 4, 6, 8, 9]]]) 294 | 295 | true_intersection = np.array([ 296 | [True, False, False, False, False], 297 | [True, True, True, True, True], 298 | [True, False, True, False, True]]) 299 | 300 | estimated_intersection = intersection( 301 | coefs=coefs, 302 | selection_thresholds=None) 303 | 304 | # we sort the supports since they might not be in the same order 305 | assert_array_equal( 306 | np.sort(true_intersection, axis=0), 307 | np.sort(estimated_intersection, axis=0)) 308 | 309 | 310 | @pytest.mark.fast 311 | def test_check_logger(): 312 | """Test that check_logger builds a logger correctly""" 313 | ret = check_logger(None, name="test_check_logger") 314 | assert ret is not None 315 | assert ret.name == 'test_check_logger' 316 | 317 | 318 | @pytest.mark.fast 319 | @pytest.mark.skipif(MPI is None, reason='MPI not installed.') 320 | def test_check_logger_mpi(): 321 | """Test that passing in an MPI communicator object works with 322 | check_logger""" 323 | comm = MPI.COMM_WORLD 324 | ret = 
check_logger(None, comm=comm) 325 | assert ret is not None 326 | 327 | 328 | @pytest.mark.fast 329 | def test_check_logger_exists(): 330 | """Test that check_logger returns the supplied logger when one is passed in""" 331 | logger = logging.getLogger() 332 | ret = check_logger(logger) 333 | assert ret is logger 334 | --------------------------------------------------------------------------------