├── tests
│   ├── __init__.py
│   ├── lme
│   │   ├── __init__.py
│   │   ├── test_select_covariates.py
│   │   ├── test_LMEProblem.py
│   │   ├── test_LMEModels.py
│   │   └── test_LMEOracle.py
│   ├── linear
│   │   ├── __init__.py
│   │   └── test_LinearModels.py
│   └── test_core
│       ├── __init__.py
│       ├── test_Regularizers.py
│       └── test_Priors.py
├── src
│   └── pysr3
│       ├── __init__.py
│       ├── linear
│       │   ├── __init__.py
│       │   ├── problems.py
│       │   └── oracles.py
│       ├── lme
│       │   ├── __init__.py
│       │   ├── model_selectors.py
│       │   └── priors.py
│       ├── __about__.py
│       ├── logger.py
│       ├── priors.py
│       ├── solvers.py
│       └── regularizers.py
├── images
│   ├── summary_improved.png
│   └── summary_picture.png
├── README_files
│   ├── README_16_0.png
│   └── README_21_0.png
├── docs
│   ├── sr3_mixed_intuition.png
│   ├── regenerate_docs.sh
│   ├── Makefile
│   ├── index.rst
│   ├── make.bat
│   ├── community_guidelines.ipynb
│   └── conf.py
├── requirements.txt
├── .coveragerc
├── MANIFEST.in
├── .idea
│   ├── vcs.xml
│   ├── misc.xml
│   ├── modules.xml
│   ├── dataSources.xml
│   └── pysr3.iml
├── .gitignore
├── readthedocs.yml
├── .github
│   └── workflows
│       ├── joss_pdf.yml
│       ├── update-readme.yml
│       ├── testing_and_coverage.yml
│       └── deploy-docs.yml
├── paper
│   ├── readme.md
│   └── synthetic_data_4_1
│       └── problem_90.csv
├── setup.py
├── CODE_OF_CONDUCT.md
├── paper.md
├── paper.bib
└── README.md
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/pysr3/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/lme/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/pysr3/linear/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/pysr3/lme/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/linear/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/test_core/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/images/summary_improved.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aksholokhov/pysr3/HEAD/images/summary_improved.png
--------------------------------------------------------------------------------
/images/summary_picture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aksholokhov/pysr3/HEAD/images/summary_picture.png
--------------------------------------------------------------------------------
/README_files/README_16_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aksholokhov/pysr3/HEAD/README_files/README_16_0.png
--------------------------------------------------------------------------------
/README_files/README_21_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aksholokhov/pysr3/HEAD/README_files/README_21_0.png
--------------------------------------------------------------------------------
/docs/sr3_mixed_intuition.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aksholokhov/pysr3/HEAD/docs/sr3_mixed_intuition.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.21.1
2 | pandas>=1.3.1
3 | scipy>=1.7.1
4 | PyYAML>=5.4.1
5 | scikit_learn>=0.24.2
6 | ipython
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | omit =
3 | setup.py
4 | docs/*
5 | build/*
6 | tests/*
7 | */__init__.py
8 | */__about__.py
9 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE
2 | include README.md
3 | include requirements.txt
4 |
5 | recursive-include docs *
6 | prune docs/_build
7 |
8 | recursive-include src/pysr3 *.py
9 | recursive-include tests *.py
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="VcsDirectoryMappings">
4 |     <mapping directory="$PROJECT_DIR$" vcs="Git" />
5 |   </component>
6 | </project>
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectModuleManager">
4 |     <modules>
5 |       <module fileurl="file://$PROJECT_DIR$/.idea/pysr3.iml" filepath="$PROJECT_DIR$/.idea/pysr3.iml" />
6 |     </modules>
7 |   </component>
8 | </project>
--------------------------------------------------------------------------------
/docs/regenerate_docs.sh:
--------------------------------------------------------------------------------
1 | cd ..
2 | pip uninstall -y pysr3
3 | rm -rf dist/*
4 | python setup.py sdist bdist_wheel
5 | pip install sphinx_rtd_theme
6 | pip install dist/pysr3-*.tar.gz
7 | cd docs || exit
8 | make clean
9 | rm -rf source/*
10 | sphinx-apidoc --separate -f -o source/ ../src/pysr3
11 | make html
12 |
13 |
--------------------------------------------------------------------------------
/tests/test_core/test_Regularizers.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 |
4 | class TestRegularizers(unittest.TestCase):
5 |
6 | def test_l0(self):
7 | pass
8 |
9 | def test_l1(self):
10 | pass
11 |
12 | def test_cad(self):
13 | pass
14 |
15 | def test_scad(self):
16 | pass
17 |
--------------------------------------------------------------------------------
/tests/test_core/test_Priors.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | from pysr3.priors import GaussianPrior
4 |
5 |
6 | class TestPriors(unittest.TestCase):
7 |
8 | def test_gaussian_prior(self):
9 | prior = GaussianPrior(params={"intercept": (0, 2)})
10 | prior.instantiate(problem_columns=["intercept"])
11 | self.assertEqual(prior.loss(2), 1)
12 | self.assertEqual(prior.gradient(2)[0], 1)
13 | self.assertEqual(prior.hessian(2)[0], 1 / 2)
14 | prior.forget()
15 | self.assertIsNone(prior.weights)
16 |
--------------------------------------------------------------------------------
/.idea/dataSources.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="DataSourceManagerImpl" format="xml" multifile-model="true">
4 |     <data-source source="LOCAL" name=".coverage">
5 |       <driver-ref>sqlite.xerial</driver-ref>
6 |       <synchronize>true</synchronize>
7 |       <jdbc-driver>org.sqlite.JDBC</jdbc-driver>
8 |       <jdbc-url>jdbc:sqlite:$PROJECT_DIR$/.coverage</jdbc-url>
9 |       <working-dir>$ProjectFileDir$</working-dir>
10 |     </data-source>
11 |   </component>
12 | </project>
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Caches
2 | .DS_*
3 | *pycache*
4 | *dask*
5 | *.egg-info
6 | dist
7 | build
8 | .ipynb_checkpoints
9 |
10 | # Idea files (see https://intellij-support.jetbrains.com/hc/en-us/articles/206544839-How-to-manage-projects-under-Version-Control-Systems)
11 | .idea/workspace.xml
12 | .idea/tasks.xml
13 | .idea/inspectionProfiles
14 | .idea/*
15 | # environment
16 | skmixed_env
17 |
18 | # coverage
19 | .coverage
20 |
21 | # docs
22 | docs/generated
23 | docs/_build
24 |
25 | # Specific files extensions
26 | *.csv
27 | *.pdf
28 | #*.png
29 | *.jpeg
30 | *.jpg
31 |
32 | # outputs in examples
33 | examples/evidence score/backups
34 | examples/*/figures
35 |
--------------------------------------------------------------------------------
/readthedocs.yml:
--------------------------------------------------------------------------------
1 | # .readthedocs.yml
2 | # Read the Docs configuration file
3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4 |
5 | # Required
6 | version: 2
7 |
8 | # Build documentation in the docs/ directory with Sphinx
9 | sphinx:
10 | configuration: docs/conf.py
11 |
12 | # Build documentation with MkDocs
13 | #mkdocs:
14 | # configuration: mkdocs.yml
15 |
16 | # Optionally build your docs in additional formats such as PDF and ePub
17 | formats: all
18 |
19 | # Optionally set the version of Python and requirements required to build your docs
20 | python:
21 | version: 3.7
22 | install:
23 | - method: setuptools
24 | path: .
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | Welcome to PySR3 documentation!
2 | ===================================
3 |
4 | SR3 is a relaxation method designed for accurate feature selection. It
5 | currently supports:
6 |
7 | - Linear Models (L0, LASSO, A-LASSO, CAD, SCAD)
8 | - Linear Mixed-Effect Models (L0, LASSO, A-LASSO, CAD, SCAD)
9 |
10 | .. toctree::
11 | :caption: Getting Started
12 |
13 | Quickstart
14 | Models Overview
15 |
16 | .. toctree::
17 | :maxdepth: 2
18 | :caption: Developers
19 |
20 | Community Guidelines
21 | Modules
22 |
23 |
24 | Indices and tables
25 | ------------------
26 |
27 | * :ref:`genindex`
28 | * :ref:`modindex`
29 | * :ref:`search`
30 |
--------------------------------------------------------------------------------
/.github/workflows/joss_pdf.yml:
--------------------------------------------------------------------------------
1 | name: JOSS Paper Draft Generation
2 |
3 | on: [ push ]
4 |
5 | jobs:
6 | paper:
7 | runs-on: ubuntu-latest
8 | name: Paper Draft
9 | steps:
10 | - name: Checkout
11 | uses: actions/checkout@v2
12 | - name: Build draft PDF
13 | uses: openjournals/openjournals-draft-action@master
14 | with:
15 | journal: joss
16 | # This should be the path to the paper within your repo.
17 | paper-path: paper.md
18 | - name: Upload
19 | uses: actions/upload-artifact@v1
20 | with:
21 | name: paper
22 | # This is the output path where Pandoc will write the compiled
23 | # PDF. Note, this should be the same directory as the input
24 | # paper.md
25 | path: paper.pdf
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/.idea/pysr3.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/paper/readme.md:
--------------------------------------------------------------------------------
1 | # Datasets for [Sholokhov et al. 2022 "A Relaxation Approach to Feature Selection for Linear Mixed Effects Models"](https://arxiv.org/abs/2205.06925?context=stat)
2 |
3 | ## Reproducibility Guide
4 | The detailed reproducibility guide for all synthetic data, plots, and tables is located [here](https://github.com/aksholokhov/msr3-paper). This folder only contains synthetic datasets that we used in Chapter 4 of our paper.
5 |
6 | ## Chapter 4.1: Experiments on synthetic data
7 | The folder `synthetic_data_4_1` contains 100 CSV tables. The rows are observations (objects). Each table has the following columns:
8 |
9 | * `group` -- the group that this object belongs to
10 | * `target` -- the target variable, a.k.a. the observations `y`
11 | * `variance` -- variance of the observation noise
12 | * `fixed`, `random`, or `fixed+random` -- features, a.k.a. covariates.
13 |
14 | The names of feature columns indicate whether they factor into the model as `fixed` effects, `random` effects, or both (`fixed+random`).
15 |
--------------------------------------------------------------------------------
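As a quick orientation for the table format described above, here is a minimal sketch of loading one of these files with `pandas`. It assumes the script runs from the repository root; `problem_90.csv` is the only table checked into this repository.

```python
import pandas as pd

# Load one synthetic table from Chapter 4.1 of the paper.
df = pd.read_csv("paper/synthetic_data_4_1/problem_90.csv")

# Split it into the pieces described in paper/readme.md: groups,
# targets, observation-noise variances, and feature columns.
groups = df["group"]
y = df["target"]
noise_variance = df["variance"]
features = df.drop(columns=["group", "target", "variance"])
```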
/.github/workflows/update-readme.yml:
--------------------------------------------------------------------------------
1 | name: Convert Jupyter README
2 |
3 | on:
4 | push:
5 | branches:
6 | - sr3
7 |
8 | jobs:
9 | release:
10 | if: startsWith(github.event.head_commit.message, 'Update README')
11 | name: Build
12 | runs-on: ubuntu-latest
13 | steps:
14 |
15 | - uses: actions/checkout@v1
16 | - name: Set up Python 3.7
17 | uses: actions/setup-python@v1
18 | with:
19 | python-version: 3.7
20 |
21 | - name: Install dependencies & Convert README.ipynb
22 | run: |
23 | python -m pip install --upgrade pip
24 | pip install nbconvert nbformat
25 | jupyter nbconvert --to markdown --output ../README.md docs/quickstart.ipynb
26 | - name: Commit files
27 | run: |
28 | git config --local user.email "action@github.com"
29 | git config --local user.name "GitHub Action"
30 | git add README.md
31 | git commit -m "Convert README.ipynb to README.md" -a
32 | - name: Push changes
33 | if: success()
34 | uses: ad-m/github-push-action@master
35 | with:
36 | branch: sr3
37 | github_token: ${{ secrets.ACCESS_TOKEN }}
--------------------------------------------------------------------------------
/.github/workflows/testing_and_coverage.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: Testing and Coverage
5 | on: [ push ]
6 | jobs:
7 | run:
8 | runs-on: ${{ matrix.os }}
9 | strategy:
10 | matrix:
11 | os: [ ubuntu-latest, macos-latest, windows-latest ]
12 | env:
13 | OS: ${{ matrix.os }}
14 | PYTHON: '3.7'
15 | steps:
16 | - uses: actions/checkout@master
17 | - name: Setup Python
18 | uses: actions/setup-python@master
19 | with:
20 | python-version: 3.7
21 | - name: Install dependencies
22 | run: |
23 | python -m pip install --upgrade pip
24 | pip install -r requirements.txt
25 | python setup.py develop
26 | - name: Generate coverage report
27 | run: |
28 | pip install pytest
29 | pip install pytest-cov
30 | pytest --cov=./ --cov-report=xml
31 | - name: Upload coverage to Codecov
32 | uses: codecov/codecov-action@v2
33 | with:
34 | env_vars: OS,PYTHON
35 | fail_ci_if_error: true
36 | files: ./coverage.xml
37 | flags: unittests
38 | name: codecov-umbrella
39 | verbose: true
40 | directory: ./coverage/reports/
41 | path_to_write_report: ./coverage/codecov_report.txt
42 |
43 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 |
3 | from setuptools import setup, find_packages
4 |
5 | if __name__ == "__main__":
6 | base_dir = Path(__file__).parent
7 | src_dir = base_dir / 'src'
8 |
9 | about = {}
10 | with (src_dir / "pysr3" / "__about__.py").open() as f:
11 | exec(f.read(), about)
12 |
13 | install_requirements = [t.strip() for t in open("requirements.txt", 'r').readlines()]
14 |
15 | test_requirements = [
16 | 'pytest',
17 | ]
18 |
19 | doc_requirements = [
20 | 'sphinx',
21 |         'sphinx-rtd-theme',
22 | 'nbconvert',
23 | 'nbformat'
24 | ]
25 |
26 | setup(
27 | name=about['__title__'],
28 | version=about['__version__'],
29 |
30 | description=about['__summary__'],
31 | long_description=about['__long_description__'],
32 | long_description_content_type="text/markdown",
33 | license=about['__license__'],
34 | url=about["__uri__"],
35 |
36 | author=about["__author__"],
37 | author_email=about["__email__"],
38 |
39 | package_dir={'': 'src'},
40 | packages=find_packages(where='src'),
41 |
42 | python_requires='>=3.8',
43 | install_requires=install_requirements,
44 | tests_require=test_requirements,
45 | extras_require={
46 | 'docs': doc_requirements,
47 | 'test': test_requirements,
48 |             'dev': doc_requirements + test_requirements
49 | },
50 | zip_safe=False,
51 | )
52 |
--------------------------------------------------------------------------------
/.github/workflows/deploy-docs.yml:
--------------------------------------------------------------------------------
1 | name: Build & Publish Docs with Sphinx
2 |
3 | on:
4 | push:
5 | branches:
6 | - master
7 |
8 | permissions:
9 | contents: write
10 |
11 | jobs:
12 | release:
13 | name: Build
14 | runs-on: ubuntu-latest
15 | steps:
16 | - uses: actions/checkout@v1
17 | - uses: r-lib/actions/setup-pandoc@v1
18 | - name: Set up Python 3.8
19 | uses: actions/setup-python@v1
20 | with:
21 | python-version: 3.8
22 | - name: Install dependencies
23 | run: |
24 | python -m pip install --upgrade pip
25 | python setup.py develop
26 | pip install jupyter nbconvert nbformat sphinx sphinx-rtd-theme
27 |       - name: Convert Jupyter Notebooks to Documentation Pages
28 | run: |
29 | cd docs
30 | jupyter nbconvert --to rst quickstart.ipynb
31 | jupyter nbconvert --to rst models_overview.ipynb
32 | jupyter nbconvert --to rst community_guidelines.ipynb
33 | cd ..
34 | - name: Generate API docs & Build sphinx documentation
35 | run: |
36 | cd docs
37 | sphinx-apidoc --separate -f -o source/ ../src/pysr3
38 | make clean
39 | make html
40 | cd ..
41 | - name: Deploy 🚀
42 | uses: JamesIves/github-pages-deploy-action@v4
43 | with:
44 | token: ${{ secrets.ACCESS_TOKEN }}
45 | branch: gh-pages # The branch the action should deploy to.
46 | folder: docs/_build/html # The folder the action should deploy.
--------------------------------------------------------------------------------
/src/pysr3/__about__.py:
--------------------------------------------------------------------------------
1 | # skmixed: Library for Feature Selection in Linear Mixed-Effect Models
2 | # Copyright (C) 2020 Aleksei Sholokhov
3 | #
4 | # This program is free software: you can redistribute it and/or modify
5 | # it under the terms of the GNU General Public License as published by
6 | # the Free Software Foundation, either version 3 of the License, or
7 | # (at your option) any later version.
8 | #
9 | # This program is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | # GNU General Public License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License
15 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
16 |
17 | __all__ = [
18 | "__title__", "__summary__", "__uri__", "__version__", "__author__",
19 | "__email__", "__license__", "__copyright__"
20 | ]
21 |
22 | __title__ = "PySR3"
23 | __summary__ = "Python Library for Sparse Relaxed Regularized Regression."
24 | __long_description__ = ("This package implements classic and novel feature selection algorithms " +
25 |                         "for linear and mixed-effect models." +
26 | " It supports many widely used regularization techniques, like LASSO, A-LASSO, CAD and SCAD." +
27 | " See README.md for details and examples.")
28 | __uri__ = "https://github.com/aksholokhov/pysr3"
29 | __classifiers__ = [
30 | "Programming Language :: Python :: 3",
31 | 'License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)',
32 | "Operating System :: OS Independent",
33 | ]
34 |
35 | __version__ = "0.3.5"
36 |
37 | __author__ = "Aleksei Sholokhov"
38 | __email__ = "aksh@uw.edu"
39 |
40 | __license__ = "GNU GPLv3"
41 | __copyright__ = f"Copyright 2020-2021 {__author__}"
42 |
--------------------------------------------------------------------------------
/src/pysr3/logger.py:
--------------------------------------------------------------------------------
1 | from typing import Tuple
2 |
3 |
4 | class Logger:
5 | """
6 | Helper class for logging the progress of iterative methods.
7 | """
8 |     def __init__(self, list_of_keys: Tuple = ()):
9 | """
10 | Initializes the logger
11 |
12 | Parameters
13 | ----------
14 |         list_of_keys: tuple[str]
15 |             tuple of keys for the logger
16 | """
17 | self.keys = list_of_keys
18 | self.dict = {key: [] for key in list_of_keys}
19 |
20 | def log(self, parameters):
21 | """
22 |         Records the values of all parameters whose keys are already in the logger.
23 | Ignores the rest.
24 |
25 | Parameters
26 | ----------
27 | parameters: dict
28 | dictionary with parameters to record.
29 |
30 | Returns
31 | -------
32 | self
33 | """
34 | for key in self.keys:
35 |             if isinstance(self.dict[key], list) and (key in parameters):
36 |                 self.dict[key].append(parameters[key])
37 | return self
38 |
39 | def add(self, key, value):
40 | """
41 | Adds a key-value pair to the logger
42 |
43 | Parameters
44 | ----------
45 | key: str
46 | key
47 | value: Any
48 | value for this key
49 |
50 | Returns
51 | -------
52 |         self
53 | """
54 | self.dict[key] = value
55 | if key not in self.keys:
56 | self.keys = self.keys + tuple([key])
57 | return self
58 |
59 | def append(self, key, value):
60 | """
61 |         Adds value to what is already stored in the logger under the given key.
62 |
63 |         Parameters
64 |         ----------
65 |         key: str
66 |             key
67 |         value: Any
68 |             additive value to add
69 |
70 | Returns
71 | -------
72 | self
73 | """
74 | self.dict[key] += value
75 | return self
76 |
77 | def get(self, key):
78 | """
79 | Returns the value by key
80 |
81 | Parameters
82 | ----------
83 | key: str
84 | key
85 |
86 | Returns
87 | -------
88 | value for this key
89 | """
90 | return self.dict[key]
91 |
--------------------------------------------------------------------------------
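A short usage sketch for `Logger`, based only on the methods defined above; the key names here are illustrative.

```python
from pysr3.logger import Logger

# Track the loss across iterations of some solver.
logger = Logger(list_of_keys=("loss",))
for iteration in range(3):
    # log() records values only for keys the logger was created with;
    # unknown keys, like "extra" here, are silently ignored.
    logger.log({"loss": 1.0 / (iteration + 1), "extra": 42})

logger.add("converged", True)   # add() registers a new key with a single value
print(logger.get("loss"))       # [1.0, 0.5, 0.333...]
print(logger.get("converged"))  # True
```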
/docs/community_guidelines.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "collapsed": true
7 | },
8 | "source": [
9 | "# Community Guidelines\n",
10 | "\n",
11 | "We encourage the broader community to contribute to `pysr3`! Please submit a pull request on GitHub\n",
12 | "if you fixed a bug or developed an extension. If you experience any issues please open a new issue on the Issues page, and we will do our best to help.\n",
13 | "\n",
14 | "As a community, we follow the list of rules below:\n",
15 | "\n",
16 | "1. We adhere to [sklearn's interfaces and standards](https://scikit-learn.org/stable/developers/develop.html)\n",
17 | " * Please use `sklearn.utils.estimator_checks.check_estimator` to ensure that your contributions don't break this compatibility.\n",
18 |     "2. For comments and docstrings, we use [numpy docstring standards](https://numpydoc.readthedocs.io/en/latest/format.html#docstring-standard)\n",
19 | "3. For version numbers, we use [Semantic Versioning](https://semver.org/).\n",
20 |     "4. We adhere to the [Contributor Covenant](https://www.contributor-covenant.org/version/2/0/code_of_conduct/) code of conduct.\n",
21 | "\n",
22 | "## Developing `pysr3`\n",
23 | "`pysr3` does not require any special hardware and can be developed on your personal computer.\n",
24 | "To start, install `pysr3` in the developer mode:\n",
25 | "\n",
26 | "```bash\n",
27 | "git clone https://github.com/aksholokhov/pysr3.git\n",
28 | "cd pysr3\n",
29 | "python setup.py develop\n",
30 | "```\n",
31 | "\n",
32 | "## Testing `pysr3`\n",
33 | "To test the installation, invoke the tests with `pytest`:\n",
34 | "\n",
35 | "```bash\n",
36 | "pytest .\n",
37 | "```"
38 | ]
39 | }
40 | ],
41 | "metadata": {
42 | "kernelspec": {
43 | "display_name": "Python 3",
44 | "language": "python",
45 | "name": "python3"
46 | },
47 | "language_info": {
48 | "codemirror_mode": {
49 | "name": "ipython",
50 | "version": 2
51 | },
52 | "file_extension": ".py",
53 | "mimetype": "text/x-python",
54 | "name": "python",
55 | "nbconvert_exporter": "python",
56 | "pygments_lexer": "ipython2",
57 | "version": "2.7.6"
58 | }
59 | },
60 | "nbformat": 4,
61 | "nbformat_minor": 0
62 | }
63 |
--------------------------------------------------------------------------------
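Rule 1 in the guidelines above can be checked mechanically. A minimal sketch, assuming that `L1LmeModel` (imported by `pysr3.lme.model_selectors`) is instantiable with its default arguments:

```python
from sklearn.utils.estimator_checks import check_estimator

from pysr3.lme.models import L1LmeModel

# Runs sklearn's battery of interface checks; raises an error if the
# estimator violates the sklearn contract.
check_estimator(L1LmeModel())
```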
/tests/lme/test_select_covariates.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | import numpy as np
4 | import pandas as pd
5 | import yaml
6 | from pathlib import Path
7 |
8 | from pysr3.lme.problems import LMEProblem, FIXED_RANDOM
9 | from pysr3.lme.model_selectors import select_covariates, MODELS_NAMES
10 |
11 |
12 | class TestSelectCovariates(unittest.TestCase):
13 |
14 | def test_feature_selector(self):
15 |
16 | trials = 1
17 |
18 | problem_parameters = {
19 | "groups_sizes": [20, 15, 10, 50],
20 | "features_labels": [FIXED_RANDOM] * 3,
21 | "fit_fixed_intercept": False,
22 | "fit_random_intercept": False,
23 | "features_covariance_matrix": np.array([
24 | [1, 0, 0],
25 | [0, 1, 0.7],
26 | [0, 0.7, 1]
27 | ]),
28 | "obs_var": 0.1,
29 | }
30 |
31 | for i in range(trials):
32 | with self.subTest(i=i):
33 | for model_name in MODELS_NAMES:
34 | with self.subTest(model_name=model_name):
35 | true_beta = true_gamma = np.array([1, 0, 1])
36 | problem, _ = LMEProblem.generate(**problem_parameters, seed=i,
37 | beta=true_beta, gamma=true_gamma)
38 | x, y, labels = problem.to_x_y()
39 | data = pd.DataFrame(x, columns=["group", "x1", "x2", "x3", "variance"])
40 | # TODO: figure it out
41 | data["se"] = np.sqrt(data["variance"])
42 | data["target"] = y
43 | select_covariates(df=data,
44 | covs={
45 | "fixed_effects": ["x1", "x2", "x3"],
46 | "random_effects": ["x1", "x2", "x3"]
47 | },
48 | target="target",
49 | variance="se",
50 | group="group",
51 | model_name=model_name
52 | )
53 | with open('sel_covs.yaml', 'r') as f:
54 | answers = yaml.safe_load(f)
55 | self.assertEqual(tuple(answers['fixed_effects']), ("x1", "x3"))
56 | self.assertEqual(tuple(answers['random_effects']), ("x1", "x3"))
57 | if Path('sel_covs.yaml').exists():
58 | Path('sel_covs.yaml').unlink()
59 |
60 | return None
61 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | import os
14 | import sys
15 |
16 | from pysr3.__about__ import __author__, __version__, __title__
17 |
18 | sys.path.insert(0, os.path.abspath('.'))
19 |
20 | # -- Project information -----------------------------------------------------
21 |
22 | project = __title__
23 | copyright = f'2021, {__author__}'
24 | author = __author__
25 |
26 | # The full version, including alpha/beta/rc tags
27 | release = __version__
28 |
29 | # -- General configuration ------------------------------------------------
30 | master_doc = 'index'
31 | autoclass_content = "both" # include both class docstring and __init__
32 | autodoc_default_flags = [
33 | # Make sure that any autodoc declarations show the right members
34 | "members",
35 | "inherited-members",
36 | "private-members",
37 | "show-inheritance",
38 | ]
39 |
40 | # Add any Sphinx extension module names here, as strings. They can be
41 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
42 | # ones.
43 | extensions = ['sphinx.ext.autodoc',
44 | 'sphinx.ext.coverage',
45 | 'sphinx.ext.napoleon',
46 | 'sphinx.ext.autosummary',
47 | 'IPython.sphinxext.ipython_console_highlighting'
48 | ]
49 |
50 | autosummary_generate = True # Make _autosummary files and include them
51 | napoleon_numpy_docstring = True # Use NumPy style
52 | napoleon_use_rtype = False # More legible
53 | napoleon_google_docstring = False
54 | napoleon_use_param = False
55 | napoleon_use_ivar = True
56 |
57 | # Add any paths that contain templates here, relative to this directory.
58 | templates_path = ['_templates']
59 |
60 | # Static path
61 | html_static_path = []
62 |
63 | # List of patterns, relative to source directory, that match files and
64 | # directories to ignore when looking for source files.
65 | # This pattern also affects html_static_path and html_extra_path.
66 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
67 |
68 | # -- Options for HTML output -------------------------------------------------
69 |
70 | # The theme to use for HTML and HTML Help pages. See the documentation for
71 | # a list of builtin themes.
72 | #
73 | html_theme = 'sphinx_rtd_theme'
74 | # html_theme = 'sphinxbootstrap4theme'
75 | # Add any paths that contain custom static files (such as style sheets) here,
76 | # relative to this directory. They are copied after the builtin static files,
77 | # so a file named "default.css" will overwrite the builtin "default.css".
78 | # html_static_path = ['_static']
79 |
--------------------------------------------------------------------------------
/src/pysr3/priors.py:
--------------------------------------------------------------------------------
1 | # Prior distributions for model parameters
2 | # Copyright (C) 2021 Aleksei Sholokhov, aksh@uw.edu
3 | #
4 | # This program is free software: you can redistribute it and/or modify
5 | # it under the terms of the GNU General Public License as published by
6 | # the Free Software Foundation, either version 3 of the License, or
7 | # (at your option) any later version.
8 | #
9 | # This program is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | # GNU General Public License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License
15 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
16 |
17 | """
18 | Prior distributions for model parameters
19 | """
20 |
21 | from typing import Dict
22 |
23 | import numpy as np
24 |
25 |
26 | class Prior:
27 | pass
28 |
29 |
30 | class GaussianPrior(Prior):
31 | """
32 | Implements Gaussian Prior for various models
33 | """
34 |
35 | def __init__(self, params: Dict):
36 | """
37 | Creates GaussianPrior
38 |
39 | Parameters
40 | ----------
41 |         params: dict[str: tuple(float, float)]
42 |             dictionary that maps a parameter's name to the (mean, variance) of its Gaussian prior
43 | """
44 | self.params = params
45 | self.means = None
46 | self.stds = None
47 | self.weights = None
48 |
49 | def instantiate(self, problem_columns):
50 | """
51 | Instantiates a Gaussian prior with problem-dependent quantities
52 |
53 | Parameters
54 | ----------
55 | problem_columns: List[str]
56 | Names of the columns for a particular dataset. Matches the elements of self.params (dict)
57 | with the columns of a particular dataset.
58 |
59 | Returns
60 | -------
61 | None
62 | """
63 | assert all(key in problem_columns for key in self.params.keys()), \
64 | (f"Some keys are listed in the prior but not listed in the problem's column labels:" +
65 | f" {[key for key in self.params.keys() if key not in problem_columns]}")
66 |
67 | means = []
68 | stds = []
69 | weights = []
70 | for label in problem_columns:
71 | mean, std = self.params.get(label, (0, 0))
72 | assert std >= 0
73 | means.append(mean)
74 | weights.append(1 if std > 0 else 0)
75 | stds.append(std if std > 0 else 1)
76 | self.means = np.array(means)
77 | self.stds = np.array(stds)
78 | self.weights = np.array(weights)
79 |
80 | def forget(self):
81 | """
82 | Releases all problem-dependent quantities
83 |
84 | Returns
85 | -------
86 | None
87 | """
88 | self.means = None
89 | self.stds = None
90 | self.weights = None
91 |
92 | def loss(self, x):
93 | """
94 |         Value of the prior at x.
95 |
96 | Parameters
97 | ----------
98 | x: ndarray
99 | vector of parameters
100 |
101 | Returns
102 | -------
103 | value of the prior.
104 | """
105 | return (self.weights * (1 / (2 * self.stds)) * ((x - self.means) ** 2)).sum()
106 |
107 | def gradient(self, x):
108 | """
109 |         Evaluates the gradient of the prior with respect to the parameter vector
110 |
111 | Parameters
112 | ----------
113 | x: ndarray
114 | vector of parameters
115 |
116 | Returns
117 | -------
118 | gradient
119 | """
120 | return self.weights * (1 / self.stds) * (x - self.means)
121 |
122 | def hessian(self, _):
123 | """
124 |         Evaluates the Hessian of the prior with respect to the parameter vector
125 |
126 | Returns
127 | -------
128 | Hessian
129 | """
130 | return np.diag(self.weights * (1 / self.stds))
131 |
132 |
133 | class NonInformativePrior(Prior):
134 | """
135 | Implements a non-informative prior
136 | """
137 |
138 | def __init__(self):
139 | """
140 | Creates NonInformativePrior
141 | """
142 | pass
143 |
144 | def instantiate(self, problem):
145 | """
146 | Instantiates the prior based on the problem
147 |
148 | Parameters
149 | ----------
150 | problem: LMEProblem
151 |
152 | Returns
153 | -------
154 | None
155 | """
156 | pass
157 |
158 | def forget(self):
159 | """
160 | Releases all problem-dependent values
161 |
162 | Returns
163 | -------
164 | None
165 | """
166 | pass
167 |
168 | @staticmethod
169 | def loss(_):
170 | """
171 |         Value of the prior.
172 |
173 | Returns
174 | -------
175 | value of the prior.
176 | """
177 | return 0
178 |
179 | @staticmethod
180 | def gradient(_):
181 | """
182 |         Evaluates the gradient of the prior.
183 |
184 | Returns
185 | -------
186 | gradient
187 | """
188 | return 0
189 |
190 | @staticmethod
191 | def hessian(_):
192 | """
193 |         Evaluates the Hessian of the prior.
194 |
195 | Returns
196 | -------
197 |         Hessian
198 | """
199 | return 0
200 |
--------------------------------------------------------------------------------
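A worked sketch of `GaussianPrior`, mirroring the numbers checked in `tests/test_core/test_Priors.py`: with `params={"intercept": (0, 2)}`, the loss at x = 2 is (2 - 0)² / (2 · 2) = 1.

```python
import numpy as np

from pysr3.priors import GaussianPrior

# A Gaussian prior on the intercept with mean 0 and variance 2.
prior = GaussianPrior(params={"intercept": (0, 2)})
prior.instantiate(problem_columns=["intercept"])

x = np.array([2.0])
print(prior.loss(x))      # 1.0: weights * (x - means)^2 / (2 * stds)
print(prior.gradient(x))  # [1.0]
print(prior.hessian(x))   # [[0.5]]

prior.forget()            # releases the problem-dependent arrays
```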
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for
6 | everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity
7 | and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion,
8 | or sexual identity and orientation.
9 |
10 | We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community.
11 |
12 | ## Our Standards
13 |
14 | Examples of behavior that contributes to a positive environment for our community include:
15 |
16 | * Demonstrating empathy and kindness toward other people
17 | * Being respectful of differing opinions, viewpoints, and experiences
18 | * Giving and gracefully accepting constructive feedback
19 | * Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience
20 | * Focusing on what is best not just for us as individuals, but for the overall community
21 |
22 | Examples of unacceptable behavior include:
23 |
24 | * The use of sexualized language or imagery, and sexual attention or advances of any kind
25 | * Trolling, insulting or derogatory comments, and personal or political attacks
26 | * Public or private harassment
27 | * Publishing others' private information, such as a physical or email address, without their explicit permission
28 | * Other conduct which could reasonably be considered inappropriate in a professional setting
29 |
30 | ## Enforcement Responsibilities
31 |
32 | Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take
33 | appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive,
34 | or harmful.
35 |
36 | Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits,
37 | issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for
38 | moderation decisions when appropriate.
39 |
40 | ## Scope
41 |
42 | This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing
43 | the community in public spaces. Examples of representing our community include using an official e-mail address, posting
44 | via an official social media account, or acting as an appointed representative at an online or offline event.
45 |
46 | ## Enforcement
47 |
48 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible
49 | for enforcement at
50 | [INSERT CONTACT METHOD]. All complaints will be reviewed and investigated promptly and fairly.
51 |
52 | All community leaders are obligated to respect the privacy and security of the reporter of any incident.
53 |
54 | ## Enforcement Guidelines
55 |
56 | Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem
57 | in violation of this Code of Conduct:
58 |
59 | ### 1. Correction
60 |
61 | **Community Impact**: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the
62 | community.
63 |
64 | **Consequence**: A private, written warning from community leaders, providing clarity around the nature of the violation
65 | and an explanation of why the behavior was inappropriate. A public apology may be requested.
66 |
67 | ### 2. Warning
68 |
69 | **Community Impact**: A violation through a single incident or series of actions.
70 |
71 | **Consequence**: A warning with consequences for continued behavior. No interaction with the people involved, including
72 | unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding
73 | interactions in community spaces as well as external channels like social media. Violating these terms may lead to a
74 | temporary or permanent ban.
75 |
76 | ### 3. Temporary Ban
77 |
78 | **Community Impact**: A serious violation of community standards, including sustained inappropriate behavior.
79 |
80 | **Consequence**: A temporary ban from any sort of interaction or public communication with the community for a specified
81 | period of time. No public or private interaction with the people involved, including unsolicited interaction with those
82 | enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban.
83 |
84 | ### 4. Permanent Ban
85 |
86 | **Community Impact**: Demonstrating a pattern of violation of community standards, including sustained inappropriate
87 | behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals.
88 |
89 | **Consequence**: A permanent ban from any sort of public interaction within the community.
90 |
91 | ## Attribution
92 |
93 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0, available at
94 | [https://www.contributor-covenant.org/version/2/0/code_of_conduct.html][v2.0].
95 |
96 | Community Impact Guidelines were inspired by
97 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC].
98 |
99 | For answers to common questions about this code of conduct, see the FAQ at
100 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available
101 | at [https://www.contributor-covenant.org/translations][translations].
102 |
103 | [homepage]: https://www.contributor-covenant.org
104 |
105 | [v2.0]: https://www.contributor-covenant.org/version/2/0/code_of_conduct.html
106 |
107 | [Mozilla CoC]: https://github.com/mozilla/diversity
108 |
109 | [FAQ]: https://www.contributor-covenant.org/faq
110 |
111 | [translations]: https://www.contributor-covenant.org/translations
112 |
--------------------------------------------------------------------------------
/src/pysr3/linear/problems.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | import numpy as np
4 | import pandas as pd
5 |
6 |
7 | class LinearProblem:
8 | """
9 | Helper class which implements Linear models' abstractions over a given dataset.
10 |
11 | It also can generate random problems with specific characteristics.
12 | """
13 |
14 | def __init__(self,
15 | a,
16 | b,
17 | c=None,
18 | obs_std=None,
19 | regularization_weights=None):
20 | """
21 | Constructs LinearProblem -- a helper class that abstracts the data for the models.
22 |
23 | Parameters
24 | ----------
25 | a: ndarray (n, p)
26 | data matrix
27 | b: ndarray (n, )
28 | target variable
29 |         obs_std: ndarray (n, )
30 |             standard deviations of the mean-zero Gaussian noise for each observation
31 |         regularization_weights: ndarray (p, )
32 |             feature-specific weights for the regularizer. Inverse-proportional to the features' importance.
33 | """
34 | self.a = np.array(a, dtype='float64')
35 | self.b = np.array(b, dtype='float64')
36 | self.num_objects = a.shape[0]
37 | self.num_features = a.shape[1]
38 |         self.c = c if c is not None else np.eye(self.num_features)
39 | self.obs_std = obs_std
40 | self.regularization_weights = regularization_weights
41 |
42 | @staticmethod
43 | def generate(num_objects=100,
44 | num_features=10,
45 | obs_std=0.1,
46 | true_x=None,
47 | seed=42):
48 | """
49 | Generates a random dataset with a linear dependence between observations and features
50 |
51 | Parameters
52 | ----------
53 | num_objects: int
54 | number of objects (rows) in the dataset
55 | num_features: int
56 | number of features (columns) in the dataset
57 |         obs_std: float | ndarray (num_objects, )
58 |             standard deviation of the mean-zero Gaussian noise, per observation (array) OR shared by all observations (float)
59 | true_x: ndarray (num_features, )
60 | true vector of coefficients. If None then generates a random one from U[0, 1]^num_features
61 | seed: int
62 | random seed
63 | Returns
64 | -------
65 | problem: LinearProblem
66 | generated problem
67 | """
68 | np.random.seed(seed)
69 | a = np.random.rand(num_objects, num_features)
70 | a[:, 0] = 1
71 | x = true_x if true_x is not None else np.random.rand(num_features)
72 | b = a.dot(x) + obs_std * np.random.randn(num_objects)
73 | return LinearProblem(a=a, b=b, regularization_weights=np.ones(num_features))
74 |
75 | @staticmethod
76 | def from_x_y(x, y, c=None, regularization_weights=None):
77 | """
78 | Creates a LinearProblem from provided dataset
79 |
80 | Parameters
81 | ----------
82 | x: ndarray (n, p)
83 | design matrix with objects being rows and columns being features
84 | y: ndarray (n, )
85 | vector of observations
86 | c: ndarray (p, p), optional
87 | matrix C for SR3 relaxation, see the paper. If None then an identity is used.
88 |         regularization_weights: ndarray (p, )
89 |             feature-specific weights for the regularizer. Inverse-proportional to the features' importance.
90 |
91 | Returns
92 | -------
93 | problem: LinearProblem
94 | problem with provided data inside
95 | """
96 | return LinearProblem(a=x, b=y, c=c, regularization_weights=regularization_weights)
97 |
98 | def to_x_y(self):
99 | """
100 | Converts LinearProblem class to array representation
101 | Returns
102 | -------
103 | x: ndarray (n, p)
104 | design matrix with objects being rows and columns being features
105 | y: ndarray (n, )
106 | vector of observations
107 |
108 | """
109 | return self.a, self.b
110 |
111 | @staticmethod
112 | def from_dataframe(data: pd.DataFrame,
113 | features: List[str],
114 | target: str,
115 | must_include_features: List[str] = None,
116 | obs_std: str = None,
117 | c=None,
118 | ):
119 | """
120 | Creates LinearProblem from a Pandas dataframe
121 |
122 | Parameters
123 | ----------
124 | data: pd.DataFrame
125 | pandas dataframe with dataset
126 | features: List[str]
127 | list of column names that should be included as features
128 | target: str
129 | name of the column containing the observations
130 | must_include_features: List[str]
131 | list of column names that are not going to be affected by regularization.
132 | In other words, list of features that receive regularization_weight=0. All others receive 1.
133 |         obs_std: str, optional
134 |             name of the column with the standard deviations of the observations' noise
135 |
136 | c: ndarray (p, p), optional
137 | matrix C for SR3 relaxation, see the paper. If None then an identity is used.
138 |
139 |
140 | Returns
141 | -------
142 | problem: LinearProblem
143 | problem with the dataset inside
144 | """
145 | n = len(features)
146 | regularization_weights = [1] * n if not must_include_features \
147 | else [int(feature not in must_include_features) for feature in features]
148 | return LinearProblem(a=data[features].to_numpy(),
149 | b=data[target].to_numpy(),
150 | c=c,
151 | regularization_weights=regularization_weights,
152 |                              obs_std=data[obs_std].to_numpy() if obs_std else None)
153 |
--------------------------------------------------------------------------------
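A minimal sketch of generating a synthetic `LinearProblem` and round-tripping it through the array representation, using only the methods defined above:

```python
import numpy as np

from pysr3.linear.problems import LinearProblem

# Generate a 100x10 random problem; the first column is an intercept of ones.
problem = LinearProblem.generate(num_objects=100, num_features=10,
                                 obs_std=0.1, seed=42)
x, y = problem.to_x_y()
assert x.shape == (100, 10) and y.shape == (100,)

# Rebuild a problem from raw arrays; C defaults to the identity matrix.
problem2 = LinearProblem.from_x_y(x, y)
assert np.array_equal(problem2.c, np.eye(10))
```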
/paper.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: 'pysr3: A Python Package for Sparse Relaxed Regularized Regression'
3 |
4 | tags:
5 | - Python
6 | - feature selection
7 | - linear models
8 | - mixed-effect models
9 | - regularization
10 |
11 | authors:
12 | - name: Aleksei Sholokhov
13 | orcid: 0000-0001-8173-6236
14 | affiliation: 1
15 | - name: Peng Zheng
16 | orcid: 0000-0003-3313-215X
17 | affiliation: 2
18 | - name: Aleksandr Aravkin
19 | orcid: 0000-0002-1875-1801
20 | affiliation: "1, 2"
21 |
22 | affiliations:
23 | - name: Department of Applied Mathematics, University of Washington
24 | index: 1
25 | - name: Department of Health Metrics Sciences, University of Washington
26 | index: 2
27 | date: 04.01.2023
28 | bibliography: paper.bib
29 |
30 | ---
31 |
32 | # Summary
33 |
34 | Datasets increasingly contain many potential covariates that may be related to different research questions. The presence of irrelevant or weakly relevant features can be counterproductive to modeling, as over-parametrized models may lead to unstable estimates, invalid inference, and low prediction accuracy. Reliable feature selection is therefore a requirement for a wide range of regression settings.
35 |
36 | Feature selection methods are an active research topic [@Buscemi2019Survey; @miao2016survey; @li2020survey], with a plethora of numerical approaches and corresponding implementations.
37 | However, most current state-of-the-art tools custom-tailor their implementation to their mathematical approach (e.g., they use an optimization scheme that works only for a particular regularizer).
38 | In practice, this forces practitioners to re-implement their workflow for each method that they want to try, even when the difference between methods is minor. The absence of universal open-source implementations effectively blocks practitioners from comparing all available methods, with the effect
39 | of slowing the uptake of research results in the field.
40 |
41 | We fill this gap by implementing recently developed universal solvers [@zheng2018unified; @sholokhov2022relaxation] that (1) work with the most popular regularized regression techniques, and (2) improve the selection accuracy of any regularized regression approach using new relaxation reformulations.
42 | To date, the library supports linear models (classic regression) and linear mixed effects models. Because of full compatibility with `sklearn`, all `pysr3` models can be used in pipelines with classic modelling blocks such as data pre-processors, randomized grid search, cross-validation, and quality metrics.
43 |
44 |
45 | # Statement of Need
46 |
47 | Reliable automated feature selection requires easy-to-use libraries, so that practitioners can test and compare multiple regularization approaches using their data, choose the most effective method, and apply the analysis at scale. These libraries should be flexible and modular enough to accommodate future developments, such as newly proposed regularizers, without forcing the practitioner to implement new solvers for each approach. At the same time, the libraries must be efficient and robust enough to handle common challenges such as ill-conditioned problems that arise in datasets with correlated predictors. The PySR3 library is designed to easily include new loss functions, constraints, information criteria, and regularization strategies. All PySR3 models fully adhere to the standards and interface requirements of `sklearn` [@sklearn_api], providing a familiar interface to users.
48 |
49 | Currently, PySR3 models can be used to automatically select features in both linear regression models and linear mixed effects (LME) models, which extend linear regression to clustered data settings. LME models commonly arise in longitudinal data analysis and meta-analysis. Feature selection for linear mixed-effects models is harder than for linear regression models due to non-linearity of LMEs and within-group correlations. To the best of our knowledge, there are no standalone Python packages for mixed-effect feature selection, while the alternatives implemented in R, such as lmmlasso [@schelldorfer2011estimation] and glmmlasso [@schelldorfer2014glmmlasso], take more time to converge and yield worse selection accuracy, as discussed in [@sholokhov2022relaxation].
50 |
51 | # Core idea and structure of `pysr3`
52 |
53 | The baseline optimization method of PySR3 is proximal gradient descent (PGD). PGD exploits the fact that most feature selection methods minimize the sum of a smooth loss that captures the negative log likelihood of the data and a non-smooth sparsity promoting regularizer.
54 | PGD works well as long as the regularizer has an implementable proximal operator.
55 | Many widely-used regularizers have proximal operators that are well known and either have efficient numerical routines or closed form solutions [@zheng2018unified]. Examples include the zero-norm (L0), least absolute shrinkage and selection operator (LASSO), adaptive LASSO (A-LASSO), and smoothly clipped absolute deviation (SCAD) regularizers.
56 |
57 | Each regularizer included in PySR3 can also be used in its relaxed SR3 form [@zheng2018unified]. SR3 preconditions the likelihood, improving the performance of feature selection methods.
58 | PGD on the SR3-transformed problem takes fewer iterations to converge, and the features selected
59 | are more accurate and have lower false positive rates across simulated examples for both linear regression [@zheng2018unified] and LME [@sholokhov2022relaxation] models, as illustrated in \autoref{fig:lme_summary} (adapted from Figure 1 of [@sholokhov2022relaxation]).
60 |
61 | 
62 |
63 | More information about the structure of the library can be found in [documentation](https://aksholokhov.github.io/pysr3/),
64 | while the mathematical contributions are extensively discussed in [@zheng2018unified] for linear regression
65 | and in [@sholokhov2022relaxation; @aravkin2022relaxationb] for linear mixed effects models.
66 |
67 |
68 | # Ongoing Research and Dissemination
69 |
70 | The manuscripts "A Relaxation Approach to Feature Selection for Linear Mixed Effects Models"
71 | and "Analysis of Relaxation Methods for Feature Selection in Mixed Effects Models"
72 | are undergoing simultaneous peer-review. Since its introduction in [@zheng2018unified], SR3 has been cited 92 times, and used in model discovery [@Mendible2020], optimal dose management [@levin2019proof], and inverse problems [@baraldi2019basis]. The LME extensions of PySR3 were developed for variable selection in meta-analysis, which is a fundamental problem in risk factor analysis for the Global Burden of Disease study [@murray2020global].
73 |
74 | # References
75 |
--------------------------------------------------------------------------------
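To make the PGD and SR3 passages of `paper.md` concrete: for a smooth loss $f$ and regularizer $R$, PGD iterates $x^{k+1} = \operatorname{prox}_{\eta R}(x^k - \eta \nabla f(x^k))$, and, in the notation of [@zheng2018unified], SR3 relaxes $\min_x f(x) + R(Cx)$ into the joint problem $\min_{x,w} f(x) + \frac{\kappa}{2}\lVert Cx - w\rVert^2 + R(w)$. Below is a self-contained sketch of PGD with the LASSO proximal operator (soft-thresholding) on a least-squares loss; it illustrates the idea only and is not pysr3's solver.

```python
import numpy as np

def soft_threshold(x, t):
    """Proximal operator of t * ||x||_1, the LASSO regularizer."""
    return np.sign(x) * np.maximum(np.abs(x) - t, 0.0)

def pgd_lasso(a, b, lam, step, num_iters=500):
    """Proximal gradient descent for (1/2)||Ax - b||^2 + lam * ||x||_1."""
    x = np.zeros(a.shape[1])
    for _ in range(num_iters):
        grad = a.T @ (a @ x - b)                         # gradient of the smooth part
        x = soft_threshold(x - step * grad, step * lam)  # proximal step
    return x

rng = np.random.default_rng(42)
a = rng.standard_normal((100, 10))
x_true = np.zeros(10)
x_true[[0, 3]] = [1.0, -2.0]                 # sparse ground truth
b = a @ x_true + 0.1 * rng.standard_normal(100)
step = 1.0 / np.linalg.norm(a, 2) ** 2       # 1/L for the least-squares loss
x_hat = pgd_lasso(a, b, lam=5.0, step=step)
print(np.nonzero(np.abs(x_hat) > 1e-2)[0])   # indices of the selected features
```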
/src/pysr3/lme/model_selectors.py:
--------------------------------------------------------------------------------
1 | """
2 | Black-box routines for automatic feature selection for mixed-models.
3 | """
4 |
5 | from collections import defaultdict
6 | from pathlib import Path
7 | from typing import Dict, List, Optional, Union
8 |
9 | import numpy as np
10 | import pandas as pd
11 | import yaml
12 |
13 | from pysr3.lme.models import L0LmeModel, L1LmeModel, CADLmeModel, SCADLmeModel
14 | from pysr3.lme.models import L0LmeModelSR3, L1LmeModelSR3, CADLmeModelSR3, SCADLmeModelSR3
15 | from pysr3.lme.problems import LMEProblem
16 |
17 | MODELS_NAMES = ("L0", "L1", "CAD", "SCAD", "L0_SR3", "L1_SR3", "CAD_SR3", "SCAD_SR3")
18 |
19 |
20 | def select_covariates(df: pd.DataFrame,
21 | target: str,
22 | variance: str,
23 | group: str,
24 | covs: Optional[Dict[str, List[str]]] = None,
25 | pre_sel_covs: Optional[Dict[str, List[str]]] = None,
26 | output_folder: Union[str, Path] = ".",
27 | model_name: str = "L1_SR3",
28 | **kwargs) -> None:
29 | """Implements black-box functionality for selecting most important fixed and random features
30 | in Linear Mixed-Effect Models.
31 |
32 | Parameters
33 | ----------
34 |     df : pd.DataFrame
35 |         Data frame that contains all the necessary columns.
36 |     target : str
37 |         Column name of the observations.
38 |     variance : str
39 |         Column name of the observation variances.
40 |     group : str
41 |         Column name of the group, usually specified as `study_id`.
42 |     covs : Optional[Dict[str, List[str]]]
43 |         Dictionary of all covariate candidates. Its keys are
44 |         `fixed_effects` and `random_effects`, and each value is a
45 |         (possibly empty) list of covariate names. Defaults to `None`,
46 |         in which case it is treated as a dictionary with empty
47 |         lists as values.
48 |     pre_sel_covs : Optional[Dict[str, List[str]]]
49 |         Covariates that must stay in the model. Same structure as `covs`. Defaults to `None`.
50 |     output_folder : Union[str, Path]
51 |         Path to the output folder that stores the results. Defaults to `"."`.
52 |     model_name : str
53 |         Which model to use. One of "L0", "L0_SR3", "L1", "L1_SR3", "CAD", "CAD_SR3", "SCAD", "SCAD_SR3".
54 |
55 |     Returns
56 |     -------
57 |     None
58 |         Returns nothing. Stores a YAML file with the selected fixed and random
59 |         effects, along with other diagnostic figures.
60 | """
61 | # parse covs and pre_sel_covs
62 | covs = defaultdict(list) if covs is None else covs
63 | pre_sel_covs = defaultdict(list) if pre_sel_covs is None else pre_sel_covs
64 | for key in ["fixed_effects", "random_effects"]:
65 |         covs[key] = list({*covs.get(key, []), *pre_sel_covs.get(key, [])})
66 |
67 | # check df contain all cols
68 | cols = {target, variance, group, *covs["fixed_effects"], *covs["random_effects"]}
69 | for col in cols:
70 | if col not in df.columns:
71 | raise ValueError(f"df does not contain col={col}.")
72 |
73 | # parse output folder
74 | output_folder = Path(output_folder)
75 | if not output_folder.exists():
76 | output_folder.mkdir()
77 |
78 | problem = LMEProblem.from_dataframe(data=df,
79 | fixed_effects=covs.get("fixed_effects", []),
80 | random_effects=covs.get("random_effects", []),
81 | groups=group,
82 | variance=variance,
83 | target=target,
84 | not_regularized_fe=pre_sel_covs.get("fixed_effects", []),
85 | not_regularized_re=pre_sel_covs.get("random_effects", []),
86 | )
87 |
88 | model_constructor, selection_spectrum = get_model(model_name, problem)
89 | best_model = None
90 | best_score = +np.infty
91 | for params in selection_spectrum:
92 | model = model_constructor({**params, "logger_keys": ('converged', 'jones_bic')})
93 | model.fit_problem(problem)
94 | score = model.logger_.get('jones_bic')
95 | if score < best_score:
96 | best_model = model
97 | best_score = score
98 | # print(f"{model}: {params}, score={score}")
99 |
100 | sel_covs = {
101 | "fixed_effects": [label for label, coef in zip(problem.fixed_features_columns, best_model.coef_["beta"]) if
102 | abs(coef) >= 1e-2],
103 | "random_effects": [label for label, coef in zip(problem.random_features_columns, best_model.coef_["gamma"]) if
104 | abs(coef) >= 1e-2]
105 | }
106 |
107 | # save results
108 | with open(output_folder / "sel_covs.yaml", "w") as f:
109 | yaml.dump(sel_covs, f)
110 |
111 | print(sel_covs)
112 |
113 |
114 | def get_model(model: str, problem: LMEProblem):
115 | """
116 | Takes the name of the model. Returns the constructor for it,
117 | as well as a suitable parameter grid for various sparsity levels.
118 |
119 | Parameters
120 | ----------
121 | model: str
122 | a name of a model
123 | problem: LMEProblem
124 | an instance of the problem
125 |
126 | Returns
127 | -------
128 | constructor: function: params -> model(**params)
129 | model constructor
130 |     selection_spectrum: List[Dict[str, float]]
131 |         points for the parameter grid search
132 | """
133 | if model == "L0" or model == "L0_SR3":
134 | selection_spectrum = [{"nnz_tbeta": p, "nnz_tgamma": q} for p in range(1, problem.num_fixed_features) for q in
135 | range(1, problem.num_random_features) if p >= q]
136 | return lambda params: L0LmeModel(**params) if model == "L0" else L0LmeModelSR3(**params), selection_spectrum
137 |
138 | selection_spectrum = [{"lam": lam} for lam in np.logspace(start=-4, stop=5, num=100)]
139 | if model == "L1":
140 | return lambda params: L1LmeModel(**params), selection_spectrum
141 | elif model == "L1_SR3":
142 | return lambda params: L1LmeModelSR3(**params), selection_spectrum
143 | elif model == "CAD":
144 | return lambda params: CADLmeModel(**params), selection_spectrum
145 | elif model == "CAD_SR3":
146 | return lambda params: CADLmeModelSR3(**params), selection_spectrum
147 | elif model == "SCAD":
148 | return lambda params: SCADLmeModel(**params), selection_spectrum
149 | elif model == "SCAD_SR3":
150 | return lambda params: SCADLmeModelSR3(**params), selection_spectrum
151 | else:
152 | raise ValueError(f"Model name is not recognized: {model}. Should be one of: {MODELS_NAMES}")
153 |
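154 | # Usage sketch (illustrative only; "my_data.csv" and the column names below are
155 | # hypothetical placeholders for your own dataset):
156 | #
157 | #     import pandas as pd
158 | #     df = pd.read_csv("my_data.csv")
159 | #     select_covariates(df,
160 | #                       target="obs",
161 | #                       variance="obs_se",
162 | #                       group="study_id",
163 | #                       covs={"fixed_effects": ["x1", "x2"],
164 | #                             "random_effects": ["x1"]})
165 | #
166 | # This fits the chosen model (L1_SR3 by default) over a grid of regularization
167 | # strengths, keeps the candidate with the best Jones BIC, and writes the selected
168 | # covariates to sel_covs.yaml in the output folder.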
--------------------------------------------------------------------------------
/tests/lme/test_LMEProblem.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | import numpy as np
4 |
5 | from pysr3.lme.problems import LMEProblem
6 |
7 |
8 | class TestLinearLMEProblem(unittest.TestCase):
9 | def test_correctness(self):
10 | problem_parameters = {
11 | "groups_sizes": [20, 5, 10, 50],
12 | "features_labels": ["fixed+random"] * 3,
13 | "features_covariance_matrix": np.array([
14 | [1, 0, 0],
15 | [0, 1, 0.7],
16 | [0, 0.7, 1]
17 | ]),
18 | "obs_var": 0.1,
19 | }
20 | problem, true_parameters = LMEProblem.generate(**problem_parameters,
21 | seed=0)
22 | x1, y1, columns_labels = problem.to_x_y()
23 | problem2 = LMEProblem.from_x_y(x1, y1, columns_labels=columns_labels)
24 | for i, (x, y, z, _) in enumerate(problem2):
25 | self.assertTrue(np.allclose(y, x.dot(true_parameters['beta']) + z.dot(
26 | true_parameters['random_effects'][i][1]) + true_parameters['errors'][i]))
27 |
28 | def test_creation_and_from_to_x_y(self):
29 | problem, true_parameters = LMEProblem.generate(groups_sizes=[20, 30, 50],
30 | features_labels=["fixed+random",
31 | "fixed+random",
32 | "fixed",
33 | "random"],
34 | fit_fixed_intercept=True,
35 | obs_var=0.1,
36 | seed=42)
37 | x1, y1, columns_labels = problem.to_x_y()
38 | problem2 = LMEProblem.from_x_y(x1, y1, columns_labels=columns_labels, fit_fixed_intercept=True)
39 | x2, y2, columns_labels_2 = problem2.to_x_y()
40 | self.assertTrue(np.allclose(x1, x2))
41 | self.assertTrue(np.allclose(y1, y2))
42 | self.assertEqual(columns_labels, columns_labels_2)
43 |
44 | test_problem, true_test_parameters = LMEProblem.generate(groups_sizes=[3, 4, 5],
45 | features_labels=["fixed+random",
46 | "fixed+random",
47 | "fixed",
48 | "random"],
49 | fit_fixed_intercept=True,
50 | beta=true_parameters["beta"],
51 | gamma=true_parameters["gamma"],
52 | true_random_effects=true_parameters[
53 | "random_effects"],
54 | obs_var=0.1,
55 | seed=43)
56 |
57 | self.assertTrue(np.allclose(true_parameters["beta"], true_test_parameters["beta"]))
58 | self.assertTrue(np.allclose(true_parameters["gamma"], true_test_parameters["gamma"]))
59 | test_us = dict(true_test_parameters["random_effects"])
60 | for k, u1 in true_parameters["random_effects"]:
61 | u2 = test_us.get(k, None)
62 | if u2 is not None:
63 | self.assertTrue(np.allclose(u1, u2))
64 |
65 | def test_creation_from_no_data(self):
66 | problem, true_parameters = LMEProblem.generate(groups_sizes=[4, 5, 10],
67 | features_labels=[],
68 | fit_fixed_intercept=True,
69 | fit_random_intercept=True,
70 | obs_var=0.1,
71 | seed=42)
72 |
73 | self.assertEqual(len(true_parameters["beta"]), 1, msg="Beta should be of len = 1 for no-data problem")
74 | self.assertEqual(len(true_parameters["gamma"]), 1, msg="Gamma should be of len = 1 for no-data problem")
75 | self.assertTrue(np.all([np.all(x == 1) and np.all(z == 1) for x, y, z, l in
76 | problem]), msg="All fixed and random features should be 1 for no-data problem")
77 |
78 | def test_from_to_xy_preserves_dataset_structure(self):
79 | study_sizes = [20, 15, 10]
80 | num_studies = sum(study_sizes)
81 | num_fixed_features = 6
82 | num_random_features = 4
83 | np.random.seed(42)
84 | x = np.random.rand(num_studies, 1 + (num_fixed_features - 1) + 1 + (num_random_features - 1) + 1)
85 | y = np.random.rand(num_studies)
86 | x[:, 0] = np.repeat([0, 1, 2], study_sizes)
87 | columns_labels = (["group"] + ["fixed"] * (num_fixed_features - 1) + ["fixed+random"]
88 | + ["random"] * (num_random_features - 1) + ["variance"])
89 | np.random.shuffle(x)
90 | problem = LMEProblem.from_x_y(x, y, columns_labels=columns_labels)
91 | x2, y2, columns_labels_2 = problem.to_x_y()
92 | self.assertTrue(np.all(x2 == x), msg="x is not the same after from/to transformation")
93 | self.assertTrue(np.all(y2 == y), msg="y is not the same after from/to transformation")
94 | self.assertTrue(np.all(columns_labels_2 == columns_labels))
95 |
96 | def test_from_dataframe(self):
97 | problem, _ = LMEProblem.generate(groups_sizes=[40, 30, 50],
98 | features_labels=["fixed+random"] * 2,
99 | fit_fixed_intercept=True,
100 | fit_random_intercept=True,
101 | obs_var=0.1,
102 | seed=42,
103 | )
104 | x, y, columns_labels = problem.to_x_y()
105 | import pandas as pd
106 | data = pd.DataFrame(data=np.hstack([x, y.reshape(-1, 1)]),
107 | columns=["groups", "x1", "x2", "obs_var", "target"])
108 | data["intercept"] = 1
109 | problem2 = LMEProblem.from_dataframe(data,
110 | fixed_effects=["intercept", "x1", "x2"],
111 | random_effects=["intercept", "x1", "x2"],
112 | variance="obs_var",
113 | target="target",
114 | groups="groups",
115 | not_regularized_fe=[],
116 | not_regularized_re=["x1"])
117 | x2, y2, columns_labels_2 = problem2.to_x_y()
118 | self.assertTrue(np.all(x == x2))
119 | self.assertTrue(np.all(y == y2))
120 | self.assertTrue(np.all(columns_labels == columns_labels_2))
121 |
122 |
123 | if __name__ == '__main__':
124 | unittest.main()
125 |
--------------------------------------------------------------------------------
/src/pysr3/solvers.py:
--------------------------------------------------------------------------------
1 | # Solvers for numerical optimization problems (PGD etc).
2 | # Copyright (C) 2021 Aleksei Sholokhov, aksh@uw.edu
3 | #
4 | # This program is free software: you can redistribute it and/or modify
5 | # it under the terms of the GNU General Public License as published by
6 | # the Free Software Foundation, either version 3 of the License, or
7 | # (at your option) any later version.
8 | #
9 | # This program is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | # GNU General Public License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License
15 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
16 |
17 | """
18 | Implements general purpose numerical solvers, like PGD
19 | """
20 |
21 | import numpy as np
22 |
23 | from pysr3.lme.oracles import LinearLMEOracle
24 | from pysr3.logger import Logger
25 | from pysr3.regularizers import Regularizer
26 |
27 |
28 | class PGDSolver:
29 | """
30 | Implements a general Proximal Gradient Descent solver.
31 | """
32 |
33 | def __init__(self, tol=1e-4, max_iter=1000, stepping="fixed", fixed_step_len=1):
34 | """
35 | Creates an instance of the solver.
36 |
37 | Parameters
38 | ----------
39 | tol: float
40 |             Tolerance for the stopping criterion: stop when norm(x_k - x_{k-1}) < tol.
41 | max_iter: int
42 | Maximum number of iterations that the solver is allowed to make.
43 | stepping: str
44 | Stepping policy. Can be either "line-search" or "fixed".
45 | fixed_step_len: float
46 |             The step length. If stepping="fixed" then this step size is always used.
47 | If stepping="line-search" then the line-search starts shrinking the step from this step size.
48 | """
49 | self.tol = tol
50 | self.max_iter = max_iter
51 | self.stepping = stepping
52 | self.fixed_step_len = fixed_step_len
53 |
54 | def optimize(self, x0, oracle: LinearLMEOracle = None, regularizer: Regularizer = None, logger: Logger = None):
55 | """
56 | Solves the optimization problem for
57 |
58 | Loss(x) = oracle(x) + regularizer(x)
59 |
60 | Parameters
61 | ----------
62 | x0: ndarray
63 | starting point of the optimizer.
64 | oracle: LinearLMEOracle
65 | provides the value and the gradient of the smooth part of the loss.
66 | regularizer: Regularizer
67 | provides the value and the proximal operator of the non-smooth part of the loss.
68 | logger: Logger
69 | logs the progress (loss, convergence, etc).
70 |
71 | Returns
72 | -------
73 | x: ndarray
74 | the minimum.
75 | """
76 |         if not oracle or not regularizer:
77 |             raise ValueError("oracle and regularizer can't be None")
78 | x = x0
79 | x_prev = np.infty
80 | iteration = 0
81 |
82 | if 'loss' in logger.keys:
83 | loss = oracle.value_function(x) + regularizer.value(x)
84 |
85 | if len(logger.keys) > 0:
86 | logger.log(locals())
87 |
88 | while np.linalg.norm(x - x_prev) > self.tol and iteration < self.max_iter:
89 | x_prev = x
90 |
91 | direction = -oracle.gradient_value_function(x)
92 |
93 | if self.stepping == "line-search":
94 | step_len = self.fixed_step_len
95 | while step_len > 1e-14:
96 | y = x + step_len * direction
97 | z = regularizer.prox(y, step_len)
98 | if oracle.value_function(z) <= oracle.value_function(x) - direction.dot(z - x) + (
99 | 1 / (2 * step_len)) * np.linalg.norm(z - x) ** 2:
100 | break
101 | else:
102 | step_len *= 0.5
103 |
104 |             else:
105 |                 # "fixed" stepping, as well as any unrecognized policy, uses the fixed step length
106 |                 step_len = self.fixed_step_len
108 |
109 | y = x + step_len * direction
110 | x = regularizer.prox(y, step_len)
111 | iteration += 1
112 |
113 | if 'loss' in logger.keys:
114 | loss = oracle.value_function(x) + regularizer.value(x)
115 |
116 | if len(logger.keys) > 0:
117 | logger.log(locals())
118 |
119 | logger.add("converged", iteration < self.max_iter)
120 | logger.add("iteration", iteration)
121 |
122 | return x
123 |
124 |
125 | class FakePGDSolver:
126 | """
127 |     This class is designed for situations where the oracle can provide the optimal
128 |     solution by itself, e.g. when it is accessible analytically.
129 |     It is also used for "practical" SR3 methods, where the relaxed variables are
130 |     updated together with the original ones inside the oracle's subroutine.
131 | """
132 |
133 | def __init__(self, tol=1e-4, max_iter=1000, fixed_step_len=1, update_prox_every=1):
134 | """
135 | Initializes the solver
136 |
137 | Parameters
138 | ----------
139 | tol: float
140 | tolerance for internal routines
141 | max_iter: int
142 | maximal number of iterations for internal routines
143 | fixed_step_len: float
144 | step-size
145 | update_prox_every: int
146 |             how often the oracle should update the relaxed variable (once every X steps).
147 | """
148 | self.fixed_step_len = fixed_step_len
149 | self.update_prox_every = update_prox_every
150 | self.tol = tol
151 | self.max_iter = max_iter
152 |
153 | def optimize(self, x0, oracle=None, regularizer: Regularizer = None, logger: Logger = None,
154 | **kwargs):
155 | """
156 | Solves the optimization problem for
157 |
158 | Loss(x) = oracle(x) + regularizer(x)
159 |
160 | Parameters
161 | ----------
162 | x0: ndarray
163 | starting point of the optimizer.
164 | oracle: LinearLMEOracle
165 | provides the value and the gradient of the smooth part of the loss.
166 | regularizer: Regularizer
167 | provides the value and the proximal operator of the non-smooth part of the loss.
168 | logger: Logger
169 | logs the progress (loss, convergence, etc).
170 |
171 | Returns
172 | -------
173 | x: ndarray
174 | the minimum.
175 | """
176 | if not oracle:
177 | raise ValueError("oracle can't be None")
178 | if not regularizer:
179 | raise ValueError("regularizer can't be None")
180 |
181 | x = oracle.find_optimal_parameters(x0,
182 | regularizer=regularizer,
183 | tol=self.tol,
184 | max_iter=self.max_iter,
185 | prox_step_len=self.fixed_step_len,
186 | update_prox_every=self.update_prox_every,
187 | logger=logger,
188 | **kwargs)
189 | if 'loss' in logger.keys:
190 | loss = oracle.value_function(x) + regularizer.value(x)
191 | logger.add('loss', loss)
192 |
193 | logger.add("converged", True)
194 | return x
195 |
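196 | # Usage sketch for PGDSolver (illustrative only): the solver relies only on the
197 | # duck-typed interfaces below, so it can be exercised with a toy quadratic "oracle"
198 | # f(x) = 0.5 * ||x - 1||^2 and an L1 proximal regularizer, without any of the LME
199 | # machinery. The helper classes here are hypothetical stand-ins, not part of pysr3.
200 | #
201 | #     import numpy as np
202 | #
203 | #     class QuadraticOracle:
204 | #         def value_function(self, x):
205 | #             return 0.5 * np.sum((x - 1.0) ** 2)
206 | #         def gradient_value_function(self, x):
207 | #             return x - 1.0
208 | #
209 | #     class SoftThresholdL1:
210 | #         def __init__(self, lam):
211 | #             self.lam = lam
212 | #         def value(self, x):
213 | #             return self.lam * np.sum(np.abs(x))
214 | #         def prox(self, y, step_len):
215 | #             # soft-thresholding: the proximal operator of lam * ||.||_1
216 | #             return np.sign(y) * np.maximum(np.abs(y) - step_len * self.lam, 0.0)
217 | #
218 | #     class SilentLogger:
219 | #         keys = ()
220 | #         def log(self, *args, **kwargs): pass
221 | #         def add(self, *args, **kwargs): pass
222 | #
223 | #     solver = PGDSolver(tol=1e-8, max_iter=1000, stepping="fixed", fixed_step_len=0.5)
224 | #     x = solver.optimize(np.zeros(5), oracle=QuadraticOracle(),
225 | #                         regularizer=SoftThresholdL1(lam=0.1), logger=SilentLogger())
226 | #     # every coordinate of x converges to 1 - 0.1 = 0.9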
--------------------------------------------------------------------------------
/paper.bib:
--------------------------------------------------------------------------------
1 | @article{Buscemi2019Survey,
2 | abstract = {Linear mixed-effects models are a class of models widely used for analyzing different types of data: longitudinal, clustered and panel data. Many fields, in which a statistical methodology is required, involve the employment of linear mixed models, such as biology, chemistry, medicine, finance and so forth. One of the most important processes, in a statistical analysis, is given by model selection. Hence, since there are a large number of linear mixed model selection procedures available in the literature, a pressing issue is how to identify the best approach to adopt in a specific case. We outline mainly all approaches focusing on the part of the model subject to selection (fixed and/or random), the dimensionality of models and the structure of variance and covariance matrices, and also, wherever possible, the existence of an implemented application of the methodologies set out.},
3 | annote = {The most up-to-date literature review found on this issue.},
4 | author = {Buscemi, Simona and Plaia, Antonella},
5 | doi = {10.1007/s10182-019-00359-z},
7 | issn = {1863-8171},
8 | journal = {AStA Advances in Statistical Analysis},
9 | keywords = {AIC,BIC,LASSO,Linear mixed model,MCP,MDL,Mixed model selection,Shrinkage methods},
11 | month = {dec},
12 | number = {4},
13 | pages = {529--575},
14 | publisher = {Springer Berlin Heidelberg},
15 | title = {{Model selection in linear mixed-effect models}},
16 | url = {https://doi.org/10.1007/s10182-019-00359-z http://link.springer.com/10.1007/s10182-019-00359-z},
17 | volume = {104},
18 | year = {2020}
19 | }
20 |
21 | @article{zheng2018unified,
22 | title={A unified framework for sparse relaxed regularized regression: SR3},
23 | author={Zheng, Peng and Askham, Travis and Brunton, Steven L and Kutz, J Nathan and Aravkin, Aleksandr Y},
24 | journal={IEEE Access},
25 | volume={7},
26 | pages={1404--1423},
27 | year={2018},
28 | publisher={IEEE},
29 | doi = {10.1109/ACCESS.2018.2886528}
30 | }
31 |
32 | @article{sholokhov2022relaxation,
33 | title={A Relaxation Approach to Feature Selection for Linear Mixed Effects Models},
34 | author={Sholokhov, Aleksei and Burke, James V and Santomauro, Damian F and Zheng, Peng and Aravkin, Aleksandr},
35 | journal={arXiv preprint arXiv:2205.06925},
36 | year={2022},
37 | doi={10.48550/arXiv.2205.06925}
38 | }
39 | @article{aravkin2022relaxationb,
40 | title={Analysis of Relaxation Methods for Feature Selection in Mixed Effects Models},
41 | author={Aravkin, Aleksandr and Burke, James and Sholokhov, Aleksei and Zheng, Peng},
42 | journal={arXiv preprint arXiv:2209.10575},
43 | year={2022},
44 | doi={10.48550/arXiv.2209.10575}
45 | }
46 |
47 | @article{baraldi2019basis,
48 | title={Basis pursuit denoise with nonsmooth constraints},
49 | author={Baraldi, Robert and Kumar, Rajiv and Aravkin, Aleksandr},
50 | journal={IEEE Transactions on Signal Processing},
51 | volume={67},
52 | number={22},
53 | pages={5811--5823},
54 | year={2019},
55 | publisher={IEEE},
56 | doi={10.1109/tsp.2019.2946029}
57 | }
58 |
59 | @article{murray2020global,
60 | title={Global burden of 87 risk factors in 204 countries and territories, 1990--2019: a systematic analysis for the Global Burden of Disease Study 2019},
61 | author={Murray, Christopher JL and Aravkin, Aleksandr Y and Zheng, Peng and Abbafati, Cristiana and Abbas, Kaja M and Abbasi-Kangevari, Mohsen and Abd-Allah, Foad and Abdelalim, Ahmed and Abdollahi, Mohammad and Abdollahpour, Ibrahim and others},
62 | journal={The Lancet},
63 | volume={396},
64 | number={10258},
65 | pages={1223--1249},
66 | year={2020},
67 | publisher={Elsevier},
68 | doi={10.1016/S0140-6736(20)30752-2},
69 | }
70 |
71 |
72 | @article{schelldorfer2014glmmlasso,
73 | title={Glmmlasso: an algorithm for high-dimensional generalized linear mixed models using L1-penalization},
74 | author={Schelldorfer, J{\"u}rg and Meier, Lukas and B{\"u}hlmann, Peter},
75 | journal={Journal of Computational and Graphical Statistics},
76 | volume={23},
77 | number={2},
78 | pages={460--477},
79 | year={2014},
80 | publisher={Taylor \& Francis},
81 | doi={10.1080/10618600.2013.773239}
82 | }
83 |
84 |
85 | @article{li2020survey,
86 | title={A survey on sparse learning models for feature selection},
87 | author={Li, Xiaoping and Wang, Yadi and Ruiz, Rub{\'e}n},
88 | journal={IEEE transactions on cybernetics},
89 | year={2020},
90 | publisher={IEEE},
91 | doi={10.1109/TCYB.2020.2982445}
92 | }
93 |
94 | @article{miao2016survey,
95 | title={A survey on feature selection},
96 | author={Miao, Jianyu and Niu, Lingfeng},
97 | journal={Procedia Computer Science},
98 | volume={91},
99 | pages={919--926},
100 | year={2016},
101 | publisher={Elsevier},
102 | doi={10.1016/j.procs.2016.07.111}
103 | }
104 |
105 | @article{Mendible2020,
106 | abstract = {We develop an unsupervised machine learning algorithm for the automated discovery and identification of traveling waves in spatiotemporal systems governed by partial differential equations (PDEs). Our method uses sparse regression and subspace clustering to robustly identify translational invariances that can be leveraged to build improved reduced-order models (ROMs). Invariances, whether translational or rotational, are well known to compromise the ability of ROMs to produce accurate and/or low-rank representations of the spatiotemporal dynamics. However, by discovering translations in a principled way, data can be shifted into a coordinate systems where quality, low-dimensional ROMs can be constructed. This approach can be used on either numerical or experimental data with or without knowledge of the governing equations. We demonstrate our method on a variety of PDEs of increasing difficulty, taken from the field of fluid dynamics, showing the efficacy and robustness of the proposed approach.},
107 | archivePrefix = {arXiv},
108 | arxivId = {1911.00565},
109 | author = {Mendible, Ariana and Brunton, Steven L. and Aravkin, Aleksandr Y. and Lowrie, Wes and Kutz, J. Nathan},
110 | doi = {10.1007/s00162-020-00529-9},
111 | eprint = {1911.00565},
113 | issn = {14322250},
114 | journal = {Theoretical and Computational Fluid Dynamics},
115 | keywords = {Data decomposition,Reduced-order modeling,Transported quantities,Traveling waves},
116 | number = {4},
117 | pages = {385--400},
118 | title = {{Dimensionality reduction and reduced-order modeling for traveling wave physics}},
119 | volume = {34},
120 | year = {2020}
121 | }
122 |
123 | @article{levin2019proof,
124 | title={A Proof of Principle: Multi-Modality Radiotherapy Optimization},
125 | author={Levin, Roman and Aravkin, Aleksandr Y and Kim, Minsun},
126 | journal={arXiv preprint arXiv:1911.05182},
127 | year={2019},
128 | doi={10.48550/arXiv.1911.05182}
129 | }
130 |
131 | @inproceedings{sklearn_api,
132 | author = {Lars Buitinck and Gilles Louppe and Mathieu Blondel and
133 | Fabian Pedregosa and Andreas Mueller and Olivier Grisel and
134 | Vlad Niculae and Peter Prettenhofer and Alexandre Gramfort
135 | and Jaques Grobler and Robert Layton and Jake VanderPlas and
136 | Arnaud Joly and Brian Holt and Ga{\"{e}}l Varoquaux},
137 | title = {{API} design for machine learning software: experiences from the scikit-learn
138 | project},
139 | booktitle = {ECML PKDD Workshop: Languages for Data Mining and Machine Learning},
140 | year = {2013},
141 | pages = {108--122},
142 | doi = {10.48550/arXiv.1309.0238}
143 | }
144 |
145 | @article{schelldorfer2011estimation,
146 | title={Estimation for high-dimensional linear mixed-effects models using l1-penalization},
147 | author={Schelldorfer, J{\"u}rg and B{\"u}hlmann, Peter and van de Geer, Sara},
148 | journal={Scandinavian Journal of Statistics},
149 | volume={38},
150 | number={2},
151 | pages={197--214},
152 | year={2011},
153 | publisher={Wiley Online Library},
154 | doi={10.1111/j.1467-9469.2011.00740.x}
155 | }
156 |
--------------------------------------------------------------------------------
/tests/linear/test_LinearModels.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | import numpy as np
4 | import pandas as pd
5 | from sklearn.metrics import mean_squared_error, explained_variance_score, accuracy_score
6 | from sklearn.utils.estimator_checks import check_estimator
7 |
8 | from pysr3.linear.models import SimpleLinearModel, SimpleLinearModelSR3, LinearL1Model, LinearL1ModelSR3, \
9 | LinearCADModel, LinearCADModelSR3, LinearSCADModel, LinearSCADModelSR3, LinearL0ModelSR3, LinearL0Model
10 | from pysr3.linear.problems import LinearProblem
11 |
12 |
13 | class TestLinearModels(unittest.TestCase):
14 |
15 | def test_meeting_sklearn_standards(self):
16 | models_to_test = {
17 | "Simple": SimpleLinearModel(),
18 | "L0": LinearL0Model(),
19 | "L1": LinearL1Model(),
20 | "CAD": LinearCADModel(),
21 | "SCAD": LinearSCADModel(),
22 | "Simple_SR3": SimpleLinearModelSR3(),
23 | "L0_SR3": LinearL0ModelSR3(),
24 | "L1_SR3": LinearL1ModelSR3(),
25 | "CAD_SR3": LinearCADModelSR3(),
26 | "SCAD_SR3": LinearSCADModelSR3()
27 | }
28 | for name, model in models_to_test.items():
29 | with self.subTest(name=name):
30 | check_estimator(model)
31 |
32 | def test_solving_dense_problem(self):
33 |
34 | problem_parameters = {
35 | "num_objects": 100,
36 | "num_features": 10,
37 | "obs_std": 0.1,
38 | }
39 |
40 | models_to_test = {
41 | "Simple": (SimpleLinearModel, {}),
42 | "L0": (LinearL0Model, {"nnz": problem_parameters['num_features']}),
43 | "L1": (LinearL1Model, {}),
44 | "CAD": (LinearCADModel, {"rho": 0.5}),
45 | "SCAD": (LinearSCADModel, {"rho": 3.7, "sigma": 0.5}),
46 | "Simple_SR3": (SimpleLinearModelSR3, {}),
47 | "L0_SR3": (LinearL0ModelSR3, {"nnz": problem_parameters['num_features']}),
48 | "L1_SR3": (LinearL1ModelSR3, {}),
49 | "CAD_SR3": (LinearCADModelSR3, {"rho": 0.5}),
50 | "SCAD_SR3": (LinearSCADModelSR3, {"rho": 3.7, "sigma": 0.5})
51 | }
52 |
53 | trials = 3
54 |
55 | default_params = {
56 | "el": 1,
57 | "lam": 0.0, # we expect the answers to be dense so the regularizers are small
58 | # "stepping": "line-search",
59 | "logger_keys": ('converged', 'loss', 'aic', 'bic'),
60 | "tol_solver": 1e-6,
61 | "max_iter_solver": 1000
62 | }
63 |
64 | max_mse = 0.05
65 | min_explained_variance = 0.9
66 |
67 | for i in range(trials):
68 | with self.subTest(i=i):
69 | for model_name, (model_constructor, local_params) in models_to_test.items():
70 | with self.subTest(model_name=model_name):
71 | problem = LinearProblem.generate(**problem_parameters, seed=i)
72 | x, y = problem.to_x_y()
73 |
74 | features_labels = [f'x{i}' for i in range(problem_parameters['num_features'])]
75 | data = pd.DataFrame(x, columns=features_labels)
76 | data['y'] = y
77 | data['std'] = 1
78 | problem2 = LinearProblem.from_dataframe(data, features=features_labels,
79 | target='y', obs_std='std')
80 | model_params = default_params.copy()
81 | model_params.update(local_params)
82 |
83 | model = model_constructor(**model_params)
84 | model.fit_problem(problem2)
85 |
86 | y_pred = model.predict_problem(problem2)
87 | explained_variance = explained_variance_score(y, y_pred)
88 | mse = mean_squared_error(y, y_pred)
89 |
90 | self.assertGreater(explained_variance, min_explained_variance,
91 | msg="%d) Explained variance is too small: %.3f < %.3f. (seed=%d)"
92 | % (i,
93 | explained_variance,
94 | min_explained_variance,
95 | i))
96 | self.assertGreater(max_mse, mse,
97 | msg="%d) MSE is too big: %.3f > %.2f (seed=%d)"
98 | % (i,
99 | mse,
100 | max_mse,
101 | i))
102 | aic = model.get_information_criterion(x, y, ic='aic')
103 | self.assertAlmostEqual(aic, model.logger_.get('aic'))
104 | bic = model.get_information_criterion(x, y, ic='bic')
105 | self.assertAlmostEqual(bic, model.logger_.get('bic'))
106 |
107 | return None
108 |
109 | def test_solving_sparse_problem(self):
110 |
111 | models_to_test = {
112 | "L0": (LinearL0Model, {}),
113 | "L1": (LinearL1Model, {"lam": 2}),
114 | "CAD": (LinearCADModel, {"rho": 0.5}),
115 | "SCAD": (LinearSCADModel, {"lam": 1, "rho": 3.7, "sigma": 2.5}),
116 | "L0_SR3": (LinearL0ModelSR3, {}),
117 | "L0_SR3P": (LinearL0ModelSR3, {"practical": True}),
118 | "L1_SR3": (LinearL1ModelSR3, {"lam": 0.1}),
119 | "L1_SR3P": (LinearL1ModelSR3, {"lam": 0.1, "practical": True}),
120 | "CAD_SR3": (LinearCADModelSR3, {"rho": 0.5}),
121 | "CAD_SR3P": (LinearCADModelSR3, {"rho": 0.5, "practical": True}),
122 | "SCAD_SR3": (LinearSCADModelSR3, {"lam": 0.2, "rho": 3.7, "sigma": 0.5}),
123 | "SCAD_SR3P": (LinearSCADModelSR3, {"lam": 0.2, "rho": 3.7, "sigma": 0.5, "practical": True})
124 | }
125 | trials = 5
126 |
127 | problem_parameters = {
128 | "num_objects": 100,
129 | "num_features": 20,
130 | "obs_std": 0.1,
131 | }
132 |
133 | default_params = {
134 | "el": 1,
135 | "lam": 1,
136 | "rho": 0.3,
137 | "logger_keys": ('converged', 'loss',),
138 | "tol_solver": 1e-6,
139 | "max_iter_solver": 5000
140 | }
141 |
142 | max_mse = 0.5
143 | min_explained_variance = 0.9
144 | min_selection_accuracy = 0.9
145 |
146 | for i in range(trials):
147 | with self.subTest(i=i):
148 | for model_name, (model_constructor, local_params) in models_to_test.items():
149 | with self.subTest(model_name=model_name):
150 | seed = i + 42
151 | np.random.seed(seed)
152 | true_x = np.random.choice(2, size=problem_parameters["num_features"], p=np.array([0.5, 0.5]))
153 | if sum(true_x) == 0:
154 | true_x[0] = 1
155 | problem = LinearProblem.generate(**problem_parameters,
156 | true_x=true_x,
157 | seed=seed)
158 | x, y = problem.to_x_y()
159 |
160 | model_params = default_params.copy()
161 | model_params.update(local_params)
162 | if "L0" in model_name:
163 | model_params["nnz"] = sum(true_x != 0)
164 |
165 | model = model_constructor(**model_params)
166 | model.fit_problem(problem)
167 |
168 | y_pred = model.predict_problem(problem)
169 | explained_variance = explained_variance_score(y, y_pred)
170 | mse = mean_squared_error(y, y_pred)
171 |
172 | coefficients = model.coef_
173 | maybe_x = coefficients["x"]
174 | selection_accuracy = accuracy_score(true_x, abs(maybe_x) > np.sqrt(model.tol_solver))
175 |
176 | self.assertGreaterEqual(explained_variance, min_explained_variance,
177 | msg=f"{model_name}: Explained variance is too small: {explained_variance} < {min_explained_variance} (seed={seed})")
178 | self.assertGreaterEqual(max_mse, mse,
179 |                                                 msg=f"{model_name}: MSE is too big: {mse} > {max_mse} (seed={seed})")
180 | self.assertGreaterEqual(selection_accuracy, min_selection_accuracy,
181 | msg=f"{model_name}: Selection Accuracy is too small: {selection_accuracy} < {min_selection_accuracy} (seed={seed})")
182 |
183 | return None
184 |
185 |
186 | if __name__ == '__main__':
187 | unittest.main()
188 |
--------------------------------------------------------------------------------
/src/pysr3/lme/priors.py:
--------------------------------------------------------------------------------
1 | from typing import Dict
2 |
3 | from pysr3.lme.problems import LMEProblem
4 | from pysr3.priors import Prior, GaussianPrior
5 |
6 |
7 | class GaussianPriorLME:
8 | """
9 |     Implements a Gaussian prior for linear mixed-effects models
10 | """
11 |
12 | def __init__(self, fe_params: Dict, re_params: Dict):
13 | """
14 | Creates GaussianPrior
15 |
16 | Parameters
17 | ----------
18 | fe_params: dict[str: tuple(float, float)]
19 | gaussian prior parameters for fixed effects. The format is {"name": (mean, std), ...}
20 | E.g. {"intercept": (0, 2), "time": (1, 1)}
21 | re_params: dict[str: tuple(float, float)]
22 | gaussian prior for variances of random effects. Same format as above.
23 | """
24 | self.fe_params = fe_params
25 | self.re_params = re_params
26 | self.beta_prior = GaussianPrior(params=fe_params)
27 | self.gamma_prior = GaussianPrior(params=re_params)
28 |
29 | def instantiate(self, problem: LMEProblem):
30 | """
31 | Instantiates a Gaussian prior with problem-dependent quantities
32 |
33 | Parameters
34 | ----------
35 | problem: LMEProblem
36 | problem to fit
37 |
38 | Returns
39 | -------
40 | None
41 | """
42 |         assert problem.fixed_features_columns and problem.random_features_columns, "Problem does not have column names attached"
43 |         assert all(key in problem.fixed_features_columns for key in self.fe_params.keys()), \
44 |             f"Some keys are listed in the prior for FE but not listed in the problem's column labels: {[key for key in self.fe_params.keys() if key not in problem.fixed_features_columns]}"
45 |         assert all(key in problem.random_features_columns for key in self.re_params.keys()), \
46 |             f"Some keys are listed in the prior for RE but not listed in the problem's column labels: {[key for key in self.re_params.keys() if key not in problem.random_features_columns]}"
47 |
48 | self.beta_prior.instantiate(problem_columns=problem.fixed_features_columns)
49 | self.gamma_prior.instantiate(problem_columns=problem.random_features_columns)
50 |
51 | def forget(self):
52 | """
53 | Releases all problem-dependent quantities
54 |
55 | Returns
56 | -------
57 | None
58 | """
59 | self.fe_params = None
60 | self.re_params = None
61 | self.beta_prior.forget()
62 | self.gamma_prior.forget()
63 |
64 | def loss(self, beta, gamma):
65 | """
66 | Value of the prior at beta, gamma.
67 |
68 | Parameters
69 | ----------
70 | beta: ndarray
71 | vector of fixed effects
72 |
73 | gamma: ndarray
74 | vector of random effects
75 |
76 | Returns
77 | -------
78 | value of the prior.
79 | """
80 | return self.beta_prior.loss(beta) + self.gamma_prior.loss(gamma)
81 |
82 | def gradient_beta(self, beta, *args, **kwargs):
83 | """
84 | Evaluates the gradient of the prior with respect to the vector of fixed effects
85 |
86 | Parameters
87 | ----------
88 | beta: ndarray
89 | vector of fixed effects
90 |
91 | Returns
92 | -------
93 | gradient w.r.t. beta
94 | """
95 | return self.beta_prior.gradient(beta)
96 |
97 | def gradient_gamma(self, beta, gamma):
98 | """
99 | Evaluates the gradient of the prior with respect to the vector of random effects
100 |
101 | Parameters
102 | ----------
103 | beta: ndarray
104 | vector of fixed effects
105 |
106 | gamma: ndarray
107 | vector of random effects
108 |
109 | Returns
110 | -------
111 | gradient w.r.t. gamma
112 | """
113 | return self.gamma_prior.gradient(gamma)
114 |
115 | def hessian_beta(self, beta, gamma):
116 | """
117 | Evaluates Hessian of the prior with respect to the vector of fixed effects
118 |
119 | Parameters
120 | ----------
121 | beta: ndarray
122 | vector of fixed effects
123 |
124 | gamma: ndarray
125 | vector of random effects
126 |
127 | Returns
128 | -------
129 | Hessian w.r.t. (beta, beta)
130 | """
131 | return self.beta_prior.hessian(beta)
132 |
133 | def hessian_gamma(self, beta, gamma):
134 | """
135 | Evaluates Hessian of the prior with respect to the vector of random effects
136 |
137 | Parameters
138 | ----------
139 | beta: ndarray
140 | vector of fixed effects
141 |
142 | gamma: ndarray
143 | vector of random effects
144 |
145 | Returns
146 | -------
147 | Hessian w.r.t. (gamma, gamma)
148 | """
149 | return self.gamma_prior.hessian(gamma)
150 |
151 | def hessian_beta_gamma(self, beta, gamma):
152 | """
153 | Evaluates the mixed Hessian of the prior with respect to the vector of fixed and random effects
154 |
155 | Parameters
156 | ----------
157 | beta: ndarray
158 | vector of fixed effects
159 |
160 | gamma: ndarray
161 | vector of random effects
162 |
163 | Returns
164 | -------
165 | Hessian w.r.t. (beta, gamma)
166 | """
167 | return 0
168 |
169 |
170 | class NonInformativePriorLME(Prior):
171 | """
172 | Implements a non-informative prior
173 | """
174 |
175 | def __init__(self):
176 | """
177 | Creates NonInformativePrior
178 | """
179 | pass
180 |
181 | def instantiate(self, problem):
182 | """
183 | Instantiates the prior based on the problem
184 |
185 | Parameters
186 | ----------
187 | problem: LMEProblem
188 |
189 | Returns
190 | -------
191 | None
192 | """
193 | pass
194 |
195 | def forget(self):
196 | """
197 | Releases all problem-dependent values
198 |
199 | Returns
200 | -------
201 | None
202 | """
203 | pass
204 |
205 | def loss(self, beta, gamma):
206 | """
207 | Value of the prior at beta, gamma.
208 |
209 | Parameters
210 | ----------
211 | beta: ndarray
212 | vector of fixed effects
213 |
214 | gamma: ndarray
215 | vector of random effects
216 |
217 | Returns
218 | -------
219 | value of the prior.
220 | """
221 | return 0
222 |
223 | def gradient_beta(self, beta, gamma):
224 | """
225 | Evaluates the gradient of the prior with respect to the vector of fixed effects
226 |
227 | Parameters
228 | ----------
229 | beta: ndarray
230 | vector of fixed effects
231 |
232 | gamma: ndarray
233 | vector of random effects
234 |
235 | Returns
236 | -------
237 | gradient w.r.t. beta
238 | """
239 | return 0
240 |
241 | def gradient_gamma(self, beta, gamma):
242 | """
243 | Evaluates the gradient of the prior with respect to the vector of random effects
244 |
245 | Parameters
246 | ----------
247 | beta: ndarray
248 | vector of fixed effects
249 |
250 | gamma: ndarray
251 | vector of random effects
252 |
253 | Returns
254 | -------
255 | gradient w.r.t. gamma
256 | """
257 | return 0
258 |
259 | def hessian_beta(self, beta, gamma):
260 | """
261 | Evaluates Hessian of the prior with respect to the vector of fixed effects
262 |
263 | Parameters
264 | ----------
265 | beta: ndarray
266 | vector of fixed effects
267 |
268 | gamma: ndarray
269 | vector of random effects
270 |
271 | Returns
272 | -------
273 | Hessian w.r.t. (beta, beta)
274 | """
275 | return 0
276 |
277 | def hessian_gamma(self, beta, gamma):
278 | """
279 | Evaluates Hessian of the prior with respect to the vector of random effects
280 |
281 | Parameters
282 | ----------
283 | beta: ndarray
284 | vector of fixed effects
285 |
286 | gamma: ndarray
287 | vector of random effects
288 |
289 | Returns
290 | -------
291 | Hessian w.r.t. (gamma, gamma)
292 | """
293 | return 0
294 |
295 | def hessian_beta_gamma(self, beta, gamma):
296 | """
297 | Evaluates the mixed Hessian of the prior with respect to the vector of fixed and random effects
298 |
299 | Parameters
300 | ----------
301 | beta: ndarray
302 | vector of fixed effects
303 |
304 | gamma: ndarray
305 | vector of random effects
306 |
307 | Returns
308 | -------
309 | Hessian w.r.t. (beta, gamma)
310 | """
311 | return 0
312 |
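313 | # Usage sketch (illustrative only; the feature names are hypothetical and must
314 | # match the column names of the LMEProblem that the prior is instantiated with):
315 | #
316 | #     prior = GaussianPriorLME(fe_params={"intercept": (0, 2), "time": (1, 1)},
317 | #                              re_params={"time": (0, 1)})
318 | #     prior.instantiate(problem)         # problem: an LMEProblem with these columns
319 | #     penalty = prior.loss(beta, gamma)  # added to the model's negative log-likelihood
320 | #
321 | # NonInformativePriorLME is the default choice when no prior knowledge is available:
322 | # all of its terms are identically zero, so it leaves the likelihood unchanged.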
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | [](https://aksholokhov.github.io/pysr3/)
5 | [](https://codecov.io/gh/aksholokhov/pysr3)
6 | [](https://www.codacy.com/gh/aksholokhov/pysr3/dashboard?utm_source=github.com&utm_medium=referral&utm_content=aksholokhov/pysr3&utm_campaign=Badge_Grade)
7 | [](https://joss.theoj.org/papers/67ea0de9a219ad072073a2304f11f820)
8 |
9 | # Quickstart with `pysr3`
10 |
11 | SR3 is a relaxation method designed for accurate feature selection.
12 | It currently supports:
13 |
14 | * Linear Models (L0, LASSO, A-LASSO, CAD, SCAD)
15 | * Linear Mixed-Effect Models (L0, LASSO, A-LASSO, CAD, SCAD)
16 |
17 | ## Installation
18 |
19 | pysr3 can be installed via
20 | ```bash
21 | pip install "pysr3>=0.3.5"
22 | ```
23 |
24 |
25 | ```python
26 | from pysr3.__about__ import __version__
27 | print(f"This tutorial was generated using PySR3 v{__version__}\n"
28 | "You might see slightly different numerical results if you are using a different version of the library.")
29 | ```
30 |
31 | This tutorial was generated using PySR3 v0.3.5
32 | You might see slightly different numerical results if you are using a different version of the library.
33 |
34 |
35 | ## Requirements
36 | Make sure that Python 3.6 or higher is installed. The package has the following
37 | dependencies, as listed in requirements.txt:
38 |
39 | * numpy>=1.21.1
40 | * pandas>=1.3.1
41 | * scipy>=1.7.1
42 | * PyYAML>=5.4.1
43 | * scikit_learn>=0.24.2
44 |
45 | ## Usage
46 | pysr3 models are fully compatible with [sklearn standards](https://scikit-learn.org/stable/developers/develop.html),
47 | so you can use them as you would any other sklearn model.
48 |
49 | ### Linear Models
50 | A simple example of using SR3-empowered LASSO for feature selection is shown below.
51 |
52 |
53 | ```python
54 | import numpy as np
55 |
56 | from pysr3.linear.problems import LinearProblem
57 |
58 | # Create a sample dataset
59 | seed = 42
60 | num_objects = 300
61 | num_features = 500
62 | np.random.seed(seed)
63 | # create a vector of true model's coefficients
64 | true_x = np.random.choice(2, size=num_features, p=np.array([0.9, 0.1]))
65 | # create sample data
66 | a = 10 * np.random.randn(num_objects, num_features)
67 | b = a.dot(true_x) + np.random.randn(num_objects)
68 |
69 | print(f"The dataset has {a.shape[0]} objects and {a.shape[1]} features; \n"
70 | f"The vector of true parameters contains {sum(true_x != 0)} non-zero elements out of {num_features}.")
71 | ```
72 |
73 | The dataset has 300 objects and 500 features;
74 | The vector of true parameters contains 55 non-zero elements out of 500.
75 |
76 |
77 | First, let's fit a model with a fixed parameter lambda:
78 |
79 |
80 | ```python
81 | from pysr3.linear.models import LinearL1ModelSR3
82 | from sklearn.metrics import confusion_matrix
83 | lam = 0.1*np.max(np.abs(a.T.dot(b)))
84 | model = LinearL1ModelSR3(lam=lam, el=1e5)
85 | ```
86 |
87 |
88 | ```python
89 | %%timeit
90 | model.fit(a, b)
91 | ```
92 |
93 | 38.6 ms ± 236 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
94 |
95 |
96 |
97 | ```python
98 | maybe_x = model.coef_['x']
99 | tn, fp, fn, tp = confusion_matrix(true_x, np.abs(maybe_x) > np.sqrt(model.tol_solver)).ravel()
100 |
101 | print(f"The model found {tp} out of {tp + fn} features correctly, but also chose {fp} out of {tn+fp} extra irrelevant features. \n")
102 | ```
103 |
104 | The model found 55 out of 55 features correctly, but also chose 5 out of 445 extra irrelevant features.
105 |
106 |
107 |
108 | Now let's see if we can improve it by adding grid-search:
109 |
110 |
111 | ```python
112 | # Automatic features selection using information criterion
113 | from pysr3.linear.models import LinearL1ModelSR3
114 | from sklearn.model_selection import RandomizedSearchCV
115 | from sklearn.utils.fixes import loguniform
116 |
117 | # Here we use SR3-empowered LASSO, but many other popular regularizers are also available
118 | # See the glossary of models for more details.
119 | model = LinearL1ModelSR3()
120 | # We will search for the best model over the range of strengths for the regularizer
121 | params = {
122 | "lam": loguniform(1e-1, 1e2)
123 | }
124 | selector = RandomizedSearchCV(estimator=model,
125 | param_distributions=params,
126 | n_iter=50,
127 | # The function below evaluates an information criterion
128 | # on the test portion of CV-splits.
129 | scoring=lambda clf, x, y: -clf.get_information_criterion(x, y, ic='bic'))
130 |
131 | selector.fit(a, b)
132 | maybe_x = selector.best_estimator_.coef_['x']
133 | tn, fp, fn, tp = confusion_matrix(true_x, np.abs(maybe_x) > np.sqrt(model.tol_solver)).ravel()
134 |
135 | print(f"The model found {tp} out of {tp + fn} features correctly, but also chose {fp} out of {tn+fp} extra irrelevant features. \n"
136 | f"The best parameter is {selector.best_params_}")
137 | ```
138 |
139 | The model found 55 out of 55 features correctly, but also chose 1 out of 445 extra irrelevant features.
140 | The best parameter is {'lam': 0.15055187290939537}
141 |
142 |
143 | Note that the discovered coefficients will be biased downwards due to L1 regularization.
144 |
145 |
146 | ```python
147 | import matplotlib.pyplot as plt
148 | fig, ax = plt.subplots()
149 | indep = list(range(num_features))
150 | ax.plot(indep, maybe_x, label='Discovered Coefficients')
151 | ax.plot(indep, true_x, alpha=0.5, label='True Coefficients')
152 | ax.legend(bbox_to_anchor=(1.05, 1))
153 | plt.show()
154 | ```
155 |
156 |
157 |
158 | 
159 |
160 |
161 |
162 | You can get rid of the bias by refitting the model using only the features that were selected; see the refitting sketch at the end of this README.
163 |
164 | ### Linear Mixed-Effects Models
165 |
166 | Below we show how to use Linear Mixed-Effects (LME) models for simultaneous selection
167 | of fixed and random effects.
168 |
169 |
170 | ```python
171 | from pysr3.lme.models import L1LmeModelSR3
172 | from pysr3.lme.problems import LMEProblem, LMEStratifiedShuffleSplit
173 |
174 |
175 | # Here we generate a random linear mixed-effects problem.
176 | # To use your own dataset check LMEProblem.from_dataframe and LMEProblem.from_x_y
177 | problem, true_parameters = LMEProblem.generate(
178 | groups_sizes=[10] * 8, # 8 groups, 10 objects each
179 | features_labels=["fixed+random"] * 20, # 20 features, each one having both fixed and random components
180 | beta=np.array([0, 1] * 10), # True beta (fixed effects) has every other coefficient active
181 | gamma=np.array([0, 0, 0, 1] * 5), # True gamma (variances of random effects) has every fourth coefficient active
182 |     obs_var=0.1,  # The errors have a standard deviation of sqrt(0.1) ≈ 0.32
183 | seed=seed # random seed, for reproducibility
184 | )
185 |
186 | # LMEProblem provides a very convenient representation
187 | # of the problem. See the documentation for more details.
188 |
189 | # It also can be converted to a more familiar representation
190 | x, y, columns_labels = problem.to_x_y()
191 | # columns_labels describe the roles of the columns in x:
192 | # fixed effect, random effect, or both of those, as well as groups labels and observation standard deviation.
193 |
194 | # You can also convert it to pandas dataframe if you'd like.
195 | pandas_dataframe = problem.to_dataframe()
196 | ```
197 |
198 |
199 | ```python
200 | # We use SR3-empowered LASSO model, but many other popular models are also available.
201 | # See the glossary of models for more details.
202 | model = L1LmeModelSR3(practical=True)
203 |
204 | # We're going to select features by varying the strength of the regularizer
205 | # and choosing the model that yields the best information criterion
206 | # on the validation set.
207 | params = {
208 | "lam": loguniform(1e-3, 1e2),
209 | "ell": loguniform(1e-1, 1e2)
210 | }
211 | # We use standard functionality of sklearn to perform grid-search.
212 | selector = RandomizedSearchCV(estimator=model,
213 | param_distributions=params,
214 | n_iter=30, # number of points from parameters space to sample
215 | # the class below implements CV-splits for LME models
216 | cv=LMEStratifiedShuffleSplit(n_splits=2, test_size=0.5,
217 | random_state=seed,
218 | columns_labels=columns_labels),
219 | # The function below will evaluate the information criterion
220 | # on the test-sets during cross-validation.
221 | # We use cAIC from Vaida, but other options (BIC, Muller's IC) are also available
222 | scoring=lambda clf, x, y: -clf.get_information_criterion(x,
223 | y,
224 | columns_labels=columns_labels,
225 | ic="vaida_aic"),
226 | random_state=seed,
227 | n_jobs=20
228 | )
229 | selector.fit(x, y, columns_labels=columns_labels)
230 | best_model = selector.best_estimator_
231 |
232 | maybe_beta = best_model.coef_["beta"]
233 | maybe_gamma = best_model.coef_["gamma"]
234 |
235 | # Since the solver stops within sqrt(tol) of the minimum, we use it as a criterion for whether a feature
236 | # is selected or not
237 | ftn, ffp, ffn, ftp = confusion_matrix(y_true=true_parameters["beta"],
238 | y_pred=abs(maybe_beta) > np.sqrt(best_model.tol_solver)
239 | ).ravel()
240 | rtn, rfp, rfn, rtp = confusion_matrix(y_true=true_parameters["gamma"],
241 | y_pred=abs(maybe_gamma) > np.sqrt(best_model.tol_solver)
242 | ).ravel()
243 |
244 | print(
245 | f"The model found {ftp} out of {ftp + ffn} correct fixed features, and also chose {ffp} out of {ftn + ffp} extra irrelevant fixed features. \n"
246 | f"It also identified {rtp} out of {rtp + rfn} random effects correctly, and got {rfp} out of {rtn + rfp} non-present random effects. \n"
247 | f"The best sparsity parameter is {selector.best_params_}")
248 | ```
249 |
250 | The model found 10 out of 10 correct fixed features, and also chose 0 out of 10 extra irrelevant fixed features.
251 | It also identified 5 out of 5 random effects correctly, and got 0 out of 15 non-present random effects.
252 | The best sparsity parameter is {'ell': 0.3972110727381912, 'lam': 0.3725393839578885}
253 |
254 |
255 |
256 | ```python
257 | fig, axs = plt.subplots(1, 2, figsize=(9, 3), sharey=True)
258 |
259 | indep_beta = list(range(np.size(true_parameters["beta"])))
260 | indep_gamma = list(range(np.size(true_parameters["gamma"])))
261 |
262 | axs[0].set_title(r"$\beta$, Fixed Effects")
263 | axs[0].scatter(indep_beta, maybe_beta, label='Discovered')
264 | axs[0].scatter(indep_beta, true_parameters["beta"], alpha=0.5, label='True')
265 |
266 | axs[1].set_title(r"$\gamma$, Variances of Random Effects")
267 | axs[1].scatter(indep_gamma, maybe_gamma, label='Discovered')
268 | axs[1].scatter(indep_gamma, true_parameters["gamma"], alpha=0.5, label='True')
269 | axs[1].legend(bbox_to_anchor=(1.55, 1))
270 | plt.show()
271 | ```
272 |
273 |
274 |
275 | 
276 |
277 |
278 |
279 |
283 |
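284 | ## Appendix: refitting to remove the L1 bias
285 | 
286 | As noted in the Linear Models section above, coefficients selected with L1 penalties
287 | are biased downwards. Below is a minimal sketch of the refitting step (assuming `a`,
288 | `b`, `model`, and `maybe_x` are as defined in that section); plain least squares on
289 | the selected columns is one simple way to do it:
290 | 
291 | ```python
292 | import numpy as np
293 | 
294 | support = np.abs(maybe_x) > np.sqrt(model.tol_solver)  # features kept by the model
295 | debiased_x = np.zeros_like(maybe_x, dtype=float)
296 | # ordinary least squares restricted to the selected columns removes the shrinkage bias
297 | debiased_x[support], *_ = np.linalg.lstsq(a[:, support], b, rcond=None)
298 | ```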
--------------------------------------------------------------------------------
/src/pysr3/linear/oracles.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from pysr3.linear.problems import LinearProblem
4 | from pysr3.priors import Prior, NonInformativePrior
5 |
6 |
7 | class LinearOracle:
8 | """
9 |     Implements a supplementary class that abstracts the model. That is, it takes a problem
10 |     and provides losses and gradients with respect to the parameters of the model.
11 | 
12 |     It separates the model from the optimization routine for better code patterns.
13 | The solver takes an oracle and optimizes its loss using its gradient, but it does not know which model it optimizes.
14 | The oracle, in its turn, has no idea how the solution for its model will be obtained.
15 | """
16 |
17 | def __init__(self, problem: LinearProblem = None, prior: Prior = None):
18 | """
19 | Initializes LinearOracle
20 |
21 | Parameters
22 | ----------
23 | problem: LinearProblem, optional
24 | an instance of LinearProblem containing the data
25 | prior: Prior
26 | an instance of Prior for the models' coefficients, if needed. See the docs for pysr3.priors module.
27 | """
28 | self.problem = problem
29 | self.prior = prior if prior else NonInformativePrior()
30 |
31 | def instantiate(self, problem):
32 | """
33 | Attaches the given problem to the oracle
34 |
35 | Parameters
36 | ----------
37 | problem: LinearProblem
38 | instance of the problem
39 |
40 | Returns
41 | -------
42 | oracle: LinearOracle
43 | oracle for this problem
44 | """
45 | self.problem = problem
46 | self.prior.instantiate(problem)
47 |
48 | def forget(self):
49 | """
50 | Detaches the problem from the oracle
51 | """
52 | self.problem = None
53 | self.prior.forget()
54 |
55 | def loss(self, x):
56 | """
57 | Calculates Gaussian negative log-likelihood for the given set of models parameters x.
58 |
59 | Parameters
60 | ----------
61 | x: ndarray (num_features, )
62 | models parameters
63 |
64 | Returns
65 | -------
66 | loss: float
67 | loss value
68 | """
69 | return 1 / 2 * np.linalg.norm(self.problem.a.dot(x) - self.problem.b, ord=2) ** 2 + self.prior.loss(x)
70 |
71 | def gradient(self, x):
72 | """
73 | Calculates gradient of Gaussian negative log-likelihood with respect to the given set of models parameters x.
74 |
75 | Parameters
76 | ----------
77 | x: ndarray (num_features, )
78 | models parameters
79 |
80 | Returns
81 | -------
82 | gradient: ndarray (num_features, )
83 | gradient
84 | """
85 | return self.problem.a.T.dot(self.problem.a.dot(x) - self.problem.b) + self.prior.gradient(x)
86 |
87 | def hessian(self, x):
88 | """
89 | Calculates Hessian of Gaussian negative log-likelihood with respect to the given set of models parameters x.
90 |
91 | Parameters
92 | ----------
93 | x: ndarray (num_features, )
94 | models parameters
95 |
96 | Returns
97 | -------
98 | hessian: ndarray (num_features, num_features)
99 | Hessian
100 | """
101 | return self.problem.a.T.dot(self.problem.a) + self.prior.hessian(x)
102 |
103 | def value_function(self, x):
104 | """
105 | Calculates value function for the given set of models parameters x. It's the same as loss
106 | if the oracle does not implement an SR3 relaxation.
107 |
108 | Parameters
109 | ----------
110 | x: ndarray (num_features, )
111 | models parameters
112 |
113 | Returns
114 | -------
115 | loss: float
116 | loss value
117 | """
118 | return self.loss(x)
119 |
120 | def gradient_value_function(self, x):
121 | """
122 | Calculates gradient of the value function with respect to the given set of models parameters x.
123 | It's the same as normal gradient if the oracle does not implement an SR3 relaxation.
124 |
125 | Parameters
126 | ----------
127 | x: ndarray (num_features, )
128 | models parameters
129 |
130 | Returns
131 | -------
132 | gradient: ndarray (num_features, )
133 | gradient
134 | """
135 | return self.gradient(x)
136 |
137 | def aic(self, x):
138 | """
139 | Calculates Akaike information criterion (AIC)
140 |
141 | Parameters
142 | ----------
143 | x: ndarray (num_features, )
144 | models parameters
145 |
146 | Returns
147 | -------
148 | aic: float
149 | AIC
150 | """
151 | p = sum(x != 0)
152 | return self.loss(x) + 2 * p
153 |
154 | def bic(self, x):
155 | """
156 |         Calculates the Bayesian information criterion (BIC)
157 |
158 | Parameters
159 | ----------
160 | x: ndarray (num_features, )
161 | models parameters
162 |
163 | Returns
164 | -------
165 | bic: float
166 | BIC
167 | """
168 | p = sum(x != 0)
169 | return self.loss(x) + np.log(self.problem.num_objects) * p
170 |
171 |
172 | class LinearOracleSR3:
173 | """
174 | Implements a supplementary class that abstracts SR3-model. That is, it takes a problem
175 | and provides losses and gradients with respect to the parameters of the model.
176 |
177 | It separates the model form the optimization routine for better code patterns.
178 | The solver takes an oracle and optimizes its loss using its gradient, but it does not know which model it optimizes.
179 | The oracle, in its turn, has no idea how the solution for its model will be obtained.
180 | """
181 |
182 | def __init__(self, problem: LinearProblem = None, lam: float = 1, practical: bool = False, prior: Prior = None):
183 | """
184 | Instantiates an oracle
185 |
186 | Parameters
187 | ----------
188 | problem: LinearProblem, optional
189 | an instance of LinearProblem containing the data
190 | prior: Prior
191 | an instance of Prior for the models' coefficients, if needed. See the docs for pysr3.priors module.
192 | lam: float
193 |             coefficient for the strength of the SR3 relaxation. It's NOT the same as the regularization (sparsity)
194 | coefficient. See the paper for more details.
195 | practical: bool
196 | whether to use an optimization method that is much faster than the default.
197 | """
198 | assert not prior, "Priors for LinearOracleSR3 are not supported yet"
199 | self.prior = prior if prior else NonInformativePrior()
200 | self.lam = lam
201 | self.practical = practical
202 | self.problem = problem
203 | self.f_matrix = None
204 | self.g_matrix = None
205 | self.h_matrix = None
206 | self.h_inv = None
207 | self.g = None
208 | self.ab = None
209 |
210 | def instantiate(self, problem):
211 | """
212 | Attaches the given problem to the oracle
213 |
214 | Parameters
215 | ----------
216 | problem: LinearProblem
217 | instance of the problem
218 |
219 | Returns
220 | -------
221 | oracle: LinearOracleSR3
222 | oracle for this problem
223 | """
224 | self.problem = problem
225 | a = problem.a
226 | c = problem.c
227 | lam = self.lam
228 | self.h_matrix = a.T.dot(a) + lam * c.dot(c)
229 | self.h_inv = np.linalg.inv(self.h_matrix)
230 | self.ab = a.T.dot(problem.b)
231 | if not self.practical:
232 | self.f_matrix = np.vstack([lam * a.dot(self.h_inv).dot(c.T),
233 | (np.sqrt(lam) * (np.eye(c.shape[0]) - lam * c.dot(self.h_inv).dot(c.T)))])
234 | self.g_matrix = np.vstack([np.eye(a.shape[0]) - a.dot(self.h_inv).dot(a.T),
235 | np.sqrt(lam) * c.dot(self.h_inv).dot(a.T)])
236 | self.g = self.g_matrix.dot(problem.b)
237 |
238 | def forget(self):
239 | """
240 | Detaches the problem from the oracle
241 | """
242 | self.problem = None
243 | self.f_matrix = None
244 | self.g_matrix = None
245 | self.h_matrix = None
246 | self.h_inv = None
247 | self.g = None
248 | self.ab = None
249 |
250 | def loss(self, x, w):
251 | """
252 | Calculates Gaussian negative log-likelihood of SR3 relaxation for the given set of models parameters x.
253 |
254 | Parameters
255 | ----------
256 | x: ndarray (num_features, )
257 | models parameters
258 | w: ndarray (num_features, )
259 | dual (relaxed) parameters that SR3-relaxation introduces
260 | Returns
261 | -------
262 | loss: float
263 | loss value
264 | """
265 |
266 | return (1 / 2 * np.linalg.norm(self.problem.a.dot(x) - self.problem.b, ord=2) ** 2 +
267 | self.lam / 2 * np.linalg.norm(self.problem.c.dot(x) - w, ord=2) ** 2) + self.prior.loss(x)
268 |
269 | def value_function(self, x):
270 | """
271 | Calculates value function for the given set of models parameters x.
272 |
273 | Parameters
274 | ----------
275 | x: ndarray (num_features, )
276 |             model parameters
277 |
278 | Returns
279 | -------
280 | loss: float
281 | loss value
282 | """
283 | assert not self.practical, "The oracle is in 'practical' mode. The value function is inaccessible."
284 | return 1 / 2 * np.linalg.norm(self.f_matrix.dot(x) - self.g, ord=2) ** 2
285 |
286 | def gradient_value_function(self, x):
287 | """
288 |         Calculates the gradient of the value function with respect to the given model parameters x.
289 |         It coincides with the normal gradient when the oracle does not implement an SR3 relaxation.
290 |
291 | Parameters
292 | ----------
293 | x: ndarray (num_features, )
294 |             model parameters
295 |
296 | Returns
297 | -------
298 | gradient: ndarray (num_features, )
299 | gradient
300 | """
301 | assert not self.practical, "The oracle is in 'practical' mode. The value function is inaccessible."
302 | return self.f_matrix.T.dot(self.f_matrix.dot(x) - self.g)
303 |
304 | def find_optimal_parameters(self, x0, regularizer=None, tol: float = 1e-4, max_iter: int = 1000, **kwargs):
305 | """
306 |         Implements a "practical" optimization scheme that works faster than standard gradient descent.
307 |         This function is meant to be called by pysr3.solvers.FakePGDSolver.
308 |
309 | Parameters
310 | ----------
311 | x0: ndarray (num_features, )
312 | starting point for the optimization
313 | regularizer: Regularizer
314 |             regularizer that implements the sparsification prior.
315 | tol: float
316 | tolerance for the solver
317 | max_iter: int
318 | maximum number of iterations
319 | kwargs:
320 | other keyword arguments
321 |
322 | Returns
323 | -------
324 | x: ndarray (num_features, )
325 | the optimal solution
326 | """
327 | x = x0
328 | step_len = 1 / self.lam
329 | x_prev = np.infty
330 | iteration = 0
331 |
332 | while np.linalg.norm(x - x_prev) > tol and iteration < max_iter:
333 | x_prev = x
334 | y = self.h_inv.dot(self.ab + self.lam * self.problem.c.T.dot(x))
335 | x = regularizer.prox(y, step_len)
336 | iteration += 1
337 |
338 | return x
339 |
340 | def aic(self, x):
341 | """
342 |         Calculates the Akaike information criterion (AIC)
343 |
344 | Parameters
345 | ----------
346 | x: ndarray (num_features, )
347 |             model parameters
348 |
349 | Returns
350 | -------
351 | aic: float
352 | AIC
353 | """
354 | p = sum(x != 0)
355 | oracle = LinearOracle(self.problem, self.prior)
356 | return oracle.loss(x) + 2 * p
357 |
358 | def bic(self, x):
359 | """
360 |         Calculates the Bayesian information criterion (BIC)
361 |
362 | Parameters
363 | ----------
364 | x: ndarray (num_features, )
365 |             model parameters
366 |
367 | Returns
368 | -------
369 | bic: float
370 | BIC
371 | """
372 | p = sum(x != 0)
373 | oracle = LinearOracle(self.problem, self.prior)
374 | return oracle.loss(x) + np.log(self.problem.num_objects) * p
375 |
--------------------------------------------------------------------------------
/tests/lme/test_LMEModels.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | import numpy as np
4 | from sklearn.metrics import mean_squared_error, explained_variance_score, accuracy_score
5 | from sklearn.model_selection import RandomizedSearchCV
6 | from sklearn.utils.estimator_checks import check_estimator
7 | from scipy.stats import loguniform  # moved here from sklearn.utils.fixes in newer scikit-learn versions
8 |
9 | from pysr3.lme.models import L0LmeModelSR3, L0LmeModel, L1LmeModel, L1LmeModelSR3, CADLmeModel, CADLmeModelSR3, \
10 | SCADLmeModel, SCADLmeModelSR3, SimpleLMEModel, SimpleLMEModelSR3
11 | from pysr3.lme.priors import GaussianPriorLME
12 | from pysr3.lme.problems import LMEProblem, LMEStratifiedShuffleSplit, FIXED_RANDOM
13 | from pysr3.lme.problems import random_effects_to_matrix
14 |
15 |
16 | class TestLmeModels(unittest.TestCase):
17 |
18 | def test_meeting_sklearn_standards(self):
19 | models_to_test = {
20 | "Simple": SimpleLMEModel(),
21 | "Simple_SR3": SimpleLMEModelSR3(),
22 | "L0": L0LmeModel(),
23 | "L1": L1LmeModel(),
24 | "CAD": CADLmeModel(),
25 | "SCAD": SCADLmeModel(),
26 | "L0_SR3": L0LmeModelSR3(),
27 | "L1_SR3": L1LmeModelSR3(),
28 | "CAD_SR3": CADLmeModelSR3(),
29 | "SCAD_SR3": SCADLmeModelSR3()
30 | }
31 |
32 | for name, model in models_to_test.items():
33 | with self.subTest(name=name):
34 | check_estimator(model)
35 |
36 | def test_solving_dense_problem(self):
37 |
38 | models_to_test = {
39 | "L0": (L0LmeModel, {"stepping": "line-search"}),
40 | "L1": (L1LmeModel, {"stepping": "line-search"}),
41 | "CAD": (CADLmeModel, {"stepping": "line-search"}),
42 | "SCAD": (SCADLmeModel, {"stepping": "line-search", "rho": 3.7, "sigma": 2.5}),
43 | "L0SR3": (L0LmeModelSR3, {"practical": True}),
44 | "L1SR3": (L1LmeModelSR3, {"practical": True}),
45 | "CADSR3": (CADLmeModelSR3, {"practical": True}),
46 | "SCADSR3": (SCADLmeModelSR3, {"rho": 3.7, "sigma": 2.5, "practical": True})
47 | }
48 |
49 | trials = 3
50 |
51 | problem_parameters = {
52 | "groups_sizes": [20, 15, 10, 50],
53 | "features_labels": [FIXED_RANDOM] * 3,
54 | "fit_fixed_intercept": True,
55 | "fit_random_intercept": True,
56 | "features_covariance_matrix": np.array([
57 | [1, 0, 0],
58 | [0, 1, 0.7],
59 | [0, 0.7, 1]
60 | ]),
61 | "obs_var": 0.1,
62 | }
63 | default_params = {
64 | "nnz_tbeta": 4,
65 | "nnz_tgamma": 4,
66 | "ell": 1,
67 | "rho": 0.1,
68 |             "lam": 0.0,  # we expect the answers to be dense, so we keep the regularization weak
69 | # "stepping": "line-search",
70 | "initializer": 'None',
71 | "logger_keys": ('converged',),
72 | "tol_oracle": 1e-4,
73 | "tol_solver": 1e-6,
74 | "max_iter_oracle": 1000,
75 | "max_iter_solver": 5000,
76 | "prior": GaussianPriorLME(fe_params={"x1": (1, 1)}, re_params={})
77 | }
78 |
79 | max_mse = 0.1
80 | min_explained_variance = 0.9
81 |
82 | for i in range(trials):
83 | with self.subTest(i=i):
84 | for model_name, (model_constructor, local_params) in models_to_test.items():
85 | with self.subTest(model_name=model_name):
86 | problem, _ = LMEProblem.generate(**problem_parameters, seed=i)
87 | _, y, _ = problem.to_x_y()
88 |
89 | model_params = default_params.copy()
90 | model_params.update(local_params)
91 |
92 | model = model_constructor(**model_params)
93 | model.fit_problem(problem)
94 |
95 | y_pred = model.predict_problem(problem)
96 | explained_variance = explained_variance_score(y, y_pred)
97 | mse = mean_squared_error(y, y_pred)
98 |
99 | self.assertGreater(explained_variance, min_explained_variance,
100 | msg="%d) Explained variance is too small: %.3f < %.3f. (seed=%d)"
101 | % (i,
102 | explained_variance,
103 | min_explained_variance,
104 | i))
105 | self.assertGreater(max_mse, mse,
106 | msg="%d) MSE is too big: %.3f > %.2f (seed=%d)"
107 | % (i,
108 | mse,
109 | max_mse,
110 | i))
111 | return None
112 |
113 | def test_solving_sparse_problem(self):
114 |
115 | models_to_test = {
116 | "L0": (L0LmeModel, {"stepping": "line-search"}),
117 | "L1": (L1LmeModel, {"stepping": "line-search"}),
118 | "CAD": (CADLmeModel, {"rho": 0.3, "stepping": "line-search"}),
119 | "SCAD": (SCADLmeModel, {"rho": 3.7, "lam": 10, "stepping": "line-search"}),
120 | "L0_SR3": (L0LmeModelSR3, {"practical": True}),
121 | "L1_SR3": (L1LmeModelSR3, {"practical": True}),
122 | "CAD_SR3": (CADLmeModelSR3, {"rho": 0.3, "practical": True}),
123 | "SCAD_SR3": (SCADLmeModelSR3, {"rho": 3.7, "practical": True})
124 | }
125 |
126 | trials = 2
127 | n_features = 20
128 | problem_parameters = {
129 | "groups_sizes": [30] * 6,
130 | "features_labels": [FIXED_RANDOM] * n_features,
131 | "obs_var": 0.01,
132 | }
133 |
134 | default_params = {
135 | "ell": 2,
136 | "initializer": "EM",
137 | "lam": 0.1,
138 | "rho": 0.3,
139 | "sigma": 0.5,
140 | # "stepping": "line-search",
141 | "logger_keys": ('converged', 'vaida_aic', 'jones_bic', 'muller_ic'),
142 | "tol_oracle": 1e-4,
143 | "tol_solver": 1e-5,
144 | "max_iter_oracle": 1000,
145 | "max_iter_solver": 5000
146 | }
147 |
148 | max_mse = 0.2
149 | min_explained_variance = 0.9
150 | fixed_effects_min_accuracy = 0.7
151 | random_effects_min_accuracy = 0.7
152 |
153 | for i in range(trials):
154 | with self.subTest(i=i):
155 | for model_name, (model_constructor, local_params) in models_to_test.items():
156 | with self.subTest(model_name=model_name):
157 |
158 | seed = i
159 | np.random.seed(seed)
160 | true_beta = np.random.choice(2, size=n_features, p=np.array([0.5, 0.5]))
161 | if sum(true_beta) == 0:
162 | true_beta[0] = 1
163 | np.random.seed(2 + 5 * seed)
164 | true_gamma = np.random.choice(2, size=n_features, p=np.array([0.2, 0.8])) * true_beta
165 |
166 | problem, true_model_parameters = LMEProblem.generate(**problem_parameters,
167 | beta=true_beta,
168 | gamma=true_gamma,
169 | seed=seed)
170 | x, y, columns_labels = problem.to_x_y()
171 |
172 | model_params = default_params.copy()
173 | model_params.update(local_params)
174 |
175 | model = model_constructor(**model_params,
176 | nnz_tbeta=sum(true_beta), # only L0-methods make use of those.
177 | nnz_tgamma=sum(true_gamma))
178 | if not (model_name.startswith("L0") or model_name.endswith("SR3")):
179 | params = {
180 | "lam": loguniform(1e-1, 3e2)
181 | }
182 | selector = RandomizedSearchCV(estimator=model,
183 | param_distributions=params,
184 | n_iter=10,
185 | cv=LMEStratifiedShuffleSplit(n_splits=2, test_size=0.5,
186 | random_state=seed,
187 | columns_labels=columns_labels),
188 | scoring=lambda clf, x, y: -clf.get_information_criterion(x, y,
189 | columns_labels=columns_labels,
190 | ic="muller_ic"),
191 | random_state=seed,
192 | n_jobs=20
193 | )
194 |
195 | selector.fit(x, y, columns_labels=columns_labels)
196 | model = selector.best_estimator_
197 | else:
198 | model.fit_problem(problem)
199 |
200 | y_pred = model.predict_problem(problem)
201 | explained_variance = explained_variance_score(y, y_pred)
202 | mse = mean_squared_error(y, y_pred)
203 |
204 | coefficients = model.coef_
205 | maybe_tbeta = coefficients["beta"]
206 | maybe_tgamma = coefficients["gamma"]
207 | fixed_effects_accuracy = accuracy_score(true_beta, abs(maybe_tbeta) > 1e-2)
208 | random_effects_accuracy = accuracy_score(true_gamma, abs(maybe_tgamma) > 1e-2)
209 |
210 | self.assertGreaterEqual(explained_variance, min_explained_variance,
211 | msg=f"{model_name}: Explained variance is too small: {explained_variance} < {min_explained_variance} (seed={seed})")
212 | self.assertGreaterEqual(max_mse, mse,
213 |                                             msg=f"{model_name}: MSE is too big: {mse} > {max_mse} (seed={seed})")
214 | self.assertGreaterEqual(fixed_effects_accuracy, fixed_effects_min_accuracy,
215 | msg=f"{model_name}: Fixed Effects Selection Accuracy is too small: {fixed_effects_accuracy} < {fixed_effects_min_accuracy} (seed={seed})")
216 | self.assertGreaterEqual(random_effects_accuracy, random_effects_min_accuracy,
217 | msg=f"{model_name}: Random Effects Selection Accuracy is too small: {random_effects_accuracy} < {random_effects_min_accuracy} (seed={seed})")
218 |
219 | return None
220 |
221 | def test_score_function(self):
222 |         # A basic test that checks R^2 at two points: a nearly perfect prediction and a constant prediction.
223 | models_to_test = {
224 | "L0": L0LmeModel,
225 | "L1": L1LmeModel,
226 | "L0SR3": L0LmeModelSR3,
227 | "L1SR3": L1LmeModelSR3,
228 | }
229 | problem_parameters = {
230 | "groups_sizes": [20, 5, 10, 50],
231 | "features_labels": [FIXED_RANDOM] * 3,
232 | "fit_fixed_intercept": True,
233 | "fit_random_intercept": True,
234 | "features_covariance_matrix": np.array([
235 | [1, 0, 0],
236 | [0, 1, 0.7],
237 | [0, 0.7, 1]
238 | ]),
239 | "obs_var": 0.1,
240 | }
241 |
242 | model_parameters = {
243 | "nnz_tbeta": 4,
244 | "nnz_tgamma": 4,
245 | "lb": 0, # We expect the coefficient vectors to be dense so we turn regularization off.
246 | "lg": 0, # Same.
247 | "lam": 10,
248 | "initializer": 'EM',
249 | "logger_keys": ('converged', 'loss',),
250 | "tol_oracle": 1e-6,
251 | "tol_solver": 1e-6,
252 | "max_iter_oracle": 1,
253 | "max_iter_solver": 1,
254 | "practical": True
255 | }
256 |
257 | problem, true_model_parameters = LMEProblem.generate(**problem_parameters, seed=42)
258 | x, y, column_labels = problem.to_x_y()
259 | for model_name, model_constructor in models_to_test.items():
260 | with self.subTest(model_name=model_name):
261 | model = model_constructor(**model_parameters)
262 | model.fit_problem(problem)
263 | model.coef_["beta"] = true_model_parameters["beta"]
264 | model.coef_["random_effects"] = random_effects_to_matrix(true_model_parameters["random_effects"])
265 | good_score = model.score(x, y, columns_labels=column_labels,
266 | fit_fixed_intercept=True, fit_random_intercept=True)
267 | self.assertGreaterEqual(good_score, 0.9)
268 | model.coef_["beta"] = np.zeros(4)
269 | model.coef_["random_effects"] = np.zeros((4, 4))
270 | bad_score = model.score(x, y, columns_labels=column_labels,
271 | fit_fixed_intercept=True, fit_random_intercept=True)
272 | self.assertLessEqual(abs(bad_score), 0.1)
273 |
274 |
275 | if __name__ == '__main__':
276 | unittest.main()
277 |
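278 |
279 | # A quick-start sketch distilled from the tests above (illustrative only; it uses
280 | # the same calls that the tests exercise):
281 | #
282 | #     problem, _ = LMEProblem.generate(groups_sizes=[20, 15, 10],
283 | #                                      features_labels=[FIXED_RANDOM] * 3,
284 | #                                      obs_var=0.1, seed=0)
285 | #     model = L1LmeModelSR3(lam=0.1, practical=True)
286 | #     model.fit_problem(problem)
287 | #     y_pred = model.predict_problem(problem)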
--------------------------------------------------------------------------------
/tests/lme/test_LMEOracle.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from unittest import TestCase
3 |
4 | import numpy as np
5 | from numpy import allclose
6 |
7 | from pysr3.lme.oracles import LinearLMEOracle, LinearLMEOracleSR3
8 | from pysr3.lme.problems import LMEProblem, FIXED, RANDOM, FIXED_RANDOM
9 | from pysr3.lme.problems import random_effects_to_matrix
10 |
11 |
12 | class TestLinearLMEOracle(TestCase):
13 |
14 | def test_gradients(self):
15 | trials = 100
16 | random_seed = 34
17 | r = 1e-6
18 | rtol = 1e-4
19 | atol = 1e-5
20 | problem, _ = LMEProblem.generate(seed=random_seed)
21 | oracle = LinearLMEOracle(problem)
22 | np.random.seed(random_seed)
23 | for j in range(trials):
24 | with self.subTest(j=j):
25 | beta = np.random.rand(problem.num_fixed_features)
26 | gamma = np.random.rand(problem.num_random_features)
27 | db = np.random.rand(problem.num_fixed_features)
28 | gradient_beta = oracle.gradient_beta(beta, gamma)
29 | maybe_dir = gradient_beta.dot(db)
30 | true_dir = (oracle.loss(beta + r * db, gamma)
31 | - oracle.loss(beta - r * db, gamma)
32 | ) / (2 * r)
33 | self.assertTrue(allclose(maybe_dir, true_dir, rtol=rtol, atol=atol),
34 | msg="Gradient beta does not look right")
35 | dg = np.random.rand(problem.num_random_features)
36 | gradient_gamma = oracle.gradient_gamma(beta, gamma)
37 | maybe_dir = gradient_gamma.dot(dg)
38 | true_dir = (oracle.loss(beta, gamma + r * dg)
39 | - oracle.loss(beta, gamma - r * dg)
40 | ) / (2 * r)
41 | self.assertTrue(allclose(maybe_dir, true_dir, rtol=rtol, atol=atol),
42 | msg="Gradient gamma does not look right")
43 |
44 | def test_hessians(self):
45 | trials = 40
46 | random_seed = 34
47 | r = 1e-5
48 | rtol = 1e-4
49 | atol = 1e-4
50 | problem, _ = LMEProblem.generate(groups_sizes=[5, 8, 10],
51 | features_labels=[FIXED_RANDOM, FIXED, RANDOM],
52 | seed=random_seed,
53 | fit_fixed_intercept=True,
54 | fit_random_intercept=True)
55 | oracle = LinearLMEOracle(problem)
56 |
57 | for j in range(trials):
58 | with self.subTest(j=j):
59 | np.random.seed(random_seed + j)
60 |
61 | beta = np.random.rand(problem.num_fixed_features)
62 | gamma = np.random.rand(problem.num_random_features)
63 |
64 | db = np.random.rand(problem.num_fixed_features)
65 | hess = oracle.hessian_beta(beta, gamma)
66 | maybe_dir = hess.dot(db)
67 | true_dir = (oracle.gradient_beta(beta + r * db, gamma)
68 | - oracle.gradient_beta(beta - r * db, gamma)
69 | ) / (2 * r)
70 | self.assertTrue(allclose(maybe_dir, true_dir, rtol=rtol, atol=atol),
71 | msg="Hessian beta does not look right")
72 |
73 | dg = np.random.rand(problem.num_random_features)
74 | hess = oracle.hessian_gamma(beta, gamma)
75 | maybe_dir = hess.dot(dg)
76 | true_dir = (oracle.gradient_gamma(beta, gamma + r * dg)
77 | - oracle.gradient_gamma(beta, gamma - r * dg)
78 | ) / (2 * r)
79 | self.assertTrue(allclose(maybe_dir, true_dir, rtol=rtol, atol=atol),
80 | msg="Hessian gamma does not look right")
81 |
82 | db = np.random.rand(problem.num_fixed_features)
83 | hess = oracle.hessian_beta_gamma(beta, gamma)
84 | maybe_dir = hess.T.dot(db)
85 | true_dir = (oracle.gradient_gamma(beta + r * db, gamma)
86 | - oracle.gradient_gamma(beta - r * db, gamma)
87 | ) / (2 * r)
88 | self.assertTrue(allclose(maybe_dir, true_dir, rtol=rtol, atol=atol),
89 | msg="Hessian gamma-beta does not look right")
90 |
91 | dg = np.random.rand(problem.num_random_features)
92 | hess = oracle.hessian_beta_gamma(beta, gamma)
93 | maybe_dir = hess.dot(dg)
94 | true_dir = (oracle.gradient_beta(beta, gamma + r * dg)
95 | - oracle.gradient_beta(beta, gamma - r * dg)
96 | ) / (2 * r)
97 | self.assertTrue(allclose(maybe_dir, true_dir, rtol=rtol, atol=atol),
98 | msg="Hessian beta-gamma does not look right")
99 |
100 | def test_optimal_gamma_consistency_ip_vs_pgd(self):
101 | trials = 10
102 | rtol = 1e-2
103 | atol = 1e-2
104 | for j in range(trials):
105 | with self.subTest(j=j):
106 | problem, true_parameters = LMEProblem.generate(seed=j + 42,
107 | groups_sizes=[5, 10, 5],
108 | fit_fixed_intercept=True,
109 | fit_random_intercept=True,
110 | features_labels=[FIXED_RANDOM])
111 | oracle = LinearLMEOracle(problem, n_iter_inner=1000)
112 | beta = np.random.rand(problem.num_fixed_features)
113 | gamma = np.random.rand(problem.num_random_features)
114 | optimal_gamma_pgd = oracle.optimal_gamma(beta, gamma, method="pgd", log_progress=False)
115 | # pgd_log = np.array(oracle.logger)
116 | optimal_gamma_ip = oracle.optimal_gamma(beta, gamma, method="ip", log_progress=False)
117 | # ip_log = np.array(oracle.logger)
118 | # from matplotlib import pyplot as plt
119 | # plt.scatter(ip_log[:, 0], ip_log[:, 1], label="ip")
120 | # plt.scatter(pgd_log[:, 0], pgd_log[:, 1], label="pgd")
121 | # plt.legend()
122 | # plt.show()
123 | self.assertTrue(allclose(optimal_gamma_pgd, optimal_gamma_ip, rtol=rtol, atol=atol),
124 | msg="PGD and IP do not match")
125 | loss_pgd = oracle.loss(beta, optimal_gamma_pgd)
126 | loss_ip = oracle.loss(beta, optimal_gamma_ip)
127 | self.assertTrue(allclose(loss_pgd, loss_ip, rtol=rtol, atol=atol))
128 |
129 | def test_no_data_problem(self):
130 | random_seed = 43
131 | problem, true_parameters = LMEProblem.generate(groups_sizes=[10, 10, 10],
132 | features_labels=[],
133 | fit_fixed_intercept=True,
134 | fit_random_intercept=True,
135 | seed=random_seed)
136 | beta = true_parameters['beta']
137 | us = random_effects_to_matrix(true_parameters['random_effects'])
138 | empirical_gamma = np.sum(us ** 2, axis=0) / problem.num_groups
139 | rtol = 1e-1
140 | atol = 1e-1
141 | oracle = LinearLMEOracle(problem)
142 |
143 | maybe_beta = oracle.optimal_beta(empirical_gamma)
144 | maybe_us = oracle.optimal_random_effects(maybe_beta, empirical_gamma)
145 | self.assertTrue(allclose(maybe_beta + maybe_us, beta + us, rtol=rtol, atol=atol),
146 |                         msg="The solution of the no-data problem is not right")
147 | return None
148 |
149 | def test_non_regularized_oracle_is_zero_regularized_oracle(self):
150 | num_fixed_effects = 4
151 | num_random_effects = 3
152 | problem, true_parameters = LMEProblem.generate(groups_sizes=[4, 5, 10],
153 | features_labels=[FIXED_RANDOM,
154 | FIXED_RANDOM,
155 | FIXED,
156 | RANDOM],
157 | fit_fixed_intercept=True,
158 | fit_random_intercept=False,
159 | obs_var=0.1,
160 | seed=42)
161 | # when both regularization coefficients are zero, these two oracles should be exactly equivalent
162 | oracle_non_regularized = LinearLMEOracle(problem)
163 | oracle_regularized = LinearLMEOracleSR3(problem, lg=0, lb=0)
164 | np.random.seed(42)
165 | trials = 100
166 | rtol = 1e-14
167 | atol = 1e-14
168 | for random_beta, random_gamma, random_tbeta, random_tgamma in zip(np.random.rand(trials, num_fixed_effects),
169 | np.random.rand(trials, num_random_effects),
170 | np.random.rand(trials, num_fixed_effects),
171 | np.random.rand(trials, num_random_effects),
172 | ):
173 | loss1 = oracle_regularized.loss(random_beta, random_gamma, random_tbeta, random_tgamma)
174 | loss2 = oracle_non_regularized.loss(random_beta, random_gamma)
175 | self.assertAlmostEqual(loss1, loss2, delta=atol,
176 | msg="Loss of zero-regularized and non-regularized oracles is different")
177 | gradient1 = oracle_regularized.gradient_gamma(random_beta, random_gamma, random_tgamma)
178 | gradient2 = oracle_non_regularized.gradient_gamma(random_beta, random_gamma)
179 | self.assertTrue(allclose(gradient1, gradient2, rtol=rtol, atol=atol),
180 | msg="Gradients w.r.t. gamma of zero-regularized and non-regularized oracles are different")
181 | hessian1 = oracle_regularized.hessian_gamma(random_beta, random_gamma)
182 | hessian2 = oracle_non_regularized.hessian_gamma(random_beta, random_gamma)
183 | self.assertTrue(allclose(hessian1, hessian2, rtol=100 * rtol, atol=100 * atol),
184 | msg="Hessian w.r.t. gamma of zero-regularized and non-regularized oracles are different")
185 | beta1 = oracle_regularized.optimal_beta(random_gamma, tbeta=random_beta)
186 | beta2 = oracle_non_regularized.optimal_beta(random_gamma)
187 | self.assertTrue(allclose(beta1, beta2, rtol=rtol, atol=atol),
188 | msg="Optimal betas of zero-regularized and non-regularized oracles are different")
189 | us1 = oracle_regularized.optimal_random_effects(random_beta, random_gamma)
190 | us2 = oracle_non_regularized.optimal_random_effects(random_beta, random_gamma)
191 | self.assertTrue(allclose(us1, us2, rtol=rtol, atol=atol),
192 |                             msg="Optimal random effects of zero-regularized and non-regularized oracles are different")
193 | return None
194 |
195 | def test_beta_to_gamma_map(self):
196 | problem, true_parameters = LMEProblem.generate(groups_sizes=[4, 5, 10],
197 | features_labels=[FIXED_RANDOM,
198 | FIXED_RANDOM,
199 | FIXED,
200 | RANDOM,
201 | FIXED_RANDOM,
202 | FIXED,
203 | RANDOM],
204 | fit_fixed_intercept=True,
205 | fit_random_intercept=False,
206 | obs_var=0.1,
207 | seed=42)
208 | oracle = LinearLMEOracle(problem)
209 | true_beta_to_gamma_map = np.array([-1, 0, 1, -1, 3, -1])
210 | for e1, e2 in zip(true_beta_to_gamma_map, oracle.beta_to_gamma_map):
211 | self.assertEqual(e1, e2, msg="Beta-to-gamma mask is not right: \n %s is not \n %s as should be" % (
212 | true_beta_to_gamma_map,
213 | oracle.beta_to_gamma_map
214 | ))
215 |
216 | def test_jones2010n_eff(self):
217 |         # This test relies on the fact that, for a random-intercept model,
218 |         # n_eff can be expressed through the intraclass correlation rho.
219 |         # See the original Jones (2010) paper for more details.
220 | for seed in range(10):
221 | problem, true_parameters = LMEProblem.generate(groups_sizes=[40, 30, 50],
222 | features_labels=[],
223 | fit_fixed_intercept=True,
224 | fit_random_intercept=True,
225 | obs_var=0.1,
226 | seed=seed)
227 | oracle = LinearLMEOracle(problem)
228 | gamma = true_parameters['gamma']
229 | rho = gamma / (gamma + 0.1)
230 | oracle._recalculate_cholesky(true_parameters['gamma'])
231 | n_eff = oracle._jones2010n_eff()
232 | self.assertTrue(np.allclose(n_eff, sum([ni / (1 + (ni - 1) * rho) for ni in problem.groups_sizes])))
233 |
234 | def test_hodges2001ddf(self):
235 | # From here:
236 | # https://www.jstor.org/stable/2673485?seq=1
237 | problem, true_parameters = LMEProblem.generate(groups_sizes=[40, 30, 50],
238 | features_labels=[FIXED_RANDOM] * 3,
239 | fit_random_intercept=True,
240 | obs_var=0.1,
241 | seed=42)
242 | oracle = LinearLMEOracle(problem)
243 | true_gamma = true_parameters['gamma']
244 | ddf = oracle._hodges2001ddf(true_gamma)
245 | # #|beta| <= DDoF <= #|beta| + num_groups*#|u|
246 | self.assertTrue(4 <= ddf <= 4 + 4 * 3)
247 |
248 | def test_hat_matrix(self):
249 | for seed in range(10):
250 | problem, true_parameters = LMEProblem.generate(groups_sizes=[40, 30, 50],
251 | features_labels=[FIXED_RANDOM] * 3,
252 | fit_random_intercept=True,
253 | obs_var=0.1,
254 | seed=seed)
255 | oracle = LinearLMEOracle(problem)
256 | gamma = true_parameters['gamma']
257 | optimal_beta = oracle.optimal_beta(gamma)
258 | us = oracle.optimal_random_effects(optimal_beta, gamma)
259 | ys_true = []
260 | ys_optimal_true = []
261 | for (x, y, z, _), u in zip(problem, us):
262 | ys_optimal_true.append(x.dot(optimal_beta) + z.dot(u))
263 | ys_true.append(y)
264 | ys_true = np.concatenate(ys_true)
265 | ys_optimal_true = np.concatenate(ys_optimal_true)
266 | hat_matrix = oracle._hat_matrix(gamma)
267 | ys_optimal_hat = hat_matrix.dot(ys_true)
268 | self.assertTrue(np.allclose(ys_optimal_true, ys_optimal_hat))
269 |
270 | def test_flip_probabilities(self):
271 | problem, true_parameters = LMEProblem.generate(groups_sizes=[40, 30, 50],
272 | features_labels=[FIXED_RANDOM] * 2,
273 | fit_random_intercept=True,
274 | obs_var=0.1)
275 | oracle = LinearLMEOracle(problem)
276 | flip_probabilities = oracle.flip_probabilities_beta(**true_parameters)
277 | self.assertTrue((0 <= flip_probabilities).all() and (flip_probabilities <= 1).all())
278 | oracle.forget()
279 |
280 |
281 | if __name__ == '__main__':
282 | unittest.main()
283 |
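284 |
285 | # All the gradient and Hessian tests above follow the same central-difference
286 | # pattern: the directional derivative g(x).dot(d) must match
287 | # (f(x + r*d) - f(x - r*d)) / (2*r) up to O(r^2). A self-contained sketch of the
288 | # pattern on a toy quadratic (pure numpy, independent of pysr3):
289 | #
290 | #     A = np.array([[2.0, 0.5], [0.5, 1.0]])  # symmetric, so grad f(v) = A v
291 | #     f = lambda v: 0.5 * v.dot(A).dot(v)
292 | #     x, d, r = np.random.rand(2), np.random.rand(2), 1e-6
293 | #     fd = (f(x + r * d) - f(x - r * d)) / (2 * r)
294 | #     assert np.allclose(A.dot(x).dot(d), fd, rtol=1e-4, atol=1e-5)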
--------------------------------------------------------------------------------
/src/pysr3/regularizers.py:
--------------------------------------------------------------------------------
1 | # Various regularizers (L0, LASSO, CAD, SCAD, etc)
2 | # Copyright (C) 2021 Aleksei Sholokhov, aksh@uw.edu
3 | #
4 | # This program is free software: you can redistribute it and/or modify
5 | # it under the terms of the GNU General Public License as published by
6 | # the Free Software Foundation, either version 3 of the License, or
7 | # (at your option) any later version.
8 | #
9 | # This program is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | # GNU General Public License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License
15 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
16 |
17 | """
18 | Various regularizers (L0, LASSO, CAD, SCAD, etc)
19 | """
20 |
21 | import numpy as np
22 |
23 | from pysr3.lme.oracles import LinearLMEOracle
24 |
25 |
26 | class Regularizer:
27 | """
28 | Template class for regularizers
29 | """
30 |
31 | def instantiate(self, **kwargs):
32 | """
33 | Attaches weights to the regularizer.
34 |
35 | Parameters
36 | ----------
37 | kwargs:
38 | whatever is needed for the regularizer to work
39 |
40 | Returns
41 | -------
42 | None
43 | """
44 | pass
45 |
46 | def forget(self):
47 | """
48 | Unlinks all problem-dependent information from the regularizer.
49 |
50 | Returns
51 | -------
52 | None
53 | """
54 | pass
55 |
56 | def value(self, x) -> float:
57 | """
58 | Returns the value for the regularizer at the point x
59 |
60 | Parameters
61 | ----------
62 | x: ndarray
63 | point
64 |
65 | Returns
66 | -------
67 | the value of the regularizer
68 | """
69 | pass
70 |
71 | def prox(self, x, alpha):
72 | """
73 | Return the value of the proximal operator evaluated at the point x and the step parameter alpha.
74 |
75 | Parameters
76 | ----------
77 | x: ndarray
78 | point.
79 | alpha:
80 | step parameter.
81 |
82 | Returns
83 | -------
84 | result of the application of the proximal operator to x
85 | """
86 | pass
87 |
88 |
89 | def _take_only_k_max(x: np.ndarray, k: int):
90 | """
91 |     Returns a vector b that keeps the k largest-in-magnitude elements of x (in their original positions)
92 |     and has zeros everywhere else.
93 |
94 |     Parameters
95 |     ----------
96 |     x : np.ndarray, shape = [n]
97 |         Vector from which to take the k largest-in-magnitude elements.
98 |     k : int
99 |         How many elements of x to keep.
100 |
101 |     Returns
102 |     -------
103 |     b : np.ndarray, shape = [n]
104 | """
105 |
106 | b = np.zeros(len(x))
107 | if k > 0:
108 | idx_k_max = np.abs(x).argsort()[-k:]
109 | b[idx_k_max] = x[idx_k_max]
110 | return b
111 |
112 |
113 | class L0Regularizer(Regularizer):
114 | """
115 | Implements an L0-type regularizer, where the desired number of non-zero coordinates for features is given
116 | """
117 |
118 | def __init__(self,
119 | nnz=None):
120 | """
121 | Create the regularizer.
122 |
123 | Parameters
124 | ----------
125 | nnz: int
126 | desired number of non-zero features
127 | """
128 | self.nnz = nnz
129 | self.weights = None
130 | self.participation_in_selection = None
131 |
132 | def instantiate(self, weights, **kwargs):
133 | """
134 | Attaches weights to the regularizer.
135 |
136 | Parameters
137 | ----------
138 | weights:
139 | regularization weights
140 |
141 | Returns
142 | -------
143 | None
144 |
145 | """
146 | self.weights = weights
147 | self.participation_in_selection = weights.astype(bool)
148 |
149 | def forget(self):
150 | """
151 | Unlinks all problem-dependent information from the regularizer.
152 |
153 | Returns
154 | -------
155 | None
156 | """
157 | self.weights = None
158 |
159 | def prox(self, x, alpha):
160 | """
161 | Return the value of the proximal operator evaluated at the point x and the step parameter alpha.
162 |
163 | Parameters
164 | ----------
165 | x: ndarray
166 | point.
167 | alpha:
168 | step parameter.
169 |
170 | Returns
171 | -------
172 | result of the application of the proximal operator to x
173 | """
174 | if self.nnz is None:
175 | nnz = len(x)
176 | else:
177 | nnz = self.nnz
178 | if self.weights is not None:
179 | result = np.copy(x)
180 |
181 | result[self.participation_in_selection] = _take_only_k_max(
182 | x[self.participation_in_selection],
183 |                 nnz - sum(~self.participation_in_selection))
184 |
185 | return result
186 | else:
187 | return _take_only_k_max(x, nnz)
188 |
189 | def value(self, x):
190 | """
191 | Returns the value for the regularizer at the point x
192 |
193 | Parameters
194 | ----------
195 | x: ndarray
196 | point
197 |
198 | Returns
199 | -------
200 | the value of the regularizer
201 | """
202 | k = sum(x != 0)
203 |         if self.nnz is not None and k > self.nnz:
204 | return np.infty
205 | return 0
206 |
207 |
208 | class L0RegularizerLME(Regularizer):
209 | """
210 | Implements an L0-type regularizer, where the desired number of non-zero coordinates for
211 | fixed and random effects is given
212 | """
213 |
214 | def __init__(self,
215 | nnz_tbeta=None,
216 | nnz_tgamma=None,
217 | independent_beta_and_gamma=False,
218 | oracle: LinearLMEOracle = None):
219 | """
220 | Create the regularizer.
221 |
222 | Parameters
223 | ----------
224 | nnz_tbeta: int
225 | desired number of non-zero fixed effects
226 | nnz_tgamma: int
227 | desired number of non-zero random effects
228 | independent_beta_and_gamma: bool
229 |             If False (the default) then an element of gamma can be non-zero only when the respective
230 |             element of beta is non-zero too. If True then beta and gamma are selected independently.
231 | oracle: LinearLMEOracle
232 | class that encompasses the information about the problem
233 | """
234 | self.nnz_tbeta = nnz_tbeta
235 | self.nnz_tgamma = nnz_tgamma
236 | self.oracle = oracle
237 | self.independent_beta_and_gamma = independent_beta_and_gamma
238 | self.beta_weights = None
239 | self.gamma_weights = None
240 | self.beta_participation_in_selection = None
241 | self.gamma_participation_in_selection = None
242 |
243 | def instantiate(self, weights, **kwargs):
244 | """
245 | Attaches weights to the regularizer.
246 |
247 | Parameters
248 | ----------
249 | weights:
250 | regularization weights
251 |
252 | Returns
253 | -------
254 | None
255 |
256 | """
257 | beta_weights, gamma_weights = self.oracle.x_to_beta_gamma(weights)
258 | self.beta_weights = beta_weights
259 | self.gamma_weights = gamma_weights
260 | self.beta_participation_in_selection = beta_weights.astype(bool)
261 | self.gamma_participation_in_selection = gamma_weights.astype(bool)
262 | if self.nnz_tbeta is None:
263 | self.nnz_tbeta = len(beta_weights)
264 | if self.nnz_tgamma is None:
265 | self.nnz_tgamma = len(gamma_weights)
266 |
267 | def forget(self):
268 | """
269 | Unlinks all problem-dependent information from the regularizer.
270 |
271 | Returns
272 | -------
273 | None
274 | """
275 | self.beta_weights = None
276 | self.gamma_weights = None
277 | self.beta_participation_in_selection = None
278 | self.gamma_participation_in_selection = None
279 |
280 | def optimal_tbeta(self, beta: np.ndarray):
281 | """
282 | Returns tbeta which minimizes the loss function with all other variables fixed.
283 |
284 |         It is a projection of beta onto the sparse subspace with no more than nnz_tbeta elements, which can be
285 |         constructed by taking the nnz_tbeta largest-in-magnitude elements of beta and setting the rest to 0.
286 |
287 | Parameters
288 | ----------
289 | beta : np.ndarray, shape = [n]
290 | Vector of estimates of fixed effects.
291 |
292 | Returns
293 | -------
294 | tbeta : np.ndarray, shape = [n]
295 | Minimizer of the loss function w.r.t tbeta with other arguments fixed.
296 | """
297 | if self.beta_weights is not None:
298 | result = np.copy(beta)
299 |
300 | result[self.beta_participation_in_selection] = _take_only_k_max(
301 | beta[self.beta_participation_in_selection],
302 |                 self.nnz_tbeta - sum(~self.beta_participation_in_selection))
303 |
304 | return result
305 | else:
306 | return _take_only_k_max(beta, self.nnz_tbeta)
307 |
308 | def optimal_tgamma(self, tbeta, gamma):
309 | """
310 | Returns tgamma which minimizes the loss function with all other variables fixed.
311 |
312 |         It is a projection of gamma onto the sparse subspace with no more than nnz_tgamma elements,
313 |         which can be constructed by taking the nnz_tgamma largest-in-magnitude elements of gamma and setting the rest to 0.
314 |         In addition, it ensures that tgamma = 0 wherever tbeta = 0.
315 |
316 | Parameters
317 | ----------
318 | tbeta : np.ndarray, shape = [n]
319 | Vector of (nnz_beta)-sparse estimates for fixed parameters.
320 | gamma : np.ndarray, shape = [k]
321 | Vector of covariance estimates of random effects.
322 |
323 | Returns
324 | -------
325 | tgamma : np.ndarray, shape = [k]
326 | Minimizer of the loss function w.r.t tgamma with other arguments fixed.
327 | """
328 | tgamma = np.copy(gamma)
329 | if not self.independent_beta_and_gamma:
330 | idx = tbeta == 0
331 | idx_gamma = self.oracle.beta_to_gamma_map[idx]
332 | idx_gamma = [i for i in (idx_gamma[idx_gamma >= 0]).astype(int) if self.gamma_participation_in_selection[i]]
333 | tgamma[idx_gamma] = 0
334 |
335 | tgamma[self.gamma_participation_in_selection] = _take_only_k_max(
336 | tgamma[self.gamma_participation_in_selection],
337 | self.nnz_tgamma - sum(~self.gamma_participation_in_selection))
338 | return tgamma
339 |
340 | def prox(self, x, alpha):
341 | """
342 | Return the value of the proximal operator evaluated at the point x and the step parameter alpha.
343 |
344 | Parameters
345 | ----------
346 | x: ndarray
347 | point.
348 | alpha:
349 | step parameter.
350 |
351 | Returns
352 | -------
353 | result of the application of the proximal operator to x
354 | """
355 | beta, gamma = self.oracle.x_to_beta_gamma(x)
356 | tbeta = self.optimal_tbeta(beta)
357 | tgamma = self.optimal_tgamma(tbeta, gamma)
358 | return self.oracle.beta_gamma_to_x(tbeta, tgamma)
359 |
360 | def value(self, x):
361 | """
362 | Returns the value for the regularizer at the point x
363 |
364 | Parameters
365 | ----------
366 | x: ndarray
367 | point
368 |
369 | Returns
370 | -------
371 | the value of the regularizer
372 | """
373 | k = sum(x != 0)
374 | if k > self.nnz_tbeta + self.nnz_tgamma:
375 | return np.infty
376 | return 0
377 |
378 |
379 | class L1Regularizer(Regularizer):
380 | """
381 | Implements an L1-regularizer, a.k.a. LASSO.
382 | N.B. Adaptive LASSO is implemented by providing custom weights.
383 | """
384 |
385 | def __init__(self, lam):
386 | """
387 | Creates LASSO regularizer
388 |
389 | Parameters
390 | ----------
391 | lam: float
392 | strength of the regularizer
393 | """
394 | self.lam = lam
395 | self.weights = None
396 |
397 | def instantiate(self, weights, **kwargs):
398 | """
399 | Attach regularization weights
400 |
401 | Parameters
402 | ----------
403 | weights: ndarray
404 | individual weights for the regularizer's coordinates.
405 |
406 | Returns
407 | -------
408 | None
409 | """
410 | self.weights = weights
411 |
412 | def forget(self):
413 | """
414 | Unlinks all problem-dependent information from the regularizer.
415 |
416 | Returns
417 | -------
418 | None
419 | """
420 | self.weights = None
421 |
422 | def value(self, x):
423 | """
424 | Returns the value for the regularizer at the point x
425 |
426 | Parameters
427 | ----------
428 | x: ndarray
429 | point
430 |
431 | Returns
432 | -------
433 | the value of the regularizer
434 | """
435 | if self.weights is not None:
436 |             return self.lam * self.weights.dot(np.abs(x))
437 | return self.lam * np.abs(x).sum()
438 |
439 | def prox(self, x, alpha):
440 | """
441 | Return the value of the proximal operator evaluated at the point x and the step parameter alpha.
442 |
443 | Parameters
444 | ----------
445 | x: ndarray
446 | point.
447 | alpha:
448 | step parameter.
449 |
450 | Returns
451 | -------
452 | result of the application of the proximal operator to x
453 | """
454 | if self.weights is not None:
455 |             return (x - alpha * self.weights * self.lam).clip(0, None) - \
456 |                    (- x - alpha * self.weights * self.lam).clip(0, None)
457 |
458 | return (x - alpha * self.lam).clip(0, None) - (- x - alpha * self.lam).clip(0, None)
459 |
460 |
461 | class CADRegularizer(Regularizer):
462 | """
463 |     Implements the Clipped Absolute Deviation (CAD) regularizer
464 | """
465 |
466 | def __init__(self, rho, lam):
467 | """
468 | Creates CAD regularizer.
469 |
470 | Parameters
471 | ----------
472 | rho: float
473 |             clipping threshold: coordinates whose magnitude exceeds rho all receive the same constant penalty.
474 | lam: float
475 | strength of the regularizer
476 | """
477 | self.rho = rho
478 | self.lam = lam
479 | self.weights = None
480 |
481 | def instantiate(self, weights=None, **kwargs):
482 | """
483 | Attach regularization weights
484 |
485 | Parameters
486 | ----------
487 | weights: ndarray
488 | individual weights for the regularizer's coordinates.
489 |
490 | Returns
491 | -------
492 | None
493 | """
494 | self.weights = weights
495 |
496 | def forget(self):
497 | """
498 | Unlinks all problem-dependent information from the regularizer.
499 |
500 | Returns
501 | -------
502 | None
503 | """
504 | self.weights = None
505 |
506 | def value(self, x):
507 | """
508 | Returns the value for the regularizer at the point x
509 |
510 | Parameters
511 | ----------
512 | x: ndarray
513 | point
514 |
515 | Returns
516 | -------
517 | the value of the regularizer
518 | """
519 | if self.weights is not None:
520 | return self.lam * np.minimum(self.weights * np.abs(x), self.rho).sum()
521 | return self.lam * np.minimum(np.abs(x), self.rho).sum()
522 |
523 | def prox(self, x, alpha):
524 | """
525 | Return the value of the proximal operator evaluated at the point x and the step parameter alpha.
526 |
527 | Parameters
528 | ----------
529 | x: ndarray
530 | point.
531 | alpha:
532 | step parameter.
533 |
534 | Returns
535 | -------
536 | result of the application of the proximal operator to x
537 | """
538 | x = np.atleast_1d(x)
539 | v = np.copy(x)
540 | idx_small = np.where((np.abs(x) <= self.rho) & (self.weights > 0 if self.weights is not None else True))
541 | if self.weights is not None:
542 |             v[idx_small] = (x[idx_small] - self.weights[idx_small] * alpha * self.lam).clip(0, None) - \
543 |                            (- x[idx_small] - self.weights[idx_small] * alpha * self.lam).clip(0, None)
544 |
545 | else:
546 | v[idx_small] = (x[idx_small] - alpha * self.lam).clip(0, None) - (
547 | - x[idx_small] - alpha * self.lam).clip(0, None)
548 | return v
549 |
550 |
551 | class SCADRegularizer(Regularizer):
552 | """
553 | Implements Smoothly Clipped Absolute Deviations (SCAD) regularizer.
554 | """
555 |
556 | def __init__(self, rho, sigma, lam):
557 | """
558 | Creates SCAD regularizer
559 |
560 | Parameters
561 | ----------
562 |         rho: float, rho > 1
563 |             rho * sigma is the second knot of the spline
564 |         sigma: float, sigma > 0
565 |             first knot of the spline
566 |         lam: float, lam > 0
567 |             strength of the regularizer
568 | """
569 | assert rho > 1
570 | self.rho = rho
571 | self.sigma = sigma
572 | self.lam = lam
573 | self.weights = None
574 |
575 | def instantiate(self, weights=None, **kwargs):
576 | """
577 | Attach regularization weights
578 |
579 | Parameters
580 | ----------
581 | weights: ndarray
582 | individual weights for the regularizer's coordinates.
583 |
584 | Returns
585 | -------
586 | None
587 | """
588 |
589 | self.weights = weights
590 |
591 | def forget(self):
592 | """
593 | Unlinks all problem-dependent information from the regularizer.
594 |
595 | Returns
596 | -------
597 | None
598 | """
599 | self.weights = None
600 |
601 | def value(self, x):
602 | """
603 | Returns the value for the regularizer at the point x
604 |
605 | Parameters
606 | ----------
607 | x: ndarray
608 | point
609 |
610 | Returns
611 | -------
612 | the value of the regularizer
613 | """
614 | total = 0
615 | x = np.atleast_1d(x)
616 | for x_i, w in zip(x, self.weights if self.weights is not None else np.ones(x.shape)):
617 | if abs(x_i) < self.sigma:
618 | total += w * self.sigma * abs(x_i)
619 | elif self.sigma <= abs(x_i) <= self.rho * self.sigma:
620 | total += w * (-x_i ** 2 + 2 * self.rho * self.sigma * abs(x_i) - self.sigma ** 2) / (2 * (self.rho - 1))
621 | else:
622 | total += w * self.sigma ** 2 * (self.rho + 1) / 2
623 | return self.lam * total
624 |
625 | def prox(self, x, alpha):
626 | """
627 | Return the value of the proximal operator evaluated at the point x and the step parameter alpha.
628 |
629 | Parameters
630 | ----------
631 | x: ndarray
632 | point.
633 | alpha:
634 | step parameter.
635 |
636 | Returns
637 | -------
638 | result of the application of the proximal operator to x
639 | """
640 | x = np.atleast_1d(x)
641 | v = np.zeros(x.shape)
642 | for i, w in enumerate(self.weights if self.weights is not None else np.ones(x.shape)):
643 | alpha_eff = alpha * self.lam * w
644 | if w == 0:
645 | v[i] = x[i]
646 | elif abs(x[i]) > max(self.rho, 1 + alpha_eff) * self.sigma:
647 | v[i] = x[i]
648 | elif self.sigma * (1 + alpha_eff) <= abs(x[i]) <= max(self.rho, 1 + alpha_eff) * self.sigma:
649 | v[i] = ((self.rho - 1) * x[i] - np.sign(x[i]) * self.rho * self.sigma * alpha_eff) / (
650 | self.rho - 1 - alpha_eff)
651 | else:
652 | v[i] = (x[i] - self.sigma * alpha_eff).clip(0, None) - (- x[i] - self.sigma * alpha_eff).clip(0, None)
653 | return v
654 |
655 |
656 | class DummyRegularizer(Regularizer):
657 | """
658 | Fake regularizer that has no effect.
659 | """
660 |
661 | def value(self, x):
662 | """
663 | Returns the value for the regularizer at the point x
664 |
665 | Parameters
666 | ----------
667 | x: ndarray
668 | point
669 |
670 | Returns
671 | -------
672 | the value of the regularizer
673 | """
674 | return 0
675 |
676 | def prox(self, x, alpha):
677 | """
678 | Return the value of the proximal operator evaluated at the point x and the step parameter alpha.
679 |
680 | Parameters
681 | ----------
682 | x: ndarray
683 | point.
684 | alpha:
685 | step parameter.
686 |
687 | Returns
688 | -------
689 | result of the application of the proximal operator to x
690 | """
691 | return x
692 |
693 |
694 | class PositiveQuadrantRegularizer(Regularizer):
695 |     """Projects the coordinates that correspond to random effects (gamma) onto the non-negative orthant; can wrap another regularizer."""
696 | def __init__(self, other_regularizer: Regularizer = None):
697 | self.other_regularizer = other_regularizer
698 | self.positive_coordinates = None
699 |
700 | def instantiate(self, weights, oracle=None, **kwargs):
701 | self.positive_coordinates = ([False] * oracle.problem.num_fixed_features +
702 | [True] * oracle.problem.num_random_features)
703 | if self.other_regularizer:
704 | self.other_regularizer.instantiate(weights=weights, **kwargs)
705 |
706 | def value(self, x):
707 | y = np.infty if any(x[self.positive_coordinates] < 0) else 0
708 | if self.other_regularizer:
709 | return y + self.other_regularizer.value(x)
710 | else:
711 | return y
712 |
713 | def prox(self, x, alpha):
714 | y = x.copy()
715 | y[self.positive_coordinates] = np.clip(x[self.positive_coordinates], 0, None)
716 | if self.other_regularizer:
717 | return self.other_regularizer.prox(y, alpha)
718 | else:
719 | return y
720 |
721 |
722 | class ElasticRegularizer(Regularizer):
723 |     """Adds a ridge term eps/2 * ||x||^2 on top of another regularizer (elastic-net-style smoothing)."""
724 | def __init__(self, eps=0, other_regularizer: Regularizer = None):
725 | self.other_regularizer = other_regularizer
726 | self.eps = eps
727 |
728 | def instantiate(self, weights, **kwargs):
729 | if self.other_regularizer:
730 | self.other_regularizer.instantiate(weights=weights, **kwargs)
731 |
732 | def value(self, x):
733 | y = self.eps / 2 * np.linalg.norm(x) ** 2
734 | if self.other_regularizer:
735 | return y + self.other_regularizer.value(x)
736 | else:
737 | return y
738 |
739 | def prox(self, x, alpha):
740 | if self.other_regularizer:
741 | return self.other_regularizer.prox(x / (1 + alpha*self.eps), alpha / (1 + alpha*self.eps))
742 | else:
743 | return x / (1 + alpha*self.eps)
744 |
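745 |
746 |
747 | # A few self-contained sanity checks (illustrative only). The L1 prox above is the
748 | # classic soft-thresholding operator sign(x) * max(|x| - alpha * lam, 0); the CAD
749 | # prox soft-thresholds only the coordinates with |x_i| <= rho and passes the rest
750 | # through; _take_only_k_max keeps the k largest-in-magnitude entries in place.
751 | if __name__ == "__main__":
752 |     x = np.array([-2.0, -0.3, 0.0, 0.4, 3.0])
753 |     lam, alpha = 1.0, 0.5
754 |     expected = np.sign(x) * np.clip(np.abs(x) - alpha * lam, 0, None)
755 |     assert np.allclose(L1Regularizer(lam=lam).prox(x, alpha), expected)
756 |     assert np.allclose(CADRegularizer(rho=1.0, lam=lam).prox(x, alpha),
757 |                        np.array([-2.0, 0.0, 0.0, 0.0, 3.0]))
758 |     assert np.allclose(_take_only_k_max(x, 2),
759 |                        np.array([-2.0, 0.0, 0.0, 0.0, 3.0]))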
--------------------------------------------------------------------------------
/paper/synthetic_data_4_1/problem_90.csv:
--------------------------------------------------------------------------------
1 | ,group,fixed+random,fixed+random,fixed+random,fixed+random,fixed+random,fixed+random,fixed+random,fixed+random,fixed+random,fixed+random,fixed+random,fixed+random,fixed+random,fixed+random,fixed+random,fixed+random,fixed+random,fixed+random,fixed+random,variance,target
2 | 0,0.0,-0.2244866681993267,-0.22752393952995908,-0.5913989309642067,-0.7992276540992244,1.1901095006597047,-1.3658948654777796,0.7209558973473876,-1.3255617524873,1.8569316481033233,1.2471803372874875,-1.3542583644005486,-0.7404734708592728,0.23407542652578145,-1.3622347570763116,0.01387541411235449,1.3095642156178804,1.0820754788466143,0.7819487421178428,-1.2604084386482806,0.3,2.1321111518039486
3 | 1,0.0,1.5597206287423628,0.419246294128639,0.08438582694076066,0.969270028202663,1.4934167749035216,1.6484453301762796,0.900786757739135,-0.1450230810732206,1.2284007159787826,0.5819337202677335,0.060853639037884565,-0.13209662692169502,-1.8245634410519302,0.4149332991528329,1.2163714771052068,-1.1236592914271428,1.8237395679616226,-0.7023460732492226,1.0089522194215859,0.3,-6.678868486869064
4 | 2,0.0,-0.5505488876569176,1.428376922084137,0.5817385938293546,1.4406867641685772,2.0851441107398876,-1.276785380754898,0.7030489137159247,0.13181065693088848,0.14912108491756956,0.06173337344195309,-0.044377442920244835,-0.5456863995058929,-0.7143913212497325,0.23638158838397447,-0.20709683358511174,0.1772850933560696,0.7372495044515823,0.3069635892791023,-0.0008623274393857236,0.3,3.0052439436765823
5 | 3,0.0,-1.170731723710973,-2.0853980733008974,-0.2835974666630017,0.062108036252566105,-0.7348280578481532,-1.3755286618821698,0.9095170868137478,0.14361849952607975,-0.9484937700099666,-0.2982120773712407,-0.34111262957757943,0.1886547790927999,-1.2687955033827998,0.046303461261291694,0.00918547451446067,-0.13598645681380966,0.6997766765418909,-0.04163458761380407,-1.2515487572320647,0.3,-6.932533328282803
6 | 4,0.0,0.5649962038443794,-1.01341966573429,-0.30155674342997557,0.5766815284738619,0.643279266668083,-0.4665966102659386,0.37212936442009314,0.013438865714259039,0.21693267913188832,0.79168412479072,-1.0336667493425835,0.991105082796066,-1.7353605213115408,-1.3647624992570242,1.7886210173751815,0.22029561802429923,0.6442448653393879,0.05013137195413789,0.5974484330156349,0.3,2.838403522506481
7 | 5,0.0,-0.800868158635225,-0.8616090386661326,-0.15555673647062118,-0.18144080647551136,-1.7802349387587317,-0.3749639167242035,0.03799234335683255,-1.3669511457967298,-0.1339257222437861,1.3441658105002983,1.942533815608267,-0.14279198032605536,1.447322273491823,1.6903320460198743,-0.16301713748611285,1.1671450028485435,0.5190016276802424,-1.2459804678675948,-0.953205530093717,0.3,-4.095318653879607
8 | 6,0.0,-0.33827165436918766,-0.31735903798623827,-0.7113735768178557,1.0128002955701916,-1.255725065335872,1.2292798485535157,-0.8913331061619351,-1.4734737767030266,0.6319273869798578,-1.0939224024497243,-1.6514906313273483,-0.05682001092167354,-1.2469878023873158,1.2179679711294935,-0.2534454882949324,0.8780793295146261,-0.3663854821427183,-0.2484268674218272,1.2348765272650484,0.3,2.613642652839045
9 | 7,0.0,-0.8677696855866175,-0.8649712649548833,-0.9021264779345428,3.277735854043973,0.5288572849750095,0.6417328366983951,-1.9451020148955005,-0.3954940504918989,-0.7220661409364774,0.1112526370134621,1.4258084792633385,-0.23710745491210036,-0.43031853848939966,-0.5359354737602308,0.9713974997553474,0.4041889068820203,1.166228801472284,1.1365884910969293,1.2527327736424987,0.3,15.341850621561624
10 | 8,0.0,-1.5162257656987126,-0.47620694214372555,0.7123941763290083,0.4724908696686891,-0.288589734082507,0.952003070761885,-0.6860453228128426,0.9386415785432609,0.07770297594854315,-0.7224784837613516,-0.7259846470022948,1.6999435928994961,0.6494253799745658,1.8389963840404926,0.5495150431384747,1.0426580024010335,0.5024065968595639,2.4947215683829476,0.10716443472979692,0.3,49.77578108873976
11 | 9,0.0,1.5642692650763743,-0.539171993858925,1.514447834090836,0.17607961110248457,-0.03742169238494478,-0.2947328358402955,1.3652833963206066,-0.7635995194062135,-0.1308433964660136,0.7497235604379646,-0.583882845674773,1.243544296444007,-1.9526220956446398,-1.5547837865678327,-0.2061645879778252,-0.0968616433060692,-1.01950567498876,-1.0230452083825747,-1.2340300611957375,0.3,-10.76064687175977
12 | 10,1.0,0.25704744620507164,1.1127451676575806,-0.7921816230544015,0.3253975871930723,-0.2082767255588266,-1.9342609210600927,-0.5588924329910933,0.9996705639432863,-1.618528616018604,2.287046266575617,2.3681388154398513,-1.2614458364997643,-0.8324643455870864,-1.241512578971333,0.22631698534090883,1.5372954605999285,-0.4561145128569585,-0.939110623761003,-0.49655894748872215,0.3,4.184241626552608
13 | 11,1.0,-0.5053247959116823,-0.7194778833775737,0.2987019938438615,-1.2472488488068785,-1.267042391301875,0.41486721389030856,0.1389170991491709,1.0923514351766703,0.9017171244531973,0.5498867642843676,0.23593635329464346,0.48218132423278004,-0.6799540866878336,-1.3299941063289222,-0.4268588497125723,0.6331265905071692,-1.7088700581536764,-0.46138295173920735,1.3757947320310653,0.3,1.1163806381618464
14 | 12,1.0,-1.0960688976032682,1.3229130273465874,-0.6386486669135573,-1.3285979744330632,0.31728228269844716,0.1751891890481305,1.5499691675424028,0.5473329440416272,-1.7716134427884502,-0.13200323339521353,0.45327639079696275,-1.7577283395548553,-0.4801854785558956,-0.31207220578186223,-0.16297193844441818,-0.29253053887198366,-0.7124280344676523,-1.9839088297544512,0.6020578116537944,0.3,-28.890530573060605
15 | 13,1.0,-1.7415232018615825,1.101332979462528,1.1539925277216818,-1.3690822988612168,-1.6127313002174553,0.8329502408334575,-0.3325623063532351,-1.069578931618106,0.8927889684277418,-0.2488761456318799,-1.6403889760953834,-2.062024108281236,-0.5621956987325923,-0.3650932685607454,-0.9730733207179624,0.3037526443089218,0.7941677638821358,-1.2905784255056272,-0.8756435643173559,0.3,-19.798467428809538
16 | 14,1.0,0.538259250123183,0.1286173835515705,1.012158575439957,0.8682827792658456,-0.8112201783989348,0.529280923636759,-0.2135362355956471,-1.4727263838231854,-0.8362708162150879,0.1818904200527307,0.7510299554736201,1.0802004926560727,0.30893546306091096,1.618288334800015,0.08502810630508995,-0.9704888537105852,1.5767542378063597,0.299217581424797,0.4238876124207526,0.3,13.238691740329955
17 | 15,1.0,-0.7401814872567841,-1.5463321054208934,0.0989059567971568,-0.3359654609466631,0.9836042415140316,-0.41844717554838073,-1.5963259980690094,-1.312526343474097,0.343781256440967,-1.1269823443441918,-0.29634892062974266,-0.6531268344053315,-0.6642054880722552,0.21264247235163256,-0.3596229676802594,-0.25121430102668446,2.427284555773872,-0.8212959422180346,0.1658479426354693,0.3,-23.831242786375757
18 | 16,1.0,0.3141677446063289,0.3962386509366521,0.7665592941992769,-1.484768337780162,-0.7488949631880683,0.833904137297132,1.4543370568410434,0.4272660850828291,0.23742665149011624,0.964813285219046,1.72910719466984,1.041328910614811,-0.28968902381550377,1.2508685423007708,1.5950974641277573,-1.1336050332509608,-0.16319398717367026,1.1535922987690024,-0.23550833995891432,0.3,26.623667523344054
19 | 17,1.0,-0.7264007714032545,-1.4481214566975855,1.3675510143893428,0.10713015616921548,0.6638086659542387,-0.6258478888084051,-0.4140722036938379,0.49626014942634306,-1.120870308845793,0.19783203643013908,0.30759435479018976,-0.5753008263162712,1.1938206017342985,1.468441131787554,0.36320032240308786,-1.044913537958718,-1.1587991339041015,-0.20960058927434003,-0.8552331159675324,0.3,-4.774044466723708
20 | 18,1.0,-1.9910402280087491,-1.328115085905879,1.0552737368622298,-0.4774285491346546,0.7725337437221986,2.989015087176148,0.629331429029443,0.5838570847375355,-0.9697568530385351,0.15086813122079112,-1.0450517083246371,-0.9304677671196631,0.6509346306814128,0.9990571069249184,-0.2473285117118233,-1.5662828448677704,-0.2234838066438084,-0.3116611217877492,-0.9457319077244073,0.3,-4.811804226286589
21 | 19,1.0,-2.468976486238379,-1.2918749362480642,-0.3384343040915324,-0.57544255133516,0.692230631317249,0.08084373844058473,1.865951287650184,-0.23312006017671175,0.5890182796759609,-0.7710059505761877,0.7648660650525987,0.13366225325185904,-0.30830542705516584,0.13860052342736578,-0.3006077895393274,0.0286518845534361,-0.020707098778540274,0.22606113671120692,1.2851122590363937,0.3,-2.389078077703908
22 | 20,1.0,-0.7166892099088773,-0.5786639274392535,0.8513433078084659,-0.1492331524147842,1.4664742370693837,0.5747177491129308,-1.5249138600751073,1.6557061559652253,1.2274898606463815,2.114216699930639,0.4105984342056868,-1.2094455566308313,-0.12222173786249033,-0.3252296747323041,-0.012332048875747039,1.075820388808208,-0.8119354349563246,-0.7703834522787498,-0.04300360263511327,0.3,17.32033508410814
23 | 21,1.0,0.25946632243040474,0.04202761754527817,1.4769953795615103,1.4976293928319415,0.35039674045274666,-1.8509622321583903,-0.38328132303661616,-0.48074477997923304,0.6385216743901344,1.3194031690451278,0.07596729267939853,0.21692785296711492,-2.1697283413267,-1.3494596298074157,-0.5866006760153842,0.7467623911668098,-0.5660377019619993,0.5897018419961683,-0.4031557931651266,0.3,1.193615801228944
24 | 22,1.0,-0.3107313055675877,1.0407708843333197,0.3260970156354438,-0.35071193373569465,-0.45725633722770054,-0.06388703371122254,0.3547437444116614,2.1080898389431355,-0.2919801171071986,-0.0289753509614359,0.5750904118229366,-0.7448380441674244,-1.7896797950698144,1.439577791283145,1.8153776419478536,1.209938937636575,-1.354406509169351,0.4254786504461752,0.5441112326778246,0.3,33.137493720707354
25 | 23,1.0,-1.1812048763729304,0.4720250675374311,-1.2693727695921146,1.3806701330764108,1.1340116877776332,0.8888957592485491,0.2677799108768402,-1.7670014237797713,2.677255821182534,0.7486404217577289,-0.38030166281595457,0.036374911091457525,-0.8282672464964551,0.9134551043851957,-2.3078996211289926,0.463991311311547,-1.3452851489449063,-0.1587919058329169,-0.1700048620562423,0.3,19.580594383452038
26 | 24,1.0,0.008398088871171112,2.508723486104378,-1.3060311073277906,-1.489495567052419,3.814797659819095,-0.5601523786440298,0.3066099298659933,-0.33072638963896683,-0.22410692223861975,1.3120174755204101,-3.1330077318244736,0.7937644071905875,-0.002350002919974134,-1.0224811841494879,-0.30420798147872463,-0.0819586670173242,0.09904151821731265,0.5662856595324889,-1.5461241181381435,0.3,8.272635094260105
27 | 25,2.0,1.3263276633901901,0.5483371183891796,0.16812890598009553,0.8717214522669134,-0.8520301547884715,1.0292508098818611,-0.5902496080624249,-0.014198319341497104,-1.8376121538118613,0.4089080166673226,-0.8615810438424262,-1.5554421969836179,0.25937930822523075,-1.010701701380683,0.5779119509702065,-0.6555331859590128,2.2876374718858346,0.5037311767195386,0.4506564527671961,0.3,-4.902565296797794
28 | 26,2.0,-1.249916112051954,-0.23785176674317274,-2.3925428196950076,0.3822604786558177,-1.0432546782983414,-1.6099766955092778,2.107432029324337,-0.7504649941285705,0.7665421859525311,-0.20963066145726167,-1.8449657275694233,0.3604086464098424,0.7394023712027864,-0.37892461152059614,-0.6656184751736267,0.7420928409939934,0.64649437241238,-2.689177654254134,0.3151185614663755,0.3,-28.03779948701791
29 | 27,2.0,-0.5160095667309368,0.8013223463379382,0.45452134105948666,-0.11154588137917014,-0.3791421018204273,-0.4597018378127177,-2.5743193339317716,1.0699443510203255,-0.2231406490425237,-0.7181806348527214,1.1597186116054168,-1.600723300657653,0.828567960986994,1.1270467257843326,-0.4316648371525521,-0.36408088705478303,0.49550145511894844,0.33402510588837936,-0.8649775924578073,0.3,-8.460559210076953
30 | 28,2.0,-0.8099993005350782,0.41274682951166175,-0.31065150819789733,-0.1441644124199604,-1.0286238184721483,0.7494639432524223,0.0840612690643611,1.2047024699609303,1.0358962477062872,1.8931989602503054,-1.6382281496880426,-0.5481330925314356,1.0407574768166634,1.4143627555363494,-0.9587787095225043,-0.5939657753286195,-0.44512924453370156,0.8082773624221609,0.7184400940568124,0.3,39.465121564920324
31 | 29,3.0,0.2500312576076877,1.9882899127988514,-0.3492342510890661,-0.012466515236356408,-0.5699466003154572,-1.3538642011797042,0.6078802060770054,0.26516885587410177,0.774422632186062,0.9634615905978853,-2.026898741023672,0.6681779536700524,1.0558676357042103,-0.5440577662779537,-0.4014190165122289,2.1091714521166236,0.18544165561582251,0.09843847016960951,0.7335075458152306,0.3,28.365513450110978
32 | 30,3.0,0.4561948493307074,0.9947219021153157,-0.38787080367730076,0.28173858773146143,-0.3030809514518942,-0.7024838745453851,-0.7635709764407437,0.13677618189912805,-0.22263653471732772,2.381829645858025,-0.33228703523735165,-0.5672415708352935,1.1034952108021923,1.7456401941366986,-1.6105576287156191,0.7000755245151421,-0.26067232368892584,-0.00512369493008181,-0.5425663540648475,0.3,38.728714105714616
33 | 31,3.0,1.4322947738174614,1.895949951432936,3.3665633295303956,0.14654412131033528,0.07837633173314554,0.05693848233113363,0.27930369144194717,0.031321149989744945,-0.040351026008271486,0.1095209859368495,-0.6075322946421503,-0.562641458254659,1.792064622882827,0.8246523684429106,-0.609681228987642,0.34503975394910186,-0.1302059034667614,0.19771297456750755,0.5129035573300385,0.3,15.632692617980098
34 | 32,3.0,-0.03426326800939846,-0.5314358683007532,-0.16888917488087218,-0.1829977188579266,-0.5100042686003975,0.5695217897787942,0.6831368842858903,-1.0096375784410474,1.6707454946555629,-0.6614326034939323,0.06628431978386971,1.1315733047038048,-0.64486001057315,1.0763501553333839,-0.7471956325265975,1.3730449259016524,-0.6025204776569497,1.2578651622329413,-2.085136949486377,0.3,20.62083184701268
35 | 33,3.0,1.8939155359559976,0.5518888177533614,-1.1746898305146116,0.5290623307218517,-1.645222123423259,1.1572881948210894,-2.1047333107620463,0.5495623113980533,0.4561569147070618,-0.031861330937840285,0.8341262098027968,-0.288814886636109,0.4379428377343563,0.17590126114282947,-2.669047669449244,2.367320994751389,0.4271893097947252,0.7593543717153868,1.660325702304021,0.3,40.014064132248905
36 | 34,3.0,1.7779346870395423,0.441528769440468,0.14239123771009915,-0.7936758449937273,-0.6956932472504288,-1.8732531480808845,-0.89402240402269,-0.25102217302222046,-0.48982126590661235,-0.158468658001881,0.42071393915702743,-0.25998808095271586,1.0865781614523886,1.3938961119833377,-0.708650281550852,0.9486510802456547,2.173043653072448,0.4939825650478497,-1.3198613729520745,0.3,11.126476560742606
37 | 35,3.0,0.12218027929538877,0.5916122615859635,-0.9099308226593869,-0.6973987219742181,0.44612293883755505,1.2408562243409993,-1.2845878946243163,0.3991147107668119,1.7009056554301867,0.09380108188549527,0.5270461428610641,0.6076674262850609,0.30916332474353075,-0.9304693064208932,-0.19415054830391368,0.3334490424736431,0.5295034480997639,0.09907299267633109,-0.0555725580731796,0.3,4.182655424546208
38 | 36,3.0,1.1263054133343513,0.3302836177446436,2.1479115341896997,-0.5924024455666762,1.1512825206330206,-0.9311553506099102,-0.7359038113379008,-0.8719794437467372,1.1487176295599222,0.37084874986287564,-1.1355088923131254,0.19617266781095885,1.0298244230871691,-0.5215234841312226,1.3020438747851835,-1.3060361320234743,-0.8368635294696872,-0.546145295793722,-0.14271233021256777,0.3,-25.76541484482733
39 | 37,4.0,-1.5724730714593051,-0.6409511288680404,0.4374372385742216,0.29427083325825404,0.4545725947202606,-0.18363050747459458,0.7974427134203903,-0.6157173582716559,-2.5513373981949123,-0.5455045398834357,1.3859539183781915,-0.7403128501392187,-0.4742948264436197,1.0453030014157374,0.5488663531197803,-0.8590754158337854,0.998023703147144,0.7145269729300804,1.22255067915234,0.3,-6.36155269642005
40 | 38,4.0,1.910605177953172,1.0080652205414087,-0.788104332345124,-0.626841454741378,1.0546301390203254,0.48800310856073426,0.654453172322898,2.1352925779532548,-0.10069847749400035,1.0462838212562842,0.9178690287715038,0.3775525072867151,-0.859837765259325,0.948536146119574,1.2835781078129518,-1.0711762294357619,-1.6411154273097701,-0.7973706433168535,-0.4252332452505015,0.3,10.222060138890367
41 | 39,4.0,1.4306395880549652,-0.4154428271677703,0.274855247900105,1.9555211637886727,-0.025919539577754572,2.187009375231521,1.856535105613084,-0.9318309930199622,0.2894018507274458,-0.6026095954790165,-1.2130845797338685,1.2255279713437894,-1.608235097266588,0.3209047995611994,0.7618523003845404,0.8900184084974753,-2.0759575014963696,0.5264491205287875,0.4371550398629969,0.3,15.860625757865987
42 | 40,5.0,1.0911972445231644,1.2927545135899752,1.368887311585816,-0.09191648775816327,-0.14995661087677292,-0.8162217652798667,0.3761663085433162,2.618984625640187,0.2541428681071078,1.0412936270551103,-0.5005897896771014,-0.30473738197747563,-0.40856174503226766,-1.305909037989187,0.022176330427500703,0.17068945918320327,1.4293648151035308,2.4075666382150236,0.8141642214085875,0.3,32.31629302837598
43 | 41,5.0,0.1463284474651884,-0.5105647720825695,0.44138435190608644,-0.42393758266351084,-0.3962746451122455,-1.1518882579031113,-2.0489637405400525,2.835445681614868,1.2504975938135239,0.13547542236227242,0.10563300340949965,0.372151346183034,0.4207473426340737,-0.8976529206318832,0.6434607385938469,0.7620153929315264,-0.29920075332085355,1.0373858654577197,0.7696505435167661,0.3,14.188736305345557
44 | 42,5.0,-0.33711028841933904,0.09369231598112872,0.48278435296627825,-0.07079058145621267,0.7819493299553844,-0.22372706781163015,-1.3674673103544694,-0.6958459079265052,0.9467922000072062,0.5354375184548659,-0.32620304400144473,1.7836210554379934,1.14267330152584,-0.754643319431238,0.6748536358304776,-1.5397907440111784,0.30690156366420207,0.1976541781681481,-0.543726708723864,0.3,-4.467308273777878
45 | 43,5.0,0.4808899997428752,-1.1713907853963523,-0.007986162438611983,1.7370422156956609,1.1290159126992723,-1.599027578518095,-1.081276412877631,2.5917302490433816,0.18815622138419322,1.2396250420680457,-0.13573195959621773,-0.707607735542635,-0.5236068897601127,2.16951785245597,-0.2556541849537443,0.5525108633328203,-0.5331951944953579,0.3349474505228615,-1.1678759101983176,0.3,19.452197164765348
46 | 44,5.0,1.5497261522500392,-0.8387080413744996,0.8988970311063798,0.4078923671132976,-1.104651909365117,-1.2158314370142254,1.1120521477392087,0.020840826069611965,1.838338668052657,1.367795331538051,2.358253283023413,-0.6929251132150605,1.5972577748066057,0.12659664387180478,0.6880247315181186,-0.5531450022165406,-1.567811310361761,-0.33924162865835444,-0.8614648267336321,0.3,-10.828901121678847
47 | 45,6.0,-2.1360228338340206,0.33403424875950016,0.3513132352394641,-0.1245261111595957,1.498766797630689,1.3107556333325647,0.5750602494880356,-1.5763378188814265,0.020537404933153085,1.8296378719351625,0.45506657871946476,0.5907533184536697,-1.289237629921717,2.205085099322303,-0.8637530312217283,0.9315328750541942,-0.05037910967664721,-1.0815057533038623,2.130886500374426,0.3,27.11514569166623
48 | 46,6.0,-0.3847235125044858,0.14444355688349708,0.21366282675281,-0.42660688725572354,0.2651612318302846,-0.17425392291417954,-1.1134260755175664,-0.8568697717489484,0.6017885045015057,-1.3387105017731067,0.7825178403779215,1.9041175126917342,-0.7602497324836589,1.6987180296158926,1.5851884304984585,-0.9011592852171273,0.2544791288446418,-2.8496281935772036,1.203409573079691,0.3,-14.706534194239298
49 | 47,6.0,0.644005045223983,-1.4096372035503415,0.7695453095571158,-1.0708597119038095,-1.662382854654681,-1.9792890745074936,1.4310054671509493,-1.3614328349754987,-0.42169022773928594,0.9859330832138322,0.09276323384434841,1.528138908014638,1.0231951817210665,0.5389815758349855,0.007246634633935868,0.7129953187060203,0.46776696769727366,0.17565360356738652,0.38880725063999755,0.3,8.817365732165715
50 | 48,6.0,1.3044105987167611,-1.3562772438978055,-2.1223236897586117,0.29396012555766143,-0.7607256200100277,1.490901961579517,-0.08897300849800145,0.5779754308317309,2.525460384995302,0.9117693455296231,0.9629230318381274,-0.5366459007182826,-0.4020263710988043,0.19397176036231176,0.7685290962149435,-1.383830324184141,-0.13654341797471614,-1.6781203514009664,-1.5625002778972663,0.3,-15.903424419230982
51 | 49,6.0,0.3258840060538605,1.0649482438981221,-1.4506645983753366,-0.7302407423018472,-0.4209934464172174,1.2999662548074875,1.6009600780035824,-1.2078784639056674,-0.678540800879476,-1.1471085321500027,-0.5226279946500502,-0.35051571625052824,-1.0751087401941386,0.7698920833279113,-0.017266908677497278,-0.6808489048204371,-0.5990487877218934,1.600035145915842,0.17390205140006984,0.3,4.332488167790178
52 | 50,6.0,-0.7767391565800194,-0.7685556037894881,0.0322857125755935,-0.011300575058198993,0.5424100480448364,0.4137593868259467,-0.8969790716199076,1.2245112776887481,-0.4328000002840578,0.7771969412973068,-1.096897756121186,1.7364832731080249,-0.5353602294077046,-0.4446569953704932,-1.3083496287151721,-1.2834298601136886,-0.5532694071001789,0.4060327775019225,-0.8994741458330577,0.3,11.876172963468427
53 | 51,6.0,-0.32612862773987666,-1.12181178735017,-0.5953065733337085,0.7976858455010328,0.33347943151027365,-0.01840423502877494,-0.18561949985080622,-1.821544647858444,-0.015622400542305565,1.7257944457246257,0.8246609727756451,-0.8670569071911065,0.05854179762234863,2.2921931453194055,-0.6928761536192614,2.33804594132147,-0.6641945885882768,-0.11527364193745424,-1.5956179110044046,0.3,26.25231243172783
54 | 52,6.0,0.3690018294247442,0.40846712388791,1.0072690806620392,0.35974190651335763,1.6157723900968168,-1.731542079768297,0.44734177256043894,1.222506633225322,0.40317515826964634,0.32929488860489936,0.7262551250223797,-0.43125997432348134,0.06318162808789493,0.31457936875929604,-1.1414820984967846,-0.7039950837952071,-0.6410865854565188,0.6581367840992023,-0.5327890573244032,0.3,-3.0779623647690912
55 | 53,6.0,1.9310972189013216,1.9504060564997063,0.6044423937981037,-0.3117220041240606,2.7966401756011754,0.8093622382928843,-0.5373419746254818,1.117429277076799,-0.9471917479363178,-0.38369629396148897,-0.10821444862510363,-1.1193044149921605,-0.31955162924074926,-0.031139265919216754,-0.052889193453770404,0.7110976145086788,-2.279717141347936,1.6797853831023275,0.5115615025244667,0.3,17.574580561338994
56 | 54,6.0,-2.4577212172705267,0.97459983685503,-0.6269424586412868,2.413485669166955,-1.1373294654240305,0.41534626077539494,-0.8557187674673804,0.24787838787840344,0.5902430868253131,0.9676261231248107,0.13851920066257706,0.23629973389617198,0.8629069944065353,-0.12589238321360738,-1.0644317916210861,-1.5395741213115173,1.1880664051906553,1.6776478899192924,-1.7572135247612781,0.3,8.701831232358156
57 | 55,6.0,-0.6526842044497884,-1.7332893371042801,1.189284474788592,-0.9240544101497922,-1.0527466272799646,0.7642069221514296,0.303229097175603,0.25519181373170485,-0.1819200760063786,-0.2254035423649898,-0.4303460895881007,-0.29735124276065766,0.18081023581979927,1.3926820675728222,-0.506508783573557,-1.0172225824799352,-1.0218869888749817,0.8212641652129304,-0.4928266767064192,0.3,1.211757656217886
58 | 56,6.0,0.12482057479243489,-2.106352478886781,-1.4776563006800114,1.6824686011638286,-0.43577057256864316,0.09204859264941058,0.9606057906334398,0.2946461192341341,-0.5752616718028564,0.02748800529075304,-0.8808896220568726,0.1065159252394585,0.7897702980011676,-0.1317469167480569,0.5213071980706361,0.6755267415373684,-0.9999672171571113,1.080351115378003,-0.25123094454395906,0.3,12.316285276318544
59 | 57,6.0,1.4207315613239788,-2.0638860389900673,1.2235565478668855,-1.558474037636106,1.7559104855368957,-0.8617530756379504,-0.43121134313640425,-0.743514368698659,1.4108080573195971,1.211673917925988,-1.142061628793611,1.7416595784600082,0.5584577784922123,0.02844933486935366,-0.8496782965811163,0.8537122248777599,0.33302907070697146,1.635351028048647,-0.8807569040079136,0.3,27.034995316880966
60 | 58,6.0,0.04805219538952717,-0.3268335685503063,0.8644034222785685,0.6456720884796178,-0.5453679308040373,0.6103744928310331,1.1058760343976566,-0.8374142627061846,-0.3420670116554816,0.893954808572103,-0.4418719888673857,-0.3673534468786584,0.28767818352673546,0.46811358334295866,-1.5692012847836843,0.8130980206318503,0.1571508915183928,-1.203863955907753,0.700418355909946,0.3,1.951979981693818
61 | 59,6.0,-0.28143943262431637,-0.34264052761657066,0.7555065758650248,0.728945940763142,-1.4864923173885531,1.6560695699998202,0.42032046655043176,-0.45909073257302835,-0.309658128476906,-1.2047589593314443,-1.3275233601311367,-1.324822038295371,0.8041213861226736,-1.8468093498676392,0.8635905955876891,-0.376216367598314,0.1292563308705244,0.8727109281967239,0.3609307753246037,0.3,-18.06168263469628
62 | 60,6.0,-1.2777592277868646,0.14712882713613842,0.6485322125659319,1.1486825055278258,0.6051461765824917,0.6659993282179921,1.3910595335905622,-0.312703934433372,-1.6425348718565798,0.8765787516468784,0.8753431716199411,-0.7866938424108566,-1.0056673158658942,-0.11658266018988862,0.3575935493643408,0.5990567452270522,-0.3260823989816051,-0.2202339172443418,-1.0389071415803166,0.3,4.309121600207897
63 | 61,6.0,-1.9517769177051687,-1.5938781927531132,-1.2169282329257547,-2.8233064262462966,-0.29622329010711235,0.21205456888646396,-0.24868897634784368,-0.539141020615397,-1.096533105742008,0.9313985033749356,0.23561323451949737,-0.31552971022367343,0.8167349458357017,0.9322855722958902,-0.7507434649291804,-0.4035028714422291,0.24061206432242815,1.153231922131976,-0.46689253374743567,0.3,7.359093850172344
64 | 62,6.0,0.5161677765260436,-0.30201100161262734,0.3173304115739042,-0.10161039890652211,-0.23759901079980617,-0.11804973485647666,1.3090452546992906,-0.8889178729427898,0.01826889433107548,-0.4431693167142069,-0.825505214673138,-0.9189279127216265,-0.3432742156362213,-0.16993290832784955,-1.079921181222734,-2.2909184087712853,1.0637003561123066,-2.414677685481196,0.8582548983677535,0.3,-50.76421433580461
65 | 63,7.0,-0.5488430295858576,0.5407440261429346,1.62601566488994,1.8210157861174565,0.4607005428164519,-1.0490331655693688,-1.4774560239288752,-0.22136277650883912,-1.3715524892474542,-0.6620844925645676,-0.22267526344076288,-1.1004228184833917,0.8663458938564048,1.331306206881577,0.6322514115884523,-1.3671276844270612,-1.0960757211069458,0.6134377657043416,-0.2842780520563757,0.3,-9.138895992809756
66 | 64,7.0,0.9632088225153572,1.2764604846367205,-0.17172958237457164,-1.2346194683748923,1.9717741329696141,-1.08257607782286,1.3849766328254427,-0.5527869074232855,-0.5190140250132845,1.0361117008377754,0.648019313425669,1.6135989959315724,-0.7198478838836735,0.41438997122505,0.17882901776443133,1.7609189470915647,-2.758976516199775,0.7098898566440575,-1.1486042188043235,0.3,24.63513765258931
67 | 65,7.0,0.8572080050395556,1.1156873515281251,1.148908193037563,1.052439989894458,0.6129918202355035,0.07312937924197613,1.0697510924151346,1.9370855162650693,-1.179833863500429,0.04552307751792673,0.5436638591415489,-0.09699325945193386,-1.0437814340378018,-1.4747361008805782,0.6762938733332373,-0.002201964127703294,0.302880840231823,0.6190850221991457,0.7980304474421607,0.3,13.286772717579355
68 | 66,7.0,-0.9630383724558821,-2.206359497023054,-1.4266809197795844,-0.3477222132302199,-0.3390557566607199,0.761417576803639,0.23129580929482482,-0.17300953314439413,1.1053783204572785,0.46657061037303615,-0.9496025993847973,-0.1728347484983485,0.2596888183370722,-0.2526006391873094,0.889393045871513,1.1946831990607607,-1.9541233202054695,1.0115632166669435,0.766667852315038,0.3,15.779847814125437
69 | 67,7.0,0.26485515172243357,0.9699425631333096,-1.4586163350728112,1.378166417383271,0.6089829900366123,-1.3707601621288688,-0.6221303526638593,0.13283145075030675,2.0536710645126233,0.842355809583988,-0.011666304134421265,-1.3747433953814345,0.31014590301603806,0.2853295705052163,-1.012740417924718,1.88687566830239,1.2466006785032484,-0.34191293116508786,-1.7074598077909857,0.3,8.25129050526271
70 | 68,7.0,1.4512842566071438,-1.5847855818412273,-2.1235108712523183,-1.7893701260169474,-0.051534481941348916,-0.27559173600634884,-1.5117496055145592,-0.1435396160926693,-0.7101907715701367,0.27359652454068023,-0.45596633231639927,-1.7141963934436288,0.34788338769362454,0.006334988182185717,0.2318793883870595,0.9322738247061396,-0.6495381927446522,-0.09115552037232433,-0.48488909900375304,0.3,-7.450575582861295
71 | 69,7.0,-0.7963275246173567,1.0475006850847093,-0.7030986161221805,0.46478522539971395,-0.1364632622008057,-0.0816948146502653,-0.3985491518684708,-0.8131541270092641,-0.9609178410079326,1.642214048253701,1.374102887016856,-0.223533672331042,-0.5984255564400937,-0.10611918096417727,-0.6648319490434031,0.37051412876442635,-1.9204211166778575,2.161115930522335,0.014749323000708555,0.3,15.79122040941386
72 | 70,7.0,-0.21992155943077218,0.7941842476274679,-0.1396850416594722,0.049935675636799896,0.4086106772853094,-0.6934157485784403,-0.9940589628717789,-0.35715266861940725,-0.4318731695565698,0.7962985101890033,1.247150497499835,0.5938712424651954,0.10248588405667833,-0.2690795155902734,-0.13542393254130414,0.030509409058623982,-0.40919156809899565,-0.1803677662347727,-0.4321201587715146,0.3,-3.2543274750348585
73 | 71,7.0,0.25002219470515574,-0.23828972882179375,-1.1125799084305432,-0.8328706249699608,0.10604064550430531,-0.6839484488326125,0.20855836481141582,-0.4059537236743924,1.22473830385828,-1.2201345547113849,-0.7553933883251247,-1.8821827181982422,0.5596978334077415,1.4073761335094386,-0.15271306530753254,0.780372727822269,0.6579354695170798,-2.0533019650044726,1.5957370698210225,0.3,-16.960839180074323
74 | 72,8.0,0.12095226047314366,0.0580943244532565,0.6100403154088369,1.5506092440988297,0.20532247530863676,-0.02450567060555754,0.3857102941806678,0.5983924594878389,-1.1384423771615704,-0.028022620087935265,-0.2687069781487458,0.0481629168628262,-1.0733930112931287,1.134763249949494,-0.16331452236778599,1.2634151120829318,-0.9799885011240256,0.6401384638553066,1.0367766154590485,0.3,35.58143205785365
75 | 73,8.0,0.6301249203914255,2.0797152775446652,0.8103935045359427,0.6708559392832171,-0.7324545889197448,0.8800992110783451,-0.5163630595857417,2.3685957999539746,1.0765661218145866,1.7909175231393335,0.40977112815541555,0.32410619866810086,0.6326544883087448,1.5032559701318056,0.21798929318298865,2.6884242401942378,-2.05547693783521,1.1063061827714533,-0.23891014019512746,0.3,81.32425234900268
76 | 74,8.0,0.6580284143078705,-2.6593876067332656,0.6065406505986044,0.4887485488654352,-1.8943832295825016,-0.3171659708037408,0.6947124708578345,-1.4209535577091856,0.8199056844394174,-1.8699191480289121,0.3593965365046562,-1.7608163268917971,-0.24748297968621574,-2.48266247055282,0.7633691285413844,0.43251347440572574,0.3488321237589135,-0.7021841072867839,-1.9241595035794035,0.3,-49.21864926510675
77 | 75,8.0,0.5470216550229823,0.8007667376113594,-1.1805469473415717,-1.0718815261737125,1.6748068740322128,0.19307534812904079,0.48221875310032225,-1.642270626336212,-1.632307868921503,1.4147213507412537,-0.6630589973708769,-0.08796778884232444,0.6733588275291714,0.9320257526150199,-1.4041422515987483,0.8335864986775715,0.9638351908240477,1.2916792939690684,0.05695100799611364,0.3,26.040148278211902
78 | 76,8.0,-0.7216072303830088,1.2195217156569544,-0.22133773092531928,-2.0747779114994107,-0.4534491688805634,0.11623004072297016,-0.798643083925196,-0.33895055736847296,0.4113635951212493,0.7939674038462254,-0.7070523221226606,-1.18747576163982,-0.6734345870048707,-0.42217780053227105,-1.5934799652606917,-0.22529764512860062,-0.6396991418839693,-0.004565638113517411,-0.25047203570343923,0.3,-15.116136424258139
79 | 77,8.0,-0.46930461933048284,0.05625833174550483,-0.02606118287517814,1.0190430270389592,0.1329894905327601,0.45428102202678344,-0.7300778654616489,-1.6272616401879998,0.08297868356853112,0.03338087269997062,1.6483714863599717,-0.9871262700867279,0.12212600376137651,-0.28936373128418336,-0.9347244744034329,-1.247113632039571,-0.995947196393444,0.6621104889348459,-0.8416236406590963,0.3,-16.161346964942908
80 |
--------------------------------------------------------------------------------