├── .github └── workflows │ ├── autocheck.yml │ ├── autopytest.yml │ ├── autopytest_legacy.yml │ └── update_timestamp.yml ├── .gitignore ├── CITATION.cff ├── LICENSE ├── README.md ├── docs ├── coverage-badge.svg ├── dev │ └── requirements.txt ├── dev_docs.md ├── img │ ├── bins.png │ ├── loreplot.png │ ├── loreplot_confounder.png │ ├── loreplot_custom_color.png │ ├── loreplot_custom_markers.png │ ├── loreplot_jitter.png │ ├── loreplot_no_dots.png │ ├── loreplot_other_clf.png │ ├── loreplot_subplot.png │ ├── percentiles.png │ ├── threshold.png │ ├── uncertainty_confounder.png │ ├── uncertainty_custom_classifier.png │ ├── uncertainty_custom_color.png │ ├── uncertainty_default.png │ └── uncertainty_jackknife.png ├── lorepy_github_header.png └── lorepy_vs_bar_plots.md ├── example.py ├── example_uncertainty.py ├── pytest.ini ├── setup.py ├── src └── lorepy │ ├── __init__.py │ ├── lorepy.py │ └── uncertainty.py ├── tests ├── __init__.py ├── test_plot.py └── test_uncertainty.py └── timestamp /.github/workflows/autocheck.yml: -------------------------------------------------------------------------------- 1 | # GitHub Action that uses Ruff and Black to check the code 2 | 3 | name: Run Checks 4 | on: [ push, pull_request ] 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v4 10 | with: # https://github.com/stefanzweifel/git-auto-commit-action#checkout-the-correct-branch 11 | ref: ${{ github.head_ref }} 12 | - uses: chartboost/ruff-action@v1 13 | - name: Set up Python 3.10 14 | uses: actions/setup-python@v5 15 | with: 16 | python-version: "3.10" 17 | architecture: 'x64' 18 | - run: pip install black 19 | - run: black --check ./ 20 | - name: If needed, commit black changes to a new pull request 21 | if: failure() 22 | run: | 23 | black ./ 24 | git config --global user.name autoblack_push 25 | git config --global user.email '${GITHUB_ACTOR}@users.noreply.github.com' 26 | git commit -am "fixup! Format Python code with psf/black push" 27 | git push -------------------------------------------------------------------------------- /.github/workflows/autopytest.yml: -------------------------------------------------------------------------------- 1 | # GitHub Action that runs pytest 2 | 3 | name: Run Pytest 4 | on: 5 | workflow_dispatch: 6 | push: 7 | schedule: 8 | # execute once a week on monday 9 | - cron: '0 1 * * 1' 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | 14 | strategy: 15 | matrix: 16 | python-version: ["3.9", "3.10", "3.11", "3.12"] 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | with: # https://github.com/stefanzweifel/git-auto-commit-action#checkout-the-correct-branch 21 | ref: ${{ github.head_ref }} 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v5 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | architecture: 'x64' 27 | - run: | 28 | pip install . 29 | pip install pytest 30 | pip install pytest-cov 31 | - name: Run tests 32 | run: | 33 | pytest --exitfirst --verbose --failed-first --cov=src tests/ --cov-report=term-missing --cov-report=xml 34 | - name: Generate Coverage Badge 35 | run: | 36 | pip install setuptools 37 | pip install genbadge[coverage] 38 | genbadge coverage -i coverage.xml -o docs/coverage-badge.svg 39 | - run: git diff --exit-code ./docs/coverage-badge.svg 40 | - name: Update Coverage Badge if needed 41 | if: failure() 42 | run: | 43 | git config --local user.email '${GITHUB_ACTOR}@users.noreply.github.com' 44 | git config --local user.name "test-webservices[bot]" 45 | git add ./docs/coverage-badge.svg 46 | git commit -m "Update cover badge" 47 | git push 48 | 49 | -------------------------------------------------------------------------------- /.github/workflows/autopytest_legacy.yml: -------------------------------------------------------------------------------- 1 | # GitHub Action that runs pytest with the oldest packages specified in setup.py 2 | 3 | name: Run Legacy Pytest 4 | on: 5 | workflow_dispatch: 6 | push: 7 | schedule: 8 | # execute once a week on monday 9 | - cron: '30 0 * * 1' 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v4 16 | with: # https://github.com/stefanzweifel/git-auto-commit-action#checkout-the-correct-branch 17 | ref: ${{ github.head_ref }} 18 | - name: Set up Python 3.8 19 | uses: actions/setup-python@v5 20 | with: 21 | python-version: '3.8' 22 | architecture: 'x64' 23 | - run: | 24 | pip install matplotlib==3.4.1 numpy==1.20.2 pandas==1.2.4 scikit-learn==0.24.1 25 | pip install . 26 | pip install pytest 27 | pip install pytest-cov 28 | - name: Run tests 29 | run: | 30 | pytest --exitfirst --verbose --failed-first --cov=src tests/ --cov-report=term-missing --cov-report=xml -------------------------------------------------------------------------------- /.github/workflows/update_timestamp.yml: -------------------------------------------------------------------------------- 1 | # Updates the timestamp monthly so tests keep working 2 | 3 | name: Update Timestamp 4 | on: 5 | workflow_dispatch: 6 | schedule: 7 | # Execute the first of every month 8 | - cron: '1 0 1 * *' 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v4 15 | with: # https://github.com/stefanzweifel/git-auto-commit-action#checkout-the-correct-branch 16 | ref: ${{ github.head_ref }} 17 | # Update timestamp last run 18 | - name: Update timestamp 19 | run: | 20 | date > timestamp 21 | # Commit and Push timestamp 22 | - name: Commit and Push files 23 | run: | 24 | git config --local user.email '${GITHUB_ACTOR}@users.noreply.github.com' 25 | git config --local user.name "test-webservices[bot]" 26 | git add timestamp 27 | git commit -m "Update timestamp last run" -a 28 | git push 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | .idea/ 161 | 162 | .ruff_cache/ -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 3 | authors: 4 | - family-names: "Proost" 5 | given-names: "Sebastian" 6 | orcid: "https://orcid.org/0000-0002-6792-9442" 7 | - family-names: "Vieira-Silva" 8 | given-names: "Sara" 9 | orcid: "https://orcid.org/0000-0002-4616-7602" 10 | - family-names: "Raes" 11 | given-names: "Jeroen" 12 | orcid: "https://orcid.org/0000-0002-1337-041X" 13 | title: "lorepy: Logistic Regression Plots for Python" 14 | version: 0.2.0 15 | doi: 10.5281/zenodo.8321785 16 | date-released: 2023-09-07 17 | url: "https://github.com/raeslab/lorepy" 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Attribution-NonCommercial-ShareAlike 4.0 International 2 | 3 | ======================================================================= 4 | 5 | Creative Commons Corporation ("Creative Commons") is not a law firm and 6 | does not provide legal services or legal advice. Distribution of 7 | Creative Commons public licenses does not create a lawyer-client or 8 | other relationship. Creative Commons makes its licenses and related 9 | information available on an "as-is" basis. Creative Commons gives no 10 | warranties regarding its licenses, any material licensed under their 11 | terms and conditions, or any related information. Creative Commons 12 | disclaims all liability for damages resulting from their use to the 13 | fullest extent possible. 14 | 15 | Using Creative Commons Public Licenses 16 | 17 | Creative Commons public licenses provide a standard set of terms and 18 | conditions that creators and other rights holders may use to share 19 | original works of authorship and other material subject to copyright 20 | and certain other rights specified in the public license below. The 21 | following considerations are for informational purposes only, are not 22 | exhaustive, and do not form part of our licenses. 23 | 24 | Considerations for licensors: Our public licenses are 25 | intended for use by those authorized to give the public 26 | permission to use material in ways otherwise restricted by 27 | copyright and certain other rights. Our licenses are 28 | irrevocable. Licensors should read and understand the terms 29 | and conditions of the license they choose before applying it. 30 | Licensors should also secure all rights necessary before 31 | applying our licenses so that the public can reuse the 32 | material as expected. Licensors should clearly mark any 33 | material not subject to the license. This includes other CC- 34 | licensed material, or material used under an exception or 35 | limitation to copyright. More considerations for licensors: 36 | wiki.creativecommons.org/Considerations_for_licensors 37 | 38 | Considerations for the public: By using one of our public 39 | licenses, a licensor grants the public permission to use the 40 | licensed material under specified terms and conditions. If 41 | the licensor's permission is not necessary for any reason--for 42 | example, because of any applicable exception or limitation to 43 | copyright--then that use is not regulated by the license. Our 44 | licenses grant only permissions under copyright and certain 45 | other rights that a licensor has authority to grant. Use of 46 | the licensed material may still be restricted for other 47 | reasons, including because others have copyright or other 48 | rights in the material. A licensor may make special requests, 49 | such as asking that all changes be marked or described. 50 | Although not required by our licenses, you are encouraged to 51 | respect those requests where reasonable. More considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International 58 | Public License 59 | 60 | By exercising the Licensed Rights (defined below), You accept and agree 61 | to be bound by the terms and conditions of this Creative Commons 62 | Attribution-NonCommercial-ShareAlike 4.0 International Public License 63 | ("Public License"). To the extent this Public License may be 64 | interpreted as a contract, You are granted the Licensed Rights in 65 | consideration of Your acceptance of these terms and conditions, and the 66 | Licensor grants You such rights in consideration of benefits the 67 | Licensor receives from making the Licensed Material available under 68 | these terms and conditions. 69 | 70 | 71 | Section 1 -- Definitions. 72 | 73 | a. Adapted Material means material subject to Copyright and Similar 74 | Rights that is derived from or based upon the Licensed Material 75 | and in which the Licensed Material is translated, altered, 76 | arranged, transformed, or otherwise modified in a manner requiring 77 | permission under the Copyright and Similar Rights held by the 78 | Licensor. For purposes of this Public License, where the Licensed 79 | Material is a musical work, performance, or sound recording, 80 | Adapted Material is always produced where the Licensed Material is 81 | synched in timed relation with a moving image. 82 | 83 | b. Adapter's License means the license You apply to Your Copyright 84 | and Similar Rights in Your contributions to Adapted Material in 85 | accordance with the terms and conditions of this Public License. 86 | 87 | c. BY-NC-SA Compatible License means a license listed at 88 | creativecommons.org/compatiblelicenses, approved by Creative 89 | Commons as essentially the equivalent of this Public License. 90 | 91 | d. Copyright and Similar Rights means copyright and/or similar rights 92 | closely related to copyright including, without limitation, 93 | performance, broadcast, sound recording, and Sui Generis Database 94 | Rights, without regard to how the rights are labeled or 95 | categorized. For purposes of this Public License, the rights 96 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 97 | Rights. 98 | 99 | e. Effective Technological Measures means those measures that, in the 100 | absence of proper authority, may not be circumvented under laws 101 | fulfilling obligations under Article 11 of the WIPO Copyright 102 | Treaty adopted on December 20, 1996, and/or similar international 103 | agreements. 104 | 105 | f. Exceptions and Limitations means fair use, fair dealing, and/or 106 | any other exception or limitation to Copyright and Similar Rights 107 | that applies to Your use of the Licensed Material. 108 | 109 | g. License Elements means the license attributes listed in the name 110 | of a Creative Commons Public License. The License Elements of this 111 | Public License are Attribution, NonCommercial, and ShareAlike. 112 | 113 | h. Licensed Material means the artistic or literary work, database, 114 | or other material to which the Licensor applied this Public 115 | License. 116 | 117 | i. Licensed Rights means the rights granted to You subject to the 118 | terms and conditions of this Public License, which are limited to 119 | all Copyright and Similar Rights that apply to Your use of the 120 | Licensed Material and that the Licensor has authority to license. 121 | 122 | j. Licensor means the individual(s) or entity(ies) granting rights 123 | under this Public License. 124 | 125 | k. NonCommercial means not primarily intended for or directed towards 126 | commercial advantage or monetary compensation. For purposes of 127 | this Public License, the exchange of the Licensed Material for 128 | other material subject to Copyright and Similar Rights by digital 129 | file-sharing or similar means is NonCommercial provided there is 130 | no payment of monetary compensation in connection with the 131 | exchange. 132 | 133 | l. Share means to provide material to the public by any means or 134 | process that requires permission under the Licensed Rights, such 135 | as reproduction, public display, public performance, distribution, 136 | dissemination, communication, or importation, and to make material 137 | available to the public including in ways that members of the 138 | public may access the material from a place and at a time 139 | individually chosen by them. 140 | 141 | m. Sui Generis Database Rights means rights other than copyright 142 | resulting from Directive 96/9/EC of the European Parliament and of 143 | the Council of 11 March 1996 on the legal protection of databases, 144 | as amended and/or succeeded, as well as other essentially 145 | equivalent rights anywhere in the world. 146 | 147 | n. You means the individual or entity exercising the Licensed Rights 148 | under this Public License. Your has a corresponding meaning. 149 | 150 | 151 | Section 2 -- Scope. 152 | 153 | a. License grant. 154 | 155 | 1. Subject to the terms and conditions of this Public License, 156 | the Licensor hereby grants You a worldwide, royalty-free, 157 | non-sublicensable, non-exclusive, irrevocable license to 158 | exercise the Licensed Rights in the Licensed Material to: 159 | 160 | a. reproduce and Share the Licensed Material, in whole or 161 | in part, for NonCommercial purposes only; and 162 | 163 | b. produce, reproduce, and Share Adapted Material for 164 | NonCommercial purposes only. 165 | 166 | 2. Exceptions and Limitations. For the avoidance of doubt, where 167 | Exceptions and Limitations apply to Your use, this Public 168 | License does not apply, and You do not need to comply with 169 | its terms and conditions. 170 | 171 | 3. Term. The term of this Public License is specified in Section 172 | 6(a). 173 | 174 | 4. Media and formats; technical modifications allowed. The 175 | Licensor authorizes You to exercise the Licensed Rights in 176 | all media and formats whether now known or hereafter created, 177 | and to make technical modifications necessary to do so. The 178 | Licensor waives and/or agrees not to assert any right or 179 | authority to forbid You from making technical modifications 180 | necessary to exercise the Licensed Rights, including 181 | technical modifications necessary to circumvent Effective 182 | Technological Measures. For purposes of this Public License, 183 | simply making modifications authorized by this Section 2(a) 184 | (4) never produces Adapted Material. 185 | 186 | 5. Downstream recipients. 187 | 188 | a. Offer from the Licensor -- Licensed Material. Every 189 | recipient of the Licensed Material automatically 190 | receives an offer from the Licensor to exercise the 191 | Licensed Rights under the terms and conditions of this 192 | Public License. 193 | 194 | b. Additional offer from the Licensor -- Adapted Material. 195 | Every recipient of Adapted Material from You 196 | automatically receives an offer from the Licensor to 197 | exercise the Licensed Rights in the Adapted Material 198 | under the conditions of the Adapter's License You apply. 199 | 200 | c. No downstream restrictions. You may not offer or impose 201 | any additional or different terms or conditions on, or 202 | apply any Effective Technological Measures to, the 203 | Licensed Material if doing so restricts exercise of the 204 | Licensed Rights by any recipient of the Licensed 205 | Material. 206 | 207 | 6. No endorsement. Nothing in this Public License constitutes or 208 | may be construed as permission to assert or imply that You 209 | are, or that Your use of the Licensed Material is, connected 210 | with, or sponsored, endorsed, or granted official status by, 211 | the Licensor or others designated to receive attribution as 212 | provided in Section 3(a)(1)(A)(i). 213 | 214 | b. Other rights. 215 | 216 | 1. Moral rights, such as the right of integrity, are not 217 | licensed under this Public License, nor are publicity, 218 | privacy, and/or other similar personality rights; however, to 219 | the extent possible, the Licensor waives and/or agrees not to 220 | assert any such rights held by the Licensor to the limited 221 | extent necessary to allow You to exercise the Licensed 222 | Rights, but not otherwise. 223 | 224 | 2. Patent and trademark rights are not licensed under this 225 | Public License. 226 | 227 | 3. To the extent possible, the Licensor waives any right to 228 | collect royalties from You for the exercise of the Licensed 229 | Rights, whether directly or through a collecting society 230 | under any voluntary or waivable statutory or compulsory 231 | licensing scheme. In all other cases the Licensor expressly 232 | reserves any right to collect such royalties, including when 233 | the Licensed Material is used other than for NonCommercial 234 | purposes. 235 | 236 | 237 | Section 3 -- License Conditions. 238 | 239 | Your exercise of the Licensed Rights is expressly made subject to the 240 | following conditions. 241 | 242 | a. Attribution. 243 | 244 | 1. If You Share the Licensed Material (including in modified 245 | form), You must: 246 | 247 | a. retain the following if it is supplied by the Licensor 248 | with the Licensed Material: 249 | 250 | i. identification of the creator(s) of the Licensed 251 | Material and any others designated to receive 252 | attribution, in any reasonable manner requested by 253 | the Licensor (including by pseudonym if 254 | designated); 255 | 256 | ii. a copyright notice; 257 | 258 | iii. a notice that refers to this Public License; 259 | 260 | iv. a notice that refers to the disclaimer of 261 | warranties; 262 | 263 | v. a URI or hyperlink to the Licensed Material to the 264 | extent reasonably practicable; 265 | 266 | b. indicate if You modified the Licensed Material and 267 | retain an indication of any previous modifications; and 268 | 269 | c. indicate the Licensed Material is licensed under this 270 | Public License, and include the text of, or the URI or 271 | hyperlink to, this Public License. 272 | 273 | 2. You may satisfy the conditions in Section 3(a)(1) in any 274 | reasonable manner based on the medium, means, and context in 275 | which You Share the Licensed Material. For example, it may be 276 | reasonable to satisfy the conditions by providing a URI or 277 | hyperlink to a resource that includes the required 278 | information. 279 | 3. If requested by the Licensor, You must remove any of the 280 | information required by Section 3(a)(1)(A) to the extent 281 | reasonably practicable. 282 | 283 | b. ShareAlike. 284 | 285 | In addition to the conditions in Section 3(a), if You Share 286 | Adapted Material You produce, the following conditions also apply. 287 | 288 | 1. The Adapter's License You apply must be a Creative Commons 289 | license with the same License Elements, this version or 290 | later, or a BY-NC-SA Compatible License. 291 | 292 | 2. You must include the text of, or the URI or hyperlink to, the 293 | Adapter's License You apply. You may satisfy this condition 294 | in any reasonable manner based on the medium, means, and 295 | context in which You Share Adapted Material. 296 | 297 | 3. You may not offer or impose any additional or different terms 298 | or conditions on, or apply any Effective Technological 299 | Measures to, Adapted Material that restrict exercise of the 300 | rights granted under the Adapter's License You apply. 301 | 302 | 303 | Section 4 -- Sui Generis Database Rights. 304 | 305 | Where the Licensed Rights include Sui Generis Database Rights that 306 | apply to Your use of the Licensed Material: 307 | 308 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 309 | to extract, reuse, reproduce, and Share all or a substantial 310 | portion of the contents of the database for NonCommercial purposes 311 | only; 312 | 313 | b. if You include all or a substantial portion of the database 314 | contents in a database in which You have Sui Generis Database 315 | Rights, then the database in which You have Sui Generis Database 316 | Rights (but not its individual contents) is Adapted Material, 317 | including for purposes of Section 3(b); and 318 | 319 | c. You must comply with the conditions in Section 3(a) if You Share 320 | all or a substantial portion of the contents of the database. 321 | 322 | For the avoidance of doubt, this Section 4 supplements and does not 323 | replace Your obligations under this Public License where the Licensed 324 | Rights include other Copyright and Similar Rights. 325 | 326 | 327 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 328 | 329 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 330 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 331 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 332 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 333 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 334 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 335 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 336 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 337 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 338 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 339 | 340 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 341 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 342 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 343 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 344 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 345 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 346 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 347 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 348 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 349 | 350 | c. The disclaimer of warranties and limitation of liability provided 351 | above shall be interpreted in a manner that, to the extent 352 | possible, most closely approximates an absolute disclaimer and 353 | waiver of all liability. 354 | 355 | 356 | Section 6 -- Term and Termination. 357 | 358 | a. This Public License applies for the term of the Copyright and 359 | Similar Rights licensed here. However, if You fail to comply with 360 | this Public License, then Your rights under this Public License 361 | terminate automatically. 362 | 363 | b. Where Your right to use the Licensed Material has terminated under 364 | Section 6(a), it reinstates: 365 | 366 | 1. automatically as of the date the violation is cured, provided 367 | it is cured within 30 days of Your discovery of the 368 | violation; or 369 | 370 | 2. upon express reinstatement by the Licensor. 371 | 372 | For the avoidance of doubt, this Section 6(b) does not affect any 373 | right the Licensor may have to seek remedies for Your violations 374 | of this Public License. 375 | 376 | c. For the avoidance of doubt, the Licensor may also offer the 377 | Licensed Material under separate terms or conditions or stop 378 | distributing the Licensed Material at any time; however, doing so 379 | will not terminate this Public License. 380 | 381 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 382 | License. 383 | 384 | 385 | Section 7 -- Other Terms and Conditions. 386 | 387 | a. The Licensor shall not be bound by any additional or different 388 | terms or conditions communicated by You unless expressly agreed. 389 | 390 | b. Any arrangements, understandings, or agreements regarding the 391 | Licensed Material not stated herein are separate from and 392 | independent of the terms and conditions of this Public License. 393 | 394 | 395 | Section 8 -- Interpretation. 396 | 397 | a. For the avoidance of doubt, this Public License does not, and 398 | shall not be interpreted to, reduce, limit, restrict, or impose 399 | conditions on any use of the Licensed Material that could lawfully 400 | be made without permission under this Public License. 401 | 402 | b. To the extent possible, if any provision of this Public License is 403 | deemed unenforceable, it shall be automatically reformed to the 404 | minimum extent necessary to make it enforceable. If the provision 405 | cannot be reformed, it shall be severed from this Public License 406 | without affecting the enforceability of the remaining terms and 407 | conditions. 408 | 409 | c. No term or condition of this Public License will be waived and no 410 | failure to comply consented to unless expressly agreed to by the 411 | Licensor. 412 | 413 | d. Nothing in this Public License constitutes or may be interpreted 414 | as a limitation upon, or waiver of, any privileges and immunities 415 | that apply to the Licensor or You, including from the legal 416 | processes of any jurisdiction or authority. 417 | 418 | ======================================================================= 419 | 420 | Creative Commons is not a party to its public 421 | licenses. Notwithstanding, Creative Commons may elect to apply one of 422 | its public licenses to material it publishes and in those instances 423 | will be considered the “Licensor.” The text of the Creative Commons 424 | public licenses is dedicated to the public domain under the CC0 Public 425 | Domain Dedication. Except for the limited purpose of indicating that 426 | material is shared under a Creative Commons public license or as 427 | otherwise permitted by the Creative Commons policies published at 428 | creativecommons.org/policies, Creative Commons does not authorize the 429 | use of the trademark "Creative Commons" or any other trademark or logo 430 | of Creative Commons without its prior written consent including, 431 | without limitation, in connection with any unauthorized modifications 432 | to any of its public licenses or any other arrangements, 433 | understandings, or agreements concerning use of licensed material. For 434 | the avoidance of doubt, this paragraph does not form part of the 435 | public licenses. 436 | 437 | Creative Commons may be contacted at creativecommons.org. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Run Pytest](https://github.com/raeslab/lorepy/actions/workflows/autopytest.yml/badge.svg)](https://github.com/raeslab/lorepy/actions/workflows/autopytest.yml) [![Coverage](https://raw.githubusercontent.com/raeslab/lorepy/main/docs/coverage-badge.svg)](https://raw.githubusercontent.com/raeslab/lorepy/main/docs/coverage-badge.svg) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![DOI](https://zenodo.org/badge/686018963.svg)](https://zenodo.org/badge/latestdoi/686018963) [![PyPI version](https://badge.fury.io/py/lorepy.svg)](https://badge.fury.io/py/lorepy) [![License: CC BY-NC-SA 4.0](https://img.shields.io/badge/License-CC%20BY--NC--SA%204.0-lightgrey.svg)](https://creativecommons.org/licenses/by-nc-sa/4.0/) 2 | 3 | # lorepy: Logistic Regression Plots for Python 4 | 5 | Logistic Regression plots are used to plot the distribution of a categorical dependent variable in function of a 6 | continuous independent variable. 7 | 8 | If you prefer an R implementation of this package, have a look at [loreplotr](https://github.com/raeslab/loreplotr). 9 | 10 | ![LoRePlot example on Iris Dataset](https://raw.githubusercontent.com/raeslab/lorepy/main/docs/img/loreplot.png) 11 | 12 | ## Why use lorepy ? 13 | 14 | Lorepy offers distinct advantages over traditional methods like stacked bar plots. By employing a linear model, Lorepy 15 | captures overall trends across the entire feature range. It avoids arbitrary cut-offs and segmentation, enabling the 16 | visualization of uncertainty throughout the data range. 17 | 18 | You can find examples of the Iris data visualized using stacked bar plots [here](https://github.com/raeslab/lorepy/blob/main/docs/lorepy_vs_bar_plots.md) for comparison. 19 | 20 | ## Installation 21 | 22 | Lorepy can be installed using pip using the command below. 23 | 24 | ``` 25 | pip install lorepy 26 | ``` 27 | 28 | 29 | ## Usage 30 | 31 | Data needs to be provided as a DataFrame and the columns for the x (independent continuous) and y (dependant categorical) 32 | variables need to be defined. Here the iris dataset is loaded and converted to an appropriate DataFrame. Once the data 33 | is in shape it can be plotted using a single line of code ```loreplot(data=iris_df, x="sepal width (cm)", y="species")```. 34 | 35 | ```python 36 | from lorepy import loreplot 37 | 38 | from sklearn.datasets import load_iris 39 | import matplotlib.pyplot as plt 40 | import pandas as pd 41 | 42 | iris_obj = load_iris() 43 | iris_df = pd.DataFrame(iris_obj.data, columns=iris_obj.feature_names) 44 | 45 | iris_df["species"] = [iris_obj.target_names[s] for s in iris_obj.target] 46 | 47 | loreplot(data=iris_df, x="sepal width (cm)", y="species") 48 | 49 | plt.show() 50 | ``` 51 | 52 | ## Options 53 | 54 | While lorepy has very few customizations, it is possible to pass arguments through to Pandas' 55 | [DataFrame.plot.area](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.plot.area.html) 56 | and Matplotlib's [pyplot.scatter](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.scatter.html) to change 57 | the aesthetics of the plots. 58 | 59 | ### Disable sample dots 60 | 61 | Dots indicating where samples are located can be en-/disabled using the ```add_dots``` argument. 62 | 63 | ```python 64 | loreplot(data=iris_df, x="sepal width (cm)", y="species", add_dots=False) 65 | plt.show() 66 | ``` 67 | 68 | ![LoRePlot dots can be disabled](https://raw.githubusercontent.com/raeslab/lorepy/main/docs/img/loreplot_no_dots.png) 69 | 70 | ### Custom styles 71 | 72 | Additional keyword arguments are passed to Pandas' [DataFrame.plot.area](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.plot.area.html). 73 | This can be used, among other things, to define a custom colormap. For more options to customize these plots consult 74 | Pandas' documentation. 75 | 76 | ```python 77 | from matplotlib.colors import ListedColormap 78 | 79 | colormap=ListedColormap(['red', 'green', 'blue']) 80 | 81 | loreplot(data=iris_df, x="sepal width (cm)", y="species", colormap=colormap) 82 | plt.show() 83 | ``` 84 | ![LoRePlot custom colors](https://raw.githubusercontent.com/raeslab/lorepy/main/docs/img/loreplot_custom_color.png) 85 | 86 | 87 | Using ```scatter_kws``` arguments for [pyplot.scatter](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.scatter.html) 88 | can be set to change the appearance of the sample markers. 89 | 90 | ```python 91 | scatter_options = { 92 | 's': 20, # Marker size 93 | 'alpha': 1, # Fully opaque 94 | 'color': 'black', # Set color to black 95 | 'marker': 'x' # Set style to crosses 96 | } 97 | 98 | loreplot(data=iris_df, x="sepal width (cm)", y="species", scatter_kws=scatter_options) 99 | plt.show() 100 | ``` 101 | ![LoRePlot custom markers](https://raw.githubusercontent.com/raeslab/lorepy/main/docs/img/loreplot_custom_markers.png) 102 | 103 | You can use LoRePlots in subplots as you would expect. 104 | 105 | ```python 106 | fig, ax = plt.subplots(1,2, sharex=False, sharey=True) 107 | loreplot(data=iris_df, x="sepal width (cm)", y="species", ax=ax[0]) 108 | loreplot(data=iris_df, x="petal width (cm)", y="species", ax=ax[1]) 109 | 110 | ax[0].get_legend().remove() 111 | ax[0].set_title("Sepal Width") 112 | ax[1].set_title("Petal Width") 113 | 114 | plt.savefig('./docs/img/loreplot_subplot.png', dpi=150) 115 | plt.show() 116 | ``` 117 | 118 | ![LoRePlot in subplots](https://raw.githubusercontent.com/raeslab/lorepy/main/docs/img/loreplot_subplot.png) 119 | 120 | By default lorepy uses a multi-class logistic regression model, however this can be replaced with any classifier 121 | from scikit-learn that implements ```predict_proba``` and ```fit```. Below you can see the code and output with a 122 | Support Vector Classifier (SVC) and Random Forest Classifier (RF). 123 | 124 | ```python 125 | from sklearn.svm import SVC 126 | from sklearn.ensemble import RandomForestClassifier 127 | 128 | fig, ax = plt.subplots(1, 2, sharex=False, sharey=True) 129 | 130 | svc = SVC(probability=True) 131 | rf = RandomForestClassifier(n_estimators=10, max_depth=2) 132 | 133 | loreplot(data=iris_df, x="sepal width (cm)", y="species", clf=svc, ax=ax[0]) 134 | loreplot(data=iris_df, x="sepal width (cm)", y="species", clf=rf, ax=ax[1]) 135 | 136 | ax[0].get_legend().remove() 137 | ax[0].set_title("SVC") 138 | ax[1].set_title("RF") 139 | 140 | plt.savefig("./docs/img/loreplot_other_clf.png", dpi=150) 141 | plt.show() 142 | ``` 143 | 144 | ![Lorepy with different types of classifiers](https://raw.githubusercontent.com/raeslab/lorepy/main/docs/img/loreplot_other_clf.png) 145 | 146 | 147 | In case there are confounders, these can be taken into account using the ```confounders``` argument. This requires a 148 | list of tuples, with the feature and the reference value for that feature to use in plots. E.g. if you wish to deconfound 149 | for Body Mass Index (BMI) and use a BMI of 25 in plots, set this to [("BMI", 25)]. 150 | 151 | ```python 152 | loreplot( 153 | data=iris_df, 154 | x="sepal width (cm)", 155 | y="species", 156 | confounders=[("petal width (cm)", 1)], 157 | ) 158 | plt.savefig("./docs/img/loreplot_confounder.png", dpi=150) 159 | plt.show() 160 | ``` 161 | 162 | ![Loreplot with a confounder](https://raw.githubusercontent.com/raeslab/lorepy/main/docs/img/loreplot_confounder.png) 163 | 164 | In some cases the numerical feature on the x-axis isn't continuous (e.g. an integer number), this can lead to 165 | overplotting the dots. To avoid this to some extent a `jitter` feature is included, that adds some uniform noise to 166 | the x-coordinates of the dots. The value specifies the range of the uniform noise added, the value of 0.05 in the 167 | example sets this range to [-0.05, 0.05]. 168 | 169 | ```python 170 | iris_df["sepal width (cm)"] = ( 171 | np.round(iris_df["sepal width (cm)"] * 3) / 3 172 | ) # Round values 173 | 174 | loreplot(data=iris_df, x="sepal width (cm)", y="species", jitter=0.05) 175 | plt.savefig("./docs/img/loreplot_jitter.png", dpi=150) 176 | plt.show() 177 | ``` 178 | ![Loreplot with a confounder](https://raw.githubusercontent.com/raeslab/lorepy/main/docs/img/loreplot_jitter.png) 179 | 180 | ### Assess uncertainty 181 | 182 | From loreplots it isn't possible to assess how certain we are of the prevalence of each group across the range. To 183 | provide a view into this there is a function ```uncertainty_plot```, which can be used as shown below. This will use 184 | ```resampling``` (or ```jackknifing```) to determine the 50% and 95% interval of predicted values and show these in a 185 | multi-panel plot with one plot per category. 186 | 187 | ```python 188 | from lorepy import uncertainty_plot 189 | 190 | uncertainty_plot( 191 | data=iris_df, 192 | x="sepal width (cm)", 193 | y="species", 194 | ) 195 | plt.savefig("./docs/img/uncertainty_default.png", dpi=150) 196 | plt.show() 197 | ``` 198 | 199 | ![Default uncertainty plot](https://raw.githubusercontent.com/raeslab/lorepy/main/docs/img/uncertainty_default.png) 200 | 201 | This also supports custom colors, ranges and classifiers. More examples are available in ```example_uncertainty.py```. 202 | 203 | 204 | ## Development 205 | 206 | Additional [documentation for developers](https://github.com/raeslab/lorepy/blob/main/docs/dev_docs.md) is included with details on running tests, building and deploying to PyPi. 207 | 208 | ## Contributing 209 | 210 | Any contributions you make are **greatly appreciated**. 211 | 212 | * Found a bug or have some suggestions? Open an [issue](https://github.com/raeslab/lorepy/issues). 213 | * Pull requests are welcome! Though open an [issue](https://github.com/raeslab/lorepy/issues) first to discuss which features/changes you wish to implement. 214 | 215 | ## Contact 216 | 217 | lorepy was developed by [Sebastian Proost](https://sebastian.proost.science/) at the 218 | [RaesLab](https://raeslab.sites.vib.be/en) and was based on R code written by 219 | [Sara Vieira-Silva](https://saravsilva.github.io/). As of version 0.2.0 lorepy is available under the 220 | [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International](https://creativecommons.org/licenses/by-nc-sa/4.0/) 221 | license. 222 | 223 | For commercial access inquiries, please contact [Jeroen Raes](mailto:jeroen.raes@kuleuven.vib.be). 224 | -------------------------------------------------------------------------------- /docs/coverage-badge.svg: -------------------------------------------------------------------------------- 1 | coverage: 100.00%coverage100.00% -------------------------------------------------------------------------------- /docs/dev/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/dev/requirements.txt -------------------------------------------------------------------------------- /docs/dev_docs.md: -------------------------------------------------------------------------------- 1 | # Lorepy - Development documentation 2 | 3 | ## Setting up the environment 4 | 5 | To recreate the environment used by the devs, you can get a [requirements.txt](./dev/requirements.txt) file that has the 6 | same versions we have been using pinned. To install these after creating a virtual environment use the command below 7 | (from the root of the project) 8 | 9 | ```bash 10 | pip install -r ./docs/dev/requirements.txt 11 | ``` 12 | 13 | 14 | ## Running tests 15 | 16 | Lorepy is fully covered with unit-tests, to run them you need the pytest package installed (```pip install pytest pytest-cov```). 17 | Next, run the command below to run the test suite. Note: if you use the environment listed above you will get these. 18 | 19 | ```bash 20 | pytest 21 | ``` 22 | To enable coverage stats run the command below. 23 | 24 | ```bash 25 | pytest --exitfirst --verbose --failed-first --cov=src 26 | ``` 27 | 28 | ## Deploying on PyPi 29 | 30 | ### Building the package 31 | 32 | To build the source distribution along with a wheel, use the command below. 33 | 34 | ```bash 35 | python setup.py sdist bdist_wheel 36 | ``` 37 | 38 | ### Push the package to PyPi 39 | 40 | **Note** that these commands will upload the code to publicly available platforms, use with caution ! 41 | 42 | This will require the twine package, install twine using ```pip install twine``` if needed. 43 | 44 | You can upload a new build to [TestPyPi] using the command below: 45 | 46 | ```bash 47 | twine upload --repository-url https://test.pypi.org/legacy/ dist/* 48 | ``` 49 | 50 | Once everything is ready to be uploaded to [PyPi], one more command is necessary: 51 | 52 | ```bash 53 | twine upload dist/* 54 | ``` 55 | 56 | When prompted for credentials, use `__token__` as the username and the API token generated on [PyPi] as the password. 57 | 58 | [TestPyPi]: https://test.pypi.org/ 59 | [PyPi]: https://pypi.org/ -------------------------------------------------------------------------------- /docs/img/bins.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/bins.png -------------------------------------------------------------------------------- /docs/img/loreplot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/loreplot.png -------------------------------------------------------------------------------- /docs/img/loreplot_confounder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/loreplot_confounder.png -------------------------------------------------------------------------------- /docs/img/loreplot_custom_color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/loreplot_custom_color.png -------------------------------------------------------------------------------- /docs/img/loreplot_custom_markers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/loreplot_custom_markers.png -------------------------------------------------------------------------------- /docs/img/loreplot_jitter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/loreplot_jitter.png -------------------------------------------------------------------------------- /docs/img/loreplot_no_dots.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/loreplot_no_dots.png -------------------------------------------------------------------------------- /docs/img/loreplot_other_clf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/loreplot_other_clf.png -------------------------------------------------------------------------------- /docs/img/loreplot_subplot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/loreplot_subplot.png -------------------------------------------------------------------------------- /docs/img/percentiles.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/percentiles.png -------------------------------------------------------------------------------- /docs/img/threshold.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/threshold.png -------------------------------------------------------------------------------- /docs/img/uncertainty_confounder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/uncertainty_confounder.png -------------------------------------------------------------------------------- /docs/img/uncertainty_custom_classifier.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/uncertainty_custom_classifier.png -------------------------------------------------------------------------------- /docs/img/uncertainty_custom_color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/uncertainty_custom_color.png -------------------------------------------------------------------------------- /docs/img/uncertainty_default.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/uncertainty_default.png -------------------------------------------------------------------------------- /docs/img/uncertainty_jackknife.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/uncertainty_jackknife.png -------------------------------------------------------------------------------- /docs/lorepy_github_header.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/lorepy_github_header.png -------------------------------------------------------------------------------- /docs/lorepy_vs_bar_plots.md: -------------------------------------------------------------------------------- 1 | # Why use lorepy over histograms 2 | 3 | The Iris dataset, shown in a loreplot below, is visualized using a few different options further down this page. Note 4 | how using stacked bar plots in some cases can distort the data. 5 | 6 | ![LoRePlot example on Iris Dataset](./img/loreplot.png) 7 | 8 | ## A threshold is used to separate the data 9 | 10 | In the plot below, individuals are separated into "large" and "small" groups based on an arbitrary threshold for sepal 11 | width. This approach can obscure how sepal width is distributed within species, particularly for the *virginica* species. 12 | 13 | ![Iris dataset separated in two arbitrary groups](./img/threshold.png) 14 | 15 | ## Using bins with equal range 16 | 17 | Here, individuals are divided into six equal segments (bins) based on sepal width. The plot below emphasizes the small 18 | number of *setosa* specimens with small sepals, showing how they disproportionately influence the plot. 19 | 20 | ![Iris dataset separated in six bins](./img/bins.png) 21 | 22 | ## Using percentiles 23 | 24 | The plot below slices the data into percentile ranks, which leads to bins of varying widths. For instance, the largest bin 25 | covers a range of ~1 cm, while others span just 1-2 mm. This can distort the perception of data distribution. 26 | 27 | ![Iris dataset separated in percentiles](./img/percentiles.png) 28 | 29 | -------------------------------------------------------------------------------- /example.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | import numpy as np 4 | from lorepy import loreplot 5 | from matplotlib.colors import ListedColormap 6 | from sklearn.datasets import load_iris 7 | from sklearn.ensemble import RandomForestClassifier 8 | from sklearn.svm import SVC 9 | 10 | # Load iris dataset and convert to dataframe 11 | iris_obj = load_iris() 12 | iris_df = pd.DataFrame(iris_obj.data, columns=iris_obj.feature_names) 13 | 14 | iris_df["species"] = [iris_obj.target_names[s] for s in iris_obj.target] 15 | 16 | # Basic Lore Plot with default style 17 | loreplot(data=iris_df, x="sepal width (cm)", y="species") 18 | plt.savefig("./docs/img/loreplot.png", dpi=150) 19 | plt.show() 20 | 21 | # Key word arguments (like colormap) can be passed to the DataFrame.plot.area 22 | 23 | colormap = ListedColormap(["red", "green", "blue"]) 24 | loreplot(data=iris_df, x="sepal width (cm)", y="species", colormap=colormap) 25 | plt.savefig("./docs/img/loreplot_custom_color.png", dpi=150) 26 | plt.show() 27 | 28 | # En-/disable sample markers with add_dots 29 | loreplot(data=iris_df, x="sepal width (cm)", y="species", add_dots=False) 30 | plt.savefig("./docs/img/loreplot_no_dots.png", dpi=150) 31 | plt.show() 32 | 33 | # Pass custom styles for markers using scatter_kws 34 | scatter_options = { 35 | "s": 20, # Marker size 36 | "alpha": 1, # Fully opaque 37 | "color": "black", # Set color to black 38 | "marker": "x", # Set style to crosses 39 | } 40 | 41 | loreplot(data=iris_df, x="sepal width (cm)", y="species", scatter_kws=scatter_options) 42 | plt.savefig("./docs/img/loreplot_custom_markers.png", dpi=150) 43 | plt.show() 44 | 45 | # Test in subplots 46 | 47 | fig, ax = plt.subplots(1, 2, sharex=False, sharey=True) 48 | loreplot(data=iris_df, x="sepal width (cm)", y="species", ax=ax[0]) 49 | loreplot(data=iris_df, x="petal width (cm)", y="species", ax=ax[1]) 50 | 51 | ax[0].get_legend().remove() 52 | ax[0].set_title("Sepal Width") 53 | ax[1].set_title("Petal Width") 54 | 55 | plt.savefig("./docs/img/loreplot_subplot.png", dpi=150) 56 | plt.show() 57 | 58 | # Basic Lore Plot with default style but different classifier 59 | fig, ax = plt.subplots(1, 2, sharex=False, sharey=True) 60 | 61 | svc = SVC(probability=True) 62 | rf = RandomForestClassifier(n_estimators=10, max_depth=2) 63 | 64 | loreplot(data=iris_df, x="sepal width (cm)", y="species", clf=svc, ax=ax[0]) 65 | loreplot(data=iris_df, x="sepal width (cm)", y="species", clf=rf, ax=ax[1]) 66 | 67 | ax[0].get_legend().remove() 68 | ax[0].set_title("SVC") 69 | ax[1].set_title("RF") 70 | 71 | plt.savefig("./docs/img/loreplot_other_clf.png", dpi=150) 72 | plt.show() 73 | 74 | # Basic Lore Plot with default style with one confounder 75 | loreplot( 76 | data=iris_df, 77 | x="sepal width (cm)", 78 | y="species", 79 | confounders=[("petal width (cm)", 1)], 80 | ) 81 | plt.savefig("./docs/img/loreplot_confounder.png", dpi=150) 82 | plt.show() 83 | 84 | # Basic Lore Plot with some jitter 85 | iris_df["sepal width (cm)"] = ( 86 | np.round(iris_df["sepal width (cm)"] * 3) / 3 87 | ) # Round values 88 | 89 | loreplot(data=iris_df, x="sepal width (cm)", y="species", jitter=0.05) 90 | plt.savefig("./docs/img/loreplot_jitter.png", dpi=150) 91 | plt.show() 92 | 93 | ### Generate some plots that can be used for the documentation 94 | 95 | iris_obj = load_iris() 96 | iris_df = pd.DataFrame(iris_obj.data, columns=iris_obj.feature_names) 97 | 98 | iris_df["species"] = [iris_obj.target_names[s] for s in iris_obj.target] 99 | 100 | iris_df["sepal_group"] = iris_df["sepal width (cm)"].apply( 101 | lambda x: "small" if x < 3 else "large" 102 | ) 103 | count_df = ( 104 | iris_df.groupby(["species", "sepal_group"], as_index=False) 105 | .size() 106 | .pivot_table(index="sepal_group", columns="species", values="size") 107 | ) 108 | 109 | totals = count_df.sum(axis=1) 110 | 111 | count_df = count_df.div(totals, axis=0).sort_index(ascending=False) 112 | count_df.plot.bar(stacked=True) 113 | 114 | plt.tight_layout() 115 | plt.savefig("./docs/img/threshold.png", dpi=150) 116 | plt.show() 117 | 118 | iris_df["sepal_bin"] = pd.cut(iris_df["sepal width (cm)"], 6) 119 | count_df = ( 120 | iris_df.groupby(["species", "sepal_bin"], as_index=False, observed=False) 121 | .size() 122 | .pivot_table(index="sepal_bin", columns="species", values="size", observed=False) 123 | ) 124 | 125 | label_df = iris_df.groupby("sepal_bin", as_index=False, observed=False).size() 126 | label_df["label"] = label_df.apply( 127 | lambda x: str(x["sepal_bin"]) + " (n=" + str(x["size"]) + ")", axis=1 128 | ) 129 | 130 | totals = count_df.sum(axis=1) 131 | 132 | count_df = count_df.div(totals, axis=0).sort_index(ascending=True) 133 | count_df = ( 134 | pd.merge(count_df, label_df, left_index=True, right_on="sepal_bin") 135 | .set_index("label") 136 | .drop(columns=["sepal_bin", "size"]) 137 | ) 138 | count_df.plot.bar(stacked=True) 139 | 140 | plt.tight_layout() 141 | plt.savefig("./docs/img/bins.png", dpi=150) 142 | plt.show() 143 | 144 | 145 | iris_df["sepal_cut"] = pd.qcut(iris_df["sepal width (cm)"], 6, duplicates="drop") 146 | count_df = ( 147 | iris_df.groupby(["species", "sepal_cut"], as_index=False, observed=False) 148 | .size() 149 | .pivot_table(index="sepal_cut", columns="species", values="size", observed=False) 150 | ) 151 | 152 | label_df = iris_df.groupby("sepal_cut", as_index=False, observed=False).size() 153 | label_df["label"] = label_df.apply( 154 | lambda x: str(x["sepal_cut"]) + " (n=" + str(x["size"]) + ")", axis=1 155 | ) 156 | 157 | totals = count_df.sum(axis=1) 158 | 159 | count_df = count_df.div(totals, axis=0).sort_index(ascending=True) 160 | count_df = ( 161 | pd.merge(count_df, label_df, left_index=True, right_on="sepal_cut") 162 | .set_index("label") 163 | .drop(columns=["sepal_cut", "size"]) 164 | ) 165 | count_df.plot.bar(stacked=True) 166 | 167 | plt.tight_layout() 168 | plt.savefig("./docs/img/percentiles.png", dpi=150) 169 | plt.show() 170 | -------------------------------------------------------------------------------- /example_uncertainty.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import pandas as pd 3 | from lorepy import uncertainty_plot 4 | from matplotlib.colors import ListedColormap 5 | from sklearn.datasets import load_iris 6 | from sklearn.svm import SVC 7 | 8 | # Load iris dataset and convert to dataframe 9 | iris_obj = load_iris() 10 | iris_df = pd.DataFrame(iris_obj.data, columns=iris_obj.feature_names) 11 | 12 | iris_df["species"] = [iris_obj.target_names[s] for s in iris_obj.target] 13 | 14 | # Default uncertainty plot 15 | uncertainty_plot(data=iris_df, x="sepal width (cm)", y="species", iterations=100) 16 | plt.savefig("./docs/img/uncertainty_default.png", dpi=150) 17 | plt.show() 18 | 19 | # Using jackknife instead of resample to assess uncertainty 20 | uncertainty_plot( 21 | data=iris_df, 22 | x="sepal width (cm)", 23 | y="species", 24 | iterations=100, 25 | jackknife_fraction=0.8, 26 | ) 27 | plt.savefig("./docs/img/uncertainty_jackknife.png", dpi=150) 28 | plt.show() 29 | 30 | # Uncertainty plot with custom colors 31 | 32 | 33 | colormap = ListedColormap(["red", "green", "blue"]) 34 | uncertainty_plot( 35 | data=iris_df, 36 | x="sepal width (cm)", 37 | y="species", 38 | iterations=100, 39 | mode="resample", 40 | colormap=colormap, 41 | ) 42 | plt.savefig("./docs/img/uncertainty_custom_color.png", dpi=150) 43 | plt.show() 44 | 45 | # Uncertainty plot with a confounder 46 | uncertainty_plot( 47 | data=iris_df, 48 | x="sepal width (cm)", 49 | y="species", 50 | iterations=100, 51 | mode="resample", 52 | confounders=[("petal width (cm)", 1)], 53 | ) 54 | plt.savefig("./docs/img/uncertainty_confounder.png", dpi=150) 55 | plt.show() 56 | 57 | # Uncertainty plot with a custom classifier 58 | svc = SVC(probability=True) 59 | 60 | uncertainty_plot( 61 | data=iris_df, 62 | x="sepal width (cm)", 63 | y="species", 64 | iterations=100, 65 | mode="resample", 66 | clf=svc, 67 | ) 68 | plt.savefig("./docs/img/uncertainty_custom_classifier.png", dpi=150) 69 | plt.show() 70 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | pythonpath = src -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | with open("README.md", "r", encoding="utf-8") as fh: 4 | long_description = fh.read() 5 | 6 | setup( 7 | name="lorepy", 8 | version="0.4.4", 9 | author="Sebastian Proost", 10 | author_email="sebastian.proost@gmail.com", 11 | description="Draw Logistic Regression Plots in Python", 12 | long_description=long_description, 13 | long_description_content_type="text/markdown", 14 | url="https://github.com/raeslab/lorepy/", 15 | project_urls={ 16 | "Bug Tracker": "https://github.com/raeslab/lorepy/issues", 17 | }, 18 | install_requires=[ 19 | "matplotlib>=3.4.1", 20 | "numpy>=1.20.2", 21 | "pandas>=1.2.4", 22 | "scikit-learn>=1.5.0", 23 | ], 24 | classifiers=[ 25 | "Programming Language :: Python :: 3", 26 | "Operating System :: OS Independent", 27 | ], 28 | license="Creative Commons Attribution-NonCommercial-ShareAlike 4.0. https://creativecommons.org/licenses/by-nc-sa/4.0/", 29 | packages=find_packages("src"), 30 | package_dir={"": "src"}, 31 | python_requires=">=3.9", 32 | ) 33 | -------------------------------------------------------------------------------- /src/lorepy/__init__.py: -------------------------------------------------------------------------------- 1 | from .lorepy import loreplot as loreplot 2 | from .uncertainty import uncertainty_plot as uncertainty_plot 3 | -------------------------------------------------------------------------------- /src/lorepy/lorepy.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Tuple 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import pandas as pd 6 | from pandas import DataFrame 7 | from sklearn.linear_model import LogisticRegression 8 | 9 | 10 | def _prepare_data(data, x, y, confounders): 11 | x_features = [x] + [i[0] for i in confounders] 12 | 13 | tmp_df = data[x_features + [y]].dropna() 14 | X_reg = np.array(tmp_df[x_features]) 15 | y_reg = np.array(tmp_df[y]) 16 | 17 | x_range = (X_reg[:, 0].min(), X_reg[:, 0].max()) 18 | 19 | return X_reg, y_reg, x_range 20 | 21 | 22 | def _get_area_df(lg, x_feature, x_range, confounders=[]) -> DataFrame: 23 | values = np.linspace(x_range[0], x_range[1], num=200) 24 | 25 | predict_df = pd.DataFrame({"values": values}) 26 | 27 | for k, v in confounders: 28 | predict_df[k] = v 29 | 30 | proba = lg.predict_proba(predict_df.values) 31 | proba_df = DataFrame(proba, columns=lg.classes_) 32 | proba_df[x_feature] = values 33 | proba_df.set_index(x_feature, inplace=True) 34 | 35 | return proba_df 36 | 37 | 38 | def _get_dots_df(X, y, lg, y_feature, confounders=[], jitter=0) -> DataFrame: 39 | output = [] 40 | 41 | for x, s in zip(X, y): 42 | if jitter != 0: 43 | x[0] += np.random.uniform(low=-jitter, high=jitter) 44 | 45 | proba = lg.predict_proba([x] + [i[1] for i in confounders]) 46 | i = list(lg.classes_).index(s) 47 | min_value = sum(proba[0][:i]) 48 | max_value = sum(proba[0][: i + 1]) 49 | margin = (max_value - min_value) / 10 50 | ypos = np.random.uniform(low=min_value + margin, high=max_value - margin) 51 | output.append({y_feature: s, "x": x[0], "y": ypos}) 52 | 53 | return DataFrame(output) 54 | 55 | 56 | def loreplot( 57 | data: DataFrame, 58 | x: str, 59 | y: str, 60 | add_dots: bool = True, 61 | x_range: Optional[Tuple[float, float]] = None, 62 | scatter_kws: dict = dict({}), 63 | ax=None, 64 | clf=None, 65 | confounders=[], 66 | jitter=0, 67 | **kwargs, 68 | ): 69 | """ 70 | Code to create a loreplot with a numerical feature on the v-axis and categorical y from a pandas dataset 71 | 72 | :param data: Pandas dataframe with data 73 | :param x: Needs to be a numerical feature 74 | :param y: Categorical feature 75 | :param add_dots: Shows where true samples are in the plot (cannot be enabled when deconfounding for additional variables) 76 | :param x_range: Either None (range will be selected automatically) or a tuple with min and max value for the v-axis 77 | :param scatter_kws: Dictionary with keyword arguments to pass to the scatter function 78 | :param ax: subplot to draw on, in case lorepy is used in a subplot 79 | :param clf: provide a different scikit-learn classifier for the function. Should implement the predict_proba() and fit() 80 | :param confounders: list of tuples with the feature and reference value e.g. [("BMI", 25)] will confounders BMI and use a reference of 25 for plots 81 | :param jitter: adds random noise to the x-position of dots. This can help avoid overplotting when integer values are used for the numerical features 82 | :param kwargs: Additional arguments to pass to pandas' plot.area function 83 | """ 84 | if ax is None: 85 | ax = plt.gca() 86 | 87 | X_reg, y_reg, r = _prepare_data(data, x, y, confounders) 88 | 89 | if x_range is None: 90 | x_range = r 91 | 92 | lg = LogisticRegression() if clf is None else clf 93 | lg.fit(X_reg, y_reg) 94 | 95 | if "linestyle" not in kwargs.keys(): 96 | kwargs["linestyle"] = "None" 97 | 98 | area_df = _get_area_df(lg, x, x_range, confounders=confounders) 99 | area_df.plot.area(ax=ax, **kwargs) 100 | 101 | if add_dots and len(confounders) == 0: 102 | dot_df = _get_dots_df(X_reg, y_reg, lg, y, jitter=jitter) 103 | if "color" not in scatter_kws.keys(): 104 | scatter_kws["color"] = "w" 105 | if "alpha" not in scatter_kws.keys(): 106 | scatter_kws["alpha"] = 0.3 107 | ax.scatter(dot_df["x"], dot_df["y"], **scatter_kws) 108 | 109 | ax.set_xlim(*x_range) 110 | 111 | ax.set_ylim(0, 1) 112 | -------------------------------------------------------------------------------- /src/lorepy/uncertainty.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from matplotlib import pyplot as plt 4 | from pandas import DataFrame 5 | from sklearn.linear_model import LogisticRegression 6 | from sklearn.model_selection import train_test_split 7 | from sklearn.utils import resample 8 | 9 | from lorepy.lorepy import _get_area_df, _prepare_data 10 | 11 | 12 | def _get_uncertainty_data( 13 | x: str, 14 | X_reg, 15 | y_reg, 16 | x_range, 17 | mode="resample", 18 | jackknife_fraction: float = 0.8, 19 | iterations: int = 100, 20 | confounders=None, 21 | clf=None, 22 | ): 23 | confounders = [] if confounders is None else confounders 24 | 25 | areas = [] 26 | for i in range(iterations): 27 | if mode == "jackknife": 28 | X_keep, _, y_keep, _ = train_test_split( 29 | X_reg, y_reg, train_size=jackknife_fraction 30 | ) 31 | elif mode == "resample": 32 | X_keep, y_keep = resample(X_reg, y_reg, replace=True) 33 | else: 34 | raise NotImplementedError( 35 | f"Mode {mode} is unsupported, only jackknife and resample are valid modes" 36 | ) 37 | 38 | lg = LogisticRegression() if clf is None else clf 39 | lg.fit(X_keep, y_keep) 40 | new_area = _get_area_df(lg, x, x_range, confounders=confounders).reset_index() 41 | 42 | areas.append(new_area) 43 | 44 | long_df = pd.concat(areas).melt(id_vars=[x]).sort_values(x) 45 | 46 | output = ( 47 | long_df.groupby([x, "variable"]) 48 | .agg( 49 | min=pd.NamedAgg(column="value", aggfunc="min"), 50 | mean=pd.NamedAgg(column="value", aggfunc="mean"), 51 | max=pd.NamedAgg(column="value", aggfunc="max"), 52 | low_95=pd.NamedAgg(column="value", aggfunc=lambda v: np.percentile(v, 2.5)), 53 | high_95=pd.NamedAgg( 54 | column="value", aggfunc=lambda v: np.percentile(v, 97.5) 55 | ), 56 | low_50=pd.NamedAgg(column="value", aggfunc=lambda v: np.percentile(v, 25)), 57 | high_50=pd.NamedAgg(column="value", aggfunc=lambda v: np.percentile(v, 75)), 58 | ) 59 | .reset_index() 60 | ) 61 | 62 | return output, long_df 63 | 64 | 65 | def uncertainty_plot( 66 | data: DataFrame, 67 | x: str, 68 | y: str, 69 | x_range=None, 70 | mode="resample", 71 | jackknife_fraction=0.8, 72 | iterations=100, 73 | confounders=[], 74 | colormap=None, 75 | clf=None, 76 | ax=None, 77 | ): 78 | """ 79 | Code to create a multi-panel plot, one panel for each category, with the prevalence of that category across the 80 | range of x-values, along with the uncertainty (intervals containing 50% and 95% of the samples are shown) 81 | 82 | :param data: Pandas dataframe with data 83 | :param x: Needs to be a numerical feature 84 | :param y: Categorical feature 85 | :param x_range: Either None (range will be selected automatically) or a tuple with min and max value for the x-axis 86 | :param mode: Sampling method, either "resample" (bootstrap) or "jackknife" (default = "resample") 87 | :param jackknife_fraction: Fraction of data to retain for each jackknife sample (default = 0.8) 88 | :param iterations: Number of iterations for resampling or jackknife (default = 100) 89 | :param confounders: List of tuples with the feature and reference value e.g., [("BMI", 25)] will use a reference of 25 for plots 90 | :param colormap: Colormap to use for the plot, default is None in which case matplotlib's default will be used 91 | :param clf: Provide a different scikit-learn classifier for the function. Should implement the predict_proba() and fit(). If None a LogisticRegression will be used. 92 | :param ax: Optional. List of matplotlib Axes to plot into. If None, a new figure and axes will be created. 93 | :return: A tuple containing the figure and axes objects 94 | """ 95 | X_reg, y_reg, r = _prepare_data(data, x, y, confounders) 96 | 97 | if x_range is None: 98 | x_range = r 99 | 100 | plot_df, _ = _get_uncertainty_data( 101 | x, 102 | X_reg, 103 | y_reg, 104 | x_range, 105 | mode=mode, 106 | jackknife_fraction=jackknife_fraction, 107 | iterations=iterations, 108 | confounders=confounders, 109 | clf=clf, 110 | ) 111 | 112 | categories = plot_df.variable.unique() 113 | 114 | if ax is None: 115 | fig, axs = plt.subplots(ncols=len(categories), sharex=True, sharey=True) 116 | else: 117 | assert len(ax) == len( 118 | categories 119 | ), "Length of ax must match number of categories" 120 | fig = ax[0].figure 121 | axs = ax 122 | 123 | cmap = plt.get_cmap("tab10") if colormap is None else colormap 124 | 125 | for idx, category in enumerate(categories): 126 | cat_df = plot_df[plot_df.variable == category] 127 | 128 | axs[idx].fill_between( 129 | cat_df[x], cat_df["low_95"], cat_df["high_95"], alpha=0.1, color=cmap(idx) 130 | ) 131 | axs[idx].fill_between( 132 | cat_df[x], cat_df["low_50"], cat_df["high_50"], alpha=0.2, color=cmap(idx) 133 | ) 134 | axs[idx].plot(cat_df[x], cat_df["mean"], color=cmap(idx)) 135 | axs[idx].set_title(categories[idx]) 136 | axs[idx].set_xlabel(x) 137 | 138 | axs[idx].set_xlim(*x_range) 139 | axs[idx].set_ylim(0, 1) 140 | 141 | return fig, axs 142 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_plot.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import pandas as pd 4 | from lorepy.lorepy import _get_area_df, _get_dots_df, loreplot 5 | from pandas import DataFrame 6 | from sklearn.linear_model import LogisticRegression 7 | from sklearn.svm import SVC 8 | 9 | import pytest 10 | 11 | 12 | @pytest.fixture 13 | def sample_data(): 14 | X = np.concatenate([np.random.randint(0, 10, 50), np.random.randint(2, 12, 50)]) 15 | y = [0] * 50 + [1] * 50 16 | z = X 17 | return pd.DataFrame({"x": X, "y": y, "z": z}) 18 | 19 | 20 | @pytest.fixture 21 | def logistic_regression_model(): 22 | X_reg = np.array([1.0, 2.0, 3.0, 4.0, 5.0]).reshape(-1, 1) 23 | y_reg = np.array([0, 1, 0, 1, 1]) 24 | lg = LogisticRegression() 25 | lg.fit(X_reg, y_reg) 26 | return X_reg, y_reg, lg 27 | 28 | 29 | # Test case for loreplot with default parameters 30 | def test_loreplot_default(sample_data): 31 | loreplot(sample_data, "x", "y") # first test without specifying the axis 32 | 33 | fig, ax = plt.subplots() 34 | loreplot(sample_data, "x", "y", ax=ax) 35 | assert ax.get_title() == "" 36 | assert ax.get_xlabel() == "x" 37 | assert ax.get_ylabel() == "" 38 | 39 | 40 | # Test case for loreplot with jitter 41 | def test_loreplot_jitter(sample_data): 42 | loreplot(sample_data, "x", "y") # first test without specifying the axis 43 | 44 | fig, ax = plt.subplots() 45 | loreplot(sample_data, "x", "y", ax=ax, jitter=0.05) 46 | assert ax.get_title() == "" 47 | assert ax.get_xlabel() == "x" 48 | assert ax.get_ylabel() == "" 49 | 50 | 51 | # Test case for loreplot with confounder 52 | def test_loreplot_confounder(sample_data): 53 | loreplot( 54 | sample_data, "x", "y", confounders=[("z", 1)] 55 | ) # first test without specifying the axis 56 | 57 | fig, ax = plt.subplots() 58 | loreplot(sample_data, "x", "y", ax=ax) 59 | assert ax.get_title() == "" 60 | assert ax.get_xlabel() == "x" 61 | assert ax.get_ylabel() == "" 62 | 63 | 64 | # Test case for loreplot with custom clf 65 | def test_loreplot_custom_clf(sample_data): 66 | svc = SVC(probability=True) 67 | loreplot(sample_data, "x", "y", clf=svc) 68 | 69 | fig, ax = plt.subplots() 70 | loreplot(sample_data, "x", "y", ax=ax) 71 | assert ax.get_title() == "" 72 | assert ax.get_xlabel() == "x" 73 | assert ax.get_ylabel() == "" 74 | 75 | 76 | # Test case for loreplot with custom parameters 77 | def test_loreplot_custom(sample_data): 78 | fig, ax = plt.subplots() 79 | loreplot( 80 | sample_data, 81 | "x", 82 | "y", 83 | add_dots=False, 84 | x_range=(0, 5), 85 | ax=ax, 86 | color=["r", "b"], 87 | linestyle="-", 88 | ) 89 | assert ax.get_title() == "" 90 | assert ax.get_xlabel() == "x" 91 | assert ax.get_ylabel() == "" 92 | 93 | 94 | # Test case for loreplot with add_dots=True 95 | def test_loreplot_with_dots(sample_data): 96 | fig, ax = plt.subplots() 97 | loreplot(sample_data, "x", "y", add_dots=True, ax=ax) 98 | assert ax.get_title() == "" 99 | assert ax.get_xlabel() == "x" 100 | assert ax.get_ylabel() == "" 101 | 102 | 103 | # Sample data for testing internal functions 104 | X_reg = np.array([1.0, 2.0, 3.0, 4.0, 5.0]).reshape(-1, 1) 105 | y_reg = np.array([0, 1, 0, 1, 1]) 106 | lg = LogisticRegression() 107 | lg.fit(X_reg, y_reg) 108 | 109 | 110 | # Test case for _get_dots_df 111 | def test_get_dots_df(): 112 | dots_df = _get_dots_df(X_reg, y_reg, lg, "y") 113 | assert isinstance(dots_df, DataFrame) 114 | assert "x" in dots_df.columns 115 | assert "y" in dots_df.columns 116 | assert "y_feature" not in dots_df.columns 117 | assert len(dots_df) == len(X_reg) 118 | 119 | 120 | # Test case for _get_area_df 121 | def test_get_area_df(): 122 | area_df = _get_area_df(lg, "x", (X_reg.min(), X_reg.max())) 123 | assert isinstance(area_df, DataFrame) 124 | assert "x" not in area_df.columns 125 | assert 0 in area_df.columns 126 | assert 1 in area_df.columns 127 | assert len(area_df) == 200 128 | assert area_df.index[0] == X_reg.min() 129 | assert area_df.index[-1] == X_reg.max() 130 | -------------------------------------------------------------------------------- /tests/test_uncertainty.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import pytest 4 | from lorepy import uncertainty_plot 5 | from matplotlib.colors import ListedColormap 6 | from matplotlib import pyplot as plt 7 | from sklearn.svm import SVC 8 | 9 | 10 | @pytest.fixture 11 | def sample_data(): 12 | X = np.concatenate([np.random.randint(0, 10, 50), np.random.randint(2, 12, 50)]) 13 | y = [0] * 50 + [1] * 50 14 | z = X 15 | return pd.DataFrame({"x": X, "y": y, "z": z}) 16 | 17 | 18 | @pytest.fixture 19 | def custom_colormap(): 20 | return ListedColormap(["red", "green", "blue"]) 21 | 22 | 23 | # Test case for lorepy's uncertainty plot with default parameters 24 | def test_uncertainty_default(sample_data): 25 | fig, axs = uncertainty_plot(sample_data, "x", "y") # first test with default params 26 | 27 | assert len(axs) == 2 28 | assert axs[0].get_title() == "0" 29 | assert axs[0].get_xlabel() == "x" 30 | assert axs[0].get_ylabel() == "" 31 | 32 | 33 | # Test case for lorepy's uncertainty plot with alternative parameters 34 | def test_uncertainty_alternative(sample_data, custom_colormap): 35 | svc = SVC(probability=True) 36 | fig, axs = uncertainty_plot( 37 | sample_data, 38 | "x", 39 | "y", 40 | mode="jackknife", 41 | x_range=(5, 40), 42 | colormap=custom_colormap, 43 | clf=svc, 44 | ) 45 | 46 | assert len(axs) == 2 47 | assert axs[0].get_title() == "0" 48 | assert axs[0].get_xlabel() == "x" 49 | assert axs[0].get_ylabel() == "" 50 | 51 | 52 | def test_get_uncertainty_confounder(sample_data): 53 | fig, axs = uncertainty_plot( 54 | sample_data, "x", "y", confounders=[("z", 5)] 55 | ) # first test with default params 56 | 57 | assert len(axs) == 2 58 | assert axs[0].get_title() == "0" 59 | assert axs[0].get_xlabel() == "x" 60 | assert axs[0].get_ylabel() == "" 61 | 62 | 63 | # Test error handling when an unsupported mode is selected 64 | def test_uncertainty_incorrect_mode(sample_data): 65 | with pytest.raises(NotImplementedError): 66 | assert uncertainty_plot(sample_data, "x", "y", mode="fail") 67 | 68 | 69 | def test_uncertainty_with_existing_ax(sample_data): 70 | fig, ax = plt.subplots(1, 2) # Create 2 axes manually 71 | returned_fig, returned_axs = uncertainty_plot(sample_data, "x", "y", ax=ax) 72 | 73 | assert returned_fig is not None 74 | assert returned_axs[0] == ax[0] 75 | assert returned_axs[1] == ax[1] 76 | assert len(returned_axs) == 2 77 | assert returned_axs[0].get_title() == "0" 78 | assert returned_axs[0].get_xlabel() == "x" 79 | 80 | 81 | def test_uncertainty_incorrect_ax_length(sample_data): 82 | fig, ax = plt.subplots(1, 1) # Only one axis created, but we expect two 83 | with pytest.raises(AssertionError): 84 | uncertainty_plot(sample_data, "x", "y", ax=[ax]) 85 | -------------------------------------------------------------------------------- /timestamp: -------------------------------------------------------------------------------- 1 | Sun Jun 1 00:44:11 UTC 2025 2 | --------------------------------------------------------------------------------