├── .github
    └── workflows
    │   ├── autocheck.yml
    │   ├── autopytest.yml
    │   ├── autopytest_legacy.yml
    │   └── update_timestamp.yml
├── .gitignore
├── CITATION.cff
├── LICENSE
├── README.md
├── docs
    ├── coverage-badge.svg
    ├── dev
    │   └── requirements.txt
    ├── dev_docs.md
    ├── img
    │   ├── bins.png
    │   ├── loreplot.png
    │   ├── loreplot_confounder.png
    │   ├── loreplot_custom_color.png
    │   ├── loreplot_custom_markers.png
    │   ├── loreplot_jitter.png
    │   ├── loreplot_no_dots.png
    │   ├── loreplot_other_clf.png
    │   ├── loreplot_subplot.png
    │   ├── percentiles.png
    │   ├── threshold.png
    │   ├── uncertainty_confounder.png
    │   ├── uncertainty_custom_classifier.png
    │   ├── uncertainty_custom_color.png
    │   ├── uncertainty_default.png
    │   └── uncertainty_jackknife.png
    ├── lorepy_github_header.png
    └── lorepy_vs_bar_plots.md
├── example.py
├── example_uncertainty.py
├── pytest.ini
├── setup.py
├── src
    └── lorepy
    │   ├── __init__.py
    │   ├── lorepy.py
    │   └── uncertainty.py
├── tests
    ├── __init__.py
    ├── test_plot.py
    └── test_uncertainty.py
└── timestamp


/.github/workflows/autocheck.yml:
--------------------------------------------------------------------------------
 1 | # GitHub Action that uses Ruff and Black to check the code.
 2 | # If a step fails, Black reformats the tree and the result is committed and pushed back.
 3 | name: Run Checks
 4 | on: [ push, pull_request ]
 5 | jobs:
 6 |   build:
 7 |     runs-on: ubuntu-latest
 8 |     steps:
 9 |       - uses: actions/checkout@v4
10 |         with: # https://github.com/stefanzweifel/git-auto-commit-action#checkout-the-correct-branch
11 |           ref: ${{ github.head_ref }}
12 |       - uses: chartboost/ruff-action@v1
13 |       - name: Set up Python 3.10
14 |         uses: actions/setup-python@v5
15 |         with:
16 |           python-version: "3.10"  # quoted so YAML does not read it as the float 3.1
17 |           architecture: 'x64'
18 |       - run: pip install black
19 |       - run: black --check ./  # exits non-zero when any file would be reformatted
20 |       - name: If needed, commit black changes to a new pull request
21 |         if: failure()  # only runs when an earlier step in this job has failed
22 |         run: |
23 |           black ./
24 |           git config --global user.name autoblack_push
25 |           git config --global user.email "${GITHUB_ACTOR}@users.noreply.github.com"  # double quotes: the shell must expand GITHUB_ACTOR
26 |           git commit -am "fixup! Format Python code with psf/black push"
27 |           git push


--------------------------------------------------------------------------------
/.github/workflows/autopytest.yml:
--------------------------------------------------------------------------------
 1 | # GitHub Action that runs pytest
 2 | # It also regenerates docs/coverage-badge.svg and pushes it when the badge changed.
 3 | name: Run Pytest
 4 | on:
 5 |   workflow_dispatch:
 6 |   push:
 7 |   schedule:
 8 |     # Execute once a week, on Monday at 01:00 (GitHub evaluates cron in UTC)
 9 |     - cron: '0 1 * * 1'
10 | jobs:
11 |   build:
12 |     runs-on: ubuntu-latest
13 |     # NOTE(review): every matrix job below runs the badge commit/push steps - confirm concurrent pushes are acceptable
14 |     strategy:
15 |       matrix:
16 |         python-version: ["3.9", "3.10", "3.11", "3.12"]
17 | 
18 |     steps:
19 |       - uses: actions/checkout@v4
20 |         with: # https://github.com/stefanzweifel/git-auto-commit-action#checkout-the-correct-branch
21 |           ref: ${{ github.head_ref }}
22 |       - name: Set up Python ${{ matrix.python-version }}
23 |         uses: actions/setup-python@v5
24 |         with:
25 |           python-version: ${{ matrix.python-version }}
26 |           architecture: 'x64'
27 |       - run: |
28 |           pip install .
29 |           pip install pytest
30 |           pip install pytest-cov
31 |       - name: Run tests
32 |         run: |
33 |           pytest --exitfirst --verbose --failed-first --cov=src tests/ --cov-report=term-missing --cov-report=xml
34 |       - name: Generate Coverage Badge
35 |         run: |
36 |           pip install setuptools
37 |           pip install "genbadge[coverage]"
38 |           genbadge coverage -i coverage.xml -o docs/coverage-badge.svg
39 |       - run: git diff --exit-code ./docs/coverage-badge.svg  # non-zero exit when the regenerated badge differs
40 |       - name: Update Coverage Badge if needed
41 |         if: failure()  # only runs when an earlier step in this job has failed
42 |         run: |
43 |           git config --local user.email "${GITHUB_ACTOR}@users.noreply.github.com"  # double quotes: the shell must expand GITHUB_ACTOR
44 |           git config --local user.name "test-webservices[bot]"
45 |           git add ./docs/coverage-badge.svg
46 |           git commit -m "Update cover badge"
47 |           git push
48 | 
49 | 


--------------------------------------------------------------------------------
/.github/workflows/autopytest_legacy.yml:
--------------------------------------------------------------------------------
 1 | # GitHub Action that runs pytest with the oldest packages specified in setup.py
 2 | # Runs on manual dispatch, on every push, and on a weekly schedule.
 3 | name: Run Legacy Pytest
 4 | on:
 5 |   workflow_dispatch:
 6 |   push:
 7 |   schedule:
 8 |     # Execute once a week, on Monday at 00:30 (GitHub evaluates cron in UTC)
 9 |     - cron:  '30 0 * * 1'
10 | jobs:
11 |   build:
12 |     runs-on: ubuntu-latest
13 | 
14 |     steps:
15 |       - uses: actions/checkout@v4
16 |         with: # https://github.com/stefanzweifel/git-auto-commit-action#checkout-the-correct-branch
17 |           ref: ${{ github.head_ref }}
18 |       - name: Set up Python 3.8
19 |         uses: actions/setup-python@v5
20 |         with:
21 |           python-version: '3.8'
22 |           architecture: 'x64'
23 |       - run: |  # install the pinned dependency versions first, then the package itself and the test tools
24 |           pip install matplotlib==3.4.1 numpy==1.20.2 pandas==1.2.4 scikit-learn==0.24.1
25 |           pip install .
26 |           pip install pytest
27 |           pip install pytest-cov
28 |       - name: Run tests
29 |         run: |  # stop at the first failure; report coverage of src to the terminal and to coverage.xml
30 |           pytest --exitfirst --verbose --failed-first --cov=src tests/ --cov-report=term-missing --cov-report=xml


--------------------------------------------------------------------------------
/.github/workflows/update_timestamp.yml:
--------------------------------------------------------------------------------
 1 | # Updates the timestamp monthly so tests keep working
 2 | # Writes the current date to the `timestamp` file, then commits and pushes it.
 3 | name: Update Timestamp
 4 | on:
 5 |   workflow_dispatch:
 6 |   schedule:
 7 |     # Execute on the first of every month at 00:01 (GitHub evaluates cron in UTC)
 8 |     - cron: '1 0 1 * *'
 9 | jobs:
10 |   build:
11 |     runs-on: ubuntu-latest
12 | 
13 |     steps:
14 |       - uses: actions/checkout@v4
15 |         with: # https://github.com/stefanzweifel/git-auto-commit-action#checkout-the-correct-branch
16 |           ref: ${{ github.head_ref }}
17 |       # Overwrite the timestamp file with the current date
18 |       - name: Update timestamp
19 |         run: |
20 |           date > timestamp
21 |       # Commit and push the refreshed timestamp
22 |       - name: Commit and Push files
23 |         run: |
24 |           git config --local user.email "${GITHUB_ACTOR}@users.noreply.github.com"  # double quotes: the shell must expand GITHUB_ACTOR
25 |           git config --local user.name "test-webservices[bot]"
26 |           git add timestamp
27 |           git commit -m "Update timestamp last run" -a
28 |           git push
29 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
159 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | .idea/
161 | 
162 | .ruff_cache/


--------------------------------------------------------------------------------
/CITATION.cff:
--------------------------------------------------------------------------------
 1 | cff-version: 1.2.0
 2 | message: "If you use this software, please cite it as below."
 3 | authors:
 4 | - family-names: "Proost"
 5 |   given-names: "Sebastian"
 6 |   orcid: "https://orcid.org/0000-0002-6792-9442"
 7 | - family-names: "Vieira-Silva"
 8 |   given-names: "Sara"
 9 |   orcid: "https://orcid.org/0000-0002-4616-7602"
10 | - family-names: "Raes"
11 |   given-names: "Jeroen"
12 |   orcid: "https://orcid.org/0000-0002-1337-041X"
13 | title: "lorepy: Logistic Regression Plots for Python"
14 | version: 0.2.0
15 | doi: 10.5281/zenodo.8321785
16 | date-released: 2023-09-07
17 | url: "https://github.com/raeslab/lorepy"
18 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | Attribution-NonCommercial-ShareAlike 4.0 International
  2 | 
  3 | =======================================================================
  4 | 
  5 | Creative Commons Corporation ("Creative Commons") is not a law firm and
  6 | does not provide legal services or legal advice. Distribution of
  7 | Creative Commons public licenses does not create a lawyer-client or
  8 | other relationship. Creative Commons makes its licenses and related
  9 | information available on an "as-is" basis. Creative Commons gives no
 10 | warranties regarding its licenses, any material licensed under their
 11 | terms and conditions, or any related information. Creative Commons
 12 | disclaims all liability for damages resulting from their use to the
 13 | fullest extent possible.
 14 | 
 15 | Using Creative Commons Public Licenses
 16 | 
 17 | Creative Commons public licenses provide a standard set of terms and
 18 | conditions that creators and other rights holders may use to share
 19 | original works of authorship and other material subject to copyright
 20 | and certain other rights specified in the public license below. The
 21 | following considerations are for informational purposes only, are not
 22 | exhaustive, and do not form part of our licenses.
 23 | 
 24 |      Considerations for licensors: Our public licenses are
 25 |      intended for use by those authorized to give the public
 26 |      permission to use material in ways otherwise restricted by
 27 |      copyright and certain other rights. Our licenses are
 28 |      irrevocable. Licensors should read and understand the terms
 29 |      and conditions of the license they choose before applying it.
 30 |      Licensors should also secure all rights necessary before
 31 |      applying our licenses so that the public can reuse the
 32 |      material as expected. Licensors should clearly mark any
 33 |      material not subject to the license. This includes other CC-
 34 |      licensed material, or material used under an exception or
 35 |      limitation to copyright. More considerations for licensors:
 36 |     wiki.creativecommons.org/Considerations_for_licensors
 37 | 
 38 |      Considerations for the public: By using one of our public
 39 |      licenses, a licensor grants the public permission to use the
 40 |      licensed material under specified terms and conditions. If
 41 |      the licensor's permission is not necessary for any reason--for
 42 |      example, because of any applicable exception or limitation to
 43 |      copyright--then that use is not regulated by the license. Our
 44 |      licenses grant only permissions under copyright and certain
 45 |      other rights that a licensor has authority to grant. Use of
 46 |      the licensed material may still be restricted for other
 47 |      reasons, including because others have copyright or other
 48 |      rights in the material. A licensor may make special requests,
 49 |      such as asking that all changes be marked or described.
 50 |      Although not required by our licenses, you are encouraged to
 51 |      respect those requests where reasonable. More considerations
 52 |      for the public:
 53 |     wiki.creativecommons.org/Considerations_for_licensees
 54 | 
 55 | =======================================================================
 56 | 
 57 | Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International
 58 | Public License
 59 | 
 60 | By exercising the Licensed Rights (defined below), You accept and agree
 61 | to be bound by the terms and conditions of this Creative Commons
 62 | Attribution-NonCommercial-ShareAlike 4.0 International Public License
 63 | ("Public License"). To the extent this Public License may be
 64 | interpreted as a contract, You are granted the Licensed Rights in
 65 | consideration of Your acceptance of these terms and conditions, and the
 66 | Licensor grants You such rights in consideration of benefits the
 67 | Licensor receives from making the Licensed Material available under
 68 | these terms and conditions.
 69 | 
 70 | 
 71 | Section 1 -- Definitions.
 72 | 
 73 |   a. Adapted Material means material subject to Copyright and Similar
 74 |      Rights that is derived from or based upon the Licensed Material
 75 |      and in which the Licensed Material is translated, altered,
 76 |      arranged, transformed, or otherwise modified in a manner requiring
 77 |      permission under the Copyright and Similar Rights held by the
 78 |      Licensor. For purposes of this Public License, where the Licensed
 79 |      Material is a musical work, performance, or sound recording,
 80 |      Adapted Material is always produced where the Licensed Material is
 81 |      synched in timed relation with a moving image.
 82 | 
 83 |   b. Adapter's License means the license You apply to Your Copyright
 84 |      and Similar Rights in Your contributions to Adapted Material in
 85 |      accordance with the terms and conditions of this Public License.
 86 | 
 87 |   c. BY-NC-SA Compatible License means a license listed at
 88 |      creativecommons.org/compatiblelicenses, approved by Creative
 89 |      Commons as essentially the equivalent of this Public License.
 90 | 
 91 |   d. Copyright and Similar Rights means copyright and/or similar rights
 92 |      closely related to copyright including, without limitation,
 93 |      performance, broadcast, sound recording, and Sui Generis Database
 94 |      Rights, without regard to how the rights are labeled or
 95 |      categorized. For purposes of this Public License, the rights
 96 |      specified in Section 2(b)(1)-(2) are not Copyright and Similar
 97 |      Rights.
 98 | 
 99 |   e. Effective Technological Measures means those measures that, in the
100 |      absence of proper authority, may not be circumvented under laws
101 |      fulfilling obligations under Article 11 of the WIPO Copyright
102 |      Treaty adopted on December 20, 1996, and/or similar international
103 |      agreements.
104 | 
105 |   f. Exceptions and Limitations means fair use, fair dealing, and/or
106 |      any other exception or limitation to Copyright and Similar Rights
107 |      that applies to Your use of the Licensed Material.
108 | 
109 |   g. License Elements means the license attributes listed in the name
110 |      of a Creative Commons Public License. The License Elements of this
111 |      Public License are Attribution, NonCommercial, and ShareAlike.
112 | 
113 |   h. Licensed Material means the artistic or literary work, database,
114 |      or other material to which the Licensor applied this Public
115 |      License.
116 | 
117 |   i. Licensed Rights means the rights granted to You subject to the
118 |      terms and conditions of this Public License, which are limited to
119 |      all Copyright and Similar Rights that apply to Your use of the
120 |      Licensed Material and that the Licensor has authority to license.
121 | 
122 |   j. Licensor means the individual(s) or entity(ies) granting rights
123 |      under this Public License.
124 | 
125 |   k. NonCommercial means not primarily intended for or directed towards
126 |      commercial advantage or monetary compensation. For purposes of
127 |      this Public License, the exchange of the Licensed Material for
128 |      other material subject to Copyright and Similar Rights by digital
129 |      file-sharing or similar means is NonCommercial provided there is
130 |      no payment of monetary compensation in connection with the
131 |      exchange.
132 | 
133 |   l. Share means to provide material to the public by any means or
134 |      process that requires permission under the Licensed Rights, such
135 |      as reproduction, public display, public performance, distribution,
136 |      dissemination, communication, or importation, and to make material
137 |      available to the public including in ways that members of the
138 |      public may access the material from a place and at a time
139 |      individually chosen by them.
140 | 
141 |   m. Sui Generis Database Rights means rights other than copyright
142 |      resulting from Directive 96/9/EC of the European Parliament and of
143 |      the Council of 11 March 1996 on the legal protection of databases,
144 |      as amended and/or succeeded, as well as other essentially
145 |      equivalent rights anywhere in the world.
146 | 
147 |   n. You means the individual or entity exercising the Licensed Rights
148 |      under this Public License. Your has a corresponding meaning.
149 | 
150 | 
151 | Section 2 -- Scope.
152 | 
153 |   a. License grant.
154 | 
155 |        1. Subject to the terms and conditions of this Public License,
156 |           the Licensor hereby grants You a worldwide, royalty-free,
157 |           non-sublicensable, non-exclusive, irrevocable license to
158 |           exercise the Licensed Rights in the Licensed Material to:
159 | 
160 |             a. reproduce and Share the Licensed Material, in whole or
161 |                in part, for NonCommercial purposes only; and
162 | 
163 |             b. produce, reproduce, and Share Adapted Material for
164 |                NonCommercial purposes only.
165 | 
166 |        2. Exceptions and Limitations. For the avoidance of doubt, where
167 |           Exceptions and Limitations apply to Your use, this Public
168 |           License does not apply, and You do not need to comply with
169 |           its terms and conditions.
170 | 
171 |        3. Term. The term of this Public License is specified in Section
172 |           6(a).
173 | 
174 |        4. Media and formats; technical modifications allowed. The
175 |           Licensor authorizes You to exercise the Licensed Rights in
176 |           all media and formats whether now known or hereafter created,
177 |           and to make technical modifications necessary to do so. The
178 |           Licensor waives and/or agrees not to assert any right or
179 |           authority to forbid You from making technical modifications
180 |           necessary to exercise the Licensed Rights, including
181 |           technical modifications necessary to circumvent Effective
182 |           Technological Measures. For purposes of this Public License,
183 |           simply making modifications authorized by this Section 2(a)
184 |           (4) never produces Adapted Material.
185 | 
186 |        5. Downstream recipients.
187 | 
188 |             a. Offer from the Licensor -- Licensed Material. Every
189 |                recipient of the Licensed Material automatically
190 |                receives an offer from the Licensor to exercise the
191 |                Licensed Rights under the terms and conditions of this
192 |                Public License.
193 | 
194 |             b. Additional offer from the Licensor -- Adapted Material.
195 |                Every recipient of Adapted Material from You
196 |                automatically receives an offer from the Licensor to
197 |                exercise the Licensed Rights in the Adapted Material
198 |                under the conditions of the Adapter's License You apply.
199 | 
200 |             c. No downstream restrictions. You may not offer or impose
201 |                any additional or different terms or conditions on, or
202 |                apply any Effective Technological Measures to, the
203 |                Licensed Material if doing so restricts exercise of the
204 |                Licensed Rights by any recipient of the Licensed
205 |                Material.
206 | 
207 |        6. No endorsement. Nothing in this Public License constitutes or
208 |           may be construed as permission to assert or imply that You
209 |           are, or that Your use of the Licensed Material is, connected
210 |           with, or sponsored, endorsed, or granted official status by,
211 |           the Licensor or others designated to receive attribution as
212 |           provided in Section 3(a)(1)(A)(i).
213 | 
214 |   b. Other rights.
215 | 
216 |        1. Moral rights, such as the right of integrity, are not
217 |           licensed under this Public License, nor are publicity,
218 |           privacy, and/or other similar personality rights; however, to
219 |           the extent possible, the Licensor waives and/or agrees not to
220 |           assert any such rights held by the Licensor to the limited
221 |           extent necessary to allow You to exercise the Licensed
222 |           Rights, but not otherwise.
223 | 
224 |        2. Patent and trademark rights are not licensed under this
225 |           Public License.
226 | 
227 |        3. To the extent possible, the Licensor waives any right to
228 |           collect royalties from You for the exercise of the Licensed
229 |           Rights, whether directly or through a collecting society
230 |           under any voluntary or waivable statutory or compulsory
231 |           licensing scheme. In all other cases the Licensor expressly
232 |           reserves any right to collect such royalties, including when
233 |           the Licensed Material is used other than for NonCommercial
234 |           purposes.
235 | 
236 | 
237 | Section 3 -- License Conditions.
238 | 
239 | Your exercise of the Licensed Rights is expressly made subject to the
240 | following conditions.
241 | 
242 |   a. Attribution.
243 | 
244 |        1. If You Share the Licensed Material (including in modified
245 |           form), You must:
246 | 
247 |             a. retain the following if it is supplied by the Licensor
248 |                with the Licensed Material:
249 | 
250 |                  i. identification of the creator(s) of the Licensed
251 |                     Material and any others designated to receive
252 |                     attribution, in any reasonable manner requested by
253 |                     the Licensor (including by pseudonym if
254 |                     designated);
255 | 
256 |                 ii. a copyright notice;
257 | 
258 |                iii. a notice that refers to this Public License;
259 | 
260 |                 iv. a notice that refers to the disclaimer of
261 |                     warranties;
262 | 
263 |                  v. a URI or hyperlink to the Licensed Material to the
264 |                     extent reasonably practicable;
265 | 
266 |             b. indicate if You modified the Licensed Material and
267 |                retain an indication of any previous modifications; and
268 | 
269 |             c. indicate the Licensed Material is licensed under this
270 |                Public License, and include the text of, or the URI or
271 |                hyperlink to, this Public License.
272 | 
273 |        2. You may satisfy the conditions in Section 3(a)(1) in any
274 |           reasonable manner based on the medium, means, and context in
275 |           which You Share the Licensed Material. For example, it may be
276 |           reasonable to satisfy the conditions by providing a URI or
277 |           hyperlink to a resource that includes the required
278 |           information.
279 |        3. If requested by the Licensor, You must remove any of the
280 |           information required by Section 3(a)(1)(A) to the extent
281 |           reasonably practicable.
282 | 
283 |   b. ShareAlike.
284 | 
285 |      In addition to the conditions in Section 3(a), if You Share
286 |      Adapted Material You produce, the following conditions also apply.
287 | 
288 |        1. The Adapter's License You apply must be a Creative Commons
289 |           license with the same License Elements, this version or
290 |           later, or a BY-NC-SA Compatible License.
291 | 
292 |        2. You must include the text of, or the URI or hyperlink to, the
293 |           Adapter's License You apply. You may satisfy this condition
294 |           in any reasonable manner based on the medium, means, and
295 |           context in which You Share Adapted Material.
296 | 
297 |        3. You may not offer or impose any additional or different terms
298 |           or conditions on, or apply any Effective Technological
299 |           Measures to, Adapted Material that restrict exercise of the
300 |           rights granted under the Adapter's License You apply.
301 | 
302 | 
303 | Section 4 -- Sui Generis Database Rights.
304 | 
305 | Where the Licensed Rights include Sui Generis Database Rights that
306 | apply to Your use of the Licensed Material:
307 | 
308 |   a. for the avoidance of doubt, Section 2(a)(1) grants You the right
309 |      to extract, reuse, reproduce, and Share all or a substantial
310 |      portion of the contents of the database for NonCommercial purposes
311 |      only;
312 | 
313 |   b. if You include all or a substantial portion of the database
314 |      contents in a database in which You have Sui Generis Database
315 |      Rights, then the database in which You have Sui Generis Database
316 |      Rights (but not its individual contents) is Adapted Material,
317 |      including for purposes of Section 3(b); and
318 | 
319 |   c. You must comply with the conditions in Section 3(a) if You Share
320 |      all or a substantial portion of the contents of the database.
321 | 
322 | For the avoidance of doubt, this Section 4 supplements and does not
323 | replace Your obligations under this Public License where the Licensed
324 | Rights include other Copyright and Similar Rights.
325 | 
326 | 
327 | Section 5 -- Disclaimer of Warranties and Limitation of Liability.
328 | 
329 |   a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
330 |      EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
331 |      AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
332 |      ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
333 |      IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
334 |      WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
335 |      PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
336 |      ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
337 |      KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
338 |      ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
339 | 
340 |   b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
341 |      TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
342 |      NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
343 |      INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
344 |      COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
345 |      USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
346 |      ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
347 |      DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
348 |      IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
349 | 
350 |   c. The disclaimer of warranties and limitation of liability provided
351 |      above shall be interpreted in a manner that, to the extent
352 |      possible, most closely approximates an absolute disclaimer and
353 |      waiver of all liability.
354 | 
355 | 
356 | Section 6 -- Term and Termination.
357 | 
358 |   a. This Public License applies for the term of the Copyright and
359 |      Similar Rights licensed here. However, if You fail to comply with
360 |      this Public License, then Your rights under this Public License
361 |      terminate automatically.
362 | 
363 |   b. Where Your right to use the Licensed Material has terminated under
364 |      Section 6(a), it reinstates:
365 | 
366 |        1. automatically as of the date the violation is cured, provided
367 |           it is cured within 30 days of Your discovery of the
368 |           violation; or
369 | 
370 |        2. upon express reinstatement by the Licensor.
371 | 
372 |      For the avoidance of doubt, this Section 6(b) does not affect any
373 |      right the Licensor may have to seek remedies for Your violations
374 |      of this Public License.
375 | 
376 |   c. For the avoidance of doubt, the Licensor may also offer the
377 |      Licensed Material under separate terms or conditions or stop
378 |      distributing the Licensed Material at any time; however, doing so
379 |      will not terminate this Public License.
380 | 
381 |   d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
382 |      License.
383 | 
384 | 
385 | Section 7 -- Other Terms and Conditions.
386 | 
387 |   a. The Licensor shall not be bound by any additional or different
388 |      terms or conditions communicated by You unless expressly agreed.
389 | 
390 |   b. Any arrangements, understandings, or agreements regarding the
391 |      Licensed Material not stated herein are separate from and
392 |      independent of the terms and conditions of this Public License.
393 | 
394 | 
395 | Section 8 -- Interpretation.
396 | 
397 |   a. For the avoidance of doubt, this Public License does not, and
398 |      shall not be interpreted to, reduce, limit, restrict, or impose
399 |      conditions on any use of the Licensed Material that could lawfully
400 |      be made without permission under this Public License.
401 | 
402 |   b. To the extent possible, if any provision of this Public License is
403 |      deemed unenforceable, it shall be automatically reformed to the
404 |      minimum extent necessary to make it enforceable. If the provision
405 |      cannot be reformed, it shall be severed from this Public License
406 |      without affecting the enforceability of the remaining terms and
407 |      conditions.
408 | 
409 |   c. No term or condition of this Public License will be waived and no
410 |      failure to comply consented to unless expressly agreed to by the
411 |      Licensor.
412 | 
413 |   d. Nothing in this Public License constitutes or may be interpreted
414 |      as a limitation upon, or waiver of, any privileges and immunities
415 |      that apply to the Licensor or You, including from the legal
416 |      processes of any jurisdiction or authority.
417 | 
418 | =======================================================================
419 | 
420 | Creative Commons is not a party to its public
421 | licenses. Notwithstanding, Creative Commons may elect to apply one of
422 | its public licenses to material it publishes and in those instances
423 | will be considered the “Licensor.” The text of the Creative Commons
424 | public licenses is dedicated to the public domain under the CC0 Public
425 | Domain Dedication. Except for the limited purpose of indicating that
426 | material is shared under a Creative Commons public license or as
427 | otherwise permitted by the Creative Commons policies published at
428 | creativecommons.org/policies, Creative Commons does not authorize the
429 | use of the trademark "Creative Commons" or any other trademark or logo
430 | of Creative Commons without its prior written consent including,
431 | without limitation, in connection with any unauthorized modifications
432 | to any of its public licenses or any other arrangements,
433 | understandings, or agreements concerning use of licensed material. For
434 | the avoidance of doubt, this paragraph does not form part of the
435 | public licenses.
436 | 
437 | Creative Commons may be contacted at creativecommons.org.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | [![Run Pytest](https://github.com/raeslab/lorepy/actions/workflows/autopytest.yml/badge.svg)](https://github.com/raeslab/lorepy/actions/workflows/autopytest.yml) [![Coverage](https://raw.githubusercontent.com/raeslab/lorepy/main/docs/coverage-badge.svg)](https://raw.githubusercontent.com/raeslab/lorepy/main/docs/coverage-badge.svg) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![DOI](https://zenodo.org/badge/686018963.svg)](https://zenodo.org/badge/latestdoi/686018963) [![PyPI version](https://badge.fury.io/py/lorepy.svg)](https://badge.fury.io/py/lorepy) [![License: CC BY-NC-SA 4.0](https://img.shields.io/badge/License-CC%20BY--NC--SA%204.0-lightgrey.svg)](https://creativecommons.org/licenses/by-nc-sa/4.0/)
  2 | 
  3 | # lorepy: Logistic Regression Plots for Python
  4 | 
  5 | Logistic Regression plots are used to plot the distribution of a categorical dependent variable as a function of a 
  6 | continuous independent variable.
  7 | 
  8 | If you prefer an R implementation of this package, have a look at [loreplotr](https://github.com/raeslab/loreplotr).
  9 | 
 10 | ![LoRePlot example on Iris Dataset](https://raw.githubusercontent.com/raeslab/lorepy/main/docs/img/loreplot.png)
 11 | 
 12 | ## Why use lorepy ?
 13 | 
 14 | Lorepy offers distinct advantages over traditional methods like stacked bar plots. By employing a linear model, Lorepy 
 15 | captures overall trends across the entire feature range. It avoids arbitrary cut-offs and segmentation, enabling the 
 16 | visualization of uncertainty throughout the data range.
 17 | 
 18 | You can find examples of the Iris data visualized using stacked bar plots [here](https://github.com/raeslab/lorepy/blob/main/docs/lorepy_vs_bar_plots.md) for comparison.
 19 | 
 20 | ## Installation
 21 | 
 22 | Lorepy can be installed with pip using the command below.
 23 | 
 24 | ```
 25 | pip install lorepy
 26 | ```
 27 | 
 28 | 
 29 | ## Usage
 30 | 
 31 | Data needs to be provided as a DataFrame and the columns for the x (independent continuous) and y (dependent categorical)
 32 | variables need to be defined. Here the iris dataset is loaded and converted to an appropriate DataFrame. Once the data
 33 | is in shape it can be plotted using a single line of code ```loreplot(data=iris_df, x="sepal width (cm)", y="species")```.
 34 | 
 35 | ```python
 36 | from lorepy import loreplot
 37 | 
 38 | from sklearn.datasets import load_iris
 39 | import matplotlib.pyplot as plt
 40 | import pandas as pd
 41 | 
 42 | iris_obj = load_iris()
 43 | iris_df = pd.DataFrame(iris_obj.data, columns=iris_obj.feature_names)
 44 | 
 45 | iris_df["species"] = [iris_obj.target_names[s] for s in iris_obj.target]
 46 | 
 47 | loreplot(data=iris_df, x="sepal width (cm)", y="species")
 48 | 
 49 | plt.show()
 50 | ```
 51 | 
 52 | ## Options
 53 | 
 54 | While lorepy has very few customizations, it is possible to pass arguments through to Pandas' 
 55 | [DataFrame.plot.area](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.plot.area.html)
 56 | and Matplotlib's [pyplot.scatter](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.scatter.html) to change
 57 | the aesthetics of the plots.
 58 | 
 59 | ### Disable sample dots
 60 | 
 61 | Dots indicating where samples are located can be en-/disabled using the ```add_dots``` argument.
 62 | 
 63 | ```python
 64 | loreplot(data=iris_df, x="sepal width (cm)", y="species", add_dots=False)
 65 | plt.show()
 66 | ```
 67 | 
 68 | ![LoRePlot dots can be disabled](https://raw.githubusercontent.com/raeslab/lorepy/main/docs/img/loreplot_no_dots.png)
 69 | 
 70 | ### Custom styles
 71 | 
 72 | Additional keyword arguments are passed to Pandas' [DataFrame.plot.area](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.plot.area.html).
 73 | This can be used, among other things, to define a custom colormap. For more options to customize these plots consult
 74 | Pandas' documentation.
 75 | 
 76 | ```python
 77 | from matplotlib.colors import ListedColormap
 78 | 
 79 | colormap=ListedColormap(['red', 'green', 'blue'])
 80 | 
 81 | loreplot(data=iris_df, x="sepal width (cm)", y="species", colormap=colormap)
 82 | plt.show()
 83 | ```
 84 | ![LoRePlot custom colors](https://raw.githubusercontent.com/raeslab/lorepy/main/docs/img/loreplot_custom_color.png)
 85 | 
 86 | 
 87 | Using ```scatter_kws```, arguments for [pyplot.scatter](https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.scatter.html)
 88 | can be set to change the appearance of the sample markers.
 89 | 
 90 | ```python
 91 | scatter_options = {
 92 |     's': 20,                  # Marker size
 93 |     'alpha': 1,               # Fully opaque
 94 |     'color': 'black',         # Set color to black
 95 |     'marker': 'x'             # Set style to crosses
 96 | }
 97 | 
 98 | loreplot(data=iris_df, x="sepal width (cm)", y="species", scatter_kws=scatter_options)
 99 | plt.show()
100 | ```
101 | ![LoRePlot custom markers](https://raw.githubusercontent.com/raeslab/lorepy/main/docs/img/loreplot_custom_markers.png)
102 | 
103 | You can use LoRePlots in subplots as you would expect.
104 | 
105 | ```python
106 | fig, ax = plt.subplots(1,2, sharex=False, sharey=True)
107 | loreplot(data=iris_df, x="sepal width (cm)", y="species", ax=ax[0])
108 | loreplot(data=iris_df, x="petal width (cm)", y="species", ax=ax[1])
109 | 
110 | ax[0].get_legend().remove()
111 | ax[0].set_title("Sepal Width")
112 | ax[1].set_title("Petal Width")
113 | 
114 | plt.savefig('./docs/img/loreplot_subplot.png', dpi=150)
115 | plt.show()
116 | ```
117 | 
118 | ![LoRePlot in subplots](https://raw.githubusercontent.com/raeslab/lorepy/main/docs/img/loreplot_subplot.png)
119 | 
120 | By default lorepy uses a multi-class logistic regression model, however this can be replaced with any classifier
121 | from scikit-learn that implements ```predict_proba``` and ```fit```. Below you can see the code and output with a
122 | Support Vector Classifier (SVC) and Random Forest Classifier (RF).
123 | 
124 | ```python
125 | from sklearn.svm import SVC
126 | from sklearn.ensemble import RandomForestClassifier
127 | 
128 | fig, ax = plt.subplots(1, 2, sharex=False, sharey=True)
129 | 
130 | svc = SVC(probability=True)
131 | rf = RandomForestClassifier(n_estimators=10, max_depth=2)
132 | 
133 | loreplot(data=iris_df, x="sepal width (cm)", y="species", clf=svc, ax=ax[0])
134 | loreplot(data=iris_df, x="sepal width (cm)", y="species", clf=rf, ax=ax[1])
135 | 
136 | ax[0].get_legend().remove()
137 | ax[0].set_title("SVC")
138 | ax[1].set_title("RF")
139 | 
140 | plt.savefig("./docs/img/loreplot_other_clf.png", dpi=150)
141 | plt.show()
142 | ```
143 | 
144 | ![Lorepy with different types of classifiers](https://raw.githubusercontent.com/raeslab/lorepy/main/docs/img/loreplot_other_clf.png)
145 | 
146 | 
147 | In case there are confounders, these can be taken into account using the ```confounders``` argument. This requires a
148 | list of tuples, with the feature and the reference value for that feature to use in plots. E.g. if you wish to deconfound
149 | for Body Mass Index (BMI) and use a BMI of 25 in plots, set this to [("BMI", 25)].
150 | 
151 | ```python
152 | loreplot(
153 |     data=iris_df,
154 |     x="sepal width (cm)",
155 |     y="species",
156 |     confounders=[("petal width (cm)", 1)],
157 | )
158 | plt.savefig("./docs/img/loreplot_confounder.png", dpi=150)
159 | plt.show()
160 | ```
161 | 
162 | ![Loreplot with a confounder](https://raw.githubusercontent.com/raeslab/lorepy/main/docs/img/loreplot_confounder.png)
163 | 
164 | In some cases the numerical feature on the x-axis isn't continuous (e.g. an integer number), which can lead to 
165 | overplotting the dots. To avoid this to some extent a `jitter` feature is included, which adds some uniform noise to
166 | the x-coordinates of the dots. The value specifies the range of the uniform noise added, the value of 0.05 in the 
167 | example sets this range to [-0.05, 0.05].
168 | 
169 | ```python
170 | iris_df["sepal width (cm)"] = (
171 |     (iris_df["sepal width (cm)"] * 3).round() / 3
172 | )  # Round values
173 | 
174 | loreplot(data=iris_df, x="sepal width (cm)", y="species", jitter=0.05)
175 | plt.savefig("./docs/img/loreplot_jitter.png", dpi=150)
176 | plt.show()
177 | ```
178 | ![Loreplot with jitter](https://raw.githubusercontent.com/raeslab/lorepy/main/docs/img/loreplot_jitter.png)
179 | 
180 | ### Assess uncertainty
181 | 
182 | From loreplots it isn't possible to assess how certain we are of the prevalence of each group across the range. To
183 | provide a view into this there is a function ```uncertainty_plot```, which can be used as shown below. This will use
184 | ```resampling``` (or ```jackknifing```) to determine the 50% and 95% interval of predicted values and show these in a
185 | multi-panel plot with one plot per category.
186 | 
187 | ```python
188 | from lorepy import uncertainty_plot
189 | 
190 | uncertainty_plot(
191 |     data=iris_df,
192 |     x="sepal width (cm)",
193 |     y="species",
194 | )
195 | plt.savefig("./docs/img/uncertainty_default.png", dpi=150)
196 | plt.show()
197 | ```
198 | 
199 | ![Default uncertainty plot](https://raw.githubusercontent.com/raeslab/lorepy/main/docs/img/uncertainty_default.png)
200 | 
201 | This also supports custom colors, ranges and classifiers. More examples are available in ```example_uncertainty.py```.
202 | 
203 | 
204 | ## Development
205 | 
206 | Additional [documentation for developers](https://github.com/raeslab/lorepy/blob/main/docs/dev_docs.md) is included with details on running tests, building and deploying to PyPi.
207 | 
208 | ## Contributing
209 | 
210 | Any contributions you make are **greatly appreciated**.
211 | 
212 |   * Found a bug or have some suggestions? Open an [issue](https://github.com/raeslab/lorepy/issues).
213 |   * Pull requests are welcome! Though open an [issue](https://github.com/raeslab/lorepy/issues) first to discuss which features/changes you wish to implement.
214 | 
215 | ## Contact
216 | 
217 | lorepy was developed by [Sebastian Proost](https://sebastian.proost.science/) at the 
218 | [RaesLab](https://raeslab.sites.vib.be/en) and was based on R code written by 
219 | [Sara Vieira-Silva](https://saravsilva.github.io/). As of version 0.2.0 lorepy is available under the 
220 | [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International](https://creativecommons.org/licenses/by-nc-sa/4.0/) 
221 | license. 
222 | 
223 | For commercial access inquiries, please contact [Jeroen Raes](mailto:jeroen.raes@kuleuven.vib.be).
224 | 


--------------------------------------------------------------------------------
/docs/coverage-badge.svg:
--------------------------------------------------------------------------------
1 | <svg xmlns="http://www.w3.org/2000/svg" width="122" height="20" role="img" aria-label="coverage: 100.00%"><title>coverage: 100.00%</title><linearGradient id="s" x2="0" y2="100%"><stop offset="0" stop-color="#bbb" stop-opacity=".1"/><stop offset="1" stop-opacity=".1"/></linearGradient><clipPath id="r"><rect width="122" height="20" rx="3" fill="#fff"/></clipPath><g clip-path="url(#r)"><rect width="61" height="20" fill="#555"/><rect x="61" width="61" height="20" fill="#4c1"/><rect width="122" height="20" fill="url(#s)"/></g><g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110"><text aria-hidden="true" x="315" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="510">coverage</text><text x="315" y="140" transform="scale(.1)" fill="#fff" textLength="510">coverage</text><text aria-hidden="true" x="905" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="510">100.00%</text><text x="905" y="140" transform="scale(.1)" fill="#fff" textLength="510">100.00%</text></g></svg>


--------------------------------------------------------------------------------
/docs/dev/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/dev/requirements.txt


--------------------------------------------------------------------------------
/docs/dev_docs.md:
--------------------------------------------------------------------------------
 1 | # Lorepy - Development documentation
 2 | 
 3 | ## Setting up the environment
 4 | 
 5 | To recreate the environment used by the devs, you can get a [requirements.txt](./dev/requirements.txt) file that has the
 6 | same versions we have been using pinned. To install these after creating a virtual environment use the command below
 7 | (from the root of the project)
 8 | 
 9 | ```bash
10 | pip install -r ./docs/dev/requirements.txt
11 | ```
12 | 
13 | 
14 | ## Running tests
15 | 
 16 | Lorepy is fully covered with unit-tests. To run them you need the pytest package installed (```pip install pytest pytest-cov```).
 17 | Next, run the command below to run the test suite. Note: if you use the environment listed above, these packages are already installed.
18 | 
19 | ```bash
20 | pytest
21 | ```
22 | To enable coverage stats run the command below.
23 | 
24 | ```bash
25 | pytest --exitfirst --verbose --failed-first --cov=src
26 | ```
27 | 
28 | ## Deploying on PyPi
29 | 
30 | ### Building the package
31 | 
32 | To build the source distribution along with a wheel, use the command below. 
33 | 
34 | ```bash
35 | python setup.py sdist bdist_wheel
36 | ```
37 | 
38 | ### Push the package to PyPi
39 | 
40 | **Note** that these commands will upload the code to publicly available platforms, use with caution !
41 | 
42 | This will require the twine package, install twine using ```pip install twine``` if needed.
43 | 
44 | You can upload a new build to [TestPyPi] using the command below:
45 | 
46 | ```bash
47 | twine upload --repository-url https://test.pypi.org/legacy/ dist/*
48 | ```
49 | 
50 | Once everything is ready to be uploaded to [PyPi], one more command is necessary:
51 | 
52 | ```bash
53 | twine upload dist/*
54 | ```
55 | 
56 | When prompted for credentials, use `__token__` as the username and the API token generated on [PyPi] as the password.
57 | 
58 | [TestPyPi]: https://test.pypi.org/
59 | [PyPi]: https://pypi.org/


--------------------------------------------------------------------------------
/docs/img/bins.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/bins.png


--------------------------------------------------------------------------------
/docs/img/loreplot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/loreplot.png


--------------------------------------------------------------------------------
/docs/img/loreplot_confounder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/loreplot_confounder.png


--------------------------------------------------------------------------------
/docs/img/loreplot_custom_color.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/loreplot_custom_color.png


--------------------------------------------------------------------------------
/docs/img/loreplot_custom_markers.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/loreplot_custom_markers.png


--------------------------------------------------------------------------------
/docs/img/loreplot_jitter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/loreplot_jitter.png


--------------------------------------------------------------------------------
/docs/img/loreplot_no_dots.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/loreplot_no_dots.png


--------------------------------------------------------------------------------
/docs/img/loreplot_other_clf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/loreplot_other_clf.png


--------------------------------------------------------------------------------
/docs/img/loreplot_subplot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/loreplot_subplot.png


--------------------------------------------------------------------------------
/docs/img/percentiles.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/percentiles.png


--------------------------------------------------------------------------------
/docs/img/threshold.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/threshold.png


--------------------------------------------------------------------------------
/docs/img/uncertainty_confounder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/uncertainty_confounder.png


--------------------------------------------------------------------------------
/docs/img/uncertainty_custom_classifier.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/uncertainty_custom_classifier.png


--------------------------------------------------------------------------------
/docs/img/uncertainty_custom_color.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/uncertainty_custom_color.png


--------------------------------------------------------------------------------
/docs/img/uncertainty_default.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/uncertainty_default.png


--------------------------------------------------------------------------------
/docs/img/uncertainty_jackknife.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/img/uncertainty_jackknife.png


--------------------------------------------------------------------------------
/docs/lorepy_github_header.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/docs/lorepy_github_header.png


--------------------------------------------------------------------------------
/docs/lorepy_vs_bar_plots.md:
--------------------------------------------------------------------------------
 1 | # Why use lorepy over stacked bar plots
 2 | 
 3 | The Iris dataset, shown in a loreplot below, is visualized using a few different options further down this page. Note
 4 | how using stacked bar plots in some cases can distort the data.
 5 | 
 6 | ![LoRePlot example on Iris Dataset](./img/loreplot.png)
 7 | 
 8 | ## A threshold is used to separate the data
 9 | 
10 | In the plot below, individuals are separated into "large" and "small" groups based on an arbitrary threshold for sepal 
11 | width. This approach can obscure how sepal width is distributed within species, particularly for the *virginica* species.
12 | 
13 | ![Iris dataset separated in two arbitrary groups](./img/threshold.png)
14 | 
15 | ## Using bins with equal range
16 | 
17 | Here, individuals are divided into six equal segments (bins) based on sepal width. The plot below emphasizes the small 
18 | number of *setosa* specimens with small sepals, showing how they disproportionately influence the plot.
19 | 
20 | ![Iris dataset separated in six bins](./img/bins.png)
21 | 
22 | ## Using percentiles
23 | 
24 | The plot below slices the data into percentile ranks, which leads to bins of varying widths. For instance, the largest bin 
25 | covers a range of ~1 cm, while others span just 1-2 mm. This can distort the perception of data distribution.
26 | 
27 | ![Iris dataset separated in percentiles](./img/percentiles.png)
28 | 
29 | 


--------------------------------------------------------------------------------
/example.py:
--------------------------------------------------------------------------------
  1 | import matplotlib.pyplot as plt
  2 | import pandas as pd
  3 | import numpy as np
  4 | from lorepy import loreplot
  5 | from matplotlib.colors import ListedColormap
  6 | from sklearn.datasets import load_iris
  7 | from sklearn.ensemble import RandomForestClassifier
  8 | from sklearn.svm import SVC
  9 | 
 10 | # Load iris dataset and convert to dataframe
 11 | iris_obj = load_iris()
 12 | iris_df = pd.DataFrame(iris_obj.data, columns=iris_obj.feature_names)
 13 | 
 14 | iris_df["species"] = [iris_obj.target_names[s] for s in iris_obj.target]
 15 | 
 16 | # Basic Lore Plot with default style
 17 | loreplot(data=iris_df, x="sepal width (cm)", y="species")
 18 | plt.savefig("./docs/img/loreplot.png", dpi=150)
 19 | plt.show()
 20 | 
 21 | # Key word arguments (like colormap) can be passed to the DataFrame.plot.area
 22 | 
 23 | colormap = ListedColormap(["red", "green", "blue"])
 24 | loreplot(data=iris_df, x="sepal width (cm)", y="species", colormap=colormap)
 25 | plt.savefig("./docs/img/loreplot_custom_color.png", dpi=150)
 26 | plt.show()
 27 | 
 28 | # En-/disable sample markers with add_dots
 29 | loreplot(data=iris_df, x="sepal width (cm)", y="species", add_dots=False)
 30 | plt.savefig("./docs/img/loreplot_no_dots.png", dpi=150)
 31 | plt.show()
 32 | 
 33 | # Pass custom styles for markers using scatter_kws
 34 | scatter_options = {
 35 |     "s": 20,  # Marker size
 36 |     "alpha": 1,  # Fully opaque
 37 |     "color": "black",  # Set color to black
 38 |     "marker": "x",  # Set style to crosses
 39 | }
 40 | 
 41 | loreplot(data=iris_df, x="sepal width (cm)", y="species", scatter_kws=scatter_options)
 42 | plt.savefig("./docs/img/loreplot_custom_markers.png", dpi=150)
 43 | plt.show()
 44 | 
 45 | # Test in subplots
 46 | 
 47 | fig, ax = plt.subplots(1, 2, sharex=False, sharey=True)
 48 | loreplot(data=iris_df, x="sepal width (cm)", y="species", ax=ax[0])
 49 | loreplot(data=iris_df, x="petal width (cm)", y="species", ax=ax[1])
 50 | 
 51 | ax[0].get_legend().remove()
 52 | ax[0].set_title("Sepal Width")
 53 | ax[1].set_title("Petal Width")
 54 | 
 55 | plt.savefig("./docs/img/loreplot_subplot.png", dpi=150)
 56 | plt.show()
 57 | 
 58 | # Basic Lore Plot with default style but different classifier
 59 | fig, ax = plt.subplots(1, 2, sharex=False, sharey=True)
 60 | 
 61 | svc = SVC(probability=True)
 62 | rf = RandomForestClassifier(n_estimators=10, max_depth=2)
 63 | 
 64 | loreplot(data=iris_df, x="sepal width (cm)", y="species", clf=svc, ax=ax[0])
 65 | loreplot(data=iris_df, x="sepal width (cm)", y="species", clf=rf, ax=ax[1])
 66 | 
 67 | ax[0].get_legend().remove()
 68 | ax[0].set_title("SVC")
 69 | ax[1].set_title("RF")
 70 | 
 71 | plt.savefig("./docs/img/loreplot_other_clf.png", dpi=150)
 72 | plt.show()
 73 | 
 74 | # Basic Lore Plot with default style with one confounder
 75 | loreplot(
 76 |     data=iris_df,
 77 |     x="sepal width (cm)",
 78 |     y="species",
 79 |     confounders=[("petal width (cm)", 1)],
 80 | )
 81 | plt.savefig("./docs/img/loreplot_confounder.png", dpi=150)
 82 | plt.show()
 83 | 
 84 | # Basic Lore Plot with some jitter
 85 | iris_df["sepal width (cm)"] = (
 86 |     np.round(iris_df["sepal width (cm)"] * 3) / 3
 87 | )  # Round values
 88 | 
 89 | loreplot(data=iris_df, x="sepal width (cm)", y="species", jitter=0.05)
 90 | plt.savefig("./docs/img/loreplot_jitter.png", dpi=150)
 91 | plt.show()
 92 | 
 93 | ### Generate some plots that can be used for the documentation
 94 | 
 95 | iris_obj = load_iris()
 96 | iris_df = pd.DataFrame(iris_obj.data, columns=iris_obj.feature_names)
 97 | 
 98 | iris_df["species"] = [iris_obj.target_names[s] for s in iris_obj.target]
 99 | 
100 | iris_df["sepal_group"] = iris_df["sepal width (cm)"].apply(
101 |     lambda x: "small" if x < 3 else "large"
102 | )
103 | count_df = (
104 |     iris_df.groupby(["species", "sepal_group"], as_index=False)
105 |     .size()
106 |     .pivot_table(index="sepal_group", columns="species", values="size")
107 | )
108 | 
109 | totals = count_df.sum(axis=1)
110 | 
111 | count_df = count_df.div(totals, axis=0).sort_index(ascending=False)
112 | count_df.plot.bar(stacked=True)
113 | 
114 | plt.tight_layout()
115 | plt.savefig("./docs/img/threshold.png", dpi=150)
116 | plt.show()
117 | 
118 | iris_df["sepal_bin"] = pd.cut(iris_df["sepal width (cm)"], 6)
119 | count_df = (
120 |     iris_df.groupby(["species", "sepal_bin"], as_index=False, observed=False)
121 |     .size()
122 |     .pivot_table(index="sepal_bin", columns="species", values="size", observed=False)
123 | )
124 | 
125 | label_df = iris_df.groupby("sepal_bin", as_index=False, observed=False).size()
126 | label_df["label"] = label_df.apply(
127 |     lambda x: str(x["sepal_bin"]) + " (n=" + str(x["size"]) + ")", axis=1
128 | )
129 | 
130 | totals = count_df.sum(axis=1)
131 | 
132 | count_df = count_df.div(totals, axis=0).sort_index(ascending=True)
133 | count_df = (
134 |     pd.merge(count_df, label_df, left_index=True, right_on="sepal_bin")
135 |     .set_index("label")
136 |     .drop(columns=["sepal_bin", "size"])
137 | )
138 | count_df.plot.bar(stacked=True)
139 | 
140 | plt.tight_layout()
141 | plt.savefig("./docs/img/bins.png", dpi=150)
142 | plt.show()
143 | 
144 | 
145 | iris_df["sepal_cut"] = pd.qcut(iris_df["sepal width (cm)"], 6, duplicates="drop")
146 | count_df = (
147 |     iris_df.groupby(["species", "sepal_cut"], as_index=False, observed=False)
148 |     .size()
149 |     .pivot_table(index="sepal_cut", columns="species", values="size", observed=False)
150 | )
151 | 
152 | label_df = iris_df.groupby("sepal_cut", as_index=False, observed=False).size()
153 | label_df["label"] = label_df.apply(
154 |     lambda x: str(x["sepal_cut"]) + " (n=" + str(x["size"]) + ")", axis=1
155 | )
156 | 
157 | totals = count_df.sum(axis=1)
158 | 
159 | count_df = count_df.div(totals, axis=0).sort_index(ascending=True)
160 | count_df = (
161 |     pd.merge(count_df, label_df, left_index=True, right_on="sepal_cut")
162 |     .set_index("label")
163 |     .drop(columns=["sepal_cut", "size"])
164 | )
165 | count_df.plot.bar(stacked=True)
166 | 
167 | plt.tight_layout()
168 | plt.savefig("./docs/img/percentiles.png", dpi=150)
169 | plt.show()
170 | 


--------------------------------------------------------------------------------
/example_uncertainty.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import pandas as pd
 3 | from lorepy import uncertainty_plot
 4 | from matplotlib.colors import ListedColormap
 5 | from sklearn.datasets import load_iris
 6 | from sklearn.svm import SVC
 7 | 
 8 | # Load iris dataset and convert to dataframe
 9 | iris_obj = load_iris()
10 | iris_df = pd.DataFrame(iris_obj.data, columns=iris_obj.feature_names)
11 | 
12 | iris_df["species"] = [iris_obj.target_names[s] for s in iris_obj.target]
13 | 
14 | # Default uncertainty plot
15 | uncertainty_plot(data=iris_df, x="sepal width (cm)", y="species", iterations=100)
16 | plt.savefig("./docs/img/uncertainty_default.png", dpi=150)
17 | plt.show()
18 | 
19 | # Using jackknife instead of resample to assess uncertainty
20 | uncertainty_plot(
21 |     data=iris_df,
22 |     x="sepal width (cm)",
23 |     y="species",
24 |     iterations=100,
25 |     jackknife_fraction=0.8,
26 | )
27 | plt.savefig("./docs/img/uncertainty_jackknife.png", dpi=150)
28 | plt.show()
29 | 
30 | # Uncertainty plot with custom colors
31 | 
32 | 
33 | colormap = ListedColormap(["red", "green", "blue"])
34 | uncertainty_plot(
35 |     data=iris_df,
36 |     x="sepal width (cm)",
37 |     y="species",
38 |     iterations=100,
39 |     mode="resample",
40 |     colormap=colormap,
41 | )
42 | plt.savefig("./docs/img/uncertainty_custom_color.png", dpi=150)
43 | plt.show()
44 | 
45 | # Uncertainty plot with a confounder
46 | uncertainty_plot(
47 |     data=iris_df,
48 |     x="sepal width (cm)",
49 |     y="species",
50 |     iterations=100,
51 |     mode="resample",
52 |     confounders=[("petal width (cm)", 1)],
53 | )
54 | plt.savefig("./docs/img/uncertainty_confounder.png", dpi=150)
55 | plt.show()
56 | 
57 | # Uncertainty plot with a custom classifier
58 | svc = SVC(probability=True)
59 | 
60 | uncertainty_plot(
61 |     data=iris_df,
62 |     x="sepal width (cm)",
63 |     y="species",
64 |     iterations=100,
65 |     mode="resample",
66 |     clf=svc,
67 | )
68 | plt.savefig("./docs/img/uncertainty_custom_classifier.png", dpi=150)
69 | plt.show()
70 | 


--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | pythonpath = src


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import find_packages, setup
 2 | 
 3 | with open("README.md", "r", encoding="utf-8") as fh:
 4 |     long_description = fh.read()
 5 | 
 6 | setup(
 7 |     name="lorepy",
 8 |     version="0.4.4",
 9 |     author="Sebastian Proost",
10 |     author_email="sebastian.proost@gmail.com",
11 |     description="Draw Logistic Regression Plots in Python",
12 |     long_description=long_description,
13 |     long_description_content_type="text/markdown",
14 |     url="https://github.com/raeslab/lorepy/",
15 |     project_urls={
16 |         "Bug Tracker": "https://github.com/raeslab/lorepy/issues",
17 |     },
18 |     install_requires=[
19 |         "matplotlib>=3.4.1",
20 |         "numpy>=1.20.2",
21 |         "pandas>=1.2.4",
22 |         "scikit-learn>=1.5.0",
23 |     ],
24 |     classifiers=[
25 |         "Programming Language :: Python :: 3",
26 |         "Operating System :: OS Independent",
27 |     ],
28 |     license="Creative Commons Attribution-NonCommercial-ShareAlike 4.0. https://creativecommons.org/licenses/by-nc-sa/4.0/",
29 |     packages=find_packages("src"),
30 |     package_dir={"": "src"},
31 |     python_requires=">=3.9",
32 | )
33 | 


--------------------------------------------------------------------------------
/src/lorepy/__init__.py:
--------------------------------------------------------------------------------
1 | from .lorepy import loreplot as loreplot
2 | from .uncertainty import uncertainty_plot as uncertainty_plot
3 | 


--------------------------------------------------------------------------------
/src/lorepy/lorepy.py:
--------------------------------------------------------------------------------
  1 | from typing import Optional, Tuple
  2 | 
  3 | import matplotlib.pyplot as plt
  4 | import numpy as np
  5 | import pandas as pd
  6 | from pandas import DataFrame
  7 | from sklearn.linear_model import LogisticRegression
  8 | 
  9 | 
 10 | def _prepare_data(data, x, y, confounders):
 11 |     x_features = [x] + [i[0] for i in confounders]
 12 | 
 13 |     tmp_df = data[x_features + [y]].dropna()
 14 |     X_reg = np.array(tmp_df[x_features])
 15 |     y_reg = np.array(tmp_df[y])
 16 | 
 17 |     x_range = (X_reg[:, 0].min(), X_reg[:, 0].max())
 18 | 
 19 |     return X_reg, y_reg, x_range
 20 | 
 21 | 
 22 | def _get_area_df(lg, x_feature, x_range, confounders=[]) -> DataFrame:
 23 |     values = np.linspace(x_range[0], x_range[1], num=200)
 24 | 
 25 |     predict_df = pd.DataFrame({"values": values})
 26 | 
 27 |     for k, v in confounders:
 28 |         predict_df[k] = v
 29 | 
 30 |     proba = lg.predict_proba(predict_df.values)
 31 |     proba_df = DataFrame(proba, columns=lg.classes_)
 32 |     proba_df[x_feature] = values
 33 |     proba_df.set_index(x_feature, inplace=True)
 34 | 
 35 |     return proba_df
 36 | 
 37 | 
 38 | def _get_dots_df(X, y, lg, y_feature, confounders=[], jitter=0) -> DataFrame:
 39 |     output = []
 40 | 
 41 |     for x, s in zip(X, y):
 42 |         if jitter != 0:
 43 |             x[0] += np.random.uniform(low=-jitter, high=jitter)
 44 | 
 45 |         proba = lg.predict_proba([x] + [i[1] for i in confounders])
 46 |         i = list(lg.classes_).index(s)
 47 |         min_value = sum(proba[0][:i])
 48 |         max_value = sum(proba[0][: i + 1])
 49 |         margin = (max_value - min_value) / 10
 50 |         ypos = np.random.uniform(low=min_value + margin, high=max_value - margin)
 51 |         output.append({y_feature: s, "x": x[0], "y": ypos})
 52 | 
 53 |     return DataFrame(output)
 54 | 
 55 | 
 56 | def loreplot(
 57 |     data: DataFrame,
 58 |     x: str,
 59 |     y: str,
 60 |     add_dots: bool = True,
 61 |     x_range: Optional[Tuple[float, float]] = None,
 62 |     scatter_kws: dict = dict({}),
 63 |     ax=None,
 64 |     clf=None,
 65 |     confounders=[],
 66 |     jitter=0,
 67 |     **kwargs,
 68 | ):
 69 |     """
 70 |     Code to create a loreplot with a numerical feature on the v-axis and categorical y from a pandas dataset
 71 | 
 72 |     :param data: Pandas dataframe with data
 73 |     :param x: Needs to be a numerical feature
 74 |     :param y: Categorical feature
 75 |     :param add_dots: Shows where true samples are in the plot (cannot be enabled when deconfounding for additional variables)
 76 |     :param x_range: Either None (range will be selected automatically) or a tuple with min and max value for the v-axis
 77 |     :param scatter_kws: Dictionary with keyword arguments to pass to the scatter function
 78 |     :param ax: subplot to draw on, in case lorepy is used in a subplot
 79 |     :param clf: provide a different scikit-learn classifier for the function. Should implement the predict_proba() and fit()
 80 |     :param confounders: list of tuples with the feature and reference value e.g. [("BMI", 25)] will confounders BMI and use a reference of 25 for plots
 81 |     :param jitter: adds random noise to the x-position of dots. This can help avoid overplotting when integer values are used for the numerical features
 82 |     :param kwargs: Additional arguments to pass to pandas' plot.area function
 83 |     """
 84 |     if ax is None:
 85 |         ax = plt.gca()
 86 | 
 87 |     X_reg, y_reg, r = _prepare_data(data, x, y, confounders)
 88 | 
 89 |     if x_range is None:
 90 |         x_range = r
 91 | 
 92 |     lg = LogisticRegression() if clf is None else clf
 93 |     lg.fit(X_reg, y_reg)
 94 | 
 95 |     if "linestyle" not in kwargs.keys():
 96 |         kwargs["linestyle"] = "None"
 97 | 
 98 |     area_df = _get_area_df(lg, x, x_range, confounders=confounders)
 99 |     area_df.plot.area(ax=ax, **kwargs)
100 | 
101 |     if add_dots and len(confounders) == 0:
102 |         dot_df = _get_dots_df(X_reg, y_reg, lg, y, jitter=jitter)
103 |         if "color" not in scatter_kws.keys():
104 |             scatter_kws["color"] = "w"
105 |         if "alpha" not in scatter_kws.keys():
106 |             scatter_kws["alpha"] = 0.3
107 |         ax.scatter(dot_df["x"], dot_df["y"], **scatter_kws)
108 | 
109 |     ax.set_xlim(*x_range)
110 | 
111 |     ax.set_ylim(0, 1)
112 | 


--------------------------------------------------------------------------------
/src/lorepy/uncertainty.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | from matplotlib import pyplot as plt
  4 | from pandas import DataFrame
  5 | from sklearn.linear_model import LogisticRegression
  6 | from sklearn.model_selection import train_test_split
  7 | from sklearn.utils import resample
  8 | 
  9 | from lorepy.lorepy import _get_area_df, _prepare_data
 10 | 
 11 | 
 12 | def _get_uncertainty_data(
 13 |     x: str,
 14 |     X_reg,
 15 |     y_reg,
 16 |     x_range,
 17 |     mode="resample",
 18 |     jackknife_fraction: float = 0.8,
 19 |     iterations: int = 100,
 20 |     confounders=None,
 21 |     clf=None,
 22 | ):
 23 |     confounders = [] if confounders is None else confounders
 24 | 
 25 |     areas = []
 26 |     for i in range(iterations):
 27 |         if mode == "jackknife":
 28 |             X_keep, _, y_keep, _ = train_test_split(
 29 |                 X_reg, y_reg, train_size=jackknife_fraction
 30 |             )
 31 |         elif mode == "resample":
 32 |             X_keep, y_keep = resample(X_reg, y_reg, replace=True)
 33 |         else:
 34 |             raise NotImplementedError(
 35 |                 f"Mode {mode} is unsupported, only jackknife and resample are valid modes"
 36 |             )
 37 | 
 38 |         lg = LogisticRegression() if clf is None else clf
 39 |         lg.fit(X_keep, y_keep)
 40 |         new_area = _get_area_df(lg, x, x_range, confounders=confounders).reset_index()
 41 | 
 42 |         areas.append(new_area)
 43 | 
 44 |     long_df = pd.concat(areas).melt(id_vars=[x]).sort_values(x)
 45 | 
 46 |     output = (
 47 |         long_df.groupby([x, "variable"])
 48 |         .agg(
 49 |             min=pd.NamedAgg(column="value", aggfunc="min"),
 50 |             mean=pd.NamedAgg(column="value", aggfunc="mean"),
 51 |             max=pd.NamedAgg(column="value", aggfunc="max"),
 52 |             low_95=pd.NamedAgg(column="value", aggfunc=lambda v: np.percentile(v, 2.5)),
 53 |             high_95=pd.NamedAgg(
 54 |                 column="value", aggfunc=lambda v: np.percentile(v, 97.5)
 55 |             ),
 56 |             low_50=pd.NamedAgg(column="value", aggfunc=lambda v: np.percentile(v, 25)),
 57 |             high_50=pd.NamedAgg(column="value", aggfunc=lambda v: np.percentile(v, 75)),
 58 |         )
 59 |         .reset_index()
 60 |     )
 61 | 
 62 |     return output, long_df
 63 | 
 64 | 
 65 | def uncertainty_plot(
 66 |     data: DataFrame,
 67 |     x: str,
 68 |     y: str,
 69 |     x_range=None,
 70 |     mode="resample",
 71 |     jackknife_fraction=0.8,
 72 |     iterations=100,
 73 |     confounders=[],
 74 |     colormap=None,
 75 |     clf=None,
 76 |     ax=None,
 77 | ):
 78 |     """
 79 |     Code to create a multi-panel plot, one panel for each category, with the prevalence of that category across the
 80 |     range of x-values, along with the uncertainty (intervals containing 50% and 95% of the samples are shown)
 81 | 
 82 |     :param data: Pandas dataframe with data
 83 |     :param x: Needs to be a numerical feature
 84 |     :param y: Categorical feature
 85 |     :param x_range: Either None (range will be selected automatically) or a tuple with min and max value for the x-axis
 86 |     :param mode: Sampling method, either "resample" (bootstrap) or "jackknife" (default = "resample")
 87 |     :param jackknife_fraction: Fraction of data to retain for each jackknife sample (default = 0.8)
 88 |     :param iterations: Number of iterations for resampling or jackknife (default = 100)
 89 |     :param confounders: List of tuples with the feature and reference value e.g., [("BMI", 25)] will use a reference of 25 for plots
 90 |     :param colormap: Colormap to use for the plot, default is None in which case matplotlib's default will be used
 91 |     :param clf: Provide a different scikit-learn classifier for the function. Should implement the predict_proba() and fit(). If None a LogisticRegression will be used.
 92 |     :param ax: Optional. List of matplotlib Axes to plot into. If None, a new figure and axes will be created.
 93 |     :return: A tuple containing the figure and axes objects
 94 |     """
 95 |     X_reg, y_reg, r = _prepare_data(data, x, y, confounders)
 96 | 
 97 |     if x_range is None:
 98 |         x_range = r
 99 | 
100 |     plot_df, _ = _get_uncertainty_data(
101 |         x,
102 |         X_reg,
103 |         y_reg,
104 |         x_range,
105 |         mode=mode,
106 |         jackknife_fraction=jackknife_fraction,
107 |         iterations=iterations,
108 |         confounders=confounders,
109 |         clf=clf,
110 |     )
111 | 
112 |     categories = plot_df.variable.unique()
113 | 
114 |     if ax is None:
115 |         fig, axs = plt.subplots(ncols=len(categories), sharex=True, sharey=True)
116 |     else:
117 |         assert len(ax) == len(
118 |             categories
119 |         ), "Length of ax must match number of categories"
120 |         fig = ax[0].figure
121 |         axs = ax
122 | 
123 |     cmap = plt.get_cmap("tab10") if colormap is None else colormap
124 | 
125 |     for idx, category in enumerate(categories):
126 |         cat_df = plot_df[plot_df.variable == category]
127 | 
128 |         axs[idx].fill_between(
129 |             cat_df[x], cat_df["low_95"], cat_df["high_95"], alpha=0.1, color=cmap(idx)
130 |         )
131 |         axs[idx].fill_between(
132 |             cat_df[x], cat_df["low_50"], cat_df["high_50"], alpha=0.2, color=cmap(idx)
133 |         )
134 |         axs[idx].plot(cat_df[x], cat_df["mean"], color=cmap(idx))
135 |         axs[idx].set_title(categories[idx])
136 |         axs[idx].set_xlabel(x)
137 | 
138 |         axs[idx].set_xlim(*x_range)
139 |         axs[idx].set_ylim(0, 1)
140 | 
141 |     return fig, axs
142 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/raeslab/lorepy/e1758a09b2d9eb390c9b54aabcceaf9e9dcf8d4b/tests/__init__.py


--------------------------------------------------------------------------------
/tests/test_plot.py:
--------------------------------------------------------------------------------
  1 | import matplotlib.pyplot as plt
  2 | import numpy as np
  3 | import pandas as pd
  4 | from lorepy.lorepy import _get_area_df, _get_dots_df, loreplot
  5 | from pandas import DataFrame
  6 | from sklearn.linear_model import LogisticRegression
  7 | from sklearn.svm import SVC
  8 | 
  9 | import pytest
 10 | 
 11 | 
 12 | @pytest.fixture
 13 | def sample_data():
 14 |     X = np.concatenate([np.random.randint(0, 10, 50), np.random.randint(2, 12, 50)])
 15 |     y = [0] * 50 + [1] * 50
 16 |     z = X
 17 |     return pd.DataFrame({"x": X, "y": y, "z": z})
 18 | 
 19 | 
 20 | @pytest.fixture
 21 | def logistic_regression_model():
 22 |     X_reg = np.array([1.0, 2.0, 3.0, 4.0, 5.0]).reshape(-1, 1)
 23 |     y_reg = np.array([0, 1, 0, 1, 1])
 24 |     lg = LogisticRegression()
 25 |     lg.fit(X_reg, y_reg)
 26 |     return X_reg, y_reg, lg
 27 | 
 28 | 
 29 | # Test case for loreplot with default parameters
 30 | def test_loreplot_default(sample_data):
 31 |     loreplot(sample_data, "x", "y")  # first test without specifying the axis
 32 | 
 33 |     fig, ax = plt.subplots()
 34 |     loreplot(sample_data, "x", "y", ax=ax)
 35 |     assert ax.get_title() == ""
 36 |     assert ax.get_xlabel() == "x"
 37 |     assert ax.get_ylabel() == ""
 38 | 
 39 | 
 40 | # Test case for loreplot with jitter
 41 | def test_loreplot_jitter(sample_data):
 42 |     loreplot(sample_data, "x", "y")  # first test without specifying the axis
 43 | 
 44 |     fig, ax = plt.subplots()
 45 |     loreplot(sample_data, "x", "y", ax=ax, jitter=0.05)
 46 |     assert ax.get_title() == ""
 47 |     assert ax.get_xlabel() == "x"
 48 |     assert ax.get_ylabel() == ""
 49 | 
 50 | 
 51 | # Test case for loreplot with confounder
 52 | def test_loreplot_confounder(sample_data):
 53 |     loreplot(
 54 |         sample_data, "x", "y", confounders=[("z", 1)]
 55 |     )  # first test without specifying the axis
 56 | 
 57 |     fig, ax = plt.subplots()
 58 |     loreplot(sample_data, "x", "y", ax=ax)
 59 |     assert ax.get_title() == ""
 60 |     assert ax.get_xlabel() == "x"
 61 |     assert ax.get_ylabel() == ""
 62 | 
 63 | 
 64 | # Test case for loreplot with custom clf
 65 | def test_loreplot_custom_clf(sample_data):
 66 |     svc = SVC(probability=True)
 67 |     loreplot(sample_data, "x", "y", clf=svc)
 68 | 
 69 |     fig, ax = plt.subplots()
 70 |     loreplot(sample_data, "x", "y", ax=ax)
 71 |     assert ax.get_title() == ""
 72 |     assert ax.get_xlabel() == "x"
 73 |     assert ax.get_ylabel() == ""
 74 | 
 75 | 
 76 | # Test case for loreplot with custom parameters
 77 | def test_loreplot_custom(sample_data):
 78 |     fig, ax = plt.subplots()
 79 |     loreplot(
 80 |         sample_data,
 81 |         "x",
 82 |         "y",
 83 |         add_dots=False,
 84 |         x_range=(0, 5),
 85 |         ax=ax,
 86 |         color=["r", "b"],
 87 |         linestyle="-",
 88 |     )
 89 |     assert ax.get_title() == ""
 90 |     assert ax.get_xlabel() == "x"
 91 |     assert ax.get_ylabel() == ""
 92 | 
 93 | 
 94 | # Test case for loreplot with add_dots=True
 95 | def test_loreplot_with_dots(sample_data):
 96 |     fig, ax = plt.subplots()
 97 |     loreplot(sample_data, "x", "y", add_dots=True, ax=ax)
 98 |     assert ax.get_title() == ""
 99 |     assert ax.get_xlabel() == "x"
100 |     assert ax.get_ylabel() == ""
101 | 
102 | 
# Sample data for testing internal functions: five points with a single
# feature and a model fitted on them when the module is imported
X_reg = np.arange(1.0, 6.0).reshape(-1, 1)
y_reg = np.array([0, 1, 0, 1, 1])
lg = LogisticRegression().fit(X_reg, y_reg)
108 | 
109 | 
# Test case for _get_dots_df
def test_get_dots_df():
    dots_df = _get_dots_df(X_reg, y_reg, lg, "y")

    assert isinstance(dots_df, DataFrame)
    # Coordinates of the dots are present
    for column in ("x", "y"):
        assert column in dots_df.columns
    # The name of the parameter must not end up as a column
    assert "y_feature" not in dots_df.columns
    # One dot for every sample
    assert len(dots_df) == len(X_reg)
118 | 
119 | 
# Test case for _get_area_df
def test_get_area_df():
    lowest, highest = X_reg.min(), X_reg.max()
    area_df = _get_area_df(lg, "x", (lowest, highest))

    assert isinstance(area_df, DataFrame)
    # "x" is the index and therefore not a column
    assert "x" not in area_df.columns
    # One column for each class
    for label in (0, 1):
        assert label in area_df.columns
    # 200 rows that span the requested range
    assert len(area_df) == 200
    assert area_df.index[0] == lowest
    assert area_df.index[-1] == highest
130 | 


--------------------------------------------------------------------------------
/tests/test_uncertainty.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | import pytest
 4 | from lorepy import uncertainty_plot
 5 | from matplotlib.colors import ListedColormap
 6 | from matplotlib import pyplot as plt
 7 | from sklearn.svm import SVC
 8 | 
 9 | 
@pytest.fixture
def sample_data():
    # 100 random rows: numerical "x", binary label "y" and "z", a copy of "x"
    first_half = np.random.randint(0, 10, 50)
    second_half = np.random.randint(2, 12, 50)
    values = np.concatenate([first_half, second_half])
    labels = [0] * 50 + [1] * 50
    return pd.DataFrame({"x": values, "y": labels, "z": values})
16 | 
17 | 
@pytest.fixture
def custom_colormap():
    # Discrete colormap with three named colors
    colors = ["red", "green", "blue"]
    return ListedColormap(colors)
21 | 
22 | 
# Test case for lorepy's uncertainty plot with default parameters
def test_uncertainty_default(sample_data):
    _, panels = uncertainty_plot(data=sample_data, x="x", y="y")

    # One panel for each of the two labels
    assert len(panels) == 2

    first_panel = panels[0]
    assert first_panel.get_title() == "0"
    assert first_panel.get_xlabel() == "x"
    assert first_panel.get_ylabel() == ""
31 | 
32 | 
# Test case for lorepy's uncertainty plot with alternative parameters
def test_uncertainty_alternative(sample_data, custom_colormap):
    # Jackknife mode, a fixed range, custom colors and a custom classifier
    options = dict(
        mode="jackknife",
        x_range=(5, 40),
        colormap=custom_colormap,
        clf=SVC(probability=True),
    )
    _, panels = uncertainty_plot(sample_data, "x", "y", **options)

    assert len(panels) == 2

    first_panel = panels[0]
    assert first_panel.get_title() == "0"
    assert first_panel.get_xlabel() == "x"
    assert first_panel.get_ylabel() == ""
50 | 
51 | 
def test_get_uncertainty_confounder(sample_data):
    # Uncertainty plot with "z" as confounder, evaluated at a reference of 5
    _, panels = uncertainty_plot(
        data=sample_data, x="x", y="y", confounders=[("z", 5)]
    )

    assert len(panels) == 2

    first_panel = panels[0]
    assert first_panel.get_title() == "0"
    assert first_panel.get_xlabel() == "x"
    assert first_panel.get_ylabel() == ""
61 | 
62 | 
# Test error handling when an unsupported mode is selected
def test_uncertainty_incorrect_mode(sample_data):
    # The call itself has to raise, its return value is not of interest
    with pytest.raises(NotImplementedError):
        uncertainty_plot(data=sample_data, x="x", y="y", mode="fail")
67 | 
68 | 
def test_uncertainty_with_existing_ax(sample_data):
    # Two axes created up front, one for each label
    _, own_axes = plt.subplots(1, 2)
    returned_fig, returned_axs = uncertainty_plot(
        data=sample_data, x="x", y="y", ax=own_axes
    )

    assert returned_fig is not None

    # The axes handed in are the ones returned
    assert returned_axs[0] == own_axes[0]
    assert returned_axs[1] == own_axes[1]
    assert len(returned_axs) == 2

    assert returned_axs[0].get_title() == "0"
    assert returned_axs[0].get_xlabel() == "x"
79 | 
80 | 
def test_uncertainty_incorrect_ax_length(sample_data):
    # A single axis is provided while the data has two labels
    _, single_axis = plt.subplots(1, 1)
    too_few_axes = [single_axis]

    with pytest.raises(AssertionError):
        uncertainty_plot(sample_data, "x", "y", ax=too_few_axes)
85 | 


--------------------------------------------------------------------------------
/timestamp:
--------------------------------------------------------------------------------
1 | Sun Jun  1 00:44:11 UTC 2025
2 | 


--------------------------------------------------------------------------------