├── .coveragerc
├── .gitattributes
├── .github
├── workflows
│ └── codecov.yml
└── workflows_inactive
│ └── codecov.yml
├── .gitignore
├── CHANGELOGS.md
├── CODE_OF_CONDUCT.md
├── EXAMPLES
├── attention.ipynb
├── attention1.png
├── cars.ipynb
├── cars1.png
├── diamonds.ipynb
├── diamonds1.png
├── diamonds2.png
├── fmri.ipynb
├── fmri.xlsx
├── fmri1.png
├── fmri2.png
├── iris.ipynb
├── iris1.png
├── qpcr(presentation).ipynb
├── qpcr.ipynb
├── qpcr1.png
├── tips.ipynb
└── tips1.png
├── HOW_TO_USE
├── dimensions.ipynb
├── plot_editing.ipynb
├── plot_legend.ipynb
├── plot_multiplots.ipynb
├── plot_styles.ipynb
└── plotting.ipynb
├── IMAGES
└── LOGO
│ ├── plotastic_logo.afdesign
│ ├── plotastic_logo.png
│ ├── plotastic_logo_2.afdesign
│ ├── plotastic_logo_3.afdesign
│ └── plotastic_logo_3.png
├── LICENSE
├── MANIFEST.in
├── README.md
├── class_diagram.mmd
├── class_diagram.svg
├── devtools
├── readme_for_pypi.py
├── setupvenv.py
└── upload_to_pypi.sh
├── paper.bib
├── paper.md
├── paper.pdf
├── pyproject.toml
├── qpcr1.png
├── requirements.txt
├── src
└── plotastic
│ ├── __init__.py
│ ├── caches.py
│ ├── dataanalysis
│ ├── __init__.py
│ ├── annotator.py
│ ├── dataanalysis.py
│ └── filer.py
│ ├── dimensions
│ ├── __init__.py
│ ├── dataframetool.py
│ ├── dataintegrity.py
│ ├── dims.py
│ ├── dimsandlevels.py
│ ├── hierarchical_dims.py
│ └── subject.py
│ ├── docstrings.py
│ ├── example_data
│ ├── __init__.py
│ ├── data
│ │ ├── fmri.xlsx
│ │ ├── qpcr.xlsx
│ │ └── tips.xlsx
│ └── load_dataset.py
│ ├── plotting
│ ├── __init__.py
│ ├── multiplot.py
│ ├── plot.py
│ ├── plotedits.py
│ ├── rc.py
│ ├── rc_utils.py
│ └── subplot.py
│ ├── py.typed
│ ├── stat
│ ├── __init__.py
│ ├── assumptions.py
│ ├── bivariate.py
│ ├── omnibus.py
│ ├── posthoc.py
│ ├── statresults.py
│ └── stattest.py
│ └── utils
│ ├── __init__.py
│ ├── subcache.py
│ └── utils.py
├── testing
└── make_htmlcov.sh
└── tests
├── DA_configs.py
├── DA_configs2.py
├── DA_utils.py
├── _annotator_test.py
├── _assumptions_test.py
├── _dims_test.py
├── _dimsandlevels_test.py
├── _filer_test.py
├── _hierarchical_dims_test.py
├── _load_dataset_test.py
├── _multiplot_test.py
├── _omnibus_test.py
├── _paper_test.py
├── _plotedits_test.py
├── _rc_test.py
├── _save_test.py
├── _utils_test.py
├── conftest.py
├── non_pytest_checks
├── hspace_wspace.py
└── legend_position.py
├── pytest.ini
└── run_tests_in_new_env.py
/.coveragerc:
--------------------------------------------------------------------------------
1 | ; https://coverage.readthedocs.io/en/latest/config.html
2 |
3 | [paths]
4 | source =
5 | src
6 | ; */site-packages
7 |
8 |
9 | ; [run]
10 | ; parallel = true
11 | ; omit = tests/*
12 | ; branch = true
13 | ; source =
14 | ; sampleproject
15 |
16 | [report]
17 | show_missing = true
18 | precision = 1
19 | ; omit = *migrations*
20 | exclude_lines =
21 | if __name__ == .__main__.:
22 | if TYPE_CHECKING:
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/.github/workflows/codecov.yml:
--------------------------------------------------------------------------------
1 | name: test coverage
2 | run-name: ${{ github.actor }} ${{ github.event_name }}; Setup testenv, run tests, upload coverage
3 |
4 | on: [push, pull_request]
5 |
6 | jobs:
7 | build:
8 | runs-on: ubuntu-latest
9 | name: Setup testenv, run tests, upload coverage
10 | steps:
11 | - uses: actions/checkout@v3
12 | - uses: actions/setup-python@v2
13 | with:
14 | python-version: '3.11'
15 | - name: Install requirements #' Using requirements.txt should be faster
16 | run: pip install -r requirements.txt
17 | - name: Install plotastic (in editable mode! Otherwise 0% coverage)
18 | run: pip install -e .[dev]
19 | - name: Run tests and collect coverage
20 | run: pytest tests --cov --cov-report=xml
21 | - name: Upload coverage reports to Codecov with GitHub Action
22 | uses: codecov/codecov-action@v3
23 | with:
24 | token: ${{ secrets.CODECOV_TOKEN }}
25 | env:
26 | CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
--------------------------------------------------------------------------------
/.github/workflows_inactive/codecov.yml:
--------------------------------------------------------------------------------
1 | name: Codecov
2 | run-name: ${{ github.actor }} uploaded coverage reports to Codecov
3 |
4 | on: [push, pull_request]
5 |
6 | jobs:
7 | build:
8 | runs-on: ubuntu-latest
9 | name: upload coverage reports to Codecov
10 | steps:
11 | - name: Upload coverage reports to Codecov
12 | uses: codecov/codecov-action@v3
13 | env:
14 | CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
15 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Figures
2 | IMAGES/CLASS_DIAGRAMS
3 |
4 | # Examples
5 | EXAMPLES/Ezgitest
6 | EXAMPLES/qpcr.pdf
7 |
8 | # Readme for Pypi
9 | README_pypi.md
10 | README_pypi.pdf
11 | README.pdf
12 |
13 | # setup_env
14 | setup_env.ipynb
15 |
16 | # DS_store
17 | .DS_Store
18 |
19 | # joblib cache
20 | .joblib_cache
21 | joblib_cache
22 |
23 | # Folder for storing not to publish things
24 | _experimental
25 | GITIGNORE
26 | GIT_IGNORE
27 | Dont_Publish
28 | DEVELOPMENT
29 |
30 | # !!! paper, publish it only when publishing
31 | paper.docx
32 |
33 |
34 |
35 | # vscode
36 | .vscode
37 | .vscode/
38 |
39 | # Byte-compiled / optimized / DLL files
40 | __pycache__/
41 | *.py[cod]
42 | *$py.class
43 |
44 | # C extensions
45 | *.so
46 |
47 | # Distribution / packaging
48 | .Python
49 | build/
50 | develop-eggs/
51 | dist/
52 | downloads/
53 | eggs/
54 | .eggs/
55 | lib/
56 | lib64/
57 | parts/
58 | sdist/
59 | var/
60 | wheels/
61 | share/python-wheels/
62 | *.egg-info/
63 | .installed.cfg
64 | *.egg
65 | MANIFEST
66 |
67 | # PyInstaller
68 | # Usually these files are written by a python script from a template
69 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
70 | *.manifest
71 | *.spec
72 |
73 | # Installer logs
74 | pip-log.txt
75 | pip-delete-this-directory.txt
76 |
77 | # Unit test / coverage reports
78 | htmlcov/
79 | .tox/
80 | .nox/
81 | .coverage
82 | .coverage.*
83 | .cache
84 | nosetests.xml
85 | coverage.xml
86 | *.cover
87 | *.py,cover
88 | .hypothesis/
89 | .pytest_cache/
90 | cover/
91 |
92 | # Translations
93 | *.mo
94 | *.pot
95 |
96 | # Django stuff:
97 | *.log
98 | local_settings.py
99 | db.sqlite3
100 | db.sqlite3-journal
101 |
102 | # Flask stuff:
103 | instance/
104 | .webassets-cache
105 |
106 | # Scrapy stuff:
107 | .scrapy
108 |
109 | # Sphinx documentation
110 | docs/_build/
111 | src/docs/
112 |
113 | # PyBuilder
114 | .pybuilder/
115 | target/
116 |
117 | # Jupyter Notebook
118 | .ipynb_checkpoints
119 |
120 | # IPython
121 | profile_default/
122 | ipython_config.py
123 |
124 | # pyenv
125 | # For a library or package, you might want to ignore these files since the code is
126 | # intended to run in multiple environments; otherwise, check them in:
127 | # .python-version
128 |
129 | # pipenv
130 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
131 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
132 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
133 | # install all needed dependencies.
134 | #Pipfile.lock
135 |
136 | # poetry
137 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
138 | # This is especially recommended for binary packages to ensure reproducibility, and is more
139 | # commonly ignored for libraries.
140 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
141 | #poetry.lock
142 |
143 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
144 | __pypackages__/
145 |
146 | # Celery stuff
147 | celerybeat-schedule
148 | celerybeat.pid
149 |
150 | # SageMath parsed files
151 | *.sage.py
152 |
153 | # Environments
154 | .env
155 | .venv
156 | _venv
157 | env/
158 | venv*/
159 | ENV/
160 | env.bak/
161 | venv.bak/
162 |
163 | # Spyder project settings
164 | .spyderproject
165 | .spyproject
166 |
167 | # Rope project settings
168 | .ropeproject
169 |
170 | # mkdocs documentation
171 | /site
172 |
173 | # mypy
174 | .mypy_cache/
175 | .dmypy.json
176 | dmypy.json
177 |
178 | # Pyre type checker
179 | .pyre/
180 |
181 | # pytype static type analyzer
182 | .pytype/
183 |
184 | # Cython debug symbols
185 | cython_debug/
186 |
187 | # PyCharm
188 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
189 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
190 | # and can be added to the global gitignore or merged into this file. For a more nuclear
191 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
192 | #.idea/
193 |
--------------------------------------------------------------------------------
/CHANGELOGS.md:
--------------------------------------------------------------------------------
1 | # 0.1.2
2 | ### Available on:
3 | - github
4 |
5 | ### New Features:
6 | - None yet
7 |
8 |
9 |
10 | # 0.1.1
11 | ### Available on:
12 | - github
13 | - PyPi
14 |
15 | ### New Features:
16 | - Runtime config `plotting.rc`
17 | - `set_style()` now passes all available matplotlib styles to `matplotlib.style.use()`
18 |
19 | ### Experimental Features:
20 | - Plot Paired Data by Joining subjects with line for each facet/x/hue-level!
21 | - To my knowledge, the solutions provided by matplotlib or seaborn are
22 | way too difficult.
23 | - I implemented a solution that worked, but since I
24 | found a case where it didn't, this feature is experimental
25 |
26 |
27 | ### Changes:
28 | - Legends
29 | - Added `legend_kws` parameter to all multiplots
30 | - It seems strange to enforce `.edit_legend()` in chain
31 | - Also, the multiplot should decide, which legend should be
32 | displayed (e.g. by correct order of calling `.edit_legend()`
33 | in between or after `.fillaxes()`)
34 |
35 | ### Fixes:
36 | - Rewrote .edit_titles_with_func() because it didn't work
37 | - Legend is now outside of the plot no matter the figure width!
38 |
39 | ### Others:
40 | - Added Documentation notebooks to Readme
41 |
42 |
43 | # 0.1.0 - Initial Release
44 | ### Available on:
45 | - github
46 | - pypi
47 |
48 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as
6 | contributors and maintainers pledge to making participation in our
7 | project and our community a harassment-free experience for everyone,
8 | regardless of age, body size, disability, ethnicity, gender identity and
9 | expression, level of experience, nationality, personal appearance, race,
10 | religion, or sexual identity and orientation.
11 |
12 | ## Our Standards
13 |
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 |
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 |
23 | Examples of unacceptable behavior by participants include:
24 |
25 | * The use of sexualized language or imagery and unwelcome sexual
26 | attention or advances
27 | * Trolling, insulting/derogatory comments, and personal or political
28 | attacks
29 | * Public or private harassment
30 | * Publishing others' private information, such as a physical or
31 | electronic address, without explicit permission
32 | * Other conduct which could reasonably be considered inappropriate in a
33 | professional setting
34 |
35 | ## Our Responsibilities
36 |
37 | Project maintainers are responsible for clarifying the standards of
38 | acceptable behavior and are expected to take appropriate and fair
39 | corrective action in response to any instances of unacceptable behavior.
40 |
41 | Project maintainers have the right and responsibility to remove, edit,
42 | or reject comments, commits, code, wiki edits, issues, and other
43 | contributions that are not aligned to this Code of Conduct, or to ban
44 | temporarily or permanently any contributor for other behaviors that they
45 | deem inappropriate, threatening, offensive, or harmful.
46 |
47 | ## Scope
48 |
49 | This Code of Conduct applies both within project spaces and in public
50 | spaces when an individual is representing the project or its community.
51 | Examples of representing a project or community include using an
52 | official project e-mail address, posting via an official social media
53 | account, or acting as an appointed representative at an online or
54 | offline event. Representation of a project may be further defined and
55 | clarified by project maintainers.
56 |
57 | ## Enforcement
58 |
59 | Instances of abusive, harassing, or otherwise unacceptable behavior may
60 | be reported by contacting the project team at martin.kur4@gmail.com. The
61 | project team will review and investigate all complaints, and will
62 | respond in a way that it deems appropriate to the circumstances. The
63 | project team is obligated to maintain confidentiality with regard to the
64 | reporter of an incident. Further details of specific enforcement
65 | policies may be posted separately.
66 |
67 | Project maintainers who do not follow or enforce the Code of Conduct in
68 | good faith may face temporary or permanent repercussions as determined
69 | by other members of the project's leadership.
70 |
71 | ## Attribution
72 |
73 | This Code of Conduct is adapted from the [Contributor
74 | Covenant][homepage], version 1.4, available at
75 | [http://contributor-covenant.org/version/1/4][version]
76 |
77 | [homepage]: http://contributor-covenant.org
78 | [version]: http://contributor-covenant.org/version/1/4/
--------------------------------------------------------------------------------
/EXAMPLES/attention1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/EXAMPLES/attention1.png
--------------------------------------------------------------------------------
/EXAMPLES/cars1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/EXAMPLES/cars1.png
--------------------------------------------------------------------------------
/EXAMPLES/diamonds1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/EXAMPLES/diamonds1.png
--------------------------------------------------------------------------------
/EXAMPLES/diamonds2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/EXAMPLES/diamonds2.png
--------------------------------------------------------------------------------
/EXAMPLES/fmri.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/EXAMPLES/fmri.xlsx
--------------------------------------------------------------------------------
/EXAMPLES/fmri1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/EXAMPLES/fmri1.png
--------------------------------------------------------------------------------
/EXAMPLES/fmri2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/EXAMPLES/fmri2.png
--------------------------------------------------------------------------------
/EXAMPLES/iris1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/EXAMPLES/iris1.png
--------------------------------------------------------------------------------
/EXAMPLES/qpcr1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/EXAMPLES/qpcr1.png
--------------------------------------------------------------------------------
/EXAMPLES/tips1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/EXAMPLES/tips1.png
--------------------------------------------------------------------------------
/HOW_TO_USE/plot_editing.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "tags": []
7 | },
8 | "source": [
9 | "# Editing Plots\n",
10 | "Here we explain the low-end interface for plotting with plotastic."
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": null,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": []
19 | }
20 | ],
21 | "metadata": {
22 | "language_info": {
23 | "name": "python"
24 | }
25 | },
26 | "nbformat": 4,
27 | "nbformat_minor": 2
28 | }
29 |
--------------------------------------------------------------------------------
/HOW_TO_USE/plot_styles.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Styles"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "### Get Example Data"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "metadata": {},
21 | "outputs": [
22 | {
23 | "data": {
24 | "text/html": [
25 | "
\n",
26 | "\n",
39 | "
\n",
40 | " \n",
41 | " \n",
42 | " | \n",
43 | " Unnamed: 0 | \n",
44 | " subject | \n",
45 | " timepoint | \n",
46 | " event | \n",
47 | " region | \n",
48 | " signal | \n",
49 | "
\n",
50 | " \n",
51 | " \n",
52 | " \n",
53 | " 0 | \n",
54 | " 17 | \n",
55 | " s7 | \n",
56 | " 9 | \n",
57 | " stim | \n",
58 | " parietal | \n",
59 | " 0.058897 | \n",
60 | "
\n",
61 | " \n",
62 | " 1 | \n",
63 | " 36 | \n",
64 | " s8 | \n",
65 | " 9 | \n",
66 | " stim | \n",
67 | " parietal | \n",
68 | " 0.170227 | \n",
69 | "
\n",
70 | " \n",
71 | " 2 | \n",
72 | " 67 | \n",
73 | " s0 | \n",
74 | " 0 | \n",
75 | " stim | \n",
76 | " frontal | \n",
77 | " -0.021452 | \n",
78 | "
\n",
79 | " \n",
80 | " 3 | \n",
81 | " 84 | \n",
82 | " s1 | \n",
83 | " 0 | \n",
84 | " stim | \n",
85 | " parietal | \n",
86 | " -0.064454 | \n",
87 | "
\n",
88 | " \n",
89 | " 4 | \n",
90 | " 127 | \n",
91 | " s13 | \n",
92 | " 9 | \n",
93 | " stim | \n",
94 | " parietal | \n",
95 | " 0.013245 | \n",
96 | "
\n",
97 | " \n",
98 | "
\n",
99 | "
"
100 | ],
101 | "text/plain": [
102 | " Unnamed: 0 subject timepoint event region signal\n",
103 | "0 17 s7 9 stim parietal 0.058897\n",
104 | "1 36 s8 9 stim parietal 0.170227\n",
105 | "2 67 s0 0 stim frontal -0.021452\n",
106 | "3 84 s1 0 stim parietal -0.064454\n",
107 | "4 127 s13 9 stim parietal 0.013245"
108 | ]
109 | },
110 | "execution_count": 1,
111 | "metadata": {},
112 | "output_type": "execute_result"
113 | }
114 | ],
115 | "source": [
116 | "import plotastic as plst\n",
117 | "\n",
118 | "# Import Example Data\n",
119 | "DF, _dims = plst.load_dataset(\"fmri\", verbose=False)\n",
120 | "\n",
121 | "DF.head()"
122 | ]
123 | },
124 | {
125 | "cell_type": "code",
126 | "execution_count": 2,
127 | "metadata": {},
128 | "outputs": [
129 | {
130 | "name": "stdout",
131 | "output_type": "stream",
132 | "text": [
133 | "===============================================================================\n",
134 | "#! Checking data integrity...\n",
135 | "✅ DATA COMPLETE: All combinations of levels from selected factors are present\n",
136 | " in the Dataframe, including x.\n",
137 | "✅ GROUPS COMPLETE: No groups with NaNs.\n",
138 | "✅ GROUPS EQUAL: All groups (40 total) have the same samplesize n = 14.0.\n",
139 | "🌳 LEVELS WELL CONNECTED: These Factors have levels that are always found\n",
140 | " together: ['region', 'event'].\n",
141 | " 👉 Call .levels_combocount() or .levels_dendrogram() to see them all.\n",
142 | "✅ Subjects complete: No subjects with missing data\n",
143 | "===============================================================================\n"
144 | ]
145 | }
146 | ],
147 | "source": [
148 | "# Define dimensions explicitly\n",
149 | "dims = dict(\n",
150 | " y=\"signal\",\n",
151 | " x=\"timepoint\",\n",
152 | " hue=\"event\",\n",
153 | " col=\"region\",\n",
154 | ")\n",
155 | "\n",
156 | "# Make DataAnalysis object\n",
157 | "DA = plst.DataAnalysis(\n",
158 | " data=DF, # Dataframe\n",
159 | " dims=dims, # Dictionary with y, x, hue, col, row\n",
160 | " subject=\"subject\", # Data is paired by subject (optional)\n",
161 | " verbose=True, # Print out Info about the Data\n",
162 | ")"
163 | ]
164 | },
165 | {
166 | "cell_type": "markdown",
167 | "metadata": {},
168 | "source": [
169 | "## What Styles are there?"
170 | ]
171 | },
172 | {
173 | "cell_type": "markdown",
174 | "metadata": {},
175 | "source": [
176 | "#### Print style names"
177 | ]
178 | },
179 | {
180 | "cell_type": "code",
181 | "execution_count": 3,
182 | "metadata": {},
183 | "outputs": [
184 | {
185 | "name": "stdout",
186 | "output_type": "stream",
187 | "text": [
188 | "plotastic:\n",
189 | "\t['default', 'paper']\n",
190 | "seaborn:\n",
191 | "\t['white', 'dark', 'whitegrid', 'darkgrid', 'ticks']\n",
192 | "matplotlib:\n",
193 | "\t['Solarize_Light2', '_classic_test_patch', '_mpl-gallery', '_mpl-gallery-nogrid', 'bmh', 'classic', 'dark_background', 'fast', 'fivethirtyeight', 'ggplot', 'grayscale', 'seaborn-v0_8', 'seaborn-v0_8-bright', 'seaborn-v0_8-colorblind', 'seaborn-v0_8-dark', 'seaborn-v0_8-dark-palette', 'seaborn-v0_8-darkgrid', 'seaborn-v0_8-deep', 'seaborn-v0_8-muted', 'seaborn-v0_8-notebook', 'seaborn-v0_8-paper', 'seaborn-v0_8-pastel', 'seaborn-v0_8-poster', 'seaborn-v0_8-talk', 'seaborn-v0_8-ticks', 'seaborn-v0_8-white', 'seaborn-v0_8-whitegrid', 'tableau-colorblind10']\n"
194 | ]
195 | }
196 | ],
197 | "source": [
198 | "plst.print_styles()"
199 | ]
200 | },
201 | {
202 | "cell_type": "markdown",
203 | "metadata": {},
204 | "source": [
205 | "#### Show all styles"
206 | ]
207 | },
208 | {
209 | "cell_type": "code",
210 | "execution_count": null,
211 | "metadata": {},
212 | "outputs": [],
213 | "source": []
214 | },
215 | {
216 | "cell_type": "markdown",
217 | "metadata": {},
218 | "source": [
219 | "## `plst.set_style()` takes styles from matplotlib, seaborn and plotastic!"
220 | ]
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": 4,
225 | "metadata": {},
226 | "outputs": [],
227 | "source": [
228 | "plst.set_style(\"paper\")"
229 | ]
230 | },
231 | {
232 | "cell_type": "markdown",
233 | "metadata": {},
234 | "source": [
235 | "## Colors"
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": null,
241 | "metadata": {},
242 | "outputs": [],
243 | "source": [
244 | "plst.set_palette([\"blue\", \"green\", \"red\"], verbose=False)"
245 | ]
246 | },
247 | {
248 | "cell_type": "markdown",
249 | "metadata": {},
250 | "source": []
251 | }
252 | ],
253 | "metadata": {
254 | "kernelspec": {
255 | "display_name": "venv",
256 | "language": "python",
257 | "name": "python3"
258 | },
259 | "language_info": {
260 | "codemirror_mode": {
261 | "name": "ipython",
262 | "version": 3
263 | },
264 | "file_extension": ".py",
265 | "mimetype": "text/x-python",
266 | "name": "python",
267 | "nbconvert_exporter": "python",
268 | "pygments_lexer": "ipython3",
269 | "version": "3.11.6"
270 | }
271 | },
272 | "nbformat": 4,
273 | "nbformat_minor": 2
274 | }
275 |
--------------------------------------------------------------------------------
/IMAGES/LOGO/plotastic_logo.afdesign:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/IMAGES/LOGO/plotastic_logo.afdesign
--------------------------------------------------------------------------------
/IMAGES/LOGO/plotastic_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/IMAGES/LOGO/plotastic_logo.png
--------------------------------------------------------------------------------
/IMAGES/LOGO/plotastic_logo_2.afdesign:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/IMAGES/LOGO/plotastic_logo_2.afdesign
--------------------------------------------------------------------------------
/IMAGES/LOGO/plotastic_logo_3.afdesign:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/IMAGES/LOGO/plotastic_logo_3.afdesign
--------------------------------------------------------------------------------
/IMAGES/LOGO/plotastic_logo_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/IMAGES/LOGO/plotastic_logo_3.png
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | exclude \
2 | src/plotastic/.vscode/*.json \
3 |
4 | include \
5 | # LICENSE \
6 | # README.md \
7 | # How_To_Use/*.ipynb \
8 | # tests/pytest.ini \
9 | # tests/*.py \
10 | # tests/*_test/*.py \
11 | # src/plotastic/py.typed \
12 | # src/plotastic/dataanalysis/py.typed \
13 | # src/plotastic/example_data/data/*.xlsx
--------------------------------------------------------------------------------
/class_diagram.mmd:
--------------------------------------------------------------------------------
1 |
2 |
3 | classDiagram
4 |
5 | %% == ANALYSIS ==============================================================
6 |
7 | class pd_DataFrame{
8 | ...
9 | ....()
10 | }
11 | class Dims {
12 | x: str
13 | y: str
14 | hue: str =None
15 | row: str =None
16 | col: str =None
17 | set(**kwargs, inplace: bool =False)
18 | switch(*keys, **kwargs, inplace: bool =False)
19 | }
20 | class DimsAndLevels {
21 |
22 | data: pd.DataFrame
23 | dims: Dims
24 |
25 | %%_empty_groups(property)
26 | factors_all(property) list[x,y,hue,row,col]
27 | factors_xhue(property) list[x,hue]
28 | factors_rowcol(property) list[row,col]
29 | levels_dict_factor(property) dict[f1:[l1, l2, ...], f2:[...], ...]
30 | levelkeys(property) list[tuple[l1, l2], ...]
31 | ....()
32 | }
33 | class Subject{
34 | subject: str
35 | subjectlist(property): list[str]
36 | ....()
37 | }
38 | class HierarchicalDims{
39 | _factors_hierarchical(property)
40 | ...
41 | data_hierarchicize()
42 | ....()
43 | }
44 | class DataFrameTool{
45 | verbose: bool =False
46 | levels: list[tuple[str]] =None
47 | catplot(kind="strip") -> sns.FacetGrid
48 | transform_y() -> self
49 | data_describe() -> pd.DataFrame
50 | data_categorize() -> self
51 | data_iter__key_facet(property) -> Generator
52 | ....()
53 | }
54 | class DataIntegrity{
55 | data_check_integrity()
56 | ....()
57 | }
58 |
59 |
60 | pd_DataFrame *-- DimsAndLevels
61 | Dims *-- DimsAndLevels
62 | DimsAndLevels <|-- Subject
63 | Subject <|-- HierarchicalDims
64 | HierarchicalDims <|-- DataFrameTool
65 | DataFrameTool <|-- DataIntegrity
66 | DataIntegrity <|-- SubPlot
67 | DataIntegrity <|-- StatTest
68 |
69 |
70 | %% == STATISTICS ============================================================
71 |
72 | class pingouin{
73 | <>
74 | anova()
75 | rm_anova()
76 | pairwise_tests()
77 | ....()
78 | }
79 | class StatResults{
80 | <>
81 | DF_normality: pd.DataFrame = "not tested"
82 | DF_homoscedasticity: pd.DataFrame = "not tested"
83 | DF_sphericity: pd.DataFrame = "not tested"
84 | DF_posthoc: pd.DataFrame = "not tested"
85 | DF_omnibus: pd.DataFrame = "not tested"
86 | DF_bivariate: pd.DataFrame = "not tested"
87 | ...
88 | normal(property):bool ="not assessed"
89 | homoscedastic(property):bool ="unknown"
90 | spherical(property):bool ="unknown"
91 | parametric(property):bool =None
92 | assess_normality()
93 | save()
94 | ....()
95 | }
96 | class StatTest{
97 | <>
98 | ALPHA: float = 0.05
99 | ALPHA_TOLERANCE: float = 0.075
100 | results: StatResults
101 | ...
102 | set_alpha()
103 | set_alpha_tolerance()
104 | _p_to_stars(p: float) -> str
105 | _effectsize_to_words(effectsize: float) -> str
106 | ....()
107 | }
108 | class Assumptions{
109 | ...
110 | check_normality()
111 | check_sphericity()
112 | check_homoscedasticity()
113 | ....()
114 | }
115 | class Omnibus{
116 | ...
117 | omnibus_anova()
118 | omnibus_rmanova()
119 | omnibus_kruskal()
120 | ....()
121 | }
122 | class PostHoc{
123 | ...
124 | test_pairwise(paired, parametric)
125 | ....()
126 | }
127 | class Bivariate{
128 | ...
129 | test_pearson()
130 | test_spearman()
131 | test_kendall()
132 | ....()
133 | }
134 |
135 | StatResults *-- StatTest
136 | StatTest <|-- Assumptions
137 |
138 | Assumptions <|-- PostHoc
139 | Assumptions <|-- Omnibus
140 | Assumptions <|-- Bivariate
141 | pingouin .. Assumptions: Uses
142 |
143 |
144 | %% == PLOTTING ==============================================================
145 |
146 | class rc{
147 | <>
148 | FONTSIZE
149 | STYLE_PAPER
150 | STYLE_PRESENTATION
151 | set_style()
152 | set_palette()
153 | }
154 | class matplotlib{
155 | <>
156 | ...
157 | Axes
158 | Figure
159 | fig.legend()
160 | ....()
161 | }
162 | class SubPlot{
163 | fig: mpl.figure.Figure
164 | axes: mpl.axes.Axes
165 | ...
166 | subplots() -> (fig, axes)
167 | fillaxes(kind="strip") -> (fig, axes)
168 |
169 | axes_nested(property) -> np.ndarray(axes).shape(1,1)
170 | axes_iter__key_ax(property) -> ax
171 |
172 | }
173 | class PlotEdits{
174 | edit_titles(titles:dict) -> None
175 | edit_xy_axis_labels(labels:dict) -> None
176 | edit_yticklabels_log_minor(ticks:dict) -> None
177 | ...()
178 | }
179 | class Plot{
180 | plot()
181 | plot_connect_subjects()
182 | ...()
183 | }
184 | class MultiPlot{
185 | <>
186 | plot_box_strip()
187 | plot_bar_swarm()
188 | plot_qqplot()
189 | ...()
190 | }
191 |
192 | matplotlib *-- SubPlot
193 | matplotlib <.. rc: Configures
194 | SubPlot <|-- PlotEdits
195 | PlotEdits <|-- Plot
196 | Plot <|-- MultiPlot
197 |
198 |
199 | %% == DATAANALYSIS ==========================================================
200 |
201 | class Annotator{
202 | _annotated: bool =False
203 | ...
204 | _check_include_exclude()
205 | iter__key_df_ax(PH:pd.DataFrame) -> Generator
206 | annotate_pairwise()
207 | ....()
208 | }
209 | class Filer{
210 | <>
211 | title: str ="untitled"
212 | prevent_overwrite()
213 | }
214 | class DataAnalysis{
215 | <>
216 | %% FIGURES DON'T NEED TITLES, WE EDIT THEM AFTERWARDS
217 | title = "untitled"
218 | filer: Filer
219 | ...
220 | title_add()
221 | save_statistics()
222 | ....()
223 | }
224 |
225 | MultiPlot <|-- Annotator
226 | Omnibus <|-- Annotator
227 | PostHoc <|-- Annotator
228 | Bivariate <|-- Annotator
229 |
230 | Filer *-- DataAnalysis
231 |
232 | Annotator <|-- DataAnalysis
233 |
234 |
235 | %% == Links =================================================================
236 |
237 | %% dimensions
238 | click Dims href "https://github.com/markur4/plotastic/blob/main/src/plotastic/dimensions/dims.py"
239 | click DimsAndLevels href "https://github.com/markur4/plotastic/blob/main/src/plotastic/dimensions/dimsandlevels.py"
240 | click DataFrameTool href "https://github.com/markur4/plotastic/blob/main/src/plotastic/dimensions/dataframetool.py"
241 | click HierarchicalDims href "https://github.com/markur4/plotastic/blob/main/src/plotastic/dimensions/hierarchical_dims.py"
242 | click Subject href "https://github.com/markur4/plotastic/blob/main/src/plotastic/dimensions/subject.py"
243 | click DataIntegrity href "https://github.com/markur4/plotastic/blob/main/src/plotastic/dimensions/dataintegrity.py"
244 |
245 | %% stat
246 | click StatResults href "https://github.com/markur4/plotastic/blob/main/src/plotastic/stat/statresults.py"
247 | click StatTest href "https://github.com/markur4/plotastic/blob/main/src/plotastic/stat/stattest.py"
248 | click Assumptions href "https://github.com/markur4/plotastic/blob/main/src/plotastic/stat/assumptions.py"
249 | click Omnibus href "https://github.com/markur4/plotastic/blob/main/src/plotastic/stat/omnibus.py"
250 | click PostHoc href "https://github.com/markur4/plotastic/blob/main/src/plotastic/stat/posthoc.py"
251 |
252 | %% plotting
253 | click rc href "https://github.com/markur4/plotastic/blob/main/src/plotastic/plotting/rc.py"
254 | click SubPlot href "https://github.com/markur4/plotastic/blob/main/src/plotastic/plotting/subplot.py"
255 | click Plot href "https://github.com/markur4/plotastic/blob/main/src/plotastic/plotting/plot.py"
256 | click PlotEdits href "https://github.com/markur4/plotastic/blob/main/src/plotastic/plotting/plotedits.py"
257 | click MultiPlot href "https://github.com/markur4/plotastic/blob/main/src/plotastic/plotting/multiplot.py"
258 |
259 | %% dataanalysis
260 | click Annotator href "https://github.com/markur4/plotastic/blob/main/src/plotastic/dataanalysis/annotator.py"
261 | click Filer href "https://github.com/markur4/plotastic/blob/main/src/plotastic/dataanalysis/filer.py"
262 | click DataAnalysis href "https://github.com/markur4/plotastic/blob/main/src/plotastic/dataanalysis/dataanalysis.py"
263 |
264 |
265 |
266 |
--------------------------------------------------------------------------------
/devtools/readme_for_pypi.py:
--------------------------------------------------------------------------------
1 | """Removes Parts from README.md that PyPi can't handle by removing parts
2 | enclosed by a marker line"""
3 |
4 | # %%
5 | import argparse
6 |
7 | # %%
8 |
9 |
10 | parser = argparse.ArgumentParser()
11 | parser.add_argument("-i", "--input", default="README.md")
12 |
13 |
14 |
15 | # %%
def open_readme(path: str) -> str:
    """Return the full text content of the README file at *path*."""
    with open(path, "r") as readme_file:
        content = readme_file.read()
    return content
20 |
21 |
def write_readme(path: str, text: str) -> None:
    """Overwrite (or create) the file at *path* with *text*."""
    with open(path, "w") as readme_file:
        readme_file.write(text)
25 |
26 |
27 | # %%
### Entry point: strip PyPi-incompatible sections out of README.md and
### write the result to README_pypi.md (referenced by pyproject.toml).
if __name__ == "__main__":
    # readme_in = os.path.join("..", "README.md") # ::
    # readme_out = os.path.join("..", "README_pypi.md") # ::
    args = parser.parse_args() # ::
    readme_in = args.input # ::
    readme_out = "README_pypi.md" # ::

    README = open_readme(readme_in)
    print(README)

    # %%
    # NOTE(review): the separator below is an empty string, which makes
    # str.split() raise ValueError at runtime. A marker string (likely an
    # HTML comment used to fence PyPi-incompatible sections) appears to
    # have been lost here — TODO restore the original marker.
    split = README.split("")
    split
    # %%
    ### Remove those parts from readme that end with
    # NOTE(review): `"" in p` is always True, so this filter keeps nothing
    # as written — the end-marker string also appears lost; verify against
    # version control.
    split_r = [p for p in split if not "" in p]

    joined = "\n".join(split_r)

    # %%
    write_readme(readme_out, joined)
49 |
--------------------------------------------------------------------------------
/devtools/setupvenv.py:
--------------------------------------------------------------------------------
1 | #
2 | # %%
3 | from pathlib import Path
4 | import shutil
5 | import venv
6 | import subprocess as sp
7 |
8 |
9 | # %%
10 | ### Define paths
def makepath(*args) -> str:
    """Join *args* into a single, OS-appropriate path string."""
    joined = Path(*args)
    return str(joined)
13 |
14 | # :: Check Project Root!
15 | ROOT = ".."
16 |
17 | PROJ = makepath(ROOT, ".") # > Project location for editable install
18 | VENV = makepath(ROOT, "venv") # > Virtual environment location
19 | PYTHON = makepath(ROOT, "venv", "bin", "python") # > Python executable
20 | REQUIREMENTS = makepath(ROOT, "requirements.txt")
21 |
22 |
23 | # %%
24 | ### make virtual environment
25 | # > Delete venv if it exists
26 | if Path(VENV).exists():
27 | shutil.rmtree(VENV)
28 |
29 | venv.create(VENV, with_pip=True)
30 |
31 |
32 | # %%
33 | ### Install this project
34 | sp.run([PYTHON, "-m", "pip", "install", "-e", PROJ])
# %%
### Create requirements.txt
# !! Shell redirection (">") does not work inside an argv list passed to
# !! subprocess — pip received ">" and the filename as positional arguments
# !! and rejects them. stdout=f already writes the freeze output to file.
with open(REQUIREMENTS, "w") as f:
    sp.call(
        [
            PYTHON,
            "-m",
            "pip",
            "freeze",
            "--exclude-editable",
            "-l",
        ],
        stdout=f,
    )
51 | # %%
52 | ### Install devtools
53 | sp.run([PYTHON, "-m", "pip", "install", "-e", f"{PROJ}[dev]"])
54 |
55 |
56 | # %%
57 | #:: Switch to venv !! ==================================================
58 | # %%
59 | ### test packages
60 | import numpy as np
61 |
62 | np.__version__
63 |
64 | # %%
65 | import pytest
66 |
67 | pytest.__version__
68 |
69 |
70 | # %%
71 | ### Make a user venv
72 | import venv
73 | venv.create("venv_user", with_pip=True)
--------------------------------------------------------------------------------
/devtools/upload_to_pypi.sh:
--------------------------------------------------------------------------------
1 |
2 | # Convert README.md to README_pypi.md
3 | python devtools/readme_for_pypi.py -i README.md
4 |
5 | # Update version on pyproject.toml
6 | # !! Don't do this, removes comments
7 | # python devtools/update_version.py -i pyproject.toml -o pyproject_test.toml
8 |
9 | # Remove old build
10 | rm -r dist
11 |
12 | # BUILD
13 | python -m build
14 |
15 | # Require API token if two-factor was enabled
16 | # provide API token as password
17 | twine upload --repository testpypi dist/* --username __token__
18 |
19 | # UPLOAD TO REAL PyPi
20 | twine upload dist/* --username __token__
21 |
22 | # use this in a venv
23 | pip install -i https://test.pypi.org/simple/ plotastic
--------------------------------------------------------------------------------
/paper.bib:
--------------------------------------------------------------------------------
1 | @misc{charlierTrevismdStatannotationsV02022,
2 | title = {Trevismd/Statannotations: V0.5},
3 | shorttitle = {Trevismd/Statannotations},
4 | author = {Charlier, Florian and Weber, Marc and Izak, Dariusz and Harkin, Emerson and Magnus, Marcin and Lalli, Joseph and Fresnais, Louison and Chan, Matt and Markov, Nikolay and Amsalem, Oren and Proost, Sebastian and {Agamemnon Krasoulis} and {Getzze} and Repplinger, Stefan},
5 | year = {2022},
6 | month = oct,
7 | doi = {10.5281/ZENODO.7213391},
8 | url = {https://zenodo.org/record/7213391},
9 | urldate = {2023-11-16},
10 | abstract = {Add scipy's Brunner-Munzel test Fix applying statannotations for non-string group labels (Issue \#65) Get Zenodo DOI},
11 | copyright = {Open Access},
12 | howpublished = {Zenodo}
13 | }
14 |
15 | @article{hunterMatplotlib2DGraphics2007,
16 | title = {Matplotlib: {{A 2D Graphics Environment}}},
17 | shorttitle = {Matplotlib},
18 | author = {Hunter, John D.},
19 | year = {2007},
20 | month = may,
21 | journal = {Computing in Science \& Engineering},
22 | volume = {9},
23 | number = {3},
24 | pages = {90--95},
25 | issn = {1558-366X},
26 | doi = {10.1109/MCSE.2007.55},
27 | url = {https://ieeexplore.ieee.org/document/4160265},
28 | urldate = {2023-11-15},
29 | abstract = {Matplotlib is a 2D graphics package used for Python for application development, interactive scripting,and publication-quality image generation across user interfaces and operating systems},
30 | file = {/Users/martinkuric/Zotero/storage/W4FJZDNY/§-hunterMatplotlib2DGraphics2007.pdf;/Users/martinkuric/Zotero/storage/GW3HZZHR/4160265.html}
31 | }
32 |
33 | @inproceedings{mckinneyDataStructuresStatistical2010,
34 | title = {Data {{Structures}} for {{Statistical Computing}} in {{Python}}},
35 | author = {McKinney, Wes},
36 | year = {2010},
37 | month = jan,
38 | pages = {56--61},
39 | doi = {10.25080/Majora-92bf1922-00a}
40 | }
41 |
42 | @article{mckinneyPandasFoundationalPython2011,
43 | title = {Pandas: A {{Foundational Python Library}} for {{Data Analysis}} and {{Statistics}}},
44 | shorttitle = {Pandas},
45 | author = {Mckinney, Wes},
46 | year = {2011},
47 | month = jan,
48 | journal = {Python High Performance Science Computer},
49 | abstract = {---In this paper we will discuss pandas, a Python library of rich data structures and tools for working with structured data sets common to statistics, finance, social sciences, and many other fields. The library provides integrated, intuitive routines for performing common data manipulations and analysis on such data sets. It aims to be the foundational layer for the future of statistical computing in Python. It serves as a strong complement to the existing scientific Python stack while implementing and improving upon the kinds of data manipulation tools found in other statistical programming languages such as R. In addition to detailing its design and features of pandas, we will discuss future avenues of work and growth opportunities for statistics and data analysis applications in the Python language.},
50 | file = {/Users/martinkuric/Zotero/storage/IH5C5UZ3/§-mckinneyPandasFoundationalPython2011.pdf}
51 | }
52 |
53 | @misc{reback2020pandas,
54 | title = {Pandas-Dev/Pandas: {{Pandas}}},
55 | author = {The Pandas Development Team},
56 | year = {2020},
57 | month = feb,
58 | doi = {10.5281/zenodo.3509134},
59 | url = {https://doi.org/10.5281/zenodo.3509134},
60 | howpublished = {Zenodo}
61 | }
62 |
63 | @article{vallatPingouinStatisticsPython2018,
64 | title = {Pingouin: Statistics in {{Python}}},
65 | shorttitle = {Pingouin},
66 | author = {Vallat, Raphael},
67 | year = {2018},
68 | month = nov,
69 | journal = {Journal of Open Source Software},
70 | volume = {3},
71 | number = {31},
72 | pages = {1026},
73 | issn = {2475-9066},
74 | doi = {10.21105/joss.01026},
75 | url = {https://joss.theoj.org/papers/10.21105/joss.01026},
76 | urldate = {2023-05-29},
77 | abstract = {Vallat, (2018). Pingouin: statistics in Python. Journal of Open Source Software, 3(31), 1026, https://doi.org/10.21105/joss.01026},
78 | langid = {english},
79 | file = {/Users/martinkuric/Zotero/storage/ECARCXLJ/§-vallatPingouinStatisticsPython2018.pdf}
80 | }
81 |
82 | @article{waskomSeabornStatisticalData2021,
83 | title = {Seaborn: Statistical Data Visualization},
84 | shorttitle = {Seaborn},
85 | author = {Waskom, Michael L.},
86 | year = {2021},
87 | month = apr,
88 | journal = {Journal of Open Source Software},
89 | volume = {6},
90 | number = {60},
91 | pages = {3021},
92 | issn = {2475-9066},
93 | doi = {10.21105/joss.03021},
94 | url = {https://joss.theoj.org/papers/10.21105/joss.03021},
95 | urldate = {2023-03-26},
96 | abstract = {Waskom, M. L., (2021). seaborn: statistical data visualization. Journal of Open Source Software, 6(60), 3021, https://doi.org/10.21105/joss.03021},
97 | langid = {english},
98 | file = {/Users/martinkuric/Zotero/storage/2ZWPNQDG/§-waskomSeabornStatisticalData2021.pdf}
99 | }
100 |
101 | @article{wickhamTidyData2014a,
102 | title = {Tidy {{Data}}},
103 | author = {Wickham, Hadley},
104 | year = {2014},
105 | month = sep,
106 | journal = {Journal of Statistical Software},
107 | volume = {59},
108 | pages = {1--23},
109 | issn = {1548-7660},
110 | doi = {10.18637/jss.v059.i10},
111 | url = {https://doi.org/10.18637/jss.v059.i10},
112 | urldate = {2023-11-15},
113 | abstract = {A huge amount of effort is spent cleaning data to get it ready for analysis, but there has been little research on how to make data cleaning as easy and effective as possible. This paper tackles a small, but important, component of data cleaning: data tidying. Tidy datasets are easy to manipulate, model and visualize, and have a specific structure: each variable is a column, each observation is a row, and each type of observational unit is a table. This framework makes it easy to tidy messy datasets because only a small set of tools are needed to deal with a wide range of un-tidy datasets. This structure also makes it easier to develop tidy tools for data analysis, tools that both input and output tidy datasets. The advantages of a consistent data structure and matching tools are demonstrated with a case study free from mundane data manipulation chores.},
114 | copyright = {Copyright (c) 2013 Hadley Wickham},
115 | langid = {english}
116 | }
117 |
--------------------------------------------------------------------------------
/paper.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/paper.pdf
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | #' https://setuptools.readthedocs.io/en/latest/setuptools.html#metadata
2 |
3 |
4 | # https://setuptools.pypa.io/en/latest/references/keywords.html
5 |
6 | ### LAYOUT:
7 | #' project_root/ # Project root: 'plotastic'
8 | #' ├── .gitattributes
9 | #' ├── .gitignore
10 | #' ├── LICENSE
11 | #' ├── MANIFEST.in
12 | #' ├── README.md
13 | #' ├── pyproject.toml
14 | #' ├── requirements.txt
15 | #' ├── (setup.cfg) # No longer needed, but still supported
16 | #' ├── (paper.md) # For publication
17 | #' ├── ...
18 | #' └── src/ # Source root
19 | #' └── package/ # Package root: 'plotastic'
20 | #' ├── __init__.py
21 | #' ├── .vscode
22 | #' ├── py.typed
23 | #' ├── ...
24 | #' ├── (module.py)
25 | #' ├── subpkg1/ # Subpackage root: 'plotastic.dimensions'
26 | #' │ ├── __init__.py
27 | #' │ ├── ...
28 | #' │ └── module1.py
29 | #' └── subpkg2/ # Subpackage root: 'plotastic.plotting'
30 | #' ├── __init__.py
31 | #' ├── ...
32 | #' └── module2.py
33 |
34 | [build-system] # =======================================================
35 | requires = ["setuptools", "setuptools-scm"]
36 | build-backend = "setuptools.build_meta"
37 |
38 |
39 | [project] # ============================================================
40 | name = "plotastic"
41 | version = "0.1.1" # ' ...
42 | authors = [{ name = "Martin Kuric", email = "martin.kur4@gmail.com" }]
43 | description = "Streamlining statistical analysis by using plotting keywords in Python."
44 | readme = "README_pypi.md"
45 | license = { file = "LICENSE" } # ' or { text = "GPLv3" }
46 | keywords = [
47 | "plotting",
48 | "statistics",
49 | "data analysis",
50 | "data visualization",
51 | "data science",
52 | "data",
53 | "science",
54 | "visualization",
55 | ]
56 | classifiers = [
57 | "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
58 | "Development Status :: 5 - Production/Stable",
59 | "Framework :: IPython",
60 | "Framework :: Jupyter",
61 | "Intended Audience :: Science/Research",
62 | "Intended Audience :: Education",
63 | "Intended Audience :: Healthcare Industry",
64 | "Intended Audience :: Financial and Insurance Industry",
65 | "Topic :: Scientific/Engineering",
66 | "Topic :: Scientific/Engineering :: Visualization",
67 | "Topic :: Scientific/Engineering :: Information Analysis",
68 | "Topic :: Scientific/Engineering :: Bio-Informatics",
69 | "Programming Language :: Python :: 3.11",
70 | ]
71 | ### Python version
72 | #' Lower versions than 3.11 have not been tested
73 | requires-python = ">=3.11"
74 |
75 | ### Dependencies
76 | #' Specify version only if concrete incompatibilities exist
77 | dependencies = [
78 | #* Core
79 | "numpy",
80 | "pandas==1.5.3", #!! pingouin Not working with pandas 2.0 yet
81 | # * Plotting
82 | "matplotlib",
83 | "seaborn<=0.12.2", #!! 0.13 has issues with hue
84 | "Pillow>=10.2.0", #!! github security risk
85 | #* Statistics
86 | "scipy",
87 | # "statannot", #' Superseded by statannotations
88 | "statannotations",
89 | "pingouin",
90 | #* Excel
91 | "xlsxwriter", #' For saving results to excel
92 | "openpyxl", #' Optional for Pandas, but error when not installed
93 | #* Misc
94 | "joblib", #' Caching
95 | "colour", #' For custom colour maps
96 | "ipynbname", #' Used by utils
97 | "icecream", #' Better than print (and maybe later logging)
98 | ]
99 |
100 | ### Dynamic fields
101 | # dynamic = ["version"]
102 |
103 |
104 | [project.optional-dependencies] # ======================================
105 | ### Install with:
106 | # ' $ pip install sampleproject[dev]
107 | dev = [
108 | "pytest",
109 | "ipytest",
110 | "pytest-cov", # * Displays how much of code was covered by testing
111 | "pytest-xdist", # * Parallel testing
112 | "nbconvert", # * For converting notebooks to markdown
113 | "build", # * For building the package into dist
114 | "twine", # * For uploading to PyPI
115 | ]
116 |
117 |
118 | [project.urls] # =======================================================
119 | "Homepage" = "https://github.com/markur4/plotastic"
120 | "Documentation" = "https://github.com/markur4/plotastic"
121 | "Source Code" = "https://github.com/markur4/plotastic"
122 | "Bug Reports" = "https://github.com/markur4/plotastic/issues"
123 | # "Funding" = "https://donate.pypi.org"
124 |
125 |
126 | [tool.setuptools] # ====================================================
127 | # package-data = { "example_data" = ["*.xlsx"]}
128 | include-package-data = true # ? Defaults to true, should I keep this?
129 |
130 | [tool.setuptools.packages.find]
131 | where = ["src"] # ? it also worked without this..?
132 |
133 | ### Package-data handled in MANIFEST.in
134 | # [tool.setuptools.exclude-package-data]
135 | # plotastic = [".vscode"]
136 |
137 | # [tool.setuptools.package-data]
138 | # "*" = ["LICENSE"]
139 | # plotastic = ["example_data/*.xlsx"]
140 |
--------------------------------------------------------------------------------
/qpcr1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/qpcr1.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | appnope==0.1.3
2 | asttokens==2.4.1
3 | colorama==0.4.6
4 | colour==0.1.5
5 | comm==0.2.1
6 | contourpy==1.2.0
7 | cycler==0.12.1
8 | debugpy==1.8.0
9 | decorator==5.1.1
10 | et-xmlfile==1.1.0
11 | executing==2.0.1
12 | fonttools==4.47.2
13 | icecream==2.1.3
14 | ipykernel==6.29.0
15 | ipynbname==2023.2.0.0
16 | ipython==8.20.0
17 | jedi==0.19.1
18 | joblib==1.3.2
19 | jupyter_client==8.6.0
20 | jupyter_core==5.7.1
21 | kiwisolver==1.4.5
22 | matplotlib==3.8.2
23 | matplotlib-inline==0.1.6
24 | nest-asyncio==1.6.0
25 | numpy==1.26.3
26 | openpyxl==3.1.2
27 | packaging==23.2
28 | pandas==1.5.3
29 | pandas-flavor==0.6.0
30 | parso==0.8.3
31 | patsy==0.5.6
32 | pexpect==4.9.0
33 | pillow==10.2.0
34 | pingouin==0.5.4
35 | platformdirs==4.1.0
36 | prompt-toolkit==3.0.43
37 | psutil==5.9.8
38 | ptyprocess==0.7.0
39 | pure-eval==0.2.2
40 | Pygments==2.17.2
41 | pyparsing==3.1.1
42 | python-dateutil==2.8.2
43 | pytz==2023.3.post1
44 | pyzmq==25.1.2
45 | scikit-learn==1.4.0
46 | scipy==1.12.0
47 | seaborn==0.11.2
48 | six==1.16.0
49 | stack-data==0.6.3
50 | statannotations==0.6.0
51 | statsmodels==0.14.1
52 | tabulate==0.9.0
53 | threadpoolctl==3.2.0
54 | tornado==6.4
55 | traitlets==5.14.1
56 | wcwidth==0.2.13
57 | xarray==2024.1.1
58 | XlsxWriter==3.1.9
59 |
--------------------------------------------------------------------------------
/src/plotastic/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # == Metadata ==========================================================
3 | from importlib import metadata
4 |
5 | # ? https://packaging.python.org/guides/single-sourcing-package-version/
6 | # ? Do we need this?
7 | __version__ = metadata.version(__name__)
8 | __author__ = "markur4"
9 |
10 |
11 | # == Flatten module access ============================================
12 | from .plotting.rc_utils import set_style, print_styles, set_palette
13 | from .dataanalysis.dataanalysis import DataAnalysis
14 | from .example_data.load_dataset import load_dataset
15 | from .utils import utils
16 |
17 |
18 | # == __all__ ===========================================================
19 | __all__ = [
20 | DataAnalysis,
21 | set_style,
22 | print_styles,
23 | set_palette,
24 | load_dataset,
25 | ]
26 |
--------------------------------------------------------------------------------
/src/plotastic/caches.py:
--------------------------------------------------------------------------------
1 | # %%
2 |
3 |
4 | import os
5 | from plotastic.utils.subcache import SubCache
6 |
7 | # %%
### Define Home
#' Cache root shared by all plotastic SubCaches: ~/.cache
home = os.path.join(
    os.path.expanduser("~"),
    ".cache",
)

# == Define SubCaches ==================================================
#' Define a Memory object for different purposes
#' assert_parent=".cache" guards against accidentally creating (and later
#' clearing) a cache directory outside of ~/.cache
MEMORY_UTILS = SubCache(
    location=home,
    assert_parent=".cache",
    subcache_dir="plotastic_utils",
)
21 | # MEMORY_PLOTTING = SubCache(
22 | # location=home,
23 | # assert_parent=".cache",
24 | # subcache_dir="plotastic_plotting",
25 | # )
26 |
27 | ### Cache like this:
28 | # def sleep(seconds):
29 | # import time
30 | # time.sleep(seconds)
31 |
32 | # sleep = caches.MEMORY_UTILS.subcache(sleep)
33 |
34 | # == Utilities ====================================
35 | if __name__ == "__main__":
36 | pass
37 | # %%
38 | ### View Contents
39 | MEMORY_UTILS.list_dirs()
40 | # %%
41 | # Clear Caches
42 | # MEMORY_UTILS.clear()
43 |
--------------------------------------------------------------------------------
/src/plotastic/dataanalysis/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/src/plotastic/dataanalysis/__init__.py
--------------------------------------------------------------------------------
/src/plotastic/dataanalysis/dataanalysis.py:
--------------------------------------------------------------------------------
1 | # !!
2 |
3 | # %% Imports
4 | from typing import TYPE_CHECKING
5 |
6 | from copy import deepcopy
7 |
8 | # from typing import Self # !! only for python 3.11. Not really needed, since "DataAnalysis" as typehint works with vscode
9 |
10 | from pathlib import Path
11 | import pickle
12 |
13 | import pandas as pd
14 |
15 | import matplotlib.pyplot as plt
16 | from matplotlib.figure import Figure
17 |
18 | from plotastic import docstrings
19 |
20 | # import markurutils as ut
21 | import plotastic.utils.utils as ut
22 | from plotastic.dataanalysis.annotator import Annotator
23 | from plotastic.dataanalysis.filer import Filer
24 |
25 | # from statresult import StatResult
26 | if TYPE_CHECKING:
27 | import matplotlib as mpl
28 | from matplotlib.transforms import Bbox
29 |
30 | # %% Class DataAnalysis
31 |
32 |
class DataAnalysis(Annotator):
    """User-facing entry point bundling plotting, statistics and file
    export for one dataset.

    Inherits the full plotting + statistics stack through
    :class:`Annotator` and owns a :class:`Filer` for overwrite-protected
    saving. The ``title`` property keeps the Filer's title in sync.
    """

    # == __init__ ======================================================
    def __init__(
        self,
        data: pd.DataFrame,
        dims: dict,
        subject: str | None = None,
        levels: list[tuple[str]] | None = None,
        title: str = "untitled",
        verbose: bool = True,
    ) -> None:
        """Initialize a DataAnalysis.

        :param data: Data in long format
        :param dims: Plot dimensions; presumably maps seaborn keywords
            (``x``, ``y``, ``hue``, ...) to column names — see the
            dimensions subpackage
        :param subject: Column identifying subjects for paired/repeated
            measures, optional
        :param levels: Expected factor levels, optional
        :param title: Title used for the analysis and its output files
        :param verbose: Run a data-integrity check on construction
        """
        ### Inherit
        # !! verbosity set to False, since each subclass shouldn't test
        # !! its own DataFrame
        super().__init__(
            data=data,
            dims=dims,
            subject=subject,
            levels=levels,
        )

        self._title = title
        self.filer = Filer(title=title)

        ### Integrity check is done once, here at the top of the class
        ### hierarchy, instead of in every subclass
        if verbose:
            self.data_check_integrity()

    # ==
    # == TITLE =========================================================

    @property
    def title(self) -> str:
        return self._title

    @title.setter
    def title(self, value):
        #' Keep the Filer in sync so saved files pick up the new title
        self._title = value
        self.filer.title = value

    def title_add(
        self,
        to_end: str = "",
        to_start: str = "",
        con: str = "_",
        inplace: bool = False,
    ) -> "DataAnalysis":
        """Adds string to start and/or end of title

        :param to_end: str, optional (default="")
            String to add to end of title
        :param to_start: str, optional (default="")
            String to add to start of title
        :param con: str, optional (default="_")
            Conjunction-character to put between string addition and
            original title
        :param inplace: bool, optional (default=False)
            Modify self instead of working on a deep copy
        :return: DataAnalysis with modified title
        """
        a: "DataAnalysis" = self if inplace else deepcopy(self)

        if to_start:
            a.title = f"{to_start}{con}{a.title}"
        if to_end:
            a.title = f"{a.title}{con}{to_end}"
        return a

    # ==
    # == Saving stuff ==================================================

    @docstrings.subst(param_overwrite=docstrings.param_overwrite)
    def save_statistics(
        self,
        fname: str = "plotastic_results",
        overwrite: str | bool = "day",
    ) -> None:
        """Exports all statistics to one excel file. Different sheets for
        different tests

        :param overwrite: {param_overwrite}
        :param fname: Path to save excel file, optional (default="plotastic_results")
        :type fname: str, optional
        """

        ### Overwrite Protection
        #' Applied for every overwrite value except True (overwrite
        #' everything) and None
        if isinstance(overwrite, str) or (
            not overwrite and overwrite is not None
        ):
            fname = self.filer.prevent_overwrite(
                fname=fname, overwrite=overwrite
            )

        ### Save Statistics
        self.results.save(fname=fname)

    # NOTE(review): large commented-out drafts of save_fig(), save_all()
    # and _redraw_fig() were removed here — retrieve them from version
    # control if they are still needed.
258 |
259 | # %%
### Interactive quick-check: build a DataAnalysis from the bundled
### example dataset (only runs when executed directly)
if __name__ == "__main__":
    from plotastic.example_data.load_dataset import load_dataset

    DF, dims = load_dataset("qpcr")
    DA = DataAnalysis(DF, dims)
265 |
266 | # %% Fill DA with stuff
267 |
268 |
269 | # %%
270 |
--------------------------------------------------------------------------------
/src/plotastic/dataanalysis/filer.py:
--------------------------------------------------------------------------------
1 | #
2 | # %% Imports
3 |
4 | # import sys
5 |
6 | # import ipynbname
7 | # import IPython
8 |
9 | # import inspect
10 |
11 | import re
12 |
13 | # import markurutils as ut
14 | import plotastic.utils.utils as ut
15 |
16 | # from IPython import get_ipython
17 |
18 | from datetime import date
19 |
20 | from pathlib import Path
21 | from typing import Any
22 |
23 | from plotastic import docstrings
24 |
25 |
26 | # %% Class Filer
class Filer:

    """A class to handle file operations.
    - It reads the name of the current file, and sets it as the default filename for
      saving.
    - Provides function for overwrite protection.
    - More stuff coming..?
    """

    DEFAULT_TITLE = "plotastic_result"

    # ==
    # == __init__ ======================================================================

    def __init__(self, title: str):
        #' Base title, used as the default filename when saving
        self.title = title

    # ==
    # == Time info =====================================================================

    @property
    def current_day(self) -> str:
        """Current date formatted as 'YYMMDD' (e.g. '240131')."""
        return date.today().strftime("%y%m%d")

    # ==
    # == Properties of Users's Script ==================================================

    @staticmethod
    def _prevent_overwrite_all(filename: str) -> str:
        """Returns a new filename that has a number at the end, if the filename already
        exists.
        - Checks filenames in path that are similar to filename
        - If there are similar filenames with an index behind them, it gets the largest
          index
        - Adds plus one to that index and puts it at the end of filenames

        :param filename: filename (without suffix)
        :type filename: str
        :return: filename with the next free index appended, e.g. 'result_3'
        :rtype: str
        """

        ### Get a list of filenames that might be overwritten
        files = ut.glob_searchfilename(
            path=Path.cwd(),
            filename=filename,
            rettype="str",
        )

        ### Define Pattern Rules:
        #' Between Start (^) and end ($) of line
        # !! fname: Match the exact filename (escaped, since it may
        # !! contain regex metacharacters like dots or parentheses)
        #' index: 1 through 3 repetitions of single digit ( \d{1,3} )
        # > Named groups are required: matches are read via matchD["index"]
        regex = r"^(?P<fname>" + re.escape(filename) + r")_(?P<index>\d{1,3})$"
        ### Get matches
        pattern = re.compile(regex, flags=re.MULTILINE)
        matches: list[dict] = ut.re_matchgroups(pattern=pattern, string=files)
        ### Extract their indices
        indices: list[int] = [int(matchD["index"]) for matchD in matches]

        ### Add plus one to max index
        newindex = 0
        if indices:
            newindex = max(indices) + 1

        # > Return the filename itself with the next free index appended
        return f"{filename}_{newindex}"

    @docstrings.subst(param_overwrite=docstrings.param_overwrite)
    def prevent_overwrite(
        self, fname: "str | Path", overwrite: str = "day"
    ) -> str:
        """Returns a new filename that has a number or current date at the end to enable
        different modes of overwriting protection.

        :param fname: filename to be protected from overwriting
        :type fname: str | Path
        :param overwrite: {param_overwrite}
        :return: filename that is protected from overwriting by adding either number or
            the current date at its end
        :rtype: str
        """
        overwrite_args = ["day", "daily", "nothing", True, False]
        assert (
            overwrite in overwrite_args
        ), f"overwrite must be one of {overwrite_args}, not {overwrite}"

        ### Convert to string if path
        fname = str(fname) if isinstance(fname, Path) else fname
        ### Remove suffix
        # NOTE(review): splitting on the first "." also truncates names
        # containing dots (e.g. "v1.2_result") — confirm this is intended
        fname = fname.split(".")[0]

        if overwrite in ["day", "daily"]:  #' "day"
            fname = f"{fname}_{self.current_day}"
        elif overwrite in ["nothing", False]:  #' "nothing"
            fname = self._prevent_overwrite_all(filename=fname)

        return fname
129 |
--------------------------------------------------------------------------------
/src/plotastic/dimensions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/src/plotastic/dimensions/__init__.py
--------------------------------------------------------------------------------
/src/plotastic/dimensions/dims.py:
--------------------------------------------------------------------------------
1 | #
2 | # %% Importds
3 |
4 | from typing import TYPE_CHECKING
5 |
6 | from typing import Dict, Literal
7 | from copy import copy, deepcopy
8 |
9 | if TYPE_CHECKING:
10 | from plotastic.dataanalysis.dataanalysis import DataAnalysis
11 |
12 |
13 | # TODO maybe refactor this to specify statistical type of data
14 | # class Dimension:
15 | # def __init__(
16 | # self, name: str, scale_of_measurement: Literal["nominal", "ordinal", "cardinal"]
17 | # ) -> "Dimension":
18 | # """_summary_
19 |
20 | # Args:
21 | # name (str): _description_
22 | # scale_of_measurement (str):
23 | # (https://en.wikipedia.org/wiki/Level_of_measurement)
24 | # What's the scale of measurement?
25 | # * Nominal data:
26 | # * Categorical data that has no order
27 | # * e.g. colors, names, etc.
28 | # * Ordinal data:
29 | # * Categorical data that has order
30 | # * e.g. grades, sizes, etc.
31 | # * This works independently from ordered pd.Categorical type. That is used to place plots in the right order.
32 | # * Cardinal data:
33 | # * Numerical data that has order
34 | # * Three types: interval, ratio, and absolute
35 | # * Don't let them confuse you. It's an old scale and is often contested
36 | # * Interval data:
37 | # * Numerical data that has order and equal intervals
38 | # * e.g. temperature [°C], dates, etc.
39 | # * Ratio data:
40 | # * Numerical data that has order, equal intervals, and a true zero
41 | # * Might not have a unit, since it was divided by itself
42 | # * e.g. temperature [Kelvin], height, weight, etc.
43 | # * Absolute data:
44 | # * Numerical data that has order, equal intervals, a true zero, and an absolute scale
45 |
46 | # Returns:
47 | # Dimension: _description_
48 | # """
49 | # #' "nominal", "ordinal", "interval", "ratio
50 |
51 | # self.name = name
52 | # self.som = scale_of_measurement
53 |
54 | # #
55 | # #
56 |
57 | # %% class Dims
58 |
59 |
class Dims:
    """Maps plot dimensions ('y', 'x', 'hue', 'row', 'col') to column
    names of the dataframe and provides tools to query, set and switch
    them."""

    # == Init ................................................................

    def __init__(
        self,
        y: str,
        x: str,
        hue: str | None = None,
        row: str | None = None,
        col: str | None = None,
    ):
        """
        :param y: Column name of the dependent variable (y-axis)
        :param x: Column name of the independent variable (x-axis)
        :param hue: Column name used for color grouping, optional
        :param row: Column name that facets subplots into rows, optional
        :param col: Column name that facets subplots into columns, optional
        """
        ### Define Dims
        self.y = y
        self.x = x
        self.hue = hue
        self.row = row
        self.col = col
        # self._by = None

        # self.som = dict(y="interval", x="ordinal", row="")

        # if som: #' SOM = Scale of Measurement / Skalenniveau
        # self.som = som
        # else:
        # self.som = dict(y= "continuous", )

    #
    #
    #

    def __repr__(self) -> str:
        #' Represent the object by its dimension dictionary
        return self.asdict().__repr__()

    # == Properties ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

    # @property
    # def has_hue(self) -> bool:
    # return not self.hue is None

    # @property
    # def by(self) -> list[str] | None:
    # if self._by:
    # return self._by
    # elif self.row and self.col:
    # return [self.row, self.col]
    # elif self.row:
    # return [self.row]
    # elif self.col:
    # return [self.col]
    # else:
    # return None

    def asdict(self, incl_None: bool = True) -> dict:
        """Return the dimensions as a dict, e.g. {'y': 'tip', 'x': 'day', ...}.

        :param incl_None: Keep entries whose value is None, defaults to True
        """
        d = dict(y=self.y, x=self.x, hue=self.hue, row=self.row, col=self.col)
        if not incl_None:
            d = {k: v for (k, v) in d.items() if (not v is None)}
        return d

    def set(self, inplace: bool = False, **kwargs) -> "Dims | DataAnalysis":
        """Set dimensions by keyword, e.g. set(x='smoker').

        :param inplace: Mutate this object instead of a copy, defaults to False
        :param kwargs: Dimension names mapped to new column names; the
            string "none" is converted to None
        """
        newobj = self if inplace else copy(self)
        for k, v in kwargs.items():
            #' The string "none" acts as a sentinel for None
            v = v if not v == "none" else None
            setattr(newobj, k, v)
        return newobj

    def getvalues(self, keys: list[str] | tuple[str], *args):
        """
        Converts a list of dimensions into a list of dimension values, e.g.
        :param keys: ["x", "y", "col"]
        :return: e.g. ["smoker", "tips", "day"]
        """
        defkeys = ("x", "y", "hue", "row", "col")
        l = []
        keys = [keys] + [arg for arg in args]
        for key in keys:
            assert (
                key in defkeys
            ), f"#! '{key}' should have been one of {defkeys}"
            l.append(getattr(self, key))
        return l

    def switch(
        self,
        *keys_args: str,
        inplace: bool = False,
        verbose: bool = True,
        **keys_kws: str | Dict[str, str],
    ) -> "Dims | DataAnalysis":
        """Switches two dimensions, e.g. x and hue, or x and row, etc. If you
        want to switch more than two dimensions, use the switch method in
        chain.

        :param keys_args: Two dimensions to switch. Only 2 Positional arguments
            allowed. Use e.g. dims.switch("x", "hue", **kwargs)
        :type keys_args: str
        :param inplace: Decide if this switching should change the dims object
            permanently (analogously to pandas dataframe). If False, you should
            pass return value into a variable, defaults to False
        :type inplace: bool, optional
        :param verbose: Whether to print out switched values, defaults to True
        :type verbose: bool, optional
        :param kwarg: Keyword arguments: row="smoker".
        :type kwarg: str | Dict[str, str]
        :raises AssertionError:
        :return: DataAnalysis object with switched dimensions in dims
        :rtype: Dims | DataAnalysis
        """

        ### Handle Arguments
        #' If keys are passed, e.g. dims.switch("x","row",**kwargs)"""
        if len(keys_args) == 0:
            pass
        elif len(keys_args) == 2:
            assert (
                len(keys_kws) == 0
            ), "#! Can't switch when both keys and kwarg is passed"
            #' Translate positional form into keyword form:
            #' switch("x", "hue") -> {"x": <current value of hue>}
            values = self.getvalues(*keys_args)
            keys_kws[keys_args[0]] = values[1]
        else:
            raise AssertionError(
                f"#! '{keys_args}' should have been of length 2"
            )
        assert len(keys_kws) == 1, f"#! {keys_kws} should be of length 1 "

        ### Print first Line
        if verbose:
            todo = "RE-WRITING" if inplace else "TEMPORARY CHANGING:"
            print(
                f"#! {todo} {self.__class__.__name__} with keys: '{keys_args}' and kwarg: {keys_kws}:"
            )
            print(" (dim =\t'old' -> 'new')")

        ### SWITCH IT
        #' Copy Object
        original: dict = deepcopy(self.asdict(incl_None=True))
        newobj = self if inplace else deepcopy(self)

        # > Unpack the single {dimension: new_value} pair (length 1 asserted above)
        qK, qV = *keys_kws.keys(), *keys_kws.values()
        replace_v = "none"  #' Sentinel: detects whether qK was found at all
        for oK, oV in original.items():  # Original Object
            if qK == oK:
                # > Assign the new value to the queried dimension ...
                replace_v = oV
                setattr(newobj, qK, qV)
            elif qK != oK and oV == qV:
                # > ... and hand the queried dimension's old value to the
                # > dimension that previously held the new value (the swap)
                replace_v = original[qK]
                setattr(newobj, oK, replace_v)
        assert (
            replace_v != "none"
        ), f"#! Did not find {list(keys_kws.keys())} in dims {list(original.keys())}"

        ### PRINT THE OVERVIEW OF THE NEW MAPPING
        if verbose:
            for (oK, oV), nV in zip(original.items(), newobj.asdict().values()):
                pre = " "
                if oV != nV and oV == replace_v:  # or replace_v == "none":
                    printval = f"'{replace_v}' -> '{qV}'"
                    pre = ">>"
                elif oV != nV and oV != replace_v:
                    printval = f"'{oV}' -> '{replace_v}'"
                    pre = " <"
                else:  # oV == nV
                    printval = f"'{oV}'" if type(oV) is str else f"{oV}"
                if len(oK) < 3:
                    oK = oK + " "

                printval = printval.replace("'None'", "None")  # REMOVE QUOTES

                print(f" {pre} {oK} =\t{printval}")

        ### x AND y MUST NOT BE None
        assert not None in [
            self.y,
            self.x,
        ], "#! This switch causes x or y to be None"

        return newobj
236 |
237 |
238 | # !!
239 | # !!
240 | # !! end class
241 |
242 | # %% Test
243 |
244 | if __name__ == "__main__":
245 | wt = "../../../Examples, Walkthroughs, Tests/Scripts for Walkthroughs/dims_wt.py"
246 | with open(wt) as f:
247 | exec(f.read())
248 |
--------------------------------------------------------------------------------
/src/plotastic/dimensions/hierarchical_dims.py:
--------------------------------------------------------------------------------
1 | """We utilize List of all dims/ factors in a specific orders to group
2 | and index the data into (fully) facetted datagroups. This allows easier
3 | implementation of ... - ... displaying missing levels of the last factor
4 | (x or hue) per group - ... connecting datapoints of the same subject
5 | across x and hue levels - (... iterating through all datagroups for
6 | statistics)
7 | """
8 |
9 | # %%
10 |
11 | # from pprint import pprint
12 | # from IPython.display import display
13 |
14 | import numpy as np
15 | import pandas as pd
16 | import seaborn as sns
17 | import matplotlib.pyplot as plt
18 |
19 | import plotastic as plst
20 | from plotastic.dimensions.subject import Subject
21 | from plotastic.utils import utils as ut
22 |
23 |
24 | from typing import Generator, TYPE_CHECKING
25 |
26 | if TYPE_CHECKING:
27 | from plotastic.dataanalysis.dataanalysis import DataAnalysis
28 |
29 |
30 | # %%
31 |
32 |
class HierarchicalDims(Subject):
    """Indexes the data by all dims/factors in hierarchical order,
    yielding fully facetted datagroups (used to display missing levels
    of the last factor and to connect subject datapoints)."""

    def __init__(self, **kws):
        #' All keyword args are forwarded up the mixin chain (Subject)
        super().__init__(**kws)

    @property
    def _factors_hierarchical(self) -> list:
        """Return list of factors that are used for indexing the
        subjectdata. It places subjects before x and hue, which is
        useful to see which x and hue level are missing per subject"""
        factors = [
            self.dims.row,
            self.dims.col,
            self.subject,  # < Subject
            self.dims.x,
            self.dims.hue,
        ]
        ### Kick out Nones:
        return [f for f in factors if f is not None]

    @property
    def _factors_hierarchical_subjects_last(self) -> list:
        """Return list of factors that are used for indexing the
        subjectdata. It places subjects after x and hue, which is
        useful to see which subject is missing per x and hue level"""
        factors = [
            self.dims.row,
            self.dims.col,
            self.dims.x,
            self.dims.hue,
            self.subject,  # < Subject
        ]
        ### Kick out Nones:
        return [f for f in factors if f is not None]

    def data_hierarchicize(
        self,
        sort: bool = True,
        subjects_last: bool = False,
    ) -> pd.DataFrame:
        """Return Dataframe indexed by all factors containing only
        columns y and subjects

        :param sort: Sort the resulting MultiIndex, defaults to True
        :param subjects_last: Place the subject level after x and hue
            instead of before them, defaults to False
        """

        ### Pick order of Hierarchy
        if subjects_last:
            factors = self._factors_hierarchical_subjects_last
        else:
            factors = self._factors_hierarchical

        ### Pick Data and set Index
        DF = self.data[factors + [self.dims.y]]
        DF = DF.set_index(factors)

        ### Sort
        if sort:
            DF = DF.sort_index()

        return DF

    def _iter__hlkey_df(
        self, sort: bool = False, subject_last: bool = False, by_lastdim: bool = False
    ) -> Generator[tuple[tuple[str | int], pd.DataFrame], None, None]:
        """Iterate over data_hierarchical, return hierarchical levelkeys
        and dataframe

        :param sort: Sort the index before grouping, defaults to False
        :param subject_last: Place the subject level after x and hue,
            defaults to False
        :param by_lastdim: Also group by the last dim (x or hue); if
            False, the last dim stays inside each yielded dataframe,
            defaults to False
        """
        ### Pick order of Hierarchy
        if subject_last:
            factors = self._factors_hierarchical_subjects_last
        else:
            factors = self._factors_hierarchical

        ### Remove last dim (x or hue)
        # > Otherwise we iterate over single rows
        if not by_lastdim:
            factors = factors[:-1]

        ### Pandas doesn't like grouping by length 1 tuples/lists
        if len(factors) == 1:
            factors = factors[0]

        for key, df in self.data_hierarchicize(
            sort=sort, subjects_last=subject_last
        ).groupby(factors):
            yield key, df

    def get_missing_lvls_from_last_factor(
        self,
        show_all: bool = False,
        as_dict: bool = False,
    ) -> pd.DataFrame | dict:
        """Return dataframe with missing levels per group. If show_all
        is False, only groups with missing levels are shown.

        :param show_all: Also keep groups without missing levels,
            defaults to False
        :param as_dict: Return the raw {groupkey: missing} dict instead
            of a DataFrame, defaults to False
        """
        ### Reference for complete levels
        all_x_lvls = tuple(self.levels_dict_dim["x"])
        all_hue_lvls = tuple(self.levels_dict_dim["hue"])

        ### Collect Missing
        # > The last factor is hue when present, x otherwise
        missing = {}
        for key, df in self._iter__hlkey_df():
            if self.dims.hue:
                hue_lvls = tuple(df.index.get_level_values(self.dims.hue))
                missing[key] = tuple(set(all_hue_lvls) - set(hue_lvls))
            else:
                x_lvls = tuple(df.index.get_level_values(self.dims.x))
                missing[key] = tuple(set(all_x_lvls) - set(x_lvls))

        ### Remove groups that didn't have any missing values
        if not show_all:
            # > Convert v to list so that resulting DataFrame has
            #' just one column
            missing = {k: [v] for k, v in missing.items() if v}

        ### Convert Result to DataFrame
        # NOTE(review): with show_all=True the values are NOT wrapped in
        # single-element lists, so this conversion may spread a tuple of
        # missing levels across several columns — confirm intended
        if not as_dict:
            missing = pd.DataFrame(
                index=pd.MultiIndex.from_tuples(
                    tuples=missing.keys(),
                    names=self._factors_hierarchical[:-1],
                ),
                data=missing.values(),
                columns=["missing levels"],
            ).sort_index()

        return missing
155 |
156 |
157 | # %%
158 | if __name__ == "__main__":
159 | # == Test Data =====================================================
160 |
161 | def make_testdata_paired_but_nosubject():
162 | ### Attention
163 | DF = sns.load_dataset("attention")
164 | dims = dict(y="score", x="attention", hue="solutions")
165 | DA1 = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
166 | DA1.test_pairwise(paired=False)
167 |
168 | ### qPCR
169 | DF, dims = plst.load_dataset("qpcr", verbose=False)
170 | #' DA2
171 | dims = dict(y="fc", x="gene", row="fraction", col="class")
172 | DA2 = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
173 | DA2.test_pairwise(paired=False)
174 | #' DA3
175 | dims = dict(y="fc", x="gene", hue="fraction", col="class")
176 | DA3 = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
177 | DA3.test_pairwise(paired=False)
178 |
179 | return (DA1, DA2, DA3)
180 |
181 | def make_testdata():
182 | ### Attention
183 | DF = sns.load_dataset("attention")
184 | #' DA4 - no col, but hue
185 | dims = dict(y="score", x="attention", hue="solutions")
186 | DA4 = plst.DataAnalysis(
187 | data=DF, dims=dims, subject="subject", verbose=False
188 | )
189 | DA4.test_pairwise(paired=False)
190 |
191 | #' DA5 - no hue, but col
192 | dims = dict(y="score", x="solutions", col="attention")
193 | DA5 = plst.DataAnalysis(
194 | data=DF, dims=dims, subject="subject", verbose=False
195 | )
196 | DA5.test_pairwise(paired=True)
197 |
198 | ### qPCR
199 | DF, dims = plst.load_dataset("qpcr", verbose=False)
200 | #' DA6
201 | dims = dict(y="fc", x="gene", hue="fraction", col="class")
202 | DA6 = plst.DataAnalysis(
203 | data=DF, dims=dims, subject="subject", verbose=False
204 | )
205 | DA6.test_pairwise(paired=True)
206 |
207 | #' DA7 - with row
208 | dims = dict(y="fc", x="gene", row="fraction", col="class")
209 | DA7 = plst.DataAnalysis(
210 | data=DF, dims=dims, subject="subject", verbose=False
211 | )
212 | DA7.test_pairwise(paired=True)
213 |
214 | return DA4, DA5, DA6, DA7
215 |
216 | DA1, DA2, DA3 = make_testdata_paired_but_nosubject()
217 | DA4, DA5, DA6, DA7 = make_testdata()
218 |
219 | # %%
220 | ### Test when executed with DA doesn't have subject specified
221 | # DA1.subjectlist # > Gives error rightfully
222 |
223 | DA1.get_missing_lvls_from_last_factor()
224 | DA2.get_missing_lvls_from_last_factor()
225 | DA3.get_missing_lvls_from_last_factor()
226 |
227 | # DA1._subjects_get_XY() # > Gives error correctly
228 | # DA1.plot_connect_subjects() # > Gives error correctly
229 |
230 | # %%
231 | # DA1.catplot()
232 |
233 | # DA3.data_hierarchicize()
234 | # DA3.levels_get_missing()
235 |
236 | # %%
237 | # DA4.get_hierarchical_data(sorted=True)
238 | # DA4.get_hierarchical_data(sorted=True)
239 | DA6.data_hierarchicize(sort=True, subjects_last=True)
240 | # %%
241 | DA6.data_hierarchicize(sort=True, subjects_last=False)
242 |
243 | # %%
244 | # for subject, df in DA6.subjects_iter__subject_df:
245 | # pprint(subject)
246 | # pprint(df)
247 | # print()
248 |
249 | # %%
250 | # DA4.subjects_get_missing()
251 | # DA5.subjects_get_missing()
252 | DA6.get_missing_lvls_from_last_factor()
253 | # %%
254 | # pprint(DA4.subjects_get_XY())
255 | # pprint(DA5.subjects_get_XY())
256 | # pprint(DA6.subjects_get_XY())
257 | DA4._subjects_get_XY()
258 | # DA5.subjects_get_XY()
259 | # DA6.subjects_get_XY().loc[("MMPs", slice(None), "MMP7"), :]
260 | # DF = DA6.subjects_get_XY()
261 | # DF[DF.index.get_level_values("class") == "Chemokines"].index
262 | # # DF.index
263 |
264 | # %%
265 | def plottest(self: plst.DataAnalysis, figsize=(2.5, 2), **plot_kws):
266 | (
267 | self.subplots(figsize=figsize)
268 | .fillaxes(
269 | kind="swarm",
270 | size=2,
271 | dodge=True,
272 | )
273 | .edit_y_scale_log(10)
274 | .plot_connect_subjects(**plot_kws)
275 | .annotate_pairwise()
276 | )
277 | return self
278 |
279 | plottest(DA4)
280 | plottest(DA5)
281 | plottest(DA6, figsize=(12, 4))
282 | plottest(DA7, figsize=(12, 12))
283 |
--------------------------------------------------------------------------------
/src/plotastic/dimensions/subject.py:
--------------------------------------------------------------------------------
1 | """Adds Subject funcitonality to DataAnalysis."""
2 | # %%
3 | #== Imports ============================================================
4 |
5 | # from plotastic
6 | from plotastic.utils import utils as ut
7 | from plotastic.dimensions.dimsandlevels import DimsAndLevels
8 |
9 | # %%
10 | #== Class Subject ======================================================
11 |
class Subject(DimsAndLevels):
    """Adds subject (repeated-measures ID) functionality to DataAnalysis.

    :param subject: Name of the column identifying individual subjects
        (used e.g. for paired statistics), or None if the data has no
        subject structure.
    :param kws: Forwarded to DimsAndLevels.
    """

    def __init__(self, subject: str | None = None, **kws) -> None:
        super().__init__(**kws)
        self.subject = subject
        #' Validate that the subject column actually exists in the data
        if subject is not None:
            assert (
                subject in self.data.columns
            ), f"#! Subject '{subject}' not in columns, expected one of {self.data.columns.to_list()}"

    @property
    def subjectlist(self) -> tuple:
        """Unique subject identifiers found in the data.

        :raises TypeError: If no subject column was specified.
        """
        if self.subject is None:
            raise TypeError("No subject column specified")
        return tuple(self.data[self.subject].unique())
27 |
28 |
--------------------------------------------------------------------------------
/src/plotastic/docstrings.py:
--------------------------------------------------------------------------------
1 | #
2 | # %% imports
3 |
4 | from typing import Callable
5 |
6 | import plotastic.utils.utils as ut
7 |
8 | # %% Test wrapping function
9 |
10 |
11 | if __name__ == "__main__":
12 | descr = """
13 | Mode of overwrite protection. If "day", it simply adds the current date at the end
14 | of the filename, causing every output on the same day to overwrite itself. If
15 | "nothing" ["day", "nothing"], files with the same filename will be detected in the
16 | current work directory and a number will be added to the filename. If True,
17 | everything will be overwritten.
18 | """
19 | w = ut.wrap_text(descr)
20 | print(w)
21 | len(" ")
22 |
23 |
24 | # %% Write :param: part of docstring
25 |
26 |
def param(
    param: str,
    descr: str,
    default: str = "",
    typ: str = "",
    optional: bool = False,
) -> str:
    """Returns part of docstring describing parameter in sphinx format.

    :param param: Name of the documented parameter
    :param descr: Description text; wrapped to docstring width
    :param default: Default value to report; string defaults are quoted.
        A falsy value suppresses the 'defaults to ...' clause
    :param typ: Type to report in a ':type param:' line
    :param optional: Whether to append ', optional'
    :return: Formatted docstring fragment
    """

    ### If descr starts with new line remove it
    descr = descr.removeprefix("\n")

    S = []

    ### First line, (no tabstop needed)
    # # Don't include :param: in docstring, add that manually always, so
    # # vscode at least shows the parameter in the intellisense
    S.append(" ")  #' whitespace after :param param:
    S.append(
        ut.wrap_text(
            string=descr,
            width=72,
            width_first_line=54,
            indent=" ",
        )
    )

    ### Add default value to first line
    if default:
        if isinstance(default, str):
            # # Add quotes if param defaults to string
            default = f"'{default}'"
        S.append(f", defaults to {default}")

    ### Further options need a tab
    ### Type
    if typ:
        S.append("\n\t")  #' newline
        S.append(f":type {param}: {typ}")

    ### Optional, same line as type
    if optional:
        S.append(", optional")

    return "".join(S)
74 |
75 |
76 | if __name__ == "__main__":
77 | docpart = param(
78 | param="sdaf",
79 | descr="makes makes and does does stuffystuff",
80 | default="ja!",
81 | typ="str",
82 | )
83 | print(docpart)
84 |
85 |
86 | # %% Substitute variables in docstring
87 |
88 |
def subst(*args, **kwargs):
    """Decorator that substitutes variables in docstrings, e.g.: {} as args and {var} as
    kwargs

    Positional args fill ``{}`` placeholders, keyword args fill named
    ``{var}`` placeholders via :meth:`str.format`. Functions without a
    docstring are returned unchanged.

    :raises KeyError: If the docstring contains a named placeholder that
        is not covered by the keyword arguments.
    """

    def F(func: Callable) -> Callable:
        doc = func.__doc__
        ### Shouldn't raise error if no docstring is present
        if doc:
            try:
                ### Substitute args
                func.__doc__ = doc.format(*args, **kwargs)
            except KeyError as e:
                # > f-string (the original was a plain string, so the
                # > placeholders were printed literally) + exception chaining
                raise KeyError(
                    f"Could not substitute {e} in docstring of {func.__name__} "
                    f"with {args} or {list(kwargs.keys())}"
                ) from e

        return func

    return F
110 |
111 |
112 | if __name__ == "__main__":
113 | # p = """:param verbose: Set to False to not print stuff, defaults to False"""
114 | # p += "\n\t:type verbose: bool"
115 | p = param(
116 | param="verbose",
117 | descr="Ladidah awesome parameter if you know what I mean. Makes makes and does does stuffystuff",
118 | default="ja!",
119 | typ="str",
120 | )
121 |
122 | @subst("banana", var2="milkshake", var3=p)
123 | def bla(verbose: False):
124 | """this is a docstring with {} and {var2},
125 |
126 | :param verbose: {var3}
127 | """
128 | if verbose:
129 | print("jo!")
130 |
131 | print(bla.__doc__)
132 |
133 |
134 | # %%
135 |
### Overwrite Protection
# > Reusable ':param overwrite:' docstring fragment, substituted into
# > methods via the @subst decorator (e.g. Filer.prevent_overwrite)
param_overwrite = param(
    param="overwrite",
    descr="""
    Mode of overwrite protection. If "day", it simply adds the current date at the end
    of the filename, causing every output on the same day to overwrite itself. If
    "nothing" ["day", "nothing"], files with the same filename will be detected in the
    current work directory and a number will be added to the filename. If True,
    everything will be overwritten.
    """,
    default="day",
    typ="str | bool",
    optional=True,
)
150 |
151 | if __name__ == "__main__":
152 | from plotastic.dataanalysis.dataanalysis import DataAnalysis
153 |
154 | # print(overwrite)
155 | print(DataAnalysis.save_statistics.__doc__)
156 | # DataAnalysis.save_fig()
157 |
158 | # %%
159 |
--------------------------------------------------------------------------------
/src/plotastic/example_data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/src/plotastic/example_data/__init__.py
--------------------------------------------------------------------------------
/src/plotastic/example_data/data/fmri.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/src/plotastic/example_data/data/fmri.xlsx
--------------------------------------------------------------------------------
/src/plotastic/example_data/data/qpcr.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/src/plotastic/example_data/data/qpcr.xlsx
--------------------------------------------------------------------------------
/src/plotastic/example_data/data/tips.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/src/plotastic/example_data/data/tips.xlsx
--------------------------------------------------------------------------------
/src/plotastic/example_data/load_dataset.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import pkg_resources
3 | import os
4 |
### List all available datasets
# > Maps dataset name -> excel file shipped inside the package
FILES = dict(
    fmri="fmri.xlsx", #' Removed timepoints bigger than 10
    tips="tips.xlsx", #' Added a size-cut column pd.cut(df["size"], bins=[0, 2, 10], labels=["1-2", ">=3"])
    qpcr="qpcr.xlsx",
)

# > Maps dataset name -> default dimension assignment for DataAnalysis
DIMS = dict(
    fmri=dict(y="signal", x="timepoint", hue="event", col="region"),
    tips=dict(y="tip", x="size-cut", hue="smoker", col="sex", row="time"),
    qpcr=dict(y="fc", x="gene", hue="fraction", col="method", row="class"),
)


def load_dataset(name: str = "tips", verbose=True) -> tuple[pd.DataFrame, dict]:
    """Executes seaborn.load_dataset, but also returns dictionary that assigns dimensions
    to column names ["y","x","hue","col","row"]

    :param verbose: Prints information and dims dictionary
    :param name: Name of the dataset. Error message contains available options. Defaults to "tips"
    :return: Example data and dictionary for dimensions
    :rtype: tuple[pd.DataFrame, dict]
    """

    ### Check user Arguments
    assert (
        name in FILES
    ), f" '{name}' should have been one of {list(FILES.keys())}"

    ### Import DataFrame from package
    # > importlib.resources replaces the deprecated pkg_resources API
    from importlib.resources import files

    package = "plotastic.example_data"  #' Needs to be importable
    path_full = files(package) / "data" / FILES[name]
    df = pd.read_excel(path_full)

    ### Get dims
    dims = DIMS[name]

    if verbose:
        print(
            f"#! Imported seaborn dataset '{name}' \n\t columns:{df.columns}\n\t dimensions: {dims}"
        )

    return df, dims
52 |
--------------------------------------------------------------------------------
/src/plotastic/plotting/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/src/plotastic/plotting/__init__.py
--------------------------------------------------------------------------------
/src/plotastic/plotting/multiplot.py:
--------------------------------------------------------------------------------
1 | #
2 | # %% imports
3 |
4 | from typing import TYPE_CHECKING
5 |
6 | import pandas as pd
7 |
8 | import matplotlib.pyplot as plt
9 | import matplotlib as mpl
10 |
11 | # import pyperclip
12 |
13 | # import markurutils as ut
14 | import plotastic.utils.utils as ut
15 |
16 | # from plotastic.plotting.plotedits import PlotEdits
17 | from plotastic.plotting.plot import Plot
18 |
19 | if TYPE_CHECKING:
20 | from plotastic.dataanalysis.dataanalysis import DataAnalysis
21 |
22 | # %% Matplotlib Runtime Config (RC)
23 |
24 |
25 | # %% Class MultiPlot
26 |
27 |
28 | class MultiPlot(Plot):
    def __init__(self, **dataframetool_kws):
        """Forwards all keyword arguments to the Plot base class."""
        super().__init__(**dataframetool_kws)
31 |
32 | #
33 | # == Boxplots ======================================================
34 |
    def plot_box_strip(
        self,
        marker_size: float = 2,
        marker_alpha: float = 0.5,
        legend: bool = True,
        subplot_kws: dict = dict(),
        box_kws: dict = dict(),
        strip_kws: dict = dict(),
        legend_kws: dict = dict(),
    ) -> "MultiPlot | DataAnalysis":
        """A boxplot with a stripplot (scatter) on top

        Args:
            marker_size (float, optional): Size of strip markers. Defaults to 2.
            marker_alpha (float, optional): Alpha of strip markers. Defaults to 0.5.
            legend (bool, optional): Draw a legend (only when hue is set). Defaults to True.
            subplot_kws (dict, optional): Passed to self.subplots(). Defaults to dict().
            box_kws (dict, optional): User overrides for the boxplot kwargs. Defaults to dict().
            strip_kws (dict, optional): User overrides for the stripplot kwargs. Defaults to dict().
            legend_kws (dict, optional): Passed to self.edit_legend(). Defaults to dict().

        NOTE(review): the dict() defaults are shared mutable defaults;
        harmless here because they are only read (.update source), never
        mutated — but consider None-defaults.
        """
        # == PARAMETERS
        thin, thick = 0.3, 1.0  #' Linewidths
        covering, translucent, hazy = 1.0, 0.5, 0.3  #' Alpha
        front, mid, background, hidden = 100, 50, 1, -1  #' z-order

        ### == KEYWORD ARGUMENTS
        ### Boxplot kws
        box_KWS = dict(
            showfliers=False,
            boxprops=dict(  #' Box line and surface
                alpha=hazy,
                linewidth=thin,
            ),
            medianprops=dict(  #' Median line
                alpha=covering,
                zorder=front,
                linewidth=thick,
            ),
            whiskerprops=dict(  #' Lines conencting box and caps
                alpha=covering,
                zorder=mid,
                linewidth=thin,
            ),
            capprops=dict(  #' Caps at the end of whiskers
                alpha=covering,
                zorder=mid,
                linewidth=thick,
            ),
        )

        ### Stripplot kws
        strip_KWS = dict(
            dodge=True,  #' Separates the points in hue
            jitter=0.2,  #' How far datapoints of one group scatter across the x-axis
            zorder=front,
            ### Marker Style
            size=marker_size,
            alpha=marker_alpha,
            # color="none",
            edgecolor="white",
            linewidth=thin,  #' Edge width of the marker
        )

        ### User KWS
        box_KWS.update(box_kws)
        strip_KWS.update(strip_kws)

        ###... PLOT
        (
            self.subplots(**subplot_kws)
            .fillaxes(kind="box", **box_KWS)
            .fillaxes(kind="strip", **strip_KWS)
        )

        ### Legend displaying labels of stripplot (since that was called last)
        if legend and self.dims.hue:
            self.edit_legend(**legend_kws)

        return self
112 |
113 | def plot_box_swarm(
114 | self,
115 | marker_size: float = 1.5,
116 | marker_alpha: float = 0.9,
117 | legend=True,
118 | subplot_kws: dict = dict(),
119 | box_kws: dict = dict(),
120 | swarm_kws: dict = dict(),
121 | legend_kws: dict = dict(),
122 | ) -> "MultiPlot | DataAnalysis":
123 | """A boxplot with a stripplott (scatter) on top
124 |
125 | Args:
126 | markersize (float, optional): _description_. Defaults to 2.
127 | markeralpha (float, optional): _description_. Defaults to 0.5.
128 | box_kws (dict, optional): _description_. Defaults to dict().
129 | strip_kws (dict, optional): _description_. Defaults to dict().
130 | """
131 | # == PARAMETERS
132 | thin, thick = 0.2, 1.0 #' Linewidths
133 | covering, translucent, hazy = 1.0, 0.5, 0.3 #' Alpha
134 | front, mid, background, hidden = 100, 50, 1, -1 #' z-order
135 |
136 | ### == KEYWORD ARGUMENTS
137 | ### Boxplot kws
138 | box_KWS = dict(
139 | showfliers=False,
140 | #' Widths of boxes
141 | # !! Throws TypeError: matplotlib.axes._axes.Axes.boxplot() got multiple values for keyword argument 'widths'
142 | # widths=0.9,
143 | boxprops=dict( #' Box line and surface
144 | alpha=translucent,
145 | linewidth=thin,
146 | ),
147 | medianprops=dict( #' Median line
148 | alpha=covering,
149 | zorder=front,
150 | linewidth=thick,
151 | ),
152 | whiskerprops=dict( #' Lines conencting box and caps
153 | alpha=covering,
154 | zorder=mid,
155 | linewidth=thin,
156 | ),
157 | capprops=dict( #' Caps at the end of whiskers
158 | alpha=covering,
159 | zorder=mid,
160 | linewidth=thick,
161 | ),
162 | )
163 |
164 | ### Swarmplot kws
165 | swarm_KWS = dict(
166 | dodge=True, #' Separates the points in hue
167 | zorder=front,
168 | ### Marker Style
169 | alpha=marker_alpha,
170 | size=marker_size,
171 | # color="none",
172 | edgecolor="black",
173 | linewidth=thin, #' Edge width of the marker
174 | )
175 |
176 | ### User KWS
177 | box_KWS.update(box_kws)
178 | swarm_KWS.update(swarm_kws)
179 |
180 | ###... PLOT
181 | # !! If log y scale, you should pass y_scale = "log" in sublot_kws! Otherwise Points will not cluster in the middle!
182 | (
183 | self.subplots(**subplot_kws)
184 | .fillaxes(kind="box", **box_KWS)
185 | .fillaxes(kind="swarm", **swarm_KWS)
186 | )
187 |
188 | ### Legend displaying labels of swarmplot (since that was called last)
189 | if legend and self.dims.hue:
190 | self.edit_legend(**legend_kws)
191 |
192 | return self
193 |
194 |
195 | ## !!__________________________________________________________________________
196 |
197 | # # %% Matplotlib Runtime Config (RC)
198 |
199 | # mpl.rc("figure", dpi=250)
200 |
201 | # # %% get data
202 |
203 | # MP = MultiPlot(data=df, dims=dims)
204 |
205 |
206 | # # %%
207 |
--------------------------------------------------------------------------------
/src/plotastic/plotting/plot.py:
--------------------------------------------------------------------------------
1 | """Plotting functions that aren't covered by matplotlib or seaborn."""
2 | # %%
3 |
4 | import numpy as np
5 | import pandas as pd
6 | import matplotlib.pyplot as plt
7 | import seaborn as sns
8 |
9 | from plotastic.plotting.plotedits import PlotEdits
10 | from plotastic.utils import utils as ut
11 |
12 | from typing import TYPE_CHECKING
13 |
14 | if TYPE_CHECKING:
15 | from plotastic.dataanalysis.dataanalysis import DataAnalysis
16 |
17 | # %%
18 | # == Class Plot ========================================================
19 |
20 |
class Plot(PlotEdits):
    """Base plotting layer: quick figure creation (self.plot) and lines
    connecting paired subjects across x/hue positions."""

    def __init__(self, **dataframetool_kws) -> None:
        super().__init__(**dataframetool_kws)

    def plot(
        self, kind: str = "strip", subplot_kws: dict | None = None, **sns_kws
    ) -> "Plot | DataAnalysis":
        """Quick plotting: combines self.subplots and self.fillaxes to fill
        the axes with seaborn graphics.

        Args:
            kind (str, optional): Seaborn plot kind passed to self.fillaxes.
                Defaults to "strip".
            subplot_kws (dict | None, optional): Passed to self.subplots.
                Defaults to None.
            **sns_kws: Keyword arguments forwarded to the seaborn plotting
                function.

        Returns:
            Plot | DataAnalysis: self, for method chaining
        """
        ### Handle kwargs
        subplot_kws = subplot_kws or {}
        sns_kws = sns_kws or {}  #' **sns_kws is always a dict; kept defensively

        ### Standard kws for standard stripplot
        #' Applied only when the user passed no seaborn kwargs at all
        if kind == "strip" and len(sns_kws) == 0:
            sns_kws = dict(alpha=0.6, dodge=True)

        self.subplots(**subplot_kws) #' Initialise Figure and Axes
        self.fillaxes(kind=kind, **sns_kws) #' Fill axes with seaborn graphics
        if self.dims.hue:
            self.edit_legend() #' Add legend to figure

        plt.tight_layout() #' Make sure everything fits nicely

        return self

    #
    # == Subject lines =================================================

    def _nested_offsets(self, n_levels, width=0.8, dodge=True) -> np.ndarray:
        """Return offsets for each hue level for dodged plots. This must
        represent the same function that seaborn uses to dodge the plot,
        which can be found here:
        https://github.com/mwaskom/seaborn/blob/908ca95137c0e73bb6ac9ce9a8051577b6453138/seaborn/categorical.py#L437

        :param n_levels: Number of hue levels
        :param width: Total width allotted to one x-group, defaults to 0.8
        :param dodge: If False, all offsets are zero, defaults to True
        :return: One centered offset per hue level
        :rtype: np.ndarray
        """
        # NOTE: does not read self; kept as an instance method to match file style
        # ?? Retrieve hue_offsets from axes independently of width?
        # ?? This here could work, but might also be a bit hacky
        # axes = self.axes
        # offset = self.axes.collections[0].get_offsets()

        hue_offsets: np.ndarray
        if dodge:
            each_width = width / n_levels
            hue_offsets = np.linspace(0, width - each_width, n_levels)
            hue_offsets -= hue_offsets.mean()  #' Center offsets around zero
        else:
            hue_offsets = np.zeros(n_levels)
        return hue_offsets

    @ut.ignore_warnings
    def _subjects_get_XY(self) -> pd.DataFrame:
        """Collects X and Y positions of all datapoints indexed by all
        factors and subjects in a dataframe.

        :raises TypeError: If no subject column was specified
        :return: DataFrame indexed like the hierarchical data, with
            tuple-valued "X" and "Y" columns per group
        :rtype: pd.DataFrame
        """
        if self.subject is None:
            raise TypeError("No subject column specified")

        ### Retrieve hue levels and relative x-positions of data on plot
        if self.dims.hue:
            all_hue_lvls = tuple(self.levels_dict_dim["hue"])
            hue_offset = self._nested_offsets(len(all_hue_lvls))

        get_y = lambda df: tuple(df[self.dims.y].tolist())

        XY_df = pd.DataFrame(
            index=self.data_hierarchicize(sort=False).index,
            columns=["X", "Y"],
            data=None,
        )
        if self.dims.hue is None:
            #' Without hue, X positions are simply the integer x-slots
            for key, df in self._iter__hlkey_df():
                XY_df.loc[key, "Y"] = get_y(df)
                XY_df.loc[key, "X"] = tuple(i for i in range(len(df)))
        else:
            for key, df in self._iter__hlkey_df():
                #' X_positions >> hue_positions
                # > [0, 1] >> [0.2, 1.2, 2.2] and [0.2, 1.2, 2.2]
                # > [0, 1] >> [0.2, 1.2] and [0.2, 1.2, 2.2]
                #' Get hue-indices of hue-levels that aren't missing
                hue_lvls = tuple(
                    df.index.get_level_values(self.dims.hue).unique()
                )
                hue_indices: list[int] = ut.index_of_matchelements(
                    i1=all_hue_lvls, i2=hue_lvls
                )

                ### Find out which x_index we are at
                if self.factors_is_unfacetted:
                    x_levels: list[str | int] = self.levels_dict_dim["x"]
                else:
                    #' x-levels actually present in this facet
                    x_levels = tuple(
                        XY_df.loc[key[:-1], :]
                        .index.get_level_values(self.dims.x)
                        .unique()
                    )
                x_level_index = x_levels.index(key[-1])

                ### Translate hue_indices into x_positions by adding offset
                hue_positions: tuple = tuple(
                    x_level_index + hue_offset[hue_indices]
                )

                XY_df.loc[key, "Y"] = get_y(df)
                XY_df.loc[key, "X"] = hue_positions

        return XY_df

    def plot_connect_subjects(self, **plot_kws) -> "Plot | DataAnalysis":
        """Joins subjects with lines. This is useful to see how subjects
        behave across x and hue levels. This is only possible if the
        subject column is specified.

        :param plot_kws: Overrides for the line style passed to plot()
        :raises TypeError: Requires subject column to be specified in
            DataAnalysis object
        :return: self
        :rtype: Plot | DataAnalysis
        """
        if self.subject is None:
            raise TypeError("No subject column specified")

        #' Default line style, overridable by user kwargs
        plot_KWS = dict(color="black", ls="-", zorder=2, alpha=0.3)
        plot_KWS.update(plot_kws)

        XY_df = self._subjects_get_XY()

        #' One group per subject (last hierarchical factor excluded)
        for key, df in XY_df.groupby(self._factors_hierarchical[:-1]):
            if self.factors_is_unfacetted:
                #' Single axes: draw on the current figure
                X, Y = df["X"], df["Y"]
                plt.plot(X, Y, **plot_KWS)

            else:
                #' Faceted: draw only on the axes matching this facet key
                for rowcolkey_ax, ax in self.axes_iter__keys_ax:
                    if self.factors_is_1_facet:
                        rowcolkey_xy = key[0]
                    else:
                        rowcolkey_xy = key[0:2]

                    if rowcolkey_ax == rowcolkey_xy:
                        X, Y = df["X"], df["Y"]
                        ax.plot(X, Y, **plot_KWS)
        return self
169 |
--------------------------------------------------------------------------------
/src/plotastic/plotting/rc.py:
--------------------------------------------------------------------------------
1 | #
2 | # %%
3 | ### Imports
4 |
5 | import matplotlib as mpl
6 |
7 |
8 | # %%
# == Variables to reuse ================================================
FONTSIZE = 10  #' Base font size shared by labels, legend, and ticks


# %%
# == STYLE PAPER =======================================================
#' rcParams overrides for publication-style figures
PAPER = {
    ### Figure
    "figure.dpi": 200, #' Displaying figures doesn't need as much dpi as saving them
    "figure.figsize": (3, 3), #' default is way too big
    # "figure.facecolor": "gray", #' it's easier on the eyes
    ### Savefig
    "savefig.dpi": 300, #' Saving figures needs more dpi
    "savefig.format": "pdf",
    # "savefig.transparent": True,
    "savefig.facecolor": "white",
    "axes.facecolor": "white",
    ### Font
    "font.family": "sans-serif",
    "font.sans-serif": "Arial Narrow",
    "font.size": FONTSIZE,
    "font.weight": "bold",
    # ## Lines
    "lines.linewidth": 0.75,
    # ## Axes
    "axes.spines.right": True, #' requires argument despine=False
    "axes.spines.top": True,
    "axes.linewidth": 0.75,
    "axes.labelweight": "bold",
    "axes.titleweight": "bold",
    "axes.titlepad": 5,
    "axes.labelsize": FONTSIZE, #' fontsize of the x and y labels
    # ## Grid
    # "axes.grid": True,
    "grid.linestyle": "-",
    "grid.linewidth": 0.5,
    # ## Ticks
    "ytick.left": True,
    "xtick.labelsize": FONTSIZE - 1,
    "ytick.labelsize": FONTSIZE - 1,
    "ytick.major.pad": 0.9, #' distance Yticklabels and yticks
    "ytick.minor.pad": 0.8,
    "xtick.major.pad": 2, #' distance Xticklabels and xticks
    "xtick.minor.pad": 2,
    "ytick.major.size": 2.5,
    "ytick.minor.size": 2,
    "xtick.major.size": 2.5,
    "xtick.minor.size": 2,
    # ## Legend
    "legend.fancybox": False, #' rounded box for legend (disabled here)
    "legend.title_fontsize": FONTSIZE,
    "legend.fontsize": FONTSIZE,
    "legend.markerscale": 1.3, #' scaling of marker size in legend
    "legend.handleheight": 0.7, #' line distance between legend entries
    "legend.handletextpad": 0.1, #' distance markers legend text
    # 'legend.borderaxespad': 1, #' distance legend axes border, must be negative..?
    "legend.borderpad": 0.001,
    # 'text.usetex': True,
    # 'scatter.marker': 'x',
}

# == Collect STYLES ====================================================

### Give styles a name and add them to STYLES_PLST
STYLES = {
    "default": PAPER,
    "paper": PAPER,
}

### Maps each style source to the style names it provides
STYLENAMES = {
    "plotastic": sorted(list(STYLES.keys())),
    "seaborn": ["white", "dark", "whitegrid", "darkgrid", "ticks"],
    "matplotlib": mpl.style.available,
}
84 |
85 |
86 |
87 |
--------------------------------------------------------------------------------
/src/plotastic/plotting/rc_utils.py:
--------------------------------------------------------------------------------
1 | """Utilities for setting rcParams and styles"""
2 |
3 | # %%
4 | import matplotlib as mpl
5 | import matplotlib.pyplot as plt
6 | import seaborn as sns
7 |
8 | # import markurutils as ut
9 | import plotastic.utils.utils as ut
10 | from plotastic.dataanalysis.dataanalysis import DataAnalysis
11 | from plotastic.example_data.load_dataset import load_dataset
12 |
13 | import plotastic.plotting.rc as rc
14 |
15 |
16 | # %%
def print_styles() -> str:
    """Print all available style names grouped by source (plotastic,
    seaborn, matplotlib) and return the formatted overview.

    :return: One line per style source, each followed by its style names
    :rtype: str
    """
    #' Build once so it can be both printed and returned
    overview = "\n".join(f"{k}:\n\t{v}" for k, v in rc.STYLENAMES.items())
    print(overview)
    #' Fix: the annotation promised str, but the function returned None
    return overview
19 |
20 |
def set_rcParams(rcParams: dict):
    """Apply a dictionary of settings to matplotlib's runtime
    configuration, assigning each entry via mpl.rcParams[name] = value.

    :param rcParams: Mapping of rcParam names to their new values
    :type rcParams: dict
    """
    #' Assign one-by-one so matplotlib validates each entry individually
    for name, value in rcParams.items():
        mpl.rcParams[name] = value
30 |
31 |
def set_style(style: dict | str) -> None:
    """Apply a plotting style. Resolution order: a plotastic style name,
    a dict of rcParams, a matplotlib style name, and finally a seaborn
    style name.

    :param style: Name of a plotastic/matplotlib/seaborn style, or a
        dict mapping rcParam names to values
    :type style: dict | str
    :raises ValueError: If style is not found in any of the sources
    """

    ### Set matplotlib settings
    if style in rc.STYLENAMES["plotastic"]:
        set_rcParams(rc.STYLES[style])
    elif isinstance(style, dict):
        set_rcParams(style)
    elif style in mpl.style.available:
        mpl.style.use(style)
    else:
        try:
            sns.set_style(style)
        except ValueError:
            # !! Fix: previously the message embedded print_styles(), which
            # !! returns None — the error read "from these: None"
            available = "\n".join(
                f"{k}:\n\t{v}" for k, v in rc.STYLENAMES.items()
            )
            m = [
                f"#! Style '{style}' not found. Choose one",
                f"from these: \n{available}",
            ]
            raise ValueError(" ".join(m))
60 |
61 |
62 | # %%
def set_palette(palette: str | list = "Paired", verbose=True):
    """Sets the color palette used for the axes property cycle.

    :param palette: Seaborn palette name or list of colors, defaults to
        "Paired"
    :type palette: str | list, optional
    :param verbose: Print (and, in notebooks, display) a preview of the
        palette, defaults to True
    :type verbose: bool, optional
    """
    if verbose:
        pal = sns.color_palette(palette, 8).as_hex()
        print(f"#! You chose this color palette: {pal}")
        if ut.is_notebook():
            #' Imported lazily: IPython is only needed inside notebooks
            from IPython.display import display

            display(pal)

    # sns.set_theme(palette=palette) # !! resets rcParams
    #' Set the cycle directly instead, leaving other rcParams untouched
    color_cycle = mpl.cycler(color=sns.color_palette(palette))
    mpl.rcParams["axes.prop_cycle"] = color_cycle
83 |
--------------------------------------------------------------------------------
/src/plotastic/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/src/plotastic/py.typed
--------------------------------------------------------------------------------
/src/plotastic/stat/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/src/plotastic/stat/__init__.py
--------------------------------------------------------------------------------
/src/plotastic/stat/assumptions.py:
--------------------------------------------------------------------------------
1 | #
2 | # %%
3 |
4 | from typing import TYPE_CHECKING, NamedTuple #' SpherResults is a NamedTuple
5 |
6 | import pandas as pd
7 | import pingouin as pg
8 |
9 | # from plotastic.dimensions.dataframetool import DataFrameTool
10 | # from plotastic.stat.statresults import StatResults
11 | from plotastic.stat.stattest import StatTest
12 |
13 | # if TYPE_CHECKING:
14 | # from collections import namedtuple #' SpherResults is a NamedTuple
15 | # # from pingouin.distribution import SpherResults
16 |
17 | # %% Class Assumptions
18 |
19 |
class Assumptions(StatTest):
    """Tests for the assumptions of parametric statistics — normality,
    homoscedasticity, and sphericity — evaluated per facet of self.data
    and stored in self.results."""

    # == __init__=======================================================================
    def __init__(self, **dataframetool_kws):
        super().__init__(**dataframetool_kws)

        # NOTE(review): not read within this class; presumably consumed by
        # subclasses — TODO confirm
        self.two_factor = True

    # ==
    # ==
    # == Normality =====================================================================

    def check_normality(
        self, method: str = "shapiro", **user_kwargs
    ) -> pd.DataFrame:
        """Check assumption of normality. If the assumption is violated, you should use
        non-parametric tests (e.g. Kruskal-Wallis, Mann-Whitney, Wilcoxon, etc.) instead
        of parametric tests (ANOVA, t-test, etc.).

        :param method: 'shapiro', 'jarque-bera' or 'normaltest', defaults to 'shapiro'
        :type method: str, optional
        :param user_kwargs: Extra keyword arguments forwarded to pg.normality,
            overriding the defaults gathered here
        :return: Normality results per (row, col, hue) facet, with an added
            'n' column for group sizes
        :rtype: pd.DataFrame
        """

        ### Gather Arguments
        kwargs = dict(
            dv=self.dims.y,
            group=self.dims.x, # !! pingouin crashes without group, so we iterate without x
            method=method,
        )
        kwargs.update(user_kwargs) #' Add user kwargs

        ### Perform Test
        #' Iterate over rows, cols, hue
        #' Skip empty groups
        normDF_dict = {}
        # TODO: Use an iterator from hierarchical instead of one that omits x
        for key, df in self.data_iter__key_groups_skip_empty:
            #' key = (row, col, hue)
            normdf = pg.normality(df, **kwargs)
            #' Add n to see how big each group is.
            normdf["n"] = self.data_count_n_per_x(
                df
            ) #' -> Series with same length as normdf

            normDF_dict[key] = normdf

        # NOTE(review): keys are (row, col, hue) but names=factors_all also
        # covers x — presumably the group index of pg.normality supplies the
        # x level; confirm the level count matches
        normDF = pd.concat(
            normDF_dict, keys=normDF_dict.keys(), names=self.factors_all
        )

        ### Save Results
        self.results.DF_normality = normDF

        return normDF

    # ==
    # == Homoscedasticity ==============================================================

    def check_homoscedasticity(
        self, method: str = "levene", **user_kwargs
    ) -> pd.DataFrame:
        """Checks assumption of homoscedasticity. If the assumption is violated, the
        p-values from a t-test should be corrected with Welch's correction.

        :param method: 'levene' or 'bartlett', defaults to "levene"
        :type method: str, optional
        :param user_kwargs: Extra keyword arguments forwarded to
            pg.homoscedasticity, overriding the defaults gathered here
        :return: Homoscedasticity results per (row, col, hue) facet, with
            added 'group count' and 'n per group' columns
        :rtype: pd.DataFrame
        """

        ### Gather Arguments
        kwargs = dict(
            dv=self.dims.y,
            group=self.dims.x, # !! required, homoscedasticity is measured over a list of groups
            method=method,
        )
        kwargs.update(user_kwargs) #' Add user kwargs

        ### Perform Test
        #' Iterate over rows, cols, and hue
        #' Skip empty groups
        homosced_dict = {}
        for key, df in self.data_iter__key_groups_skip_empty:
            #' key = (row, col, hue)
            homosced = pg.homoscedasticity(df, **kwargs)
            #' Add number of groups
            homosced["group count"] = self.data_count_groups_in_x(df)
            #' Add n to see how big groups are, make nested list to fit into single cell
            homosced["n per group"] = [self.data_count_n_per_x(df).to_list()]

            homosced_dict[key] = homosced

        homoscedDF = pd.concat(
            homosced_dict, keys=homosced_dict.keys(), names=self.factors_all
        )

        #
        ### Save Results
        self.results.DF_homoscedasticity = homoscedDF

        return homoscedDF

    # ==
    # == Spherecity ====================================================================

    @staticmethod
    def _spher_to_df(spher: NamedTuple) -> pd.DataFrame:
        """pingouin returns a strange SpherResults object (namedtuple?), we need to
        convert it to a dataframe.

        :param spher: Output of pg.sphericity()
        :type spher: pingouin.distribution.SpherResults, NamedTuple
        :return: Sphericity Result as DataFrame
        :rtype: pd.DataFrame
        """

        # NOTE(review): named tuples are also instances of tuple, so the
        # first branch likely handles both — verify which pg.sphericity
        # outputs actually reach the else branch
        if isinstance(spher, tuple):
            spher_dict = dict(zip(["spher", "W", "chi2", "dof", "pval"], spher))
            spher_DF = pd.DataFrame(data=spher_dict, index=[0])
        else:
            spher_DF = pd.DataFrame(data=spher._asdict(), index=[0])

        return spher_DF

    def check_sphericity(
        self, method: str = "mauchly", **user_kwargs
    ) -> pd.DataFrame:
        """Checks assumption of sphericity. If the assumption is violated, the p-values
        of an RM-ANOVA should be corrected with Greenhouse-Geisser or Huynh-Feldt method

        :param method: 'mauchly' or 'jns', defaults to "mauchly"
        :type method: str, optional
        :param user_kwargs: Extra keyword arguments forwarded to
            pg.sphericity, overriding the defaults gathered here
        :raises ValueError: If no subject column was specified
        :return: Sphericity results per (row, col, hue) facet, with added
            'group count' and 'n per group' columns
        :rtype: pd.DataFrame
        """
        ### Make sure subject is specified
        if self.subject is None:
            raise ValueError(
                "Testing sphericity requires a subject to be specified."
            )

        # TODO: Add option to use x or hue as within-factors
        ### All

        ### Gather Arguments
        kwargs = dict(
            dv=self.dims.y,
            subject=self.subject,
            within=self.dims.x,
            method=method,
        )
        kwargs.update(user_kwargs) #' Add user kwargs

        ### Perform Test
        #' Iterate over rows, cols, and hue
        #' Skip empty groups
        spher_dict = {}
        for key, df in self.data_iter__key_groups_skip_empty:
            #' key = (row, col, hue)
            spher = pg.sphericity(df, **kwargs)
            #' Convert NamedTuple to DataFrame
            spherdf = self._spher_to_df(spher)
            #' Add number of groups
            spherdf["group count"] = self.data_count_groups_in_x(df)
            #' Add n to see how big groups are
            spherdf["n per group"] = [self.data_count_n_per_x(df).to_list()]

            spher_dict[key] = spherdf

        spherDF = pd.concat(
            spher_dict, keys=spher_dict.keys(), names=self.factors_all_without_x
        )

        ### Save Results
        self.results.DF_sphericity = spherDF

        return spherDF
199 |
200 |
201 | # !! end class
202 | # !!
203 | # !!
204 |
205 |
206 | # #%%
207 | # from plotastic.example_data.load_dataset import load_dataset
208 | # DF, dims = load_dataset("fmri")
209 |
210 |
211 | # # %% plot
212 | # import seaborn as sns
213 |
214 | # sns.catplot(data=DF, **dims, kind="box")
215 |
216 | # # %% Check functionality with pingouin
217 |
218 | # pg.normality(DF, dv=dims["y"], group=dims["x"])
219 | # pg.homoscedasticity(DF, dv=dims["y"], group=dims["x"])
220 |
221 | # spher = pg.sphericity(DF, dv=dims["y"], subject="subject", within=dims["x"])
222 | # type(spher)
223 |
224 | # # %% create Assumptions object
225 |
226 | # DA = Assumptions(data=DF, dims=dims, subject="subject", verbose=True)
227 |
228 | # DA.check_normality()
229 | # DA.check_homoscedasticity()
230 | # DA.check_sphericity()
231 |
232 | # #%% Plot roughest facetting
233 |
234 | # sns.catplot(data=DF, x="timepoint")
235 |
236 | # # %% Use different set
237 |
238 |
239 | # DA2 = Assumptions(data=DF, dims=dict(x="timepoint", y="signal"),
240 | # subject="subject", verbose=True)
241 | # # DA2.catplot()
242 |
243 | # DA2.check_normality()
244 | # DA2.check_homoscedasticity()
245 | # DA2.check_sphericity()
246 |
--------------------------------------------------------------------------------
/src/plotastic/stat/bivariate.py:
--------------------------------------------------------------------------------
1 | # !!
2 |
3 | import pingouin as pg
4 |
5 | from plotastic.dimensions.dataframetool import DataFrameTool
6 |
7 | # %%
8 |
9 |
class Bivariate(DataFrameTool):
    """Placeholder for bivariate statistics (e.g. correlations);
    currently only forwards construction to DataFrameTool."""

    def __init__(self, **dataframetool_kws):
        super().__init__(**dataframetool_kws)
13 |
--------------------------------------------------------------------------------
/src/plotastic/stat/omnibus.py:
--------------------------------------------------------------------------------
1 | #
2 | # %% imports
3 |
4 | import warnings
5 |
6 | import numpy as np
7 | import pandas as pd
8 | import pingouin as pg
9 |
10 | # import markurutils as ut
11 | import plotastic.utils.utils as ut
12 |
13 | from plotastic.stat.assumptions import Assumptions
14 |
15 | # %% Class Omnibus
16 |
17 |
class Omnibus(Assumptions):
    """Omnibus tests (ANOVA, RM-ANOVA, Kruskal-Wallis, Friedman)
    performed per facet of self.data; results are stored in
    self.results."""

    # ==
    # == __init__ ======================================================================
    def __init__(self, **dataframetool_kws):
        super().__init__(**dataframetool_kws)

    # ==
    # == Helpers =======================================================================

    @staticmethod
    def _enhance_omnibus(DF: pd.DataFrame) -> pd.DataFrame:
        """Enhances the result DataFrame by adding additional columns

        :param DF: Result from omnibus_functions
        :type DF: pd.DataFrame
        :return: Same DataFrame with a 'stars' column (significance stars
            derived from 'p-unc') inserted right after 'p-unc'
        :rtype: pd.DataFrame
        """
        ### Insert Star column right after "p-unc"
        stars = DF["p-unc"].apply(Omnibus._p_to_stars)
        DF.insert(DF.columns.get_loc("p-unc") + 1, "stars", stars)

        return DF

    def _ensure_more_than_one_sample_per_group(
        self,
        df: pd.DataFrame,
        facetkey: tuple | None = None,
    ) -> bool:
        """Checks that every (x, hue) group within a facet has more than
        one sample, emitting a RuntimeWarning for each group that doesn't.
        Useful to explain warning messages from pingouin.

        :param df: A facet of self.data
        :type df: pd.DataFrame
        :param facetkey: Key of the facet, used in warning messages,
            defaults to None
        :type facetkey: tuple | None, optional
        :return: True if all groups have more than one sample
        :rtype: bool
        """

        ### Iterate through Sample groups within that facet
        results = []
        for levelkey, group in df.groupby(self.factors_xhue):
            if len(group) < 2:
                warnings.warn(
                    f"Skipping facet {facetkey}, because there is only one sample in {levelkey}",
                    RuntimeWarning,
                    stacklevel=3, # ? Prints out function that calls this one (e.g. omnibus_anova) ?
                )
                results.append(False)
            else:
                results.append(True)

        ### Return True if all groups have more than one sample
        return all(results)

    # ==
    # == ANOVA =========================================================================

    def omnibus_anova(self, **user_kwargs) -> pd.DataFrame:
        """Performs an ANOVA (parametric, unpaired) on all facets of self.data

        :param user_kwargs: Extra keyword arguments forwarded to pg.anova,
            overriding the defaults gathered here
        :return: Result from pg.anova with row and column as MultiIndex
        :rtype: pd.DataFrame
        """
        ### Gather Arguments
        kwargs = dict(
            dv=self.dims.y,
            between=self.factors_xhue,
            detailed=True,
        )
        kwargs.update(user_kwargs) #' Add user kwargs

        ### Perform ANOVA
        #' Skip empty groups
        aov_dict = {}
        for key, df in self.data_iter__key_facet_skip_empty:
            #' key = (row, col)
            aov = pg.anova(
                df, **kwargs
            ) # ? Doesn't seem to print annoying warnings
            aov_dict[key] = aov
        aov_DF = pd.concat(
            aov_dict, keys=aov_dict.keys(), names=self.factors_rowcol_list
        )

        ### Add extra columns
        aov_DF = self._enhance_omnibus(aov_DF)

        ### Save Result
        self.results.DF_omnibus_anova = aov_DF

        return aov_DF

    # ==
    # == RMANOVA =======================================================================

    def _omnibus_rm_anova_base(
        self,
        df: pd.DataFrame,
        facetkey: tuple,
        **kwargs,
    ) -> pd.DataFrame:
        """Handles Warnings of pg.rm_anova

        :param df: A facet of self.data
        :type df: pd.DataFrame
        :param facetkey: The key of the facet. Needed for warnings
        :type facetkey: tuple
        :param kwargs: Forwarded to pg.rm_anova
        :return: Result from pg.rm_anova
        :rtype: pd.DataFrame
        """
        ### Warn if there is only one sample in a group
        #' Result not used to skip; warning alone documents the problem
        self._ensure_more_than_one_sample_per_group(df, facetkey)

        ### Perform RMANOVA
        # !! Pingouin slams you with warnings in a big loop
        # !! Trying best to redirect special cases, but still too many warnings
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            rmaov = pg.rm_anova(df, **kwargs)

        return rmaov

    def omnibus_rm_anova(
        self,
        **user_kwargs,
    ) -> pd.DataFrame:
        """Performs a repeated measures ANOVA (parametric, paired) on all facets of
        self.data

        :param user_kwargs: Extra keyword arguments forwarded to
            pg.rm_anova, overriding the defaults gathered here
        :return: Result from pg.rm_anova with row and column as MultiIndex
        :rtype: pd.DataFrame
        """
        ### Gather Arguments
        kwargs = dict(
            dv=self.dims.y,
            subject=self.subject,
            within=self.factors_xhue,
            detailed=True,
        )
        kwargs.update(user_kwargs) #' Add user kwargs

        ### Perform RMANOVA
        #' Skip empty groups
        rmaov_dict = {}
        for key, df in self.data_iter__key_facet_skip_empty:
            #' key = (row, col)
            rmaov = self._omnibus_rm_anova_base(df, facetkey=key, **kwargs)
            rmaov_dict[key] = rmaov
        rmaov_DF = pd.concat(
            rmaov_dict, keys=rmaov_dict.keys(), names=self.factors_rowcol_list
        )
        ### Add extra columns
        rmaov_DF = self._enhance_omnibus(rmaov_DF)

        ### Save Result
        self.results.DF_omnibus_rmanova = rmaov_DF

        return rmaov_DF

    # ==
    # == Kruskal-Wallis ================================================================

    def omnibus_kruskal(self, **user_kwargs) -> pd.DataFrame:
        """Performs a Kruskal-Wallis test (non-parametric, unpaired) on all facets of
        self.data

        :param user_kwargs: Extra keyword arguments forwarded to
            pg.kruskal, overriding the defaults gathered here
        :return: Result from pg.kruskal with row and column as MultiIndex
        :rtype: pd.DataFrame
        """
        ### Gather Arguments
        kwargs = dict(
            dv=self.dims.y,
            between=self.dims.x,
            detailed=True,
        )
        kwargs.update(user_kwargs) #' Add user kwargs

        ### Perform Kruskal-Wallis
        #' pg.Kruskal takes only a single factor
        #' Skip empty groups
        kruskal_dict = {}
        for key, df in self.data_iter__key_groups_skip_empty:
            #' key = (row, col, hue)
            kruskal = pg.kruskal(df, **kwargs)
            kruskal_dict[key] = kruskal
        kruskal_DF = pd.concat(
            kruskal_dict,
            keys=kruskal_dict.keys(),
            names=self.factors_all_without_x,
        )
        ### Add extra columns
        kruskal_DF = self._enhance_omnibus(kruskal_DF)

        ### Save Result
        self.results.DF_omnibus_kruskal = kruskal_DF

        return kruskal_DF

    # ==
    # == Friedman ======================================================================

    def omnibus_friedman(self, **user_kwargs) -> pd.DataFrame:
        """Performs a Friedman test (non-parametric, paired) on all facets of self.data

        :param user_kwargs: Extra keyword arguments forwarded to
            pg.friedman, overriding the defaults gathered here
        :return: Result from pg.friedman with row and column as MultiIndex
        :rtype: pd.DataFrame
        """
        ### Gather Arguments
        kwargs = dict(
            dv=self.dims.y,
            subject=self.subject,
            within=self.dims.x,
            # detailed=True, # !! pg.friedman doesn't have this option
        )
        kwargs.update(user_kwargs) #' Add user kwargs

        ### Perform Friedman
        #' pg.friedman takes only a single factor
        #' Skip empty groups
        friedman_dict = {}
        for key, df in self.data_iter__key_groups_skip_empty:
            #' key = (row, col, hue)
            friedman = pg.friedman(df, **kwargs)
            friedman_dict[key] = friedman
        friedman_DF = pd.concat(
            friedman_dict,
            keys=friedman_dict.keys(),
            names=self.factors_all_without_x,
        )
        ### Add extra columns
        friedman_DF = self._enhance_omnibus(friedman_DF)

        ### Save Result
        self.results.DF_omnibus_friedman = friedman_DF

        return friedman_DF
256 |
257 |
258 | # !!
259 | # !! end class
260 |
261 | # %% Test Omnibus
262 |
if __name__ == "__main__":
    from plotastic.example_data.load_dataset import load_dataset

    # NOTE(review): the "fmri" dataset is loaded and immediately overwritten
    # by "qpcr" — the first line looks like a leftover from manual testing.
    DF, dims = load_dataset("fmri")
    DF, dims = load_dataset("qpcr")

    # %% CHECK pingouin ANOVA
    ### Call pingouin directly (without plotastic) as a reference
    kwargs = dict(data=DF, dv=dims["y"], detailed=True)

    aov = pg.anova(between=[dims["x"], dims["hue"]], **kwargs)
    rmaov = pg.rm_anova(
        within=[dims["x"], dims["hue"]], subject="subject", **kwargs
    )
    kruskal = pg.kruskal(between=dims["hue"], **kwargs)

    # %% Make DataAnalysis

    DA = Omnibus(data=DF, dims=dims, subject="subject", verbose=True)

    # %% There's a problem with the Data: Only 1 sample in MMP and MACS

    ### Sort by xhue
    df2 = DF[(DF["class"] == "MMPs") & (DF["method"] == "MACS")].sort_values(
        ["gene", "fraction"]
    )
    len(df2) #' 24
    levelkeys2 = df2.set_index([dims["x"], dims["hue"]]).index.unique()
    DA._ensure_more_than_one_sample_per_group(df2)
    # DA._plot_dendrogram_from_levelkeys(levelkeys2)

    ### Reference: rm-ANOVA on the problematic facet, directly via pingouin
    pg.rm_anova(
        data=df2,
        dv=dims["y"],
        within=[dims["x"], dims["hue"]],
        detailed=True,
        subject="subject",
    )
    #

    # %% Check stuff

    ### Run every omnibus test through the plotastic interface
    aov = DA.omnibus_anova()
    rmaov = DA.omnibus_rm_anova()
    kruskal = DA.omnibus_kruskal()
    friedman = DA.omnibus_friedman()
308 |
309 | # %% Check Kruskal
310 |
--------------------------------------------------------------------------------
/src/plotastic/stat/posthoc.py:
--------------------------------------------------------------------------------
1 | #
2 | # %% Import Modules
3 | # import markurutils as ut
4 | import plotastic.utils.utils as ut
5 |
6 | import numpy as np
7 | import pandas as pd
8 |
9 | # print(pd.__version__)
10 | import pingouin as pg
11 |
12 |
13 | from plotastic.stat.assumptions import Assumptions
14 |
15 |
16 | # %% Class PostHoc
17 |
18 |
class PostHoc(Assumptions):
    """Adds pairwise post-hoc testing (via pg.pairwise_tests) on top of
    Assumptions, iterating over all row/col facets of self.data."""

    #' Defaults passed to every pg.pairwise_tests call (user kwargs override them)
    DEFAULT_KWS_PAIRWISETESTS = dict(
        nan_policy="pairwise", #' Delete only pairs or complete subjects ("listwise") if samples are missing?
        return_desc=True, #' Return descriptive statistics?
        correction="auto", #' Use welch correction if variances unequal?
    )

    # == __init__ ======================================================================
    def __init__(self, **dataframetool_kws):
        super().__init__(**dataframetool_kws)

    #
    #
    # == Base function =================================================================

    @ut.ignore_warnings
    def _base_pairwise_tests(self, **kwargs) -> pd.DataFrame:
        """Performs pairwise tests for every row/col facet of self.data.

        :param kwargs: Arguments passed through to pg.pairwise_tests
        :return: Concatenated pairwise-test tables, keyed by row/col levels
        :rtype: pd.DataFrame
        """

        ### Turn around hue and x for between or within argument
        #' so that comparisons are made in both factor orders (x*hue and hue*x)
        if self.dims.hue:
            kwargs_2 = kwargs.copy()
            if "within" in kwargs:
                kwargs_2["within"] = list(reversed(kwargs["within"]))
            elif "between" in kwargs:
                kwargs_2["between"] = list(reversed(kwargs["between"]))

        ### Perform Test
        #' Iterate over rows and columns
        PH_dict = {}

        #' Skip empty so that no empty groups of level combinations are artificially added
        for key, df in self.data_iter__key_facet_skip_empty:
            if self.dims.hue:
                #' Perform twice with x and hue turned around (= huex)
                ph_xhue = pg.pairwise_tests(data=df, **kwargs)
                ph_huex = pg.pairwise_tests(data=df, **kwargs_2)
                PH_dict[key] = ph_xhue.merge(ph_huex, how="outer")
            else:
                #' perform once with x
                ph_x = pg.pairwise_tests(data=df, **kwargs)
                PH_dict[key] = ph_x

        PH = pd.concat(
            PH_dict, keys=PH_dict.keys(), names=self.factors_rowcol_list
        )

        return PH

    #
    # == Pairwise TESTs ================================================================

    def test_pairwise(
        self,
        paired=None,
        parametric=True,
        subject=None,
        **user_kwargs,
    ) -> pd.DataFrame:
        """Interface that sorts arguments, executes pairwise tests and adds
        extra features to the posthoc (PH) table.

        :param paired: Perform paired tests? If None, defaults to True when a
            subject column is set, optional
        :param parametric: Use parametric tests?, defaults to True
        :param subject: Subject column; used when self.subject is unset, optional
        :param user_kwargs: Forwarded to pg.pairwise_tests, override defaults
        :return: Posthoc table with row/col (and hue) levels as MultiIndex
        :rtype: pd.DataFrame
        """

        ### Gather Arguments
        kwargs = dict(
            dv=self.dims.y,
            parametric=parametric,
            nan_policy="pairwise",
        )
        #' Paired or unpaired
        if paired is None and self.subject:
            paired = True
        if paired:
            assert (self.subject is not None) or (
                subject is not None
            ), "No subject column specified"
            kwargs["within"] = self.factors_xhue
            kwargs["subject"] = self.subject if self.subject else subject
        else:
            kwargs["between"] = self.factors_xhue
        #' Add user kwargs (defaults first, then user overrides)
        kwargs.update(self.DEFAULT_KWS_PAIRWISETESTS)
        kwargs.update(user_kwargs)

        #' Make sure the specified factors are present
        if "within" in kwargs:
            assert all(
                f in self.factors_all
                for f in ut.ensure_list(kwargs["within"])
            ), f"Argument 'within' contains unknown columns ({kwargs['within']} should be like one of {self.factors_all})"
        if "between" in kwargs:
            assert all(
                f in self.factors_all
                for f in ut.ensure_list(kwargs["between"])
            ), f"Argument 'between' contains unknown columns ({kwargs['between']} should be like one of {self.factors_all})"

        ### Make PH table
        PH = self._base_pairwise_tests(**kwargs)
        PH = self._enhance_PH(PH)

        ### Save result
        self.results.DF_posthoc = PH

        return PH

    def _enhance_PH(self, PH: pd.DataFrame) -> pd.DataFrame:
        """Adds significance stars, a significance category, pair tuples and
        x/hue-crossing info to a posthoc table, then sets a readable index.

        :param PH: Raw output of _base_pairwise_tests
        :rtype: pd.DataFrame
        """
        ### Define Alpha
        alpha = self.ALPHA
        alpha_tolerance = self.ALPHA_TOLERANCE

        ### EDIT PH
        PH = PH.reset_index(
            drop=False
        ) #' drop is default false, but put it explicitly here

        #' Add Stars
        PH["**p-unc"] = PH["p-unc"].apply(self._p_to_stars, alpha=alpha)
        if "p-corr" in PH.columns:
            PH["**p-corr"] = PH["p-corr"].apply(self._p_to_stars, alpha=alpha)

        #' Make Column for categorizing significance
        #' (corrected p-values take precedence when present)
        PH["Sign."] = pd.cut(
            PH["p-unc"],
            bins=[0, alpha, alpha_tolerance, 1],
            labels=["signif.", "toler.", False],
        )
        if "p-corr" in PH.columns:
            PH["Sign."] = pd.cut(
                PH["p-corr"],
                bins=[0, alpha, alpha_tolerance, 1],
                labels=["signif.", "toler.", False],
            )

        #' Make pairs
        PH["pairs"] = PH.apply(self._level_to_pair, axis=1)

        #' Show if the pair crosses x or hue boundaries
        if self.dims.hue:
            PH["cross"] = PH.apply(self._detect_xhue_crossing, axis=1)
        else:
            PH["cross"] = "x"

        ### Set index
        #' FIX: raw string — "\d" is an invalid escape sequence otherwise
        PH = ut.drop_columns_by_regex(PH, r"level_\d")
        if self.dims.hue:
            PH = PH.set_index(
                self.factors_rowcol_list + [self.dims.hue, "Contrast"]
            )
        else:
            PH = PH.set_index(self.factors_rowcol_list + ["Contrast"])

        return PH

    # == Pairing functions =============================================================

    def _level_to_pair(self, row: "pd.Series") -> tuple:
        """Converts the factor-columns of a posthoc table row into a pair tuple.

        :param row: One row of the posthoc table
        :return: (A, B) for single-factor contrasts; for interaction contrasts
            a nested pair ((levelB, lvl), (levelA, lvl)) ordered as (x, hue)
        """

        ### See if there are multiple factors
        phInteract = " * " in row["Contrast"]

        if not phInteract:
            return row["A"], row["B"]
        else:
            levels = row[[self.dims.hue, self.dims.x]].tolist()
            #' Switch column if NaN; also check: if not math.isnan(factor)
            if pd.notna(levels[0]):
                lvl = levels[0]
                pair = ((row["B"], lvl), (row["A"], lvl))
            else:
                lvl = levels[1]
                pair = ((lvl, row["B"]), (lvl, row["A"]))
            return pair

    @staticmethod
    def _detect_xhue_crossing(row: "pd.Series") -> str:
        """
        Detects if a pair ((DCN, F2), (DCN, F1)) is crossing x or hue boundaries.

        crossing Hue: ((x, hue1), (x, hue2))
        crossing X:   ((x1, hue), (x2, hue))

        :param row: One row of the posthoc table; needs "Contrast" and "pairs"
        :return: "hue", "x", or NaN if neither element matches
        """

        ### See if there are multiple factors
        phInteract = " * " in row["Contrast"]

        if not phInteract:
            return "x"
        else:
            cross = np.nan
            pair = row["pairs"]
            if pair[0][0] == pair[1][0]:
                cross = "hue"
            if pair[0][1] == pair[1][1]:
                cross = "x"
            return cross
244 |
245 |
246 | # %% Import data and make PostHoc object
247 |
248 |
249 | # DF, dims = plst.load_dataset("fmri")
250 |
251 | # PH = PostHoc(data=DF, dims=dims, verbose=False, subject="subject")
252 |
253 |
254 | # %% Check functionality of pingouin
255 |
256 | # # !! Raises TypeError: Could not convert value 'cuestim' to numeric. This didn't happen before changing to new environment.
257 | # # !! Downgraded pandas from 2.0.3 (released april 2023) to 1.5.3 -> FIXED IT
258 | # ph = pg.pairwise_tests(data=DF, dv="signal", within=["timepoint", "event"], subject="subject", parametric=True, padjust="bonf", nan_policy="pairwise")
259 |
260 | # %% test with pingouin
261 |
262 | # ph = PH.test_pairwise(
263 | # # dv="signal",
264 | # # between=[dims["x"], dims["hue"]],
265 | # # within=[dims["x"], dims["hue"]],
266 | # # between=dims["col"],
267 | # # subject="subject",
268 | # parametric=True,
269 | # padjust="bh",
270 | # nan_policy="pairwise",
271 | # )
272 |
273 | # ut.pp(ph[ph["Sign."].isin(["signif."])]).head(70)
274 |
275 | # %%
276 |
--------------------------------------------------------------------------------
/src/plotastic/stat/statresults.py:
--------------------------------------------------------------------------------
1 | # %% Imports
2 | from typing import TYPE_CHECKING
3 |
4 | from pathlib import Path
5 |
6 | import pandas as pd
7 |
8 | if TYPE_CHECKING:
9 | import pandas as pd
10 |
11 | # %% class StatResults
12 |
13 |
14 | class StatResults:
15 | # ==
16 | # == DEFAULTS ======================================================================
17 | # fmt: off
18 | DEFAULT_UNCHECKED = "NOT CHECKED" #' If ASSUMPTION not tested,
19 | DEFAULT_UNTESTED = "NOT TESTED" #' If statistical test not tested (posthoc, omnibus)
20 | DEFAULT_UNASSESSED = "NOT ASSESSED" #' If not
21 | # fmt: on
22 |
23 | # ==
24 | # == INIT ==========================================================================
25 | def __init__(self):
26 | ### Data Tables
27 | self.DF_normality: pd.DataFrame | str = self.DEFAULT_UNCHECKED
28 | self.DF_homoscedasticity: pd.DataFrame | str = self.DEFAULT_UNCHECKED
29 | self.DF_sphericity: pd.DataFrame | str = self.DEFAULT_UNCHECKED
30 |
31 | self.DF_omnibus_anova: pd.DataFrame | str = self.DEFAULT_UNTESTED
32 | self.DF_omnibus_rmanova: pd.DataFrame | str = self.DEFAULT_UNTESTED
33 | self.DF_omnibus_kruskal: pd.DataFrame | str = self.DEFAULT_UNTESTED
34 | self.DF_omnibus_friedman: pd.DataFrame | str = self.DEFAULT_UNTESTED
35 | self.DF_posthoc: pd.DataFrame | str = self.DEFAULT_UNTESTED
36 | self.DF_bivariate: pd.DataFrame | str = self.DEFAULT_UNTESTED
37 |
38 | ### Assessments = Summarizing results from multiple groups
39 | self._normal: bool | str = self.DEFAULT_UNASSESSED
40 | self._homoscedastic: bool | str = self.DEFAULT_UNASSESSED
41 | self._spherical: bool | str = self.DEFAULT_UNASSESSED
42 |
43 | self._parametric: bool | str = self.DEFAULT_UNASSESSED
44 |
45 | # ==
46 | # == Summarize Results =============================================================
47 |
48 | @property
49 | def as_dict(self) -> dict:
50 | d = dict(
51 | ### Assumptions
52 | normality=self.DF_normality,
53 | homoscedasticity=self.DF_homoscedasticity,
54 | sphericity=self.DF_sphericity,
55 | ### Omnibus
56 | anova=self.DF_omnibus_anova,
57 | rm_anova=self.DF_omnibus_rmanova,
58 | kruskal=self.DF_omnibus_kruskal,
59 | friedman=self.DF_omnibus_friedman,
60 | ### Posthoc
61 | posthoc=self.DF_posthoc,
62 | ### Bivariate
63 | bivariate=self.DF_bivariate,
64 | )
65 |
66 | ### Remove untested
67 | d = {k: v for k, v in d.items() if not isinstance(v, str)}
68 |
69 | return d
70 |
71 | def __iter__(self) -> tuple[str, pd.DataFrame]:
72 | for test_name, DF in self.as_dict.items():
73 | yield test_name, DF
74 |
75 | # ==
76 | # == GETTERS AND SETTERS ===========================================================
77 |
78 | @property
79 | def normal(self):
80 | if self._normal == self.DEFAULT_UNASSESSED:
81 | self._normal = self.assess_normality()
82 | return self._normal
83 |
84 | @normal.setter
85 | def normal(self, value: bool):
86 | print(f"#! Defining normality as {value}!")
87 | self._normal = value
88 |
89 | @property
90 | def parametric(self):
91 | if self._parametric == self.DEFAULT_UNASSESSED:
92 | self._parametric = self.assess_parametric()
93 | return self._parametric
94 |
95 | @parametric.setter
96 | def parametric(self, value: bool):
97 | print(f"#! Defining parametric as {value}!")
98 | self._parametric = value
99 |
100 | # ==
101 | # == ASSESS ASSUMPTIONS ============================================================
102 |
103 | def assess_normality(self, data) -> bool:
104 | """Uses result from normality test for each group and decides if data should be considered normal or not"""
105 | assert (
106 | self.DF_normality is not self.DEFAULT_UNCHECKED
107 | ), "Normality not tested yet"
108 | raise NotImplementedError
109 | self.normal = stats.normaltest(data)[1] > 0.05
110 |
111 | def assess_parametric(self):
112 | """Uses results from normality, homoscedasticity and sphericity tests to decide if parametric tests should be used"""
113 | self.parametric = self.normal and self.homoscedastic and self.spherical
114 | return self.parametric
115 |
116 | # ==
117 | # == EXPORT ========================================================================
118 |
119 | def save(self, fname: str | Path = "plotastic_results", verbose=True) -> None:
120 | """Exports all statistics to one excel file. Different sheets for different
121 | tests
122 |
123 | :param out: Path to save excel file, optional (default="")
124 | :type out: str, optional
125 | """
126 | ### Construct output path
127 | fname = Path(fname).with_suffix(".xlsx")
128 |
129 | ### Init writer for multiple sheets
130 | writer = pd.ExcelWriter(fname, engine="xlsxwriter")
131 | workbook = writer.book
132 |
133 | ### Iterate through results
134 | for test_name, DF in self.as_dict.items():
135 | worksheet = workbook.add_worksheet(test_name) #' Make sheet
136 | writer.sheets[test_name] = worksheet #' Add sheet name to writer
137 | DF.to_excel(writer, sheet_name=test_name) #' # Write DF to sheet
138 |
139 | ### Save
140 | writer.close()
141 |
142 | ### Tell save location
143 | if verbose:
144 | print(f"Saved results to {fname.resolve()}")
145 |
146 |
147 | # !!
148 | # !! end class
149 |
150 | # %% test it
151 | # if __name__ == "__main__":
152 |
153 | # # %% Load Data, make DA, fill it with stuff
154 | # from plotastic.example_data.load_dataset import load_dataset
155 | # DF, dims = load_dataset("qpcr")
156 | # # DA = DataAnalysis(DF, dims)
157 | # # DA.test_pairwise()
158 |
--------------------------------------------------------------------------------
/src/plotastic/stat/stattest.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 |
4 | from plotastic.dimensions.dataintegrity import DataIntegrity
5 | from plotastic.stat.statresults import StatResults
6 |
7 |
class StatTest(DataIntegrity):
    """Base class for all statistical testing: holds the class-wide alpha
    thresholds, composes a StatResults container, and provides helpers to
    translate p-values and effect sizes into readable labels."""

    # == Class Attribute ===============================================================

    #' Alpha: significance threshold shared by all instances
    ALPHA = 0.05  # TODO Why class variable?
    #' Alpha Tolerance: Will still print out result if it nearly crossed alpha level.
    ALPHA_TOLERANCE = 0.075

    @classmethod
    def set_alpha(cls, value: float) -> None:
        """Sets the class-wide significance threshold."""
        cls.ALPHA = value

    @classmethod
    def set_alpha_tolerance(cls, value: float) -> None:
        """Sets the class-wide tolerance threshold for near-significant p-values."""
        cls.ALPHA_TOLERANCE = value

    #
    #
    # == __init__=======================================================================
    def __init__(
        self,
        single_factor: bool | str = False,
        **dataframetool_kwargs,
    ):
        """
        :param single_factor: Analyze only one factor ("hue" or "col") instead
            of the default two-factor analysis, defaults to False
        :param dataframetool_kwargs: Forwarded to DataIntegrity.__init__
        """
        super().__init__(**dataframetool_kwargs)

        ### Single-Factor Mode
        #' Default is two-factor analysis
        # TODO: Add single-factor mode to each function
        assert single_factor in [
            "hue",
            "col",
            False,
        ], f"#! single_factor must be 'hue' or 'col', not {single_factor}"
        self.single_factor = single_factor

        ### Composition
        self.results = StatResults()

    #
    #
    # == Helper functions ==============================================================

    @staticmethod
    def _p_to_stars(fl: float, alpha: float = 0.05):
        """Converts a p-value into a star string.

        :param fl: p-value
        :param alpha: Significance threshold, defaults to 0.05
        :return: "ns", 1-4 stars, a rounded float for p-values just above
            alpha (up to 1.4*alpha), or NaN for non-comparable input
        """
        # use other stars ☆ ★ ★ ★ ٭★☆✡✦✧✩✪✫✬✭✮✯✰✵✶✷✸✹⭑⭒✴︎
        s = "*"

        #' FIX: assert message previously printed the value, not the type
        assert isinstance(
            alpha, float
        ), f"#! Alpha was {type(alpha)}, float required"
        a = alpha
        if fl > a:
            stars = "ns"
        elif a >= fl > a / 5:
            stars = s
        elif a / 5 >= fl > a / 50:
            stars = s * 2
        elif a / 50 >= fl > a / 500:
            stars = s * 3
        elif a / 500 >= fl:
            stars = s * 4
        else:
            #' NaN p-values fail every comparison and land here
            stars = float("NaN")

        #' Display p-values between alpha and 1.4*alpha not as stars, but as
        #' rounded numbers (e.g. 0.05-0.07 for the default alpha)
        if a * 1.4 >= fl > a:
            stars = round(fl, 3)

        return stars

    @staticmethod
    def _effectsize_to_words(fl: float, t=(0.01, 0.06, 0.14, 0.5)):
        """Translates an effect size into a verbal category.

        :param fl: Effect size. Default thresholds look like common
            eta-squared conventions — confirm before using other measures.
        :param t: Ascending thresholds separating No Effect / Small / Medium /
            Large / Huge, defaults to (0.01, 0.06, 0.14, 0.5)
        :return: Category word, or NaN for non-comparable input
        """
        if fl < t[0]:
            effectSize = "No Effect"
        elif t[0] <= fl < t[1]:
            effectSize = "Small"
        elif t[1] <= fl < t[2]:
            effectSize = "Medium"
        elif t[2] <= fl < t[3]:
            effectSize = "Large"
        elif t[3] <= fl:
            effectSize = "Huge"
        else:
            #' NaN effect sizes fail every comparison and land here
            effectSize = float("NaN")
        return effectSize
99 |
--------------------------------------------------------------------------------
/src/plotastic/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/src/plotastic/utils/__init__.py
--------------------------------------------------------------------------------
/src/plotastic/utils/subcache.py:
--------------------------------------------------------------------------------
1 | #
2 | # %% Imports
3 |
4 |
5 | from typing import Callable, List
6 |
7 | import os
8 | from pathlib import Path
9 |
10 | from icecream import ic
11 |
12 | from joblib import Memory
13 |
14 | # from plotastic.utils import utils as ut
15 |
16 |
class SubCache(Memory):
    """Expands the joblib.Memory class with some useful methods.

    - List directories within cache
    - List objects within cache
    - Adds subcache attribute, with benefits:
        - Subcache replaces module name in cache directory
        - More control over cache directories
        - Persistent caching, since IPython passes a new location to
          joblib each time the Memory object is initialized
    - Doesn't work right if two SubCache objects cache the same function
    """

    def __init__(
        self,
        subcache_dir: str,
        assert_parent: str | None = None,
        *args,
        **kwargs,
    ):
        """
        :param subcache_dir: Name of the cache subfolder; replaces joblib's
            default module-based subfolder
        :param assert_parent: If given, assert that the cache location's
            parent directory carries this name, optional
        :param args: Forwarded to joblib.Memory
        :param kwargs: Forwarded to joblib.Memory (e.g. location, verbose)
        """
        super().__init__(*args, **kwargs)

        ### Subfolder of location, overrides default subfolder by joblib
        self.subcache_dir = subcache_dir

        ### self.location/joblib/subcache
        self.subcache_path = os.path.join(
            self.location, "joblib", self.subcache_dir
        )

        ### Prevent joblib folders being created by wrong Interactive Windows
        if assert_parent is not None:
            parent_full = Path(self.location).absolute()
            parent = os.path.split(parent_full)[-1]
            assert (
                parent == assert_parent
            ), f"When Initializing joblib.Memory, we expected cache to be in {assert_parent}, but we ended up in {parent_full}"

    def list_dirs(
        self, detailed: bool = False, max_depth: int = 3
    ) -> List[str]:
        """
        Returns a list of cache directories.

        :param detailed: if True, returns all cache directories with
            full paths. Default is False.
        :type detailed: bool, optional
        :param max_depth: The maximum depth to search for cache
            directories (ignored when detailed is True). Default is 3.
        :type max_depth: int, optional
        :return: List[str], a list of cache directories.
        """

        subcache = self.subcache_path

        location_subdirs = []

        ### Recursive walking
        for root, dirs, _files in os.walk(subcache):
            #' Don't go too deep: 'joblib/plotastic/example_data/load_dataset/load_dataset',
            depth = root[len(subcache) :].count(os.sep)
            if not detailed and depth > max_depth:
                continue
            #' FIX: renamed loop variable — `dir` shadowed the builtin
            for dirname in dirs:
                #' Don't need to check for 'joblib' because it's not a subdirectory of cache_dir
                #' Exclude hash subdirectories like "c1589ea5535064b588b2f6922e898473"
                if len(dirname) >= 32 or dirname == "joblib":
                    continue
                #' Return every path completely
                if detailed:
                    location_subdirs.append(os.path.join(root, dirname))
                else:
                    #' Report paths relative to the subcache root
                    dir_path = os.path.join(root, dirname)
                    dir_path = dir_path.replace(subcache, "")
                    if dir_path.startswith("/"):
                        dir_path = dir_path[1:]
                    location_subdirs.append(dir_path)
        return location_subdirs

    def list_objects(self):
        """Return the list of inputs and outputs from `mem` (joblib.Memory
        cache)."""

        objects = []

        for item in self.store_backend.get_items():
            path_to_item = os.path.split(
                os.path.relpath(item.path, start=self.store_backend.location)
            )
            result = self.store_backend.load_item(path_to_item)
            input_args = self.store_backend.get_metadata(path_to_item).get(
                "input_args"
            )
            objects.append((input_args, result))
        return objects

    def subcache(self, f: Callable, **mem_kwargs) -> Callable:
        """Cache a function in a persistent manner, since IPython passes a new
        location to joblib each time the Memory object is initialized.

        :param f: Function to cache. NOTE: its __module__ and __qualname__
            are overwritten so joblib derives a stable cache path.
        :param mem_kwargs: Forwarded to Memory.cache
        :return: The cached callable
        """
        f.__module__ = self.subcache_dir
        f.__qualname__ = f.__name__

        return self.cache(f, **mem_kwargs)
117 |
118 |
if __name__ == "__main__":
    ### Demo: put the cache under ~/.cache
    home = os.path.join(
        os.path.expanduser("~"),
        ".cache",
    )

    def sleep(seconds):
        import time

        time.sleep(seconds)

    MEM = SubCache(location=home, subcache_dir="plotastic", verbose=True)

    #' Re-bind to the cached version
    sleep = MEM.subcache(sleep)
    # %%
    ### First time slow, next time fast
    sleep(1.4)
    # %%
    MEM.list_dirs()
    # %%
    MEM.clear()

    # %%
    ### Using different cache allows clearance of only that cache
    MEM2 = SubCache(location=home, subcache_dir="plotic2", verbose=True)

    def slep(seconds):
        import time

        time.sleep(seconds)

    sleep_cached2 = MEM2.subcache(slep)
    sleep_cached2(1.4)
    # %%
    MEM2.list_dirs()
    # %%
    MEM2.clear()
156 |
--------------------------------------------------------------------------------
/testing/make_htmlcov.sh:
--------------------------------------------------------------------------------
1 |
2 |
3 | ### From within project root
4 | #' sadly coveragerc can't be in a different directory
5 | pytest tests -n 3 --cov --cov-report html:testing/htmlcov --cov-config .coveragerc
6 |
--------------------------------------------------------------------------------
/tests/DA_configs.py:
--------------------------------------------------------------------------------
1 | """Utilities for testing plotastic. Contains lists of arguments"""
2 |
3 | # %% imports
4 |
5 | from typing import Callable
6 |
7 | import os
8 | import warnings
9 | from glob import glob
10 |
11 |
12 | import pandas as pd
13 |
14 | import matplotlib.pyplot as plt
15 |
16 | import plotastic as plst
17 |
18 | # import plotastic.utils.utils as ut
19 | # import plotastic.utils.cache as utc
20 | from plotastic.utils.subcache import SubCache
21 |
22 | # %%
23 | # ### Cache it to speed up
24 | # # !! Execute from within plotastic folder!
25 | # # !! Makes a wrong cache folder if executed with interactive mode
26 | # MEMORYDA_configs = SubCache(
27 | # location="../testing", #' Passed to joblib.Memory
28 | # assert_parent="testing",
29 | # subcache_dir="DA_configs", #' subfolder of location, overrides joblib
30 | # verbose=True,
31 | # )
32 | ### Clear cache if needed
33 | # MEMORY_TESTCONFIGS.clear()
34 |
35 |
36 | # %%
# == Load Datasets =====================================================
#' Source of files is seaborn, markurutils just adds cut column

### Cache function
# load_dataset = MEMORY_DA_CONFIGS.subcache(plst.load_dataset)

### Load datasets
#' NOTE: executed at import time, so importing this module loads all
#' three example datasets once and shares them across test modules
DF_tips, dims_tips = plst.load_dataset("tips", verbose=False)
DF_fmri, dims_fmri = plst.load_dataset("fmri", verbose=False)
DF_qpcr, dims_qpcr = plst.load_dataset("qpcr", verbose=False)
47 |
48 |
49 | # %%
# == Arguments for plst.DataAnalysis ===================================
#' Facet data such that it leads to presence or absence of empty groups


### Empty groups
#' Facetings of the tips dataset that produce empty level combinations
dims_withempty_tips = [
    dict(y="tip", x="day", hue="sex", col="smoker", row="time"),
    dict(y="tip", x="sex", hue="day", col="smoker", row="time"),
    dict(y="tip", x="sex", hue="day", col="time", row="smoker"),
    dict(y="tip", x="sex", hue="day", col="time"),
    dict(y="tip", x="sex", hue="day", row="time"),
    dict(y="tip", x="sex", hue="day", row="size-cut"),
    dict(y="tip", x="sex", hue="day"),
    dict(y="tip", x="sex"),
    dict(y="tip", x="size-cut"),
]


### Don't make empty groups
dims_noempty_tips = [
    dict(y="tip", x="size-cut", hue="smoker", col="sex", row="time"),
    dict(y="tip", x="size-cut", hue="smoker", col="sex"),
    dict(y="tip", x="size-cut", hue="smoker"),
    dict(y="tip", x="size-cut"),
]

dims_noempty_fmri = [
    dict(y="signal", x="timepoint", hue="event", col="region"),
    dict(y="signal", x="timepoint", hue="region", col="event"),
    dict(y="signal", x="timepoint", hue="region"),
    dict(y="signal", x="timepoint", hue="event"),
    dict(y="signal", x="timepoint"),
]

dims_noempty_qpcr = [
    dict(y="fc", x="gene", hue="fraction", col="class", row="method"),
    dict(y="fc", x="gene", hue="fraction", col="method", row="class"),
    dict(y="fc", x="gene", hue="fraction", col="class"),
    dict(y="fc", x="gene", hue="fraction"),
    dict(y="fc", x="gene"),
]
91 |
92 | # %%
93 |
94 |
95 | # %%
# == Make tuples (DF, dims) ============================================
#' for pytest.parametrize

zipped_withempty_tips = [(DF_tips, dim) for dim in dims_withempty_tips]
zipped_noempty_tips = [(DF_tips, dim) for dim in dims_noempty_tips]
zipped_noempty_fmri = [(DF_fmri, dim) for dim in dims_noempty_fmri]
zipped_noempty_qpcr = [(DF_qpcr, dim) for dim in dims_noempty_qpcr]

### Paired Data (with subject)
zipped_noempty_PAIRED = zipped_noempty_fmri + zipped_noempty_qpcr

### All should make 14 tests
zipped_noempty_ALL = (
    zipped_noempty_tips + zipped_noempty_fmri + zipped_noempty_qpcr
)

#' Everything, including facetings that create empty groups
zipped_ALL = (
    zipped_noempty_tips
    + zipped_noempty_fmri
    + zipped_noempty_qpcr
    + zipped_withempty_tips
)
# len(zipped_noempty_ALL) #' -> 14 total tests
119 |
120 |
121 | # %%
122 | # == A cached DataAnalysis Initializer =================================
123 |
124 |
125 | # def DataAnalysis_cached(*args, **kwargs) -> plst.DataAnalysis:
126 | # """Makes a DataAnalysis object"""
127 | # return plst.DataAnalysis(*args, **kwargs)
128 |
129 |
130 | # DataAnalysis_cached = MEMORY_DA_CONFIGS.subcache(DataAnalysis_cached)
131 |
132 |
133 | # %%
134 | # == Make Dataanalysis objects =========================================
135 |
136 |
def make_DA_statistics(dataset: str = "qpcr") -> plst.DataAnalysis:
    """Makes a DA object with every possible data stored in it

    :param dataset: "tips", "fmri", or "qpcr"
    :type dataset: str
    """

    ### Suppress warnings while producing every statistics table
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        ### Example data must be paired so that paired tests are applicable
        assert dataset not in ["tips"], f"{dataset} is not paired"

        ### Load example data and initialize the DataAnalysis
        DF, dims = plst.load_dataset(dataset, verbose=False)
        DA = plst.DataAnalysis(DF, dims, subject="subject", verbose=False)

        ### Assumption checks
        for check in (
            DA.check_normality,
            DA.check_homoscedasticity,
            DA.check_sphericity,
        ):
            check()

        ### Omnibus tests
        for omnibus in (
            DA.omnibus_anova,
            DA.omnibus_rm_anova,
            DA.omnibus_kruskal,
            DA.omnibus_friedman,
        ):
            omnibus()

        ### Posthoc tests
        DA.test_pairwise()

        return DA
172 |
173 |
def make_DA_plot(dataset: str = "qpcr") -> plst.DataAnalysis:
    """A DA that has a plot"""
    ### Suppress warnings during loading and plotting
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        ### Load example data and initialize the DataAnalysis
        data, dims = plst.load_dataset(dataset, verbose=False)
        analysis = plst.DataAnalysis(
            data, dims, subject="subject", verbose=False
        )

        ### Draw the plot, then close the figure to keep the session clean
        analysis.plot_box_strip()
        plt.close()
        return analysis
187 |
188 |
def make_DA_all(dataset: str) -> plst.DataAnalysis:
    """A DA with all possible statistics and a plot"""
    ### Suppress warnings during statistics and plotting
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        ### Start from a fully tested DA, then add a plot
        analysis = make_DA_statistics(dataset)
        analysis.plot_box_swarm()
        plt.close()
        return analysis
198 |
199 |
200 | # %%
### Test make functions
if __name__ == "__main__":
    pass
# %%
#!%%timeit
# get_DA_statistics()

# %%
#!%%timeit
# get_DA_plot()

# %%
#!%%timeit
# get_DA_all(dataset="qpcr")

# %%

### Cache results of these functions to speed up testing
# make_DA_statistics = MEMORY_DA_CONFIGS.subcache(make_DA_statistics)
# make_DA_plot = MEMORY_DA_CONFIGS.subcache(make_DA_plot)
# make_DA_all = MEMORY_DA_CONFIGS.subcache(make_DA_all)

### Make DataAnalysis objects for testing
#' NOTE: these run at import time — every test module importing this file
#' pays the full statistics/plotting cost once (caching above is disabled)
DA_STATISTICS: plst.DataAnalysis = make_DA_statistics("qpcr")
DA_PLOT: plst.DataAnalysis = make_DA_plot("qpcr")
DA_ALL: plst.DataAnalysis = make_DA_all("qpcr")
227 |
228 |
229 | # %%
230 | # == Utils =============================================================
231 |
232 |
233 | ### (DF, dims) -> (DF, dims, kwargs)
def add_zip_column(zipped: list[tuple], column: list) -> list[tuple]:
    """Appends one element of `column` to each tuple in `zipped`. Useful for
    adding a list of arguments to a list of dataframes and dimensions,
    e.g.: (DF, dims) -> (DF, dims, kwargs)

    :param zipped: A set of dataframes and dimensions in this shape
        [(df, dim), (df, dim), ...] ready to be used in pytest.parametrize
    :type zipped: list[tuple]
    :param column: New elements, one per tuple in zipped; must be the same
        length as zipped
    :type column: list
    :return: (DF, dims) -> (DF, dims, kwargs)
    :rtype: list[tuple]
    """

    assert len(zipped) == len(column), "zipped and column must be same length"

    return [tup + (extra,) for tup, extra in zip(zipped, column)]
253 |
254 |
def cleanfiles(fname: str):
    """Delete all files (cwd-relative) whose names start with ``fname``.

    :param fname: Filename prefix; every file matching ``fname*`` is removed.
    """
    from glob import escape  #' escape [, ?, * so the prefix matches literally

    testfiles = glob(escape(fname) + "*")
    for file in testfiles:
        os.remove(file)
260 |
--------------------------------------------------------------------------------
/tests/DA_configs2.py:
--------------------------------------------------------------------------------
1 |
### They are all non-empty
# NOTE(review): "unpaired" appears twice — possibly one entry was meant to be
# "parametric_unpaired"; confirm against the consumers of StatTestCases.
StatTestCases = [
    "all",
    "paired",
    "unpaired",
    "unpaired",
    "parametric_paired",
]
--------------------------------------------------------------------------------
/tests/DA_utils.py:
--------------------------------------------------------------------------------
1 | """A utility class that creates DataAnalysis objects for testing"""
2 | # %%
3 |
4 |
5 | import pandas as pd
6 |
7 | import plotastic as plst
8 | from plotastic.dataanalysis.dataanalysis import DataAnalysis
9 | from plotastic.utils.subcache import SubCache
10 |
11 | import DA_configs as dac
12 |
13 | # %%
14 | # == Class CreateDA ====================================================
15 |
16 |
class TestDA(DataAnalysis):
    """Thin DataAnalysis subclass bundling the statistics calls used in tests."""

    def __init__(
        self,
        data: pd.DataFrame,
        dims: dict,
        subject: str | None = None,
        levels: list[tuple[str]] | None = None,
        title: str = "untitled",
        verbose: bool = False,
    ) -> None:
        kws = dict(
            data=data,
            dims=dims,
            subject=subject,
            levels=levels, #' Introduced by DataFrameTool
            title=title, #' Introduced by DataAnalysis
            verbose=verbose, #' Introduced by DataAnalysis
        )

        super().__init__(**kws)

    def perform_statistics_unpaired(self, parametric=True) -> "TestDA":
        """Perform unpaired statistics: assumption checks, omnibus, posthoc."""
        ### Assumptions
        self.check_normality()
        self.check_homoscedasticity()

        ### Omnibus
        if parametric:
            self.omnibus_anova()
        else:
            self.omnibus_kruskal()

        ### PostHoc
        self.test_pairwise(parametric=parametric)

        return self

    def perform_statistics_paired(self, parametric=True) -> "TestDA":
        """Perform paired statistics: assumption checks (incl. sphericity),
        omnibus, posthoc. (Fixed copy-pasted 'unpaired' docstring.)"""
        ### Assumptions
        self.check_normality()
        self.check_homoscedasticity()
        self.check_sphericity()

        ### Omnibus
        # NOTE(review): this calls omnibus_anova() like the unpaired variant;
        # a repeated-measures design would suggest omnibus_rm_anova() — confirm.
        if parametric:
            self.omnibus_anova()
        else:
            self.omnibus_kruskal()

        ### PostHoc
        self.test_pairwise(parametric=parametric)

        return self
72 |
73 |
74 |
if __name__ == "__main__":
    pass
    # %%
    ### Interactive scratch: build a TestDA from the shared tips example config
    dims = dac.dims_withempty_tips[0]
    data = dac.DF_tips
    DA = TestDA(data=data, dims=dims)
81 |
--------------------------------------------------------------------------------
/tests/_annotator_test.py:
--------------------------------------------------------------------------------
1 | #
2 | # %% Imports
3 |
4 | import matplotlib.pyplot as plt
5 | import pytest
6 |
7 | import plotastic as plst
8 | # from plotastic.dataanalysis.annotator import Annotator
9 |
10 |
11 | import DA_configs as dac
12 |
13 |
14 | # %% testing for dataset TIPS
15 |
# !! Don't use with empty groups
# !! We chose exclusions that won't show in the plot, but we need those arguments to test it
### One kwargs-dict per (DF, dims) config in dac.zipped_noempty_tips.
### Plain entries are level names; dicts appear to map a level to a specific
### level pair — confirm against annotate_pairwise's include/exclude docs.
TIPS_annot_pairwise_kwargs = [
    dict(
        include=["Yes", {"1-2": ("Yes", "No")}],
        exclude=["No", {"Yes": ("1-2", ">=3")}],
        include_in_facet={
            ("Lunch", "Male"): ["Yes", {">=3": ("Yes", "No")}],
            ("Lunch", "Female"): ["No", {"No": ("1-2", ">=3")}],
        },
        exclude_in_facet={
            ("Lunch", "Male"): ["Yes", {">=3": ("No", "Yes")}],
            ("Lunch", "Female"): ["No", {"Yes": ("1-2", ">=3")}],
        },
    ),
    dict(
        include=["Yes", {"1-2": ("Yes", "No")}],
        exclude=["No", {"Yes": ("1-2", ">=3")}],
        include_in_facet={
            "Male": ["Yes", {">=3": ("Yes", "No")}],
            "Female": ["No", {"No": ("1-2", ">=3")}],
        },
        exclude_in_facet={
            "Male": ["Yes", {">=3": ("No", "Yes")}],
            "Female": ["No", {"Yes": ("1-2", ">=3")}],
        },
    ),
    dict(
        include=["Yes", {"1-2": ("Yes", "No")}],
        exclude=["No", {"Yes": ("1-2", ">=3")}],
    ),
    dict(
        include=["1-2"],
        exclude=[">=3"],
    ),
]

### Add a column of args: (DF, dims) -> (DF, dims, kwargs)
zipped_tips: list[tuple] = dac.add_zip_column(
    dac.zipped_noempty_tips, TIPS_annot_pairwise_kwargs
)
57 |
58 |
@pytest.mark.parametrize("DF, dims, annot_kwargs", zipped_tips)
def test_pairwiseannotations_tips(DF, dims, annot_kwargs):
    """Pairwise annotations render on tips (unpaired, no p-adjustment)."""
    da = plst.DataAnalysis(data=DF, dims=dims, verbose=True)
    _ph = da.test_pairwise(paired=False, padjust="none")
    da = da.subplots().fillaxes(kind="box").annotate_pairwise(
        show_ph=False,
        only_sig="all",
        **annot_kwargs,
    )
    ### Keep figures open only when run interactively
    if __name__ != "__main__":
        plt.close()
75 |
76 |
77 | # %% Testing for dataset FMRI
78 |
### One kwargs-dict per (DF, dims) config in dac.zipped_noempty_fmri
FMRI_annot_pairwise_kwargs = [
    dict(
        include=[0, "stim"],
        exclude=[1, {"stim": (0, 2)}],
        include_in_facet={
            "frontal": [0, "cue", {"stim": (3, 4)}],
            "parietal": [0, "cue", {"stim": (4, 6)}],
        },
        exclude_in_facet={
            "frontal": [2, "cue", {"stim": (3, 7)}],
            "parietal": [4, "stim", {"stim": (2, 9)}],
        },
    ),
    dict(
        include=[0, "frontal"],
        exclude=[1, {"frontal": (0, 2)}],
        include_in_facet={
            "stim": [0, "frontal", {"parietal": (3, 4)}],
            "cue": [0, "parietal", {"frontal": (4, 6)}],
        },
        exclude_in_facet={
            "stim": [2, "parietal", {"frontal": (3, 7)}],
            "cue": [4, "frontal", {"parietal": (2, 9)}],
        },
    ),
    dict(
        include=[0, "frontal"],
        exclude=[1, {"frontal": (0, 2)}],
    ),
    dict(
        include=[0, "cue"],
        exclude=[1, {"stim": (0, 2)}],
    ),
    dict(
        include=[0, 2],
        exclude=[1],
    ),
]

### Add a column of args: (DF, dims) -> (DF, dims, kwargs)
zipped_fmri: list[tuple] = dac.add_zip_column(
    dac.zipped_noempty_fmri, FMRI_annot_pairwise_kwargs
)
122 |
123 |
@pytest.mark.parametrize("DF, dims, annot_kwargs", zipped_fmri)
def test_pairwiseannotations_fmri(DF, dims, annot_kwargs):
    """Pairwise annotations render on fmri (paired by subject, Bonferroni)."""
    da = plst.DataAnalysis(
        data=DF, dims=dims, verbose=True, subject="subject"
    ) # !! subject
    _ph = da.test_pairwise(paired=True, padjust="bonf")
    da = da.subplots().fillaxes(kind="box").annotate_pairwise(
        show_ph=False,
        only_sig="strict",
        **annot_kwargs,
    )
    ### Keep figures open only when run interactively
    if __name__ != "__main__":
        plt.close()
142 |
143 |
144 | # %% For dataset qPCR
145 |
146 |
### One kwargs-dict per (DF, dims) config in dac.zipped_noempty_qpcr.
# NOTE(review): the first two dicts are identical — each pairs with a
# different (DF, dims) config, so this may be intentional; confirm.
QPCR_annot_pairwise_kwargs = [
    dict(
        include=["F1", "LOXL2", "SOST"],
        exclude=["F2", {"MMP7": ("F1", "F3")}],
        include_in_facet={
            "MMPs": ["MMP7", {"MMP9": ("F1", "F2")}],
            "Bone Metabolism": ["SOST", "F2", {"TIMP1": ("F3", "F1")}],
        },
        exclude_in_facet={
            "Wash": ["MMP7", {"MMP9": ("F1", "F2")}],
            "MACS": ["SOST", {"JAK2": ("F1", "F2")}],
        },
    ),
    dict(
        include=["F1", "LOXL2", "SOST"],
        exclude=["F2", {"MMP7": ("F1", "F3")}],
        include_in_facet={
            "MMPs": ["MMP7", {"MMP9": ("F1", "F2")}],
            "Bone Metabolism": ["SOST", "F2", {"TIMP1": ("F3", "F1")}],
        },
        exclude_in_facet={
            "Wash": ["MMP7", {"MMP9": ("F1", "F2")}],
            "MACS": ["SOST", {"JAK2": ("F1", "F2")}],
        },
    ),
    dict(
        include="__HUE",
        exclude=["F2", {"MMP7": ("F1", "F3")}],
    ),
    dict(
        include="__X",
        exclude=["F2", {"MMP7": ("F1", "F3")}],
    ),
    dict(
        include=["Vimentin", "MMP7"],
        exclude=["FZD4"],
    ),
]

zipped_qpcr: list[tuple] = dac.add_zip_column(
    dac.zipped_noempty_qpcr, QPCR_annot_pairwise_kwargs
)
189 |
190 |
@pytest.mark.parametrize("DF, dims, annot_kwargs", zipped_qpcr)
def test_pairwiseannotation_qpcr(DF, dims, annot_kwargs):
    """Pairwise annotations on qPCR data with a log10-transformed y-axis."""
    DA = plst.DataAnalysis(data=DF, dims=dims, verbose=True)
    # NOTE(review): unlike the tips/fmri tests, `subject` is passed to
    # test_pairwise() instead of the DataAnalysis constructor — confirm that
    # test_pairwise() accepts it; with paired=False it may be unused.
    _ph = DA.test_pairwise(paired=False, padjust="none", subject="subject")
    DA = (
        DA.subplots(sharey=False, figsize=(10, 10))
        .fillaxes(kind="box")
        .transform_y("log10") # !! log transform
        .edit_y_scale_log(10) # !! MUST be called before annotation!
        .annotate_pairwise(
            # include="__HUE",
            show_ph=False,
            only_sig="tolerant",
            **annot_kwargs,
        )
        # .edit_tight_layout() # !! just uglier
    )
    ### Don't plot while executing pytest in terminal
    if __name__ != "__main__":
        plt.close()
211 |
212 |
### Run without pytest
if __name__ == "__main__":
    # !! The top-of-file `Annotator` import is commented out, so this block
    # !! crashed with a NameError when the file was run as a script.
    # !! Import it locally to keep pytest's import of this module lightweight.
    from plotastic.dataanalysis.annotator import Annotator

    DF, dims = plst.load_dataset("qpcr")
    AN = Annotator(data=DF, dims=dims, verbose=True)
    AN.levels_dendrogram()
    test_pairwiseannotation_qpcr(
        DF, dims, annot_kwargs=QPCR_annot_pairwise_kwargs[0]
    )
221 |
# %% Interactive testing to display Plots

### Run this file's tests via ipytest when executed as a script/notebook
if __name__ == "__main__":
    import ipytest

    ipytest.run()
228 |
--------------------------------------------------------------------------------
/tests/_assumptions_test.py:
--------------------------------------------------------------------------------
1 | #
2 | # %% imports
3 |
4 | import seaborn as sns
5 | import pandas as pd
6 |
7 | import pytest
8 |
9 |
10 | # import markurutils as ut
11 | # import plotastic as plst
12 | from plotastic import DataAnalysis
13 |
14 | import DA_configs as dac
15 |
16 |
17 | # %% Test per config
18 |
19 |
@pytest.mark.parametrize("DF, dims", dac.zipped_noempty_ALL)
def test_normality(DF: pd.DataFrame, dims):
    """check_normality() runs cleanly on every non-empty config."""
    DataAnalysis(data=DF, dims=dims, verbose=True).check_normality()
24 |
25 |
@pytest.mark.parametrize("DF, dims", dac.zipped_noempty_ALL)
def test_homoscedasticity(DF, dims):
    """check_homoscedasticity() runs cleanly on every non-empty config."""
    DataAnalysis(data=DF, dims=dims, verbose=True).check_homoscedasticity()
30 |
31 |
@pytest.mark.parametrize("DF, dims", dac.zipped_noempty_PAIRED)
def test_sphericity(DF, dims):
    """check_sphericity() runs cleanly on every paired config (needs subject)."""
    da = DataAnalysis(data=DF, dims=dims, verbose=True, subject="subject")
    da.check_sphericity()
36 |
37 |
38 | # %% interactive testing to display Plots
39 |
### Run the above tests interactively (e.g. from a notebook cell)
if __name__ == "__main__":
    import ipytest

    ipytest.run()
44 |
--------------------------------------------------------------------------------
/tests/_dims_test.py:
--------------------------------------------------------------------------------
1 | #
2 | # %% imports
3 | import plotastic as plst
4 | import unittest
5 |
6 |
# %% Import Test Data
DF, dims = plst.load_dataset("tips") #' Import Data
# NOTE(review): this module-level DA is not used by the unittest below
# (test_switching builds its own) — presumably kept for interactive cells.
DA = plst.DataAnalysis(
    data=DF, dims=dims, title="tips"
) #' Make DataAnalysis Object
12 |
13 |
14 | # %% Unit Tests
15 |
16 |
class TestDataAnalysis(unittest.TestCase):
    """Unit tests for dimension switching on DataAnalysis."""

    def test_switching(self):
        """switch() applies only inside the chain; dims are restored after.

        (Removed leftover print-debug lines that polluted test output.)
        """
        v = False
        data, dims = plst.load_dataset("tips", verbose=v)
        DA = plst.DataAnalysis(data, dims, verbose=v)

        ### x before, inside, and after a chained switch
        x, E1 = DA.dims.x, "size-cut"
        x_inchain, E2 = DA.switch("x", "hue", verbose=v).dims.x, "smoker"
        x_after_chaining, E3 = DA.dims.x, "size-cut"

        self.assertEqual(x, E1)
        self.assertEqual(x_inchain, E2)
        self.assertEqual(x_after_chaining, E3)
34 |
35 |
36 | # %% __name__ == "__main__"
37 |
### Standard unittest entry point
if __name__ == "__main__":
    unittest.main()
40 |
--------------------------------------------------------------------------------
/tests/_dimsandlevels_test.py:
--------------------------------------------------------------------------------
1 | # %% Imports
2 | import pytest
3 | import matplotlib.pyplot as plt
4 |
5 | import plotastic as plst
6 |
7 | # from . import configs as ct
8 | import DA_configs as dac
9 |
10 |
11 | # %%
12 | # == Dendrogram ========================================================
@pytest.mark.parametrize("DF, dims", dac.zipped_noempty_ALL)
def test_levels_dendrogram(DF, dims):
    """Dendrogram of level combos renders for every faceted config."""
    ### With just y and x (no hue/col/row) there are no combos worth showing
    if len(dims.keys()) != 2:
        plst.DataAnalysis(data=DF, dims=dims).levels_dendrogram()

    ### Show when interactive, close under pytest
    if __name__ == "__main__":
        plt.show() #' show plot, otherwise too many figures
    else:
        plt.close()
25 |
26 |
27 | # %%
28 | # == Test combocounts ==================================================
29 |
30 |
@pytest.mark.parametrize("DF, dims", dac.zipped_noempty_ALL)
def test_levels_combocounts(DF, dims):
    """Combo-count heatmap renders for every faceted config."""
    ### With just y and x (no hue/col/row) there are no combos worth counting
    if len(dims.keys()) != 2:
        plst.DataAnalysis(data=DF, dims=dims).levels_combocounts()

    ### Show when interactive, close under pytest
    if __name__ == "__main__":
        plt.show() #' show plot, otherwise too many figures
    else:
        plt.close()
43 |
44 |
if __name__ == "__main__":
    import pandas as pd

    ### Interactive scratch cell: only the LAST load ("fmri") takes effect;
    ### the earlier two lines are kept to switch datasets by hand.
    DF, dims = plst.load_dataset("qpcr", verbose=False)
    DF, dims = plst.load_dataset("tips", verbose=False)
    DF, dims = plst.load_dataset("fmri", verbose=False)

    ### Init DataAnalysis object
    DA = plst.DataAnalysis(data=DF, dims=dims)

    DA._count_levelcombos()

    ### Bare expressions: evaluated for display in an interactive session
    DA.levelkeys
    len(DA.levelkeys)
    DA.levels_combocounts()
    DA.levels_dendrogram()
61 |
62 | # %% run interactively
63 |
### Run the above tests interactively
if __name__ == "__main__":
    import ipytest

    ipytest.run()
68 |
69 |
70 | # %%
71 |
--------------------------------------------------------------------------------
/tests/_filer_test.py:
--------------------------------------------------------------------------------
1 | #
2 | # %% imports
3 |
4 | import pytest
5 |
6 | import pandas as pd
7 |
8 | import os
9 | from glob import glob
10 | from pathlib import Path
11 |
12 | import plotastic as plst
13 |
14 | import DA_configs as dac
15 |
16 |
17 | # %%
18 | import IPython
19 |
20 | IPython.extract_module_locals()[1].get("__vsc_ipynb_file__")
21 |
22 |
# %% Test

DF, dims = plst.load_dataset("tips", verbose=False)
DA = plst.DataAnalysis(DF, dims, verbose=False)
### Fully computed DA (statistics already run) from the shared configs module
DA_COMPLETE = dac.DA_STATISTICS
28 |
29 |
30 | # %% Test prevent_overwrite
31 |
32 |
def test_prevent_overwrite():
    """filer.prevent_overwrite() must append the current day or an increasing
    integer suffix, while ignoring files whose names merely resemble the
    target prefix (the 'distraction' files)."""
    ### Define a name
    testfile_name = "_FILE_123"
    distraction_names = [
        "_FILE_",
        "__FILE_",
        "_FILE_12",
        "_FIL_12",
    ]

    def mk_testfiles(testfile_name) -> str:
        """Create <name>.xlsx plus a <name>.txt distraction; returns the name."""
        ### Make a testfile excel
        df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        df.to_excel(testfile_name + ".xlsx")

        ### Make a testfile text as a distraction
        with open(testfile_name + ".txt", "w") as f:
            f.write("I am an text file")

        return testfile_name

    ### Cleanup before testing
    dac.cleanfiles(testfile_name)
    for name in distraction_names:
        dac.cleanfiles(name)

    ### Make Distraction Files
    for name in distraction_names:
        mk_testfiles(name)

    # == TEST 0: mode="day"
    kws = dict(overwrite="day")
    new = DA.filer.prevent_overwrite(testfile_name, **kws)
    assert (
        new == testfile_name + f"_{DA.filer.current_day}"
    ), f"new_name = {new}, testfile_name = {testfile_name}"

    # == TEST 1: mode="nothing"
    kws = dict(overwrite="nothing")

    ### If NO file exists, it should return the same name with _0
    new = DA.filer.prevent_overwrite(testfile_name, **kws)
    assert (
        new == testfile_name + "_0"
    ), f"new_name = {new}, testfile_name = {testfile_name}"

    ### If a file EXISTS, it should return the same name with _0
    tested = mk_testfiles(testfile_name)
    new = DA.filer.prevent_overwrite(testfile_name, **kws)
    assert (
        new == testfile_name + "_0"
    ), f"new_name = {new}, testfile_name = {tested}"

    ### If a file with _0 exists, it should return a new name with _1
    tested = mk_testfiles(new) #' "testfile_name_0"
    new = DA.filer.prevent_overwrite(testfile_name, **kws)
    assert (
        new == testfile_name + "_1"
    ), f"new_name = {new}, testfile_name = {tested}"

    ### If a file with _1 exists, it should return a new name with _2
    tested = mk_testfiles(new) #' "testfile_name_1"
    new = DA.filer.prevent_overwrite(testfile_name, **kws)
    assert (
        new == testfile_name + "_2"
    ), f"new_name = {new}, testfile_name = {tested}"

    # == Cleanup
    dac.cleanfiles(testfile_name)
    for name in distraction_names:
        dac.cleanfiles(name)
104 |
105 |
if __name__ == "__main__":
    test_prevent_overwrite()
    ### Interactive sanity checks of the working directory (results discarded)
    os.getcwd()
    Path.cwd()

# %%
### Run the above test interactively
if __name__ == "__main__":
    import ipytest

    ipytest.run()
116 |
--------------------------------------------------------------------------------
/tests/_hierarchical_dims_test.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/tests/_hierarchical_dims_test.py
--------------------------------------------------------------------------------
/tests/_load_dataset_test.py:
--------------------------------------------------------------------------------
1 | #
2 | # %% Imports
3 |
4 | import pytest
5 |
6 | import plotastic as plst
7 | from plotastic.example_data.load_dataset import FILES
8 |
9 | # %%
10 |
parameters = list(FILES)  #' bundled dataset names (was a redundant comprehension over .keys())
12 |
13 |
@pytest.mark.parametrize("name", parameters)
def test_load_dataset(name: str):
    """Smoke test: every packaged dataset loads after correct packaging in setup.py."""
    _df, _dims = plst.load_dataset(name, verbose=True)
18 |
19 |
20 | # %%
21 |
### Run the above test interactively
if __name__ == "__main__":
    import ipytest
    ipytest.run()
25 |
--------------------------------------------------------------------------------
/tests/_multiplot_test.py:
--------------------------------------------------------------------------------
1 | #
2 | # %%
3 |
4 | import pytest
5 | import ipytest
6 | import matplotlib.pyplot as plt
7 |
8 | import plotastic as plst
9 |
10 | import DA_configs as dac
11 |
12 |
13 | # %%
@pytest.mark.parametrize("DF, dims", dac.zipped_ALL)
def test_plot(DF, dims):
    """Default plot() renders for every dataset/dims config."""
    plst.DataAnalysis(data=DF, dims=dims, verbose=False).plot()
    if __name__ != "__main__":
        plt.close("all")
    else:
        plt.show()


if __name__ == "__main__":
    ipytest.run()
26 |
27 |
28 | # %%
@pytest.mark.parametrize("DF, dims", dac.zipped_ALL)
def test_box_strip(DF, dims):
    """Box+strip multiplot renders for every dataset/dims config."""
    plst.DataAnalysis(data=DF, dims=dims, verbose=False).plot_box_strip()
    if __name__ != "__main__":
        plt.close("all")
    else:
        plt.show()


if __name__ == "__main__":
    ipytest.run()
41 |
42 |
43 | # %%
@pytest.mark.parametrize("DF, dims", dac.zipped_ALL)
def test_box_swarm(DF, dims):
    """Box+swarm multiplot renders for every dataset/dims config.

    !! Fixed: was named `plot_box_swarm` (missing `test_` prefix), so pytest
    !! never collected it — and its body called plot_box_strip() instead of
    !! plot_box_swarm().
    """
    DA = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
    DA.plot_box_swarm()
    if __name__ == "__main__":
        plt.show()
    else:
        plt.close("all")


if __name__ == "__main__":
    ipytest.run()
56 |
--------------------------------------------------------------------------------
/tests/_omnibus_test.py:
--------------------------------------------------------------------------------
1 | #
2 | # %% imports
3 |
4 | import warnings
5 |
6 | import seaborn as sns
7 | import pandas as pd
8 |
9 | import pytest
10 |
11 |
12 | # import markurutils as ut
13 | # import plotastic as plst
14 | from plotastic import DataAnalysis
15 |
16 | import DA_configs as dac
17 |
18 |
19 | # %% Test per config
20 |
21 |
@pytest.mark.parametrize("DF, dims", dac.zipped_noempty_ALL)
def test_omnibus_anova(DF: pd.DataFrame, dims):
    """omnibus_anova() runs cleanly on every non-empty config."""
    DataAnalysis(data=DF, dims=dims, verbose=True).omnibus_anova()
26 |
27 |
# !! Warnings happen when groups have only one sample
@pytest.mark.filterwarnings("ignore::RuntimeWarning")
@pytest.mark.parametrize("DF, dims", dac.zipped_noempty_PAIRED)
def test_omnibus_rm_anova(DF, dims):  #' fixed typo: was test_omnibus_rm_amova
    """omnibus_rm_anova() runs cleanly on every paired, non-empty config."""
    DA = DataAnalysis(data=DF, dims=dims, subject="subject", verbose=True)
    DA.omnibus_rm_anova()
34 |
35 |
@pytest.mark.parametrize("DF, dims", dac.zipped_noempty_ALL)
def test_omnibus_kruskal(DF, dims):
    """omnibus_kruskal() runs cleanly on every non-empty config."""
    DataAnalysis(data=DF, dims=dims, verbose=True).omnibus_kruskal()
40 |
41 |
# @pytest.mark.filterwarnings("ignore::RuntimeWarning")
@pytest.mark.parametrize("DF, dims", dac.zipped_noempty_PAIRED)
def test_omnibus_friedman(DF, dims):
    """omnibus_friedman() runs cleanly on every paired, non-empty config."""
    da = DataAnalysis(data=DF, dims=dims, subject="subject", verbose=True)
    da.omnibus_friedman()
47 |
48 |
49 | # %% interactive testing to display Plots
50 |
### Run the above tests interactively
if __name__ == "__main__":
    import ipytest

    ipytest.run()
55 |
--------------------------------------------------------------------------------
/tests/_paper_test.py:
--------------------------------------------------------------------------------
### IMPORT PLOTASTIC
# NOTE(review): this file defines no `test_` functions — pytest executes the
# entire script at collection/import time. That works as a smoke test of the
# paper example, but any failure surfaces as a collection error, not a test
# failure.
import plotastic as plst

# IMPORT EXAMPLE DATA
DF, _dims = plst.load_dataset("fmri", verbose=False)
# EXPLICITLY DEFINE DIMENSIONS TO FACET BY
dims = dict(
    y="signal", # y-axis, dependent variable
    x="timepoint", # x-axis, independent variable (within-subject factor)
    hue="event", # color, independent variable (within-subject factor)
    col="region", # axes, grouping variable
)
# INITIALIZE DATAANALYSIS OBJECT
DA = plst.DataAnalysis(
    data=DF, # Dataframe, long format
    dims=dims, # Dictionary with y, x, hue, col, row
    subject="subject", # Datapoints are paired by subject (optional)
    verbose=False, # Print out info about the Data (optional)
)
# STATISTICAL TESTS
DA.check_normality() # Check Normality
DA.check_sphericity() # Check Sphericity
DA.omnibus_rm_anova() # Perform RM-ANOVA
DA.test_pairwise() # Perform Posthoc Analysis
# PLOTTING
(
    DA.plot_box_strip().annotate_pairwise( # Pre-built plotting function initializes plot # Annotate results from DA.test_pairwise()
        include="__HUE" # Use only significant pairs across each hue
    )
)


### BACK-CHECK
# NOTE(review): this seaborn figure is never closed — fine for a one-off
# script, but it stays open for the rest of the pytest session.
import seaborn as sns
sns.catplot(data=DF, **_dims)
--------------------------------------------------------------------------------
/tests/_plotedits_test.py:
--------------------------------------------------------------------------------
1 | #
2 | # %%
3 |
4 | import pytest
5 | import ipytest
6 | import matplotlib.pyplot as plt
7 |
8 | import plotastic as plst
9 |
10 | import DA_configs as dac
11 |
12 |
13 | # %%
### Per-config axes-title specs; None means "no titles to set" for that config
titles_tips = [
    {("Lunch", "Male"): "blaa"},
    {("Male"): "blAA"},  # NOTE(review): ("Male") is just the string "Male", not a 1-tuple
    None,
    None,
]
zipped_tips = dac.add_zip_column(dac.zipped_noempty_tips, titles_tips)
21 |
22 |
@pytest.mark.parametrize("DF, dims, axtitles", zipped_tips)
def test_edit_titles(DF, dims, axtitles: dict):
    """edit_titles() sets per-axes titles (requires row/col facetting)."""
    da = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
    if da.factors_rowcol: #' Need facetting, otherwise no axes
        da.plot()
        da.edit_titles(axtitles=axtitles)
    if __name__ != "__main__":
        plt.close("all")
    else:
        plt.show()


if __name__ == "__main__":
    ipytest.run()
37 |
38 |
39 | # %%
@pytest.mark.parametrize("DF, dims", dac.zipped_ALL)
def test_edit_titles_with_func(DF, dims):
    """Axes titles can be generated from row/col level names via functions."""
    da = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
    if da.factors_rowcol: #' Need facetting, otherwise no axes
        da.plot().edit_titles_with_func(
            row_func=lambda x: x.upper(),
            col_func=lambda x: "hä",
            connect=" || ",
        )

    if __name__ != "__main__":
        plt.close("all")
    else:
        plt.show()


if __name__ == "__main__":
    ipytest.run()
58 |
59 | # %%
### Per-config replacement titles; None entries leave the title untouched
# NOTE(review): rebinds the module-level `titles_tips`/`zipped_tips` from
# above — harmless (parametrize captured the old values at decoration time),
# but distinct names would be clearer.
titles_tips = [
    ["sdfsfd", None, "dd", None],
    [None, "aa"],
    None,
    None,
]
zipped_tips = dac.add_zip_column(dac.zipped_noempty_tips, titles_tips)
67 |
68 |
@pytest.mark.parametrize("DF, dims, titles", zipped_tips)
def test_edit_titles_replace(DF, dims, titles: dict):
    """edit_titles_replace() swaps axes titles from a per-config list."""
    da = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
    if da.factors_rowcol: #' Need facetting, otherwise no axes
        da.plot().edit_titles_replace(titles=titles)
    if __name__ != "__main__":
        plt.close("all")
    else:
        plt.show()


if __name__ == "__main__":
    ipytest.run()
82 |
83 |
84 | # %%
@pytest.mark.parametrize("DF, dims", dac.zipped_ALL)
def test_edit_xy_axis_labels(DF, dims):
    """All x/y label slots (lowest row, leftmost col, …) are settable."""
    da = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
    da.plot().edit_xy_axis_labels(
        x="ui!",
        x_lowest_row="rambazamba",
        x_notlowest_row="FLOH",
        y="Johannes",
        y_leftmost_col="Gertrude",
        y_notleftmost_col="Hä?",
    )
    if __name__ != "__main__":
        plt.close("all")
    else:
        plt.show()


if __name__ == "__main__":
    ipytest.run()
104 |
105 |
106 | # %%
@pytest.mark.parametrize("DF, dims", dac.zipped_ALL)
def test_edit_y_scale_log(DF, dims):
    """y-axis switches to log scale (base 2)."""
    da = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
    da.plot().edit_y_scale_log(base=2)
    if __name__ != "__main__":
        plt.close("all")
    else:
        plt.show()


if __name__ == "__main__":
    ipytest.run()
119 |
120 |
121 | # %%
@pytest.mark.parametrize("DF, dims", dac.zipped_ALL)
def test_edit_y_ticklabel_percentage(DF, dims):
    """y ticklabels are formatted as percentages."""
    da = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
    da.plot().edit_y_ticklabel_percentage(
        decimals_major=1,
        decimals_minor=1, # !! Not working
    )
    if __name__ != "__main__":
        plt.close("all")
    else:
        plt.show()


if __name__ == "__main__":
    ipytest.run()
137 |
138 |
139 | # %%
@pytest.mark.parametrize("DF, dims", dac.zipped_ALL)
def test_edit_y_ticklabels_log_minor(DF, dims):
    """Minor ticklabels appear at the requested sub-positions on a log axis."""
    da = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
    da.plot().edit_y_scale_log(base=2).edit_y_ticklabels_log_minor(
        subs=[2, 3, 5, 7],
    )
    if __name__ != "__main__":
        plt.close("all")
    else:
        plt.show()


if __name__ == "__main__":
    ipytest.run()
154 |
155 |
156 | # %%
### x-ticklabel replacements, two labels per config (rebinds zipped_tips again)
labels_zip = [
    ["sdfsfd", "dddd"],
    ["sdfsfd", "dddd"],
    ["sdfsfd", "dddd"],
    ["sdfsfd", "dddd"],
]
zipped_tips = dac.add_zip_column(dac.zipped_noempty_tips, labels_zip)
164 |
165 |
@pytest.mark.parametrize("DF, dims, labels", zipped_tips)
def test_edit_x_ticklabels_exchange(DF, dims, labels):
    """x-ticklabels replaced from a list; lowest row gets uppercased labels."""
    # NOTE(review): a second function further down in this file re-uses this
    # exact name, shadowing this test at import time — pytest never runs it.
    DA = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
    DA.plot().edit_x_ticklabels_exchange(
        labels=labels,
        labels_lowest_row=[l.upper() for l in labels],
    )
    if __name__ == "__main__":
        plt.show()
    else:
        plt.close("all")


if __name__ == "__main__":
    ipytest.run()
181 |
182 |
183 | # %%
@pytest.mark.parametrize("DF, dims", dac.zipped_ALL)
def test_edit_x_ticklabels_rotate(DF, dims):
    """x-ticklabels can be rotated with custom alignment and padding.

    !! Renamed: this was a second `test_edit_x_ticklabels_exchange`, which
    !! shadowed the exchange test above so pytest only ever ran this one.
    """
    DA = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
    DA.plot().edit_x_ticklabels_rotate(
        rotation=75,
        ha="center",
        # va="top",
        pad=0.1,
    )
    if __name__ == "__main__":
        plt.show()
    else:
        plt.close("all")


if __name__ == "__main__":
    ipytest.run()
201 |
202 | # %%
203 | plt.close("all")
204 |
205 |
206 | # %%
@pytest.mark.parametrize("DF, dims", dac.zipped_ALL)
def test_edit_grid(DF, dims):
    """Major/minor y and major x gridlines accept custom line styles."""
    plt.close()
    da = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
    #' log scale so minor gridlines are visible
    da.plot().edit_y_scale_log(base=2).edit_grid(
        y_major_kws=dict(ls="--", linewidth=0.5, c="grey"),
        y_minor_kws=dict(ls=":", linewidth=0.2, c="grey"),
        x_major_kws=dict(ls="--", linewidth=0.6, c="grey"),
    )
    if __name__ != "__main__":
        plt.close("all")
    else:
        plt.show()


if __name__ == "__main__":
    ipytest.run()
228 |
229 |
230 | # %%
@pytest.mark.parametrize("DF, dims", dac.zipped_ALL)
def test_edit_legend(DF, dims):
    """Legend can be rebuilt with custom title/placement (hue configs only)."""
    da = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
    if da.dims.hue:
        da.plot().edit_legend(
            reset_legend=True,
            title="HUI",
            loc="upper right",
            bbox_to_anchor=(1.3, 1),
            borderaxespad=1,
            # pad=0.5,
            frameon=True,
        )

    if __name__ != "__main__":
        plt.close("all")
    else:
        plt.show()


if __name__ == "__main__":
    ipytest.run()
253 |
254 |
@pytest.mark.parametrize("DF, dims", dac.zipped_ALL)
def test_edit_fontsizes(DF, dims):
    """Fontsizes of ticklabels, xy-labels and axis titles are adjustable."""
    plt.close()
    da = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
    da.plot().edit_fontsizes(
        ticklabels=14,
        xylabels=16,
        axis_titles=18,
    )

    if __name__ != "__main__":
        plt.close("all")
    else:
        plt.show()


if __name__ == "__main__":
    ipytest.run()
274 |
--------------------------------------------------------------------------------
/tests/_rc_test.py:
--------------------------------------------------------------------------------
1 | #
2 | # %%
3 | # import pytest
4 | import ipytest
5 | import matplotlib.pyplot as plt
6 |
7 | import plotastic as plst
8 |
9 | import DA_configs as dac
10 |
11 | # %%
12 |
13 |
def test_rc():
    """Global palette/style settings apply without breaking plotting."""
    plst.set_palette("Set2")
    plst.set_style("paper")

    dac.DA_ALL.plot_box_strip()

    if __name__ != "__main__":
        plt.close("all")
    else:
        plt.show()


if __name__ == "__main__":
    ipytest.run()
30 |
--------------------------------------------------------------------------------
/tests/_save_test.py:
--------------------------------------------------------------------------------
1 | #
2 | # %% imports
3 | from typing import Callable
4 |
5 | import os
6 | from glob import glob
7 |
8 | import pytest
9 |
10 | # import seaborn as sns
11 | # import pandas as pd
12 |
13 | # import markurutils as ut
14 | import plotastic as plst
15 |
16 |
17 | import DA_configs as dac
18 |
19 |
20 | # %% testfigure
21 | # import matplotlib.pyplot as plt
22 | # import numpy as np
23 | # fig, ax = plt.subplots(2,2)
24 |
25 | # fig.get_axes()
26 | # fig.axes
27 |
28 | # %% Test
29 |
30 |
DA = dac.DA_STATISTICS

### Saving methods under test; broken ones stay listed (commented) on purpose
funcs = [
    DA.save_statistics,
    # DA.save_fig, # !! Not working, but let's keep it for now
    # DA.save_all, # !! Not working
]
39 |
@pytest.mark.parametrize("func", funcs)
def test_save(func: Callable, lastcleanup=True):
    """Test export_statistics(): every overwrite mode must produce/replace the
    expected number of files. (Fixed "insted" typos in assertion messages.)

    :param func: Saving method of the DataAnalysis object under test
    :param lastcleanup: Remove generated files at the end (disable when
        inspecting results interactively)
    """

    ### Define a name
    fname = "plotastic_results"

    ### Cleanup before testing
    dac.cleanfiles(fname)

    # == Test overwrite=True ===============================
    kwargs = dict(fname=fname, overwrite=True)
    func(**kwargs)
    func(**kwargs) #' Should overwrite
    func(**kwargs) #' Should overwrite

    ### Make sure files overwrote each other
    saved = glob(fname + "*")
    assert len(saved) in [
        1,
        2,
    ], "Should have saved one/two files, instead got: " + str(saved)

    dac.cleanfiles(fname)

    # == Test overwrite="day" ===============================
    kwargs = dict(fname=fname, overwrite="day")
    func(**kwargs)
    func(**kwargs) #' Should overwrite
    func(**kwargs) #' Should overwrite

    ### Make sure files didn't delete each other
    saved = glob(fname + "*")
    assert len(saved) in [
        1,
        2,
    ], "Should have saved one or two files, instead got: " + str(saved)

    dac.cleanfiles(fname)

    # == Test overwrite="nothing" ===============================
    kwargs = dict(fname=fname, overwrite="nothing")
    func(**kwargs)
    func(**kwargs) #' Should NOT overwrite
    func(**kwargs) #' Should NOT overwrite

    ### Make sure files didn't delete each other
    saved = glob(fname + "*")
    assert len(saved) in [
        3,
        6,
    ], "Should have saved three/six files, instead got: " + str(saved)

    if lastcleanup:
        dac.cleanfiles(fname)
96 |
### Manual invocation: keep the output files around for inspection
if __name__ == "__main__":
    test_save(func=DA.save_statistics, lastcleanup=False)

    ### cleanup
    # for file in glob("plotastic_results*"):
    #     os.remove(file)
103 |
104 | # %%
105 |
106 | # %% Test save_fig
107 | # import matplotlib.pyplot as plt
108 | # DA.plot_box_strip()
109 | # DA.save_fig(fname="p1", overwrite=True) # ? saves wrong fig ?
110 | # DA.save_fig(fname="p2", overwrite=True, fig=DA.fig) # ? saves wrong fig ??
111 | # DA.fig.savefig("p3.pdf") # ? saves CORRECT FIG!!
112 | # plt.savefig("p4.pdf")
113 |
114 |
115 | # %%
116 |
117 | # %% interactive testing to display Plots
118 |
### Interactive entry point; ipytest run currently disabled
if __name__ == "__main__":
    import ipytest

    # ipytest.run()
123 |
--------------------------------------------------------------------------------
/tests/_utils_test.py:
--------------------------------------------------------------------------------
1 | #
2 | # %%
3 | import matplotlib as mpl
4 |
5 | from plotastic import utils as ut
6 |
7 | import DA_configs as dac
8 |
9 |
10 | # %%
def test_font_functions():
    """Smoke test: call each font helper once to verify none of them raises."""
    ut.mpl_font()
    ut.mpl_fontsizes_get_all()
    ut.mpl_fontsize_from_rc(rc_param="legend.fontsize")

    ut.mpl_fontpath()
17 |
18 |
### Interactive exploration of the font helpers
if __name__ == "__main__":
    test_font_functions()

    # mpl.rcParams["font.size"] = 22
    # print(mpl.rcParams["font.size"]) #' returns an integer
    # print(mpl.rcParams["legend.fontsize"]) #' returns medium

    ### Fontsizes
    d = ut.mpl_fontsizes_get_all()
    print(d)

    fs = ut.mpl_fontsize_from_rc()
    legend_fs = ut.mpl_fontsize_from_rc(rc_param="legend.fontsize")
    print(legend_fs)

    ### Font
    # plst.set_style("paper")
    #' Fix: call mpl_font() — the original bound the function object itself
    #' (`font = ut.mpl_font`), making the "Narrow" membership test below
    #' raise a TypeError instead of checking the font name string
    font = ut.mpl_font()
    fontpath = ut.mpl_fontpath()
    print(fontpath)

    if "Narrow" in font:
        print("narrow")
43 |
44 |
45 | # %%
def test_get_bbox_width():
    """Smoke test: measure the width of the legend's tight bounding box."""
    DA = dac.DA_ALL
    # DA.legend.get_window_extent()
    bbox = DA.legend.get_tightbbox()
    ut.get_bbox_width(bbox)
51 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/tests/non_pytest_checks/hspace_wspace.py:
--------------------------------------------------------------------------------
1 | """A script that shows that hspace and wspace are autoadjusted
2 | """
3 | # %%
4 |
5 | from matplotlib import pyplot as plt
6 | import matplotlib as mpl
7 | import plotastic as plst
8 | from plotastic import utils as ut
9 |
print(mpl.__version__)

### Lower dpi to keep interactive figures small
plt.rcParams["figure.dpi"] = 70


# %%
# == Example Data ======================================================
DF, dims = plst.load_dataset("tips", verbose=False)
DA = plst.DataAnalysis(DF, dims=dims)
20 |
21 |
22 | # %%
def get_heights(DA: plst.DataAnalysis):
    """Collect the tight-bbox height (in inches) of every axes, grouped
    per column of the subplot grid.

    :param DA: DataAnalysis holding the plotted figure
    :return: A list of lists — one inner list of heights per column
    """
    per_column = []
    for _key, axes in DA.axes_iter__col_axes:
        column_heights = [
            ut.get_bbox_height(ax.get_tightbbox(), in_inches=True) for ax in axes
        ]
        per_column.append(column_heights)
    return per_column
40 |
41 |
### Interactive check: print the collected per-column heights
if __name__ == "__main__":
    heights_cols = get_heights(DA)
    print(heights_cols)
    # print()
46 |
47 |
def adjust_hspace(DA: plst.DataAnalysis):
    """One-shot attempt at fitting all axes vertically: grow the figure to
    the height of the tallest column of axes.

    NOTE(review): stretching the figure also rescales the axes; the
    commented code below was an (unsuccessful) attempt to undo that.
    """
    height = DA.figsize[1]  # original height, kept for the commented experiment

    ### Adjust height to fit all axes
    heights_cols = get_heights(DA)
    heights = [sum(heights_col) for heights_col in heights_cols]
    new_height = max(heights)  # tallest column determines the target height
    DA.fig.set_figheight(new_height, forward=True)

    ### That size increase stretched the axes, too, undo that
    # height_fraction = height / new_height
    # hspace = new_height - height
    # plt.subplots_adjust(hspace=hspace) #?? Doesn't work at all
61 |
### Interactive check of the one-shot adjustment
if __name__ == "__main__":
    adjust_hspace(DA)
    print(DA.figsize)
    # print()
66 |
67 | #%%
def adjust_hspace_recursive(DA: plst.DataAnalysis):
    """Since plt.adjust_subplots(hspace=...) doesn't work, we need to
    figure out something else. This function is a recursive approach
    that increases the figure height until all axes fit.

    NOTE(review): the recursive call sits inside the while loop, so every
    growth step spawns a fresh recursion; termination relies on the axes
    eventually fitting into the grown figure — confirm this cannot loop
    indefinitely for pathological layouts.
    """

    height = DA.figsize[1]  # current figure height in inches

    ### Adjust height to fit all axes
    heights_cols = get_heights(DA)
    heights = [sum(heights_col) for heights_col in heights_cols]
    new_height = max(heights)  # tallest column of axes sets the target

    ### Save some recursion steps
    #' If the target is already much larger, over-shoot once up front
    if new_height / height > 1.7:
        new_height = new_height * 1.7
        # print("heightboost")
    print(new_height, height)

    ### Recursive increase
    #' Repeat until the sum of axes heights is less than the figure
    #' height. 99% of new_height is more than enough
    while 0.99 * new_height > height:
        #' Save some recursion steps, also makes nice spacing
        new_height = new_height * 1.1
        DA.fig.set_figheight(new_height, forward=True)
        height = DA.figsize[1]
        adjust_hspace_recursive(DA)  #' Recursive call
95 |
96 |
97 |
### Interactive check of the recursive adjustment (currently disabled)
if __name__ == "__main__":
    pass
    # DA.subplots(figsize=(5,1)).fillaxes(kind="strip", dodge=True)
    # adjust_hspace_recursive(DA)
    # print(DA.figsize)
    # print()
104 |
105 |
106 |
def check_hspace():
    """Draw plots of increasing height (1–5 inches) and run the recursive
    hspace fix on each, to eyeball the resulting vertical spacing."""
    width = 5
    for step in range(1, 6):
        height = step

        ### Plot at the current size
        plotted = DA.subplots(figsize=(width, height)).fillaxes(
            kind="strip", dodge=True
        )
        plotted.edit_legend()

        ### Fit the vertical spacing
        adjust_hspace_recursive(DA)

        print()
        new_height = round(DA.figsize[1], 2)
141 |
142 |
### Interactive entry point: apply a style, then run the spacing check
if __name__ == "__main__":
    plst.set_style("paper")
    # plst.set_style("classic")
    # plst.print_styles()
    check_hspace()
148 |
149 |
150 | # def _get_legend_width(labels: list[str]) -> float:
151 | # """Calculates the width of the legend in inches, taking fontsize
152 | # into account"""
153 |
154 | # ### Add legend title, which is hue
155 | # labels = [DA.dims.hue] + labels # TODO: replace with self
156 |
157 | # ### Split by new lines and flatten
158 | # labels = [label.split("\n") for label in labels]
159 | # labels = [item for sublist in labels for item in sublist]
160 | # # print(labels)
161 |
162 | # ### Get length of longest level (or title)
163 | # max_label_length = max([len(label) for label in labels])
164 |
165 | # ### Convert label length to inches
166 | # #' 1 inch = 72 points, one character = ~10 points
167 | # fontsize = _get_fontsize_legend()
168 | # character_per_inch = 72 / fontsize
169 | # if "Narrow" in DA.font_mpl: # TODO: replace with self
170 | # character_per_inch = character_per_inch * 0.8
171 |
172 | # legend_width = max_label_length / character_per_inch
173 |
174 | # ### Add more for the markers
175 | # #' When the legend title (hue) is the largest, no space needed
176 | # if len(DA.dims.hue) != max_label_length:
177 | # # legend_width += 0.5 # TODO reactivate
178 | # print("added marker width")
179 |
180 | # return legend_width
181 |
--------------------------------------------------------------------------------
/tests/non_pytest_checks/legend_position.py:
--------------------------------------------------------------------------------
1 | """A helper script that generates plots of different sizes to test the
2 | things that highly depend on overall plot size, like legend positioning
3 | """
4 | # %%
5 |
6 | from matplotlib import pyplot as plt
7 | import matplotlib as mpl
8 | import plotastic as plst
9 | from plotastic import utils as ut
10 |
### Lower dpi to keep interactive figures small
plt.rcParams["figure.dpi"] = 70


# %%
# == Example Data ======================================================
DF, dims = plst.load_dataset("tips", verbose=False)
DA = plst.DataAnalysis(DF, dims=dims)
19 |
20 |
21 |
22 | # %%
23 | # == Utils: Legend Width ================================================
24 |
### Interactive check: measure legend and axes widths via their tight bboxes
if __name__ == "__main__":
    ### Test with legend
    DA.plot().edit_legend()
    plt.close()
    legend = DA.legend
    legend_box = legend.get_tightbbox()
    legend_width = ut.get_bbox_width(legend_box)

    ### Test with axes
    box = DA.axes[0][0].get_tightbbox()
    box.extents
    fig_width = ut.get_bbox_width(box)
37 |
38 |
39 | # %%
40 | # == Legend ============================================================
41 | """
42 | We want the legend to be `loc="center right"`. But that setting is bad,
43 | we need to use bbox_to_anchor, otherwise the legend will be outside the
44 | figure. However, With increasing figure width, the legend to drift away
45 | from the figure. That effect is more drastic when bbox_to_anchor has
46 | larger numbers than (1.0, 0.5). So we need to adjust borderaxespad
47 | depending on figure width.
48 | But that was also bad. I opted to stretch the figure instead and then
49 | adjust the subplot size to fit the legend.
50 | """
51 |
52 |
53 |
def check_legend():
    """Render strip plots at widths 1–20 with several legend-label sets,
    to eyeball how the legend position behaves as the figure widens."""
    label_sets = [
        ["yes", "no"],
        ["ad", "saaaaaaaaaaaaaaa"],
        ["ad", "saaaaa\naaaaaaaaaa"],
    ]

    for width in range(1, 21):
        for labels in label_sets:
            ### Plot at the current width, then place the legend
            DA.subplots(figsize=(width, 3)).fillaxes(kind="strip", dodge=True)
            DA.edit_legend(labels=labels)

            ### Title each figure with its width for comparison
            plt.suptitle(f"width={width}", y=1.1)
81 |
82 |
### Interactive entry point: apply a style, then run the legend check
if __name__ == "__main__":
    print(mpl.rcParams["legend.fontsize"])
    # plst.set_style("paper")
    plst.set_style("classic")
    # plst.print_styles()
    check_legend()
89 |
90 |
91 |
92 |
--------------------------------------------------------------------------------
/tests/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | filterwarnings =
3 | error
4 | ignore::UserWarning
5 | ignore::DeprecationWarning
6 | ignore::FutureWarning
7 | testpaths =
8 | tests
--------------------------------------------------------------------------------
/tests/run_tests_in_new_env.py:
--------------------------------------------------------------------------------
1 | """A helper script to execute tests in a new virtual environment. Not
2 | needed if tomltovenv is used to create the virtual environment."""
3 | #
4 | # %%
5 | import os
6 | import shutil
7 | import venv
8 |
# %%
### Delete environment if present
if os.path.exists("../venv_not_e"):
    shutil.rmtree("../venv_not_e")

# %%
### Create virtual environment
# !! we're inside the tests folder
venv.create(env_dir="../venv_not_e", clear=True, with_pip=True)

# %%
### NOTE(review): lines starting with `!` are IPython cell magics — this
### script only works when executed cell-by-cell in an IPython/notebook
### environment, not as plain Python.
### NOTE(review): `! source` runs in a throwaway subshell, so the
### activation presumably does NOT persist into the following cells —
### verify. Also the path here is `venv_not_e` while the venv was created
### at `../venv_not_e` — confirm which is intended.
! source venv_not_e/bin/activate

# %%
### Install non editable for testing
! pip install -r requirements.txt
! pip install git+https://github.com/markur4/plotastic.git
! pip install pytest pytest-cov ipytest

# %%
# !! Coverage requires editable mode
! pytest

# # ! pytest --cov --cov-report=xml
33 |
34 | # %%
35 |
--------------------------------------------------------------------------------