├── .coveragerc
├── .gitattributes
├── .github
│   ├── workflows
│   │   └── codecov.yml
│   └── workflows_inactive
│       └── codecov.yml
├── .gitignore
├── CHANGELOGS.md
├── CODE_OF_CONDUCT.md
├── EXAMPLES
│   ├── attention.ipynb
│   ├── attention1.png
│   ├── cars.ipynb
│   ├── cars1.png
│   ├── diamonds.ipynb
│   ├── diamonds1.png
│   ├── diamonds2.png
│   ├── fmri.ipynb
│   ├── fmri.xlsx
│   ├── fmri1.png
│   ├── fmri2.png
│   ├── iris.ipynb
│   ├── iris1.png
│   ├── qpcr(presentation).ipynb
│   ├── qpcr.ipynb
│   ├── qpcr1.png
│   ├── tips.ipynb
│   └── tips1.png
├── HOW_TO_USE
│   ├── dimensions.ipynb
│   ├── plot_editing.ipynb
│   ├── plot_legend.ipynb
│   ├── plot_multiplots.ipynb
│   ├── plot_styles.ipynb
│   └── plotting.ipynb
├── IMAGES
│   └── LOGO
│       ├── plotastic_logo.afdesign
│       ├── plotastic_logo.png
│       ├── plotastic_logo_2.afdesign
│       ├── plotastic_logo_3.afdesign
│       └── plotastic_logo_3.png
├── LICENSE
├── MANIFEST.in
├── README.md
├── class_diagram.mmd
├── class_diagram.svg
├── devtools
│   ├── readme_for_pypi.py
│   ├── setupvenv.py
│   └── upload_to_pypi.sh
├── paper.bib
├── paper.md
├── paper.pdf
├── pyproject.toml
├── qpcr1.png
├── requirements.txt
├── src
│   └── plotastic
│       ├── __init__.py
│       ├── caches.py
│       ├── dataanalysis
│       │   ├── __init__.py
│       │   ├── annotator.py
│       │   ├── dataanalysis.py
│       │   └── filer.py
│       ├── dimensions
│       │   ├── __init__.py
│       │   ├── dataframetool.py
│       │   ├── dataintegrity.py
│       │   ├── dims.py
│       │   ├── dimsandlevels.py
│       │   ├── hierarchical_dims.py
│       │   └── subject.py
│       ├── docstrings.py
│       ├── example_data
│       │   ├── __init__.py
│       │   ├── data
│       │   │   ├── fmri.xlsx
│       │   │   ├── qpcr.xlsx
│       │   │   └── tips.xlsx
│       │   └── load_dataset.py
│       ├── plotting
│       │   ├── __init__.py
│       │   ├── multiplot.py
│       │   ├── plot.py
│       │   ├── plotedits.py
│       │   ├── rc.py
│       │   ├── rc_utils.py
│       │   └── subplot.py
│       ├── py.typed
│       ├── stat
│       │   ├── __init__.py
│       │   ├── assumptions.py
│       │   ├── bivariate.py
│       │   ├── omnibus.py
│       │   ├── posthoc.py
│       │   ├── statresults.py
│       │   └── stattest.py
│       └── utils
│           ├── __init__.py
│           ├── subcache.py
│           └── utils.py
├── testing
│   └── make_htmlcov.sh
└── tests
    ├── DA_configs.py
    ├── DA_configs2.py
    ├── DA_utils.py
    ├── _annotator_test.py
    ├── _assumptions_test.py
    ├── _dims_test.py
    ├── _dimsandlevels_test.py
    ├── _filer_test.py
    ├── _hierarchical_dims_test.py
    ├── _load_dataset_test.py
    ├── _multiplot_test.py
    ├── _omnibus_test.py
    ├── _paper_test.py
    ├── _plotedits_test.py
    ├── _rc_test.py
    ├── _save_test.py
    ├── _utils_test.py
    ├── conftest.py
    ├── non_pytest_checks
    │   ├── hspace_wspace.py
    │   └── legend_position.py
    ├── pytest.ini
    └── run_tests_in_new_env.py

/.coveragerc:
--------------------------------------------------------------------------------
1 | ; https://coverage.readthedocs.io/en/latest/config.html
2 | 
3 | [paths]
4 | source =
5 |     src
6 |     ; */site-packages
7 | 
8 | 
9 | ; [run]
10 | ; parallel = true
11 | ; omit = tests/*
12 | ; branch = true
13 | ; source =
14 | ;     sampleproject
15 | 
16 | [report]
17 | show_missing = true
18 | precision = 1
19 | ; omit = *migrations*
20 | exclude_lines =
21 |     if __name__ == .__main__.:
22 |     if TYPE_CHECKING:
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
--------------------------------------------------------------------------------
/.github/workflows/codecov.yml:
--------------------------------------------------------------------------------
1 | name: test coverage
2 | run-name: ${{ github.actor }} ${{ github.event_name }}; Setup testenv, run tests, upload coverage
3 | 
4 | on: [push, pull_request]
5 | 
6 | jobs:
7 |   build:
8 |     runs-on: ubuntu-latest
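    # A minimal sketch for reproducing this job's coverage run locally
    # (assumed: repo root as working directory and Python 3.11; the
    # commands simply mirror the workflow steps below):
    #
    #   pip install -r requirements.txt
    #   pip install -e .[dev]
    #   pytest tests --cov --cov-report=xml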
9 |     name: Setup testenv, run tests, upload coverage
10 |     steps:
11 |       - uses: actions/checkout@v3
12 |       - uses: actions/setup-python@v2
13 |         with:
14 |           python-version: '3.11'
15 |       - name: Install requirements #' Using requirements.txt should be faster
16 |         run: pip install -r requirements.txt
17 |       - name: Install plotastic (in editable mode! Otherwise 0% coverage)
18 |         run: pip install -e .[dev]
19 |       - name: Run tests and collect coverage
20 |         run: pytest tests --cov --cov-report=xml
21 |       - name: Upload coverage reports to Codecov with GitHub Action
22 |         uses: codecov/codecov-action@v3
23 |         with:
24 |           token: ${{ secrets.CODECOV_TOKEN }}
25 |         env:
26 |           CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
--------------------------------------------------------------------------------
/.github/workflows_inactive/codecov.yml:
--------------------------------------------------------------------------------
1 | name: Codecov
2 | run-name: ${{ github.actor }} uploaded coverage reports to Codecov
3 | 
4 | on: [push, pull_request]
5 | 
6 | jobs:
7 |   build:
8 |     runs-on: ubuntu-latest
9 |     name: upload coverage reports to Codecov
10 |     steps:
11 |       - name: Upload coverage reports to Codecov
12 |         uses: codecov/codecov-action@v3
13 |         env:
14 |           CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
15 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Figures
2 | IMAGES/CLASS_DIAGRAMS
3 | 
4 | # Examples
5 | EXAMPLES/Ezgitest
6 | EXAMPLES/qpcr.pdf
7 | 
8 | # Readme for PyPI
9 | README_pypi.md
10 | README_pypi.pdf
11 | README.pdf
12 | 
13 | # setup_env
14 | setup_env.ipynb
15 | 
16 | # DS_store
17 | .DS_Store
18 | 
19 | # joblib cache
20 | .joblib_cache
21 | joblib_cache
22 | 
23 | # Folder for storing things that shouldn't be published
24 | _experimental
25 | GITIGNORE
26 | GIT_IGNORE
27 | Dont_Publish
28 | DEVELOPMENT
29 | 
30 | # !!! The paper: only check it in when publishing
31 | paper.docx
32 | 
33 | 
34 | 
35 | # vscode
36 | .vscode
37 | .vscode/
38 | 
39 | # Byte-compiled / optimized / DLL files
40 | __pycache__/
41 | *.py[cod]
42 | *$py.class
43 | 
44 | # C extensions
45 | *.so
46 | 
47 | # Distribution / packaging
48 | .Python
49 | build/
50 | develop-eggs/
51 | dist/
52 | downloads/
53 | eggs/
54 | .eggs/
55 | lib/
56 | lib64/
57 | parts/
58 | sdist/
59 | var/
60 | wheels/
61 | share/python-wheels/
62 | *.egg-info/
63 | .installed.cfg
64 | *.egg
65 | MANIFEST
66 | 
67 | # PyInstaller
68 | # Usually these files are written by a python script from a template
69 | # before PyInstaller builds the exe, so as to inject date/other info into it.
70 | *.manifest
71 | *.spec
72 | 
73 | # Installer logs
74 | pip-log.txt
75 | pip-delete-this-directory.txt
76 | 
77 | # Unit test / coverage reports
78 | htmlcov/
79 | .tox/
80 | .nox/
81 | .coverage
82 | .coverage.*
83 | .cache
84 | nosetests.xml
85 | coverage.xml
86 | *.cover
87 | *.py,cover
88 | .hypothesis/
89 | .pytest_cache/
90 | cover/
91 | 
92 | # Translations
93 | *.mo
94 | *.pot
95 | 
96 | # Django stuff:
97 | *.log
98 | local_settings.py
99 | db.sqlite3
100 | db.sqlite3-journal
101 | 
102 | # Flask stuff:
103 | instance/
104 | .webassets-cache
105 | 
106 | # Scrapy stuff:
107 | .scrapy
108 | 
109 | # Sphinx documentation
110 | docs/_build/
111 | src/docs/
112 | 
113 | # PyBuilder
114 | .pybuilder/
115 | target/
116 | 
117 | # Jupyter Notebook
118 | .ipynb_checkpoints
119 | 
120 | # IPython
121 | profile_default/
122 | ipython_config.py
123 | 
124 | # pyenv
125 | # For a library or package, you might want to ignore these files since the code is
126 | # intended to run in multiple environments; otherwise, check them in:
127 | # .python-version
128 | 
129 | # pipenv
130 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
131 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
132 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
133 | # install all needed dependencies.
134 | #Pipfile.lock
135 | 
136 | # poetry
137 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
138 | # This is especially recommended for binary packages to ensure reproducibility, and is more
139 | # commonly ignored for libraries.
140 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
141 | #poetry.lock
142 | 
143 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
144 | __pypackages__/
145 | 
146 | # Celery stuff
147 | celerybeat-schedule
148 | celerybeat.pid
149 | 
150 | # SageMath parsed files
151 | *.sage.py
152 | 
153 | # Environments
154 | .env
155 | .venv
156 | _venv
157 | env/
158 | venv*/
159 | ENV/
160 | env.bak/
161 | venv.bak/
162 | 
163 | # Spyder project settings
164 | .spyderproject
165 | .spyproject
166 | 
167 | # Rope project settings
168 | .ropeproject
169 | 
170 | # mkdocs documentation
171 | /site
172 | 
173 | # mypy
174 | .mypy_cache/
175 | .dmypy.json
176 | dmypy.json
177 | 
178 | # Pyre type checker
179 | .pyre/
180 | 
181 | # pytype static type analyzer
182 | .pytype/
183 | 
184 | # Cython debug symbols
185 | cython_debug/
186 | 
187 | # PyCharm
188 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
189 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
190 | # and can be added to the global gitignore or merged into this file. For a more nuclear
191 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
192 | #.idea/
193 | 
--------------------------------------------------------------------------------
/CHANGELOGS.md:
--------------------------------------------------------------------------------
1 | # 0.1.2
2 | ### Available on:
3 | - GitHub
4 | 
5 | ### New Features:
6 | - None yet
7 | 
8 | 
9 | 
10 | # 0.1.1
11 | ### Available on:
12 | - GitHub
13 | - PyPI
14 | 
15 | ### New Features:
16 | - Runtime config `plotting.rc`
17 | - `set_style()` now passes all available matplotlib styles to `matplotlib.style.use()`
18 | 
19 | ### Experimental Features:
20 | - Plot paired data by joining each subject's datapoints with a line for each facet/x/hue-level!
21 |   - To my knowledge, the solutions provided by matplotlib or seaborn are
22 |     far too complicated.
23 |   - I implemented a solution that worked, but since I
24 |     found a case where it didn't, this feature stays experimental.
25 | 
26 | 
27 | ### Changes:
28 | - Legends
29 |   - Added a `legend_kws` parameter to all multiplots
30 |     - Enforcing `.edit_legend()` in the call chain seemed awkward
31 |     - Also, the multiplot itself should decide which legend is
32 |       displayed (e.g. by calling `.edit_legend()` in the correct order,
33 |       in between or after `.fillaxes()`)
34 | 
35 | ### Fixes:
36 | - Rewrote `.edit_titles_with_func()` because it didn't work
37 | - The legend is now placed outside the plot, no matter the figure width!
38 | 
39 | ### Others:
40 | - Added documentation notebooks to the README
41 | 
42 | 
43 | # 0.1.0 - Initial Release
44 | ### Available on:
45 | - GitHub
46 | - PyPI
47 | 
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 | 
3 | ## Our Pledge
4 | 
5 | In the interest of fostering an open and welcoming environment, we as
6 | contributors and maintainers pledge to making participation in our
7 | project and our community a harassment-free experience for everyone,
8 | regardless of age, body size, disability, ethnicity, gender identity and
9 | expression, level of experience, nationality, personal appearance, race,
10 | religion, or sexual identity and orientation.
11 | 
12 | ## Our Standards
13 | 
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 | 
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 | 
23 | Examples of unacceptable behavior by participants include:
24 | 
25 | * The use of sexualized language or imagery and unwelcome sexual
26 |   attention or advances
27 | * Trolling, insulting/derogatory comments, and personal or political
28 |   attacks
29 | * Public or private harassment
30 | * Publishing others' private information, such as a physical or
31 |   electronic address, without explicit permission
32 | * Other conduct which could reasonably be considered inappropriate in a
33 |   professional setting
34 | 
35 | ## Our Responsibilities
36 | 
37 | Project maintainers are responsible for clarifying the standards of
38 | acceptable behavior and are expected to take appropriate and fair
39 | corrective action in response to any instances of unacceptable behavior.
40 | 41 | Project maintainers have the right and responsibility to remove, edit, 42 | or reject comments, commits, code, wiki edits, issues, and other 43 | contributions that are not aligned to this Code of Conduct, or to ban 44 | temporarily or permanently any contributor for other behaviors that they 45 | deem inappropriate, threatening, offensive, or harmful. 46 | 47 | ## Scope 48 | 49 | This Code of Conduct applies both within project spaces and in public 50 | spaces when an individual is representing the project or its community. 51 | Examples of representing a project or community include using an 52 | official project e-mail address, posting via an official social media 53 | account, or acting as an appointed representative at an online or 54 | offline event. Representation of a project may be further defined and 55 | clarified by project maintainers. 56 | 57 | ## Enforcement 58 | 59 | Instances of abusive, harassing, or otherwise unacceptable behavior may 60 | be reported by contacting the project team at martin.kur4@gmail.com. The 61 | project team will review and investigate all complaints, and will 62 | respond in a way that it deems appropriate to the circumstances. The 63 | project team is obligated to maintain confidentiality with regard to the 64 | reporter of an incident. Further details of specific enforcement 65 | policies may be posted separately. 66 | 67 | Project maintainers who do not follow or enforce the Code of Conduct in 68 | good faith may face temporary or permanent repercussions as determined 69 | by other members of the project's leadership. 70 | 71 | ## Attribution 72 | 73 | This Code of Conduct is adapted from the [Contributor 74 | Covenant][homepage], version 1.4, available at 75 | [http://contributor-covenant.org/version/1/4][version] 76 | 77 | [homepage]: http://contributor-covenant.org 78 | [version]: http://contributor-covenant.org/version/1/4/ -------------------------------------------------------------------------------- /EXAMPLES/attention1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/EXAMPLES/attention1.png -------------------------------------------------------------------------------- /EXAMPLES/cars1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/EXAMPLES/cars1.png -------------------------------------------------------------------------------- /EXAMPLES/diamonds1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/EXAMPLES/diamonds1.png -------------------------------------------------------------------------------- /EXAMPLES/diamonds2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/EXAMPLES/diamonds2.png -------------------------------------------------------------------------------- /EXAMPLES/fmri.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/EXAMPLES/fmri.xlsx -------------------------------------------------------------------------------- /EXAMPLES/fmri1.png: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/EXAMPLES/fmri1.png
--------------------------------------------------------------------------------
/EXAMPLES/fmri2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/EXAMPLES/fmri2.png
--------------------------------------------------------------------------------
/EXAMPLES/iris1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/EXAMPLES/iris1.png
--------------------------------------------------------------------------------
/EXAMPLES/qpcr1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/EXAMPLES/qpcr1.png
--------------------------------------------------------------------------------
/EXAMPLES/tips1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/EXAMPLES/tips1.png
--------------------------------------------------------------------------------
/HOW_TO_USE/plot_editing.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {
6 |     "tags": []
7 |    },
8 |    "source": [
9 |     "# Editing Plots\n",
10 |     "Here we explain the low-level interface for plotting with plotastic."
11 |    ]
12 |   },
13 |   {
14 |    "cell_type": "code",
15 |    "execution_count": null,
16 |    "metadata": {},
17 |    "outputs": [],
18 |    "source": []
19 |   }
20 |  ],
21 |  "metadata": {
22 |   "language_info": {
23 |    "name": "python"
24 |   }
25 |  },
26 |  "nbformat": 4,
27 |  "nbformat_minor": 2
28 | }
29 | 
--------------------------------------------------------------------------------
/HOW_TO_USE/plot_styles.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {},
6 |    "source": [
7 |     "# Styles"
8 |    ]
9 |   },
10 |   {
11 |    "cell_type": "markdown",
12 |    "metadata": {},
13 |    "source": [
14 |     "### Get Example Data"
15 |    ]
16 |   },
17 |   {
18 |    "cell_type": "code",
19 |    "execution_count": 1,
20 |    "metadata": {},
21 |    "outputs": [
22 |     {
23 |      "data": {
24 |       "text/html": [
25 |        "[HTML preview of DF.head() removed: the table markup was stripped during extraction; the same data follows in the text/plain output]\n",
99 |        "
" 100 | ], 101 | "text/plain": [ 102 | " Unnamed: 0 subject timepoint event region signal\n", 103 | "0 17 s7 9 stim parietal 0.058897\n", 104 | "1 36 s8 9 stim parietal 0.170227\n", 105 | "2 67 s0 0 stim frontal -0.021452\n", 106 | "3 84 s1 0 stim parietal -0.064454\n", 107 | "4 127 s13 9 stim parietal 0.013245" 108 | ] 109 | }, 110 | "execution_count": 1, 111 | "metadata": {}, 112 | "output_type": "execute_result" 113 | } 114 | ], 115 | "source": [ 116 | "import plotastic as plst\n", 117 | "\n", 118 | "# Import Example Data\n", 119 | "DF, _dims = plst.load_dataset(\"fmri\", verbose=False)\n", 120 | "\n", 121 | "DF.head()" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 2, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "name": "stdout", 131 | "output_type": "stream", 132 | "text": [ 133 | "===============================================================================\n", 134 | "#! Checking data integrity...\n", 135 | "✅ DATA COMPLETE: All combinations of levels from selected factors are present\n", 136 | " in the Dataframe, including x.\n", 137 | "✅ GROUPS COMPLETE: No groups with NaNs.\n", 138 | "✅ GROUPS EQUAL: All groups (40 total) have the same samplesize n = 14.0.\n", 139 | "🌳 LEVELS WELL CONNECTED: These Factors have levels that are always found\n", 140 | " together: ['region', 'event'].\n", 141 | " 👉 Call .levels_combocount() or .levels_dendrogram() to see them all.\n", 142 | "✅ Subjects complete: No subjects with missing data\n", 143 | "===============================================================================\n" 144 | ] 145 | } 146 | ], 147 | "source": [ 148 | "# Define dimensions explicitly\n", 149 | "dims = dict(\n", 150 | " y=\"signal\",\n", 151 | " x=\"timepoint\",\n", 152 | " hue=\"event\",\n", 153 | " col=\"region\",\n", 154 | ")\n", 155 | "\n", 156 | "# Make DataAnalysis object\n", 157 | "DA = plst.DataAnalysis(\n", 158 | " data=DF, # Dataframe\n", 159 | " dims=dims, # Dictionary with y, x, hue, col, row\n", 160 | " subject=\"subject\", # Data is paired by subject (optional)\n", 161 | " verbose=True, # Print out Info about the Data\n", 162 | ")" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "## What Styles are there?" 
170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "#### Print style names" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 3, 182 | "metadata": {}, 183 | "outputs": [ 184 | { 185 | "name": "stdout", 186 | "output_type": "stream", 187 | "text": [ 188 | "plotastic:\n", 189 | "\t['default', 'paper']\n", 190 | "seaborn:\n", 191 | "\t['white', 'dark', 'whitegrid', 'darkgrid', 'ticks']\n", 192 | "matplotlib:\n", 193 | "\t['Solarize_Light2', '_classic_test_patch', '_mpl-gallery', '_mpl-gallery-nogrid', 'bmh', 'classic', 'dark_background', 'fast', 'fivethirtyeight', 'ggplot', 'grayscale', 'seaborn-v0_8', 'seaborn-v0_8-bright', 'seaborn-v0_8-colorblind', 'seaborn-v0_8-dark', 'seaborn-v0_8-dark-palette', 'seaborn-v0_8-darkgrid', 'seaborn-v0_8-deep', 'seaborn-v0_8-muted', 'seaborn-v0_8-notebook', 'seaborn-v0_8-paper', 'seaborn-v0_8-pastel', 'seaborn-v0_8-poster', 'seaborn-v0_8-talk', 'seaborn-v0_8-ticks', 'seaborn-v0_8-white', 'seaborn-v0_8-whitegrid', 'tableau-colorblind10']\n" 194 | ] 195 | } 196 | ], 197 | "source": [ 198 | "plst.print_styles()" 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": {}, 204 | "source": [ 205 | "#### Show all styles" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "## `plst.set_style()` takes styles from matplotlib, seaborn and plotastic!" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 4, 225 | "metadata": {}, 226 | "outputs": [], 227 | "source": [ 228 | "plst.set_style(\"paper\")" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "## Colors" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": null, 241 | "metadata": {}, 242 | "outputs": [], 243 | "source": [ 244 | "plst.set_palette([\"blue\", \"green\", \"red\"], verbose=False)" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "metadata": {}, 250 | "source": [] 251 | } 252 | ], 253 | "metadata": { 254 | "kernelspec": { 255 | "display_name": "venv", 256 | "language": "python", 257 | "name": "python3" 258 | }, 259 | "language_info": { 260 | "codemirror_mode": { 261 | "name": "ipython", 262 | "version": 3 263 | }, 264 | "file_extension": ".py", 265 | "mimetype": "text/x-python", 266 | "name": "python", 267 | "nbconvert_exporter": "python", 268 | "pygments_lexer": "ipython3", 269 | "version": "3.11.6" 270 | } 271 | }, 272 | "nbformat": 4, 273 | "nbformat_minor": 2 274 | } 275 | -------------------------------------------------------------------------------- /IMAGES/LOGO/plotastic_logo.afdesign: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/IMAGES/LOGO/plotastic_logo.afdesign -------------------------------------------------------------------------------- /IMAGES/LOGO/plotastic_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/IMAGES/LOGO/plotastic_logo.png -------------------------------------------------------------------------------- /IMAGES/LOGO/plotastic_logo_2.afdesign: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/IMAGES/LOGO/plotastic_logo_2.afdesign
--------------------------------------------------------------------------------
/IMAGES/LOGO/plotastic_logo_3.afdesign:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/IMAGES/LOGO/plotastic_logo_3.afdesign
--------------------------------------------------------------------------------
/IMAGES/LOGO/plotastic_logo_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/IMAGES/LOGO/plotastic_logo_3.png
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | exclude \
2 |     src/plotastic/.vscode/*.json \
3 | 
4 | include \
5 |     # LICENSE \
6 |     # README.md \
7 |     # How_To_Use/*.ipynb \
8 |     # tests/pytest.ini \
9 |     # tests/*.py \
10 |     # tests/*_test/*.py \
11 |     # src/plotastic/py.typed \
12 |     # src/plotastic/dataanalysis/py.typed \
13 |     # src/plotastic/example_data/data/*.xlsx
--------------------------------------------------------------------------------
/class_diagram.mmd:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | classDiagram
4 | 
5 |     %% == ANALYSIS ==============================================================
6 | 
7 |     class pd_DataFrame{
8 |         ...
9 |         ....()
10 |     }
11 |     class Dims {
12 |         x: str
13 |         y: str
14 |         hue: str =None
15 |         row: str =None
16 |         col: str =None
17 |         set(**kwargs, inplace: bool =False)
18 |         switch(*keys, **kwargs, inplace: bool =False)
19 |     }
20 |     class DimsAndLevels {
21 | 
22 |         data: pd.DataFrame
23 |         dims: Dims
24 | 
25 |         %%_empty_groups(property)
26 |         factors_all(property) list[x,y,hue,row,col]
27 |         factors_xhue(property) list[x,hue]
28 |         factors_rowcol(property) list[row,col]
29 |         levels_dict_factor(property) dict[f1:[l1, l2, ...], f2:[...], ...]
30 |         levelkeys(property) list[tuple[l1, l2], ...]
31 |         ....()
32 |     }
33 |     class Subject{
34 |         subject: str
35 |         subjectlist(property): list[str]
36 |         ....()
37 |     }
38 |     class HierarchicalDims{
39 |         _factors_hierarchical(property)
40 |         ...
41 | data_hierarchicize() 42 | ....() 43 | } 44 | class DataFrameTool{ 45 | verbose: bool =False 46 | levels: list[tuple[str]] =None 47 | catplot(kind="strip") -> sns.FacetGrid 48 | transform_y() -> self 49 | data_describe() -> pd.DataFrame 50 | data_categorize() -> self 51 | data_iter__key_facet(property) -> Generator 52 | ....() 53 | } 54 | class DataIntegrity{ 55 | data_check_integrity() 56 | ....() 57 | } 58 | 59 | 60 | pd_DataFrame *-- DimsAndLevels 61 | Dims *-- DimsAndLevels 62 | DimsAndLevels <|-- Subject 63 | Subject <|-- HierarchicalDims 64 | HierarchicalDims <|-- DataFrameTool 65 | DataFrameTool <|-- DataIntegrity 66 | DataIntegrity <|-- SubPlot 67 | DataIntegrity <|-- StatTest 68 | 69 | 70 | %% == STATISTICS ============================================================ 71 | 72 | class pingouin{ 73 | <> 74 | anova() 75 | rm_anova() 76 | pairwise_tests() 77 | ....() 78 | } 79 | class StatResults{ 80 | <> 81 | DF_normality: pd.DataFrame = "not tested" 82 | DF_homoscedasticity: pd.DataFrame = "not tested" 83 | DF_sphericity: pd.DataFrame = "not tested" 84 | DF_posthoc: pd.DataFrame = "not tested" 85 | DF_omnibus: pd.DataFrame = "not tested" 86 | DF_bivariate: pd.DataFrame = "not tested" 87 | ... 88 | normal(property):bool ="not assessed" 89 | homoscedastic(property):bool ="unknown" 90 | spherical(property):bool ="unknown" 91 | parametric(property):bool =None 92 | assess_normality() 93 | save() 94 | ....() 95 | } 96 | class StatTest{ 97 | <> 98 | ALPHA: float = 0.05 99 | ALPHA_TOLERANCE: float = 0.075 100 | results: StatResults 101 | ... 102 | set_alpha() 103 | set_alpha_tolerance() 104 | _p_to_stars(p: float) -> str 105 | _effectsize_to_words(effectsize: float) -> str 106 | ....() 107 | } 108 | class Assumptions{ 109 | ... 110 | check_normality() 111 | check_sphericity() 112 | check_homoscedasticity() 113 | ....() 114 | } 115 | class Omnibus{ 116 | ... 117 | omnibus_anova() 118 | omnibus_rmanova() 119 | omnibus_kruskal() 120 | ....() 121 | } 122 | class PostHoc{ 123 | ... 124 | test_pairwise(paired, parametric) 125 | ....() 126 | } 127 | class Bivariate{ 128 | ... 129 | test_pearson() 130 | test_spearman() 131 | test_kendall() 132 | ....() 133 | } 134 | 135 | StatResults *-- StatTest 136 | StatTest <|-- Assumptions 137 | 138 | Assumptions <|-- PostHoc 139 | Assumptions <|-- Omnibus 140 | Assumptions <|-- Bivariate 141 | pingouin .. Assumptions: Uses 142 | 143 | 144 | %% == PLOTTING ============================================================== 145 | 146 | class rc{ 147 | <> 148 | FONTSIZE 149 | STYLE_PAPER 150 | STYLE_PRESENTATION 151 | set_style() 152 | set_palette() 153 | } 154 | class matplotlib{ 155 | <> 156 | ... 157 | Axes 158 | Figure 159 | fig.legend() 160 | ....() 161 | } 162 | class SubPlot{ 163 | fig: mpl.figure.Figure 164 | axes: mpl.axes.Axes 165 | ... 166 | subplots() -> (fig, axes) 167 | fillaxes(kind="strip") -> (fig, axes) 168 | 169 | axes_nested(property) -> np.ndarray(axes).shape(1,1) 170 | axes_iter__key_ax(property) -> ax 171 | 172 | } 173 | class PlotEdits{ 174 | edit_titles(titles:dict) -> None 175 | edit_xy_axis_labels(labels:dict) -> None 176 | edit_yticklabels_log_minor(ticks:dict) -> None 177 | ...() 178 | } 179 | class Plot{ 180 | plot() 181 | plot_connect_subjects() 182 | ...() 183 | } 184 | class MultiPlot{ 185 | <> 186 | plot_box_strip() 187 | plot_bar_swarm() 188 | plot_qqplot() 189 | ...() 190 | } 191 | 192 | matplotlib *-- SubPlot 193 | matplotlib <.. 
rc: Configures
194 |     SubPlot <|-- PlotEdits
195 |     PlotEdits <|-- Plot
196 |     Plot <|-- MultiPlot
197 | 
198 | 
199 |     %% == DATAANALYSIS ==========================================================
200 | 
201 |     class Annotator{
202 |         _annotated: bool =False
203 |         ...
204 |         _check_include_exclude()
205 |         iter__key_df_ax(PH:pd.DataFrame) -> Generator
206 |         annotate_pairwise()
207 |         ....()
208 |     }
209 |     class Filer{
210 |         <>
211 |         title: str ="untitled"
212 |         prevent_overwrite()
213 |     }
214 |     class DataAnalysis{
215 |         <>
216 |         %% FIGURES DON'T NEED TITLES, WE EDIT THEM AFTERWARDS
217 |         title = "untitled"
218 |         filer: Filer
219 |         ...
220 |         title_add()
221 |         save_statistics()
222 |         ....()
223 |     }
224 | 
225 |     MultiPlot <|-- Annotator
226 |     Omnibus <|-- Annotator
227 |     PostHoc <|-- Annotator
228 |     Bivariate <|-- Annotator
229 | 
230 |     Filer *-- DataAnalysis
231 | 
232 |     Annotator <|-- DataAnalysis
233 | 
234 | 
235 |     %% == Links =================================================================
236 | 
237 |     %% dimensions
238 |     click Dims href "https://github.com/markur4/plotastic/blob/main/src/plotastic/dimensions/dims.py"
239 |     click DimsAndLevels href "https://github.com/markur4/plotastic/blob/main/src/plotastic/dimensions/dimsandlevels.py"
240 |     click DataFrameTool href "https://github.com/markur4/plotastic/blob/main/src/plotastic/dimensions/dataframetool.py"
241 |     click HierarchicalDims href "https://github.com/markur4/plotastic/blob/main/src/plotastic/dimensions/hierarchical_dims.py"
242 |     click Subject href "https://github.com/markur4/plotastic/blob/main/src/plotastic/dimensions/subject.py"
243 |     click DataIntegrity href "https://github.com/markur4/plotastic/blob/main/src/plotastic/dimensions/dataintegrity.py"
244 | 
245 |     %% stat
246 |     click StatResults href "https://github.com/markur4/plotastic/blob/main/src/plotastic/stat/statresults.py"
247 |     click StatTest href "https://github.com/markur4/plotastic/blob/main/src/plotastic/stat/stattest.py"
248 |     click Assumptions href "https://github.com/markur4/plotastic/blob/main/src/plotastic/stat/assumptions.py"
249 |     click Omnibus href "https://github.com/markur4/plotastic/blob/main/src/plotastic/stat/omnibus.py"
250 |     click PostHoc href "https://github.com/markur4/plotastic/blob/main/src/plotastic/stat/posthoc.py"
251 | 
252 |     %% plotting
253 |     click rc href "https://github.com/markur4/plotastic/blob/main/src/plotastic/plotting/rc.py"
254 |     click SubPlot href "https://github.com/markur4/plotastic/blob/main/src/plotastic/plotting/subplot.py"
255 |     click Plot href "https://github.com/markur4/plotastic/blob/main/src/plotastic/plotting/plot.py"
256 |     click PlotEdits href "https://github.com/markur4/plotastic/blob/main/src/plotastic/plotting/plotedits.py"
257 |     click MultiPlot href "https://github.com/markur4/plotastic/blob/main/src/plotastic/plotting/multiplot.py"
258 | 
259 |     %% dataanalysis
260 |     click Annotator href "https://github.com/markur4/plotastic/blob/main/src/plotastic/dataanalysis/annotator.py"
261 |     click Filer href "https://github.com/markur4/plotastic/blob/main/src/plotastic/dataanalysis/filer.py"
262 |     click DataAnalysis href "https://github.com/markur4/plotastic/blob/main/src/plotastic/dataanalysis/dataanalysis.py"
263 | 
264 | 
265 | 
266 | 
--------------------------------------------------------------------------------
/devtools/readme_for_pypi.py:
--------------------------------------------------------------------------------
1 | """Remove parts from README.md that PyPI can't handle, by stripping sections
2 | enclosed by marker lines"""
3 | 
4 | # %%
5
| import argparse 6 | 7 | # %% 8 | 9 | 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument("-i", "--input", default="README.md") 12 | 13 | 14 | 15 | # %% 16 | def open_readme(path: str) -> str: 17 | with open(path, "r") as f: 18 | README = f.read() 19 | return README 20 | 21 | 22 | def write_readme(path: str, text: str) -> None: 23 | with open(path, "w") as f: 24 | f.write(text) 25 | 26 | 27 | # %% 28 | if __name__ == "__main__": 29 | # readme_in = os.path.join("..", "README.md") # :: 30 | # readme_out = os.path.join("..", "README_pypi.md") # :: 31 | args = parser.parse_args() # :: 32 | readme_in = args.input # :: 33 | readme_out = "README_pypi.md" # :: 34 | 35 | README = open_readme(readme_in) 36 | print(README) 37 | 38 | # %% 39 | split = README.split("") 40 | split 41 | # %% 42 | ### Remove those parts from readme that end with 43 | split_r = [p for p in split if not "" in p] 44 | 45 | joined = "\n".join(split_r) 46 | 47 | # %% 48 | write_readme(readme_out, joined) 49 | -------------------------------------------------------------------------------- /devtools/setupvenv.py: -------------------------------------------------------------------------------- 1 | # 2 | # %% 3 | from pathlib import Path 4 | import shutil 5 | import venv 6 | import subprocess as sp 7 | 8 | 9 | # %% 10 | ### Define paths 11 | def makepath(*args) -> str: 12 | return str(Path(*args)) 13 | 14 | # :: Check Project Root! 15 | ROOT = ".." 16 | 17 | PROJ = makepath(ROOT, ".") # > Project location for editable install 18 | VENV = makepath(ROOT, "venv") # > Virtual environment location 19 | PYTHON = makepath(ROOT, "venv", "bin", "python") # > Python executable 20 | REQUIREMENTS = makepath(ROOT, "requirements.txt") 21 | 22 | 23 | # %% 24 | ### make virtual environment 25 | # > Delete venv if it exists 26 | if Path(VENV).exists(): 27 | shutil.rmtree(VENV) 28 | 29 | venv.create(VENV, with_pip=True) 30 | 31 | 32 | # %% 33 | ### Install this project 34 | sp.run([PYTHON, "-m", "pip", "install", "-e", PROJ]) 35 | # %% 36 | ### Create requirements.txt 37 | with open(REQUIREMENTS, "w") as f: 38 | sp.call( 39 | [ 40 | PYTHON, 41 | "-m", 42 | "pip", 43 | "freeze", 44 | "--exclude-editable", 45 | "-l", 46 | ">", 47 | REQUIREMENTS, 48 | ], 49 | stdout=f, 50 | ) 51 | # %% 52 | ### Install devtools 53 | sp.run([PYTHON, "-m", "pip", "install", "-e", f"{PROJ}[dev]"]) 54 | 55 | 56 | # %% 57 | #:: Switch to venv !! ================================================== 58 | # %% 59 | ### test packages 60 | import numpy as np 61 | 62 | np.__version__ 63 | 64 | # %% 65 | import pytest 66 | 67 | pytest.__version__ 68 | 69 | 70 | # %% 71 | ### Make a user venv 72 | import venv 73 | venv.create("venv_user", with_pip=True) -------------------------------------------------------------------------------- /devtools/upload_to_pypi.sh: -------------------------------------------------------------------------------- 1 | 2 | # Convert README.md to README_pypi.md 3 | python devtools/readme_for_pypi.py -i README.md 4 | 5 | # Update version on pyproject.toml 6 | # !! 
Don't do this, removes comments 7 | # python devtools/update_version.py -i pyproject.toml -o pyproject_test.toml 8 | 9 | # Remove old build 10 | rm -r dist 11 | 12 | # BUILD 13 | python -m build 14 | 15 | # Require API token if two-factor was enabled 16 | # provide API token as password 17 | twine upload --repository testpypi dist/* --username __token__ 18 | 19 | # UPLOAD TO REAL PyPi 20 | twine upload dist/* --username __token__ 21 | 22 | # use this in a venv 23 | pip install -i https://test.pypi.org/simple/ plotastic -------------------------------------------------------------------------------- /paper.bib: -------------------------------------------------------------------------------- 1 | @misc{charlierTrevismdStatannotationsV02022, 2 | title = {Trevismd/Statannotations: V0.5}, 3 | shorttitle = {Trevismd/Statannotations}, 4 | author = {Charlier, Florian and Weber, Marc and Izak, Dariusz and Harkin, Emerson and Magnus, Marcin and Lalli, Joseph and Fresnais, Louison and Chan, Matt and Markov, Nikolay and Amsalem, Oren and Proost, Sebastian and {Agamemnon Krasoulis} and {Getzze} and Repplinger, Stefan}, 5 | year = {2022}, 6 | month = oct, 7 | doi = {10.5281/ZENODO.7213391}, 8 | url = {https://zenodo.org/record/7213391}, 9 | urldate = {2023-11-16}, 10 | abstract = {Add scipy's Brunner-Munzel test Fix applying statannotations for non-string group labels (Issue \#65) Get Zenodo DOI}, 11 | copyright = {Open Access}, 12 | howpublished = {Zenodo} 13 | } 14 | 15 | @article{hunterMatplotlib2DGraphics2007, 16 | title = {Matplotlib: {{A 2D Graphics Environment}}}, 17 | shorttitle = {Matplotlib}, 18 | author = {Hunter, John D.}, 19 | year = {2007}, 20 | month = may, 21 | journal = {Computing in Science \& Engineering}, 22 | volume = {9}, 23 | number = {3}, 24 | pages = {90--95}, 25 | issn = {1558-366X}, 26 | doi = {10.1109/MCSE.2007.55}, 27 | url = {https://ieeexplore.ieee.org/document/4160265}, 28 | urldate = {2023-11-15}, 29 | abstract = {Matplotlib is a 2D graphics package used for Python for application development, interactive scripting,and publication-quality image generation across user interfaces and operating systems}, 30 | file = {/Users/martinkuric/Zotero/storage/W4FJZDNY/§-hunterMatplotlib2DGraphics2007.pdf;/Users/martinkuric/Zotero/storage/GW3HZZHR/4160265.html} 31 | } 32 | 33 | @inproceedings{mckinneyDataStructuresStatistical2010, 34 | title = {Data {{Structures}} for {{Statistical Computing}} in {{Python}}}, 35 | author = {McKinney, Wes}, 36 | year = {2010}, 37 | month = jan, 38 | pages = {56--61}, 39 | doi = {10.25080/Majora-92bf1922-00a} 40 | } 41 | 42 | @article{mckinneyPandasFoundationalPython2011, 43 | title = {Pandas: A {{Foundational Python Library}} for {{Data Analysis}} and {{Statistics}}}, 44 | shorttitle = {Pandas}, 45 | author = {Mckinney, Wes}, 46 | year = {2011}, 47 | month = jan, 48 | journal = {Python High Performance Science Computer}, 49 | abstract = {---In this paper we will discuss pandas, a Python library of rich data structures and tools for working with structured data sets common to statistics, finance, social sciences, and many other fields. The library provides integrated, intuitive routines for performing common data manipulations and analysis on such data sets. It aims to be the foundational layer for the future of statistical computing in Python. It serves as a strong complement to the existing scientific Python stack while implementing and improving upon the kinds of data manipulation tools found in other statistical programming languages such as R. 
In addition to detailing its design and features of pandas, we will discuss future avenues of work and growth opportunities for statistics and data analysis applications in the Python language.}, 50 | file = {/Users/martinkuric/Zotero/storage/IH5C5UZ3/§-mckinneyPandasFoundationalPython2011.pdf} 51 | } 52 | 53 | @misc{reback2020pandas, 54 | title = {Pandas-Dev/Pandas: {{Pandas}}}, 55 | author = {The Pandas Development Team}, 56 | year = {2020}, 57 | month = feb, 58 | doi = {10.5281/zenodo.3509134}, 59 | url = {https://doi.org/10.5281/zenodo.3509134}, 60 | howpublished = {Zenodo} 61 | } 62 | 63 | @article{vallatPingouinStatisticsPython2018, 64 | title = {Pingouin: Statistics in {{Python}}}, 65 | shorttitle = {Pingouin}, 66 | author = {Vallat, Raphael}, 67 | year = {2018}, 68 | month = nov, 69 | journal = {Journal of Open Source Software}, 70 | volume = {3}, 71 | number = {31}, 72 | pages = {1026}, 73 | issn = {2475-9066}, 74 | doi = {10.21105/joss.01026}, 75 | url = {https://joss.theoj.org/papers/10.21105/joss.01026}, 76 | urldate = {2023-05-29}, 77 | abstract = {Vallat, (2018). Pingouin: statistics in Python. Journal of Open Source Software, 3(31), 1026, https://doi.org/10.21105/joss.01026}, 78 | langid = {english}, 79 | file = {/Users/martinkuric/Zotero/storage/ECARCXLJ/§-vallatPingouinStatisticsPython2018.pdf} 80 | } 81 | 82 | @article{waskomSeabornStatisticalData2021, 83 | title = {Seaborn: Statistical Data Visualization}, 84 | shorttitle = {Seaborn}, 85 | author = {Waskom, Michael L.}, 86 | year = {2021}, 87 | month = apr, 88 | journal = {Journal of Open Source Software}, 89 | volume = {6}, 90 | number = {60}, 91 | pages = {3021}, 92 | issn = {2475-9066}, 93 | doi = {10.21105/joss.03021}, 94 | url = {https://joss.theoj.org/papers/10.21105/joss.03021}, 95 | urldate = {2023-03-26}, 96 | abstract = {Waskom, M. L., (2021). seaborn: statistical data visualization. Journal of Open Source Software, 6(60), 3021, https://doi.org/10.21105/joss.03021}, 97 | langid = {english}, 98 | file = {/Users/martinkuric/Zotero/storage/2ZWPNQDG/§-waskomSeabornStatisticalData2021.pdf} 99 | } 100 | 101 | @article{wickhamTidyData2014a, 102 | title = {Tidy {{Data}}}, 103 | author = {Wickham, Hadley}, 104 | year = {2014}, 105 | month = sep, 106 | journal = {Journal of Statistical Software}, 107 | volume = {59}, 108 | pages = {1--23}, 109 | issn = {1548-7660}, 110 | doi = {10.18637/jss.v059.i10}, 111 | url = {https://doi.org/10.18637/jss.v059.i10}, 112 | urldate = {2023-11-15}, 113 | abstract = {A huge amount of effort is spent cleaning data to get it ready for analysis, but there has been little research on how to make data cleaning as easy and effective as possible. This paper tackles a small, but important, component of data cleaning: data tidying. Tidy datasets are easy to manipulate, model and visualize, and have a specific structure: each variable is a column, each observation is a row, and each type of observational unit is a table. This framework makes it easy to tidy messy datasets because only a small set of tools are needed to deal with a wide range of un-tidy datasets. This structure also makes it easier to develop tidy tools for data analysis, tools that both input and output tidy datasets. 
The advantages of a consistent data structure and matching tools are demonstrated with a case study free from mundane data manipulation chores.}, 114 | copyright = {Copyright (c) 2013 Hadley Wickham}, 115 | langid = {english} 116 | } 117 | -------------------------------------------------------------------------------- /paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/paper.pdf -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | #' https://setuptools.readthedocs.io/en/latest/setuptools.html#metadata 2 | 3 | 4 | # https://setuptools.pypa.io/en/latest/references/keywords.html 5 | 6 | ### LAYOUT: 7 | #' project_root/ # Project root: 'plotastic' 8 | #' ├── .gitattributes 9 | #' ├── .gitignore 10 | #' ├── LICENSE 11 | #' ├── MANIFEST.in 12 | #' ├── README.md 13 | #' ├── pyproject.toml 14 | #' ├── requirements.txt 15 | #' ├── (setup.cfg) # No longer needed, but still supported 16 | #' ├── (paper.md) # For publication 17 | #' ├── ... 18 | #' └── src/ # Source root 19 | #' └── package/ # Package root: 'plotastic' 20 | #' ├── __init__.py 21 | #' ├── .vscode 22 | #' ├── py.typed 23 | #' ├── ... 24 | #' ├── (module.py) 25 | #' ├── subpkg1/ # Subpackage root: 'plotastic.dimensions' 26 | #' │ ├── __init__.py 27 | #' │ ├── ... 28 | #' │ └── module1.py 29 | #' └── subpkg2/ # Subpackage root: 'plotastic.plotting' 30 | #' ├── __init__.py 31 | #' ├── ... 32 | #' └── module2.py 33 | 34 | [build-system] # ======================================================= 35 | requires = ["setuptools", "setuptools-scm"] 36 | build-backend = "setuptools.build_meta" 37 | 38 | 39 | [project] # ============================================================ 40 | name = "plotastic" 41 | version = "0.1.1" # ' ... 42 | authors = [{ name = "Martin Kuric", email = "martin.kur4@gmail.com" }] 43 | description = "Streamlining statistical analysis by using plotting keywords in Python." 44 | readme = "README_pypi.md" 45 | license = { file = "LICENSE" } # ' or { text = "GPLv3" } 46 | keywords = [ 47 | "plotting", 48 | "statistics", 49 | "data analysis", 50 | "data visualization", 51 | "data science", 52 | "data", 53 | "science", 54 | "visualization", 55 | ] 56 | classifiers = [ 57 | "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", 58 | "Development Status :: 5 - Production/Stable", 59 | "Framework :: IPython", 60 | "Framework :: Jupyter", 61 | "Intended Audience :: Science/Research", 62 | "Intended Audience :: Education", 63 | "Intended Audience :: Healthcare Industry", 64 | "Intended Audience :: Financial and Insurance Industry", 65 | "Topic :: Scientific/Engineering", 66 | "Topic :: Scientific/Engineering :: Visualization", 67 | "Topic :: Scientific/Engineering :: Information Analysis", 68 | "Topic :: Scientific/Engineering :: Bio-Informatics", 69 | "Programming Language :: Python :: 3.11", 70 | ] 71 | ### Python version 72 | #' Lower versions than 3.11 have not been tested 73 | requires-python = ">=3.11" 74 | 75 | ### Dependencies 76 | #' Specify version only if concrete incompatibilities exist 77 | dependencies = [ 78 | #* Core 79 | "numpy", 80 | "pandas==1.5.3", #!! pingouin Not working with pandas 2.0 yet 81 | # * Plotting 82 | "matplotlib", 83 | "seaborn<=0.12.2", #!! 0.13 has issues with hue 84 | "Pillow>=10.2.0", #!! 
github security risk
85 |     #* Statistics
86 |     "scipy",
87 |     # "statannot", #' Superseded by statannotations
88 |     "statannotations",
89 |     "pingouin",
90 |     #* Excel
91 |     "xlsxwriter", #' For saving results to excel
92 |     "openpyxl", #' Optional for Pandas, but error when not installed
93 |     #* Misc
94 |     "joblib", #' Caching
95 |     "colour", #' For custom colour maps
96 |     "ipynbname", #' Used by utils
97 |     "icecream", #' Better than print (and maybe later logging)
98 | ]
99 | 
100 | ### Dynamic fields
101 | # dynamic = ["version"]
102 | 
103 | 
104 | [project.optional-dependencies] # ======================================
105 | ### Install with:
106 | #' $ pip install plotastic[dev]
107 | dev = [
108 |     "pytest",
109 |     "ipytest",
110 |     "pytest-cov", # * Displays how much of code was covered by testing
111 |     "pytest-xdist", # * Parallel testing
112 |     "nbconvert", # * For converting notebooks to markdown
113 |     "build", # * For building the package into dist
114 |     "twine", # * For uploading to PyPI
115 | ]
116 | 
117 | 
118 | [project.urls] # =======================================================
119 | "Homepage" = "https://github.com/markur4/plotastic"
120 | "Documentation" = "https://github.com/markur4/plotastic"
121 | "Source Code" = "https://github.com/markur4/plotastic"
122 | "Bug Reports" = "https://github.com/markur4/plotastic/issues"
123 | # "Funding" = "https://donate.pypi.org"
124 | 
125 | 
126 | [tool.setuptools] # ====================================================
127 | # package-data = { "example_data" = ["*.xlsx"]}
128 | include-package-data = true # ? Defaults to true, should I keep this?
129 | 
130 | [tool.setuptools.packages.find]
131 | where = ["src"] # ? it also worked without this..?
132 | 
133 | ### Package-data handled in MANIFEST.in
134 | # [tool.setuptools.exclude-package-data]
135 | # plotastic = [".vscode"]
136 | 
137 | # [tool.setuptools.package-data]
138 | # "*" = ["LICENSE"]
139 | # plotastic = ["example_data/*.xlsx"]
140 | 
--------------------------------------------------------------------------------
/qpcr1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/qpcr1.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | appnope==0.1.3
2 | asttokens==2.4.1
3 | colorama==0.4.6
4 | colour==0.1.5
5 | comm==0.2.1
6 | contourpy==1.2.0
7 | cycler==0.12.1
8 | debugpy==1.8.0
9 | decorator==5.1.1
10 | et-xmlfile==1.1.0
11 | executing==2.0.1
12 | fonttools==4.47.2
13 | icecream==2.1.3
14 | ipykernel==6.29.0
15 | ipynbname==2023.2.0.0
16 | ipython==8.20.0
17 | jedi==0.19.1
18 | joblib==1.3.2
19 | jupyter_client==8.6.0
20 | jupyter_core==5.7.1
21 | kiwisolver==1.4.5
22 | matplotlib==3.8.2
23 | matplotlib-inline==0.1.6
24 | nest-asyncio==1.6.0
25 | numpy==1.26.3
26 | openpyxl==3.1.2
27 | packaging==23.2
28 | pandas==1.5.3
29 | pandas-flavor==0.6.0
30 | parso==0.8.3
31 | patsy==0.5.6
32 | pexpect==4.9.0
33 | pillow==10.2.0
34 | pingouin==0.5.4
35 | platformdirs==4.1.0
36 | prompt-toolkit==3.0.43
37 | psutil==5.9.8
38 | ptyprocess==0.7.0
39 | pure-eval==0.2.2
40 | Pygments==2.17.2
41 | pyparsing==3.1.1
42 | python-dateutil==2.8.2
43 | pytz==2023.3.post1
44 | pyzmq==25.1.2
45 | scikit-learn==1.4.0
46 | scipy==1.12.0
47 | seaborn==0.11.2
48 | six==1.16.0
49 | stack-data==0.6.3
50 | statannotations==0.6.0
51 | statsmodels==0.14.1
52 | tabulate==0.9.0
53 | threadpoolctl==3.2.0
54 | tornado==6.4
55 | traitlets==5.14.1
56 | wcwidth==0.2.13
57 | xarray==2024.1.1
58 | XlsxWriter==3.1.9
59 | 
--------------------------------------------------------------------------------
/src/plotastic/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | # == Metadata ==========================================================
3 | from importlib import metadata
4 | 
5 | # ? https://packaging.python.org/guides/single-sourcing-package-version/
6 | # ? Do we need this?
7 | __version__ = metadata.version(__name__)
8 | __author__ = "markur4"
9 | 
10 | 
11 | # == Flatten module access ============================================
12 | from .plotting.rc_utils import set_style, print_styles, set_palette
13 | from .dataanalysis.dataanalysis import DataAnalysis
14 | from .example_data.load_dataset import load_dataset
15 | from .utils import utils
16 | 
17 | 
18 | # == __all__ ===========================================================
19 | __all__ = [  #' Entries must be strings, otherwise star-imports fail
20 |     "DataAnalysis",
21 |     "set_style",
22 |     "print_styles",
23 |     "set_palette",
24 |     "load_dataset",
25 | ]
26 | 
--------------------------------------------------------------------------------
/src/plotastic/caches.py:
--------------------------------------------------------------------------------
1 | # %%
2 | 
3 | 
4 | import os
5 | from plotastic.utils.subcache import SubCache
6 | 
7 | # %%
8 | ### Define Home
9 | home = os.path.join(
10 |     os.path.expanduser("~"),
11 |     ".cache",
12 | )
13 | 
14 | # == Define SubCaches ==================================================
15 | #' Define a Memory object for different purposes
16 | MEMORY_UTILS = SubCache(
17 |     location=home,
18 |     assert_parent=".cache",
19 |     subcache_dir="plotastic_utils",
20 | )
21 | # MEMORY_PLOTTING = SubCache(
22 | #     location=home,
23 | #     assert_parent=".cache",
24 | #     subcache_dir="plotastic_plotting",
25 | # )
26 | 
27 | ### Cache like this:
28 | # def sleep(seconds):
29 | #     import time
30 | #     time.sleep(seconds)
31 | 
32 | # sleep = caches.MEMORY_UTILS.subcache(sleep)
33 | 
34 | # == Utilities ====================================
35 | if __name__ == "__main__":
36 |     pass
37 |     # %%
38 |     ### View Contents
39 |     MEMORY_UTILS.list_dirs()
40 |     # %%
41 |     # Clear Caches
42 |     # MEMORY_UTILS.clear()
--------------------------------------------------------------------------------
/src/plotastic/dataanalysis/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/src/plotastic/dataanalysis/__init__.py
--------------------------------------------------------------------------------
/src/plotastic/dataanalysis/dataanalysis.py:
--------------------------------------------------------------------------------
1 | # !!
2 | 
3 | # %% Imports
4 | from typing import TYPE_CHECKING
5 | 
6 | from copy import deepcopy
7 | 
8 | # from typing import Self # !! only for python 3.11.
Not really needed, since "DataAnalysis" as typehint works with vscode 9 | 10 | from pathlib import Path 11 | import pickle 12 | 13 | import pandas as pd 14 | 15 | import matplotlib.pyplot as plt 16 | from matplotlib.figure import Figure 17 | 18 | from plotastic import docstrings 19 | 20 | # import markurutils as ut 21 | import plotastic.utils.utils as ut 22 | from plotastic.dataanalysis.annotator import Annotator 23 | from plotastic.dataanalysis.filer import Filer 24 | 25 | # from statresult import StatResult 26 | if TYPE_CHECKING: 27 | import matplotlib as mpl 28 | from matplotlib.transforms import Bbox 29 | 30 | # %% Class DataAnalysis 31 | 32 | 33 | class DataAnalysis(Annotator): 34 | # == __init__ ====================================================== 35 | def __init__( 36 | self, 37 | data: pd.DataFrame, 38 | dims: dict, 39 | subject: str = None, 40 | levels: list[tuple[str]] = None, 41 | title: str = "untitled", 42 | verbose=True, 43 | ) -> "DataAnalysis": 44 | ### Inherit 45 | # !! verbosity set to False, since each subclass shouldn't test its own DataFrame 46 | dataframetool_kws = dict( 47 | data=data, 48 | dims=dims, 49 | subject=subject, 50 | levels=levels, 51 | ) 52 | super().__init__(**dataframetool_kws) 53 | 54 | self._title = title 55 | self.filer = Filer(title=title) 56 | 57 | if verbose: 58 | self.data_check_integrity() 59 | 60 | # self.plot = plot 61 | ### statistics 62 | # self.test = Test() 63 | 64 | # == 65 | # == TITLE ========================================================= 66 | 67 | @property 68 | def title(self) -> str: 69 | return self._title 70 | 71 | @title.setter 72 | def title(self, value): 73 | self._title = value 74 | self.filer.title = value 75 | 76 | def title_add( 77 | self, 78 | to_end: str = "", 79 | to_start: str = "", 80 | con: str = "_", 81 | inplace=False, 82 | ) -> "DataAnalysis": 83 | """Adds string to start and/or end of title 84 | 85 | 86 | :param to_start: str, optional (default="") 87 | String to add to start of title 88 | :param to_end: str, optional (default="") 89 | String to add to end of title 90 | :param con: str, optional (default="_") 91 | Conjunction-character to put between string addition and original title 92 | :return: str 93 | """ 94 | a: "DataAnalysis" = self if inplace else deepcopy(self) 95 | 96 | if to_start: 97 | a.title = f"{to_start}{con}{a.title}" 98 | if to_end: 99 | a.title = f"{a.title}{con}{to_end}" 100 | return a 101 | 102 | # == 103 | # == Saving stuff ================================================== 104 | 105 | @docstrings.subst(param_overwrite=docstrings.param_overwrite) 106 | def save_statistics( 107 | self, 108 | fname: str = "plotastic_results", 109 | overwrite: str | bool = "day", 110 | ) -> None: 111 | """Exports all statistics to one excel file. 
Different sheets for different 112 | tests 113 | 114 | :param overwrite: {param_overwrite} 115 | :param out: Path to save excel file, optional (default="") 116 | :type out: str, optional 117 | """ 118 | 119 | ### Overwrite Protection 120 | if (not overwrite and not overwrite is None) or isinstance( 121 | overwrite, str 122 | ): 123 | fname = self.filer.prevent_overwrite( 124 | fname=fname, overwrite=overwrite 125 | ) 126 | 127 | ### Save Statistics 128 | self.results.save(fname=fname) 129 | 130 | # @docstrings.subst(param_overwrite=docstrings.param_overwrite) 131 | # def save_fig( 132 | # self, 133 | # fname: str | Path = "plotastic_results", 134 | # format: str = "pdf", 135 | # fig: Figure = None, 136 | # overwrite: str | bool = "day", #' Added overwrite protection 137 | # dpi: int | str = 300, # !! mpl default is "figure" 138 | # bbox_inches: "str | Bbox" = "tight", 139 | # pad_inches: float = 0.1, 140 | # facecolor: str = "none", # !! mpl default is "auto", using current figure facecolor 141 | # edgecolor: str = "none", # !! mpl default is "auto", using current figure edgecolor 142 | # backend: str = None, 143 | # **user_kwargs, 144 | # ) -> "DataAnalysis": 145 | # """Calls plt.figure.Figure.savefig(). Also provides an overwrite protection 146 | 147 | # {param_overwrite} 148 | # :param fname: A path, or a Python file-like object. If format is set, it 149 | # determines the output format, and the file is saved as fname. Note that 150 | # fname is used verbatim, and there is no attempt to make the extension, if 151 | # any, of fname match format, and no extension is appended. 152 | 153 | # If format is not set, then the format is inferred from the extension of 154 | # fname, if there is one. If format is not set and fname has no extension, 155 | # then the file is saved with rcParams["savefig.format"] (default: 'png') and 156 | # the appropriate extension is appended to fname., defaults to 157 | # "plotastic_results" 158 | # :type fname: str | path.Path, optional 159 | # :param format: The file format, e.g. 'png', 'pdf', 'svg', ... The behavior when 160 | # this is unset is documented under fname., defaults to "pdf" 161 | # :type format: str, optional 162 | # :param dpi: The resolution in dots per inch. If 'figure', use the figure's dpi 163 | # value., defaults to 300 164 | # :type dpi: int, optional 165 | # :param bbox_inches: Bounding box in inches: only the given portion of the figure 166 | # is saved. If 'tight', try to figure out the tight bbox of the figure., 167 | # defaults to "tight" 168 | # :type bbox_inches: str | plt.Bbox, optional 169 | # :param pad_inches: Amount of padding in inches around the figure when 170 | # bbox_inches is 'tight'. If 'layout' use the padding from the constrained or 171 | # compressed layout engine; ignored if one of those engines is not in use, 172 | # defaults to 0.1 173 | # :type pad_inches: float, optional 174 | # :param facecolor: The facecolor of the figure. If 'auto', use the current figure 175 | # facecolor., defaults to "auto" 176 | # :type facecolor: str, optional 177 | # :param edgecolor: The edgecolor of the figure. If 'auto', use the current figure 178 | # edgecolor., defaults to "auto" 179 | # :type edgecolor: str, optional 180 | # :param backend: The backend to use for the rendering. 
If None, use 181 | # rcParams["savefig.backend"], otherwise use backend, defaults to None 182 | # :type backend: str, optional 183 | # :param user_kwargs: Additional kwargs passed to plt.figure.Figure.savefig() 184 | # """ 185 | 186 | # ### Gather arguments 187 | # kwargs = dict( 188 | # # fname=self.title, # !! pass it directly 189 | # format=format, 190 | # dpi=dpi, 191 | # bbox_inches=bbox_inches, 192 | # pad_inches=pad_inches, 193 | # facecolor=facecolor, 194 | # edgecolor=edgecolor, 195 | # backend=backend, 196 | # ) 197 | # kwargs.update(**user_kwargs) #' Add user kwargs 198 | 199 | # ### Overwrite protection 200 | # if (not overwrite and not overwrite is None) or isinstance(overwrite, str): 201 | # fname = self.filer.prevent_overwrite(filename=fname, mode=overwrite) 202 | 203 | # ### Add Suffix 204 | # fname = Path(fname).with_suffix("." + format) 205 | 206 | # ### take figure 207 | # if fig is None: 208 | # fig = self.fig 209 | # fig.savefig(fname, **kwargs) 210 | 211 | # ### Save figure 212 | # # Not working, self.fig is never updated during plotting (only axes?) 213 | # # self.fig.savefig(fname, **kwargs) 214 | # # plt.savefig(fname, **kwargs) 215 | 216 | # return self 217 | 218 | # def save_all( 219 | # self, 220 | # fname: str = "plotastic_results", 221 | # overwrite: str | bool = "day", 222 | # savefig_kws: dict = None, 223 | # ) -> None: 224 | # """Exports all files stored in DataAnalysis object 225 | 226 | # :param fname: Path to save excel file, optional (default="") 227 | # :type fname: str, optional 228 | # :param overwrite: Mode of overwrite protection. If "day", it simply adds the 229 | # current date at the end of the filename, causing every output on the same 230 | # day to overwrite itself. If "nothing" ["day", "nothing"], files with the 231 | # same filename will be detected in the current work directory and a number 232 | # will be added to the filename. 
If True, everything will be overwritten., 233 | # defaults to "day" 234 | # :type overwrite: str|bool, optional 235 | # :param savefig_kws: Additional kwargs passed to plt.figure.Figure.savefig() 236 | # :type savefig_kws: dict, optional 237 | # """ 238 | 239 | # ### Gather Arguments 240 | # if savefig_kws is None: 241 | # savefig_kws = dict() 242 | 243 | # # if (not overwrite and not overwrite is None) or isinstance(overwrite, str): 244 | # # fname = self.filer.prevent_overwrite(filename=fname, mode=overwrite) 245 | 246 | # self.save_statistics(fname=fname, overwrite=overwrite) 247 | # self.save_fig(fname=fname, overwrite=overwrite, **savefig_kws) 248 | 249 | # @staticmethod 250 | # def _redraw_fig(fig): 251 | # """create a dummy figure and use its manager to display "fig" """ 252 | # dummy = plt.figure() #' Make empty figure 253 | # new_manager = dummy.canvas.manager #' Get the figure's manager 254 | # new_manager.canvas.figure = fig #' Associate it with the figure 255 | # fig.set_canvas(new_manager.canvas) 256 | # return fig 257 | 258 | 259 | # %% 260 | if __name__ == "__main__": 261 | from plotastic.example_data.load_dataset import load_dataset 262 | 263 | DF, dims = load_dataset("qpcr") 264 | DA = DataAnalysis(DF, dims) 265 | 266 | # %% Fill DA with stuff 267 | 268 | 269 | # %% 270 | -------------------------------------------------------------------------------- /src/plotastic/dataanalysis/filer.py: -------------------------------------------------------------------------------- 1 | # 2 | # %% Imports 3 | 4 | # import sys 5 | 6 | # import ipynbname 7 | # import IPython 8 | 9 | # import inspect 10 | 11 | import re 12 | 13 | # import markurutils as ut 14 | import plotastic.utils.utils as ut 15 | 16 | # from IPython import get_ipython 17 | 18 | from datetime import date 19 | 20 | from pathlib import Path 21 | from typing import Any 22 | 23 | from plotastic import docstrings 24 | 25 | 26 | # %% Class Filer 27 | class Filer: 28 | 29 | """A class to handle file operations. 30 | - It reads the name of the current file, and sets it as the default filename for 31 | saving. 32 | - Provides function for overwrite protection. 33 | - More stuff coming..? 34 | 35 | 36 | """ 37 | 38 | DEFAULT_TITLE = "plotastic_result" 39 | 40 | # == 41 | # == __init__ ====================================================================== 42 | 43 | def __init__(self, title: str): 44 | self.title = title 45 | 46 | # == 47 | # == Time info ===================================================================== 48 | 49 | @property 50 | def current_day(self) -> str: 51 | return date.today().strftime("%y%m%d") 52 | 53 | # == 54 | # == Properties of Users's Script ================================================== 55 | 56 | @staticmethod 57 | def _prevent_overwrite_all(filename: str) -> str: 58 | """Returns a new filename that has a number at the end, if the filename already 59 | exists. 60 | - Checks filenames in path that are similar to filename 61 | - If there are similar filenames with an index behind them, it gets the largest 62 | index 63 | - Adds plus one to that index and puts it at the end of filenames 64 | 65 | :param filename: filename. 66 | :type filename: str 67 | :return: str 68 | """ 69 | 70 | ### Get a list of filenames that might be overwritten 71 | files = ut.glob_searchfilename( 72 | path=Path.cwd(), 73 | filename=filename, 74 | rettype="str", 75 | ) 76 | 77 | ### Define Pattern Rules: 78 | #' Between Start (^) and end ($) of line 79 | # -- fname: Match all characters non-greedy ( .*? ) 80 | # !! 
fname: Match exact string 81 | #' index: : 1 through 3 repetitions of single digit ( \d{1,3} ) 82 | # regex = r"^(?P.*?)_(?P\d{1,2})$" # ? old one 83 | regex = r"^(?P" + filename + r")_(?P\d{1,3})$" 84 | ### Get matches 85 | pattern = re.compile(regex, flags=re.MULTILINE) 86 | matches: list[dict] = ut.re_matchgroups(pattern=pattern, string=files) 87 | ### Extract their indices 88 | indices: list[int] = [int(matchD["index"]) for matchD in matches] 89 | ### fnames are never used 90 | # fnames: list[str] = [matchD["fname"] for matchD in matches] 91 | 92 | ### Add plus one to max index 93 | newindex = 0 94 | if indices: 95 | newindex = max(indices) + 1 96 | 97 | return f"{filename}_{newindex}" 98 | 99 | @docstrings.subst(param_overwrite=docstrings.param_overwrite) 100 | def prevent_overwrite( 101 | self, fname: "str | Path", overwrite: str = "day" 102 | ) -> str: 103 | """Returns a new filename that has a number or current date at the end to enable 104 | different modes of overwriting protection. 105 | 106 | :param fname: filename to be protected from overwriting 107 | :type fname: str | Path 108 | :param overwrite: {param_overwrite} 109 | :return: filename that is protected from overwriting by adding either number or 110 | the current date at its end 111 | :rtype: str 112 | """ 113 | overwrite_args = ["day", "daily", "nothing", True, False] 114 | assert ( 115 | overwrite in overwrite_args 116 | ), f"overwrite must be one of {overwrite_args}, not {overwrite}" 117 | 118 | ### Convert to string if path 119 | fname = str(fname) if isinstance(fname, Path) else fname 120 | ### Remove suffix 121 | fname = fname.split(".")[0] 122 | 123 | if overwrite in ["day", "daily"]: #' "day" 124 | fname = f"{fname}_{self.current_day}" 125 | elif overwrite in ["nothing", False]: #' "nothing" 126 | fname = self._prevent_overwrite_all(filename=fname) 127 | 128 | return fname 129 | -------------------------------------------------------------------------------- /src/plotastic/dimensions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/src/plotastic/dimensions/__init__.py -------------------------------------------------------------------------------- /src/plotastic/dimensions/dims.py: -------------------------------------------------------------------------------- 1 | # 2 | # %% Importds 3 | 4 | from typing import TYPE_CHECKING 5 | 6 | from typing import Dict, Literal 7 | from copy import copy, deepcopy 8 | 9 | if TYPE_CHECKING: 10 | from plotastic.dataanalysis.dataanalysis import DataAnalysis 11 | 12 | 13 | # TODO maybe refactor this to specify statistical type of data 14 | # class Dimension: 15 | # def __init__( 16 | # self, name: str, scale_of_measurement: Literal["nominal", "ordinal", "cardinal"] 17 | # ) -> "Dimension": 18 | # """_summary_ 19 | 20 | # Args: 21 | # name (str): _description_ 22 | # scale_of_measurement (str): 23 | # (https://en.wikipedia.org/wiki/Level_of_measurement) 24 | # What's the scale of measurement? 25 | # * Nominal data: 26 | # * Categorical data that has no order 27 | # * e.g. colors, names, etc. 28 | # * Ordinal data: 29 | # * Categorical data that has order 30 | # * e.g. grades, sizes, etc. 31 | # * This works independently from ordered pd.Categorical type. That is used to place plots in the right order. 
32 | # * Cardinal data: 33 | # * Numerical data that has order 34 | # * Three types: interval, ratio, and absolute 35 | # * Don't let them confuse you. It's an old scale and is often contested 36 | # * Interval data: 37 | # * Numerical data that has order and equal intervals 38 | # * e.g. temperature [°C], dates, etc. 39 | # * Ratio data: 40 | # * Numerical data that has order, equal intervals, and a true zero 41 | # * Might not have a unit, since it was divided by itself 42 | # * e.g. temperature [Kelvin], height, weight, etc. 43 | # * Absolute data: 44 | # * Numerical data that has order, equal intervals, a true zero, and an absolute scale 45 | 46 | # Returns: 47 | # Dimension: _description_ 48 | # """ 49 | # #' "nominal", "ordinal", "interval", "ratio 50 | 51 | # self.name = name 52 | # self.som = scale_of_measurement 53 | 54 | # # 55 | # # 56 | 57 | # %% class Dims 58 | 59 | 60 | class Dims: 61 | # == Init ................................................................ 62 | 63 | def __init__( 64 | self, 65 | y: str, 66 | x: str, 67 | hue: str = None, 68 | row: str = None, 69 | col: str = None, 70 | ): 71 | ### Define Dims 72 | self.y = y 73 | self.x = x 74 | self.hue = hue 75 | self.row = row 76 | self.col = col 77 | # self._by = None 78 | 79 | # self.som = dict(y="interval", x="ordinal", row="") 80 | 81 | # if som: #' SOM = Scale of Measurement / Skalenniveau 82 | # self.som = som 83 | # else: 84 | # self.som = dict(y= "continuous", ) 85 | 86 | # 87 | # 88 | # 89 | 90 | def __repr__(self) -> str: 91 | return self.asdict().__repr__() 92 | 93 | # == Properties :::::::::::::::::::::::::::::::::::::::::::::::::::::::::: 94 | 95 | # @property 96 | # def has_hue(self) -> bool: 97 | # return not self.hue is None 98 | 99 | # @property 100 | # def by(self) -> list[str] | None: 101 | # if self._by: 102 | # return self._by 103 | # elif self.row and self.col: 104 | # return [self.row, self.col] 105 | # elif self.row: 106 | # return [self.row] 107 | # elif self.col: 108 | # return [self.col] 109 | # else: 110 | # return None 111 | 112 | def asdict(self, incl_None=True) -> dict: 113 | d = dict(y=self.y, x=self.x, hue=self.hue, row=self.row, col=self.col) 114 | if not incl_None: 115 | d = {k: v for (k, v) in d.items() if (not v is None)} 116 | return d 117 | 118 | def set(self, inplace=False, **kwargs) -> "Dims | DataAnalysis": 119 | newobj = self if inplace else copy(self) 120 | for k, v in kwargs.items(): 121 | v = v if not v == "none" else None 122 | setattr(newobj, k, v) 123 | return newobj 124 | 125 | def getvalues(self, keys: list[str] | tuple[str], *args): 126 | """ 127 | Converts a list of dimensions into a list of dimension values, e.g. 128 | :param keys: ["x", "y", "col"] 129 | :return: e.g. ["smoker", "tips", "day"] 130 | """ 131 | defkeys = ("x", "y", "hue", "row", "col") 132 | l = [] 133 | keys = [keys] + [arg for arg in args] 134 | for key in keys: 135 | assert ( 136 | key in defkeys 137 | ), f"#! '{key}' should have been one of {defkeys}" 138 | l.append(getattr(self, key)) 139 | return l 140 | 141 | def switch( 142 | self, 143 | *keys_args: str, 144 | inplace: bool = False, 145 | verbose: bool = True, 146 | **keys_kws: str | Dict[str, str], 147 | ) -> "Dims | DataAnalysis": 148 | """Switches two dimensions, e.g. x and hue, or x and row, etc. If you 149 | want to switch more than two dimensions, use the switch method in 150 | chain. 151 | 152 | :param keys_args: Two dimensions to switch. Only 2 Positional arguments 153 | allowed. Use e.g. 
dims.switch("x", "hue", **kwargs) 154 | :type keys_args: str 155 | :param inplace: Decide if this switching should change the dims object 156 | permanently (analogously to pandas dataframe). If False, you should 157 | pass return value into a variable, defaults to False 158 | :type inplace: bool, optional 159 | :param verbose: Whether to print out switched values, defaults to True 160 | :type verbose: bool, optional 161 | :param kwarg: Keyword arguments: row="smoker". 162 | :type kwarg: str | Dict[str, str] 163 | :raises AssertionError: 164 | :return: DataAnalysis object with switched dimensions in dims 165 | :rtype: Dims | DataAnalysis 166 | """ 167 | 168 | ### Handle Arguments 169 | #' If keys are passed, e.g. dims.switch("x","row",**kwargs)""" 170 | if len(keys_args) == 0: 171 | pass 172 | elif len(keys_args) == 2: 173 | assert ( 174 | len(keys_kws) == 0 175 | ), "#! Can't switch when both keys and kwarg is passed" 176 | values = self.getvalues(*keys_args) 177 | keys_kws[keys_args[0]] = values[1] 178 | else: 179 | raise AssertionError( 180 | f"#! '{keys_args}' should have been of length 2" 181 | ) 182 | assert len(keys_kws) == 1, f"#! {keys_kws} should be of length 1 " 183 | 184 | ### Print first Line 185 | if verbose: 186 | todo = "RE-WRITING" if inplace else "TEMPORARY CHANGING:" 187 | print( 188 | f"#! {todo} {self.__class__.__name__} with keys: '{keys_args}' and kwarg: {keys_kws}:" 189 | ) 190 | print(" (dim =\t'old' -> 'new')") 191 | 192 | ### SWITCH IT 193 | #' Copy Object 194 | original: dict = deepcopy(self.asdict(incl_None=True)) 195 | newobj = self if inplace else deepcopy(self) 196 | 197 | qK, qV = *keys_kws.keys(), *keys_kws.values() 198 | replace_v = "none" 199 | for oK, oV in original.items(): # Original Object 200 | if qK == oK: 201 | replace_v = oV 202 | setattr(newobj, qK, qV) 203 | elif qK != oK and oV == qV: 204 | replace_v = original[qK] 205 | setattr(newobj, oK, replace_v) 206 | assert ( 207 | replace_v != "none" 208 | ), f"#! Did not find {list(keys_kws.keys())} in dims {list(original.keys())}" 209 | 210 | ### PRINT THE OVERVIEW OF THE NEW MAPPING 211 | if verbose: 212 | for (oK, oV), nV in zip(original.items(), newobj.asdict().values()): 213 | pre = " " 214 | if oV != nV and oV == replace_v: # or replace_v == "none": 215 | printval = f"'{replace_v}' -> '{qV}'" 216 | pre = ">>" 217 | elif oV != nV and oV != replace_v: 218 | printval = f"'{oV}' -> '{replace_v}'" 219 | pre = " <" 220 | else: # oV == nV 221 | printval = f"'{oV}'" if type(oV) is str else f"{oV}" 222 | if len(oK) < 3: 223 | oK = oK + " " 224 | 225 | printval = printval.replace("'None'", "None") # REMOVE QUOTES 226 | 227 | print(f" {pre} {oK} =\t{printval}") 228 | 229 | ### x AND y MUST NOT BE None 230 | assert not None in [ 231 | self.y, 232 | self.x, 233 | ], "#! This switch causes x or y to be None" 234 | 235 | return newobj 236 | 237 | 238 | # !! 239 | # !! 240 | # !! end class 241 | 242 | # %% Test 243 | 244 | if __name__ == "__main__": 245 | wt = "../../../Examples, Walkthroughs, Tests/Scripts for Walkthroughs/dims_wt.py" 246 | with open(wt) as f: 247 | exec(f.read()) 248 | -------------------------------------------------------------------------------- /src/plotastic/dimensions/hierarchical_dims.py: -------------------------------------------------------------------------------- 1 | """We utilize List of all dims/ factors in a specific orders to group 2 | and index the data into (fully) facetted datagroups. This allows easier 3 | implementation of ... - ... 
displaying missing levels of the last factor 4 | (x or hue) per group - ... connecting datapoints of the same subject 5 | across x and hue levels - (... iterating through all datagroups for 6 | statistics) 7 | """ 8 | 9 | # %% 10 | 11 | # from pprint import pprint 12 | # from IPython.display import display 13 | 14 | import numpy as np 15 | import pandas as pd 16 | import seaborn as sns 17 | import matplotlib.pyplot as plt 18 | 19 | import plotastic as plst 20 | from plotastic.dimensions.subject import Subject 21 | from plotastic.utils import utils as ut 22 | 23 | 24 | from typing import Generator, TYPE_CHECKING 25 | 26 | if TYPE_CHECKING: 27 | from plotastic.dataanalysis.dataanalysis import DataAnalysis 28 | 29 | 30 | # %% 31 | 32 | 33 | class HierarchicalDims(Subject): 34 | def __init__(self, **kws): 35 | super().__init__(**kws) 36 | 37 | @property 38 | def _factors_hierarchical(self) -> list: 39 | """Return list of factors that are used for indexing the 40 | subjectdata. It places subjects before x and hue, which is 41 | useful to see which x and hue level are missing per subject""" 42 | factors = [ 43 | self.dims.row, 44 | self.dims.col, 45 | self.subject, # < Subject 46 | self.dims.x, 47 | self.dims.hue, 48 | ] 49 | ### Kick out Nones: 50 | return [f for f in factors if f is not None] 51 | 52 | @property 53 | def _factors_hierarchical_subjects_last(self) -> list: 54 | """Return list of factors that are used for indexing the 55 | subjectdata. It places subjects after x and hue, which is 56 | useful to see which subject is missing per x and hue level""" 57 | factors = [ 58 | self.dims.row, 59 | self.dims.col, 60 | self.dims.x, 61 | self.dims.hue, 62 | self.subject, # < Subject 63 | ] 64 | ### Kick out Nones: 65 | return [f for f in factors if f is not None] 66 | 67 | def data_hierarchicize( 68 | self, 69 | sort=True, 70 | subjects_last=False, 71 | ) -> pd.DataFrame: 72 | """Return Dataframe indexed by all factors containing only 73 | columns y and subjects""" 74 | 75 | ### Pick order of Hierarchy 76 | if subjects_last: 77 | factors = self._factors_hierarchical_subjects_last 78 | else: 79 | factors = self._factors_hierarchical 80 | 81 | ### Pick Data and set Index 82 | DF = self.data[factors + [self.dims.y]] 83 | DF = DF.set_index(factors) 84 | 85 | ### Sort 86 | if sort: 87 | DF = DF.sort_index() 88 | 89 | return DF 90 | 91 | def _iter__hlkey_df( 92 | self, sort=False, subject_last=False, by_lastdim=False 93 | ) -> Generator[tuple[tuple[str | int], pd.DataFrame], None, None]: 94 | """Iterate over data_hierarchical, return hierarchical levelkeys 95 | and dataframe""" 96 | ### Pick order of Hierarchy 97 | if subject_last: 98 | factors = self._factors_hierarchical_subjects_last 99 | else: 100 | factors = self._factors_hierarchical 101 | 102 | ### Remove last dim (x or hue) 103 | # > Otherwise we iterate over single rows 104 | if not by_lastdim: 105 | factors = factors[:-1] 106 | 107 | ### Pandas doesn't like grouping by length 1 tuples/lists 108 | if len(factors) == 1: 109 | factors = factors[0] 110 | 111 | for key, df in self.data_hierarchicize( 112 | sort=sort, subjects_last=subject_last 113 | ).groupby(factors): 114 | yield key, df 115 | 116 | def get_missing_lvls_from_last_factor( 117 | self, 118 | show_all=False, 119 | as_dict=False, 120 | ) -> pd.DataFrame | dict: 121 | """Return dataframe with missing levels per group. 
If show_all 122 | is False, only groups with missing levels are shown.""" 123 | ### Reference for complete levels 124 | all_x_lvls = tuple(self.levels_dict_dim["x"]) 125 | all_hue_lvls = tuple(self.levels_dict_dim["hue"]) 126 | 127 | ### Collect Missing 128 | missing = {} 129 | for key, df in self._iter__hlkey_df(): 130 | if self.dims.hue: 131 | hue_lvls = tuple(df.index.get_level_values(self.dims.hue)) 132 | missing[key] = tuple(set(all_hue_lvls) - set(hue_lvls)) 133 | else: 134 | x_lvls = tuple(df.index.get_level_values(self.dims.x)) 135 | missing[key] = tuple(set(all_x_lvls) - set(x_lvls)) 136 | 137 | ### Remove groups that didn't have any missing values 138 | if not show_all: 139 | # > Convert v to list so that resulting DataFrame has 140 | #' just one column 141 | missing = {k: [v] for k, v in missing.items() if v} 142 | 143 | ### Convert Result to DataFrame 144 | if not as_dict: 145 | missing = pd.DataFrame( 146 | index=pd.MultiIndex.from_tuples( 147 | tuples=missing.keys(), 148 | names=self._factors_hierarchical[:-1], 149 | ), 150 | data=missing.values(), 151 | columns=["missing levels"], 152 | ).sort_index() 153 | 154 | return missing 155 | 156 | 157 | # %% 158 | if __name__ == "__main__": 159 | # == Test Data ===================================================== 160 | 161 | def make_testdata_paired_but_nosubject(): 162 | ### Attention 163 | DF = sns.load_dataset("attention") 164 | dims = dict(y="score", x="attention", hue="solutions") 165 | DA1 = plst.DataAnalysis(data=DF, dims=dims, verbose=False) 166 | DA1.test_pairwise(paired=False) 167 | 168 | ### qPCR 169 | DF, dims = plst.load_dataset("qpcr", verbose=False) 170 | #' DA2 171 | dims = dict(y="fc", x="gene", row="fraction", col="class") 172 | DA2 = plst.DataAnalysis(data=DF, dims=dims, verbose=False) 173 | DA2.test_pairwise(paired=False) 174 | #' DA3 175 | dims = dict(y="fc", x="gene", hue="fraction", col="class") 176 | DA3 = plst.DataAnalysis(data=DF, dims=dims, verbose=False) 177 | DA3.test_pairwise(paired=False) 178 | 179 | return (DA1, DA2, DA3) 180 | 181 | def make_testdata(): 182 | ### Attention 183 | DF = sns.load_dataset("attention") 184 | #' DA4 - no col, but hue 185 | dims = dict(y="score", x="attention", hue="solutions") 186 | DA4 = plst.DataAnalysis( 187 | data=DF, dims=dims, subject="subject", verbose=False 188 | ) 189 | DA4.test_pairwise(paired=False) 190 | 191 | #' DA5 - no hue, but col 192 | dims = dict(y="score", x="solutions", col="attention") 193 | DA5 = plst.DataAnalysis( 194 | data=DF, dims=dims, subject="subject", verbose=False 195 | ) 196 | DA5.test_pairwise(paired=True) 197 | 198 | ### qPCR 199 | DF, dims = plst.load_dataset("qpcr", verbose=False) 200 | #' DA6 201 | dims = dict(y="fc", x="gene", hue="fraction", col="class") 202 | DA6 = plst.DataAnalysis( 203 | data=DF, dims=dims, subject="subject", verbose=False 204 | ) 205 | DA6.test_pairwise(paired=True) 206 | 207 | #' DA7 - with row 208 | dims = dict(y="fc", x="gene", row="fraction", col="class") 209 | DA7 = plst.DataAnalysis( 210 | data=DF, dims=dims, subject="subject", verbose=False 211 | ) 212 | DA7.test_pairwise(paired=True) 213 | 214 | return DA4, DA5, DA6, DA7 215 | 216 | DA1, DA2, DA3 = make_testdata_paired_but_nosubject() 217 | DA4, DA5, DA6, DA7 = make_testdata() 218 | 219 | # %% 220 | ### Test when executed with DA doesn't have subject specified 221 | # DA1.subjectlist # > Gives error rightfully 222 | 223 | DA1.get_missing_lvls_from_last_factor() 224 | DA2.get_missing_lvls_from_last_factor() 225 | DA3.get_missing_lvls_from_last_factor() 226 
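    # %%
    ### A hedged, self-contained sketch of the idea behind
    ### `get_missing_lvls_from_last_factor` (toy data, column names
    ### invented here, not taken from the examples above): per facetted
    ### group, the missing levels of the last factor are the set
    ### difference between all known levels and the levels present.
    _toy = pd.DataFrame(
        dict(
            col=["c1", "c1", "c1", "c2"],
            hue=["h1", "h2", "h3", "h1"],
            y=[1, 2, 3, 4],
        )
    )
    _all_hues = set(_toy["hue"].unique())
    for _key, _df in _toy.groupby("col"):
        #' "c1" misses nothing, "c2" misses {"h2", "h3"}
        print(_key, _all_hues - set(_df["hue"]))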
| 227 | # DA1._subjects_get_XY() # > Gives error correctly 228 | # DA1.plot_connect_subjects() # > Gives error correctly 229 | 230 | # %% 231 | # DA1.catplot() 232 | 233 | # DA3.data_hierarchicize() 234 | # DA3.levels_get_missing() 235 | 236 | # %% 237 | # DA4.get_hierarchical_data(sorted=True) 238 | # DA4.get_hierarchical_data(sorted=True) 239 | DA6.data_hierarchicize(sort=True, subjects_last=True) 240 | # %% 241 | DA6.data_hierarchicize(sort=True, subjects_last=False) 242 | 243 | # %% 244 | # for subject, df in DA6.subjects_iter__subject_df: 245 | # pprint(subject) 246 | # pprint(df) 247 | # print() 248 | 249 | # %% 250 | # DA4.subjects_get_missing() 251 | # DA5.subjects_get_missing() 252 | DA6.get_missing_lvls_from_last_factor() 253 | # %% 254 | # pprint(DA4.subjects_get_XY()) 255 | # pprint(DA5.subjects_get_XY()) 256 | # pprint(DA6.subjects_get_XY()) 257 | DA4._subjects_get_XY() 258 | # DA5.subjects_get_XY() 259 | # DA6.subjects_get_XY().loc[("MMPs", slice(None), "MMP7"), :] 260 | # DF = DA6.subjects_get_XY() 261 | # DF[DF.index.get_level_values("class") == "Chemokines"].index 262 | # # DF.index 263 | 264 | # %% 265 | def plottest(self: plst.DataAnalysis, figsize=(2.5, 2), **plot_kws): 266 | ( 267 | self.subplots(figsize=figsize) 268 | .fillaxes( 269 | kind="swarm", 270 | size=2, 271 | dodge=True, 272 | ) 273 | .edit_y_scale_log(10) 274 | .plot_connect_subjects(**plot_kws) 275 | .annotate_pairwise() 276 | ) 277 | return self 278 | 279 | plottest(DA4) 280 | plottest(DA5) 281 | plottest(DA6, figsize=(12, 4)) 282 | plottest(DA7, figsize=(12, 12)) 283 | -------------------------------------------------------------------------------- /src/plotastic/dimensions/subject.py: -------------------------------------------------------------------------------- 1 | """Adds Subject funcitonality to DataAnalysis.""" 2 | # %% 3 | #== Imports ============================================================ 4 | 5 | # from plotastic 6 | from plotastic.utils import utils as ut 7 | from plotastic.dimensions.dimsandlevels import DimsAndLevels 8 | 9 | # %% 10 | #== Class Subject ====================================================== 11 | 12 | class Subject (DimsAndLevels): 13 | 14 | def __init__(self, subject = None, **kws) -> None: 15 | super().__init__(**kws) 16 | self.subject = subject 17 | if not subject is None: 18 | assert ( 19 | subject in self.data.columns 20 | ), f"#! Subject '{subject}' not in columns, expected one of {self.data.columns.to_list()}" 21 | 22 | @property 23 | def subjectlist(self): 24 | if self.subject is None: 25 | raise TypeError("No subject column specified") 26 | return tuple(self.data[self.subject].unique()) 27 | 28 | -------------------------------------------------------------------------------- /src/plotastic/docstrings.py: -------------------------------------------------------------------------------- 1 | # 2 | # %% imports 3 | 4 | from typing import Callable 5 | 6 | import plotastic.utils.utils as ut 7 | 8 | # %% Test wrapping function 9 | 10 | 11 | if __name__ == "__main__": 12 | descr = """ 13 | Mode of overwrite protection. If "day", it simply adds the current date at the end 14 | of the filename, causing every output on the same day to overwrite itself. If 15 | "nothing" ["day", "nothing"], files with the same filename will be detected in the 16 | current work directory and a number will be added to the filename. If True, 17 | everything will be overwritten. 
18 | """ 19 | w = ut.wrap_text(descr) 20 | print(w) 21 | len(" ") 22 | 23 | 24 | # %% Write :param: part of docstring 25 | 26 | 27 | def param( 28 | param: str, 29 | descr: str, 30 | default: str = "", 31 | typ: str = "", 32 | optional: bool = False, 33 | ) -> str: 34 | """Returns part of docstring describing parameter in sphinx format""" 35 | 36 | ### If descr starts with new line remove it 37 | if descr.startswith("\n"): 38 | descr = descr[1:] 39 | 40 | S = [] 41 | 42 | ### First line, (no tabstop needed) 43 | # # Don't include :param: in docstring, add that manually always, so 44 | # # vscode at least shows the parameter in the intellisense 45 | S.append(" ") #' whitespace after :param param: 46 | # S = f":param {param}: {wrap_descr(descr)}" 47 | S.append( 48 | ut.wrap_text( 49 | string=descr, 50 | width=72, 51 | width_first_line=54, 52 | indent=" ", 53 | ) 54 | ) 55 | 56 | ### Add default value to first line 57 | if default: 58 | if isinstance(default, str): 59 | # # Add quotes if param defaults to string 60 | default = f"'{default}'" 61 | S.append(f", defaults to {default}") 62 | 63 | ### Further options need a tab 64 | ### Type 65 | if typ: 66 | S.append("\n\t") #' newline 67 | S.append(f":type {param}: {typ}") 68 | 69 | ### Optional, same line as type 70 | if optional: 71 | S.append(f", optional") 72 | 73 | return "".join(S) 74 | 75 | 76 | if __name__ == "__main__": 77 | docpart = param( 78 | param="sdaf", 79 | descr="makes makes and does does stuffystuff", 80 | default="ja!", 81 | typ="str", 82 | ) 83 | print(docpart) 84 | 85 | 86 | # %% Substitute variables in docstring 87 | 88 | 89 | def subst(*args, **kwargs): 90 | """Decorator that substitutes variables in docstrings, e.g.: {} as args and {var} as 91 | kwargs 92 | """ 93 | 94 | def F(func: Callable): 95 | doc = func.__doc__ 96 | ### Shouldn't raise error if no docstring is present 97 | if doc: 98 | try: 99 | ### Substitute args 100 | func.__doc__ = doc.format(*args, **kwargs) 101 | except KeyError as e: 102 | raise KeyError( 103 | f"Could not substitute {e} in docstring of {func.__name__}" 104 | "with {args} or {list(kwargs.keys())}" 105 | ) 106 | 107 | return func 108 | 109 | return F 110 | 111 | 112 | if __name__ == "__main__": 113 | # p = """:param verbose: Set to False to not print stuff, defaults to False""" 114 | # p += "\n\t:type verbose: bool" 115 | p = param( 116 | param="verbose", 117 | descr="Ladidah awesome parameter if you know what I mean. Makes makes and does does stuffystuff", 118 | default="ja!", 119 | typ="str", 120 | ) 121 | 122 | @subst("banana", var2="milkshake", var3=p) 123 | def bla(verbose: False): 124 | """this is a docstring with {} and {var2}, 125 | 126 | :param verbose: {var3} 127 | """ 128 | if verbose: 129 | print("jo!") 130 | 131 | print(bla.__doc__) 132 | 133 | 134 | # %% 135 | 136 | ### Overwrite Protection 137 | param_overwrite = param( 138 | param="overwrite", 139 | descr=""" 140 | Mode of overwrite protection. If "day", it simply adds the current date at the end 141 | of the filename, causing every output on the same day to overwrite itself. If 142 | "nothing" ["day", "nothing"], files with the same filename will be detected in the 143 | current work directory and a number will be added to the filename. If True, 144 | everything will be overwritten. 
145 | """, 146 | default="day", 147 | typ="str | bool", 148 | optional=True, 149 | ) 150 | 151 | if __name__ == "__main__": 152 | from plotastic.dataanalysis.dataanalysis import DataAnalysis 153 | 154 | # print(overwrite) 155 | print(DataAnalysis.save_statistics.__doc__) 156 | # DataAnalysis.save_fig() 157 | 158 | # %% 159 | -------------------------------------------------------------------------------- /src/plotastic/example_data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/src/plotastic/example_data/__init__.py -------------------------------------------------------------------------------- /src/plotastic/example_data/data/fmri.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/src/plotastic/example_data/data/fmri.xlsx -------------------------------------------------------------------------------- /src/plotastic/example_data/data/qpcr.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/src/plotastic/example_data/data/qpcr.xlsx -------------------------------------------------------------------------------- /src/plotastic/example_data/data/tips.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/src/plotastic/example_data/data/tips.xlsx -------------------------------------------------------------------------------- /src/plotastic/example_data/load_dataset.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pkg_resources 3 | import os 4 | 5 | ### List all available datasets 6 | 7 | FILES = dict( 8 | fmri="fmri.xlsx", #' Removed timepoints bigger than 10 9 | tips="tips.xlsx", #' Added a size-cut column pd.cut(df["size"], bins=[0, 2, 10], labels=["1-2", ">=3"]) 10 | qpcr="qpcr.xlsx", 11 | ) 12 | 13 | DIMS = dict( 14 | fmri=dict(y="signal", x="timepoint", hue="event", col="region"), 15 | tips=dict(y="tip", x="size-cut", hue="smoker", col="sex", row="time"), 16 | qpcr=dict(y="fc", x="gene", hue="fraction", col="method", row="class"), 17 | ) 18 | 19 | 20 | def load_dataset(name: str = "tips", verbose=True) -> tuple[pd.DataFrame, dict]: 21 | """Executes seaborn.load_dataset, but also returns dictionary that assigns dimensions 22 | to column names ["y","x","hue","col","row"] 23 | 24 | :param verbose: Prints information and dims dictionary 25 | :param name: Name of the dataset. Error messayge contains available options. Defaults to "tips" 26 | :return: Example data and dictionary for dimensions 27 | :rtype: tuple[pd.DataFrame, dict] 28 | """ 29 | 30 | ### Check user Arguments 31 | assert ( 32 | name in FILES 33 | ), f" '{name}' should have been one of {list(FILES.keys())}" 34 | 35 | ### Import DataFrame from package 36 | package = "plotastic.example_data" #' Needs to be importable 37 | path_relative = os.path.join( 38 | "data", FILES[name] 39 | ) #' Path with python package as root 40 | path_full = pkg_resources.resource_filename(package, path_relative) 41 | df = pd.read_excel(path_full) 42 | 43 | ### Get dims 44 | dims = DIMS[name] 45 | 46 | if verbose: 47 | print( 48 | f"#! 
Imported seaborn dataset '{name}' \n\t columns:{df.columns}\n\t dimensions: {dims}" 49 | ) 50 | 51 | return df, dims 52 | -------------------------------------------------------------------------------- /src/plotastic/plotting/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/src/plotastic/plotting/__init__.py -------------------------------------------------------------------------------- /src/plotastic/plotting/multiplot.py: -------------------------------------------------------------------------------- 1 | # 2 | # %% imports 3 | 4 | from typing import TYPE_CHECKING 5 | 6 | import pandas as pd 7 | 8 | import matplotlib.pyplot as plt 9 | import matplotlib as mpl 10 | 11 | # import pyperclip 12 | 13 | # import markurutils as ut 14 | import plotastic.utils.utils as ut 15 | 16 | # from plotastic.plotting.plotedits import PlotEdits 17 | from plotastic.plotting.plot import Plot 18 | 19 | if TYPE_CHECKING: 20 | from plotastic.dataanalysis.dataanalysis import DataAnalysis 21 | 22 | # %% Matplotlib Runtime Config (RC) 23 | 24 | 25 | # %% Class MultiPlot 26 | 27 | 28 | class MultiPlot(Plot): 29 | def __init__(self, **dataframetool_kws): 30 | super().__init__(**dataframetool_kws) 31 | 32 | # 33 | # == Boxplots ====================================================== 34 | 35 | def plot_box_strip( 36 | self, 37 | marker_size: float = 2, 38 | marker_alpha: float = 0.5, 39 | legend=True, 40 | subplot_kws: dict = dict(), 41 | box_kws: dict = dict(), 42 | strip_kws: dict = dict(), 43 | legend_kws: dict = dict(), 44 | ) -> "MultiPlot | DataAnalysis": 45 | """A boxplot with a stripplott (scatter) on top 46 | 47 | Args: 48 | markersize (float, optional): _description_. Defaults to 2. 49 | markeralpha (float, optional): _description_. Defaults to 0.5. 50 | box_kws (dict, optional): _description_. Defaults to dict(). 51 | strip_kws (dict, optional): _description_. Defaults to dict(). 52 | """ 53 | # == PARAMETERS 54 | thin, thick = 0.3, 1.0 #' Linewidths 55 | covering, translucent, hazy = 1.0, 0.5, 0.3 #' Alpha 56 | front, mid, background, hidden = 100, 50, 1, -1 #' z-order 57 | 58 | ### == KEYWORD ARGUMENTS 59 | ### Boxplot kws 60 | box_KWS = dict( 61 | showfliers=False, 62 | boxprops=dict( #' Box line and surface 63 | alpha=hazy, 64 | linewidth=thin, 65 | ), 66 | medianprops=dict( #' Median line 67 | alpha=covering, 68 | zorder=front, 69 | linewidth=thick, 70 | ), 71 | whiskerprops=dict( #' Lines conencting box and caps 72 | alpha=covering, 73 | zorder=mid, 74 | linewidth=thin, 75 | ), 76 | capprops=dict( #' Caps at the end of whiskers 77 | alpha=covering, 78 | zorder=mid, 79 | linewidth=thick, 80 | ), 81 | ) 82 | 83 | ### Stripplot kws 84 | strip_KWS = dict( 85 | dodge=True, #' Separates the points in hue 86 | jitter=0.2, #' How far datapoints of one group scatter across the x-axis 87 | zorder=front, 88 | ### Marker Style 89 | size=marker_size, 90 | alpha=marker_alpha, 91 | # color="none", 92 | edgecolor="white", 93 | linewidth=thin, #' Edge width of the marker 94 | ) 95 | 96 | ### User KWS 97 | box_KWS.update(box_kws) 98 | strip_KWS.update(strip_kws) 99 | 100 | ###... 
PLOT 101 | ( 102 | self.subplots(**subplot_kws) 103 | .fillaxes(kind="box", **box_KWS) 104 | .fillaxes(kind="strip", **strip_KWS) 105 | ) 106 | 107 | ### Legend displaying labels of stripplot (since that was called last) 108 | if legend and self.dims.hue: 109 | self.edit_legend(**legend_kws) 110 | 111 | return self 112 | 113 | def plot_box_swarm( 114 | self, 115 | marker_size: float = 1.5, 116 | marker_alpha: float = 0.9, 117 | legend=True, 118 | subplot_kws: dict = dict(), 119 | box_kws: dict = dict(), 120 | swarm_kws: dict = dict(), 121 | legend_kws: dict = dict(), 122 | ) -> "MultiPlot | DataAnalysis": 123 | """A boxplot with a swarmplot (scatter) on top 124 | 125 | Args: 126 | marker_size (float, optional): Size of the swarm markers. Defaults to 1.5. 127 | marker_alpha (float, optional): Alpha (opacity) of the swarm markers. Defaults to 0.9. 128 | box_kws (dict, optional): Keyword arguments passed to the boxplot. Defaults to dict(). 129 | swarm_kws (dict, optional): Keyword arguments passed to the swarmplot. Defaults to dict(). 130 | """ 131 | # == PARAMETERS 132 | thin, thick = 0.2, 1.0 #' Linewidths 133 | covering, translucent, hazy = 1.0, 0.5, 0.3 #' Alpha 134 | front, mid, background, hidden = 100, 50, 1, -1 #' z-order 135 | 136 | ### == KEYWORD ARGUMENTS 137 | ### Boxplot kws 138 | box_KWS = dict( 139 | showfliers=False, 140 | #' Widths of boxes 141 | # !! Throws TypeError: matplotlib.axes._axes.Axes.boxplot() got multiple values for keyword argument 'widths' 142 | # widths=0.9, 143 | boxprops=dict( #' Box line and surface 144 | alpha=translucent, 145 | linewidth=thin, 146 | ), 147 | medianprops=dict( #' Median line 148 | alpha=covering, 149 | zorder=front, 150 | linewidth=thick, 151 | ), 152 | whiskerprops=dict( #' Lines connecting box and caps 153 | alpha=covering, 154 | zorder=mid, 155 | linewidth=thin, 156 | ), 157 | capprops=dict( #' Caps at the end of whiskers 158 | alpha=covering, 159 | zorder=mid, 160 | linewidth=thick, 161 | ), 162 | ) 163 | 164 | ### Swarmplot kws 165 | swarm_KWS = dict( 166 | dodge=True, #' Separates the points in hue 167 | zorder=front, 168 | ### Marker Style 169 | alpha=marker_alpha, 170 | size=marker_size, 171 | # color="none", 172 | edgecolor="black", 173 | linewidth=thin, #' Edge width of the marker 174 | ) 175 | 176 | ### User KWS 177 | box_KWS.update(box_kws) 178 | swarm_KWS.update(swarm_kws) 179 | 180 | ###... PLOT 181 | # !! If log y scale, you should pass y_scale = "log" in subplot_kws! Otherwise points will not cluster in the middle!
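        # #' A hedged usage sketch (the DataAnalysis object `da` is
        # #' assumed, not defined in this module):
        # # da.plot_box_swarm(subplot_kws=dict(y_scale="log"))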
182 | ( 183 | self.subplots(**subplot_kws) 184 | .fillaxes(kind="box", **box_KWS) 185 | .fillaxes(kind="swarm", **swarm_KWS) 186 | ) 187 | 188 | ### Legend displaying labels of swarmplot (since that was called last) 189 | if legend and self.dims.hue: 190 | self.edit_legend(**legend_kws) 191 | 192 | return self 193 | 194 | 195 | ## !!__________________________________________________________________________ 196 | 197 | # # %% Matplotlib Runtime Config (RC) 198 | 199 | # mpl.rc("figure", dpi=250) 200 | 201 | # # %% get data 202 | 203 | # MP = MultiPlot(data=df, dims=dims) 204 | 205 | 206 | # # %% 207 | -------------------------------------------------------------------------------- /src/plotastic/plotting/plot.py: -------------------------------------------------------------------------------- 1 | """Plotting functions that aren't covered by matplotlib or seaborn.""" 2 | # %% 3 | 4 | import numpy as np 5 | import pandas as pd 6 | import matplotlib.pyplot as plt 7 | import seaborn as sns 8 | 9 | from plotastic.plotting.plotedits import PlotEdits 10 | from plotastic.utils import utils as ut 11 | 12 | from typing import TYPE_CHECKING 13 | 14 | if TYPE_CHECKING: 15 | from plotastic.dataanalysis.dataanalysis import DataAnalysis 16 | 17 | # %% 18 | # == Class Plot ======================================================== 19 | 20 | 21 | class Plot(PlotEdits): 22 | def __init__(self, **dataframetool_kws) -> None: 23 | super().__init__(**dataframetool_kws) 24 | 25 | def plot( 26 | self, kind: str = "strip", subplot_kws: dict = None, **sns_kws 27 | ) -> "Plot | DataAnalysis": 28 | """Quick plotting, combines self.subplots and self.fillaxes its axes with seaborn graphics 29 | 30 | Args: 31 | kind (str, optional): _description_. Defaults to "strip". 32 | subplot_kws (dict, optional): _description_. Defaults to None. 33 | sns_kws (dict, optional): _description_. Defaults to None. 34 | 35 | Returns: 36 | fig_and_axes: _description_ 37 | """ 38 | ### Handle kwargs 39 | subplot_kws = subplot_kws or {} 40 | sns_kws = sns_kws or {} 41 | 42 | ### Standard kws for standard stripplot 43 | if kind == "strip" and len(sns_kws) == 0: 44 | sns_kws = dict(alpha=0.6, dodge=True) 45 | 46 | self.subplots(**subplot_kws) #' Initialise Figure and Axes 47 | self.fillaxes(kind=kind, **sns_kws) #' Fill axes with seaborn graphics 48 | if self.dims.hue: 49 | self.edit_legend() #' Add legend to figure 50 | 51 | plt.tight_layout() #' Make sure everything fits nicely 52 | 53 | return self 54 | 55 | # 56 | # == Subject lines ================================================= 57 | 58 | def _nested_offsets(self, n_levels, width=0.8, dodge=True) -> np.ndarray: 59 | """Return offsets for each hue level for dodged plots. This must 60 | represent the same function that seaborn uses to dodge the plot, 61 | which can be found here: 62 | https://github.com/mwaskom/seaborn/blob/908ca95137c0e73bb6ac9ce9a8051577b6453138/seaborn/categorical.py#L437 63 | """ 64 | # ?? Retrieve hue_offsets from axes independently of width? 65 | # ?? 
This here could work, but might also be a bit hacky 66 | # axes = self.axes 67 | # offset = self.axes.collections[0].get_offsets() 68 | 69 | hue_offsets: np.ndarray 70 | if dodge: 71 | each_width = width / n_levels 72 | hue_offsets = np.linspace(0, width - each_width, n_levels) 73 | hue_offsets -= hue_offsets.mean() 74 | else: 75 | hue_offsets = np.zeros(n_levels) 76 | return hue_offsets 77 | 78 | @ut.ignore_warnings 79 | def _subjects_get_XY(self) -> pd.DataFrame: 80 | """Collects X and Y positions of all datapoints indexed by all 81 | factors and subjects in a dataframe""" 82 | if self.subject is None: 83 | raise TypeError("No subject column specified") 84 | 85 | ### Retrieve hue levels and relative x-positions of data on plot 86 | if self.dims.hue: 87 | all_hue_lvls = tuple(self.levels_dict_dim["hue"]) 88 | hue_offset = self._nested_offsets(len(all_hue_lvls)) 89 | 90 | get_y = lambda df: tuple(df[self.dims.y].tolist()) 91 | 92 | XY_df = pd.DataFrame( 93 | index=self.data_hierarchicize(sort=False).index, 94 | columns=["X", "Y"], 95 | data=None, 96 | ) 97 | if self.dims.hue is None: 98 | for key, df in self._iter__hlkey_df(): 99 | XY_df.loc[key, "Y"] = get_y(df) 100 | XY_df.loc[key, "X"] = tuple(i for i in range(len(df))) 101 | else: 102 | for key, df in self._iter__hlkey_df(): 103 | #' X_positions >> hue_positions 104 | # > [0, 1] >> [0.2, 1.2, 2.2] and [0.2, 1.2, 2.2] 105 | # > [0, 1] >> [0.2, 1.2] and [0.2, 1.2, 2.2] 106 | #' Get hue-indices of hue-levels that aren't missing 107 | hue_lvls = tuple( 108 | df.index.get_level_values(self.dims.hue).unique() 109 | ) 110 | hue_indices: list[int] = ut.index_of_matchelements( 111 | i1=all_hue_lvls, i2=hue_lvls 112 | ) 113 | 114 | ### Find out which x_index we are at 115 | if self.factors_is_unfacetted: 116 | x_levels: list[str | int] = self.levels_dict_dim["x"] 117 | else: 118 | x_levels = tuple( 119 | XY_df.loc[key[:-1], :] 120 | .index.get_level_values(self.dims.x) 121 | .unique() 122 | ) 123 | x_level_index = x_levels.index(key[-1]) 124 | 125 | ### Translate hue_indices into x_positions by adding offset 126 | hue_positions: tuple = tuple( 127 | x_level_index + hue_offset[hue_indices] 128 | ) 129 | 130 | XY_df.loc[key, "Y"] = get_y(df) 131 | XY_df.loc[key, "X"] = hue_positions 132 | 133 | return XY_df 134 | 135 | def plot_connect_subjects(self, **plot_kws) -> "Plot | DataAnalysis": 136 | """Joins subjects with lines. This is useful to see how subjects 137 | behave across x and hue levels. This is only possible if the 138 | subject column is specified. 
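        A minimal usage sketch (assuming a DataAnalysis object ``da``
        constructed with a ``subject`` column)::

            (
                da.subplots()
                .fillaxes(kind="swarm", dodge=True)
                .plot_connect_subjects()
            )
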
139 | 140 | :raises TypeError: Requires subject column to be specified in 141 | DataAnalysis object 142 | :return: self 143 | :rtype: Plot | DataAnalysis 144 | """ 145 | if self.subject is None: 146 | raise TypeError("No subject column specified") 147 | 148 | plot_KWS = dict(color="black", ls="-", zorder=2, alpha=0.3) 149 | plot_KWS.update(plot_kws) 150 | 151 | XY_df = self._subjects_get_XY() 152 | 153 | for key, df in XY_df.groupby(self._factors_hierarchical[:-1]): 154 | if self.factors_is_unfacetted: 155 | X, Y = df["X"], df["Y"] 156 | plt.plot(X, Y, **plot_KWS) 157 | 158 | else: 159 | for rowcolkey_ax, ax in self.axes_iter__keys_ax: 160 | if self.factors_is_1_facet: 161 | rowcolkey_xy = key[0] 162 | else: 163 | rowcolkey_xy = key[0:2] 164 | 165 | if rowcolkey_ax == rowcolkey_xy: 166 | X, Y = df["X"], df["Y"] 167 | ax.plot(X, Y, **plot_KWS) 168 | return self 169 | -------------------------------------------------------------------------------- /src/plotastic/plotting/rc.py: -------------------------------------------------------------------------------- 1 | # 2 | # %% 3 | ### Imports 4 | 5 | import matplotlib as mpl 6 | 7 | 8 | # %% 9 | # == Variables ro reuse ================================================ 10 | FONTSIZE = 10 11 | 12 | 13 | # %% 14 | # == STYLE PAPER ======================================================= 15 | PAPER = { 16 | ### Figure 17 | "figure.dpi": 200, #' Displaying figures doesn't need as much dpi as saving them 18 | "figure.figsize": (3, 3), #' default is way too big 19 | # "figure.facecolor": "gray", #' it's easier on the eyes 20 | ### Savefig 21 | "savefig.dpi": 300, #' Saving figures needs more dpi 22 | "savefig.format": "pdf", 23 | # "savefig.transparent": True, 24 | "savefig.facecolor": "white", 25 | "axes.facecolor": "white", 26 | ### Font 27 | "font.family": "sans-serif", 28 | "font.sans-serif": "Arial Narrow", 29 | "font.size": FONTSIZE, 30 | "font.weight": "bold", 31 | # ## Lines 32 | "lines.linewidth": 0.75, 33 | # ## Axes 34 | "axes.spines.right": True, #' requires argument despine=False 35 | "axes.spines.top": True, 36 | "axes.linewidth": 0.75, 37 | "axes.labelweight": "bold", 38 | "axes.titleweight": "bold", 39 | "axes.titlepad": 5, 40 | "axes.labelsize": FONTSIZE, #' fontsize of the x any y labels 41 | # ## Grid 42 | # "axes.grid": True, 43 | "grid.linestyle": "-", 44 | "grid.linewidth": 0.5, 45 | # ## Ticks 46 | "ytick.left": True, 47 | "xtick.labelsize": FONTSIZE - 1, 48 | "ytick.labelsize": FONTSIZE - 1, 49 | "ytick.major.pad": 0.9, #' distance Yticklabels and yticks 50 | "ytick.minor.pad": 0.8, 51 | "xtick.major.pad": 2, #' distance Xticklabels and yticks 52 | "xtick.minor.pad": 2, 53 | "ytick.major.size": 2.5, 54 | "ytick.minor.size": 2, 55 | "xtick.major.size": 2.5, 56 | "xtick.minor.size": 2, 57 | # ## Legend 58 | "legend.fancybox": False, #' use rounded box for legend 59 | "legend.title_fontsize": FONTSIZE, 60 | "legend.fontsize": FONTSIZE, 61 | "legend.markerscale": 1.3, #' size scaled of markers in legend 62 | "legend.handleheight": 0.7, #' line distance between legend entries 63 | "legend.handletextpad": 0.1, #' distance markers legend text 64 | # 'legend.borderaxespad': 1, #' distance legend axes border, must be negative..? 
65 | "legend.borderpad": 0.001, 66 | # 'text.usetex': True, 67 | # 'scatter.marker': 'x', 68 | } 69 | 70 | # == Collect STYLES ==================================================== 71 | 72 | ### Give styles a name and add them to STYLES_PLST 73 | STYLES = { 74 | "default": PAPER, 75 | "paper": PAPER, 76 | } 77 | 78 | ### Keys are the styles, values are the keys of the styles 79 | STYLENAMES = { 80 | "plotastic": sorted(list(STYLES.keys())), 81 | "seaborn": ["white", "dark", "whitegrid", "darkgrid", "ticks"], 82 | "matplotlib": mpl.style.available, 83 | } 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /src/plotastic/plotting/rc_utils.py: -------------------------------------------------------------------------------- 1 | """Utilities for setting rcParams and styles""" 2 | 3 | # %% 4 | import matplotlib as mpl 5 | import matplotlib.pyplot as plt 6 | import seaborn as sns 7 | 8 | # import markurutils as ut 9 | import plotastic.utils.utils as ut 10 | from plotastic.dataanalysis.dataanalysis import DataAnalysis 11 | from plotastic.example_data.load_dataset import load_dataset 12 | 13 | import plotastic.plotting.rc as rc 14 | 15 | 16 | # %% 17 | def print_styles() -> str: 18 | print("\n".join([f"{k}:\n\t{v}" for k, v in rc.STYLENAMES.items()])) 19 | 20 | 21 | def set_rcParams(rcParams: dict): 22 | """Iterates through settings dictionary and applies them to 23 | matplotlib rcParams via mpl.rcParams [setting] = value. 24 | 25 | :param rcParams: _description_ 26 | :type rcParams: dict 27 | """ 28 | for setting, value in rcParams.items(): 29 | mpl.rcParams[setting] = value 30 | 31 | 32 | def set_style(style: dict | str) -> None: 33 | """Checks if style is set by plotastic, if not checks if style is a 34 | dict with rcParams as keys and values, if not checks if style is a 35 | matplotlib style and mpl.style.use(style), if not uses seaborn styleplott 36 | 37 | :param style: _description_ 38 | :type style: dict | str 39 | :raises ValueError: _description_ 40 | :return: _description_ 41 | :rtype: _type_ 42 | """ 43 | 44 | ### Set matplotlib settings 45 | if style in rc.STYLENAMES["plotastic"]: 46 | set_rcParams(rc.STYLES[style]) 47 | elif isinstance(style, dict): 48 | set_rcParams(style) 49 | elif style in mpl.style.available: 50 | mpl.style.use(style) 51 | else: 52 | try: 53 | sns.set_style(style) 54 | except ValueError: 55 | m = [ 56 | f"#! Style '{style}' not found. Choose one", 57 | f"from these: {print_styles()}", 58 | ] 59 | raise ValueError(" ".join(m)) 60 | 61 | 62 | # %% 63 | def set_palette(palette: str | list = "Paired", verbose=True): 64 | """Sets the color palette. 65 | 66 | :param palette: _description_, defaults to "Paired" 67 | :type palette: str | list, optional 68 | :param verbose: _description_, defaults to True 69 | :type verbose: bool, optional 70 | """ 71 | if verbose: 72 | pal = sns.color_palette(palette, 8).as_hex() 73 | print(f"#! You chose this color palette: {pal}") 74 | if ut.is_notebook(): 75 | from IPython.display import display 76 | 77 | display(pal) 78 | 79 | # sns.set_theme(palette=palette) # !! 
resets rcParams 80 | mpl.rcParams["axes.prop_cycle"] = mpl.cycler( 81 | color=sns.color_palette(palette) 82 | ) 83 | -------------------------------------------------------------------------------- /src/plotastic/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/src/plotastic/py.typed -------------------------------------------------------------------------------- /src/plotastic/stat/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/src/plotastic/stat/__init__.py -------------------------------------------------------------------------------- /src/plotastic/stat/assumptions.py: -------------------------------------------------------------------------------- 1 | # 2 | # %% 3 | 4 | from typing import TYPE_CHECKING, NamedTuple #' SpherResults is a NamedTuple 5 | 6 | import pandas as pd 7 | import pingouin as pg 8 | 9 | # from plotastic.dimensions.dataframetool import DataFrameTool 10 | # from plotastic.stat.statresults import StatResults 11 | from plotastic.stat.stattest import StatTest 12 | 13 | # if TYPE_CHECKING: 14 | # from collections import namedtuple #' SpherResults is a NamedTuple 15 | # # from pingouin.distribution import SpherResults 16 | 17 | # %% Class Assumptions 18 | 19 | 20 | class Assumptions(StatTest): 21 | # == __init__======================================================================= 22 | def __init__(self, **dataframetool_kws): 23 | super().__init__(**dataframetool_kws) 24 | 25 | self.two_factor = True 26 | 27 | # == 28 | # == 29 | # == Normality ===================================================================== 30 | 31 | def check_normality( 32 | self, method: str = "shapiro", **user_kwargs 33 | ) -> pd.DataFrame: 34 | """Check assumption of normality. If the assumption is violated, you should use 35 | non-parametric tests (e.g. Kruskal-Wallis, Mann-Whitney, Wilcoxon, etc.) instead 36 | of parametric tests (ANOVA, t-test, etc.). 37 | 38 | 39 | :param method: 'shapiro', 'jarque-bera' or 'normaltest', defaults to 'shapiro' 40 | :type method: str, optional 41 | :return: _description_ 42 | :rtype: pd.DataFrame 43 | """ 44 | 45 | ### Gather Arguments 46 | kwargs = dict( 47 | dv=self.dims.y, 48 | group=self.dims.x, # !! pingouin crashes without group, so we iterate without x 49 | method=method, 50 | ) 51 | kwargs.update(user_kwargs) #' Add user kwargs 52 | 53 | ### Perform Test 54 | #' Iterate over rows, cols, hue 55 | #' Skip empty groups 56 | normDF_dict = {} 57 | # TODO: Use an iterator from hierarchical instead of one that omits x 58 | for key, df in self.data_iter__key_groups_skip_empty: 59 | #' key = (row, col, hue) 60 | normdf = pg.normality(df, **kwargs) 61 | #' Add n to seehow big group is. 62 | normdf["n"] = self.data_count_n_per_x( 63 | df 64 | ) #' -> Series with same length as normdf 65 | 66 | normDF_dict[key] = normdf 67 | 68 | normDF = pd.concat( 69 | normDF_dict, keys=normDF_dict.keys(), names=self.factors_all 70 | ) 71 | 72 | ### Save Results 73 | self.results.DF_normality = normDF 74 | 75 | return normDF 76 | 77 | # == 78 | # == Homoscedasticity ============================================================== 79 | 80 | def check_homoscedasticity( 81 | self, method: str = "levene", **user_kwargs 82 | ) -> pd.DataFrame: 83 | """Checks assumption of homoscedasticity. 
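        Per facetted group this wraps ``pg.homoscedasticity``. A rough
        sketch for a single group ``df``, assuming the fmri example
        data::

            import pingouin as pg

            pg.homoscedasticity(
                df, dv="signal", group="timepoint", method="levene"
            )
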
80 |     def check_homoscedasticity(
81 |         self, method: str = "levene", **user_kwargs
82 |     ) -> pd.DataFrame:
83 |         """Checks assumption of homoscedasticity. If the assumption is violated, the
84 |         p-values from a t-test should be corrected with Welch's correction.
85 | 
86 |         :param method: 'levene' or 'bartlett', defaults to "levene"
87 |         :type method: str, optional
88 |         :return: Homoscedasticity test results, one row per (row, col, hue) group
89 |         :rtype: pd.DataFrame
90 |         """
91 | 
92 |         ### Gather Arguments
93 |         kwargs = dict(
94 |             dv=self.dims.y,
95 |             group=self.dims.x,  # !! required, homoscedasticity is measured over a list of groups
96 |             method=method,
97 |         )
98 |         kwargs.update(user_kwargs)  #' Add user kwargs
99 | 
100 |         ### Perform Test
101 |         #' Iterate over rows, cols, and hue
102 |         #' Skip empty groups
103 |         homosced_dict = {}
104 |         for key, df in self.data_iter__key_groups_skip_empty:
105 |             #' key = (row, col, hue)
106 |             homosced = pg.homoscedasticity(df, **kwargs)
107 |             #' Add number of groups
108 |             homosced["group count"] = self.data_count_groups_in_x(df)
109 |             #' Add n to see how big groups are, make nested list to fit into single cell
110 |             homosced["n per group"] = [self.data_count_n_per_x(df).to_list()]
111 | 
112 |             homosced_dict[key] = homosced
113 | 
114 |         homoscedDF = pd.concat(
115 |             homosced_dict, keys=homosced_dict.keys(), names=self.factors_all
116 |         )
117 | 
118 |         #
119 |         ### Save Results
120 |         self.results.DF_homoscedasticity = homoscedDF
121 | 
122 |         return homoscedDF
123 | 
124 |     # ==
125 |     # == Sphericity ====================================================================
126 | 
127 |     @staticmethod
128 |     def _spher_to_df(spher: NamedTuple) -> pd.DataFrame:
129 |         """pingouin returns a SpherResults object (a NamedTuple); we need to
130 |         convert it to a DataFrame.
131 | 
132 |         :param spher: Output of pg.sphericity()
133 |         :type spher: pingouin.distribution.SpherResults, NamedTuple
134 |         :return: Sphericity result as DataFrame
135 |         :rtype: pd.DataFrame
136 |         """
137 | 
138 |         if isinstance(spher, tuple):
139 |             spher_dict = dict(zip(["spher", "W", "chi2", "dof", "pval"], spher))
140 |             spher_DF = pd.DataFrame(data=spher_dict, index=[0])
141 |         else:
142 |             spher_DF = pd.DataFrame(data=spher._asdict(), index=[0])
143 | 
144 |         return spher_DF
145 | 
146 |     def check_sphericity(
147 |         self, method: str = "mauchly", **user_kwargs
148 |     ) -> pd.DataFrame:
149 |         """Checks assumption of sphericity. If the assumption is violated, the p-values
150 |         of an RM-ANOVA should be corrected with the Greenhouse-Geisser or Huynh-Feldt method.
151 | 
152 |         :param method: 'mauchly' or 'jns', defaults to "mauchly"
153 |         :type method: str, optional
154 |         :return: Sphericity test results, one row per (row, col, hue) group
155 |         :rtype: pd.DataFrame
156 |         """
157 |         ### Make sure subject is specified
158 |         if self.subject is None:
159 |             raise ValueError(
160 |                 "Testing sphericity requires a subject to be specified."
161 |             )
162 | 
163 |         # TODO: Add option to use x or hue as within-factors
164 |         ### All
165 | 
166 |         ### Gather Arguments
167 |         kwargs = dict(
168 |             dv=self.dims.y,
169 |             subject=self.subject,
170 |             within=self.dims.x,
171 |             method=method,
172 |         )
173 |         kwargs.update(user_kwargs)  #' Add user kwargs
174 | 
175 |         ### Perform Test
176 |         #' Iterate over rows, cols, and hue
177 |         #' Skip empty groups
178 |         spher_dict = {}
179 |         for key, df in self.data_iter__key_groups_skip_empty:
180 |             #' key = (row, col, hue)
181 |             spher = pg.sphericity(df, **kwargs)
182 |             #' Convert NamedTuple to DataFrame
183 |             spherdf = self._spher_to_df(spher)
184 |             #' Add number of groups
185 |             spherdf["group count"] = self.data_count_groups_in_x(df)
186 |             #' Add n to see how big groups are
187 |             spherdf["n per group"] = [self.data_count_n_per_x(df).to_list()]
188 | 
189 |             spher_dict[key] = spherdf
190 | 
191 |         spherDF = pd.concat(
192 |             spher_dict, keys=spher_dict.keys(), names=self.factors_all_without_x
193 |         )
194 | 
195 |         ### Save Results
196 |         self.results.DF_sphericity = spherDF
197 | 
198 |         return spherDF
199 | 
200 | 
201 | # !! end class
202 | # !!
203 | # !!
204 | 
205 | 
206 | # #%%
207 | # from plotastic.example_data.load_dataset import load_dataset
208 | # DF, dims = load_dataset("fmri")
209 | 
210 | 
211 | # # %% plot
212 | # import seaborn as sns
213 | 
214 | # sns.catplot(data=DF, **dims, kind="box")
215 | 
216 | # # %% Check functionality with pingouin
217 | 
218 | # pg.normality(DF, dv=dims["y"], group=dims["x"])
219 | # pg.homoscedasticity(DF, dv=dims["y"], group=dims["x"])
220 | 
221 | # spher = pg.sphericity(DF, dv=dims["y"], subject="subject", within=dims["x"])
222 | # type(spher)
223 | 
224 | # # %% create Assumptions object
225 | 
226 | # DA = Assumptions(data=DF, dims=dims, subject="subject", verbose=True)
227 | 
228 | # DA.check_normality()
229 | # DA.check_homoscedasticity()
230 | # DA.check_sphericity()
231 | 
232 | # #%% Plot roughest facetting
233 | 
234 | # sns.catplot(data=DF, x="timepoint")
235 | 
236 | # # %% Use different set
237 | 
238 | 
239 | # DA2 = Assumptions(data=DF, dims=dict(x="timepoint", y="signal"),
240 | #                   subject="subject", verbose=True)
241 | # # DA2.catplot()
242 | 
243 | # DA2.check_normality()
244 | # DA2.check_homoscedasticity()
245 | # DA2.check_sphericity()
246 | 
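# === Added usage sketch (not part of assumptions.py) ==========================
# Condenses the commented self-test above into the typical decision flow these
# checks feed into; "normal" and "equal_var" are the boolean columns returned
# by pg.normality / pg.homoscedasticity.

from plotastic.example_data.load_dataset import load_dataset

DF, dims = load_dataset("fmri")
A = Assumptions(data=DF, dims=dims, subject="subject", verbose=True)
normal = A.check_normality()
homosced = A.check_homoscedasticity()
spher = A.check_sphericity()  # requires subject= (within-design)
if normal["normal"].all() and homosced["equal_var"].all():
    print("Parametric tests are justified (e.g. ANOVA / RM-ANOVA)")
else:
    print("Consider non-parametric tests (e.g. Kruskal-Wallis, Friedman)")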
-------------------------------------------------------------------------------- /src/plotastic/stat/bivariate.py: --------------------------------------------------------------------------------
1 | # !!
2 | 
3 | import pingouin as pg
4 | 
5 | from plotastic.dimensions.dataframetool import DataFrameTool
6 | 
7 | # %%
8 | 
9 | 
10 | class Bivariate(DataFrameTool):
11 |     def __init__(self, **dataframetool_kws):
12 |         super().__init__(**dataframetool_kws)
-------------------------------------------------------------------------------- /src/plotastic/stat/omnibus.py: --------------------------------------------------------------------------------
1 | #
2 | # %% imports
3 | 
4 | import warnings
5 | 
6 | import numpy as np
7 | import pandas as pd
8 | import pingouin as pg
9 | 
10 | # import markurutils as ut
11 | import plotastic.utils.utils as ut
12 | 
13 | from plotastic.stat.assumptions import Assumptions
14 | 
15 | # %% Class Omnibus
16 | 
17 | 
18 | class Omnibus(Assumptions):
19 |     # ==
20 |     # == __init__ ======================================================================
21 |     def __init__(self, **dataframetool_kws):
22 |         super().__init__(**dataframetool_kws)
23 | 
24 |     # ==
25 |     # == Helpers =======================================================================
26 | 
27 |     @staticmethod
28 |     def _enhance_omnibus(DF: pd.DataFrame) -> pd.DataFrame:
29 |         """Enhances the result DataFrame by adding additional columns
30 | 
31 |         :param DF: Result from omnibus functions
32 |         :type DF: pd.DataFrame
33 |         :return: Same DataFrame with a "stars" column inserted after "p-unc"
34 |         :rtype: pd.DataFrame
35 |         """
36 |         ### Insert star column right after "p-unc"
37 |         stars = DF["p-unc"].apply(Omnibus._p_to_stars)
38 |         DF.insert(DF.columns.get_loc("p-unc") + 1, "stars", stars)
39 | 
40 |         return DF
41 | 
42 |     def _ensure_more_than_one_sample_per_group(
43 |         self,
44 |         df: pd.DataFrame,
45 |         facetkey: tuple = None,
46 |     ) -> bool:
47 |         """Returns False (and warns) if any x-hue sample group within the facet
48 |         contains only a single sample. Useful to skip warning messages from pingouin.
49 | 
50 |         :param df: A facet of self.data
51 |         :type df: pd.DataFrame
52 |         :return: True if all groups have more than one sample, False otherwise
53 |         :rtype: bool
54 |         """
55 | 
56 |         ### Iterate through sample groups within that facet
57 |         results = []
58 |         for levelkey, group in df.groupby(self.factors_xhue):
59 |             if len(group) < 2:
60 |                 warnings.warn(
61 |                     f"Skipping facet {facetkey}, because there is only one sample in {levelkey}",
62 |                     RuntimeWarning,
63 |                     stacklevel=3,  # ? Prints out function that calls this one (e.g. omnibus_anova) ?
64 |                 )
65 |                 results.append(False)
66 |             else:
67 |                 results.append(True)
68 | 
69 |         ### Return True if all groups have more than one sample
70 |         return all(results)
71 | 
72 |     # ==
73 |     # == ANOVA =========================================================================
74 | 
75 |     def omnibus_anova(self, **user_kwargs) -> pd.DataFrame:
76 |         """Performs an ANOVA (parametric, unpaired) on all facets of self.data
77 | 
78 |         :return: Result from pg.anova with row and column as MultiIndex
79 |         :rtype: pd.DataFrame
80 |         """
81 |         ### Gather Arguments
82 |         kwargs = dict(
83 |             dv=self.dims.y,
84 |             between=self.factors_xhue,
85 |             detailed=True,
86 |         )
87 |         kwargs.update(user_kwargs)  #' Add user kwargs
88 | 
89 |         ### Perform ANOVA
90 |         #' Skip empty groups
91 |         aov_dict = {}
92 |         for key, df in self.data_iter__key_facet_skip_empty:
93 |             #' key = (row, col)
94 |             aov = pg.anova(
95 |                 df, **kwargs
96 |             )  # ?
Doesn't seem to print annoying warnings 97 | aov_dict[key] = aov 98 | aov_DF = pd.concat( 99 | aov_dict, keys=aov_dict.keys(), names=self.factors_rowcol_list 100 | ) 101 | 102 | ### Add extra columns 103 | aov_DF = self._enhance_omnibus(aov_DF) 104 | 105 | ### Save Result 106 | self.results.DF_omnibus_anova = aov_DF 107 | 108 | return aov_DF 109 | 110 | # == 111 | # == RMANOVA ======================================================================= 112 | 113 | def _omnibus_rm_anova_base( 114 | self, 115 | df: pd.DataFrame, 116 | facetkey: tuple, 117 | **kwargs, 118 | ) -> pd.DataFrame: 119 | """Handles Warnings of pg.rm_anova 120 | 121 | :param df: A facet of self.data 122 | :type df: pd.DataFrame 123 | :param facetkey: The key of the facet. Needed for warnings 124 | :type facetkey: tuple 125 | :return: Result from pg.rm_anova or empty DataFrame if there is only one sample 126 | :rtype: pd.DataFrame 127 | """ 128 | ### Warn if there is only one sample in a group 129 | self._ensure_more_than_one_sample_per_group(df, facetkey) 130 | 131 | ### Perform RMANOVA 132 | # !! Pingouin slams you with warnings in a big loop 133 | # !! Trying best to redirect special cases, but still too many warnings 134 | with warnings.catch_warnings(): 135 | warnings.simplefilter("ignore") 136 | rmaov = pg.rm_anova(df, **kwargs) 137 | 138 | return rmaov 139 | 140 | def omnibus_rm_anova( 141 | self, 142 | **user_kwargs, 143 | ) -> pd.DataFrame: 144 | """Performs a repeated measures ANOVA (parametric, paired) on all facets of 145 | self.data 146 | 147 | 148 | :return: Result from pg.rm_anova with row and column as MultiIndex 149 | :rtype: pd.DataFrame 150 | """ 151 | ### Gather Arguments 152 | kwargs = dict( 153 | dv=self.dims.y, 154 | subject=self.subject, 155 | within=self.factors_xhue, 156 | detailed=True, 157 | ) 158 | kwargs.update(user_kwargs) #' Add user kwargs 159 | 160 | ### Perform RMANOVA 161 | #' Skip empty groups 162 | rmaov_dict = {} 163 | for key, df in self.data_iter__key_facet_skip_empty: 164 | #' key = (row, col) 165 | rmaov = self._omnibus_rm_anova_base(df, facetkey=key, **kwargs) 166 | rmaov_dict[key] = rmaov 167 | rmaov_DF = pd.concat( 168 | rmaov_dict, keys=rmaov_dict.keys(), names=self.factors_rowcol_list 169 | ) 170 | ### Add extra columns 171 | rmaov_DF = self._enhance_omnibus(rmaov_DF) 172 | 173 | ### Save Result 174 | self.results.DF_omnibus_rmanova = rmaov_DF 175 | 176 | return rmaov_DF 177 | 178 | # == 179 | # == Kruskal-Wallis ================================================================ 180 | 181 | def omnibus_kruskal(self, **user_kwargs) -> pd.DataFrame: 182 | """Performs a Kruskal-Wallis test (non-parametric, unpaired) on all facets of 183 | self.data 184 | 185 | 186 | :return: Result from pg.kruskal with row and column as MultiIndex 187 | :rtype: pd.DataFrame 188 | """ 189 | ### Gather Arguments 190 | kwargs = dict( 191 | dv=self.dims.y, 192 | between=self.dims.x, 193 | detailed=True, 194 | ) 195 | kwargs.update(user_kwargs) #' Add user kwargs 196 | 197 | ### Perform Kruskal-Wallis 198 | #' pg.Kruskal takes only a single factor 199 | #' Skip empty groups 200 | kruskal_dict = {} 201 | for key, df in self.data_iter__key_groups_skip_empty: 202 | #' key = (row, col, hue) 203 | kruskal = pg.kruskal(df, **kwargs) 204 | kruskal_dict[key] = kruskal 205 | kruskal_DF = pd.concat( 206 | kruskal_dict, 207 | keys=kruskal_dict.keys(), 208 | names=self.factors_all_without_x, 209 | ) 210 | ### Add extra columns 211 | kruskal_DF = self._enhance_omnibus(kruskal_DF) 212 | 213 | ### Save Result 214 | 
self.results.DF_omnibus_kruskal = kruskal_DF
215 | 
216 |         return kruskal_DF
217 | 
218 |     # ==
219 |     # == Friedman ======================================================================
220 | 
221 |     def omnibus_friedman(self, **user_kwargs) -> pd.DataFrame:
222 |         """Performs a Friedman test (non-parametric, paired) on all facets of self.data
223 | 
224 |         :return: Result from pg.friedman with row and column as MultiIndex
225 |         :rtype: pd.DataFrame
226 |         """
227 |         ### Gather Arguments
228 |         kwargs = dict(
229 |             dv=self.dims.y,
230 |             subject=self.subject,
231 |             within=self.dims.x,
232 |             # detailed=True,  # !! pg.friedman doesn't have this option
233 |         )
234 |         kwargs.update(user_kwargs)  #' Add user kwargs
235 | 
236 |         ### Perform Friedman
237 |         #' pg.friedman takes only a single factor
238 |         #' Skip empty groups
239 |         friedman_dict = {}
240 |         for key, df in self.data_iter__key_groups_skip_empty:
241 |             #' key = (row, col, hue)
242 |             friedman = pg.friedman(df, **kwargs)
243 |             friedman_dict[key] = friedman
244 |         friedman_DF = pd.concat(
245 |             friedman_dict,
246 |             keys=friedman_dict.keys(),
247 |             names=self.factors_all_without_x,
248 |         )
249 |         ### Add extra columns
250 |         friedman_DF = self._enhance_omnibus(friedman_DF)
251 | 
252 |         ### Save Result
253 |         self.results.DF_omnibus_friedman = friedman_DF
254 | 
255 |         return friedman_DF
256 | 
257 | 
258 | # !!
259 | # !! end class
260 | 
261 | # %% Test Omnibus
262 | 
263 | if __name__ == "__main__":
264 |     from plotastic.example_data.load_dataset import load_dataset
265 | 
266 |     DF, dims = load_dataset("fmri")
267 |     DF, dims = load_dataset("qpcr")
268 | 
269 |     # %% CHECK pingouin ANOVA
270 |     kwargs = dict(data=DF, dv=dims["y"], detailed=True)
271 | 
272 |     aov = pg.anova(between=[dims["x"], dims["hue"]], **kwargs)
273 |     rmaov = pg.rm_anova(
274 |         within=[dims["x"], dims["hue"]], subject="subject", **kwargs
275 |     )
276 |     kruskal = pg.kruskal(between=dims["hue"], **kwargs)
277 | 
278 |     # %% Make DataAnalysis
279 | 
280 |     DA = Omnibus(data=DF, dims=dims, subject="subject", verbose=True)
281 | 
282 |     # %% There's a problem with the Data: Only 1 sample in MMP and MACS
283 | 
284 |     ### Sort by xhue
285 |     df2 = DF[(DF["class"] == "MMPs") & (DF["method"] == "MACS")].sort_values(
286 |         ["gene", "fraction"]
287 |     )
288 |     len(df2)  #' 24
289 |     levelkeys2 = df2.set_index([dims["x"], dims["hue"]]).index.unique()
290 |     DA._ensure_more_than_one_sample_per_group(df2)
291 |     # DA._plot_dendrogram_from_levelkeys(levelkeys2)
292 | 
293 |     pg.rm_anova(
294 |         data=df2,
295 |         dv=dims["y"],
296 |         within=[dims["x"], dims["hue"]],
297 |         detailed=True,
298 |         subject="subject",
299 |     )
300 |     #
301 | 
302 |     # %% Check stuff
303 | 
304 |     aov = DA.omnibus_anova()
305 |     rmaov = DA.omnibus_rm_anova()
306 |     kruskal = DA.omnibus_kruskal()
307 |     friedman = DA.omnibus_friedman()
308 | 
309 |     # %% Check Kruskal
310 | 
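# === Added usage sketch (not part of omnibus.py) ==============================
# Condenses the __main__ block above: every omnibus method facets self.data by
# row/col and returns a MultiIndex DataFrame with a "stars" column added by
# _enhance_omnibus.

from plotastic.example_data.load_dataset import load_dataset

DF, dims = load_dataset("fmri")
OM = Omnibus(data=DF, dims=dims, subject="subject", verbose=True)
aov = OM.omnibus_anova()          # parametric, unpaired
rmaov = OM.omnibus_rm_anova()     # parametric, paired (needs subject=)
kruskal = OM.omnibus_kruskal()    # non-parametric, unpaired
friedman = OM.omnibus_friedman()  # non-parametric, paired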
-------------------------------------------------------------------------------- /src/plotastic/stat/posthoc.py: --------------------------------------------------------------------------------
1 | #
2 | # %% Import Modules
3 | # import markurutils as ut
4 | import plotastic.utils.utils as ut
5 | 
6 | import numpy as np
7 | import pandas as pd
8 | 
9 | # print(pd.__version__)
10 | import pingouin as pg
11 | 
12 | 
13 | from plotastic.stat.assumptions import Assumptions
14 | 
15 | 
16 | # %% Class PostHoc
17 | 
18 | 
19 | class PostHoc(Assumptions):
20 |     DEFAULT_KWS_PAIRWISETESTS = dict(
21 |         nan_policy="pairwise",  #' Delete only pairs or complete subjects ("listwise") if samples are missing?
22 |         return_desc=True,  #' Return descriptive statistics?
23 |         correction="auto",  #' Use Welch correction if variances are unequal?
24 |     )
25 | 
26 |     # == __init__ ======================================================================
27 |     def __init__(self, **dataframetool_kws):
28 |         super().__init__(**dataframetool_kws)
29 | 
30 |     #
31 |     #
32 |     # == Base function =================================================================
33 | 
34 |     @ut.ignore_warnings
35 |     def _base_pairwise_tests(self, **kwargs) -> pd.DataFrame:
36 |         """Performs pairwise tests for a facet of self.data"""
37 | 
38 |         ### Turn around hue and x for between or within argument
39 |         if self.dims.hue:
40 |             kwargs_2 = kwargs.copy()
41 |             if "within" in kwargs:
42 |                 kwargs_2["within"] = list(reversed(kwargs["within"]))
43 |             elif "between" in kwargs:
44 |                 kwargs_2["between"] = list(reversed(kwargs["between"]))
45 | 
46 |         ### Perform Test
47 |         #' Iterate over rows and columns
48 |         PH_dict = {}
49 | 
50 |         #' Skip empty so that no empty groups of level combinations are artificially added
51 |         for key, df in self.data_iter__key_facet_skip_empty:
52 |             # print(key)
53 |             # ut.pp(df)
54 | 
55 |             # for key in self.levelkeys_rowcol:
56 |             #     df = self.data_dict_skip_empty[key]
57 | 
58 |             if (
59 |                 self.dims.hue
60 |             ):  #' Perform twice with x and hue turned around (= huex)
61 |                 ph_xhue = pg.pairwise_tests(data=df, **kwargs)
62 |                 ph_huex = pg.pairwise_tests(data=df, **kwargs_2)
63 |                 PH_dict[key] = ph_xhue.merge(ph_huex, how="outer")
64 |             else:  #' perform once with x
65 |                 ph_x = pg.pairwise_tests(data=df, **kwargs)
66 |                 PH_dict[key] = ph_x
67 | 
68 |         PH = pd.concat(
69 |             PH_dict, keys=PH_dict.keys(), names=self.factors_rowcol_list
70 |         )
71 | 
72 |         return PH
73 | 
74 |     #
75 |     # == Pairwise TESTs ================================================================
76 | 
77 |     def test_pairwise(
78 |         self,
79 |         paired=None,
80 |         parametric=True,
81 |         subject=None,
82 |         # only_contrast=False,
83 |         **user_kwargs,
84 |     ) -> pd.DataFrame:
85 |         """Interface that sorts arguments, executes pairwise tests and adds extra features to the PH table"""
86 | 
87 |         ### Gather Arguments
88 |         kwargs = dict(
89 |             dv=self.dims.y,
90 |             parametric=parametric,
91 |             nan_policy="pairwise",
92 |         )
93 |         #' Paired or unpaired
94 |         if paired is None and self.subject:
95 |             paired = True
96 |         if paired:
97 |             assert (self.subject is not None) or (
98 |                 subject is not None
99 |             ), "No subject column specified"
100 |             kwargs["within"] = self.factors_xhue
101 |             kwargs["subject"] = self.subject if self.subject else subject
102 |         else:
103 |             kwargs["between"] = self.factors_xhue
104 |         #' Apply defaults first, then user kwargs
105 |         kwargs.update(self.DEFAULT_KWS_PAIRWISETESTS)
106 |         kwargs.update(user_kwargs)
107 | 
108 |         #' Make sure the specified factors are present
109 |         if "within" in kwargs:
110 |             assert all(
111 |                 [
112 |                     f in self.factors_all
113 |                     for f in ut.ensure_list(kwargs["within"])
114 |                 ]
115 |             ), f"Argument 'within' contains unknown columns ({kwargs['within']} should be like one of {self.factors_all})"
116 |         if "between" in kwargs:
117 |             assert all(
118 |                 [
119 |                     f in self.factors_all
120 |                     for f in ut.ensure_list(kwargs["between"])
121 |                 ]
122 |             ), f"Argument 'between' contains unknown columns ({kwargs['between']} should be like one of {self.factors_all})"
123 | 
124 |         ### Make PH table
125 |         PH = self._base_pairwise_tests(**kwargs)
126 |         PH = self._enhance_PH(
127 |             PH,
128 |             # only_contrast=only_contrast,
129 |         )
130 | 
131 |         ### Save result
132 |         self.results.DF_posthoc = PH
133 | 
134 |         return PH
135 | 
136 |     def _enhance_PH(
137 |         self,
138 |         PH: pd.DataFrame,
139 |         # only_contrast=False,
140 |     ) -> pd.DataFrame:
141 |         ### Define Alpha
142 |         alpha = self.ALPHA
143 |         alpha_tolerance = self.ALPHA_TOLERANCE
144 | 
145 |         ### Define column that contains p-values
146 |         # pcol = "p-unc" if padjust in ("none", None) else "p-corr"
147 | 
148 |         ### EDIT PH
149 |         PH = PH.reset_index(
150 |             drop=False
151 |         )  #' drop is False by default, but put it explicitly here
152 | 
153 |         #' Add stars
154 |         PH["**p-unc"] = PH["p-unc"].apply(self._p_to_stars, alpha=alpha)
155 |         if "p-corr" in PH.columns:
156 |             PH["**p-corr"] = PH["p-corr"].apply(self._p_to_stars, alpha=alpha)
157 | 
158 |         #' Make column for categorizing significance
159 |         PH["Sign."] = pd.cut(
160 |             PH["p-unc"],
161 |             bins=[0, alpha, alpha_tolerance, 1],
162 |             labels=["signif.", "toler.", False],
163 |         )
164 |         if "p-corr" in PH.columns:
165 |             PH["Sign."] = pd.cut(
166 |                 PH["p-corr"],
167 |                 bins=[0, alpha, alpha_tolerance, 1],
168 |                 labels=["signif.", "toler.", False],
169 |             )
170 | 
171 |         #' Make pairs
172 |         PH["pairs"] = PH.apply(self._level_to_pair, axis=1)
173 | 
174 |         # ### Check contrast
175 |         # #' Optionally remove non-contrast comparisons
176 |         # if only_contrast and self.dims.hue:
177 |         #     PH = PH[
178 |         #         PH["Contrast"].str.contains("*", regex=False)
179 |         #     ]  # <<<< OVERRIDE PH, REMOVE NON-CONTRAST ROWS
180 | 
181 |         #' Show if the pair crosses x or hue boundaries
182 |         if self.dims.hue:
183 |             PH["cross"] = PH.apply(self._detect_xhue_crossing, axis=1)
184 |         else:
185 |             PH["cross"] = "x"
186 | 
187 |         ### Set index
188 |         PH = ut.drop_columns_by_regex(PH, r"level_\d")
189 |         if self.dims.hue:
190 |             PH = PH.set_index(
191 |                 self.factors_rowcol_list + [self.dims.hue, "Contrast"]
192 |             )
193 |         else:
194 |             PH = PH.set_index(self.factors_rowcol_list + ["Contrast"])
195 | 
196 |         return PH
197 | 
198 |     # == Pairing functions =============================================================
199 | 
200 |     def _level_to_pair(self, row: "pd.Series") -> tuple:
201 |         """Converts the factor-columns of a posthoc table into a column of pairs"""
202 | 
203 |         ### See if there are multiple factors
204 |         phInteract = " * " in row["Contrast"]
205 | 
206 |         if not phInteract:
207 |             return row["A"], row["B"]
208 |         else:
209 |             levels = row[[self.dims.hue, self.dims.x]].tolist()
210 |             if pd.notna(
211 |                 levels[0]
212 |             ):  # switch column if NaN, also check: if not math.isnan(factor)
213 |                 lvl = levels[0]
214 |                 pair = ((row["B"], lvl), (row["A"], lvl))
215 |             else:
216 |                 lvl = levels[1]
217 |                 pair = ((lvl, row["B"]), (lvl, row["A"]))
218 |             return pair
219 | 
220 |     @staticmethod
221 |     def _detect_xhue_crossing(row: "pd.Series") -> str:
222 |         """
223 |         Detects if a pair ((DCN, F2), (DCN, F1)) is crossing x or hue boundaries
224 |         :param row:
225 |         :return:
226 |         """
227 | 
228 |         """crossing Hue: ((x, hue1), (x, hue2))"""
229 |         """crossing X:   ((x1, hue), (x2, hue))"""
230 | 
231 |         ### See if there are multiple factors
232 |         phInteract = " * " in row["Contrast"]
233 | 
234 |         if not phInteract:
235 |             return "x"
236 |         else:
237 |             cross = np.nan
238 |             pair = row["pairs"]
239 |             if pair[0][0] == pair[1][0]:
240 |                 cross = "hue"
241 |             if pair[0][1] == pair[1][1]:
242 |                 cross = "x"
243 |             return cross
244 | 
245 | 
246 | # %% Import data and make PostHoc object
247 | 
248 | 
249 | # DF, dims = plst.load_dataset("fmri")
250 | 
251 | # PH = PostHoc(data=DF, dims=dims, verbose=False, subject="subject")
252 | 
253 | 
254 | # %% Check functionality of pingouin
255 | 
256 | # # !! Raises TypeError: Could not convert value 'cuestim' to numeric. This didn't happen before changing to new environment.
257 | # # !! Downgraded pandas from 2.0.3 (released april 2023) to 1.5.3 -> FIXED IT
258 | # ph = pg.pairwise_tests(data=DF, dv="signal", within=["timepoint", "event"], subject="subject", parametric=True, padjust="bonf", nan_policy="pairwise")
259 | 
260 | # %% test with pingouin
261 | 
262 | # ph = PH.test_pairwise(
263 | #     # dv="signal",
264 | #     # between=[dims["x"], dims["hue"]],
265 | #     # within=[dims["x"], dims["hue"]],
266 | #     # between=dims["col"],
267 | #     # subject="subject",
268 | #     parametric=True,
269 | #     padjust="bh",
270 | #     nan_policy="pairwise",
271 | # )
272 | 
273 | # ut.pp(ph[ph["Sign."].isin(["signif."])]).head(70)
274 | 
275 | # %%
276 | 
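# === Added usage sketch (not part of posthoc.py) ==============================
# Mirrors the commented test above; padjust is forwarded to pg.pairwise_tests
# via **user_kwargs, and paired=True is inferred once subject= is set.

from plotastic.example_data.load_dataset import load_dataset

DF, dims = load_dataset("fmri")
PH = PostHoc(data=DF, dims=dims, verbose=False, subject="subject")
ph = PH.test_pairwise(parametric=True, padjust="bonf")
significant = ph[ph["Sign."] == "signif."]  # filter on the added "Sign." column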
-------------------------------------------------------------------------------- /src/plotastic/stat/statresults.py: --------------------------------------------------------------------------------
1 | # %% Imports
2 | from typing import TYPE_CHECKING
3 | 
4 | from pathlib import Path
5 | 
6 | import pandas as pd
7 | 
8 | if TYPE_CHECKING:
9 |     import pandas as pd
10 | 
11 | # %% class StatResults
12 | 
13 | 
14 | class StatResults:
15 |     # ==
16 |     # == DEFAULTS ======================================================================
17 |     # fmt: off
18 |     DEFAULT_UNCHECKED = "NOT CHECKED"    #' If an assumption was not tested
19 |     DEFAULT_UNTESTED = "NOT TESTED"      #' If a statistical test was not performed (posthoc, omnibus)
20 |     DEFAULT_UNASSESSED = "NOT ASSESSED"  #' If an assessment was not made yet
21 |     # fmt: on
22 | 
23 |     # ==
24 |     # == INIT ==========================================================================
25 |     def __init__(self):
26 |         ### Data Tables
27 |         self.DF_normality: pd.DataFrame | str = self.DEFAULT_UNCHECKED
28 |         self.DF_homoscedasticity: pd.DataFrame | str = self.DEFAULT_UNCHECKED
29 |         self.DF_sphericity: pd.DataFrame | str = self.DEFAULT_UNCHECKED
30 | 
31 |         self.DF_omnibus_anova: pd.DataFrame | str = self.DEFAULT_UNTESTED
32 |         self.DF_omnibus_rmanova: pd.DataFrame | str = self.DEFAULT_UNTESTED
33 |         self.DF_omnibus_kruskal: pd.DataFrame | str = self.DEFAULT_UNTESTED
34 |         self.DF_omnibus_friedman: pd.DataFrame | str = self.DEFAULT_UNTESTED
35 |         self.DF_posthoc: pd.DataFrame | str = self.DEFAULT_UNTESTED
36 |         self.DF_bivariate: pd.DataFrame | str = self.DEFAULT_UNTESTED
37 | 
38 |         ### Assessments = Summarizing results from multiple groups
39 |         self._normal: bool | str = self.DEFAULT_UNASSESSED
40 |         self._homoscedastic: bool | str = self.DEFAULT_UNASSESSED
41 |         self._spherical: bool | str = self.DEFAULT_UNASSESSED
42 | 
43 |         self._parametric: bool | str = self.DEFAULT_UNASSESSED
44 | 
45 |     # ==
46 |     # == Summarize Results =============================================================
47 | 
48 |     @property
49 |     def as_dict(self) -> dict:
50 |         d = dict(
51 |             ### Assumptions
52 |             normality=self.DF_normality,
53 |             homoscedasticity=self.DF_homoscedasticity,
54 |             sphericity=self.DF_sphericity,
55 |             ### Omnibus
56 |             anova=self.DF_omnibus_anova,
57 |             rm_anova=self.DF_omnibus_rmanova,
58 |             kruskal=self.DF_omnibus_kruskal,
59 |             friedman=self.DF_omnibus_friedman,
60 |             ### Posthoc
61 |             posthoc=self.DF_posthoc,
62 |             ### Bivariate
63 |             bivariate=self.DF_bivariate,
64 |         )
65 | 
66 |         ### Remove untested
67 |         d = {k: v for k, v in d.items() if not isinstance(v, str)}
68 | 
69 |         return d
70 | 
71 |     def __iter__(self) -> tuple[str, pd.DataFrame]:
72 |         for test_name, DF in self.as_dict.items():
73 |             yield test_name, DF
74 | 
75 |     # ==
76 |     # == GETTERS AND SETTERS ===========================================================
77 | 
78 |     @property
79 |     def normal(self):
80 |         if self._normal == self.DEFAULT_UNASSESSED:
81 |             self._normal = self.assess_normality()
82 |         return self._normal
83 | 
84 |     @normal.setter
85 |     def normal(self, value: bool):
86 |         print(f"#! Defining normality as {value}!")
87 |         self._normal = value
88 | 
89 |     @property
90 |     def parametric(self):
91 |         if self._parametric == self.DEFAULT_UNASSESSED:
92 |             self._parametric = self.assess_parametric()
93 |         return self._parametric
94 | 
95 |     @parametric.setter
96 |     def parametric(self, value: bool):
97 |         print(f"#! Defining parametric as {value}!")
98 |         self._parametric = value
99 | 
100 |     # ==
101 |     # == ASSESS ASSUMPTIONS ============================================================
102 | 
103 |     def assess_normality(self, data) -> bool:
104 |         """Uses result from normality test for each group and decides if data should be considered normal or not"""
105 |         assert (
106 |             self.DF_normality is not self.DEFAULT_UNCHECKED
107 |         ), "Normality not tested yet"
108 |         raise NotImplementedError  # !! Stub: the line below is unreachable, 'stats' is undefined
109 |         self.normal = stats.normaltest(data)[1] > 0.05
110 | 
111 |     def assess_parametric(self):
112 |         """Uses results from normality, homoscedasticity and sphericity tests to decide if parametric tests should be used"""
113 |         self.parametric = self.normal and self.homoscedastic and self.spherical  # !! homoscedastic/spherical assessments not implemented yet
114 |         return self.parametric
115 | 
116 |     # ==
117 |     # == EXPORT ========================================================================
118 | 
119 |     def save(self, fname: str | Path = "plotastic_results", verbose=True) -> None:
120 |         """Exports all statistics to one excel file. Different sheets for different
121 |         tests
122 | 
123 |         :param fname: Path to save the excel file to, optional (default="plotastic_results")
124 |         :type fname: str | Path, optional
125 |         """
126 |         ### Construct output path
127 |         fname = Path(fname).with_suffix(".xlsx")
128 | 
129 |         ### Init writer for multiple sheets
130 |         writer = pd.ExcelWriter(fname, engine="xlsxwriter")
131 |         workbook = writer.book
132 | 
133 |         ### Iterate through results
134 |         for test_name, DF in self.as_dict.items():
135 |             worksheet = workbook.add_worksheet(test_name)  #' Make sheet
136 |             writer.sheets[test_name] = worksheet  #' Add sheet name to writer
137 |             DF.to_excel(writer, sheet_name=test_name)  #' Write DF to sheet
138 | 
139 |         ### Save
140 |         writer.close()
141 | 
142 |         ### Tell save location
143 |         if verbose:
144 |             print(f"Saved results to {fname.resolve()}")
145 | 
146 | 
147 | # !!
148 | # !! end class
149 | 
150 | # %% test it
151 | # if __name__ == "__main__":
152 | 
153 | #     # %% Load Data, make DA, fill it with stuff
154 | #     from plotastic.example_data.load_dataset import load_dataset
155 | #     DF, dims = load_dataset("qpcr")
156 | #     # DA = DataAnalysis(DF, dims)
157 | #     # DA.test_pairwise()
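# === Added usage sketch (not part of statresults.py) ==========================
# StatResults starts out with placeholder strings; only tables that were filled
# with a DataFrame survive as_dict / __iter__ and get written by save(). The
# assignment below is a hypothetical stand-in for a real result table.

import pandas as pd

results = StatResults()
results.DF_posthoc = pd.DataFrame({"p-unc": [0.01, 0.2]})  # hypothetical table
for test_name, DF in results:  # yields only the populated tables
    print(test_name, DF.shape)
results.save("plotastic_results")  # -> plotastic_results.xlsx, one sheet per test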
-------------------------------------------------------------------------------- /src/plotastic/stat/stattest.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | 
4 | from plotastic.dimensions.dataintegrity import DataIntegrity
5 | from plotastic.stat.statresults import StatResults
6 | 
7 | 
8 | class StatTest(DataIntegrity):
9 |     # == Class Attribute ===============================================================
10 | 
11 |     #' Alpha
12 |     ALPHA = 0.05  # TODO: Why a class variable?
13 |     #' Alpha tolerance: Will still print out the result if it nearly crossed the alpha level.
14 |     ALPHA_TOLERANCE = 0.075
15 | 
16 |     @classmethod
17 |     def set_alpha(cls, value: float) -> None:
18 |         cls.ALPHA = value
19 | 
20 |     @classmethod
21 |     def set_alpha_tolerance(cls, value: float) -> None:
22 |         cls.ALPHA_TOLERANCE = value
23 | 
24 |     #
25 |     #
26 |     # == __init__ ======================================================================
27 |     def __init__(
28 |         self,
29 |         single_factor: bool | str = False,
30 |         **dataframetool_kwargs,
31 |     ):
32 |         super().__init__(**dataframetool_kwargs)
33 | 
34 |         ### Single-Factor Mode
35 |         #' Default is two-factor analysis
36 |         # TODO: Add single-factor mode to each function
37 |         assert single_factor in [
38 |             "hue",
39 |             "col",
40 |             False,
41 |         ], f"#! single_factor must be 'hue', 'col' or False, not {single_factor}"
42 |         self.single_factor = single_factor
43 | 
44 |         ### Composition
45 |         self.results = StatResults()
46 | 
47 |     #
48 |     #
49 |     # == Helper functions ==============================================================
50 | 
51 |     @staticmethod
52 |     def _p_to_stars(fl: float, alpha=0.05):
53 |         # if mpl.get_backend() == "module://mplcairo.macosx":
54 |         #     s = "★"
55 |         # else:
56 |         #     s = "*"
57 |         s = "*"
58 |         # s = "★"
59 | 
60 |         assert isinstance(
61 |             alpha, float
62 |         ), f"#! alpha was {type(alpha)}, float required"
63 |         a = alpha
64 |         # use other stars ☆ ★ ★ ★ ٭★☆✡✦✧✩✪✫✬✭✮✯✰✵✶✷✸✹⭑⭒✴︎
65 |         if a / 1 < fl:
66 |             stars = "ns"
67 |         elif a / 1 >= fl > a / 5:
68 |             stars = s
69 |         elif a / 5 >= fl > a / 50:
70 |             stars = s * 2
71 |         elif a / 50 >= fl > a / 500:
72 |             stars = s * 3
73 |         elif a / 500 >= fl:
74 |             stars = s * 4
75 |         else:
76 |             stars = float("NaN")
77 | 
78 |         # display p-values between alpha and 1.4 * alpha (0.05 - 0.07) numerically instead of as "ns"
79 |         if a * 1.4 >= fl > a:
80 |             stars = round(fl, 3)  # Report p-values if they're just above alpha
81 | 
82 |         return stars
83 | 
84 |     @staticmethod
85 |     def _effectsize_to_words(fl: float, t=(0.01, 0.06, 0.14, 0.5)):
86 |         if t[0] > fl:
87 |             effectSize = "No Effect"
88 |         elif t[0] <= fl < t[1]:
89 |             effectSize = "Small"
90 |         elif t[1] <= fl < t[2]:
91 |             effectSize = "Medium"
92 |         elif t[2] <= fl < t[3]:
93 |             effectSize = "Large"
94 |         elif t[3] <= fl:
95 |             effectSize = "Huge"
96 |         else:
97 |             effectSize = float("NaN")
98 |         return effectSize
99 | 
-------------------------------------------------------------------------------- /src/plotastic/utils/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/src/plotastic/utils/__init__.py
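# === Added worked example (not part of stattest.py) ===========================
# The thresholds in _p_to_stars above, spelled out for alpha = 0.05:
# p > 0.05 -> "ns"; 0.05 >= p > 0.01 -> "*"; 0.01 >= p > 0.001 -> "**";
# 0.001 >= p > 0.0001 -> "***"; p <= 0.0001 -> "****". A p-value just above
# alpha (0.05 < p <= 0.07 = 1.4 * alpha) is reported numerically instead.

from plotastic.stat.stattest import StatTest

assert StatTest._p_to_stars(0.20) == "ns"
assert StatTest._p_to_stars(0.03) == "*"
assert StatTest._p_to_stars(0.005) == "**"
assert StatTest._p_to_stars(0.0005) == "***"
assert StatTest._p_to_stars(0.055) == 0.055  # tolerance zone, reported as number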
-------------------------------------------------------------------------------- /src/plotastic/utils/subcache.py: --------------------------------------------------------------------------------
1 | #
2 | # %% Imports
3 | 
4 | 
5 | from typing import Callable, List
6 | 
7 | import os
8 | from pathlib import Path
9 | 
10 | from icecream import ic
11 | 
12 | from joblib import Memory
13 | 
14 | # from plotastic.utils import utils as ut
15 | 
16 | 
17 | class SubCache(Memory):
18 |     """Expands the joblib.Memory class with some useful methods:
19 | 
20 |     - List directories within cache
21 |     - List objects within cache
22 |     - Adds subcache attribute, with benefits:
23 |         - Subcache replaces module name in cache directory
24 |         - More control over cache directories
25 |         - Persistent caching, since IPython passes a new location to
26 |           joblib each time the Memory object is initialized
27 |     - Doesn't work right if two SubCache objects cache the same function
28 |     """
29 | 
30 |     def __init__(
31 |         self, subcache_dir: str, assert_parent: str = None, *args, **kwargs
32 |     ):
33 |         super().__init__(*args, **kwargs)
34 | 
35 |         ### Subfolder of location, overrides default subfolder by joblib
36 |         self.subcache_dir = subcache_dir
37 | 
38 |         ### self.location/joblib/subcache
39 |         self.subcache_path = os.path.join(
40 |             self.location, "joblib", self.subcache_dir
41 |         )
42 | 
43 |         ### Prevent joblib folders being created by wrong Interactive Windows
44 |         if assert_parent is not None:
45 |             parent_full = Path(self.location).absolute()
46 |             parent = os.path.split(parent_full)[-1]
47 |             assert (
48 |                 parent == assert_parent
49 |             ), f"When initializing joblib.Memory, we expected cache to be in {assert_parent}, but we ended up in {parent_full}"
50 | 
51 |     def list_dirs(
52 |         self, detailed: bool = False, max_depth: int = 3
53 |     ) -> List[str]:
54 |         """
55 |         Returns a list of cache directories.
56 | 
57 |         :param detailed: if True, returns all cache directories with
58 |             full paths. Default is False.
59 |         :type detailed: bool, optional
60 |         :param max_depth: The maximum depth to search for cache
61 |             directories. Default is 3.
62 |         :type max_depth: int, optional
63 |         :return: List[str], a list of cache directories.
64 |         """
65 | 
66 |         subcache = self.subcache_path
67 | 
68 |         location_subdirs = []
69 | 
70 |         ### Recursive walking
71 |         for root, dirs, _files in os.walk(subcache):
72 |             #' Don't go too deep: 'joblib/plotastic/example_data/load_dataset/load_dataset',
73 |             depth = root[len(subcache) :].count(os.sep)
74 |             if not detailed and depth > max_depth:
75 |                 continue
76 |             for dir in dirs:
77 |                 #' Don't need to check for 'joblib' because it's not a subdirectory of cache_dir
78 |                 #' Exclude subdirectories like "c1589ea5535064b588b2f6922e898473"
79 |                 if len(dir) >= 32 or dir == "joblib":
80 |                     continue
81 |                 #' Return every path completely
82 |                 if detailed:
83 |                     location_subdirs.append(os.path.join(root, dir))
84 |                 else:
85 |                     dir_path = os.path.join(root, dir)
86 |                     dir_path = dir_path.replace(subcache, "")
87 |                     if dir_path.startswith("/"):
88 |                         dir_path = dir_path[1:]
89 |                     location_subdirs.append(dir_path)
90 |         return location_subdirs
91 | 
92 |     def list_objects(self):
93 |         """Return the list of inputs and outputs stored in this joblib.Memory
94 |         cache."""
95 | 
96 |         objects = []
97 | 
98 |         for item in self.store_backend.get_items():
99 |             path_to_item = os.path.split(
100 |                 os.path.relpath(item.path, start=self.store_backend.location)
101 |             )
102 |             result = self.store_backend.load_item(path_to_item)
103 |             input_args = self.store_backend.get_metadata(path_to_item).get(
104 |                 "input_args"
105 |             )
106 |             objects.append((input_args, result))
107 |         return objects
108 | 
109 |     def subcache(self, f: Callable, **mem_kwargs) -> Callable:
110 |         """Caches a function in a persistent manner, since IPython passes a new
111 |         location to joblib each time the Memory object is initialized
112 |         """
113 |         f.__module__ = self.subcache_dir
114 |         f.__qualname__ = f.__name__
115 | 
116 |         return self.cache(f, **mem_kwargs)
117 | 
118 | 
119 | if __name__ == "__main__":
120 |     home = os.path.join(
121 | 
os.path.expanduser("~"), 122 | ".cache", 123 | ) 124 | 125 | def sleep(seconds): 126 | import time 127 | 128 | time.sleep(seconds) 129 | 130 | MEM = SubCache(location=home, subcache_dir="plotastic", verbose=True) 131 | 132 | sleep = MEM.subcache(sleep) 133 | # %% 134 | ### First time slow, next time fast 135 | sleep(1.4) 136 | # %% 137 | MEM.list_dirs() 138 | # %% 139 | MEM.clear() 140 | 141 | # %% 142 | ### Using different cache allows clearance of only that cache 143 | MEM2 = SubCache(location=home, subcache_dir="plotic2", verbose=True) 144 | 145 | def slep(seconds): 146 | import time 147 | 148 | time.sleep(seconds) 149 | 150 | sleep_cached2 = MEM2.subcache(slep) 151 | sleep_cached2(1.4) 152 | # %% 153 | MEM2.list_dirs() 154 | # %% 155 | MEM2.clear() 156 | -------------------------------------------------------------------------------- /testing/make_htmlcov.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | ### From within project root 4 | #' sadly coveragerc can't be in a different directory 5 | pytest tests -n 3 --cov --cov-report html:testing/htmlcov --cov-config .coveragerc 6 | -------------------------------------------------------------------------------- /tests/DA_configs.py: -------------------------------------------------------------------------------- 1 | """Utilities for testing plotastic. Contains lists of arguments""" 2 | 3 | # %% imports 4 | 5 | from typing import Callable 6 | 7 | import os 8 | import warnings 9 | from glob import glob 10 | 11 | 12 | import pandas as pd 13 | 14 | import matplotlib.pyplot as plt 15 | 16 | import plotastic as plst 17 | 18 | # import plotastic.utils.utils as ut 19 | # import plotastic.utils.cache as utc 20 | from plotastic.utils.subcache import SubCache 21 | 22 | # %% 23 | # ### Cache it to speed up 24 | # # !! Execute from within plotastic folder! 25 | # # !! 
Makes a wrong cache folder if executed with interactive mode 26 | # MEMORYDA_configs = SubCache( 27 | # location="../testing", #' Passed to joblib.Memory 28 | # assert_parent="testing", 29 | # subcache_dir="DA_configs", #' subfolder of location, overrides joblib 30 | # verbose=True, 31 | # ) 32 | ### Clear cache if needed 33 | # MEMORY_TESTCONFIGS.clear() 34 | 35 | 36 | # %% 37 | # == Load Datasets ===================================================== 38 | #' Source of files is seaborn, markurutils just adds cut column 39 | 40 | ### Cache function 41 | # load_dataset = MEMORY_DA_CONFIGS.subcache(plst.load_dataset) 42 | 43 | ### Load datasets 44 | DF_tips, dims_tips = plst.load_dataset("tips", verbose=False) 45 | DF_fmri, dims_fmri = plst.load_dataset("fmri", verbose=False) 46 | DF_qpcr, dims_qpcr = plst.load_dataset("qpcr", verbose=False) 47 | 48 | 49 | # %% 50 | # == Arguments for plst.DataAnalysis =================================== 51 | #' Facet data such that it leads to presence of absence of empty groups 52 | 53 | 54 | ### Empty groups 55 | dims_withempty_tips = [ 56 | dict(y="tip", x="day", hue="sex", col="smoker", row="time"), 57 | dict(y="tip", x="sex", hue="day", col="smoker", row="time"), 58 | dict(y="tip", x="sex", hue="day", col="time", row="smoker"), 59 | dict(y="tip", x="sex", hue="day", col="time"), 60 | dict(y="tip", x="sex", hue="day", row="time"), 61 | dict(y="tip", x="sex", hue="day", row="size-cut"), 62 | dict(y="tip", x="sex", hue="day"), 63 | dict(y="tip", x="sex"), 64 | dict(y="tip", x="size-cut"), 65 | ] 66 | 67 | 68 | ### Don't make empty groups 69 | dims_noempty_tips = [ 70 | dict(y="tip", x="size-cut", hue="smoker", col="sex", row="time"), 71 | dict(y="tip", x="size-cut", hue="smoker", col="sex"), 72 | dict(y="tip", x="size-cut", hue="smoker"), 73 | dict(y="tip", x="size-cut"), 74 | ] 75 | 76 | dims_noempty_fmri = [ 77 | dict(y="signal", x="timepoint", hue="event", col="region"), 78 | dict(y="signal", x="timepoint", hue="region", col="event"), 79 | dict(y="signal", x="timepoint", hue="region"), 80 | dict(y="signal", x="timepoint", hue="event"), 81 | dict(y="signal", x="timepoint"), 82 | ] 83 | 84 | dims_noempty_qpcr = [ 85 | dict(y="fc", x="gene", hue="fraction", col="class", row="method"), 86 | dict(y="fc", x="gene", hue="fraction", col="method", row="class"), 87 | dict(y="fc", x="gene", hue="fraction", col="class"), 88 | dict(y="fc", x="gene", hue="fraction"), 89 | dict(y="fc", x="gene"), 90 | ] 91 | 92 | # %% 93 | 94 | 95 | # %% 96 | # == Make tuples (DF, dims) ============================================ 97 | #' for pytest.parametrize 98 | 99 | zipped_withempty_tips = [(DF_tips, dim) for dim in dims_withempty_tips] 100 | zipped_noempty_tips = [(DF_tips, dim) for dim in dims_noempty_tips] 101 | zipped_noempty_fmri = [(DF_fmri, dim) for dim in dims_noempty_fmri] 102 | zipped_noempty_qpcr = [(DF_qpcr, dim) for dim in dims_noempty_qpcr] 103 | 104 | ### Paired Data (with subject) 105 | zipped_noempty_PAIRED = zipped_noempty_fmri + zipped_noempty_qpcr 106 | 107 | ### All should make 14 test 108 | zipped_noempty_ALL = ( 109 | zipped_noempty_tips + zipped_noempty_fmri + zipped_noempty_qpcr 110 | ) 111 | 112 | zipped_ALL = ( 113 | zipped_noempty_tips 114 | + zipped_noempty_fmri 115 | + zipped_noempty_qpcr 116 | + zipped_withempty_tips 117 | ) 118 | # len(zipped_noempty_ALL) #' -> 14 total tests 119 | 120 | 121 | # %% 122 | # == A cached DataAnalysis Initializer ================================= 123 | 124 | 125 | # def DataAnalysis_cached(*args, **kwargs) -> 
plst.DataAnalysis:
126 | #     """Makes a DataAnalysis object"""
127 | #     return plst.DataAnalysis(*args, **kwargs)
128 | 
129 | 
130 | # DataAnalysis_cached = MEMORY_DA_CONFIGS.subcache(DataAnalysis_cached)
131 | 
132 | 
133 | # %%
134 | # == Make Dataanalysis objects =========================================
135 | 
136 | 
137 | def make_DA_statistics(dataset: str = "qpcr") -> plst.DataAnalysis:
138 |     """Makes a DA object with every possible statistical result stored in it
139 | 
140 |     :param dataset: "tips", "fmri", or "qpcr"
141 |     :type dataset: str
142 |     """
143 | 
144 |     ### ignore warnings
145 |     with warnings.catch_warnings():
146 |         warnings.simplefilter("ignore")
147 | 
148 |         ### Example Data that's Paired, so we can use tests for paired data
149 |         assert dataset not in ["tips"], f"{dataset} is not paired"
150 | 
151 |         ### Load example data
152 |         DF, dims = plst.load_dataset(dataset, verbose=False)
153 | 
154 |         ### Init DA
155 |         DA = plst.DataAnalysis(DF, dims, subject="subject", verbose=False)
156 | 
157 |         ### Assumptions
158 |         DA.check_normality()
159 |         DA.check_homoscedasticity()
160 |         DA.check_sphericity()
161 | 
162 |         ### Omnibus
163 |         DA.omnibus_anova()
164 |         DA.omnibus_rm_anova()
165 |         DA.omnibus_kruskal()
166 |         DA.omnibus_friedman()
167 | 
168 |         ### Posthoc
169 |         DA.test_pairwise()
170 | 
171 |     return DA
172 | 
173 | 
174 | def make_DA_plot(dataset: str = "qpcr") -> plst.DataAnalysis:
175 |     """A DA that has a plot"""
176 |     with warnings.catch_warnings():
177 |         warnings.simplefilter("ignore")
178 |         ### Load example data
179 |         DF, dims = plst.load_dataset(dataset, verbose=False)
180 | 
181 |         ### Init DA
182 |         DA = plst.DataAnalysis(DF, dims, subject="subject", verbose=False)
183 | 
184 |         DA.plot_box_strip()
185 |         plt.close()
186 |     return DA
187 | 
188 | 
189 | def make_DA_all(dataset: str) -> plst.DataAnalysis:
190 |     """A DA with all possible statistics and a plot"""
191 |     with warnings.catch_warnings():
192 |         warnings.simplefilter("ignore")
193 | 
194 |         DA = make_DA_statistics(dataset)
195 |         DA.plot_box_swarm()
196 |         plt.close()
197 |     return DA
198 | 
199 | 
200 | # %%
201 | ### Test make functions
202 | if __name__ == "__main__":
203 |     pass
204 | # %%
205 | #!%%timeit
206 | # get_DA_statistics()
207 | 
208 | # %%
209 | #!%%timeit
210 | # get_DA_plot()
211 | 
212 | # %%
213 | #!%%timeit
214 | # get_DA_all(dataset="qpcr")
215 | 
216 | # %%
217 | 
218 | ### Cache results of these functions to speed up testing
219 | # make_DA_statistics = MEMORY_DA_CONFIGS.subcache(make_DA_statistics)
220 | # make_DA_plot = MEMORY_DA_CONFIGS.subcache(make_DA_plot)
221 | # make_DA_all = MEMORY_DA_CONFIGS.subcache(make_DA_all)
222 | 
223 | ### Make DataAnalysis objects for testing
224 | DA_STATISTICS: plst.DataAnalysis = make_DA_statistics("qpcr")
225 | DA_PLOT: plst.DataAnalysis = make_DA_plot("qpcr")
226 | DA_ALL: plst.DataAnalysis = make_DA_all("qpcr")
227 | 
228 | 
229 | # %%
230 | # == Utils =============================================================
231 | 
232 | 
233 | ### (DF, dims) -> (DF, dims, kwargs)
234 | def add_zip_column(zipped: list[tuple], column: list) -> list[tuple]:
235 |     """Adds a column to a list of tuples. Useful for adding a list of arguments to a
236 |     list of dataframes and dimensions. E.g.: (DF, dims) -> (DF, dims, kwargs)
237 | 
238 |     :param zipped: A set of dataframes and dimensions in this shape [(df, dim), (df,
239 |         dim), ...] ready to be used in pytest.parametrize
240 |     :type zipped: list[tuple]
241 |     :param column: A list of new arguments to be added to each tuple in zipped. Must be same length as zipped
242 |     :type column: list
243 |     :return: (DF, dims) -> (DF, dims, kwargs)
244 |     :rtype: list[tuple]
245 |     """
246 | 
247 |     assert len(zipped) == len(column), "zipped and column must be same length"
248 | 
249 |     zipped_with_column = []
250 |     for tup, e in zip(zipped, column):
251 |         zipped_with_column.append(tup + (e,))
252 |     return zipped_with_column
253 | 
254 | 
255 | def cleanfiles(fname: str):
256 |     """Deletes all files that start with fname"""
257 |     testfiles = glob(fname + "*")
258 |     for file in testfiles:
259 |         os.remove(file)
260 | 
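# === Added usage example (not part of DA_configs.py) ==========================
# What add_zip_column above does, with stand-in values instead of real
# DataFrames: (DF, dims) tuples gain a per-case kwargs column for
# pytest.parametrize, as used in _annotator_test.py further below.

zipped = [("DF_a", "dims_a"), ("DF_b", "dims_b")]  # stand-ins for (DF, dims)
kwargs_column = [dict(include=["x"]), dict(exclude=["y"])]
zipped3 = add_zip_column(zipped, kwargs_column)
# -> [("DF_a", "dims_a", {"include": ["x"]}), ("DF_b", "dims_b", {"exclude": ["y"]})]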
-------------------------------------------------------------------------------- /tests/DA_configs2.py: --------------------------------------------------------------------------------
1 | 
2 | ### They are all non-empty
3 | StatTestCases = [
4 |     "all",
5 |     "paired",
6 |     "unpaired",
7 |     "unpaired",
8 |     "parametric_paired",
9 | ]
-------------------------------------------------------------------------------- /tests/DA_utils.py: --------------------------------------------------------------------------------
1 | """A utility class that creates DataAnalysis objects for testing"""
2 | # %%
3 | 
4 | 
5 | import pandas as pd
6 | 
7 | import plotastic as plst
8 | from plotastic.dataanalysis.dataanalysis import DataAnalysis
9 | from plotastic.utils.subcache import SubCache
10 | 
11 | import DA_configs as dac
12 | 
13 | # %%
14 | # == Class CreateDA ====================================================
15 | 
16 | 
17 | class TestDA(DataAnalysis):
18 |     def __init__(
19 |         self,
20 |         data: pd.DataFrame,
21 |         dims: dict,
22 |         subject: str = None,
23 |         levels: list[tuple[str]] = None,
24 |         title: str = "untitled",
25 |         verbose=False,
26 |     ) -> None:
27 |         kws = dict(
28 |             data=data,
29 |             dims=dims,
30 |             subject=subject,
31 |             levels=levels,  #' Introduced by DataFrameTool
32 |             title=title,  #' Introduced by DataAnalysis
33 |             verbose=verbose,  #' Introduced by DataAnalysis
34 |         )
35 | 
36 |         super().__init__(**kws)
37 | 
38 |     def perform_statistics_unpaired(self, parametric=True) -> "TestDA":
39 |         """Perform unpaired statistics"""
40 |         ### Assumptions
41 |         self.check_normality()
42 |         self.check_homoscedasticity()
43 | 
44 |         ### Omnibus
45 |         if parametric:
46 |             self.omnibus_anova()
47 |         else:
48 |             self.omnibus_kruskal()
49 | 
50 |         ### PostHoc
51 |         self.test_pairwise(parametric=parametric)
52 | 
53 |         return self
54 | 
55 |     def perform_statistics_paired(self, parametric=True) -> "TestDA":
56 |         """Perform paired statistics"""
57 |         ### Assumptions
58 |         self.check_normality()
59 |         self.check_homoscedasticity()
60 |         self.check_sphericity()
61 | 
62 |         ### Omnibus
63 |         if parametric:
64 |             self.omnibus_anova()
65 |         else:
66 |             self.omnibus_kruskal()
67 | 
68 |         ### PostHoc
69 |         self.test_pairwise(parametric=parametric)
70 | 
71 |         return self
72 | 
73 | 
74 | 
75 | if __name__ == "__main__":
76 |     pass
77 |     # %%
78 |     dims = dac.dims_withempty_tips[0]
79 |     data = dac.DF_tips
80 |     DA = TestDA(data=data, dims=dims)
-------------------------------------------------------------------------------- /tests/_annotator_test.py: --------------------------------------------------------------------------------
1 | #
2 | # %% Imports
3 | 
4 | import matplotlib.pyplot as plt
5 | import pytest
6 | 
7 | import plotastic as plst
8 | from plotastic.dataanalysis.annotator import Annotator  #' Needed for the __main__ block below
9 | 
10 | 
11 | import DA_configs as dac
12 | 
13 | 
14 | # %% testing for dataset TIPS
15 | 
16 | # !! Don't use with empty groups
17 | # !!
We chose exclusions that won't show in the plot, but we need those arguments to test it 18 | TIPS_annot_pairwise_kwargs = [ 19 | dict( 20 | include=["Yes", {"1-2": ("Yes", "No")}], 21 | exclude=["No", {"Yes": ("1-2", ">=3")}], 22 | include_in_facet={ 23 | ("Lunch", "Male"): ["Yes", {">=3": ("Yes", "No")}], 24 | ("Lunch", "Female"): ["No", {"No": ("1-2", ">=3")}], 25 | }, 26 | exclude_in_facet={ 27 | ("Lunch", "Male"): ["Yes", {">=3": ("No", "Yes")}], 28 | ("Lunch", "Female"): ["No", {"Yes": ("1-2", ">=3")}], 29 | }, 30 | ), 31 | dict( 32 | include=["Yes", {"1-2": ("Yes", "No")}], 33 | exclude=["No", {"Yes": ("1-2", ">=3")}], 34 | include_in_facet={ 35 | "Male": ["Yes", {">=3": ("Yes", "No")}], 36 | "Female": ["No", {"No": ("1-2", ">=3")}], 37 | }, 38 | exclude_in_facet={ 39 | "Male": ["Yes", {">=3": ("No", "Yes")}], 40 | "Female": ["No", {"Yes": ("1-2", ">=3")}], 41 | }, 42 | ), 43 | dict( 44 | include=["Yes", {"1-2": ("Yes", "No")}], 45 | exclude=["No", {"Yes": ("1-2", ">=3")}], 46 | ), 47 | dict( 48 | include=["1-2"], 49 | exclude=[">=3"], 50 | ), 51 | ] 52 | 53 | ### Add a column of args: (DF, dims) -> (DF, dims, kwargs) 54 | zipped_tips: list[tuple] = dac.add_zip_column( 55 | dac.zipped_noempty_tips, TIPS_annot_pairwise_kwargs 56 | ) 57 | 58 | 59 | @pytest.mark.parametrize("DF, dims, annot_kwargs", zipped_tips) 60 | def test_pairwiseannotations_tips(DF, dims, annot_kwargs): 61 | DA = plst.DataAnalysis(data=DF, dims=dims, verbose=True) 62 | _ph = DA.test_pairwise(paired=False, padjust="none") 63 | DA = ( 64 | DA.subplots() 65 | .fillaxes(kind="box") 66 | .annotate_pairwise( 67 | **annot_kwargs, 68 | show_ph=False, 69 | only_sig="all", 70 | ) 71 | ) 72 | ### Don't plot while executing pytest in terminal 73 | if __name__ != "__main__": 74 | plt.close() 75 | 76 | 77 | # %% Testing for dataset FMRI 78 | 79 | FMRI_annot_pairwise_kwargs = [ 80 | dict( 81 | include=[0, "stim"], 82 | exclude=[1, {"stim": (0, 2)}], 83 | include_in_facet={ 84 | "frontal": [0, "cue", {"stim": (3, 4)}], 85 | "parietal": [0, "cue", {"stim": (4, 6)}], 86 | }, 87 | exclude_in_facet={ 88 | "frontal": [2, "cue", {"stim": (3, 7)}], 89 | "parietal": [4, "stim", {"stim": (2, 9)}], 90 | }, 91 | ), 92 | dict( 93 | include=[0, "frontal"], 94 | exclude=[1, {"frontal": (0, 2)}], 95 | include_in_facet={ 96 | "stim": [0, "frontal", {"parietal": (3, 4)}], 97 | "cue": [0, "parietal", {"frontal": (4, 6)}], 98 | }, 99 | exclude_in_facet={ 100 | "stim": [2, "parietal", {"frontal": (3, 7)}], 101 | "cue": [4, "frontal", {"parietal": (2, 9)}], 102 | }, 103 | ), 104 | dict( 105 | include=[0, "frontal"], 106 | exclude=[1, {"frontal": (0, 2)}], 107 | ), 108 | dict( 109 | include=[0, "cue"], 110 | exclude=[1, {"stim": (0, 2)}], 111 | ), 112 | dict( 113 | include=[0, 2], 114 | exclude=[1], 115 | ), 116 | ] 117 | 118 | ### Add a column of args: (DF, dims) -> (DF, dims, kwargs) 119 | zipped_fmri: list[tuple] = dac.add_zip_column( 120 | dac.zipped_noempty_fmri, FMRI_annot_pairwise_kwargs 121 | ) 122 | 123 | 124 | @pytest.mark.parametrize("DF, dims, annot_kwargs", zipped_fmri) 125 | def test_pairwiseannotations_fmri(DF, dims, annot_kwargs): 126 | DA = plst.DataAnalysis( 127 | data=DF, dims=dims, verbose=True, subject="subject" 128 | ) # !! 
subject 129 | _ph = DA.test_pairwise(paired=True, padjust="bonf") 130 | DA = ( 131 | DA.subplots() 132 | .fillaxes(kind="box") 133 | .annotate_pairwise( 134 | **annot_kwargs, 135 | show_ph=False, 136 | only_sig="strict", 137 | ) 138 | ) 139 | ### Don't plot while executing pytest in terminal 140 | if __name__ != "__main__": 141 | plt.close() 142 | 143 | 144 | # %% For dataset qPCR 145 | 146 | 147 | QPCR_annot_pairwise_kwargs = [ 148 | dict( 149 | include=["F1", "LOXL2", "SOST"], 150 | exclude=["F2", {"MMP7": ("F1", "F3")}], 151 | include_in_facet={ 152 | "MMPs": ["MMP7", {"MMP9": ("F1", "F2")}], 153 | "Bone Metabolism": ["SOST", "F2", {"TIMP1": ("F3", "F1")}], 154 | }, 155 | exclude_in_facet={ 156 | "Wash": ["MMP7", {"MMP9": ("F1", "F2")}], 157 | "MACS": ["SOST", {"JAK2": ("F1", "F2")}], 158 | }, 159 | ), 160 | dict( 161 | include=["F1", "LOXL2", "SOST"], 162 | exclude=["F2", {"MMP7": ("F1", "F3")}], 163 | include_in_facet={ 164 | "MMPs": ["MMP7", {"MMP9": ("F1", "F2")}], 165 | "Bone Metabolism": ["SOST", "F2", {"TIMP1": ("F3", "F1")}], 166 | }, 167 | exclude_in_facet={ 168 | "Wash": ["MMP7", {"MMP9": ("F1", "F2")}], 169 | "MACS": ["SOST", {"JAK2": ("F1", "F2")}], 170 | }, 171 | ), 172 | dict( 173 | include="__HUE", 174 | exclude=["F2", {"MMP7": ("F1", "F3")}], 175 | ), 176 | dict( 177 | include="__X", 178 | exclude=["F2", {"MMP7": ("F1", "F3")}], 179 | ), 180 | dict( 181 | include=["Vimentin", "MMP7"], 182 | exclude=["FZD4"], 183 | ), 184 | ] 185 | 186 | zipped_qpcr: list[tuple] = dac.add_zip_column( 187 | dac.zipped_noempty_qpcr, QPCR_annot_pairwise_kwargs 188 | ) 189 | 190 | 191 | @pytest.mark.parametrize("DF, dims, annot_kwargs", zipped_qpcr) 192 | def test_pairwiseannotation_qpcr(DF, dims, annot_kwargs): 193 | DA = plst.DataAnalysis(data=DF, dims=dims, verbose=True) 194 | _ph = DA.test_pairwise(paired=False, padjust="none", subject="subject") 195 | DA = ( 196 | DA.subplots(sharey=False, figsize=(10, 10)) 197 | .fillaxes(kind="box") 198 | .transform_y("log10") # !! log transform 199 | .edit_y_scale_log(10) # !! MUST be called before annotation! 200 | .annotate_pairwise( 201 | # include="__HUE", 202 | show_ph=False, 203 | only_sig="tolerant", 204 | **annot_kwargs, 205 | ) 206 | # .edit_tight_layout() # !! 
just uglier 207 | ) 208 | ### Don't plot while executing pytest in terminal 209 | if __name__ != "__main__": 210 | plt.close() 211 | 212 | 213 | ### Run without pytest 214 | if __name__ == "__main__": 215 | DF, dims = plst.load_dataset("qpcr") 216 | AN = Annotator(data=DF, dims=dims, verbose=True) 217 | AN.levels_dendrogram() 218 | test_pairwiseannotation_qpcr( 219 | DF, dims, annot_kwargs=QPCR_annot_pairwise_kwargs[0] 220 | ) 221 | 222 | # %% Interactive testing to display Plots 223 | 224 | if __name__ == "__main__": 225 | import ipytest 226 | 227 | ipytest.run() 228 | -------------------------------------------------------------------------------- /tests/_assumptions_test.py: -------------------------------------------------------------------------------- 1 | # 2 | # %% imports 3 | 4 | import seaborn as sns 5 | import pandas as pd 6 | 7 | import pytest 8 | 9 | 10 | # import markurutils as ut 11 | # import plotastic as plst 12 | from plotastic import DataAnalysis 13 | 14 | import DA_configs as dac 15 | 16 | 17 | # %% Test per config 18 | 19 | 20 | @pytest.mark.parametrize("DF, dims", dac.zipped_noempty_ALL) 21 | def test_normality(DF: pd.DataFrame, dims): 22 | DA = DataAnalysis(data=DF, dims=dims, verbose=True) 23 | DA.check_normality() 24 | 25 | 26 | @pytest.mark.parametrize("DF, dims", dac.zipped_noempty_ALL) 27 | def test_homoscedasticity(DF, dims): 28 | DA = DataAnalysis(data=DF, dims=dims, verbose=True) 29 | DA.check_homoscedasticity() 30 | 31 | 32 | @pytest.mark.parametrize("DF, dims", dac.zipped_noempty_PAIRED) 33 | def test_sphericity(DF, dims): 34 | DA = DataAnalysis(data=DF, dims=dims, verbose=True, subject="subject") 35 | DA.check_sphericity() 36 | 37 | 38 | # %% interactive testing to display Plots 39 | 40 | if __name__ == "__main__": 41 | import ipytest 42 | 43 | ipytest.run() 44 | -------------------------------------------------------------------------------- /tests/_dims_test.py: -------------------------------------------------------------------------------- 1 | # 2 | # %% imports 3 | import plotastic as plst 4 | import unittest 5 | 6 | 7 | # %% Import Test Data 8 | DF, dims = plst.load_dataset("tips") #' Import Data 9 | DA = plst.DataAnalysis( 10 | data=DF, dims=dims, title="tips" 11 | ) #' Make DataAnalysis Object 12 | 13 | 14 | # %% Unit Tests 15 | 16 | 17 | class TestDataAnalysis(unittest.TestCase): 18 | def test_switching(self): 19 | v = False 20 | data, dims = plst.load_dataset("tips", verbose=v) 21 | DA = plst.DataAnalysis(data, dims, verbose=v) 22 | 23 | ### Chaining work? 24 | x, E1 = DA.dims.x, "size-cut" 25 | x_inchain, E2 = DA.switch("x", "hue", verbose=v).dims.x, "smoker" 26 | x_after_chaining, E3 = DA.dims.x, "size-cut" 27 | print(x, x_inchain, x_after_chaining) 28 | print(x != x_inchain) 29 | print(x == x_after_chaining) 30 | 31 | self.assertEqual(x, E1) 32 | self.assertEqual(x_inchain, E2) 33 | self.assertEqual(x_after_chaining, E3) 34 | 35 | 36 | # %% __name__ == "__main__" 37 | 38 | if __name__ == "__main__": 39 | unittest.main() 40 | -------------------------------------------------------------------------------- /tests/_dimsandlevels_test.py: -------------------------------------------------------------------------------- 1 | # %% Imports 2 | import pytest 3 | import matplotlib.pyplot as plt 4 | 5 | import plotastic as plst 6 | 7 | # from . 
import DA_configs as dac


# %%
# == Dendrogram ========================================================
@pytest.mark.parametrize("DF, dims", dac.zipped_noempty_ALL)
def test_levels_dendrogram(DF, dims):
    ### No need to evaluate level combos if there's just an X and no facet (hue, col, row)
    if not len(dims.keys()) == 2:
        DA = plst.DataAnalysis(data=DF, dims=dims)
        DA.levels_dendrogram()

    ### Show the plot only when run interactively, otherwise close it
    if __name__ != "__main__":
        plt.close()
    else:
        plt.show()  #' show plot, otherwise too many figures


# %%
# == Test combocounts ==================================================


@pytest.mark.parametrize("DF, dims", dac.zipped_noempty_ALL)
def test_levels_combocounts(DF, dims):
    ### No need to evaluate level combos if there's just an X and no facet (hue, col, row)
    if not len(dims.keys()) == 2:
        DA = plst.DataAnalysis(data=DF, dims=dims)
        DA.levels_combocounts()

    ### Show the plot only when run interactively, otherwise close it
    if __name__ != "__main__":
        plt.close()
    else:
        plt.show()  #' show plot, otherwise too many figures


if __name__ == "__main__":
    import pandas as pd

    DF, dims = plst.load_dataset("qpcr", verbose=False)
    DF, dims = plst.load_dataset("tips", verbose=False)
    DF, dims = plst.load_dataset("fmri", verbose=False)

    ### Init DataAnalysis object
    DA = plst.DataAnalysis(data=DF, dims=dims)

    DA._count_levelcombos()

    DA.levelkeys
    len(DA.levelkeys)
    DA.levels_combocounts()
    DA.levels_dendrogram()

# %% run interactively

if __name__ == "__main__":
    import ipytest

    ipytest.run()


# %%
-------------------------------------------------------------------------------- /tests/_filer_test.py: --------------------------------------------------------------------------------
#
# %% imports

import pytest

import pandas as pd

import os
from glob import glob
from pathlib import Path

import plotastic as plst

import DA_configs as dac


# %%
import IPython

IPython.extract_module_locals()[1].get("__vsc_ipynb_file__")


# %% Test

DF, dims = plst.load_dataset("tips", verbose=False)
DA = plst.DataAnalysis(DF, dims, verbose=False)
DA_COMPLETE = dac.DA_STATISTICS


# %% Test prevent_overwrite


def test_prevent_overwrite():
    ### Define a name
    testfile_name = "_FILE_123"
    distraction_names = [
        "_FILE_",
        "__FILE_",
        "_FILE_12",
        "_FIL_12",
    ]

    def mk_testfiles(testfile_name) -> str:
        ### Make a testfile excel
        df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        df.to_excel(testfile_name + ".xlsx")

        ### Make a testfile text as a distraction
        with open(testfile_name + ".txt", "w") as f:
            f.write("I am a text file")

        return testfile_name

    ### Cleanup before testing
    dac.cleanfiles(testfile_name)
    for name in distraction_names:
        dac.cleanfiles(name)

    ### Make Distraction Files
    for name in distraction_names:
        mk_testfiles(name)

    # == TEST 0: mode="day"
    kws = dict(overwrite="day")
    new = DA.filer.prevent_overwrite(testfile_name, **kws)
    assert (
        new == testfile_name + f"_{DA.filer.current_day}"
    ), f"new_name = {new}, testfile_name = {testfile_name}"
{testfile_name}" 69 | 70 | # == TEST 1: mode="nothing" 71 | kws = dict(overwrite="nothing") 72 | 73 | ### If NO file exists, it should return the same name with _0 74 | new = DA.filer.prevent_overwrite(testfile_name, **kws) 75 | assert ( 76 | new == testfile_name + "_0" 77 | ), f"new_name = {new}, testfile_name = {testfile_name}" 78 | 79 | ### If a file EXISTS, it should return the same name with _0 80 | tested = mk_testfiles(testfile_name) 81 | new = DA.filer.prevent_overwrite(testfile_name, **kws) 82 | assert ( 83 | new == testfile_name + "_0" 84 | ), f"new_name = {new}, testfile_name = {tested}" 85 | 86 | ### If a file with _0 exists, it should return a new name with _1 87 | tested = mk_testfiles(new) #' "testfile_name_0" 88 | new = DA.filer.prevent_overwrite(testfile_name, **kws) 89 | assert ( 90 | new == testfile_name + "_1" 91 | ), f"new_name = {new}, testfile_name = {tested}" 92 | 93 | ### If a file with _1 exists, it should return a new name with _2 94 | tested = mk_testfiles(new) #' "testfile_name_1" 95 | new = DA.filer.prevent_overwrite(testfile_name, **kws) 96 | assert ( 97 | new == testfile_name + "_2" 98 | ), f"new_name = {new}, testfile_name = {tested}" 99 | 100 | # == Cleanup 101 | dac.cleanfiles(testfile_name) 102 | for name in distraction_names: 103 | dac.cleanfiles(name) 104 | 105 | 106 | if __name__ == "__main__": 107 | test_prevent_overwrite() 108 | os.getcwd() 109 | Path.cwd() 110 | 111 | # %% 112 | if __name__ == "__main__": 113 | import ipytest 114 | 115 | ipytest.run() 116 | -------------------------------------------------------------------------------- /tests/_hierarchical_dims_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markur4/plotastic/ae9107afce70482f47c17f3ade3e5f349b2f30de/tests/_hierarchical_dims_test.py -------------------------------------------------------------------------------- /tests/_load_dataset_test.py: -------------------------------------------------------------------------------- 1 | # 2 | # %% Imports 3 | 4 | import pytest 5 | 6 | import plotastic as plst 7 | from plotastic.example_data.load_dataset import FILES 8 | 9 | # %% 10 | 11 | parameters = [name for name in FILES.keys()] 12 | 13 | 14 | @pytest.mark.parametrize("name", parameters) 15 | def test_load_dataset(name: str): 16 | """simply checks, if it's executable, after correct packaging in setup.py and all.""" 17 | df, dims = plst.load_dataset(name, verbose=True) 18 | 19 | 20 | # %% 21 | 22 | if __name__ == "__main__": 23 | import ipytest 24 | ipytest.run() 25 | -------------------------------------------------------------------------------- /tests/_multiplot_test.py: -------------------------------------------------------------------------------- 1 | # 2 | # %% 3 | 4 | import pytest 5 | import ipytest 6 | import matplotlib.pyplot as plt 7 | 8 | import plotastic as plst 9 | 10 | import DA_configs as dac 11 | 12 | 13 | # %% 14 | @pytest.mark.parametrize("DF, dims", dac.zipped_ALL) 15 | def test_plot(DF, dims): 16 | DA = plst.DataAnalysis(data=DF, dims=dims, verbose=False) 17 | DA.plot() 18 | if __name__ == "__main__": 19 | plt.show() 20 | else: 21 | plt.close("all") 22 | 23 | 24 | if __name__ == "__main__": 25 | ipytest.run() 26 | 27 | 28 | # %% 29 | @pytest.mark.parametrize("DF, dims", dac.zipped_ALL) 30 | def test_box_strip(DF, dims): 31 | DA = plst.DataAnalysis(data=DF, dims=dims, verbose=False) 32 | DA.plot_box_strip() 33 | if __name__ == "__main__": 34 | plt.show() 35 | else: 36 | plt.close("all") 37 | 38 
if __name__ == "__main__":
    ipytest.run()


# %%
@pytest.mark.parametrize("DF, dims", dac.zipped_ALL)
def test_box_swarm(DF, dims):  # was `plot_box_swarm`, which pytest never collected
    DA = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
    DA.plot_box_swarm()  # was a copy-paste of plot_box_strip()
    if __name__ == "__main__":
        plt.show()
    else:
        plt.close("all")


if __name__ == "__main__":
    ipytest.run()
-------------------------------------------------------------------------------- /tests/_omnibus_test.py: --------------------------------------------------------------------------------
#
# %% imports

import warnings

import seaborn as sns
import pandas as pd

import pytest


# import markurutils as ut
# import plotastic as plst
from plotastic import DataAnalysis

import DA_configs as dac


# %% Test per config


@pytest.mark.parametrize("DF, dims", dac.zipped_noempty_ALL)
def test_omnibus_anova(DF: pd.DataFrame, dims):
    DA = DataAnalysis(data=DF, dims=dims, verbose=True)
    DA.omnibus_anova()


# !! Warnings happen when groups have only one sample
@pytest.mark.filterwarnings("ignore::RuntimeWarning")
@pytest.mark.parametrize("DF, dims", dac.zipped_noempty_PAIRED)
def test_omnibus_rm_anova(DF, dims):
    DA = DataAnalysis(data=DF, dims=dims, subject="subject", verbose=True)
    DA.omnibus_rm_anova()


@pytest.mark.parametrize("DF, dims", dac.zipped_noempty_ALL)
def test_omnibus_kruskal(DF, dims):
    DA = DataAnalysis(data=DF, dims=dims, verbose=True)
    DA.omnibus_kruskal()


# @pytest.mark.filterwarnings("ignore::RuntimeWarning")
@pytest.mark.parametrize("DF, dims", dac.zipped_noempty_PAIRED)
def test_omnibus_friedman(DF, dims):
    DA = DataAnalysis(data=DF, dims=dims, subject="subject", verbose=True)
    DA.omnibus_friedman()


# %% interactive testing to display Plots

if __name__ == "__main__":
    import ipytest

    ipytest.run()
-------------------------------------------------------------------------------- /tests/_paper_test.py: --------------------------------------------------------------------------------
### IMPORT PLOTASTIC
import plotastic as plst

# IMPORT EXAMPLE DATA
DF, _dims = plst.load_dataset("fmri", verbose=False)
# EXPLICITLY DEFINE DIMENSIONS TO FACET BY
dims = dict(
    y="signal",  # y-axis, dependent variable
    x="timepoint",  # x-axis, independent variable (within-subject factor)
    hue="event",  # color, independent variable (within-subject factor)
    col="region",  # axes, grouping variable
)
# INITIALIZE DATAANALYSIS OBJECT
DA = plst.DataAnalysis(
    data=DF,  # Dataframe, long format
    dims=dims,  # Dictionary with y, x, hue, col, row
    subject="subject",  # Datapoints are paired by subject (optional)
    verbose=False,  # Print out info about the Data (optional)
)
# STATISTICAL TESTS
DA.check_normality()  # Check Normality
DA.check_sphericity()  # Check Sphericity
DA.omnibus_rm_anova()  # Perform RM-ANOVA
DA.test_pairwise()  # Perform Posthoc Analysis
# PLOTTING
(
    DA.plot_box_strip()  # Pre-built plotting function initializes plot
    .annotate_pairwise(  # Annotate results from DA.test_pairwise()
        include="__HUE"  # Use only significant pairs across each hue
    )
)


### BACK-CHECK
import seaborn as sns

sns.catplot(data=DF, **_dims)
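As a further back-check (an editor's sketch, not part of the test suite): plotastic's statistics appear to be pingouin-backed (see src/plotastic/stat), so the omnibus RM-ANOVA above could plausibly be reproduced with pingouin directly, filtering per region to mirror the col facet. Everything below is a hedged sketch; the column names come from the fmri dataset loaded above.

import pingouin as pg

for region in DF["region"].unique():  # one RM-ANOVA per col="region" facet
    aov = pg.rm_anova(
        data=DF.query("region == @region"),
        dv="signal",                    # dims["y"], the dependent variable
        within=["timepoint", "event"],  # dims["x"] and dims["hue"], both within-subject
        subject="subject",              # pairing variable
    )
    print(region, aov, sep="\n")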
-------------------------------------------------------------------------------- /tests/_plotedits_test.py: --------------------------------------------------------------------------------
#
# %%

import pytest
import ipytest
import matplotlib.pyplot as plt

import plotastic as plst

import DA_configs as dac


# %%
titles_tips = [
    {("Lunch", "Male"): "blaa"},
    {("Male"): "blAA"},
    None,
    None,
]
zipped_tips = dac.add_zip_column(dac.zipped_noempty_tips, titles_tips)


@pytest.mark.parametrize("DF, dims, axtitles", zipped_tips)
def test_edit_titles(DF, dims, axtitles: dict):
    DA = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
    if DA.factors_rowcol:  #' Need faceting, otherwise no axes
        DA.plot()
        DA.edit_titles(axtitles=axtitles)
    if __name__ == "__main__":
        plt.show()
    else:
        plt.close("all")


if __name__ == "__main__":
    ipytest.run()


# %%
@pytest.mark.parametrize("DF, dims", dac.zipped_ALL)
def test_edit_titles_with_func(DF, dims):
    DA = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
    if DA.factors_rowcol:  #' Need faceting, otherwise no axes
        DA.plot().edit_titles_with_func(
            row_func=lambda x: x.upper(),
            col_func=lambda x: "hä",
            connect=" || ",
        )

    if __name__ == "__main__":
        plt.show()
    else:
        plt.close("all")


if __name__ == "__main__":
    ipytest.run()

# %%
titles_tips = [
    ["sdfsfd", None, "dd", None],
    [None, "aa"],
    None,
    None,
]
zipped_tips = dac.add_zip_column(dac.zipped_noempty_tips, titles_tips)


@pytest.mark.parametrize("DF, dims, titles", zipped_tips)
def test_edit_titles_replace(DF, dims, titles: list):
    DA = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
    if DA.factors_rowcol:  #' Need faceting, otherwise no axes
        (DA.plot().edit_titles_replace(titles=titles))
    if __name__ == "__main__":
        plt.show()
    else:
        plt.close("all")


if __name__ == "__main__":
    ipytest.run()


# %%
@pytest.mark.parametrize("DF, dims", dac.zipped_ALL)
def test_edit_xy_axis_labels(DF, dims):
    DA = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
    DA.plot().edit_xy_axis_labels(
        x="ui!",
        x_lowest_row="rambazamba",
        x_notlowest_row="FLOH",
        y="Johannes",
        y_leftmost_col="Gertrude",
        y_notleftmost_col="Hä?",
    )
    if __name__ == "__main__":
        plt.show()
    else:
        plt.close("all")


if __name__ == "__main__":
    ipytest.run()


# %%
@pytest.mark.parametrize("DF, dims", dac.zipped_ALL)
def test_edit_y_scale_log(DF, dims):
    DA = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
    DA.plot().edit_y_scale_log(base=2)
    if __name__ == "__main__":
        plt.show()
    else:
        plt.close("all")


if __name__ == "__main__":
    ipytest.run()


# %%
@pytest.mark.parametrize("DF, dims", dac.zipped_ALL)
def test_edit_y_ticklabel_percentage(DF, dims):
    DA = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
    DA.plot().edit_y_ticklabel_percentage(
        decimals_major=1,
        decimals_minor=1,  # !! Not working
    )
    if __name__ == "__main__":
        plt.show()
    else:
        plt.close("all")


if __name__ == "__main__":
    ipytest.run()


# %%
@pytest.mark.parametrize("DF, dims", dac.zipped_ALL)
def test_edit_y_ticklabels_log_minor(DF, dims):
    DA = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
    DA.plot().edit_y_scale_log(base=2).edit_y_ticklabels_log_minor(
        subs=[2, 3, 5, 7],
    )
    if __name__ == "__main__":
        plt.show()
    else:
        plt.close("all")


if __name__ == "__main__":
    ipytest.run()


# %%
labels_zip = [
    ["sdfsfd", "dddd"],
    ["sdfsfd", "dddd"],
    ["sdfsfd", "dddd"],
    ["sdfsfd", "dddd"],
]
zipped_tips = dac.add_zip_column(dac.zipped_noempty_tips, labels_zip)


@pytest.mark.parametrize("DF, dims, labels", zipped_tips)
def test_edit_x_ticklabels_exchange(DF, dims, labels):
    DA = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
    DA.plot().edit_x_ticklabels_exchange(
        labels=labels,
        labels_lowest_row=[l.upper() for l in labels],
    )
    if __name__ == "__main__":
        plt.show()
    else:
        plt.close("all")


if __name__ == "__main__":
    ipytest.run()


# %%
@pytest.mark.parametrize("DF, dims", dac.zipped_ALL)
def test_edit_x_ticklabels_rotate(DF, dims):  # was a duplicate name shadowing the test above
    DA = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
    DA.plot().edit_x_ticklabels_rotate(
        rotation=75,
        ha="center",
        # va="top",
        pad=0.1,
    )
    if __name__ == "__main__":
        plt.show()
    else:
        plt.close("all")


if __name__ == "__main__":
    ipytest.run()

# %%
plt.close("all")


# %%
@pytest.mark.parametrize("DF, dims", dac.zipped_ALL)
def test_edit_grid(DF, dims):
    plt.close()
    DA = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
    (
        DA.plot()
        .edit_y_scale_log(base=2)  #' To see minor ticks
        .edit_grid(
            y_major_kws=dict(ls="--", linewidth=0.5, c="grey"),
            y_minor_kws=dict(ls=":", linewidth=0.2, c="grey"),
            x_major_kws=dict(ls="--", linewidth=0.6, c="grey"),
        )
    )
    if __name__ == "__main__":
        plt.show()
    else:
        plt.close("all")


if __name__ == "__main__":
    ipytest.run()


# %%
@pytest.mark.parametrize("DF, dims", dac.zipped_ALL)
def test_edit_legend(DF, dims):
    DA = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
    if DA.dims.hue:
        DA.plot().edit_legend(
            reset_legend=True,
            title="HUI",
            loc="upper right",
            bbox_to_anchor=(1.3, 1),
            borderaxespad=1,
            # pad=0.5,
            frameon=True,
        )

    if __name__ == "__main__":
        plt.show()
    else:
        plt.close("all")


if __name__ == "__main__":
    ipytest.run()


@pytest.mark.parametrize("DF, dims", dac.zipped_ALL)
def test_edit_fontsizes(DF, dims):
    plt.close()
    DA = plst.DataAnalysis(data=DF, dims=dims, verbose=False)

    DA.plot().edit_fontsizes(
        ticklabels=14,
        xylabels=16,
        axis_titles=18,
    )

    if __name__ == "__main__":
        plt.show()
    else:
        plt.close("all")


if __name__ == "__main__":
    ipytest.run()
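The edit_* tests above each exercise one method in isolation; for orientation, here is a minimal consolidated sketch of the same chainable editing API. This is an editor's addition, not part of the test file — all calls and keyword values are copied from the tests above.

import matplotlib.pyplot as plt
import plotastic as plst

DF, dims = plst.load_dataset("tips", verbose=False)
DA = plst.DataAnalysis(data=DF, dims=dims, verbose=False)
(
    DA.plot()
    .edit_y_scale_log(base=2)  #' log scale first, so minor gridlines show up
    .edit_grid(
        y_major_kws=dict(ls="--", linewidth=0.5, c="grey"),
        y_minor_kws=dict(ls=":", linewidth=0.2, c="grey"),
        x_major_kws=dict(ls="--", linewidth=0.6, c="grey"),
    )
    .edit_fontsizes(ticklabels=14, xylabels=16, axis_titles=18)
)
plt.show()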
-------------------------------------------------------------------------------- /tests/_rc_test.py: --------------------------------------------------------------------------------
#
# %%
# import pytest
import ipytest
import matplotlib.pyplot as plt

import plotastic as plst

import DA_configs as dac

# %%


def test_rc():
    """Test rc()"""
    plst.set_palette("Set2")
    plst.set_style("paper")

    DA = dac.DA_ALL
    DA.plot_box_strip()

    if __name__ == "__main__":
        plt.show()
    else:
        plt.close("all")


if __name__ == "__main__":
    ipytest.run()
-------------------------------------------------------------------------------- /tests/_save_test.py: --------------------------------------------------------------------------------
#
# %% imports
from typing import Callable

import os
from glob import glob

import pytest

# import seaborn as sns
# import pandas as pd

# import markurutils as ut
import plotastic as plst


import DA_configs as dac


# %% testfigure
# import matplotlib.pyplot as plt
# import numpy as np
# fig, ax = plt.subplots(2,2)

# fig.get_axes()
# fig.axes

# %% Test


DA = dac.DA_STATISTICS

funcs = [
    DA.save_statistics,
    # DA.save_fig,  # !! Not working, but let's keep it for now
    # DA.save_all,  # !! Not working
]


@pytest.mark.parametrize("func", funcs)
def test_save(func: Callable, lastcleanup=True):
    """Test export_statistics()"""

    ### Define a name
    fname = "plotastic_results"

    ### Cleanup before testing
    dac.cleanfiles(fname)

    # == Test overwrite=True ===============================
    kwargs = dict(fname=fname, overwrite=True)
    func(**kwargs)
    func(**kwargs)  #' Should overwrite
    func(**kwargs)  #' Should overwrite

    ### Make sure files overwrote each other
    saved = glob(fname + "*")
    assert len(saved) in [
        1,
        2,
    ], "Should have saved one/two files, instead got: " + str(saved)

    dac.cleanfiles(fname)

    # == Test overwrite="day" ===============================
    kwargs = dict(fname=fname, overwrite="day")
    func(**kwargs)
    func(**kwargs)  #' Should overwrite
    func(**kwargs)  #' Should overwrite

    ### Make sure files didn't delete each other
    saved = glob(fname + "*")
    assert len(saved) in [
        1,
        2,
    ], "Should have saved one or two files, instead got: " + str(saved)

    dac.cleanfiles(fname)

    # == Test overwrite="nothing" ===============================
    kwargs = dict(fname=fname, overwrite="nothing")
    func(**kwargs)
    func(**kwargs)  #' Should NOT overwrite
    func(**kwargs)  #' Should NOT overwrite

    ### Make sure files didn't delete each other
    saved = glob(fname + "*")
    assert len(saved) in [
        3,
        6,
    ], "Should have saved three/six files, instead got: " + str(saved)

    if lastcleanup:
        dac.cleanfiles(fname)


if __name__ == "__main__":
    test_save(func=DA.save_statistics, lastcleanup=False)

    ### cleanup
    # for file in glob("plotastic_results*"):
    #     os.remove(file)

# %%

# %% Test save_fig
# import matplotlib.pyplot as plt
# DA.plot_box_strip()
# DA.save_fig(fname="p1", overwrite=True)  # ? saves wrong fig ?
# DA.save_fig(fname="p2", overwrite=True, fig=DA.fig)  # ? saves wrong fig ??
# DA.fig.savefig("p3.pdf")  # ? saves CORRECT FIG!!
# plt.savefig("p4.pdf")


# %%

# %% interactive testing to display Plots

if __name__ == "__main__":
    import ipytest

    # ipytest.run()
-------------------------------------------------------------------------------- /tests/_utils_test.py: --------------------------------------------------------------------------------
#
# %%
import matplotlib as mpl

from plotastic import utils as ut

import DA_configs as dac


# %%
def test_font_functions():
    ut.mpl_font()
    ut.mpl_fontsizes_get_all()
    ut.mpl_fontsize_from_rc(rc_param="legend.fontsize")

    ut.mpl_fontpath()


if __name__ == "__main__":
    test_font_functions()

    # mpl.rcParams["font.size"] = 22
    print(mpl.rcParams["font.size"])  #' returns an integer
    print(mpl.rcParams["legend.fontsize"])  #' returns "medium"

    ### Fontsizes
    d = ut.mpl_fontsizes_get_all()
    print(d)

    fs = ut.mpl_fontsize_from_rc()
    legend_fs = ut.mpl_fontsize_from_rc(rc_param="legend.fontsize")
    print(legend_fs)

    ### Font
    # plst.set_style("paper")
    font = ut.mpl_font
    fontpath = ut.mpl_fontpath()
    print(fontpath)

    if "Narrow" in font:
        print("narrow")


# %%
def test_get_bbox_width():
    DA = dac.DA_ALL
    # DA.legend.get_window_extent()
    bbox = DA.legend.get_tightbbox()
    ut.get_bbox_width(bbox)
-------------------------------------------------------------------------------- /tests/conftest.py: --------------------------------------------------------------------------------

-------------------------------------------------------------------------------- /tests/non_pytest_checks/hspace_wspace.py: --------------------------------------------------------------------------------
"""A script that shows that hspace and wspace are autoadjusted
"""
# %%

from matplotlib import pyplot as plt
import matplotlib as mpl
import plotastic as plst
from plotastic import utils as ut

print(mpl.__version__)

### Lower dpi
plt.rcParams["figure.dpi"] = 70


# %%
# == Example Data ======================================================
DF, dims = plst.load_dataset("tips", verbose=False)
DA = plst.DataAnalysis(DF, dims=dims)


# %%
def get_heights(DA: plst.DataAnalysis):
    ### Get bboxes of axes

    heights_cols = []
    for key, axes in DA.axes_iter__col_axes:
        heights = []
        # print(key)  # todo
        for ax in axes:
            bbox = ax.get_tightbbox()
            height = ut.get_bbox_height(bbox, in_inches=True)
            heights.append(height)

            # print(bbox.extents)  # todo
            # print(height)
            # print()
        heights_cols.append(heights)
    return heights_cols


if __name__ == "__main__":
    heights_cols = get_heights(DA)
    print(heights_cols)
    # print()


def adjust_hspace(DA: plst.DataAnalysis):
    height = DA.figsize[1]

    ### Adjust height to fit all axes
    heights_cols = get_heights(DA)
    heights = [sum(heights_col) for heights_col in heights_cols]
    new_height = max(heights)
    DA.fig.set_figheight(new_height, forward=True)

    ### That size increase stretched the axes, too, undo that
    # height_fraction = height / new_height
    # hspace = new_height - height
    # plt.subplots_adjust(hspace=hspace)  # ?? Doesn't work at all


if __name__ == "__main__":
    adjust_hspace(DA)
    print(DA.figsize)
    # print()

# %%
def adjust_hspace_recursive(DA: plst.DataAnalysis):
    """Since plt.subplots_adjust(hspace=...) doesn't work, we need to
    figure out something else. This function is a recursive approach
    that increases the figure height until all axes fit."""

    height = DA.figsize[1]

    ### Adjust height to fit all axes
    heights_cols = get_heights(DA)
    heights = [sum(heights_col) for heights_col in heights_cols]
    new_height = max(heights)

    ### Save some recursion steps
    if new_height / height > 1.7:
        new_height = new_height * 1.7
        # print("heightboost")
    print(new_height, height)

    ### Recursive increase
    #' Repeat until the sum of axes heights is less than the figure
    #' height. 99% of new_height is more than enough
    while 0.99 * new_height > height:
        #' Save some recursion steps, also makes nice spacing
        new_height = new_height * 1.1
        DA.fig.set_figheight(new_height, forward=True)
        height = DA.figsize[1]
        adjust_hspace_recursive(DA)  #' Recursive call


if __name__ == "__main__":
    pass
    # DA.subplots(figsize=(5,1)).fillaxes(kind="strip", dodge=True)
    # adjust_hspace_recursive(DA)
    # print(DA.figsize)
    # print()
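# --- Editor's sketch (not part of the original script): the same
# --- measure-and-grow idea in plain matplotlib, without plotastic.
# --- Assumes an Agg-style canvas that provides get_renderer().
def _sketch_grow_fig_to_fit_axes():
    fig, axes = plt.subplots(3, 1)
    fig.canvas.draw()  #' tight bboxes are only valid after a draw
    renderer = fig.canvas.get_renderer()
    #' Sum the axes' tight heights; bboxes come in display units -> inches
    needed = sum(ax.get_tightbbox(renderer).height for ax in axes.flat) / fig.dpi
    #' Grow in 10% steps until the axes fit, like adjust_hspace_recursive above
    #' (`needed` is measured once, so the loop terminates)
    while fig.get_figheight() < needed:
        fig.set_figheight(fig.get_figheight() * 1.1, forward=True)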
def check_hspace():
    for i in range(5):
        # print(labels)
        width = 5
        height = i + 1

        ### Plot
        (
            DA.subplots(
                figsize=(width, height),
                # constrained_layout=True,  # !! not working with subplots_adjust
                # hspace=.7,
            )
            .fillaxes(kind="strip", dodge=True)
            .edit_legend()
        )
        ### Spaces
        # adjust_hspace(DA)
        adjust_hspace_recursive(DA)

        ### Try mpl native functions
        # DA.fig.subplots_adjust(hspace=.9)
        # plt.subplots_adjust(hspace=.5)  # ?? NOT WORKING AT ALL
        # DA.fig.tight_layout(pad=5.0)
        # DA.fig.tight_layout(h_pad=2)
        # plt.tight_layout(h_pad=2)
        # plt.subplot_tool()

        print()
        new_height = round(DA.figsize[1], 2)
        # plt.suptitle(f"width={width}, height={height, new_height}", y=1.1)

        # plt.close()


if __name__ == "__main__":
    plst.set_style("paper")
    # plst.set_style("classic")
    # plst.print_styles()
    check_hspace()


# def _get_legend_width(labels: list[str]) -> float:
#     """Calculates the width of the legend in inches, taking fontsize
#     into account"""

#     ### Add legend title, which is hue
#     labels = [DA.dims.hue] + labels  # TODO: replace with self

#     ### Split by new lines and flatten
#     labels = [label.split("\n") for label in labels]
#     labels = [item for sublist in labels for item in sublist]
#     # print(labels)

#     ### Get length of longest level (or title)
#     max_label_length = max([len(label) for label in labels])

#     ### Convert label length to inches
#     #' 1 inch = 72 points, one character = ~10 points
#     fontsize = _get_fontsize_legend()
#     character_per_inch = 72 / fontsize
#     if "Narrow" in DA.font_mpl:  # TODO: replace with self
#         character_per_inch = character_per_inch * 0.8

#     legend_width = max_label_length / character_per_inch

#     ### Add more for the markers
#     #' When the legend title (hue) is the largest, no space needed
#     if len(DA.dims.hue) != max_label_length:
#         # legend_width += 0.5  # TODO reactivate
#         print("added marker width")

#     return legend_width
-------------------------------------------------------------------------------- /tests/non_pytest_checks/legend_position.py: --------------------------------------------------------------------------------
"""A helper script that generates plots of different sizes to test the
things that highly depend on overall plot size, like legend positioning
"""
# %%

from matplotlib import pyplot as plt
import matplotlib as mpl
import plotastic as plst
from plotastic import utils as ut

### Lower dpi
plt.rcParams["figure.dpi"] = 70


# %%
# == Example Data ======================================================
DF, dims = plst.load_dataset("tips", verbose=False)
DA = plst.DataAnalysis(DF, dims=dims)


# %%
# == Utils: Legend Width ================================================

if __name__ == "__main__":
    ### Test with legend
    DA.plot().edit_legend()
    plt.close()
    legend = DA.legend
    legend_box = legend.get_tightbbox()
    legend_width = ut.get_bbox_width(legend_box)

    ### Test with axes
    box = DA.axes[0][0].get_tightbbox()
    box.extents
    fig_width = ut.get_bbox_width(box)


# %%
# == Legend ============================================================
"""
We want the legend to be `loc="center right"`. But that setting alone is
bad: we need to use bbox_to_anchor, otherwise the legend will be outside
the figure. However, with increasing figure width, the legend drifts
away from the figure. That effect is more drastic when bbox_to_anchor
has larger numbers than (1.0, 0.5). So we would need to adjust
borderaxespad depending on figure width.
But that was also bad. I opted to stretch the figure instead and then
adjust the subplot size to fit the legend.
"""
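# --- Editor's sketch (not part of the original script): measuring the
# --- legend and stretching the figure by that amount, as described in
# --- the docstring above. Plain matplotlib only; names are illustrative.
def _sketch_stretch_fig_for_legend():
    fig, ax = plt.subplots(figsize=(4, 3))
    ax.plot([0, 1], label="yes")
    ax.plot([1, 0], label="no")
    leg = ax.legend(loc="center left", bbox_to_anchor=(1.0, 0.5))
    fig.canvas.draw()  #' window extents are only valid after a draw
    legend_width = leg.get_window_extent().width / fig.dpi  #' pixels -> inches
    #' Widen the figure by the legend's width (the subplot params would
    #' still need adjusting to keep the axes' original size)
    fig.set_figwidth(fig.get_figwidth() + legend_width, forward=True)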
def check_legend():
    label_list = [
        ["yes", "no"],
        ["ad", "saaaaaaaaaaaaaaa"],
        ["ad", "saaaaa\naaaaaaaaaa"],
    ]

    for i in range(20):
        for labels in label_list:
            # print(labels)
            width = i + 1

            ### Plot
            (DA.subplots(figsize=(width, 3)).fillaxes(kind="strip", dodge=True))

            DA.edit_legend(
                labels=labels,
                # borderaxespad=None,
                # loc="center right",
                # bbox_to_anchor=None,
            )
            ### Legend Positioning
            # _adjust_fig_to_fit_legend(DA, labels=labels)

            # print()
            plt.suptitle(f"width={width}", y=1.1)
            # plt.close()


if __name__ == "__main__":
    print(mpl.rcParams["legend.fontsize"])
    # plst.set_style("paper")
    plst.set_style("classic")
    # plst.print_styles()
    check_legend()
-------------------------------------------------------------------------------- /tests/pytest.ini: --------------------------------------------------------------------------------
[pytest]
filterwarnings =
    error
    ignore::UserWarning
    ignore::DeprecationWarning
    ignore::FutureWarning
testpaths =
    tests
-------------------------------------------------------------------------------- /tests/run_tests_in_new_env.py: --------------------------------------------------------------------------------
"""A helper script to execute tests in a new virtual environment. Not
needed if tomltovenv is used to create the virtual environment."""
#
# %%
import os
import shutil
import venv

# %%
### Delete environment if present
if os.path.exists("../venv_not_e"):
    shutil.rmtree("../venv_not_e")

# %%
### Create virtual environment
# !! we're inside the tests folder
venv.create(env_dir="../venv_not_e", clear=True, with_pip=True)

# %%
# !! paths are relative to the tests folder, hence the ../
! source ../venv_not_e/bin/activate

# %%
### Install non-editable for testing
! pip install -r ../requirements.txt
! pip install git+https://github.com/markur4/plotastic.git
! pip install pytest pytest-cov ipytest

# %%
# !! Coverage requires editable mode
! pytest

# # ! pytest --cov --cov-report=xml

# %%
--------------------------------------------------------------------------------