├── .dvc ├── .gitignore └── config ├── .dvcignore ├── .gitignore ├── .pre-commit-config.yaml ├── .python-version ├── .vscode └── settings.json ├── Makefile ├── README.md ├── dvc.lock ├── dvc.yaml ├── generate_readme.py ├── images ├── y2022 │ ├── bar_plot_w_custom_cmap.png │ ├── binary_outcome_variable.png │ ├── box_plot_w_scatter_distributions.png │ ├── default_plot.png │ ├── histogram_with_two_variables.png │ ├── line_plot_fill_between.png │ ├── meaningless_points.png │ ├── opinium_barchart.png │ ├── pandas_stacked_bars_with_values.png │ ├── pos_neg_split_hbar.png │ ├── scatter_distributions.png │ ├── scatter_matrix_w_kde_on_diag.png │ ├── scatter_w_outlined_text_insert.png │ ├── sns_violin_plot_custom.png │ ├── split_x_axis_custom_legend.png │ ├── stacked_bar_with_single_bars_layout.png │ └── uk_hexmap.png └── y2024 │ └── cat_weight.png ├── mypy.ini ├── plotting_examples ├── __init__.py ├── dvc_entry.py ├── extract_year_name.py ├── rc.mplstyle ├── save_plot_output.py ├── y2022 │ ├── __init__.py │ ├── bar_plot_w_custom_cmap │ │ ├── __init__.py │ │ └── plot.py │ ├── binary_outcome_variable │ │ ├── __init__.py │ │ ├── data.csv │ │ └── plot.py │ ├── box_plot_w_scatter_distributions │ │ ├── __init__.py │ │ ├── data.parquet │ │ └── plot.py │ ├── default_plot │ │ ├── __init__.py │ │ └── plot.py │ ├── histogram_with_two_variables │ │ ├── __init__.py │ │ └── plot.py │ ├── line_plot_fill_between │ │ ├── __init__.py │ │ ├── data.csv │ │ └── plot.py │ ├── meaningless_points │ │ ├── __init__.py │ │ └── plot.py │ ├── metadata.py │ ├── opinium_barchart │ │ ├── __init__.py │ │ ├── opinium.png │ │ ├── opinium_barchart_example.png │ │ └── plot.py │ ├── pandas_stacked_bars_with_values │ │ ├── __init__.py │ │ └── plot.py │ ├── pos_neg_split_hbar │ │ ├── __init__.py │ │ └── plot.py │ ├── scatter_distributions │ │ ├── __init__.py │ │ └── plot.py │ ├── scatter_matrix_w_kde_on_diag │ │ ├── __init__.py │ │ └── plot.py │ ├── scatter_w_outlined_text_insert │ │ ├── __init__.py │ │ ├── 
data.parquet │ │ └── plot.py │ ├── sns_violin_plot_custom │ │ ├── __init__.py │ │ ├── data.parquet │ │ └── plot.py │ ├── split_x_axis_custom_legend │ │ ├── __init__.py │ │ └── plot.py │ ├── stacked_bar_with_single_bars_layout │ │ ├── __init__.py │ │ ├── data │ │ │ └── lab.png │ │ └── plot.py │ └── uk_hexmap │ │ ├── __init__.py │ │ ├── data │ │ ├── gb_hex_cartogram │ │ │ ├── GB_Hex_Cartogram_Const.cpg │ │ │ ├── GB_Hex_Cartogram_Const.dbf │ │ │ ├── GB_Hex_Cartogram_Const.prj │ │ │ ├── GB_Hex_Cartogram_Const.sbn │ │ │ ├── GB_Hex_Cartogram_Const.sbx │ │ │ ├── GB_Hex_Cartogram_Const.shp │ │ │ ├── GB_Hex_Cartogram_Const.shp.xml │ │ │ ├── GB_Hex_Cartogram_Const.shx │ │ │ └── GB_Hex_Cartogram_Const.zip │ │ ├── petition_data.csv │ │ └── petition_data.json │ │ ├── plot.py │ │ ├── uk_hex_example.png │ │ └── uk_hex_example.py └── y2024 │ ├── __init__.py │ ├── cat_weight │ ├── __init__.py │ ├── data │ │ ├── cat_looking_to_side.jpeg │ │ └── weight_data.parquet │ └── plot.py │ └── metadata.py ├── poetry.lock ├── pyproject.toml ├── setup.cfg └── work.sh /.dvc/.gitignore: -------------------------------------------------------------------------------- 1 | /config.local 2 | /tmp 3 | /cache 4 | -------------------------------------------------------------------------------- /.dvc/config: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/.dvc/config -------------------------------------------------------------------------------- /.dvcignore: -------------------------------------------------------------------------------- 1 | # Add patterns of files dvc should ignore, which could improve 2 | # the performance. 
Learn more at 3 | # https://dvc.org/doc/user-guide/dvcignore 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Caches 2 | **/.ipynb_checkpoints/* 3 | **/__pycache__/* 4 | *.pyc 5 | .Rhistory 6 | .venv/ 7 | venv/ 8 | 9 | # Data 10 | data/**/* 11 | !data/**/*.gitkeep 12 | 13 | # IDE config 14 | .idea/ 15 | ipython_config.py 16 | profile_default/ 17 | 18 | # Other 19 | .DS_Store 20 | config/config.py 21 | .Rproj.user 22 | .Rproj.user/ 23 | *.Rproj 24 | 25 | # VisualStudioCode 26 | .vscode/* 27 | !.vscode/settings.json 28 | !.vscode/tasks.json 29 | !.vscode/launch.json 30 | !.vscode/extensions.json 31 | 32 | # https://www.gitignore.io/ 33 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.4.0 4 | hooks: 5 | - id: end-of-file-fixer 6 | - id: trailing-whitespace 7 | - id: check-builtin-literals 8 | - id: check-byte-order-marker 9 | - id: check-case-conflict 10 | - id: check-merge-conflict 11 | - id: check-symlinks 12 | - id: check-toml 13 | - id: check-vcs-permalinks 14 | - id: check-xml 15 | - id: debug-statements 16 | - id: detect-private-key 17 | - id: mixed-line-ending 18 | - id: fix-encoding-pragma 19 | args: ["--remove"] 20 | - id: check-yaml 21 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.10.2 2 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.linting.pylintEnabled": true, 3 | "python.linting.enabled": true, 4 | 
"python.formatting.provider": "black", 5 | "editor.formatOnSave": true, 6 | "rewrap.wrappingColumn": 87, 7 | "editor.rulers": [ 8 | 88 9 | ], 10 | // "peacock.color": "#42b883", 11 | "workbench.colorCustomizations": { 12 | "activityBar.activeBackground": "#65c89b", 13 | "activityBar.activeBorder": "#945bc4", 14 | "activityBar.background": "#65c89b", 15 | "activityBar.foreground": "#15202b", 16 | "activityBar.inactiveForeground": "#15202b99", 17 | "activityBarBadge.background": "#945bc4", 18 | "activityBarBadge.foreground": "#e7e7e7", 19 | "sash.hoverBorder": "#65c89b", 20 | "statusBar.background": "#42b883", 21 | "statusBar.foreground": "#15202b", 22 | "statusBarItem.hoverBackground": "#359268", 23 | "statusBarItem.remoteBackground": "#42b883", 24 | "statusBarItem.remoteForeground": "#15202b", 25 | "titleBar.activeBackground": "#42b883", 26 | "titleBar.activeForeground": "#15202b", 27 | "titleBar.inactiveBackground": "#42b88399", 28 | "titleBar.inactiveForeground": "#15202b99" 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean requirements 2 | .PHONY: git-stats git-log cloc clean-git 3 | .PHONY: deploy 4 | .PHONY: test 5 | .PHONY: requirements 6 | .PHONY: help 7 | 8 | GIT := git 9 | CLOC := cloc 10 | 11 | ######### 12 | # UTILS # 13 | ######### 14 | 15 | help: 16 | @awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf "\033[36m%-15s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST) | sort 17 | 18 | clean: 19 | @echo "Cleaning up temporary and cache files" 20 | @find . -type f -name "*.pyc" -delete 21 | @find . -type d -name "__pycache__" -exec rm -rf {} + 22 | @find . -type d -name ".pytest_cache" -exec rm -rf {} + 23 | @find . -type d -name ".mypy_cache" -exec rm -rf {} + 24 | @find . 
-type d -name ".ipynb_checkpoints" -exec rm -rf {} + 25 | 26 | cloc: 27 | @echo "Code statistics using cloc:" 28 | $(CLOC) --exclude-dir=venv . 29 | 30 | ###################### 31 | # WORKING ON PROJECT # 32 | ###################### 33 | 34 | pre-commit-run: 35 | poetry run pre-commit run --all-files 36 | 37 | readme: ## Generate README file. 38 | poetry run python generate_readme.py 39 | 40 | # This'll just run through all the plots. 41 | repro: ## run dvc repro 42 | poetry run dvc repro dvc.yaml 43 | 44 | 45 | ######## 46 | # LINT # 47 | ######## 48 | 49 | mypy: 50 | poetry run mypy . --strict 51 | 52 | lint: mypy ## run linting - mypy,ruff 53 | poetry run ruff check . 54 | poetry run ruff format . --check 55 | @$(MAKE) --no-print-directory clean 56 | 57 | # Using this as format & lint really... 58 | format: pre-commit-run ## run formatters - pre-commit,ruff 59 | poetry run ruff format . 60 | poetry run ruff check . --fix --unsafe-fixes 61 | @$(MAKE) --no-print-directory clean 62 | 63 | 64 | ########## 65 | # POETRY # 66 | ########## 67 | 68 | poetry.lock: 69 | poetry lock --no-update 70 | 71 | install: poetry.lock 72 | poetry install 73 | @$(MAKE) --no-print-directory clean 74 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Example plots 2 | 3 | Example plots, typically using matplotlib. Mainly for personal use / so I have somewhere to remind 4 | myself of some things, but if they're of any use to anyone else then ace. Code/visuals often aren't 5 | great as they're mainly just scratch work, often copied straight over from a notebook with little 6 | cleanup. 7 | 8 | ---- 9 | 10 | [comment]: # (Automate plots beneath this.) 
11 | 12 | # Plots 13 | 14 | * [`bar_plot_w_custom_cmap`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#bar_plot_w_custom_cmap) 15 | * [`binary_outcome_variable`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#binary_outcome_variable) 16 | * [`box_plot_w_scatter_distributions`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#box_plot_w_scatter_distributions) 17 | * [`cat_weight`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#cat_weight) 18 | * [`line_plot_fill_between`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#line_plot_fill_between) 19 | * [`meaningless_points`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#meaningless_points) 20 | * [`opinium_barchart`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#opinium_barchart) 21 | * [`pandas_stacked_bars_with_values`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#pandas_stacked_bars_with_values) 22 | * [`pos_neg_split_hbar`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#pos_neg_split_hbar) 23 | * [`scatter_distributions`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#scatter_distributions) 24 | * [`scatter_matrix_w_kde_on_diag`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#scatter_matrix_w_kde_on_diag) 25 | * [`scatter_w_outlined_text_insert`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#scatter_w_outlined_text_insert) 26 | * [`split_x_axis_custom_legend`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#split_x_axis_custom_legend) 27 | * [`stacked_bar_with_single_bars_layout`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#stacked_bar_with_single_bars_layout) 28 | * [`uk_hexmap`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#uk_hexmap) 29 | 30 | 31 | 32 | ## [`bar_plot_w_custom_cmap`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/bar_plot_w_custom_cmap/plot.py) 33 | 34 | 
Bar plot with custom cmap. 35 | 36 | Based on this tweet: https://twitter.com/ryanburge/status/1505602885215834112 - wanted 37 | to create something with a similar effect using mpl. 38 | 39 | Example of: 40 | 41 | - Different font types (using monospace font) 42 | - using different colours for bars depending on their values (custom cmap). 43 | - padding around the axis using rc parameters 44 | 45 | ![](images/y2022/bar_plot_w_custom_cmap.png) 46 | 47 | ## [`binary_outcome_variable`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/binary_outcome_variable/plot.py) 48 | 49 | Plot dichotomous variable. 50 | 51 | Simple dots with median lines - might be nice to add a kde to this as well. 52 | 53 | The y-axis is redundant here as there are only two options (`0.6` doesn't make any 54 | sense). 55 | 56 | ![](images/y2022/binary_outcome_variable.png) 57 | 58 | ## [`box_plot_w_scatter_distributions`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/box_plot_w_scatter_distributions/plot.py) 59 | 60 | Bar plot with distributions. 61 | 62 | Thought I'd create a bar plot with scatter plots of the distributions adjacent to the 63 | bars, it was based off something else but I can't remember what. Bar plots are created 64 | from scratch using hlines etc, for no particular reason. 65 | 66 | Data was from tidy tuesday. 67 | 68 | ![](images/y2022/box_plot_w_scatter_distributions.png) 69 | 70 | ## [`cat_weight`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2024/cat_weight/plot.py) 71 | 72 | Time series of the cat's diet. 73 | 74 | Cat was getting a little chunky towards the end of 2023 so had a resolution made for 75 | them to lose a bit of weight. Data collection is just a daily weigh-in; the average of 76 | this is taken (as there are sometimes multiple entries in a day) and then plotted along 77 | with a ten day rolling average. 
Most days were covered; where there are missing days 78 | they're imputed using the average of the days either side, e.g. `(a, nan, b) -> (a, 79 | (a+b)/2, b)` though this is just a plot. 80 | 81 | ![](images/y2024/cat_weight.png) 82 | 83 | ## [`line_plot_fill_between`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/line_plot_fill_between/plot.py) 84 | 85 | Visualise time tracking, how much over/under time. 86 | 87 | Mainly serves as an example of plotting with dates, and filling above / below 88 | particular values on a plot. 89 | 90 | Example of: 91 | 92 | - plotting with dates 93 | - different fonts 94 | - filling between lines 95 | 96 | ![](images/y2022/line_plot_fill_between.png) 97 | 98 | ## [`meaningless_points`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/meaningless_points/plot.py) 99 | 100 | Some random points. 101 | 102 | No real meaning to this - was messing about with some bokeh style bits (the effect, not 103 | the python library), so dumping here. Not sure I'm mad on the output - it's also slow 104 | as hell. 105 | 106 | ![](images/y2022/meaningless_points.png) 107 | 108 | ## [`opinium_barchart`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/opinium_barchart/plot.py) 109 | 110 | Bar chart style copied from Opinium. 111 | 112 | Saw this on twitter (I think) and thought I'd recreate it in mpl. 113 | 114 | ![](images/y2022/opinium_barchart.png) 115 | 116 | ## [`pandas_stacked_bars_with_values`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/pandas_stacked_bars_with_values/plot.py) 117 | 118 | Horizontal stacked bars, based off of pandas. 119 | 120 | Could do these from scratch - pandas makes things a bit more straightforward though. 121 | 122 | Example of: 123 | 124 | - fixed formatting - setting categorical ticks at particular positions. 
125 | 126 | ![](images/y2022/pandas_stacked_bars_with_values.png) 127 | 128 | ## [`pos_neg_split_hbar`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/pos_neg_split_hbar/plot.py) 129 | 130 | Create split horizontal bar chart. 131 | 132 | Split by dichotomous variable, with bar classifications. 133 | 134 | Can be a bit messy - not sure I'm much of a fan - but wanted to re-create anyway. 135 | 136 | ![](images/y2022/pos_neg_split_hbar.png) 137 | 138 | ## [`scatter_distributions`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/scatter_distributions/plot.py) 139 | 140 | Distributions of multiple variables. 141 | 142 | For a set of variables, each with an accompanying continuous variable on the same scale, 143 | plot the distributions of the continuous variable. Might be useful to have a kde 144 | overlaid here. 145 | 146 | Example of: 147 | 148 | - fixed formatting 149 | - setting categorical ticks at particular positions. 150 | 151 | ![](images/y2022/scatter_distributions.png) 152 | 153 | ## [`scatter_matrix_w_kde_on_diag`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/scatter_matrix_w_kde_on_diag/plot.py) 154 | 155 | Scatter matrix with kde instead of histogram on the diagonal. 156 | 157 | Could probably adapt pd.scatter_matrix instead of doing it from scratch. Though with 158 | this approach the non-diagonal plots could be whatever instead of a scatter plot I 159 | guess... 160 | 161 | Would be good to make the upper diagonals differ from the lower diagonals a bit... maybe 162 | some sort of table from pd.cut on the others or whatever. 163 | 164 | I'd probably just use subplot_mosaic as well now - that's grown on me a lot since this. 
165 | 166 | ![](images/y2022/scatter_matrix_w_kde_on_diag.png) 167 | 168 | ## [`scatter_w_outlined_text_insert`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/scatter_w_outlined_text_insert/plot.py) 169 | 170 | Scatter plot with text inserted to scatter points. 171 | 172 | Data was taken from a tidy tuesday. 173 | 174 | Example of: 175 | 176 | - Outlining text elements in a plot. 177 | 178 | ![](images/y2022/scatter_w_outlined_text_insert.png) 179 | 180 | ## [`split_x_axis_custom_legend`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/split_x_axis_custom_legend/plot.py) 181 | 182 | Example of creating multiple x-axes in order to plot year / months. 183 | 184 | The fig size needs to be pretty large in order to squeeze all the month names etc in 185 | here. Generated data looks a mess on these plots. 186 | 187 | Example of: 188 | 189 | - Custom legend 190 | - generating random date data 191 | - multiple x-axes to display years / months 192 | 193 | ![](images/y2022/split_x_axis_custom_legend.png) 194 | 195 | ## [`stacked_bar_with_single_bars_layout`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/stacked_bar_with_single_bars_layout/plot.py) 196 | 197 | Layout containing two bar plots and a bivariate plot between them. 198 | 199 | In this case it's a silly example of some data containing the social grade of 200 | Labradors, as well as the education group. The main plot is a stacked bar containing 201 | the breakdown of education group for each social grade. 202 | 203 | Don't think I'm too keen on the code for this plot - though it's not always so clear 204 | (to me) how to make "nice" code with a lot of matplotlib stuff. 205 | 206 | Obviously, the data is made up. 
207 | 208 | ![](images/y2022/stacked_bar_with_single_bars_layout.png) 209 | 210 | ## [`uk_hexmap`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/uk_hexmap/plot.py) 211 | 212 | Hex map for the UK constituencies. 213 | 214 | Some meaningless generated data - small multiples with hex maps can be useful sometimes 215 | though. Could be good to add in the geographically accurate version as well. 216 | 217 | ![](images/y2022/uk_hexmap.png) 218 | -------------------------------------------------------------------------------- /dvc.lock: -------------------------------------------------------------------------------- 1 | schema: '2.0' 2 | stages: 3 | sav_to_csv_and_json: 4 | cmd: python -m plotting_examples.binary_outcome_variable.plot 5 | deps: 6 | - path: plotting_examples/binary_outcome_variable/plot.py 7 | md5: 1427c5fcadda1f47e11a817d2f55e61e 8 | size: 2109 9 | binary_outcome: 10 | cmd: python -m plotting_examples.binary_outcome_variable.plot 11 | deps: 12 | - path: plotting_examples/binary_outcome_variable/plot.py 13 | md5: b8b284298598c316bd0c661e7705ffda 14 | size: 2101 15 | outs: 16 | - path: 17 | images/GSL_projects_plotting_examples_plotting_examples_binary_outcome_variable_plot.png 18 | md5: 3e15bc2bdba0ea318625e341fc600adc 19 | size: 68657 20 | histogram_with_two_variables: 21 | cmd: python -m plotting_examples.histogram_with_two_variables.plot 22 | deps: 23 | - path: plotting_examples/histogram_with_two_variables/plot.py 24 | md5: f176e507379760e333fd3d21a0a03f66 25 | size: 4958 26 | outs: 27 | - path: 28 | images/GSL_projects_plotting_examples_plotting_examples_histogram_with_two_variables_plot.png 29 | md5: a42aff20a7970bd5a593dbb5e4a72083 30 | size: 121044 31 | pandas_stacked_bars_with_values: 32 | cmd: python -m plotting_examples.pandas_stacked_bars_with_values.plot 33 | deps: 34 | - path: plotting_examples/pandas_stacked_bars_with_values/plot.py 35 | md5: 41f46cb867b53e3d1fa2bd9ce9a7e59b 36 | size: 4276 37 | outs: 38 | - 
path: 39 | images/GSL_projects_plotting_examples_plotting_examples_pandas_stacked_bars_with_values_plot.png 40 | md5: b444b9c40539997369b306794dd55383 41 | size: 109980 42 | scatter_distributions: 43 | cmd: python -m plotting_examples.scatter_distributions.plot 44 | deps: 45 | - path: plotting_examples/scatter_distributions/plot.py 46 | md5: ca9e0719035769cb951018cdc60cae5a 47 | size: 6101 48 | outs: 49 | - path: 50 | images/GSL_projects_plotting_examples_plotting_examples_scatter_distributions_plot.png 51 | md5: ff1ea112d284d6995f21d1555da17868 52 | size: 1048818 53 | split_x_axis_custom_legend: 54 | cmd: python -m plotting_examples.split_x_axis_custom_legend.plot 55 | deps: 56 | - path: plotting_examples/split_x_axis_custom_legend/plot.py 57 | md5: 4cbef0469f1542bbba40f26080fa8147 58 | size: 6216 59 | outs: 60 | - path: 61 | images/GSL_projects_plotting_examples_plotting_examples_split_x_axis_custom_legend_plot.png 62 | md5: e31a09751ac8a7cb68c33f573888c04c 63 | size: 2059481 64 | trump_church_votes_2020: 65 | cmd: python -m plotting_examples.trump_church_votes_2020.plot 66 | deps: 67 | - path: plotting_examples/trump_church_votes_2020/plot.py 68 | md5: df8965d04acf1363a89adc51dbf8d823 69 | size: 5092 70 | outs: 71 | - path: 72 | images/GSL_projects_plotting_examples_plotting_examples_trump_church_votes_2020_plot.png 73 | md5: 79f742eb79b7cbcac22518634984fefe 74 | size: 442400 75 | work_time_tracking_plot: 76 | cmd: python -m plotting_examples.work_time_tracking.plot 77 | deps: 78 | - path: plotting_examples/work_time_tracking/plot.py 79 | md5: 295be3a691582e23266edcfdf75abe9f 80 | size: 4618 81 | outs: 82 | - path: 83 | images/GSL_projects_plotting_examples_plotting_examples_work_time_tracking_plot.png 84 | md5: 022854ffd33d0c1052ad561d3b0a29e4 85 | size: 420434 86 | bar_plot_w_custom_cmap: 87 | cmd: python -m plotting_examples.2022.bar_plot_w_custom_cmap.plot 88 | deps: 89 | - path: plotting_examples/2022/bar_plot_w_custom_cmap/plot.py 90 | md5: 
672de9f4bd9f4e7c2095b572b58e1b9e 91 | size: 5697 92 | outs: 93 | - path: 94 | images/GSL_projects_plotting_examples_plotting_examples_2022_bar_plot_w_custom_cmap_plot.png 95 | md5: 149eaf7e57549fb2119ef508746f653d 96 | size: 324973 97 | sns_violin_plot_custom: 98 | cmd: python -m plotting_examples.sns_violin_plot_custom.plot 99 | deps: 100 | - path: plotting_examples/sns_violin_plot_custom/plot.py 101 | md5: 01e55cb3dd5935e2f317fdc90dbe64a4 102 | size: 1523 103 | outs: 104 | - path: 105 | images/GSL_projects_plotting_examples_plotting_examples_sns_violin_plot_custom_plot.png 106 | md5: 54134457d7681189e81518fc6214f4ec 107 | size: 100869 108 | scatter_matrix_w_kde_on_diag: 109 | cmd: python -m plotting_examples.scatter_matrix_w_kde_on_diag.plot 110 | deps: 111 | - path: plotting_examples/scatter_matrix_w_kde_on_diag/plot.py 112 | md5: 5f9b69d8a3762e617653d044b3fec13a 113 | size: 2216 114 | outs: 115 | - path: 116 | images/GSL_projects_plotting_examples_plotting_examples_scatter_matrix_w_kde_on_diag_plot.png 117 | md5: 7a6dec845e9bb9fe0a667b8b7937ae9c 118 | size: 504189 119 | pos_neg_split_hbar: 120 | cmd: python -m plotting_examples.pos_neg_split_hbar.plot 121 | deps: 122 | - path: plotting_examples/pos_neg_split_hbar/plot.py 123 | md5: 882938cd457ffdec5c72e67f2235c181 124 | size: 5017 125 | outs: 126 | - path: 127 | images/GSL_projects_plotting_examples_plotting_examples_pos_neg_split_hbar_plot.png 128 | md5: ea865e0df1293cc02d7843aa4140a387 129 | size: 160025 130 | default_plot: 131 | cmd: python -m plotting_examples.default_plot.plot 132 | deps: 133 | - path: plotting_examples/default_plot/plot.py 134 | md5: 7b305baa02cf195d47332f3b2586265a 135 | size: 865 136 | outs: 137 | - path: images/GSL_projects_plotting_examples_plotting_examples_default_plot_plot.png 138 | md5: 215cc189594c56704b2ab62ea6983b6d 139 | size: 54181 140 | 2022_default_plot: 141 | cmd: python -m plotting_examples.2022.default_plot.plot 142 | deps: 143 | - path: 
plotting_examples/2022/default_plot/plot.py 144 | md5: 3006f0a7bbbea7a22224095255f4bb5e 145 | size: 838 146 | outs: 147 | - path: images/2022/default_plot.png 148 | md5: 63f2d738e25563eff978e01bc834e480 149 | size: 25121 150 | 2022_binary_outcome_variable: 151 | cmd: python -m plotting_examples.2022.binary_outcome_variable.plot 152 | deps: 153 | - path: plotting_examples/2022/binary_outcome_variable/plot.py 154 | md5: 97ac99d8c9299f723f4e730fbfea90e2 155 | size: 2182 156 | outs: 157 | - path: images/2022/binary_outcome_variable.png 158 | md5: 605d409516acb29e92ba53afd5203a94 159 | size: 36620 160 | 2022_pandas_stacked_bars_with_values: 161 | cmd: python -m plotting_examples.2022.pandas_stacked_bars_with_values.plot 162 | deps: 163 | - path: plotting_examples/2022/pandas_stacked_bars_with_values/plot.py 164 | md5: 51afea4e6b06813f61887aad963b2b5d 165 | size: 4357 166 | outs: 167 | - path: images/2022/pandas_stacked_bars_with_values.png 168 | md5: 187f7f8d78b2a7bee60517b0bca5f463 169 | size: 57606 170 | 2022_pos_neg_split_hbar: 171 | cmd: python -m plotting_examples.2022.pos_neg_split_hbar.plot 172 | deps: 173 | - path: plotting_examples/2022/pos_neg_split_hbar/plot.py 174 | md5: 606148fb6a0601d3a2828b24a62f8614 175 | size: 5097 176 | outs: 177 | - path: images/2022/pos_neg_split_hbar.png 178 | md5: 7102b82280627073c912a37cbb5bddfe 179 | size: 82422 180 | 2022_histogram_with_two_variables: 181 | cmd: python -m plotting_examples.2022.histogram_with_two_variables.plot 182 | deps: 183 | - path: plotting_examples/2022/histogram_with_two_variables/plot.py 184 | md5: f1a1d50c1b05b9927b749d0acaad438e 185 | size: 5248 186 | outs: 187 | - path: images/2022/histogram_with_two_variables.png 188 | md5: 5ee46a399d1635bbca0acbb38baa417d 189 | size: 67845 190 | 2022_split_x_axis_custom_legend: 191 | cmd: python -m plotting_examples.2022.split_x_axis_custom_legend.plot 192 | deps: 193 | - path: plotting_examples/2022/split_x_axis_custom_legend/plot.py 194 | md5: 
d96a9320c4b7009e9f110bb891d1182d 195 | size: 6297 196 | outs: 197 | - path: images/2022/split_x_axis_custom_legend.png 198 | md5: da1c3324e4d037a49696bdb1bae75bd5 199 | size: 1080487 200 | 2022_scatter_matrix_w_kde_on_diag: 201 | cmd: python -m plotting_examples.2022.scatter_matrix_w_kde_on_diag.plot 202 | deps: 203 | - path: plotting_examples/2022/scatter_matrix_w_kde_on_diag/plot.py 204 | md5: 5f0e7e326606d868b8c4411c288e00cb 205 | size: 2522 206 | outs: 207 | - path: images/2022/scatter_matrix_w_kde_on_diag.png 208 | md5: 14a39a1df810953354cde040c1cf10fb 209 | size: 217990 210 | 2022_sns_violin_plot_custom: 211 | cmd: python -m plotting_examples.2022.sns_violin_plot_custom.plot 212 | deps: 213 | - path: plotting_examples/2022/sns_violin_plot_custom/plot.py 214 | md5: e5aa38595903134348a4b97e12af4b51 215 | size: 1604 216 | outs: 217 | - path: images/2022/sns_violin_plot_custom.png 218 | md5: 7a451fc7c780d5e165f7325327ded3ce 219 | size: 56996 220 | 2022_work_time_tracking: 221 | cmd: python -m plotting_examples.2022.work_time_tracking.plot 222 | deps: 223 | - path: plotting_examples/2022/work_time_tracking/plot.py 224 | md5: 41af922980ad6d06a0813c3f6dff37af 225 | size: 4704 226 | outs: 227 | - path: images/2022/work_time_tracking.png 228 | md5: 5e468011855755d5a8238e7e8b51063c 229 | size: 227174 230 | 2022_scatter_distributions: 231 | cmd: python -m plotting_examples.2022.scatter_distributions.plot 232 | deps: 233 | - path: plotting_examples/2022/scatter_distributions/plot.py 234 | md5: 0be18ddf32bdd5a0a5768d214d49c5c2 235 | size: 6181 236 | outs: 237 | - path: images/2022/scatter_distributions.png 238 | md5: d102a1f3534a68d11439103b85f7b4b1 239 | size: 520358 240 | 2022_bar_plot_w_custom_cmap: 241 | cmd: python -m plotting_examples.2022.bar_plot_w_custom_cmap.plot 242 | deps: 243 | - path: plotting_examples/2022/bar_plot_w_custom_cmap/plot.py 244 | md5: d4e5771aeb11a5c78cf16bc01681b014 245 | size: 5778 246 | outs: 247 | - path: 
images/2022/bar_plot_w_custom_cmap.png 248 | md5: f90122b668c246a8ef62d4c8302daf68 249 | size: 174070 250 | 2022_week42: 251 | cmd: python -m plotting_examples.2022.week42.plot 252 | deps: 253 | - path: plotting_examples/2022/week42/plot.py 254 | md5: 1abbfdf49c2abe180492a606e4760c98 255 | size: 12120 256 | outs: 257 | - path: images/2022/week42.png 258 | md5: f6bcc5fd9c521a9a8b20c8a11659004d 259 | size: 1146056 260 | 2022_box_plot_w_scatter_distributions: 261 | cmd: python -m plotting_examples.2022.box_plot_w_scatter_distributions.plot 262 | deps: 263 | - path: plotting_examples/2022/box_plot_w_scatter_distributions/plot.py 264 | md5: 0ac3c9c54beeac3e1e5395ab8c6a04b0 265 | size: 12148 266 | outs: 267 | - path: images/2022/box_plot_w_scatter_distributions.png 268 | md5: a4db1dd69327dcf7166701d7dce61ef6 269 | size: 478450 270 | 2022_line_plot_fill_between: 271 | cmd: python -m plotting_examples.2022.line_plot_fill_between.plot 272 | deps: 273 | - path: plotting_examples/2022/line_plot_fill_between/plot.py 274 | md5: d4de88d563946fdd04daa65fa4e46280 275 | size: 5016 276 | outs: 277 | - path: images/2022/line_plot_fill_between.png 278 | md5: 31ccdfaec1719946aaaa86fe69e01626 279 | size: 231583 280 | y2022_box_plot_w_scatter_distributions: 281 | cmd: poetry run python -m plotting_examples.y2022.box_plot_w_scatter_distributions.plot 282 | deps: 283 | - path: plotting_examples/y2022/box_plot_w_scatter_distributions/plot.py 284 | hash: md5 285 | md5: 75dc92ce6ff62d836d8d1cb15377579c 286 | size: 11629 287 | outs: 288 | - path: images/y2022/box_plot_w_scatter_distributions.png 289 | hash: md5 290 | md5: 33dc283243d8df737f43f92f990c62ca 291 | size: 469728 292 | y2022_sns_violin_plot_custom: 293 | cmd: poetry run python -m plotting_examples.y2022.sns_violin_plot_custom.plot 294 | deps: 295 | - path: plotting_examples/y2022/sns_violin_plot_custom/plot.py 296 | hash: md5 297 | md5: f1613352c0eabb1b8a7e40714b7ee8c3 298 | size: 1975 299 | outs: 300 | - path: 
images/y2022/sns_violin_plot_custom.png 301 | md5: 05e5eca57ef23097e878e8c603a3f22e 302 | size: 67429 303 | y2022_bar_plot_w_custom_cmap: 304 | cmd: poetry run python -m plotting_examples.y2022.bar_plot_w_custom_cmap.plot 305 | deps: 306 | - path: plotting_examples/y2022/bar_plot_w_custom_cmap/plot.py 307 | hash: md5 308 | md5: f7d8241b1ed31f2754eed7cc64442423 309 | size: 6031 310 | outs: 311 | - path: images/y2022/bar_plot_w_custom_cmap.png 312 | hash: md5 313 | md5: 6be8b259164e4992a3e62f5255199e02 314 | size: 174412 315 | y2022_histogram_with_two_variables: 316 | cmd: poetry run python -m plotting_examples.y2022.histogram_with_two_variables.plot 317 | deps: 318 | - path: plotting_examples/y2022/histogram_with_two_variables/plot.py 319 | hash: md5 320 | md5: 16ab57c6b359fdee3d75e5dabc93d4ce 321 | size: 5849 322 | outs: 323 | - path: images/y2022/histogram_with_two_variables.png 324 | md5: 2e0d94432170ec007918cead8ddb08cb 325 | size: 58637 326 | y2022_binary_outcome_variable: 327 | cmd: poetry run python -m plotting_examples.y2022.binary_outcome_variable.plot 328 | deps: 329 | - path: plotting_examples/y2022/binary_outcome_variable/plot.py 330 | hash: md5 331 | md5: f8468c0554abebb6a4fc31f9ea511457 332 | size: 2790 333 | outs: 334 | - path: images/y2022/binary_outcome_variable.png 335 | hash: md5 336 | md5: 55b0cfcab7573baa0f5363f7f516e075 337 | size: 36069 338 | y2022_pos_neg_split_hbar: 339 | cmd: poetry run python -m plotting_examples.y2022.pos_neg_split_hbar.plot 340 | deps: 341 | - path: plotting_examples/y2022/pos_neg_split_hbar/plot.py 342 | hash: md5 343 | md5: 6fe3dcb9f263d6d5aa331ab522434de7 344 | size: 5744 345 | outs: 346 | - path: images/y2022/pos_neg_split_hbar.png 347 | md5: 09185fc58c132ebac7555ecf30463f81 348 | size: 73712 349 | y2022_scatter_matrix_w_kde_on_diag: 350 | cmd: poetry run python -m plotting_examples.y2022.scatter_matrix_w_kde_on_diag.plot 351 | deps: 352 | - path: plotting_examples/y2022/scatter_matrix_w_kde_on_diag/plot.py 353 | 
hash: md5 354 | md5: 3b183d57dce913fb84246f4dc10569be 355 | size: 3363 356 | outs: 357 | - path: images/y2022/scatter_matrix_w_kde_on_diag.png 358 | hash: md5 359 | md5: 9cbe0ed36d6b99d3832fe5d60b710cdd 360 | size: 325647 361 | y2022_line_plot_fill_between: 362 | cmd: poetry run python -m plotting_examples.y2022.line_plot_fill_between.plot 363 | deps: 364 | - path: plotting_examples/y2022/line_plot_fill_between/plot.py 365 | hash: md5 366 | md5: e1c730d690f93eb35b930ec00454fe08 367 | size: 5136 368 | outs: 369 | - path: images/y2022/line_plot_fill_between.png 370 | md5: 9dcd62a428039331ee813626f2c53089 371 | size: 231583 372 | y2022_pandas_stacked_bars_with_values: 373 | cmd: poetry run python -m plotting_examples.y2022.pandas_stacked_bars_with_values.plot 374 | deps: 375 | - path: plotting_examples/y2022/pandas_stacked_bars_with_values/plot.py 376 | hash: md5 377 | md5: d13b08351135216105931d12ce145b3c 378 | size: 4375 379 | outs: 380 | - path: images/y2022/pandas_stacked_bars_with_values.png 381 | md5: 199cbc2e9ac00928ff31306bb0f4ab5e 382 | size: 61398 383 | y2022_split_x_axis_custom_legend: 384 | cmd: poetry run python -m plotting_examples.y2022.split_x_axis_custom_legend.plot 385 | deps: 386 | - path: plotting_examples/y2022/split_x_axis_custom_legend/plot.py 387 | hash: md5 388 | md5: e45ab4f896af74c62953922a2f03d39e 389 | size: 6806 390 | outs: 391 | - path: images/y2022/split_x_axis_custom_legend.png 392 | hash: md5 393 | md5: ad997f3edd4f6e0e3c68f3f6d51f6051 394 | size: 1691509 395 | y2022_scatter_distributions: 396 | cmd: poetry run python -m plotting_examples.y2022.scatter_distributions.plot 397 | deps: 398 | - path: plotting_examples/y2022/scatter_distributions/plot.py 399 | hash: md5 400 | md5: 7d668af948c9c04578842e57d5c1e0cd 401 | size: 7129 402 | outs: 403 | - path: images/y2022/scatter_distributions.png 404 | hash: md5 405 | md5: 78dcd94a72b9a63beb8c7f755d1aeb4a 406 | size: 549714 407 | y2022_default_plot: 408 | cmd: poetry run python -m 
plotting_examples.y2022.default_plot.plot 409 | deps: 410 | - path: plotting_examples/y2022/default_plot/plot.py 411 | hash: md5 412 | md5: 29d823025843b30bfc00b615c5c23edb 413 | size: 1182 414 | outs: 415 | - path: images/y2022/default_plot.png 416 | md5: 1d81f2b1567c55dec832acd4ac5dca60 417 | size: 24778 418 | y2022_scatter_w_outlined_text_insert: 419 | cmd: poetry run python -m plotting_examples.y2022.scatter_w_outlined_text_insert.plot 420 | deps: 421 | - path: plotting_examples/y2022/scatter_w_outlined_text_insert/plot.py 422 | hash: md5 423 | md5: e775b5e81d6ef7ccb89058e5c78a1922 424 | size: 4853 425 | outs: 426 | - path: images/y2022/scatter_w_outlined_text_insert.png 427 | md5: 6de49552e5ea5e034d052d98943f89b9 428 | size: 481441 429 | y2022_opinium_barchart: 430 | cmd: poetry run python -m plotting_examples.y2022.opinium_barchart.plot 431 | deps: 432 | - path: plotting_examples/y2022/opinium_barchart/plot.py 433 | hash: md5 434 | md5: d464e5a4363c14d09582154a6f5392c8 435 | size: 5707 436 | outs: 437 | - path: images/y2022/opinium_barchart.png 438 | md5: 0f840e7d0a449d057ff8a5d5c27eda48 439 | size: 65128 440 | y2022_uk_hexmap: 441 | cmd: poetry run python -m plotting_examples.y2022.uk_hexmap.plot 442 | deps: 443 | - path: plotting_examples/y2022/uk_hexmap/plot.py 444 | hash: md5 445 | md5: 2f043cde045257f9a78e99e19989bc23 446 | size: 3956 447 | outs: 448 | - path: images/y2022/uk_hexmap.png 449 | hash: md5 450 | md5: e7fe06a9f73be99214870928b9768d3a 451 | size: 536769 452 | y2022_meaningless_points: 453 | cmd: poetry run python -m plotting_examples.y2022.meaningless_points.plot 454 | deps: 455 | - path: plotting_examples/y2022/meaningless_points/plot.py 456 | hash: md5 457 | md5: 93c3a423474a8309f9004adfcc4effda 458 | size: 2630 459 | outs: 460 | - path: images/y2022/meaningless_points.png 461 | md5: 0abf95fc710641a8564d5df871600131 462 | size: 273061 463 | y2022_stacked_bar_with_single_bars_layout: 464 | cmd: poetry run python -m 
plotting_examples.y2022.stacked_bar_with_single_bars_layout.plot 465 | deps: 466 | - path: plotting_examples/y2022/stacked_bar_with_single_bars_layout/plot.py 467 | hash: md5 468 | md5: 101d705bcfee29021b8772ece52d8bd9 469 | size: 15024 470 | outs: 471 | - path: images/y2022/stacked_bar_with_single_bars_layout.png 472 | md5: a0f95c280b1fe5edf4f2946654be8ed6 473 | size: 229895 474 | y2024_stacked_bar_with_single_bars_layout: 475 | cmd: poetry run python -m plotting_examples.y2024.stacked_bar_with_single_bars_layout.plot 476 | deps: 477 | - path: plotting_examples/y2024/stacked_bar_with_single_bars_layout/plot.py 478 | hash: md5 479 | md5: 42d5d36996dcebf858d3dc20de2170e7 480 | size: 15223 481 | outs: 482 | - path: images/y2024/stacked_bar_with_single_bars_layout.png 483 | md5: a0f95c280b1fe5edf4f2946654be8ed6 484 | size: 229895 485 | y2024_mish_weight: 486 | cmd: poetry run python -m plotting_examples.y2024.mish_weight.plot 487 | deps: 488 | - path: plotting_examples/y2024/mish_weight/plot.py 489 | hash: md5 490 | md5: c23fddaa7cda28671d084731a63dab0e 491 | size: 15199 492 | outs: 493 | - path: images/y2024/mish_weight.png 494 | hash: md5 495 | md5: a0f95c280b1fe5edf4f2946654be8ed6 496 | size: 229895 497 | y2024_cat_weight: 498 | cmd: poetry run python -m plotting_examples.y2024.cat_weight.plot 499 | deps: 500 | - path: plotting_examples/y2024/cat_weight/plot.py 501 | hash: md5 502 | md5: ffb3af869133e167f72b1b9e234991c1 503 | size: 11067 504 | outs: 505 | - path: images/y2024/cat_weight.png 506 | hash: md5 507 | md5: 7ac39610e8a8e23a8582c232da7fc7c5 508 | size: 1563392 509 | -------------------------------------------------------------------------------- /dvc.yaml: -------------------------------------------------------------------------------- 1 | stages: 2 | y2022_bar_plot_w_custom_cmap: 3 | cmd: poetry run python -m plotting_examples.y2022.bar_plot_w_custom_cmap.plot 4 | deps: 5 | - plotting_examples/y2022/bar_plot_w_custom_cmap/plot.py 6 | outs: 7 | - 
images/y2022/bar_plot_w_custom_cmap.png: 8 | cache: false 9 | wdir: . 10 | y2022_binary_outcome_variable: 11 | cmd: poetry run python -m plotting_examples.y2022.binary_outcome_variable.plot 12 | deps: 13 | - plotting_examples/y2022/binary_outcome_variable/plot.py 14 | outs: 15 | - images/y2022/binary_outcome_variable.png: 16 | cache: false 17 | wdir: . 18 | y2022_box_plot_w_scatter_distributions: 19 | cmd: poetry run python -m plotting_examples.y2022.box_plot_w_scatter_distributions.plot 20 | deps: 21 | - plotting_examples/y2022/box_plot_w_scatter_distributions/plot.py 22 | outs: 23 | - images/y2022/box_plot_w_scatter_distributions.png: 24 | cache: false 25 | wdir: . 26 | y2022_default_plot: 27 | cmd: poetry run python -m plotting_examples.y2022.default_plot.plot 28 | deps: 29 | - plotting_examples/y2022/default_plot/plot.py 30 | outs: 31 | - images/y2022/default_plot.png: 32 | cache: false 33 | wdir: . 34 | y2022_histogram_with_two_variables: 35 | cmd: poetry run python -m plotting_examples.y2022.histogram_with_two_variables.plot 36 | deps: 37 | - plotting_examples/y2022/histogram_with_two_variables/plot.py 38 | outs: 39 | - images/y2022/histogram_with_two_variables.png: 40 | cache: false 41 | wdir: . 42 | y2022_line_plot_fill_between: 43 | cmd: poetry run python -m plotting_examples.y2022.line_plot_fill_between.plot 44 | deps: 45 | - plotting_examples/y2022/line_plot_fill_between/plot.py 46 | outs: 47 | - images/y2022/line_plot_fill_between.png: 48 | cache: false 49 | wdir: . 50 | y2022_meaningless_points: 51 | cmd: poetry run python -m plotting_examples.y2022.meaningless_points.plot 52 | deps: 53 | - plotting_examples/y2022/meaningless_points/plot.py 54 | outs: 55 | - images/y2022/meaningless_points.png: 56 | cache: false 57 | wdir: . 
58 | y2022_opinium_barchart: 59 | cmd: poetry run python -m plotting_examples.y2022.opinium_barchart.plot 60 | deps: 61 | - plotting_examples/y2022/opinium_barchart/plot.py 62 | outs: 63 | - images/y2022/opinium_barchart.png: 64 | cache: false 65 | wdir: . 66 | y2022_pandas_stacked_bars_with_values: 67 | cmd: poetry run python -m plotting_examples.y2022.pandas_stacked_bars_with_values.plot 68 | deps: 69 | - plotting_examples/y2022/pandas_stacked_bars_with_values/plot.py 70 | outs: 71 | - images/y2022/pandas_stacked_bars_with_values.png: 72 | cache: false 73 | wdir: . 74 | y2022_pos_neg_split_hbar: 75 | cmd: poetry run python -m plotting_examples.y2022.pos_neg_split_hbar.plot 76 | deps: 77 | - plotting_examples/y2022/pos_neg_split_hbar/plot.py 78 | outs: 79 | - images/y2022/pos_neg_split_hbar.png: 80 | cache: false 81 | wdir: . 82 | y2022_scatter_distributions: 83 | cmd: poetry run python -m plotting_examples.y2022.scatter_distributions.plot 84 | deps: 85 | - plotting_examples/y2022/scatter_distributions/plot.py 86 | outs: 87 | - images/y2022/scatter_distributions.png: 88 | cache: false 89 | wdir: . 90 | y2022_scatter_matrix_w_kde_on_diag: 91 | cmd: poetry run python -m plotting_examples.y2022.scatter_matrix_w_kde_on_diag.plot 92 | deps: 93 | - plotting_examples/y2022/scatter_matrix_w_kde_on_diag/plot.py 94 | outs: 95 | - images/y2022/scatter_matrix_w_kde_on_diag.png: 96 | cache: false 97 | wdir: . 98 | y2022_scatter_w_outlined_text_insert: 99 | cmd: poetry run python -m plotting_examples.y2022.scatter_w_outlined_text_insert.plot 100 | deps: 101 | - plotting_examples/y2022/scatter_w_outlined_text_insert/plot.py 102 | outs: 103 | - images/y2022/scatter_w_outlined_text_insert.png: 104 | cache: false 105 | wdir: . 
106 | y2022_sns_violin_plot_custom: 107 | cmd: poetry run python -m plotting_examples.y2022.sns_violin_plot_custom.plot 108 | deps: 109 | - plotting_examples/y2022/sns_violin_plot_custom/plot.py 110 | outs: 111 | - images/y2022/sns_violin_plot_custom.png: 112 | cache: false 113 | wdir: . 114 | y2022_split_x_axis_custom_legend: 115 | cmd: poetry run python -m plotting_examples.y2022.split_x_axis_custom_legend.plot 116 | deps: 117 | - plotting_examples/y2022/split_x_axis_custom_legend/plot.py 118 | outs: 119 | - images/y2022/split_x_axis_custom_legend.png: 120 | cache: false 121 | wdir: . 122 | y2022_stacked_bar_with_single_bars_layout: 123 | cmd: poetry run python -m plotting_examples.y2022.stacked_bar_with_single_bars_layout.plot 124 | deps: 125 | - plotting_examples/y2022/stacked_bar_with_single_bars_layout/plot.py 126 | outs: 127 | - images/y2022/stacked_bar_with_single_bars_layout.png: 128 | cache: false 129 | wdir: . 130 | y2022_uk_hexmap: 131 | cmd: poetry run python -m plotting_examples.y2022.uk_hexmap.plot 132 | deps: 133 | - plotting_examples/y2022/uk_hexmap/plot.py 134 | outs: 135 | - images/y2022/uk_hexmap.png: 136 | cache: false 137 | wdir: . 138 | y2024_cat_weight: 139 | cmd: poetry run python -m plotting_examples.y2024.cat_weight.plot 140 | deps: 141 | - plotting_examples/y2024/cat_weight/plot.py 142 | outs: 143 | - images/y2024/cat_weight.png: 144 | cache: false 145 | wdir: . 146 | -------------------------------------------------------------------------------- /generate_readme.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generate plots at the end of the README. 3 | 4 | Bit of a hack - but works for now, this is mainly just to display all the created plots 5 | in the README. 
CODE = (
    "https://github.com/geo7/plotting_examples/blob/main/plotting_examples/{}/plot.py"
)


def resize_image_if_needed(
    *,
    im: str,
    max_height: int = 500,
) -> None:
    """
    Shrink an image in place when it is taller than ``max_height`` pixels.

    The image is scaled so that its height becomes ``max_height`` while the
    width/height ratio is kept. Images that are already short enough are left
    untouched. The file at ``im`` is overwritten, so the original is lost.

    Args:
    ----
        im (str):
            Path to image file.
        max_height (int):
            Largest allowed height in pixels.

    """
    # Use the image as a context manager so the underlying file handle is
    # closed before the same path is written to again.
    with Image.open(im) as image:
        width, height = image.size
        if height <= max_height:
            return
        scale = max_height / height
        new_height = int(height * scale)
        new_width = int(width * scale)
        new_image = image.resize((new_width, new_height))
    new_image.save(im)


EXCLUDE_PLOTS = [
    # This is just the template for starting a new plot off.
    "default_plot",
    # Got bored of seeing this one.
    "sns_violin_plot_custom",
    # This one was annoying as well - it's an example of creating a histogram
    # from scratch with patches which eh.
    "histogram_with_two_variables",
]


def docstring_from_py_module(*, mod_path: str | Path) -> str:
    """
    Return the module docstring of the python file at ``mod_path``.

    Docstrings in plot.py contain context about the plot, these are then used
    in the README.

    Raises
    ------
        ValueError: if the module has no docstring, or an empty one.

    """
    # Read explicitly as utf8 rather than relying on the locale default.
    code_txt = Path(mod_path).read_text(encoding="utf8")
    docstr = ast.get_docstring(ast.parse(code_txt))

    # ast.get_docstring gives None for a missing docstring and "" for an empty
    # one, neither is any use for the README.
    if not docstr:
        msg = f"No docstring found for : {mod_path}"
        raise ValueError(msg)

    return docstr


def main() -> int:
    """
    Generate readme with plots and docstring extracts.

    Everything in README.md after the automation marker is replaced with a
    bullet list of plot names followed by one section per plot (link to the
    code, module docstring, image). Plots named in EXCLUDE_PLOTS are skipped.
    Paths are relative, so this needs to be run from the repository root.
    """
    years = [
        # This should get the years up to 2099... If I'm still using matplotlib
        # at that point I'll consider that a success.... or maybe a failure,
        # not sure.
        x.name
        for x in sorted(Path("./plotting_examples").glob("*"))
        if x.is_dir() and re.fullmatch(r"y20\d{2}", x.name)
    ]

    # NOTE: keyed by plot name only, a name used in two different years would
    # keep just the later year's entry.
    readme_data: dict[str, dict[str, object]] = {}

    for year in years:
        # For each image want to build up a dictionary of the image path within
        # the repo, and the docstring from the respective python module. Then
        # in the README the python docstring will be added alongside the image.
        for img in sorted(Path(f"./images/{year}").glob("*")):
            if "DS_Store" in img.name:
                continue

            title = img.stem
            code_path = Path("./plotting_examples") / year / title / "plot.py"

            readme_data[title] = {
                # Keep the year alongside the image rather than parsing it back
                # out of the image path later on.
                "year": year,
                "img_path": img,
                "doc_str": docstring_from_py_module(mod_path=code_path),
            }

    # Might as well sort the generated plots.
    readme_data = {
        x: readme_data[x]
        for x in sorted(readme_data)
        if not any(exclude in x for exclude in EXCLUDE_PLOTS)
    }

    # Create values to append to readme.
    parts = ["\n\n# Plots\n\n"]

    # Create some bullet points with the plot names
    parts.extend(
        f"* [`{title}`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#{title})\n"
        for title in readme_data
    )
    parts.append("\n")

    for title, data in readme_data.items():
        url_path = f"{data['year']}/{title}"
        parts.append(
            f"\n\n## [`{title}`]({CODE.format(url_path)})\n\n"
            f"{data['doc_str']}\n\n"
            f"![]({data['img_path']})",
        )

    readme_update = "".join(parts)

    # Update README

    # This is used to signal where automated content starts.
    rm_split = "[comment]: # (Automate plots beneath this.)"
    readme = Path("README.md")

    # Keep only the hand written part above the marker, if the marker is not
    # there yet the whole file is kept and the marker gets appended.
    rm_txt = readme.read_text(encoding="utf8").split(rm_split)[0]
    rm_txt = rm_txt + rm_split + readme_update
    # Ensure new line at eof
    rm_txt += "\n"

    readme.write_text(rm_txt, encoding="utf8")

    return 0


if __name__ == "__main__":
    raise SystemExit(main())
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/binary_outcome_variable.png -------------------------------------------------------------------------------- /images/y2022/box_plot_w_scatter_distributions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/box_plot_w_scatter_distributions.png -------------------------------------------------------------------------------- /images/y2022/default_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/default_plot.png -------------------------------------------------------------------------------- /images/y2022/histogram_with_two_variables.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/histogram_with_two_variables.png -------------------------------------------------------------------------------- /images/y2022/line_plot_fill_between.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/line_plot_fill_between.png -------------------------------------------------------------------------------- /images/y2022/meaningless_points.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/meaningless_points.png -------------------------------------------------------------------------------- /images/y2022/opinium_barchart.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/opinium_barchart.png -------------------------------------------------------------------------------- /images/y2022/pandas_stacked_bars_with_values.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/pandas_stacked_bars_with_values.png -------------------------------------------------------------------------------- /images/y2022/pos_neg_split_hbar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/pos_neg_split_hbar.png -------------------------------------------------------------------------------- /images/y2022/scatter_distributions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/scatter_distributions.png -------------------------------------------------------------------------------- /images/y2022/scatter_matrix_w_kde_on_diag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/scatter_matrix_w_kde_on_diag.png -------------------------------------------------------------------------------- /images/y2022/scatter_w_outlined_text_insert.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/scatter_w_outlined_text_insert.png 
-------------------------------------------------------------------------------- /images/y2022/sns_violin_plot_custom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/sns_violin_plot_custom.png -------------------------------------------------------------------------------- /images/y2022/split_x_axis_custom_legend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/split_x_axis_custom_legend.png -------------------------------------------------------------------------------- /images/y2022/stacked_bar_with_single_bars_layout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/stacked_bar_with_single_bars_layout.png -------------------------------------------------------------------------------- /images/y2022/uk_hexmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/uk_hexmap.png -------------------------------------------------------------------------------- /images/y2024/cat_weight.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2024/cat_weight.png -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | 3 | [mypy-numpy.*] 4 | ignore_missing_imports = True 5 | 6 | [mypy-pandas.*] 7 | ignore_missing_imports = True 8 | 9 
def add_to_dvc(*, path: pathlib.Path) -> None:
    """
    Add a stage for the plot at ``path`` to dvc.yaml, unless it already exists.

    The stage is named ``<year>_<name>``, runs the plot module with poetry,
    depends on the plot.py file and outputs ``images/<year>/<name>.png``
    (not cached by dvc). ``dvc.yaml`` is looked up relative to the current
    working directory.

    Args:
    ----
        path (pathlib.Path):
            Path to the plot.py file of the plot.

    """
    year, name = extract_year_name_from_plot_py(file=str(path))
    stage_name = f"{year}_{name}"

    dvc_file = pathlib.Path("dvc.yaml")
    dvc = yaml.safe_load(dvc_file.read_text(encoding="utf8"))

    if stage_name in dvc["stages"]:
        # Already registered, leave the file alone rather than rewriting it.
        return

    # Project not yet added to dvc.yaml
    dvc["stages"][stage_name] = {
        "wdir": ".",
        "cmd": f"poetry run python -m plotting_examples.{year}.{name}.plot",
        "deps": [f"plotting_examples/{year}/{name}/plot.py"],
        "outs": [{f"images/{year}/{name}.png": {"cache": False}}],
    }

    # Write with the same encoding the file was read with.
    dvc_file.write_text(yaml.dump(dvc), encoding="utf8")
def extract_year_name_from_plot_py(*, file: str) -> tuple[str, str]:
    """
    Get the year and plot name from the path to a plotting file.

    The year and name are the two directories directly containing the file,
    so the result does not depend on where the repository is checked out or
    on the path separator of the platform.

    Given a path such as.

    >>> /home/.../plotting_examples/plotting_examples/y2022/default_plot/plot.py

    Return:
    ------
    >>> "y2022", "default_plot"

    Raises:
    ------
        ValueError: if ``file`` is not a .py file, or is not nested inside at
        least two directories.

    """
    pth = Path(file)
    if pth.suffix != ".py":
        msg = "Expect this to be run on .py files."
        raise ValueError(msg)

    name = pth.parent.name
    year = pth.parent.parent.name
    if not year or not name:
        msg = f"Expect a path like <year>/<name>/plot.py, got : {file}"
        raise ValueError(msg)

    return year, name


def save_plot(
    *,
    fig: mpl.figure.Figure,
    file: str,
    dpi: int = 150,
) -> None:
    """
    Util for saving plot to images dir.

    The figure is written to ``./images/<year>/<name>.png`` where year and
    name are taken from ``file`` (typically ``__file__`` of the plot module).

    Args:
    ----
        fig (mpl.figure.Figure):
            Figure to save.
        file (str):
            Path to the plot.py file which created the figure.
        dpi (int):
            Resolution passed through to ``fig.savefig``.

    """
    year, name = extract_year_name_from_plot_py(file=file)

    year_dir = Path("./images") / year
    # Create the dir (and ./images itself) if needed, without failing when it
    # is already there.
    year_dir.mkdir(parents=True, exist_ok=True)

    png_pth = year_dir / (name + ".png")
    fig.savefig(png_pth, dpi=dpi)
# Fixed seed so that the generated data is the same on every run.
np_rnd = np.random.Generator(np.random.MT19937(0))


def generate_data() -> pd.DataFrame:
    """
    Create sample data.

    Returns a frame of 1,000 rows with two label columns, sorted by `race`:

    - `race`: one of five labels, drawn without explicit weights.
    - `church_attendance`: one of six labels, drawn with the probabilities
      given in `p` (in the same order as the labels).

    Values come from the module level `np_rnd` generator, so they depend on
    how much that generator has already been used.
    """
    n = 1_000
    return pd.DataFrame(
        {
            "race": np_rnd.choice(
                ["White", "Black", "Hispanic", "Asian", "All Others"],
                size=n,
            ),
            "church_attendance": np_rnd.choice(
                ["Never", "Seldom", "Yearly", "Monthly", "Weekly", "Weekly+"],
                size=n,
                p=[
                    0.1,
                    0.1,
                    0.1,
                    0.15,
                    0.25,
                    0.3,
                ],
            ),
        },
    ).sort_values("race")


def main() -> mpl.figure.Figure:
    """
    Create the figure: one bar chart of church attendance per race group.

    A 2x3 grid of axes is created, each group from `generate_data` takes the
    next axis and the remaining axis is blanked out. Bars are coloured by
    their height using the "cool" colormap, labelled with their value and
    decorated with a fixed size (+/- 5) error-bar style marker.

    Returns the figure, saving is left to the caller.
    """
    data = generate_data()

    # Order of the bars along the x axis (value_counts orders by count).
    ordering = [
        "Never",
        "Seldom",
        "Yearly",
        "Monthly",
        "Weekly",
        "Weekly+",
    ]

    # NOTE(review): this one locator instance is set on every axis below,
    # matplotlib's ticker docs advise against sharing a locator between
    # axes - confirm whether a locator per axis is needed.
    loc = plticker.MultipleLocator(
        base=20.0,
    )  # this locator puts ticks at regular intervals

    # rc params only apply to what's created inside this context.
    with plt.rc_context(
        {
            "xtick.major.pad": 20,
            "font.family": "monospace",
        },
    ):
        fig, axis = plt.subplots(
            figsize=(30, 12),
            ncols=3,
            nrows=2,
            sharey=True,
            constrained_layout=False,
        )
        fig.tight_layout(h_pad=10, w_pad=10)

        # 2d array of axes -> 1d so they can be iterated over in order.
        axis = axis.flatten()

        # Style plots.
        for ax in axis:
            ax.grid(alpha=0.2, zorder=0)
            for x in ["top", "right", "left", "bottom"]:
                ax.spines[x].set_visible(False)
            ax.tick_params(axis="both", which="both", length=0, labelsize=18)

        # x=0.0 with left alignment puts the title at the left edge of the
        # figure, y > 1 places it above the figure area.
        fig.suptitle(
            "The Relationship Between Church Attendence and a Republican Vote by Race",
            fontsize=30,
            y=1.1,
            x=0.0,
            horizontalalignment="left",
        )
        # needs mpl version >= 3.4
        fig.supylabel(
            "Vote for Trump in 2020",
            fontsize=25,
            x=-0.02,
        )

        # Iterator so each group can take the next free axis, and whatever is
        # left over after the loop can be blanked out.
        axis = iter(axis)

        for g, dfg in data.groupby("race"):
            color_map = mpl.colormaps["cool"].resampled(100)

            ax = next(axis)
            ax.yaxis.set_major_locator(loc)
            group_bar_values_unordered = (
                dfg["church_attendance"].value_counts().to_dict()
            )
            # Re-order the counts, this raises KeyError if a group has no rows
            # for one of the attendance levels.
            group_bar_values = {x: group_bar_values_unordered[x] for x in ordering}

            # zorder above the grid (zorder=0) so grid lines sit behind bars.
            barplot = ax.bar(
                x=list(group_bar_values.keys()),
                height=list(group_bar_values.values()),
                zorder=3,
            )
            ax.set_title(g, fontsize=25, y=1.0)
            ax.set_ylim(bottom=0, top=90)
            ax.set_yticks([], minor=True)

            def fmt(x: float, _pos: int) -> str:
                # Not _too_ sure what this is about - think it's just what
                # set_major_formatter applies? It passes two arguments though - the
                # tick value (x) and the position (pos)...
                return f"{int(x)}"

            ax.yaxis.set_major_formatter(mpl.ticker.FuncFormatter(fmt))

            for bar in barplot:
                # Set the bar color by bar height.
                # NOTE(review): presumably the height is an integer count here
                # and so is used as an index into the 100 colours - confirm.
                bar.set_color(color_map(bar.get_height()))
                # Value label just above the base of the bar, centred on it.
                # NOTE(review): the value comes from value_counts() so it is a
                # count with a "%" suffix rather than a percentage.
                ax.text(
                    x=bar.get_x() + 0.5 * (bar.get_width()),
                    y=bar.get_y() + 2.5,
                    s=f"{bar.get_height()}%",
                    fontsize=20,
                    ha="center",
                )
                # Error-bar style marker: vertical line of +/- 5 around the top
                # of the bar...
                ax.vlines(
                    x=bar.get_x() + 0.5 * (bar.get_width()),
                    ymin=bar.get_height() - 5,
                    ymax=bar.get_height() + 5,
                    linewidth=4,
                    zorder=5,
                    color="#404040",
                )
                # ... with a short cap at the bottom ...
                ax.hlines(
                    y=bar.get_height() - 5,
                    xmin=(bar.get_x() + 0.5 * (bar.get_width())) - 0.1,
                    xmax=(bar.get_x() + 0.5 * (bar.get_width())) + 0.1,
                    zorder=5,
                    linewidth=4,
                    color="#404040",
                )
                # ... and at the top.
                ax.hlines(
                    y=bar.get_height() + 5,
                    xmin=(bar.get_x() + 0.5 * (bar.get_width())) - 0.1,
                    xmax=(bar.get_x() + 0.5 * (bar.get_width())) + 0.1,
                    zorder=5,
                    linewidth=4,
                    color="#404040",
                )

            ax.tick_params(axis="y", colors="grey")
            ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)

        # Just format the final plot - it's blank - to just get rid of all plot params
        # here. If there was more than one would need to handle a bit differently here.
        ax = next(axis)
        ax.grid(alpha=0)
        ax.set_xticks([])
        for x in ["top", "right", "left", "bottom"]:
            ax.spines[x].set_visible(False)

        ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)

        fig.set_tight_layout(True)  # type: ignore[attr-defined]
        fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
    return fig


if __name__ == "__main__":
    # Register the plot as a dvc stage, then write the image to ./images.
    dvc_entry.add_to_dvc(path=pathlib.Path(__file__))
    save_plot_output.save_plot(fig=main(), file=__file__)
    raise SystemExit
31,0.3365630598553707,0.0 34 | 32,1.7740841144550268,1.0 35 | 33,-0.4240492991525555,0.0 36 | 34,1.9765103446881005,1.0 37 | 35,0.6875731648825243,0.0 38 | 36,0.48075160343621204,1.0 39 | 37,1.7917034287025366,1.0 40 | 38,0.29649576518401316,0.0 41 | 39,-0.20568700623329317,0.0 42 | 40,2.2529655667205586,1.0 43 | 41,-0.6138512476336486,0.0 44 | 42,1.6662716460969933,0.0 45 | 43,5.607955686672451,1.0 46 | 44,-1.7178950568217535,0.0 47 | 45,-1.5541952161418995,0.0 48 | 46,0.2149985878325868,0.0 49 | 47,2.2745722013953555,1.0 50 | 48,3.6688248016156075,1.0 51 | 49,2.888755806801737,1.0 52 | 50,3.892428495587999,1.0 53 | 51,1.8273090056796961,0.0 54 | 52,2.1998739159436442,1.0 55 | 53,2.1568554817879733,1.0 56 | 54,3.348012018831957,1.0 57 | 55,2.4655510026032945,1.0 58 | 56,-0.5665483983904225,0.0 59 | 57,-0.23387727182304746,0.0 60 | 58,2.7533905386624946,1.0 61 | 59,2.454163346936323,1.0 62 | 60,1.9106009445188148,1.0 63 | 61,-0.562556123742248,0.0 64 | 62,2.4476866391258056,1.0 65 | 63,-0.4973108111772468,0.0 66 | 64,0.7416802995622478,0.0 67 | 65,3.1769629339911583,1.0 68 | 66,-0.29643406638083747,0.0 69 | 67,-0.019125121483606716,0.0 70 | 68,0.543765521085082,0.0 71 | 69,3.2595168518283213,1.0 72 | 70,1.5714071630266757,1.0 73 | 71,-0.17876891111483648,0.0 74 | 72,-0.26221130572986856,0.0 75 | 73,-1.3534058345926328,0.0 76 | 74,0.9989509035674422,1.0 77 | 75,-2.2716152798398235,0.0 78 | 76,-0.8269491136353684,0.0 79 | 77,-0.7879429469570461,1.0 80 | 78,0.467851762636354,0.0 81 | 79,-1.4089215315339054,0.0 82 | 80,0.8116493521291177,0.0 83 | 81,-0.1801822831261996,0.0 84 | 82,2.6892138693830745,1.0 85 | 83,1.2602001853572777,1.0 86 | 84,3.7686379048258347,1.0 87 | 85,1.2021253923467357,1.0 88 | 86,0.03204825943738432,1.0 89 | 87,-1.5613869496540094,0.0 90 | 88,3.5978005775339157,1.0 91 | 89,0.2904121858658958,1.0 92 | 90,0.18389993640789415,1.0 93 | 91,-1.844690371525563,0.0 94 | 92,0.6033888587207837,0.0 95 | 93,4.471774864677858,1.0 96 | 
def binary_outcome_plot(
    data: pd.DataFrame,
    x_var: str = "x",
    y: str = "y",
    fig: mpl.figure.Figure | None = None,
) -> mpl.figure.Figure:
    """
    Create plot of continuous var by binary outcome.

    Every outcome group is drawn as a row of points at its own y value, with a
    larger marker, a vertical line and a text label at the group's median of
    ``x_var``.

    Parameters
    ----------
    data
        Frame containing the columns named by ``x_var`` and ``y``.
    x_var
        Name of the continuous column plotted along the x axis.
    y
        Name of the outcome column. Its values must be keys of the colour
        lookup below (``0`` and ``1``), otherwise a ``KeyError`` is raised.
    fig
        Figure to draw on (its current axes are used). When ``None`` a new
        20x3 inch figure is created.

    Returns
    -------
    The figure that was drawn on.
    """
    # Honour a caller-supplied figure; previously the argument was accepted but
    # always overwritten by a freshly created figure.
    if fig is None:
        fig, ax = plt.subplots(figsize=(20, 3))
    else:
        ax = fig.gca()

    colors = {
        0: metadata.color.PINK_COLOUR,
        1: metadata.color.DEEPER_GREEN,
    }
    # Grouping by the bare column name yields scalar group keys, so there is no
    # need to unpack (and length-check) a one-element tuple.
    for g, dfg in data.groupby(y):
        ax.scatter(
            x=dfg[x_var],
            y=dfg[y],
            color=colors[g],
        )

        med = dfg[x_var].median()
        # Larger marker on the group's median.
        ax.scatter(
            x=med,
            y=g,
            s=90,
            color=colors[g],
        )
        # Line from the group's row towards the vertical middle (0.5) of the plot.
        ax.vlines(
            x=med,
            ymin=min(g, 0.5),
            ymax=max(g, 0.5),
            color=colors[g],
        )

        # abs(g - 0.15) places the label just inside the plot for both rows
        # (0.15 for group 0, 0.85 for group 1).
        ax.text(
            x=med + 0.5,
            y=abs(g - 0.15),
            s=f"Median {g} : {round(med,2)}",
            fontsize=15,
        )
    ax.set_title(
        f"{x_var} x {y}",
        fontsize=20,
    )
    ax.grid(alpha=0.2)

    fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
    ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)
    return fig
def clean_comma(df: pd.DataFrame, *, column: str) -> pd.DataFrame:
    """
    Replace commas in series with empty strings.

    Values of ``column`` whose string form contains a comma (e.g. ``"1,234.5"``)
    are stripped of commas and converted to ``float``; all other values are left
    as they are. The input frame is not modified.

    Parameters
    ----------
    df
        Frame to clean.
    column
        Name of the column to clean.

    Returns
    -------
    A copy of ``df`` with the comma-containing values of ``column`` converted.
    """
    df = df.copy()
    row_mask = df[column].astype(str).str.contains(",")
    # Nothing to clean: return early. Without this guard an already-numeric
    # column fails, because the empty selection keeps its numeric dtype and the
    # ``.str`` accessor rejects it.
    if not row_mask.any():
        return df
    df.loc[row_mask, column] = (
        df.loc[row_mask, column].str.replace(",", "", regex=False).astype(float)
    )
    return df
def top_n_groups(
    df: pd.DataFrame,
    *,
    column: str,
    n: int,
    rename: str = "Other",
) -> pd.DataFrame:
    """
    Keep the ``n`` most frequent values of ``column`` and relabel the rest.

    Frequencies are counted with missing values included. Every row whose value
    is not among the ``n`` most frequent ones has ``column`` overwritten with
    ``rename``. The input frame is left untouched; a modified copy is returned.
    """
    result = df.copy()
    frequencies = result[column].value_counts(dropna=False)
    most_common = frequencies.head(n).index
    outside_top = ~result[column].isin(most_common)
    result.loc[outside_top, column] = rename
    return result
def top_bottom_whisker_y_values(*, values: list[float]) -> tuple[float, float]:
    """
    Get top/bottom for boxplot whiskers.

    The whiskers end at the most extreme observations that lie strictly inside
    the fences ``Q1 - 1.5 * IQR`` and ``Q3 + 1.5 * IQR``.

    Parameters
    ----------
    values
        Observations of a single group.

    Returns
    -------
    ``(bottom, top)``: the smallest value above the lower fence and the largest
    value below the upper fence.
    """
    series = pd.Series(values)
    quant_1 = series.quantile(0.25)
    quant_3 = series.quantile(0.75)
    iqr = quant_3 - quant_1
    top_range = quant_3 + 1.5 * iqr
    # The lower fence hangs off the FIRST quartile. It was previously computed
    # from the third quartile, which pushed the fence up into the data and
    # flagged ordinary low values as outliers.
    bottom_range = quant_1 - 1.5 * iqr
    # top of the boxplot
    box_plot_top = series[series.lt(top_range)].max()
    # bottom of the box_plot
    box_plot_bottom = series[series.gt(bottom_range)].min()
    return box_plot_bottom, box_plot_top
def make_single_box(
    *,
    ax: plt.Axes,  # type: ignore[name-defined]
    values: list[float],
    x_center: float,
    scatter_color: str,
    linewidth: float = 5,
    box_width: float = 0.14,
    box_colour: str = "#000000",
    whisker_color: str = "#000000",
    median_colour: str = "#000000",
    outlier_colour: str = "#000000",
) -> None:
    """
    Draw one hand-built boxplot, plus a jittered scatter of the raw values.

    The box is centred on ``x_center`` and assembled from individual lines: the
    quartile edges, the median, and whiskers running out to the values returned
    by ``top_bottom_whisker_y_values``. Points beyond the whiskers are drawn as
    outliers on the centre line, and all values are scattered with random
    horizontal jitter to the right of the box.
    """
    observations = pd.Series(values)
    summary = observations.describe().to_dict()
    lower_quartile = summary["25%"]
    upper_quartile = summary["75%"]

    half_width = box_width * 0.5
    left_edge = x_center - half_width
    right_edge = x_center + half_width

    # Box outline: horizontal edges at the quartiles, then the two sides.
    for quartile in (lower_quartile, upper_quartile):
        boxp_hline(
            ax=ax,
            x_center=x_center,
            y_value=quartile,
            box_width=box_width,
            linewidth=linewidth,
            box_colour=box_colour,
        )
    for edge in (right_edge, left_edge):
        boxp_vline(
            ax=ax,
            x=edge,
            ymin=lower_quartile,
            ymax=upper_quartile,
            color=box_colour,
            linewidth=linewidth,
        )

    # Median line across the full width of the box.
    ax.hlines(
        y=summary["50%"],
        xmin=left_edge,
        xmax=right_edge,
        color=median_colour,
        zorder=1,
        linewidth=linewidth,
    )

    # Vertical whisker lines, from each quartile out to its whisker end.
    whisker_bottom, whisker_top = top_bottom_whisker_y_values(values=values)
    whisker_spans = (
        (upper_quartile, whisker_top),
        (lower_quartile, whisker_bottom),
    )
    for span_start, span_end in whisker_spans:
        ax.vlines(
            x=x_center,
            ymin=span_start,
            ymax=span_end,
            color=whisker_color,
            capstyle="round",
        )

    # Outliers: everything strictly beyond either whisker end.
    below_whisker = observations.lt(whisker_bottom)
    above_whisker = observations.gt(whisker_top)
    outliers = observations[below_whisker | above_whisker]
    ax.scatter(
        x=[x_center] * len(outliers),
        y=list(outliers),
        color=outlier_colour,
        s=5,
        alpha=0.8,
        edgecolors=None,
    )

    # Raw values, jittered horizontally and offset to the right of the box.
    jittered_x = np_rnd.normal(
        loc=x_center + 0.2,
        scale=0.03,
        size=len(values),
    )
    ax.scatter(
        x=jittered_x,
        y=values,
        alpha=0.1,
        s=10,
        zorder=-1,
        color=scatter_color,
        edgecolors=None,
    )
def main() -> mpl.figure.Figure:
    """Read the parquet file next to this module, clean it and build the figure."""
    data_path = pathlib.Path(__file__).parent / "data.parquet"
    cleaned = clean(df=pd.read_parquet(data_path))

    rc_overrides = {
        "xtick.major.pad": 10,
        "font.family": "monospace",
    }
    with plt.rc_context(rc_overrides):
        return example(df=cleaned)
def get_sample_data() -> pd.DataFrame:
    """Return the five-point ``x``/``y`` toy frame used by the default plot."""
    x_values = [1, 2, 3, 4, 5]
    y_values = [1, 2, 2, 3, 8]
    return pd.DataFrame({"x": x_values, "y": y_values})
8 | 9 | The result is pretty rubbish :) 10 | """ 11 | 12 | from __future__ import annotations 13 | 14 | import pathlib 15 | 16 | import matplotlib as mpl 17 | import matplotlib.pyplot as plt 18 | import numpy as np 19 | import pandas as pd 20 | from matplotlib import patches, ticker 21 | 22 | from plotting_examples import dvc_entry, save_plot_output 23 | from plotting_examples.y2022 import metadata 24 | 25 | 26 | def gen_data() -> tuple[pd.DataFrame, dict[str, str]]: 27 | """ 28 | Generate sample data for plotting. 29 | 30 | Return data as: 31 | 32 | >>> male female row_min row_max color pain_scale 33 | >>> 0 6.8 0.8 0.8 6.8 #9A7AA0 1 34 | >>> 1 10.7 1.0 1.0 10.7 #9A7AA0 2 35 | >>> 2 14.8 4.3 4.3 14.8 #9A7AA0 3 36 | >>> 3 18.9 10.5 10.5 18.9 #9A7AA0 4 37 | >>> 4 19.3 14.0 14.0 19.3 #9A7AA0 5 38 | >>> 5 16.9 19.9 16.9 19.9 #B4EDD2 6 39 | >>> 6 6.8 16.6 6.8 16.6 #B4EDD2 7 40 | >>> 7 3.9 16.2 3.9 16.2 #B4EDD2 8 41 | >>> 8 1.3 9.3 1.3 9.3 #B4EDD2 9 42 | >>> 9 0.6 7.4 0.6 7.4 #B4EDD2 10 43 | 44 | """ 45 | rng = np.random.default_rng(1) 46 | n = 1_000 47 | df = pd.DataFrame( 48 | { 49 | "male": np.digitize( 50 | np.clip(rng.normal(loc=4, scale=2, size=n), 0, 10), 51 | range(10), 52 | ), 53 | "female": np.digitize( 54 | np.clip(rng.normal(loc=6, scale=2, size=n), 0, 10), 55 | range(10), 56 | ), 57 | }, 58 | ) 59 | # https://coolors.co/b4edd2-a0cfd3-8d94ba-9a7aa0-87677b 60 | colour_map = { 61 | "male": metadata.color.PINK_COLOUR, 62 | "female": metadata.color.LIGHT_GREEN, 63 | } 64 | 65 | # https://coolors.co/b4edd2-a0cfd3-8d94ba-9a7aa0-87677b 66 | plot_data = ( 67 | df.apply(lambda x: x.value_counts(normalize=True).mul(100)) 68 | .assign( 69 | row_min=lambda df: df.apply(lambda dt: min(dt.to_list()), axis=1), 70 | row_max=lambda df: df.apply(lambda dt: max(dt.to_list()), axis=1), 71 | # want to use this to determine colours 72 | color=lambda df: df.idxmax(axis=1).map(colour_map), 73 | pain_scale=lambda df: df.index, 74 | ) 75 | .reset_index(drop=True) 76 | ) 77 | return 
def main() -> mpl.figure.Figure:
    """
    Create plot.

    For every scale value, a grey bar covers the share common to both groups
    (up to the smaller percentage) and a coloured bar covers the difference
    above it, coloured after whichever group has the larger share.

    Returns
    -------
    The finished figure.
    """
    plot_data, colour_map = gen_data()

    # Locate the shared style sheet relative to this file so the script does not
    # depend on being launched from the repository root.
    style_sheet = pathlib.Path(__file__).resolve().parents[2] / "rc.mplstyle"
    plt.style.use(style_sheet)

    with plt.rc_context(
        {
            "xtick.major.pad": 10,
            "font.family": "monospace",
        },
    ):
        fig, ax = plt.subplots(figsize=(15, 5))

        # ensure that axis area covers data.
        ax.set_xlim(left=0, right=11)
        ax.set_ylim(
            bottom=0,
            top=plot_data["row_max"].max() + 5,
        )

        def add_bar(
            ax: plt.Axes,  # type: ignore[name-defined]
            x: int,
            y1: float,
            y2: float,
            facecolor: str,
            alpha: float,
            outline: bool,
        ) -> None:
            """
            Add a bar to the given ax object.

            The bar is one unit wide, centred on ``x``, starts at ``y1`` and is
            ``y2`` tall (``y2`` is a height, not the top coordinate).
            """
            width = 1
            rect = patches.Rectangle(
                xy=(x - 0.5 * width, y1),
                width=width,
                height=y2,
                linewidth=1,
                edgecolor="none",
                facecolor=facecolor,
                alpha=alpha,
            )
            ax.add_patch(rect)
            if outline:
                # The top of the bar is at y1 + y2, since y2 is the bar height.
                ax.hlines(
                    y=y1 + y2,
                    xmin=x - 0.5 * width,
                    xmax=x + 0.5 * width,
                )

        for row in plot_data.itertuples():
            # plot the diffs
            add_bar(
                ax=ax,
                x=row.pain_scale,
                y1=row.row_min,
                y2=(row.row_max - row.row_min),
                facecolor=row.color,
                alpha=0.8,
                outline=False,
            )
            # plot beneath the diffs
            add_bar(
                ax=ax,
                x=row.pain_scale,
                y1=0,
                y2=row.row_min,
                facecolor=metadata.color.GREY,
                alpha=0.2,
                outline=False,
            )

        ax.spines["right"].set_visible(False)
        ax.spines["top"].set_visible(False)

        label_fontsize = 15
        ax.set_ylabel("Percentage of respondents", fontsize=label_fontsize)
        ax.set_xlabel(
            "Some scale (1 least, 10 greatest)",
            fontsize=label_fontsize,
        )
        ax.set_title(
            "Reporting of something for male, female respondents",
            fontsize=20,
        )

        # The bars are plain patches, so the legend needs explicit handles.
        legend_elements = [
            patches.Patch(
                facecolor=colour_map[group],
                edgecolor="none",
                label=group,
            )
            for group in ("male", "female")
        ]
        ax.legend(
            handles=legend_elements,
            frameon=False,
            fontsize=15,
        )

        ax.yaxis.set_major_formatter(ticker.FormatStrFormatter("%d%%"))
        ax.xaxis.set_major_locator(ticker.MultipleLocator(1))

        fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
        ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)

    return fig
39 | 2022,2,9,-30 40 | 2022,2,10,50 41 | 2022,2,10,-40 42 | 2022,2,10,30 43 | 2022,2,11,-5 44 | 2022,2,11,-10 45 | 2022,2,14,10 46 | 2022,2,14,20 47 | 2022,2,15,-25 48 | 2022,2,15,90 49 | 2022,2,25,-15 50 | 2022,3,1,-15 51 | 2022,3,1,-70 52 | 2022,3,1,30 53 | 2022,3,3,-5 54 | 2022,3,4,45 55 | 2022,3,4,-65 56 | 2022,3,8,10 57 | 2022,3,8,15 58 | 2022,3,9,10 59 | 2022,3,10,30 60 | 2022,3,10,20 61 | 2022,3,11,10 62 | 2022,3,14,30 63 | 2022,3,15,-30 64 | 2022,3,15,-30 65 | 2022,3,16,-15 66 | 2022,3,16,15 67 | 2022,3,16,50 68 | 2022,3,17,-30 69 | 2022,3,17,-40 70 | 2022,3,17,30 71 | 2022,3,18,60 72 | 2022,3,18,60 73 | 2022,3,18,-50 74 | 2022,3,21,-15 75 | 2022,3,22,30 76 | 2022,3,22,-10 77 | 2022,3,23,-40 78 | 2022,3,24,160 79 | 2022,3,25,15 80 | 2022,3,25,120 81 | 2022,4,12,-15 82 | 2022,4,13,50 83 | 2022,4,14,-20 84 | 2022,4,14,300 85 | 2022,4,15,400 86 | 2022,4,16,480 87 | 2022,4,19,100 88 | -------------------------------------------------------------------------------- /plotting_examples/y2022/line_plot_fill_between/plot.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=duplicate-code 2 | """ 3 | Visualise time tracking, how much over/under time. 4 | 5 | Mainly serves as an example of plotting with dates, and filling above / below 6 | particular values on a plot. 
def main() -> mpl.figure.Figure:
    """
    Plot the running total of the tracked amounts, with a gap for the break.

    The CSV stored next to this module is read, the ``amount`` column is
    accumulated, and the running total is drawn as two separate line segments
    (before and after the break) so that no line is drawn across the gap. The
    area between the line and zero is shaded pink where the total is
    non-negative and grey where it is non-positive.

    Returns
    -------
    The finished figure.
    """
    with plt.rc_context(
        {
            "xtick.major.pad": 10,
            "font.family": "monospace",
        },
    ):
        # Read the data relative to this file rather than the working directory.
        df = (
            pd.read_csv(pathlib.Path(__file__).parent / "data.csv")
            .rename(columns=lambda x: x.lower().replace(" ", "_").strip())
            .assign(y=lambda df: df["amount"])
        )

        # Create date column from separate columns in sheet data.
        df["date"] = pd.to_datetime(df[["year", "month", "day"]])

        # Interested in the cumulative sum either way.
        df["y_cumsum"] = df["y"].cumsum()

        # For creating the plot title.
        date_min = df["date"].min().date().strftime("%d/%m/%Y")
        date_max = df["date"].max().date().strftime("%d/%m/%Y")

        # highlight break: everything up to 28 March, and from 11 April on.
        # Month and day are compared as a pair; testing them independently
        # would drop, for example, 31 January because its day exceeds 28.
        up_to_break = df["month"].lt(3) | (df["month"].eq(3) & df["day"].le(28))
        past_break = df["month"].gt(4) | (df["month"].eq(4) & df["day"].ge(11))
        segments = (up_to_break, past_break)

        # Single figure; an earlier, immediately discarded one is no longer made.
        fig, ax = plt.subplots(figsize=(25, 15))

        # One line per segment so the break shows up as a gap.
        for segment in segments:
            ax.plot(
                df.loc[segment, "date"],
                df.loc[segment, "y_cumsum"],
                color="black",
                linewidth=2,
            )

        # Put black points on values which were over 60.
        notable = df["y"].gt(60)
        ax.scatter(
            x=df.loc[notable, "date"],
            y=df.loc[notable, "y_cumsum"],
            s=100,
            color="black",
            zorder=3,
        )

        ax.grid(alpha=0.15)

        # labels
        ax.set_ylabel(
            "Units over/under",
            fontsize=15,
        )
        ax.set_title(
            f"Information about something useful, from {date_min} to {date_max}",
            fontsize=25,
        )

        # Text
        ax.text(
            x=df["date"].to_list()[2],
            y=1500,
            s=(
                "Shows information about something for some time which was interesting."
                " \nBlack points indicate something of particular note."
            ),
            fontsize=25,
        )

        # Color beneath plot based on whether it's over or under 0, separately
        # for the segment before and the segment after the break.
        for segment in segments:
            running_total = df.loc[segment, "y_cumsum"]
            shading = (
                (metadata.color.PINK_COLOUR, running_total >= 0),
                (metadata.color.GREY, running_total <= 0),
            )
            for colour, where in shading:
                ax.fill_between(
                    df.loc[segment, "date"],
                    0,
                    running_total,
                    alpha=0.5,
                    color=colour,
                    where=where,
                )

        # One tick per day, labelled with just the day/month.
        ax.xaxis.set_major_locator(mdates.DayLocator(interval=1))  # type: ignore[no-untyped-call]
        ax.xaxis.set_major_formatter(mdates.DateFormatter("%d/%m"))  # type: ignore[no-untyped-call]

        for label in ax.get_xticklabels():
            label.set_rotation(80)
            label.set_ha("center")

        fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
        ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)

        fig.set_tight_layout(True)  # type: ignore[attr-defined]
    return fig
np_rnd = np.random.Generator(np.random.MT19937())


def main() -> mpl.figure.Figure:
    """
    Draw a handful of soft, glowing points on a black background.

    Each point is a stack of concentric scatter markers whose size shrinks while
    their alpha grows, which gives a blurred "bokeh" look. Point positions come from
    a generator seeded with 2, so the figure is reproducible.
    """
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.set_facecolor("black")

    def make_point(
        *,
        x: float,
        y: float,
        con_min: int = 10,
        con_max: int = 10_000,
        num_cont: int = 20,
        alpha_mult: float = 0.2,
        color: str = "black",
    ) -> None:
        """Stack ``num_cont`` markers at (x, y): largest and faintest first."""
        # Marker areas run from con_max down to con_min ...
        concentric = np.flip(np.linspace(con_min, con_max, num=num_cont))
        # ... while alpha runs the other way, peaking at alpha_mult for the
        # smallest marker.
        alphas = np.flip(concentric / con_max) * alpha_mult
        for con, alph in zip(concentric, alphas):
            ax.scatter(
                x=x,
                y=y,
                color=color,
                s=con,
                alpha=alph,
            )

    # Colours are handed out round-robin across every point drawn.
    colors = itertools.cycle(
        [
            metadata.color.PINK_COLOUR,
            metadata.color.LIGHT_GREEN,
            metadata.color.BLUE,
            metadata.color.DEEPER_GREEN,
        ],
    )

    plot_params = [
        # size, alpha_mult, con_max, num_cont
        (2, 0.3, 800, 50),
        (2, 0.35, 200, 5),
        (5, 0.05, 500, 9),
        (4, 0.15, 500, 9),
        (5, 0.1, 2_000, 50),
        (3, 0.1, 3_000, 50),
        (2, 0.1, 6_000, 50),
        (2, 0.09, 5_000, 50),
        (5, 0.008, 15_000, 150),
        (3, 0.08, 2_000, 20),
    ]
    rng = np.random.default_rng(2)

    for size, alpha_mult, con_max, num_cont in plot_params:
        # y is x plus a second uniform draw, so every point has y >= x.
        xs = rng.random(size=size)
        ys = xs + rng.random(size=size)
        for x, y in zip(xs, ys):
            make_point(
                x=x,
                y=y,
                color=next(colors),
                alpha_mult=alpha_mult,
                con_max=con_max,
                num_cont=num_cont,
            )

    # No ticks at all - the axes are purely decorative.
    ax.set_xticks([])
    ax.set_xticks([], minor=True)
    ax.set_yticks([])
    ax.set_yticks([], minor=True)

    fig.tight_layout()

    fig.patch.set_facecolor("black")

    return fig


# --- plotting_examples/y2022/metadata.py ---


@dataclass
class Colors:
    """
    Named hex colours used by the plots.

    https://mycolor.space/?hex=%23FF69B4&sub=1

    The attributes are annotated so that ``dataclass`` treats them as fields: without
    annotations the decorator sees no fields and the generated ``__repr__`` /
    ``__eq__`` ignore every colour. Class-level access (``Colors.GREY``) and the
    no-argument constructor keep working because each field has a default.
    """

    PINK_COLOUR: str = "#ff69b4"
    BACKGROUND_COLOUR: str = "#f2f2f2"
    GREY: str = "#919191"
    BLUE: str = "#007FCB"
    LIGHT_GREEN: str = "#B4EDD2"
    DEEPER_GREEN: str = "#51B9BE"
    BROWNY_RED: str = "#554149"
    PURPLEY: str = "#8F6E9B"
    TAN: str = "#DDD7C6"


# Shared instance; plots read colours as ``metadata.color.<NAME>``.
color = Colors()
def main() -> mpl.figure.Figure:
    """
    Draw a net-trust bar chart styled after an Opinium graphic.

    Every entry of the hard-coded ``data`` mapping gets its own row (rows are two
    y-units apart): a thick horizontal line from just past zero to the value, the
    name on the opposite side of zero and the signed value beyond the bar's end.
    Positive values are green, the rest pink, and every other row gets a faint grey
    band. A title block, source footnote, sample-size note and the ``opinium.png``
    logo stored next to this module are then added.
    """
    with plt.rc_context(
        {
            "xtick.major.pad": 10,
            "font.family": "monospace",
        },
    ):
        data = {
            "Trump": -63,
            "Johnson": -11,
            "O'Neill": 3,
            "Foster": 9,
            "Khan": 16,
            "Starmer": 18,
            "Sturgeon": 34,
            "Drakeford": 34,
        }

        fig, ax = plt.subplots(figsize=(15, 7))

        # trying to setup as many variables as possible here - though there are still
        # some magic values
        max_val = max(data.values())

        line_width = 20
        start_offset = line_width * 0.08
        percentage_label_shift = 3
        name_label_x = 20
        positive_bar_color = metadata.color.DEEPER_GREEN
        negative_bar_color = metadata.color.PINK_COLOUR
        font_size = 12
        source_fontsize = 8
        footnote_location = (0, -0.3)

        # Johnson here is the value which is used as it's not the most negative, but
        # is negative. Really, this is just what kinda looked ok, with different data
        # there would likely have to be pretty different approaches to all of this.
        grey_bar_left_x = data["Johnson"]

        # Iterate the mapping itself so the number of rows always follows the data.
        for bar_i, (name, x_val) in enumerate(data.items()):
            y_val = bar_i * 2
            if x_val > 0:
                # Bar grows right, so the name goes to the left of zero.
                direction = -1
                left_adjust = 9
                bar_color = positive_bar_color
                sign = "+"
                sign_align = "left"
            else:
                # Bar grows left, so the name goes to the right of zero.
                direction = 1
                left_adjust = -15
                bar_color = negative_bar_color
                sign = ""
                sign_align = "right"

            # The bar itself: a thick line starting slightly on the bar's own side
            # of zero.
            ax.plot(
                [start_offset * -direction, x_val],
                [y_val, y_val],
                linewidth=line_width,
                c=bar_color,
            )
            ax.text(
                x=(name_label_x * direction) + left_adjust,
                y=y_val,
                s=name,
                horizontalalignment="left",
                verticalalignment="center",
                fontsize=font_size,
            )
            # Value label just beyond the end of the bar.
            ax.text(
                x=x_val + -direction * percentage_label_shift,
                y=y_val,
                s=f"{sign}{x_val}",
                verticalalignment="center",
                horizontalalignment=sign_align,
                fontsize=font_size,
            )

            # shading every other bar a bit
            if bar_i % 2 == 1:
                ax.plot(
                    [grey_bar_left_x, max_val + 20],
                    [y_val, y_val],
                    linewidth=line_width,
                    c="#a0a0a0",
                    alpha=0.07,
                    zorder=0,
                )

        # Strip all axis furniture - the text labels carry the information.
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.xaxis.set_ticklabels([])
        ax.yaxis.set_ticklabels([])
        ax.tick_params(axis="both", length=0)

        title_y = 1.2
        title_x = 0.45

        # Title
        ax.text(
            x=title_x,
            y=title_y,
            s="Level of Trust in information \nprovided on Coronavirus",
            transform=ax.transAxes,
            fontsize=20,
            horizontalalignment="left",
        )

        # subtitle
        ax.text(
            x=title_x,
            y=title_y - 0.11,
            s=(
                "Net Level of Trust in providing of information by party leaders\non"
                " Coronavirus"
            ),
            transform=ax.transAxes,
            c="#717171",
        )

        # Coloured rectangle to the left of the title block.
        rect = mpl.patches.Rectangle(
            (title_x - 0.015, title_y - 0.11),
            width=0.01,
            height=0.25,
            color=positive_bar_color,
            transform=ax.transAxes,
            clip_on=False,
        )
        ax.add_patch(rect)

        # source of data
        ax.text(
            x=footnote_location[0],
            y=footnote_location[1],
            s=(
                "https://www.opinium.com/wp-content/uploads/2020/06/"
                "VI-26-06-2020-Observer-Data-Tables.xlsx"
            ),
            transform=ax.transAxes,
            fontsize=source_fontsize,
        )

        # add company logo to plot
        image = mpimg.imread(
            pathlib.Path(__file__).parent / "opinium.png",
            format="png",
        )
        img_y = ax.bbox.ymin

        # transform=None: this text (like figimage below) is positioned in raw
        # pixel offsets from the axes bounding box.
        ax.text(
            x=ax.bbox.xmax + 400,
            y=img_y + 20,
            s="* Sample size: 2001\n25-26th June\nOpinium.co.uk",
            transform=None,
            verticalalignment="top",
        )

        fig.figimage(
            image,
            ax.bbox.xmax + 659,
            0,
            origin="upper",
        )
        ax.axvline(0, linewidth=0.1, alpha=0.9, color="#212121")

        fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
        ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)

        return fig
def sample_data() -> tuple[pd.DataFrame, dict[int, dict[str, str]]]:
    """
    Build the example frame and the per-row styling metadata.

    Returns a frame with four columns ("Column A" .. "Column D") and five rows, and a
    mapping from each row position to its ``{"colour": ..., "label": ...}`` entry.
    """
    csv_text = (
        "Column A,Column B,Column C,Column D\n"
        "22.9,21.4,26.6,27.1\n"
        "40.0,28.9,38.1,40.9\n"
        "20.9,22.0,18.7,15.3\n"
        "10.5,18.9,8.5,8.4\n"
        "5.7,8.8,8.1,8.3\n"
    )
    df_plot: pd.DataFrame = pd.read_csv(io.StringIO(csv_text))

    index_labels = {
        0: "Something",
        1: "Another",
        2: "This Thing",
        3: "Thai Food",
        4: "Finally",
    }
    index_colours = {
        0: metadata.color.TAN,
        1: metadata.color.DEEPER_GREEN,
        2: metadata.color.PINK_COLOUR,
        3: metadata.color.BLUE,
        4: metadata.color.PURPLEY,
    }

    # One entry per row position, e.g.
    # >>> {0: {"colour": "red", "label": "Something"}, 1: {...}, ...}
    plot_metadata = {
        position: {"colour": index_colours[position], "label": label}
        for position, label in index_labels.items()
    }

    return df_plot, plot_metadata


def main() -> mpl.figure.Figure:
    """
    Draw horizontal stacked bars with each segment's value printed on it.

    One bar per column of the sample data, one coloured segment per row. Segments
    whose value is below 3 are labelled "-" instead of the number.
    """
    frame, segment_meta = sample_data()

    # Reverse columns as want to plot A as first bar (barh draws bottom-up).
    # To rename the y tick labels, rename the dataframe columns.
    frame = frame.loc[:, frame.columns[::-1]]

    rc_overrides = {
        "xtick.major.pad": 10,
        "font.family": "monospace",
    }
    with plt.rc_context(rc_overrides):
        fig, ax = plt.subplots(
            figsize=(15, 5),
            ncols=1,
            nrows=1,
            sharey=True,
            constrained_layout=False,
        )

        segment_colours = [entry["colour"] for entry in segment_meta.values()]
        frame.T.plot.barh(stacked=True, ax=ax, color=segment_colours)

        # Hand-built legend: a thick coloured line per segment label.
        legend_handles = []
        for entry in segment_meta.values():
            legend_handles.append(
                Line2D(
                    [0],
                    [0],
                    color=entry["colour"],
                    label=entry["label"],
                    markersize=12,
                    linewidth=7,
                ),
            )
        ax.legend(
            handles=legend_handles,
            frameon=False,
            ncol=1,
            bbox_to_anchor=(1.01, 0.7),
            fontsize=12,
        )

        ax.set_title("This Is A Title", fontsize=20, y=1.05)
        ax.set_xlabel("%", fontsize=15)
        ax.grid(linewidth=0.2)
        ax.set_axisbelow(True)

        # Walk the axis patches alongside the flattened data values and write each
        # value in the centre of its patch.
        values = frame.to_numpy().flatten()
        min_bar_size = 3
        for position, bar in enumerate(ax.patches):
            value = values[position]
            text = value if value >= min_bar_size else "-"
            left, bottom = bar.get_xy()
            centre = (
                left + bar.get_width() * 0.5,
                bottom + bar.get_height() * 0.5,
            )
            ax.annotate(
                f"{text}",
                centre,
                ha="center",
                va="center",
                fontsize=12,
            )

        for spine_name in ax.spines:
            ax.spines[spine_name].set_visible(False)

        # Percent axis: 0-100 with a tick every 5.
        ax.set_xlim(0, 100)
        ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(base=5.0))

        fig.set_tight_layout(True)  # type: ignore[attr-defined]
        fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
        ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)
        return fig
# This the dichotomy - could be anything though, eg good/bad, old/young or whatever.
LEVEL_0 = "good"
LEVEL_1 = "bad"


def sample_data() -> tuple[pd.DataFrame, dict[int, str], dict[str, str]]:
    """
    Return sample dataframe.

    Dogs are taken from here : https://dogtime.com/dog-breeds/profiles

    Returns the frame (one row per breed, with a value and a classification for each
    level), a mapping from row index to the breed name in the ``meaning`` column, and
    a mapping from classification ("high" / "med" / "low") to bar colour.
    """
    df = pd.read_csv(
        io.StringIO(
            (
                f"{LEVEL_0},{LEVEL_1},nr,{LEVEL_0}_colour,{LEVEL_1}_colour,meaning\n"
                "47.303474,51.18364658,1.51287942,med,med,Akita\n"
                "34.10226721,44.76493548,21.13279731,med,med,Basset Hound\n"
                "12.08045446,69.67354868,18.24599686,low,med,Cavapoo\n"
                "60.91476132,22.71988935,16.36534934,med,low,Doberdor\n"
                "19.43282773,56.88924657,23.67792571,low,med,Greyhound\n"
                "54.05072885,29.96153606,15.98773508,med,low,Irish Terrier\n"
                "53.096035,35.37625972,11.52770528,med,med,Poodle\n"
                "78.23942162,17.26331569,4.497262699,high,low,Sloughi\n"
                "51.68818968,38.14985888,10.16195143,med,med,Whippet\n"
                "38.14462181,39.1176673,22.73771089,med,med,Xoloitzcuintli\n"
            ),
        ),
    )
    index_to_meaning_map: dict[int, str] = df["meaning"].to_dict()
    # high/med/low represent some pretend classifications for this example.
    colour_map = {
        "high": metadata.color.PINK_COLOUR,
        "med": metadata.color.TAN,
        "low": metadata.color.LIGHT_GREEN,
    }
    return df, index_to_meaning_map, colour_map


def plot_bar_percentages(df: pd.DataFrame, ax: plt.Axes) -> plt.Axes:  # type: ignore[name-defined]
    """
    Plot percentages next to bars.

    The first ``len(df)`` patches on ``ax`` are treated as the left-hand (negative
    width) bars and the rest as the right-hand bars, which is the order ``main``
    draws them in. Each label sits 8 x-units beyond the outer end of its bar.
    Returns ``ax``.
    """
    # Count rows rather than reading the index, so a non-default index on ``df``
    # does not shift the left/right split.
    n_left_bars = len(df)
    nudge = 8

    for i, patch in enumerate(ax.patches):
        width = patch.get_width()
        x, y = patch.get_xy()
        # Bar widths are percentages; scale to a fraction for the "%" format.
        val = round(width * 0.01, 2)

        if i < n_left_bars:
            # Left bars have negative width: flip the sign for display and push
            # the label further left.
            ann = f"{-val:.0%}"
            x_text = (x + width) - nudge
        else:
            ann = f"{val:.0%}"
            x_text = (x + width) + nudge

        ax.annotate(
            ann,
            (x_text, y + patch.get_height() * 0.5),
            ha="center",
            va="center",
        )
    return ax


def main() -> mpl.figure.Figure:
    """
    Draw the split horizontal bar chart.

    ``LEVEL_0`` values are drawn leftwards from zero and ``LEVEL_1`` values
    rightwards, one row per breed, each bar coloured by its high/med/low
    classification and labelled with its percentage.
    """
    df, index_to_meaning_map, colour_map = sample_data()

    with plt.rc_context(
        {
            "xtick.major.pad": 10,
            "font.family": "monospace",
        },
    ):
        # Create plot.
        fig, ax = plt.subplots(figsize=(15, 6))

        ax.set_axisbelow(True)

        # Left bars first, then right bars: plot_bar_percentages relies on this
        # order.
        ax.barh(
            df.index,
            width=-df[LEVEL_0],
            height=0.8,
            color=df[f"{LEVEL_0}_colour"].map(colour_map),
            edgecolor="black",
        )
        ax.barh(
            df.index,
            width=df[LEVEL_1],
            height=0.8,
            color=df[f"{LEVEL_1}_colour"].map(colour_map),
            edgecolor="black",
        )

        ax = plot_bar_percentages(df=df, ax=ax)

        # remove spines for top/right
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)

        # Set axis limits
        ax.set_ylim(bottom=-1, top=df.index.max() + 1)
        ax.set_xlim(left=-109, right=109)

        # Reformat tick frequency for x,y axis
        ax.xaxis.set_major_locator(plt_ticker.MultipleLocator(base=10))
        ax.yaxis.set_major_locator(plt_ticker.MultipleLocator(base=1))

        # Functions for reformatting plot tick values. FuncFormatter calls them as
        # (tick value, tick position); the position may be None.
        def x_fmt(x: float, _pos: int | None = None) -> str:
            # Both halves show magnitudes, so drop the minus sign.
            fmt = f"{int(x)} %"
            return fmt.replace("-", "")

        def y_fmt(value: float, _pos: int | None = None) -> str:
            # Look the label up by the tick's value (the row index), not by its
            # position in the tick list, so the result does not depend on how many
            # out-of-view ticks the locator emits before the first row.
            return index_to_meaning_map.get(int(round(value)), "")

        ax.xaxis.set_major_formatter(plt_ticker.FuncFormatter(x_fmt))
        ax.yaxis.set_major_formatter(plt_ticker.FuncFormatter(y_fmt))

        # Headings for the two halves, just above the axes either side of centre.
        heading_height = 1.1
        ax.text(
            0.48,
            heading_height,
            s=LEVEL_0,
            transform=ax.transAxes,
            ha="right",
            fontsize=20,
        )
        ax.text(
            0.52,
            heading_height,
            s=LEVEL_1,
            transform=ax.transAxes,
            ha="left",
            fontsize=20,
        )

        for tick in ax.get_xticklabels():
            tick.set_rotation(45)

        ax.grid(linewidth=0.2, which="major", axis="y")

        fig.set_tight_layout(True)  # type: ignore[attr-defined]
        fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
        ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)
        return fig
np_rnd = np.random.Generator(np.random.MT19937(2))


def sample_data(n_categories: int = 12) -> tuple[pd.DataFrame, dict[int, str]]:
    """
    Generate sample data.

    Returns a frame with a ``cat`` column (category ids ``1..n_categories``) and a
    non-negative ``cont`` column holding a two-mode sample per category, plus a
    mapping from category id to a randomly generated label. All randomness comes from
    the module-level ``np_rnd`` generator, so the order of draws below determines the
    values produced.
    """
    # random stuff from postgres website.
    document = (
        "\n"
        "PostgreSQL is an object-relational database management system (ORDBMS) based "
        "on POSTGRES, Version 4.2, developed at the University of California at "
        "Berkeley Computer Science Department. POSTGRES pioneered many concepts that "
        "only became available in some commercial database systems much later.\n"
        "\n"
        "PostgreSQL is an open-source descendant of this original Berkeley code. It "
        "supports a large part of the SQL standard and offers many modern features:\n"
        "\n"
        "complex queries\n"
        "foreign keys\n"
        "triggers\n"
        "updatable views\n"
        "transactional integrity\n"
        "multiversion concurrency control\n"
        "Also, PostgreSQL can be extended by the user in many ways, for example by "
        "adding new\n"
    )
    words = [x for x in re.sub(r"\n|\(|\)", " ", document, flags=re.M).split(" ") if x]

    def rand_string() -> str:
        """Random string of 3 to 14 words to represent labelling."""
        n_words = np_rnd.integers(3, 15, size=1)
        return " ".join(np_rnd.choice(words, size=n_words)).capitalize()

    def rand_cont() -> npt.NDArray[np.float64]:
        """Random bimodal sample, roughly like timing data, clipped at zero."""
        loc_min = 2
        loc_max = 7
        # Convert the one-element draws to plain ints straight away.
        mode_1_loc = int(np_rnd.integers(loc_min, loc_max, size=1)[0])
        size = int(np_rnd.integers(10, 250, size=1)[0])
        mode_1 = np_rnd.normal(loc=mode_1_loc, scale=2, size=size)

        # NOTE(review): the threshold is about 0.78, below every possible
        # mode_1_loc (>= loc_min), so direction is always -1. Possibly a midpoint
        # was intended; left as is so the generated data does not change.
        direction = -1 if mode_1_loc > loc_max / (loc_max + loc_min) else 1

        # Second mode: offset by half the first location, 40% of the size.
        mode_2_loc = int(mode_1_loc + direction * mode_1_loc * 0.5)
        mode_2_size = int(size * 0.4)
        mode_2 = np_rnd.normal(loc=mode_2_loc, scale=2, size=mode_2_size)

        return cast(
            npt.NDArray[np.float64],
            np.clip(np.concatenate([mode_1, mode_2]), a_min=0, a_max=np.inf),
        )

    data_dict: dict[str, list[float]] = {"cat": [], "cont": []}

    for category in range(1, n_categories + 1):
        conts = rand_cont()
        # extend() appends in place instead of rebuilding the list each time.
        data_dict["cont"].extend(conts)
        data_dict["cat"].extend(np.repeat(category, len(conts)))

    data = pd.DataFrame(data_dict)
    labels = {x: rand_string() for x in data["cat"].unique()}

    return data, labels


def categorical_scatters(
    *,
    ax: plt.Axes,  # type: ignore[name-defined]
    data: pd.DataFrame,
    cont_var: str,
    cat_var: str,
    labels: Mapping[Any, str],
    # Used if there are particular colours for particular categories, if they're all
    # meant to be the same color then just pass in with the same value for each category
    # - they should all still be represented though.
    color_map: Mapping[Any, str] | None = None,
) -> plt.Axes:  # type: ignore[name-defined]
    """
    Draw one jittered row of points per category.

    Categories are taken from ``data[cat_var]`` in sorted order and placed at
    y = 1, 2, 3, ...; each row shows that category's ``data[cont_var]`` values with a
    little vertical noise from the module-level ``np_rnd`` generator. The y ticks are
    set at those row positions and labelled with ``labels[category]``, wrapped at 30
    characters. Values equal to 88888 are left out (presumably a missing-value code;
    confirm against the real data). Without ``color_map`` every row is pink.

    Returns ``ax``.
    """
    default_color = metadata.color.PINK_COLOUR

    # (row position, label text) for every category drawn.
    y_ticks: list[tuple[int, str]] = []

    # Grouping by the column name (not a one-element list) yields scalar keys.
    for y_val, (g, dfg) in enumerate(data.groupby(cat_var), 1):
        color = color_map[g] if color_map else default_color

        x_values = dfg.loc[dfg[cont_var].ne(88888), cont_var]
        # Size the jitter from the filtered values so x and y always have the same
        # length, even when sentinel rows were dropped.
        y_values = np.repeat([y_val], len(x_values)) + np_rnd.normal(
            loc=0,
            scale=0.05,
            size=len(x_values),
        )
        ax.scatter(
            x=x_values,
            y=y_values,
            color=color,
            alpha=0.3,
        )

        # Tick goes where the row was drawn, which only coincides with the
        # category key when the keys happen to be 1..n.
        y_ticks.append((y_val, labels[g]))

    ax.grid(alpha=0.1)

    ax.yaxis.set_major_locator(
        mpl.ticker.FixedLocator([position for position, _ in y_ticks]),
    )
    ax.yaxis.set_major_formatter(
        mpl.ticker.FixedFormatter(
            ["\n".join(textwrap.wrap(text, width=30)) for _, text in y_ticks],
        ),
    )

    return ax


def main() -> mpl.figure.Figure:
    """
    Plot the sample data as categorical scatter rows.

    All categories are pink except category 3, which is highlighted in green.
    """
    data, labels = sample_data()

    cat_var = "cat"
    cont_var = "cont"

    # color
    color_map = {x: metadata.color.PINK_COLOUR for x in labels}
    # Maybe we want to highlight a particular value or whatever idk.
    color_map[3] = metadata.color.DEEPER_GREEN

    with plt.rc_context(
        {
            "xtick.major.pad": 10,
            "font.family": "monospace",
        },
    ):
        fig, ax = plt.subplots(
            figsize=(20, 20),
            ncols=1,
            nrows=1,
            sharey=True,
            constrained_layout=False,
        )
        ax = categorical_scatters(
            data=data,
            cont_var=cont_var,
            cat_var=cat_var,
            labels=labels,
            ax=ax,
            color_map=color_map,
        )
        ax.set_title(
            "Scatter plot with categorical labels",
            fontsize=20,
        )

        # axis styling
        for side in ("top", "right", "left", "bottom"):
            ax.spines[side].set_visible(False)

        fig.set_tight_layout(True)  # type: ignore[attr-defined]
        fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
        ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)
        return fig
np_rnd = np.random.Generator(np.random.MT19937(1977))


def main() -> mpl.figure.Figure:
    """
    Draw a 4x4 scatter matrix with a KDE on each diagonal panel.

    Off-diagonal panel (row, col) scatters variable ``row`` on x against variable
    ``col`` on y; diagonal panels carry the variable name and a filled density
    estimate. The data is a chi-square sample drawn from the seeded module-level
    generator.
    """
    names = ["mpg", "disp", "drat", "wt"]
    data = 10 * np_rnd.chisquare(df=4, size=(4, 50))
    numvars = data.shape[0]

    pink = metadata.color.PINK_COLOUR
    background = metadata.color.BACKGROUND_COLOUR
    # Every (row, col) pair with row != col.
    off_diagonal = list(itertools.permutations(range(numvars), 2))

    rc_overrides = {
        "xtick.major.pad": 10,
        "font.family": "monospace",
    }
    with plt.rc_context(rc_overrides):
        fig, axes = plt.subplots(
            nrows=numvars,
            ncols=numvars,
            figsize=(15, 15),
            constrained_layout=True,
        )

        # Start with every axis hidden; the ones wanted are switched back on below.
        for panel in axes.flat:
            panel.xaxis.set_visible(False)
            panel.yaxis.set_visible(False)

        # Plot the data.
        for row, col in off_diagonal:
            panel = axes[row, col]
            panel.scatter(data[row], data[col], color=pink)
            panel.set_facecolor(background)
            panel.grid(linestyle=":", alpha=0.2)

        # Label the diagonal subplots...
        for position, label in enumerate(names):
            axes[position, position].annotate(
                label,
                (0.5, 0.5),
                xycoords="axes fraction",
                ha="center",
                va="center",
                fontsize=15,
                fontweight="bold",
            )

        # x axes are shown, with rotated labels, on every off-diagonal panel.
        rotate = 45
        for row, col in off_diagonal:
            panel = axes[row, col]
            panel.xaxis.set_visible(True)
            for tick in panel.get_xticklabels():
                tick.set_rotation(rotate)

        # plot the densities on the diagonal
        for position in range(numvars):
            panel = axes[position, position]
            sns.kdeplot(
                x=data[position],
                ax=panel,
                alpha=0.1,
                fill=True,
                color=pink,
            )
            panel.set_facecolor(background)

        # y axes only on the first column, below the top-left diagonal panel.
        for row in range(1, numvars):
            axes[row, 0].yaxis.set_visible(True)

        fig.suptitle("Example Scatterplots", fontsize=20)
        fig.patch.set_facecolor(background)

    return fig
def get_plotting_data() -> pd.DataFrame:
    """
    Build the per-year distributor table used for plotting.

    Reads ``data.parquet`` from this file's directory and, for every year, works
    out how often each distributor appears (``count``) and its share of that
    year's rows (``percentage``, rounded to one decimal). Within a year the first
    row after sorting by count gets the highlight colour and every other row
    gets the muted one. ``year`` is converted to a datetime in the result.

    Raises ``ValueError`` if any row ends up without a colour.
    """
    highlight_colour = "#ff2309"
    other_colour = "#d0d0d0"

    raw = pd.read_parquet(pathlib.Path(__file__).parent / "data.parquet")

    yearly_frames = []
    for group_key, year_rows in raw.groupby(["year"]):
        # Grouping by a one-element list yields one-element tuple keys.
        year = group_key[0]
        distributors = year_rows["distributor"]

        counts = distributors.value_counts().reset_index().assign(year=year)
        shares = (
            distributors.value_counts(normalize=True)
            .reset_index()
            .rename(columns={"proportion": "percentage"})
            .assign(
                percentage=lambda frame: frame["percentage"].mul(100).round(1),
                year=year,
            )
        )

        ranked = pd.merge(counts, shares, on=["distributor", "year"])
        ranked = ranked.sort_values("count", ascending=False)

        # One highlighted row followed by muted rows for the remainder.
        ranked["colour"] = [highlight_colour] + [other_colour] * (len(ranked) - 1)

        if ranked["colour"].isna().any():
            raise ValueError

        yearly_frames.append(ranked)

    plotting_data = pd.concat(yearly_frames)
    plotting_data["year"] = pd.to_datetime(plotting_data["year"], format="%Y")

    return plotting_data
def main() -> mpl.figure.Figure:
    """
    Plot every distributor's yearly share, naming the highlighted one per year.

    Rows whose ``colour`` is the muted grey are drawn as small, faint points.
    All remaining rows are drawn as large opaque points with the distributor's
    name written on top, outlined with a stroke path effect so the text stays
    legible over the marker.

    Returns the populated figure.
    """
    plotting_data = get_plotting_data()

    # Colour that get_plotting_data() assigns to every row except each year's
    # first-ranked distributor.
    other_colour = "#d0d0d0"
    is_other = plotting_data["colour"].eq(other_colour)
    others = plotting_data.loc[is_other]
    leaders = plotting_data.loc[~is_other]

    with plt.rc_context(
        {
            "xtick.major.pad": 10,
            "font.family": "monospace",
        },
    ):
        fig, ax = plt.subplots(figsize=(40, 15))

        # One scatter call per style rather than one per row.
        if not others.empty:
            ax.scatter(
                x=others["year"],
                y=others["percentage"],
                alpha=0.2,
                s=300,
                color=metadata.color.PINK_COLOUR,
                zorder=1,
            )

        if not leaders.empty:
            ax.scatter(
                x=leaders["year"],
                y=leaders["percentage"],
                alpha=1,
                s=800,
                color=metadata.color.PINK_COLOUR,
                zorder=2,
            )

        # plot text of distributor.
        for year, percentage, distributor in zip(
            leaders["year"],
            leaders["percentage"],
            leaders["distributor"],
        ):
            ax.text(
                x=year,
                y=percentage,
                s=distributor,
                horizontalalignment="center",
                verticalalignment="center",
                color="black",
                size=14,
                path_effects=[
                    pe.withStroke(
                        linewidth=4,
                        foreground=metadata.color.PINK_COLOUR,
                    ),
                ],
            )

        ax.yaxis.set_major_formatter(mtick.PercentFormatter())
        ax.set_title("Top film distributor, 1957 - 2021", fontsize=35, y=1.05)

        ax.tick_params(axis="both", which="major", labelsize=15)
        ax.tick_params(axis="both", which="both", length=0)

        for side in ("top", "right", "left", "bottom"):
            ax.spines[side].set_visible(False)

        ax.grid(alpha=0.15, axis="y", zorder=0)

        years = YearLocator(5)  # type: ignore[no-untyped-call]
        years_fmt = DateFormatter("%Y")  # type: ignore[no-untyped-call]
        ax.xaxis.set_major_locator(years)
        ax.xaxis.set_major_formatter(years_fmt)

        fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
        ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)

    return fig
def main() -> mpl.figure.Figure:
    """
    Draw a seaborn violin plot and then restyle the artists it created.

    Flipper length is plotted per species from ``data.parquet`` next to this
    file. After seaborn has drawn the violins, selected line artists are
    thickened to show that the returned axis can be edited like any other.

    Returns the populated figure.
    """
    rc_overrides = {
        "xtick.major.pad": 10,
        "font.family": "monospace",
    }
    # What size to increase/decrease the central boxplot section to.
    box_linewidth = 30

    with plt.rc_context(rc_overrides):
        fig, axis = plt.subplots(
            figsize=(10, 5),
            constrained_layout=False,
        )
        measurements = pd.read_parquet(pathlib.Path(__file__).parent / "data.parquet")

        vio = sns.violinplot(
            data=measurements,
            x="species",
            y="flipper_length_mm",
            density_norm="count",
            inner="box",
            linewidth=4,
            ax=axis,
            color=metadata.color.PINK_COLOUR,
        )
        vio.grid(alpha=0.2)

        # adjust the size of the boxplot, which of these list elements to edit
        # is just guess and check.
        axis_lines = vio.lines
        for position in range(1, len(axis_lines), 2):
            axis_lines[position].set_linewidth(box_linewidth)

        # Adjust the median point markers within the boxplot.
        for artist in vio.get_children()[1:6:2]:
            artist.set_linewidth(5)

        background = metadata.color.BACKGROUND_COLOUR
        fig.patch.set_facecolor(background)
        vio.set_facecolor(background)

    return fig
def random_dates(
    start: pd.Timestamp,
    end: pd.Timestamp,
    n_days: int,
    unit: str = "D",
) -> pd.DatetimeIndex:
    """
    Generate random dates.

    Draws ``n_days`` uniform offsets in ``[0, span)`` from the module-level
    ``np_rnd`` generator, where ``span`` is the whole number of days between
    ``start`` and ``end`` plus one, interprets them in ``unit`` and adds them to
    ``start``. Despite its name, ``n_days`` is the number of timestamps
    returned, not a number of days.

    >>> start = pd.to_datetime('2015-01-01')
    >>> end = pd.to_datetime('2018-01-01')

    Found on a SO post, can't remember where now though.
    """
    span_days = (end - start).days + 1
    return pd.to_timedelta(np_rnd.random(n_days) * span_days, unit=unit) + start


def main() -> mpl.figure.Figure:
    """
    Plot monthly stream totals per song, one panel per country.

    Random per-event data is generated, summed to month-end totals per
    location and song, and drawn on a 2x3 grid: five country panels plus one
    panel (index 4) that only holds a hand-built legend. Each country panel
    shows month abbreviations as minor x tick labels and the year on a
    secondary x-axis placed below the main one.

    Returns the populated figure.
    """
    n = 10_000
    # Color mapping for song names to use in plotting
    song_colors = {
        "fifteen sixteen": metadata.color.TAN,
        "four five six": metadata.color.PURPLEY,
        "fourteen": "black",
        "one two three": metadata.color.PINK_COLOUR,
        "seven eight nine": metadata.color.DEEPER_GREEN,
        "ten eleven twelve": metadata.color.BLUE,
        "thirteen": metadata.color.BROWNY_RED,
    }

    # generate sample data (the dict order fixes the order of the random draws)
    events = pd.DataFrame(
        {
            "location": np_rnd.choice(
                ["UK", "US", "FR", "JP", "DE"],
                size=n,
            ),
            "song": np_rnd.choice(
                [
                    "one two three",
                    "four five six",
                    "seven eight nine",
                    "ten eleven twelve",
                    "thirteen",
                    "fourteen",
                    "fifteen sixteen",
                ],
                size=n,
            ),
            "streams": np_rnd.integers(1_000, 10_000, size=n),
            "date": random_dates(
                start=pd.to_datetime("2020-01-01"),
                end=pd.to_datetime("2022-03-01"),
                n_days=n,
            ),
        },
    )
    # aggregate for plotting
    df = (
        events.groupby(["location", "song", pd.Grouper(key="date", freq="ME")])[
            "streams"
        ]
        .sum()
        .reset_index()
        .assign(
            # Both columns are computed from the aggregated rows themselves, so
            # each label describes the month of the row it sits on.
            date_name=lambda agg: (
                agg["date"].dt.month_name() + " " + agg["date"].dt.year.astype(str)
            ),
            color=lambda agg: agg["song"].map(song_colors),
        )
    )

    def format_axis(ax: plt.Axes) -> None:  # type: ignore[name-defined]
        """Format axis."""
        ax.grid(alpha=0.2)

    def stream_plot(df: pd.DataFrame, country: str, ax: plt.Axes) -> None:  # type: ignore[name-defined]
        """Draw one line per song in ``df`` on ``ax`` and style the panel."""
        for _, song_data in df.groupby("song"):
            ax.plot(
                song_data["date"],
                song_data["streams"],
                color=song_data["color"].iloc[-1],
                alpha=0.7,
                linewidth=3,
            )
        format_axis(ax=ax)
        ax.set_title(
            country,
            fontsize=20,
        )

        for label in ax.get_xticklabels():
            label.set_rotation(45)
            label.set_ha("right")

        ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)

        # want to format 1000 -> 1,000
        ax.get_yaxis().set_major_formatter(
            mpl.ticker.FuncFormatter(lambda x, _: format(int(x), ",")),
        )
        # reduce some noise
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)

        fmt_month = mdates.MonthLocator(interval=1)  # type: ignore[no-untyped-call]
        fmt_year = mdates.YearLocator()  # type: ignore[no-untyped-call]
        ax.xaxis.set_minor_locator(fmt_month)
        ax.xaxis.set_minor_formatter(mdates.DateFormatter("%b"))  # type: ignore[no-untyped-call]
        ax.xaxis.set_ticks([])

        ax.tick_params(axis="x", which="minor", labelsize=8)

        sec_xaxis = ax.secondary_xaxis(-0.1)
        sec_xaxis.xaxis.set_major_locator(fmt_year)
        sec_xaxis.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))  # type: ignore[no-untyped-call]
        sec_xaxis.spines["bottom"].set_visible(False)
        sec_xaxis.tick_params(length=0, labelsize=12)

    color_dict = df.drop_duplicates("song").set_index("song")["color"].to_dict()

    fig, axis = plt.subplots(
        ncols=3,
        nrows=2,
        figsize=(35, 20),
    )
    fig.subplots_adjust(hspace=0.5)

    axis = axis.flatten()

    fig.suptitle(
        "Streaming across different countries for different songs",
        fontsize=25,
    )

    # Flattened grid position of each country panel; position 4 is left free
    # for the legend.
    panel_positions = {"DE": 0, "FR": 1, "JP": 2, "UK": 3, "US": 5}
    for country, position in panel_positions.items():
        stream_plot(
            df=df.loc[df["location"].eq(country)],
            country=country,
            ax=axis[position],
        )

    # Plot legend

    ax = axis[4]
    custom_lines = [Line2D([0], [0], color=x, lw=6) for x in color_dict.values()]
    ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)

    ax.legend(
        custom_lines,
        list(color_dict.keys()),
        loc="center",
        fontsize=16,
        frameon=False,
        borderpad=2,
    )

    for spine in ax.spines.values():
        spine.set_visible(False)

    ax.get_xaxis().set_ticks([])
    ax.get_yaxis().set_ticks([])

    fig.supylabel(
        "Something about the y-axis",
        x=0.09,
        fontsize=20,
    )

    fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
    return fig
__name__ == "__main__": 236 | with plt.rc_context( 237 | { 238 | "xtick.major.pad": 10, 239 | "font.family": "monospace", 240 | }, 241 | ): 242 | dvc_entry.add_to_dvc(path=pathlib.Path(__file__)) 243 | save_plot_output.save_plot(fig=main(), file=__file__) 244 | raise SystemExit 245 | -------------------------------------------------------------------------------- /plotting_examples/y2022/stacked_bar_with_single_bars_layout/__init__.py: -------------------------------------------------------------------------------- 1 | """Stacked bar chart.""" 2 | -------------------------------------------------------------------------------- /plotting_examples/y2022/stacked_bar_with_single_bars_layout/data/lab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/stacked_bar_with_single_bars_layout/data/lab.png -------------------------------------------------------------------------------- /plotting_examples/y2022/stacked_bar_with_single_bars_layout/plot.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=duplicate-code 2 | """ 3 | Layout containing two bar plots and a bivariate plot between them. 4 | 5 | In this case it's a silly example of some data containing the social grade of 6 | Labradors, as well as the education group. The main plot is a stacked bar containing 7 | the breakdown of education group for each social grade. 8 | 9 | Don't think I'm too keen on the code for this plot - though it's not always so clear 10 | (to me) how to make "nice" code with a lot of matplotlib stuff. 11 | 12 | Obviously, the data is made up. 
13 | """ 14 | 15 | from __future__ import annotations 16 | 17 | import pathlib 18 | 19 | import matplotlib as mpl 20 | import matplotlib.pyplot as plt 21 | import numpy as np 22 | import pandas as pd 23 | from PIL import Image 24 | 25 | from plotting_examples import dvc_entry, save_plot_output 26 | from plotting_examples.y2022 import metadata 27 | 28 | # Fontsize for the main title and subtitle 29 | FONTSIZE_TITLE = 30 30 | FONTSIZE_SUBTITLE = 20 31 | # Fontsize for the numbers displayed on bars. 32 | FONTSIZE_PLT_TXT = 10 33 | 34 | # What colour to outline the edges of bars with - if None then there's no outline 35 | # created. 36 | BAR_EDGECOLOR: str | None = None 37 | # What level of rounding to apply to percentages displayed on bars. 38 | ROUNDING_PCTS = 1 39 | 40 | # Dependent var 41 | VAR_DEPENDENT = "dependent_var" 42 | # Independent var 43 | VAR_INDEPENDENT = "independent_var" 44 | # Text for the main title - the subtitle is generated from the metadata atm. 45 | TEXT_TITLE = "Labradors\neducation ~ social grade" 46 | # Image to display in teh top left. 47 | IMAGE_PATH = ( 48 | "./plotting_examples/y2022/stacked_bar_with_single_bars_layout/data/lab.png" 49 | ) 50 | 51 | COLORS = [ 52 | metadata.color.PINK_COLOUR, 53 | metadata.color.DEEPER_GREEN, 54 | metadata.color.BROWNY_RED, 55 | ] 56 | COLOR_SUBTITLE_TEXT = "#808080" 57 | 58 | # subplot_mosaic layout definition. 59 | LAYOUT = [ 60 | ["top_left_corner", "title", "title", "title", "top_right_corner"], 61 | ["main", "main", "main", "main", "side"], 62 | ["main", "main", "main", "main", "side"], 63 | ["bottom", "bottom", "bottom", "bottom", "bottom_right_corner"], 64 | ] 65 | 66 | # Colors which are used when the bar colour is dark/light respectively - so that the 67 | # text is readable (not dark font on dark bars etd). 
def get_sample_data() -> (
    tuple[
        pd.DataFrame,
        dict[str, dict[float, str]],
        dict[str, str],
    ]
):
    """
    Generate sample data.

    The three return values mirror what you'd find in an SPSS sav file: the
    responses frame (df), the value labels for each column (vvl) and the column
    labels (cnl). Every row of the frame also carries a constant ``weight`` of 1.
    """
    rng = np.random.default_rng(1)

    # (independent level, number of rows, probabilities of dependent values 1/2/3).
    # Each level gets its own distribution; the order here is the order in which
    # the generator is consumed.
    level_specs = (
        (5, 330, (0.87, 0.1, 0.03)),
        (4, 410, (0.44, 0.54, 0.02)),
        (3, 510, (0.26, 0.61, 0.13)),
        (2, 800, (0.105, 0.565, 0.33)),
        (1, 950, (0.08, 0.33, 0.59)),
    )

    per_level_frames = []
    for level, n_rows, probabilities in level_specs:
        per_level_frames.append(
            pd.DataFrame(
                {
                    VAR_DEPENDENT: rng.choice(
                        [1, 2, 3],
                        size=n_rows,
                        p=probabilities,
                    ),
                    VAR_INDEPENDENT: level,
                },
            ),
        )

    df = pd.concat(per_level_frames).assign(weight=1).reset_index(drop=True)

    vvl = {
        VAR_INDEPENDENT: {
            1.0: "Upper management",
            2.0: "Lower Management",
            3.0: "Intermediate",
            4.0: "Routine",
            5.0: "Never worked",
        },
        VAR_DEPENDENT: {
            1.0: "Low",
            2.0: "Medium",
            3.0: "High",
        },
    }
    cnl = {
        VAR_INDEPENDENT: "Social Grade",
        VAR_DEPENDENT: "Education Level",
    }

    return df, vvl, cnl
def patch_color_light(patch: mpl.patches.Rectangle) -> bool:
    """
    Determine if mpl patch is light or dark.

    The patch's face colour is converted to 0-255 red, green and blue values
    and combined with 0.299 / 0.587 / 0.114 weights; the patch counts as light
    when that weighted sum is strictly above the threshold.
    """
    # TODO: Put this into a global helper module.
    hex_col = mpl.colors.to_hex(patch.get_facecolor())[1:]
    red, green, blue = (int(hex_col[start : start + 2], 16) for start in (0, 2, 4))
    # https://stackoverflow.com/questions/3942878/how-to-decide-
    # font-color-in-white-or-black-depending-on-background-color
    threshold = 100
    return (red * 0.299 + green * 0.587 + blue * 0.114) > threshold


class PlotSections:

    """
    Holds plotting sections.

    Just using this for namespacing really! Which was triggered by pylint complaining,
    which probably isn't a good reason... Might usually just put this in a module but
    wanted all the code in plot.py

    Considered adding the df, vvl, cnl to the class in an __init__ or whatever but left
    it as-is.

    Every method is a ``staticmethod`` that draws one region of the layout onto
    the axis it is given and returns nothing.
    """

    # rename to bivariate.
    @staticmethod
    def main(
        ax: plt.Axes,  # type: ignore[name-defined]
        df: pd.DataFrame,
        vvl: dict[str, dict[float, str]],
    ) -> None:
        # pylint: disable=too-many-locals
        """
        Plot the stacked bars.

        One horizontal bar per independent-variable label, split into the
        column-normalised percentages of the dependent variable. Each segment is
        annotated with ``percentage (count)``, or ``-`` when the segment is
        narrower than ``min_bar_size`` percent.
        """
        grade_labels = df[VAR_INDEPENDENT].replace(vvl[VAR_INDEPENDENT])
        grade_order = list(vvl[VAR_INDEPENDENT].values())

        df_plot = (
            pd.crosstab(
                df[VAR_DEPENDENT],
                grade_labels,
                normalize="columns",
            )
            .mul(100)
            .round(ROUNDING_PCTS)
            .loc[:, grade_order]
        )
        # Same labels and same column order as df_plot, so the flattened counts
        # line up with the flattened percentages cell for cell.
        df_plot_counts = pd.crosstab(
            df[VAR_DEPENDENT],
            grade_labels,
        ).loc[:, grade_order]

        df_plot.T.plot.barh(
            stacked=True,
            ax=ax,
            color=COLORS,
            edgecolor=BAR_EDGECOLOR,
        )

        ax.grid(linestyle=":", alpha=0.3)

        # The legend _should_ be self explanatory from the context of the plot.
        ax.get_legend().remove()

        # Not interested in seeing the col name on the y axis for the main plot
        ax.set_ylabel("")

        percentages = df_plot.to_numpy().flatten()
        segment_counts = df_plot_counts.to_numpy().flatten()
        min_bar_size = 3
        for patch, percentage, segment_count in zip(
            ax.patches,
            percentages,
            segment_counts,
        ):
            width = patch.get_width()
            height = patch.get_height()
            x, y = patch.get_xy()

            # Segments that are too narrow to hold the full text just get a dash.
            if percentage >= min_bar_size:
                ann = f"{percentage} ({segment_count})"
            else:
                ann = "-"

            text_col = COLOR_FONT_LIGHT if patch_color_light(patch) else COLOR_FONT_DARK

            ax.annotate(
                f"{ann}",
                (x + width * 0.5, y + height * 0.5),
                ha="center",
                va="center",
                fontsize=FONTSIZE_PLT_TXT,
                zorder=12,
                color=text_col,
            )

        ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(base=5))
        ax.set_xlabel("%", fontsize=10)
        ax.set_xlim(0, 100)

        ax.spines.top.set_visible(False)
        ax.spines.right.set_visible(False)

    @staticmethod
    def side_marginal(
        ax: plt.Axes,  # type: ignore[name-defined]
        df: pd.DataFrame,
        vvl: dict[str, dict[float, str]],
        cnl: dict[str, str],
    ) -> None:
        """
        Bar plot of the independent var.

        Bars follow the label order given in ``vvl`` rather than frequency order;
        labels with no rows get a zero-length bar. Each bar is annotated with its
        share of all rows and its count.
        """
        counts = (
            df[VAR_INDEPENDENT]
            .replace(vvl[VAR_INDEPENDENT])
            .value_counts()
            .reindex(list(vvl[VAR_INDEPENDENT].values()), fill_value=0)
        )

        ax.barh(
            counts.index,
            counts,
            color=metadata.color.TAN,
            edgecolor=BAR_EDGECOLOR,
            height=0.5,
        )
        ax.grid(alpha=0.2, linestyle=":")
        ax.set_title(cnl[VAR_INDEPENDENT], loc="left")

        total = int(counts.sum())

        for count, patch in zip(counts, ax.patches):
            count_pct = round((count / total) * 100, ROUNDING_PCTS)
            width = patch.get_width()
            height = patch.get_height()
            x, y = patch.get_xy()
            txt_color = (
                COLOR_FONT_LIGHT if patch_color_light(patch) else COLOR_FONT_DARK
            )
            ax.text(
                s=f"{count_pct}%\n({count})",
                x=x + width * 0.5,
                y=y + height * 0.5,
                va="center",
                ha="center",
                color=txt_color,
                fontsize=FONTSIZE_PLT_TXT,
            )

        ax.set_xticks([])
        ax.set_yticks([])

        ax.spines.right.set_visible(False)
        ax.spines.top.set_visible(False)
        ax.spines.bottom.set_visible(False)
        ax.spines.left.set_visible(False)

    @staticmethod
    def bottom_marginal(
        ax: plt.Axes,  # type: ignore[name-defined]
        df: pd.DataFrame,
        vvl: dict[str, dict[float, str]],
        cnl: dict[str, str],
    ) -> None:
        """
        Bar plot of the dependent variable.

        One vertical bar per dependent-variable label, annotated with its share of
        all rows (rounded to ``ROUNDING_PCTS`` places) and its count.
        """
        counts = df[VAR_DEPENDENT].value_counts().sort_index()
        ax.bar(
            x=list(vvl[VAR_DEPENDENT].values()),
            height=counts,
            color=COLORS,
            edgecolor=BAR_EDGECOLOR,
        )
        ax.set_title(cnl[VAR_DEPENDENT])
        ax.set_yticks([])

        total = int(counts.sum())
        for count, patch in zip(counts, ax.patches):
            count_pct = round((count / total) * 100, ROUNDING_PCTS)
            width = patch.get_width()
            height = patch.get_height()
            x, y = patch.get_xy()
            txt_color = (
                COLOR_FONT_LIGHT if patch_color_light(patch) else COLOR_FONT_DARK
            )
            ax.text(
                s=f"{count_pct}\n({count})",
                x=x + width * 0.5,
                y=y + height * 0.5,
                va="center",
                ha="center",
                color=txt_color,
                fontsize=FONTSIZE_PLT_TXT,
            )

        ax.spines.top.set_visible(False)
        ax.spines.right.set_visible(False)
        ax.spines.left.set_visible(False)

    @staticmethod
    def title(ax: plt.Axes, cnl: dict[str, str]) -> None:  # type: ignore[name-defined]
        """Overall title, with a subtitle built from the two column labels."""
        ax.text(
            s="Labradors",
            x=0.1,
            y=0.5,
            fontsize=FONTSIZE_TITLE,
            horizontalalignment="left",
            verticalalignment="bottom",
        )

        # Just using this to nudge the text placement around...
        diff = 0.3
        ax.text(
            # Assuming that the metadata is reasonably nice for this.
            s=f"{cnl[VAR_DEPENDENT]} ~ {cnl[VAR_INDEPENDENT]}",
            x=0.1,
            y=0.5 - diff,
            fontsize=FONTSIZE_SUBTITLE,
            horizontalalignment="left",
            verticalalignment="bottom",
            color=COLOR_SUBTITLE_TEXT,
        )

        ax.axis("off")

    @staticmethod
    def top_left_corner(ax: plt.Axes) -> None:  # type: ignore[name-defined]
        """Plot logo."""
        # The context manager closes the image file once it has been handed to
        # the axis.
        with Image.open(IMAGE_PATH) as club_icon:
            ax.imshow(club_icon)
        ax.axis("off")

    @staticmethod
    def top_right_corner(ax: plt.Axes) -> None:  # type: ignore[name-defined]
        """Just leaving this empty for now."""
        ax.axis("off")

    @staticmethod
    def bottom_right_corner(ax: plt.Axes, df: pd.DataFrame) -> None:  # type: ignore[name-defined]
        """Some random information like data source etc."""
        ax.text(
            s=(
                #
                "2022 Labrador educational \ndata and social grades"
                "\n"
                "\n"
                f"Sample size : {df.shape[0]}"
                "\n"
                "\n"
                "source: somedogdata.com"
            ),
            x=0,
            y=0.5,
            fontsize=FONTSIZE_PLT_TXT,
            va="center",
            ha="left",
            color=COLOR_SUBTITLE_TEXT,
        )
        ax.axis("off")

    @staticmethod
    def footnote(ax: plt.Axes) -> None:  # type: ignore[name-defined]
        """
        Plot footnote.

        Didn't bother using this in the end.
        """
        ax.text(
            s=(
                #
                "Some text about the data, Labradors, whatever."
            ),
            x=0,
            y=1,
            fontsize=10,
            style="italic",
            va="top",
            ha="left",
            color=COLOR_SUBTITLE_TEXT,
        )
        ax.set_xticks([])
        ax.set_yticks([])
def main() -> mpl.figure.Figure:
    """
    Assemble the full mosaic figure.

    Generates the sample data, lays the axes out with ``subplot_mosaic`` using
    ``LAYOUT``, and asks ``PlotSections`` to draw each named region. Every axis
    and the figure itself then get the shared background colour.

    Returns the populated figure.
    """
    df, vvl, cnl = get_sample_data()

    sections = PlotSections()
    rc_overrides = {
        "xtick.major.pad": 10,
        "font.family": "monospace",
    }

    with plt.rc_context(rc_overrides):
        fig = plt.figure(figsize=(15, 10))
        panels = fig.subplot_mosaic(LAYOUT)  # type: ignore[arg-type]

        sections.title(ax=panels["title"], cnl=cnl)
        sections.bottom_marginal(ax=panels["bottom"], df=df, cnl=cnl, vvl=vvl)
        sections.main(ax=panels["main"], df=df, vvl=vvl)
        sections.side_marginal(ax=panels["side"], df=df, vvl=vvl, cnl=cnl)
        sections.top_left_corner(ax=panels["top_left_corner"])
        sections.top_right_corner(ax=panels["top_right_corner"])
        sections.bottom_right_corner(ax=panels["bottom_right_corner"], df=df)

        fig.tight_layout()

        # Set background colours.
        background = metadata.color.BACKGROUND_COLOUR
        fig.patch.set_facecolor(background)
        for panel in panels.values():
            panel.set_facecolor(background)

    return fig
-------------------------------------------------------------------------------- /plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.sbn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.sbn -------------------------------------------------------------------------------- /plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.sbx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.sbx -------------------------------------------------------------------------------- /plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.shp -------------------------------------------------------------------------------- /plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.shp.xml: -------------------------------------------------------------------------------- 1 | 2 | 20161025170019001.0TRUECalculateField Hex_All_Data Region_Name "Northern Ireland" VB #CalculateField Hex_All_Data Region_ID 12 VB #CalculateField Hex_All_Data DESCRIPTIO "Westminster Constituency" VB #file://\\AYL-LT-02839\Users\bflanagan\OneDrive - ESRI (UK) Ltd\Carto\BlogData\Cartograms.gdbLocal Area Network 3 | -------------------------------------------------------------------------------- 
/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.shx -------------------------------------------------------------------------------- /plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.zip -------------------------------------------------------------------------------- /plotting_examples/y2022/uk_hexmap/plot.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=duplicate-code 2 | """ 3 | Hex map for the UK constituencies. 4 | 5 | Some meaningless generated data - small multiples with hex maps can be useful sometimes 6 | though. Could be good to add in the geographically accurate version as well. 
def main() -> mpl.figure.Figure:
    """
    Plot three hex-cartogram panels of randomly assigned constituency winners.

    Each panel re-draws the ``Party`` column of the GB hex cartogram from the
    module-level seeded ``random_choice`` using that panel's probabilities, then
    colours the hexes by party. A shared legend is attached to the last panel.
    """
    shapefile = (
        pathlib.Path(__file__).parent
        / "data/gb_hex_cartogram/GB_Hex_Cartogram_Const.shp"
    )
    gdf = geopandas.read_file(shapefile)

    # Set up colours by party.
    partycolors = {
        "A": metadata.color.DEEPER_GREEN,
        "B": metadata.color.PINK_COLOUR,
        "C": metadata.color.TAN,
    }
    # The dataframe seems to assign items to categories based on the selected
    # column's sort order, so the colour map *and* the legend are both built from
    # the sorted party names to keep them in agreement.
    parties = sorted(partycolors)
    cmap = ListedColormap([partycolors[party] for party in parties])

    # (panel title, probability of each party in ``parties`` winning a seat)
    scenarios = [
        ("Current", [0.4, 0.3, 0.3]),
        ("Scenario A", [0.3, 0.6, 0.1]),
        ("Scenario B", [0.1, 0.8, 0.1]),
    ]

    font_size = 15
    edgecolor = "black"
    edge_width = 0.5

    with plt.rc_context(
        {
            "xtick.major.pad": 10,
            "font.family": "monospace",
        },
    ):
        fig, axes = plt.subplots(
            nrows=1,
            ncols=len(scenarios),
            figsize=(15, 5),
        )

        # Panels are drawn left to right so the random draws happen in the same
        # order as before and the generated image is unchanged.
        for ax, (title, probabilities) in zip(axes, scenarios, strict=True):
            gdf["Party"] = list(
                random_choice(
                    parties,
                    size=len(gdf),
                    replace=True,
                    p=probabilities,
                ),
            )
            gdf.plot(
                ax=ax,
                column="Party",
                cmap=cmap,
                edgecolor=edgecolor,
                linewidth=edge_width,
            )
            _ = ax.axis("off")
            _ = ax.set_title(title, fontsize=font_size, loc="left")

        # Create legend on the last panel.
        legend_ax = axes[-1]
        custom_lines = [
            mpl.lines.Line2D([0], [0], color=partycolors[party], lw=6)
            for party in parties
        ]
        legend_ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)
        legend_ax.legend(
            custom_lines,
            parties,
            loc=(0.7, 0.7),
            fontsize=12,
            frameon=False,
            borderpad=2,
        )

        fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
        fig.set_tight_layout(True)  # type: ignore[attr-defined]

    return fig
-------------------------------------------------------------------------------- 1 | """ 2 | Hex plotting example. 3 | 4 | Move hex example from notebook into here. 5 | """ 6 | -------------------------------------------------------------------------------- /plotting_examples/y2024/__init__.py: -------------------------------------------------------------------------------- 1 | """Plots from 2024.""" 2 | -------------------------------------------------------------------------------- /plotting_examples/y2024/cat_weight/__init__.py: -------------------------------------------------------------------------------- 1 | """Plot for cats weight loss.""" 2 | -------------------------------------------------------------------------------- /plotting_examples/y2024/cat_weight/data/cat_looking_to_side.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2024/cat_weight/data/cat_looking_to_side.jpeg -------------------------------------------------------------------------------- /plotting_examples/y2024/cat_weight/data/weight_data.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2024/cat_weight/data/weight_data.parquet -------------------------------------------------------------------------------- /plotting_examples/y2024/cat_weight/plot.py: -------------------------------------------------------------------------------- 1 | """ 2 | Timeseries of the cats diet. 3 | 4 | Cat was getting a little chunky towards the end of 2023 so had a resolution made for 5 | them to lose a bit of weight. Data collection is just a daily weigh, the average of 6 | this is taken (as there are sometimes multiple entries in a day) and then plotted along 7 | with a ten day rolling average. 
def get_xlsx_from_downloads() -> Path:
    """
    Convert the downloaded form-responses xlsx into this project's parquet file.

    Pretty janky approach but it's near enough whilst doing this - workflow is just to
    download the xlsx file containing the gform responses from gsheets then this will
    pick it up and write it to this project as a parquet file. Wasn't worth using the
    API for it.

    Returns the path of ``data/weight_data.parquet``. If no matching xlsx is found in
    ``~/Downloads`` the parquet file is not rewritten and its path is returned as is.

    Raises ``ValueError`` when more than one matching xlsx file is found.
    """
    output_name = Path(__file__).parent / "data" / "weight_data.parquet"
    xlsx_files = sorted((pathlib.Path.home() / "Downloads").glob("*espon*xlsx"))

    if not xlsx_files:
        # Most likely this is re-running and has already been moved so just use
        # whatever's already in data/.
        return output_name

    if len(xlsx_files) > 1:
        msg = "Expected a single file: "
        raise ValueError(msg, xlsx_files)

    # Get response data from xlsx sheet, pull out required columns and create date
    # column for grouping on. ``read_excel`` closes the workbook once it is parsed
    # (a bare ``pd.ExcelFile`` would leave the handle open).
    df_response = (
        pd.read_excel(xlsx_files[0], sheet_name="Form responses 1")
        .rename(columns=str.lower)
        .assign(
            timestamp=lambda x: pd.to_datetime(
                x["timestamp"],
                format="%d/%m/%Y %H:%M:%S",
            ),
            # Some of the days have multiple weigh-ins so want datestamp (the
            # timestamp truncated to midnight) to group by on in those days.
            datestamp=lambda x: x["timestamp"].dt.normalize(),
            mish_weight=lambda x: x["with_mish"].sub(x["without_mish"]),
        )
        .loc[:, ["timestamp", "datestamp", "mish_weight"]]
    ).rename(columns={"mish_weight": "cat_weight"})

    df_response.to_parquet(output_name)
    return output_name


def load_data(*, data_path: Path) -> pd.DataFrame:
    """
    Load the parquet weigh-in data and reduce it to one average weight per day.

    The returned frame has the columns ``datestamp`` and ``cat_daily_avg``.
    """
    response_data = pd.read_parquet(data_path)
    return (
        response_data
        # Only need these two columns.
        .groupby("datestamp")["cat_weight"]
        # Sometimes there are multiple readings in a day
        .mean()
        .reset_index()
        .rename(columns={"cat_weight": "cat_daily_avg"})
    )


def _annotate_weight(
    ax: mpl.axes.Axes,
    df: pd.DataFrame,
    *,
    idx: int,
    text_shift: tuple[int, int, float],
    rad: float,
) -> None:
    """
    Label the daily average at row ``idx`` with an arrow pointing at the point.

    ``text_shift`` is ``(rows along x, rows for the y lookup, kg added to y)`` and
    places the text relative to the annotated row. Row positions are clamped to
    the frame so an extreme near either end of the data neither raises an
    ``IndexError`` nor wraps around to the other end via a negative index.
    """
    x_rows, y_rows, y_kg = text_shift
    last_row = len(df) - 1
    x_row = min(max(idx + x_rows, 0), last_row)
    y_row = min(max(idx + y_rows, 0), last_row)
    _ = ax.annotate(
        f"{df['cat_daily_avg'].iloc[idx].round(2)} kg",
        # where the arrow should end up
        xy=(df["datestamp"].iloc[idx], df["cat_daily_avg"].iloc[idx]),
        # where the text should be
        xytext=(
            df["datestamp"].iloc[x_row],
            df["cat_daily_avg"].iloc[y_row] + y_kg,
        ),
        ha="center",
        va="bottom",
        arrowprops={
            "arrowstyle": "->",
            "connectionstyle": f"arc3,rad={rad}",
            "color": metadata.color.PINK_COLOUR,
        },
    )


def main() -> mpl.figure.Figure:
    """
    Plot the cat's daily weight with a ten day rolling average.

    Missing days are filled with the mean of the nearest recorded day either side
    and flagged as imputed; imputed days get no marker and no daily line, but they
    do feed the rolling average. The heaviest and lightest days are annotated and
    a picture of the cat is shown in the side panel.

    I did consider adding some figure shapes along the lines of::

        for _ in range(1, 30):
            factor = 20
            radius = np_rnd.random() / factor
            alpha = ((1 / factor) - radius) + 0.1
            circ = patches.Circle(
                (np_rnd.random(), np_rnd.random()),
                radius=radius,
                zorder=1,
                color=color.PINK_COLOUR,
                alpha=alpha,
            )
            circ.set_transform(fig.transFigure)
            fig.patches.append(circ)

    But left it.
    """
    df = load_data(data_path=get_xlsx_from_downloads())
    df_dates = pd.DataFrame(
        {
            "dates": pd.date_range(
                df["datestamp"].min(),
                df["datestamp"].max(),
            )
        }
    )

    # Ensure that all dates are represented (in case there's missed weigh-in days).
    df = (
        pd.merge(df, df_dates, left_on="datestamp", right_on="dates", how="right")
        .drop(columns="datestamp")
        .rename(columns={"dates": "datestamp"})
        .set_index("datestamp")
        .reset_index()
        .assign(imputed=lambda x: x["cat_daily_avg"].isna())
    )
    # Handle missing data - only expecting there to be a day of missing data at
    # most! Recorded days are unchanged as (a + a) / 2 == a.
    df["cat_daily_avg"] = (
        df["cat_daily_avg"].ffill().add(df["cat_daily_avg"].bfill()).div(2)
    )

    df = df.assign(r10=lambda x: x["cat_daily_avg"].rolling(10).mean())

    color = metadata.color

    # Create some columns for styling the scatter points - imputed days are hidden
    # by giving them a marker size of zero.
    df["scatter_color"] = color.GREY
    df["scatter_size"] = 10
    df.loc[df["imputed"], "scatter_size"] = 0

    with plt.rc_context(
        {
            "xtick.major.pad": 5,
            "font.family": "monospace",
        },
    ):
        fig = plt.figure(figsize=(28, 10))
        ax_dict = fig.subplot_mosaic(LAYOUT)  # type: ignore[arg-type]
        ax_main = ax_dict["main"]

        # Plot rolling average
        ax_main.plot(
            df["datestamp"],
            df["r10"],
            color=color.PINK_COLOUR,
            lw=3,
            zorder=10,
        )

        # Want to ensure that no daily lines are drawn where data has been imputed -
        # will still create the rolling average line here. The cumulative sum of
        # the imputed flag starts a new group at every imputed day, so each group
        # is one unbroken run of recorded days.
        for _, data in df.assign(groups=df["imputed"].cumsum()).groupby("groups"):
            ax_main.plot(
                data["datestamp"].loc[~data["imputed"]],
                data["cat_daily_avg"].loc[~data["imputed"]],
                color=color.GREY,
                lw=1,
                zorder=5,
            )

        ax_main.scatter(
            df["datestamp"],
            df["cat_daily_avg"],
            color=df["scatter_color"],
            s=df["scatter_size"],
            zorder=5,
        )

        ax_main.set_ylabel("Weight kg")

        # Remove spines for top/right
        ax_main.spines["top"].set_visible(False)
        ax_main.spines["right"].set_visible(False)

        # Weekly ticks, with x-axis dates shown as day/month/year.
        ax_main.xaxis.set_major_locator(mdates.DayLocator(interval=7))  # type: ignore[no-untyped-call]
        ax_main.xaxis.set_major_formatter(mdates.DateFormatter("%d/%m/%y"))  # type: ignore[no-untyped-call]

        for label in ax_main.get_xticklabels():
            label.set_rotation(80)
            label.set_ha("center")

        # Heaviest day: text five days later, 0.25 kg above the following day.
        _annotate_weight(
            ax_main,
            df,
            idx=df["cat_daily_avg"].idxmax(),
            text_shift=(5, 1, 0.25),
            rad=0.2,
        )
        # Lightest day: text five days earlier, at the same height.
        _annotate_weight(
            ax_main,
            df,
            idx=df["cat_daily_avg"].idxmin(),
            text_shift=(-5, 0, 0.0),
            rad=0.35,
        )

        ax_main.grid(linewidth=0.2, which="major", axis="y")

        # Put cat picture in the side panel
        img_path = Path(__file__).parent / "data" / "cat_looking_to_side.jpeg"
        cat_img = Image.open(img_path)
        ax_dict["side"].imshow(cat_img, zorder=10)

        # Only the main panel keeps its axis; every panel gets the background.
        for name, ax in ax_dict.items():
            if name != "main":
                ax.axis("off")
            ax.set_facecolor(color.BACKGROUND_COLOUR)

        # Add Title
        data_from, data_to = (
            dt.datetime(day.year, day.month, day.day, tzinfo=dt.UTC).strftime(
                "%Y-%m-%d"
            )
            for day in (df["datestamp"].min(), df["datestamp"].max())
        )

        diff = 0.3
        title_x = 0.0
        ax_dict["title"].text(
            s="Cat Weight",
            x=title_x,
            y=0.5,
            fontsize=FONTSIZE_TITLE,
            horizontalalignment="left",
            verticalalignment="bottom",
        )

        ax_dict["title"].text(
            s=f"{data_from} -> {data_to}",
            x=title_x,
            y=0.5 - diff,
            fontsize=FONTSIZE_SUBTITLE,
            horizontalalignment="left",
            verticalalignment="bottom",
            color=COLOR_SUBTITLE_TEXT,
        )

        fig.set_tight_layout(True)  # type: ignore[attr-defined]
        fig.patch.set_facecolor(color.BACKGROUND_COLOUR)

    return fig
@dataclass
class Colors:

    """
    Named hex colours used by the plots.

    Each colour is an annotated dataclass field so it shows up in ``repr``, takes
    part in equality, and can be overridden per instance, e.g.
    ``Colors(GREY="#777777")``. Un-annotated attributes are ignored by
    ``@dataclass``.

    https://mycolor.space/?hex=%23FF69B4&sub=1
    """

    PINK_COLOUR: str = "#ff69b4"
    BACKGROUND_COLOUR: str = "#f2f2f2"
    GREY: str = "#919191"
    BLUE: str = "#007FCB"
    LIGHT_GREEN: str = "#B4EDD2"
    DEEPER_GREEN: str = "#51B9BE"
    BROWNY_RED: str = "#554149"
    PURPLEY: str = "#8F6E9B"
    TAN: str = "#DDD7C6"
    BLACK: str = "#000000"
47 | "PD901", # Avoid using the generic variable name `df` for DataFrames 48 | "TD002", # Missing author in TODO; try: `# TODO(): ...` or `# TODO @: ...` 49 | "TD003", # Missing issue link on the line following this TODO 50 | ] 51 | fixable = ["ALL"] 52 | unfixable = [] 53 | # Allow unused variables when underscore-prefixed. 54 | dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" 55 | 56 | 57 | [tool.ruff.lint.per-file-ignores] 58 | "__init__.py" = ["F401"] 59 | "plotting_examples/y2022/scatter_distributions/plot.py" = [ 60 | "PLR0913", # Too many arguments in function definition (6 > 5) 61 | "S101", # Use of `assert` detected 62 | ] 63 | 64 | "plotting_examples/y2022/opinium_barchart/plot.py" = [ 65 | "PLR0915", # Too many statements (54 > 50) 66 | ] 67 | 68 | "plotting_examples/y2022/meaningless_points/plot.py" = [ 69 | "PLR0913", # Too many arguments in function definition (7 > 5) 70 | ] 71 | 72 | 73 | "plotting_examples/y2022/histogram_with_two_variables/plot.py" = [ 74 | "PLR0913", # Too many arguments in function definition (7 > 5) 75 | "FBT001", # Boolean-typed positional argument in function definition 76 | ] 77 | 78 | "plotting_examples/y2022/box_plot_w_scatter_distributions/plot.py" = [ 79 | "PLR0913", # Too many arguments in function definition (10 > 5) 80 | ] 81 | 82 | 83 | [tool.ruff.lint.flake8-type-checking] 84 | # Don't want to have things move to TYPE_CHECKING if needed by pydantic. 85 | runtime-evaluated-base-classes = ["pydantic.BaseModel"] 86 | 87 | [tool.ruff.lint.isort] 88 | section-order = [ 89 | "future", 90 | "standard-library", 91 | "third-party", 92 | "first-party", 93 | "local-folder", 94 | ] 95 | case-sensitive = true 96 | combine-as-imports = true 97 | 98 | 99 | # warning: The top-level linter settings are deprecated in favour of their counterparts in the `lint` section. 
Please update the following options in `pyproject.toml`: 100 | # - 'flake8-type-checking' -> 'lint.flake8-type-checking' 101 | # - 'isort' -> 'lint.isort' 102 | # - 'per-file-ignores' -> 'lint.per-file-ignores' 103 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | disable-noqa = True 3 | max-line-length = 100 4 | extend-ignore = 5 | E203, # whitespace before : is not PEP8 compliant (& conflicts with black) 6 | 7 | DAR003, # Incorrect indentation: ~< 8 | DAR102, # Excess parameter(s) in Docstring: + words_freq[ 9 | DAR201, # Missing "Returns" in Docstring: - return 10 | DAR202, # Excess "Returns" in Docstring: + return 11 | 12 | per-file-ignores = 13 | **/__init__.py: 14 | # Missing docstring in public package 15 | D104, 16 | 17 | [flake8_nb] 18 | disable-noqa = True 19 | max-line-length = 100 20 | extend-ignore = 21 | E203, # whitespace before : is not PEP8 compliant (& conflicts with black) 22 | 23 | D100, # Missing docstring in public module 24 | E402, # module level import not at top of file 25 | 26 | D103, # Missing docstring in public function 27 | D104, # Missing docstring in public package 28 | D400, # First line should end with a period 29 | D403, # First word of the first line should be properly capitalized 30 | DAR003, # Incorrect indentation: ~< 31 | DAR102, # Excess parameter(s) in Docstring: + words_freq[ 32 | DAR201, # Missing "Returns" in Docstring: - return 33 | DAR202, # Excess "Returns" in Docstring: + return 34 | E231, # missing whitespace after ',' 35 | F401, # '...' imported but unused 36 | F811, # redefinition of unused '..' from line 7 37 | F821, # undefined name '...' 
# Simple script to run a few things when working on stuff: regenerate the plots,
# type check, lint, rebuild the README, then commit any generated changes.
poetry run dvc repro dvc.yaml
poetry run mypy --strict .
poetry run pre-commit run --all-files
poetry run python -m generate_readme

# Add changes to dvc.lock if there are any. `git diff --quiet` exits non-zero
# only when the file differs from HEAD; `--name-only` always exits 0, so it
# cannot be used to guard the commit.
if ! git diff --quiet HEAD -- dvc.lock; then
    git add dvc.lock && git commit -m 'update dvc.lock'
fi

# Automatically add changes to image files (modified, not deleted), and only
# commit when that actually staged something.
git diff --name-only --diff-filter=dM HEAD | grep -E 'images.*\.png$' | xargs -r git add
if ! git diff --cached --quiet; then
    git commit -m 'updated generated image'
fi