├── .dvc
    ├── .gitignore
    └── config
├── .dvcignore
├── .gitignore
├── .pre-commit-config.yaml
├── .python-version
├── .vscode
    └── settings.json
├── Makefile
├── README.md
├── dvc.lock
├── dvc.yaml
├── generate_readme.py
├── images
    ├── y2022
    │   ├── bar_plot_w_custom_cmap.png
    │   ├── binary_outcome_variable.png
    │   ├── box_plot_w_scatter_distributions.png
    │   ├── default_plot.png
    │   ├── histogram_with_two_variables.png
    │   ├── line_plot_fill_between.png
    │   ├── meaningless_points.png
    │   ├── opinium_barchart.png
    │   ├── pandas_stacked_bars_with_values.png
    │   ├── pos_neg_split_hbar.png
    │   ├── scatter_distributions.png
    │   ├── scatter_matrix_w_kde_on_diag.png
    │   ├── scatter_w_outlined_text_insert.png
    │   ├── sns_violin_plot_custom.png
    │   ├── split_x_axis_custom_legend.png
    │   ├── stacked_bar_with_single_bars_layout.png
    │   └── uk_hexmap.png
    └── y2024
    │   └── cat_weight.png
├── mypy.ini
├── plotting_examples
    ├── __init__.py
    ├── dvc_entry.py
    ├── extract_year_name.py
    ├── rc.mplstyle
    ├── save_plot_output.py
    ├── y2022
    │   ├── __init__.py
    │   ├── bar_plot_w_custom_cmap
    │   │   ├── __init__.py
    │   │   └── plot.py
    │   ├── binary_outcome_variable
    │   │   ├── __init__.py
    │   │   ├── data.csv
    │   │   └── plot.py
    │   ├── box_plot_w_scatter_distributions
    │   │   ├── __init__.py
    │   │   ├── data.parquet
    │   │   └── plot.py
    │   ├── default_plot
    │   │   ├── __init__.py
    │   │   └── plot.py
    │   ├── histogram_with_two_variables
    │   │   ├── __init__.py
    │   │   └── plot.py
    │   ├── line_plot_fill_between
    │   │   ├── __init__.py
    │   │   ├── data.csv
    │   │   └── plot.py
    │   ├── meaningless_points
    │   │   ├── __init__.py
    │   │   └── plot.py
    │   ├── metadata.py
    │   ├── opinium_barchart
    │   │   ├── __init__.py
    │   │   ├── opinium.png
    │   │   ├── opinium_barchart_example.png
    │   │   └── plot.py
    │   ├── pandas_stacked_bars_with_values
    │   │   ├── __init__.py
    │   │   └── plot.py
    │   ├── pos_neg_split_hbar
    │   │   ├── __init__.py
    │   │   └── plot.py
    │   ├── scatter_distributions
    │   │   ├── __init__.py
    │   │   └── plot.py
    │   ├── scatter_matrix_w_kde_on_diag
    │   │   ├── __init__.py
    │   │   └── plot.py
    │   ├── scatter_w_outlined_text_insert
    │   │   ├── __init__.py
    │   │   ├── data.parquet
    │   │   └── plot.py
    │   ├── sns_violin_plot_custom
    │   │   ├── __init__.py
    │   │   ├── data.parquet
    │   │   └── plot.py
    │   ├── split_x_axis_custom_legend
    │   │   ├── __init__.py
    │   │   └── plot.py
    │   ├── stacked_bar_with_single_bars_layout
    │   │   ├── __init__.py
    │   │   ├── data
    │   │   │   └── lab.png
    │   │   └── plot.py
    │   └── uk_hexmap
    │   │   ├── __init__.py
    │   │   ├── data
    │   │       ├── gb_hex_cartogram
    │   │       │   ├── GB_Hex_Cartogram_Const.cpg
    │   │       │   ├── GB_Hex_Cartogram_Const.dbf
    │   │       │   ├── GB_Hex_Cartogram_Const.prj
    │   │       │   ├── GB_Hex_Cartogram_Const.sbn
    │   │       │   ├── GB_Hex_Cartogram_Const.sbx
    │   │       │   ├── GB_Hex_Cartogram_Const.shp
    │   │       │   ├── GB_Hex_Cartogram_Const.shp.xml
    │   │       │   ├── GB_Hex_Cartogram_Const.shx
    │   │       │   └── GB_Hex_Cartogram_Const.zip
    │   │       ├── petition_data.csv
    │   │       └── petition_data.json
    │   │   ├── plot.py
    │   │   ├── uk_hex_example.png
    │   │   └── uk_hex_example.py
    └── y2024
    │   ├── __init__.py
    │   ├── cat_weight
    │       ├── __init__.py
    │       ├── data
    │       │   ├── cat_looking_to_side.jpeg
    │       │   └── weight_data.parquet
    │       └── plot.py
    │   └── metadata.py
├── poetry.lock
├── pyproject.toml
├── setup.cfg
└── work.sh


/.dvc/.gitignore:
--------------------------------------------------------------------------------
1 | /config.local
2 | /tmp
3 | /cache
4 | 


--------------------------------------------------------------------------------
/.dvc/config:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/.dvc/config


--------------------------------------------------------------------------------
/.dvcignore:
--------------------------------------------------------------------------------
1 | # Add patterns of files dvc should ignore, which could improve
2 | # the performance. Learn more at
3 | # https://dvc.org/doc/user-guide/dvcignore
4 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Caches
 2 | **/.ipynb_checkpoints/*
 3 | **/__pycache__/*
 4 | *.pyc
 5 | .Rhistory
 6 | .venv/
 7 | venv/
 8 | 
 9 | # Data
10 | data/**/*
11 | !data/**/*.gitkeep
12 | 
13 | # IDE config
14 | .idea/
15 | ipython_config.py
16 | profile_default/
17 | 
18 | # Other
19 | .DS_Store
20 | config/config.py
21 | .Rproj.user
22 | .Rproj.user/
23 | *.Rproj
24 | 
25 | # VisualStudioCode
26 | .vscode/*
27 | !.vscode/settings.json
28 | !.vscode/tasks.json
29 | !.vscode/launch.json
30 | !.vscode/extensions.json
31 | 
32 | # https://www.gitignore.io/
33 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 | -   repo: https://github.com/pre-commit/pre-commit-hooks
 3 |     rev: v4.4.0
 4 |     hooks:
 5 |     - id: end-of-file-fixer
 6 |     - id: trailing-whitespace
 7 |     - id: check-builtin-literals
 8 |     - id: fix-byte-order-marker  # supersedes the deprecated check-byte-order-marker hook
 9 |     - id: check-case-conflict
10 |     - id: check-merge-conflict
11 |     - id: check-symlinks
12 |     - id: check-toml
13 |     - id: check-vcs-permalinks
14 |     - id: check-xml
15 |     - id: debug-statements
16 |     - id: detect-private-key
17 |     - id: mixed-line-ending
18 |     - id: fix-encoding-pragma
19 |       args: ["--remove"]
20 |     - id: check-yaml
21 | 


--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------
1 | 3.10.2
2 | 


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "python.linting.pylintEnabled": true,
 3 |     "python.linting.enabled": true,
 4 |     "python.formatting.provider": "black",
 5 |     "editor.formatOnSave": true,
 6 |     "rewrap.wrappingColumn": 87,
 7 |     "editor.rulers": [
 8 |         88
 9 |     ],
10 |     // "peacock.color": "#42b883",
11 |     "workbench.colorCustomizations": {
12 |         "activityBar.activeBackground": "#65c89b",
13 |         "activityBar.activeBorder": "#945bc4",
14 |         "activityBar.background": "#65c89b",
15 |         "activityBar.foreground": "#15202b",
16 |         "activityBar.inactiveForeground": "#15202b99",
17 |         "activityBarBadge.background": "#945bc4",
18 |         "activityBarBadge.foreground": "#e7e7e7",
19 |         "sash.hoverBorder": "#65c89b",
20 |         "statusBar.background": "#42b883",
21 |         "statusBar.foreground": "#15202b",
22 |         "statusBarItem.hoverBackground": "#359268",
23 |         "statusBarItem.remoteBackground": "#42b883",
24 |         "statusBarItem.remoteForeground": "#15202b",
25 |         "titleBar.activeBackground": "#42b883",
26 |         "titleBar.activeForeground": "#15202b",
27 |         "titleBar.inactiveBackground": "#42b88399",
28 |         "titleBar.inactiveForeground": "#15202b99"
29 |     }
30 | }
31 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Declare every command-style target phony so a same-named file cannot shadow it.
 2 | # poetry.lock is a real file target and is intentionally left out.
 3 | .PHONY: help clean cloc
 4 | .PHONY: pre-commit-run readme repro
 5 | .PHONY: mypy lint format
 6 | .PHONY: install
 7 | 
 8 | GIT := git
 9 | CLOC := cloc
10 | 
11 | #########
12 | # UTILS #
13 | #########
14 | 
15 | help:
16 | 	@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf "\033[36m%-15s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST) | sort
17 | 
18 | clean:
19 | 	@echo "Cleaning up temporary and cache files"
20 | 	@find . -type f -name "*.pyc" -delete
21 | 	@find . -type d -name "__pycache__" -exec rm -rf {} +
22 | 	@find . -type d -name ".pytest_cache" -exec rm -rf {} +
23 | 	@find . -type d -name ".mypy_cache" -exec rm -rf {} +
24 | 	@find . -type d -name ".ipynb_checkpoints" -exec rm -rf {} +
25 | 
26 | cloc:
27 | 	@echo "Code statistics using cloc:"
28 | 	$(CLOC) --exclude-dir=venv .
29 | 
30 | ######################
31 | # WORKING ON PROJECT #
32 | ######################
33 | 
34 | pre-commit-run:
35 | 	poetry run pre-commit run --all-files
36 | 
37 | readme: ## Generate README file.
38 | 	poetry run python generate_readme.py
39 | 
40 | # This'll just run through all the plots.
41 | repro: ## run dvc repro
42 | 	poetry run dvc repro dvc.yaml
43 | 
44 | 
45 | ########
46 | # LINT #
47 | ########
48 | 
49 | mypy:
50 | 	poetry run mypy . --strict
51 | 
52 | lint: mypy ## run linting - mypy,ruff
53 | 	poetry run ruff check .
54 | 	poetry run ruff format . --check
55 | 	@$(MAKE) --no-print-directory clean
56 | 
57 | # Using this as format & lint really...
58 | format: pre-commit-run ## run formatters - pre-commit,ruff
59 | 	poetry run ruff format .
60 | 	poetry run ruff check . --fix --unsafe-fixes
61 | 	@$(MAKE) --no-print-directory clean
62 | 
63 | 
64 | ##########
65 | # POETRY #
66 | ##########
67 | 
68 | poetry.lock: pyproject.toml  # re-lock whenever the dependency spec is newer than the lock
69 | 	poetry lock --no-update && touch $@
70 | 
71 | install: poetry.lock
72 | 	poetry install
73 | 	@$(MAKE) --no-print-directory clean
74 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Example plots
  2 | 
  3 | Example plots, typically using matplotlib. Mainly for personal use / so I have somewhere to remind
  4 | myself of some things, but if they're of any use to anyone else then ace. Code/visuals often aren't
  5 | great as they're mainly just scratch work, often copied straight over from a notebook with little
  6 | cleanup.
  7 | 
  8 | ----
  9 | 
 10 | [comment]: # (Automate plots beneath this.)
 11 | 
 12 | # Plots
 13 | 
 14 | * [`bar_plot_w_custom_cmap`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#bar_plot_w_custom_cmap)
 15 | * [`binary_outcome_variable`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#binary_outcome_variable)
 16 | * [`box_plot_w_scatter_distributions`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#box_plot_w_scatter_distributions)
 17 | * [`cat_weight`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#cat_weight)
 18 | * [`line_plot_fill_between`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#line_plot_fill_between)
 19 | * [`meaningless_points`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#meaningless_points)
 20 | * [`opinium_barchart`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#opinium_barchart)
 21 | * [`pandas_stacked_bars_with_values`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#pandas_stacked_bars_with_values)
 22 | * [`pos_neg_split_hbar`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#pos_neg_split_hbar)
 23 | * [`scatter_distributions`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#scatter_distributions)
 24 | * [`scatter_matrix_w_kde_on_diag`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#scatter_matrix_w_kde_on_diag)
 25 | * [`scatter_w_outlined_text_insert`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#scatter_w_outlined_text_insert)
 26 | * [`split_x_axis_custom_legend`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#split_x_axis_custom_legend)
 27 | * [`stacked_bar_with_single_bars_layout`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#stacked_bar_with_single_bars_layout)
 28 | * [`uk_hexmap`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#uk_hexmap)
 29 | 
 30 | 
 31 | 
 32 | ## [`bar_plot_w_custom_cmap`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/bar_plot_w_custom_cmap/plot.py)
 33 | 
 34 | Bar plot with custom cmap.
 35 | 
 36 | Based on this tweet: https://twitter.com/ryanburge/status/1505602885215834112 - wanted
 37 | to create something with a similar effect using mpl.
 38 | 
 39 | Example of:
 40 | 
 41 | - Different font types (using monospace font)
 42 | - using different colours for bars depending on their values (custom cmap).
 43 | - padding around the axis using rc parameters
 44 | 
 45 | ![](images/y2022/bar_plot_w_custom_cmap.png)
 46 | 
 47 | ## [`binary_outcome_variable`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/binary_outcome_variable/plot.py)
 48 | 
 49 | Plot dichotomous variable.
 50 | 
 51 | Simple dots with median lines - might be nice to add a kde to this as well.
 52 | 
 53 | The y-axis is redundant here as there are only two options (`0.6` doesn't make any
 54 | sense).
 55 | 
 56 | ![](images/y2022/binary_outcome_variable.png)
 57 | 
 58 | ## [`box_plot_w_scatter_distributions`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/box_plot_w_scatter_distributions/plot.py)
 59 | 
 60 | Bar plot with distributions.
 61 | 
 62 | Thought I'd create a bar plot with scatter plots of the distributions adjacent to the
 63 | bars, it was based off something else but I can't remember what. Bar plots are created
 64 | from scratch using hlines etc, for no particular reason.
 65 | 
 66 | Data was from tidy tuesday.
 67 | 
 68 | ![](images/y2022/box_plot_w_scatter_distributions.png)
 69 | 
 70 | ## [`cat_weight`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2024/cat_weight/plot.py)
 71 | 
 72 | Time series of the cat's diet.
 73 | 
 74 | Cat was getting a little chunky towards the end of 2023 so had a resolution made for
 75 | them to lose a bit of weight. Data collection is just a daily weigh, the average of
 76 | this is taken (as there are sometimes multiple entries in a day) and then plotted along
 77 | with a ten day rolling average. Most days were covered, where there are missing days
 78 | they're imputed using the average of the days either side, eg `(a, nan, b) -> (a,
 79 | (a+b)/2, b)` though this is just a plot.
 80 | 
 81 | ![](images/y2024/cat_weight.png)
 82 | 
 83 | ## [`line_plot_fill_between`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/line_plot_fill_between/plot.py)
 84 | 
 85 | Visualise time tracking, how much over/under time.
 86 | 
 87 | Mainly serves as an example of plotting with dates, and filling above / below
 88 | particular values on a plot.
 89 | 
 90 | Example of:
 91 | 
 92 | - plotting with dates
 93 | - different fonts
 94 | - filling between lines
 95 | 
 96 | ![](images/y2022/line_plot_fill_between.png)
 97 | 
 98 | ## [`meaningless_points`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/meaningless_points/plot.py)
 99 | 
100 | Some random points.
101 | 
102 | No real meaning to this - was messing about with some bokeh style bits (the effect, not
103 | the python library), so dumping here. Not sure I'm mad on the output - it's also slow
104 | as hell.
105 | 
106 | ![](images/y2022/meaningless_points.png)
107 | 
108 | ## [`opinium_barchart`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/opinium_barchart/plot.py)
109 | 
110 | Bar chart style copied from Opinium.
111 | 
112 | Saw this on Twitter (I think) and thought I'd recreate it in mpl.
113 | 
114 | ![](images/y2022/opinium_barchart.png)
115 | 
116 | ## [`pandas_stacked_bars_with_values`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/pandas_stacked_bars_with_values/plot.py)
117 | 
118 | Horizontal stacked bars, based off of pandas.
119 | 
120 | Could do these from scratch - pandas makes things a bit more straightforward though.
121 | 
122 | Example of:
123 | 
124 | - fixed formatting - setting categorical ticks at particular positions.
125 | 
126 | ![](images/y2022/pandas_stacked_bars_with_values.png)
127 | 
128 | ## [`pos_neg_split_hbar`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/pos_neg_split_hbar/plot.py)
129 | 
130 | Create split horizontal bar chart.
131 | 
132 | Split by dichotomous variable, with bar classifications.
133 | 
134 | Can be a bit messy - not sure I'm much of a fan - but wanted to re-create anyway.
135 | 
136 | ![](images/y2022/pos_neg_split_hbar.png)
137 | 
138 | ## [`scatter_distributions`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/scatter_distributions/plot.py)
139 | 
140 | Distributions of multiple variables.
141 | 
142 | For a set of variables, each with an accompanying continuous variable on the same scale,
143 | plot the distributions of the continuous variable. Might be useful to have a kde
144 | overlaid here.
145 | 
146 | Example of:
147 | 
148 | - fixed formatting
149 | - setting categorical ticks at particular positions.
150 | 
151 | ![](images/y2022/scatter_distributions.png)
152 | 
153 | ## [`scatter_matrix_w_kde_on_diag`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/scatter_matrix_w_kde_on_diag/plot.py)
154 | 
155 | Scatter matrix with kde instead of histogram on the diagonal.
156 | 
157 | Could probably adapt pd.scatter_matrix instead of doing it from scratch. Though with
158 | this approach the non-diagonal plots could be whatever instead of a scatter plot I
159 | guess...
160 | 
161 | Would be good to make the upper diagonals differ from the lower diagonals a bit... maybe
162 | some sort of table from pd.cut on the others or whatever.
163 | 
164 | I'd probably just use subplot_mosaic as well now - that's grown on me a lot since this.
165 | 
166 | ![](images/y2022/scatter_matrix_w_kde_on_diag.png)
167 | 
168 | ## [`scatter_w_outlined_text_insert`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/scatter_w_outlined_text_insert/plot.py)
169 | 
170 | Scatter plot with text inserted to scatter points.
171 | 
172 | Data was taken from a tidy tuesday.
173 | 
174 | Example of:
175 | 
176 | - Outlining text elements in a plot.
177 | 
178 | ![](images/y2022/scatter_w_outlined_text_insert.png)
179 | 
180 | ## [`split_x_axis_custom_legend`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/split_x_axis_custom_legend/plot.py)
181 | 
182 | Example of creating multiple x-axes in order to plot year / months.
183 | 
184 | The fig size needs to be pretty large in order to squeeze all the month names etc in
185 | here. Generated data looks a mess on these plots.
186 | 
187 | Example of:
188 | 
189 | - Custom legend
190 | - generating random date data
191 | - multiple x-axes to display years / months
192 | 
193 | ![](images/y2022/split_x_axis_custom_legend.png)
194 | 
195 | ## [`stacked_bar_with_single_bars_layout`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/stacked_bar_with_single_bars_layout/plot.py)
196 | 
197 | Layout containing two bar plots and a bivariate plot between them.
198 | 
199 | In this case it's a silly example of some data containing the social grade of
200 | Labradors, as well as the education group. The main plot is a stacked bar containing
201 | the breakdown of education group for each social grade.
202 | 
203 | Don't think I'm too keen on the code for this plot - though it's not always so clear
204 | (to me) how to make "nice" code with a lot of matplotlib stuff.
205 | 
206 | Obviously, the data is made up.
207 | 
208 | ![](images/y2022/stacked_bar_with_single_bars_layout.png)
209 | 
210 | ## [`uk_hexmap`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/uk_hexmap/plot.py)
211 | 
212 | Hex map for the UK constituencies.
213 | 
214 | Some meaningless generated data - small multiples with hex maps can be useful sometimes
215 | though. Could be good to add in the geographically accurate version as well.
216 | 
217 | ![](images/y2022/uk_hexmap.png)
218 | 


--------------------------------------------------------------------------------
/dvc.lock:
--------------------------------------------------------------------------------
  1 | schema: '2.0'
  2 | stages:
  3 |   sav_to_csv_and_json:
  4 |     cmd: python -m plotting_examples.binary_outcome_variable.plot
  5 |     deps:
  6 |     - path: plotting_examples/binary_outcome_variable/plot.py
  7 |       md5: 1427c5fcadda1f47e11a817d2f55e61e
  8 |       size: 2109
  9 |   binary_outcome:
 10 |     cmd: python -m plotting_examples.binary_outcome_variable.plot
 11 |     deps:
 12 |     - path: plotting_examples/binary_outcome_variable/plot.py
 13 |       md5: b8b284298598c316bd0c661e7705ffda
 14 |       size: 2101
 15 |     outs:
 16 |     - path:
 17 |         images/GSL_projects_plotting_examples_plotting_examples_binary_outcome_variable_plot.png
 18 |       md5: 3e15bc2bdba0ea318625e341fc600adc
 19 |       size: 68657
 20 |   histogram_with_two_variables:
 21 |     cmd: python -m plotting_examples.histogram_with_two_variables.plot
 22 |     deps:
 23 |     - path: plotting_examples/histogram_with_two_variables/plot.py
 24 |       md5: f176e507379760e333fd3d21a0a03f66
 25 |       size: 4958
 26 |     outs:
 27 |     - path:
 28 |         images/GSL_projects_plotting_examples_plotting_examples_histogram_with_two_variables_plot.png
 29 |       md5: a42aff20a7970bd5a593dbb5e4a72083
 30 |       size: 121044
 31 |   pandas_stacked_bars_with_values:
 32 |     cmd: python -m plotting_examples.pandas_stacked_bars_with_values.plot
 33 |     deps:
 34 |     - path: plotting_examples/pandas_stacked_bars_with_values/plot.py
 35 |       md5: 41f46cb867b53e3d1fa2bd9ce9a7e59b
 36 |       size: 4276
 37 |     outs:
 38 |     - path:
 39 |         images/GSL_projects_plotting_examples_plotting_examples_pandas_stacked_bars_with_values_plot.png
 40 |       md5: b444b9c40539997369b306794dd55383
 41 |       size: 109980
 42 |   scatter_distributions:
 43 |     cmd: python -m plotting_examples.scatter_distributions.plot
 44 |     deps:
 45 |     - path: plotting_examples/scatter_distributions/plot.py
 46 |       md5: ca9e0719035769cb951018cdc60cae5a
 47 |       size: 6101
 48 |     outs:
 49 |     - path:
 50 |         images/GSL_projects_plotting_examples_plotting_examples_scatter_distributions_plot.png
 51 |       md5: ff1ea112d284d6995f21d1555da17868
 52 |       size: 1048818
 53 |   split_x_axis_custom_legend:
 54 |     cmd: python -m plotting_examples.split_x_axis_custom_legend.plot
 55 |     deps:
 56 |     - path: plotting_examples/split_x_axis_custom_legend/plot.py
 57 |       md5: 4cbef0469f1542bbba40f26080fa8147
 58 |       size: 6216
 59 |     outs:
 60 |     - path:
 61 |         images/GSL_projects_plotting_examples_plotting_examples_split_x_axis_custom_legend_plot.png
 62 |       md5: e31a09751ac8a7cb68c33f573888c04c
 63 |       size: 2059481
 64 |   trump_church_votes_2020:
 65 |     cmd: python -m plotting_examples.trump_church_votes_2020.plot
 66 |     deps:
 67 |     - path: plotting_examples/trump_church_votes_2020/plot.py
 68 |       md5: df8965d04acf1363a89adc51dbf8d823
 69 |       size: 5092
 70 |     outs:
 71 |     - path:
 72 |         images/GSL_projects_plotting_examples_plotting_examples_trump_church_votes_2020_plot.png
 73 |       md5: 79f742eb79b7cbcac22518634984fefe
 74 |       size: 442400
 75 |   work_time_tracking_plot:
 76 |     cmd: python -m plotting_examples.work_time_tracking.plot
 77 |     deps:
 78 |     - path: plotting_examples/work_time_tracking/plot.py
 79 |       md5: 295be3a691582e23266edcfdf75abe9f
 80 |       size: 4618
 81 |     outs:
 82 |     - path:
 83 |         images/GSL_projects_plotting_examples_plotting_examples_work_time_tracking_plot.png
 84 |       md5: 022854ffd33d0c1052ad561d3b0a29e4
 85 |       size: 420434
 86 |   bar_plot_w_custom_cmap:
 87 |     cmd: python -m plotting_examples.2022.bar_plot_w_custom_cmap.plot
 88 |     deps:
 89 |     - path: plotting_examples/2022/bar_plot_w_custom_cmap/plot.py
 90 |       md5: 672de9f4bd9f4e7c2095b572b58e1b9e
 91 |       size: 5697
 92 |     outs:
 93 |     - path:
 94 |         images/GSL_projects_plotting_examples_plotting_examples_2022_bar_plot_w_custom_cmap_plot.png
 95 |       md5: 149eaf7e57549fb2119ef508746f653d
 96 |       size: 324973
 97 |   sns_violin_plot_custom:
 98 |     cmd: python -m plotting_examples.sns_violin_plot_custom.plot
 99 |     deps:
100 |     - path: plotting_examples/sns_violin_plot_custom/plot.py
101 |       md5: 01e55cb3dd5935e2f317fdc90dbe64a4
102 |       size: 1523
103 |     outs:
104 |     - path:
105 |         images/GSL_projects_plotting_examples_plotting_examples_sns_violin_plot_custom_plot.png
106 |       md5: 54134457d7681189e81518fc6214f4ec
107 |       size: 100869
108 |   scatter_matrix_w_kde_on_diag:
109 |     cmd: python -m plotting_examples.scatter_matrix_w_kde_on_diag.plot
110 |     deps:
111 |     - path: plotting_examples/scatter_matrix_w_kde_on_diag/plot.py
112 |       md5: 5f9b69d8a3762e617653d044b3fec13a
113 |       size: 2216
114 |     outs:
115 |     - path:
116 |         images/GSL_projects_plotting_examples_plotting_examples_scatter_matrix_w_kde_on_diag_plot.png
117 |       md5: 7a6dec845e9bb9fe0a667b8b7937ae9c
118 |       size: 504189
119 |   pos_neg_split_hbar:
120 |     cmd: python -m plotting_examples.pos_neg_split_hbar.plot
121 |     deps:
122 |     - path: plotting_examples/pos_neg_split_hbar/plot.py
123 |       md5: 882938cd457ffdec5c72e67f2235c181
124 |       size: 5017
125 |     outs:
126 |     - path:
127 |         images/GSL_projects_plotting_examples_plotting_examples_pos_neg_split_hbar_plot.png
128 |       md5: ea865e0df1293cc02d7843aa4140a387
129 |       size: 160025
130 |   default_plot:
131 |     cmd: python -m plotting_examples.default_plot.plot
132 |     deps:
133 |     - path: plotting_examples/default_plot/plot.py
134 |       md5: 7b305baa02cf195d47332f3b2586265a
135 |       size: 865
136 |     outs:
137 |     - path: images/GSL_projects_plotting_examples_plotting_examples_default_plot_plot.png
138 |       md5: 215cc189594c56704b2ab62ea6983b6d
139 |       size: 54181
140 |   2022_default_plot:
141 |     cmd: python -m plotting_examples.2022.default_plot.plot
142 |     deps:
143 |     - path: plotting_examples/2022/default_plot/plot.py
144 |       md5: 3006f0a7bbbea7a22224095255f4bb5e
145 |       size: 838
146 |     outs:
147 |     - path: images/2022/default_plot.png
148 |       md5: 63f2d738e25563eff978e01bc834e480
149 |       size: 25121
150 |   2022_binary_outcome_variable:
151 |     cmd: python -m plotting_examples.2022.binary_outcome_variable.plot
152 |     deps:
153 |     - path: plotting_examples/2022/binary_outcome_variable/plot.py
154 |       md5: 97ac99d8c9299f723f4e730fbfea90e2
155 |       size: 2182
156 |     outs:
157 |     - path: images/2022/binary_outcome_variable.png
158 |       md5: 605d409516acb29e92ba53afd5203a94
159 |       size: 36620
160 |   2022_pandas_stacked_bars_with_values:
161 |     cmd: python -m plotting_examples.2022.pandas_stacked_bars_with_values.plot
162 |     deps:
163 |     - path: plotting_examples/2022/pandas_stacked_bars_with_values/plot.py
164 |       md5: 51afea4e6b06813f61887aad963b2b5d
165 |       size: 4357
166 |     outs:
167 |     - path: images/2022/pandas_stacked_bars_with_values.png
168 |       md5: 187f7f8d78b2a7bee60517b0bca5f463
169 |       size: 57606
170 |   2022_pos_neg_split_hbar:
171 |     cmd: python -m plotting_examples.2022.pos_neg_split_hbar.plot
172 |     deps:
173 |     - path: plotting_examples/2022/pos_neg_split_hbar/plot.py
174 |       md5: 606148fb6a0601d3a2828b24a62f8614
175 |       size: 5097
176 |     outs:
177 |     - path: images/2022/pos_neg_split_hbar.png
178 |       md5: 7102b82280627073c912a37cbb5bddfe
179 |       size: 82422
180 |   2022_histogram_with_two_variables:
181 |     cmd: python -m plotting_examples.2022.histogram_with_two_variables.plot
182 |     deps:
183 |     - path: plotting_examples/2022/histogram_with_two_variables/plot.py
184 |       md5: f1a1d50c1b05b9927b749d0acaad438e
185 |       size: 5248
186 |     outs:
187 |     - path: images/2022/histogram_with_two_variables.png
188 |       md5: 5ee46a399d1635bbca0acbb38baa417d
189 |       size: 67845
190 |   2022_split_x_axis_custom_legend:
191 |     cmd: python -m plotting_examples.2022.split_x_axis_custom_legend.plot
192 |     deps:
193 |     - path: plotting_examples/2022/split_x_axis_custom_legend/plot.py
194 |       md5: d96a9320c4b7009e9f110bb891d1182d
195 |       size: 6297
196 |     outs:
197 |     - path: images/2022/split_x_axis_custom_legend.png
198 |       md5: da1c3324e4d037a49696bdb1bae75bd5
199 |       size: 1080487
200 |   2022_scatter_matrix_w_kde_on_diag:
201 |     cmd: python -m plotting_examples.2022.scatter_matrix_w_kde_on_diag.plot
202 |     deps:
203 |     - path: plotting_examples/2022/scatter_matrix_w_kde_on_diag/plot.py
204 |       md5: 5f0e7e326606d868b8c4411c288e00cb
205 |       size: 2522
206 |     outs:
207 |     - path: images/2022/scatter_matrix_w_kde_on_diag.png
208 |       md5: 14a39a1df810953354cde040c1cf10fb
209 |       size: 217990
210 |   2022_sns_violin_plot_custom:
211 |     cmd: python -m plotting_examples.2022.sns_violin_plot_custom.plot
212 |     deps:
213 |     - path: plotting_examples/2022/sns_violin_plot_custom/plot.py
214 |       md5: e5aa38595903134348a4b97e12af4b51
215 |       size: 1604
216 |     outs:
217 |     - path: images/2022/sns_violin_plot_custom.png
218 |       md5: 7a451fc7c780d5e165f7325327ded3ce
219 |       size: 56996
220 |   2022_work_time_tracking:
221 |     cmd: python -m plotting_examples.2022.work_time_tracking.plot
222 |     deps:
223 |     - path: plotting_examples/2022/work_time_tracking/plot.py
224 |       md5: 41af922980ad6d06a0813c3f6dff37af
225 |       size: 4704
226 |     outs:
227 |     - path: images/2022/work_time_tracking.png
228 |       md5: 5e468011855755d5a8238e7e8b51063c
229 |       size: 227174
230 |   2022_scatter_distributions:
231 |     cmd: python -m plotting_examples.2022.scatter_distributions.plot
232 |     deps:
233 |     - path: plotting_examples/2022/scatter_distributions/plot.py
234 |       md5: 0be18ddf32bdd5a0a5768d214d49c5c2
235 |       size: 6181
236 |     outs:
237 |     - path: images/2022/scatter_distributions.png
238 |       md5: d102a1f3534a68d11439103b85f7b4b1
239 |       size: 520358
240 |   2022_bar_plot_w_custom_cmap:
241 |     cmd: python -m plotting_examples.2022.bar_plot_w_custom_cmap.plot
242 |     deps:
243 |     - path: plotting_examples/2022/bar_plot_w_custom_cmap/plot.py
244 |       md5: d4e5771aeb11a5c78cf16bc01681b014
245 |       size: 5778
246 |     outs:
247 |     - path: images/2022/bar_plot_w_custom_cmap.png
248 |       md5: f90122b668c246a8ef62d4c8302daf68
249 |       size: 174070
250 |   2022_week42:
251 |     cmd: python -m plotting_examples.2022.week42.plot
252 |     deps:
253 |     - path: plotting_examples/2022/week42/plot.py
254 |       md5: 1abbfdf49c2abe180492a606e4760c98
255 |       size: 12120
256 |     outs:
257 |     - path: images/2022/week42.png
258 |       md5: f6bcc5fd9c521a9a8b20c8a11659004d
259 |       size: 1146056
260 |   2022_box_plot_w_scatter_distributions:
261 |     cmd: python -m plotting_examples.2022.box_plot_w_scatter_distributions.plot
262 |     deps:
263 |     - path: plotting_examples/2022/box_plot_w_scatter_distributions/plot.py
264 |       md5: 0ac3c9c54beeac3e1e5395ab8c6a04b0
265 |       size: 12148
266 |     outs:
267 |     - path: images/2022/box_plot_w_scatter_distributions.png
268 |       md5: a4db1dd69327dcf7166701d7dce61ef6
269 |       size: 478450
270 |   2022_line_plot_fill_between:
271 |     cmd: python -m plotting_examples.2022.line_plot_fill_between.plot
272 |     deps:
273 |     - path: plotting_examples/2022/line_plot_fill_between/plot.py
274 |       md5: d4de88d563946fdd04daa65fa4e46280
275 |       size: 5016
276 |     outs:
277 |     - path: images/2022/line_plot_fill_between.png
278 |       md5: 31ccdfaec1719946aaaa86fe69e01626
279 |       size: 231583
280 |   y2022_box_plot_w_scatter_distributions:
281 |     cmd: poetry run python -m plotting_examples.y2022.box_plot_w_scatter_distributions.plot
282 |     deps:
283 |     - path: plotting_examples/y2022/box_plot_w_scatter_distributions/plot.py
284 |       hash: md5
285 |       md5: 75dc92ce6ff62d836d8d1cb15377579c
286 |       size: 11629
287 |     outs:
288 |     - path: images/y2022/box_plot_w_scatter_distributions.png
289 |       hash: md5
290 |       md5: 33dc283243d8df737f43f92f990c62ca
291 |       size: 469728
292 |   y2022_sns_violin_plot_custom:
293 |     cmd: poetry run python -m plotting_examples.y2022.sns_violin_plot_custom.plot
294 |     deps:
295 |     - path: plotting_examples/y2022/sns_violin_plot_custom/plot.py
296 |       hash: md5
297 |       md5: f1613352c0eabb1b8a7e40714b7ee8c3
298 |       size: 1975
299 |     outs:
300 |     - path: images/y2022/sns_violin_plot_custom.png
301 |       md5: 05e5eca57ef23097e878e8c603a3f22e
302 |       size: 67429
303 |   y2022_bar_plot_w_custom_cmap:
304 |     cmd: poetry run python -m plotting_examples.y2022.bar_plot_w_custom_cmap.plot
305 |     deps:
306 |     - path: plotting_examples/y2022/bar_plot_w_custom_cmap/plot.py
307 |       hash: md5
308 |       md5: f7d8241b1ed31f2754eed7cc64442423
309 |       size: 6031
310 |     outs:
311 |     - path: images/y2022/bar_plot_w_custom_cmap.png
312 |       hash: md5
313 |       md5: 6be8b259164e4992a3e62f5255199e02
314 |       size: 174412
315 |   y2022_histogram_with_two_variables:
316 |     cmd: poetry run python -m plotting_examples.y2022.histogram_with_two_variables.plot
317 |     deps:
318 |     - path: plotting_examples/y2022/histogram_with_two_variables/plot.py
319 |       hash: md5
320 |       md5: 16ab57c6b359fdee3d75e5dabc93d4ce
321 |       size: 5849
322 |     outs:
323 |     - path: images/y2022/histogram_with_two_variables.png
324 |       md5: 2e0d94432170ec007918cead8ddb08cb
325 |       size: 58637
326 |   y2022_binary_outcome_variable:
327 |     cmd: poetry run python -m plotting_examples.y2022.binary_outcome_variable.plot
328 |     deps:
329 |     - path: plotting_examples/y2022/binary_outcome_variable/plot.py
330 |       hash: md5
331 |       md5: f8468c0554abebb6a4fc31f9ea511457
332 |       size: 2790
333 |     outs:
334 |     - path: images/y2022/binary_outcome_variable.png
335 |       hash: md5
336 |       md5: 55b0cfcab7573baa0f5363f7f516e075
337 |       size: 36069
338 |   y2022_pos_neg_split_hbar:
339 |     cmd: poetry run python -m plotting_examples.y2022.pos_neg_split_hbar.plot
340 |     deps:
341 |     - path: plotting_examples/y2022/pos_neg_split_hbar/plot.py
342 |       hash: md5
343 |       md5: 6fe3dcb9f263d6d5aa331ab522434de7
344 |       size: 5744
345 |     outs:
346 |     - path: images/y2022/pos_neg_split_hbar.png
347 |       md5: 09185fc58c132ebac7555ecf30463f81
348 |       size: 73712
349 |   y2022_scatter_matrix_w_kde_on_diag:
350 |     cmd: poetry run python -m plotting_examples.y2022.scatter_matrix_w_kde_on_diag.plot
351 |     deps:
352 |     - path: plotting_examples/y2022/scatter_matrix_w_kde_on_diag/plot.py
353 |       hash: md5
354 |       md5: 3b183d57dce913fb84246f4dc10569be
355 |       size: 3363
356 |     outs:
357 |     - path: images/y2022/scatter_matrix_w_kde_on_diag.png
358 |       hash: md5
359 |       md5: 9cbe0ed36d6b99d3832fe5d60b710cdd
360 |       size: 325647
361 |   y2022_line_plot_fill_between:
362 |     cmd: poetry run python -m plotting_examples.y2022.line_plot_fill_between.plot
363 |     deps:
364 |     - path: plotting_examples/y2022/line_plot_fill_between/plot.py
365 |       hash: md5
366 |       md5: e1c730d690f93eb35b930ec00454fe08
367 |       size: 5136
368 |     outs:
369 |     - path: images/y2022/line_plot_fill_between.png
370 |       md5: 9dcd62a428039331ee813626f2c53089
371 |       size: 231583
372 |   y2022_pandas_stacked_bars_with_values:
373 |     cmd: poetry run python -m plotting_examples.y2022.pandas_stacked_bars_with_values.plot
374 |     deps:
375 |     - path: plotting_examples/y2022/pandas_stacked_bars_with_values/plot.py
376 |       hash: md5
377 |       md5: d13b08351135216105931d12ce145b3c
378 |       size: 4375
379 |     outs:
380 |     - path: images/y2022/pandas_stacked_bars_with_values.png
381 |       md5: 199cbc2e9ac00928ff31306bb0f4ab5e
382 |       size: 61398
383 |   y2022_split_x_axis_custom_legend:
384 |     cmd: poetry run python -m plotting_examples.y2022.split_x_axis_custom_legend.plot
385 |     deps:
386 |     - path: plotting_examples/y2022/split_x_axis_custom_legend/plot.py
387 |       hash: md5
388 |       md5: e45ab4f896af74c62953922a2f03d39e
389 |       size: 6806
390 |     outs:
391 |     - path: images/y2022/split_x_axis_custom_legend.png
392 |       hash: md5
393 |       md5: ad997f3edd4f6e0e3c68f3f6d51f6051
394 |       size: 1691509
395 |   y2022_scatter_distributions:
396 |     cmd: poetry run python -m plotting_examples.y2022.scatter_distributions.plot
397 |     deps:
398 |     - path: plotting_examples/y2022/scatter_distributions/plot.py
399 |       hash: md5
400 |       md5: 7d668af948c9c04578842e57d5c1e0cd
401 |       size: 7129
402 |     outs:
403 |     - path: images/y2022/scatter_distributions.png
404 |       hash: md5
405 |       md5: 78dcd94a72b9a63beb8c7f755d1aeb4a
406 |       size: 549714
407 |   y2022_default_plot:
408 |     cmd: poetry run python -m plotting_examples.y2022.default_plot.plot
409 |     deps:
410 |     - path: plotting_examples/y2022/default_plot/plot.py
411 |       hash: md5
412 |       md5: 29d823025843b30bfc00b615c5c23edb
413 |       size: 1182
414 |     outs:
415 |     - path: images/y2022/default_plot.png
416 |       md5: 1d81f2b1567c55dec832acd4ac5dca60
417 |       size: 24778
418 |   y2022_scatter_w_outlined_text_insert:
419 |     cmd: poetry run python -m plotting_examples.y2022.scatter_w_outlined_text_insert.plot
420 |     deps:
421 |     - path: plotting_examples/y2022/scatter_w_outlined_text_insert/plot.py
422 |       hash: md5
423 |       md5: e775b5e81d6ef7ccb89058e5c78a1922
424 |       size: 4853
425 |     outs:
426 |     - path: images/y2022/scatter_w_outlined_text_insert.png
427 |       md5: 6de49552e5ea5e034d052d98943f89b9
428 |       size: 481441
429 |   y2022_opinium_barchart:
430 |     cmd: poetry run python -m plotting_examples.y2022.opinium_barchart.plot
431 |     deps:
432 |     - path: plotting_examples/y2022/opinium_barchart/plot.py
433 |       hash: md5
434 |       md5: d464e5a4363c14d09582154a6f5392c8
435 |       size: 5707
436 |     outs:
437 |     - path: images/y2022/opinium_barchart.png
438 |       md5: 0f840e7d0a449d057ff8a5d5c27eda48
439 |       size: 65128
440 |   y2022_uk_hexmap:
441 |     cmd: poetry run python -m plotting_examples.y2022.uk_hexmap.plot
442 |     deps:
443 |     - path: plotting_examples/y2022/uk_hexmap/plot.py
444 |       hash: md5
445 |       md5: 2f043cde045257f9a78e99e19989bc23
446 |       size: 3956
447 |     outs:
448 |     - path: images/y2022/uk_hexmap.png
449 |       hash: md5
450 |       md5: e7fe06a9f73be99214870928b9768d3a
451 |       size: 536769
452 |   y2022_meaningless_points:
453 |     cmd: poetry run python -m plotting_examples.y2022.meaningless_points.plot
454 |     deps:
455 |     - path: plotting_examples/y2022/meaningless_points/plot.py
456 |       hash: md5
457 |       md5: 93c3a423474a8309f9004adfcc4effda
458 |       size: 2630
459 |     outs:
460 |     - path: images/y2022/meaningless_points.png
461 |       md5: 0abf95fc710641a8564d5df871600131
462 |       size: 273061
463 |   y2022_stacked_bar_with_single_bars_layout:
464 |     cmd: poetry run python -m plotting_examples.y2022.stacked_bar_with_single_bars_layout.plot
465 |     deps:
466 |     - path: plotting_examples/y2022/stacked_bar_with_single_bars_layout/plot.py
467 |       hash: md5
468 |       md5: 101d705bcfee29021b8772ece52d8bd9
469 |       size: 15024
470 |     outs:
471 |     - path: images/y2022/stacked_bar_with_single_bars_layout.png
472 |       md5: a0f95c280b1fe5edf4f2946654be8ed6
473 |       size: 229895
474 |   y2024_stacked_bar_with_single_bars_layout:
475 |     cmd: poetry run python -m plotting_examples.y2024.stacked_bar_with_single_bars_layout.plot
476 |     deps:
477 |     - path: plotting_examples/y2024/stacked_bar_with_single_bars_layout/plot.py
478 |       hash: md5
479 |       md5: 42d5d36996dcebf858d3dc20de2170e7
480 |       size: 15223
481 |     outs:
482 |     - path: images/y2024/stacked_bar_with_single_bars_layout.png
483 |       md5: a0f95c280b1fe5edf4f2946654be8ed6
484 |       size: 229895
485 |   y2024_mish_weight:
486 |     cmd: poetry run python -m plotting_examples.y2024.mish_weight.plot
487 |     deps:
488 |     - path: plotting_examples/y2024/mish_weight/plot.py
489 |       hash: md5
490 |       md5: c23fddaa7cda28671d084731a63dab0e
491 |       size: 15199
492 |     outs:
493 |     - path: images/y2024/mish_weight.png
494 |       hash: md5
495 |       md5: a0f95c280b1fe5edf4f2946654be8ed6
496 |       size: 229895
497 |   y2024_cat_weight:
498 |     cmd: poetry run python -m plotting_examples.y2024.cat_weight.plot
499 |     deps:
500 |     - path: plotting_examples/y2024/cat_weight/plot.py
501 |       hash: md5
502 |       md5: ffb3af869133e167f72b1b9e234991c1
503 |       size: 11067
504 |     outs:
505 |     - path: images/y2024/cat_weight.png
506 |       hash: md5
507 |       md5: 7ac39610e8a8e23a8582c232da7fc7c5
508 |       size: 1563392
509 | 


--------------------------------------------------------------------------------
/dvc.yaml:
--------------------------------------------------------------------------------
  1 | stages:
  2 |   y2022_bar_plot_w_custom_cmap:
  3 |     cmd: poetry run python -m plotting_examples.y2022.bar_plot_w_custom_cmap.plot
  4 |     deps:
  5 |     - plotting_examples/y2022/bar_plot_w_custom_cmap/plot.py
  6 |     outs:
  7 |     - images/y2022/bar_plot_w_custom_cmap.png:
  8 |         cache: false
  9 |     wdir: .
 10 |   y2022_binary_outcome_variable:
 11 |     cmd: poetry run python -m plotting_examples.y2022.binary_outcome_variable.plot
 12 |     deps:
 13 |     - plotting_examples/y2022/binary_outcome_variable/plot.py
 14 |     outs:
 15 |     - images/y2022/binary_outcome_variable.png:
 16 |         cache: false
 17 |     wdir: .
 18 |   y2022_box_plot_w_scatter_distributions:
 19 |     cmd: poetry run python -m plotting_examples.y2022.box_plot_w_scatter_distributions.plot
 20 |     deps:
 21 |     - plotting_examples/y2022/box_plot_w_scatter_distributions/plot.py
 22 |     outs:
 23 |     - images/y2022/box_plot_w_scatter_distributions.png:
 24 |         cache: false
 25 |     wdir: .
 26 |   y2022_default_plot:
 27 |     cmd: poetry run python -m plotting_examples.y2022.default_plot.plot
 28 |     deps:
 29 |     - plotting_examples/y2022/default_plot/plot.py
 30 |     outs:
 31 |     - images/y2022/default_plot.png:
 32 |         cache: false
 33 |     wdir: .
 34 |   y2022_histogram_with_two_variables:
 35 |     cmd: poetry run python -m plotting_examples.y2022.histogram_with_two_variables.plot
 36 |     deps:
 37 |     - plotting_examples/y2022/histogram_with_two_variables/plot.py
 38 |     outs:
 39 |     - images/y2022/histogram_with_two_variables.png:
 40 |         cache: false
 41 |     wdir: .
 42 |   y2022_line_plot_fill_between:
 43 |     cmd: poetry run python -m plotting_examples.y2022.line_plot_fill_between.plot
 44 |     deps:
 45 |     - plotting_examples/y2022/line_plot_fill_between/plot.py
 46 |     outs:
 47 |     - images/y2022/line_plot_fill_between.png:
 48 |         cache: false
 49 |     wdir: .
 50 |   y2022_meaningless_points:
 51 |     cmd: poetry run python -m plotting_examples.y2022.meaningless_points.plot
 52 |     deps:
 53 |     - plotting_examples/y2022/meaningless_points/plot.py
 54 |     outs:
 55 |     - images/y2022/meaningless_points.png:
 56 |         cache: false
 57 |     wdir: .
 58 |   y2022_opinium_barchart:
 59 |     cmd: poetry run python -m plotting_examples.y2022.opinium_barchart.plot
 60 |     deps:
 61 |     - plotting_examples/y2022/opinium_barchart/plot.py
 62 |     outs:
 63 |     - images/y2022/opinium_barchart.png:
 64 |         cache: false
 65 |     wdir: .
 66 |   y2022_pandas_stacked_bars_with_values:
 67 |     cmd: poetry run python -m plotting_examples.y2022.pandas_stacked_bars_with_values.plot
 68 |     deps:
 69 |     - plotting_examples/y2022/pandas_stacked_bars_with_values/plot.py
 70 |     outs:
 71 |     - images/y2022/pandas_stacked_bars_with_values.png:
 72 |         cache: false
 73 |     wdir: .
 74 |   y2022_pos_neg_split_hbar:
 75 |     cmd: poetry run python -m plotting_examples.y2022.pos_neg_split_hbar.plot
 76 |     deps:
 77 |     - plotting_examples/y2022/pos_neg_split_hbar/plot.py
 78 |     outs:
 79 |     - images/y2022/pos_neg_split_hbar.png:
 80 |         cache: false
 81 |     wdir: .
 82 |   y2022_scatter_distributions:
 83 |     cmd: poetry run python -m plotting_examples.y2022.scatter_distributions.plot
 84 |     deps:
 85 |     - plotting_examples/y2022/scatter_distributions/plot.py
 86 |     outs:
 87 |     - images/y2022/scatter_distributions.png:
 88 |         cache: false
 89 |     wdir: .
 90 |   y2022_scatter_matrix_w_kde_on_diag:
 91 |     cmd: poetry run python -m plotting_examples.y2022.scatter_matrix_w_kde_on_diag.plot
 92 |     deps:
 93 |     - plotting_examples/y2022/scatter_matrix_w_kde_on_diag/plot.py
 94 |     outs:
 95 |     - images/y2022/scatter_matrix_w_kde_on_diag.png:
 96 |         cache: false
 97 |     wdir: .
 98 |   y2022_scatter_w_outlined_text_insert:
 99 |     cmd: poetry run python -m plotting_examples.y2022.scatter_w_outlined_text_insert.plot
100 |     deps:
101 |     - plotting_examples/y2022/scatter_w_outlined_text_insert/plot.py
102 |     outs:
103 |     - images/y2022/scatter_w_outlined_text_insert.png:
104 |         cache: false
105 |     wdir: .
106 |   y2022_sns_violin_plot_custom:
107 |     cmd: poetry run python -m plotting_examples.y2022.sns_violin_plot_custom.plot
108 |     deps:
109 |     - plotting_examples/y2022/sns_violin_plot_custom/plot.py
110 |     outs:
111 |     - images/y2022/sns_violin_plot_custom.png:
112 |         cache: false
113 |     wdir: .
114 |   y2022_split_x_axis_custom_legend:
115 |     cmd: poetry run python -m plotting_examples.y2022.split_x_axis_custom_legend.plot
116 |     deps:
117 |     - plotting_examples/y2022/split_x_axis_custom_legend/plot.py
118 |     outs:
119 |     - images/y2022/split_x_axis_custom_legend.png:
120 |         cache: false
121 |     wdir: .
122 |   y2022_stacked_bar_with_single_bars_layout:
123 |     cmd: poetry run python -m plotting_examples.y2022.stacked_bar_with_single_bars_layout.plot
124 |     deps:
125 |     - plotting_examples/y2022/stacked_bar_with_single_bars_layout/plot.py
126 |     outs:
127 |     - images/y2022/stacked_bar_with_single_bars_layout.png:
128 |         cache: false
129 |     wdir: .
130 |   y2022_uk_hexmap:
131 |     cmd: poetry run python -m plotting_examples.y2022.uk_hexmap.plot
132 |     deps:
133 |     - plotting_examples/y2022/uk_hexmap/plot.py
134 |     outs:
135 |     - images/y2022/uk_hexmap.png:
136 |         cache: false
137 |     wdir: .
138 |   y2024_cat_weight:
139 |     cmd: poetry run python -m plotting_examples.y2024.cat_weight.plot
140 |     deps:
141 |     - plotting_examples/y2024/cat_weight/plot.py
142 |     outs:
143 |     - images/y2024/cat_weight.png:
144 |         cache: false
145 |     wdir: .
146 | 


--------------------------------------------------------------------------------
/generate_readme.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Generate plots at the end of the README.
  3 | 
  4 | Bit of a hack - but works for now, this is mainly just to display all the created plots
  5 | in the README.
  6 | """
  7 | 
  8 | from __future__ import annotations
  9 | 
 10 | import ast
 11 | import re
 12 | from pathlib import Path
 13 | 
 14 | from PIL import Image
 15 | 
 16 | CODE = (
 17 |     "https://github.com/geo7/plotting_examples/blob/main/plotting_examples/{}/plot.py"
 18 | )
 19 | 
 20 | 
 21 | def resize_image_if_needed(
 22 |     *,
 23 |     im: str,
 24 | ) -> None:
 25 |     """
 26 |     Resize image to requred aspect ratio if needed.
 27 | 
 28 |     Given FIGSIZE (width, height) check to see if the aspect ratio (where
 29 |     aspect ratio = height/width) of the image file `im` matches that of
 30 |     FIGSIZE. If not then the image is resized to the correct dimensions in
 31 |     place, so the original is lost with this.
 32 | 
 33 |     Args:
 34 |     ----
 35 |         im (str):
 36 |             Path to image file.
 37 |         FIGSIZE (tuple):
 38 |             Typically `figsize` tuple from `plt.subplots(figsize = FIGSIZE)`.
 39 | 
 40 |     """
 41 |     image = Image.open(im)
 42 |     width, height = image.size
 43 | 
 44 |     m = 500
 45 |     if height > m:
 46 |         scale = m / height
 47 |         new_height = int(height * scale)
 48 |         new_width = int(width * scale)
 49 |         new_image = image.resize((new_width, new_height))
 50 |         new_image.save(im)
 51 | 
 52 | 
 53 | EXCLUDE_PLOTS = [
 54 |     # This is just the template for starting a new plot off.
 55 |     "default_plot",
 56 |     # Got bored of seeing this one.
 57 |     "sns_violin_plot_custom",
 58 |     # This one was annoying as well - it's an example of creating a histogram from
 59 |     # scratch with patches which eh.
 60 |     "histogram_with_two_variables",
 61 | ]
 62 | 
 63 | 
 64 | def docstring_from_py_module(*, mod_path: str | Path) -> str:
 65 |     """
 66 |     Docstrings in plot.py contain context about the plot.
 67 | 
 68 |     These are then used in the README.
 69 |     """
 70 |     # with open(mod_path, encoding="utf8") as fh:
 71 |     with Path(mod_path).open() as fh:
 72 |         code_txt = fh.read()
 73 |     mod = ast.parse(code_txt)
 74 |     docstr = ast.get_docstring(mod)
 75 | 
 76 |     if docstr == "":
 77 |         msg = f"No docstring found for : {mod_path}"
 78 |         raise ValueError(msg)
 79 | 
 80 |     if docstr is None:
 81 |         msg = "Do not expect docstring to be None."
 82 |         raise ValueError(msg)
 83 | 
 84 |     return docstr
 85 | 
 86 | 
 87 | def main() -> int:
 88 |     """Generate readme with plots and docstring extracts."""
 89 |     year = "y2022"
 90 | 
 91 |     years = [
 92 |         # This should get the years up to 2099... If I'm still using matplotlib
 93 |         # at that point I'll consider that a success.... or maybe a failure,
 94 |         # not sure.
 95 |         Path(x.name).stem
 96 |         for x in sorted(Path("./plotting_examples").glob("*"))
 97 |         if "y20" in str(x)
 98 |     ]
 99 | 
100 |     readme_data = {}
101 | 
102 |     for year in years:
103 |         # Will have to update this when there's a different year I guess but
104 |         # for now meh.
105 |         images = sorted(Path(f"./images/{year}").glob("*"))
106 | 
107 |         # For each image want to build up a dictionary of the image path within
108 |         # the repo, and the docstring from the respective python module. Then
109 |         # in the README the python docstring will be added alongside the image.
110 |         for img in images:
111 |             dir_from_img_path = Path(img.name).stem
112 | 
113 |             code_path = (
114 |                 Path("./plotting_examples") / str(year) / dir_from_img_path / "plot.py"
115 |             )
116 | 
117 |             if "DS_Store" in str(code_path):
118 |                 continue
119 | 
120 |             # Not sure why this _wouldn't_ exist
121 |             if not img.exists():
122 |                 raise ValueError
123 | 
124 |             docstr = docstring_from_py_module(mod_path=code_path)
125 | 
126 |             readme_data[dir_from_img_path] = {
127 |                 "img_path": img,
128 |                 "doc_str": docstr,
129 |             }
130 | 
131 |     # Might as well sort the generated plots.
132 |     readme_data = {
133 |         x: readme_data[x]
134 |         for x in sorted(readme_data)
135 |         if not any(exclude in x for exclude in EXCLUDE_PLOTS)
136 |     }
137 | 
138 |     # Create values to append to readme.
139 |     readme_update = "\n\n# Plots\n\n"
140 | 
141 |     # Create some bullet points with the plot names
142 |     for title in readme_data:
143 |         readme_update += f"* [`{title}`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#{title})\n"
144 | 
145 |     readme_update += "\n"
146 | 
147 |     for title, data in readme_data.items():
148 |         year = re.findall(r".*(y\d{4}).*", str(data["img_path"]))[0]
149 |         readme_update += "\n\n"
150 |         url_path = f"{year}/{title}"
151 |         readme_update += f"## [`{title}`]({CODE.format(url_path)})\n\n"
152 |         readme_update += str(data["doc_str"])
153 |         readme_update += "\n\n"
154 |         md_img_format = f"![]({data['img_path']})"
155 |         readme_update += md_img_format
156 | 
157 |     # Update README
158 | 
159 |     # This is used to signal where automated content starts.
160 |     rm_split = "[comment]: # (Automate plots beneath this.)"
161 |     with Path("README.md").open() as rm:
162 |         rm_txt = rm.read()
163 | 
164 |     rm_txt = rm_txt.split(rm_split)[0]
165 |     rm_txt = rm_txt + rm_split + readme_update
166 |     # Ensure new line at eof
167 |     rm_txt += "\n"
168 | 
169 |     with Path("README.md").open("w") as file:
170 |         file.write(rm_txt)
171 | 
172 |     return 0
173 | 
174 | 
175 | if __name__ == "__main__":
176 |     raise SystemExit(main())
177 | 


--------------------------------------------------------------------------------
/images/y2022/bar_plot_w_custom_cmap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/bar_plot_w_custom_cmap.png


--------------------------------------------------------------------------------
/images/y2022/binary_outcome_variable.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/binary_outcome_variable.png


--------------------------------------------------------------------------------
/images/y2022/box_plot_w_scatter_distributions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/box_plot_w_scatter_distributions.png


--------------------------------------------------------------------------------
/images/y2022/default_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/default_plot.png


--------------------------------------------------------------------------------
/images/y2022/histogram_with_two_variables.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/histogram_with_two_variables.png


--------------------------------------------------------------------------------
/images/y2022/line_plot_fill_between.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/line_plot_fill_between.png


--------------------------------------------------------------------------------
/images/y2022/meaningless_points.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/meaningless_points.png


--------------------------------------------------------------------------------
/images/y2022/opinium_barchart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/opinium_barchart.png


--------------------------------------------------------------------------------
/images/y2022/pandas_stacked_bars_with_values.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/pandas_stacked_bars_with_values.png


--------------------------------------------------------------------------------
/images/y2022/pos_neg_split_hbar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/pos_neg_split_hbar.png


--------------------------------------------------------------------------------
/images/y2022/scatter_distributions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/scatter_distributions.png


--------------------------------------------------------------------------------
/images/y2022/scatter_matrix_w_kde_on_diag.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/scatter_matrix_w_kde_on_diag.png


--------------------------------------------------------------------------------
/images/y2022/scatter_w_outlined_text_insert.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/scatter_w_outlined_text_insert.png


--------------------------------------------------------------------------------
/images/y2022/sns_violin_plot_custom.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/sns_violin_plot_custom.png


--------------------------------------------------------------------------------
/images/y2022/split_x_axis_custom_legend.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/split_x_axis_custom_legend.png


--------------------------------------------------------------------------------
/images/y2022/stacked_bar_with_single_bars_layout.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/stacked_bar_with_single_bars_layout.png


--------------------------------------------------------------------------------
/images/y2022/uk_hexmap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/uk_hexmap.png


--------------------------------------------------------------------------------
/images/y2024/cat_weight.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2024/cat_weight.png


--------------------------------------------------------------------------------
/mypy.ini:
--------------------------------------------------------------------------------
 1 | [mypy]
 2 | 
 3 | [mypy-numpy.*]
 4 | ignore_missing_imports = True
 5 | 
 6 | [mypy-pandas.*]
 7 | ignore_missing_imports = True
 8 | 
 9 | [mypy-geopandas.*]
10 | ignore_missing_imports = True
11 | 
12 | [mypy-matplotlib.*]
13 | ignore_missing_imports = True
14 | 
15 | [mypy-seaborn.*]
16 | ignore_missing_imports = True
17 | 
18 | [mypy-PIL.*]
19 | ignore_missing_imports = True
20 | 
21 | [mypy-yaml.*]
22 | ignore_missing_imports = True
23 | 


--------------------------------------------------------------------------------
/plotting_examples/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Plotting examples of various kinds.
3 | 
4 | Some of these might be developed into little projects if they're interesting
5 | enough; the main idea is that they're examples which can be leveraged for other
6 | work, though.
7 | """
8 | 


--------------------------------------------------------------------------------
/plotting_examples/dvc_entry.py:
--------------------------------------------------------------------------------
 1 | """Create an entry in the dvc.yaml file for the particular plot."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | import pathlib
 6 | 
 7 | import yaml
 8 | 
 9 | from plotting_examples.extract_year_name import extract_year_name_from_plot_py
10 | 
11 | 
def add_to_dvc(*, path: pathlib.Path) -> None:
    """
    Add a stage for the plot at ``path`` to dvc.yaml, unless it's there already.

    The stage is named ``<year>_<name>``, runs the plot module with
    ``poetry run python -m``, depends on the plot's ``plot.py`` and declares
    ``images/<year>/<name>.png`` as an uncached output. dvc.yaml is only
    rewritten when a new stage is added.

    Must be run from the repository root as dvc.yaml is located relative to
    the current working directory.

    Args:
    ----
        path (pathlib.Path):
            Path to the ``plot.py`` file of the plot.

    """
    year, name = extract_year_name_from_plot_py(file=str(path))

    dvc_file = pathlib.Path("dvc.yaml")
    # An empty dvc.yaml loads as None, and a bare ``stages:`` key loads with a
    # None value - treat both as "no stages yet" rather than crashing.
    dvc = yaml.safe_load(dvc_file.read_text(encoding="utf8")) or {}
    if dvc.get("stages") is None:
        dvc["stages"] = {}

    stage_name = f"{year}_{name}"
    if stage_name in dvc["stages"]:
        return

    # Project not yet added to dvc.yaml
    dvc["stages"][stage_name] = {
        "wdir": ".",
        "cmd": f"poetry run python -m plotting_examples.{year}.{name}.plot",
        "deps": [f"plotting_examples/{year}/{name}/plot.py"],
        "outs": [{f"images/{year}/{name}.png": {"cache": False}}],
    }

    # Write with the same encoding the file was read with.
    dvc_file.write_text(yaml.dump(dvc), encoding="utf8")
30 | 


--------------------------------------------------------------------------------
/plotting_examples/extract_year_name.py:
--------------------------------------------------------------------------------
 1 | """Get year, name from path to plotting file."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from pathlib import Path
 6 | 
 7 | 
 8 | def extract_year_name_from_plot_py(*, file: str) -> tuple[str, str]:
 9 |     """
10 |     Given a path such as.
11 | 
12 |     >>> /home/.../plotting_examples/plotting_examples/y2022/default_plot/plot.py
13 | 
14 |     Return:
15 |     ------
16 |     >>> 2022, default_plot
17 | 
18 |     """
19 |     pth = Path(file)
20 |     if pth.suffix != ".py":
21 |         msg = "Expect this to be run on .py files."
22 |         raise ValueError(msg)
23 |     year, name = (
24 |         str(pth)
25 |         .rsplit("plotting_examples/plotting_examples/", maxsplit=1)[-1]
26 |         .rsplit("/", maxsplit=1)[0]
27 |         .split("/")
28 |     )
29 |     return year, name
30 | 


--------------------------------------------------------------------------------
/plotting_examples/save_plot_output.py:
--------------------------------------------------------------------------------
 1 | """Util for saving output from plots."""
 2 | 
 3 | from __future__ import annotations
 4 | 
 5 | from pathlib import Path
 6 | from typing import TYPE_CHECKING
 7 | 
 8 | from plotting_examples.extract_year_name import extract_year_name_from_plot_py
 9 | 
10 | if TYPE_CHECKING:
11 |     import matplotlib as mpl
12 | 
13 | 
def save_plot(
    *,
    fig: mpl.figure.Figure,
    file: str,
    dpi: int = 150,
) -> None:
    """
    Save ``fig`` as ``./images/<year>/<name>.png``.

    ``year`` and ``name`` are derived from ``file`` (the path of the plot
    script) via ``extract_year_name_from_plot_py``. The output path is relative
    to the current working directory.

    Parameters
    ----------
    fig
        Figure to write to disk.
    file
        Path of the ``plot.py`` that produced the figure.
    dpi
        Resolution passed to ``fig.savefig``.

    """
    year, name = extract_year_name_from_plot_py(file=file)

    year_dir = Path("./images") / year
    # Create the directory (and ./images itself) when missing; a single call
    # with exist_ok avoids the check-then-create race.
    year_dir.mkdir(parents=True, exist_ok=True)

    png_pth = year_dir / (name + ".png")
    fig.savefig(png_pth, dpi=dpi)
30 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/__init__.py:
--------------------------------------------------------------------------------
1 | """Plots from 2022."""
2 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/bar_plot_w_custom_cmap/__init__.py:
--------------------------------------------------------------------------------
1 | """Bar plot with custom color map."""
2 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/bar_plot_w_custom_cmap/plot.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=duplicate-code
  2 | """
  3 | Bar plot with custom cmap.
  4 | 
  5 | Based on this tweet: https://twitter.com/ryanburge/status/1505602885215834112 - wanted
  6 | to create something with a similar effect using mpl.
  7 | 
  8 | Example of:
  9 | 
 10 | - Different font types (using monospace font)
 11 | - using different colours for bars depending on their values (custom cmap).
 12 | - padding around the axis using rc parameters
 13 | """
 14 | 
 15 | from __future__ import annotations
 16 | 
 17 | import pathlib
 18 | 
 19 | import matplotlib as mpl
 20 | import matplotlib.pyplot as plt
 21 | import matplotlib.ticker as plticker
 22 | import numpy as np
 23 | import pandas as pd
 24 | 
 25 | from plotting_examples import dvc_entry, save_plot_output
 26 | from plotting_examples.y2022 import metadata
 27 | 
 28 | np_rnd = np.random.Generator(np.random.MT19937(0))
 29 | 
 30 | 
 31 | def generate_data() -> pd.DataFrame:
 32 |     """Create sample data."""
 33 |     n = 1_000
 34 |     return pd.DataFrame(
 35 |         {
 36 |             "race": np_rnd.choice(
 37 |                 ["White", "Black", "Hispanic", "Asian", "All Others"],
 38 |                 size=n,
 39 |             ),
 40 |             "church_attendance": np_rnd.choice(
 41 |                 ["Never", "Seldom", "Yearly", "Monthly", "Weekly", "Weekly+"],
 42 |                 size=n,
 43 |                 p=[
 44 |                     0.1,
 45 |                     0.1,
 46 |                     0.1,
 47 |                     0.15,
 48 |                     0.25,
 49 |                     0.3,
 50 |                 ],
 51 |             ),
 52 |         },
 53 |     ).sort_values("race")
 54 | 
 55 | 
 56 | def main() -> mpl.figure.Figure:
 57 |     """Main."""
 58 |     data = generate_data()
 59 | 
 60 |     ordering = [
 61 |         "Never",
 62 |         "Seldom",
 63 |         "Yearly",
 64 |         "Monthly",
 65 |         "Weekly",
 66 |         "Weekly+",
 67 |     ]
 68 | 
 69 |     loc = plticker.MultipleLocator(
 70 |         base=20.0,
 71 |     )  # this locator puts ticks at regular intervals
 72 | 
 73 |     with plt.rc_context(
 74 |         {
 75 |             "xtick.major.pad": 20,
 76 |             "font.family": "monospace",
 77 |         },
 78 |     ):
 79 |         fig, axis = plt.subplots(
 80 |             figsize=(30, 12),
 81 |             ncols=3,
 82 |             nrows=2,
 83 |             sharey=True,
 84 |             constrained_layout=False,
 85 |         )
 86 |         fig.tight_layout(h_pad=10, w_pad=10)
 87 | 
 88 |         axis = axis.flatten()
 89 | 
 90 |         # Style plots.
 91 |         for ax in axis:
 92 |             ax.grid(alpha=0.2, zorder=0)
 93 |             for x in ["top", "right", "left", "bottom"]:
 94 |                 ax.spines[x].set_visible(False)
 95 |             ax.tick_params(axis="both", which="both", length=0, labelsize=18)
 96 | 
 97 |         fig.suptitle(
 98 |             "The Relationship Between Church Attendence and a Republican Vote by Race",
 99 |             fontsize=30,
100 |             y=1.1,
101 |             x=0.0,
102 |             horizontalalignment="left",
103 |         )
104 |         # needs mpl version >= 3.4
105 |         fig.supylabel(
106 |             "Vote for Trump in 2020",
107 |             fontsize=25,
108 |             x=-0.02,
109 |         )
110 | 
111 |         axis = iter(axis)
112 | 
113 |         for g, dfg in data.groupby("race"):
114 |             color_map = mpl.colormaps["cool"].resampled(100)
115 | 
116 |             ax = next(axis)
117 |             ax.yaxis.set_major_locator(loc)
118 |             group_bar_values_unordered = (
119 |                 dfg["church_attendance"].value_counts().to_dict()
120 |             )
121 |             group_bar_values = {x: group_bar_values_unordered[x] for x in ordering}
122 | 
123 |             barplot = ax.bar(
124 |                 x=list(group_bar_values.keys()),
125 |                 height=list(group_bar_values.values()),
126 |                 zorder=3,
127 |             )
128 |             ax.set_title(g, fontsize=25, y=1.0)
129 |             ax.set_ylim(bottom=0, top=90)
130 |             ax.set_yticks([], minor=True)
131 | 
132 |             def fmt(x: float, _pos: int) -> str:
133 |                 # Not _too_ sure what this is about - think it's just what
134 |                 # set_major_formatter applies? It passes two arguments though - the
135 |                 # tick value (x) and the position (pos)...
136 |                 return f"{int(x)}"
137 | 
138 |             ax.yaxis.set_major_formatter(mpl.ticker.FuncFormatter(fmt))
139 | 
140 |             for bar in barplot:
141 |                 # Set the bar color by bar height.
142 |                 bar.set_color(color_map(bar.get_height()))
143 |                 ax.text(
144 |                     x=bar.get_x() + 0.5 * (bar.get_width()),
145 |                     y=bar.get_y() + 2.5,
146 |                     s=f"{bar.get_height()}%",
147 |                     fontsize=20,
148 |                     ha="center",
149 |                 )
150 |                 ax.vlines(
151 |                     x=bar.get_x() + 0.5 * (bar.get_width()),
152 |                     ymin=bar.get_height() - 5,
153 |                     ymax=bar.get_height() + 5,
154 |                     linewidth=4,
155 |                     zorder=5,
156 |                     color="#404040",
157 |                 )
158 |                 ax.hlines(
159 |                     y=bar.get_height() - 5,
160 |                     xmin=(bar.get_x() + 0.5 * (bar.get_width())) - 0.1,
161 |                     xmax=(bar.get_x() + 0.5 * (bar.get_width())) + 0.1,
162 |                     zorder=5,
163 |                     linewidth=4,
164 |                     color="#404040",
165 |                 )
166 |                 ax.hlines(
167 |                     y=bar.get_height() + 5,
168 |                     xmin=(bar.get_x() + 0.5 * (bar.get_width())) - 0.1,
169 |                     xmax=(bar.get_x() + 0.5 * (bar.get_width())) + 0.1,
170 |                     zorder=5,
171 |                     linewidth=4,
172 |                     color="#404040",
173 |                 )
174 | 
175 |             ax.tick_params(axis="y", colors="grey")
176 |             ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)
177 | 
178 |         # Just format the final plot - it's blank - to just get rid of all plot params
179 |         # here. If there was more than one would need to handle a bit differently here.
180 |         ax = next(axis)
181 |         ax.grid(alpha=0)
182 |         ax.set_xticks([])
183 |         for x in ["top", "right", "left", "bottom"]:
184 |             ax.spines[x].set_visible(False)
185 | 
186 |         ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)
187 | 
188 |     fig.set_tight_layout(True)  # type: ignore[attr-defined]
189 |     fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
190 |     return fig
191 | 
192 | 
if __name__ == "__main__":
    # Register this script as a dvc stage, then render and save the figure.
    script_path = pathlib.Path(__file__)
    dvc_entry.add_to_dvc(path=script_path)
    figure = main()
    save_plot_output.save_plot(fig=figure, file=__file__)
    raise SystemExit
197 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/binary_outcome_variable/__init__.py:
--------------------------------------------------------------------------------
1 | """Binary outcome plot."""
2 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/binary_outcome_variable/data.csv:
--------------------------------------------------------------------------------
  1 | ,x,y
  2 | 0,1.2747902159889748,1.0
  3 | 1,0.10622657239729572,1.0
  4 | 2,1.3243067956082084,0.0
  5 | 3,2.264920571643043,1.0
  6 | 4,1.525740164619661,1.0
  7 | 5,1.0213314975629986,0.0
  8 | 6,-2.0868721399805117,0.0
  9 | 7,3.087253476354297,1.0
 10 | 8,1.6930945436063571,0.0
 11 | 9,1.177730197367664,0.0
 12 | 10,-0.2951726334842677,1.0
 13 | 11,1.3939955116958194,0.0
 14 | 12,2.710887234482275,1.0
 15 | 13,2.288566627212669,1.0
 16 | 14,1.0063834913479486,1.0
 17 | 15,0.40506607468755984,0.0
 18 | 16,1.8317978988489252,1.0
 19 | 17,-0.09961820872700582,0.0
 20 | 18,0.8546664554219493,0.0
 21 | 19,2.59869944125502,1.0
 22 | 20,3.6930255818250184,1.0
 23 | 21,3.2351943407464074,1.0
 24 | 22,2.7795466504470148,1.0
 25 | 23,-1.5141989332951926,1.0
 26 | 24,-1.3342798893336982,1.0
 27 | 25,1.3894518047828301,0.0
 28 | 26,4.124574072040192,1.0
 29 | 27,1.5275761412076085,1.0
 30 | 28,3.0920328512031645,1.0
 31 | 29,-0.6103591162672523,0.0
 32 | 30,1.5711361222591333,1.0
 33 | 31,0.3365630598553707,0.0
 34 | 32,1.7740841144550268,1.0
 35 | 33,-0.4240492991525555,0.0
 36 | 34,1.9765103446881005,1.0
 37 | 35,0.6875731648825243,0.0
 38 | 36,0.48075160343621204,1.0
 39 | 37,1.7917034287025366,1.0
 40 | 38,0.29649576518401316,0.0
 41 | 39,-0.20568700623329317,0.0
 42 | 40,2.2529655667205586,1.0
 43 | 41,-0.6138512476336486,0.0
 44 | 42,1.6662716460969933,0.0
 45 | 43,5.607955686672451,1.0
 46 | 44,-1.7178950568217535,0.0
 47 | 45,-1.5541952161418995,0.0
 48 | 46,0.2149985878325868,0.0
 49 | 47,2.2745722013953555,1.0
 50 | 48,3.6688248016156075,1.0
 51 | 49,2.888755806801737,1.0
 52 | 50,3.892428495587999,1.0
 53 | 51,1.8273090056796961,0.0
 54 | 52,2.1998739159436442,1.0
 55 | 53,2.1568554817879733,1.0
 56 | 54,3.348012018831957,1.0
 57 | 55,2.4655510026032945,1.0
 58 | 56,-0.5665483983904225,0.0
 59 | 57,-0.23387727182304746,0.0
 60 | 58,2.7533905386624946,1.0
 61 | 59,2.454163346936323,1.0
 62 | 60,1.9106009445188148,1.0
 63 | 61,-0.562556123742248,0.0
 64 | 62,2.4476866391258056,1.0
 65 | 63,-0.4973108111772468,0.0
 66 | 64,0.7416802995622478,0.0
 67 | 65,3.1769629339911583,1.0
 68 | 66,-0.29643406638083747,0.0
 69 | 67,-0.019125121483606716,0.0
 70 | 68,0.543765521085082,0.0
 71 | 69,3.2595168518283213,1.0
 72 | 70,1.5714071630266757,1.0
 73 | 71,-0.17876891111483648,0.0
 74 | 72,-0.26221130572986856,0.0
 75 | 73,-1.3534058345926328,0.0
 76 | 74,0.9989509035674422,1.0
 77 | 75,-2.2716152798398235,0.0
 78 | 76,-0.8269491136353684,0.0
 79 | 77,-0.7879429469570461,1.0
 80 | 78,0.467851762636354,0.0
 81 | 79,-1.4089215315339054,0.0
 82 | 80,0.8116493521291177,0.0
 83 | 81,-0.1801822831261996,0.0
 84 | 82,2.6892138693830745,1.0
 85 | 83,1.2602001853572777,1.0
 86 | 84,3.7686379048258347,1.0
 87 | 85,1.2021253923467357,1.0
 88 | 86,0.03204825943738432,1.0
 89 | 87,-1.5613869496540094,0.0
 90 | 88,3.5978005775339157,1.0
 91 | 89,0.2904121858658958,1.0
 92 | 90,0.18389993640789415,1.0
 93 | 91,-1.844690371525563,0.0
 94 | 92,0.6033888587207837,0.0
 95 | 93,4.471774864677858,1.0
 96 | 94,1.9050254474289685,1.0
 97 | 95,-0.41139165393189336,1.0
 98 | 96,1.7644022575097518,1.0
 99 | 97,0.685763582724977,0.0
100 | 98,-0.905286178158965,0.0
101 | 99,3.775046959012724,1.0
102 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/binary_outcome_variable/plot.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=duplicate-code
  2 | """
  3 | Plot dichotomous variable.
  4 | 
  5 | Simple dots with median lines - might be nice to add a kde to this as well.
  6 | 
  7 | The y-axis is redundant here as there are only two options (`0.6` doesn't make any
  8 | sense).
  9 | """
 10 | 
 11 | from __future__ import annotations
 12 | 
 13 | import pathlib
 14 | 
 15 | import matplotlib as mpl
 16 | import matplotlib.pyplot as plt
 17 | import numpy as np
 18 | import pandas as pd
 19 | 
 20 | from plotting_examples import dvc_entry, save_plot_output
 21 | from plotting_examples.y2022 import metadata
 22 | 
 23 | np_rnd = np.random.Generator(np.random.MT19937(1))
 24 | 
 25 | 
 26 | def make_data() -> pd.DataFrame:
 27 |     """Generate some sample data for testing with."""
 28 |     n = 1_00
 29 |     y = np_rnd.choice([0, 1], n)
 30 |     x = np_rnd.normal(0, 1, n) + np_rnd.normal(2, 1, n) * y
 31 |     return pd.DataFrame(np.array([x, y]).T, columns=["x", "y"])
 32 | 
 33 | 
 34 | def binary_outcome_plot(
 35 |     data: pd.DataFrame,
 36 |     x_var: str = "x",
 37 |     y: str = "y",
 38 |     fig: mpl.figure.Figure | None = None,
 39 | ) -> mpl.figure.Figure:
 40 |     """
 41 |     Create plot of continuous var by binary outcome.
 42 | 
 43 |     This is just pulled straight from a notebook so is pretty loose. Could improve the
 44 |     typing of this function, as well as it's name, and the use of mpl objects within
 45 |     it.
 46 |     """
 47 |     # if ax is None:
 48 |     fig, ax = plt.subplots(figsize=(20, 3))
 49 | 
 50 |     colors = {
 51 |         0: metadata.color.PINK_COLOUR,
 52 |         1: metadata.color.DEEPER_GREEN,
 53 |     }
 54 |     for g_, dfg in data.groupby([y]):
 55 |         if len(g_) != 1:
 56 |             msg = "Expect these to all be single?"
 57 |             raise ValueError(msg, g_)
 58 |         g = g_[0]
 59 |         ax.scatter(
 60 |             x=dfg[x_var],
 61 |             y=dfg[y],
 62 |             color=colors[g],
 63 |         )
 64 | 
 65 |         med = dfg[x_var].median()
 66 |         ax.scatter(
 67 |             x=med,
 68 |             y=g,
 69 |             s=90,
 70 |             color=colors[g],
 71 |         )
 72 |         ax.vlines(
 73 |             x=med,
 74 |             ymin=min(g, 0.5),
 75 |             ymax=max(g, 0.5),
 76 |             color=colors[g],
 77 |         )
 78 | 
 79 |         ax.text(
 80 |             x=med + 0.5,
 81 |             y=abs(g - 0.15),
 82 |             s=f"Median {g} : {round(med,2)}",
 83 |             fontsize=15,
 84 |         )
 85 |     ax.set_title(
 86 |         f"{x_var} x {y}",
 87 |         fontsize=20,
 88 |     )
 89 |     ax.grid(alpha=0.2)
 90 | 
 91 |     fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
 92 |     ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)
 93 |     return fig
 94 | 
 95 | 
 96 | def main() -> mpl.figure.Figure:
 97 |     """Plot."""
 98 |     with plt.rc_context(
 99 |         {
100 |             "xtick.major.pad": 10,
101 |             "font.family": "monospace",
102 |         },
103 |     ):
104 |         fig = binary_outcome_plot(data=make_data())
105 |         fig.set_tight_layout(True)  # type: ignore[attr-defined]
106 | 
107 |     return fig
108 | 
109 | 
if __name__ == "__main__":
    # Register this script as a dvc stage, then render and save the figure.
    script_path = pathlib.Path(__file__)
    dvc_entry.add_to_dvc(path=script_path)
    figure = main()
    save_plot_output.save_plot(fig=figure, file=__file__)
    raise SystemExit
114 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/box_plot_w_scatter_distributions/__init__.py:
--------------------------------------------------------------------------------
1 | """Box plot with scatter dist."""
2 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/box_plot_w_scatter_distributions/data.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/box_plot_w_scatter_distributions/data.parquet


--------------------------------------------------------------------------------
/plotting_examples/y2022/box_plot_w_scatter_distributions/plot.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=duplicate-code,too-many-locals
  2 | """
  3 | Bar plot with distributions.
  4 | 
  5 | Thought I'd create a bar plot with scatter plots of the distributions adjacent to the
  6 | bars, it was based off something else but I can't remember what. Bar plots are created
  7 | from scratch using hlines etc, for no particular reason.
  8 | 
  9 | Data was from tidy tuesday.
 10 | """
 11 | 
 12 | from __future__ import annotations
 13 | 
 14 | import pathlib
 15 | from typing import TypeVar
 16 | 
 17 | import attr
 18 | import matplotlib as mpl
 19 | import matplotlib.pyplot as plt
 20 | import numpy as np
 21 | import pandas as pd
 22 | 
 23 | from plotting_examples import dvc_entry, save_plot_output
 24 | from plotting_examples.y2022 import metadata
 25 | 
# Module-level random generator (MT19937, seed 1); used for the horizontal
# jitter of the scatter points in ``make_single_box``.
np_rnd = np.random.Generator(np.random.MT19937(1))

# Generic type variable; not referenced elsewhere in this module.
T = TypeVar("T")

# Not referenced elsewhere in this module.
WEEK = "week42"

# Tidy tuesday pumpkins CSV. Not referenced elsewhere in this module - ``main``
# reads ``data.parquet`` from this directory instead.
DATA_URL = (
    "https://raw.githubusercontent.com/rfordatascience/tidytuesday/"
    "master/data/2021/2021-10-19/pumpkins.csv"
)

# Not referenced elsewhere in this module - the plotting code uses
# ``metadata.color.BACKGROUND_COLOUR``.
BACKGROUND_COLOUR = "#f2f2f2"
# https://mycolor.space/?hex=%23FF69B4&sub=1
# Scatter colour for the individually named countries in ``example``.
PINK_COLOUR = "#ff69b4"
# Font size for the axes title and y-axis label in ``example``.
LABEL_FONTSIZE = 12
 41 | 
 42 | 
 43 | def clean_comma(df: pd.DataFrame, *, column: str) -> pd.DataFrame:
 44 |     """Replace commas in series with empty strings."""
 45 |     df = df.copy()
 46 |     row_mask = df[column].astype(str).str.contains(",")
 47 |     df.loc[row_mask, column] = (
 48 |         df.loc[row_mask, column].str.replace(",", "").astype(float)
 49 |     )
 50 |     return df
 51 | 
 52 | 
 53 | def drop_rows_by_match_on_column(
 54 |     df: pd.DataFrame,
 55 |     *,
 56 |     column: str,
 57 |     regexp: str,
 58 | ) -> pd.DataFrame:
 59 |     """Drop rows based on regex on a particular column."""
 60 |     df = df.copy()
 61 |     return df.loc[~df[column].astype(str).str.contains(regexp, regex=True)]
 62 | 
 63 | 
 64 | def top_n_groups(
 65 |     df: pd.DataFrame,
 66 |     *,
 67 |     column: str,
 68 |     n: int,
 69 |     rename: str = "Other",
 70 | ) -> pd.DataFrame:
 71 |     """Get top n groups for a given column, re-write rest to other."""
 72 |     df = df.copy()
 73 |     top_n = df[column].value_counts(dropna=False).head(n).index
 74 |     df.loc[~df[column].isin(top_n), column] = rename
 75 |     return df
 76 | 
 77 | 
 78 | def clean(
 79 |     *,
 80 |     df: pd.DataFrame,
 81 | ) -> pd.DataFrame:
 82 |     """Initial cleaning for all columns."""
 83 |     df = df.copy()
 84 |     return (
 85 |         df.pipe(
 86 |             drop_rows_by_match_on_column,
 87 |             column="country",
 88 |             regexp=".*Entries.*",
 89 |         )
 90 |         .pipe(clean_comma, column="weight_lbs")
 91 |         .assign(weight_lbs=lambda x: x["weight_lbs"].astype(float))
 92 |         .pipe(clean_comma, column="est_weight")
 93 |         .assign(est_weight=lambda x: x["est_weight"].astype(float))
 94 |         .assign(ott=lambda x: x["ott"].astype(float))
 95 |         .assign(pct_chart=lambda x: x["pct_chart"].astype(float))
 96 |     )
 97 | 
 98 | 
@attr.frozen(kw_only=True)
class PlotData:
    # pylint: disable=too-few-public-methods

    """
    Data for use in both box and scatter plotting.

    Frozen, keyword-only container; one instance is built per country by
    ``plot_data_for_weight_by_country``.
    """

    # Output of ``DataFrame.describe()`` for the group's rows.
    box: pd.DataFrame
    # The group's ``weight_lbs`` values as a plain list.
    scatter: list[float]
107 | 
108 | 
def plot_data_for_weight_by_country(df: pd.DataFrame) -> dict[str, PlotData]:
    """
    Build per-country plotting data for pumpkin weights.

    The nine most frequent countries keep their name and everything else is
    relabelled "Other". A copy of every row is also added under the label
    "All Countries".

    Parameters
    ----------
    df
        Cleaned frame with at least ``country`` and ``weight_lbs`` columns.

    Returns
    -------
    Mapping from country label to a ``PlotData`` holding the group's
    ``describe()`` summary and its list of weights.

    """
    # top_n_groups works on its own copy, so the caller's frame is untouched.
    weights = top_n_groups(df=df, column="country", n=9, rename="Other")
    weights = weights[["country", "weight_lbs"]]
    weights = pd.concat(
        [
            weights,
            weights.assign(country="All Countries"),
        ],
        axis=0,
    )
    # we want to order the countries by the median of the weights for each group.
    sorting = list(
        weights.groupby("country")["weight_lbs"].median().sort_values().index,
    )
    weights = weights.iloc[pd.Categorical(weights["country"], sorting).argsort()]

    return {
        country: PlotData(
            box=group.describe(),
            scatter=group["weight_lbs"].to_list(),
        )
        for country, group in weights.groupby("country")
    }
134 | 
135 | 
136 | # --------------------------------------------------------------------------------------
137 | 
138 | # PLOTTING METHODS
139 | 
140 | 
def top_bottom_whisker_y_values(*, values: list[float]) -> tuple[float, float]:
    """
    Get the whisker end points for a box plot of ``values``.

    The whiskers reach to the most extreme data points that still lie within
    1.5 * IQR of the box: the smallest value at or above ``Q1 - 1.5 * IQR`` and
    the largest value at or below ``Q3 + 1.5 * IQR``.

    Parameters
    ----------
    values
        The observations the box plot is drawn from.

    Returns
    -------
    ``(whisker_bottom, whisker_top)``.

    """
    series = pd.Series(values)
    summary = series.describe()
    quant_1 = summary.get("25%")
    quant_3 = summary.get("75%")
    iqr = quant_3 - quant_1
    top_range = quant_3 + 1.5 * iqr
    # The lower fence is measured from the first quartile, not the third.
    bottom_range = quant_1 - 1.5 * iqr
    # Inclusive comparisons keep points that sit exactly on a fence, and stop
    # the result collapsing to NaN when the IQR is zero.
    # top of the boxplot
    box_plot_top = series[series.le(top_range)].max()
    # bottom of the box_plot
    box_plot_bottom = series[series.ge(bottom_range)].min()
    return box_plot_bottom, box_plot_top
154 | 
155 | 
def boxp_hline(
    *,
    ax: plt.Axes,  # type: ignore[name-defined]
    x_center: float,
    y_value: float,
    box_width: float,
    linewidth: float,
    box_colour: str,
) -> None:
    """Draw one horizontal edge of the box, ``box_width`` wide and centred on ``x_center``."""
    left_edge = x_center - box_width * 0.5
    right_edge = x_center + box_width * 0.5
    line_style = {
        "linewidth": linewidth,
        "color": box_colour,
        "zorder": 3,
        "capstyle": "round",
    }
    ax.hlines(y=y_value, xmin=left_edge, xmax=right_edge, **line_style)
175 | 
176 | 
def boxp_vline(
    ax: plt.Axes,  # type: ignore[name-defined]
    x: float,
    ymin: float,
    ymax: float,
    color: str,
    linewidth: float,
) -> None:
    """Draw one vertical edge of the box at ``x`` between ``ymin`` and ``ymax``."""
    line_style = {
        "color": color,
        "linewidth": linewidth,
        "zorder": 3,
        "capstyle": "round",
    }
    ax.vlines(x=x, ymin=ymin, ymax=ymax, **line_style)
195 | 
196 | 
def whisker_tops(
    *,
    ax: plt.Axes,  # type: ignore[name-defined]
    whisker_top: float,
    whisker_bottom: float,
    xmin: float,
    xmax: float,
    color: str,
) -> None:
    """Draw the horizontal caps at the top and bottom whisker ends."""
    # Top cap first, then bottom cap.
    for cap_level in (whisker_top, whisker_bottom):
        ax.hlines(
            y=cap_level,
            xmin=xmin,
            xmax=xmax,
            color=color,
            zorder=1,
        )
221 | 
222 | 
def make_single_box(
    *,
    ax: plt.Axes,  # type: ignore[name-defined]
    values: list[float],
    x_center: float,
    scatter_color: str,
    linewidth: float = 5,
    box_width: float = 0.14,
    box_colour: str = "#000000",
    whisker_color: str = "#000000",
    median_colour: str = "#000000",
    outlier_colour: str = "#000000",
) -> None:
    """
    Draw a hand-built box plot for ``values`` at ``x_center``.

    The box, median line, whiskers and outliers are drawn from individual
    lines and points, and a jittered scatter of all values is drawn just to
    the right of the box.
    """
    series = pd.Series(values)
    stats = series.describe().to_dict()
    lower_q, median, upper_q = stats["25%"], stats["50%"], stats["75%"]
    box_left = x_center - box_width * 0.5
    box_right = x_center + box_width * 0.5

    # Box outline: bottom and top edges, then right and left sides. Built from
    # lines out of curiosity - a rectangle patch would also do.
    for edge_y in (lower_q, upper_q):
        boxp_hline(
            ax=ax,
            x_center=x_center,
            y_value=edge_y,
            box_width=box_width,
            linewidth=linewidth,
            box_colour=box_colour,
        )
    for side_x in (box_right, box_left):
        boxp_vline(
            ax=ax,
            x=side_x,
            ymin=lower_q,
            ymax=upper_q,
            color=box_colour,
            linewidth=linewidth,
        )

    # Median line across the box.
    ax.hlines(
        y=median,
        xmin=box_left,
        xmax=box_right,
        color=median_colour,
        zorder=1,
        linewidth=linewidth,
    )

    # Vertical whisker lines: upper whisker first, then lower.
    whisker_bottom, whisker_top = top_bottom_whisker_y_values(values=values)
    for box_edge, whisker_end in (
        (upper_q, whisker_top),
        (lower_q, whisker_bottom),
    ):
        ax.vlines(
            x=x_center,
            ymin=box_edge,
            ymax=whisker_end,
            color=whisker_color,
            capstyle="round",
        )

    # Outliers: every value beyond either whisker end.
    beyond_whiskers = series.lt(whisker_bottom) | series.gt(whisker_top)
    outliers = series[beyond_whiskers]
    ax.scatter(
        x=[x_center] * len(outliers),
        y=list(outliers),
        color=outlier_colour,
        s=5,
        alpha=0.8,
        edgecolors=None,
    )

    # Scatter of all values, jittered horizontally around x_center + 0.2.
    jittered_x = np_rnd.normal(
        loc=x_center + 0.2,
        scale=0.03,
        size=len(values),
    )
    ax.scatter(
        x=jittered_x,
        y=values,
        alpha=0.1,
        s=10,
        zorder=-1,
        color=scatter_color,
        edgecolors=None,
    )
343 | 
344 | 
def example(*, df: pd.DataFrame) -> mpl.figure.Figure:
    """
    Draw the pumpkin weight figure: one box plus scatter per country.

    Countries are placed left to right in a fixed order, 0.5 apart starting at
    x = 1. "Other" and "All Countries" use a grey scatter; every other country
    uses the pink one.
    """
    country_data = plot_data_for_weight_by_country(df=df)
    fig, ax = plt.subplots(figsize=(20, 8))

    country_metadata: dict[str, dict[str, str]] = {
        "France": {},
        "Japan": {},
        "Canada": {},
        "Germany": {},
        "United Kingdom": {},
        "Italy": {},
        "United States": {},
        "Austria": {},
        "Belgium": {},
        "Other": {"scatter_color": "#919191"},
        "All Countries": {"scatter_color": "#919191"},
    }
    aggregate_labels = ["Other", "All Countries"]
    xpos = 1.0
    xpos_inc = 0.5

    for country in country_metadata:
        if country in aggregate_labels:
            color = "#919191"
        else:
            color = PINK_COLOUR
        make_single_box(
            ax=ax,
            values=country_data[country].scatter,
            x_center=xpos,
            linewidth=1.5,
            scatter_color=color,
            outlier_colour="#000000",
        )
        xpos += xpos_inc

    # Tick labels: one tick per box, labelled with the country name.
    ax.set_xticks(np.arange(1, xpos, xpos_inc))
    ax.set_xticklabels(list(country_metadata.keys()))

    # Plot formatting: no tick marks, no spines, faint horizontal grid.
    ax.tick_params(axis="both", which="both", length=0)
    for side in ("top", "right", "left", "bottom"):
        ax.spines[side].set_visible(False)

    ax.grid(alpha=0.15, axis="y", zorder=0)

    background = metadata.color.BACKGROUND_COLOUR
    fig.patch.set_facecolor(background)
    ax.set_facecolor(background)

    # Titles and axis labels.
    ax.set_title(
        "This is something about pumpkin competitions or something like that.",
        color="#919191",
        fontsize=LABEL_FONTSIZE,
    )
    fig.suptitle(
        "Data Visualization of Competitive Pumpkin Sport 2013-2021",
        fontsize=20,
    )
    ax.set_ylabel("Weight lbs", fontsize=LABEL_FONTSIZE)
    ax.yaxis.set_label_coords(-0.05, 0.5)
    return fig
413 | 
414 | 
def main() -> mpl.figure.Figure:
    """Load and clean ``data.parquet`` from this directory, then build the figure."""
    data_path = pathlib.Path(__file__).parent / "data.parquet"
    pumpkins = clean(df=pd.read_parquet(data_path))

    rc_overrides = {
        "xtick.major.pad": 10,
        "font.family": "monospace",
    }
    with plt.rc_context(rc_overrides):
        figure = example(df=pumpkins)
    return figure
427 | 
428 | 
if __name__ == "__main__":
    # Register this script as a dvc stage, then render and save the figure.
    script_path = pathlib.Path(__file__)
    dvc_entry.add_to_dvc(path=script_path)
    figure = main()
    save_plot_output.save_plot(fig=figure, file=__file__)
    raise SystemExit
433 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/default_plot/__init__.py:
--------------------------------------------------------------------------------
1 | """Default plot."""
2 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/default_plot/plot.py:
--------------------------------------------------------------------------------
 1 | # pylint: disable=duplicate-code
 2 | """Default for plotting example - just to base others off."""
 3 | 
 4 | from __future__ import annotations
 5 | 
 6 | import pathlib
 7 | 
 8 | import matplotlib as mpl
 9 | import matplotlib.pyplot as plt
10 | import pandas as pd
11 | 
12 | from plotting_examples import dvc_entry, save_plot_output
13 | from plotting_examples.y2022 import metadata
14 | 
15 | 
16 | def get_sample_data() -> pd.DataFrame:
17 |     """Return a fixed five-row frame with integer columns ``x`` and ``y``."""
18 |     return pd.DataFrame(
19 |         {
20 |             "x": [1, 2, 3, 4, 5],
21 |             "y": [1, 2, 2, 3, 8],
22 |         },
23 |     )
24 | 
25 | 
26 | def main() -> mpl.figure.Figure:
27 |     """Scatter the sample data on a 10x10 figure and return that figure."""
28 |     with plt.rc_context(
29 |         {
30 |             "xtick.major.pad": 10,
31 |             "font.family": "monospace",
32 |         },
33 |     ):
34 |         fig, ax = plt.subplots(
35 |             figsize=(10, 10),
36 |         )
37 |         df = get_sample_data()
38 | 
39 |         ax.scatter(x=df["x"], y=df["y"])
40 |         ax.set_title("Default plotting.")
41 |         # Figure and axes share the same background colour.
42 |         fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
43 |         ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)
44 |     return fig
45 | 
46 | 
47 | if __name__ == "__main__":
48 |     dvc_entry.add_to_dvc(path=pathlib.Path(__file__))
49 |     save_plot_output.save_plot(fig=main(), file=__file__)
50 |     raise SystemExit
51 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/histogram_with_two_variables/__init__.py:
--------------------------------------------------------------------------------
1 | """Histogram with overlap."""
2 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/histogram_with_two_variables/plot.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=duplicate-code
  2 | """
  3 | Histogram created from scratch using matplotlib.
  4 | 
  5 | There are custom bars created for each bin, instead of using ax.bar. I think it was
  6 | originally based on something but I can't find the original / reference now so am just
  7 | left with this.
  8 | 
  9 | The result is pretty rubbish :)
 10 | """
 11 | 
 12 | from __future__ import annotations
 13 | 
 14 | import pathlib
 15 | 
 16 | import matplotlib as mpl
 17 | import matplotlib.pyplot as plt
 18 | import numpy as np
 19 | import pandas as pd
 20 | from matplotlib import patches, ticker
 21 | 
 22 | from plotting_examples import dvc_entry, save_plot_output
 23 | from plotting_examples.y2022 import metadata
 24 | 
 25 | 
 26 | def gen_data() -> tuple[pd.DataFrame, dict[str, str]]:
 27 |     """
 28 |     Generate binned sample percentages for two groups.
 29 | 
 30 |     Return data shaped like this (the numbers are illustrative only):
 31 | 
 32 |     >>>     male  female  row_min  row_max    color  pain_scale
 33 |     >>> 0   6.8     0.8      0.8      6.8  #ff69b4           1
 34 |     >>> 1  10.7     1.0      1.0     10.7  #ff69b4           2
 35 |     >>> 2  14.8     4.3      4.3     14.8  #ff69b4           3
 36 |     >>> 3  18.9    10.5     10.5     18.9  #ff69b4           4
 37 |     >>> 4  19.3    14.0     14.0     19.3  #ff69b4           5
 38 |     >>> 5  16.9    19.9     16.9     19.9  #B4EDD2           6
 39 |     >>> 6   6.8    16.6      6.8     16.6  #B4EDD2           7
 40 |     >>> 7   3.9    16.2      3.9     16.2  #B4EDD2           8
 41 |     >>> 8   1.3     9.3      1.3      9.3  #B4EDD2           9
 42 |     >>> 9   0.6     7.4      0.6      7.4  #B4EDD2          10
 43 |     The second item returned maps each group name to its colour.
 44 |     """
 45 |     rng = np.random.default_rng(1)
 46 |     n = 1_000
 47 |     df = pd.DataFrame(
 48 |         {
 49 |             "male": np.digitize(
 50 |                 np.clip(rng.normal(loc=4, scale=2, size=n), 0, 10),
 51 |                 range(10),
 52 |             ),
 53 |             "female": np.digitize(
 54 |                 np.clip(rng.normal(loc=6, scale=2, size=n), 0, 10),
 55 |                 range(10),
 56 |             ),
 57 |         },
 58 |     )
 59 |     # https://coolors.co/b4edd2-a0cfd3-8d94ba-9a7aa0-87677b
 60 |     colour_map = {
 61 |         "male": metadata.color.PINK_COLOUR,
 62 |         "female": metadata.color.LIGHT_GREEN,
 63 |     }
 64 | 
 65 |     # Percentage of each group per bin, then per-row min/max and a colour.
 66 |     plot_data = (
 67 |         df.apply(lambda x: x.value_counts(normalize=True).mul(100))
 68 |         .assign(
 69 |             row_min=lambda df: df.apply(lambda dt: min(dt.to_list()), axis=1),
 70 |             row_max=lambda df: df.apply(lambda dt: max(dt.to_list()), axis=1),
 71 |             # colour by larger group; idxmax returns the first max column
 72 |             color=lambda df: df.idxmax(axis=1).map(colour_map),
 73 |             pain_scale=lambda df: df.index,
 74 |         )
 75 |         .reset_index(drop=True)
 76 |     )
 77 |     return plot_data, colour_map
 78 | 
 79 | 
 80 | def main() -> mpl.figure.Figure:
 81 |     """Create plot."""
 82 |     plot_data, colour_map = gen_data()
 83 | 
 84 |     plt.style.use("./plotting_examples/rc.mplstyle")
 85 | 
 86 |     with plt.rc_context(
 87 |         {
 88 |             "xtick.major.pad": 10,
 89 |             "font.family": "monospace",
 90 |         },
 91 |     ):
 92 |         fig, ax = plt.subplots(figsize=(15, 5))
 93 | 
 94 |         # ensure that axis area covers data.
 95 |         ax.set_xlim(left=0, right=11)
 96 |         ax.set_ylim(
 97 |             bottom=0,
 98 |             top=plot_data["row_max"].max() + 5,
 99 |         )
100 | 
101 |         def add_bar(
102 |             ax: plt.Axes,  # type: ignore[name-defined]
103 |             x: int,
104 |             y1: float,
105 |             y2: float,
106 |             facecolor: str,
107 |             alpha: float,
108 |             outline: bool,
109 |         ) -> None:
110 |             """Add a bar to the given ax object."""
111 |             width = 1
112 |             rect = patches.Rectangle(
113 |                 xy=(x - 0.5 * width, y1),
114 |                 width=width,
115 |                 height=y2,
116 |                 linewidth=1,
117 |                 edgecolor="none",
118 |                 facecolor=facecolor,
119 |                 alpha=alpha,
120 |             )
121 |             ax.add_patch(rect)
122 |             if outline:
123 |                 ax.hlines(
124 |                     y=y2,
125 |                     xmin=x - 0.5 * width,
126 |                     xmax=x + 0.5 * width,
127 |                 )
128 | 
129 |         for row in plot_data.itertuples():
130 |             # plot the diffs
131 |             add_bar(
132 |                 ax=ax,
133 |                 x=row.pain_scale,
134 |                 y1=row.row_min,
135 |                 y2=(row.row_max - row.row_min),
136 |                 facecolor=row.color,
137 |                 alpha=0.8,
138 |                 outline=False,
139 |             )
140 |             # plot beneath the diffs
141 |             add_bar(
142 |                 ax=ax,
143 |                 x=row.pain_scale,
144 |                 y1=0,
145 |                 y2=row.row_min,
146 |                 facecolor=metadata.color.GREY,
147 |                 alpha=0.2,
148 |                 outline=False,
149 |             )
150 | 
151 |         ax.spines["right"].set_visible(False)
152 |         ax.spines["top"].set_visible(False)
153 | 
154 |         label_fontsize = 15
155 |         ax.set_ylabel("Percentage of respondents", fontsize=label_fontsize)
156 |         ax.set_xlabel(
157 |             "Some scale (1 least, 10 greatest)",
158 |             fontsize=label_fontsize,
159 |         )
160 |         ax.set_title(
161 |             "Reporting of something for male, female respondents",
162 |             fontsize=20,
163 |         )
164 | 
165 |         legend_elements = [
166 |             patches.Patch(
167 |                 facecolor=colour_map["male"],
168 |                 edgecolor="none",
169 |                 label="male",
170 |             ),
171 |             patches.Patch(
172 |                 facecolor=colour_map["female"],
173 |                 edgecolor="none",
174 |                 label="female",
175 |             ),
176 |         ]
177 |         ax.legend(
178 |             handles=legend_elements,
179 |             frameon=False,
180 |             fontsize=15,
181 |         )
182 | 
183 |         ax.yaxis.set_major_formatter(ticker.FormatStrFormatter("%d%%"))
184 |         ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
185 | 
186 |         fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
187 |         ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)
188 | 
189 |     return fig
190 | 
191 | 
192 | if __name__ == "__main__":
193 |     dvc_entry.add_to_dvc(path=pathlib.Path(__file__))
194 |     save_plot_output.save_plot(fig=main(), file=__file__)
195 |     raise SystemExit
196 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/line_plot_fill_between/__init__.py:
--------------------------------------------------------------------------------
1 | """Line plot with fill."""
2 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/line_plot_fill_between/data.csv:
--------------------------------------------------------------------------------
 1 | year,month,day,amount
 2 | 2022,1,10,70
 3 | 2022,1,11,-15
 4 | 2022,1,11,30
 5 | 2022,1,11,-40
 6 | 2022,1,11,30
 7 | 2022,1,13,-35
 8 | 2022,1,14,-20
 9 | 2022,1,14,15
10 | 2022,1,17,-10
11 | 2022,1,17,-5
12 | 2022,1,18,-15
13 | 2022,1,18,-25
14 | 2022,1,18,15
15 | 2022,1,19,-10
16 | 2022,1,19,60
17 | 2022,1,20,-20
18 | 2022,1,20,-30
19 | 2022,1,21,-15
20 | 2022,1,21,30
21 | 2022,1,25,-10
22 | 2022,1,26,-10
23 | 2022,1,26,10
24 | 2022,1,27,25
25 | 2022,1,27,80
26 | 2022,1,28,-15
27 | 2022,1,28,-15
28 | 2022,1,28,-20
29 | 2022,1,31,-10
30 | 2022,2,1,-15
31 | 2022,2,3,-10
32 | 2022,2,4,10
33 | 2022,2,7,-10
34 | 2022,2,7,-50
35 | 2022,2,7,20
36 | 2022,2,8,-5
37 | 2022,2,8,-45
38 | 2022,2,8,45
39 | 2022,2,9,-30
40 | 2022,2,10,50
41 | 2022,2,10,-40
42 | 2022,2,10,30
43 | 2022,2,11,-5
44 | 2022,2,11,-10
45 | 2022,2,14,10
46 | 2022,2,14,20
47 | 2022,2,15,-25
48 | 2022,2,15,90
49 | 2022,2,25,-15
50 | 2022,3,1,-15
51 | 2022,3,1,-70
52 | 2022,3,1,30
53 | 2022,3,3,-5
54 | 2022,3,4,45
55 | 2022,3,4,-65
56 | 2022,3,8,10
57 | 2022,3,8,15
58 | 2022,3,9,10
59 | 2022,3,10,30
60 | 2022,3,10,20
61 | 2022,3,11,10
62 | 2022,3,14,30
63 | 2022,3,15,-30
64 | 2022,3,15,-30
65 | 2022,3,16,-15
66 | 2022,3,16,15
67 | 2022,3,16,50
68 | 2022,3,17,-30
69 | 2022,3,17,-40
70 | 2022,3,17,30
71 | 2022,3,18,60
72 | 2022,3,18,60
73 | 2022,3,18,-50
74 | 2022,3,21,-15
75 | 2022,3,22,30
76 | 2022,3,22,-10
77 | 2022,3,23,-40
78 | 2022,3,24,160
79 | 2022,3,25,15
80 | 2022,3,25,120
81 | 2022,4,12,-15
82 | 2022,4,13,50
83 | 2022,4,14,-20
84 | 2022,4,14,300
85 | 2022,4,15,400
86 | 2022,4,16,480
87 | 2022,4,19,100
88 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/line_plot_fill_between/plot.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=duplicate-code
  2 | """
  3 | Visualise time tracking, how much over/under time.
  4 | 
  5 | Mainly serves as an example of plotting with dates, and filling above / below
  6 | particular values on a plot.
  7 | 
  8 | Example of:
  9 | 
 10 | - plotting with dates
 11 | - different fonts
 12 | - filling between lines
 13 | """
 14 | 
 15 | from __future__ import annotations
 16 | 
 17 | import pathlib
 18 | 
 19 | import matplotlib as mpl
 20 | import matplotlib.dates as mdates
 21 | import matplotlib.pyplot as plt
 22 | import pandas as pd
 23 | 
 24 | from plotting_examples import dvc_entry, save_plot_output
 25 | from plotting_examples.y2022 import metadata
 26 | 
 27 | PINK_COLOUR = "#ff69b4"
 28 | 
 29 | 
 30 | def main() -> mpl.figure.Figure:
 31 |     """Main."""
 32 |     with plt.rc_context(
 33 |         {
 34 |             "xtick.major.pad": 10,
 35 |             "font.family": "monospace",
 36 |         },
 37 |     ):
 38 |         fig, ax = plt.subplots(
 39 |             figsize=(15, 5),
 40 |             ncols=1,
 41 |             nrows=1,
 42 |             sharey=True,
 43 |             constrained_layout=False,
 44 |         )
 45 | 
 46 |         df = (
 47 |             pd.read_csv(
 48 |                 "./plotting_examples/y2022/line_plot_fill_between/data.csv",
 49 |             )
 50 |             .rename(columns=lambda x: x.lower().replace(" ", "_").strip())
 51 |             .assign(y=lambda df: df["amount"])
 52 |         )
 53 | 
 54 |         # Create date column from separate columns in sheet data.
 55 |         df["date"] = pd.to_datetime(
 56 |             df["day"].astype(str)
 57 |             + "/"
 58 |             + df["month"].astype(str)
 59 |             + "/"
 60 |             + df["year"].astype(str),
 61 |             format="%d/%m/%Y",
 62 |         )
 63 | 
 64 |         # Interested in the cumulative sum either way.
 65 |         df["y_cumsum"] = df["y"].cumsum()
 66 | 
 67 |         # For creating the plot title.
 68 |         date_min = df["date"].min().date().strftime("%d/%m/%Y")
 69 |         date_max = df["date"].max().date().strftime("%d/%m/%Y")
 70 | 
 71 |         # highlight break.
 72 |         up_to_break = df["month"].le(3) & df["day"].le(28)
 73 |         past_break = df["month"].ge(4) & df["day"].ge(11)
 74 | 
 75 |         fig, ax = plt.subplots(figsize=(25, 15))
 76 | 
 77 |         # before break
 78 |         ax.plot(
 79 |             df.loc[up_to_break, "date"],
 80 |             df.loc[up_to_break, "y_cumsum"],
 81 |             color="black",
 82 |             linewidth=2,
 83 |         )
 84 |         # after break
 85 |         ax.plot(
 86 |             df.loc[past_break, "date"],
 87 |             df.loc[past_break, "y_cumsum"],
 88 |             color="black",
 89 |             linewidth=2,
 90 |         )
 91 | 
 92 |         # Put black points on values which were over 60.
 93 |         ax.scatter(
 94 |             x=df.loc[df["y"].gt(60), "date"],
 95 |             y=df.loc[df["y"].gt(60), "y_cumsum"],
 96 |             s=100,
 97 |             color="black",
 98 |             zorder=3,
 99 |         )
100 | 
101 |         ax.xaxis.set_major_locator(mdates.DayLocator(interval=1))  # type: ignore[no-untyped-call]
102 |         ax.grid(alpha=0.15)
103 | 
104 |         # labels
105 |         ax.set_ylabel(
106 |             "Units over/under",
107 |             fontsize=15,
108 |         )
109 |         ax.set_title(
110 |             f"Information about something useful, from {date_min} to {date_max}",
111 |             fontsize=25,
112 |         )
113 | 
114 |         # Text
115 |         ax.text(
116 |             x=df["date"].to_list()[2],
117 |             y=1500,
118 |             s=(
119 |                 "Shows information about something for some time which was interesting."
120 |                 " \nBlack points indicate something of particular note."
121 |             ),
122 |             fontsize=25,
123 |         )
124 | 
125 |         # Color beneath plot based on whether it's over or under 0.
126 |         # Before holiday.
127 |         ax.fill_between(
128 |             df.loc[up_to_break, "date"],
129 |             0,
130 |             df.loc[up_to_break, "y_cumsum"],
131 |             alpha=0.5,
132 |             color=metadata.color.PINK_COLOUR,
133 |             where=df.loc[up_to_break, "y_cumsum"] >= 0,
134 |         )
135 |         ax.fill_between(
136 |             df.loc[up_to_break, "date"],
137 |             0,
138 |             df.loc[up_to_break, "y_cumsum"],
139 |             alpha=0.5,
140 |             color=metadata.color.GREY,
141 |             where=df.loc[up_to_break, "y_cumsum"] <= 0,
142 |         )
143 | 
144 |         # Past holiday
145 |         ax.fill_between(
146 |             df.loc[past_break, "date"],
147 |             0,
148 |             df.loc[past_break, "y_cumsum"],
149 |             alpha=0.5,
150 |             color=metadata.color.PINK_COLOUR,
151 |             where=df.loc[past_break, "y_cumsum"] >= 0,
152 |         )
153 |         ax.fill_between(
154 |             df.loc[past_break, "date"],
155 |             0,
156 |             df.loc[past_break, "y_cumsum"],
157 |             alpha=0.5,
158 |             color=metadata.color.GREY,
159 |             where=df.loc[past_break, "y_cumsum"] <= 0,
160 |         )
161 | 
162 |         # Format default axis to just show the month/day.
163 |         ax.xaxis.set_major_locator(mdates.DayLocator(interval=1))  # type: ignore[no-untyped-call]
164 |         ax.xaxis.set_major_formatter(mdates.DateFormatter("%d/%m"))  # type: ignore[no-untyped-call]
165 | 
166 |         for label in ax.get_xticklabels():
167 |             label.set_rotation(80)
168 |             label.set_ha("center")
169 | 
170 |     fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
171 |     ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)
172 | 
173 |     fig.set_tight_layout(True)  # type: ignore[attr-defined]
174 |     return fig
175 | 
176 | 
177 | if __name__ == "__main__":
178 |     dvc_entry.add_to_dvc(path=pathlib.Path(__file__))
179 |     save_plot_output.save_plot(fig=main(), file=__file__)
180 |     raise SystemExit
181 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/meaningless_points/__init__.py:
--------------------------------------------------------------------------------
1 | """Random viz."""
2 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/meaningless_points/plot.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=duplicate-code
  2 | """
  3 | Some random points.
  4 | 
  5 | No real meaning to this - was messing about with some bokeh style bits (the effect, not
  6 | the python library), so dumping here. Not sure I'm mad on the output - it's also slow
  7 | as hell.
  8 | """
  9 | 
 10 | from __future__ import annotations
 11 | 
 12 | import itertools
 13 | import pathlib
 14 | 
 15 | import matplotlib as mpl
 16 | import matplotlib.pyplot as plt
 17 | import numpy as np
 18 | 
 19 | from plotting_examples import dvc_entry, save_plot_output
 20 | from plotting_examples.y2022 import metadata
 21 | 
 22 | np_rnd = np.random.Generator(np.random.MT19937())
 23 | 
 24 | 
 25 | def main() -> mpl.figure.Figure:
 26 |     """Scatter groups of glowing points on a black figure and return it."""
 27 |     fig, ax = plt.subplots(figsize=(10, 10))
 28 |     ax.set_facecolor("black")
 29 |     # make_point stacks markers at one (x, y): largest/faintest first, smallest last.
 30 |     def make_point(
 31 |         *,
 32 |         x: float,
 33 |         y: float,
 34 |         con_min: int = 10,
 35 |         con_max: int = 10_000,
 36 |         num_cont: int = 20,
 37 |         alpha_mult: float = 0.2,
 38 |         color: str = "black",
 39 |     ) -> None:
 40 |         concentric = np.flip(np.linspace(con_min, con_max, num=num_cont))
 41 |         alphas = np.flip(concentric / con_max) * alpha_mult
 42 |         for con, alph in zip(concentric, alphas):
 43 |             ax.scatter(
 44 |                 x=x,
 45 |                 y=y,
 46 |                 color=color,
 47 |                 s=con,
 48 |                 alpha=alph,
 49 |             )
 50 | 
 51 |     colors = itertools.cycle(
 52 |         [
 53 |             metadata.color.PINK_COLOUR,
 54 |             metadata.color.LIGHT_GREEN,
 55 |             metadata.color.BLUE,
 56 |             metadata.color.DEEPER_GREEN,
 57 |         ],
 58 |     )
 59 | 
 60 |     plot_params = [
 61 |         # size (number of points in the group), alpha_mult, con_max, num_cont
 62 |         (2, 0.3, 8_00, 50),
 63 |         (2, 0.35, 2_00, 5),
 64 |         (5, 0.05, 5_00, 9),
 65 |         (4, 0.15, 5_00, 9),
 66 |         (5, 0.1, 2_000, 50),
 67 |         (3, 0.1, 3_000, 50),
 68 |         (2, 0.1, 6_000, 50),
 69 |         (2, 0.09, 5_000, 50),
 70 |         (5, 0.008, 15_000, 150),
 71 |         (3, 0.08, 2000, 20),
 72 |     ]
 73 |     rng = np.random.default_rng(2)
 74 |     # Seeded generator, so the point positions are the same on every run.
 75 |     for size, alpha_mult, con_max, num_cont in plot_params:
 76 |         xs = rng.random(size=size)
 77 |         ys = xs + rng.random(size=size)
 78 |         for x, y in zip(xs, ys):
 79 |             color = next(colors)
 80 |             make_point(
 81 |                 x=x,
 82 |                 y=y,
 83 |                 color=color,
 84 |                 alpha_mult=alpha_mult,
 85 |                 con_max=con_max,
 86 |                 num_cont=num_cont,
 87 |             )
 88 |     # Remove both major and minor ticks on each axis.
 89 |     ax.set_xticks([])
 90 |     ax.set_xticks([], minor=True)
 91 |     ax.set_yticks([])
 92 |     ax.set_yticks([], minor=True)
 93 | 
 94 |     fig.tight_layout()
 95 | 
 96 |     fig.patch.set_facecolor("black")
 97 | 
 98 |     return fig
 99 | 
100 | 
101 | if __name__ == "__main__":
102 |     dvc_entry.add_to_dvc(path=pathlib.Path(__file__))
103 |     save_plot_output.save_plot(fig=main(), file=__file__)
104 |     raise SystemExit
105 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/metadata.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Metadata for plotting.
 3 | 
 4 | I probably could / should use an rc params file for some of this stuff instead of
 5 | calling from here.
 6 | """
 7 | 
 8 | from __future__ import annotations
 9 | 
10 | from dataclasses import dataclass
11 | 
12 | 
13 | @dataclass
14 | class Colors:
15 | 
16 |     """
17 |     Hex colour constants shared by the y2022 plots.
18 | 
19 |     https://mycolor.space/?hex=%23FF69B4&sub=1
20 |     """
21 |     # Un-annotated, so these are plain class attributes, not dataclass fields.
22 |     PINK_COLOUR = "#ff69b4"
23 |     BACKGROUND_COLOUR = "#f2f2f2"
24 |     GREY = "#919191"
25 |     BLUE = "#007FCB"
26 |     LIGHT_GREEN = "#B4EDD2"
27 |     DEEPER_GREEN = "#51B9BE"
28 |     BROWNY_RED = "#554149"
29 |     PURPLEY = "#8F6E9B"
30 |     TAN = "#DDD7C6"
31 | 
32 | 
33 | color = Colors()  # shared instance; plots read e.g. ``metadata.color.GREY``
34 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/opinium_barchart/__init__.py:
--------------------------------------------------------------------------------
1 | """Styled bar chart."""
2 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/opinium_barchart/opinium.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/opinium_barchart/opinium.png


--------------------------------------------------------------------------------
/plotting_examples/y2022/opinium_barchart/opinium_barchart_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/opinium_barchart/opinium_barchart_example.png


--------------------------------------------------------------------------------
/plotting_examples/y2022/opinium_barchart/plot.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=duplicate-code
  2 | """
  3 | Bar chart style copied from Opinium.
  4 | 
  5 | Saw this on twitter (I think) and thought I'd recreate it in mpl.
  6 | """
  7 | 
  8 | from __future__ import annotations
  9 | 
 10 | import pathlib
 11 | 
 12 | import matplotlib as mpl
 13 | import matplotlib.image as mpimg
 14 | import matplotlib.pyplot as plt
 15 | 
 16 | from plotting_examples import dvc_entry, save_plot_output
 17 | from plotting_examples.y2022 import metadata
 18 | 
 19 | 
 20 | def main() -> mpl.figure.Figure:
 21 |     """Main."""
 22 |     with plt.rc_context(
 23 |         {
 24 |             "xtick.major.pad": 10,
 25 |             "font.family": "monospace",
 26 |         },
 27 |     ):
 28 |         data = {
 29 |             "Trump": -63,
 30 |             "Johnson": -11,
 31 |             "O'Neill": 3,
 32 |             "Foster": 9,
 33 |             "Khan": 16,
 34 |             "Starmer": 18,
 35 |             "Sturgeon": 34,
 36 |             "Drakeford": 34,
 37 |         }
 38 | 
 39 |         fig, ax = plt.subplots(figsize=(15, 7))
 40 | 
 41 |         # trying to setup as many variables as possible here - though there are still
 42 |         # some magic values
 43 | 
 44 |         min(list(data.values()))
 45 |         max_val = max(list(data.values()))
 46 | 
 47 |         line_width = 20
 48 |         start_offset = line_width * 0.08
 49 |         percentage_label_shift = 3
 50 |         positive_bar_color = metadata.color.DEEPER_GREEN
 51 |         negative_bar_color = metadata.color.PINK_COLOUR
 52 |         font_size = 12
 53 |         source_fontsize = 8
 54 |         footnote_location = (0, -0.3)
 55 | 
 56 |         # Johnson here is the value which is used as it's not the most negative, but is
 57 |         # negative. Really, this is just what kinda looked ok, with different data
 58 |         # there would likely have to be pretty different approaches to all of this i
 59 |         # think
 60 |         grey_bar_left_x = data["Johnson"]
 61 | 
 62 |         # shading every other bar a bit
 63 |         for bar_i, (name, y_val_) in enumerate(zip(data, range(8))):
 64 |             y_val = y_val_ * 2
 65 |             x_val = data[name]
 66 |             x_loc = 20
 67 |             direction = 1
 68 |             left_adjust = 0
 69 |             if x_val > 0:
 70 |                 left_adjust = 9
 71 |                 direction *= -1
 72 |                 bar_color = positive_bar_color
 73 |                 sign = "+"
 74 |                 sign_align = "left"
 75 |             else:
 76 |                 left_adjust = -15
 77 |                 bar_color = negative_bar_color
 78 |                 sign = ""
 79 |                 sign_align = "right"
 80 | 
 81 |             ax.plot(
 82 |                 [start_offset * -direction, data[name]],
 83 |                 [y_val, y_val],
 84 |                 linewidth=line_width,
 85 |                 c=bar_color,
 86 |             )
 87 |             ax.text(
 88 |                 x=(x_loc * direction) + left_adjust,
 89 |                 y=y_val,
 90 |                 s=name,
 91 |                 horizontalalignment="left",
 92 |                 verticalalignment="center",
 93 |                 fontsize=font_size,
 94 |             )
 95 |             ax.text(
 96 |                 x=data[name] + -direction * percentage_label_shift,
 97 |                 y=y_val,
 98 |                 s=f"{sign}{data[name]}",
 99 |                 verticalalignment="center",
100 |                 horizontalalignment=sign_align,
101 |                 fontsize=font_size,
102 |             )
103 | 
104 |             if bar_i % 2 == 1:
105 |                 ax.plot(
106 |                     [grey_bar_left_x, max_val + 20],
107 |                     [y_val, y_val],
108 |                     linewidth=line_width,
109 |                     c="#a0a0a0",
110 |                     alpha=0.07,
111 |                     zorder=0,
112 |                 )
113 | 
114 |         _ = [ax.spines[s].set_visible(False) for s in ax.spines]
115 |         _ = ax.xaxis.set_ticklabels([])
116 |         _ = ax.yaxis.set_ticklabels([])
117 |         _ = ax.tick_params(axis="both", length=0)
118 | 
119 |         title_y = 1.2
120 |         title_x = 0.45
121 | 
122 |         # Title
123 |         ax.text(
124 |             x=title_x,
125 |             y=title_y,
126 |             s="Level of Trust in information \nprovided on Coronavirus",
127 |             transform=ax.transAxes,
128 |             fontsize=20,
129 |             horizontalalignment="left",
130 |         )
131 | 
132 |         # subtitle
133 |         _ = ax.text(
134 |             x=title_x,
135 |             y=title_y - 0.11,
136 |             s=(
137 |                 "Net Level of Trust in providing of information by party leaders\non"
138 |                 " Coronavirus"
139 |             ),
140 |             transform=ax.transAxes,
141 |             c="#717171",
142 |         )
143 | 
144 |         # add rectangle
145 |         rect = mpl.patches.Rectangle(
146 |             (title_x - 0.015, title_y - 0.11),
147 |             width=0.01,
148 |             height=0.25,
149 |             color=positive_bar_color,
150 |             transform=ax.transAxes,
151 |             clip_on=False,
152 |         )
153 |         ax.add_patch(rect)
154 | 
155 |         # source of data
156 |         _ = ax.text(
157 |             x=footnote_location[0],
158 |             y=footnote_location[1],
159 |             s=(
160 |                 "https://www.opinium.com/wp-content/uploads/2020/06/"
161 |                 "VI-26-06-2020-Observer-Data-Tables.xlsx"
162 |             ),
163 |             transform=ax.transAxes,
164 |             fontsize=source_fontsize,
165 |         )
166 | 
167 |         # add company logo to plot
168 |         image = mpimg.imread(
169 |             pathlib.Path(__file__).parent / "opinium.png",
170 |             format="png",
171 |         )
172 |         img_y = ax.bbox.ymin
173 | 
174 |         ax.text(
175 |             x=ax.bbox.xmax + 400,
176 |             y=img_y + 20,
177 |             s="* Sample size: 2001\n25-26th June\nOpinium.co.uk",
178 |             transform=None,
179 |             verticalalignment="top",
180 |         )
181 | 
182 |         fig.figimage(
183 |             image,
184 |             ax.bbox.xmax + 659,
185 |             0,
186 |             origin="upper",
187 |         )
188 |         ax.axvline(0, linewidth=0.1, alpha=0.9, color="#212121")
189 | 
190 |     fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
191 |     ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)
192 | 
193 |     return fig
194 | 
195 | 
196 | if __name__ == "__main__":
197 |     dvc_entry.add_to_dvc(path=pathlib.Path(__file__))
198 |     save_plot_output.save_plot(fig=main(), file=__file__)
199 |     raise SystemExit
200 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/pandas_stacked_bars_with_values/__init__.py:
--------------------------------------------------------------------------------
1 | """Create stacked bar plot with pandas."""
2 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/pandas_stacked_bars_with_values/plot.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=duplicate-code
  2 | """
  3 | Horizontal stacked bars, based off of pandas.
  4 | 
  5 | Could do these from scratch - pandas makes things a bit more straightforward though.
  6 | 
  7 | Example of:
  8 | 
  9 | - fixed formatting - setting categorical ticks at particular positions.
 10 | """
 11 | 
 12 | from __future__ import annotations
 13 | 
 14 | import io
 15 | import pathlib
 16 | 
 17 | import matplotlib as mpl
 18 | import matplotlib.pyplot as plt
 19 | import pandas as pd
 20 | from matplotlib.lines import Line2D
 21 | 
 22 | from plotting_examples import dvc_entry, save_plot_output
 23 | from plotting_examples.y2022 import metadata
 24 | 
 25 | 
 26 | def sample_data() -> tuple[pd.DataFrame, dict[int, dict[str, str]]]:
 27 |     """Generate sample data."""
 28 |     csv = """\
 29 |     Column A,Column B,Column C,Column D
 30 |     22.9,21.4,26.6,27.1
 31 |     40.0,28.9,38.1,40.9
 32 |     20.9,22.0,18.7,15.3
 33 |     10.5,18.9,8.5,8.4
 34 |     5.7,8.8,8.1,8.3
 35 |     """
 36 |     df_plot: pd.DataFrame = pd.read_csv(io.StringIO(csv))
 37 |     index_labels = {
 38 |         0: "Something",
 39 |         1: "Another",
 40 |         2: "This Thing",
 41 |         3: "Thai Food",
 42 |         4: "Finally",
 43 |     }
 44 |     index_colours = {
 45 |         0: metadata.color.TAN,
 46 |         1: metadata.color.DEEPER_GREEN,
 47 |         2: metadata.color.PINK_COLOUR,
 48 |         3: metadata.color.BLUE,
 49 |         4: metadata.color.PURPLEY,
 50 |     }
 51 | 
 52 |     plot_metadata = {}
 53 |     for x in index_labels:
 54 |         plot_metadata[x] = {
 55 |             "colour": index_colours[x],
 56 |             "label": index_labels[x],
 57 |         }
 58 | 
 59 |     # Plot metadata has this form:
 60 |     # >>> {
 61 |     # >>>     0: {"colour": "red", "label": "Something"},
 62 |     # >>>     1: {"colour": "grey", "label": "Another"},
 63 |     # >>>     2: {"colour": "pink", "label": "This Thing"},
 64 |     # >>>     3: {"colour": "blue", "label": "Thai Food"},
 65 |     # >>>     4: {"colour": "green", "label": "Finally"},
 66 |     # >>> }
 67 | 
 68 |     return df_plot, plot_metadata
 69 | 
 70 | 
 71 | def main() -> mpl.figure.Figure:
 72 |     """Main."""
 73 |     df_plot, plot_metadata = sample_data()
 74 | 
 75 |     # Reverse columns as want to plot A as first bar.
 76 |     df_plot = df_plot.loc[:, df_plot.columns[::-1]]
 77 | 
 78 |     # If you want to rename the axis y-labels it's easiest to just rename them in the
 79 |     # dataframe columns.
 80 | 
 81 |     with plt.rc_context(
 82 |         {
 83 |             "xtick.major.pad": 10,
 84 |             "font.family": "monospace",
 85 |         },
 86 |     ):
 87 |         fig, ax = plt.subplots(
 88 |             figsize=(15, 5),
 89 |             ncols=1,
 90 |             nrows=1,
 91 |             sharey=True,
 92 |             constrained_layout=False,
 93 |         )
 94 | 
 95 |         df_plot.T.plot.barh(
 96 |             stacked=True,
 97 |             ax=ax,
 98 |             color=[value["colour"] for value in plot_metadata.values()],
 99 |         )
100 | 
101 |         handles = [
102 |             Line2D(
103 |                 [0],
104 |                 [0],
105 |                 color=value["colour"],
106 |                 label=value["label"],
107 |                 markersize=12,
108 |                 linewidth=7,
109 |             )
110 |             for value in plot_metadata.values()
111 |         ]
112 | 
113 |         ax.legend(
114 |             handles=handles,
115 |             frameon=False,
116 |             ncol=1,
117 |             bbox_to_anchor=(1.01, 0.7),
118 |             fontsize=12,
119 |         )
120 | 
121 |         ax.set_title("This Is A Title", fontsize=20, y=1.05)
122 |         ax.set_xlabel("%", fontsize=15)
123 |         ax.grid(linewidth=0.2)
124 |         ax.set_axisbelow(True)
125 | 
126 |         # Iterate over the data values, and patches of the axis, and plot the data
127 |         # value over the relevant patch.
128 |         data_matrix = df_plot.to_numpy().flatten()
129 | 
130 |         min_bar_size = 3
131 |         for i, patch in enumerate(ax.patches):
132 |             width = patch.get_width()
133 |             height = patch.get_height()
134 |             x, y = patch.get_xy()
135 |             data_i = data_matrix[i] if data_matrix[i] >= min_bar_size else "-"
136 |             ax.annotate(
137 |                 f"{data_i}",
138 |                 (x + width * 0.5, y + height * 0.5),
139 |                 ha="center",
140 |                 va="center",
141 |                 fontsize=12,
142 |             )
143 | 
144 |         _ = [ax.spines[x].set_visible(False) for x in ax.spines]
145 | 
146 |         loc = mpl.ticker.MultipleLocator(base=5.0)
147 |         ax.set_xlim(0, 100)
148 |         ax.xaxis.set_major_locator(loc)
149 | 
150 |     fig.set_tight_layout(True)  # type: ignore[attr-defined]
151 |     fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
152 |     ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)
153 |     return fig
154 | 
155 | 
156 | # Script entry point: only runs when this file is executed directly.
157 | # NOTE(review): add_to_dvc presumably registers this plot with DVC - confirm.
158 | if __name__ == "__main__":
159 |     dvc_entry.add_to_dvc(path=pathlib.Path(__file__))
160 | 
161 |     save_plot_output.save_plot(fig=main(), file=__file__)
162 |     raise SystemExit  # bare SystemExit ends the process with status 0
163 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/pos_neg_split_hbar/__init__.py:
--------------------------------------------------------------------------------
1 | """Create pos/neg hbar."""
2 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/pos_neg_split_hbar/plot.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=duplicate-code
  2 | """
  3 | Create split horizontal bar chart.
  4 | 
  5 | Split by dichotomous variable, with bar classifications.
  6 | 
  7 | Can be a bit messy - not sure I'm much of a fan - but wanted to re-create anyway.
  8 | """
  9 | 
 10 | from __future__ import annotations
 11 | 
 12 | import io
 13 | import pathlib
 14 | 
 15 | import matplotlib as mpl
 16 | import matplotlib.pyplot as plt
 17 | import matplotlib.ticker as plt_ticker
 18 | import pandas as pd
 19 | 
 20 | from plotting_examples import dvc_entry, save_plot_output
 21 | from plotting_examples.y2022 import metadata
 22 | 
 23 | # This is the dichotomy - could be anything though, e.g. good/bad, old/young or whatever.
 24 | LEVEL_0 = "good"
 25 | LEVEL_1 = "bad"
 26 | 
 27 | 
 28 | def sample_data() -> tuple[pd.DataFrame, dict[int, str], dict[str, str]]:
 29 |     """
 30 |     Return sample dataframe.
 31 | 
 32 |     Dogs are taken from here : https://dogtime.com/dog-breeds/profiles
 33 |     """
 34 |     df = pd.read_csv(
 35 |         io.StringIO(
 36 |             (
 37 |                 f"{LEVEL_0},{LEVEL_1},nr,{LEVEL_0}_colour,{LEVEL_1}_colour,meaning\n"
 38 |                 "47.303474,51.18364658,1.51287942,med,med,Akita\n"
 39 |                 "34.10226721,44.76493548,21.13279731,med,med,Basset Hound\n"
 40 |                 "12.08045446,69.67354868,18.24599686,low,med,Cavapoo\n"
 41 |                 "60.91476132,22.71988935,16.36534934,med,low,Doberdor\n"
 42 |                 "19.43282773,56.88924657,23.67792571,low,med,Greyhound\n"
 43 |                 "54.05072885,29.96153606,15.98773508,med,low,Irish Terrier\n"
 44 |                 "53.096035,35.37625972,11.52770528,med,med,Poodle\n"
 45 |                 "78.23942162,17.26331569,4.497262699,high,low,Sloughi\n"
 46 |                 "51.68818968,38.14985888,10.16195143,med,med,Whippet\n"
 47 |                 "38.14462181,39.1176673,22.73771089,med,med,Xoloitzcuintli\n"
 48 |             ),
 49 |         ),
 50 |     )
 51 |     index_to_meaning_map: dict[int, str] = df["meaning"].to_dict()
 52 |     # high/med/low represent some pretend classifications for this example.
 53 |     colour_map = {
 54 |         "high": metadata.color.PINK_COLOUR,
 55 |         "med": metadata.color.TAN,
 56 |         "low": metadata.color.LIGHT_GREEN,
 57 |     }
 58 |     return df, index_to_meaning_map, colour_map
 59 | 
 60 | 
 61 | def plot_bar_percentages(df: pd.DataFrame, ax: plt.Axes) -> plt.Axes:  # type: ignore[name-defined]
 62 |     """Plot percentages next to bars."""
 63 |     # Plot the percentages.
 64 |     for i, patch in enumerate(ax.patches):
 65 |         width = patch.get_width()
 66 |         height = patch.get_height()
 67 |         x, y = patch.get_xy()
 68 |         # Shifting is different depending on whether it's a +ve of -ve
 69 |         val = round(patch.get_width() * 0.01, 2)
 70 | 
 71 |         nudge = 8
 72 |         if i <= df.index.max():
 73 |             # Printing to the left
 74 |             ann = f"{-val:.0%}"
 75 |             ax.annotate(
 76 |                 ann,
 77 |                 ((x + width) - nudge, y + height * 0.5),
 78 |                 ha="center",
 79 |                 va="center",
 80 |             )
 81 |         else:
 82 |             # Printing to the right
 83 |             ann = f"{val:.0%}"
 84 |             ax.annotate(
 85 |                 ann,
 86 |                 ((x + width) + nudge, y + height * 0.5),
 87 |                 ha="center",
 88 |                 va="center",
 89 |             )
 90 |     return ax
 91 | 
 92 | 
 93 | def main() -> mpl.figure.Figure:
 94 |     """Main."""
 95 |     df, index_to_meaning_map, colour_map = sample_data()
 96 | 
 97 |     with plt.rc_context(
 98 |         {
 99 |             "xtick.major.pad": 10,
100 |             "font.family": "monospace",
101 |         },
102 |     ):
103 |         # Create plot.
104 |         fig, ax = plt.subplots(figsize=(15, 6))
105 | 
106 |         ax.set_axisbelow(True)
107 | 
108 |         ax.barh(
109 |             df.index,
110 |             width=-df[LEVEL_0],
111 |             height=0.8,
112 |             color=df[f"{LEVEL_0}_colour"].map(colour_map),
113 |             edgecolor="black",
114 |         )
115 |         ax.barh(
116 |             df.index,
117 |             width=df[LEVEL_1],
118 |             height=0.8,
119 |             color=df[f"{LEVEL_1}_colour"].map(colour_map),
120 |             edgecolor="black",
121 |         )
122 | 
123 |         ax = plot_bar_percentages(df=df, ax=ax)
124 | 
125 |         # remove spines for top/right
126 |         ax.spines["top"].set_visible(False)
127 |         ax.spines["right"].set_visible(False)
128 | 
129 |         # Set axis limits
130 |         ax.set_ylim(bottom=-1, top=df.index.max() + 1)
131 |         ax.set_xlim(left=-109, right=109)
132 | 
133 |         # Reformat tick frequency for x,y axis
134 |         # x
135 |         loc = plt_ticker.MultipleLocator(base=10)
136 |         ax.xaxis.set_major_locator(loc)
137 |         # y
138 |         loc = plt_ticker.MultipleLocator(base=1)
139 |         ax.yaxis.set_major_locator(loc)
140 | 
141 |         # Functions for reformatting plot tick values
142 |         def x_fmt(x: float, _y: int) -> str:
143 |             fmt = f"{int(x)} %"
144 |             return fmt.replace("-", "")
145 | 
146 |         def y_fmt(_x: float, y: int) -> str:
147 |             diff = -2
148 |             return index_to_meaning_map.get(y + diff, "")
149 | 
150 |         ax.xaxis.set_major_formatter(plt_ticker.FuncFormatter(x_fmt))
151 |         ax.yaxis.set_major_formatter(plt_ticker.FuncFormatter(y_fmt))
152 | 
153 |         # Plot text for Agree / Disagree
154 |         agree_disagree_txt_height = 1.1
155 |         ax.text(
156 |             0.48,
157 |             agree_disagree_txt_height,
158 |             s=LEVEL_0,
159 |             transform=ax.transAxes,
160 |             ha="right",
161 |             fontsize=20,
162 |         )
163 |         ax.text(
164 |             0.52,
165 |             agree_disagree_txt_height,
166 |             s=LEVEL_1,
167 |             transform=ax.transAxes,
168 |             ha="left",
169 |             fontsize=20,
170 |         )
171 | 
172 |         for tick in ax.get_xticklabels():
173 |             tick.set_rotation(45)
174 | 
175 |         ax.grid(linewidth=0.2, which="major", axis="y")
176 | 
177 |         fig.set_tight_layout(True)  # type: ignore[attr-defined]
178 |         fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
179 |         ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)
180 |     return fig
181 | 
182 | 
183 | # Script entry point: only runs when this file is executed directly.
184 | # NOTE(review): add_to_dvc presumably registers this plot with DVC - confirm.
185 | if __name__ == "__main__":
186 |     dvc_entry.add_to_dvc(path=pathlib.Path(__file__))
187 |     save_plot_output.save_plot(fig=main(), file=__file__)
188 |     raise SystemExit  # bare SystemExit ends the process with status 0
189 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/scatter_distributions/__init__.py:
--------------------------------------------------------------------------------
1 | """Create scatter distributions plot."""
2 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/scatter_distributions/plot.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=duplicate-code
  2 | """
  3 | Distributions of multiple variables.
  4 | 
  5 | For a set of variables, each with an accompanying continuous variable on the same scale,
  6 | plot the distributions of the continuous variable. Might be useful to have a kde
  7 | overlaid here.
  8 | 
  9 | Example of:
 10 | 
 11 | - fixed formatting
 12 | - setting categorical ticks at particular positions.
 13 | 
 14 | """
 15 | 
 16 | from __future__ import annotations
 17 | 
 18 | import itertools
 19 | import pathlib
 20 | import re
 21 | import textwrap
 22 | from typing import TYPE_CHECKING, Any, cast
 23 | 
 24 | import matplotlib as mpl
 25 | import matplotlib.pyplot as plt
 26 | import numpy as np
 27 | import numpy.typing as npt
 28 | import pandas as pd
 29 | 
 30 | from plotting_examples import dvc_entry, save_plot_output
 31 | from plotting_examples.y2022 import metadata
 32 | 
 33 | np_rnd = np.random.Generator(np.random.MT19937(2))
 34 | 
 35 | 
 36 | if TYPE_CHECKING:
 37 |     from collections.abc import Mapping
 38 | 
 39 | 
 40 | def sample_data(n_categories: int = 12) -> tuple[pd.DataFrame, dict[int, str]]:
 41 |     """Generate (cat, cont) sample rows and a random label per category."""
 42 |     # Filler text (from the postgres website) used as the word pool for labels.
 43 |     document = (
 44 |         "\n"
 45 |         "PostgreSQL is an object-relational database management system (ORDBMS) based "
 46 |         "on POSTGRES, Version 4.2, developed at the University of California at "
 47 |         "Berkeley Computer Science Department. POSTGRES pioneered many concepts that "
 48 |         "only became available in some commercial database systems much later.\n"
 49 |         "\n"
 50 |         "PostgreSQL is an open-source descendant of this original Berkeley code. It "
 51 |         "supports a large part of the SQL standard and offers many modern features:\n"
 52 |         "\n"
 53 |         "complex queries\n"
 54 |         "foreign keys\n"
 55 |         "triggers\n"
 56 |         "updatable views\n"
 57 |         "transactional integrity\n"
 58 |         "multiversion concurrency control\n"
 59 |         "Also, PostgreSQL can be extended by the user in many ways, for example by "
 60 |         "adding new\n"
 61 |     )
 62 |     words = [x for x in re.sub(r"\n|\(|\)", " ", document, flags=re.M).split(" ") if x]
 63 | 
 64 |     def rand_string() -> str:
 65 |         """Join 3-14 randomly chosen words into a capitalised label."""
 66 |         return " ".join(
 67 |             np_rnd.choice(words, size=np_rnd.integers(3, 15, size=1)),
 68 |         ).capitalize()
 69 | 
 70 |     def rand_cont() -> npt.NDArray[np.float64]:
 71 |         # Generates a random bimodal distribution so that it looks roughly similar to
 72 |         # what we might see from timing data or whatever.
 73 |         loc_min = 2
 74 |         loc_max = 7
 75 |         mode_1_loc = np_rnd.integers(loc_min, loc_max, size=1)[0]
 76 |         size = np_rnd.integers(10, 250, size=1)[0]
 77 |         mode_1 = np_rnd.normal(
 78 |             loc=mode_1_loc,
 79 |             scale=2,
 80 |             size=size,
 81 |         )
 82 |         # Direction in which the second mode is shifted relative to the first.
 83 |         direction = 1
 84 |         if mode_1_loc > loc_max / (loc_max + loc_min):
 85 |             direction = -1
 86 |         # NOTE(review): mode_1_loc >= 2 > 7 / 9, so the branch is always taken.
 87 |         def _np_array_to_int(arr: npt.ArrayLike | int) -> int:
 88 |             """
 89 |             Convert single element ndarray to int.
 90 | 
 91 |             Mainly doing these mode_i checks as I'm updating some code
 92 |             following packages moving on.
 93 |             """
 94 |             if isinstance(arr, np.ndarray):
 95 |                 assert len(arr) == 1
 96 |                 arr = arr[0]
 97 |             else:
 98 |                 assert isinstance(arr, int | np.int64)
 99 |             return cast(int, arr)
100 |         # Second mode: half of mode_1_loc away, with 40% as many samples.
101 |         mode_1_loc = _np_array_to_int(arr=mode_1_loc)
102 |         mode_2_loc = int(mode_1_loc + direction * mode_1_loc * 0.5)
103 |         mode_2_loc = _np_array_to_int(arr=mode_2_loc)
104 |         size = _np_array_to_int(arr=size)
105 |         mode_2_size = int(size * 0.4)
106 |         mode_2 = np_rnd.normal(loc=mode_2_loc, scale=2, size=mode_2_size)
107 |         # Join both modes and clip negatives to zero.
108 |         return cast(
109 |             npt.NDArray[np.float64],
110 |             np.clip(np.concatenate([mode_1, mode_2]), a_min=0, a_max=np.inf),
111 |         )
112 | 
113 |     data_dict: dict[str, list[float]] = {"cat": [], "cont": []}
114 |     # Long format: one row per sampled value, tagged with its category number.
115 |     for category in range(1, n_categories + 1):
116 |         conts = rand_cont()
117 |         data_dict["cont"] = data_dict["cont"] + list(conts)
118 |         data_dict["cat"].extend(list(np.repeat(category, len(conts))))
119 | 
120 |     data = pd.DataFrame(data_dict)
121 |     labels = {x: rand_string() for x in data["cat"].unique()}
122 | 
123 |     return data, labels
124 | 
125 | 
126 | def categorical_scatters(
127 |     *,
128 |     ax: plt.Axes,  # type: ignore[name-defined]
129 |     data: pd.DataFrame,
130 |     cont_var: str,
131 |     cat_var: str,
132 |     labels: Mapping[Any, str],
133 |     # Used if there are particular colours for particular categories, if they're all
134 |     # meant to be the same color then just pass in with the same value for each category
135 |     # - they should all still be represented though.
136 |     color_map: Mapping[Any, str] | None = None,
137 | ) -> plt.Axes:  # type: ignore[name-defined]
138 |     """Create plot."""
139 |     # Can use this to get alternating colours, i did then went off it.
140 |     colors = itertools.cycle(
141 |         [metadata.color.PINK_COLOUR, metadata.color.PINK_COLOUR],
142 |     )
143 | 
144 |     y_ticks = []
145 | 
146 |     for y_val, (g_, dfg) in enumerate(data.groupby([cat_var]), 1):
147 |         g = g_[0]
148 |         color = next(colors)
149 |         color = color_map[g] if color_map else color
150 | 
151 |         y_values = np.repeat([y_val], len(dfg)) + np_rnd.normal(
152 |             loc=0,
153 |             scale=0.05,
154 |             size=len(dfg),
155 |         )
156 |         x_values = dfg.loc[dfg[cont_var].ne(88888), cont_var]
157 |         ax.scatter(
158 |             x=x_values,
159 |             y=y_values,
160 |             color=color,
161 |             alpha=0.3,
162 |         )
163 | 
164 |         y_ticks.append((g, labels[g]))
165 | 
166 |     ax.grid(alpha=0.1)
167 | 
168 |     ax.yaxis.set_major_locator(
169 |         mpl.ticker.FixedLocator([y_tick[0] for y_tick in y_ticks]),
170 |     )
171 |     ax.yaxis.set_major_formatter(
172 |         mpl.ticker.FixedFormatter(
173 |             ["\n".join(textwrap.wrap(y_tick[1], width=30)) for y_tick in y_ticks],
174 |         ),
175 |     )
176 | 
177 |     return ax
178 | 
179 | 
180 | def main() -> mpl.figure.Figure:
181 |     """Main."""
182 |     data, labels = sample_data()
183 | 
184 |     cat_var = "cat"
185 |     cont_var = "cont"
186 | 
187 |     # color
188 |     color_map = {x: metadata.color.PINK_COLOUR for x in labels}
189 |     # Maybe we want to highlight a particular value or whatever idk.
190 |     color_map[3] = metadata.color.DEEPER_GREEN
191 | 
192 |     with plt.rc_context(
193 |         {
194 |             "xtick.major.pad": 10,
195 |             "font.family": "monospace",
196 |         },
197 |     ):
198 |         fig, ax = plt.subplots(
199 |             figsize=(20, 20),
200 |             ncols=1,
201 |             nrows=1,
202 |             sharey=True,
203 |             constrained_layout=False,
204 |         )
205 |         ax = categorical_scatters(
206 |             data=data,
207 |             cont_var=cont_var,
208 |             cat_var=cat_var,
209 |             labels=labels,
210 |             ax=ax,
211 |             color_map=color_map,
212 |         )
213 |         ax.set_title(
214 |             "Scatter plot with categorical labels",
215 |             fontsize=20,
216 |         )
217 | 
218 |     # axis styling
219 |     ax.spines["top"].set_visible(False)
220 |     ax.spines["right"].set_visible(False)
221 |     ax.spines["left"].set_visible(False)
222 |     ax.spines["bottom"].set_visible(False)
223 | 
224 |     fig.set_tight_layout(True)  # type: ignore[attr-defined]
225 |     fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
226 |     ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)
227 |     return fig
228 | 
229 | 
230 | # Script entry point: only runs when this file is executed directly.
231 | # NOTE(review): add_to_dvc presumably registers this plot with DVC - confirm.
232 | if __name__ == "__main__":
233 |     dvc_entry.add_to_dvc(path=pathlib.Path(__file__))
234 |     save_plot_output.save_plot(fig=main(), file=__file__)
235 |     raise SystemExit  # bare SystemExit ends the process with status 0
236 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/scatter_matrix_w_kde_on_diag/__init__.py:
--------------------------------------------------------------------------------
1 | """Scatter matrix with sns kde on diagonal."""
2 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/scatter_matrix_w_kde_on_diag/plot.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=duplicate-code
  2 | """
  3 | Scatter matrix with kde instead of histogram on the diagonal.
  4 | 
  5 | Could probably adapt pd.scatter_matrix instead of doing it from scratch. Though with
  6 | this approach the non-diagonal plots could be whatever instead of a scatter plot I
  7 | guess...
  8 | 
  9 | Would be good to make the upper diagonals differ from the lower diagonals a bit... maybe
 10 | some sort of table from pd.cut on the others or whatever.
 11 | 
 12 | I'd probably just use subplot_mosaic as well now - that's grown on me a lot since this.
 13 | """
 14 | 
 15 | from __future__ import annotations
 16 | 
 17 | import itertools
 18 | import pathlib
 19 | 
 20 | import matplotlib as mpl
 21 | import matplotlib.pyplot as plt
 22 | import numpy as np
 23 | import seaborn as sns
 24 | 
 25 | from plotting_examples import dvc_entry, save_plot_output
 26 | from plotting_examples.y2022 import metadata
 27 | 
 28 | np_rnd = np.random.Generator(np.random.MT19937(1977))
 29 | 
 30 | 
 31 | def main() -> mpl.figure.Figure:
 32 |     """Main."""
 33 |     numvars, numdata = 4, 50
 34 | 
 35 |     data = 10 * np_rnd.chisquare(df=4, size=(numvars, numdata))
 36 | 
 37 |     names = ["mpg", "disp", "drat", "wt"]
 38 | 
 39 |     numvars, numdata = data.shape
 40 | 
 41 |     with plt.rc_context(
 42 |         {
 43 |             "xtick.major.pad": 10,
 44 |             "font.family": "monospace",
 45 |         },
 46 |     ):
 47 |         fig, axes = plt.subplots(
 48 |             nrows=numvars,
 49 |             ncols=numvars,
 50 |             figsize=(15, 15),
 51 |             constrained_layout=True,
 52 |         )
 53 | 
 54 |         for ax in axes.flat:
 55 |             # Hide all ticks and labels
 56 |             ax.xaxis.set_visible(False)
 57 |             ax.yaxis.set_visible(False)
 58 | 
 59 |         # Plot the data.
 60 |         for i, j in zip(*np.triu_indices_from(axes, k=1)):
 61 |             for x, y in [(i, j), (j, i)]:
 62 |                 axes[x, y].scatter(
 63 |                     data[x],
 64 |                     data[y],
 65 |                     color=metadata.color.PINK_COLOUR,
 66 |                 )
 67 |                 axes[x, y].set_facecolor(metadata.color.BACKGROUND_COLOUR)
 68 |                 axes[x, y].grid(linestyle=":", alpha=0.2)
 69 | 
 70 |         # Label the diagonal subplots...
 71 |         for i, label in enumerate(names):
 72 |             axes[i, i].annotate(
 73 |                 label,
 74 |                 (0.5, 0.5),
 75 |                 xycoords="axes fraction",
 76 |                 ha="center",
 77 |                 va="center",
 78 |                 fontsize=15,
 79 |                 fontweight="bold",
 80 |             )
 81 | 
 82 |         rotate = 45
 83 | 
 84 |         for i, j in itertools.product(range(numvars), range(numvars)):
 85 |             if i != j:
 86 |                 axes[i, j].xaxis.set_visible(True)
 87 |                 for tick in axes[i, j].get_xticklabels():
 88 |                     tick.set_rotation(rotate)
 89 | 
 90 |         # plot the densities on the diagonal
 91 |         for i, j in zip(range(numvars), range(numvars)):
 92 |             ax = axes[i, j]
 93 |             sns.kdeplot(
 94 |                 x=data[i],
 95 |                 ax=ax,
 96 |                 alpha=0.1,
 97 |                 fill=True,
 98 |                 color=metadata.color.PINK_COLOUR,
 99 |             )
100 |             ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)
101 | 
102 |         for i, j in zip(range(1, numvars), itertools.cycle([0])):
103 |             axes[i, j].yaxis.set_visible(True)
104 | 
105 |         _ = fig.suptitle("Example Scatterplots", fontsize=20)
106 |         fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
107 | 
108 |     return fig
109 | 
110 | 
111 | # Script entry point: only runs when this file is executed directly.
112 | # NOTE(review): add_to_dvc presumably registers this plot with DVC - confirm.
113 | if __name__ == "__main__":
114 |     dvc_entry.add_to_dvc(path=pathlib.Path(__file__))
115 |     save_plot_output.save_plot(fig=main(), file=__file__)
116 |     raise SystemExit  # bare SystemExit ends the process with status 0
117 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/scatter_w_outlined_text_insert/__init__.py:
--------------------------------------------------------------------------------
1 | """Scatter plot with outlined text."""
2 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/scatter_w_outlined_text_insert/data.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/scatter_w_outlined_text_insert/data.parquet


--------------------------------------------------------------------------------
/plotting_examples/y2022/scatter_w_outlined_text_insert/plot.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=duplicate-code
  2 | """
  3 | Scatter plot with text inserted to scatter points.
  4 | 
  5 | Data was taken from a tidy tuesday.
  6 | 
  7 | Example of:
  8 | 
  9 | - Outlining text elements in a plot.
 10 | """
 11 | 
 12 | from __future__ import annotations
 13 | 
 14 | import pathlib
 15 | from typing import TypeVar
 16 | 
 17 | import matplotlib as mpl
 18 | import matplotlib.patheffects as pe
 19 | import matplotlib.pyplot as plt
 20 | import matplotlib.ticker as mtick
 21 | import pandas as pd
 22 | from matplotlib.dates import DateFormatter, YearLocator
 23 | 
 24 | from plotting_examples import dvc_entry, save_plot_output
 25 | from plotting_examples.y2022 import metadata
 26 | 
 27 | T = TypeVar("T")
 28 | 
 29 | 
 30 | def get_plotting_data() -> pd.DataFrame:
 31 |     """Plotting dataframe."""
 32 |     df = pd.read_parquet(pathlib.Path(__file__).parent / "data.parquet")
 33 | 
 34 |     data_list = []
 35 |     for g_, dfg in df.groupby(["year"]):
 36 |         g = g_[0]
 37 |         x = dfg["distributor"]
 38 |         df_a = x.value_counts().reset_index().assign(year=g)
 39 | 
 40 |         df_b = (
 41 |             x.value_counts(normalize=True)
 42 |             .reset_index()
 43 |             .rename(columns={"proportion": "percentage"})
 44 |             .assign(percentage=lambda x: x["percentage"].mul(100).round(1), year=g)
 45 |         )
 46 | 
 47 |         df_c = pd.merge(df_a, df_b, on=["distributor", "year"])
 48 |         df_c = df_c.sort_values("count", ascending=False)
 49 |         top = ["#ff2309"]
 50 |         other_colour = "#d0d0d0"
 51 |         n_size = 1
 52 |         if len(df_c) > n_size:
 53 |             df_c["colour"] = top + [other_colour for _ in range(len(df_c) - n_size)]
 54 |         else:
 55 |             df_c["colour"] = top
 56 | 
 57 |         if df_c["colour"].isna().any():
 58 |             raise ValueError
 59 | 
 60 |         data_list.append(df_c)
 61 | 
 62 |     plotting_data = pd.concat(data_list)
 63 |     plotting_data["year"] = pd.to_datetime(plotting_data["year"], format="%Y")
 64 | 
 65 |     return plotting_data
 66 | 
 67 | 
 68 | def main() -> mpl.figure.Figure:
 69 |     """Main."""
 70 |     plotting_data = get_plotting_data()
 71 | 
 72 |     year_counts = (
 73 |         plotting_data.groupby("year").size().rename("year_counts").reset_index()
 74 |     )
 75 | 
 76 |     # want to know how many there were each year.
 77 |     plotting_data = pd.merge(plotting_data, year_counts, on="year")
 78 | 
 79 |     with plt.rc_context(
 80 |         {
 81 |             "xtick.major.pad": 10,
 82 |             "font.family": "monospace",
 83 |         },
 84 |     ):
 85 |         fig, ax = plt.subplots(figsize=(40, 15))
 86 | 
 87 |         other_colour = "#d0d0d0"
 88 | 
 89 |         for _, dfg in plotting_data.groupby("distributor"):
 90 |             # plot text of distributor.
 91 |             for _, row in dfg.iterrows():
 92 |                 if row["colour"] == other_colour:
 93 |                     ax.scatter(
 94 |                         x=row["year"],
 95 |                         y=row["percentage"],
 96 |                         alpha=0.2,
 97 |                         s=300,
 98 |                         color=metadata.color.PINK_COLOUR,
 99 |                         zorder=1,
100 |                     )
101 |                 else:
102 |                     ax.scatter(
103 |                         x=row["year"],
104 |                         y=row["percentage"],
105 |                         alpha=1,
106 |                         s=800,
107 |                         color=metadata.color.PINK_COLOUR,
108 |                         zorder=2,
109 |                     )
110 |                     ax.text(
111 |                         x=row["year"],
112 |                         y=row["percentage"],
113 |                         s=row["distributor"],
114 |                         horizontalalignment="center",
115 |                         verticalalignment="center",
116 |                         color="black",
117 |                         size=14,
118 |                         path_effects=[
119 |                             pe.withStroke(
120 |                                 linewidth=4,
121 |                                 foreground=metadata.color.PINK_COLOUR,
122 |                             ),
123 |                         ],
124 |                     )
125 | 
126 |         ax.yaxis.set_major_formatter(mtick.PercentFormatter())
127 |         ax.set_title("Top film distributor, 1957 - 2021", fontsize=35, y=1.05)
128 | 
129 |         for tick in ax.xaxis.get_major_ticks():
130 |             tick.label1.set_fontsize(15)
131 | 
132 |         for tick in ax.yaxis.get_major_ticks():
133 |             tick.label1.set_fontsize(15)
134 | 
135 |         ax.tick_params(axis="both", which="both", length=0)
136 | 
137 |         ax.spines["top"].set_visible(False)
138 |         ax.spines["right"].set_visible(False)
139 |         ax.spines["left"].set_visible(False)
140 |         ax.spines["bottom"].set_visible(False)
141 | 
142 |         ax.grid(alpha=0.15, axis="y", zorder=0)
143 | 
144 |         years = YearLocator(5)  # type: ignore[no-untyped-call]
145 |         years_fmt = DateFormatter("%Y")  # type: ignore[no-untyped-call]
146 |         ax.xaxis.set_major_locator(years)
147 |         ax.xaxis.set_major_formatter(years_fmt)
148 | 
149 |         fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
150 |         ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)
151 | 
152 |     return fig
153 | 
154 | 
155 | # Script entry point: only runs when this file is executed directly.
156 | # NOTE(review): add_to_dvc presumably registers this plot with DVC - confirm.
157 | if __name__ == "__main__":
158 |     dvc_entry.add_to_dvc(path=pathlib.Path(__file__))
159 |     save_plot_output.save_plot(fig=main(), file=__file__)
160 |     raise SystemExit  # bare SystemExit ends the process with status 0
161 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/sns_violin_plot_custom/__init__.py:
--------------------------------------------------------------------------------
1 | """Customise sns violin plot."""
2 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/sns_violin_plot_custom/data.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/sns_violin_plot_custom/data.parquet


--------------------------------------------------------------------------------
/plotting_examples/y2022/sns_violin_plot_custom/plot.py:
--------------------------------------------------------------------------------
 1 | # pylint: disable=duplicate-code
 2 | """
 3 | Edit SNS violin plot.
 4 | 
 5 | Simple example of adjusting the output of a sns plot - I don't typically use sns, but
 6 | ofc the objects can be accessed/iterated/edited over as with any other mpl axis.
 7 | 
 8 | What's here doesn't look good - just an example of changing defaults.
 9 | """
10 | 
11 | from __future__ import annotations
12 | 
13 | import pathlib
14 | 
15 | import matplotlib as mpl
16 | import matplotlib.pyplot as plt
17 | import pandas as pd
18 | import seaborn as sns
19 | 
20 | from plotting_examples import dvc_entry, save_plot_output
21 | from plotting_examples.y2022 import metadata
22 | 
23 | 
24 | def main() -> mpl.figure.Figure:
25 |     """Main."""
26 |     with plt.rc_context(
27 |         {
28 |             "xtick.major.pad": 10,
29 |             "font.family": "monospace",
30 |         },
31 |     ):
32 |         fig, axis = plt.subplots(
33 |             figsize=(10, 5),
34 |             constrained_layout=False,
35 |         )
36 |         df = pd.read_parquet(pathlib.Path(__file__).parent / "data.parquet")
37 | 
38 |         vio = sns.violinplot(
39 |             data=df,
40 |             x="species",
41 |             y="flipper_length_mm",
42 |             density_norm="count",
43 |             inner="box",
44 |             linewidth=4,
45 |             ax=axis,
46 |             color=metadata.color.PINK_COLOUR,
47 |         )
48 | 
49 |         vio.grid(alpha=0.2)
50 |         # What size to increase/decreate the central boxplot section to.
51 |         new_width = 30
52 | 
53 |         # adjust the size of the boxplot, which of these list elements to edit
54 |         # is just guess and check.
55 |         for vio_line in vio.lines[1::2]:
56 |             vio_line.set_linewidth(new_width)
57 | 
58 |         # Adjust the median point markers within the boxplot.
59 |         for child in vio.get_children()[1:6:2]:
60 |             child.set_linewidth(5)
61 | 
62 |         fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
63 |         vio.set_facecolor(metadata.color.BACKGROUND_COLOUR)
64 |     return fig
65 | 
66 | 
 67 | if __name__ == "__main__":  # script entry point
 68 |     dvc_entry.add_to_dvc(path=pathlib.Path(__file__))  # presumably registers this file with DVC
 69 |     save_plot_output.save_plot(fig=main(), file=__file__)  # build figure, hand to save helper
 70 |     raise SystemExit  # exits with status 0
71 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/split_x_axis_custom_legend/__init__.py:
--------------------------------------------------------------------------------
1 | """Create plot with custom legend."""
2 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/split_x_axis_custom_legend/plot.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=duplicate-code
  2 | """
  3 | Example of creating multiple x-axis in order to plot year / months.
  4 | 
  5 | The fig size needs to be pretty large in order to squeeze all the month names etc in
  6 | here. Generated data looks a mess on these plots.
  7 | 
  8 | Example of:
  9 | 
 10 | - Custom legend
 11 | - generating random date data
 12 | - multiple x-axis to display years / months
 13 | """
 14 | 
 15 | from __future__ import annotations
 16 | 
 17 | import pathlib
 18 | 
 19 | import matplotlib as mpl
 20 | import matplotlib.dates as mdates
 21 | import matplotlib.pyplot as plt
 22 | import numpy as np
 23 | import pandas as pd
 24 | from matplotlib.lines import Line2D
 25 | 
 26 | from plotting_examples import dvc_entry, save_plot_output
 27 | from plotting_examples.y2022 import metadata
 28 | 
 29 | np_rnd = np.random.Generator(np.random.MT19937(seed=0))
 30 | 
 31 | 
 32 | def random_dates(
 33 |     start: pd._libs.tslibs.timestamps.Timestamp,
 34 |     end: pd._libs.tslibs.timestamps.Timestamp,
 35 |     n_days: int,
 36 |     unit: str = "D",
 37 | ) -> pd.Series:
 38 |     """
 39 |     Generate random dates.
 40 | 
 41 |     >>> start = pd.to_datetime('2015-01-01')
 42 |     >>> end = pd.to_datetime('2018-01-01')
 43 | 
 44 |     Found on a SO post, can't remember where now though.
 45 |     """
 46 |     ndays = (end - start).days + 1
 47 |     return pd.to_timedelta(np_rnd.random(n_days) * ndays, unit=unit) + start
 48 | 
 49 | 
 50 | def main() -> mpl.figure.Figure:
 51 |     """Main."""
 52 |     n = 10_000
 53 |     # generate sample data
 54 |     df = pd.DataFrame(
 55 |         {
 56 |             "location": np_rnd.choice(
 57 |                 ["UK", "US", "FR", "JP", "DE"],
 58 |                 size=n,
 59 |             ),
 60 |             "song": np_rnd.choice(
 61 |                 [
 62 |                     "one two three",
 63 |                     "four five six",
 64 |                     "seven eight nine",
 65 |                     "ten eleven twelve",
 66 |                     "thirteen",
 67 |                     "fourteen",
 68 |                     "fifteen sixteen",
 69 |                 ],
 70 |                 size=n,
 71 |             ),
 72 |             "streams": np_rnd.integers(1_000, 10_000, size=n),
 73 |             "date": random_dates(
 74 |                 start=pd.to_datetime("2020-01-01"),
 75 |                 end=pd.to_datetime("2022-03-01"),
 76 |                 n_days=n,
 77 |             ),
 78 |         },
 79 |     )
 80 |     # aggregate for plotting
 81 |     df = (
 82 |         df.groupby(["location", "song", pd.Grouper(key="date", freq="ME")])["streams"]
 83 |         .sum()
 84 |         .reset_index()
 85 |         # Aggregated to months so don't need date names here.
 86 |         .assign(
 87 |             date_name=df.date.dt.month_name() + " " + df.date.dt.year.astype(str),
 88 |             # Color mapping for song names to use in plotting
 89 |             color=lambda df: df["song"].map(
 90 |                 {
 91 |                     "fifteen sixteen": metadata.color.TAN,
 92 |                     "four five six": metadata.color.PURPLEY,
 93 |                     "fourteen": "black",
 94 |                     "one two three": metadata.color.PINK_COLOUR,
 95 |                     "seven eight nine": metadata.color.DEEPER_GREEN,
 96 |                     "ten eleven twelve": metadata.color.BLUE,
 97 |                     "thirteen": metadata.color.BROWNY_RED,
 98 |                 },
 99 |             ),
100 |         )
101 |     )
102 | 
103 |     def format_axis(ax: plt.Axes) -> None:  # type: ignore[name-defined]
104 |         """Format axis."""
105 |         ax.grid(alpha=0.2)
106 | 
107 |     def stream_plot(df: pd.DataFrame, country: str, ax: plt.Axes) -> None:  # type: ignore[name-defined]
108 |         for _, song_data in df.groupby("song"):
109 |             ax.plot(
110 |                 song_data["date"],
111 |                 song_data["streams"],
112 |                 color=song_data["color"].to_list().pop(),
113 |                 alpha=0.7,
114 |                 linewidth=3,
115 |             )
116 |             format_axis(ax=ax)
117 |             ax.set_title(
118 |                 country,
119 |                 fontsize=20,
120 |             )
121 | 
122 |             for label in ax.get_xticklabels():
123 |                 label.set_rotation(45)
124 |                 label.set_ha("right")
125 | 
126 |             ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)
127 | 
128 |         # want to format 1000 -> 1,000
129 |         ax.get_yaxis().set_major_formatter(
130 |             mpl.ticker.FuncFormatter(lambda x, _: format(int(x), ",")),
131 |         )
132 |         # reduce some noise
133 |         ax.spines["top"].set_visible(False)
134 |         ax.spines["right"].set_visible(False)
135 | 
136 |         fmt_month = mdates.MonthLocator(interval=1)  # type: ignore[no-untyped-call]
137 |         fmt_year = mdates.YearLocator()  # type: ignore[no-untyped-call]
138 |         ax.xaxis.set_minor_locator(fmt_month)
139 |         ax.xaxis.set_minor_formatter(mdates.DateFormatter("%b"))  # type: ignore[no-untyped-call]
140 |         ax.xaxis.set_ticks([])
141 | 
142 |         ax.tick_params(axis="x", which="minor", labelsize=8)
143 | 
144 |         sec_xaxis = ax.secondary_xaxis(-0.1)
145 |         sec_xaxis.xaxis.set_major_locator(fmt_year)
146 |         sec_xaxis.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))  # type: ignore[no-untyped-call]
147 |         sec_xaxis.spines["bottom"].set_visible(False)
148 |         sec_xaxis.tick_params(length=0, labelsize=12)
149 | 
150 |     color_dict = df.drop_duplicates("song").set_index("song")["color"].to_dict()
151 | 
152 |     fig, axis = plt.subplots(
153 |         ncols=3,
154 |         nrows=2,
155 |         figsize=(35, 20),
156 |     )
157 |     plt.subplots_adjust(
158 |         left=None,
159 |         bottom=None,
160 |         right=None,
161 |         top=None,
162 |         wspace=None,
163 |         hspace=0.5,
164 |     )
165 | 
166 |     axis = axis.flatten()
167 |     iter(axis.flatten())
168 | 
169 |     plt.suptitle(
170 |         "Streaming across different countries for different songs",
171 |         fontsize=25,
172 |     )
173 | 
174 |     stream_plot(
175 |         df=df.loc[df["location"].eq("DE")],
176 |         country="DE",
177 |         ax=axis[0],
178 |     )
179 | 
180 |     stream_plot(
181 |         df=df.loc[df["location"].eq("FR")],
182 |         country="FR",
183 |         ax=axis[1],
184 |     )
185 | 
186 |     stream_plot(
187 |         df=df.loc[df["location"].eq("JP")],
188 |         country="JP",
189 |         ax=axis[2],
190 |     )
191 | 
192 |     stream_plot(
193 |         df=df.loc[df["location"].eq("UK")],
194 |         country="UK",
195 |         ax=axis[3],
196 |     )
197 | 
198 |     stream_plot(
199 |         df=df.loc[df["location"].eq("US")],
200 |         country="US",
201 |         ax=axis[5],
202 |     )
203 | 
204 |     # Plot legend
205 | 
206 |     ax = axis[4]
207 |     custom_lines = [Line2D([0], [0], color=x, lw=6) for x in color_dict.values()]
208 |     ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)
209 | 
210 |     ax.legend(
211 |         custom_lines,
212 |         list(color_dict.keys()),
213 |         loc="center",
214 |         fontsize=16,
215 |         frameon=False,
216 |         borderpad=2,
217 |     )
218 | 
219 |     for spine in ax.spines:
220 |         ax.spines[spine].set_visible(False)
221 | 
222 |     ax.get_xaxis().set_ticks([])
223 |     ax.get_yaxis().set_ticks([])
224 | 
225 |     fig.supylabel(
226 |         "Something about the y-axis",
227 |         x=0.09,
228 |         fontsize=20,
229 |     )
230 | 
231 |     fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
232 |     return fig
233 | 
234 | 
235 | if __name__ == "__main__":  # script entry point
236 |     with plt.rc_context(  # main() sets no rc params itself, so wrap the call here
237 |         {
238 |             "xtick.major.pad": 10,
239 |             "font.family": "monospace",
240 |         },
241 |     ):
242 |         dvc_entry.add_to_dvc(path=pathlib.Path(__file__))  # presumably registers this file with DVC
243 |         save_plot_output.save_plot(fig=main(), file=__file__)  # build figure, hand to save helper
244 |     raise SystemExit  # exits with status 0
245 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/stacked_bar_with_single_bars_layout/__init__.py:
--------------------------------------------------------------------------------
1 | """Stacked bar chart."""
2 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/stacked_bar_with_single_bars_layout/data/lab.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/stacked_bar_with_single_bars_layout/data/lab.png


--------------------------------------------------------------------------------
/plotting_examples/y2022/stacked_bar_with_single_bars_layout/plot.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=duplicate-code
  2 | """
  3 | Layout containing two bar plots and a bivariate plot between them.
  4 | 
  5 | In this case it's a silly example of some data containing the social grade of
  6 | Labradors, as well as the education group. The main plot is a stacked bar containing
  7 | the breakdown of education group for each social grade.
  8 | 
  9 | Don't think I'm too keen on the code for this plot - though it's not always so clear
 10 | (to me) how to make "nice" code with a lot of matplotlib stuff.
 11 | 
 12 | Obviously, the data is made up.
 13 | """
 14 | 
 15 | from __future__ import annotations
 16 | 
 17 | import pathlib
 18 | 
 19 | import matplotlib as mpl
 20 | import matplotlib.pyplot as plt
 21 | import numpy as np
 22 | import pandas as pd
 23 | from PIL import Image
 24 | 
 25 | from plotting_examples import dvc_entry, save_plot_output
 26 | from plotting_examples.y2022 import metadata
 27 | 
 28 | # Fontsize for the main title and subtitle
 29 | FONTSIZE_TITLE = 30
 30 | FONTSIZE_SUBTITLE = 20
 31 | # Fontsize for the numbers displayed on bars.
 32 | FONTSIZE_PLT_TXT = 10
 33 | 
 34 | # What colour to outline the edges of bars with - if None then there's no outline
 35 | # created.
 36 | BAR_EDGECOLOR: str | None = None
 37 | # What level of rounding to apply to percentages displayed on bars.
 38 | ROUNDING_PCTS = 1
 39 | 
 40 | # Dependent var
 41 | VAR_DEPENDENT = "dependent_var"
 42 | # Independent var
 43 | VAR_INDEPENDENT = "independent_var"
 44 | # Text for the main title - the subtitle is generated from the metadata atm.
 45 | TEXT_TITLE = "Labradors\neducation ~ social grade"
 46 | # Image to display in teh top left.
 47 | IMAGE_PATH = (
 48 |     "./plotting_examples/y2022/stacked_bar_with_single_bars_layout/data/lab.png"
 49 | )
 50 | 
 51 | COLORS = [
 52 |     metadata.color.PINK_COLOUR,
 53 |     metadata.color.DEEPER_GREEN,
 54 |     metadata.color.BROWNY_RED,
 55 | ]
 56 | COLOR_SUBTITLE_TEXT = "#808080"
 57 | 
 58 | # subplot_mosaic layout definition.
 59 | LAYOUT = [
 60 |     ["top_left_corner", "title", "title", "title", "top_right_corner"],
 61 |     ["main", "main", "main", "main", "side"],
 62 |     ["main", "main", "main", "main", "side"],
 63 |     ["bottom", "bottom", "bottom", "bottom", "bottom_right_corner"],
 64 | ]
 65 | 
 66 | # Font colours used on light and dark bars respectively - so that the text is
 67 | # readable (not dark font on dark bars etc).
 68 | COLOR_FONT_LIGHT = "#000000"
 69 | COLOR_FONT_DARK = "#ffffff"
 70 | 
 71 | 
 72 | def get_sample_data() -> (
 73 |     tuple[
 74 |         pd.DataFrame,
 75 |         dict[str, dict[float, str]],
 76 |         dict[str, str],
 77 |     ]
 78 | ):
 79 |     """
 80 |     Generate sample data.
 81 | 
 82 |     Data structured similar to what you'd find in an SPSS sav file - where there's the
 83 |     df (responses), cnl (metadata about the columns) and vvl (metadata about the values
 84 |     within the columns)
 85 |     """
 86 |     rng = np.random.default_rng(1)
 87 |     # Create dataframe with different distributions for each of the independent
 88 |     # variable levels.
 89 |     df = (
 90 |         pd.concat(
 91 |             [
 92 |                 pd.DataFrame(
 93 |                     {
 94 |                         VAR_DEPENDENT: rng.choice(
 95 |                             [1, 2, 3],
 96 |                             size=330,
 97 |                             p=(0.87, 0.1, 0.03),
 98 |                         ),
 99 |                         VAR_INDEPENDENT: 5,
100 |                     },
101 |                 ),
102 |                 pd.DataFrame(
103 |                     {
104 |                         VAR_DEPENDENT: rng.choice(
105 |                             [1, 2, 3],
106 |                             size=410,
107 |                             p=(0.44, 0.54, 0.02),
108 |                         ),
109 |                         VAR_INDEPENDENT: 4,
110 |                     },
111 |                 ),
112 |                 pd.DataFrame(
113 |                     {
114 |                         VAR_DEPENDENT: rng.choice(
115 |                             [1, 2, 3],
116 |                             size=510,
117 |                             p=(0.26, 0.61, 0.13),
118 |                         ),
119 |                         VAR_INDEPENDENT: 3,
120 |                     },
121 |                 ),
122 |                 pd.DataFrame(
123 |                     {
124 |                         VAR_DEPENDENT: rng.choice(
125 |                             [1, 2, 3],
126 |                             size=800,
127 |                             p=(0.105, 0.565, 0.33),
128 |                         ),
129 |                         VAR_INDEPENDENT: 2,
130 |                     },
131 |                 ),
132 |                 pd.DataFrame(
133 |                     {
134 |                         VAR_DEPENDENT: rng.choice(
135 |                             [1, 2, 3],
136 |                             size=950,
137 |                             p=(0.08, 0.33, 0.59),
138 |                         ),
139 |                         VAR_INDEPENDENT: 1,
140 |                     },
141 |                 ),
142 |             ],
143 |         )
144 |         .assign(weight=1)
145 |         .reset_index(drop=True)
146 |     )
147 |     vvl = {
148 |         VAR_INDEPENDENT: {
149 |             1.0: "Upper management",
150 |             2.0: "Lower Management",
151 |             3.0: "Intermediate",
152 |             4.0: "Routine",
153 |             5.0: "Never worked",
154 |         },
155 |         VAR_DEPENDENT: {
156 |             1.0: "Low",
157 |             2.0: "Medium",
158 |             3.0: "High",
159 |         },
160 |     }
161 |     cnl = {
162 |         VAR_INDEPENDENT: "Social Grade",
163 |         VAR_DEPENDENT: "Education Level",
164 |     }
165 | 
166 |     return df, vvl, cnl
167 | 
168 | 
169 | def patch_color_light(patch: mpl.patches.Rectangle) -> bool:
170 |     """Determine if mpl patch is light or dark."""
171 |     # TODO: Put this into a global helper module.
172 |     bar_col = mpl.colors.to_hex(patch.get_facecolor())
173 |     hex_col = bar_col[1:]
174 |     red, green, blue = (
175 |         int(hex_col[0:2], 16),
176 |         int(hex_col[2:4], 16),
177 |         int(hex_col[4:6], 16),
178 |     )
179 |     # https://stackoverflow.com/questions/3942878/how-to-decide-
180 |     # font-color-in-white-or-black-depending-on-background-color
181 |     threshold = 100
182 |     if (red * 0.299 + green * 0.587 + blue * 0.114) > threshold:
183 |         return True
184 |     return False
185 | 
186 | 
187 | class PlotSections:
188 | 
189 |     """
190 |     Holds plotting sections.
191 | 
192 |     Just using this for namespacing really! Which was triggered by pylint complaining,
193 |     which probably isn't a good reason... Might usually just put this in a module but
194 |     wanted all the code in plot.py
195 | 
196 |     Considered adding the df, vvl, cnl to the class in an __init__ or whatever but left
197 |     it as-is.
198 |     """
199 | 
200 |     # rename to bivariate.
201 |     @staticmethod
202 |     def main(
203 |         ax: plt.Axes,  # type: ignore[name-defined]
204 |         df: pd.DataFrame,
205 |         vvl: dict[str, dict[float, str]],
206 |         # cnl: dict[str, str],
207 |     ) -> None:
208 |         # pylint: disable=too-many-locals
209 |         """Plot the stacked bars."""
210 |         df_plot = (
211 |             pd.crosstab(
212 |                 df[VAR_DEPENDENT],
213 |                 df[VAR_INDEPENDENT].replace(vvl[VAR_INDEPENDENT]),
214 |                 normalize="columns",
215 |             )
216 |             .mul(100)
217 |             .round(1)
218 |             .loc[:, list(vvl[VAR_INDEPENDENT].values())]
219 |         )
220 |         df_plot_counts = pd.crosstab(
221 |             df[VAR_DEPENDENT],
222 |             df[VAR_INDEPENDENT],
223 |         )
224 |         df_plot.T.plot.barh(
225 |             stacked=True,
226 |             ax=ax,
227 |             color=COLORS,
228 |             edgecolor=BAR_EDGECOLOR,
229 |         )
230 | 
231 |         ax.grid(linestyle=":", alpha=0.3)
232 | 
233 |         # The legend _should_ be self explanatory from the context of the plot.
234 |         ax.get_legend().remove()
235 | 
236 |         # Not interested in seeing the col name on the y axis for the main plot
237 |         ax.set_ylabel("")
238 | 
239 |         data_matrix = df_plot.to_numpy().flatten()
240 |         data_matrix_counts = df_plot_counts.to_numpy().flatten()
241 |         min_bar_size = 3
242 |         for i, patch in enumerate(ax.patches):
243 |             width = patch.get_width()
244 |             height = patch.get_height()
245 |             x, y = patch.get_xy()
246 |             data_i = data_matrix[i] if data_matrix[i] >= min_bar_size else "-"
247 |             data_count_i = (
248 |                 data_matrix_counts[i] if data_matrix[i] >= min_bar_size else None
249 |             )
250 | 
251 |             ann = f"{data_i} ({data_count_i})" if data_count_i is not None else "-"
252 | 
253 |             text_col = COLOR_FONT_LIGHT if patch_color_light(patch) else COLOR_FONT_DARK
254 | 
255 |             ax.annotate(
256 |                 f"{ann}",
257 |                 (x + width * 0.5, y + height * 0.5),
258 |                 ha="center",
259 |                 va="center",
260 |                 fontsize=10,
261 |                 zorder=12,
262 |                 color=text_col,
263 |             )
264 | 
265 |         ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(base=5))
266 |         ax.set_xlabel("%", fontsize=10)
267 |         ax.set_xlim(0, 100)
268 | 
269 |         ax.spines.top.set_visible(False)
270 |         ax.spines.right.set_visible(False)
271 | 
272 |     @staticmethod
273 |     def side_marginal(
274 |         ax: plt.Axes,  # type: ignore[name-defined]
275 |         df: pd.DataFrame,
276 |         vvl: dict[str, dict[float, str]],
277 |         cnl: dict[str, str],
278 |     ) -> None:
279 |         """Bar plot of the independent var."""
280 |         counts = df[VAR_INDEPENDENT].replace(vvl[VAR_INDEPENDENT]).value_counts()
281 | 
282 |         ax.barh(
283 |             counts.index,
284 |             counts,
285 |             color=metadata.color.TAN,
286 |             edgecolor=BAR_EDGECOLOR,
287 |             height=0.5,
288 |         )
289 |         ax.grid(alpha=0.2, linestyle=":")
290 |         ax.set_title(cnl[VAR_INDEPENDENT], loc="left")
291 | 
292 |         counts_list = list(counts)
293 | 
294 |         for count, patch in zip(counts, ax.patches):
295 |             count_pct = round((count / sum(counts_list)) * 100, ROUNDING_PCTS)
296 |             width = patch.get_width()
297 |             height = patch.get_height()
298 |             x, y = patch.get_xy()
299 |             txt_color = "#000000" if patch_color_light(patch) else "#ffffff"
300 |             ax.text(
301 |                 s=f"{count_pct}%\n({count})",
302 |                 x=x + width * 0.5,
303 |                 y=y + height * 0.5,
304 |                 va="center",
305 |                 ha="center",
306 |                 color=txt_color,
307 |                 fontsize=FONTSIZE_PLT_TXT,
308 |             )
309 | 
310 |         ax.set_xticks([])
311 |         ax.set_yticks([])
312 | 
313 |         ax.spines.right.set_visible(False)
314 |         ax.spines.top.set_visible(False)
315 |         ax.spines.bottom.set_visible(False)
316 |         ax.spines.left.set_visible(False)
317 | 
318 |     @staticmethod
319 |     def bottom_marginal(
320 |         ax: plt.Axes,  # type: ignore[name-defined]
321 |         df: pd.DataFrame,
322 |         vvl: dict[str, dict[float, str]],
323 |         cnl: dict[str, str],
324 |     ) -> None:
325 |         """Bar plot of the dependent variable."""
326 |         counts = df[VAR_DEPENDENT].value_counts().sort_index()
327 |         ax.bar(
328 |             x=list(vvl[VAR_DEPENDENT].values()),
329 |             height=counts,
330 |             color=COLORS,
331 |             edgecolor=BAR_EDGECOLOR,
332 |         )
333 |         ax.set_title(cnl[VAR_DEPENDENT])
334 |         ax.set_yticks([])
335 | 
336 |         counts_list = list(counts)
337 |         for count, patch in zip(counts, ax.patches):
338 |             count_pct = round((count / sum(counts_list)) * 100, 2)
339 |             width = patch.get_width()
340 |             height = patch.get_height()
341 |             x, y = patch.get_xy()
342 |             if patch_color_light(patch):
343 |                 txt_color = COLOR_FONT_LIGHT
344 |             else:
345 |                 txt_color = COLOR_FONT_DARK
346 |             ax.text(
347 |                 s=f"{count_pct}\n({count})",
348 |                 x=x + width * 0.5,
349 |                 y=y + height * 0.5,
350 |                 va="center",
351 |                 ha="center",
352 |                 color=txt_color,
353 |                 fontsize=FONTSIZE_PLT_TXT,
354 |             )
355 | 
356 |         ax.spines.top.set_visible(False)
357 |         ax.spines.right.set_visible(False)
358 |         ax.spines.left.set_visible(False)
359 | 
360 |     @staticmethod
361 |     def title(ax: plt.Axes, cnl: dict[str, str]) -> None:  # type: ignore[name-defined]
362 |         """Overall title."""
363 |         ax.text(
364 |             s="Labradors",
365 |             x=0.1,
366 |             y=0.5,
367 |             fontsize=FONTSIZE_TITLE,
368 |             horizontalalignment="left",
369 |             verticalalignment="bottom",
370 |         )
371 | 
372 |         # Just using this to nudge the text placement around...
373 |         diff = 0.3
374 |         ax.text(
375 |             # Assuming that the metadata is reasonably nice for this.
376 |             s=f"{cnl[VAR_DEPENDENT]} ~ {cnl[VAR_INDEPENDENT]}",
377 |             x=0.1,
378 |             y=0.5 - diff,
379 |             fontsize=FONTSIZE_SUBTITLE,
380 |             horizontalalignment="left",
381 |             verticalalignment="bottom",
382 |             color=COLOR_SUBTITLE_TEXT,
383 |         )
384 | 
385 |         ax.axis("off")
386 | 
387 |     @staticmethod
388 |     def top_left_corner(ax: plt.Axes) -> None:  # type: ignore[name-defined]
389 |         """Plot logo."""
390 |         img_path = IMAGE_PATH
391 |         club_icon = Image.open(img_path)
392 |         ax.imshow(club_icon)
393 |         ax.axis("off")
394 | 
395 |     @staticmethod
396 |     def top_right_corner(ax: plt.Axes) -> None:  # type: ignore[name-defined]
397 |         """Just leaving this empty for now."""
398 |         ax.axis("off")
399 | 
400 |     @staticmethod
401 |     def bottom_right_corner(ax: plt.Axes, df: pd.DataFrame) -> None:  # type: ignore[name-defined]
402 |         """Some random information like data source etc."""
403 |         ax.text(
404 |             s=(
405 |                 #
406 |                 "2022 Labrador educational \ndata and social grades"
407 |                 "\n"
408 |                 "\n"
409 |                 f"Sample size : {df.shape[0]}"
410 |                 "\n"
411 |                 "\n"
412 |                 "source: somedogdata.com"
413 |             ),
414 |             x=0,
415 |             y=0.5,
416 |             fontsize=FONTSIZE_PLT_TXT,
417 |             va="center",
418 |             ha="left",
419 |             color=COLOR_SUBTITLE_TEXT,
420 |         )
421 |         ax.axis("off")
422 | 
423 |     @staticmethod
424 |     def footnote(ax: plt.Axes) -> None:  # type: ignore[name-defined]
425 |         """
426 |         Plot footnote.
427 | 
428 |         Didn't bother using this in the end.
429 |         """
430 |         ax.text(
431 |             s=(
432 |                 #
433 |                 "Some text about the data, Labradors, whatever."
434 |             ),
435 |             x=0,
436 |             y=1,
437 |             fontsize=10,
438 |             style="italic",
439 |             va="top",
440 |             ha="left",
441 |             color=COLOR_SUBTITLE_TEXT,
442 |         )
443 |         ax.set_xticks([])
444 |         ax.set_yticks([])
445 | 
446 | 
447 | def main() -> mpl.figure.Figure:
448 |     """Main."""
449 |     df, vvl, cnl = get_sample_data()
450 | 
451 |     plot_sections = PlotSections()
452 | 
453 |     with plt.rc_context(
454 |         {
455 |             "xtick.major.pad": 10,
456 |             "font.family": "monospace",
457 |         },
458 |     ):
459 |         fig = plt.figure(
460 |             figsize=(15, 10),
461 |         )
462 |         ax_dict = fig.subplot_mosaic(LAYOUT)  # type: ignore[arg-type]
463 | 
464 |         plot_sections.title(ax=ax_dict["title"], cnl=cnl)
465 |         plot_sections.bottom_marginal(
466 |             ax=ax_dict["bottom"],
467 |             df=df,
468 |             cnl=cnl,
469 |             vvl=vvl,
470 |         )
471 |         plot_sections.main(
472 |             ax=ax_dict["main"],
473 |             df=df,
474 |             vvl=vvl,
475 |         )
476 |         plot_sections.side_marginal(ax=ax_dict["side"], df=df, vvl=vvl, cnl=cnl)
477 |         plot_sections.top_left_corner(ax=ax_dict["top_left_corner"])
478 |         plot_sections.top_right_corner(ax=ax_dict["top_right_corner"])
479 |         plot_sections.bottom_right_corner(
480 |             ax=ax_dict["bottom_right_corner"],
481 |             df=df,
482 |         )
483 | 
484 |         fig.tight_layout()
485 | 
486 |     # Set background colours.
487 |     fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
488 |     for ax_name in ax_dict:
489 |         ax_dict[ax_name].set_facecolor(metadata.color.BACKGROUND_COLOUR)
490 | 
491 |     return fig
492 | 
493 | 
494 | if __name__ == "__main__":  # script entry point
495 |     dvc_entry.add_to_dvc(path=pathlib.Path(__file__))  # presumably registers this file with DVC
496 |     save_plot_output.save_plot(fig=main(), file=__file__)  # build figure, hand to save helper
497 |     raise SystemExit  # exits with status 0
498 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/uk_hexmap/__init__.py:
--------------------------------------------------------------------------------
1 | """Create hexmap."""
2 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.cpg:
--------------------------------------------------------------------------------
1 | UTF-8
2 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.dbf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.dbf


--------------------------------------------------------------------------------
/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.prj:
--------------------------------------------------------------------------------
1 | PROJCS["WGS_1984_Web_Mercator_Auxiliary_Sphere",GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Mercator_Auxiliary_Sphere"],PARAMETER["False_Easting",0.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",0.0],PARAMETER["Standard_Parallel_1",0.0],PARAMETER["Auxiliary_Sphere_Type",0.0],UNIT["Meter",1.0]]
2 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.sbn:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.sbn


--------------------------------------------------------------------------------
/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.sbx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.sbx


--------------------------------------------------------------------------------
/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.shp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.shp


--------------------------------------------------------------------------------
/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.shp.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <metadata xml:lang="en"><Esri><CreaDate>20161025</CreaDate><CreaTime>17001900</CreaTime><ArcGISFormat>1.0</ArcGISFormat><SyncOnce>TRUE</SyncOnce><DataProperties><lineage><Process ToolSource="c:\program files (x86)\arcgis\desktop10.4\ArcToolbox\Toolboxes\Data Management Tools.tbx\CalculateField" Date="20160602" Time="215304">CalculateField Hex_All_Data Region_Name "Northern Ireland" VB #</Process><Process ToolSource="c:\program files (x86)\arcgis\desktop10.4\ArcToolbox\Toolboxes\Data Management Tools.tbx\CalculateField" Date="20160602" Time="215317">CalculateField Hex_All_Data Region_ID 12 VB #</Process><Process ToolSource="c:\program files (x86)\arcgis\desktop10.4\ArcToolbox\Toolboxes\Data Management Tools.tbx\CalculateField" Date="20160602" Time="215337">CalculateField Hex_All_Data DESCRIPTIO "Westminster Constituency" VB #</Process></lineage><itemProps><itemLocation><linkage Sync="TRUE">file://\\AYL-LT-02839\Users\bflanagan\OneDrive - ESRI (UK) Ltd\Carto\BlogData\Cartograms.gdb</linkage><protocol Sync="TRUE">Local Area Network</protocol></itemLocation></itemProps></DataProperties></Esri></metadata>
3 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.shx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.shx


--------------------------------------------------------------------------------
/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.zip


--------------------------------------------------------------------------------
/plotting_examples/y2022/uk_hexmap/plot.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=duplicate-code
  2 | """
  3 | Hex map for the UK constituencies.
  4 | 
  5 | Some meaningless generated data - small multiples with hex maps can be useful sometimes
  6 | though. Could be good to add in the geographically accurate version as well.
  7 | """
  8 | 
  9 | from __future__ import annotations
 10 | 
 11 | import pathlib
 12 | 
 13 | import geopandas
 14 | import matplotlib as mpl
 15 | import matplotlib.pyplot as plt
 16 | import numpy as np
 17 | from matplotlib.colors import ListedColormap
 18 | 
 19 | from plotting_examples import dvc_entry, save_plot_output
 20 | from plotting_examples.y2022 import metadata
 21 | 
 22 | random_choice = np.random.Generator(np.random.MT19937(1)).choice  # fixed seed
 23 | 
 24 | 
 25 | def main() -> mpl.figure.Figure:
 26 |     """Main."""
 27 |     election_data = (
 28 |         pathlib.Path(__file__).parent
 29 |         / "data/gb_hex_cartogram/GB_Hex_Cartogram_Const.shp"
 30 |     )
 31 |     gdf = geopandas.read_file(election_data)
 32 | 
 33 |     # Set up color maps by party
 34 |     partycolors = {
 35 |         "A": metadata.color.DEEPER_GREEN,
 36 |         "B": metadata.color.PINK_COLOUR,
 37 |         "C": metadata.color.TAN,
 38 |     }
 39 | 
 40 |     parties = ["A", "B", "C"]
 41 |     pcols = {c: partycolors[c] for c in parties}
 42 |     colors = [pcols[k] for k in sorted(pcols.keys())]
 43 |     with plt.rc_context(
 44 |         {
 45 |             "xtick.major.pad": 10,
 46 |             "font.family": "monospace",
 47 |         },
 48 |     ):
 49 |         fig, axes = plt.subplots(
 50 |             nrows=1,
 51 |             ncols=3,
 52 |             figsize=(15, 5),
 53 |         )
 54 | 
 55 |         font_size = 15
 56 |         edgecolor = "black"
 57 |         edge_width = 0.5
 58 | 
 59 |         ax = axes[0]
 60 |         gdf["Party"] = list(
 61 |             random_choice(
 62 |                 parties,
 63 |                 size=len(gdf),
 64 |                 replace=True,
 65 |                 p=[0.4, 0.3, 0.3],
 66 |             ),
 67 |         )
 68 |         gdf.plot(
 69 |             ax=ax,
 70 |             column="Party",
 71 |             cmap=ListedColormap(colors),
 72 |             edgecolor=edgecolor,
 73 |             linewidth=edge_width,
 74 |         )
 75 |         _ = ax.axis("off")
 76 |         _ = ax.set_title("Current", fontsize=font_size, loc="left")
 77 | 
 78 |         ax = axes[1]
 79 |         gdf["Party"] = list(
 80 |             random_choice(
 81 |                 parties,
 82 |                 size=len(gdf),
 83 |                 replace=True,
 84 |                 p=[0.3, 0.6, 0.1],
 85 |             ),
 86 |         )
 87 |         gdf.plot(
 88 |             ax=ax,
 89 |             column="Party",
 90 |             cmap=ListedColormap(colors),
 91 |             edgecolor=edgecolor,
 92 |             linewidth=edge_width,
 93 |         )
 94 |         _ = ax.axis("off")
 95 |         _ = ax.set_title("Scenario A", fontsize=font_size, loc="left")
 96 | 
 97 |         ax = axes[2]
 98 |         gdf["Party"] = list(
 99 |             random_choice(
100 |                 parties,
101 |                 size=len(gdf),
102 |                 replace=True,
103 |                 p=[0.1, 0.8, 0.1],
104 |             ),
105 |         )
106 |         gdf.plot(
107 |             ax=ax,
108 |             column="Party",
109 |             cmap=ListedColormap(colors),
110 |             edgecolor=edgecolor,
111 |             linewidth=edge_width,
112 |         )
113 |         _ = ax.axis("off")
114 |         _ = ax.set_title("Scenario B", fontsize=font_size, loc="left")
115 | 
116 |         # Create legend.
117 |         custom_lines = [
118 |             mpl.lines.Line2D([0], [0], color=x, lw=6) for x in partycolors.values()
119 |         ]
120 |         ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)
121 |         ax.legend(
122 |             custom_lines,
123 |             list(partycolors.keys()),
124 |             loc=(0.7, 0.7),
125 |             fontsize=12,
126 |             frameon=False,
127 |             borderpad=2,
128 |         )
129 | 
130 |         # The dataframe seems to assign items to categories based on the selected column
131 |         # sort order We can define a color map with a similar sorting
132 |         colors = [partycolors[k] for k in sorted(partycolors.keys())]
133 | 
134 |     fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
135 |     fig.set_tight_layout(True)  # type: ignore[attr-defined]
136 |     ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)
137 |     return fig
138 | 
139 | 
140 | if __name__ == "__main__":
141 |     dvc_entry.add_to_dvc(path=pathlib.Path(__file__))
142 |     save_plot_output.save_plot(fig=main(), file=__file__)
143 |     raise SystemExit
144 | 


--------------------------------------------------------------------------------
/plotting_examples/y2022/uk_hexmap/uk_hex_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/uk_hexmap/uk_hex_example.png


--------------------------------------------------------------------------------
/plotting_examples/y2022/uk_hexmap/uk_hex_example.py:
--------------------------------------------------------------------------------
1 | """
2 | Hex plotting example.
3 | 
4 | Move hex example from notebook into here.
5 | """
6 | 


--------------------------------------------------------------------------------
/plotting_examples/y2024/__init__.py:
--------------------------------------------------------------------------------
1 | """Plots from 2024."""
2 | 


--------------------------------------------------------------------------------
/plotting_examples/y2024/cat_weight/__init__.py:
--------------------------------------------------------------------------------
1 | """Plot for the cat's weight loss."""
2 | 


--------------------------------------------------------------------------------
/plotting_examples/y2024/cat_weight/data/cat_looking_to_side.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2024/cat_weight/data/cat_looking_to_side.jpeg


--------------------------------------------------------------------------------
/plotting_examples/y2024/cat_weight/data/weight_data.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2024/cat_weight/data/weight_data.parquet


--------------------------------------------------------------------------------
/plotting_examples/y2024/cat_weight/plot.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Time series of the cat's diet.
  3 | 
  4 | Cat was getting a little chunky towards the end of 2023 so had a resolution made for
  5 | them to lose a bit of weight. Data collection is just a daily weigh, the average of
  6 | this is taken (as there are sometimes multiple entries in a day) and then plotted along
  7 | with a ten day rolling average. Most days were covered, where there are missing days
  8 | they're imputed using the average of the days either side, eg `(a, nan, b) -> (a,
  9 | (a+b)/2, b)` though this is just a plot...
 10 | """
 11 | 
 12 | from __future__ import annotations
 13 | 
 14 | import datetime as dt
 15 | import pathlib
 16 | from pathlib import Path
 17 | 
 18 | import matplotlib as mpl
 19 | import matplotlib.dates as mdates
 20 | import matplotlib.pyplot as plt
 21 | import numpy as np
 22 | import pandas as pd
 23 | from PIL import Image
 24 | 
 25 | from plotting_examples import dvc_entry, save_plot_output
 26 | from plotting_examples.y2024 import metadata
 27 | 
 28 | np_rnd = np.random.Generator(np.random.MT19937(0))  # fixed-seed generator
 29 | # Mosaic for `fig.subplot_mosaic`: a title row, then four rows of main plot + side.
 30 | LAYOUT = [
 31 |     ["title", "title", "title", "title", "top_right_corner", "top_right_corner"],
 32 |     ["main", "main", "main", "main", "side", "side"],
 33 |     ["main", "main", "main", "main", "side", "side"],
 34 |     ["main", "main", "main", "main", "side", "side"],
 35 |     ["main", "main", "main", "main", "side", "side"],
 36 | ]
 37 | FONTSIZE_TITLE = 25  # "Cat Weight" heading
 38 | FONTSIZE_SUBTITLE = 15  # date-range line under the heading
 39 | COLOR_SUBTITLE_TEXT = "#808080"  # mid grey
 40 | 
 41 | 
 42 | def get_xlsx_from_downloads() -> Path:
 43 |     """
 44 |     Get xlsx file from Downloads.
 45 | 
 46 |     Pretty janky approach but it's near enough whilst doing this - workflow is just to
 47 |     download the xlsx file containing the gform responses from gsheets then this will
 48 |     pick it up and move it to this project as a parquet file. Wasn't worth using the
 49 |     API for it.
 50 |     """
 51 |     output_name = Path(__file__).parent / "data" / "weight_data.parquet"
 52 |     xlsx_files = sorted((pathlib.Path.home() / "Downloads").glob("*espon*xlsx"))
 53 | 
 54 |     if len(xlsx_files) == 0:
 55 |         # Most likely this is re-running and has already been moved so just use
 56 |         # whatever's already in data/.
 57 |         return output_name
 58 | 
 59 |     if len(xlsx_files) > 1:
 60 |         msg = "Expected a single file: "
 61 |         raise ValueError(msg, xlsx_files)
 62 | 
 63 |     # Get response data from xlsx sheet, pull out required columns and create date
 64 |     # column for grouping on
 65 |     df_response = (
 66 |         pd.ExcelFile(xlsx_files[0])
 67 |         .parse("Form responses 1")
 68 |         .rename(columns=lambda x: x.lower())
 69 |         .assign(
 70 |             timestamp=lambda x: pd.to_datetime(
 71 |                 x["timestamp"],
 72 |                 format="%d/%m/%Y %H:%M:%S",
 73 |             ),
 74 |             # Some of the days have multiple weigh-ins so want datestamp to group by on
 75 |             # in those days.
 76 |             datestamp=lambda x: pd.to_datetime(
 77 |                 x["timestamp"].apply(lambda x: x.date())
 78 |             ),
 79 |             mish_weight=lambda x: x["with_mish"].sub(x["without_mish"]),
 80 |         )
 81 |         .loc[:, ["timestamp", "datestamp", "mish_weight"]]
 82 |     ).rename(columns={"mish_weight": "cat_weight"})
 83 | 
 84 |     df_response.to_parquet(output_name)
 85 |     return output_name
 86 | 
 87 | 
 88 | def load_data(*, data_path: Path) -> pd.DataFrame:
 89 |     """Get response dataframe from downloaded xlsx file."""
 90 |     response_data = pd.read_parquet(data_path)
 91 |     return (
 92 |         response_data
 93 |         # Only need these two columns.
 94 |         .groupby("datestamp")["cat_weight"]
 95 |         # Sometimes there are multiple readings in a day
 96 |         .mean()
 97 |         .reset_index()
 98 |         .rename(columns={"cat_weight": "cat_daily_avg"})
 99 |     )
100 | 
101 | 
102 | def main() -> mpl.figure.Figure:
103 |     """
104 |     Main.
105 | 
106 |     I did consider adding some figure shapes along the lines of:
107 | 
108 |     >>> for _ in range(1, 30):
109 |     >>>     factor = 20
110 |     >>>     radius = np_rnd.random() / factor
111 |     >>>     alpha = ((1 / factor) - radius) + 0.1
112 |     >>>     circ = patches.Circle(
113 |     >>>         (np_rnd.random(), np_rnd.random()),
114 |     >>>         radius=radius,
115 |     >>>         zorder=1,
116 |     >>>         color=color.PINK_COLOUR,
117 |     >>>         alpha=alpha,
118 |     >>>     )
119 |     >>>     circ.set_transform(fig.transFigure)
120 |     >>>     fig.patches.append(circ)
121 | 
122 |     But left it.
123 |     """
124 |     df = load_data(data_path=get_xlsx_from_downloads())
125 |     df_dates = pd.DataFrame(
126 |         {
127 |             "dates": pd.date_range(
128 |                 df["datestamp"].min(),
129 |                 df["datestamp"].max(),
130 |             )
131 |         }
132 |     ).assign(month_name=lambda x: x["dates"].dt.strftime("%B"))
133 | 
134 |     # Ensure that all dates are represented (in case there's missed weigh-in days).
135 |     df = (
136 |         pd.merge(
137 |             df, df_dates[["dates"]], left_on="datestamp", right_on="dates", how="right"
138 |         )
139 |         .drop(columns="datestamp")
140 |         .rename(columns={"dates": "datestamp"})
141 |         .set_index("datestamp")
142 |         .reset_index()
143 |         .assign(imputed=lambda x: x["cat_daily_avg"].isna())
144 |     )
145 |     # Handle missing data - only expecting there to be a day of missing data at
146 |     # most!
147 |     df["cat_daily_avg"] = df.assign(
148 |         ff=df["cat_daily_avg"].ffill(),
149 |         bf=df["cat_daily_avg"].bfill(),
150 |         filled=lambda x: x["ff"].add(x["bf"]).div(2),
151 |     )["filled"]
152 | 
153 |     df = df.assign(r10=lambda x: x["cat_daily_avg"].rolling(10).mean())
154 | 
155 |     color = metadata.color
156 | 
157 |     # Create some columns for styling the scatter points - mainly in order to
158 |     # differentiate between imputed days and actual days.
159 |     df["scatter_color"] = color.GREY
160 |     df.loc[df["imputed"], "scatter_color"] = color.GREY
161 |     df["scatter_size"] = 10
162 |     df.loc[df["imputed"], "scatter_size"] = 0
163 | 
164 |     with plt.rc_context(
165 |         {
166 |             "xtick.major.pad": 5,
167 |             "font.family": "monospace",
168 |         },
169 |     ):
170 |         fig = plt.figure(figsize=(28, 10))
171 |         ax_dict = fig.subplot_mosaic(LAYOUT)  # type: ignore[arg-type]
172 | 
173 |         # Plot rolling average
174 |         ax_dict["main"].plot(
175 |             df["datestamp"],
176 |             df["r10"],
177 |             color=color.PINK_COLOUR,
178 |             lw=3,
179 |             zorder=10,
180 |         )
181 | 
182 |         # Want to ensure that no daily lines are drawn where data has been imputed -
183 |         # will still create the rolling average line here.
184 |         for _, data in df.assign(groups=df["imputed"].cumsum()).groupby("groups"):
185 |             ax_dict["main"].plot(
186 |                 data["datestamp"].loc[~data["imputed"]],
187 |                 data["cat_daily_avg"].loc[~data["imputed"]],
188 |                 color=color.GREY,
189 |                 lw=1,
190 |                 zorder=5,
191 |             )
192 | 
193 |         ax_dict["main"].scatter(
194 |             df["datestamp"],
195 |             df["cat_daily_avg"],
196 |             color=df["scatter_color"],
197 |             s=df["scatter_size"],
198 |             zorder=5,
199 |         )
200 | 
201 |         ax_dict["main"].set_ylabel("Weight kg")
202 |         ax_dict["main"].xaxis.set_major_locator(mdates.DayLocator(interval=7))  # type: ignore[no-untyped-call]
203 | 
204 |         for label in ax_dict["main"].get_xticklabels():
205 |             label.set_rotation(80)
206 |             label.set_ha("center")  # type: ignore[attr-defined]
207 | 
208 |         # Remove spines for top/right
209 |         ax_dict["main"].spines["top"].set_visible(False)
210 |         ax_dict["main"].spines["right"].set_visible(False)
211 | 
212 |         # Set x-axis dates to just be day/month instead of year day month.
213 |         ax_dict["main"].xaxis.set_major_locator(mdates.DayLocator(interval=7))  # type: ignore[no-untyped-call]
214 |         ax_dict["main"].xaxis.set_major_formatter(mdates.DateFormatter("%d/%m/%y"))  # type: ignore[no-untyped-call]
215 | 
216 |         for label in ax_dict["main"].get_xticklabels():
217 |             label.set_rotation(80)
218 |             label.set_ha("center")
219 | 
220 |         heaviest_idx = df["cat_daily_avg"].idxmax()
221 |         _ = ax_dict["main"].annotate(
222 |             f"{df['cat_daily_avg'].loc[heaviest_idx].round(2)} kg",
223 |             # where the arrow should end up
224 |             xy=(
225 |                 df["datestamp"].iloc[heaviest_idx],
226 |                 df["cat_daily_avg"].iloc[heaviest_idx],
227 |             ),
228 |             # where the text should be
229 |             xytext=(
230 |                 df["datestamp"].iloc[heaviest_idx + 5],
231 |                 df["cat_daily_avg"].iloc[heaviest_idx + 1] + 0.25,
232 |             ),
233 |             ha="center",
234 |             va="bottom",
235 |             arrowprops={
236 |                 "arrowstyle": "->",
237 |                 "connectionstyle": "arc3,rad=0.2",
238 |                 "color": color.PINK_COLOUR,
239 |             },
240 |         )
241 | 
242 |         lightest_idx = df["cat_daily_avg"].idxmin()
243 |         _ = ax_dict["main"].annotate(
244 |             f"{df['cat_daily_avg'].loc[lightest_idx].round(2)} kg",
245 |             # where the arrow should end up
246 |             xy=(
247 |                 df["datestamp"].iloc[lightest_idx],
248 |                 df["cat_daily_avg"].iloc[lightest_idx],
249 |             ),
250 |             # where the text should be
251 |             xytext=(
252 |                 df["datestamp"].iloc[lightest_idx - 5],
253 |                 df["cat_daily_avg"].iloc[lightest_idx],
254 |             ),
255 |             ha="center",
256 |             va="bottom",
257 |             arrowprops={
258 |                 "arrowstyle": "->",
259 |                 "connectionstyle": "arc3,rad=0.35",
260 |                 "color": color.PINK_COLOUR,
261 |             },
262 |         )
263 | 
264 |         ax_dict["main"].grid(linewidth=0.2, which="major", axis="y")
265 | 
266 |         # Put cat picture in top left
267 |         img_path = Path(__file__).parent / "data" / "cat_looking_to_side.jpeg"
268 |         cat_img = Image.open(img_path)
269 |         ax_dict["side"].imshow(cat_img, zorder=10)
270 |         ax_dict["side"].axis("off")
271 | 
272 |         # Remove axis from particular layouts
273 |         for section in {
274 |             x
275 |             for lst in LAYOUT
276 |             for x in lst
277 |             if x
278 |             not in [
279 |                 "main",
280 |             ]
281 |         }:
282 |             ax_dict[section].axis("off")
283 | 
284 |         # Add Title
285 |         data_from = dt.datetime(
286 |             df["datestamp"].min().year,
287 |             df["datestamp"].min().month,
288 |             df["datestamp"].min().day,
289 |             tzinfo=dt.UTC,
290 |         ).strftime("%Y-%m-%d")
291 |         data_to = dt.datetime(
292 |             df["datestamp"].max().year,
293 |             df["datestamp"].max().month,
294 |             df["datestamp"].max().day,
295 |             tzinfo=dt.UTC,
296 |         ).strftime("%Y-%m-%d")
297 | 
298 |         diff = 0.3
299 |         title_x = 0.1
300 |         title_x = 0.0
301 |         ax_dict["title"].text(
302 |             s="Cat Weight",
303 |             x=title_x,
304 |             y=0.5,
305 |             fontsize=FONTSIZE_TITLE,
306 |             horizontalalignment="left",
307 |             verticalalignment="bottom",
308 |         )
309 | 
310 |         ax_dict["title"].text(
311 |             s=f"{data_from} -> {data_to}",
312 |             x=title_x,
313 |             y=0.5 - diff,
314 |             fontsize=FONTSIZE_SUBTITLE,
315 |             horizontalalignment="left",
316 |             verticalalignment="bottom",
317 |             color=COLOR_SUBTITLE_TEXT,
318 |         )
319 | 
320 |         for axis in {x for lst in LAYOUT for x in lst}:
321 |             ax_dict[axis].set_facecolor(metadata.color.BACKGROUND_COLOUR)
322 | 
323 |         fig.set_tight_layout(True)  # type: ignore[attr-defined]
324 |         fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
325 | 
326 |         return fig
327 | 
328 | 
329 | if __name__ == "__main__":
330 |     dvc_entry.add_to_dvc(path=pathlib.Path(__file__))
331 |     save_plot_output.save_plot(fig=main(), file=__file__)
332 |     raise SystemExit
333 | 


--------------------------------------------------------------------------------
/plotting_examples/y2024/metadata.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Metadata for plotting.
 3 | 
 4 | I probably could / should use an rc params file for some of this stuff instead of
 5 | calling from here.
 6 | """
 7 | 
 8 | from __future__ import annotations
 9 | 
10 | from dataclasses import dataclass
11 | 
12 | 
13 | @dataclass
14 | class Colors:
15 | 
16 |     """
17 |     Colors.
18 | 
19 |     https://mycolor.space/?hex=%23FF69B4&sub=1
20 |     """
21 | 
22 |     PINK_COLOUR = "#ff69b4"
23 |     BACKGROUND_COLOUR = "#f2f2f2"
24 |     GREY = "#919191"
25 |     BLUE = "#007FCB"
26 |     LIGHT_GREEN = "#B4EDD2"
27 |     DEEPER_GREEN = "#51B9BE"
28 |     BROWNY_RED = "#554149"
29 |     PURPLEY = "#8F6E9B"
30 |     TAN = "#DDD7C6"
31 |     BLACK = "#000000"
32 | 
33 | 
34 | color = Colors()  # shared palette instance, used as `metadata.color`
35 | 
36 | dir_year = "y2024"  # name of this year's package directory
37 | year = 2024
38 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
  1 | [tool.poetry]
  2 | name = "plotting-examples"
  3 | version = "0.1.0"
  4 | description = "Example plots"
  5 | authors = ["George Lenton <georgelenton@gmail.com>"]
  6 | readme = "README.md"
  7 | packages = [{ include = "plotting_examples" }]
  8 | 
  9 | [tool.poetry.dependencies]
 10 | python = "^3.12"
 11 | dvc = "^3.0.0"
 12 | fastparquet = "^2024.2.0"
 13 | matplotlib = "^3.7.1"
 14 | pyarrow = "^15.0.0"
 15 | seaborn = "^0.13.0"
 16 | geopandas = "^0.14.3"
 17 | pandas = "^2.0.1"
 18 | openpyxl = "^3.1.2"
 19 | 
 20 | [tool.poetry.group.dev.dependencies]
 21 | pre-commit = "^3.3.1"
 22 | ruff = "^0.3.2"
 23 | mypy = "^1.3.0"
 24 | jupyterlab = "^4.1.0"
 25 | pdbpp = "^0.10.3"
 26 | 
 27 | 
 28 | [build-system]
 29 | requires = ["poetry-core"]
 30 | build-backend = "poetry.core.masonry.api"
 31 | 
 32 | 
 33 | [tool.ruff]
 34 | line-length = 88
 35 | 
 36 | [tool.ruff.lint]
 37 | select = ["ALL"]
 38 | ignore = [
 39 |     "ANN101", # Type annotation for 'self'
 40 |     "COM812", # trailing comma - conflicted
 41 |     "ISC001",
 42 |     "D211",   # `one-blank-line-before-class`.
 43 |     "D212",   # warning: `multi-line-summary-first-line`
 44 |     "D401",   # First line of docstring should be in imperative mood: "Main."
 45 |     "FIX002", # Line contains TODO, consider resolving the issue
 46 |     "PD015",  # Use `.merge` method instead of `pd.merge` function. They have equivalent functionality.
 47 |     "PD901",  # Avoid using the generic variable name `df` for DataFrames
 48 |     "TD002",  # Missing author in TODO; try: `# TODO(<author_name>): ...` or `# TODO @<author_name>: ...`
 49 |     "TD003",  # Missing issue link on the line following this TODO
 50 | ]
 51 | fixable = ["ALL"]
 52 | unfixable = []
 53 | # Allow unused variables when underscore-prefixed.
 54 | dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
 55 | 
 56 | 
 57 | [tool.ruff.lint.per-file-ignores]
 58 | "__init__.py" = ["F401"]
 59 | "plotting_examples/y2022/scatter_distributions/plot.py" = [
 60 |     "PLR0913", # Too many arguments in function definition (6 > 5)
 61 |     "S101",    # Use of `assert` detected
 62 | ]
 63 | 
 64 | "plotting_examples/y2022/opinium_barchart/plot.py" = [
 65 |     "PLR0915", # Too many statements (54 > 50)
 66 | ]
 67 | 
 68 | "plotting_examples/y2022/meaningless_points/plot.py" = [
 69 |     "PLR0913", # Too many arguments in function definition (7 > 5)
 70 | ]
 71 | 
 72 | 
 73 | "plotting_examples/y2022/histogram_with_two_variables/plot.py" = [
 74 |     "PLR0913", # Too many arguments in function definition (7 > 5)
 75 |     "FBT001",  #  Boolean-typed positional argument in function definition
 76 | ]
 77 | 
 78 | "plotting_examples/y2022/box_plot_w_scatter_distributions/plot.py" = [
 79 |     "PLR0913", # Too many arguments in function definition (10 > 5)
 80 | ]
 81 | 
 82 | 
 83 | [tool.ruff.lint.flake8-type-checking]
 84 | # Don't want to have things move to TYPE_CHECKING if needed by pydantic.
 85 | runtime-evaluated-base-classes = ["pydantic.BaseModel"]
 86 | 
 87 | [tool.ruff.lint.isort]
 88 | section-order = [
 89 |     "future",
 90 |     "standard-library",
 91 |     "third-party",
 92 |     "first-party",
 93 |     "local-folder",
 94 | ]
 95 | case-sensitive = true
 96 | combine-as-imports = true
 97 | 
 98 | 
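 99 | # Kept for reference - ruff used to emit the warning below; the tables above already use the `lint.` prefix. warning: The top-level linter settings are deprecated in favour of their counterparts in the `lint` section. Please update the following options in `pyproject.toml`: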
100 | #   - 'flake8-type-checking' -> 'lint.flake8-type-checking'
101 | #   - 'isort' -> 'lint.isort'
102 | #   - 'per-file-ignores' -> 'lint.per-file-ignores'
103 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [flake8]
 2 | disable-noqa = True
 3 | max-line-length = 100
 4 | extend-ignore =
 5 |    E203,  # whitespace before : is not PEP8 compliant (& conflicts with black)
 6 | 
 7 |    DAR003,  # Incorrect indentation: ~<
 8 |    DAR102,  # Excess parameter(s) in Docstring: + words_freq[
 9 |    DAR201,  # Missing "Returns" in Docstring: - return
10 |    DAR202,  # Excess "Returns" in Docstring: + return
11 | 
12 | per-file-ignores =
13 |     **/__init__.py:
14 |         # Missing docstring in public package
15 |         D104,
16 | 
17 | [flake8_nb]
18 | disable-noqa = True
19 | max-line-length = 100
20 | extend-ignore =
21 |    E203,  # whitespace before : is not PEP8 compliant (& conflicts with black)
22 | 
23 |    D100,  # Missing docstring in public module
24 |    E402,  # module level import not at top of file
25 | 
26 |    D103,  # Missing docstring in public function
27 |    D104,  # Missing docstring in public package
28 |    D400,  # First line should end with a period
29 |    D403,  # First word of the first line should be properly capitalized
30 |    DAR003,  # Incorrect indentation: ~<
31 |    DAR102,  # Excess parameter(s) in Docstring: + words_freq[
32 |    DAR201,  # Missing "Returns" in Docstring: - return
33 |    DAR202,  # Excess "Returns" in Docstring: + return
34 |    E231,  # missing whitespace after ','
35 |    F401,  # '...' imported but unused
36 |    F811,  # redefinition of unused '..' from line 7
37 |    F821,  # undefined name '...'
38 | 
39 | [darglint]
40 | strictness=long
41 | 


--------------------------------------------------------------------------------
/work.sh:
--------------------------------------------------------------------------------
 1 | # simple script to run a few things when working on stuff.
 2 | poetry run dvc repro dvc.yaml
 3 | poetry run mypy --strict .
 4 | poetry run pre-commit run --all-files
 5 | poetry run python -m generate_readme
 6 | 
 7 | # add changes to dvc.lock if there are any. `git diff --quiet` exits non-zero only
 8 | # when the file differs from HEAD, so nothing is staged or committed otherwise.
 9 | git diff --quiet HEAD -- dvc.lock || { git add dvc.lock && git commit -m 'update dvc.lock'; }
10 | 
11 | # automatically add changes to image files, committing only if something got staged
12 | git diff --name-only --diff-filter=dM HEAD | grep -E '.*images.*\.png$' | xargs -r git add
13 | git diff --cached --quiet || git commit -m 'updated generated image'
14 | 


--------------------------------------------------------------------------------